I’m trying to delete duplicates from my database. The collection has just passed the 500k-document mark, so the aggregation now fails with an error saying it exceeded the memory limit and that I need to allow disk use. So I pass allowDiskUse = True. But… nothing changes. Same error.
def deleteDups(datab):
    """Delete duplicate documents from the collection named *datab*.

    Documents count as duplicates when they share the same ``CASE NUMBER``
    and ``JURISDICTION`` values. For every such group the first ``_id``
    collected by the aggregation is kept and all the others are deleted.
    Each duplicate group and the final number of deleted documents are
    printed.

    Args:
        datab: Name of the collection, looked up on the module-level ``db``.

    Raises:
        pymongo.errors.OperationFailure: If the server rejects the
            aggregation or a delete command.
    """
    col = db[datab]
    pipeline = [
        {'$group': {
            '_id': {
                'CASE NUMBER': '$CASE NUMBER',
                # TODO: grouping should be case-insensitive.
                'JURISDICTION': '$JURISDICTION',
            },
            'count': {'$sum': 1},
            'ids': {'$push': '$_id'},
        }},
        # Only groups with more than one member contain duplicates.
        {'$match': {'count': {'$gt': 1}}},
    ]
    # allowDiskUse lets $group spill to temporary files instead of failing at
    # the in-memory limit.
    # NOTE(review): if the server still answers "didn't allow external sort"
    # with this flag set, the deployment itself may not honour it (MongoDB
    # Atlas free/shared-tier clusters reportedly do not) -- confirm against
    # the cluster tier before changing this code.
    results = col.aggregate(pipeline, allowDiskUse=True)

    count = 0
    for result in results:
        print(result)
        # Keep the first document of the group; everything after it goes.
        duplicate_ids = result['ids'][1:]
        if not duplicate_ids:
            continue
        # One round trip per group instead of one per duplicate document.
        outcome = col.delete_many({'_id': {'$in': duplicate_ids}})
        # Count what the server actually removed, not what was requested.
        count += outcome.deleted_count
    print("Total documents deleted:", count)
It errors out on the line results = col.aggregate(pipeline, allowDiskUse = True), with this traceback:
File "C:\Users\Laura\Documents\GitHub\Ant\controller.py", line 202, in deleteDups
results = col.aggregate(pipeline, allowDiskUse = True)
File "C:\Python38\lib\site-packages\pymongo\collection.py", line 2375, in aggregate
return self._aggregate(_CollectionAggregationCommand,
File "C:\Python38\lib\site-packages\pymongo\collection.py", line 2297, in _aggregate
return self.__database.client._retryable_read(
File "C:\Python38\lib\site-packages\pymongo\mongo_client.py", line 1464, in _retryable_read
return func(session, server, sock_info, slave_ok)
File "C:\Python38\lib\site-packages\pymongo\aggregation.py", line 136, in get_cursor
result = sock_info.command(
File "C:\Python38\lib\site-packages\pymongo\pool.py", line 603, in command
return command(self.sock, dbname, spec, slave_ok,
File "C:\Python38\lib\site-packages\pymongo\network.py", line 165, in command
helpers._check_command_response(
File "C:\Python38\lib\site-packages\pymongo\helpers.py", line 159, in _check_command_response
raise OperationFailure(msg % errmsg, code, response)
pymongo.errors.OperationFailure: Exceeded memory limit for $group, but didn't allow external sort. Pass allowDiskUse:true to opt in.
I can’t even right now. I’m expecting this to be something dumb, but for the life of me I can’t see it. Thank you.