Hello Everyone!
I’m having an issue similar to the one described in “MongoDB update_many and limit”.
I have a database with 17 million records, and I would like to select a range of 5k records from this collection that match a single query. However, I cannot even fetch the first record, because the database terminates abnormally (abends). I’m running on AWS, on a t3.xlarge instance with replication.
This is the query that I’m running in Python:
# Select contacts of customer 4275 that are on list '33854', have
# opt_out == False, whose status is none of the listed failure codes,
# that carry no custom field (k='rule_id', v=301) and have no campaign
# entry with id 112129.
# The original filter constrained customer_id twice (an equality and a
# one-element $in); the equality alone selects the same documents.
# The Python 2 `L` suffixes were dropped so the literals also parse on
# Python 3.
query = {'$and': [
    {'tag_list': '33854'},
    {'customs': {'$not': {'$elemMatch': {'k': 'rule_id', 'v': 301}}}},
    {'customer_id': 4275},
    {'status': {'$nin': ['invalid_domain', 'inexistent_address',
                         'mailbox_full', 'smsfail', 'whatsappfail']}},
    {'opt_out': False},
    {'campaigns': {'$not': {'$elemMatch': {'id': 112129}}}},
]}

# Window of the result set to process. Slicing a PyMongo cursor
# (`cursor[a:b]`) sets skip=a and limit=b-a and replaces any earlier
# .limit() call, so the original `.limit(5000)[1100001:1149999]` never
# applied the 5000 limit. The effective skip/limit are spelled out here.
window_start = 1100001
window_stop = 1149999

# NOTE(review): skip() makes the server walk past `window_start` matching
# entries before it returns the first document, so this offset is a likely
# cause of the slowness. Paging on the last seen _id
# (`{'_id': {'$gt': last_id}}` with a sort on _id) would avoid it --
# confirm with explain() before changing the paging strategy.
cursor = (
    Contact._get_collection()
    # Only _id is needed to address the documents in the bulk update.
    .find(query, {'_id': 1})
    .hint([('customer_id', 1), ('tag_list', 1), ('status', 1), ('opt_out', 1)])
    .batch_size(5000)
    .skip(window_start)
    .limit(window_stop - window_start)
)

# `bulk_operations`, `campaign_id` and `update_exec` are not defined in
# this snippet; they are expected to exist in the surrounding code.
for c in cursor:
    bulk_operations.append(
        UpdateOne(
            {
                # Read the id straight from the raw document instead of
                # building a full Contact object for every row.
                '_id': c['_id'],
                # Re-check at write time that the campaign is still absent.
                'campaigns': {'$not': {'$elemMatch': {'id': campaign_id}}},
            },
            update_exec,
        )
    )

# bulk_write() raises InvalidOperation when given no operations, so only
# call it when the window actually produced something to update.
results = None
if bulk_operations:
    results = Contact._get_collection().bulk_write(bulk_operations, ordered=False)
I’m using mongoengine to define my class; its attributes are shown below:
# Create your models here.
class Contact(mongo.DynamicDocument):
    """A customer's contact, with its delivery status, lists and campaigns.

    Declared as a dynamic document, so attributes that are not listed
    below can still be stored. NOTE(review): `opt_out` is referenced by
    the indexes but not declared here -- presumably it is one of those
    dynamic attributes; confirm.
    """

    # (stored value, display label) pairs accepted by `status`.
    # `_` is presumably a gettext-style translation function -- its import
    # is not visible in this snippet.
    STATUS_CHOICES = (('ok', _('Ativo')),
                      ('mx', _('Falha na entrega')),
                      ('invalid_domain', _('Dominio inválido')),
                      ('inexistent_address', _('E-mail não existe')),
                      ('mailbox_full', _('Caixa cheia')),
                      ('size_limit', _('Limite da mensagem excedido')),
                      ('mail_loop', _('E-mail em loop')),
                      ('spam', _('Spam')),
                      ('unknown', _('Erro desconhecido')),
                      ('complaint', _('Reclamação')),
                      ('abuse', _('Denúncia de Abuse')),
                      ('smsfail', _('Falha na Entrega de SMS')),
                      ('whatsappfail', _('Falha na Entrega de WhatsApp')))

    # Owning customer; unique together with `email`.
    customer_id = mongo.IntField(verbose_name=_(u'Cliente'), unique_with='email')
    name = mongo.StringField(max_length=255, verbose_name=_(u'Nome'))
    email = mongo.EmailField(verbose_name=_(u'E-mail'))
    # One dict per campaign; the queries in this thread match on its 'id' key.
    campaigns = mongo.ListField(mongo.DictField(), verbose_name=_(u'Campanhas que o usuário participou'))
    # Custom fields; the queries and the last index use the keys 'k' and 'v'.
    customs = mongo.ListField(mongo.DictField(), verbose_name=_(u'Campos customizados do cliente'))
    # max_length was the string '10': the length check then compares an int
    # with a str, and 10 would also be shorter than several status codes
    # above (the longest, 'inexistent_address', has 18 characters). Use an
    # integer that fits every choice.
    status = mongo.StringField(choices=STATUS_CHOICES, default='ok', max_length=32, verbose_name=_(u'Status do E-mail'))
    date_created = mongo.DateTimeField(verbose_name=_(u'Criado em'))
    last_updated = mongo.DateTimeField(verbose_name=_(u'Última atualização em'))
    # Identifiers of the lists this contact belongs to.
    tag_list = mongo.ListField(mongo.StringField(), verbose_name=_(u'Listas que o contato faz parte'))

    meta = {
        'index_background': True,
        # NOTE(review): dropping duplicates at index build time is presumably
        # ignored by current MongoDB/mongoengine versions -- confirm.
        'index_drop_dups': True,
        'indexes': [
            ('customer_id', 'tag_list', 'status', 'opt_out'),
            # NOTE(review): this index is a prefix of the one above, so it
            # looks redundant -- confirm before removing it.
            ('customer_id', 'tag_list', 'status'),
            ('customer_id', 'tag_list', 'opt_out'),
            ('customer_id', 'customs.k', 'customs.v', 'status', 'opt_out')
        ],
    }

    # NOTE(review): Django-style options; presumably read by a Django
    # integration layer rather than by mongoengine itself -- confirm.
    class Meta:
        using = 'mongodb'
        verbose_name = _(u'Contato')
        verbose_name_plural = _(u'Contatos')
I must confess that I’ve tried everything to solve this performance issue; I’m almost ready to offer a bounty to anyone who can help solve this problem.
Any help will be greatly appreciated.
Thanks so much!