Hi everyone,
My aggregation query is extremely slow. Even knowing that I have 22 million rows per collection I believe there is something wrong with my indexes.
This is the query:
db.getCollection("SourceRecon").aggregate(
[
{
"$match" : {
"DtoName" : "CashflowInventory"
}
},
{
"$match" : {
"BusinessDate" : "20200703"
}
},
{
"$match" : {
"SourceSystem" : "TRM"
}
},
{
"$lookup" : {
"from" : "FileRecon",
"localField" : "PrimaryKey",
"foreignField" : "PrimaryKey",
"as" : "FileRecon"
}
},
{
"$unwind" : {
"path" : "$FileRecon",
"preserveNullAndEmptyArrays" : true
}
},
{
"$match" : {
"$expr" : {
"$ne" : [
"$Hash",
"$FileRecon.Hash"
]
}
}
},
{
"$match" : {
"FileRecon.Hash" : {
"$exists" : true
}
}
},
{
"$project" : {
"_id" : 0.0,
"NoSQLSourceStructure.Hash" : "$Hash",
"NoSQLSourceStructure.LstHashColumns" : "$LstHashColumns",
"NoSQLFilePreparedStructure.Hash" : "$FileRecon.Hash",
"NoSQLFilePreparedStructure.LstHashColumns" : "$FileRecon.LstHashColumns"
}
}
],
{
"allowDiskUse" : false
}
);
This is the explain where I can see rejected plans. Is that the reason why is so slow?
{
"stages" : [
{
"$cursor" : {
"query" : {
"$and" : [
{
"$and" : [
{
"DtoName" : "CashflowInventory"
},
{
"BusinessDate" : "20200703"
}
]
},
{
"SourceSystem" : "TRM"
}
]
},
"fields" : {
"FileRecon.Hash" : NumberInt(1),
"FileRecon.LstHashColumns" : NumberInt(1),
"Hash" : NumberInt(1),
"LstHashColumns" : NumberInt(1),
"NoSQLFilePreparedStructure" : NumberInt(1),
"NoSQLSourceStructure" : NumberInt(1),
"PrimaryKey" : NumberInt(1),
"_id" : NumberInt(0)
},
"queryPlanner" : {
"plannerVersion" : NumberInt(1),
"namespace" : "EibIrrBb_Recon.SourceRecon",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"BusinessDate" : {
"$eq" : "20200703"
}
},
{
"DtoName" : {
"$eq" : "CashflowInventory"
}
},
{
"SourceSystem" : {
"$eq" : "TRM"
}
}
]
},
"queryHash" : "4E57EB9A",
"planCacheKey" : "B1C176CE",
"winningPlan" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"BusinessDate" : NumberInt(-1),
"SourceSystem" : NumberInt(1),
"DtoName" : NumberInt(1)
},
"indexName" : "BusinessDate_-1_SourceSystem_1_DtoName_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"BusinessDate" : [
],
"SourceSystem" : [
],
"DtoName" : [
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : NumberInt(2),
"direction" : "forward",
"indexBounds" : {
"BusinessDate" : [
"[\"20200703\", \"20200703\"]"
],
"SourceSystem" : [
"[\"TRM\", \"TRM\"]"
],
"DtoName" : [
"[\"CashflowInventory\", \"CashflowInventory\"]"
]
}
}
},
"rejectedPlans" : [
{
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"BusinessDate" : NumberInt(-1),
"SourceSystem" : NumberInt(1),
"DtoName" : NumberInt(1),
"PrimaryKey" : NumberInt(1)
},
"indexName" : "BusinessDate_-1_SourceSystem_1_DtoName_1_PrimaryKey_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"BusinessDate" : [
],
"SourceSystem" : [
],
"DtoName" : [
],
"PrimaryKey" : [
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : NumberInt(2),
"direction" : "forward",
"indexBounds" : {
"BusinessDate" : [
"[\"20200703\", \"20200703\"]"
],
"SourceSystem" : [
"[\"TRM\", \"TRM\"]"
],
"DtoName" : [
"[\"CashflowInventory\", \"CashflowInventory\"]"
],
"PrimaryKey" : [
"[MinKey, MaxKey]"
]
}
}
}
]
}
}
},
{
"$lookup" : {
"from" : "FileRecon",
"as" : "FileRecon",
"localField" : "PrimaryKey",
"foreignField" : "PrimaryKey",
"unwinding" : {
"preserveNullAndEmptyArrays" : true
}
}
},
{
"$match" : {
"$and" : [
{
"$expr" : {
"$ne" : [
"$Hash",
"$FileRecon.Hash"
]
}
},
{
"FileRecon.Hash" : {
"$exists" : true
}
}
]
}
},
{
"$project" : {
"_id" : false,
"NoSQLSourceStructure" : {
"Hash" : "$Hash",
"LstHashColumns" : "$LstHashColumns"
},
"NoSQLFilePreparedStructure" : {
"Hash" : "$FileRecon.Hash",
"LstHashColumns" : "$FileRecon.LstHashColumns"
}
}
}
],
"ok" : 1.0
}