Aggregation slow

Hi everyone,

My aggregation query is extremely slow. Even knowing that I have 22 million rows per collection I believe there is something wrong with my indexes.

This is the query:

db.getCollection("SourceRecon").aggregate(
    [
        { 
            "$match" : { 
                "DtoName" : "CashflowInventory"
            }
        }, 
        { 
            "$match" : { 
                "BusinessDate" : "20200703"
            }
        }, 
        { 
            "$match" : { 
                "SourceSystem" : "TRM"
            }
        }, 
        { 
            "$lookup" : { 
                "from" : "FileRecon", 
                "localField" : "PrimaryKey", 
                "foreignField" : "PrimaryKey", 
                "as" : "FileRecon"
            }
        }, 
        { 
            "$unwind" : { 
                "path" : "$FileRecon", 
                "preserveNullAndEmptyArrays" : true
            }
        }, 
        { 
            "$match" : { 
                "$expr" : { 
                    "$ne" : [
                        "$Hash", 
                        "$FileRecon.Hash"
                    ]
                }
            }
        }, 
        { 
            "$match" : { 
                "FileRecon.Hash" : { 
                    "$exists" : true
                }
            }
        }, 
        { 
            "$project" : { 
                "_id" : 0.0, 
                "NoSQLSourceStructure.Hash" : "$Hash", 
                "NoSQLSourceStructure.LstHashColumns" : "$LstHashColumns", 
                "NoSQLFilePreparedStructure.Hash" : "$FileRecon.Hash", 
                "NoSQLFilePreparedStructure.LstHashColumns" : "$FileRecon.LstHashColumns"
            }
        }
    ], 
    { 
        "allowDiskUse" : false
    }
);

This is the explain where I can see rejected plans. Is that the reason why is so slow?

{ 
    "stages" : [
        { 
            "$cursor" : { 
                "query" : { 
                    "$and" : [
                        { 
                            "$and" : [
                                { 
                                    "DtoName" : "CashflowInventory"
                                }, 
                                { 
                                    "BusinessDate" : "20200703"
                                }
                            ]
                        }, 
                        { 
                            "SourceSystem" : "TRM"
                        }
                    ]
                }, 
                "fields" : { 
                    "FileRecon.Hash" : NumberInt(1), 
                    "FileRecon.LstHashColumns" : NumberInt(1), 
                    "Hash" : NumberInt(1), 
                    "LstHashColumns" : NumberInt(1), 
                    "NoSQLFilePreparedStructure" : NumberInt(1), 
                    "NoSQLSourceStructure" : NumberInt(1), 
                    "PrimaryKey" : NumberInt(1), 
                    "_id" : NumberInt(0)
                }, 
                "queryPlanner" : { 
                    "plannerVersion" : NumberInt(1), 
                    "namespace" : "EibIrrBb_Recon.SourceRecon", 
                    "indexFilterSet" : false, 
                    "parsedQuery" : { 
                        "$and" : [
                            { 
                                "BusinessDate" : { 
                                    "$eq" : "20200703"
                                }
                            }, 
                            { 
                                "DtoName" : { 
                                    "$eq" : "CashflowInventory"
                                }
                            }, 
                            { 
                                "SourceSystem" : { 
                                    "$eq" : "TRM"
                                }
                            }
                        ]
                    }, 
                    "queryHash" : "4E57EB9A", 
                    "planCacheKey" : "B1C176CE", 
                    "winningPlan" : { 
                        "stage" : "FETCH", 
                        "inputStage" : { 
                            "stage" : "IXSCAN", 
                            "keyPattern" : { 
                                "BusinessDate" : NumberInt(-1), 
                                "SourceSystem" : NumberInt(1), 
                                "DtoName" : NumberInt(1)
                            }, 
                            "indexName" : "BusinessDate_-1_SourceSystem_1_DtoName_1", 
                            "isMultiKey" : false, 
                            "multiKeyPaths" : { 
                                "BusinessDate" : [

                                ], 
                                "SourceSystem" : [

                                ], 
                                "DtoName" : [

                                ]
                            }, 
                            "isUnique" : false, 
                            "isSparse" : false, 
                            "isPartial" : false, 
                            "indexVersion" : NumberInt(2), 
                            "direction" : "forward", 
                            "indexBounds" : { 
                                "BusinessDate" : [
                                    "[\"20200703\", \"20200703\"]"
                                ], 
                                "SourceSystem" : [
                                    "[\"TRM\", \"TRM\"]"
                                ], 
                                "DtoName" : [
                                    "[\"CashflowInventory\", \"CashflowInventory\"]"
                                ]
                            }
                        }
                    }, 
                    "rejectedPlans" : [
                        { 
                            "stage" : "FETCH", 
                            "inputStage" : { 
                                "stage" : "IXSCAN", 
                                "keyPattern" : { 
                                    "BusinessDate" : NumberInt(-1), 
                                    "SourceSystem" : NumberInt(1), 
                                    "DtoName" : NumberInt(1), 
                                    "PrimaryKey" : NumberInt(1)
                                }, 
                                "indexName" : "BusinessDate_-1_SourceSystem_1_DtoName_1_PrimaryKey_1", 
                                "isMultiKey" : false, 
                                "multiKeyPaths" : { 
                                    "BusinessDate" : [

                                    ], 
                                    "SourceSystem" : [

                                    ], 
                                    "DtoName" : [

                                    ], 
                                    "PrimaryKey" : [

                                    ]
                                }, 
                                "isUnique" : false, 
                                "isSparse" : false, 
                                "isPartial" : false, 
                                "indexVersion" : NumberInt(2), 
                                "direction" : "forward", 
                                "indexBounds" : { 
                                    "BusinessDate" : [
                                        "[\"20200703\", \"20200703\"]"
                                    ], 
                                    "SourceSystem" : [
                                        "[\"TRM\", \"TRM\"]"
                                    ], 
                                    "DtoName" : [
                                        "[\"CashflowInventory\", \"CashflowInventory\"]"
                                    ], 
                                    "PrimaryKey" : [
                                        "[MinKey, MaxKey]"
                                    ]
                                }
                            }
                        }
                    ]
                }
            }
        }, 
        { 
            "$lookup" : { 
                "from" : "FileRecon", 
                "as" : "FileRecon", 
                "localField" : "PrimaryKey", 
                "foreignField" : "PrimaryKey", 
                "unwinding" : { 
                    "preserveNullAndEmptyArrays" : true
                }
            }
        }, 
        { 
            "$match" : { 
                "$and" : [
                    { 
                        "$expr" : { 
                            "$ne" : [
                                "$Hash", 
                                "$FileRecon.Hash"
                            ]
                        }
                    }, 
                    { 
                        "FileRecon.Hash" : { 
                            "$exists" : true
                        }
                    }
                ]
            }
        }, 
        { 
            "$project" : { 
                "_id" : false, 
                "NoSQLSourceStructure" : { 
                    "Hash" : "$Hash", 
                    "LstHashColumns" : "$LstHashColumns"
                }, 
                "NoSQLFilePreparedStructure" : { 
                    "Hash" : "$FileRecon.Hash", 
                    "LstHashColumns" : "$FileRecon.LstHashColumns"
                }
            }
        }
    ], 
    "ok" : 1.0
}

Merge the first three match stages into a single match stage:-

$match:{
"DtoName" : "CashflowInventory",
"BusinessDate" : "20200703",
"SourceSystem" : "TRM"}

Same for the second set of matches used after unwind stage.

In the project stage, _id should be having a value “0” only.

Finally, you have set allowDiskUse to false, for an aggregation query involving 22 million documents. That’s fine, but you should understand that when you use the unwind stage, each and every key in the “Filerecon” array is converted into a single document and then aggregated upon. The 22million rows increases to quite a number when you’re doing that.

An alternative to this is to use a project stage instead of an unwind stage, which you can directly use without involving match stages(in some cases). However, you haven’t posted any details regarding the index you’re using, neither have you posted an example data set without which it becomes impossible for me to provide further suggestions.

1 Like