Optimization of pulling "latest record earlier than X" in a large (500 million record) collection using index

Say I have a 4 field collection:

sku (string)
warehouse (string)
record_time (ISODate)
qty (int)

I would like to query the data at a “snapshot” in time. For example, I may say “show me the qty of everything as of Jan 1, 2020”. The data is such that some combinations of sku + warehouse may not have any entries for days/months.

First, I see that mongo has a recommended practice here:
https://docs.mongodb.com/master/reference/operator/aggregation/group/#group-pipeline-optimization
(jira ticket https://jira.mongodb.org/browse/SERVER-9507)

So, I have something like the following index:

db.test.createIndex({
  sku: 1,
  warehouse: 1,
  record_time: -1,
});

So, to get a specific SKU + warehouse at certain time, I could run:

db.test.aggregate([
  {$match: {sku: 'a', warehouse: 'a', record_time: {$lte: ISODate('2020-01-01T00:00:00')}}},
  {$sort: {record_time: -1}},
  {$limit: 1}
])

If I want to get all distinct warehouse + SKU, then:

db.test.aggregate([
  {$match: {record_time: {$lte: ISODate('2020-01-01T00:00:00')}}},
  {$sort: {
    sku: 1,
    warehouse: 1,
    record_time: -1,
  }},
  {$group: {
    _id: {
      sku: '$sku',
      warehouse: '$warehouse',
    },
    qty: {'$first': '$qty'},
    last_record_time: {'$first': '$record_time'}
  }},
], {allowDiskUse: true});

Which would give me the data I want. However, this query runs VERY SLOWLY (10+ minutes on an m50 in atlas)

I can see one problem in that the $match is running off of “record_time”, which isn’t first order indexed, so it’s probably causing a large part of the slowdown. However, if I just remove the $match portion, the query takes just as long to run.

Based on my desired outcome, is there a different way to structure the data/indexes to allow for the query “give me the latest entry before ISODate X for every sku + warehouse combination” to be run in a reasonable timeframe?

Thank you for any advice.

I am using Mongo 4.2.3

Explain output from the aggregation

db.test.aggregate([
  {$sort: {
    sku: 1,
    warehouse: 1,
    record_time: -1,
  }},
  {$group: {
    _id: {
      sku: '$sku',
      warehouse: '$warehouse',
    },
    qty: {'$first': '$qty'},
    record_time: {'$first': '$record_time'}
  }},
], {allowDiskUse: true, explain: true});
{
	"stages" : [
		{
			"$cursor" : {
				"query" : {
					
				},
				"sort" : {
					"sku" : 1,
					"warehouse" : 1,
					"record_time" : -1
				},
				"fields" : {
					"qty" : 1,
					"record_time" : 1,
					"sku" : 1,
					"warehouse" : 1,
					"_id" : 0
				},
				"queryPlanner" : {
					"plannerVersion" : 1,
					"namespace" : "test.test",
					"indexFilterSet" : false,
					"parsedQuery" : {
						
					},
					"queryHash" : "E47CEE36",
					"planCacheKey" : "E47CEE36",
					"winningPlan" : {
						"stage" : "FETCH",
						"inputStage" : {
							"stage" : "IXSCAN",
							"keyPattern" : {
								"sku" : 1,
								"warehouse" : 1,
								"record_time" : -1
							},
							"indexName" : "sku_1_warehouse_1_record_time_-1",
							"isMultiKey" : false,
							"multiKeyPaths" : {
								"sku" : [ ],
								"warehouse" : [ ],
								"record_time" : [ ]
							},
							"isUnique" : false,
							"isSparse" : false,
							"isPartial" : false,
							"indexVersion" : 2,
							"direction" : "forward",
							"indexBounds" : {
								"sku" : [
									"[MinKey, MaxKey]"
								],
								"warehouse" : [
									"[MinKey, MaxKey]"
								],
								"record_time" : [
									"[MaxKey, MinKey]"
								]
							}
						}
					},
					"rejectedPlans" : [ ]
				}
			}
		},
		{
			"$group" : {
				"_id" : {
					"sku" : "$sku",
					"warehouse" : "$warehouse"
				},
				"qty" : {
					"$first" : "$qty"
				},
				"last_record_time" : {
					"$first" : "$record_time"
				}
			}
		}
	],
}

I tried adding qty to the index so that the query could utilize a “covered index”, but it had minimal impact.

{
	"stages" : [
		{
			"$cursor" : {
				"query" : {
					
				},
				"sort" : {
					"sku" : 1,
					"warehouse" : 1,
					"record_time" : -1
				},
				"fields" : {
					"qty" : 1,
					"record_time" : 1,
					"sku" : 1,
					"warehouse" : 1,
					"_id" : 0
				},
				"queryPlanner" : {
					"plannerVersion" : 1,
					"namespace" : "test.test",
					"indexFilterSet" : false,
					"parsedQuery" : {
						
					},
					"queryHash" : "28987361",
					"planCacheKey" : "28987361",
					"winningPlan" : {
						"stage" : "PROJECTION_COVERED",
						"transformBy" : {
							"qty" : 1,
							"record_time" : 1,
							"sku" : 1,
							"warehouse" : 1,
							"_id" : 0
						},
						"inputStage" : {
							"stage" : "IXSCAN",
							"keyPattern" : {
								"sku" : 1,
								"warehouse" : 1,
								"record_time" : -1,
								"qty" : 1
							},
							"indexName" : "sku_1_warehouse_1_record_time_-1_qty_1",
							"isMultiKey" : false,
							"multiKeyPaths" : {
								"sku" : [ ],
								"warehouse" : [ ],
								"record_time" : [ ],
								"qty" : [ ]
							},
							"isUnique" : false,
							"isSparse" : false,
							"isPartial" : false,
							"indexVersion" : 2,
							"direction" : "forward",
							"indexBounds" : {
								"sku" : [
									"[MinKey, MaxKey]"
								],
								"warehouse" : [
									"[MinKey, MaxKey]"
								],
								"record_time" : [
									"[MaxKey, MinKey]"
								],
								"qty" : [
									"[MinKey, MaxKey]"
								]
							}
						}
					},
					"rejectedPlans" : [ ]
				}
			}
		},
		{
			"$group" : {
				"_id" : {
					"sku" : "$sku",
					"warehouse" : "$warehouse"
				},
				"qty" : {
					"$first" : "$qty"
				},
				"record_time" : {
					"$first" : "$record_time"
				}
			}
		}
	]
}

If I want to get all distinct warehouse + SKU, then:

db.test.aggregate([
  { $match: { record_time: { $lte: ISODate('2020-01-01T00:00:00') } } },
...

With the available index { sku: 1, warehouse: 1, record_time: -1 } the aggregation query will not be able to apply it for the $match stage. Having the $match stage at the beginning of the pipeline is very good, but without an index it is a very slow query with all that data.

I think defining another index only on the record_time field is the right approach. I am sure there will be performance gain (how much improvement depends upon the data). I suggest you try this approach on a sample data set. Generate the query plans before and after creating the new index, and use the “executionStats” mode with the explain().

Reference: Compound Index prefixes

Yes, setting up an index on record_time will be a great help.

Then I don’t think you need to sort en sku and warehouse but only on record_time because what is important is to get values in reverse record_time values. For sku and warehouse, the $group will do the job.

So you may try :

db.test.createIndex({ record_time: 1 })

db.test.aggregate([ 
  {$match: {record_time: {$lte: ISODate('2020-01-01T00:00:00')}}}, 
  {$sort: { record_time: -1 }}, 
  {$group: { _id: { sku: '$sku', warehouse: '$warehouse', }, 
             qty: {'$first': '$qty'}, 
             last_record_time: {'$first': '$record_time'} }} ], 
{allowDiskUse: true});

@RemiJ @Prasad_Saya
Ya, I should have not even mentioned the $match stage you are talking about. If you re-read my post, you can see that the query slowness is not because of this match:

In the explain plans that follow, I removed the $match anyway.

Regardless, thanks for at least replying. I ended up building a process to recombine data in hourly snapshots, and “cross fill” records for datehours during which there was no delta, so that the query could just be run against a single hour without sorting to get the latest record.

I’d be interested to hear if anyone has a similar type of data set and use case (insert-only collection of events, and grouping by “last event per group id before time X”).

Hi nefiga,

I have similar issue on a collection with 2.6 millions of documents

I want to group plan (with at 10 electrics elements or more) per user with the last plan updated at first position in the group

The first $match returns approximately 75.000 documents

I created indexes data.summary.elecScore_1 and updated_at_-1

cluster info: M30 (replica set 3 nodes)

db.getCollection("test").explain("executionStats").aggregate(
    [
        { 
            "$match" : { 
                "data.summary.elecScore" : { 
                    "$gte" : 10
                }
            }
        },
        { 
            "$sort" : { 
                "updated_at" : -1
            }
        }, 
        { 
            "$group" : { 
                "_id" : "$user.user_id", 
                "plan_id" : { 
                    "$first" : "$plan_id"
                }, 
                "habitable_area" : { 
                    "$first" : "$data.summary.habitableArea"
                }, 
                "last_created_at" : { 
                    "$first" : "$created_at"
                }, 
                "last_updated_at" : { 
                    "$first" : "$updated_at"
                }, 
                "count_plans" : { 
                    "$sum" : NumberInt(1)
                }, 
                "total_elecs" : { 
                    "$sum" : "$data.summary.elecScore"
                }
            }
        }, 
        { 
            "$project" : { 
                "_id" : 0, 
                "user_id" : "$_id", 
                "plan_id" : 1, 
                "habitable_area" : 1, 
                "last_created_at" : 1, 
                "last_updated_at" : 1, 
                "count_plans" : 1, 
                "total_elecs" : 1
            }
        }
    ], 
    { 
        "allowDiskUse" : true
    }
);

explain:

{
	"stages" : [
		{
			"$cursor" : {
				"query" : {
					"data.summary.elecScore" : {
						"$gte" : 10
					}
				},
				"sort" : {
					"updated_at" : -1
				},
				"fields" : {
					"created_at" : 1,
					"data.summary.elecScore" : 1,
					"data.summary.habitableArea" : 1,
					"plan_id" : 1,
					"updated_at" : 1,
					"user.user_id" : 1,
					"_id" : 0
				},
				"queryPlanner" : {
					"plannerVersion" : 1,
					"namespace" : "kazadata.test",
					"indexFilterSet" : false,
					"parsedQuery" : {
						"data.summary.elecScore" : {
							"$gte" : 10
						}
					},
					"winningPlan" : {
						"stage" : "FETCH",
						"filter" : {
							"data.summary.elecScore" : {
								"$gte" : 10
							}
						},
						"inputStage" : {
							"stage" : "IXSCAN",
							"keyPattern" : {
								"updated_at" : 1
							},
							"indexName" : "updated_at_1",
							"isMultiKey" : false,
							"multiKeyPaths" : {
								"updated_at" : [ ]
							},
							"isUnique" : false,
							"isSparse" : false,
							"isPartial" : false,
							"indexVersion" : 2,
							"direction" : "backward",
							"indexBounds" : {
								"updated_at" : [
									"[MaxKey, MinKey]"
								]
							}
						}
					},
					"rejectedPlans" : [ ]
				},
				"executionStats" : {
					"executionSuccess" : true,
					"nReturned" : 76865,
					"executionTimeMillis" : 732025,
					"totalKeysExamined" : 2528539,
					"totalDocsExamined" : 2528539,
					"executionStages" : {
						"stage" : "FETCH",
						"filter" : {
							"data.summary.elecScore" : {
								"$gte" : 10
							}
						},
						"nReturned" : 76865,
						"executionTimeMillisEstimate" : 715211,
						"works" : 2528540,
						"advanced" : 76865,
						"needTime" : 2451674,
						"needYield" : 0,
						"saveState" : 39376,
						"restoreState" : 39376,
						"isEOF" : 1,
						"invalidates" : 0,
						"docsExamined" : 2528539,
						"alreadyHasObj" : 0,
						"inputStage" : {
							"stage" : "IXSCAN",
							"nReturned" : 2528539,
							"executionTimeMillisEstimate" : 6944,
							"works" : 2528540,
							"advanced" : 2528539,
							"needTime" : 0,
							"needYield" : 0,
							"saveState" : 39376,
							"restoreState" : 39376,
							"isEOF" : 1,
							"invalidates" : 0,
							"keyPattern" : {
								"updated_at" : 1
							},
							"indexName" : "updated_at_1",
							"isMultiKey" : false,
							"multiKeyPaths" : {
								"updated_at" : [ ]
							},
							"isUnique" : false,
							"isSparse" : false,
							"isPartial" : false,
							"indexVersion" : 2,
							"direction" : "backward",
							"indexBounds" : {
								"updated_at" : [
									"[MaxKey, MinKey]"
								]
							},
							"keysExamined" : 2528539,
							"seeks" : 1,
							"dupsTested" : 0,
							"dupsDropped" : 0,
							"seenInvalidated" : 0
						}
					}
				}
			}
		},
		{
			"$group" : {
				"_id" : "$user.user_id",
				"plan_id" : {
					"$first" : "$plan_id"
				},
				"habitable_area" : {
					"$first" : "$data.summary.habitableArea"
				},
				"last_created_at" : {
					"$first" : "$created_at"
				},
				"last_updated_at" : {
					"$first" : "$updated_at"
				},
				"count_plans" : {
					"$sum" : {
						"$const" : 1
					}
				},
				"total_elecs" : {
					"$sum" : "$data.summary.elecScore"
				}
			}
		},
		{
			"$project" : {
				"_id" : false,
				"count_plans" : true,
				"habitable_area" : true,
				"plan_id" : true,
				"last_created_at" : true,
				"total_elecs" : true,
				"last_updated_at" : true,
				"user_id" : "$_id"
			}
		}
	],
	"serverInfo" : {
		"host" : "kdt-0-shard-00-00-xoqdb.gcp.mongodb.net",
		"port" : 27017,
		"version" : "4.0.18",
		"gitVersion" : "6883bdfb8b8cff32176b1fd176df04da9165fd67"
	},
	"ok" : 1,
	"operationTime" : Timestamp(1592560381, 1),
	"$clusterTime" : {
		"clusterTime" : Timestamp(1592560381, 1),
		"signature" : {
			"hash" : BinData(0,"4CBPnwnRqeD0qZJYLu1kavioahc="),
			"keyId" : NumberLong("6790057799909900289")
		}
	}
} 

It took around 20 minutes to execute this query, the FETCH stage seems to be very very slow
My documents have some nested object

Did I miss something ?

Thanks for your help !

Hello Fabien,

The time taken is because the index on the data.summary.elecScore is not used in the first $match stage. As the plan shows only the sort stage used the index defined on the updated_at field. The query had to scan all documents to filter the 75k documents.

To get an idea about how to use indexes with filter and sort stages see: Use Indexes to Sort Query Results. The sub-topic Sort and Non-prefix Subset of an Index is related to this aggregation query.

P.S. You may want to post a sample document from your collection.

Thanks for your reply @Prasad_Saya,

I created a compound index data.summary.elecScore_1_updated_at_1

and tried to do this simple case :

       db.getCollection('test')
        .explain('executionStats')
        .aggregate([
            {
                $match: { "data.summary.elecScore": { $gte: 10 } }
            },
            {
                $sort: { "updated_at": -1 }
            },
            {
                allowDiskUse: true,
                explain: true
            }
        ])

and I got

{
	"stages" : [
		{
			"$cursor" : {
				"query" : {
					"data.summary.elecScore" : {
						"$gte" : 10
					}
				},
				"sort" : {
					"updated_at" : -1
				},
				"queryPlanner" : {
					"plannerVersion" : 1,
					"namespace" : "kazadata.test",
					"indexFilterSet" : false,
					"parsedQuery" : {
						"data.summary.elecScore" : {
							"$gte" : 10
						}
					},
					"winningPlan" : {
						"stage" : "FETCH",
						"filter" : {
							"data.summary.elecScore" : {
								"$gte" : 10
							}
						},
						"inputStage" : {
							"stage" : "IXSCAN",
							"keyPattern" : {
								"updated_at" : 1
							},
							"indexName" : "updated_at_1",
							"isMultiKey" : false,
							"multiKeyPaths" : {
								"updated_at" : [ ]
							},
							"isUnique" : false,
							"isSparse" : false,
							"isPartial" : false,
							"indexVersion" : 2,
							"direction" : "backward",
							"indexBounds" : {
								"updated_at" : [
									"[MaxKey, MinKey]"
								]
							}
						}
					},
					"rejectedPlans" : [ ]
				}
			}
		}
	],
	"serverInfo" : {
		"host" : "kdt-0-shard-00-00-xoqdb.gcp.mongodb.net",
		"port" : 27017,
		"version" : "4.0.18",
		"gitVersion" : "6883bdfb8b8cff32176b1fd176df04da9165fd67"
	},
	"ok" : 1,
	"operationTime" : Timestamp(1592574905, 1),
	"$clusterTime" : {
		"clusterTime" : Timestamp(1592574905, 1),
		"signature" : {
			"hash" : BinData(0,"hZpCbjUQMejIHMoEY5EddyhKrmU="),
			"keyId" : NumberLong("6790057799909900289")
		}
	}
}

This is a sample document:

{ 
    "_id" : ObjectId("5eeb1e90bf7d06673af6cad9"), 
    "plan_id" : NumberInt(11153473), 
    "partner_id" : NumberInt(8), 
    "created_at" : ISODate("2020-06-18T07:57:43.000+0000"), 
    "updated_at" : ISODate("2020-06-18T07:57:43.000+0000"), 
    "deleted_at" : null, 
    "data" : {
        "floors" : [
            {
                "index" : NumberInt(0), 
                "customName" : null
            }, 
            {
                "index" : NumberInt(1), 
                "customName" : "Comble"
            }
        ], 
        "rooms" : [
            {
                "floorIndex" : NumberInt(0), 
                "roomIndex" : NumberInt(0), 
                "customName" : null, 
                "area" : 5.58, 
                "habitableArea" : 5.58, 
                "kind" : "terraceext", 
                "id" : "ac1ff9b3-6861-4e8a-907c-5017764b823c", 
                "surfaceType" : "outside", 
                "isInside" : false, 
                "isHabitable" : false, 
                "isMain" : false, 
                "isOutside" : true, 
                "isAnnex" : false
            }, 
            {
                "floorIndex" : NumberInt(0), 
                "roomIndex" : NumberInt(1), 
                "customName" : null, 
                "area" : 5.05, 
                "habitableArea" : 5.05, 
                "kind" : "bathRoom", 
                "id" : "4927dd48-527c-4c9c-810b-f2eac81de59b", 
                "surfaceType" : "habitable", 
                "isInside" : true, 
                "isHabitable" : true, 
                "isMain" : false, 
                "isOutside" : false, 
                "isAnnex" : false
            }, 
            {
                "floorIndex" : NumberInt(0), 
                "roomIndex" : NumberInt(2), 
                "customName" : null, 
                "area" : 15.91, 
                "habitableArea" : 15.91, 
                "kind" : "livingdiningRoom", 
                "id" : "74f7d362-d9fb-4195-aa92-1cd97331bb0c", 
                "surfaceType" : "habitable", 
                "isInside" : true, 
                "isHabitable" : true, 
                "isMain" : true, 
                "isOutside" : false, 
                "isAnnex" : false
            }, 
            {
                "floorIndex" : NumberInt(0), 
                "roomIndex" : NumberInt(3), 
                "customName" : null, 
                "area" : 16.43, 
                "habitableArea" : 16.43, 
                "kind" : "kitchen", 
                "id" : "54ac840d-053c-48a2-83d0-084f440695bd", 
                "surfaceType" : "habitable", 
                "isInside" : true, 
                "isHabitable" : true, 
                "isMain" : false, 
                "isOutside" : false, 
                "isAnnex" : false
            }, 
            {
                "floorIndex" : NumberInt(0), 
                "roomIndex" : NumberInt(4), 
                "customName" : null, 
                "area" : 28.88, 
                "habitableArea" : 28.88, 
                "kind" : "garage", 
                "id" : "e50cadf4-878d-425e-8bb1-6713f490c6ea", 
                "surfaceType" : "annex", 
                "isInside" : true, 
                "isHabitable" : false, 
                "isMain" : false, 
                "isOutside" : false, 
                "isAnnex" : true
            }, 
            {
                "floorIndex" : NumberInt(1), 
                "roomIndex" : NumberInt(5), 
                "customName" : null, 
                "area" : 28.64, 
                "habitableArea" : 28.64, 
                "kind" : "bedroom", 
                "id" : "05cdd678-9747-4285-8b0e-be8e68b5ff93", 
                "surfaceType" : "habitable", 
                "isInside" : true, 
                "isHabitable" : true, 
                "isMain" : true, 
                "isOutside" : false, 
                "isAnnex" : false
            }, 
            {
                "floorIndex" : NumberInt(1), 
                "roomIndex" : NumberInt(6), 
                "customName" : null, 
                "area" : 3.7, 
                "habitableArea" : 3.7, 
                "kind" : "hopper", 
                "id" : "78245c6e-2cba-4f80-855f-6abcd8940df2", 
                "surfaceType" : "building", 
                "isInside" : false, 
                "isHabitable" : false, 
                "isMain" : false, 
                "isOutside" : false, 
                "isAnnex" : false
            }
        ], 
        "models" : [
            {
                "catalogId" : "model-6926", 
                "fromSplitGroupId" : null, 
                "addedAt" : ISODate("2020-06-13T14:37:24.753+0000"), 
                "width" : 138.5, 
                "height" : NumberInt(42), 
                "depth" : NumberInt(45), 
                "isDimensionEdited" : true, 
                "isMaterialEdited" : true, 
                "updatedAt" : ISODate("2020-06-18T07:17:46.693+0000"), 
                "updatedMaterials" : [
                    "material-701", 
                    "material-painting-component-Luxens-WHITE-2"
                ], 
                "position" : [
                    147.24388122558594, 
                    -577.5023803710938
                ], 
                "rotation" : {
                    "x" : NumberInt(0), 
                    "y" : -0.00000000000000024492937051703357, 
                    "z" : NumberInt(0)
                }, 
                "roomIndex" : NumberInt(2), 
                "roomKind" : "livingdiningRoom"
            }, 
            ...
        ], 
        "materials" : [
            {
                "isModified" : false, 
                "typeOfMerge" : null, 
                "orig" : "catalog", 
                "catalogId" : "material-559", 
                "area" : 5.58, 
                "surface" : "floor", 
                "addedAt" : ISODate("2020-06-14T09:02:13.221+0000"), 
                "roomIndex" : NumberInt(0), 
                "serializations" : [

                ], 
                "roomKind" : "terraceext"
            }, 
            ...
        ], 
        "elecs" : [
            {
                "products" : [

                ], 
                "quantities" : [

                ], 
                "firstRoomIndex" : NumberInt(0), 
                "roomIdsByFloor" : [
                    "ac1ff9b3-6861-4e8a-907c-5017764b823c", 
                    "4927dd48-527c-4c9c-810b-f2eac81de59b", 
                    "74f7d362-d9fb-4195-aa92-1cd97331bb0c", 
                    "54ac840d-053c-48a2-83d0-084f440695bd", 
                    "e50cadf4-878d-425e-8bb1-6713f490c6ea"
                ], 
                "roomKind" : null
            }, 
            {
                "products" : [

                ], 
                "quantities" : [

                ], 
                "firstRoomIndex" : NumberInt(5), 
                "roomIdsByFloor" : [
                    "05cdd678-9747-4285-8b0e-be8e68b5ff93", 
                    "78245c6e-2cba-4f80-855f-6abcd8940df2"
                ], 
                "roomKind" : null
            }
        ], 
        "buildingBlocks" : {
            "openings" : [
                {
                    "kind" : "opening.casementwindow", 
                    "addedAt" : ISODate("2020-06-13T14:26:08.046+0000"), 
                    "updatedAt" : ISODate("2020-06-18T07:23:03.017+0000"), 
                    "width" : NumberInt(80), 
                    "height" : NumberInt(95), 
                    "nbCasement" : NumberInt(2), 
                    "hasWindow" : null, 
                    "thirdFixed" : null, 
                    "atelierStyle" : false
                }, 
                {
                    "kind" : "opening.door", 
                    "addedAt" : ISODate("2020-06-14T08:16:45.601+0000"), 
                    "updatedAt" : ISODate("2020-06-18T07:22:09.224+0000"), 
                    "width" : NumberInt(90), 
                    "height" : NumberInt(215), 
                    "nbCasement" : NumberInt(1), 
                    "hasWindow" : null, 
                    "thirdFixed" : null, 
                    "atelierStyle" : false
                }, 
                {
                    "kind" : "opening.door", 
                    "addedAt" : ISODate("2020-06-14T11:11:26.153+0000"), 
                    "updatedAt" : ISODate("2020-06-18T07:22:14.704+0000"), 
                    "width" : NumberInt(90), 
                    "height" : NumberInt(215), 
                    "nbCasement" : NumberInt(1), 
                    "hasWindow" : null, 
                    "thirdFixed" : null, 
                    "atelierStyle" : false
                }, 
                {
                    "kind" : "opening.door", 
                    "addedAt" : ISODate("2020-06-14T13:06:50.586+0000"), 
                    "updatedAt" : ISODate("2020-06-18T07:22:21.611+0000"), 
                    "width" : NumberInt(90), 
                    "height" : NumberInt(215), 
                    "nbCasement" : NumberInt(1), 
                    "hasWindow" : null, 
                    "thirdFixed" : null, 
                    "atelierStyle" : false
                }, 
                {
                    "kind" : "opening.fixedwindow", 
                    "addedAt" : ISODate("2020-06-15T08:28:50.610+0000"), 
                    "updatedAt" : ISODate("2020-06-18T07:47:18.607+0000"), 
                    "width" : NumberInt(60), 
                    "height" : NumberInt(60), 
                    "nbCasement" : NumberInt(1), 
                    "hasWindow" : null, 
                    "thirdFixed" : null, 
                    "atelierStyle" : false
                }, 
                {
                    "kind" : "opening.door", 
                    "addedAt" : ISODate("2020-06-17T09:07:57.934+0000"), 
                    "updatedAt" : ISODate("2020-06-17T09:55:30.324+0000"), 
                    "width" : NumberInt(240), 
                    "height" : NumberInt(200), 
                    "nbCasement" : NumberInt(2), 
                    "hasWindow" : null, 
                    "thirdFixed" : null, 
                    "atelierStyle" : false
                }, 
                {
                    "kind" : "opening.door", 
                    "addedAt" : ISODate("2020-06-14T08:16:45.601+0000"), 
                    "updatedAt" : ISODate("2020-06-18T07:21:51.487+0000"), 
                    "width" : NumberInt(90), 
                    "height" : NumberInt(215), 
                    "nbCasement" : NumberInt(1), 
                    "hasWindow" : null, 
                    "thirdFixed" : null, 
                    "atelierStyle" : false
                }, 
                {
                    "kind" : "opening.casementwindow", 
                    "addedAt" : ISODate("2020-06-13T14:26:08.046+0000"), 
                    "updatedAt" : ISODate("2020-06-18T07:23:32.771+0000"), 
                    "width" : NumberInt(80), 
                    "height" : NumberInt(95), 
                    "nbCasement" : NumberInt(2), 
                    "hasWindow" : null, 
                    "thirdFixed" : null, 
                    "atelierStyle" : false
                }, 
                {
                    "kind" : "opening.casementwindow", 
                    "addedAt" : ISODate("2020-06-14T08:38:39.199+0000"), 
                    "updatedAt" : ISODate("2020-06-18T07:48:34.485+0000"), 
                    "width" : NumberInt(100), 
                    "height" : NumberInt(60), 
                    "nbCasement" : NumberInt(2), 
                    "hasWindow" : null, 
                    "thirdFixed" : null, 
                    "atelierStyle" : false
                }, 
                {
                    "kind" : "opening.casementwindow", 
                    "addedAt" : ISODate("2020-06-14T08:38:39.199+0000"), 
                    "updatedAt" : ISODate("2020-06-18T07:49:31.403+0000"), 
                    "width" : NumberInt(100), 
                    "height" : NumberInt(60), 
                    "nbCasement" : NumberInt(2), 
                    "hasWindow" : null, 
                    "thirdFixed" : null, 
                    "atelierStyle" : false
                }
            ], 
            "walls" : {
                "2" : 15.955894497658939, 
                "7" : 16.42514439746712, 
                "8" : 6.761795964841681, 
                "20" : 47.59999994721013
            }, 
            "swimmingPools" : [

            ], 
            "stairways" : [
                {
                    "addedAt" : ISODate("2020-06-14T12:22:36.854+0000"), 
                    "bearing" : true, 
                    "height" : NumberInt(270), 
                    "position" : [
                        282.6345520019531, 
                        -29.525814056396484
                    ], 
                    "railA" : false, 
                    "railB" : false, 
                    "stairDepth" : NumberInt(25), 
                    "stairOffset" : NumberInt(1), 
                    "stairThickness" : NumberInt(3), 
                    "stairWidth" : NumberInt(90), 
                    "stickSpacement" : NumberInt(11), 
                    "type" : "stairway.stairway-straight", 
                    "updatedAt" : ISODate("2020-06-17T09:05:31.072+0000")
                }
            ], 
            "roofV1" : NumberInt(0), 
            "roofV2" : NumberInt(8), 
            "elecScore" : NumberInt(3)
        }, 
        "stats" : {
            "hasCatalogEvents" : true, 
            "hasShoppingList" : true, 
            "models" : [
                {
                    "click" : NumberInt(1), 
                    "contextBrandClick" : NumberInt(0), 
                    "catalogBrandClick" : NumberInt(0), 
                    "shoppingView" : NumberInt(0), 
                    "shoppingClick" : NumberInt(0), 
                    "shoppingPrint" : NumberInt(0), 
                    "catalogId" : "model-6926"
                }, 
                ...
            ], 
            "modelTags" : [
                {
                    "click" : NumberInt(2), 
                    "tagId" : "model-tags-78"
                }, 
                ...a
            ]
        }, 
        "id" : null, 
        "title" : "PROJET 2502", 
        "summary" : {
            "nbModel" : NumberInt(32), 
            "nbMaterial" : NumberInt(18), 
            "nbFloor" : NumberInt(2), 
            "nbRoom" : NumberInt(5), 
            "nbMainRoom" : NumberInt(2), 
            "totalArea" : 94.91, 
            "outsideArea" : 5.58, 
            "habitableArea" : 66.03, 
            "nbStairway" : NumberInt(1), 
            "nbSwimmingPool" : NumberInt(0), 
            "elecScore" : NumberInt(3), 
            "nbRoofV2" : NumberInt(8), 
            "nbRoofV1" : NumberInt(0), 
            "hasShopping" : true
        }, 
        "geolocation" : null, 
        "metadata" : {
            "uuid" : "5b600380-203b-4c03-bd2b-1b6bb93aeb9e", 
            "kazaplanVersion" : "3.58.8", 
            "version" : "3.58.8", 
            "creationDate" : ISODate("2020-06-13T14:23:18.932+0000"), 
            "lastUpdate" : ISODate("2020-06-18T07:57:44.478+0000")
        }
    }, 
    "user" : {
        "user_id" : NumberInt(2860431), 
        "email" : "daniels.shopping@outlook.com", 
        "first_name" : null, 
        "last_name" : null, 
        "country" : "fr", 
        "created_at" : ISODate("2020-06-10T06:56:33.000+0000"), 
        "updated_at" : ISODate("2020-06-10T06:56:33.000+0000")
    }
}

So, I tried to remove index I created previously updated_at_1 and now it works, it use compound index.

My question is: Should I force index that I want to use with hint() in this case ?

but this aggregate above, is still, very very very slow…

Thanks !

It seems that the slow process is fetch with a lot of “complex” documents, like my previous example… I thought it was not a problem for mongo… but it’s a limitation for me… :confused:

Using the hint is considered as not a best practice. I suggest you have just one index, i.e., on the field “data.summary.elecScore”, and remove remaining indexes. See what the explain shows. This index will be applied in the $match stage, and I think it will get a better performance.

You should to use that $match stage that filters document by ‘record_time’, because it can cut out significant amount of docs. Also, you should add index { record_time: 1 } to speed up that $match stage.

As @RemiJ already stated:

You only need index on ‘record_time’ field.
I did a test on your example documents on 10 Million collection size, and having only this index:

{ record_time: 1 }

instead of this:

{
  sku: 1,
  warehouse: 1,
  record_time: -1,
},

Gives about 25% of boost to get the same output.

There is nothing match to optimize in your pipeline. You just need to add proper indexes. Here is the aggregation pipeline, that would be the most performant in your case:

db.test.aggregate([
  // match only the docs, needed by the pipeline
  // record_time should have index for better performance
  {
    $match: {
      record_time: {
        $lte: new Date('2018-06-24T04:45:47.947Z'),
      },
    },
  },
  // it is important to do a reverse-sort here
  // so, $group stage does efficient document pick
  {
    $sort: {
      record_time: -1,
    },
  },
  {
    $group: {
      _id: {
        sku: '$sku',
        warehouse: '$warehouse',
      },
      doc: {
        $first: '$$CURRENT',
      },
    },
  },
])

A different way of getting the desired result would be:

  1. add new collection ‘test2’, that would store latest record for a combination: before_date + warehouse + sku.
{
  before_date: ISODate | String,
  sku: String,
  warehouse: String,
  doc_id: ObjectId,
}
  1. create index on
{
  beforeDate: ISODate | String,
  sku: String,
  warehouse: String
}
  1. With each write to ‘test’ collection, you will have to add consecutive write to ‘test2’ collection:
db.test2.updateOne(
  {
    beforeDate: <d>,
    warehouse: <w>,
    sku: <s>,
  }, 
  { 
    $set: { 
      doc_Id: <ObjectId>, 
    }
  }, 
  { 
    upsert: true,
  }
);
  1. Extract the latest documents before specified date like this:
db.test2.aggregate([
  {
    $match: {
      before_date: <d>,
    },
  },
  {
    $group: {
      docs_ids: {
        $addToSet: '$doc_id',
      }
    }
  },
  {
    $lookup: {
      from: 'test',
      localField: '$docs_ids',
      foreignField: '_id',
      as: 'latest',
    }
  },
  {
    $unwind: '$latest',
  },
  {
    $replaceRoot: {
      newRoot: '$latest',
    }
  }
]);

You can achieve even more faster queries, if you add the latest record as nested object in ‘test2’ document, like this:

{
  before_date: ISODate | String,
  sku: String,
  warehouse: String,
  doc_id: { qty: Number, record_time: ISODate },  
}

with this you can use simple .find() operator to get the desired result:

db.test2.find({ before_date: <d> });