Hi
I need some advise. I have aggregation pipeline in MongoDB. I try to run this pipeline with small number of document like 200K documents, it’s seem to ok.
Then I try it with bigger sample ( longer timestamp period in $match) and I found that there is an error said
‘$push used too much memory and cannot spill to disk. Memory limit: 104857600 bytes’, ‘code’: 146, ‘codeName’: ‘ExceededMemoryLimit’
So I google and see that $push is limited memory to 100 MB. Then I increased memory by using this command
db.adminCommand({setParameter: 1, internalQueryMaxPushBytes:2048576000});
db.adminCommand({setParameter: 1, internalQueryMaxBlockingSortMemoryUsageBytes:2048576000});
Then run the same query then it gave me longer running time then different error message like
{
"ok" : 0.0,
"errmsg" : "BufBuilder attempted to grow() to 67108865 bytes, past the 64MB limit.",
"code" : 13548.0,
"codeName" : "Location13548"
}
It’s not always and permanent error, in same period of timestamp as this
"$match": {
"$and":
[
{ "TID": /XP/ },
{ "timestamp": { "$gte": "2021-05-15 00:00:00" } },
{ "timestamp": { "$lte": "2021-05-25 23:59:59" } },
{ "Col": /^\d.*$/ },
{ "MT0": /^\d.*$/ },
{ "Row": /^\d.*$/ },
{ "TRM": /^\d.*$/ },
{ "WRLFTR_0": /^\d.*$/ },
{ "WRLFTR_1": /^\d.*$/ }
]
}
which 43,000,000 documents from $match stage I can get result from this code with no error message.
But In some period of timestamp
"$match": {
"$and":
[
{ "TID": /XP/ },
{ "timestamp": { "$gte": "2021-05-27 00:00:00" } },
{ "timestamp": { "$lte": "2021-05-27 23:59:59" } },
{ "Col": /^\d.*$/ },
{ "MT0": /^\d.*$/ },
{ "Row": /^\d.*$/ },
{ "TRM": /^\d.*$/ },
{ "WRLFTR_0": /^\d.*$/ },
{ "WRLFTR_1": /^\d.*$/ }
]
}
which 7,000,000 documents from $match stage I got this error “BufBuilder attempted to grow() to 67108865 bytes, past the 64MB limit.”
Here is the code, it’s everything I just change parameter name from actual to something else. Could you please give some acvise if spot some typo or same items that need to improve. thanks.
db.collection.aggregate(
[
{
"$match": {
"$and":
[
{ "TID": /XP/ },
{ "timestamp": { "$gte": "2021-05-15 00:00:00" } },
{ "timestamp": { "$lte": "2021-05-25 23:59:59" } },
{ "Col": /^\d.*$/ },
{ "MT0": /^\d.*$/ },
{ "Row": /^\d.*$/ },
{ "TRM": /^\d.*$/ },
{ "WRLFTR_0": /^\d.*$/ },
{ "WRLFTR_1": /^\d.*$/ }
]
}
},
{
"$group": {
"_id": {
"TID": "$TID",
"Opt": "$Opt",
"DSN1": "$DSN1",
"DSN2": "$DSN2",
"Col": "$Col",
"Row": "$Row",
"CSN": "$CSN",
"PID": {
"$switch": {
"branches": [
{
"case": { "$eq": [ "$Opt","S5" ] }, "then": "6350"
},
{
"case": { "$eq": [ "$Opt","FUNC" ] }, "then": "6400"
},
{
"case": { "$eq": [ "$Opt","STR" ] }, "then": "6600"
},
{
"case": { "$eq": [ "$Opt","FIN" ] }, "then": "6800"
},
{
"case": { "$eq": [ "$Opt", "FTG" ] }, "then": "9000"
}
],
"default": "$_id.Opt"
}
},
"CID": {
"$switch": {
"branches": [
{
"case": { "$and":
[
{ "$gte": [ {"$toDecimal": "$Col"}, 1.0 ] },
{ "$lte": [ {"$toDecimal": "$Col"}, 12.0 ] }
]
},
"then": 1.0
},
{
"case": { "$and":
[
{ "$gte": [ {"$toDecimal": "$Col"}, 13.0 ] },
{ "$lte": [ {"$toDecimal": "$Col"}, 24.0 ] }
]
},
"then": 2.0
},
{
"case": { "$and":
[
{ "$gte": [ {"$toDecimal": "$Col"}, 25.0 ] },
{ "$lte": [ {"$toDecimal": "$Col"}, 36.0 ] }
]
},
"then": 3.0
},
{
"case": { "$and":
[
{ "$gte": [ {"$toDecimal": "$Col"}, 37.0 ] },
{ "$lte": [ {"$toDecimal": "$Col"}, 48.0 ] }
]
},
"then": 4.0
},
{
"case": { "$and":
[
{ "$gte": [ {"$toDecimal": "$Col"}, 49.0 ] },
{ "$lte": [ {"$toDecimal": "$Col"}, 60.0 ] }
]
},
"then": 5.0
},
{
"case": { "$and":
[
{ "$gte": [ {"$toDecimal": "$Col"}, 61.0 ] },
{ "$lte": [ {"$toDecimal": "$Col"}, 72.0 ] }
]
},
"then": 6.0
},
],
"default": 0.0
}
},
},
"details": {
"$push": { // partition over
"MT0": "$MT0",
"WRLFTR_0": "$WRLFTR_0",
"WRLFTR_1": "$WRLFTR_1",
"TRM": "$TRM",
"timestamp": "$timestamp"
}
},
}
},
{
"$sort": {
"_id": 1.0,
"timestamp": 1.0 // order by timestamp
}
},
{
"$unwind": {
"path": "$details",
"includeArrayIndex": "array_idx" // row_number
}
},
{
"$match": { // select only order 30 - 180 in array
"$and": [
{
"array_idx": {
"$gte": 30.0
}
},
{
"array_idx": {
"$lte": 180.0
}
}
]
}
},
{
"$sort": { // sort TRM for percentile calculation
"details.TRM": 1.0
}
},
{
"$group": { // group parameter back to array by partition "_id"
"_id": "$_id",
"timestamp": { "$push": {"$toDate":"$details.timestamp"} },
"MT0": { "$push": {"$toDouble":"$details.MT0"} },
"TRM": { "$push": {"$toDouble":"$details.TRM"} },
"WRLFTR_0": { "$push": {"$toDouble":"$details.WRLFTR_0"} },
"WRLFTR_1": { "$push": {"$toDouble":"$details.WRLFTR_1"} },
}
},
{ // reporting "_id" and calculating
"$project": {
"_id": 0,
"TID": "$_id.TID",
"Col": {
"$toDecimal": "$_id.Col"
},
"Row": {
"$toDecimal": "$_id.Row"
},
"CSN": {
"$substr": [
"$_id.CSN",
2.0,
-6.0
]
},
"Opt": "$_id.Opt",
"PID": "$_id.PID",
"DSN1": "$_id.DSN1",
"DSN2": "$_id.DSN2",
"CID": "$_id.CID",
"MAX_TRM": {
"$max": "$TRM"
},
"Q3_TRM": {
"$arrayElemAt": [ "$TRM", {"$floor": { "$multiply": [0.75,{"$size": "$TRM"}] }} ]
},
"AVG_TRM": {
"$avg": "$TRM"
},
"MAX_WRLFTR_0": {
"$max": "$WRLFTR_0"
},
"MAX_WRLFTR_1": {
"$max": "$WRLFTR_1"
},
"MAX_MT0": {
"$max": "$MT0"
},
"MAX_timestamp": {
"$max": "$timestamp"
},
"DAY_ID":{"$concat":
[
{"$substr":[{"$toString":{"$max": "$timestamp"}}, 0, 4]},
{"$substr":[{"$toString":{"$max": "$timestamp"}}, 5, 2]},
{"$substr":[{"$toString":{"$max": "$timestamp"}},8,2]}
]
},
}
},
],
{
"allowDiskUse": true
}
);