Hi there:
Some weeks ago I was trying to get a solution to this problem:
I got it, but the data format of the files has changed a little bit:
Broadcasting files:
{
"idSensor": 3,
"idDevice": 55,
"dateTime":
{
"instant": "2020-07-24T10:00:40Z",
"offset": "+02:00"
},
"data":
{
"inicio": "2020-07-24T12:00:40+02:00",
"fin": "2020-07-24T12:00:52+02:00",
"archivo": " EKT_captaREVOLUTION_VERT_1_corto.mp4",
"tipo": "video"
}
}
Reaction files:
{
"idSensor": 2,
"idDevice": 48,
"dateTime":
{
"instant": "2020-06-09T14:01:58.521Z",
"offset": "+02:00"
},
"data":
{
"BeginTime": "2020-06-09T16:01:58.521891+02:00",
"CountingLines":
[{
"Direction": "EnteredLeaving",
"LineID": 0
}],
"FaceInfo":
{
"Age": 32.03,
"Emotion": "SURPRISE",
"IsDetected": true,
"MaleProbability": 0.91,
"gazeTime": 0.36,
"numGazes": 4
},
"ImageSize":
{
"Height": 1080,
"Width": 1920
},
"LookingDuration": 1,
"PersonID": 102340,
"ReIDInfo": {"NumReIDs": 1},
"RoiInfo": {"RoiDuration": 10.909090909090908},
"SensorID": 51,
"SocialDistance": [],
"TrackingDuration": 11.64,
"Trajectory": null,
"direction": null,
"id": 1,
"roiName": 1,
"roiType": 1
}
}
So my query:
var pipeline =
[
{
"$match": //Deja pasar sólo los archivos de emisiones y de reacciones
{
"$or": [{"idSensor": 2}, {"idSensor": 3}]
}
},
{
"$lookup": //Une cada documento con los demás -incluyendo consigo mismo-, que agrupa en un array con tantas posiciones como documentos
{
"from": "sensorsData",
"localField": "idDevice",
"foreignField": "idDevice",
"as": "array"
}
},
{
"$unwind": "$array" //Descompone los archivos en función de las posiciones del array
},
{
"$match": //Deja pasar sólo aquellos archivos con estructura emisión-reacción y elimina las demás combinaciones: emisión-emisión, reacción-reacción y reacción-emisión (redundantes)
{
"$and":
[
{"data.inicio": {"$exists": true}},
{"array.data.BeginTime": {"$exists": true}}
]
}
},
{
"$addFields": //Creación de los parámetros temporales
{
"dtBroadcastStart": {"$toDate": "$data.inicio"},
"dtBroadcastEnd": {"$toDate": "$data.fin"},
"dtTrackingStart": {"$toDate": "$array.data.BeginTime"},
"dtTrackingEnd": {"$add": [{"$toDate": "$array.data.BeginTime"}, {"$multiply": ["$array.data.TrackingDuration", 1000]}]}
}
},
{
"$match": //Filtrado de los documentos que cumplen las condiciones de solapamiento temporal
{
"$expr":
{
"$and":
[
{"$lt": ["$dtBroadcastStart", "$dtTrackingEnd"]},
{"$gt": ["$dtBroadcastEnd", "$dtTrackingStart"]}
]
}
}
},
{
"$project": //Selección final de parámetros
{
"_id": 0,
"idDevice": "$idDevice",
"naBroadcast": "$data.archivo",
"naType": "$data.tipo",
"dtBroadcastStart": 1,
"dtBroadcastEnd": 1,
"qtBroadcastDurationS": {"$divide": [{"$subtract": [{"$toDate": "$dtBroadcastEnd"}, {"$toDate": "$dtBroadcastStart"}]}, 1000]}
"naWeekday":
{
"$switch":
{
"branches":
[
{"case": {"$eq": [{"$dayOfWeek": {"$toDate": "$data.inicio"}}, 1]}, "then": "Domingo"},
{"case": {"$eq": [{"$dayOfWeek": {"$toDate": "$data.inicio"}}, 2]}, "then": "Lunes"},
{"case": {"$eq": [{"$dayOfWeek": {"$toDate": "$data.inicio"}}, 3]}, "then": "Martes"},
{"case": {"$eq": [{"$dayOfWeek": {"$toDate": "$data.inicio"}}, 4]}, "then": "Miércoles"},
{"case": {"$eq": [{"$dayOfWeek": {"$toDate": "$data.inicio"}}, 5]}, "then": "Jueves"},
{"case": {"$eq": [{"$dayOfWeek": {"$toDate": "$data.inicio"}}, 6]}, "then": "Viernes"},
{"case": {"$eq": [{"$dayOfWeek": {"$toDate": "$data.inicio"}}, 7]}, "then": "Sábado"}
],
"default": "Fecha incorrecta"
}
},
"naMonth":
{
"$switch":
{
"branches":
[
{"case": {"$eq": [{"$substr": ["$data.inicio", 5, 2]}, "01"]}, "then": "Enero"},
{"case": {"$eq": [{"$substr": ["$data.inicio", 5, 2]}, "02"]}, "then": "Febrero"},
{"case": {"$eq": [{"$substr": ["$data.inicio", 5, 2]}, "03"]}, "then": "Marzo"},
{"case": {"$eq": [{"$substr": ["$data.inicio", 5, 2]}, "04"]}, "then": "Abril"},
{"case": {"$eq": [{"$substr": ["$data.inicio", 5, 2]}, "05"]}, "then": "Mayo"},
{"case": {"$eq": [{"$substr": ["$data.inicio", 5, 2]}, "06"]}, "then": "Junio"},
{"case": {"$eq": [{"$substr": ["$data.inicio", 5, 2]}, "07"]}, "then": "Julio"},
{"case": {"$eq": [{"$substr": ["$data.inicio", 5, 2]}, "08"]}, "then": "Agosto"},
{"case": {"$eq": [{"$substr": ["$data.inicio", 5, 2]}, "09"]}, "then": "Septiembre"},
{"case": {"$eq": [{"$substr": ["$data.inicio", 5, 2]}, "10"]}, "then": "Octubre"},
{"case": {"$eq": [{"$substr": ["$data.inicio", 5, 2]}, "11"]}, "then": "Noviembre"},
{"case": {"$eq": [{"$substr": ["$data.inicio", 5, 2]}, "12"]}, "then": "Diciembre"}
],
"default": "Fecha incorrecta"
}
},
"idPerson": "$array.data.PersonID",
"dtTrackingStart": 1,
"dtTrackingEnd": 1,
"qtFaceDetected":
{
"$cond":
{
"if": {"$eq": ["$array.data.FaceInfo.IsDetected", true]},
"then": 1,
"else": 0
}
},
"qtMaleProbability": "$array.data.FaceInfo.MaleProbability",
"qtAge": "$array.data.FaceInfo.Age",
"naEmotion": "$array.data.FaceInfo.Emotion",
"qtGaze": "$array.data.FaceInfo.numGazes",
"qtGazeDurationS": "$array.data.FaceInfo.gazeTime",
"qtFaceDurationS": "$array.data.LookingDuration",
"qtTrackingDurationS": "$array.data.TrackingDuration",
"qtReId": "$array.data.ReIDInfo.NumReIDs"
}
}
]
db.sensorsData.aggregate(pipeline)
It works, but it seems this stage:
{
"$match": //Filtrado de los documentos que cumplen las condiciones de solapamiento temporal
{
"$expr":
{
"$and":
[
{"$lt": ["$dtBroadcastStart", "$dtTrackingEnd"]},
{"$gt": ["$dtBroadcastEnd", "$dtTrackingStart"]}
]
}
}
},
It’s too much when querying against the production DB, and it gets stuck. Any idea about how to optimize my query?
Thanks in advance!