Determining time overlapping between two different kinds of files from the same collection (II)

Hi there:

Some weeks ago I was trying to get a solution to this problem:

I got it, but the data format of the files has changed a little bit:

Broadcasting files:

{
  "idSensor": 3,
  "idDevice": 55,
  "dateTime": 
  {
    "instant": "2020-07-24T10:00:40Z",
    "offset": "+02:00"
  },
  "data": 
  {
    "inicio": "2020-07-24T12:00:40+02:00",
    "fin": "2020-07-24T12:00:52+02:00",
    "archivo": " EKT_captaREVOLUTION_VERT_1_corto.mp4",
    "tipo": "video"
  }
}

Reaction files:

{
  "idSensor": 2,
  "idDevice": 48,
  "dateTime": 
  {
    "instant": "2020-06-09T14:01:58.521Z",
    "offset": "+02:00"
  },
  "data": 
  {
    "BeginTime": "2020-06-09T16:01:58.521891+02:00",
    "CountingLines": 
    [{
      "Direction": "EnteredLeaving",
      "LineID": 0
    }],
    "FaceInfo": 
    {
      "Age": 32.03,
      "Emotion": "SURPRISE",
      "IsDetected": true,
      "MaleProbability": 0.91,
      "gazeTime": 0.36,
      "numGazes": 4
    },
    "ImageSize": 
    {
      "Height": 1080,
      "Width": 1920
    },
    "LookingDuration": 1,
    "PersonID": 102340,
    "ReIDInfo": {"NumReIDs": 1},
    "RoiInfo": {"RoiDuration": 10.909090909090908},
    "SensorID": 51,
    "SocialDistance": [],
    "TrackingDuration": 11.64,
    "Trajectory": null,
    "direction": null,
    "id": 1,
    "roiName": 1,
    "roiType": 1
  }
}

So my query:

var pipeline = 
[
	{
		"$match": //Deja pasar sólo los archivos de emisiones y de reacciones
		{
		    "$or": [{"idSensor": 2}, {"idSensor": 3}]
		}
	},
    {
        "$lookup": //Une cada documento con los demás -incluyendo consigo mismo-, que agrupa en un array con tantas posiciones como documentos
        {
            "from": "sensorsData",
            "localField": "idDevice",
            "foreignField": "idDevice",
            "as": "array"
        }
    },
    {
        "$unwind": "$array" //Descompone los archivos en función de las posiciones del array
    },
    {
        "$match": //Deja pasar sólo aquellos archivos con estructura emisión-reacción y elimina las demás combinaciones: emisión-emisión, reacción-reacción y reacción-emisión (redundantes)
        {
            "$and": 
            [
                {"data.inicio": {"$exists": true}}, 
                {"array.data.BeginTime": {"$exists": true}}
            ]
        }
    },
    {
        "$addFields": //Creación de los parámetros temporales
        {
            "dtBroadcastStart": {"$toDate": "$data.inicio"},
            "dtBroadcastEnd": {"$toDate": "$data.fin"},
            "dtTrackingStart": {"$toDate": "$array.data.BeginTime"},
			"dtTrackingEnd": {"$add": [{"$toDate": "$array.data.BeginTime"}, {"$multiply": ["$array.data.TrackingDuration", 1000]}]}
        }
    },
    {
        "$match": //Filtrado de los documentos que cumplen las condiciones de solapamiento temporal
        {
            "$expr":
            {
                "$and": 
                [
                    {"$lt": ["$dtBroadcastStart", "$dtTrackingEnd"]},
                    {"$gt": ["$dtBroadcastEnd", "$dtTrackingStart"]}
                ]
            }
        }
    },
    {
        "$project": //Selección final de parámetros
        {
            "_id": 0,
            "idDevice": "$idDevice",
            "naBroadcast": "$data.archivo",
			"naType": "$data.tipo",
            "dtBroadcastStart": 1,
            "dtBroadcastEnd": 1,
            "qtBroadcastDurationS": {"$divide": [{"$subtract": [{"$toDate": "$dtBroadcastEnd"}, {"$toDate": "$dtBroadcastStart"}]}, 1000]}
            "naWeekday": 
			{
			    "$switch": 
			    {
			        "branches": 
			        [
			            {"case": {"$eq": [{"$dayOfWeek": {"$toDate": "$data.inicio"}}, 1]}, "then": "Domingo"},
			            {"case": {"$eq": [{"$dayOfWeek": {"$toDate": "$data.inicio"}}, 2]}, "then": "Lunes"},
			            {"case": {"$eq": [{"$dayOfWeek": {"$toDate": "$data.inicio"}}, 3]}, "then": "Martes"},
			            {"case": {"$eq": [{"$dayOfWeek": {"$toDate": "$data.inicio"}}, 4]}, "then": "Miércoles"},
			            {"case": {"$eq": [{"$dayOfWeek": {"$toDate": "$data.inicio"}}, 5]}, "then": "Jueves"},
			            {"case": {"$eq": [{"$dayOfWeek": {"$toDate": "$data.inicio"}}, 6]}, "then": "Viernes"},
			            {"case": {"$eq": [{"$dayOfWeek": {"$toDate": "$data.inicio"}}, 7]}, "then": "Sábado"}
			        ],
			        "default": "Fecha incorrecta"
			    }
			},
			"naMonth":
			{
			    "$switch": 
			    {
			        "branches": 
			        [
			            {"case": {"$eq": [{"$substr": ["$data.inicio", 5, 2]}, "01"]}, "then": "Enero"},
			            {"case": {"$eq": [{"$substr": ["$data.inicio", 5, 2]}, "02"]}, "then": "Febrero"},
			            {"case": {"$eq": [{"$substr": ["$data.inicio", 5, 2]}, "03"]}, "then": "Marzo"},
			            {"case": {"$eq": [{"$substr": ["$data.inicio", 5, 2]}, "04"]}, "then": "Abril"},
			            {"case": {"$eq": [{"$substr": ["$data.inicio", 5, 2]}, "05"]}, "then": "Mayo"},
			            {"case": {"$eq": [{"$substr": ["$data.inicio", 5, 2]}, "06"]}, "then": "Junio"},
			            {"case": {"$eq": [{"$substr": ["$data.inicio", 5, 2]}, "07"]}, "then": "Julio"},
			            {"case": {"$eq": [{"$substr": ["$data.inicio", 5, 2]}, "08"]}, "then": "Agosto"},
			            {"case": {"$eq": [{"$substr": ["$data.inicio", 5, 2]}, "09"]}, "then": "Septiembre"},
			            {"case": {"$eq": [{"$substr": ["$data.inicio", 5, 2]}, "10"]}, "then": "Octubre"},
			            {"case": {"$eq": [{"$substr": ["$data.inicio", 5, 2]}, "11"]}, "then": "Noviembre"},
			            {"case": {"$eq": [{"$substr": ["$data.inicio", 5, 2]}, "12"]}, "then": "Diciembre"}
			        ],
			        "default": "Fecha incorrecta"
			    }
			},
            "idPerson": "$array.data.PersonID",
			"dtTrackingStart": 1,
			"dtTrackingEnd": 1,
			"qtFaceDetected": 
			{
				"$cond": 
				{
					"if": {"$eq": ["$array.data.FaceInfo.IsDetected", true]}, 
					"then": 1, 
					"else": 0
				}
			},
			"qtMaleProbability": "$array.data.FaceInfo.MaleProbability",
			"qtAge": "$array.data.FaceInfo.Age",
			"naEmotion": "$array.data.FaceInfo.Emotion",
			"qtGaze": "$array.data.FaceInfo.numGazes",
			"qtGazeDurationS": "$array.data.FaceInfo.gazeTime",
			"qtFaceDurationS": "$array.data.LookingDuration",
			"qtTrackingDurationS": "$array.data.TrackingDuration",
			"qtReId": "$array.data.ReIDInfo.NumReIDs"
        }
	}
]

db.sensorsData.aggregate(pipeline)

It works, but it seems this stage:

{
    "$match": //Filtrado de los documentos que cumplen las condiciones de solapamiento temporal
    {
        "$expr":
        {
            "$and": 
            [
                {"$lt": ["$dtBroadcastStart", "$dtTrackingEnd"]},
                {"$gt": ["$dtBroadcastEnd", "$dtTrackingStart"]}
            ]
        }
    }
},

It’s too much when querying against the production DB, and it gets stuck. Any idea about how to optimize my query?

Thanks in advance!

Hi @Javier_Blanco,

Why your query lookup from sensorData to sensorData ? Is all data in the same collection?

If you need your query execution analysis please provide:

 db.sensorsData.explain().aggregate(pipeline);
db.sensorData.getIndexes();
db.sensorData.stats();

Thanks,
Pavel