Invariant failure on Config server replica set

We have been using MongoDB in production for the past 8 months.
We have a cluster with 9 replica sets. The config server and the shards are co-hosted.
Recently we have been seeing instances where some of the config server processes abruptly go down.

The logs in such cases always contain these messages:

2020-12-13T22:26:01.495+0530 F - [TaskExecutorPool-0] Invariant failure pool->_requests.empty() src/mongo/executor/connection_pool.cpp 1085
2020-12-13T22:26:01.495+0530 F - [TaskExecutorPool-0]
2020-12-13T22:26:01.529+0530 F - [TaskExecutorPool-0] Got signal: 6 (Aborted).


***aborting after invariant() failure

Could anyone please point out what the reason for this could be? Thank you.

Welcome to the MongoDB forums @Chaitra_KR!

Can you confirm the specific version of MongoDB you are using? Are all members of your sharded cluster running the same version (if not, what versions are being used)?

Regards,
Stennie

Hello Stennie,
The version of MongoDB in use is 4.2.1, and yes, all members of the replica set are running the same version.

@Chaitra_KR

Can you share the rs.conf() and rs.status() command output from your config server replica set?

Thanks
Brajmohan

Hi, I have been facing a similar problem for quite some time, on the same MongoDB version (4.2.1).
rs.status() and rs.conf() return the expected output.

@BM_Sharma

Here is the result of rs.conf()

{
	"_id" : "configServerReplSet",
	"version" : 7,
	"configsvr" : true,
	"protocolVersion" : NumberLong(1),
	"writeConcernMajorityJournalDefault" : true,
	"members" : [
		{
			"_id" : 1,
			"host" : "*****",
			"arbiterOnly" : false,
			"buildIndexes" : true,
			"hidden" : false,
			"priority" : 1,
			"tags" : {
				
			},
			"slaveDelay" : NumberLong(0),
			"votes" : 1
		},
		{
			"_id" : 2,
			"host" : "*****",
			"arbiterOnly" : false,
			"buildIndexes" : true,
			"hidden" : false,
			"priority" : 1,
			"tags" : {
				
			},
			"slaveDelay" : NumberLong(0),
			"votes" : 1
		},
		{
			"_id" : 3,
			"host" : "*****",
			"arbiterOnly" : false,
			"buildIndexes" : true,
			"hidden" : false,
			"priority" : 1,
			"tags" : {
				
			},
			"slaveDelay" : NumberLong(0),
			"votes" : 1
		},
		{
			"_id" : 4,
			"host" : "*****",
			"arbiterOnly" : false,
			"buildIndexes" : true,
			"hidden" : false,
			"priority" : 0,
			"tags" : {
				
			},
			"slaveDelay" : NumberLong(0),
			"votes" : 0
		},
		{
			"_id" : 5,
			"host" : "*****",
			"arbiterOnly" : false,
			"buildIndexes" : true,
			"hidden" : false,
			"priority" : 0,
			"tags" : {
				
			},
			"slaveDelay" : NumberLong(0),
			"votes" : 0
		}
	],
	"settings" : {
		"chainingAllowed" : true,
		"heartbeatIntervalMillis" : 2000,
		"heartbeatTimeoutSecs" : 10,
		"electionTimeoutMillis" : 10000,
		"catchUpTimeoutMillis" : -1,
		"catchUpTakeoverDelayMillis" : 30000,
		"getLastErrorModes" : {
			
		},
		"getLastErrorDefaults" : {
			"w" : 1,
			"wtimeout" : 0
		},
		"replicaSetId" : ObjectId("*****")
	}
}

And here is the rs.status()

{
	"set" : "configServerReplSet",
	"date" : ISODate("2020-12-21T04:29:09.101Z"),
	"myState" : 2,
	"term" : NumberLong(26),
	"syncingTo" : "*****",
	"syncSourceHost" : "*****",
	"syncSourceId" : 1,
	"configsvr" : true,
	"heartbeatIntervalMillis" : NumberLong(2000),
	"majorityVoteCount" : 2,
	"writeMajorityCount" : 2,
	"optimes" : {
		"lastCommittedOpTime" : {
			"ts" : Timestamp(1608524948, 74),
			"t" : NumberLong(26)
		},
		"lastCommittedWallTime" : ISODate("2020-12-21T04:29:08.915Z"),
		"readConcernMajorityOpTime" : {
			"ts" : Timestamp(1608524948, 74),
			"t" : NumberLong(26)
		},
		"readConcernMajorityWallTime" : ISODate("2020-12-21T04:29:08.915Z"),
		"appliedOpTime" : {
			"ts" : Timestamp(1608524948, 74),
			"t" : NumberLong(26)
		},
		"durableOpTime" : {
			"ts" : Timestamp(1608524948, 74),
			"t" : NumberLong(26)
		},
		"lastAppliedWallTime" : ISODate("2020-12-21T04:29:08.915Z"),
		"lastDurableWallTime" : ISODate("2020-12-21T04:29:08.915Z")
	},
	"lastStableRecoveryTimestamp" : Timestamp(1608524936, 56),
	"lastStableCheckpointTimestamp" : Timestamp(1608524936, 56),
	"members" : [
		{
			"_id" : 1,
			"name" : "****",
			"ip" : "****",
			"health" : 1,
			"state" : 1,
			"stateStr" : "PRIMARY",
			"uptime" : 40208,
			"optime" : {
				"ts" : Timestamp(1608524947, 35),
				"t" : NumberLong(26)
			},
			"optimeDurable" : {
				"ts" : Timestamp(1608524947, 35),
				"t" : NumberLong(26)
			},
			"optimeDate" : ISODate("2020-12-21T04:29:07Z"),
			"optimeDurableDate" : ISODate("2020-12-21T04:29:07Z"),
			"lastHeartbeat" : ISODate("2020-12-21T04:29:07.810Z"),
			"lastHeartbeatRecv" : ISODate("2020-12-21T04:29:08.004Z"),
			"pingMs" : NumberLong(0),
			"lastHeartbeatMessage" : "",
			"syncingTo" : "",
			"syncSourceHost" : "",
			"syncSourceId" : -1,
			"infoMessage" : "",
			"electionTime" : Timestamp(1608484751, 1),
			"electionDate" : ISODate("2020-12-20T17:19:11Z"),
			"configVersion" : 7
		},
		{
			"_id" : 2,
			"name" : "****",
			"ip" : "****",
			"health" : 1,
			"state" : 2,
			"stateStr" : "SECONDARY",
			"uptime" : 40210,
			"optime" : {
				"ts" : Timestamp(1608524948, 74),
				"t" : NumberLong(26)
			},
			"optimeDate" : ISODate("2020-12-21T04:29:08Z"),
			"syncingTo" : "*****",
			"syncSourceHost" : "*****",
			"syncSourceId" : 1,
			"infoMessage" : "",
			"configVersion" : 7,
			"self" : true,
			"lastHeartbeatMessage" : ""
		},
		{
			"_id" : 3,
			"name" : "*****",
			"ip" : "****",
			"health" : 0,
			"state" : 8,
			"stateStr" : "SECONDARY",
			"uptime" : 0,
			"optime" : {
				"ts" : Timestamp(0, 0),
				"t" : NumberLong(-1)
			},
			"optimeDurable" : {
				"ts" : Timestamp(0, 0),
				"t" : NumberLong(-1)
			},
			"optimeDate" : ISODate("1970-01-01T00:00:00Z"),
			"optimeDurableDate" : ISODate("1970-01-01T00:00:00Z"),
			"lastHeartbeat" : ISODate("2020-12-21T04:29:07.351Z"),
			"lastHeartbeatRecv" : ISODate("1970-01-01T00:00:00Z"),
			"pingMs" : NumberLong(0),
			"lastHeartbeatMessage" : "",
			"syncingTo" : "",
			"syncSourceHost" : "",
			"syncSourceId" : -1,
			"infoMessage" : "",
			"configVersion" : -1
		},
		{
			"_id" : 4,
			"name" : "*****",
			"ip" : "*****",
			"health" : 1,
			"state" : 2,
			"stateStr" : "SECONDARY",
			"uptime" : 40208,
			"optime" : {
				"ts" : Timestamp(1608524947, 88),
				"t" : NumberLong(26)
			},
			"optimeDurable" : {
				"ts" : Timestamp(1608524947, 88),
				"t" : NumberLong(26)
			},
			"optimeDate" : ISODate("2020-12-21T04:29:07Z"),
			"optimeDurableDate" : ISODate("2020-12-21T04:29:07Z"),
			"lastHeartbeat" : ISODate("2020-12-21T04:29:08.157Z"),
			"lastHeartbeatRecv" : ISODate("2020-12-21T04:29:08.178Z"),
			"pingMs" : NumberLong(12),
			"lastHeartbeatMessage" : "",
			"syncingTo" : "*****",
			"syncSourceHost" : "*****",
			"syncSourceId" : 1,
			"infoMessage" : "",
			"configVersion" : 7
		},
		{
			"_id" : 5,
			"name" : "******",
			"ip" : "******",
			"health" : 1,
			"state" : 2,
			"stateStr" : "SECONDARY",
			"uptime" : 40208,
			"optime" : {
				"ts" : Timestamp(1608524947, 88),
				"t" : NumberLong(26)
			},
			"optimeDurable" : {
				"ts" : Timestamp(1608524947, 88),
				"t" : NumberLong(26)
			},
			"optimeDate" : ISODate("2020-12-21T04:29:07Z"),
			"optimeDurableDate" : ISODate("2020-12-21T04:29:07Z"),
			"lastHeartbeat" : ISODate("2020-12-21T04:29:08.811Z"),
			"lastHeartbeatRecv" : ISODate("2020-12-21T04:29:08.951Z"),
			"pingMs" : NumberLong(12),
			"lastHeartbeatMessage" : "",
			"syncingTo" : "*****",
			"syncSourceHost" : "*****",
			"syncSourceId" : 1,
			"infoMessage" : "",
			"configVersion" : 7
		}
	],
	"ok" : 1,
	"$gleStats" : {
		"lastOpTime" : Timestamp(0, 0),
		"electionId" : ObjectId("000000000000000000000000")
	},
	"lastCommittedOpTime" : Timestamp(1608524948, 74),
	"$clusterTime" : {
		"clusterTime" : Timestamp(1608524948, 74),
		"signature" : {
			"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
			"keyId" : NumberLong(0)
		}
	},
	"operationTime" : Timestamp(1608524948, 74)
}

This was fixed in https://jira.mongodb.org/browse/SERVER-42930