Unable to add new member to replica set

Hi all,

I currently have a 3-member replica set with version 4.2.2 of mongodb. We updated these from 3.4 about 3 months ago and they have been running smoothly.

I’d like to add a few members to the replica set, so I followed this document: to add a new member.

The problem I’m having is that the new member is stuck in the “STARTUP” state (for a few days). Here’s a copy of my rs.status() from the primary:

{
    "set" : "replicaSet",
    "date" : ISODate("2020-03-23T16:26:50.262Z"),
    "myState" : 1,
    "term" : NumberLong(87),
    "syncingTo" : "",
    "syncSourceHost" : "",
    "syncSourceId" : -1,
    "heartbeatIntervalMillis" : NumberLong(2000),
    "majorityVoteCount" : 2,
    "writeMajorityCount" : 2,
    "optimes" : {
            "lastCommittedOpTime" : {
                    "ts" : Timestamp(1584980810, 25),
                    "t" : NumberLong(87)
            },
            "lastCommittedWallTime" : ISODate("2020-03-23T16:26:50.255Z"),
            "readConcernMajorityOpTime" : {
                    "ts" : Timestamp(1584980810, 25),
                    "t" : NumberLong(87)
            },
            "readConcernMajorityWallTime" : ISODate("2020-03-23T16:26:50.255Z"),
            "appliedOpTime" : {
                    "ts" : Timestamp(1584980810, 25),
                    "t" : NumberLong(87)
            },
            "durableOpTime" : {
                    "ts" : Timestamp(1584980810, 25),
                    "t" : NumberLong(87)
            },
            "lastAppliedWallTime" : ISODate("2020-03-23T16:26:50.255Z"),
            "lastDurableWallTime" : ISODate("2020-03-23T16:26:50.255Z")
    },
    "lastStableRecoveryTimestamp" : Timestamp(1584980730, 26),
    "lastStableCheckpointTimestamp" : Timestamp(1584980730, 26),
    "electionCandidateMetrics" : {
            "lastElectionReason" : "priorityTakeover",
            "lastElectionDate" : ISODate("2020-03-22T22:26:16.373Z"),
            "electionTerm" : NumberLong(87),
            "lastCommittedOpTimeAtElection" : {
                    "ts" : Timestamp(1584915976, 1),
                    "t" : NumberLong(86)
            },
            "lastSeenOpTimeAtElection" : {
                    "ts" : Timestamp(1584915976, 1),
                    "t" : NumberLong(86)
            },
            "numVotesNeeded" : 2,
            "priorityAtElection" : 2,
            "electionTimeoutMillis" : NumberLong(10000),
            "priorPrimaryMemberId" : 3,
            "numCatchUpOps" : NumberLong(0),
            "newTermStartDate" : ISODate("2020-03-22T22:26:17.311Z"),
            "wMajorityWriteAvailabilityDate" : ISODate("2020-03-22T22:26:17.944Z")
    },
    "members" : [
            {
                    "_id" : 0,
                    "name" : "node_1:27017",
                    "ip" : "node_1",
                    "health" : 1,
                    "state" : 1,
                    "stateStr" : "PRIMARY",
                    "uptime" : 64849,
                    "optime" : {
                            "ts" : Timestamp(1584980810, 25),
                            "t" : NumberLong(87)
                    },
                    "optimeDate" : ISODate("2020-03-23T16:26:50Z"),
                    "syncingTo" : "",
                    "syncSourceHost" : "",
                    "syncSourceId" : -1,
                    "infoMessage" : "",
                    "electionTime" : Timestamp(1584915976, 2),
                    "electionDate" : ISODate("2020-03-22T22:26:16Z"),
                    "configVersion" : 26,
                    "self" : true,
                    "lastHeartbeatMessage" : ""
            },
            {
                    "_id" : 3,
                    "name" : "node_2:27017",
                    "ip" : "node_2",
                    "health" : 1,
                    "state" : 2,
                    "stateStr" : "SECONDARY",
                    "uptime" : 64844,
                    "optime" : {
                            "ts" : Timestamp(1584980808, 4),
                            "t" : NumberLong(87)
                    },
                    "optimeDurable" : {
                            "ts" : Timestamp(1584980808, 4),
                            "t" : NumberLong(87)
                    },
                    "optimeDate" : ISODate("2020-03-23T16:26:48Z"),
                    "optimeDurableDate" : ISODate("2020-03-23T16:26:48Z"),
                    "lastHeartbeat" : ISODate("2020-03-23T16:26:48.980Z"),
                    "lastHeartbeatRecv" : ISODate("2020-03-23T16:26:50.197Z"),
                    "pingMs" : NumberLong(0),
                    "lastHeartbeatMessage" : "",
                    "syncingTo" : "52.43.109.55:27017",
                    "syncSourceHost" : "52.43.109.55:27017",
                    "syncSourceId" : 0,
                    "infoMessage" : "",
                    "configVersion" : 26
            },
            {
                    "_id" : 4,
                    "name" : "node_3:27017",
                    "ip" : "node_3",
                    "health" : 1,
                    "state" : 2,
                    "stateStr" : "SECONDARY",
                    "uptime" : 64844,
                    "optime" : {
                            "ts" : Timestamp(1584980808, 4),
                            "t" : NumberLong(87)
                    },
                    "optimeDurable" : {
                            "ts" : Timestamp(1584980808, 4),
                            "t" : NumberLong(87)
                    },
                    "optimeDate" : ISODate("2020-03-23T16:26:48Z"),
                    "optimeDurableDate" : ISODate("2020-03-23T16:26:48Z"),
                    "lastHeartbeat" : ISODate("2020-03-23T16:26:48.976Z"),
                    "lastHeartbeatRecv" : ISODate("2020-03-23T16:26:49.651Z"),
                    "pingMs" : NumberLong(0),
                    "lastHeartbeatMessage" : "",
                    "syncingTo" : "52.37.102.156:27017",
                    "syncSourceHost" : "52.37.102.156:27017",
                    "syncSourceId" : 3,
                    "infoMessage" : "",
                    "configVersion" : 26
            },
            {
                    "_id" : 5,
                    "name" : "new_node:27017",
                    "ip" : "new_node",
                    "health" : 1,
                    "state" : 0,
                    "stateStr" : "STARTUP",
                    "uptime" : 4674,
                    "optime" : {
                            "ts" : Timestamp(0, 0),
                            "t" : NumberLong(-1)
                    },
                    "optimeDurable" : {
                            "ts" : Timestamp(0, 0),
                            "t" : NumberLong(-1)
                    },
                    "optimeDate" : ISODate("1970-01-01T00:00:00Z"),
                    "optimeDurableDate" : ISODate("1970-01-01T00:00:00Z"),
                    "lastHeartbeat" : ISODate("2020-03-23T16:26:49.001Z"),
                    "lastHeartbeatRecv" : ISODate("1970-01-01T00:00:00Z"),
                    "pingMs" : NumberLong(67),
                    "lastHeartbeatMessage" : "",
                    "syncingTo" : "",
                    "syncSourceHost" : "",
                    "syncSourceId" : -1,
                    "infoMessage" : "",
                    "configVersion" : -2
            }
    ],
    "ok" : 1,
    "$clusterTime" : {
            "clusterTime" : Timestamp(1584980810, 25),
            "signature" : {
                    "hash" : BinData(0,"+gnKxCL0nU/rl4vBBmjEOi70eow="),
                    "keyId" : NumberLong("6772291770769211394")
            }
    },
    "operationTime" : Timestamp(1584980810, 25)

}

If I look at the logfile on the new node, it says:

2020-03-23T16:19:31.346+0000 I  CONTROL  [LogicalSessionCacheReap] Sessions collection is not set up; waiting until next sessions reap interval: config.system.sessions does not exist
2020-03-23T16:19:31.346+0000 I  CONTROL  [LogicalSessionCacheRefresh] Sessions collection is not set up; waiting until next sessions refresh interval: Replication has not yet been configured
2020-03-23T16:18:17.534+0000 I  NETWORK  [conn6184] end connection node_1:37766 (5 connections now open)
2020-03-23T16:17:17.804+0000 I  ACCESS   [conn6215] Successfully authenticated as principal __system on local from client node_1:37768
2020-03-23T16:17:17.530+0000 I  NETWORK  [conn6215] received client metadata from node_1:37768 conn6215: { driver: { name: "NetworkInterfaceTL", version: "4.2.2" }, os: { type: "Linux", name: "Ubuntu", architecture: "x86_64", version: "16.04" } }

I’m not certain what “Sessions collection is not set up; waiting until next sessions refresh interval: Replication has not yet been configured” means or how to troubleshoot this.

Any help would be most appreciated
AJ

1 Like

For anybody with the same issue, the issue was that the new Mongo DB instance needed permissions to access itself via public IP (on aws). Once I added itself to the security group, the instance transitioned to the “STARTUP2” state and is syncing data.

2 Likes

Thanks for following up with the answer!

Hi all, I’ve (think) the same problem, with a 4.2 cluster
Adding a slave (on AWS) to a cluster (On premises)
The securitygroup has all the (private) IPs of all the nodes, and telnet on 27017 is ok everywere.
Which IP should I have to include considering that I don’t have public IPs assigned to any of the nodes?

PS the nodes aren’t listening on 127.0.0.1