From 50ff6b2c7cef8e986984ea1d04b56674dc443974 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Fri, 26 Feb 2010 23:15:23 +0100 Subject: [PATCH 06/16] drbd: fix broken state change after split-brain attach while connected Situation: we have diverging data sets, i.e. we had a split brain somewhen, but currently are connected, one node diskless. Then we try to attach that disk, figure it is consistent, but has a diverging data set, we refuse to attach. This led to strange state changes: 22:18:35 bb drbd1: peer( Unknown -> Primary ) conn( WFReportParams -> Connected) pdsk( DUnknown -> UpToDate ) 22:19:30 bb drbd1: disk( Diskless -> Attaching ) 22:19:30 bb drbd1: disk( Attaching -> Negotiating ) 22:19:30 bb drbd1: drbd_sync_handshake: 22:19:30 bb drbd1: self 97BF25798B9D5222:F33D1F62ADE698DD:4269796F9D027C83:AC45D8B5C3C1BF93 bits:19449 flags:0 22:19:30 bb drbd1: peer 280DFB6E125465D3:F33D1F62ADE698DC:4269796F9D027C82:AC45D8B5C3C1BF93 bits:2575806 flags:0 22:19:30 bb drbd1: uuid_compare()=100 by rule 90 22:19:30 bb drbd1: Split-Brain detected, dropping connection! 22:19:30 bb drbd1: disk( Negotiating -> Diskless ) while the other side says: 22:19:30 aa drbd1: Split-Brain detected, dropping connection! 22:19:30 aa drbd1: Disk attach process on the peer node was aborted. 22:19:30 aa drbd1: conn( Connected -> TOO_LARGE ) pdsk( Diskless -> Consistent ) This should be fixed now. --- drbd/drbd_receiver.c | 7 ++++++- 1 files changed, 6 insertions(+), 1 deletions(-) diff --git a/drbd/drbd_receiver.c b/drbd/drbd_receiver.c index bb6903f..a9469b4 100644 --- a/drbd/drbd_receiver.c +++ b/drbd/drbd_receiver.c @@ -2578,6 +2578,10 @@ STATIC enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol } if (hg == -100) { + /* FIXME this log message is not correct if we end up here + * after an attempted attach on a diskless node. + * We just refuse to attach -- well, we drop the "connection" + * to that disk, in a way... */ dev_alert(DEV, "Split-Brain detected, dropping connection!\n"); drbd_khelper(mdev, "split-brain"); return C_MASK; @@ -3184,12 +3188,13 @@ STATIC int receive_state(struct drbd_conf *mdev, struct p_header *h) put_ldev(mdev); if (nconn == C_MASK) { + nconn = C_CONNECTED; if (mdev->state.disk == D_NEGOTIATING) { drbd_force_state(mdev, NS(disk, D_DISKLESS)); - nconn = C_CONNECTED; } else if (peer_state.disk == D_NEGOTIATING) { dev_err(DEV, "Disk attach process on the peer node was aborted.\n"); peer_state.disk = D_DISKLESS; + real_peer_disk = D_DISKLESS; } else { D_ASSERT(oconn == C_WF_REPORT_PARAMS); drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); -- 1.6.3.3