[ClusterLabs] drbd nfs slave not working

Neil McFadyen neil at neilmcfadyen.com
Sun Nov 14 11:47:57 EST 2021


I have a Ubuntu 20.04 drbd nfs pacemaker/corosync setup for 2 nodes,  it
was working fine before but now I can't get the 2nd node to show as a slave
under the Clone Set.  So if I do a failover both nodes show as stopped.

root at testnfs30:/etc/drbd.d# crm status
Cluster Summary:
  * Stack: corosync
  * Current DC: testnfs32 (version 2.0.3-4b1f869f0f) - partition with quorum
  * Last updated: Sun Nov 14 11:35:09 2021
  * Last change:  Sun Nov 14 10:31:41 2021 by root via cibadmin on testnfs30
  * 2 nodes configured
  * 5 resource instances configured

Node List:
  * Node testnfs32: standby
  * Online: [ testnfs30 ]

Full List of Resources:
  * Resource Group: HA:
    * vip       (ocf::heartbeat:IPaddr2):        Started testnfs30
    * fs_nfs    (ocf::heartbeat:Filesystem):     Started testnfs30
    * nfs       (ocf::heartbeat:nfsserver):      Started testnfs30
  * Clone Set: ms_drbd_nfs [drbd_nfs] (promotable):
    * Masters: [ testnfs30 ]
    * Stopped: [ testnfs32 ]

This used to show as
* Slaves: [ testnfs32 ]

testnfs30# cat /proc/drbd
version: 8.4.11 (api:1/proto:86-101)
srcversion: FC3433D849E3B88C1E7B55C
 0: cs:Connected ro:Primary/Secondary ds:UpToDate/UpToDate C r-----
    ns:352 nr:368 dw:720 dr:4221 al:6 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:f
oos:0


testnfs30:/etc/drbd.d# drbdadm status
nfs1 role:Primary
  volume:1 disk:UpToDate
  peer role:Secondary
    volume:1 replication:Established peer-disk:UpToDate

root at testnfs30:/etc/drbd.d# crm config show
node 1: testnfs30 \
        attributes standby=off
node 2: testnfs32 \
        attributes standby=on
primitive drbd_nfs ocf:linbit:drbd \
        params drbd_resource=nfs1 \
        op monitor interval=31s timeout=20s role=Slave \
        op monitor interval=30s timeout=20s role=Master
primitive fs_nfs Filesystem \
        params device="/dev/drbd0" directory="/nfs1srv" fstype=ext4
options="noatime,nodiratime" \
        op start interval=0 timeout=60 \
        op stop interval=0 timeout=120 \
        op monitor interval=15s timeout=60s
primitive nfs nfsserver \
        params nfs_init_script="/etc/init.d/nfs-kernel-server"
nfs_shared_infodir="/nfs1srv/nfs_shared" nfs_ip=172.17.1.35 \
        op monitor interval=5s
primitive vip IPaddr2 \
        params ip=172.17.1.35 cidr_netmask=16 nic=bond0 \
        op monitor interval=20s timeout=20s \
        op start interval=0s timeout=20s \
        op stop interval=0s timeout=20s
group HA vip fs_nfs nfs \
        meta target-role=Started
ms ms_drbd_nfs drbd_nfs \
        meta master-max=1 master-node-max=1 clone-max=2 clone-node-max=1
notify=true
order fs-nfs-before-nfs inf: fs_nfs:start nfs:start
order ip-before-ms-drbd-nfs Mandatory: vip:start ms_drbd_nfs:promote
location loc ms_drbd_nfs 100: testnfs30
order ms-drbd-nfs-before-fs-nfs Mandatory: ms_drbd_nfs:promote fs_nfs:start
colocation ms-drbd-nfs-with-ha inf: ms_drbd_nfs:Master HA
property cib-bootstrap-options: \
        have-watchdog=false \
        dc-version=2.0.3-4b1f869f0f \
        cluster-infrastructure=corosync \
        cluster-name=debian \
        no-quorum-policy=ignore \
        stonith-enabled=false

I noticed that this line was added since the last time I checked, so I removed
it, but that didn't help:

location drbd-fence-by-handler-nfs1-ms_drbd_nfs ms_drbd_nfs \
        rule $role=Master -inf: #uname ne testnfs32
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.clusterlabs.org/pipermail/users/attachments/20211114/364587bd/attachment.htm>


More information about the Users mailing list