[ClusterLabs] Why Do All The Services Go Down When Just One Fails?

Eric Robinson eric.robinson at psmnv.com
Sat Feb 16 15:36:25 EST 2019


As a follow-up, here is the whole config.

[root at 001db01a ~]# pcs config
Cluster Name: 001db01ab
Corosync Nodes:
001db01a 001db01b
Pacemaker Nodes:
001db01a 001db01b

Resources:
Resource: p_vip_clust01 (class=ocf provider=heartbeat type=IPaddr2)
  Attributes: cidr_netmask=32 ip=10.51.14.75
  Operations: monitor interval=30s (p_vip_clust01-monitor-interval-30s)
              start interval=0s timeout=20s (p_vip_clust01-start-interval-0s)
              stop interval=0s timeout=20s (p_vip_clust01-stop-interval-0s)
Master: ms_drbd0
  Meta Attrs: master-node-max=1 clone-max=2 notify=true master-max=1 clone-node-max=1
  Resource: p_drbd0 (class=ocf provider=linbit type=drbd)
   Attributes: drbd_resource=ha01_mysql
   Operations: demote interval=0s timeout=90 (p_drbd0-demote-interval-0s)
               monitor interval=60s (p_drbd0-monitor-interval-60s)
               notify interval=0s timeout=90 (p_drbd0-notify-interval-0s)
               promote interval=0s timeout=90 (p_drbd0-promote-interval-0s)
               reload interval=0s timeout=30 (p_drbd0-reload-interval-0s)
               start interval=0s timeout=240 (p_drbd0-start-interval-0s)
               stop interval=0s timeout=100 (p_drbd0-stop-interval-0s)
Master: ms_drbd1
  Meta Attrs: master-node-max=1 clone-max=2 notify=true master-max=1 clone-node-max=1
  Resource: p_drbd1 (class=ocf provider=linbit type=drbd)
   Attributes: drbd_resource=ha02_mysql
   Operations: demote interval=0s timeout=90 (p_drbd1-demote-interval-0s)
               monitor interval=60s (p_drbd1-monitor-interval-60s)
               notify interval=0s timeout=90 (p_drbd1-notify-interval-0s)
               promote interval=0s timeout=90 (p_drbd1-promote-interval-0s)
               reload interval=0s timeout=30 (p_drbd1-reload-interval-0s)
               start interval=0s timeout=240 (p_drbd1-start-interval-0s)
               stop interval=0s timeout=100 (p_drbd1-stop-interval-0s)
Resource: p_fs_clust01 (class=ocf provider=heartbeat type=Filesystem)
  Attributes: device=/dev/drbd0 directory=/ha01_mysql fstype=ext4 options=noatime
  Operations: monitor interval=20 timeout=40 (p_fs_clust01-monitor-interval-20)
              notify interval=0s timeout=60 (p_fs_clust01-notify-interval-0s)
              start interval=0s timeout=60 (p_fs_clust01-start-interval-0s)
              stop interval=0s timeout=60 (p_fs_clust01-stop-interval-0s)
Resource: p_fs_clust02 (class=ocf provider=heartbeat type=Filesystem)
  Attributes: device=/dev/drbd1 directory=/ha02_mysql fstype=ext4 options=noatime
  Operations: monitor interval=20 timeout=40 (p_fs_clust02-monitor-interval-20)
              notify interval=0s timeout=60 (p_fs_clust02-notify-interval-0s)
              start interval=0s timeout=60 (p_fs_clust02-start-interval-0s)
              stop interval=0s timeout=60 (p_fs_clust02-stop-interval-0s)
Resource: p_vip_clust02 (class=ocf provider=heartbeat type=IPaddr2)
  Attributes: cidr_netmask=32 ip=10.51.14.76
  Operations: monitor interval=30s (p_vip_clust02-monitor-interval-30s)
              start interval=0s timeout=20s (p_vip_clust02-start-interval-0s)
              stop interval=0s timeout=20s (p_vip_clust02-stop-interval-0s)
Resource: p_mysql_001 (class=lsb type=mysql_001)
  Operations: force-reload interval=0s timeout=15 (p_mysql_001-force-reload-interval-0s)
              monitor interval=15 timeout=15 (p_mysql_001-monitor-interval-15)
              restart interval=0s timeout=15 (p_mysql_001-restart-interval-0s)
              start interval=0s timeout=15 (p_mysql_001-start-interval-0s)
              stop interval=0s timeout=15 (p_mysql_001-stop-interval-0s)
Resource: p_mysql_000 (class=lsb type=mysql_000)
  Operations: force-reload interval=0s timeout=15 (p_mysql_000-force-reload-interval-0s)
              monitor interval=15 timeout=15 (p_mysql_000-monitor-interval-15)
              restart interval=0s timeout=15 (p_mysql_000-restart-interval-0s)
              start interval=0s timeout=15 (p_mysql_000-start-interval-0s)
              stop interval=0s timeout=15 (p_mysql_000-stop-interval-0s)
Resource: p_mysql_002 (class=lsb type=mysql_002)
  Operations: force-reload interval=0s timeout=15 (p_mysql_002-force-reload-interval-0s)
              monitor interval=15 timeout=15 (p_mysql_002-monitor-interval-15)
              restart interval=0s timeout=15 (p_mysql_002-restart-interval-0s)
              start interval=0s timeout=15 (p_mysql_002-start-interval-0s)
              stop interval=0s timeout=15 (p_mysql_002-stop-interval-0s)
Resource: p_mysql_003 (class=lsb type=mysql_003)
  Operations: force-reload interval=0s timeout=15 (p_mysql_003-force-reload-interval-0s)
              monitor interval=15 timeout=15 (p_mysql_003-monitor-interval-15)
              restart interval=0s timeout=15 (p_mysql_003-restart-interval-0s)
              start interval=0s timeout=15 (p_mysql_003-start-interval-0s)
              stop interval=0s timeout=15 (p_mysql_003-stop-interval-0s)
Resource: p_mysql_004 (class=lsb type=mysql_004)
  Operations: force-reload interval=0s timeout=15 (p_mysql_004-force-reload-interval-0s)
              monitor interval=15 timeout=15 (p_mysql_004-monitor-interval-15)
              restart interval=0s timeout=15 (p_mysql_004-restart-interval-0s)
              start interval=0s timeout=15 (p_mysql_004-start-interval-0s)
              stop interval=0s timeout=15 (p_mysql_004-stop-interval-0s)
Resource: p_mysql_005 (class=lsb type=mysql_005)
  Operations: force-reload interval=0s timeout=15 (p_mysql_005-force-reload-interval-0s)
              monitor interval=15 timeout=15 (p_mysql_005-monitor-interval-15)
              restart interval=0s timeout=15 (p_mysql_005-restart-interval-0s)
              start interval=0s timeout=15 (p_mysql_005-start-interval-0s)
              stop interval=0s timeout=15 (p_mysql_005-stop-interval-0s)
Resource: p_mysql_006 (class=lsb type=mysql_006)
  Operations: force-reload interval=0s timeout=15 (p_mysql_006-force-reload-interval-0s)
              monitor interval=15 timeout=15 (p_mysql_006-monitor-interval-15)
              restart interval=0s timeout=15 (p_mysql_006-restart-interval-0s)
              start interval=0s timeout=15 (p_mysql_006-start-interval-0s)
              stop interval=0s timeout=15 (p_mysql_006-stop-interval-0s)
Resource: p_mysql_007 (class=lsb type=mysql_007)
  Operations: force-reload interval=0s timeout=15 (p_mysql_007-force-reload-interval-0s)
              monitor interval=15 timeout=15 (p_mysql_007-monitor-interval-15)
              restart interval=0s timeout=15 (p_mysql_007-restart-interval-0s)
              start interval=0s timeout=15 (p_mysql_007-start-interval-0s)
              stop interval=0s timeout=15 (p_mysql_007-stop-interval-0s)
Resource: p_mysql_008 (class=lsb type=mysql_008)
  Operations: force-reload interval=0s timeout=15 (p_mysql_008-force-reload-interval-0s)
              monitor interval=15 timeout=15 (p_mysql_008-monitor-interval-15)
              restart interval=0s timeout=15 (p_mysql_008-restart-interval-0s)
              start interval=0s timeout=15 (p_mysql_008-start-interval-0s)
              stop interval=0s timeout=15 (p_mysql_008-stop-interval-0s)
Resource: p_mysql_622 (class=lsb type=mysql_622)
  Operations: force-reload interval=0s timeout=15 (p_mysql_622-force-reload-interval-0s)
              monitor interval=15 timeout=15 (p_mysql_622-monitor-interval-15)
              restart interval=0s timeout=15 (p_mysql_622-restart-interval-0s)
              start interval=0s timeout=15 (p_mysql_622-start-interval-0s)
              stop interval=0s timeout=15 (p_mysql_622-stop-interval-0s)

Stonith Devices:
Fencing Levels:

Location Constraints:
  Resource: p_vip_clust02
    Enabled on: 001db01b (score:INFINITY) (role: Started) (id:cli-prefer-p_vip_clust02)
Ordering Constraints:
  promote ms_drbd0 then start p_fs_clust01 (kind:Mandatory)
  promote ms_drbd1 then start p_fs_clust02 (kind:Mandatory)
  start p_fs_clust01 then start p_vip_clust01 (kind:Mandatory)
  start p_fs_clust02 then start p_vip_clust02 (kind:Mandatory)
  start p_vip_clust01 then start p_mysql_001 (kind:Mandatory)
  start p_vip_clust01 then start p_mysql_002 (kind:Mandatory)
  start p_vip_clust01 then start p_mysql_003 (kind:Mandatory)
  start p_vip_clust01 then start p_mysql_004 (kind:Mandatory)
  start p_vip_clust01 then start p_mysql_005 (kind:Mandatory)
  start p_vip_clust02 then start p_mysql_006 (kind:Mandatory)
  start p_vip_clust02 then start p_mysql_007 (kind:Mandatory)
  start p_vip_clust02 then start p_mysql_008 (kind:Mandatory)
  start p_vip_clust01 then start p_mysql_622 (kind:Mandatory)
Colocation Constraints:
  p_fs_clust01 with ms_drbd0 (score:INFINITY) (with-rsc-role:Master)
  p_fs_clust02 with ms_drbd1 (score:INFINITY) (with-rsc-role:Master)
  p_vip_clust01 with p_fs_clust01 (score:INFINITY)
  p_vip_clust02 with p_fs_clust02 (score:INFINITY)
  p_mysql_001 with p_vip_clust01 (score:INFINITY)
  p_mysql_000 with p_vip_clust01 (score:INFINITY)
  p_mysql_002 with p_vip_clust01 (score:INFINITY)
  p_mysql_003 with p_vip_clust01 (score:INFINITY)
  p_mysql_004 with p_vip_clust01 (score:INFINITY)
  p_mysql_005 with p_vip_clust01 (score:INFINITY)
  p_mysql_006 with p_vip_clust02 (score:INFINITY)
  p_mysql_007 with p_vip_clust02 (score:INFINITY)
  p_mysql_008 with p_vip_clust02 (score:INFINITY)
  p_mysql_622 with p_vip_clust01 (score:INFINITY)
Ticket Constraints:

Alerts:
No alerts defined

Resources Defaults:
resource-stickiness: 100
Operations Defaults:
No defaults set

Cluster Properties:
cluster-infrastructure: corosync
cluster-name: 001db01ab
dc-version: 1.1.18-11.el7_5.3-2b07d5c5a9
have-watchdog: false
last-lrm-refresh: 1550347798
maintenance-mode: false
no-quorum-policy: ignore
stonith-enabled: false

--Eric


From: Users <users-bounces at clusterlabs.org> On Behalf Of Eric Robinson
Sent: Saturday, February 16, 2019 12:34 PM
To: Cluster Labs - All topics related to open-source clustering welcomed <users at clusterlabs.org>
Subject: [ClusterLabs] Why Do All The Services Go Down When Just One Fails?

These are the resources on our cluster.

[root at 001db01a ~]# pcs status
Cluster name: 001db01ab
Stack: corosync
Current DC: 001db01a (version 1.1.18-11.el7_5.3-2b07d5c5a9) - partition with quorum
Last updated: Sat Feb 16 15:24:55 2019
Last change: Sat Feb 16 15:10:21 2019 by root via cibadmin on 001db01b

2 nodes configured
18 resources configured

Online: [ 001db01a 001db01b ]

Full list of resources:

p_vip_clust01  (ocf::heartbeat:IPaddr2):       Started 001db01a
Master/Slave Set: ms_drbd0 [p_drbd0]
     Masters: [ 001db01a ]
     Slaves: [ 001db01b ]
Master/Slave Set: ms_drbd1 [p_drbd1]
     Masters: [ 001db01b ]
     Slaves: [ 001db01a ]
p_fs_clust01   (ocf::heartbeat:Filesystem):    Started 001db01a
p_fs_clust02   (ocf::heartbeat:Filesystem):    Started 001db01b
p_vip_clust02  (ocf::heartbeat:IPaddr2):       Started 001db01b
p_mysql_001    (lsb:mysql_001):        Started 001db01a
p_mysql_000    (lsb:mysql_000):        Started 001db01a
p_mysql_002    (lsb:mysql_002):        Started 001db01a
p_mysql_003    (lsb:mysql_003):        Started 001db01a
p_mysql_004    (lsb:mysql_004):        Started 001db01a
p_mysql_005    (lsb:mysql_005):        Started 001db01a
p_mysql_006    (lsb:mysql_006):        Started 001db01b
p_mysql_007    (lsb:mysql_007):        Started 001db01b
p_mysql_008    (lsb:mysql_008):        Started 001db01b
p_mysql_622    (lsb:mysql_622):        Started 001db01a

Daemon Status:
  corosync: active/enabled
  pacemaker: active/enabled
  pcsd: active/enabled

Why is it that when one of the resources that start with p_mysql_* goes into a FAILED state, all the other MySQL services also stop?

[root at 001db01a ~]# pcs constraint
Location Constraints:
  Resource: p_vip_clust02
    Enabled on: 001db01b (score:INFINITY) (role: Started)
Ordering Constraints:
  promote ms_drbd0 then start p_fs_clust01 (kind:Mandatory)
  promote ms_drbd1 then start p_fs_clust02 (kind:Mandatory)
  start p_fs_clust01 then start p_vip_clust01 (kind:Mandatory)
  start p_fs_clust02 then start p_vip_clust02 (kind:Mandatory)
  start p_vip_clust01 then start p_mysql_001 (kind:Mandatory)
  start p_vip_clust01 then start p_mysql_002 (kind:Mandatory)
  start p_vip_clust01 then start p_mysql_003 (kind:Mandatory)
  start p_vip_clust01 then start p_mysql_004 (kind:Mandatory)
  start p_vip_clust01 then start p_mysql_005 (kind:Mandatory)
  start p_vip_clust02 then start p_mysql_006 (kind:Mandatory)
  start p_vip_clust02 then start p_mysql_007 (kind:Mandatory)
  start p_vip_clust02 then start p_mysql_008 (kind:Mandatory)
  start p_vip_clust01 then start p_mysql_622 (kind:Mandatory)
Colocation Constraints:
  p_fs_clust01 with ms_drbd0 (score:INFINITY) (with-rsc-role:Master)
  p_fs_clust02 with ms_drbd1 (score:INFINITY) (with-rsc-role:Master)
  p_vip_clust01 with p_fs_clust01 (score:INFINITY)
  p_vip_clust02 with p_fs_clust02 (score:INFINITY)
  p_mysql_001 with p_vip_clust01 (score:INFINITY)
  p_mysql_000 with p_vip_clust01 (score:INFINITY)
  p_mysql_002 with p_vip_clust01 (score:INFINITY)
  p_mysql_003 with p_vip_clust01 (score:INFINITY)
  p_mysql_004 with p_vip_clust01 (score:INFINITY)
  p_mysql_005 with p_vip_clust01 (score:INFINITY)
  p_mysql_006 with p_vip_clust02 (score:INFINITY)
  p_mysql_007 with p_vip_clust02 (score:INFINITY)
  p_mysql_008 with p_vip_clust02 (score:INFINITY)
  p_mysql_622 with p_vip_clust01 (score:INFINITY)
Ticket Constraints:

--Eric





-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.clusterlabs.org/pipermail/users/attachments/20190216/2570d21d/attachment-0002.html>


More information about the Users mailing list