[Pacemaker] Strange failover behaviour
Hans Lammerts
j.lammerts at chello.nl
Tue Nov 29 18:14:22 UTC 2011
Hi there,
I have run into something strange that I would like the community’s opinion on. I can’t figure out
what is going wrong.
I have a 2-node cluster (the nodes are named cl1 and cl2). On this cluster I’m running MySQL, Apache, and
Zarafa. Both nodes run CentOS 6.
I have downloaded all latest sources for DRBD, Cluster Glue, Resource Agents, Heartbeat
and Pacemaker and compiled them. Everything seems to be OK.
I believe my Pacemaker setup to be OK, but I may be mistaken. Will attach the config below.
What I experience when I do a failover from cl1 to cl2 is that MySQL and Zarafa fail over without
any problems, but httpd seems to get stuck in a loop of starting and stopping.
The error that is displayed is this :
apache2_monitor_10000 (node=cl2, call=502, rc=7, status=complete): not running
If I remember to set the failcount of the apache2 resource to 0, httpd will eventually start after
quite a number of retries :
[root at cl2 httpd]# crm resource failcount apache2 show cl2
scope=status name=fail-count-apache2 value=69
If I forget to reset the failcount (something you should not need to do), the failcount will reach
infinity at some time in the future, and httpd won’t start. The number of times Pacemaker
retries is also different every time.
Wait, it gets stranger…
When I put cl1 online again, the failback is initiated, and this goes without any problems. So, it looks
like the problem resides only on the second cluster node. The hardware of cl2 is different from cl1, and
it is the slower machine of the two.
Yes, I made very sure every configuration file is the same on both nodes.
And yes, I made sure the server-status section in httpd.conf is uncommented, as is the
ExtendedStatus directive. Doing a wget -O - http://localhost/server-status?auto works
perfectly.
Can anyone please tell me what the problem could be here ?
Thanks.
Versioninfo:
CentOS 6.0
DRBD 8.4.0
Glue 1.0.8
Resource agents 3.9.2
Heartbeat 3.0.5
Pacemaker 1.0.11
Pacemaker config:
node $id="62b94e0a-532f-4f99-acdb-57d6052a5635" cl1 \
attributes standby="on"
node $id="7444dfb4-2c9b-4130-83c4-c0cd3d7ec006" cl2 \
attributes standby="off"
primitive apache2 lsb:httpd \
op monitor interval="10" timeout="30" \
op start interval="0" timeout="120" \
op stop interval="0" timeout="120" \
meta target-role="Started"
primitive drbd_http ocf:linbit:drbd \
params drbd_resource="http" \
op start interval="0" timeout="240" \
op stop interval="0" timeout="100" \
op monitor interval="59s" role="Master" timeout="30s" \
op monitor interval="60s" role="Slave" timeout="30s"
primitive drbd_mysql ocf:linbit:drbd \
params drbd_resource="mysql" \
op start interval="0" timeout="240" \
op stop interval="0" timeout="100" \
op monitor interval="59s" role="Master" timeout="30s" \
op monitor interval="60s" role="Slave" timeout="30s"
primitive drbd_zarafa ocf:linbit:drbd \
params drbd_resource="zarafa" \
op start interval="0" timeout="240" \
op stop interval="0" timeout="100" \
op monitor interval="59s" role="Master" timeout="30s" \
op monitor interval="60s" role="Slave" timeout="30s"
primitive http_fs ocf:heartbeat:Filesystem \
params device="/dev/drbd1" directory="/var/www/html" fstype="ext4" options="noatime" \
op monitor interval="30s"
primitive http_ip ocf:heartbeat:IPaddr2 \
params ip="192.168.2.50" cidr_netmask="24" nic="eth0" \
op monitor interval="30s"
primitive mysql_fs ocf:heartbeat:Filesystem \
params device="/dev/drbd0" directory="/var/lib/mysql" fstype="ext4" options="noatime" \
op monitor interval="30s"
primitive mysql_ip ocf:heartbeat:IPaddr2 \
params ip="192.168.2.30" cidr_netmask="24" nic="eth0" \
op monitor interval="30s"
primitive mysqld lsb:mysqld \
op monitor interval="10" timeout="30" \
op start interval="0" timeout="120" \
op stop interval="0" timeout="120"
primitive zarafa-dagent lsb:zarafa-dagent \
op monitor interval="10" timeout="30" \
op start interval="0" timeout="120" \
op stop interval="0" timeout="120"
primitive zarafa-gateway lsb:zarafa-gateway \
op monitor interval="10" timeout="30" \
op start interval="0" timeout="120" \
op stop interval="0" timeout="120"
primitive zarafa-ical lsb:zarafa-ical \
op monitor interval="10" timeout="30" \
op start interval="0" timeout="120" \
op stop interval="0" timeout="120"
primitive zarafa-licensed lsb:zarafa-licensed \
op monitor interval="10" timeout="30" \
op start interval="0" timeout="120" \
op stop interval="0" timeout="120"
primitive zarafa-monitor lsb:zarafa-monitor \
op monitor interval="10" timeout="30" \
op start interval="0" timeout="120" \
op stop interval="0" timeout="120"
primitive zarafa-server lsb:zarafa-server \
op monitor interval="10" timeout="30" \
op start interval="0" timeout="120" \
op stop interval="0" timeout="120"
primitive zarafa-spooler lsb:zarafa-spooler \
op monitor interval="10" timeout="30" \
op start interval="0" timeout="120" \
op stop interval="0" timeout="120"
primitive zarafa_fs ocf:heartbeat:Filesystem \
params device="/dev/drbd2" directory="/var/lib/zarafa" fstype="ext4" options="noatime" \
op monitor interval="30s"
primitive zarafa_ip ocf:heartbeat:IPaddr2 \
params ip="192.168.2.40" cidr_netmask="24" nic="eth0" \
op monitor interval="30s"
group HTTP http_fs http_ip apache2 \
meta target-role="Started"
group MYSQL mysql_fs mysql_ip mysqld \
meta target-role="Started"
group ZARAFA zarafa_fs zarafa_ip zarafa-server zarafa-spooler zarafa-dagent zarafa-licensed zarafa-monitor zarafa-gateway zarafa-ical \
meta target-role="Started"
ms ms_drbd_http drbd_http \
meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
ms ms_drbd_mysql drbd_mysql \
meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
ms ms_drbd_zarafa drbd_zarafa \
meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
location cli-prefer-HTTP HTTP \
rule $id="cli-prefer-rule-HTTP" inf: #uname eq cl1
location cli-prefer-MYSQL MYSQL \
rule $id="cli-prefer-rule-MYSQL" inf: #uname eq cl1
location cli-prefer-ZARAFA ZARAFA \
rule $id="cli-prefer-rule-ZARAFA" inf: #uname eq cl1
colocation http_on_drbd inf: HTTP ms_drbd_http:Master
colocation mysql_on_drbd inf: MYSQL ms_drbd_mysql:Master
colocation zarafa_on_drbd inf: ZARAFA ms_drbd_zarafa:Master
order http_after_drbd inf: ms_drbd_http:promote HTTP:start
order mysql_after_drbd inf: ms_drbd_mysql:promote MYSQL:start
order zarafa_after_drbd inf: ms_drbd_zarafa:promote ZARAFA:start
order zarafa_after_mysql inf: MYSQL:start ZARAFA:start
property $id="cib-bootstrap-options" \
dc-version="1.0.11-9af47ddebcad19e35a61b2a20301dc038018e8e8" \
cluster-infrastructure="Heartbeat" \
stonith-enabled="false" \
no-quorum-policy="ignore"
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.clusterlabs.org/pipermail/pacemaker/attachments/20111129/56466f70/attachment-0003.html>
More information about the Pacemaker
mailing list