[Pacemaker] crmd exits and restarts after failback

Simon Jansen simon.jansen1 at googlemail.com
Wed Dec 8 05:58:03 EST 2010


Hi,

I have set up a pacemaker cluster on Ubuntu 10.04 LTS Server.
Furthermore, I wrote a multi-state OCF RA for the Rsyslog service. This RA
passes all tests that are run by the ocf-tester tool.

Now the problem:
When I first start the msSyslog resource, it is promoted on node1 and is fully
functional. After that I set node1 to standby, and the other node (node2) takes
over the master role. This behaviour is just as expected. Then I set node1
online again to test whether the failback works. That is where the error
occurs: the crmd exits and starts again. This repeats in an endless loop, and
the only thing I can do is reboot both nodes several times to get back to a
functional state.
I have attached a summary of the log file so that you can see exactly what is
happening. In addition, I have attached the Rsyslog RA and the cluster config.

Maybe someone has a clue why the crmd is restarting all the time after the
failback. I think that there is an error in the Rsyslog RA because the
cluster works fine when I stop the Rsyslog resource manually.

-- 


Regards,

Simon Jansen


---------------------------
Simon Jansen
64291 Darmstadt
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.clusterlabs.org/pipermail/pacemaker/attachments/20101208/444056a8/attachment.html>
-------------- next part --------------
node node1 \
	attributes standby="off"
node node2 \
	attributes standby="off"
primitive resApache ocf:heartbeat:apache \
	params configfile="/mnt/DRBD/drbd0/apache/cnf/apache2.conf" \
	op monitor interval="10" timeout="20" \
	op start interval="0" timeout="40" \
	op stop interval="0" timeout="60" \
	meta target-role="Started"
primitive resDHCP ocf:T-Systems:dhcp3 \
	params config="/mnt/DRBD/drbd1/dhcp/cnf/dhcpd.conf" leases="/mnt/DRBD/drbd1/dhcp/data/dhcpd.leases" \
	op monitor interval="10s" timeout="30s" \
	op start interval="0" timeout="120s" \
	op stop interval="0" timeout="120s" \
	meta target-role="Started"
primitive resDRBD0 ocf:linbit:drbd \
	params drbd_resource="drbd0" \
	op monitor interval="60s" role="Master" timeout="120s" \
	op monitor interval="59s" \
	op start interval="0" timeout="240s" \
	op stop interval="0" timeout="100s"
primitive resDRBD1 ocf:linbit:drbd \
	params drbd_resource="drbd1" \
	op monitor interval="60s" role="Master" timeout="120s" \
	op monitor interval="59s" \
	op start interval="0" timeout="240s" \
	op stop interval="0" timeout="100s"
primitive resFSys0 ocf:heartbeat:Filesystem \
	params device="/dev/drbd0" fstype="ext4" directory="/mnt/DRBD/drbd0" \
	op monitor interval="20s" timeout="40s" \
	op start interval="0" timeout="60s" \
	op stop interval="0" timeout="60s" \
	meta target-role="Started"
primitive resFSys1 ocf:heartbeat:Filesystem \
	params device="/dev/drbd1" fstype="ext4" directory="/mnt/DRBD/drbd1" \
	op monitor interval="20s" timeout="40s" \
	op start interval="0" timeout="60s" \
	op stop interval="0" timeout="60s" \
	meta target-role="Started"
primitive resIP0 ocf:heartbeat:IPaddr2 \
	params ip="10.32.194.246" nic="eth0" cidr_netmask="24" iflabel="0" \
	op monitor interval="10s" \
	meta target-role="Started"
primitive resIP1 ocf:heartbeat:IPaddr2 \
	params ip="10.32.194.247" nic="eth0" cidr_netmask="24" iflabel="1" \
	op monitor interval="10s" \
	meta target-role="Started"
primitive resIPVM ocf:heartbeat:IPaddr2 \
	params ip="192.168.200.30" nic="eth2" cidr_netmask="24" \
	op monitor interval="10s"
primitive resMySQL ocf:heartbeat:mysql \
	params binary="/usr/bin/mysqld_safe" pid="/var/run/mysqld/mysqld.pid" socket="/var/run/mysqld/mysqld.sock" datadir="/mnt/DRBD/drbd0/mysql/data" config="/mnt/DRBD/drbd0/mysql/cnf/my.cnf" \
	op monitor interval="10s" timeout="30s" \
	op start interval="0" timeout="120s" \
	op stop interval="0" timeout="120s" \
	meta target-role="Started"
primitive resNagios lsb:nagios3 \
	op monitor interval="15" timeout="20s" \
	op start interval="0" timeout="120s" \
	op stop interval="0" timeout="120s"
primitive resSendmail lsb:sendmail \
	op monitor interval="20s" timeout="60s" \
	op start interval="0" timeout="120s" \
	op stop interval="0" timeout="120s"
primitive resSquid ocf:heartbeat:Squid \
	params squid_exe="/usr/sbin/squid" squid_pidfile="/var/run/squid.pid" squid_conf="/mnt/DRBD/drbd1/squid/cnf/squid.conf" squid_port="3128" \
	op monitor interval="10s" timeout="30s" \
	op start interval="0" timeout="60s" \
	op stop interval="0" timeout="120s"
primitive resSyslog ocf:T-Systems:Rsyslog \
	params master_config="/mnt/DRBD/drbd0/Rsyslog/cnf/rsyslog_master.conf" slave_config="/etc/rsyslog.conf" \
	op monitor interval="10s" role="Master" timeout="30s" \
	op monitor interval="11s" timeout="33s" \
	op start interval="0" timeout="120s" \
	op stop interval="0" timeout="120s"
primitive resVMPS ocf:T-Systems:OpenVMPS \
	params vlan_db="/mnt/DRBD/drbd1/OpenVMPS/data/vlan.db" \
	op monitor interval="10s" timeout="30s" \
	op start interval="0" timeout="120s" \
	op stop interval="0" timeout="120s"
primitive resVPN ocf:T-Systems:OpenVPN \
	params config="/mnt/DRBD/drbd0/OpenVPN/cnf/openvpn.conf" \
	op monitor interval="10s" timeout="30s" \
	op start interval="0" timeout="120s" \
	op stop interval="0" timeout="120s"
group groupIPVPN resIPVM resVPN
group groupNagiosApache resApache resNagios
ms msDRBD0 resDRBD0 \
	meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true" globally-unique="false" target-role="Started"
ms msDRBD1 resDRBD1 \
	meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true" globally-unique="false" target-role="Started"
ms msSyslog resSyslog \
	meta target-role="Started"
location locDRBD0Node1 msDRBD0 \
	rule $id="locDRBD0Node1-rule" $role="Master" 1000: #uname eq node1
location locDRBD1Node2 msDRBD1 \
	rule $id="locDRBD1Node2-rule" $role="Master" 1000: #uname eq node2
location locIP0Node1 resIP0 \
	rule $id="locIP0Node1-rule" 1000: #uname eq node1
location locIP1Node2 resIP1 \
	rule $id="locIP1Node2-rule" 1000: #uname eq node2
colocation colDRBD0FSys0 inf: resFSys0 msDRBD0:Master
colocation colDRBD1FSys1 inf: resFSys1 msDRBD1:Master
colocation colFSys0Apache inf: groupNagiosApache resFSys0
colocation colFSys0MySQL inf: resMySQL resFSys0
colocation colFSys0Syslog inf: msSyslog:Master resFSys0
colocation colFSys0VPN inf: groupIPVPN resFSys0
colocation colFSys1DHCP inf: resDHCP resFSys1
colocation colFSys1Sendmail inf: resSendmail resFSys1
colocation colFSys1Squid inf: resSquid resFSys1
colocation colFSys1VMPS inf: resVMPS resFSys1
colocation colIP0Apache inf: groupNagiosApache resIP0
colocation colIP0MySQL inf: resMySQL resIP0
colocation colIP0Syslog inf: msSyslog:Master resIP0
colocation colIP0VPN inf: groupIPVPN resIP0
colocation colIP1Sendmail inf: resSendmail resIP1
colocation colIP1Squid inf: resSquid resIP1
colocation colIP1VMPS inf: resVMPS resIP1
colocation colVPNSyslog 1000: msSyslog:Master groupIPVPN
order orderDRBD0FSys0 inf: msDRBD0:promote resFSys0:start
order orderDRBD1FSys1 inf: msDRBD1:promote resFSys1:start
order orderFSys0Apache inf: resFSys0 groupNagiosApache
order orderFSys0MySQL inf: resFSys0 resMySQL
order orderFSys0Syslog inf: resFSys0 msSyslog:promote
order orderFSys0VPN inf: resFSys0 groupIPVPN
order orderFSys1DHCP inf: resFSys1 resDHCP
order orderFSys1Sendmail inf: resFSys1 resSendmail
order orderFSys1Squid inf: resFSys1 resSquid
order orderFSys1VMPS inf: resFSys1 resVMPS
order orderIP0Apache inf: resIP0 groupNagiosApache
order orderIP0MySQL inf: resIP0 resMySQL
order orderIP0Syslog inf: resIP0 msSyslog:promote
order orderIP0VPN inf: resIP0 groupIPVPN
order orderIP1Sendmail inf: resIP1 resSendmail
order orderIP1Squid inf: resIP1 resSquid
order orderIP1VMPS inf: resIP1 resVMPS
order orderVPNSyslog inf: groupIPVPN msSyslog:promote
property $id="cib-bootstrap-options" \
	dc-version="1.0.8-042548a451fce8400660f6031f4da6f0223dd5dd" \
	cluster-infrastructure="openais" \
	expected-quorum-votes="2" \
	stonith-enabled="false" \
	no-quorum-policy="ignore" \
	last-lrm-refresh="1291802109"
-------------- next part --------------
A non-text attachment was scrubbed...
Name: crmd_restart.log
Type: application/octet-stream
Size: 22534 bytes
Desc: not available
URL: <http://lists.clusterlabs.org/pipermail/pacemaker/attachments/20101208/444056a8/attachment-0004.obj>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: Rsyslog
Type: application/octet-stream
Size: 16395 bytes
Desc: not available
URL: <http://lists.clusterlabs.org/pipermail/pacemaker/attachments/20101208/444056a8/attachment-0005.obj>


More information about the Pacemaker mailing list