Hi,<br><br>I have set up a Pacemaker cluster on Ubuntu 10.04 LTS Server.<br>Furthermore, I wrote a multi-state OCF RA for the Rsyslog service. This RA passes all tests that are run by the ocf-tester tool. <br><br>Now the problem:<br>


When I first start the msSyslog resource, it is promoted on node1 and is 

fully functional. After that I set node1 to standby. The other node 

(node2) takes the master role. This behaviour is just as expected. Then I

 set node1 to online again to test whether the failback works. That is when the 

error occurs: the crmd exits and starts again. This repeats in an

 endless loop, and I can only reboot both nodes several times to get back to a

 functional state. <br>

I have attached a summary of the log file so that you can see what is 

happening exactly.<br>The cluster is configured as follows:<br>node node1 \<br>    attributes standby="off"<br>node node2 \<br>    attributes standby="off"<br>primitive resApache ocf:heartbeat:apache \<br>

    params configfile="/mnt/DRBD/drbd0/apache/cnf/apache2.conf" \<br>    op monitor interval="10" timeout="20" \<br>    op start interval="0" timeout="40" \<br>    op stop interval="0" timeout="60" \<br>

    meta target-role="Started"<br>primitive resDHCP ocf:T-Systems:dhcp3 \<br>    params config="/mnt/DRBD/drbd1/dhcp/cnf/dhcpd.conf" leases="/mnt/DRBD/drbd1/dhcp/data/dhcpd.leases" \<br>    op monitor interval="10s" timeout="30s" \<br>

    op start interval="0" timeout="120s" \<br>    op stop interval="0" timeout="120s" \<br>    meta target-role="Started"<br>primitive resDRBD0 ocf:linbit:drbd \<br>    params drbd_resource="drbd0" \<br>

    op monitor interval="60s" role="Master" timeout="120s" \<br>    op monitor interval="59s" \<br>    op start interval="0" timeout="240s" \<br>    op stop interval="0" timeout="100s"<br>

primitive resDRBD1 ocf:linbit:drbd \<br>    params drbd_resource="drbd1" \<br>    op monitor interval="60s" role="Master" timeout="120s" \<br>    op monitor interval="59s" \<br>

    op start interval="0" timeout="240s" \<br>    op stop interval="0" timeout="100s"<br>primitive resFSys0 ocf:heartbeat:Filesystem \<br>    params device="/dev/drbd0" fstype="ext4" directory="/mnt/DRBD/drbd0" \<br>

    op monitor interval="20s" timeout="40s" \<br>    op start interval="0" timeout="60s" \<br>    op stop interval="0" timeout="60s" \<br>    meta target-role="Started"<br>

primitive resFSys1 ocf:heartbeat:Filesystem \<br>    params device="/dev/drbd1" fstype="ext4" directory="/mnt/DRBD/drbd1" \<br>    op monitor interval="20s" timeout="40s" \<br>

    op start interval="0" timeout="60s" \<br>    op stop interval="0" timeout="60s" \<br>    meta target-role="Started"<br>primitive resIP0 ocf:heartbeat:IPaddr2 \<br>    params ip="10.32.194.246" nic="eth0" cidr_netmask="24" iflabel="0" \<br>

    op monitor interval="10s" \<br>    meta target-role="Started"<br>primitive resIP1 ocf:heartbeat:IPaddr2 \<br>    params ip="10.32.194.247" nic="eth0" cidr_netmask="24" iflabel="1" \<br>

    op monitor interval="10s" \<br>    meta target-role="Started"<br>primitive resIPVM ocf:heartbeat:IPaddr2 \<br>    params ip="192.168.200.30" nic="eth2" cidr_netmask="24" \<br>

    op monitor interval="10s"<br>primitive resMySQL ocf:heartbeat:mysql \<br>    params binary="/usr/bin/mysqld_safe" pid="/var/run/mysqld/mysqld.pid" socket="/var/run/mysqld/mysqld.sock" datadir="/mnt/DRBD/drbd0/mysql/data" config="/mnt/DRBD/drbd0/mysql/cnf/my.cnf" \<br>

    op monitor interval="10s" timeout="30s" \<br>    op start interval="0" timeout="120s" \<br>    op stop interval="0" timeout="120s" \<br>    meta target-role="Started"<br>

primitive resNagios lsb:nagios3 \<br>    op monitor interval="15" timeout="20s" \<br>    op start interval="0" timeout="120s" \<br>    op stop interval="0" timeout="120s"<br>

primitive resSendmail lsb:sendmail \<br>    op monitor interval="20s" timeout="60s" \<br>    op start interval="0" timeout="120s" \<br>    op stop interval="0" timeout="120s"<br>

primitive resSquid ocf:heartbeat:Squid \<br>    params squid_exe="/usr/sbin/squid" squid_pidfile="/var/run/squid.pid" squid_conf="/mnt/DRBD/drbd1/squid/cnf/squid.conf" squid_port="3128" \<br>

    op monitor interval="10s" timeout="30s" \<br>    op start interval="0" timeout="60s" \<br>    op stop interval="0" timeout="120s"<br>primitive resSyslog ocf:T-Systems:Rsyslog \<br>

    params master_config="/mnt/DRBD/drbd0/Rsyslog/cnf/rsyslog_master.conf" slave_config="/etc/rsyslog.conf" \<br>    op monitor interval="10s" role="Master" timeout="30s" \<br>

    op monitor interval="11s" timeout="33s" \<br>    op start interval="0" timeout="120s" \<br>    op stop interval="0" timeout="120s"<br>primitive resVMPS ocf:T-Systems:OpenVMPS \<br>

    params vlan_db="/mnt/DRBD/drbd1/OpenVMPS/data/vlan.db" \<br>    op monitor interval="10s" timeout="30s" \<br>    op start interval="0" timeout="120s" \<br>    op stop interval="0" timeout="120s"<br>

primitive resVPN ocf:T-Systems:OpenVPN \<br>    params config="/mnt/DRBD/drbd0/OpenVPN/cnf/openvpn.conf" \<br>    op monitor interval="10s" timeout="30s" \<br>    op start interval="0" timeout="120s" \<br>

    op stop interval="0" timeout="120s"<br>group groupIPVPN resIPVM resVPN<br>group groupNagiosApache resApache resNagios<br>ms msDRBD0 resDRBD0 \<br>    meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true" globally-unique="false" target-role="Started"<br>

ms msDRBD1 resDRBD1 \<br>    meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true" globally-unique="false" target-role="Started"<br>

ms msSyslog resSyslog \<br>    meta target-role="Started"<br>location locDRBD0Node1 msDRBD0 \<br>    rule $id="locDRBD0Node1-rule" $role="Master" 1000: #uname eq node1<br>location locDRBD1Node2 msDRBD1 \<br>

    rule $id="locDRBD1Node2-rule" $role="Master" 1000: #uname eq node2<br>location locIP0Node1 resIP0 \<br>    rule $id="locIP0Node1-rule" 1000: #uname eq node1<br>location locIP1Node2 resIP1 \<br>

    rule $id="locIP1Node2-rule" 1000: #uname eq node2<br>colocation colDRBD0FSys0 inf: resFSys0 msDRBD0:Master<br>colocation colDRBD1FSys1 inf: resFSys1 msDRBD1:Master<br>colocation colFSys0Apache inf: groupNagiosApache resFSys0<br>

colocation colFSys0MySQL inf: resMySQL resFSys0<br>colocation colFSys0Syslog inf: msSyslog:Master resFSys0<br>colocation colFSys0VPN inf: groupIPVPN resFSys0<br>colocation colFSys1DHCP inf: resDHCP resFSys1<br>colocation colFSys1Sendmail inf: resSendmail resFSys1<br>

colocation colFSys1Squid inf: resSquid resFSys1<br>colocation colFSys1VMPS inf: resVMPS resFSys1<br>colocation colIP0Apache inf: groupNagiosApache resIP0<br>colocation colIP0MySQL inf: resMySQL resIP0<br>colocation colIP0Syslog inf: msSyslog:Master resIP0<br>

colocation colIP0VPN inf: groupIPVPN resIP0<br>colocation colIP1Sendmail inf: resSendmail resIP1<br>colocation colIP1Squid inf: resSquid resIP1<br>colocation colIP1VMPS inf: resVMPS resIP1<br>colocation colVPNSyslog 1000: msSyslog:Master groupIPVPN<br>

order orderDRBD0FSys0 inf: msDRBD0:promote resFSys0:start<br>order orderDRBD1FSys1 inf: msDRBD1:promote resFSys1:start<br>order orderFSys0Apache inf: resFSys0 groupNagiosApache<br>order orderFSys0MySQL inf: resFSys0 resMySQL<br>

order orderFSys0Syslog inf: resFSys0 msSyslog:promote<br>order orderFSys0VPN inf: resFSys0 groupIPVPN<br>order orderFSys1DHCP inf: resFSys1 resDHCP<br>order orderFSys1Sendmail inf: resFSys1 resSendmail<br>order orderFSys1Squid inf: resFSys1 resSquid<br>

order orderFSys1VMPS inf: resFSys1 resVMPS<br>order orderIP0Apache inf: resIP0 groupNagiosApache<br>order orderIP0MySQL inf: resIP0 resMySQL<br>order orderIP0Syslog inf: resIP0 msSyslog:promote<br>order orderIP0VPN inf: resIP0 groupIPVPN<br>

order orderIP1Sendmail inf: resIP1 resSendmail<br>order orderIP1Squid inf: resIP1 resSquid<br>order orderIP1VMPS inf: resIP1 resVMPS<br>order orderVPNSyslog inf: groupIPVPN msSyslog:promote<br>property $id="cib-bootstrap-options" \<br>

    dc-version="1.0.8-042548a451fce8400660f6031f4da6f0223dd5dd" \<br>    cluster-infrastructure="openais" \<br>    expected-quorum-votes="2" \<br>    stonith-enabled="false" \<br>    no-quorum-policy="ignore" \<br>

    last-lrm-refresh="1291802109"<br><br><br>Does anyone have an idea why the crmd keeps restarting 

after the failback? <br><br>Thank you for your help.<br clear="all">

<br>-- <br><br><br>Regards,<br><br>Simon Jansen<br><br><br>---------------------------<br><font color="#888888">Simon Jansen<br>64291 Darmstadt</font><br>