[Pacemaker] Being fenced node is killed again and again even the connection is recovered!

Javen Wu wu.javen at gmail.com
Fri May 14 10:02:15 UTC 2010


I forgot to mention the versions I used.
I used SLES11-SP1-HAE Beta5
Pacemaker 1.0.7
Corosync 1.2.0
Cluster Glue 1.0.3


2010/5/14 Javen Wu <wu.javen at gmail.com>

> Hi Folks,
>
> I set up a three-node cluster with SBD STONITH configured.
> After I manually isolated one node by running "ifconfig eth1 down" on that
> node, the node was fenced as expected.
> But after the reboot, even though the network had recovered, the node was
> killed again as soon as I started openais and pacemaker.
> I saw the state of the node change from OFFLINE to ONLINE in `crm_mon -n`
> before it was killed, and I saw the SBD slot go from reset->clear->reset.
>
> I attached the syslog and corosync log.
> And my CIB configuration is very simple.
>
> Could you help me check what the problem is? To my mind, this is not the
> expected behaviour.
>
> ===%<====CIB information=====================
>
> <cib validate-with="pacemaker-1.0" crm_feature_set="3.0.1" have-quorum="1"
> admin_epoch="0" epoch="349" num_updates="99" cib-last-written="Fri May 14
> 14:50:21 2010" dc-uuid="vm209">
>   <configuration>
>     <crm_config>
>       <cluster_property_set id="cib-bootstrap-options">
>         <nvpair id="cib-bootstrap-options-dc-version" name="dc-version"
> value="1.1.1-530add2a3721a0ecccb24660a97dbfdaa3e68f51"/>
>         <nvpair id="cib-bootstrap-options-cluster-infrastructure"
> name="cluster-infrastructure" value="openais"/>
>         <nvpair id="cib-bootstrap-options-expected-quorum-votes"
> name="expected-quorum-votes" value="3"/>
>       </cluster_property_set>
>     </crm_config>
>     <nodes>
>       <node id="vm208" uname="vm208" type="normal"/>
>       <node id="vm209" uname="vm209" type="normal"/>
>       <node id="vm210" uname="vm210" type="normal"/>
>     </nodes>
>     <resources>
>       <clone id="Fencing">
>         <primitive class="stonith" id="sbd-fencing" type="external/sbd">
>           <instance_attributes id="sbd-fencing-instance_attributes">
>             <nvpair id="sbd-fencing-instance_attributes-sbd_device"
> name="sbd_device" value="/dev/sdc"/>
>           </instance_attributes>
>           <operations>
>             <op id="sbd-fencing-monitor-20s" interval="20s"
> name="monitor"/>
>           </operations>
>         </primitive>
>       </clone>
>     </resources>
>     <constraints/>
>     <rsc_defaults/>
>     <op_defaults/>
>   </configuration>
>   <status>
>     <node_state id="vm209" uname="vm209" ha="active" in_ccm="true"
> crmd="online" join="member" expected="member"
> crm-debug-origin="post_cache_update" shutdown="0">
>       <transient_attributes id="vm209">
>         <instance_attributes id="status-vm209">
>           <nvpair id="status-vm209-probe_complete" name="probe_complete"
> value="true"/>
>         </instance_attributes>
>       </transient_attributes>
>       <lrm id="vm209">
>         <lrm_resources>
>           <lrm_resource id="sbd-fencing:0" type="external/sbd"
> class="stonith">
>             <lrm_rsc_op id="sbd-fencing:0_monitor_0" operation="monitor"
> crm-debug-origin="build_active_RAs" crm_feature_set="3.0.1"
> transition-key="4:1:7:f0adcb5c-10d1-4525-b094-b5ab1f776ee0"
> transition-magic="0:7;4:1:7:f0adcb5c-10d1-4525-b094-b5ab1f776ee0"
> call-id="2" rc-code="7" op-status="0" interval="0" last-run="1273820137"
> last-rc-change="1273820137" exec-time="60" queue-time="0"
> op-digest="4c3fd39434577fbb6540606d808ed050"/>
>             <lrm_rsc_op id="sbd-fencing:0_start_0" operation="start"
> crm-debug-origin="build_active_RAs" crm_feature_set="3.0.1"
> transition-key="5:1:0:f0adcb5c-10d1-4525-b094-b5ab1f776ee0"
> transition-magic="0:0;5:1:0:f0adcb5c-10d1-4525-b094-b5ab1f776ee0"
> call-id="3" rc-code="0" op-status="0" interval="0" last-run="1273820137"
> last-rc-change="1273820137" exec-time="10" queue-time="0"
> op-digest="4c3fd39434577fbb6540606d808ed050"/>
>             <lrm_rsc_op id="sbd-fencing:0_monitor_20000"
> operation="monitor" crm-debug-origin="build_active_RAs"
> crm_feature_set="3.0.1"
> transition-key="6:2:0:f0adcb5c-10d1-4525-b094-b5ab1f776ee0"
> transition-magic="0:0;6:2:0:f0adcb5c-10d1-4525-b094-b5ab1f776ee0"
> call-id="4" rc-code="0" op-status="0" interval="20000" last-run="1273822956"
> last-rc-change="1273820137" exec-time="1170" queue-time="0"
> op-digest="4029bbaef749649e82d602afb46dd872"/>
>           </lrm_resource>
>         </lrm_resources>
>       </lrm>
>     </node_state>
>     <node_state id="vm208" uname="vm208" ha="dead" in_ccm="false"
> crmd="offline" crm-debug-origin="send_stonith_update" join="down"
> expected="down" shutdown="0"/>
>     <node_state id="vm210" uname="vm210" ha="active" in_ccm="true"
> crmd="online" crm-debug-origin="post_cache_update" join="member"
> expected="member" shutdown="0">
>       <transient_attributes id="vm210">
>         <instance_attributes id="status-vm210">
>           <nvpair id="status-vm210-probe_complete" name="probe_complete"
> value="true"/>
>         </instance_attributes>
>       </transient_attributes>
>       <lrm id="vm210">
>         <lrm_resources>
>           <lrm_resource id="sbd-fencing:2" type="external/sbd"
> class="stonith">
>             <lrm_rsc_op id="sbd-fencing:2_monitor_0" operation="monitor"
> crm-debug-origin="build_active_RAs" crm_feature_set="3.0.1"
> transition-key="8:5:7:f0adcb5c-10d1-4525-b094-b5ab1f776ee0"
> transition-magic="0:7;8:5:7:f0adcb5c-10d1-4525-b094-b5ab1f776ee0"
> call-id="2" rc-code="7" op-status="0" interval="0" last-run="1273820388"
> last-rc-change="1273820388" exec-time="20" queue-time="0"
> op-digest="4c3fd39434577fbb6540606d808ed050"/>
>             <lrm_rsc_op id="sbd-fencing:2_start_0" operation="start"
> crm-debug-origin="build_active_RAs" crm_feature_set="3.0.1"
> transition-key="13:5:0:f0adcb5c-10d1-4525-b094-b5ab1f776ee0"
> transition-magic="0:0;13:5:0:f0adcb5c-10d1-4525-b094-b5ab1f776ee0"
> call-id="3" rc-code="0" op-status="0" interval="0" last-run="1273820388"
> last-rc-change="1273820388" exec-time="10" queue-time="0"
> op-digest="4c3fd39434577fbb6540606d808ed050"/>
>             <lrm_rsc_op id="sbd-fencing:2_monitor_20000"
> operation="monitor" crm-debug-origin="build_active_RAs"
> crm_feature_set="3.0.1"
> transition-key="14:5:0:f0adcb5c-10d1-4525-b094-b5ab1f776ee0"
> transition-magic="0:0;14:5:0:f0adcb5c-10d1-4525-b094-b5ab1f776ee0"
> call-id="4" rc-code="0" op-status="0" interval="20000" last-run="1273822976"
> last-rc-change="1273820389" exec-time="1040" queue-time="0"
> op-digest="4029bbaef749649e82d602afb46dd872"/>
>           </lrm_resource>
>         </lrm_resources>
>       </lrm>
>     </node_state>
>   </status>
> </cib>
>
>
>
>
>


-- 
Javen Wu
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.clusterlabs.org/pipermail/pacemaker/attachments/20100514/db184be5/attachment-0002.htm>


More information about the Pacemaker mailing list