[Pacemaker] IPaddr2 monitoring failure

Larry Brigman larry.brigman at gmail.com
Fri Feb 3 15:04:49 EST 2012


I have a few clusters that we are about to put into production.
One of the tests was to pull the public network cable on the
node that hosted the Virtual IP for the cluster.
We did, and the Virtual IP didn't move; it stayed on the node originally
selected.

Now I'm thinking that I don't have the configuration set up correctly.

Setup: 3 to 5 nodes. Two private switches for corosync/pacemaker
communication, with an active-backup bond into the private switches
(sketched below).

The public interfaces of all the nodes are on the same network and are
pingable from each other.
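
For reference, the private-side bond on each node looks roughly like this
(RHEL5-style ifcfg files; the slave device names are illustrative, and the
address shown is node1's internal IP from the CIB below):

/etc/sysconfig/network-scripts/ifcfg-bond0:
    DEVICE=bond0
    IPADDR=4.0.0.1
    NETMASK=255.255.255.0
    BOOTPROTO=none
    ONBOOT=yes
    # active-backup with MII link monitoring every 100 ms
    BONDING_OPTS="mode=active-backup miimon=100"

/etc/sysconfig/network-scripts/ifcfg-eth1 (and likewise a second slave
into the other private switch):
    DEVICE=eth1
    MASTER=bond0
    SLAVE=yes
    BOOTPROTO=none
    ONBOOT=yes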

corosync 1.4.2
openais 1.1.3
pacemaker 1.1.5
cluster-glue 1.0.6
resource-agents 1.0.4
RHEL5 with a 2.6.32.49 kernel

Pacemaker config XML:
[root@mfg311 ~]# cibadmin --query
<cib epoch="43" num_updates="83" admin_epoch="0"
validate-with="pacemaker-1.2" crm_feature_set="3.0.5" have-quorum="1"
dc-uuid="mfg2-1">
  <configuration>
    <crm_config>
      <cluster_property_set id="cib-bootstrap-options">
        <nvpair id="cib-bootstrap-options-dc-version" name="dc-version"
value="1.1.5-1.2.sme-01e86afaaa6d4a8c4836f68df80ababd6ca3902f"/>
        <nvpair id="cib-bootstrap-options-cluster-infrastructure"
name="cluster-infrastructure" value="openais"/>
        <nvpair id="cib-bootstrap-options-expected-quorum-votes"
name="expected-quorum-votes" value="3"/>
        <nvpair id="cib-bootstrap-options-stonith-enabled"
name="stonith-enabled" value="false"/>
        <nvpair id="cib-bootstrap-options-no-quorum-policy"
name="no-quorum-policy" value="ignore"/>
        <nvpair id="cib-bootstrap-options-default-resource-stickiness"
name="default-resource-stickiness" value="1000"/>
        <nvpair id="cib-bootstrap-options-enable-acl" name="enable-acl"
value="true"/>
      </cluster_property_set>
    </crm_config>
    <nodes>
      <node id="mfg2-1" uname="mfg2-1" type="normal">
        <instance_attributes id="nodes-mfg2-1">
          <nvpair id="nodes-mfg2-1-internalname" name="internalname"
value="node1.local"/>
          <nvpair id="nodes-mfg2-1-internalip" name="internalip"
value="4.0.0.1"/>
          <nvpair id="nodes-mfg2-1-internalid" name="internalid" value="1"/>
          <nvpair id="nodes-mfg2-1-adminip" name="adminip"
value="134.242.157.62"/>
        </instance_attributes>
      </node>
      <node id="mfg2-2" uname="mfg2-2" type="normal">
        <instance_attributes id="nodes-mfg2-2">
          <nvpair id="nodes-mfg2-2-internalname" name="internalname"
value="node2.local"/>
          <nvpair id="nodes-mfg2-2-internalip" name="internalip"
value="4.0.0.2"/>
          <nvpair id="nodes-mfg2-2-internalid" name="internalid" value="2"/>
          <nvpair id="nodes-mfg2-2-adminip" name="adminip"
value="134.242.157.60"/>
        </instance_attributes>
      </node>
      <node id="mfg2-3" uname="mfg2-3" type="normal">
        <instance_attributes id="nodes-mfg2-3">
          <nvpair id="nodes-mfg2-3-internalname" name="internalname"
value="node3.local"/>
          <nvpair id="nodes-mfg2-3-internalip" name="internalip"
value="4.0.0.3"/>
          <nvpair id="nodes-mfg2-3-internalid" name="internalid" value="3"/>
          <nvpair id="nodes-mfg2-3-adminip" name="adminip"
value="134.242.157.58"/>
        </instance_attributes>
      </node>
    </nodes>
    <resources>
      <primitive class="ocf" id="ClusterIP" provider="heartbeat"
type="IPaddr2">
        <instance_attributes id="ClusterIP-instance_attributes">
          <nvpair id="ClusterIP-instance_attributes-ip" name="ip"
value="134.242.157.65"/>
          <nvpair id="ClusterIP-instance_attributes-cidr_netmask"
name="cidr_netmask" value="32"/>
          <nvpair id="ClusterIP-instance_attributes-nic" name="nic"
value="lan0"/>
        </instance_attributes>
        <operations>
          <op id="ClusterIP-monitor-30s" interval="30s" name="monitor"/>
        </operations>
      </primitive>
    </resources>
    <constraints/>
    <acls>
      <acl_role id="monitor">
        <read id="monitor-read" xpath="/cib"/>
      </acl_role>
      <acl_role id="admin">
        <write id="admin-write" xpath="/cib"/>
      </acl_role>
      <acl_user id="nvs">
        <role_ref id="monitor"/>
      </acl_user>
    </acls>
  </configuration>
  <status>
    <node_state id="mfg2-1" uname="mfg2-1" crmd="online"
crm-debug-origin="do_state_transition" ha="active" in_ccm="true"
join="member" expected="member" shutdown="0">
      <transient_attributes id="mfg2-1">
        <instance_attributes id="status-mfg2-1">
          <nvpair id="status-mfg2-1-probe_complete" name="probe_complete"
value="true"/>
        </instance_attributes>
      </transient_attributes>
      <lrm id="mfg2-1">
        <lrm_resources>
          <lrm_resource id="ClusterIP" type="IPaddr2" class="ocf"
provider="heartbeat">
            <lrm_rsc_op id="ClusterIP_monitor_0" operation="monitor"
crm-debug-origin="build_active_RAs" crm_feature_set="3.0.5"
transition-key="6:15:7:35783a82-6ec5-4e0a-b8e9-fb7705874df6"
transition-magic="0:7;6:15:7:35783a82-6ec5-4e0a-b8e9-fb7705874df6"
call-id="2" rc-code="7" op-status="0" interval="0" last-run="1328131950"
last-rc-change="1328131950" exec-time="30" queue-time="0"
op-digest="3743ac5035464b9704ea522bbbe4b1df"/>
            <lrm_rsc_op id="ClusterIP_stop_0" operation="stop"
crm-debug-origin="build_active_RAs" crm_feature_set="3.0.5"
transition-key="7:114:0:c9a528d7-1112-4e18-aef2-d076d88d3f77"
transition-magic="0:0;7:114:0:c9a528d7-1112-4e18-aef2-d076d88d3f77"
call-id="8" rc-code="0" op-status="0" interval="0" last-run="1328231542"
last-rc-change="1328231542" exec-time="30" queue-time="0"
op-digest="3743ac5035464b9704ea522bbbe4b1df"/>
            <lrm_rsc_op id="ClusterIP_start_0" operation="start"
crm-debug-origin="build_active_RAs" crm_feature_set="3.0.5"
transition-key="8:117:0:c9a528d7-1112-4e18-aef2-d076d88d3f77"
transition-magic="0:0;8:117:0:c9a528d7-1112-4e18-aef2-d076d88d3f77"
call-id="9" rc-code="0" op-status="0" interval="0" last-run="1328232621"
last-rc-change="1328232621" exec-time="40" queue-time="0"
op-digest="3743ac5035464b9704ea522bbbe4b1df"/>
            <lrm_rsc_op id="ClusterIP_monitor_30000" operation="monitor"
crm-debug-origin="build_active_RAs" crm_feature_set="3.0.5"
transition-key="9:117:0:c9a528d7-1112-4e18-aef2-d076d88d3f77"
transition-magic="0:0;9:117:0:c9a528d7-1112-4e18-aef2-d076d88d3f77"
call-id="10" rc-code="0" op-status="0" interval="30000"
last-rc-change="1328232622" exec-time="20" queue-time="0"
op-digest="6a42b0adc3d196e1e2e5f47ee2281b5f"/>
          </lrm_resource>
        </lrm_resources>
      </lrm>
    </node_state>
    <node_state id="mfg2-2" uname="mfg2-2" crmd="online"
crm-debug-origin="do_state_transition" ha="active" in_ccm="true"
join="member" expected="member" shutdown="0">
      <transient_attributes id="mfg2-2">
        <instance_attributes id="status-mfg2-2">
          <nvpair id="status-mfg2-2-probe_complete" name="probe_complete"
value="true"/>
        </instance_attributes>
      </transient_attributes>
      <lrm id="mfg2-2">
        <lrm_resources>
          <lrm_resource id="ClusterIP" type="IPaddr2" class="ocf"
provider="heartbeat">
            <lrm_rsc_op id="ClusterIP_monitor_0" operation="monitor"
crm-debug-origin="build_active_RAs" crm_feature_set="3.0.5"
transition-key="7:16:7:35783a82-6ec5-4e0a-b8e9-fb7705874df6"
transition-magic="0:7;7:16:7:35783a82-6ec5-4e0a-b8e9-fb7705874df6"
call-id="2" rc-code="7" op-status="0" interval="0" last-run="1328131971"
last-rc-change="1328131971" exec-time="30" queue-time="0"
op-digest="3743ac5035464b9704ea522bbbe4b1df"/>
            <lrm_rsc_op id="ClusterIP_start_0" operation="start"
crm-debug-origin="build_active_RAs" crm_feature_set="3.0.5"
transition-key="8:114:0:c9a528d7-1112-4e18-aef2-d076d88d3f77"
transition-magic="0:0;8:114:0:c9a528d7-1112-4e18-aef2-d076d88d3f77"
call-id="6" rc-code="0" op-status="0" interval="0" last-run="1328231542"
last-rc-change="1328231542" exec-time="30" queue-time="0"
op-digest="3743ac5035464b9704ea522bbbe4b1df"/>
            <lrm_rsc_op id="ClusterIP_stop_0" operation="stop"
crm-debug-origin="build_active_RAs" crm_feature_set="3.0.5"
transition-key="7:117:0:c9a528d7-1112-4e18-aef2-d076d88d3f77"
transition-magic="0:0;7:117:0:c9a528d7-1112-4e18-aef2-d076d88d3f77"
call-id="8" rc-code="0" op-status="0" interval="0" last-run="1328232621"
last-rc-change="1328232621" exec-time="30" queue-time="0"
op-digest="3743ac5035464b9704ea522bbbe4b1df"/>
          </lrm_resource>
        </lrm_resources>
      </lrm>
    </node_state>
    <node_state id="mfg2-3" uname="mfg2-3" crmd="online"
crm-debug-origin="do_update_resource" ha="active" in_ccm="true"
join="member" expected="member" shutdown="0">
      <lrm id="mfg2-3">
        <lrm_resources>
          <lrm_resource id="ClusterIP" type="IPaddr2" class="ocf"
provider="heartbeat">
            <lrm_rsc_op id="ClusterIP_monitor_0" operation="monitor"
crm-debug-origin="do_update_resource" crm_feature_set="3.0.5"
transition-key="7:133:7:c9a528d7-1112-4e18-aef2-d076d88d3f77"
transition-magic="0:7;7:133:7:c9a528d7-1112-4e18-aef2-d076d88d3f77"
call-id="2" rc-code="7" op-status="0" interval="0" last-run="1328243701"
last-rc-change="1328243701" exec-time="20" queue-time="0"
op-digest="3743ac5035464b9704ea522bbbe4b1df"/>
          </lrm_resource>
        </lrm_resources>
      </lrm>
      <transient_attributes id="mfg2-3">
        <instance_attributes id="status-mfg2-3">
          <nvpair id="status-mfg2-3-probe_complete" name="probe_complete"
value="true"/>
        </instance_attributes>
      </transient_attributes>
    </node_state>
  </status>
</cib>

Should IPaddr2 detect the loss of link and move the Virtual IP?
Examining the agent's code, I don't see that it would detect that
failure. What other resources/agents would you recommend configuring to
correctly migrate the Virtual IP?
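
For what it's worth, the direction I was considering is cloning
ocf:pacemaker:ping across the nodes and adding a location constraint that
pushes ClusterIP off any node that loses public connectivity. As far as I
can tell, IPaddr2's monitor only checks that the address is still
configured on the interface (which it still is with the cable pulled),
not the link itself. A rough, untested sketch follows; the clone would go
under <resources>, the constraint under the currently empty
<constraints/>, and the gateway address 134.242.157.1 is a placeholder:

<clone id="Connectivity">
  <primitive id="ping" class="ocf" provider="pacemaker" type="ping">
    <instance_attributes id="ping-instance_attributes">
      <!-- hosts to ping on the public network; gateway is a placeholder -->
      <nvpair id="ping-instance_attributes-host_list" name="host_list"
              value="134.242.157.1"/>
      <nvpair id="ping-instance_attributes-dampen" name="dampen" value="5s"/>
      <nvpair id="ping-instance_attributes-multiplier" name="multiplier"
              value="1000"/>
    </instance_attributes>
    <operations>
      <op id="ping-monitor-15s" interval="15s" name="monitor"/>
    </operations>
  </primitive>
</clone>

<rsc_location id="ClusterIP-needs-connectivity" rsc="ClusterIP">
  <!-- ban ClusterIP from any node whose pingd attribute is missing or 0 -->
  <rule id="ClusterIP-needs-connectivity-rule" score="-INFINITY"
        boolean-op="or">
    <expression id="pingd-not-defined" attribute="pingd"
                operation="not_defined"/>
    <expression id="pingd-zero" attribute="pingd" operation="lte" value="0"/>
  </rule>
</rsc_location>

If I understand the agent correctly, ping writes each node's connectivity
into a "pingd" node attribute, and the -INFINITY rule then keeps the VIP
off any node where that attribute is missing or zero. Does that sound
right, or is there a better approach on 1.1.5?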