[Pacemaker] Exec Failure issues.

James Horsfall (CTR) jameshorsfall at stratosgsi.com
Tue Oct 18 13:38:40 EDT 2011


Hello all, I'm having some problems getting resources to fail over
properly. I need the IPs to switch to a different node when the node
holding them cannot ping. We're doing a "shut" on the respective
interfaces to simulate cables being unplugged, but I keep getting exec
timeouts and unknown errors.



crm_mon -fortA

============
Last updated: Tue Oct 18 17:32:10 2011
Stack: openais
Current DC: sgn-pau-hub0 - partition with quorum
Version: 1.1.2-f059ec7ced7a86f18e5490b67ebf4a0b963bccfe
2 Nodes configured, 2 expected votes
2 Resources configured.
============

Online: [ sgn-pau-hub0 sgn-pau-hub1 ]

Full list of resources:

 Resource Group: IPS
     ETH2       (ocf::heartbeat:IPaddr):        Started sgn-pau-hub0 (unmanaged) FAILED
     ETH3       (ocf::heartbeat:IPaddr):        Started sgn-pau-hub0 (unmanaged) FAILED
 Clone Set: ping-On-both
     peth2:1    (ocf::pacemaker:ping):  Started sgn-pau-hub0 FAILED
     Started: [ sgn-pau-hub1 ]

Node Attributes:
* Node sgn-pau-hub0:   #sometimes this says :1000 (degraded)
* Node sgn-pau-hub1:
    + pingd                             : 2000

Operations:
* Node sgn-pau-hub0:
   ETH2: migration-threshold=1000000
    + (5) start: last-rc-change='Tue Oct 18 17:28:11 2011' last-run='Tue Oct 18 17:28:11 2011' exec-time=100ms queue-time=0ms rc=0 (ok)
    + (7) monitor: interval=30000ms last-rc-change='Tue Oct 18 17:28:11 2011' last-run='Tue Oct 18 17:28:41 2011' exec-time=30ms queue-time=0ms rc=0 (ok)
    + (15) stop: last-rc-change='Tue Oct 18 17:30:29 2011' last-run='Tue Oct 18 17:30:09 2011' exec-time=20000ms queue-time=0ms rc=-2 (unknown exec error)
   ETH3: migration-threshold=1000000
    + (8) start: last-rc-change='Tue Oct 18 17:28:11 2011' last-run='Tue Oct 18 17:28:11 2011' exec-time=80ms queue-time=0ms rc=0 (ok)
    + (9) monitor: interval=30000ms last-rc-change='Tue Oct 18 17:28:11 2011' last-run='Tue Oct 18 17:28:41 2011' exec-time=30ms queue-time=0ms rc=0 (ok)
    + (12) stop: last-rc-change='Tue Oct 18 17:29:45 2011' last-run='Tue Oct 18 17:29:25 2011' exec-time=20000ms queue-time=0ms rc=-2 (unknown exec error)
   peth2:1: migration-threshold=1000000
    + (24) stop: last-rc-change='Tue Oct 18 17:33:10 2011' last-run='Tue Oct 18 17:33:10 2011' exec-time=10020ms queue-time=0ms rc=0 (ok)
    + (25) start: last-rc-change='Tue Oct 18 17:33:20 2011' last-run='Tue Oct 18 17:33:20 2011' exec-time=19030ms queue-time=0ms rc=1 (unknown error)
* Node sgn-pau-hub1:
   peth2:0: migration-threshold=1000000
    + (5) start: last-rc-change='Tue Oct 18 17:26:36 2011' last-run='Tue Oct 18 17:26:36 2011' exec-time=8070ms queue-time=0ms rc=0 (ok)
    + (6) monitor: interval=10000ms last-rc-change='Tue Oct 18 17:26:45 2011' last-run='Tue Oct 18 17:27:21 2011' exec-time=8030ms queue-time=0ms rc=0 (ok)

Failed actions:
    ETH2_stop_0 (node=sgn-pau-hub0, call=15, rc=-2, status=Timed Out): unknown exec error
    ETH3_stop_0 (node=sgn-pau-hub0, call=12, rc=-2, status=Timed Out): unknown exec error
    peth2:1_start_0 (node=sgn-pau-hub0, call=25, rc=1, status=complete): unknown error

------------------------- CRM configuration -------------------------

# Cluster members.
node sgn-pau-hub0
node sgn-pau-hub1
# Floating IP addresses, one per interface (eth2 and eth3). Only a monitor
# operation is declared for each; no start/stop operations are listed.
primitive ETH2 ocf:heartbeat:IPaddr \
        params ip="10.151.9.42" cidr_netmask="255.255.255.248" nic="eth2" \
        op monitor interval="30s" timeout="60" \
        meta target-role="Started" allow-migrate="true"
primitive ETH3 ocf:heartbeat:IPaddr \
        params ip="10.151.9.49" cidr_netmask="255.255.255.248" nic="eth3" \
        op monitor interval="30s" timeout="60" \
        meta target-role="Started" allow-migrate="true"
# Connectivity check against two hosts, with multiplier 1000.
primitive peth2 ocf:pacemaker:ping \
        params multiplier="1000" host_list="10.151.9.41 10.151.9.50" \
        operations $id="peth2-operations" \
        op monitor interval="10" timeout="20"
# Both IP addresses are grouped together as IPS.
group IPS ETH2 ETH3 \
        meta target-role="Started"
# The ping check is cloned as ping-On-both.
clone ping-On-both peth2 \
        meta target-role="Started"
# Location rule for IPS keyed on the pingd node attribute.
location UPchk IPS \
        rule $id="UPchk-rule" pingd: defined pingd
property $id="cib-bootstrap-options" \
        dc-version="1.1.2-f059ec7ced7a86f18e5490b67ebf4a0b963bccfe" \
        cluster-infrastructure="openais" \
        stonith-enabled="false" \
        default-resource-stickiness="100" \
        no-quorum-policy="ignore" \
        last-lrm-refresh="1318948973" \
        expected-quorum-votes="2"


------------------------- cib.xml -------------------------

<?xml version="1.0" ?>
<!-- CIB snapshot for the two-node cluster (sgn-pau-hub0, sgn-pau-hub1). -->
<cib admin_epoch="0"
     crm_feature_set="3.0.2"
     dc-uuid="sgn-pau-hub0"
     epoch="10"
     have-quorum="1"
     num_updates="5"
     validate-with="pacemaker-1.2">
  <configuration>
    <!-- Cluster-wide options: stonith is disabled, no-quorum-policy is
         "ignore", and default resource stickiness is 100. -->
    <crm_config>
      <cluster_property_set id="cib-bootstrap-options">
        <nvpair id="cib-bootstrap-options-dc-version"
                name="dc-version"
                value="1.1.2-f059ec7ced7a86f18e5490b67ebf4a0b963bccfe"/>
        <nvpair id="cib-bootstrap-options-cluster-infrastructure"
                name="cluster-infrastructure"
                value="openais"/>
        <nvpair id="cib-bootstrap-options-stonith-enabled"
                name="stonith-enabled"
                value="false"/>
        <nvpair id="cib-bootstrap-options-default-resource-stickiness"
                name="default-resource-stickiness"
                value="100"/>
        <nvpair id="cib-bootstrap-options-no-quorum-policy"
                name="no-quorum-policy"
                value="ignore"/>
        <nvpair id="cib-bootstrap-options-last-lrm-refresh"
                name="last-lrm-refresh"
                value="1318948973"/>
        <nvpair id="cib-bootstrap-options-expected-quorum-votes"
                name="expected-quorum-votes"
                value="2"/>
      </cluster_property_set>
    </crm_config>
    <!-- No resource or operation defaults are set. -->
    <rsc_defaults/>
    <op_defaults/>
    <nodes>
      <node id="sgn-pau-hub1" type="normal" uname="sgn-pau-hub1"/>
      <node id="sgn-pau-hub0" type="normal" uname="sgn-pau-hub0"/>
    </nodes>
    <resources>
      <!-- Cloned ocf:pacemaker:ping check against two hosts with
           multiplier 1000; monitored at interval 10 with timeout 20. -->
      <clone id="ping-On-both">
        <meta_attributes id="ping-On-both-meta_attributes">
          <nvpair id="ping-On-both-meta_attributes-target-role"
                  name="target-role"
                  value="Started"/>
        </meta_attributes>
        <primitive class="ocf"
                   id="peth2"
                   provider="pacemaker"
                   type="ping">
          <instance_attributes id="peth2-instance_attributes">
            <nvpair id="peth2-instance_attributes-multiplier"
                    name="multiplier"
                    value="1000"/>
            <nvpair id="peth2-instance_attributes-host_list"
                    name="host_list"
                    value="10.151.9.41 10.151.9.50"/>
          </instance_attributes>
          <operations id="peth2-operations">
            <op id="peth2-monitor-10"
                interval="10"
                name="monitor"
                timeout="20"/>
          </operations>
        </primitive>
      </clone>
      <!-- Group of the two floating IP addresses (eth2 and eth3). Each
           primitive declares only a monitor op; no start/stop op entries
           appear here.
           NOTE(review): with op_defaults empty, start/stop presumably run
           with the cluster's built-in default timeout; confirm. -->
      <group id="IPS">
        <meta_attributes id="IPS-meta_attributes">
          <nvpair id="IPS-meta_attributes-target-role"
                  name="target-role"
                  value="Started"/>
        </meta_attributes>
        <primitive class="ocf"
                   id="ETH2"
                   provider="heartbeat"
                   type="IPaddr">
          <instance_attributes id="ETH2-instance_attributes">
            <nvpair id="ETH2-instance_attributes-ip"
                    name="ip"
                    value="10.151.9.42"/>
            <nvpair id="ETH2-instance_attributes-cidr_netmask"
                    name="cidr_netmask"
                    value="255.255.255.248"/>
            <nvpair id="ETH2-instance_attributes-nic"
                    name="nic"
                    value="eth2"/>
          </instance_attributes>
          <operations>
            <op id="ETH2-monitor-30s"
                interval="30s"
                name="monitor"
                timeout="60"/>
          </operations>
          <meta_attributes id="ETH2-meta_attributes">
            <nvpair id="ETH2-meta_attributes-target-role"
                    name="target-role"
                    value="Started"/>
            <nvpair id="ETH2-meta_attributes-allow-migrate"
                    name="allow-migrate"
                    value="true"/>
          </meta_attributes>
        </primitive>
        <primitive class="ocf"
                   id="ETH3"
                   provider="heartbeat"
                   type="IPaddr">
          <instance_attributes id="ETH3-instance_attributes">
            <nvpair id="ETH3-instance_attributes-ip"
                    name="ip"
                    value="10.151.9.49"/>
            <nvpair id="ETH3-instance_attributes-cidr_netmask"
                    name="cidr_netmask"
                    value="255.255.255.248"/>
            <nvpair id="ETH3-instance_attributes-nic"
                    name="nic"
                    value="eth3"/>
          </instance_attributes>
          <operations>
            <op id="ETH3-monitor-30s"
                interval="30s"
                name="monitor"
                timeout="60"/>
          </operations>
          <meta_attributes id="ETH3-meta_attributes">
            <nvpair id="ETH3-meta_attributes-target-role"
                    name="target-role"
                    value="Started"/>
            <nvpair id="ETH3-meta_attributes-allow-migrate"
                    name="allow-migrate"
                    value="true"/>
          </meta_attributes>
        </primitive>
      </group>
    </resources>
    <constraints>
      <!-- Location rule for the IPS group: matches nodes where the pingd
           attribute is defined and takes its score from that attribute. -->
      <rsc_location id="UPchk" rsc="IPS">
        <rule id="UPchk-rule" score-attribute="pingd">
          <expression attribute="pingd"
                      id="UPchk-expression"
                      operation="defined"/>
        </rule>
      </rsc_location>
    </constraints>
  </configuration>
</cib>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.clusterlabs.org/pipermail/pacemaker/attachments/20111018/ea9a746f/attachment-0002.html>


More information about the Pacemaker mailing list