<html>
  <head>
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
  </head>
  <body text="#000000" bgcolor="#FFFFFF">
    <div class="moz-cite-prefix">On 09/05/2017 08:54 AM, Abhay B wrote:<br>
    </div>
    <blockquote type="cite"
cite="mid:CAJ=hLKGe3xUoRbD09Ta19xKAUEVMNKsYcN9A636MMV9Whnu-cg@mail.gmail.com">
      <div dir="ltr">
        <div>Ken,</div>
        <div><br>
        </div>
        <div>I have another set of logs:</div>
        <div><br>
        </div>
        <div><font size="2" face="monospace">Sep 01 09:10:05 [1328] <a
              href="http://TPC-F9-26.phaedrus.sandvine.com"
              moz-do-not-send="true">TPC-F9-26.phaedrus.sandvine.com</a>      
            crmd:     info: do_lrm_rsc_op: Performing
            key=5:50864:0:86160921-abd7-4e14-94d4-f53cee278858
            op=SVSDEHA_monitor_2000<br>
            SvsdeStateful(SVSDEHA)[6174]:   2017/09/01_09:10:06 ERROR:
            Resource is in failed state<br>
            Sep 01 09:10:06 [1328] <a
              href="http://TPC-F9-26.phaedrus.sandvine.com"
              moz-do-not-send="true">TPC-F9-26.phaedrus.sandvine.com</a>      
            crmd:     info: action_synced_wait:    Managed
            SvsdeStateful_meta-data_0 process 6274 exited with rc=4<br>
            Sep 01 09:10:06 [1328] <a
              href="http://TPC-F9-26.phaedrus.sandvine.com"
              moz-do-not-send="true">TPC-F9-26.phaedrus.sandvine.com</a>      
            crmd:    error: generic_get_metadata:  Failed to receive
            meta-data for ocf:pacemaker:SvsdeStateful<br>
            Sep 01 09:10:06 [1328] <a
              href="http://TPC-F9-26.phaedrus.sandvine.com"
              moz-do-not-send="true">TPC-F9-26.phaedrus.sandvine.com</a>      
            crmd:    error: build_operation_update:    No metadata for
            ocf::pacemaker:SvsdeStateful<br>
            Sep 01 09:10:06 [1328] <a
              href="http://TPC-F9-26.phaedrus.sandvine.com"
              moz-do-not-send="true">TPC-F9-26.phaedrus.sandvine.com</a>      
            crmd:     info: process_lrm_event: Result of monitor
            operation for SVSDEHA on <a
              href="http://TPC-F9-26.phaedrus.sandvine.com"
              moz-do-not-send="true">TPC-F9-26.phaedrus.sandvine.com</a>:
            0 (ok) | call=939 key=SVSDEHA_monitor_2000 confirmed=false
            cib-update=476<br>
            Sep 01 09:10:06 [1325] <a
              href="http://TPC-F9-26.phaedrus.sandvine.com"
              moz-do-not-send="true">TPC-F9-26.phaedrus.sandvine.com</a>       
            cib:     info: cib_process_request:   Forwarding cib_modify
            operation for section status to all (origin=local/crmd/476)<br>
            Sep 01 09:10:06 [1325] <a
              href="http://TPC-F9-26.phaedrus.sandvine.com"
              moz-do-not-send="true">TPC-F9-26.phaedrus.sandvine.com</a>       
            cib:     info: cib_perform_op:    Diff: --- 0.37.4054 2<br>
            Sep 01 09:10:06 [1325] <a
              href="http://TPC-F9-26.phaedrus.sandvine.com"
              moz-do-not-send="true">TPC-F9-26.phaedrus.sandvine.com</a>       
            cib:     info: cib_perform_op:    Diff: +++ 0.37.4055 (null)<br>
            Sep 01 09:10:06 [1325] <a
              href="http://TPC-F9-26.phaedrus.sandvine.com"
              moz-do-not-send="true">TPC-F9-26.phaedrus.sandvine.com</a>       
            cib:     info: cib_perform_op:    +  /cib: 
            @num_updates=4055<br>
            Sep 01 09:10:06 [1325] <a
              href="http://TPC-F9-26.phaedrus.sandvine.com"
              moz-do-not-send="true">TPC-F9-26.phaedrus.sandvine.com</a>       
            cib:     info: cib_perform_op:    ++
/cib/status/node_state[@id='1']/lrm[@id='1']/lrm_resources/lrm_resource[@id='SVSDEHA']: 
            <lrm_rsc_op id="SVSDEHA_monitor_2000"
            operation_key="SVSDEHA_monitor_2000" operation="monitor"
            crm-debug-origin="do_update_resource"
            crm_feature_set="3.0.10"
            transition-key="5:50864:0:86160921-abd7-4e14-94d4-f53cee278858"
transition-magic="0:0;5:50864:0:86160921-abd7-4e14-94d4-f53cee278858"
            on_node="TPC-F9-26.phaedrus.sandvi<br>
            Sep 01 09:10:06 [1325] <a
              href="http://TPC-F9-26.phaedrus.sandvine.com"
              moz-do-not-send="true">TPC-F9-26.phaedrus.sandvine.com</a>       
            cib:     info: cib_process_request:   Completed cib_modify
            operation for section status: OK (rc=0, origin=<a
              href="http://TPC-F9-26.phaedrus.sandvine.com/crmd/476"
              moz-do-not-send="true">TPC-F9-26.phaedrus.sandvine.com/crmd/476</a>,
            version=0.37.4055)<br>
            <b>Sep 01 09:10:12 [1325] <a
                href="http://TPC-F9-26.phaedrus.sandvine.com"
                moz-do-not-send="true">TPC-F9-26.phaedrus.sandvine.com</a>       
              cib:     info: cib_process_ping:  Reporting our current
              digest to <a
                href="http://TPC-E9-23.phaedrus.sandvine.com"
                moz-do-not-send="true">TPC-E9-23.phaedrus.sandvine.com</a>:
              74bbb7e9f35fabfdb624300891e32018 for 0.37.4055
              (0x7f5719954560 0)<br>
              Sep 01 09:15:33 [1325] <a
                href="http://TPC-F9-26.phaedrus.sandvine.com"
                moz-do-not-send="true">TPC-F9-26.phaedrus.sandvine.com</a>       
              cib:     info: cib_perform_op:    Diff: --- 0.37.4055 2</b><br>
            Sep 01 09:15:33 [1325] <a
              href="http://TPC-F9-26.phaedrus.sandvine.com"
              moz-do-not-send="true">TPC-F9-26.phaedrus.sandvine.com</a>       
            cib:     info: cib_perform_op:    Diff: +++ 0.37.4056 (null)<br>
            Sep 01 09:15:33 [1325] <a
              href="http://TPC-F9-26.phaedrus.sandvine.com"
              moz-do-not-send="true">TPC-F9-26.phaedrus.sandvine.com</a>       
            cib:     info: cib_perform_op:    +  /cib: 
            @num_updates=4056<br>
            Sep 01 09:15:33 [1325] <a
              href="http://TPC-F9-26.phaedrus.sandvine.com"
              moz-do-not-send="true">TPC-F9-26.phaedrus.sandvine.com</a>       
            cib:     info: cib_perform_op:    ++
/cib/status/node_state[@id='2']/lrm[@id='2']/lrm_resources/lrm_resource[@id='SVSDEHA']: 
            <lrm_rsc_op id="SVSDEHA_last_failure_0"
            operation_key="SVSDEHA_monitor_1000" operation="monitor"
            crm-debug-origin="do_update_resource"
            crm_feature_set="3.0.10"
            transition-key="7:50662:8:86160921-abd7-4e14-94d4-f53cee278858"
transition-magic="2:1;7:50662:8:86160921-abd7-4e14-94d4-f53cee278858"
            on_node="TPC-E9-23.phaedrus.sand<br>
            Sep 01 09:15:33 [1325] <a
              href="http://TPC-F9-26.phaedrus.sandvine.com"
              moz-do-not-send="true">TPC-F9-26.phaedrus.sandvine.com</a>       
            cib:     info: cib_process_request:   Completed cib_modify
            operation for section status: OK (rc=0, origin=<a
              href="http://TPC-E9-23.phaedrus.sandvine.com/crmd/53508"
              moz-do-not-send="true">TPC-E9-23.phaedrus.sandvine.com/crmd/53508</a>,
            version=0.37.4056)<br>
            Sep 01 09:15:33 [1327] <a
              href="http://TPC-F9-26.phaedrus.sandvine.com"
              moz-do-not-send="true">TPC-F9-26.phaedrus.sandvine.com</a>     
            attrd:     info: attrd_peer_update: Setting
            fail-count-SVSDEHA[<a
              href="http://TPC-E9-23.phaedrus.sandvine.com"
              moz-do-not-send="true">TPC-E9-23.phaedrus.sandvine.com</a>]:
            (null) -> 1 from <a
              href="http://TPC-E9-23.phaedrus.sandvine.com"
              moz-do-not-send="true">TPC-E9-23.phaedrus.sandvine.com</a><br>
            Sep 01 09:15:33 [1327] <a
              href="http://TPC-F9-26.phaedrus.sandvine.com"
              moz-do-not-send="true">TPC-F9-26.phaedrus.sandvine.com</a>     
            attrd:     info: attrd_peer_update: Setting
            last-failure-SVSDEHA[<a
              href="http://TPC-E9-23.phaedrus.sandvine.com"
              moz-do-not-send="true">TPC-E9-23.phaedrus.sandvine.com</a>]:
            (null) -> 1504271733 from <a
              href="http://TPC-E9-23.phaedrus.sandvine.com"
              moz-do-not-send="true">TPC-E9-23.phaedrus.sandvine.com</a><br>
            Sep 01 09:15:33 [1325] <a
              href="http://TPC-F9-26.phaedrus.sandvine.com"
              moz-do-not-send="true">TPC-F9-26.phaedrus.sandvine.com</a>       
            cib:     info: cib_perform_op:    Diff: --- 0.37.4056 2<br>
            Sep 01 09:15:33 [1325] <a
              href="http://TPC-F9-26.phaedrus.sandvine.com"
              moz-do-not-send="true">TPC-F9-26.phaedrus.sandvine.com</a>       
            cib:     info: cib_perform_op:    Diff: +++ 0.37.4057 (null)<br>
            Sep 01 09:15:33 [1325] <a
              href="http://TPC-F9-26.phaedrus.sandvine.com"
              moz-do-not-send="true">TPC-F9-26.phaedrus.sandvine.com</a>       
            cib:     info: cib_perform_op:    +  /cib: 
            @num_updates=4057<br>
            Sep 01 09:15:33 [1325] <a
              href="http://TPC-F9-26.phaedrus.sandvine.com"
              moz-do-not-send="true">TPC-F9-26.phaedrus.sandvine.com</a>       
            cib:     info: cib_perform_op:    ++
/cib/status/node_state[@id='2']/transient_attributes[@id='2']/instance_attributes[@id='status-2']: 
            <nvpair id="status-2-fail-count-SVSDEHA"
            name="fail-count-SVSDEHA" </font></div>
        <div><font size="2" face="monospace">value="1"/></font></div>
        <div><font size="2" face="Consolas"><br>
          </font></div>
        <div>I was suspicious about the highlighted parts of the logs
          above.</div>
        <div><font size="2" face="sans-serif">After 09:10:12 the next
            log entry is at 09:15:33. During that interval the other node
            failed several times, but the resource was never migrated
            here.</font></div>
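        <div><font size="2" face="sans-serif">The meta-data failure in the
            logs above (SvsdeStateful_meta-data_0 exited with rc=4, "Failed
            to receive meta-data") can be checked by hand outside the
            cluster. A quick sketch, assuming the agent lives under the
            usual ocf:pacemaker provider path:</font></div>
        <div><font size="2" face="monospace">export OCF_ROOT=/usr/lib/ocf<br>
            # meta-data must print valid agent XML and exit 0<br>
            /usr/lib/ocf/resource.d/pacemaker/SvsdeStateful meta-data; echo "rc=$?"<br>
            </font></div>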
        <div><font size="2"><br>
          </font></div>
        <div><font size="2">I am yet to check with sbd fencing with  the
            patch shared by Klaus.</font></div>
        <div><font size="2">I am on CentOS. </font></div>
        <div><font size="2" face="monospace"><br>
          </font></div>
        <div><font size="2" face="monospace"># cat /etc/centos-release<br>
            CentOS Linux release 7.3.1611 (Core)</font></div>
      </div>
    </blockquote>
    <font size="2"><font face="monospace"></font></font><br>
    <font size="2">I would expect that CentOS Linux release 7.4.1708
      should have the patch mentioned.<br>
      Currently I'm on a train with slow and flaky internet-connection
      thus checking out would<br>
      probably be a pain at the moment ...<br>
      iirc the RHEL-7.4 package was working fine on RHEL 7.3 so you
      might be lucky with just taking<br>
      sbd from there.<br>
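      <br>
      A rough sketch of what that could look like on the CentOS side
      (package handling only; the exact 7.4 sbd build is not named here):<br>
      <font face="monospace"># what is installed right now<br>
        rpm -q sbd<br>
        # once 7.4 repositories are available, updating just sbd may be enough<br>
        yum update sbd<br>
        # or install a downloaded 7.4 RPM directly<br>
        yum localinstall sbd-*.el7.x86_64.rpm<br>
        rpm -q --changelog sbd | head<br>
      </font>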
      <br>
      Regards,<br>
      Klaus<br>
      <br>
    </font>
    <blockquote type="cite"
cite="mid:CAJ=hLKGe3xUoRbD09Ta19xKAUEVMNKsYcN9A636MMV9Whnu-cg@mail.gmail.com">
      <div dir="ltr">
        <div><font size="2" face="sans-serif">Regards,</font></div>
        <div><font size="2" face="sans-serif">Abhay</font></div>
        <div><font size="2" face="sans-serif"><br>
          </font></div>
      </div>
      <br>
      <div class="gmail_quote">
        <div dir="ltr">On Sat, 2 Sep 2017 at 15:23 Klaus Wenninger <<a
            href="mailto:kwenning@redhat.com" moz-do-not-send="true">kwenning@redhat.com</a>>
          wrote:<br>
        </div>
        <blockquote class="gmail_quote" style="margin:0 0 0
          .8ex;border-left:1px #ccc solid;padding-left:1ex">On
          09/01/2017 11:45 PM, Ken Gaillot wrote:<br>
          > On Fri, 2017-09-01 at 15:06 +0530, Abhay B wrote:<br>
          >>         Are you sure the monitor stopped? Pacemaker
          only logs<br>
          >>         recurring monitors<br>
          >>         when the status changes. Any successful
          monitors after this<br>
          >>         wouldn't be<br>
          >>         logged.<br>
          >><br>
          >> Yes. Since there were no logs which said
          "RecurringOp:  Start<br>
          >> recurring monitor" on the node after it had failed.<br>
          >> Also there were no logs for any actions pertaining to<br>
          >> The problem was that even though the one node was
          failing, the<br>
          >> resources were never moved to the other node (the node
          on which I<br>
          >> suspect monitoring had stopped).<br>
          >><br>
          >><br>
          >>         There are a lot of resource action failures,
          so I'm not sure<br>
          >>         where the<br>
          >>         issue is, but I'm guessing it has to do with<br>
          >>         migration-threshold=1 --<br>
          >>         once a resource has failed once on a node, it
          won't be allowed<br>
          >>         back on<br>
          >>         that node until the failure is cleaned up. Of
          course you also<br>
          >>         have<br>
          >>         failure-timeout=1s, which should clean it up
          immediately, so<br>
          >>         I'm not<br>
          >>         sure.<br>
          >><br>
          >><br>
          >> migration-threshold=1<br>
          >> failure-timeout=1s<br>
          >><br>
          >> cluster-recheck-interval=2s<br>
          >><br>
          >><br>
          >>         first, set "two_node:<br>
          >>         1" in corosync.conf and let no-quorum-policy
          default in<br>
          >>         pacemaker<br>
          >><br>
          >><br>
          >> This is already configured.<br>
          >> # cat /etc/corosync/corosync.conf<br>
          >> totem {<br>
          >>     version: 2<br>
          >>     secauth: off<br>
          >>     cluster_name: SVSDEHA<br>
          >>     transport: udpu<br>
          >>     token: 5000<br>
          >> }<br>
          >><br>
          >><br>
          >> nodelist {<br>
          >>     node {<br>
          >>         ring0_addr: 2.0.0.10<br>
          >>         nodeid: 1<br>
          >>     }<br>
          >><br>
          >><br>
          >>     node {<br>
          >>         ring0_addr: 2.0.0.11<br>
          >>         nodeid: 2<br>
          >>     }<br>
          >> }<br>
          >><br>
          >><br>
          >> quorum {<br>
          >>     provider: corosync_votequorum<br>
          >>     two_node: 1<br>
          >> }<br>
          >><br>
          >><br>
          >> logging {<br>
          >>     to_logfile: yes<br>
          >>     logfile: /var/log/cluster/corosync.log<br>
          >>     to_syslog: yes<br>
          >> }<br>
          >><br>
          >><br>
          >>         let no-quorum-policy default in pacemaker;
          then,<br>
          >>         get stonith configured, tested, and enabled<br>
          >><br>
          >><br>
          >> By not configuring no-quorum-policy, would it ignore
          quorum for a 2<br>
          >> node cluster?<br>
          > With two_node, corosync always provides quorum to
          pacemaker, so<br>
          > pacemaker doesn't see any quorum loss. The only
          significant difference<br>
          > from ignoring quorum is that corosync won't form a
          cluster from a cold<br>
          > start unless both nodes can reach each other (a safety
          feature).<br>
          ><br>
          >> For my use case I don't need stonith enabled. My
          intention is to have<br>
          >> a highly available system all the time.<br>
          > Stonith is the only way to recover from certain types of
          failure, such<br>
          > as the "split brain" scenario, and a resource that fails
          to stop.<br>
          ><br>
          > If your nodes are physical machines with hardware
          watchdogs, you can set<br>
          > up sbd for fencing without needing any extra equipment.<br>
          <br>
          Small caveat here:<br>
          If I understand correctly, you have a two-node setup. In that
          case a watchdog-only<br>
          sbd setup is not usable, as it relies on 'real' quorum.<br>
          In two-node setups sbd needs at least one shared disk.<br>
          For the single-disk sbd setup to work with two_node,<br>
          you need the patch from <a
            href="https://github.com/ClusterLabs/sbd/pull/23"
            rel="noreferrer" target="_blank" moz-do-not-send="true">https://github.com/ClusterLabs/sbd/pull/23</a><br>
          in place. (I saw you mentioning the RHEL documentation - RHEL 7.4
          has had it since GA.)<br>
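          <br>
          A minimal sketch of that single-disk setup, assuming a small LUN
          visible to both nodes (the device path is a placeholder, and the
          fence_sbd agent is assumed to be available in your version):<br>
          <font face="monospace"># on both nodes, /etc/sysconfig/sbd:<br>
            #   SBD_DEVICE="/dev/disk/by-id/shared-sbd-disk"<br>
            #   SBD_WATCHDOG_DEV="/dev/watchdog"<br>
            # once, from one node: initialize the disk and check the slots<br>
            sbd -d /dev/disk/by-id/shared-sbd-disk create<br>
            sbd -d /dev/disk/by-id/shared-sbd-disk list<br>
            # on both nodes: let sbd start with the cluster stack<br>
            systemctl enable sbd<br>
            # in pacemaker: a disk-based fence device, then stonith back on<br>
            pcs stonith create fence-sbd fence_sbd devices=/dev/disk/by-id/shared-sbd-disk<br>
            pcs property set stonith-enabled=true<br>
          </font>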
          <br>
          Regards,<br>
          Klaus<br>
          <br>
          ><br>
          >> I will test my RA again as suggested with
          no-quorum-policy=default.<br>
          >><br>
          >><br>
          >> One more doubt.<br>
          >> Why do we see this in 'pcs property'?<br>
          >> last-lrm-refresh: 1504090367<br>
          >><br>
          >><br>
          >><br>
          >> Never seen this on a healthy cluster.<br>
          >> From RHEL documentation:<br>
          >> last-lrm-refresh: Last refresh of the Local Resource Manager,<br>
          >> given in units of seconds since the epoch. Used for diagnostic<br>
          >> purposes; not user-configurable.<br>
          >><br>
          >><br>
          >> Doesn't explain much.<br>
          > Whenever a cluster property changes, the cluster rechecks
          the current<br>
          > state to see if anything needs to be done.
          last-lrm-refresh is just a<br>
          > dummy property that the cluster uses to trigger that.
          It's set in<br>
          > certain rare circumstances when a resource cleanup is
          done. You should<br>
          > see a line in your logs like "Triggering a refresh after
          ... deleted ...<br>
          > from the LRM". That might give some idea of why.<br>
          ><br>
          >> Also, does avg. CPU load impact resource monitoring?<br>
          >><br>
          >><br>
          >> Regards,<br>
          >> Abhay<br>
          > Well, it could cause the monitor to take so long that it
          times out. The<br>
          > only direct effect of load on pacemaker is that the
          cluster might lower<br>
          > the number of agent actions that it can execute
          simultaneously.<br>
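          <br>
          One way to check whether the monitor has enough headroom, sketched
          with pcs (the timeout value is only an example, and the interval
          has to match the existing monitor op):<br>
          <font face="monospace"># current resource and operation settings<br>
            pcs resource show SVSDEHA<br>
            # give the monitor more room than its worst-case runtime<br>
            pcs resource update SVSDEHA op monitor interval=2000 timeout=20s<br>
          </font>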
          ><br>
          ><br>
          >> On Thu, 31 Aug 2017 at 20:11 Ken Gaillot <<a
            href="mailto:kgaillot@redhat.com" target="_blank"
            moz-do-not-send="true">kgaillot@redhat.com</a>> wrote:<br>
          >><br>
          >>         On Thu, 2017-08-31 at 06:41 +0000, Abhay B
          wrote:<br>
          >>         > Hi,<br>
          >>         ><br>
          >>         ><br>
          >>         > I have a 2 node HA cluster configured on
          CentOS 7 with pcs<br>
          >>         command.<br>
          >>         ><br>
          >>         ><br>
          >>         > Below are the properties of the cluster
          :<br>
          >>         ><br>
          >>         ><br>
          >>         > # pcs property<br>
          >>         > Cluster Properties:<br>
          >>         >  cluster-infrastructure: corosync<br>
          >>         >  cluster-name: SVSDEHA<br>
          >>         >  cluster-recheck-interval: 2s<br>
          >>         >  dc-deadtime: 5<br>
          >>         >  dc-version: 1.1.15-11.el7_3.5-e174ec8<br>
          >>         >  have-watchdog: false<br>
          >>         >  last-lrm-refresh: 1504090367<br>
          >>         >  no-quorum-policy: ignore<br>
          >>         >  start-failure-is-fatal: false<br>
          >>         >  stonith-enabled: false<br>
          >>         ><br>
          >>         ><br>
          >>         > PFA the cib.<br>
          >>         > Also attached is the corosync.log around
          the time the below<br>
          >>         issue<br>
          >>         > happened.<br>
          >>         ><br>
          >>         ><br>
          >>         > After around 10 hrs and multiple
          failures, pacemaker stops<br>
          >>         monitoring<br>
          >>         > resource on one of the nodes in the
          cluster.<br>
          >>         ><br>
          >>         ><br>
          >>         > So even though the resource on other
          node fails, it is never<br>
          >>         migrated<br>
          >>         > to the node on which the resource is not
          monitored.<br>
          >>         ><br>
          >>         ><br>
          >>         > Wanted to know what could have triggered
          this and how to<br>
          >>         avoid getting<br>
          >>         > into such scenarios.<br>
          >>         > I am going through the logs and couldn't
          find why this<br>
          >>         happened.<br>
          >>         ><br>
          >>         ><br>
          >>         > After this log the monitoring stopped.<br>
          >>         ><br>
          >>         > Aug 29 11:01:44 [16500] <a
            href="http://TPC-D12-10-002.phaedrus.sandvine.com"
            rel="noreferrer" target="_blank" moz-do-not-send="true">TPC-D12-10-002.phaedrus.sandvine.com</a><br>
          >>         > crmd:     info: process_lrm_event: 
           Result of monitor<br>
          >>         operation for<br>
          >>         > SVSDEHA on <a
            href="http://TPC-D12-10-002.phaedrus.sandvine.com"
            rel="noreferrer" target="_blank" moz-do-not-send="true">TPC-D12-10-002.phaedrus.sandvine.com</a>:
          0 (ok) |<br>
          >>         call=538<br>
          >>         > key=SVSDEHA_monitor_2000 confirmed=false
          cib-update=50013<br>
          >><br>
          >>         Are you sure the monitor stopped? Pacemaker
          only logs<br>
          >>         recurring monitors<br>
          >>         when the status changes. Any successful
          monitors after this<br>
          >>         wouldn't be<br>
          >>         logged.<br>
          >><br>
          >>         > Below log says the resource is leaving
          the cluster.<br>
          >>         > Aug 29 11:01:44 [16499] <a
            href="http://TPC-D12-10-002.phaedrus.sandvine.com"
            rel="noreferrer" target="_blank" moz-do-not-send="true">TPC-D12-10-002.phaedrus.sandvine.com</a><br>
          >>         > pengine:     info: LogActions:  Leave 
           SVSDEHA:0<br>
          >>          (Slave<br>
          >>         > <a
            href="http://TPC-D12-10-002.phaedrus.sandvine.com"
            rel="noreferrer" target="_blank" moz-do-not-send="true">TPC-D12-10-002.phaedrus.sandvine.com</a>)<br>
          >><br>
          >>         This means that the cluster will leave the
          resource where it<br>
          >>         is (i.e. it<br>
          >>         doesn't need a start, stop, move, demote,
          promote, etc.).<br>
          >><br>
          >>         > Let me know if anything more is needed.<br>
          >>         ><br>
          >>         ><br>
          >>         > Regards,<br>
          >>         > Abhay<br>
          >>         ><br>
          >>         ><br>
          >>         > PS:'pcs resource cleanup' brought the
          cluster back into good<br>
          >>         state.<br>
          >><br>
          >>         There are a lot of resource action failures,
          so I'm not sure<br>
          >>         where the<br>
          >>         issue is, but I'm guessing it has to do with<br>
          >>         migration-threshold=1 --<br>
          >>         once a resource has failed once on a node, it
          won't be allowed<br>
          >>         back on<br>
          >>         that node until the failure is cleaned up. Of
          course you also<br>
          >>         have<br>
          >>         failure-timeout=1s, which should clean it up
          immediately, so<br>
          >>         I'm not<br>
          >>         sure.<br>
          >><br>
          >>         My gut feeling is that you're trying to do
          too many things at<br>
          >>         once. I'd<br>
          >>         start over from scratch and proceed more
          slowly: first, set<br>
          >>         "two_node:<br>
          >>         1" in corosync.conf and let no-quorum-policy
          default in<br>
          >>         pacemaker; then,<br>
          >>         get stonith configured, tested, and enabled;
          then, test your<br>
          >>         resource<br>
          >>         agent manually on the command line to make
          sure it conforms to<br>
          >>         the<br>
          >>         expected return values<br>
          >>         ( <a
href="http://clusterlabs.org/doc/en-US/Pacemaker/1.1-pcs/html-single/Pacemaker_Explained/index.html#ap-ocf"
            rel="noreferrer" target="_blank" moz-do-not-send="true">http://clusterlabs.org/doc/en-US/Pacemaker/1.1-pcs/html-single/Pacemaker_Explained/index.html#ap-ocf</a>
          ); then add your resource to the cluster without
          migration-threshold or failure-timeout, and work out any
          issues with frequent failures; then finally set
          migration-threshold and failure-timeout to reflect how you
          want recovery to proceed.<br>
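          <br>
          A sketch of that manual check, run outside the cluster (agent path
          taken from ocf:pacemaker:SvsdeStateful; any parameters the agent
          needs would be passed as OCF_RESKEY_* variables):<br>
          <font face="monospace">export OCF_ROOT=/usr/lib/ocf<br>
            cd /usr/lib/ocf/resource.d/pacemaker<br>
            ./SvsdeStateful validate-all; echo "rc=$?"   # expect 0<br>
            ./SvsdeStateful start;        echo "rc=$?"   # expect 0<br>
            ./SvsdeStateful monitor;      echo "rc=$?"   # 0 running, 7 stopped, 8 running as master<br>
            ./SvsdeStateful stop;         echo "rc=$?"   # expect 0<br>
            # the resource-agents helper runs a similar battery of checks<br>
            ocf-tester -n SVSDEHA /usr/lib/ocf/resource.d/pacemaker/SvsdeStateful<br>
            # once the agent behaves, set recovery policy deliberately, e.g.<br>
            pcs resource update SVSDEHA meta migration-threshold=3 failure-timeout=60s<br>
          </font>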
          >>         --<br>
          >>         Ken Gaillot <<a
            href="mailto:kgaillot@redhat.com" target="_blank"
            moz-do-not-send="true">kgaillot@redhat.com</a>><br>
          >><br>
          >><br>
          >><br>
          >><br>
          >><br>
          <br>
          <br>
          _______________________________________________<br>
          Users mailing list: <a href="mailto:Users@clusterlabs.org"
            target="_blank" moz-do-not-send="true">Users@clusterlabs.org</a><br>
          <a href="http://lists.clusterlabs.org/mailman/listinfo/users"
            rel="noreferrer" target="_blank" moz-do-not-send="true">http://lists.clusterlabs.org/mailman/listinfo/users</a><br>
          <br>
          Project Home: <a href="http://www.clusterlabs.org"
            rel="noreferrer" target="_blank" moz-do-not-send="true">http://www.clusterlabs.org</a><br>
          Getting started: <a
            href="http://www.clusterlabs.org/doc/Cluster_from_Scratch.pdf"
            rel="noreferrer" target="_blank" moz-do-not-send="true">http://www.clusterlabs.org/doc/Cluster_from_Scratch.pdf</a><br>
          Bugs: <a href="http://bugs.clusterlabs.org" rel="noreferrer"
            target="_blank" moz-do-not-send="true">http://bugs.clusterlabs.org</a><br>
        </blockquote>
      </div>
    </blockquote>
    <br>
  </body>
</html>