<div dir="ltr">Sure. Here's the full config:<div><br></div><div><div><cib epoch="28" num_updates="34" admin_epoch="0" validate-with="pacemaker-1.2" cib-last-written="Thu Oct 3 16:26:39 2013" crm_feature_set="3.0.6" update-origin="test-vm-2" update-client="cibadmin" have-quorum="1" dc-uuid="test-vm-1"></div>
<div> <configuration></div><div> <crm_config></div><div> <cluster_property_set id="cib-bootstrap-options"></div><div> <nvpair id="cib-bootstrap-options-dc-version" name="dc-version" value="1.1.7-ee0730e13d124c3d58f00016c3376a1de5323cff"/></div>
<div> <nvpair id="cib-bootstrap-options-cluster-infrastructure" name="cluster-infrastructure" value="openais"/></div><div> <nvpair id="cib-bootstrap-options-expected-quorum-votes" name="expected-quorum-votes" value="2"/></div>
<div> <nvpair id="cib-bootstrap-options-stonith-enabled" name="stonith-enabled" value="false"/></div><div> <nvpair id="cib-bootstrap-options-no-quorum-policy" name="no-quorum-policy" value="ignore"/></div>
<div> </cluster_property_set></div><div> </crm_config></div><div> <nodes></div><div> <node id="test-vm-1" type="normal" uname="test-vm-1"/></div><div>
<node id="test-vm-2" type="normal" uname="test-vm-2"/></div>
<div> </nodes></div><div> <resources></div><div> <group id="nfs_resources"></div><div> <meta_attributes id="nfs_resources-meta_attributes"></div><div> <nvpair id="nfs_resources-meta_attributes-target-role" name="target-role" value="Started"/></div>
<div> </meta_attributes></div><div> <primitive class="ocf" id="nfs_fs" provider="heartbeat" type="Filesystem"></div><div> <instance_attributes id="nfs_fs-instance_attributes"></div>
<div> <nvpair id="nfs_fs-instance_attributes-device" name="device" value="/dev/drbd1"/></div><div> <nvpair id="nfs_fs-instance_attributes-directory" name="directory" value="/export/data/"/></div>
<div> <nvpair id="nfs_fs-instance_attributes-fstype" name="fstype" value="ext3"/></div><div> <nvpair id="nfs_fs-instance_attributes-options" name="options" value="noatime,nodiratime"/></div>
<div> </instance_attributes></div><div> <operations></div><div> <op id="nfs_fs-start-0" interval="0" name="start" timeout="60"/></div>
<div> <op id="nfs_fs-stop-0" interval="0" name="stop" timeout="120"/></div><div> </operations></div><div> </primitive></div><div> <primitive class="ocf" id="nfs_ip" provider="heartbeat" type="IPaddr2"></div>
<div> <instance_attributes id="nfs_ip-instance_attributes"></div><div> <nvpair id="nfs_ip-instance_attributes-ip" name="ip" value="192.168.25.205"/></div>
<div> <nvpair id="nfs_ip-instance_attributes-cidr_netmask" name="cidr_netmask" value="32"/></div><div> </instance_attributes></div><div> <operations></div>
<div> <op id="nfs_ip-monitor-10s" interval="10s" name="monitor"/></div><div> </operations></div><div> <meta_attributes id="nfs_ip-meta_attributes"></div>
<div> <nvpair id="nfs_ip-meta_attributes-is-managed" name="is-managed" value="true"/></div><div> </meta_attributes></div><div> </primitive></div>
<div>
<primitive class="lsb" id="nfs" type="nfs-kernel-server"></div><div> <operations></div><div> <op id="nfs-monitor-5s" interval="5s" name="monitor"/></div>
<div> <op id="nfs-start-0" interval="0" name="start" timeout="120"/></div><div> <op id="nfs-stop-0" interval="0" name="stop" timeout="120"/></div>
<div> </operations></div><div> </primitive></div><div> </group></div><div> <master id="ms-drbd_r0"></div><div> <meta_attributes id="ms-drbd_r0-meta_attributes"></div>
<div> <nvpair id="ms-drbd_r0-meta_attributes-clone-max" name="clone-max" value="2"/></div><div> <nvpair id="ms-drbd_r0-meta_attributes-notify" name="notify" value="true"/></div>
<div> <nvpair id="ms-drbd_r0-meta_attributes-globally-unique" name="globally-unique" value="false"/></div><div> <nvpair id="ms-drbd_r0-meta_attributes-target-role" name="target-role" value="Master"/></div>
<div> </meta_attributes></div><div> <primitive class="ocf" id="drbd_r0" provider="heartbeat" type="drbd"></div><div> <instance_attributes id="drbd_r0-instance_attributes"></div>
<div> <nvpair id="drbd_r0-instance_attributes-drbd_resource" name="drbd_resource" value="r0"/></div><div> </instance_attributes></div><div> <operations></div>
<div> <op id="drbd_r0-monitor-59s" interval="59s" name="monitor" role="Master" timeout="30s"/></div><div> <op id="drbd_r0-monitor-60s" interval="60s" name="monitor" role="Slave" timeout="30s"/></div>
<div> </operations></div><div> </primitive></div><div> </master></div><div> </resources></div><div> <constraints></div><div> <rsc_colocation id="drbd-nfs-ha" rsc="ms-drbd_r0" rsc-role="Master" score="INFINITY" with-rsc="nfs_resources"/></div>
<div> <rsc_order id="drbd-before-nfs" first="ms-drbd_r0" first-action="promote" score="INFINITY" then="nfs_resources" then-action="start"/></div><div>
</constraints></div><div> <rsc_defaults></div><div> <meta_attributes id="rsc-options"></div><div> <nvpair id="rsc-options-resource-stickiness" name="resource-stickiness" value="100"/></div>
<div> </meta_attributes></div><div> </rsc_defaults></div><div> </configuration></div><div> <status></div><div> <node_state id="test-vm-1" uname="test-vm-1" ha="active" in_ccm="true" crmd="online" join="member" expected="member" crm-debug-origin="do_state_transition" shutdown="0"></div>
<div> <transient_attributes id="test-vm-1"></div><div> <instance_attributes id="status-test-vm-1"></div><div> <nvpair id="status-test-vm-1-fail-count-drbd_r0.1" name="fail-count-drbd_r0:1" value="1"/></div>
<div> <nvpair id="status-test-vm-1-last-failure-drbd_r0.1" name="last-failure-drbd_r0:1" value="1380831442"/></div><div> <nvpair id="status-test-vm-1-master-drbd_r0.0" name="master-drbd_r0:0" value="100"/></div>
<div> <nvpair id="status-test-vm-1-probe_complete" name="probe_complete" value="true"/></div><div> </instance_attributes></div><div> </transient_attributes></div>
<div> <lrm id="test-vm-1"></div><div> <lrm_resources></div><div> <lrm_resource id="drbd_r0:0" type="drbd" class="ocf" provider="heartbeat"></div>
<div> <lrm_rsc_op id="drbd_r0:0_last_failure_0" operation_key="drbd_r0:0_monitor_0" operation="monitor" crm-debug-origin="build_active_RAs" crm_feature_set="3.0.6" transition-key="7:4:7:1b4a3ae4-b013-45d1-a865-9b3b3deecf5f" transition-magic="0:8;7:4:7:1b4a3ae4-b013-45d1-a865-9b3b3deecf5f" call-id="32" rc-code="8" op-status="0" interval="0" op-digest="c0e018b73fdf522b6cdd355e125af15e"/></div>
<div> <lrm_rsc_op id="drbd_r0:0_monitor_59000" operation_key="drbd_r0:0_monitor_59000" operation="monitor" crm-debug-origin="build_active_RAs" crm_feature_set="3.0.6" transition-key="20:5:8:1b4a3ae4-b013-45d1-a865-9b3b3deecf5f" transition-magic="0:8;20:5:8:1b4a3ae4-b013-45d1-a865-9b3b3deecf5f" call-id="35" rc-code="8" op-status="0" interval="59000" op-digest="6f5adcd7f1211cdfc17850827b8582c5"/></div>
<div> </lrm_resource></div><div> <lrm_resource id="nfs" type="nfs-kernel-server" class="lsb"></div><div> <lrm_rsc_op id="nfs_last_0" operation_key="nfs_start_0" operation="start" crm-debug-origin="build_active_RAs" crm_feature_set="3.0.6" transition-key="14:8:0:1b4a3ae4-b013-45d1-a865-9b3b3deecf5f" transition-magic="0:0;14:8:0:1b4a3ae4-b013-45d1-a865-9b3b3deecf5f" call-id="39" rc-code="0" op-status="0" interval="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/></div>
<div> <lrm_rsc_op id="nfs_last_failure_0" operation_key="nfs_monitor_0" operation="monitor" crm-debug-origin="build_active_RAs" crm_feature_set="3.0.6" transition-key="6:4:7:1b4a3ae4-b013-45d1-a865-9b3b3deecf5f" transition-magic="0:0;6:4:7:1b4a3ae4-b013-45d1-a865-9b3b3deecf5f" call-id="31" rc-code="0" op-status="0" interval="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/></div>
<div> <lrm_rsc_op id="nfs_monitor_5000" operation_key="nfs_monitor_5000" operation="monitor" crm-debug-origin="build_active_RAs" crm_feature_set="3.0.6" transition-key="2:8:0:1b4a3ae4-b013-45d1-a865-9b3b3deecf5f" transition-magic="0:0;2:8:0:1b4a3ae4-b013-45d1-a865-9b3b3deecf5f" call-id="40" rc-code="0" op-status="0" interval="5000" op-digest="4811cef7f7f94e3a35a70be7916cb2fd"/></div>
<div> </lrm_resource></div><div> <lrm_resource id="nfs_ip" type="IPaddr2" class="ocf" provider="heartbeat"></div><div> <lrm_rsc_op id="nfs_ip_last_failure_0" operation_key="nfs_ip_monitor_0" operation="monitor" crm-debug-origin="build_active_RAs" crm_feature_set="3.0.6" transition-key="5:4:7:1b4a3ae4-b013-45d1-a865-9b3b3deecf5f" transition-magic="0:0;5:4:7:1b4a3ae4-b013-45d1-a865-9b3b3deecf5f" call-id="30" rc-code="0" op-status="0" interval="0" op-digest="570cd25774b1ead32cb1840813adbe21"/></div>
<div> <lrm_rsc_op id="nfs_ip_monitor_10000" operation_key="nfs_ip_monitor_10000" operation="monitor" crm-debug-origin="build_active_RAs" crm_feature_set="3.0.6" transition-key="8:5:0:1b4a3ae4-b013-45d1-a865-9b3b3deecf5f" transition-magic="0:0;8:5:0:1b4a3ae4-b013-45d1-a865-9b3b3deecf5f" call-id="33" rc-code="0" op-status="0" interval="10000" op-digest="bc929bfa78c3086ebd199cf0110b87bf"/></div>
<div> </lrm_resource></div><div> <lrm_resource id="nfs_fs" type="Filesystem" class="ocf" provider="heartbeat"></div><div> <lrm_rsc_op id="nfs_fs_last_failure_0" operation_key="nfs_fs_monitor_0" operation="monitor" crm-debug-origin="build_active_RAs" crm_feature_set="3.0.6" transition-key="4:4:7:1b4a3ae4-b013-45d1-a865-9b3b3deecf5f" transition-magic="0:0;4:4:7:1b4a3ae4-b013-45d1-a865-9b3b3deecf5f" call-id="29" rc-code="0" op-status="0" interval="0" op-digest="c0a40c0015f71e8b20b5359e12f25eb5"/></div>
<div> </lrm_resource></div><div> </lrm_resources></div><div> </lrm></div><div> </node_state></div><div> <node_state id="test-vm-2" uname="test-vm-2" ha="active" in_ccm="true" crmd="online" join="member" crm-debug-origin="do_update_resource" expected="member" shutdown="0"></div>
<div> <lrm id="test-vm-2"></div><div> <lrm_resources></div><div> <lrm_resource id="nfs" type="nfs-kernel-server" class="lsb"></div><div> <lrm_rsc_op id="nfs_last_0" operation_key="nfs_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.6" transition-key="10:14:7:1b4a3ae4-b013-45d1-a865-9b3b3deecf5f" transition-magic="0:7;10:14:7:1b4a3ae4-b013-45d1-a865-9b3b3deecf5f" call-id="4" rc-code="7" op-status="0" interval="0" last-run="1380832563" last-rc-change="1380832563" exec-time="210" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/></div>
<div> </lrm_resource></div><div> <lrm_resource id="nfs_ip" type="IPaddr2" class="ocf" provider="heartbeat"></div><div> <lrm_rsc_op id="nfs_ip_last_0" operation_key="nfs_ip_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.6" transition-key="9:14:7:1b4a3ae4-b013-45d1-a865-9b3b3deecf5f" transition-magic="0:7;9:14:7:1b4a3ae4-b013-45d1-a865-9b3b3deecf5f" call-id="3" rc-code="7" op-status="0" interval="0" last-run="1380832563" last-rc-change="1380832563" exec-time="490" queue-time="0" op-digest="570cd25774b1ead32cb1840813adbe21"/></div>
<div> </lrm_resource></div><div> <lrm_resource id="nfs_fs" type="Filesystem" class="ocf" provider="heartbeat"></div><div> <lrm_rsc_op id="nfs_fs_last_0" operation_key="nfs_fs_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.6" transition-key="8:14:7:1b4a3ae4-b013-45d1-a865-9b3b3deecf5f" transition-magic="0:7;8:14:7:1b4a3ae4-b013-45d1-a865-9b3b3deecf5f" call-id="2" rc-code="7" op-status="0" interval="0" last-run="1380832563" last-rc-change="1380832563" exec-time="690" queue-time="0" op-digest="c0a40c0015f71e8b20b5359e12f25eb5"/></div>
<div> </lrm_resource></div><div> <lrm_resource id="drbd_r0:1" type="drbd" class="ocf" provider="heartbeat"></div><div> <lrm_rsc_op id="drbd_r0:1_last_0" operation_key="drbd_r0:1_start_0" operation="start" crm-debug-origin="do_update_resource" crm_feature_set="3.0.6" transition-key="26:14:0:1b4a3ae4-b013-45d1-a865-9b3b3deecf5f" transition-magic="0:0;26:14:0:1b4a3ae4-b013-45d1-a865-9b3b3deecf5f" call-id="6" rc-code="0" op-status="0" interval="0" last-run="1380832564" last-rc-change="1380832564" exec-time="840" queue-time="0" op-digest="c0e018b73fdf522b6cdd355e125af15e"/></div>
<div> <lrm_rsc_op id="drbd_r0:1_monitor_60000" operation_key="drbd_r0:1_monitor_60000" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.6" transition-key="25:15:0:1b4a3ae4-b013-45d1-a865-9b3b3deecf5f" transition-magic="0:0;25:15:0:1b4a3ae4-b013-45d1-a865-9b3b3deecf5f" call-id="8" rc-code="0" op-status="0" interval="60000" last-rc-change="1380832565" exec-time="310" queue-time="10" op-digest="6f5adcd7f1211cdfc17850827b8582c5"/></div>
<div> </lrm_resource></div><div> </lrm_resources></div><div> </lrm></div><div> <transient_attributes id="test-vm-2"></div><div> <instance_attributes id="status-test-vm-2"></div>
<div> <nvpair id="status-test-vm-2-probe_complete" name="probe_complete" value="true"/></div><div> <nvpair id="status-test-vm-2-master-drbd_r0.1" name="master-drbd_r0:1" value="75"/></div>
<div> </instance_attributes></div><div> </transient_attributes></div><div> </node_state></div><div> </status></div><div></cib></div></div></div><div class="gmail_extra"><br><br>
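
In case it's easier to read, here is roughly the same resource configuration in crm shell syntax. It's hand-translated from the XML above, so treat it as a sketch rather than the literal output of "crm configure show", and note that a few minor meta attributes (the group's target-role and nfs_ip's is-managed) are omitted:

primitive nfs_fs ocf:heartbeat:Filesystem \
        params device="/dev/drbd1" directory="/export/data/" fstype="ext3" options="noatime,nodiratime" \
        op start interval="0" timeout="60" op stop interval="0" timeout="120"
primitive nfs_ip ocf:heartbeat:IPaddr2 \
        params ip="192.168.25.205" cidr_netmask="32" \
        op monitor interval="10s"
primitive nfs lsb:nfs-kernel-server \
        op monitor interval="5s" op start interval="0" timeout="120" op stop interval="0" timeout="120"
primitive drbd_r0 ocf:heartbeat:drbd \
        params drbd_resource="r0" \
        op monitor interval="59s" role="Master" timeout="30s" \
        op monitor interval="60s" role="Slave" timeout="30s"
group nfs_resources nfs_fs nfs_ip nfs
ms ms-drbd_r0 drbd_r0 \
        meta clone-max="2" notify="true" globally-unique="false" target-role="Master"
colocation drbd-nfs-ha inf: ms-drbd_r0:Master nfs_resources
order drbd-before-nfs inf: ms-drbd_r0:promote nfs_resources:start
property stonith-enabled="false" no-quorum-policy="ignore"
rsc_defaults resource-stickiness="100"

The XML dump above is the authoritative version; the crm rendering is only there for readability.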
<div class="gmail_quote">On Thu, Oct 3, 2013 at 5:06 PM, Andreas Kurz <span dir="ltr"><<a href="mailto:andreas@hastexo.com" target="_blank">andreas@hastexo.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<div class="HOEnZb"><div class="h5">On 2013-10-03 22:12, David Parker wrote:<br>
> Thanks, Andrew.  The goal was to use Pacemaker and Corosync 1.x either
> from the Debian packages or with both compiled from source.  So, with
> the compiled version, I was hoping to avoid CMAN.  However, it seems the
> packaged version of Pacemaker doesn't support CMAN anyway, so it's moot.
>
> I rebuilt my VMs from scratch, re-installed Pacemaker and Corosync from
> the Debian packages, but I'm still having an odd problem.  Here is the
> config portion of my CIB:
>
> <crm_config>
>   <cluster_property_set id="cib-bootstrap-options">
>     <nvpair id="cib-bootstrap-options-dc-version" name="dc-version" value="1.1.7-ee0730e13d124c3d58f00016c3376a1de5323cff"/>
>     <nvpair id="cib-bootstrap-options-cluster-infrastructure" name="cluster-infrastructure" value="openais"/>
>     <nvpair id="cib-bootstrap-options-expected-quorum-votes" name="expected-quorum-votes" value="2"/>
>     <nvpair id="cib-bootstrap-options-stonith-enabled" name="stonith-enabled" value="false"/>
>     <nvpair id="cib-bootstrap-options-no-quorum-policy" name="no-quorum-policy" value="ignore"/>
>   </cluster_property_set>
> </crm_config>
>
> I set no-quorum-policy=ignore based on the documentation example for a
> 2-node cluster.  But when Pacemaker starts up on the first node, the
> DRBD resource is in slave mode and none of the other resources are
> started (they depend on DRBD being master), and I see these lines in the
> log:
>
> Oct 03 15:29:18 test-vm-1 pengine: [3742]: notice: unpack_config: On loss of CCM Quorum: Ignore
> Oct 03 15:29:18 test-vm-1 pengine: [3742]: notice: LogActions: Start   nfs_fs     (test-vm-1 - blocked)
> Oct 03 15:29:18 test-vm-1 pengine: [3742]: notice: LogActions: Start   nfs_ip     (test-vm-1 - blocked)
> Oct 03 15:29:18 test-vm-1 pengine: [3742]: notice: LogActions: Start   nfs        (test-vm-1 - blocked)
> Oct 03 15:29:18 test-vm-1 pengine: [3742]: notice: LogActions: Start   drbd_r0:0  (test-vm-1)
>
> I'm assuming the NFS resources show "blocked" because the resource they
> depend on is not in the correct state.
>
> Even when the second node (test-vm-2) comes online, the state of these
> resources does not change.  I can shut down and restart Pacemaker over
> and over again on test-vm-2, but nothing changes.  However... and this
> is where it gets weird... if I shut down Pacemaker on test-vm-1, then
> all of the resources immediately fail over to test-vm-2 and start
> correctly.  And I see these lines in the log:
>
> Oct 03 15:44:26 test-vm-1 pengine: [5305]: notice: unpack_config: On loss of CCM Quorum: Ignore
> Oct 03 15:44:26 test-vm-1 pengine: [5305]: notice: stage6: Scheduling Node test-vm-1 for shutdown
> Oct 03 15:44:26 test-vm-1 pengine: [5305]: notice: LogActions: Start   nfs_fs     (test-vm-2)
> Oct 03 15:44:26 test-vm-1 pengine: [5305]: notice: LogActions: Start   nfs_ip     (test-vm-2)
> Oct 03 15:44:26 test-vm-1 pengine: [5305]: notice: LogActions: Start   nfs        (test-vm-2)
> Oct 03 15:44:26 test-vm-1 pengine: [5305]: notice: LogActions: Stop    drbd_r0:0  (test-vm-1)
> Oct 03 15:44:26 test-vm-1 pengine: [5305]: notice: LogActions: Promote drbd_r0:1  (Slave -> Master test-vm-2)
>
> After that, I can generally move the resources back and forth, and even
> fail them over by hard-failing a node, without any problems.  The real
> problem is that this isn't consistent, though.  Every once in a while,
> I'll hard-fail a node and the other one will go into this "stuck" state
> where Pacemaker knows it lost a node, but DRBD will stay in slave mode
> and the other resources will never start.  It seems to happen quite
> randomly.  Then, even if I restart Pacemaker on both nodes, or reboot
> them altogether, I run into the startup issue mentioned previously.
>
> Any ideas?

Yes, share your complete resource configuration ;-)

Regards,
Andreas

>
> Thanks,
> Dave
>
>
>
> On Wed, Oct 2, 2013 at 1:01 AM, Andrew Beekhof <andrew@beekhof.net> wrote:
>
>
> > On 02/10/2013, at 5:24 AM, David Parker <dparker@utica.edu> wrote:
>
> > > Thanks, I did a little Googling and found the git repository for pcs.
> >
> > pcs won't help you rebuild pacemaker with cman support (or corosync
> > 2.x support) turned on though.
> >
> >
> > > Is there any way to make a two-node cluster work with the stock
> > > Debian packages, though?  It seems odd that this would be impossible.
> >
> > it really depends how the debian maintainers built pacemaker.
> > by the sounds of it, it only supports the pacemaker plugin mode for
> > corosync 1.x
> >
> > >
> > >
> > > On Tue, Oct 1, 2013 at 3:16 PM, Larry Brigman <larry.brigman@gmail.com> wrote:
> > pcs is another package you will need to install.
> >
> > On Oct 1, 2013 9:04 AM, "David Parker" <dparker@utica.edu> wrote:
> > Hello,
> >
> > Sorry for the delay in my reply.  I've been doing a lot of
> > experimentation, but so far I've had no luck.
> >
> > Thanks for the suggestion, but it seems I'm not able to use CMAN.
> > I'm running Debian Wheezy with Corosync and Pacemaker installed via
> > apt-get.  When I installed CMAN and set up a cluster.conf file,
> > Pacemaker refused to start and said that CMAN was not supported.
> > When CMAN is not installed, Pacemaker starts up fine, but I see
> > these lines in the log:
> >
> > Sep 30 23:36:29 test-vm-1 crmd: [6941]: ERROR: init_quorum_connection: The Corosync quorum API is not supported in this build
> > Sep 30 23:36:29 test-vm-1 pacemakerd: [6932]: ERROR: pcmk_child_exit: Child process crmd exited (pid=6941, rc=100)
> > Sep 30 23:36:29 test-vm-1 pacemakerd: [6932]: WARN: pcmk_child_exit: Pacemaker child process crmd no longer wishes to be respawned. Shutting ourselves down.
> >
> > So, then I checked to see which plugins are supported:
> >
> > # pacemakerd -F
> > Pacemaker 1.1.7 (Build: ee0730e13d124c3d58f00016c3376a1de5323cff)
> >  Supporting:  generated-manpages agent-manpages ncurses heartbeat corosync-plugin snmp libesmtp
> >
> > Am I correct in believing that this Pacemaker package has been
> > compiled without support for any quorum API?  If so, does anyone
> > know if there is a Debian package which has the correct support?
> >
> > I also tried compiling LibQB, Corosync and Pacemaker from source
> > via git, following the instructions documented here:
> >
> > http://clusterlabs.org/wiki/SourceInstall
> >
> > I was hopeful that this would work, because as I understand it,
> > Corosync 2.x no longer uses CMAN.  Everything compiled and started
> > fine, but the compiled version of Pacemaker did not include either
> > the 'crm' or 'pcs' commands.  Do I need to install something else in
> > order to get one of these?
> >
> > Any and all help is greatly appreciated!
> >
> > Thanks,
> >     Dave
> >
> >
> > On Wed, Sep 25, 2013 at 6:08 AM, David Lang <david@lang.hm> wrote:
> > the cluster is trying to reach a quorum (the majority of the nodes
> > talking to each other) and that is never going to happen with only
> > one node. so you have to disable this.
> >
> > try putting
> >   <cman two_node="1" expected_votes="1" transport="udpu"/>
> > in your cluster.conf
> >
> > David Lang
> >
> > On Tue, 24 Sep 2013, David Parker wrote:
> >
> > Date: Tue, 24 Sep 2013 11:48:59 -0400
> > From: David Parker <dparker@utica.edu>
> > Reply-To: The Pacemaker cluster resource manager <pacemaker@oss.clusterlabs.org>
> > To: The Pacemaker cluster resource manager <pacemaker@oss.clusterlabs.org>
> > Subject: Re: [Pacemaker] Corosync won't recover when a node fails
> >
> >
> > I forgot to mention, OS is Debian Wheezy 64-bit, Corosync and Pacemaker
> > installed from packages via apt-get, and there are no local firewall rules
> > in place:
> >
> > # iptables -L
> > Chain INPUT (policy ACCEPT)
> > target     prot opt source               destination
> >
> > Chain FORWARD (policy ACCEPT)
> > target     prot opt source               destination
> >
> > Chain OUTPUT (policy ACCEPT)
> > target     prot opt source               destination
> >
> >
> > On Tue, Sep 24, 2013 at 11:41 AM, David Parker <dparker@utica.edu> wrote:
> >
> > Hello,
> >
> > I have a 2-node cluster using Corosync and Pacemaker, where the nodes are
> > actually two VirtualBox VMs on the same physical machine.  I have some
> > resources set up in Pacemaker, and everything works fine if I move them in
> > a controlled way with the "crm_resource -r <resource> --move --node <node>"
> > command.
> >
> > However, when I hard-fail one of the nodes via the "poweroff" command in
> > VirtualBox, which "pulls the plug" on the VM, the resources do not move,
> > and I see the following output in the log on the remaining node:
> >
> > Sep 24 11:20:30 corosync [TOTEM ] The token was lost in the OPERATIONAL state.
> > Sep 24 11:20:30 corosync [TOTEM ] A processor failed, forming new configuration.
> > Sep 24 11:20:30 corosync [TOTEM ] entering GATHER state from 2.
> > Sep 24 11:20:31 test-vm-2 lrmd: [2503]: debug: rsc:drbd_r0:0 monitor[31] (pid 8495)
> > drbd[8495]:     2013/09/24_11:20:31 WARNING: This resource agent is deprecated and may be removed in a future release. See the man page for details.  To suppress this warning, set the "ignore_deprecation" resource parameter to true.
> > drbd[8495]:     2013/09/24_11:20:31 WARNING: This resource agent is deprecated and may be removed in a future release. See the man page for details.  To suppress this warning, set the "ignore_deprecation" resource parameter to true.
> > drbd[8495]:     2013/09/24_11:20:31 DEBUG: r0: Calling drbdadm -c /etc/drbd.conf role r0
> > drbd[8495]:     2013/09/24_11:20:31 DEBUG: r0: Exit code 0
> > drbd[8495]:     2013/09/24_11:20:31 DEBUG: r0: Command output: Secondary/Primary
> > drbd[8495]:     2013/09/24_11:20:31 DEBUG: r0: Calling drbdadm -c /etc/drbd.conf cstate r0
> > drbd[8495]:     2013/09/24_11:20:31 DEBUG: r0: Exit code 0
> > drbd[8495]:     2013/09/24_11:20:31 DEBUG: r0: Command output: Connected
> > drbd[8495]:     2013/09/24_11:20:31 DEBUG: r0 status: Secondary/Primary Secondary Primary Connected
> > Sep 24 11:20:31 test-vm-2 lrmd: [2503]: info: operation monitor[31] on drbd_r0:0 for client 2506: pid 8495 exited with return code 0
> > Sep 24 11:20:32 corosync [TOTEM ] entering GATHER state from 0.
> > Sep 24 11:20:34 corosync [TOTEM ] The consensus timeout expired.
> > Sep 24 11:20:34 corosync [TOTEM ] entering GATHER state from 3.
> > Sep 24 11:20:36 corosync [TOTEM ] The consensus timeout expired.
> > Sep 24 11:20:36 corosync [TOTEM ] entering GATHER state from 3.
> > Sep 24 11:20:38 corosync [TOTEM ] The consensus timeout expired.
> > Sep 24 11:20:38 corosync [TOTEM ] entering GATHER state from 3.
> > Sep 24 11:20:40 corosync [TOTEM ] The consensus timeout expired.
> > Sep 24 11:20:40 corosync [TOTEM ] entering GATHER state from 3.
> > Sep 24 11:20:40 corosync [TOTEM ] Totem is unable to form a cluster because of an operating system or network fault. The most common cause of this message is that the local firewall is configured improperly.
> > Sep 24 11:20:43 corosync [TOTEM ] The consensus timeout expired.
> > Sep 24 11:20:43 corosync [TOTEM ] entering GATHER state from 3.
> > Sep 24 11:20:43 corosync [TOTEM ] Totem is unable to form a cluster because of an operating system or network fault. The most common cause of this message is that the local firewall is configured improperly.
> > Sep 24 11:20:45 corosync [TOTEM ] The consensus timeout expired.
> > Sep 24 11:20:45 corosync [TOTEM ] entering GATHER state from 3.
> > Sep 24 11:20:45 corosync [TOTEM ] Totem is unable to form a cluster because of an operating system or network fault. The most common cause of this message is that the local firewall is configured improperly.
> > Sep 24 11:20:47 corosync [TOTEM ] The consensus timeout expired.
> >
> > Those last 3 messages just repeat over and over, the cluster never
> > recovers, and the resources never move.  "crm_mon" reports that the
> > resources are still running on the dead node, and shows no indication that
> > anything has gone wrong.
> >
> > Does anyone know what the issue could be?  My expectation was that the
> > remaining node would become the sole member of the cluster, take over the
> > resources, and everything would keep running.
> >
> > For reference, my corosync.conf file is below:
> >
> > compatibility: whitetank
> >
> > totem {
> >         version: 2
> >         secauth: off
> >         interface {
> >                 member {
> >                         memberaddr: 192.168.25.201
> >                 }
> >                 member {
> >                         memberaddr: 192.168.25.202
> >                 }
> >                 ringnumber: 0
> >                 bindnetaddr: 192.168.25.0
> >                 mcastport: 5405
> >         }
> >         transport: udpu
> > }
> >
> > logging {
> >         fileline: off
> >         to_logfile: yes
> >         to_syslog: yes
> >         debug: on
> >         logfile: /var/log/cluster/corosync.log
> >         timestamp: on
> >         logger_subsys {
> >                 subsys: AMF
> >                 debug: on
> >         }
> > }
> >
> >
> > Thanks!
> > Dave
> >
> > --
> > Dave Parker
> > Systems Administrator
> > Utica College
> > Integrated Information Technology Services
> > (315) 792-3229
> > Registered Linux User #408177
<span class="HOEnZb"><font color="#888888"><br>
<br>
--<br>
Need help with Pacemaker?<br>
<a href="http://www.hastexo.com/now" target="_blank">http://www.hastexo.com/now</a><br>
<br>
<br>
_______________________________________________
Pacemaker mailing list: Pacemaker@oss.clusterlabs.org
http://oss.clusterlabs.org/mailman/listinfo/pacemaker

Project Home: http://www.clusterlabs.org
Getting started: http://www.clusterlabs.org/doc/Cluster_from_Scratch.pdf
Bugs: http://bugs.clusterlabs.org

--
Dave Parker
Systems Administrator
Utica College
Integrated Information Technology Services
(315) 792-3229
Registered Linux User #408177