[Pacemaker] Monitoring on master node not running after standby is connected

Juraj Fabo juraj.fabo at gmail.com
Thu Sep 19 11:39:01 EDT 2013


Dear all

Attached is my 2-node master/slave cluster configuration, with a master/slave
PostgreSQL resource and some IP resources.
I've modified the pgsql resource agent to log its "main" entry together with
the parameter it receives, so that I can see which operation is called.
My problem is the following: while only a single node is running, the monitor
operation on the pgsql resource is executed as expected,
even after the node has been promoted to master.

However, once the second node is connected, the monitor operation is no longer
executed on the master node, only on the hot standby.
I've seen a very similar bug report,
http://bugs.clusterlabs.org/show_bug.cgi?id=5072 , which was fixed quite some
time ago.
I know that the monitor operation on a master/slave (MS) resource must be
configured twice, with different intervals, to work properly,
according to
http://clusterlabs.org/doc/en-US/Pacemaker/1.1/html/Pacemaker_Explained/_monitoring_multi_state_resources.html

Here is the output of "crm configure show xml",
followed by the patch we applied when building Pacemaker from source.

The Pacemaker version, as logged in the log file, is:
 "main: Starting Pacemaker 1.1.10 (Build: 368c726):  ncurses libqb-logging
libqb-ipc lha-fencing nagios  corosync-native"

Could you please advise what else I need to take into account to get
master/slave monitoring working properly?

thank you in advance

Juraj

<?xml version="1.0" ?>
<cib admin_epoch="0" cib-last-written="Thu Sep 19 17:19:20 2013"
crm_feature_set="3.0.7" dc-uuid="1" epoch="30" have-quorum="1"
num_updates="4" update-client="crm_attribute" update-origin="ifds1"
validate-with="pacemaker-1.2">
  <configuration>
    <crm_config>
      <cluster_property_set id="cib-bootstrap-options">
        <nvpair id="cib-bootstrap-options-dc-version" name="dc-version"
value="1.1.10-368c726"/>
        <nvpair id="cib-bootstrap-options-cluster-infrastructure"
name="cluster-infrastructure" value="corosync"/>
        <nvpair id="cib-bootstrap-options-no-quorum-policy"
name="no-quorum-policy" value="ignore"/>
        <nvpair id="cib-bootstrap-options-stonith-enabled"
name="stonith-enabled" value="false"/>
        <nvpair id="cib-bootstrap-options-crmd-transition-delay"
name="crmd-transition-delay" value="0s"/>
      </cluster_property_set>
    </crm_config>
    <nodes>
      <node id="2" uname="ifds2">
        <instance_attributes id="nodes-2">
          <nvpair id="nodes-2-IFDS-pgsql-data-status"
name="IFDS-pgsql-data-status" value="STREAMING|ASYNC"/>
        </instance_attributes>
      </node>
      <node id="1" uname="ifds1">
        <instance_attributes id="nodes-1">
          <nvpair id="nodes-1-IFDS-pgsql-data-status"
name="IFDS-pgsql-data-status" value="LATEST"/>
        </instance_attributes>
      </node>
    </nodes>
    <resources>
      <clone id="clnPingd">
        <primitive class="ocf" id="IFDS-pingCheck" provider="pacemaker"
type="ping">
          <instance_attributes id="IFDS-pingCheck-instance_attributes">
            <nvpair id="IFDS-pingCheck-instance_attributes-host_list"
name="host_list" value="10.40.0.99"/>
            <nvpair id="IFDS-pingCheck-instance_attributes-name" name="name"
value="default_ping_set"/>
            <nvpair id="IFDS-pingCheck-instance_attributes-multiplier"
name="multiplier" value="100"/>
          </instance_attributes>
          <operations>
            <op id="IFDS-pingCheck-start-0s" interval="0s" name="start"
on-fail="restart" timeout="60s"/>
            <op id="IFDS-pingCheck-monitor-2s" interval="2s" name="monitor"
on-fail="restart" timeout="60s"/>
            <op id="IFDS-pingCheck-stop-0s" interval="0s" name="stop"
on-fail="ignore" timeout="60s"/>
          </operations>
        </primitive>
      </clone>
      <group id="master-group">
        <primitive class="ocf" id="IFDS-MIP1" provider="heartbeat"
type="IPaddr2">
          <instance_attributes id="IFDS-MIP1-instance_attributes">
            <nvpair id="IFDS-MIP1-instance_attributes-ip" name="ip"
value="10.40.0.70"/>
            <nvpair id="IFDS-MIP1-instance_attributes-cidr_netmask"
name="cidr_netmask" value="24"/>
            <nvpair id="IFDS-MIP1-instance_attributes-iflabel"
name="iflabel" value="mas1"/>
          </instance_attributes>
          <operations>
            <op id="IFDS-MIP1-monitor-10s" interval="10s" name="monitor"/>
          </operations>
        </primitive>
        <primitive class="ocf" id="IFDS-MIP2" provider="heartbeat"
type="IPaddr2">
          <instance_attributes id="IFDS-MIP2-instance_attributes">
            <nvpair id="IFDS-MIP2-instance_attributes-ip" name="ip"
value="10.40.0.71"/>
            <nvpair id="IFDS-MIP2-instance_attributes-cidr_netmask"
name="cidr_netmask" value="24"/>
            <nvpair id="IFDS-MIP2-instance_attributes-iflabel"
name="iflabel" value="mas2"/>
          </instance_attributes>
          <operations>
            <op id="IFDS-MIP2-monitor-10s" interval="10s" name="monitor"/>
          </operations>
        </primitive>
      </group>
      <primitive class="ocf" id="IFDS-VIP" provider="heartbeat" type="IPaddr2">
        <instance_attributes id="IFDS-VIP-instance_attributes">
          <nvpair id="IFDS-VIP-instance_attributes-ip" name="ip"
value="10.40.0.72"/>
          <nvpair id="IFDS-VIP-instance_attributes-cidr_netmask"
name="cidr_netmask" value="24"/>
          <nvpair id="IFDS-VIP-instance_attributes-iflabel" name="iflabel"
value="slav"/>
        </instance_attributes>
        <meta_attributes id="IFDS-VIP-meta_attributes">
          <nvpair id="IFDS-VIP-meta_attributes-resource-stickiness"
name="resource-stickiness" value="1"/>
        </meta_attributes>
        <operations>
          <op id="IFDS-VIP-monitor-10s" interval="10s" name="monitor"
on-fail="restart" timeout="60s"/>
        </operations>
      </primitive>
      <master id="msPostgresql">
        <meta_attributes id="msPostgresql-meta_attributes">
          <nvpair id="msPostgresql-meta_attributes-master-max"
name="master-max" value="1"/>
          <nvpair id="msPostgresql-meta_attributes-master-node-max"
name="master-node-max" value="1"/>
          <nvpair id="msPostgresql-meta_attributes-clone-max"
name="clone-max" value="2"/>
          <nvpair id="msPostgresql-meta_attributes-clone-node-max"
name="clone-node-max" value="1"/>
          <nvpair id="msPostgresql-meta_attributes-notify" name="notify"
value="true"/>
        </meta_attributes>
        <primitive class="ocf" id="IFDS-pgsql" provider="heartbeat"
type="pgsql">
          <instance_attributes id="IFDS-pgsql-instance_attributes">
            <nvpair id="IFDS-pgsql-instance_attributes-master_ip"
name="master_ip" value="10.40.0.70"/>
            <nvpair id="IFDS-pgsql-instance_attributes-node_list"
name="node_list" value="ifds1 ifds2"/>
            <nvpair id="IFDS-pgsql-instance_attributes-pgctl" name="pgctl"
value="/usr/bin/pg_ctl"/>
            <nvpair id="IFDS-pgsql-instance_attributes-psql" name="psql"
value="/usr/bin/psql"/>
            <nvpair id="IFDS-pgsql-instance_attributes-pgdata" name="pgdata"
value="/var/lib/pgsql/data/"/>
            <nvpair id="IFDS-pgsql-instance_attributes-start_opt"
name="start_opt" value="-p 5432"/>
            <nvpair id="IFDS-pgsql-instance_attributes-rep_mode"
name="rep_mode" value="async"/>
            <nvpair id="IFDS-pgsql-instance_attributes-logfile"
name="logfile" value="/var/log/ifds_ra_pgsql.log"/>
            <nvpair id="IFDS-pgsql-instance_attributes-primary_conninfo_opt"
name="primary_conninfo_opt" value="keepalives_idle=60 keepalives_interval=5
keepalives_count=5"/>
            <nvpair id="IFDS-pgsql-instance_attributes-stop_escalate"
name="stop_escalate" value="0"/>
          </instance_attributes>
          <operations>
            <op id="IFDS-pgsql-start-0s" interval="0s" name="start"
on-fail="restart" timeout="30s"/>
            <op id="IFDS-pgsql-monitor-7s" interval="7s" name="monitor"
on-fail="restart" role="Master" timeout="30s"/>
            <op id="IFDS-pgsql-monitor-8s" interval="8s" name="monitor"
on-fail="restart" timeout="30s"/>
            <op id="IFDS-pgsql-promote-0s" interval="0s" name="promote"
on-fail="restart" timeout="30s"/>
            <op id="IFDS-pgsql-demote-0s" interval="0s" name="demote"
on-fail="stop" timeout="30s"/>
            <op id="IFDS-pgsql-stop-0s" interval="0s" name="stop"
on-fail="block" timeout="30s"/>
            <op id="IFDS-pgsql-notify-0s" interval="0s" name="notify"
timeout="30s"/>
          </operations>
        </primitive>
      </master>
    </resources>
    <constraints>
      <rsc_colocation id="rsc_colocation-2" rsc="master-group"
score="INFINITY" with-rsc="msPostgresql" with-rsc-role="Master"/>
      <rsc_location id="rsc_location-1" rsc="msPostgresql">
        <rule boolean-op="or" id="rsc_location-1-rule" score="-INFINITY">
          <expression attribute="default_ping_set"
id="rsc_location-1-expression" operation="not_defined"/>
          <expression attribute="default_ping_set"
id="rsc_location-1-expression-0" operation="lt" value="100"/>
        </rule>
      </rsc_location>
      <rsc_colocation id="rsc_colocation-1" rsc="msPostgresql"
score="INFINITY" with-rsc="clnPingd"/>
      <rsc_colocation id="rsc_colocation-3" rsc="IFDS-VIP" score="-INFINITY"
with-rsc="msPostgresql" with-rsc-role="Master"/>
      <rsc_order first="msPostgresql" first-action="promote"
id="rsc_order-2" score="INFINITY" symmetrical="false" then="master-group"
then-action="start"/>
      <rsc_order first="msPostgresql" first-action="demote" id="rsc_order-3"
score="0" symmetrical="false" then="master-group" then-action="stop"/>
      <rsc_order first="clnPingd" id="rsc_order-1" score="0"
symmetrical="false" then="msPostgresql"/>
    </constraints>
    <rsc_defaults>
      <meta_attributes id="rsc-options">
        <nvpair id="rsc-options-resource-stickiness"
name="resource-stickiness" value="INFINITY"/>
        <nvpair id="rsc-options-migration-threshold"
name="migration-threshold" value="1"/>
      </meta_attributes>
    </rsc_defaults>
  </configuration>
</cib>

diff -urp pacemaker-Pacemaker-1.1.10.z0/crmd/lrm.c
pacemaker-Pacemaker-1.1.10/crmd/lrm.c
--- pacemaker-Pacemaker-1.1.10.z0/crmd/lrm.c    2013-07-26
00:02:31.000000000 +0000
+++ pacemaker-Pacemaker-1.1.10/crmd/lrm.c       2013-08-27
10:10:57.000000000 +0000
@@ -340,7 +340,7 @@ lrm_state_verify_stopped(lrm_state_t * l
             lrm_state->pending_ops, stop_recurring_actions, lrm_state);

         crm_notice("Stopped %u recurring operations at %s (%u ops remaining)",
-                   g_hash_table_size(lrm_state->pending_ops), removed, when);
+                   g_hash_table_size(lrm_state->pending_ops), when, removed);
     }

     if (lrm_state->pending_ops) {
diff -urp pacemaker-Pacemaker-1.1.10.z0/lib/common/logging.c
pacemaker-Pacemaker-1.1.10/lib/common/logging.c
--- pacemaker-Pacemaker-1.1.10.z0/lib/common/logging.c  2013-07-26
00:02:31.000000000 +0000
+++ pacemaker-Pacemaker-1.1.10/lib/common/logging.c     2013-08-26
13:29:56.000000000 +0000
@@ -632,10 +632,10 @@ crm_log_init(const char *entity, int lev
     g_log_set_always_fatal((GLogLevelFlags) 0); /*value out of range */

     if (facility == NULL) {
-        facility = "daemon";
+        facility = "local3";

     } else if (safe_str_eq(facility, "none")) {
-        facility = "daemon";
+        facility = "local3";
         quiet = TRUE;
     }

diff -urp pacemaker-Pacemaker-1.1.10.z0/mcp/corosync.c
pacemaker-Pacemaker-1.1.10/mcp/corosync.c
--- pacemaker-Pacemaker-1.1.10.z0/mcp/corosync.c        2013-07-26
00:02:31.000000000 +0000
+++ pacemaker-Pacemaker-1.1.10/mcp/corosync.c   2013-08-26
13:31:38.000000000 +0000
@@ -407,7 +407,7 @@ mcp_read_config(void)
     } else {
         get_config_opt(config, local_handle, KEY_PREFIX "to_syslog",
&logging_to_syslog, "on");
         get_config_opt(config, local_handle, KEY_PREFIX "syslog_facility",
&logging_syslog_facility,
-                       "daemon");
+                       "local3");
     }

 #if HAVE_CONFDB











More information about the Pacemaker mailing list