Hi,<br>It seems that it happens every time the PE wants to check the configuration:<br>09:23:55 crmd: [3473]: info: crm_timer_popped: PEngine Recheck Timer (I_PE_CALC) just popped!<br><br>and then check_rsc_parameters() wants to reset my resources:<br>
<br>09:23:55 pengine: [3979]: notice: check_rsc_parameters: Forcing restart of pbx_02 on node-02, provider changed: heartbeat -> &lt;null&gt;<br>09:23:55 pengine: [3979]: notice: DeleteRsc: Removing pbx_02 from node-02<br>
09:23:55 pengine: [3979]: notice: check_rsc_parameters: Forcing restart of pbx_01 on node-01, provider changed: heartbeat -> &lt;null&gt;<br><br>Looking at the code, I can't tell whether the issue is in my actual configuration or whether I am hitting a bug:<br>
static gboolean<br>check_rsc_parameters(resource_t *rsc, node_t *node, xmlNode *rsc_entry,<br> pe_working_set_t *data_set)<br>{<br> int attr_lpc = 0;<br> gboolean force_restart = FALSE;<br> gboolean delete_resource = FALSE;<br>
<br> const char *value = NULL;<br> const char *old_value = NULL;<br> const char *attr_list[] = {<br> XML_ATTR_TYPE,<br> XML_AGENT_ATTR_CLASS,<br> XML_AGENT_ATTR_PROVIDER<br> };<br><br> for(; attr_lpc < DIMOF(attr_list); attr_lpc++) {<br>
value = crm_element_value(rsc->xml, attr_list[attr_lpc]);<br> old_value = crm_element_value(rsc_entry, attr_list[attr_lpc]);<br> if(value == old_value /* ie. NULL */<br> || crm_str_eq(value, old_value, TRUE)) {<br>
continue;<br> }<br><br> force_restart = TRUE;<br> crm_notice("Forcing restart of %s on %s, %s changed: %s -> %s",<br> rsc->id, node->details->uname, attr_list[attr_lpc],<br>
crm_str(old_value), crm_str(value));<br> }<br> if(force_restart) {<br> /* make sure the restart happens */<br> stop_action(rsc, node, FALSE);<br> set_bit(rsc->flags, pe_rsc_start_pending);<br>
delete_resource = TRUE;<br> }<br> return delete_resource;<br>}<br><br><br><div class="gmail_quote">On 1 October 2010 09:13, Pavlos Parissis <span dir="ltr"><<a href="mailto:pavlos.parissis@gmail.com">pavlos.parissis@gmail.com</a>></span> wrote:<br>
<blockquote class="gmail_quote" style="margin: 0pt 0pt 0pt 0.8ex; border-left: 1px solid rgb(204, 204, 204); padding-left: 1ex;">Hi<br>Could this be related to the possible bug mentioned here [1]?<br><br>BTW, here is the Pacemaker configuration:<br>
node $id="b8ad13a6-8a6e-4304-a4a1-8f69fa735100" node-02<br>node $id="d5557037-cf8f-49b7-95f5-c264927a0c76" node-01<br>
node $id="e5195d6b-ed14-4bb3-92d3-9105543f9251" node-03<br>primitive drbd_01 ocf:linbit:drbd \<br> params drbd_resource="drbd_pbx_service_1" \<br> op monitor interval="30s" \<br>
op start interval="0" timeout="240s" \<br> op stop interval="0" timeout="120s"<br>primitive drbd_02 ocf:linbit:drbd \<br> params drbd_resource="drbd_pbx_service_2" \<br>
op monitor interval="30s" \<br> op start interval="0" timeout="240s" \<br> op stop interval="0" timeout="120s"<br>primitive fs_01 ocf:heartbeat:Filesystem \<br>
params device="/dev/drbd1" directory="/pbx_service_01" fstype="ext3" \<br> meta migration-threshold="3" failure-timeout="60" \<br> op monitor interval="20s" timeout="40s" OCF_CHECK_LEVEL="20" \<br>
op start interval="0" timeout="60s" \<br> op stop interval="0" timeout="60s"<br>primitive fs_02 ocf:heartbeat:Filesystem \<br> params device="/dev/drbd2" directory="/pbx_service_02" fstype="ext3" \<br>
meta migration-threshold="3" failure-timeout="60" \<br> op monitor interval="20s" timeout="40s" OCF_CHECK_LEVEL="20" \<br> op start interval="0" timeout="60s" \<br>
op stop interval="0" timeout="60s"<br>primitive ip_01 ocf:heartbeat:IPaddr2 \<br> params ip="192.168.78.10" cidr_netmask="24" broadcast="192.168.78.255" \<br>
meta failure-timeout="120" migration-threshold="3" \<br> op monitor interval="5s"<br>primitive ip_02 ocf:heartbeat:IPaddr2 \<br> params ip="192.168.78.20" cidr_netmask="24" broadcast="192.168.78.255" \<br>
op monitor interval="5s"<br>primitive pbx_01 lsb:test-01 \<br> meta failure-timeout="60" migration-threshold="3" target-role="Started" \<br> op monitor interval="20s" \<br>
op start interval="0" timeout="60s" \<br> op stop interval="0" timeout="60s"<br>primitive pbx_02 lsb:test-02 \<br> meta failure-timeout="60" migration-threshold="3" target-role="Started" \<br>
op monitor interval="20s" \<br> op start interval="0" timeout="60s" \<br> op stop interval="0" timeout="60s"<br>group pbx_service_01 ip_01 fs_01 pbx_01 \<br>
meta target-role="Started"<br>group pbx_service_02 ip_02 fs_02 pbx_02 \<br> meta target-role="Started"<br>ms ms-drbd_01 drbd_01 \<br> meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true" target-role="Started"<br>
ms ms-drbd_02 drbd_02 \<br> meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true" target-role="Started"<br>location PrimaryNode-drbd_01 ms-drbd_01 100: node-01<br>
location PrimaryNode-drbd_02 ms-drbd_02 100: node-02<br>location PrimaryNode-pbx_service_01 pbx_service_01 200: node-01<br>location PrimaryNode-pbx_service_02 pbx_service_02 200: node-02<br>location SecondaryNode-drbd_01 ms-drbd_01 0: node-03<br>
location SecondaryNode-drbd_02 ms-drbd_02 0: node-03<br>location SecondaryNode-pbx_service_01 pbx_service_01 10: node-03<br>location SecondaryNode-pbx_service_02 pbx_service_02 10: node-03<br>colocation fs_01-on-drbd_01 inf: fs_01 ms-drbd_01:Master<br>
colocation fs_02-on-drbd_02 inf: fs_02 ms-drbd_02:Master<br>order pbx_service_01-after-drbd_01 inf: ms-drbd_01:promote pbx_service_01:start<br>order pbx_service_02-after-drbd_02 inf: ms-drbd_02:promote pbx_service_02:start<br>
property $id="cib-bootstrap-options" \<br> dc-version="1.0.9-89bd754939df5150de7cd76835f98fe90851b677" \<br> cluster-infrastructure="Heartbeat" \<br> stonith-enabled="false" \<br>
symmetric-cluster="false" \<br> last-lrm-refresh="1285323745"<br>rsc_defaults $id="rsc-options" \<br><br>Cheers,<br>Pavlos<br><br><br><br><br>[1] <a href="http://oss.clusterlabs.org/pipermail/pacemaker/2010-September/007624.html" target="_blank">http://oss.clusterlabs.org/pipermail/pacemaker/2010-September/007624.html</a><br>
</blockquote></div><br>