[Pacemaker] RFC: What part of the XML configuration do you hate the most?

Andrew Beekhof beekhof at gmail.com
Thu Nov 27 07:55:43 UTC 2008


I'm going to fix this properly today.

On Nov 27, 2008, at 8:27 AM, Satomi TANIGUCHI wrote:

> Hi Andrew,
>
> I found another behavior that occurs because the cluster forgets  
> that the resource is supposed to stay stopped.
>
> For example, consider a node which has a primitive resource and a  
> master/slave resource.
> Both have on-fail set to "standby".
> When the master/slave resource fails, all resources on the failed  
> node are stopped, and the master/slave resource's fail-count is  
> increased.
> But then only the primitive resource restarts on the failed node,  
> because its fail-count has not been increased and the cluster forgets  
> that the resource is supposed to stay stopped...
>
> When a failover (F/O) occurs,
> for a resource that is _not_ master/slave,
> pengine creates a single graph to stop and restart the resource.
> For a master/slave resource, it creates two graphs:
> one for the resource's stop-process and another for the restart- 
> process.
> And when it creates the graph for the restart-process,
> nothing remembers that resources are supposed to stay stopped on  
> the failed node.
>
> This behavior is the same as (or similar to) what you are worried  
> about, isn't it?
>
> To avoid this behavior, the status of the node needs to be updated  
> before the restart-process.
> As a trial, I created a patch (for pacemaker-dev 366b14d79780).
> And I attached the graph produced by the patched pacemaker.
> It's not a "general" way, just for reference...
>
>
> Regards,
> Satomi TANIGUCHI
> diff -urN pacemaker-dev/crmd/te_actions.c pacemaker-dev.mod/crmd/ 
> te_actions.c
> --- pacemaker-dev/crmd/te_actions.c	2008-11-26 10:47:46.000000000  
> +0900
> +++ pacemaker-dev.mod/crmd/te_actions.c	2008-11-26  
> 10:48:47.000000000 +0900
> @@ -175,6 +175,42 @@
> 	return TRUE;
> }
>
> +static gboolean
> +te_standby_node(crm_graph_t *graph, crm_action_t *action)
> +{
> +	const char *id = NULL;
> +	const char *uuid = NULL;
> +	const char *target = NULL;
> +
> +	id = ID(action->xml);
> +	target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
> +	uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
> +
> +	CRM_CHECK(id != NULL,
> +		  crm_log_xml_warn(action->xml, "BadAction");
> +		  return FALSE);
> +	CRM_CHECK(uuid != NULL,
> +		  crm_log_xml_warn(action->xml, "BadAction");
> +		  return FALSE);
> +	CRM_CHECK(target != NULL,
> +		  crm_log_xml_warn(action->xml, "BadAction");
> +		  return FALSE);
> +
> +	te_log_action(LOG_INFO,
> +		      "Executing standby operation (%s) on %s", id, target);
> +
> +	if (cib_ok > set_standby(fsa_cib_conn, uuid, XML_CIB_TAG_NODES,  
> "on")) {
> +		crm_err("Cannot standby %s: set_standby() call failed.", target);
> +	}
> +
> +	crm_info("Skipping wait for %d", action->id);
> +	action->confirmed = TRUE;
> +	update_graph(graph, action);
> +	trigger_graph();
> +
> +	return TRUE;
> +}
> +
> static int get_target_rc(crm_action_t *action)
> {
> 	const char *target_rc_s = crm_meta_value(action->params,  
> XML_ATTR_TE_TARGET_RC);
> @@ -500,7 +536,8 @@
> 	te_pseudo_action,
> 	te_rsc_command,
> 	te_crm_command,
> -	te_fence_node
> +	te_fence_node,
> +	te_standby_node
> };
>
> void
> diff -urN pacemaker-dev/include/crm/crm.h pacemaker-dev.mod/include/ 
> crm/crm.h
> --- pacemaker-dev/include/crm/crm.h	2008-11-26 10:47:46.000000000  
> +0900
> +++ pacemaker-dev.mod/include/crm/crm.h	2008-11-26  
> 10:48:47.000000000 +0900
> @@ -146,6 +146,7 @@
> #define CRM_OP_SHUTDOWN_REQ	"req_shutdown"
> #define CRM_OP_SHUTDOWN 	"do_shutdown"
> #define CRM_OP_FENCE	 	"stonith"
> +#define CRM_OP_STANDBY		"standby"
> #define CRM_OP_EVENTCC		"event_cc"
> #define CRM_OP_TEABORT		"te_abort"
> #define CRM_OP_TEABORTED	"te_abort_confirmed" /* we asked */
> diff -urN pacemaker-dev/include/crm/pengine/common.h pacemaker- 
> dev.mod/include/crm/pengine/common.h
> --- pacemaker-dev/include/crm/pengine/common.h	2008-11-26  
> 10:47:46.000000000 +0900
> +++ pacemaker-dev.mod/include/crm/pengine/common.h	2008-11-26  
> 10:48:47.000000000 +0900
> @@ -52,6 +52,7 @@
> 	action_demote,
> 	action_demoted,
> 	shutdown_crm,
> +	standby_node,
> 	stonith_node
> };
>
> diff -urN pacemaker-dev/include/crm/pengine/status.h pacemaker- 
> dev.mod/include/crm/pengine/status.h
> --- pacemaker-dev/include/crm/pengine/status.h	2008-11-26  
> 10:47:46.000000000 +0900
> +++ pacemaker-dev.mod/include/crm/pengine/status.h	2008-11-26  
> 10:48:47.000000000 +0900
> @@ -104,6 +104,7 @@
> 		const char *uname;
> 		gboolean online;
> 		gboolean standby;
> +		gboolean action_standby;
> 		gboolean pending;
> 		gboolean unclean;
> 		gboolean shutdown;
> diff -urN pacemaker-dev/include/crm/transition.h pacemaker-dev.mod/ 
> include/crm/transition.h
> --- pacemaker-dev/include/crm/transition.h	2008-11-26  
> 10:47:46.000000000 +0900
> +++ pacemaker-dev.mod/include/crm/transition.h	2008-11-26  
> 10:48:47.000000000 +0900
> @@ -115,6 +115,7 @@
> 		gboolean (*rsc)(crm_graph_t *graph, crm_action_t *action);
> 		gboolean (*crmd)(crm_graph_t *graph, crm_action_t *action);
> 		gboolean (*stonith)(crm_graph_t *graph, crm_action_t *action);
> +		gboolean (*standby)(crm_graph_t *graph, crm_action_t *action);
> } crm_graph_functions_t;
>
> enum transition_status {
> diff -urN pacemaker-dev/lib/pengine/common.c pacemaker-dev.mod/lib/ 
> pengine/common.c
> --- pacemaker-dev/lib/pengine/common.c	2008-11-26 10:47:46.000000000  
> +0900
> +++ pacemaker-dev.mod/lib/pengine/common.c	2008-11-26  
> 10:48:47.000000000 +0900
> @@ -178,6 +178,8 @@
> 		return shutdown_crm;
> 	} else if(safe_str_eq(task, CRM_OP_FENCE)) {
> 		return stonith_node;
> +	} else if(safe_str_eq(task, CRM_OP_STANDBY)) {
> +		return standby_node;
> 	} else if(safe_str_eq(task, CRMD_ACTION_STATUS)) {
> 		return monitor_rsc;
> 	} else if(safe_str_eq(task, CRMD_ACTION_NOTIFY)) {
> @@ -245,6 +247,9 @@
> 		case stonith_node:
> 			result = CRM_OP_FENCE;
> 			break;
> +		case standby_node:
> +			result = CRM_OP_STANDBY;
> +			break;
> 		case monitor_rsc:
> 			result = CRMD_ACTION_STATUS;
> 			break;
> diff -urN pacemaker-dev/lib/pengine/unpack.c pacemaker-dev.mod/lib/ 
> pengine/unpack.c
> --- pacemaker-dev/lib/pengine/unpack.c	2008-11-26 10:47:46.000000000  
> +0900
> +++ pacemaker-dev.mod/lib/pengine/unpack.c	2008-11-26  
> 10:48:47.000000000 +0900
> @@ -240,6 +240,7 @@
> 			 */
> 			new_node->details->unclean = TRUE;
> 		}
> +		new_node->details->action_standby = FALSE;
> 		
> 		if(type == NULL
> 		   || safe_str_eq(type, "member")
> @@ -832,7 +833,7 @@
> 			stop_action(rsc, node, FALSE);
> 				
> 		} else if(on_fail == action_fail_standby) {
> -			node->details->standby = TRUE;
> +			node->details->action_standby = TRUE;
>
> 		} else if(on_fail == action_fail_block) {
> 			/* is_managed == FALSE will prevent any
> diff -urN pacemaker-dev/lib/transition/graph.c pacemaker-dev.mod/lib/ 
> transition/graph.c
> --- pacemaker-dev/lib/transition/graph.c	2008-11-26  
> 10:47:46.000000000 +0900
> +++ pacemaker-dev.mod/lib/transition/graph.c	2008-11-26  
> 10:48:47.000000000 +0900
> @@ -188,6 +188,11 @@
> 			crm_debug_2("Executing STONITH-event: %d",
> 				      action->id);
> 			return graph_fns->stonith(graph, action);
> +
> +		} else if(safe_str_eq(task, CRM_OP_STANDBY)) {
> +			crm_debug_2("Executing STANDBY-event: %d",
> +				      action->id);
> +			return graph_fns->standby(graph, action);
> 		}
> 		
> 		crm_debug_2("Executing crm-event: %d", action->id);
> diff -urN pacemaker-dev/lib/transition/utils.c pacemaker-dev.mod/lib/ 
> transition/utils.c
> --- pacemaker-dev/lib/transition/utils.c	2008-11-26  
> 10:47:46.000000000 +0900
> +++ pacemaker-dev.mod/lib/transition/utils.c	2008-11-26  
> 10:48:47.000000000 +0900
> @@ -41,6 +41,7 @@
> 	pseudo_action_dummy,
> 	pseudo_action_dummy,
> 	pseudo_action_dummy,
> +	pseudo_action_dummy,
> 	pseudo_action_dummy
> };
>
> @@ -61,6 +62,7 @@
> 	CRM_ASSERT(graph_fns->crmd != NULL);
> 	CRM_ASSERT(graph_fns->pseudo != NULL);
> 	CRM_ASSERT(graph_fns->stonith != NULL);
> +	CRM_ASSERT(graph_fns->standby != NULL);
> }
>
> const char *
> diff -urN pacemaker-dev/pengine/allocate.c pacemaker-dev.mod/pengine/ 
> allocate.c
> --- pacemaker-dev/pengine/allocate.c	2008-11-26 10:47:46.000000000  
> +0900
> +++ pacemaker-dev.mod/pengine/allocate.c	2008-11-26  
> 10:48:47.000000000 +0900
> @@ -774,6 +774,15 @@
> 				last_stonith = stonith_op;			
> 			}
>
> +		} else if(node->details->online && node->details->action_standby) {
> +			action_t *standby_op = NULL;
> +
> +			standby_op = custom_action(
> +				NULL, crm_strdup(CRM_OP_STANDBY),
> +				CRM_OP_STANDBY, node, FALSE, TRUE, data_set);
> +
> +			order_actions(standby_op, all_stopped, pe_order_implies_left);
> +
> 		} else if(node->details->online && node->details->shutdown) {			
> 			action_t *down_op = NULL;	
> 			crm_info("Scheduling Node %s for shutdown",
> diff -urN pacemaker-dev/pengine/graph.c pacemaker-dev.mod/pengine/ 
> graph.c
> --- pacemaker-dev/pengine/graph.c	2008-11-26 10:47:46.000000000 +0900
> +++ pacemaker-dev.mod/pengine/graph.c	2008-11-26 10:48:47.000000000  
> +0900
> @@ -368,7 +368,10 @@
> 	if(safe_str_eq(action->task, CRM_OP_FENCE)) {
> 		action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT);
> /* 		needs_node_info = FALSE; */
> -		
> +
> +	} else if(safe_str_eq(action->task, CRM_OP_STANDBY)) {
> +		action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT);
> +
> 	} else if(safe_str_eq(action->task, CRM_OP_SHUTDOWN)) {
> 		action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT);
>
> diff -urN pacemaker-dev/pengine/group.c pacemaker-dev.mod/pengine/ 
> group.c
> --- pacemaker-dev/pengine/group.c	2008-11-26 10:47:46.000000000 +0900
> +++ pacemaker-dev.mod/pengine/group.c	2008-11-26 10:48:47.000000000  
> +0900
> @@ -423,6 +423,7 @@
> 		case action_notified:
> 		case shutdown_crm:
> 		case stonith_node:
> +		case standby_node:
> 		    break;
> 		case stop_rsc:
> 		case stopped_rsc:
> diff -urN pacemaker-dev/pengine/utils.c pacemaker-dev.mod/pengine/ 
> utils.c
> --- pacemaker-dev/pengine/utils.c	2008-11-26 10:47:49.000000000 +0900
> +++ pacemaker-dev.mod/pengine/utils.c	2008-11-26 10:49:54.000000000  
> +0900
> @@ -338,6 +338,7 @@
> 		case monitor_rsc:
> 		case shutdown_crm:
> 		case stonith_node:
> +		case standby_node:
> 			task = no_action;
> 			break;
> 		default:
> @@ -430,6 +431,7 @@
> 	
> 	switch(text2task(action->task)) {
> 		case stonith_node:
> +		case standby_node:
> 		case shutdown_crm:
> 			do_crm_log_unlikely(log_level,
> 				      "%s%s%sAction %d: %s%s%s%s%s%s",
> <pe-warn-0.left.gif>_______________________________________________
> Pacemaker mailing list
> Pacemaker at clusterlabs.org
> http://list.clusterlabs.org/mailman/listinfo/pacemaker





More information about the Pacemaker mailing list