[Pacemaker] [RFC] [Patch] DC node preferences (dc-priority)

Lars Ellenberg lars.ellenberg at linbit.com
Thu May 3 03:38:18 EDT 2012


People sometimes think they have a use case
for influencing which node will be the DC.

Sometimes it is latency (certain CLI commands work faster
when run on the DC); sometimes they add a "mostly quorum"
node which may not be quite up to the task of being DC.


Completely prohibiting a node from becoming DC would
mean it cannot even be shut down cleanly (with 1.0.x, no MCP),
or act on its own resources under certain no-quorum policies.

So here is a patch I have been asked to present for discussion,
against Pacemaker 1.0, that introduces a "dc-prio" configuration
parameter, which will add some skew to the election algorithm.


Open questions:
 * does it make sense at all?

 * election algorithm compatibility, stability:
   will the election be correct if some nodes have this patch,
   and some don't?

 * How can it be improved so that a node with dc-prio=0 will
   "give up" its DC-role as soon as there is at least one other node
   with dc-prio > 0?

	Lars


--- ./crmd/election.c.orig	2011-05-11 11:36:05.577329600 +0200
+++ ./crmd/election.c	2011-05-12 13:49:04.671484200 +0200
@@ -29,6 +29,7 @@
 GHashTable *voted = NULL;
 uint highest_born_on = -1;
 static int current_election_id = 1;
+static int our_dc_prio = -1;
 
 /*	A_ELECTION_VOTE	*/
 void
@@ -55,6 +56,20 @@
 			break;
 	}
 
+	if (our_dc_prio < 0) {
+			char * dc_prio_str = getenv("HA_dc_prio");
+
+			if (dc_prio_str == NULL) {
+				our_dc_prio = 1;
+			} else {
+				our_dc_prio = atoi(dc_prio_str);
+			}
+	}
+
+	if (!our_dc_prio) {
+		not_voting = TRUE;
+	}
+
 	if(not_voting == FALSE) {
 		if(is_set(fsa_input_register, R_STARTING)) {
 			not_voting = TRUE;
@@ -72,12 +87,13 @@
 	}
 	
 	vote = create_request(
-		CRM_OP_VOTE, NULL, NULL,
+		our_dc_prio?CRM_OP_VOTE:CRM_OP_NOVOTE, NULL, NULL,
 		CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
 
 	current_election_id++;
 	crm_xml_add(vote, F_CRM_ELECTION_OWNER, fsa_our_uuid);
 	crm_xml_add_int(vote, F_CRM_ELECTION_ID, current_election_id);
+	crm_xml_add_int(vote, F_CRM_DC_PRIO, our_dc_prio);
 
 	send_cluster_message(NULL, crm_msg_crmd, vote, TRUE);
 	free_xml(vote);
@@ -188,6 +204,7 @@
 		       fsa_data_t *msg_data)
 {
 	int election_id = -1;
+	int your_dc_prio = 1;
 	int log_level = LOG_INFO;
 	gboolean done = FALSE;
 	gboolean we_loose = FALSE;
@@ -216,6 +233,17 @@
 	your_version   = crm_element_value(vote->msg, F_CRM_VERSION);
 	election_owner = crm_element_value(vote->msg, F_CRM_ELECTION_OWNER);
 	crm_element_value_int(vote->msg, F_CRM_ELECTION_ID, &election_id);
+	crm_element_value_int(vote->msg, F_CRM_DC_PRIO, &your_dc_prio);
+
+	if (our_dc_prio < 0) {
+		char * dc_prio_str = getenv("HA_dc_prio");
+
+		if (dc_prio_str == NULL) {
+			our_dc_prio = 1;
+		} else {
+			our_dc_prio = atoi(dc_prio_str);
+		}
+	}
 
 	CRM_CHECK(vote_from != NULL, vote_from = fsa_our_uname);
 	
@@ -269,6 +297,13 @@
 	    reason = "Recorded";
 	    done = TRUE;
 	    
+	} else if(our_dc_prio < your_dc_prio) {
+	    reason = "DC Prio";
+	    we_loose = TRUE;
+
+	} else if(our_dc_prio > your_dc_prio) {
+	    reason = "DC Prio";
+
 	} else if(compare_version(your_version, CRM_FEATURE_SET) < 0) {
 	    reason = "Version";
 	    we_loose = TRUE;
@@ -328,6 +363,7 @@
 
 		crm_xml_add(novote, F_CRM_ELECTION_OWNER, election_owner);
 		crm_xml_add_int(novote, F_CRM_ELECTION_ID, election_id);
+		crm_xml_add_int(novote, F_CRM_DC_PRIO, our_dc_prio);
 		
 		send_cluster_message(vote_from, crm_msg_crmd, novote, TRUE);
 		free_xml(novote);
--- ./include/crm/msg_xml.h.orig	2011-05-11 18:22:08.061726000 +0200
+++ ./include/crm/msg_xml.h	2011-05-11 18:24:17.405132000 +0200
@@ -32,6 +32,7 @@
 #define F_CRM_ORIGIN			"origin"
 #define F_CRM_JOIN_ID			"join_id"
 #define F_CRM_ELECTION_ID		"election-id"
+#define F_CRM_DC_PRIO			"dc-prio"
 #define F_CRM_ELECTION_OWNER		"election-owner"
 #define F_CRM_TGRAPH			"crm-tgraph"
 #define F_CRM_TGRAPH_INPUT		"crm-tgraph-in"
--- ./lib/ais/plugin.c.orig	2011-05-11 11:29:38.496116000 +0200
+++ ./lib/ais/plugin.c	2011-05-11 17:28:32.385425300 +0200
@@ -421,6 +421,9 @@
     get_config_opt(pcmk_api, local_handle, "use_logd", &value, "no");
     pcmk_env.use_logd = value;
 
+    get_config_opt(pcmk_api, local_handle, "dc_prio", &value, "1");
+    pcmk_env.dc_prio = value;
+
     get_config_opt(pcmk_api, local_handle, "use_mgmtd", &value, "no");
     if(ais_get_boolean(value) == FALSE) {
 	int lpc = 0;
@@ -584,6 +587,7 @@
     pcmk_env.logfile  = NULL;
     pcmk_env.use_logd = "false";
     pcmk_env.syslog   = "daemon";
+    pcmk_env.dc_prio = "1";
 
     if(cs_uid != root_uid) {
 	ais_err("Corosync must be configured to start as 'root',"
--- ./lib/ais/utils.c.orig	2011-05-11 11:27:08.460183200 +0200
+++ ./lib/ais/utils.c	2011-05-11 17:29:09.182064800 +0200
@@ -171,6 +171,7 @@
 	setenv("HA_logfacility",	pcmk_env.syslog,   1);
 	setenv("HA_LOGFACILITY",	pcmk_env.syslog,   1);
 	setenv("HA_use_logd",		pcmk_env.use_logd, 1);
+	setenv("HA_dc_prio",		pcmk_env.dc_prio,  1);
 	if(pcmk_env.logfile) {
 	    setenv("HA_debugfile", pcmk_env.logfile, 1);
 	}
--- ./lib/ais/utils.h.orig	2011-05-11 11:26:12.757414700 +0200
+++ ./lib/ais/utils.h	2011-05-11 17:36:34.194841700 +0200
@@ -226,6 +226,7 @@
 	const char *syslog;
 	const char *logfile;
 	const char *use_logd;
+	const char *dc_prio;
 };
 
 extern struct pcmk_env_s pcmk_env;



-- 
: Lars Ellenberg
: LINBIT | Your Way to High Availability
: DRBD/HA support and consulting http://www.linbit.com

DRBD® and LINBIT® are registered trademarks of LINBIT, Austria.




More information about the Pacemaker mailing list