[Pacemaker] [RFC] [Patch] DC node preferences (dc-priority)

Andrew Beekhof andrew at beekhof.net
Sun May 6 07:45:09 EDT 2012


On Thu, May 3, 2012 at 5:38 PM, Lars Ellenberg
<lars.ellenberg at linbit.com> wrote:
>
> People sometimes think they have a use case
> for influencing which node will be the DC.

Agreed :-)

>
> Sometimes it is latency (certain cli commands work faster
> when done on the DC),

Config changes can be run against any node; there is no reason to go
to the node that is the DC.

> sometimes they add a "mostly quorum"
> node which may be not quite up to the task of being DC.

I'm not sure I buy that.  Most of the load would come from the
resources themselves.

> Prohibiting a node from becoming DC completely would
> mean it can not even be cleanly shutdown (with 1.0.x, no MCP),
> or act on its own resources for certain no-quorum policies.
>
> So here is a patch I have been asked to present for discussion,

May one ask where it originated?

> against Pacemaker 1.0, that introduces a "dc-prio" configuration
> parameter, which will add some skew to the election algorithm.
>
>
> Open questions:
>  * does it make sense at all?

Doubtful :-)

>
>  * election algorithm compatibility, stability:
>   will the election be correct if some nodes have this patch,
>   and some don't ?

Unlikely, but you could easily make it so by placing the dc-prio
comparison after the version check (and bumping said version in the patch).

>  * How can it be improved so that a node with dc-prio=0 will
>   "give up" its DC-role as soon as there is at least one other node
>   with dc-prio > 0?

Short of causing an election every time a node joins... I doubt it.

>        Lars
>
>
> --- ./crmd/election.c.orig      2011-05-11 11:36:05.577329600 +0200
> +++ ./crmd/election.c   2011-05-12 13:49:04.671484200 +0200
> @@ -29,6 +29,7 @@
>  GHashTable *voted = NULL;
>  uint highest_born_on = -1;
>  static int current_election_id = 1;
> +static int our_dc_prio = -1;
>
>  /*     A_ELECTION_VOTE */
>  void
> @@ -55,6 +56,20 @@
>                        break;
>        }
>
> +       if (our_dc_prio < 0) {
> +                       char * dc_prio_str = getenv("HA_dc_prio");
> +
> +                       if (dc_prio_str == NULL) {
> +                               our_dc_prio = 1;
> +                       } else {
> +                               our_dc_prio = atoi(dc_prio_str);
> +                       }
> +       }
> +
> +       if (!our_dc_prio) {
> +               not_voting = TRUE;
> +       }
> +
>        if(not_voting == FALSE) {
>                if(is_set(fsa_input_register, R_STARTING)) {
>                        not_voting = TRUE;
> @@ -72,12 +87,13 @@
>        }
>
>        vote = create_request(
> -               CRM_OP_VOTE, NULL, NULL,
> +               our_dc_prio?CRM_OP_VOTE:CRM_OP_NOVOTE, NULL, NULL,
>                CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
>
>        current_election_id++;
>        crm_xml_add(vote, F_CRM_ELECTION_OWNER, fsa_our_uuid);
>        crm_xml_add_int(vote, F_CRM_ELECTION_ID, current_election_id);
> +       crm_xml_add_int(vote, F_CRM_DC_PRIO, our_dc_prio);
>
>        send_cluster_message(NULL, crm_msg_crmd, vote, TRUE);
>        free_xml(vote);
> @@ -188,6 +204,7 @@
>                       fsa_data_t *msg_data)
>  {
>        int election_id = -1;
> +       int your_dc_prio = 1;
>        int log_level = LOG_INFO;
>        gboolean done = FALSE;
>        gboolean we_loose = FALSE;
> @@ -216,6 +233,17 @@
>        your_version   = crm_element_value(vote->msg, F_CRM_VERSION);
>        election_owner = crm_element_value(vote->msg, F_CRM_ELECTION_OWNER);
>        crm_element_value_int(vote->msg, F_CRM_ELECTION_ID, &election_id);
> +       crm_element_value_int(vote->msg, F_CRM_DC_PRIO, &your_dc_prio);
> +
> +       if (our_dc_prio < 0) {
> +               char * dc_prio_str = getenv("HA_dc_prio");
> +
> +               if (dc_prio_str == NULL) {
> +                       our_dc_prio = 1;
> +               } else {
> +                       our_dc_prio = atoi(dc_prio_str);
> +               }
> +       }
>
>        CRM_CHECK(vote_from != NULL, vote_from = fsa_our_uname);
>
> @@ -269,6 +297,13 @@
>            reason = "Recorded";
>            done = TRUE;
>
> +       } else if(our_dc_prio < your_dc_prio) {
> +           reason = "DC Prio";
> +           we_loose = TRUE;
> +
> +       } else if(our_dc_prio > your_dc_prio) {
> +           reason = "DC Prio";
> +
>        } else if(compare_version(your_version, CRM_FEATURE_SET) < 0) {
>            reason = "Version";
>            we_loose = TRUE;
> @@ -328,6 +363,7 @@
>
>                crm_xml_add(novote, F_CRM_ELECTION_OWNER, election_owner);
>                crm_xml_add_int(novote, F_CRM_ELECTION_ID, election_id);
> +               crm_xml_add_int(novote, F_CRM_DC_PRIO, our_dc_prio);
>
>                send_cluster_message(vote_from, crm_msg_crmd, novote, TRUE);
>                free_xml(novote);
> --- ./include/crm/msg_xml.h.orig        2011-05-11 18:22:08.061726000 +0200
> +++ ./include/crm/msg_xml.h     2011-05-11 18:24:17.405132000 +0200
> @@ -32,6 +32,7 @@
>  #define F_CRM_ORIGIN                   "origin"
>  #define F_CRM_JOIN_ID                  "join_id"
>  #define F_CRM_ELECTION_ID              "election-id"
> +#define F_CRM_DC_PRIO                  "dc-prio"
>  #define F_CRM_ELECTION_OWNER           "election-owner"
>  #define F_CRM_TGRAPH                   "crm-tgraph"
>  #define F_CRM_TGRAPH_INPUT             "crm-tgraph-in"
> --- ./lib/ais/plugin.c.orig     2011-05-11 11:29:38.496116000 +0200
> +++ ./lib/ais/plugin.c  2011-05-11 17:28:32.385425300 +0200
> @@ -421,6 +421,9 @@
>     get_config_opt(pcmk_api, local_handle, "use_logd", &value, "no");
>     pcmk_env.use_logd = value;
>
> +    get_config_opt(pcmk_api, local_handle, "dc_prio", &value, "1");
> +    pcmk_env.dc_prio = value;
> +
>     get_config_opt(pcmk_api, local_handle, "use_mgmtd", &value, "no");
>     if(ais_get_boolean(value) == FALSE) {
>        int lpc = 0;
> @@ -584,6 +587,7 @@
>     pcmk_env.logfile  = NULL;
>     pcmk_env.use_logd = "false";
>     pcmk_env.syslog   = "daemon";
> +    pcmk_env.dc_prio = "1";
>
>     if(cs_uid != root_uid) {
>        ais_err("Corosync must be configured to start as 'root',"
> --- ./lib/ais/utils.c.orig      2011-05-11 11:27:08.460183200 +0200
> +++ ./lib/ais/utils.c   2011-05-11 17:29:09.182064800 +0200
> @@ -171,6 +171,7 @@
>        setenv("HA_logfacility",        pcmk_env.syslog,   1);
>        setenv("HA_LOGFACILITY",        pcmk_env.syslog,   1);
>        setenv("HA_use_logd",           pcmk_env.use_logd, 1);
> +       setenv("HA_dc_prio",            pcmk_env.dc_prio,  1);
>        if(pcmk_env.logfile) {
>            setenv("HA_debugfile", pcmk_env.logfile, 1);
>        }
> --- ./lib/ais/utils.h.orig      2011-05-11 11:26:12.757414700 +0200
> +++ ./lib/ais/utils.h   2011-05-11 17:36:34.194841700 +0200
> @@ -226,6 +226,7 @@
>        const char *syslog;
>        const char *logfile;
>        const char *use_logd;
> +       const char *dc_prio;
>  };
>
>  extern struct pcmk_env_s pcmk_env;
>
>
>
> --
> : Lars Ellenberg
> : LINBIT | Your Way to High Availability
> : DRBD/HA support and consulting http://www.linbit.com
>
> DRBD® and LINBIT® are registered trademarks of LINBIT, Austria.
>
> _______________________________________________
> Pacemaker mailing list: Pacemaker at oss.clusterlabs.org
> http://oss.clusterlabs.org/mailman/listinfo/pacemaker
>
> Project Home: http://www.clusterlabs.org
> Getting started: http://www.clusterlabs.org/doc/Cluster_from_Scratch.pdf
> Bugs: http://bugs.clusterlabs.org




More information about the Pacemaker mailing list