[Pacemaker] RFC: What part of the XML configuration do you hate the most?

Andrew Beekhof beekhof at gmail.com
Thu Nov 27 15:13:23 UTC 2008


Done:
   http://hg.clusterlabs.org/pacemaker/stable-1.0/rev/9919f48d3313

On Thu, Nov 27, 2008 at 08:55, Andrew Beekhof <beekhof at gmail.com> wrote:
> I'm going to fix this properly today.
>
> On Nov 27, 2008, at 8:27 AM, Satomi TANIGUCHI wrote:
>
>> Hi Andrew,
>>
>> I found another behavior that is caused by the cluster forgetting that a
>> resource is supposed to stay stopped.
>>
>> For example, consider a node which has a primitive resource and a
>> master/slave resource.
>> Both of them have on-fail set to "standby".
>> When the master/slave resource fails, all resources on the failed node are
>> stopped, and the master/slave resource's fail-count is increased.
>> But then only the primitive resource restarts on the failed node, because
>> its fail-count has not been increased and the cluster forgets that the
>> resource is supposed to stay stopped...
>>
>> When a failover (F/O) occurs,
>> in the case of a resource that is _not_ master/slave,
>> pengine creates one graph to stop and restart the resource.
>> In the case of a master/slave resource, it creates two graphs:
>> one for the resource's stop process and another for its restart process.
>> And when it creates the graph for the restart process,
>> nothing remembers that the resources are supposed to stay stopped on the
>> failed node.
>>
>> This behavior is the same as (or similar to) what you are worried about, isn't it?
>>
>> To avoid this behavior, the status of the node needs to be updated before
>> the restart process.
>> As a trial, I created a patch (for pacemaker-dev 366b14d79780),
>> and I attached the graph produced with the patched pacemaker.
>> It's not a "general" solution, just for reference...
>>
>>
>> Regards,
>> Satomi TANIGUCHI
>> diff -urN pacemaker-dev/crmd/te_actions.c
>> pacemaker-dev.mod/crmd/te_actions.c
>> --- pacemaker-dev/crmd/te_actions.c     2008-11-26 10:47:46.000000000
>> +0900
>> +++ pacemaker-dev.mod/crmd/te_actions.c 2008-11-26 10:48:47.000000000
>> +0900
>> @@ -175,6 +175,42 @@
>>        return TRUE;
>> }
>>
>> +static gboolean
>> +te_standby_node(crm_graph_t *graph, crm_action_t *action)
>> +{
>> +       const char *id = NULL;
>> +       const char *uuid = NULL;
>> +       const char *target = NULL;
>> +
>> +       id = ID(action->xml);
>> +       target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
>> +       uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
>> +
>> +       CRM_CHECK(id != NULL,
>> +                 crm_log_xml_warn(action->xml, "BadAction");
>> +                 return FALSE);
>> +       CRM_CHECK(uuid != NULL,
>> +                 crm_log_xml_warn(action->xml, "BadAction");
>> +                 return FALSE);
>> +       CRM_CHECK(target != NULL,
>> +                 crm_log_xml_warn(action->xml, "BadAction");
>> +                 return FALSE);
>> +
>> +       te_log_action(LOG_INFO,
>> +                     "Executing standby operation (%s) on %s", id,
>> target);
>> +
>> +       if (cib_ok > set_standby(fsa_cib_conn, uuid, XML_CIB_TAG_NODES,
>> "on")) {
>> +               crm_err("Cannot standby %s: set_standby() call failed.",
>> target);
>> +       }
>> +
>> +       crm_info("Skipping wait for %d", action->id);
>> +       action->confirmed = TRUE;
>> +       update_graph(graph, action);
>> +       trigger_graph();
>> +
>> +       return TRUE;
>> +}
>> +
>> static int get_target_rc(crm_action_t *action)
>> {
>>        const char *target_rc_s = crm_meta_value(action->params,
>> XML_ATTR_TE_TARGET_RC);
>> @@ -500,7 +536,8 @@
>>        te_pseudo_action,
>>        te_rsc_command,
>>        te_crm_command,
>> -       te_fence_node
>> +       te_fence_node,
>> +       te_standby_node
>> };
>>
>> void
>> diff -urN pacemaker-dev/include/crm/crm.h
>> pacemaker-dev.mod/include/crm/crm.h
>> --- pacemaker-dev/include/crm/crm.h     2008-11-26 10:47:46.000000000
>> +0900
>> +++ pacemaker-dev.mod/include/crm/crm.h 2008-11-26 10:48:47.000000000
>> +0900
>> @@ -146,6 +146,7 @@
>> #define CRM_OP_SHUTDOWN_REQ     "req_shutdown"
>> #define CRM_OP_SHUTDOWN         "do_shutdown"
>> #define CRM_OP_FENCE            "stonith"
>> +#define CRM_OP_STANDBY         "standby"
>> #define CRM_OP_EVENTCC          "event_cc"
>> #define CRM_OP_TEABORT          "te_abort"
>> #define CRM_OP_TEABORTED        "te_abort_confirmed" /* we asked */
>> diff -urN pacemaker-dev/include/crm/pengine/common.h
>> pacemaker-dev.mod/include/crm/pengine/common.h
>> --- pacemaker-dev/include/crm/pengine/common.h  2008-11-26
>> 10:47:46.000000000 +0900
>> +++ pacemaker-dev.mod/include/crm/pengine/common.h      2008-11-26
>> 10:48:47.000000000 +0900
>> @@ -52,6 +52,7 @@
>>        action_demote,
>>        action_demoted,
>>        shutdown_crm,
>> +       standby_node,
>>        stonith_node
>> };
>>
>> diff -urN pacemaker-dev/include/crm/pengine/status.h
>> pacemaker-dev.mod/include/crm/pengine/status.h
>> --- pacemaker-dev/include/crm/pengine/status.h  2008-11-26
>> 10:47:46.000000000 +0900
>> +++ pacemaker-dev.mod/include/crm/pengine/status.h      2008-11-26
>> 10:48:47.000000000 +0900
>> @@ -104,6 +104,7 @@
>>                const char *uname;
>>                gboolean online;
>>                gboolean standby;
>> +               gboolean action_standby;
>>                gboolean pending;
>>                gboolean unclean;
>>                gboolean shutdown;
>> diff -urN pacemaker-dev/include/crm/transition.h
>> pacemaker-dev.mod/include/crm/transition.h
>> --- pacemaker-dev/include/crm/transition.h      2008-11-26
>> 10:47:46.000000000 +0900
>> +++ pacemaker-dev.mod/include/crm/transition.h  2008-11-26
>> 10:48:47.000000000 +0900
>> @@ -115,6 +115,7 @@
>>                gboolean (*rsc)(crm_graph_t *graph, crm_action_t *action);
>>                gboolean (*crmd)(crm_graph_t *graph, crm_action_t *action);
>>                gboolean (*stonith)(crm_graph_t *graph, crm_action_t
>> *action);
>> +               gboolean (*standby)(crm_graph_t *graph, crm_action_t
>> *action);
>> } crm_graph_functions_t;
>>
>> enum transition_status {
>> diff -urN pacemaker-dev/lib/pengine/common.c
>> pacemaker-dev.mod/lib/pengine/common.c
>> --- pacemaker-dev/lib/pengine/common.c  2008-11-26 10:47:46.000000000
>> +0900
>> +++ pacemaker-dev.mod/lib/pengine/common.c      2008-11-26
>> 10:48:47.000000000 +0900
>> @@ -178,6 +178,8 @@
>>                return shutdown_crm;
>>        } else if(safe_str_eq(task, CRM_OP_FENCE)) {
>>                return stonith_node;
>> +       } else if(safe_str_eq(task, CRM_OP_STANDBY)) {
>> +               return standby_node;
>>        } else if(safe_str_eq(task, CRMD_ACTION_STATUS)) {
>>                return monitor_rsc;
>>        } else if(safe_str_eq(task, CRMD_ACTION_NOTIFY)) {
>> @@ -245,6 +247,9 @@
>>                case stonith_node:
>>                        result = CRM_OP_FENCE;
>>                        break;
>> +               case standby_node:
>> +                       result = CRM_OP_STANDBY;
>> +                       break;
>>                case monitor_rsc:
>>                        result = CRMD_ACTION_STATUS;
>>                        break;
>> diff -urN pacemaker-dev/lib/pengine/unpack.c
>> pacemaker-dev.mod/lib/pengine/unpack.c
>> --- pacemaker-dev/lib/pengine/unpack.c  2008-11-26 10:47:46.000000000
>> +0900
>> +++ pacemaker-dev.mod/lib/pengine/unpack.c      2008-11-26
>> 10:48:47.000000000 +0900
>> @@ -240,6 +240,7 @@
>>                         */
>>                        new_node->details->unclean = TRUE;
>>                }
>> +               new_node->details->action_standby = FALSE;
>>
>>                if(type == NULL
>>                   || safe_str_eq(type, "member")
>> @@ -832,7 +833,7 @@
>>                        stop_action(rsc, node, FALSE);
>>
>>                } else if(on_fail == action_fail_standby) {
>> -                       node->details->standby = TRUE;
>> +                       node->details->action_standby = TRUE;
>>
>>                } else if(on_fail == action_fail_block) {
>>                        /* is_managed == FALSE will prevent any
>> diff -urN pacemaker-dev/lib/transition/graph.c
>> pacemaker-dev.mod/lib/transition/graph.c
>> --- pacemaker-dev/lib/transition/graph.c        2008-11-26
>> 10:47:46.000000000 +0900
>> +++ pacemaker-dev.mod/lib/transition/graph.c    2008-11-26
>> 10:48:47.000000000 +0900
>> @@ -188,6 +188,11 @@
>>                        crm_debug_2("Executing STONITH-event: %d",
>>                                      action->id);
>>                        return graph_fns->stonith(graph, action);
>> +
>> +               } else if(safe_str_eq(task, CRM_OP_STANDBY)) {
>> +                       crm_debug_2("Executing STANDBY-event: %d",
>> +                                     action->id);
>> +                       return graph_fns->standby(graph, action);
>>                }
>>
>>                crm_debug_2("Executing crm-event: %d", action->id);
>> diff -urN pacemaker-dev/lib/transition/utils.c
>> pacemaker-dev.mod/lib/transition/utils.c
>> --- pacemaker-dev/lib/transition/utils.c        2008-11-26
>> 10:47:46.000000000 +0900
>> +++ pacemaker-dev.mod/lib/transition/utils.c    2008-11-26
>> 10:48:47.000000000 +0900
>> @@ -41,6 +41,7 @@
>>        pseudo_action_dummy,
>>        pseudo_action_dummy,
>>        pseudo_action_dummy,
>> +       pseudo_action_dummy,
>>        pseudo_action_dummy
>> };
>>
>> @@ -61,6 +62,7 @@
>>        CRM_ASSERT(graph_fns->crmd != NULL);
>>        CRM_ASSERT(graph_fns->pseudo != NULL);
>>        CRM_ASSERT(graph_fns->stonith != NULL);
>> +       CRM_ASSERT(graph_fns->standby != NULL);
>> }
>>
>> const char *
>> diff -urN pacemaker-dev/pengine/allocate.c
>> pacemaker-dev.mod/pengine/allocate.c
>> --- pacemaker-dev/pengine/allocate.c    2008-11-26 10:47:46.000000000
>> +0900
>> +++ pacemaker-dev.mod/pengine/allocate.c        2008-11-26
>> 10:48:47.000000000 +0900
>> @@ -774,6 +774,15 @@
>>                                last_stonith = stonith_op;
>>
>>                        }
>>
>> +               } else if(node->details->online &&
>> node->details->action_standby) {
>> +                       action_t *standby_op = NULL;
>> +
>> +                       standby_op = custom_action(
>> +                               NULL, crm_strdup(CRM_OP_STANDBY),
>> +                               CRM_OP_STANDBY, node, FALSE, TRUE,
>> data_set);
>> +
>> +                       order_actions(standby_op, all_stopped,
>> pe_order_implies_left);
>> +
>>                } else if(node->details->online && node->details->shutdown)
>> {
>>                        action_t *down_op = NULL;
>>                        crm_info("Scheduling Node %s for shutdown",
>> diff -urN pacemaker-dev/pengine/graph.c pacemaker-dev.mod/pengine/graph.c
>> --- pacemaker-dev/pengine/graph.c       2008-11-26 10:47:46.000000000
>> +0900
>> +++ pacemaker-dev.mod/pengine/graph.c   2008-11-26 10:48:47.000000000
>> +0900
>> @@ -368,7 +368,10 @@
>>        if(safe_str_eq(action->task, CRM_OP_FENCE)) {
>>                action_xml = create_xml_node(NULL,
>> XML_GRAPH_TAG_CRM_EVENT);
>> /*              needs_node_info = FALSE; */
>> -
>> +
>> +       } else if(safe_str_eq(action->task, CRM_OP_STANDBY)) {
>> +               action_xml = create_xml_node(NULL,
>> XML_GRAPH_TAG_CRM_EVENT);
>> +
>>        } else if(safe_str_eq(action->task, CRM_OP_SHUTDOWN)) {
>>                action_xml = create_xml_node(NULL,
>> XML_GRAPH_TAG_CRM_EVENT);
>>
>> diff -urN pacemaker-dev/pengine/group.c pacemaker-dev.mod/pengine/group.c
>> --- pacemaker-dev/pengine/group.c       2008-11-26 10:47:46.000000000
>> +0900
>> +++ pacemaker-dev.mod/pengine/group.c   2008-11-26 10:48:47.000000000
>> +0900
>> @@ -423,6 +423,7 @@
>>                case action_notified:
>>                case shutdown_crm:
>>                case stonith_node:
>> +               case standby_node:
>>                    break;
>>                case stop_rsc:
>>                case stopped_rsc:
>> diff -urN pacemaker-dev/pengine/utils.c pacemaker-dev.mod/pengine/utils.c
>> --- pacemaker-dev/pengine/utils.c       2008-11-26 10:47:49.000000000
>> +0900
>> +++ pacemaker-dev.mod/pengine/utils.c   2008-11-26 10:49:54.000000000
>> +0900
>> @@ -338,6 +338,7 @@
>>                case monitor_rsc:
>>                case shutdown_crm:
>>                case stonith_node:
>> +               case standby_node:
>>                        task = no_action;
>>                        break;
>>                default:
>> @@ -430,6 +431,7 @@
>>
>>        switch(text2task(action->task)) {
>>                case stonith_node:
>> +               case standby_node:
>>                case shutdown_crm:
>>                        do_crm_log_unlikely(log_level,
>>                                      "%s%s%sAction %d: %s%s%s%s%s%s",
>> <pe-warn-0.left.gif>_______________________________________________
>> Pacemaker mailing list
>> Pacemaker at clusterlabs.org
>> http://list.clusterlabs.org/mailman/listinfo/pacemaker
>
>




More information about the Pacemaker mailing list