[ClusterLabs] Is 20 seconds to complete redis switchover to be expected?

Tue Mar 31 02:27:07 EDT 2020

Hello,

I am new to Pacemaker (and also new to Redis) and appreciate the information shared here.

I believe that with Redis Sentinel a switchover takes about 2 seconds.
In a post I read about Pacemaker with Redis, the author said he was
doing it in 3 seconds.  For me, it takes about 20 seconds, and I am
wondering what I am doing wrong.

Example timeline from looking at the logs:

:00 - node1 crash (redis-server killed)
:02 - Pacemaker knows there is a problem.
:02 - STONITH/fence starts.  (fence_ipmilan power off)
:09 - STONITH/fence finished.
:18 - Redis switched.
:19 - VIP switched.

While the STONITH/fencing action is running, nothing else happens.
The fencing is done by fence_ipmilan, which performs a power-off. Can
the fencing of node1 be started while node2 works in parallel to take
over Redis and the VIP?  Also, after the STONITH/fence action has
finished, there are another 9 seconds or so during which Pacemaker
seems to be just trying to invite the dead node1 back (I think).  It
would be great if I could skip that and go directly to the Redis and
VIP switchovers.

I have been researching & testing with various configurations and
would appreciate any pointers and advice.

I am using pacemaker 1.1.10, with 2 nodes.

My (edited) "crm configure show" output:

node $id="1" example1.com \
        attributes standby="off"
node $id="2" example2.com
primitive fence_example1 stonith:fence_ipmilan \
        params pcmk_host_list="example1.com" ipaddr="10.24.x.xxx"
login="user" passwd="password" pcmk_action_limit="-1"
concurrent-fencing="true" lanplus="true" action="off" \
        op monitor interval="20s" \
        meta target-role="Started"
primitive fence_example2 stonith:fence_ipmilan \
        params pcmk_host_list="example2.com" ipaddr="10.24.x.yyy"
login="user" passwd="password" concurrent-fencing="true"
pcmk_action_limit="-1" lanplus="true" action="off" delay="10" \
        op monitor interval="20s" \
        meta target-role="Started"
primitive redis ocf:heartbeat:redis \
        params bin="/usr/local/bin/redis-server"
client_bin="/usr/local/bin/redis-cli" port="6379"
config="/etc/redis/redis.conf" socket_name="redis.sock"
pidfile_name="redis.pid" \
        meta target-role="Started" is-managed="true" \
        op monitor interval="1s" role="Master" timeout="1s" on-fail="fence"
primitive res_vip ocf:heartbeat:IPaddr2 \
        params ip="10.24.x.zzz" nic="bond0.xx" cidr_netmask="22" \
        op monitor interval="10" \
        meta target-role="Started"
ms redis_clone redis \
        meta notify="true" is-managed="true" ordered="false"
interleave="true" globally-unique="false" target-role="Started"
migration-threshold="1"
colocation redis-first inf: redis_clone:Master res_vip
order redis-before-vip inf: redis_clone:promote res_vip:start
property $id="cib-bootstrap-options" \
        dc-version="1.1.10-42f2063" \
        cluster-infrastructure="corosync" \
        stonith-enabled="true" \
        no-quorum-policy="ignore" \
        default-resource-stickiness="200" \
        batch-limit="300" \
        migration-limit="-1"
property $id="redis_replication" \
        redis_REPL_INFO="example1.com"
rsc_defaults $id="rsc-options" \
        migration-threshold="1"

Thank you very much