[Pacemaker] Advisory ordering on clones not working?
    Adrian Fita 
    adrian.fita at gmail.com
       
    Tue Feb 14 16:38:15 UTC 2012
    
    
  
Ok. It seems that advisory ordering is not working with normal
resources either... So I tried the following configuration:
node nocmc1-cos6-2
node nocmc2-cos6-2
node nocms1-cos6-2 \
        attributes standby="off" type="storage"
node nocms2-cos6-2 \
        attributes standby="off" type="storage"
primitive p_drbd_config ocf:linbit:drbd \
        params drbd_resource="config" \
        op monitor interval="15" role="Master" \
        op monitor interval="30" role="Slave"
primitive p_drbd_other_lower ocf:linbit:drbd \
        params drbd_resource="other_lower" \
        op monitor interval="15" role="Master" \
        op monitor interval="30" role="Slave"
primitive p_exportfs_config ocf:heartbeat:exportfs \
        params directory="/mnt/config" fsid="1"
options="rw,no_root_squash,mountpoint=/mnt/config"
clientspec="10.1.0.0/255.255.0.0" unlock_on_stop="true" \
        op stop interval="0" timeout="15s"
primitive p_exportfs_other_lower ocf:heartbeat:exportfs \
        params directory="/mnt/other" fsid="2"
options="rw,no_root_squash,mountpoint=/mnt/other"
clientspec="10.1.0.0/255.255.0.0" unlock_on_stop="true" \
        op stop interval="0" timeout="15s" \
        meta target-role="Started"
primitive p_exportfs_root ocf:heartbeat:exportfs \
        params directory="/mnt" fsid="0"
options="rw,crossmnt,no_root_squash" clientspec="10.1.0.0/255.255.0.0"
unlock_on_stop="true" \
        op stop interval="0" timeout="15s"
primitive p_fs_config ocf:heartbeat:Filesystem \
        params device="/dev/drbd/by-res/config"
directory="/mnt/config" fstype="ext4" \
        op monitor interval="10s"
primitive p_fs_nfs_config_nocmc1 ocf:idirect:Filesystem \
        params fstype="nfs" device="10.1.11.186:/mnt/config"
directory="/nfs/config" options="noatime" \
        op start interval="0" timeout="60s" \
        op stop interval="0" timeout="60s" \
        op monitor interval="30s" timeout="60s" \
        meta failure-timeout="120s"
primitive p_fs_nfs_other_nocmc1 ocf:idirect:Filesystem \
        params fstype="nfs" device="10.1.11.186:/mnt/other"
directory="/nfs/other" options="noatime" \
        op start interval="0" timeout="60s" \
        op stop interval="0" timeout="60s" \
        op monitor interval="30s" timeout="60s" \
        meta failure-timeout="120s"
primitive p_fs_other_lower ocf:heartbeat:Filesystem \
        params device="/dev/drbd/by-res/other_lower"
directory="/mnt/other" fstype="ext4" \
        op monitor interval="10s"
primitive p_ip_nfs ocf:heartbeat:IPaddr2 \
        params ip="10.1.11.186" cidr_netmask="16" \
        op monitor interval="30s"
primitive p_nfsserver ocf:heartbeat:nfsserver \
        params nfs_init_script="/etc/init.d/nfs"
nfs_notify_cmd="/usr/sbin/sm-notify"
nfs_shared_infodir="/mnt/config/nfs_infodir" nfs_ip="10.1.11.186" \
        op monitor interval="10s" \
        op start interval="0" timeout="40s"
ms ms_drbd_config p_drbd_config \
        meta master-max="1" master-node-max="1" clone-max="2"
clone-node-max="1" notify="true"
ms ms_drbd_other_lower p_drbd_other_lower \
        meta master-max="1" master-node-max="1" clone-max="2"
clone-node-max="1" notify="true"
location l_ms_drbd_config_only_on_storage ms_drbd_config \
        rule $id="l_ms_drbd_config_only_on_storage-rule" -inf: type ne storage
location l_ms_drbd_other_lower_only_on_storage ms_drbd_other_lower \
        rule $id="l_ms_drbd_other_lower_only_on_storage-rule" -inf: type ne storage
location l_p_fs_nfs_config_nocmc1_only_on_nocmc1 p_fs_nfs_config_nocmc1 inf: nocmc1-cos6-2
location l_p_fs_nfs_other_nocmc1_only_on_nocmc1 p_fs_nfs_other_nocmc1 inf: nocmc1-cos6-2
location l_p_ip_nfs_only_on_storage p_ip_nfs \
        rule $id="l_p_ip_nfs_only_on_storage-rule" -inf: type ne storage
colocation c_fs_config_with_drbd_config inf: p_fs_config ms_drbd_config:Master
colocation c_fs_other_lower_with_drbd_other_lower inf:
p_fs_other_lower ms_drbd_other_lower:Master
colocation c_ms_drbd_with_ip_nfs inf: ( ms_drbd_config:Master
ms_drbd_other_lower:Master ) p_ip_nfs
colocation c_p_exportfs_p_exportfs_root inf: ( p_exportfs_config
p_exportfs_other_lower ) p_exportfs_root
colocation c_p_exportfs_root_p_nfsserver inf: p_exportfs_root p_nfsserver
colocation c_p_nfsserver_p_fs inf: p_nfsserver ( p_fs_config p_fs_other_lower )
order o_ms_drbd_config_p_fs_config inf: ms_drbd_config:promote p_fs_config:start
order o_ms_drbd_other_lower_p_fs_other_lower inf:
ms_drbd_other_lower:promote p_fs_other_lower:start
order o_p_exportfs_config_p_fs_nfs_config_nocmc1 0: p_exportfs_config
p_fs_nfs_config_nocmc1
order o_p_exportfs_other_lower_p_fs_nfs_other_nocmc1 0:
p_exportfs_other_lower p_fs_nfs_other_nocmc1
order o_p_exportfs_root_p_exportfs inf: p_exportfs_root (
p_exportfs_config p_exportfs_other_lower )
order o_p_fs_p_nfsserver inf: ( p_fs_config p_fs_other_lower ) p_nfsserver
order o_p_ip_nfs_ms_drbd_config inf: p_ip_nfs:start (
ms_drbd_config:promote ms_drbd_other_lower:promote )
order o_p_nfsserver_p_exportfs_root inf: p_nfsserver p_exportfs_root
property $id="cib-bootstrap-options" \
        dc-version="1.1.6-3.el6-a02c0f19a00c1eb2527ad38f146ebc0834814558" \
        cluster-infrastructure="openais" \
        expected-quorum-votes="4" \
        stonith-enabled="false" \
        no-quorum-policy="ignore" \
        stop-all-resources="true" \
        maintenance-mode="true" \
        last-lrm-refresh="1329236439"
rsc_defaults $id="rsc-options" \
        resource-stickiness="200"
I expect p_fs_nfs_config_nocmc1 and p_fs_nfs_other_nocmc1 to be
started after p_exportfs_config and p_exportfs_other_lower,
respectively, but this doesn't happen. p_fs_nfs_config_nocmc1 and
p_fs_nfs_other_nocmc1 get started right when pacemaker starts, along
with p_ip_nfs, the first resource to get started. Here is an extract
from /var/log/messages:
Feb 14 16:10:23 nocmc1-cos6-2 corosync[6906]:   [MAIN  ] main Corosync
Cluster Engine ('1.4.1'): started and ready to provide service.
Feb 14 16:10:23 nocmc1-cos6-2 corosync[6906]:   [MAIN  ] main Corosync
built-in features: nss dbus rdma snmp
Feb 14 16:10:23 nocmc1-cos6-2 corosync[6906]:   [MAIN  ] main
Successfully read main configuration file
'/etc/corosync/corosync.conf'.
Feb 14 16:10:23 nocmc1-cos6-2 corosync[6906]:   [TOTEM ]
totemnet_instance_initialize Initializing transport (UDP/IP
Multicast).
Feb 14 16:10:23 nocmc1-cos6-2 corosync[6906]:   [TOTEM ]
init_sober_crypto Initializing transmit/receive security: libtomcrypt
SOBER128/SHA1HMAC (mode 0).
Feb 14 16:10:23 nocmc1-cos6-2 corosync[6906]:   [TOTEM ]
timer_function_netif_check_timeout The network interface [10.1.11.181]
is now up.
Feb 14 16:10:23 nocmc1-cos6-2 corosync[6906]:   [pcmk  ]
process_ais_conf info: process_ais_conf: Reading configure
Feb 14 16:10:23 nocmc1-cos6-2 corosync[6906]:   [pcmk  ]
config_find_init info: config_find_init: Local handle:
2013064636357672963 for logging
[...]
Feb 14 16:10:47 nocmc1-cos6-2 pengine: [6942]: notice: RecurringOp:
Start recurring monitor (30s) for p_ip_nfs on nocms2-cos6-2
Feb 14 16:10:47 nocmc1-cos6-2 pengine: [6942]: notice: RecurringOp:
Start recurring monitor (30s) for p_drbd_config:0 on nocms2-cos6-2
Feb 14 16:10:47 nocmc1-cos6-2 pengine: [6942]: notice: RecurringOp:
Start recurring monitor (30s) for p_drbd_config:0 on nocms2-cos6-2
Feb 14 16:10:47 nocmc1-cos6-2 pengine: [6942]: notice: RecurringOp:
Start recurring monitor (30s) for p_drbd_other_lower:0 on
nocms2-cos6-2
Feb 14 16:10:47 nocmc1-cos6-2 pengine: [6942]: notice: RecurringOp:
Start recurring monitor (30s) for p_drbd_other_lower:0 on
nocms2-cos6-2
Feb 14 16:10:47 nocmc1-cos6-2 pengine: [6942]: notice: RecurringOp:
Start recurring monitor (30s) for p_fs_nfs_config_nocmc1 on
nocmc1-cos6-2
Feb 14 16:10:47 nocmc1-cos6-2 pengine: [6942]: notice: RecurringOp:
Start recurring monitor (30s) for p_fs_nfs_other_nocmc1 on
nocmc1-cos6-2
Feb 14 16:10:47 nocmc1-cos6-2 pengine: [6942]: notice: LogActions:
Leave   p_exportfs_config#011(Stopped)
Feb 14 16:10:47 nocmc1-cos6-2 pengine: [6942]: notice: LogActions:
Leave   p_exportfs_other_lower#011(Stopped)
Feb 14 16:10:47 nocmc1-cos6-2 pengine: [6942]: notice: LogActions:
Leave   p_exportfs_root#011(Stopped)
Feb 14 16:10:47 nocmc1-cos6-2 pengine: [6942]: notice: LogActions:
Leave   p_fs_config#011(Stopped)
Feb 14 16:10:47 nocmc1-cos6-2 pengine: [6942]: notice: LogActions:
Leave   p_fs_other_lower#011(Stopped)
Feb 14 16:10:47 nocmc1-cos6-2 pengine: [6942]: notice: LogActions:
Start   p_ip_nfs#011(nocms2-cos6-2)
Feb 14 16:10:47 nocmc1-cos6-2 pengine: [6942]: notice: LogActions:
Leave   p_nfsserver#011(Stopped)
Feb 14 16:10:47 nocmc1-cos6-2 pengine: [6942]: notice: LogActions:
Start   p_drbd_config:0#011(nocms2-cos6-2)
Feb 14 16:10:47 nocmc1-cos6-2 pengine: [6942]: notice: LogActions:
Leave   p_drbd_config:1#011(Stopped)
Feb 14 16:10:47 nocmc1-cos6-2 pengine: [6942]: notice: LogActions:
Start   p_drbd_other_lower:0#011(nocms2-cos6-2)
Feb 14 16:10:47 nocmc1-cos6-2 pengine: [6942]: notice: LogActions:
Leave   p_drbd_other_lower:1#011(Stopped)
Feb 14 16:10:47 nocmc1-cos6-2 pengine: [6942]: notice: LogActions:
Start   p_fs_nfs_config_nocmc1#011(nocmc1-cos6-2)
Feb 14 16:10:47 nocmc1-cos6-2 pengine: [6942]: notice: LogActions:
Start   p_fs_nfs_other_nocmc1#011(nocmc1-cos6-2)
--
Fita Adrian
On Tue, Feb 14, 2012 at 15:40, Adrian Fita <adrian.fita at gmail.com> wrote:
> Hi. I'm trying something like this: "crm configure order order_id 0:
> nfs_server nfs_mount", but nfs_mount doesn't start after nfs_server,
> instead it starts at pacemaker startup, when nfs_server isn't even
> started.
>
> The reason I'm doing this is because nfs_mount is a clone of NFS
> filesystem mounts that are mounted on each node in the cluster. The
> NFS mounts are then used by some processes that read/write stuff in
> those NFS mounts. If I do "crm configure order order_id inf:
> nfs_server nfs_mount", on a node failure, all the nfs_mounts instances
> get restarted causing the processes that use them to fail or get
> restarted themselves. With advisory ordering I saw that the nfs_mount
> instances are not all restarted, the ones that are not on the failed
> node are left alone, leaving the surviving processes alone also.
>
> So... Am I missing something? Is my approach wrong?
> --
> Fita Adrian
    
    
More information about the Pacemaker
mailing list