[Pacemaker] LVM restarts after SLES upgrade

Frank Meier frank.meier at hr-group.de
Thu Apr 19 02:29:54 EDT 2012


Hi,

I've installed a two-node Xen cluster on SLES 11 SP1.

After upgrading to SLES 11 SP2, the cluster no longer works the way it did before.

After some time, all LVs are reorganized and as a result the VMs are restarted.

Any clue?
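
For reference, this is roughly how I have been watching the failures; the
commands are from memory, so treat them as a sketch rather than exact syntax,
and the clone instances may need the ":0"/":1" suffix:

    crm_mon -1 -f                                       # one-shot status including fail counts
    crm resource failcount clvm-xenvg show xencluster1  # fail count of the LVM resource on node 1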

CIB:

node xencluster1 \
        attributes standby="off"
node xencluster2 \
        attributes standby="off"
primitive STONITH--Node1 stonith:meatware \
        meta is-managed="true" target-role="Started" \
        operations $id="STONITH--Node1-operations" \
        op monitor interval="15" timeout="15" start-delay="15" \
        params hostlist="xencluster1"
primitive STONITH-Node2 stonith:meatware \
        meta target-role="started" is-managed="true" \
        operations $id="STONITH-Node2-operations" \
        op monitor interval="15" timeout="15" start-delay="15" \
        params hostlist="xencluster2"
primitive clvm ocf:lvm2:clvmd \
        operations $id="clvm-operations" \
        op monitor interval="10" timeout="20" \
        op start interval="0" timeout="90" \
        op stop interval="0" timeout="100" \
        params daemon_timeout="30"
primitive clvm-xenvg ocf:heartbeat:LVM \
        operations $id="clvm-xenvg-operations" \
        op monitor interval="10" timeout="30" \
        params volgrpname="XenVG"
primitive cmirror ocf:lvm2:cmirrord \
        operations $id="cmirror-operations" \
        op monitor interval="10" timeout="20" \
        op start interval="0" timeout="90" \
        op stop interval="0" timeout="100" \
        params daemon_timeout="30"
primitive dlm ocf:pacemaker:controld \
        operations $id="dlm-operations" \
        op monitor interval="10" timeout="20" start-delay="0" \
        op start interval="0" timeout="90" \
        op stop interval="0" timeout="100"
primitive fs-config-xen ocf:heartbeat:Filesystem \
        meta is-managed="true" target-role="Started" \
        operations $id="fs-config-xen-operations" \
        op monitor interval="20" timeout="40" \
        params device="/dev/mapper/XenVG-xenconfig" directory="/etc/xen/vm" fstype="ocfs2"
primitive o2cb ocf:ocfs2:o2cb \
        operations $id="o2cb-operations" \
        op monitor interval="10" timeout="20" \
        op start interval="0" timeout="90" \
        op stop interval="0" timeout="100"
primitive vm-cim ocf:heartbeat:Xen \
        meta target-role="Started" is-managed="true" allow-migrate="true" \
        operations $id="vm-cim-operations" \
        op monitor interval="10" timeout="30" \
        op start interval="0" timeout="60" \
        op stop interval="0" timeout="60" \
        op migrate_to interval="0" timeout="360" \
        params xmfile="/etc/xen/vm/cim"
primitive vm-deprepo ocf:heartbeat:Xen \
        meta target-role="started" is-managed="true" allow-migrate="true" \
        operations $id="vm-deprepo-operations" \
        op monitor interval="10" timeout="30" \
        op start interval="0" timeout="60" \
        op stop interval="0" timeout="60" \
        op migrate_to interval="0" timeout="360" \
        params xmfile="/etc/xen/vm/debrepo"


group clvm-glue dlm clvm o2cb cmirror \
        meta target-role="Started"
group xen-vg-fs clvm-xenvg fs-config-xen
clone c-clvm-glue clvm-glue \
        meta interleave="true" ordered="true" target-role="Started"
clone c-xen-vg-fs xen-vg-fs \
        meta interleave="true" ordered="true" is-managed="true" target-role="Started"
location STONITHnode1Constraint STONITH--Node1 -inf: xencluster1
colocation colo-clvmglue-xenvgsfs inf: c-xen-vg-fs c-clvm-glue
order o-cim inf: c-xen-vg-fs vm-cim
order o-clvmglue-xenvgfs inf: c-clvm-glue c-xen-vg-fs
order o-deprepo inf: c-xen-vg-fs vm-deprepo
order o-ilmt inf: c-xen-vg-fs vm-ilmt
order o-intranet inf: c-xen-vg-fs vm-intranet
order o-nagois inf: c-xen-vg-fs vm-nagios
order o-oneiroi inf: c-xen-vg-fs vm-oneiroi
order o-otbridge inf: c-xen-vg-fs vm-otbridge
order o-soa inf: c-xen-vg-fs vm-soa
order o-topdesk-entw inf: c-xen-vg-fs vm-topdesk-entw
order o-traumschiff inf: c-xen-vg-fs vm-traumschiff
order o-virenscanner inf: c-xen-vg-fs vm-virenscanner
property $id="cib-bootstrap-options" \
        dc-version="1.1.6-b988976485d15cb702c9307df55512d323831a5e" \
        no-quorum-policy="ignore" \
        default-resource-stickiness="100000" \
        last-lrm-refresh="1334518148" \
        cluster-infrastructure="openais" \
        expected-quorum-votes="2" \
        default-action-timeout="120s" \
        maintenance-mode="true"
op_defaults $id="op_defaults-options" \
        record-pending="false"
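
Side note: the monitor on clvm-xenvg is what times out at 30000ms in the Node 1
log below. I assume the timeout could simply be raised, roughly like this (the
90s value is only a guess), but I'd rather understand why the monitor hangs at
all:

primitive clvm-xenvg ocf:heartbeat:LVM \
        operations $id="clvm-xenvg-operations" \
        op monitor interval="10" timeout="90" \
        params volgrpname="XenVG"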

Node 1 logs:

Apr 15 22:01:10 xencluster1 clvmd[8763]: Sending message to all cluster
nodes
Apr 15 22:01:10 xencluster1 clvmd[8763]: process_work_item: local
Apr 15 22:01:10 xencluster1 clvmd[8763]: process_local_command: unknown
(0x2d) msg=0x7f5c40010e80, msglen =32, client=0x7f5c400578d0
Apr 15 22:01:10 xencluster1 clvmd[8763]: Syncing device names
Apr 15 22:01:10 xencluster1 clvmd[8763]: Reply from node 4914420a: 0 bytes
Apr 15 22:01:10 xencluster1 clvmd[8763]: Got 1 replies, expecting: 2
Apr 15 22:01:10 xencluster1 clvmd[8763]: LVM thread waiting for work
Apr 15 22:01:10 xencluster1 clvmd[8763]: 1226064394 got message from
nodeid 1226064394 for 0. len 32
Apr 15 22:01:40 xencluster1 lrmd: [8093]: WARN: clvm-xenvg:0:monitor
process (PID 31785) timed out (try 1).  Killing with signal SIGTERM (15).
Apr 15 22:01:40 xencluster1 lrmd: [8093]: WARN: operation monitor[92] on
clvm-xenvg:0 for client 8096: pid 31785 timed out
Apr 15 22:01:40 xencluster1 crmd: [8096]: ERROR: process_lrm_event: LRM
operation clvm-xenvg:0_monitor_10000 (92) Timed Out (timeout=30000ms)
Apr 15 22:01:42 xencluster1 corosync[8071]:   [TOTEM ] Retransmit List: 21ac
Apr 15 22:01:42 xencluster1 corosync[8071]:   [TOTEM ] Retransmit List: 21ab
Apr 15 22:01:42 xencluster1 corosync[8071]:   [TOTEM ] Retransmit List: 21ac
Apr 15 22:01:42 xencluster1 corosync[8071]:   [TOTEM ] Retransmit List: 21ab


Apr 15 22:01:50 xencluster1 clvmd[8763]: Send local reply
Apr 15 22:01:50 xencluster1 clvmd[8763]: Read on local socket 14, len = 32
Apr 15 22:01:50 xencluster1 clvmd[8763]: check_all_clvmds_running
Apr 15 22:01:50 xencluster1 clvmd[8763]: down_callback. node 1226064394,
state = 3
Apr 15 22:01:50 xencluster1 clvmd[8763]: down_callback. node 1309950474,
state = 3
Apr 15 22:01:50 xencluster1 clvmd[8763]: Got pre command condition...
Apr 15 22:01:50 xencluster1 clvmd[8763]: Writing status 0 down pipe 16
Apr 15 22:01:50 xencluster1 clvmd[8763]: Waiting to do post command -
state = 0
Apr 15 22:01:50 xencluster1 clvmd[8763]: read on PIPE 15: 4 bytes: status: 0
Apr 15 22:01:50 xencluster1 clvmd[8763]: background routine status was
0, sock_client=0x7f5c40057c10
Apr 15 22:01:50 xencluster1 clvmd[8763]: distribute command: XID = 15473
Apr 15 22:01:50 xencluster1 clvmd[8763]: num_nodes = 2
Apr 15 22:01:50 xencluster1 clvmd[8763]: add_to_lvmqueue:
cmd=0x7f5c4000dbd0. client=0x7f5c40057c10, msg=0x7f5c40057880, len=32,
csid=(nil), xid=15473
Apr 15 22:01:50 xencluster1 clvmd[8763]: Sending message to all cluster
nodes
Apr 15 22:01:50 xencluster1 clvmd[8763]: process_work_item: local
Apr 15 22:01:50 xencluster1 clvmd[8763]: process_local_command: unknown
(0x2d) msg=0x7f5c40010e80, msglen =32, client=0x7f5c40057c10
Apr 15 22:01:50 xencluster1 clvmd[8763]: Syncing device names
Apr 15 22:01:50 xencluster1 clvmd[8763]: Reply from node 4914420a: 0 bytes
Apr 15 22:01:50 xencluster1 clvmd[8763]: Got 1 replies, expecting: 2
Apr 15 22:01:50 xencluster1 clvmd[8763]: LVM thread waiting for work
Apr 15 22:01:50 xencluster1 corosync[8071]:   [TOTEM ] Retransmit List: 21ab
Apr 15 22:01:50 xencluster1 corosync[8071]:   [TOTEM ] Retransmit List: 21ac
Apr 15 22:01:50 xencluster1 corosync[8071]:   [TOTEM ] Retransmit List: 21ab
...
Apr 15 22:02:19 xencluster1 corosync[8071]:   [TOTEM ] Retransmit List: 21ac
Apr 15 22:02:20 xencluster1 corosync[8071]:   [TOTEM ] Retransmit List: 21ab
Apr 15 22:02:20 xencluster1 lrmd: [8093]: WARN: clvm-xenvg:0:monitor
process (PID 771) timed out (try 1).  Killing with signal SIGTERM (15).
Apr 15 22:02:20 xencluster1 lrmd: [8093]: WARN: operation monitor[92] on
clvm-xenvg:0 for client 8096: pid 771 timed out
Apr 15 22:02:20 xencluster1 crmd: [8096]: ERROR: process_lrm_event: LRM
operation clvm-xenvg:0_monitor_10000 (92) Timed Out (timeout=30000ms)
Apr 15 22:02:20 xencluster1 corosync[8071]:   [TOTEM ] Retransmit List: 21ac
Apr 15 22:02:20 xencluster1 corosync[8071]:   [TOTEM ] Retransmit List: 21ab
...
Apr 15 22:02:30 xencluster1 clvmd[8763]: Got new connection on fd 17
Apr 15 22:02:30 xencluster1 clvmd[8763]: Read on local socket 17, len = 28
Apr 15 22:02:30 xencluster1 clvmd[8763]: creating pipe, [18, 19]
Apr 15 22:02:30 xencluster1 clvmd[8763]: Creating pre&post thread
Apr 15 22:02:30 xencluster1 clvmd[8763]: Created pre&post thread, state = 0
Apr 15 22:02:30 xencluster1 clvmd[8763]: in sub thread: client =
0x7f5c40058080
Apr 15 22:02:30 xencluster1 clvmd[8763]: Sub thread ready for work.
Apr 15 22:02:30 xencluster1 clvmd[8763]: doing PRE command LOCK_VG
'V_XenVG' at 1 (client=0x7f5c40058080)
Apr 15 22:02:30 xencluster1 clvmd[8763]: lock_resource 'V_XenVG',
flags=0, mode=3
Apr 15 22:02:30 xencluster1 clvmd[8763]: lock_resource returning 0,
lock_id=3a0001



Apr 15 22:02:48 xencluster1 corosync[8071]:   [TOTEM ] Retransmit List: 21ab
Apr 15 22:02:48 xencluster1 cib: [8092]: info: cib_stats: Processed 4
operations (2500.00us average, 0% utilization) in the last 10min
Apr 15 22:02:48 xencluster1 corosync[8071]:   [TOTEM ] Retransmit List: 21ac



Apr 15 22:03:00 xencluster1 corosync[8071]:   [TOTEM ] Retransmit List: 21ac
Apr 15 22:03:00 xencluster1 lrmd: [8093]: WARN: clvm-xenvg:0:monitor
process (PID 2256) timed out (try 1).  Killing with signal SIGTERM (15).
Apr 15 22:03:00 xencluster1 lrmd: [8093]: WARN: operation monitor[92] on
clvm-xenvg:0 for client 8096: pid 2256 timed out
Apr 15 22:03:00 xencluster1 crmd: [8096]: ERROR: process_lrm_event: LRM
operation clvm-xenvg:0_monitor_10000 (92) Timed Out (timeout=30000ms)
Apr 15 22:03:00 xencluster1 corosync[8071]:   [TOTEM ] Retransmit List: 21ab


Node 2 logs:

Apr 15 22:00:51 xencluster2 clvmd[12666]: Joined child thread
Apr 15 22:00:51 xencluster2 clvmd[12666]: ret == 0, errno = 0. removing
client
Apr 15 22:00:51 xencluster2 clvmd[12666]: add_to_lvmqueue:
cmd=0x7fa3d0028710. client=0x7fa3d0028760, msg=(nil), len=0, csid=(nil),
xid=14778
Apr 15 22:00:51 xencluster2 clvmd[12666]: process_work_item: free fd -1
Apr 15 22:00:51 xencluster2 clvmd[12666]: LVM thread waiting for work
Apr 15 22:00:52 xencluster2 crmd: [7678]: info: crm_timer_popped:
PEngine Recheck Timer (I_PE_CALC) just popped (900000ms)
Apr 15 22:00:52 xencluster2 crmd: [7678]: info: do_state_transition:
State transition S_IDLE -> S_POLICY_ENGINE [ input=I_PE_CALC
cause=C_TIMER_POPPED origin=crm_timer_popped ]
Apr 15 22:00:52 xencluster2 crmd: [7678]: info: do_state_transition:
Progressed to state S_POLICY_ENGINE after C_TIMER_POPPED
Apr 15 22:00:52 xencluster2 crmd: [7678]: info: do_state_transition: All
2 cluster nodes are eligible to run resources.
Apr 15 22:00:52 xencluster2 crmd: [7678]: info: do_pe_invoke: Query 983:
Requesting the current CIB: S_POLICY_ENGINE
Apr 15 22:00:52 xencluster2 crmd: [7678]: info: do_pe_invoke_callback:
Invoking the PE: query=983, ref=pe_calc-dc-1334520052-604, seq=2212,
quorate=1
Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: unpack_config: On
loss of CCM Quorum: Ignore
Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: unpack_rsc_op:
Operation clvm:1_last_failure_0 found resource clvm:1 active on xencluster2
Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: unpack_rsc_op:
Operation STONITH-Node2_last_failure_0 found resource STONITH-Node2
active on xencluster2
Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: unpack_rsc_op:
Operation STONITH--Node1_last_failure_0 found resource STONITH--Node1
active on xencluster2
Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: unpack_rsc_op:
Operation cmirror:1_last_failure_0 found resource cmirror:1 active on
xencluster2
Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: unpack_rsc_op:
Operation dlm:1_last_failure_0 found resource dlm:1 active on xencluster2
Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: unpack_rsc_op:
Operation o2cb:1_last_failure_0 found resource o2cb:1 active on xencluster2
Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: unpack_rsc_op:
Operation dlm:0_last_failure_0 found resource dlm:0 active on xencluster1
Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: unpack_rsc_op:
Operation clvm:0_last_failure_0 found resource clvm:0 active on xencluster1
Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: unpack_rsc_op:
Operation o2cb:0_last_failure_0 found resource o2cb:0 active on xencluster1
Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: unpack_rsc_op:
Operation clvm-xenvg:0_last_failure_0 found resource clvm-xenvg:0 active
on xencluster1
Apr 15 22:00:52 xencluster2 pengine: [7677]: WARN: unpack_rsc_op:
Processing failed op vm-deprepo_last_failure_0 on xencluster1: unknown
error (1)
Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave
 STONITH-Node2  (Started xencluster2)
Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave
 STONITH--Node1 (Started xencluster2)
Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave
 dlm:0  (Started xencluster1)
Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave
 clvm:0 (Started xencluster1)
Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave
 o2cb:0 (Started xencluster1)
Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave
 cmirror:0      (Started xencluster1)
Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave
 dlm:1  (Started xencluster2)
Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave
 clvm:1 (Started xencluster2)
Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave
 o2cb:1 (Started xencluster2)
Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave
 cmirror:1      (Started xencluster2)
Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave
 clvm-xenvg:0   (Started xencluster1)
Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave
 fs-config-xen:0        (Started xencluster1)
Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave
 clvm-xenvg:1   (Started xencluster2)
Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave
 fs-config-xen:1        (Started xencluster2)
Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave
 vm-ilmt        (Started xencluster1)
Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave
 vm-topdesk-entw        (Started xencluster1)
Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave
 vm-otbridge    (Started xencluster2)
Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave
 vm-virenscanner        (Started xencluster1)
Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave
 vm-deprepo     (Started xencluster1)
Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave
 vm-traumschiff (Started xencluster2)
Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave
 vm-cim (Started xencluster2)
Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave
 vm-oneiroi     (Started xencluster1)
Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave
 vm-intranet    (Started xencluster1)
Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave
 vm-soa (Started xencluster1)
Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: process_pe_message:
Transition 80: PEngine Input stored in: /var/lib/pengine/pe-input-271.bz2
Apr 15 22:00:52 xencluster2 crmd: [7678]: info: do_state_transition:
State transition S_POLICY_ENGINE -> S_TRANSITION_ENGINE [
input=I_PE_SUCCESS cause=C_IPC_MESSAGE origin=handle_response ]
Apr 15 22:00:52 xencluster2 crmd: [7678]: info: unpack_graph: Unpacked
transition 80: 0 actions in 0 synapses
Apr 15 22:00:52 xencluster2 crmd: [7678]: info: do_te_invoke: Processing
graph 80 (ref=pe_calc-dc-1334520052-604) derived from
/var/lib/pengine/pe-input-271.bz2
Apr 15 22:00:52 xencluster2 crmd: [7678]: info: run_graph:
====================================================
Apr 15 22:00:52 xencluster2 crmd: [7678]: notice: run_graph: Transition
80 (Complete=0, Pending=0, Fired=0, Skipped=0, Incomplete=0,
Source=/var/lib/pengine/pe-input-271.bz2): Complete
Apr 15 22:00:52 xencluster2 crmd: [7678]: info: te_graph_trigger:
Transition 80 is now complete
Apr 15 22:00:52 xencluster2 crmd: [7678]: info: notify_crmd: Transition
80 status: done - <null>
Apr 15 22:00:52 xencluster2 crmd: [7678]: info: do_state_transition:
State transition S_TRANSITION_ENGINE -> S_IDLE [ input=I_TE_SUCCESS
cause=C_FSA_INTERNAL origin=notify_crmd ]
Apr 15 22:00:52 xencluster2 crmd: [7678]: info: do_state_transition:
Starting PEngine Recheck Timer
Apr 15 22:01:00 xencluster2 clvmd[12666]: 1309950474 got message from
nodeid 1226064394 for 0. len 32
Apr 15 22:01:00 xencluster2 clvmd[12666]: add_to_lvmqueue:
cmd=0x7fa3d0028780. client=0x6934a0, msg=0x7fa3d910063c, len=32,
csid=0x7fffb4e5d944, xid=0
Apr 15 22:01:00 xencluster2 clvmd[12666]: process_work_item: remote
Apr 15 22:01:00 xencluster2 clvmd[12666]: process_remote_command unknown
(0x2d) for clientid 0x5000000 XID 15337 on node 4914420a
Apr 15 22:01:00 xencluster2 clvmd[12666]: Syncing device names
Apr 15 22:01:00 xencluster2 clvmd[12666]: LVM thread waiting for work
Apr 15 22:01:00 xencluster2 clvmd[12666]: 1309950474 got message from
nodeid 1309950474 for 1226064394. len 18
Apr 15 22:01:00 xencluster2 clvmd[12666]: 1309950474 got message from
nodeid 1226064394 for 0. len 32




Apr 15 22:01:02 xencluster2 clvmd[12666]: Joined child thread
Apr 15 22:01:02 xencluster2 clvmd[12666]: ret == 0, errno = 0. removing
client
Apr 15 22:01:02 xencluster2 clvmd[12666]: add_to_lvmqueue:
cmd=0x7fa3d004d7a0. client=0x7fa3d004d7f0, msg=(nil), len=0, csid=(nil),
xid=14848
Apr 15 22:01:02 xencluster2 clvmd[12666]: process_work_item: free fd -1
Apr 15 22:01:02 xencluster2 clvmd[12666]: LVM thread waiting for work
Apr 15 22:01:06 xencluster2 stonith-ng: [8283]: info: stonith_command:
Processed st_execute from lrmd: rc=-1
Apr 15 22:01:06 xencluster2 stonith: parse config info info=xencluster1
Apr 15 22:01:06 xencluster2 stonith: meatware device OK.
Apr 15 22:01:06 xencluster2 stonith-ng: [8283]: info: log_operation:
STONITH--Node1: Performing: stonith -t meatware -S
Apr 15 22:01:06 xencluster2 stonith-ng: [8283]: info: log_operation:
STONITH--Node1: success:  0
Apr 15 22:01:06 xencluster2 stonith-ng: [8283]: info:
stonith_device_execute: Nothing to do for STONITH--Node1
Apr 15 22:01:06 xencluster2 stonith-ng: [8283]: info: stonith_command:
Processed st_execute from lrmd: rc=-1
Apr 15 22:01:06 xencluster2 stonith: parse config info info=xencluster2
Apr 15 22:01:06 xencluster2 stonith: meatware device OK.
Apr 15 22:01:06 xencluster2 stonith-ng: [8283]: info: log_operation:
STONITH-Node2: Performing: stonith -t meatware -S
Apr 15 22:01:06 xencluster2 stonith-ng: [8283]: info: log_operation:
STONITH-Node2: success:  0
Apr 15 22:01:06 xencluster2 stonith-ng: [8283]: info:
stonith_device_execute: Nothing to do for STONITH-Node2
Apr 15 22:01:10 xencluster2 clvmd[12666]: 1309950474 got message from
nodeid 1226064394 for 0. len 32
Apr 15 22:01:10 xencluster2 clvmd[12666]: add_to_lvmqueue:
cmd=0x7fa3d004d810. client=0x6934a0, msg=0x7fa3d9100cfc, len=32,
csid=0x7fffb4e5d944, xid=0
Apr 15 22:01:10 xencluster2 clvmd[12666]: process_work_item: remote
Apr 15 22:01:10 xencluster2 clvmd[12666]: process_remote_command unknown
(0x2d) for clientid 0x5000000 XID 15407 on node 4914420a
Apr 15 22:01:10 xencluster2 clvmd[12666]: Syncing device names
Apr 15 22:01:10 xencluster2 clvmd[12666]: LVM thread waiting for work
Apr 15 22:01:10 xencluster2 clvmd[12666]: 1309950474 got message from
nodeid 1309950474 for 1226064394. len 18
Apr 15 22:01:10 xencluster2 corosync[7666]:   [TOTEM ] Retransmit List: 2196
Apr 15 22:01:10 xencluster2 corosync[7666]:   [TOTEM ] Retransmit List: 2196
Apr 15 22:01:10 xencluster2 corosync[7666]:   [TOTEM ] Retransmit List: 2196
....
Apr 15 22:01:11 xencluster2 corosync[7666]:   [TOTEM ] Retransmit List: 2196
Apr 15 22:01:12 xencluster2 clvmd[12666]: Got new connection on fd 5
Apr 15 22:01:12 xencluster2 clvmd[12666]: Read on local socket 5, len = 28
Apr 15 22:01:12 xencluster2 clvmd[12666]: creating pipe, [12, 13]
Apr 15 22:01:12 xencluster2 clvmd[12666]: Creating pre&post thread
Apr 15 22:01:12 xencluster2 clvmd[12666]: Created pre&post thread, state = 0
Apr 15 22:01:12 xencluster2 clvmd[12666]: in sub thread: client =
0x7fa3d004d810




Apr 15 22:01:12 xencluster2 clvmd[12666]: Syncing device names
Apr 15 22:01:12 xencluster2 clvmd[12666]: Reply from node 4e14420a: 0 bytes
Apr 15 22:01:12 xencluster2 clvmd[12666]: Got 1 replies, expecting: 2
Apr 15 22:01:12 xencluster2 clvmd[12666]: LVM thread waiting for work
Apr 15 22:01:12 xencluster2 corosync[7666]:   [TOTEM ] Retransmit List: 2196
Apr 15 22:01:12 xencluster2 clvmd[12666]: 1309950474 got message from
nodeid 1309950474 for 0. len 32
Apr 15 22:01:12 xencluster2 corosync[7666]:   [TOTEM ] Retransmit List: 2196
Apr 15 22:01:12 xencluster2 corosync[7666]:   [TOTEM ] Retransmit List: 2196
....
Apr 15 22:01:21 xencluster2 corosync[7666]:   [TOTEM ] Retransmit List:
2196 2197
Apr 15 22:01:21 xencluster2 corosync[7666]:   [TOTEM ] Retransmit List:
2196 2197
Apr 15 22:01:21 xencluster2 stonith-ng: [8283]: info: stonith_command:
Processed st_execute from lrmd: rc=-1
Apr 15 22:01:21 xencluster2 stonith: parse config info info=xencluster1
Apr 15 22:01:21 xencluster2 stonith: meatware device OK.
Apr 15 22:01:21 xencluster2 stonith-ng: [8283]: info: log_operation:
STONITH--Node1: Performing: stonith -t meatware -S
Apr 15 22:01:21 xencluster2 stonith-ng: [8283]: info: log_operation:
STONITH--Node1: success:  0
Apr 15 22:01:21 xencluster2 stonith-ng: [8283]: info:
stonith_device_execute: Nothing to do for STONITH--Node1
Apr 15 22:01:21 xencluster2 stonith-ng: [8283]: info: stonith_command:
Processed st_execute from lrmd: rc=-1
Apr 15 22:01:21 xencluster2 stonith: parse config info info=xencluster2
Apr 15 22:01:21 xencluster2 stonith: meatware device OK.
Apr 15 22:01:21 xencluster2 stonith-ng: [8283]: info: log_operation:
STONITH-Node2: Performing: stonith -t meatware -S
Apr 15 22:01:21 xencluster2 stonith-ng: [8283]: info: log_operation:
STONITH-Node2: success:  0
Apr 15 22:01:21 xencluster2 stonith-ng: [8283]: info:
stonith_device_execute: Nothing to do for STONITH-Node2
Apr 15 22:01:21 xencluster2 corosync[7666]:   [TOTEM ] Retransmit List:
2196 2197
Apr 15 22:01:22 xencluster2 corosync[7666]:   [TOTEM ] Retransmit List:
2196 2197
Apr 15 22:01:22 xencluster2 corosync[7666]:   [TOTEM ] Retransmit List:
2196 2197
....
Apr 15 22:01:36 xencluster2 corosync[7666]:   [TOTEM ] Retransmit List:
2196 2197
Apr 15 22:01:36 xencluster2 corosync[7666]:   [TOTEM ] Retransmit List:
2196 2197
Apr 15 22:01:36 xencluster2 stonith-ng: [8283]: info: stonith_command:
Processed st_execute from lrmd: rc=-1
Apr 15 22:01:36 xencluster2 stonith: parse config info info=xencluster1
Apr 15 22:01:36 xencluster2 stonith: meatware device OK.
Apr 15 22:01:36 xencluster2 stonith-ng: [8283]: info: log_operation:
STONITH--Node1: Performing: stonith -t meatware -S
Apr 15 22:01:36 xencluster2 stonith-ng: [8283]: info: log_operation:
STONITH--Node1: success:  0
Apr 15 22:01:36 xencluster2 stonith-ng: [8283]: info:
stonith_device_execute: Nothing to do for STONITH--Node1
Apr 15 22:01:36 xencluster2 stonith-ng: [8283]: info: stonith_command:
Processed st_execute from lrmd: rc=-1
Apr 15 22:01:36 xencluster2 stonith: parse config info info=xencluster2
Apr 15 22:01:36 xencluster2 stonith: meatware device OK.
Apr 15 22:01:36 xencluster2 stonith-ng: [8283]: info: log_operation:
STONITH-Node2: Performing: stonith -t meatware -S
Apr 15 22:01:36 xencluster2 stonith-ng: [8283]: info: log_operation:
STONITH-Node2: success:  0
Apr 15 22:01:36 xencluster2 stonith-ng: [8283]: info:
stonith_device_execute: Nothing to do for STONITH-Node2
Apr 15 22:01:36 xencluster2 corosync[7666]:   [TOTEM ] Retransmit List:
2196 2197
Apr 15 22:01:37 xencluster2 corosync[7666]:   [TOTEM ] Retransmit List:
2196 2197
....
Apr 15 22:01:40 xencluster2 corosync[7666]:   [TOTEM ] Retransmit List:
2196 2197
Apr 15 22:01:40 xencluster2 corosync[7666]:   [TOTEM ] Retransmit List:
2196 2197
Apr 15 22:01:40 xencluster2 crmd: [7678]: info: process_graph_event:
Detected action clvm-xenvg:0_monitor_10000 from a different transition:
62 vs. 80
Apr 15 22:01:40 xencluster2 crmd: [7678]: info: abort_transition_graph:
process_graph_event:476 - Triggered transition abort (complete=1,
tag=lrm_rsc_op, id=clvm-xenvg:0_monitor_10000,
magic=2:-2;61:62:0:72497771-fdb2-4ca7-a723-2cc360861bb0, cib=0.5816.4) :
Old event
Apr 15 22:01:40 xencluster2 crmd: [7678]: WARN: update_failcount:
Updating failcount for clvm-xenvg:0 on xencluster1 after failed monitor:
rc=-2 (update=value++, time=1334520100)
Apr 15 22:01:40 xencluster2 crmd: [7678]: info: do_state_transition:
State transition S_IDLE -> S_POLICY_ENGINE [ input=I_PE_CALC
cause=C_FSA_INTERNAL origin=abort_transition_graph ]
Apr 15 22:01:40 xencluster2 crmd: [7678]: info: do_state_transition: All
2 cluster nodes are eligible to run resources.
Apr 15 22:01:40 xencluster2 crmd: [7678]: info: do_pe_invoke: Query 984:
Requesting the current CIB: S_POLICY_ENGINE
Apr 15 22:01:40 xencluster2 corosync[7666]:   [TOTEM ] Retransmit List:
2196 2197
Apr 15 22:01:40 xencluster2 corosync[7666]:   [TOTEM ] Retransmit List:
2196 2197
Apr 15 22:01:40 xencluster2 corosync[7666]:   [TOTEM ] Retransmit List:
2196 2197
Apr 15 22:01:40 xencluster2 corosync[7666]:   [TOTEM ] Retransmit List:
2196 2197
Apr 15 22:01:40 xencluster2 corosync[7666]:   [TOTEM ] Retransmit List:
2196 2197
Apr 15 22:01:40 xencluster2 corosync[7666]:   [TOTEM ] Retransmit List:
2196 2197
Apr 15 22:01:40 xencluster2 corosync[7666]:   [TOTEM ] Retransmit List:
2196 2197 219a 219b 219c 219d
...
Apr 15 22:01:40 xencluster2 corosync[7666]:   [TOTEM ] Retransmit List:
2196 2197 219a 219b 219c 219d
Apr 15 22:01:40 xencluster2 crmd: [7678]: info: do_pe_invoke_callback:
Invoking the PE: query=984, ref=pe_calc-dc-1334520100-605, seq=2212,
quorate=1
Apr 15 22:01:40 xencluster2 corosync[7666]:   [TOTEM ] Retransmit List:
2196 2197 219a 219b 219c 219d
...

Apr 15 22:01:40 xencluster2 corosync[7666]:   [TOTEM ] Retransmit List:
2196 2197 219a 219b 219c 219d
Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: unpack_config: On
loss of CCM Quorum: Ignore
Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: unpack_rsc_op:
Operation clvm:1_last_failure_0 found resource clvm:1 active on xencluster2
Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: unpack_rsc_op:
Operation STONITH-Node2_last_failure_0 found resource STONITH-Node2
active on xencluster2
Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: unpack_rsc_op:
Operation STONITH--Node1_last_failure_0 found resource STONITH--Node1
active on xencluster2
Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: unpack_rsc_op:
Operation cmirror:1_last_failure_0 found resource cmirror:1 active on
xencluster2
Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: unpack_rsc_op:
Operation dlm:1_last_failure_0 found resource dlm:1 active on xencluster2
Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: unpack_rsc_op:
Operation o2cb:1_last_failure_0 found resource o2cb:1 active on xencluster2
Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: unpack_rsc_op:
Operation cmirror:0_last_failure_0 found resource cmirror:0 active on
xencluster1
Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: unpack_rsc_op:
Operation dlm:0_last_failure_0 found resource dlm:0 active on xencluster1
Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: unpack_rsc_op:
Operation clvm:0_last_failure_0 found resource clvm:0 active on xencluster1
Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: unpack_rsc_op:
Operation o2cb:0_last_failure_0 found resource o2cb:0 active on xencluster1
Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: unpack_rsc_op:
Operation clvm-xenvg:0_last_failure_0 found resource clvm-xenvg:0 active
on xencluster1
Apr 15 22:01:40 xencluster2 pengine: [7677]: WARN: unpack_rsc_op:
Processing failed op clvm-xenvg:0_monitor_10000 on xencluster1: unknown
exec error (-2)
Apr 15 22:01:40 xencluster2 pengine: [7677]: WARN: unpack_rsc_op:
Processing failed op vm-deprepo_last_failure_0 on xencluster1: unknown
error (1)
Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: RecurringOp:  Start
recurring monitor (10s) for clvm-xenvg:0 on xencluster1
Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Leave
 STONITH-Node2  (Started xencluster2)
Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Leave
 STONITH--Node1 (Started xencluster2)
Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Leave
 dlm:0  (Started xencluster1)
Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Leave
 clvm:0 (Started xencluster1)
Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Leave
 o2cb:0 (Started xencluster1)
Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Leave
 cmirror:0      (Started xencluster1)
Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Leave
 dlm:1  (Started xencluster2)
Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Leave
 clvm:1 (Started xencluster2)
Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Leave
 o2cb:1 (Started xencluster2)
Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Leave
 cmirror:1      (Started xencluster2)
Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Recover
clvm-xenvg:0   (Started xencluster1)
Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Restart
fs-config-xen:0        (Started xencluster1)
Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Leave
 clvm-xenvg:1   (Started xencluster2)
Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Leave
 fs-config-xen:1        (Started xencluster2)
Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Restart
vm-ilmt        (Started xencluster1)
Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Restart
vm-topdesk-entw        (Started xencluster1)
Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Leave
 vm-otbridge    (Started xencluster2)
Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Restart
vm-virenscanner        (Started xencluster1)
Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Restart
vm-deprepo     (Started xencluster1)
Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Leave
 vm-traumschiff (Started xencluster2)
Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Leave
 vm-cim (Started xencluster2)
Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Restart
vm-oneiroi     (Started xencluster1)
Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Restart
vm-intranet    (Started xencluster1)
Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Restart
vm-soa (Started xencluster1)
Apr 15 22:01:40 xencluster2 crmd: [7678]: info: do_state_transition:
State transition S_POLICY_ENGINE -> S_TRANSITION_ENGINE [
input=I_PE_SUCCESS cause=C_IPC_MESSAGE origin=handle_response ]
Apr 15 22:01:40 xencluster2 crmd: [7678]: info: unpack_graph: Unpacked
transition 81: 39 actions in 39 synapses
Apr 15 22:01:40 xencluster2 crmd: [7678]: info: do_te_invoke: Processing
graph 81 (ref=pe_calc-dc-1334520100-605) derived from
/var/lib/pengine/pe-input-272.bz2
Apr 15 22:01:40 xencluster2 crmd: [7678]: info: te_rsc_command:
Initiating action 82: stop vm-ilmt_stop_0 on xencluster1
Apr 15 22:01:40 xencluster2 crmd: [7678]: info: te_rsc_command:
Initiating action 86: stop vm-topdesk-entw_stop_0 on xencluster1
Apr 15 22:01:40 xencluster2 crmd: [7678]: info: te_rsc_command:
Initiating action 90: stop vm-virenscanner_stop_0 on xencluster1
Apr 15 22:01:40 xencluster2 crmd: [7678]: info: te_rsc_command:
Initiating action 92: stop vm-deprepo_stop_0 on xencluster1
Apr 15 22:01:40 xencluster2 crmd: [7678]: info: te_rsc_command:
Initiating action 98: stop vm-oneiroi_stop_0 on xencluster1
Apr 15 22:01:40 xencluster2 crmd: [7678]: info: te_rsc_command:
Initiating action 100: stop vm-intranet_stop_0 on xencluster1
Apr 15 22:01:40 xencluster2 crmd: [7678]: info: te_rsc_command:
Initiating action 102: stop vm-soa_stop_0 on xencluster1
Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: process_pe_message:
Transition 81: PEngine Input stored in: /var/lib/pengine/pe-input-272.bz2
Apr 15 22:01:40 xencluster2 corosync[7666]:   [TOTEM ] Retransmit List:
2196 2197 219a 219b 219c 219d
Apr 15 22:01:40 xencluster2 corosync[7666]:   [TOTEM ] Retransmit List:
2196 2197 219a 219b 219c 219d
Apr 15 22:01:40 xencluster2 corosync[7666]:   [TOTEM ] Retransmit List:
2196 2197 219a 219b 219c 219d
Apr 15 22:01:40 xencluster2 corosync[7666]:   [TOTEM ] Retransmit List:
2196 2197 219a 219b 219c 219d
Apr 15 22:01:40 xencluster2 corosync[7666]:   [TOTEM ] Retransmit List:
2196 2197 219a 219b 219c 219d
Apr 15 22:01:40 xencluster2 corosync[7666]:   [TOTEM ] Retransmit List:
2196 2197 219a 219b 219c 219d 219e 219f 21a0 21a1 21a2 21a3 21a4 21a5
Apr 15 22:01:40 xencluster2 corosync[7666]:   [TOTEM ] Retransmit List:
2196 2197 219a 219b 219c 219d 219e 219f 21a0 21a1 21a2 21a3 21a4 21a5
...
Apr 15 22:01:41 xencluster2 corosync[7666]:   [TOTEM ] Retransmit List:
2196 2197 219a 219b 219c 219d 219e 219f 21a0 21a1 21a2 21a3 21a4 21a5
Apr 15 22:01:42 xencluster2 lrmd: [7675]: WARN: clvm-xenvg:1:monitor
process (PID 29785) timed out (try 1).  Killing with signal SIGTERM (15).
Apr 15 22:01:42 xencluster2 lrmd: [7675]: WARN: operation monitor[125]
on clvm-xenvg:1 for client 7678: pid 29785 timed out
Apr 15 22:01:42 xencluster2 crmd: [7678]: ERROR: process_lrm_event: LRM
operation clvm-xenvg:1_monitor_10000 (125) Timed Out (timeout=30000ms)
Apr 15 22:01:42 xencluster2 crmd: [7678]: info: process_graph_event:
Detected action clvm-xenvg:1_monitor_10000 from a different transition:
68 vs. 81
Apr 15 22:01:42 xencluster2 crmd: [7678]: info: abort_transition_graph:
process_graph_event:476 - Triggered transition abort (complete=0,
tag=lrm_rsc_op, id=clvm-xenvg:1_monitor_10000,
magic=2:-2;61:68:0:72497771-fdb2-4ca7-a723-2cc360861bb0, cib=0.5816.5) :
Old event
Apr 15 22:01:42 xencluster2 crmd: [7678]: info: update_abort_priority:
Abort priority upgraded from 0 to 1000000
Apr 15 22:01:42 xencluster2 crmd: [7678]: info: update_abort_priority:
Abort action done superceeded by restart
Apr 15 22:01:42 xencluster2 crmd: [7678]: WARN: update_failcount:
Updating failcount for clvm-xenvg:1 on xencluster2 after failed monitor:
rc=-2 (update=value++, time=1334520102)
Apr 15 22:01:42 xencluster2 attrd: [7676]: notice: attrd_trigger_update:
Sending flush op to all hosts for: fail-count-clvm-xenvg:1 (1)
Apr 15 22:01:42 xencluster2 attrd: [7676]: notice: attrd_perform_update:
Sent update 448: fail-count-clvm-xenvg:1=1
Apr 15 22:01:42 xencluster2 attrd: [7676]: notice: attrd_trigger_update:
Sending flush op to all hosts for: last-failure-clvm-xenvg:1 (1334520102)
Apr 15 22:01:42 xencluster2 crmd: [7678]: info: abort_transition_graph:
te_update_diff:176 - Triggered transition abort (complete=0, tag=nvpair,
id=status-xencluster2-fail-count-clvm-xenvg.1,
name=fail-count-clvm-xenvg:1, value=1, magic=NA, cib=0.5816.6) :
Transient attribute: update
Apr 15 22:01:42 xencluster2 attrd: [7676]: notice: attrd_perform_update:
Sent update 451: last-failure-clvm-xenvg:1=1334520102
Apr 15 22:01:42 xencluster2 crmd: [7678]: info: abort_transition_graph:
te_update_diff:176 - Triggered transition abort (complete=0, tag=nvpair,
id=status-xencluster2-last-failure-clvm-xenvg.1,
name=last-failure-clvm-xenvg:1, value=1334520102, magic=NA,
cib=0.5816.7) : Transient attribute: update
Apr 15 22:01:42 xencluster2 corosync[7666]:   [TOTEM ] Retransmit List:
2196 2197 219a 219b 219c 219d 219e 219f 21a0 21a1 21a2 21a3 21a4 21a5
Apr 15 22:01:42 xencluster2 corosync[7666]:   [TOTEM ] Retransmit List:
2196 2197 219a 219b 219c 219d 219e 219f 21a0 21a1 21a2 21a3 21a4 21a5
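
Once this is understood, my plan is to clear the fail counts and let the
cluster settle again, roughly like this (crmsh commands from memory, possibly
the clone instance names are needed instead):

    crm resource cleanup clvm-xenvg
    crm resource cleanup c-xen-vg-fs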

-- 
Kind regards

Frank Meier
UNIX-Basis

Hamm Reno Group GmbH
Industriegebiet West | D-66987 Thaleischweiler-Fröschen
T.+49(0)6334 444-8322 | F.+49(0)6334 444-8190
frank.meier at hr-group.de | www.reno.de
___________________________________________________________________

Registered office: Am Tie 7 | D-49086 Osnabrück
Commercial register: Osnabrück HRB 19587
Managing directors: Hans-Jürgen de Fries,
Jens Gransee, Manfred Klumpp,
Robert Reisch



