[Pacemaker] Help! Simultaneously Mount gfs2 in Pacemaker on RHEL6.1x64 HANGS!

xin.liang at cs2c.com.cn xin.liang at cs2c.com.cn
Wed Sep 21 21:48:28 EDT 2011



Hi,

Hope everything is going well for you.

I am having problems with DRBD + dlm + gfs2 under Pacemaker.

I followed this document to build a DRBD/GFS2 cluster:
http://www.clusterlabs.org/doc/en-US/Pacemaker/1.1/html/Clusters_from_Scratch/index.html

When I run DRBD (dual-primary) + dlm + gfs + Filesystem, node-c hangs and reboots.

If I run only DRBD (dual-primary) + dlm + gfs, everything works and the resources run on both nodes.

Then I run "mount -t gfs2 /dev/drbd0 /mnt" on node-b, and it works.

BUT when I then run "mount -t gfs2 /dev/drbd0 /mnt" on node-c, node-c hangs and reboots. There is no error output on stderr.

If I do it the other way around and mount on node-c first, node-c is fine; then node-b is the one that hangs and reboots!
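
(For reference: the "+Filesystem" case simply appends an ocf:heartbeat:Filesystem primitive to the cloned dlm/gfs group. A rough sketch is below; the resource name and operation values are illustrative, not copied from my CIB:)

primitive fs ocf:heartbeat:Filesystem \
 params device="/dev/drbd0" directory="/mnt" fstype="gfs2" \
 op monitor interval="20" timeout="40"
group groups dlm gfs fs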

#################################################################
$ crm configure show

node ha-b
node ha-c
primitive dlm ocf:pacemaker:controld \
 operations $id="dlm-operations" \
 op monitor interval="10" timeout="20" start-delay="0" \
 params args="-L -K -P -q 0"
primitive drbd ocf:linbit:drbd \
 operations $id="drbd-operations" \
 op monitor interval="20" role="Slave" timeout="20" \
 op monitor interval="10" role="Master" timeout="20" \
 params drbd_resource="drbd0"
primitive gfs ocf:pacemaker:controld \
 operations $id="gfs-operations" \
 op monitor interval="10" timeout="20" start-delay="0" \
 params daemon="gfs_controld.pcmk" args="-L -P -g 0"
group groups dlm gfs
ms ms-drbd drbd \
 meta master-max="2" notify="true" target-role="Started"
clone clone-set groups \
 meta interleave="true" target-role="Started"
colocation clone-on-drbd inf: clone-set:Started ms-drbd:Master
order clone-after-drbd inf: ms-drbd:promote clone-set:start symmetrical=true
property $id="cib-bootstrap-options" \
 dc-version="1.1.6-1.el6-9971ebba4494012a93c03b40a2c58ec0eb60f50c" \
 cluster-infrastructure="openais" \
 expected-quorum-votes="2" \
 no-quorum-policy="ignore" \
 stonith-enabled="false"
##########################################################################
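
(In case it is relevant: drbd0 is configured for dual-primary as in the Clusters from Scratch guide. A minimal sketch of the relevant part of my DRBD resource file follows; the device/disk/address stanzas are omitted and nothing below is copied verbatim from my config:)

resource drbd0 {
  net {
    protocol C;
    allow-two-primaries yes;
  }
  # device/disk/address/per-host sections omitted
}

Before mounting by hand I check that "cat /proc/drbd" reports Primary/Primary and that "crm_mon -1" lists both nodes as Masters of ms-drbd.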

gfs_controld.log on ha-b
##########################################################################
[root@ha-b ~]# cat /var/log/cluster/gfs_controld.log

Sep 22 09:08:13 gfs_controld gfs_controld 3.0.12 started
Sep 22 09:08:13 gfs_controld Connected as node 3393650954 to cluster 'cs2c'
Sep 22 09:08:13 gfs_controld logging mode 3 syslog f 160 p 6 logfile p 7 /var/log/cluster/gfs_controld.log
Sep 22 09:08:13 gfs_controld group_mode 3 compat 0
Sep 22 09:08:13 gfs_controld setup_cpg_daemon 11
Sep 22 09:08:13 gfs_controld gfs:controld conf 1 1 0 memb -901316342 join -901316342 left
Sep 22 09:08:13 gfs_controld set_protocol member_count 1 propose daemon 1.1.1 kernel 1.1.1
Sep 22 09:08:13 gfs_controld run protocol from nodeid -901316342
Sep 22 09:08:13 gfs_controld daemon run 1.1.1 max 1.1.1 kernel run 1.1.1 max 1.1.1
Sep 22 09:08:14 gfs_controld gfs:controld conf 2 1 0 memb -901316342 -884539126 join -884539126 left
Sep 22 09:11:57 gfs_controld client connection 5 fd 14
Sep 22 09:11:57 gfs_controld join: /mnt gfs2 lock_dlm cs2c:liang rw /dev/drbd0
Sep 22 09:11:57 gfs_controld liang join: cluster name matches: cs2c
Sep 22 09:11:57 gfs_controld liang process_dlmcontrol register 0
Sep 22 09:11:57 gfs_controld gfs:mount:liang conf 1 1 0 memb -901316342 join -901316342 left
Sep 22 09:11:57 gfs_controld liang add_change cg 1 joined nodeid -901316342
Sep 22 09:11:57 gfs_controld liang add_change cg 1 we joined
Sep 22 09:11:57 gfs_controld liang add_change cg 1 counts member 1 joined 1 remove 0 failed 0
Sep 22 09:11:57 gfs_controld liang wait_conditions skip for zero started_count
Sep 22 09:11:57 gfs_controld liang send_start cg 1 id_count 1 om 0 nm 1 oj 0 nj 0
Sep 22 09:11:57 gfs_controld liang receive_start -901316342:1 len 92
Sep 22 09:11:57 gfs_controld liang match_change -901316342:1 matches cg 1
Sep 22 09:11:57 gfs_controld liang wait_messages cg 1 got all 1
Sep 22 09:11:57 gfs_controld liang pick_first_recovery_master low -901316342 old 0
Sep 22 09:11:57 gfs_controld liang sync_state all_nodes_new first_recovery_needed master -901316342
Sep 22 09:11:57 gfs_controld liang create_old_nodes all new
Sep 22 09:11:57 gfs_controld liang create_new_nodes -901316342 ro 0 spect 0
Sep 22 09:11:57 gfs_controld liang create_failed_journals all new
Sep 22 09:11:57 gfs_controld liang apply_recovery first start_kernel
Sep 22 09:11:57 gfs_controld liang start_kernel cg 1 member_count 1
Sep 22 09:11:57 gfs_controld liang set /sys/fs/gfs2/cs2c:liang/lock_module/block to 0
Sep 22 09:11:57 gfs_controld liang set open /sys/fs/gfs2/cs2c:liang/lock_module/block error -1 2
Sep 22 09:11:57 gfs_controld liang client_reply_join_full ci 5 result 0 hostdata=jid=0:id=915250580:first=1
Sep 22 09:11:57 gfs_controld client_reply_join liang ci 5 result 0
Sep 22 09:11:57 gfs_controld uevent add gfs2 /fs/gfs2/cs2c:liang
Sep 22 09:11:57 gfs_controld liang ping_kernel_mount 0
Sep 22 09:11:57 gfs_controld uevent change gfs2 /fs/gfs2/cs2c:liang
Sep 22 09:11:57 gfs_controld liang recovery_uevent jid 0 first recovery done 0
Sep 22 09:11:57 gfs_controld uevent change gfs2 /fs/gfs2/cs2c:liang
Sep 22 09:11:57 gfs_controld liang recovery_uevent jid 1 first recovery done 0
Sep 22 09:11:57 gfs_controld uevent change gfs2 /fs/gfs2/cs2c:liang
Sep 22 09:11:57 gfs_controld liang recovery_uevent jid 1 first recovery done 0
Sep 22 09:11:57 gfs_controld liang recovery_uevent first_done
Sep 22 09:11:57 gfs_controld liang receive_first_recovery_done from -901316342 master -901316342 mount_client_notified 1
Sep 22 09:11:57 gfs_controld liang wait_recoveries done
Sep 22 09:11:57 gfs_controld uevent online gfs2 /fs/gfs2/cs2c:liang
Sep 22 09:11:57 gfs_controld liang ping_kernel_mount 0
Sep 22 09:11:57 gfs_controld mount_done: liang result 0
Sep 22 09:11:57 gfs_controld connection 5 read error -1
Sep 22 09:11:57 gfs_controld liang receive_mount_done from -901316342 result 0
Sep 22 09:11:57 gfs_controld liang wait_recoveries done
Sep 22 09:12:37 gfs_controld uevent remove gfs2 /fs/gfs2/cs2c:liang
Sep 22 09:12:37 gfs_controld do_leave liang mnterr 0
Sep 22 09:12:37 gfs_controld gfs:mount:liang conf 0 0 1 memb join left -901316342
Sep 22 09:12:37 gfs_controld liang confchg for our leave
##########################################################################

gfs_controld.log on ha-c
##########################################################################
[root@ha-c ~]# cat /var/log/cluster/gfs_controld.log

Sep 22 08:52:12 gfs_controld gfs_controld 3.0.12 started
Sep 22 08:52:12 gfs_controld Connected as node 3410428170 to cluster 'cs2c'
Sep 22 08:52:12 gfs_controld logging mode 3 syslog f 160 p 6 logfile p 7 /var/log/cluster/gfs_controld.log
Sep 22 08:52:12 gfs_controld group_mode 3 compat 0
Sep 22 08:52:12 gfs_controld setup_cpg_daemon 11
Sep 22 08:52:12 gfs_controld gfs:controld conf 2 1 0 memb -901316342 -884539126 join -884539126 left
Sep 22 08:52:12 gfs_controld run protocol from nodeid -901316342
Sep 22 08:52:12 gfs_controld daemon run 1.1.1 max 1.1.1 kernel run 1.1.1 max 1.1.1
Sep 22 08:56:52 gfs_controld client connection 5 fd 14
Sep 22 08:56:52 gfs_controld join: /mnt gfs2 lock_dlm cs2c:liang rw /dev/drbd0
Sep 22 08:56:52 gfs_controld liang join: cluster name matches: cs2c
Sep 22 08:56:52 gfs_controld liang process_dlmcontrol register 0
Sep 22 08:56:52 gfs_controld gfs:mount:liang conf 1 1 0 memb -884539126 join -884539126 left
Sep 22 08:56:52 gfs_controld liang add_change cg 1 joined nodeid -884539126
Sep 22 08:56:52 gfs_controld liang add_change cg 1 we joined
Sep 22 08:56:52 gfs_controld liang add_change cg 1 counts member 1 joined 1 remove 0 failed 0
Sep 22 08:56:52 gfs_controld liang wait_conditions skip for zero started_count
Sep 22 08:56:52 gfs_controld liang send_start cg 1 id_count 1 om 0 nm 1 oj 0 nj 0
Sep 22 08:56:52 gfs_controld liang receive_start -884539126:1 len 92
Sep 22 08:56:52 gfs_controld liang match_change -884539126:1 matches cg 1
Sep 22 08:56:52 gfs_controld liang wait_messages cg 1 got all 1
Sep 22 08:56:52 gfs_controld liang pick_first_recovery_master low -884539126 old 0
Sep 22 08:56:52 gfs_controld liang sync_state all_nodes_new first_recovery_needed master -884539126
Sep 22 08:56:52 gfs_controld liang create_old_nodes all new
Sep 22 08:56:52 gfs_controld liang create_new_nodes -884539126 ro 0 spect 0
Sep 22 08:56:52 gfs_controld liang create_failed_journals all new
Sep 22 08:56:52 gfs_controld liang apply_recovery first start_kernel
Sep 22 08:56:52 gfs_controld liang start_kernel cg 1 member_count 1
Sep 22 08:56:52 gfs_controld liang set /sys/fs/gfs2/cs2c:liang/lock_module/block to 0
Sep 22 08:56:52 gfs_controld liang set open /sys/fs/gfs2/cs2c:liang/lock_module/block error -1 2
Sep 22 08:56:52 gfs_controld liang client_reply_join_full ci 5 result 0 hostdata=jid=0:id=915250580:first=1
Sep 22 08:56:52 gfs_controld client_reply_join liang ci 5 result 0
Sep 22 08:56:53 gfs_controld uevent add gfs2 /fs/gfs2/cs2c:liang
Sep 22 08:56:53 gfs_controld liang ping_kernel_mount 0
Sep 22 08:56:53 gfs_controld uevent change gfs2 /fs/gfs2/cs2c:liang
Sep 22 08:56:53 gfs_controld liang recovery_uevent jid 0 first recovery done 0
Sep 22 08:56:53 gfs_controld uevent change gfs2 /fs/gfs2/cs2c:liang
Sep 22 08:56:53 gfs_controld liang recovery_uevent jid 1 first recovery done 0
Sep 22 08:56:53 gfs_controld uevent change gfs2 /fs/gfs2/cs2c:liang
Sep 22 08:56:53 gfs_controld liang recovery_uevent jid 1 first recovery done 0
Sep 22 08:56:53 gfs_controld liang recovery_uevent first_done
Sep 22 08:56:53 gfs_controld liang receive_first_recovery_done from -884539126 master -884539126 mount_client_notified 1
Sep 22 08:56:53 gfs_controld liang wait_recoveries done
Sep 22 08:56:53 gfs_controld uevent online gfs2 /fs/gfs2/cs2c:liang
Sep 22 08:56:53 gfs_controld liang ping_kernel_mount 0
Sep 22 08:56:53 gfs_controld mount_done: liang result 0
Sep 22 08:56:53 gfs_controld connection 5 read error -1
Sep 22 08:56:53 gfs_controld liang receive_mount_done from -884539126 result 0
Sep 22 08:56:53 gfs_controld liang wait_recoveries done
Sep 22 08:57:17 gfs_controld gfs:mount:liang conf 2 1 0 memb -901316342 -884539126 join -901316342 left
Sep 22 08:57:17 gfs_controld liang add_change cg 2 joined nodeid -901316342
Sep 22 08:57:17 gfs_controld liang add_change cg 2 counts member 2 joined 1 remove 0 failed 0
Sep 22 08:57:17 gfs_controld liang wait_conditions skip for zero journals_need_recovery
Sep 22 08:57:17 gfs_controld liang send_start cg 2 id_count 2 om 1 nm 1 oj 0 nj 0
Sep 22 08:57:17 gfs_controld liang receive_start -901316342:1 len 104
Sep 22 08:57:17 gfs_controld liang match_change -901316342:1 matches cg 2
Sep 22 08:57:17 gfs_controld liang wait_messages cg 2 need 1 of 2
Sep 22 08:57:17 gfs_controld liang receive_start -884539126:2 len 104
Sep 22 08:57:17 gfs_controld liang match_change -884539126:2 matches cg 2
Sep 22 08:57:17 gfs_controld liang wait_messages cg 2 got all 2
Sep 22 08:57:17 gfs_controld liang sync_state first_recovery_msg
Sep 22 08:57:17 gfs_controld liang create_new_nodes -901316342 ro 0 spect 0
Sep 22 08:57:17 gfs_controld liang wait_recoveries done
Sep 22 08:57:22 gfs_controld gfs:controld conf 1 0 1 memb -884539126 join left -901316342
Sep 22 08:57:22 gfs_controld gfs:mount:liang conf 1 0 1 memb -884539126 join left -901316342
Sep 22 08:57:22 gfs_controld liang add_change cg 3 remove nodeid -901316342 reason 3
Sep 22 08:57:22 gfs_controld liang add_change cg 3 counts member 1 joined 0 remove 1 failed 1
Sep 22 08:57:22 gfs_controld liang stop_kernel
Sep 22 08:57:22 gfs_controld liang set /sys/fs/gfs2/cs2c:liang/lock_module/block to 1
Sep 22 08:57:22 gfs_controld liang check_dlm_notify nodeid -901316342 begin
Sep 22 08:57:22 gfs_controld liang process_dlmcontrol notified nodeid -901316342 result 0
Sep 22 08:57:22 gfs_controld liang check_dlm_notify done
Sep 22 08:57:22 gfs_controld liang send_start cg 3 id_count 1 om 1 nm 0 oj 0 nj 0
Sep 22 08:57:22 gfs_controld liang receive_start -884539126:3 len 92
Sep 22 08:57:22 gfs_controld liang match_change -884539126:3 matches cg 3
Sep 22 08:57:22 gfs_controld liang wait_messages cg 3 got all 1
Sep 22 08:57:22 gfs_controld liang sync_state first_recovery_msg
Sep 22 08:57:22 gfs_controld liang set_failed_journals no journal for nodeid -901316342
Sep 22 08:57:22 gfs_controld liang wait_recoveries done
Sep 22 08:57:22 gfs_controld liang apply_recovery start_kernel
Sep 22 08:57:22 gfs_controld liang start_kernel cg 3 member_count 1
Sep 22 08:57:22 gfs_controld liang set /sys/fs/gfs2/cs2c:liang/lock_module/block to 0
##########################################################################


My experimental environment:

 Two PCs:
 node-b
 node-c

 Both run RHEL 6.1 x86_64.

 RPMs:
 pacemaker-cli-1.1.6-1.el6.x86_64
 pacemaker-doc-1.1.6-1.el6.x86_64
 pacemaker-libs-1.1.6-1.el6.x86_64
 pacemaker-1.1.6-1.el6.x86_64
 pacemaker-cts-1.1.6-1.el6.x86_64
 pacemaker-libs-devel-1.1.6-1.el6.x86_64

 corosynclib-1.4.1-1.x86_64
 corosync-1.4.1-1.x86_64
 corosynclib-devel-1.4.1-1.x86_64

 resource-agents-3.9.2-1.x86_64

 cluster-glue-libs-devel-1.0.7-1.el6.x86_64
 cluster-glue-libs-1.0.7-1.el6.x86_64
 cluster-glue-1.0.7-1.el6.x86_64

 openais-1.1.1-7.el6.x86_64
 openaislib-1.1.1-7.el6.x86_64

 dlm-pcmk-3.0.12-23.el6_0.6.x86_64
 gfs-pcmk-3.0.12-23.el6_0.6.x86_64
 gfs2-utils-3.0.12-41.el6.x86_64
 clusterlib-3.0.12-41.el6.x86_64

 drbd-udev-8.4.0-1.el6.x86_64
 drbd-8.4.0-1.el6.x86_64
 drbd-utils-8.4.0-1.el6.x86_64
 drbd-heartbeat-8.4.0-1.el6.x86_64
 drbd-pacemaker-8.4.0-1.el6.x86_64
 drbd-bash-completion-8.4.0-1.el6.x86_64
 drbd-xen-8.4.0-1.el6.x86_64
 drbd-km-2.6.32_131.0.15.el6.x86_64-8.4.0-1.el6.x86_64
 drbd-kernel-8.4.0-1.el6.x86_64

 My conf:
 $ cat /etc/corosync/corosync.conf

 compatibility: whitetank

 totem {
         version: 2
         secauth: off
         threads: 0
         rrp_mode: passive
         interface {
                 ringnumber: 0
                 bindnetaddr: 10.1.71.0
                 mcastaddr: 235.3.4.5
                 mcastport: 9876
         }
         interface {
                 ringnumber: 1
                 bindnetaddr: 10.10.10.0
                 mcastaddr: 235.3.4.6
                 mcastport: 9877
         }
 }

 logging {
         fileline: off
         to_stderr: no
         to_logfile: yes
         to_syslog: yes
         logfile: /var/log/cluster/corosync.log
         debug: off
         timestamp: on
         logger_subsys {
                 subsys: AMF
                 debug: off
         }
 }

 amf {
         mode: disabled
 }

 service {
         name: pacemaker
         var: 0
         use_logd: yes
         use_mgmtd: yes
         clustername: cs2c
 }

 