And this is what happens on cluster1:

[root@cluster1 ~]# ps axf
  PID TTY      STAT   TIME COMMAND
    2 ?        S      0:00 [kthreadd]
    3 ?        S      0:00  \_ [migration/0]
    4 ?        S      0:00  \_ [ksoftirqd/0]
    5 ?        S      0:00  \_ [migration/0]
    6 ?        S      0:00  \_ [watchdog/0]
    7 ?        S      0:04  \_ [events/0]
    8 ?        S      0:00  \_ [cgroup]
    9 ?        S      0:00  \_ [khelper]
   10 ?        S      0:00  \_ [netns]
   11 ?        S      0:00  \_ [async/mgr]
   12 ?        S      0:00  \_ [pm]
   13 ?        S      0:00  \_ [sync_supers]
   14 ?        S      0:00  \_ [bdi-default]
   15 ?        S      0:00  \_ [kintegrityd/0]
   16 ?        S      0:00  \_ [kblockd/0]
   17 ?        S      0:00  \_ [kacpid]
   18 ?        S      0:00  \_ [kacpi_notify]
   19 ?        S      0:00  \_ [kacpi_hotplug]
   20 ?        S      0:00  \_ [ata/0]
   21 ?        S      0:00  \_ [ata_aux]
   22 ?        S      0:00  \_ [ksuspend_usbd]
   23 ?        S      0:00  \_ [khubd]
   24 ?        S      0:00  \_ [kseriod]
   25 ?        S      0:00  \_ [md/0]
   26 ?        S      0:00  \_ [md_misc/0]
   27 ?        S      0:00  \_ [khungtaskd]
   28 ?        S      0:00  \_ [kswapd0]
   29 ?        SN     0:00  \_ [ksmd]
   30 ?        SN     0:00  \_ [khugepaged]
   31 ?        S      0:00  \_ [aio/0]
   32 ?        S      0:00  \_ [crypto/0]
   37 ?        S      0:00  \_ [kthrotld/0]
   39 ?        S      0:00  \_ [kpsmoused]
   40 ?        S      0:00  \_ [usbhid_resumer]
   71 ?        S      0:00  \_ [kstriped]
  198 ?        S      0:00  \_ [scsi_eh_0]
  199 ?        S      0:00  \_ [scsi_eh_1]
  210 ?        S      0:00  \_ [scsi_eh_2]
  266 ?        S      0:00  \_ [kdmflush]
  268 ?        S      0:00  \_ [kdmflush]
  287 ?        S      0:00  \_ [jbd2/dm-0-8]
  288 ?        S      0:00  \_ [ext4-dio-unwrit]
  845 ?        S      0:00  \_ [kdmflush]
  881 ?        S      0:00  \_ [flush-253:0]
  882 ?        S      0:00  \_ [jbd2/sda1-8]
  883 ?        S      0:00  \_ [ext4-dio-unwrit]
  941 ?        S      0:00  \_ [kauditd]
 1281 ?        S      0:00  \_ [rpciod/0]
 2246 ?        S      0:00  \_ [cqueue]
 2282 ?        S      0:00  \_ [drbd1_worker]
 2721 ?        S      0:00  \_ [glock_workqueue]
 2722 ?        S      0:00  \_ [delete_workqueu]
 2723 ?        S<     0:00  \_ [kslowd001]
 2724 ?        S<     0:00  \_ [kslowd000]
    1 ?        Ss     0:01 /sbin/init
  371 ?        S<s    0:00 /sbin/udevd -d
  832 ?        S<     0:00  \_ /sbin/udevd -d
  833 ?        S<     0:00  \_ /sbin/udevd -d
 1167 ?        S<sl   0:00 auditd
 1193 ?        Sl     0:00 /sbin/rsyslogd -i /var/run/syslogd.pid -c 5
 1235 ?        Ss     0:00 rpcbind
 1253 ?        Ss     0:00 rpc.statd
 1285 ?        Ss     0:00 rpc.idmapd
 1391 ?        SLsl   0:05 corosync -f
 1434 ?        Ssl    0:00 fenced
 1459 ?        Ssl    0:00 dlm_controld
 1507 ?        Ssl    0:00 gfs_controld
 1579 ?        Ss     0:00 dbus-daemon --system
 1590 ?        S      0:00 avahi-daemon: running [cluster1.local]
 1591 ?        Ss     0:00  \_ avahi-daemon: chroot helper
 1601 ?        Ss     0:00 cupsd -C /etc/cups/cupsd.conf
 1626 ?        Ss     0:00 /usr/sbin/acpid
 1635 ?        Ss     0:00 hald
 1636 ?        S      0:00  \_ hald-runner
 1677 ?        S      0:00  \_ hald-addon-input: Listening on /dev/input/event1 /dev/input/event0 /dev/input/event3
 1680 ?        S      0:00  \_ hald-addon-acpi: listening on acpid socket /var/run/acpid.socket
 1700 ?        Ssl    0:00 automount --pid-file /var/run/autofs.pid
 1725 ?        Ss     0:00 /usr/sbin/sshd
10172 ?        Ss     0:00  \_ sshd: root@pts/0
10177 pts/0    Ss     0:00  \_ -bash
10239 pts/0    R+     0:00  \_ ps axf
 1733 ?        Ss     0:00 ntpd -u ntp:ntp -p /var/run/ntpd.pid -g
 1813 ?        Ss     0:00 /usr/libexec/postfix/master
 1819 ?        S      0:00  \_ pickup -l -t fifo -u
 1820 ?        S      0:00  \_ qmgr -l -t fifo -u
 1837 ?        Ss     0:00 /usr/sbin/abrtd
 1845 ?        Ss     0:00 abrt-dump-oops -d /var/spool/abrt -rwx /var/log/messages
 1855 ?        Ss     0:00 crond
 1869 ?        Ss     0:00 /usr/sbin/atd
 1881 ?        Ss     0:00 /usr/sbin/certmonger -S -p /var/run/certmonger.pid
 1907 ?        S      0:00 pacemakerd
 1911 ?        Ss     0:01  \_ /usr/libexec/pacemaker/cib
 1912 ?        Ss     0:00  \_ /usr/libexec/pacemaker/stonithd
 1913 ?        Ss     0:02  \_ /usr/lib64/heartbeat/lrmd
 1914 ?        Ss     0:00  \_ /usr/libexec/pacemaker/attrd
 1915 ?        Ss     0:00  \_ /usr/libexec/pacemaker/pengine
 1916 ?        Ss     0:00  \_ /usr/libexec/pacemaker/crmd
 1980 ?        Ss     0:00 /usr/sbin/gdm-binary -nodaemon
 2117 ?        S      0:00  \_ /usr/libexec/gdm-simple-slave --display-id /org/gnome/DisplayManager/Display1 --force-active-vt
 2129 tty1     Ss+    0:02  \_ /usr/bin/Xorg :0 -nr -verbose -audit 4 -auth /var/run/gdm/auth-for-gdm-4wqtrX/database -nolisten tcp vt1
 2416 ?        Ssl    0:00  \_ /usr/bin/gnome-session --autostart=/usr/share/gdm/autostart/LoginWindow/
 2479 ?        S      0:00  |   \_ /usr/libexec/at-spi-registryd
 2545 ?        S      0:00  |   \_ metacity
 2568 ?        S      0:00  |   \_ gnome-power-manager
 2573 ?        S      0:00  |   \_ /usr/libexec/polkit-gnome-authentication-agent-1
 2574 ?        S      0:00  |   \_ /usr/libexec/gdm-simple-greeter
 2666 ?        S      0:00  \_ pam: gdm-password
 2011 tty2     Ss+    0:00 /sbin/mingetty /dev/tty2
 2023 tty3     Ss+    0:00 /sbin/mingetty /dev/tty3
 2035 tty4     Ss+    0:00 /sbin/mingetty /dev/tty4
 2048 tty5     Ss+    0:00 /sbin/mingetty /dev/tty5
 2056 tty6     Ss+    0:00 /sbin/mingetty /dev/tty6
 2295 ?        Sl     0:00 /usr/sbin/console-kit-daemon --no-daemon
 2405 ?        S      0:00 /usr/bin/dbus-launch --exit-with-session
 2411 ?        Ss     0:00 /bin/dbus-daemon --fork --print-pid 5 --print-address 7 --session
 2437 ?        S      0:00 /usr/libexec/devkit-power-daemon
 2448 ?        S      0:00 /usr/libexec/gconfd-2
 2487 ?        Ssl    0:00 /usr/libexec/gnome-settings-daemon --gconf-prefix=/apps/gdm/simple-greeter/settings-manager-plugins
 2499 ?        Ssl    0:00 /usr/libexec/bonobo-activation-server --ac-activate --ior-output-fd=12
 2536 ?        S      0:00 /usr/libexec/gvfsd
 2595 ?        S      0:00 /usr/libexec/polkit-1/polkitd
 2616 ?        S<sl   0:00 /usr/bin/pulseaudio --start --log-target=syslog
 2619 ?        SNl    0:00 /usr/libexec/rtkit-daemon

[root@cluster1 ~]# grep -i error /var/log/messages
Oct 31 11:04:58 cluster1 pengine[1921]: warning: unpack_rsc_op: Processing failed op WebFS:0_last_failure_0 on cluster1: unknown exec error (-2)
Oct 31 11:10:44 cluster1 pengine[1921]: warning: unpack_rsc_op: Processing failed op WebFS:0_last_failure_0 on cluster1: unknown exec error (-2)
Oct 31 11:10:49 cluster1 dlm_controld[1467]: open "/sys/kernel/dlm/web/id" error -1 2
Oct 31 11:10:49 cluster1 dlm_controld[1467]: open "/sys/kernel/dlm/web/control" error -1 2
Oct 31 11:10:49 cluster1 dlm_controld[1467]: open "/sys/kernel/dlm/web/event_done" error -1 2
Oct 31 11:11:12 cluster1 pengine[1921]: warning: unpack_rsc_op: Processing failed op WebFS:0_last_failure_0 on cluster1: unknown exec error (-2)
Oct 31 11:11:19 cluster1 crmd[1922]: warning: status_from_rc: Action 10 (WebSite:0_monitor_0) on cluster2 failed (target: 7 vs. rc: 0): Error
Oct 31 11:11:19 cluster1 pengine[1921]: warning: unpack_rsc_op: Processing failed op WebFS:0_last_failure_0 on cluster1: unknown exec error (-2)
Oct 31 11:11:26 cluster1 kernel: dlm_new_lockspace error -512
Oct 31 11:11:26 cluster1 kernel: block drbd1: error receiving ReportState, l: 4!
Oct 31 11:11:30 cluster1 pengine[1921]: warning: unpack_rsc_op: Processing failed op WebFS:0_last_failure_0 on cluster1: unknown exec error (-2)
Oct 31 11:11:30 cluster1 kernel: block drbd1: error receiving ReportState, l: 4!
Oct 31 11:11:37 cluster1 pengine[1921]: warning: unpack_rsc_op: Processing failed op WebFS:0_last_failure_0 on cluster1: unknown exec error (-2)
Oct 31 11:11:40 cluster1 dlm_controld[1467]: open "/sys/kernel/dlm/web/control" error -1 2
Oct 31 11:11:40 cluster1 dlm_controld[1467]: open "/sys/kernel/dlm/web/control" error -1 2
Oct 31 11:11:58 cluster1 crmd[1922]: warning: status_from_rc: Action 47 (WebFS:0_start_0) on cluster2 failed (target: 0 vs. rc: -2): Error
Oct 31 11:11:58 cluster1 pengine[1921]: warning: unpack_rsc_op: Processing failed op WebFS:0_last_failure_0 on cluster1: unknown exec error (-2)
Oct 31 11:11:58 cluster1 pengine[1921]: warning: unpack_rsc_op: Processing failed op WebFS:0_last_failure_0 on cluster2: unknown exec error (-2)
Oct 31 11:12:18 cluster1 crmd[1922]: warning: status_from_rc: Action 3 (WebFS:0_stop_0) on cluster2 failed (target: 0 vs. rc: -2): Error
Oct 31 11:12:18 cluster1 pengine[1921]: warning: unpack_rsc_op: Processing failed op WebFS:0_last_failure_0 on cluster1: unknown exec error (-2)
Oct 31 11:12:18 cluster1 pengine[1921]: warning: unpack_rsc_op: Processing failed op WebFS:0_last_failure_0 on cluster2: unknown exec error (-2)
Oct 31 11:12:18 cluster1 pengine[1921]: warning: unpack_rsc_op: Processing failed op WebFS:0_last_failure_0 on cluster1: unknown exec error (-2)
Oct 31 11:12:18 cluster1 pengine[1921]: warning: unpack_rsc_op: Processing failed op WebFS:0_last_failure_0 on cluster2: unknown exec error (-2)
Oct 31 11:15:45 cluster1 pengine[1921]: warning: unpack_rsc_op: Processing failed op WebFS:0_last_failure_0 on cluster1: unknown exec error (-2)
Oct 31 11:15:45 cluster1 pengine[1921]: warning: unpack_rsc_op: Processing failed op WebFS:0_last_failure_0 on cluster2: unknown exec error (-2)
Oct 31 11:15:49 cluster1 pengine[1921]: warning: unpack_rsc_op: Processing failed op WebFS:0_last_failure_0 on cluster1: unknown exec error (-2)
Oct 31 11:15:49 cluster1 pengine[1921]: warning: unpack_rsc_op: Processing failed op WebFS:0_last_failure_0 on cluster2: unknown exec error (-2)
Oct 31 11:15:51 cluster1 pengine[1921]: warning: unpack_rsc_op: Processing failed op WebFS:0_last_failure_0 on cluster1: unknown exec error (-2)
Oct 31 11:15:51 cluster1 pengine[1921]: warning: unpack_rsc_op: Processing failed op WebFS:0_last_failure_0 on cluster2: unknown exec error (-2)
Oct 31 11:40:41 cluster1 kernel: block drbd1: error receiving ReportState, l: 4!
Oct 31 11:40:44 cluster1 kernel: block drbd1: error receiving ReportState, l: 4!
Oct 31 11:41:12 cluster1 crmd[1916]: error: process_lrm_event: LRM operation WebFS:1_start_0 (14) Timed Out (timeout=20000ms)
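
In case it helps narrow this down, here is a minimal sketch of checks for a stuck GFS2 mount helper or a leftover DLM lockspace (this assumes the dlm_tool utility shipped with the cman/dlm packages on CentOS 6 is installed; the "web" lockspace name is taken from the dlm_controld messages above):

# look for mount helpers or block-device flushes stuck in uninterruptible sleep (STAT "D")
ps axo pid,stat,wchan:32,cmd | grep -E 'mount\.gfs2|blockdev|gfs2' | grep -v grep

# list the DLM lockspaces this node still holds; a stale "web" lockspace would
# explain "mount point already used or other mount in progress"
dlm_tool ls
ls -l /sys/kernel/dlm/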
<div class="gmail_quote">On Wed, Oct 31, 2012 at 12:15 PM, Soni Maula Harriz <span dir="ltr"><<a href="mailto:soni.harriz@sangkuriang.co.id" target="_blank">soni.harriz@sangkuriang.co.id</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

On Tue, Oct 30, 2012 at 12:20 PM, Andrew Beekhof <andrew@beekhof.net> wrote:

On Mon, Oct 29, 2012 at 4:22 PM, Soni Maula Harriz <soni.harriz@sangkuriang.co.id> wrote:
> Dear all,
> I configured Pacemaker and Corosync on two CentOS 6.3 servers by following
> the instructions in 'Cluster from Scratch'.
> In the beginning I followed 'Cluster from Scratch' edition 5, but since I
> use CentOS I switched to 'Cluster from Scratch' edition 3 to configure
> active/active servers.
> Now, on the 1st server (cluster1), the Filesystem resource cannot start: the gfs2
> filesystem can't be mounted.
>
> This is the crm configuration:
> [root@cluster2 ~]# crm configure show
> node cluster1 \
>         attributes standby="off"
> node cluster2 \
>         attributes standby="off"
> primitive ClusterIP ocf:heartbeat:IPaddr2 \
>         params ip="xxx.xxx.xxx.229" cidr_netmask="32" clusterip_hash="sourceip" \
>         op monitor interval="30s"
> primitive WebData ocf:linbit:drbd \
>         params drbd_resource="wwwdata" \
>         op monitor interval="60s"
> primitive WebFS ocf:heartbeat:Filesystem \
>         params device="/dev/drbd/by-res/wwwdata" directory="/var/www/html" fstype="gfs2"
> primitive WebSite ocf:heartbeat:apache \
>         params configfile="/etc/httpd/conf/httpd.conf" statusurl="http://localhost/server-status" \
>         op monitor interval="1min"
> ms WebDataClone WebData \
>         meta master-max="2" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
> clone WebFSClone WebFS
> clone WebIP ClusterIP \
>         meta globally-unique="true" clone-max="2" clone-node-max="1" interleave="false"
> clone WebSiteClone WebSite \
>         meta interleave="false"
> colocation WebSite-with-WebFS inf: WebSiteClone WebFSClone
> colocation colocation-WebSite-ClusterIP-INFINITY inf: WebSiteClone WebIP
> colocation fs_on_drbd inf: WebFSClone WebDataClone:Master
> order WebFS-after-WebData inf: WebDataClone:promote WebFSClone:start
> order WebSite-after-WebFS inf: WebFSClone WebSiteClone
> order order-ClusterIP-WebSite-mandatory : WebIP:start WebSiteClone:start
> property $id="cib-bootstrap-options" \
>         dc-version="1.1.7-6.el6-148fccfd5985c5590cc601123c6c16e966b85d14" \
>         cluster-infrastructure="cman" \
>         expected-quorum-votes="2" \
>         stonith-enabled="false" \
>         no-quorum-policy="ignore"
> rsc_defaults $id="rsc-options" \
>         resource-stickiness="100"
>
> When I try to mount the filesystem manually, this message appears:
> [root@cluster1 ~]# mount /dev/drbd1 /mnt/
> mount point already used or other mount in progress
> error mounting lockproto lock_dlm
>
> But when I check the mounts, there is no mount from drbd.

This is what the system told me:

> what does "ps axf" say? Is there another mount process running?

[root@cluster2 ~]# ps axf
  PID TTY      STAT   TIME COMMAND
    2 ?        S      0:00 [kthreadd]
    3 ?        S      0:00  \_ [migration/0]
    4 ?        S      0:00  \_ [ksoftirqd/0]
    5 ?        S      0:00  \_ [migration/0]
    6 ?        S      0:00  \_ [watchdog/0]
    7 ?        S      0:03  \_ [events/0]
    8 ?        S      0:00  \_ [cgroup]
    9 ?        S      0:00  \_ [khelper]
   10 ?        S      0:00  \_ [netns]
   11 ?        S      0:00  \_ [async/mgr]
   12 ?        S      0:00  \_ [pm]
   13 ?        S      0:00  \_ [sync_supers]
   14 ?        S      0:00  \_ [bdi-default]
   15 ?        S      0:00  \_ [kintegrityd/0]
   16 ?        S      0:03  \_ [kblockd/0]
   17 ?        S      0:00  \_ [kacpid]
   18 ?        S      0:00  \_ [kacpi_notify]
   19 ?        S      0:00  \_ [kacpi_hotplug]
   20 ?        S      0:00  \_ [ata/0]
   21 ?        S      0:00  \_ [ata_aux]
   22 ?        S      0:00  \_ [ksuspend_usbd]
   23 ?        S      0:00  \_ [khubd]
   24 ?        S      0:00  \_ [kseriod]
   25 ?        S      0:00  \_ [md/0]
   26 ?        S      0:00  \_ [md_misc/0]
   27 ?        S      0:00  \_ [khungtaskd]
   28 ?        S      0:00  \_ [kswapd0]
   29 ?        SN     0:00  \_ [ksmd]
   30 ?        SN     0:00  \_ [khugepaged]
   31 ?        S      0:00  \_ [aio/0]
   32 ?        S      0:00  \_ [crypto/0]
   37 ?        S      0:00  \_ [kthrotld/0]
   39 ?        S      0:00  \_ [kpsmoused]
   40 ?        S      0:00  \_ [usbhid_resumer]
   71 ?        S      0:00  \_ [kstriped]
  188 ?        S      0:00  \_ [scsi_eh_0]
  190 ?        S      0:00  \_ [scsi_eh_1]
  220 ?        S      0:00  \_ [scsi_eh_2]
  272 ?        S      0:00  \_ [kdmflush]
  273 ?        S      0:00  \_ [kdmflush]
  293 ?        S      0:00  \_ [jbd2/dm-0-8]
  294 ?        S      0:00  \_ [ext4-dio-unwrit]
  853 ?        S      0:00  \_ [kdmflush]
  877 ?        S      0:00  \_ [flush-253:0]
  890 ?        S      0:00  \_ [jbd2/sda1-8]
  891 ?        S      0:00  \_ [ext4-dio-unwrit]
  949 ?        S      0:00  \_ [kauditd]
 1602 ?        S      0:00  \_ [rpciod/0]
 2344 ?        S      0:00  \_ [cqueue]
 2456 ?        S      0:00  \_ [drbd1_worker]
 2831 ?        S      0:00  \_ [glock_workqueue]
 2832 ?        S      0:00  \_ [delete_workqueu]
 2833 ?        S<     0:00  \_ [kslowd001]
 2834 ?        S<     0:00  \_ [kslowd000]
 2846 ?        S      0:00  \_ [dlm_astd]
 2847 ?        S      0:00  \_ [dlm_scand]
 2848 ?        S      0:00  \_ [dlm_recv/0]
 2849 ?        S      0:00  \_ [dlm_send]
 2850 ?        S      0:00  \_ [dlm_recoverd]
    1 ?        Ss     0:01 /sbin/init
  377 ?        S<s    0:00 /sbin/udevd -d
  840 ?        S<     0:00  \_ /sbin/udevd -d
  842 ?        S<     0:00  \_ /sbin/udevd -d
 1182 ?        S<sl   0:00 auditd
 1208 ?        Sl     0:00 /sbin/rsyslogd -i /var/run/syslogd.pid -c 5
 1250 ?        Ss     0:00 rpcbind
 1351 ?        SLsl   0:06 corosync -f
 1394 ?        Ssl    0:00 fenced
 1420 ?        Ssl    0:00 dlm_controld
 1467 ?        Ssl    0:00 gfs_controld
 1539 ?        Ss     0:00 dbus-daemon --system
 1550 ?        S      0:00 avahi-daemon: running [cluster2.local]
 1551 ?        Ss     0:00  \_ avahi-daemon: chroot helper
 1568 ?        Ss     0:00 rpc.statd
 1606 ?        Ss     0:00 rpc.idmapd
 1616 ?        Ss     0:00 cupsd -C /etc/cups/cupsd.conf
 1641 ?        Ss     0:00 /usr/sbin/acpid
 1650 ?        Ss     0:00 hald
 1651 ?        S      0:00  \_ hald-runner
 1692 ?        S      0:00  \_ hald-addon-input: Listening on /dev/input/event3 /dev/input/event1 /dev/input/event0
 1695 ?        S      0:00  \_ hald-addon-acpi: listening on acpid socket /var/run/acpid.socket
 1715 ?        Ssl    0:00 automount --pid-file /var/run/autofs.pid
 1740 ?        Ss     0:00 /usr/sbin/sshd
 1979 ?        Ss     0:00  \_ sshd: root@pts/0
 2207 pts/0    Ss     0:00  \_ -bash
 8528 pts/0    R+     0:00  \_ ps axf
 1748 ?        Ss     0:00 ntpd -u ntp:ntp -p /var/run/ntpd.pid -g
 1828 ?        Ss     0:00 /usr/libexec/postfix/master
 1834 ?        S      0:00  \_ pickup -l -t fifo -u
 1835 ?        S      0:00  \_ qmgr -l -t fifo -u
 1852 ?        Ss     0:00 /usr/sbin/abrtd
 1860 ?        Ss     0:00 abrt-dump-oops -d /var/spool/abrt -rwx /var/log/messages
 1890 ?        Ss     0:00 crond
 1901 ?        Ss     0:00 /usr/sbin/atd
 1913 ?        Ss     0:00 /usr/sbin/certmonger -S -p /var/run/certmonger.pid
 1939 ?        S      0:00 pacemakerd
 1943 ?        Ss     0:02  \_ /usr/libexec/pacemaker/cib
 1944 ?        Ss     0:00  \_ /usr/libexec/pacemaker/stonithd
 1945 ?        Ss     0:01  \_ /usr/lib64/heartbeat/lrmd
 1946 ?        Ss     0:00  \_ /usr/libexec/pacemaker/attrd
 1947 ?        Ss     0:00  \_ /usr/libexec/pacemaker/pengine
 1948 ?        Ss     0:01  \_ /usr/libexec/pacemaker/crmd
 2005 ?        Ss     0:00 /usr/sbin/gdm-binary -nodaemon
 2136 ?        S      0:00  \_ /usr/libexec/gdm-simple-slave --display-id /org/gnome/DisplayManager/Display1 --force-active-vt
 2157 tty1     Ss+    0:02  \_ /usr/bin/Xorg :0 -nr -verbose -audit 4 -auth /var/run/gdm/auth-for-gdm-nrpPGF/database -nolisten tcp vt1
 2485 ?        Ssl    0:00  \_ /usr/bin/gnome-session --autostart=/usr/share/gdm/autostart/LoginWindow/
 2595 ?        S      0:00  |   \_ /usr/libexec/at-spi-registryd
 2683 ?        S      0:00  |   \_ metacity
 2705 ?        S      0:00  |   \_ gnome-power-manager
 2706 ?        S      0:00  |   \_ /usr/libexec/gdm-simple-greeter
 2708 ?        S      0:00  |   \_ /usr/libexec/polkit-gnome-authentication-agent-1
 2788 ?        S      0:00  \_ pam: gdm-password
 2028 tty2     Ss+    0:00 /sbin/mingetty /dev/tty2
 2037 tty3     Ss+    0:00 /sbin/mingetty /dev/tty3
 2050 tty4     Ss+    0:00 /sbin/mingetty /dev/tty4
 2062 tty5     Ss+    0:00 /sbin/mingetty /dev/tty5
 2071 tty6     Ss+    0:00 /sbin/mingetty /dev/tty6
 2346 ?        Sl     0:00 /usr/sbin/console-kit-daemon --no-daemon
 2474 ?        S      0:00 /usr/bin/dbus-launch --exit-with-session
 2482 ?        Ss     0:00 /bin/dbus-daemon --fork --print-pid 5 --print-address 7 --session
 2527 ?        S      0:00 /usr/libexec/devkit-power-daemon
 2546 ?        S      0:00 /usr/libexec/gconfd-2
 2609 ?        Ssl    0:00 /usr/libexec/gnome-settings-daemon --gconf-prefix=/apps/gdm/simple-greeter/settings-manager-plugins
 2615 ?        Ssl    0:00 /usr/libexec/bonobo-activation-server --ac-activate --ior-output-fd=12
 2672 ?        S      0:00 /usr/libexec/gvfsd
 2728 ?        S      0:00 /usr/libexec/polkit-1/polkitd
 2744 ?        S<sl   0:00 /usr/bin/pulseaudio --start --log-target=syslog
 2748 ?        SNl    0:00 /usr/libexec/rtkit-daemon
 2843 ?        D      0:00 /sbin/mount.gfs2 /dev/drbd1 /var/www/html -o rw
 3049 ?        D      0:00 blockdev --flushbufs /dev/drbd/by-res/wwwdata
 7881 ?        Ss     0:00 /usr/sbin/anacron -s

> Did crm_mon report any errors?

[root@cluster2 ~]# crm status
============
Last updated: Wed Oct 31 12:10:31 2012
Last change: Mon Oct 29 17:01:09 2012 via cibadmin on cluster1
Stack: cman
Current DC: cluster2 - partition with quorum
Version: 1.1.7-6.el6-148fccfd5985c5590cc601123c6c16e966b85d14
2 Nodes configured, 2 expected votes
8 Resources configured.
============

Online: [ cluster1 cluster2 ]

 Master/Slave Set: WebDataClone [WebData]
     Masters: [ cluster1 cluster2 ]
 Clone Set: WebIP [ClusterIP] (unique)
     ClusterIP:0        (ocf::heartbeat:IPaddr2):       Started cluster1
     ClusterIP:1        (ocf::heartbeat:IPaddr2):       Started cluster2
 Clone Set: WebFSClone [WebFS]
     WebFS:0    (ocf::heartbeat:Filesystem):    Started cluster2 (unmanaged) FAILED
     Stopped: [ WebFS:1 ]

Failed actions:
    WebFS:1_start_0 (node=cluster1, call=14, rc=-2, status=Timed Out): unknown exec error
    WebFS:0_stop_0 (node=cluster2, call=16, rc=-2, status=Timed Out): unknown exec error
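
As a side note: once the underlying mount problem is fixed, the recorded failures have to be cleared before the cluster will try WebFS again (the failcount has already forced WebFSClone away from both nodes, as the crm_verify output below shows). A minimal sketch using the crm shell and crm_resource that ship with Pacemaker 1.1.7:

# clear the failed actions and failcounts so the policy engine retries the clone
crm resource cleanup WebFSClone

# or per primitive and node, with the lower-level tool
crm_resource --resource WebFS --cleanup --node cluster1
crm_resource --resource WebFS --cleanup --node cluster2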

> Did you check the system logs?

[root@cluster2 ~]# crm_verify -L -V
 warning: unpack_rsc_op: Processing failed op WebFS:1_last_failure_0 on cluster1: unknown exec error (-2)
 warning: unpack_rsc_op: Processing failed op WebFS:0_last_failure_0 on cluster2: unknown exec error (-2)
 warning: common_apply_stickiness: Forcing WebFSClone away from cluster1 after 1000000 failures (max=1000000)
 warning: common_apply_stickiness: Forcing WebFSClone away from cluster1 after 1000000 failures (max=1000000)
 warning: common_apply_stickiness: Forcing WebFSClone away from cluster2 after 1000000 failures (max=1000000)
 warning: common_apply_stickiness: Forcing WebFSClone away from cluster2 after 1000000 failures (max=1000000)
 warning: should_dump_input: Ignoring requirement that WebFS:0_stop_0 comeplete before WebFSClone_stopped_0: unmanaged failed resources cannot prevent clone shutdown

[root@cluster2 ~]# grep -i error /var/log/messages
Oct 31 11:12:25 cluster2 kernel: block drbd1: error receiving ReportState, l: 4!
Oct 31 11:12:29 cluster2 kernel: block drbd1: error receiving ReportState, l: 4!
Oct 31 11:12:56 cluster2 crmd[1948]: error: process_lrm_event: LRM operation WebFS:0_start_0 (15) Timed Out (timeout=20000ms)
Oct 31 11:13:17 cluster2 crmd[1948]: error: process_lrm_event: LRM operation WebFS:0_stop_0 (16) Timed Out (timeout=20000ms)
Oct 31 11:15:51 cluster2 pengine[1947]: warning: unpack_rsc_op: Processing failed op WebFS:0_last_failure_0 on cluster2: unknown exec error (-2)
Oct 31 11:16:16 cluster2 pengine[1947]: warning: unpack_rsc_op: Processing failed op WebFS:0_last_failure_0 on cluster2: unknown exec error (-2)
Oct 31 11:31:16 cluster2 pengine[1947]: warning: unpack_rsc_op: Processing failed op WebFS:0_last_failure_0 on cluster2: unknown exec error (-2)
Oct 31 11:39:05 cluster2 pengine[1947]: warning: unpack_rsc_op: Processing failed op WebFS:0_last_failure_0 on cluster2: unknown exec error (-2)
Oct 31 11:39:30 cluster2 pengine[1947]: warning: unpack_rsc_op: Processing failed op WebFS:0_last_failure_0 on cluster2: unknown exec error (-2)
Oct 31 11:39:42 cluster2 kernel: block drbd1: error receiving ReportState, l: 4!
Oct 31 11:39:44 cluster2 pengine[1947]: warning: unpack_rsc_op: Processing failed op WebFS:0_last_failure_0 on cluster2: unknown exec error (-2)
Oct 31 11:39:44 cluster2 kernel: block drbd1: error receiving ReportState, l: 4!
Oct 31 11:39:53 cluster2 pengine[1947]: warning: unpack_rsc_op: Processing failed op WebFS:0_last_failure_0 on cluster2: unknown exec error (-2)
Oct 31 11:40:13 cluster2 crmd[1948]: warning: status_from_rc: Action 49 (WebFS:1_start_0) on cluster1 failed (target: 0 vs. rc: -2): Error
Oct 31 11:40:13 cluster2 pengine[1947]: warning: unpack_rsc_op: Processing failed op WebFS:1_last_failure_0 on cluster1: unknown exec error (-2)
Oct 31 11:40:13 cluster2 pengine[1947]: warning: unpack_rsc_op: Processing failed op WebFS:0_last_failure_0 on cluster2: unknown exec error (-2)
Oct 31 11:40:14 cluster2 pengine[1947]: warning: unpack_rsc_op: Processing failed op WebFS:1_last_failure_0 on cluster1: unknown exec error (-2)
Oct 31 11:40:14 cluster2 pengine[1947]: warning: unpack_rsc_op: Processing failed op WebFS:0_last_failure_0 on cluster2: unknown exec error (-2)
Oct 31 11:55:15 cluster2 pengine[1947]: warning: unpack_rsc_op: Processing failed op WebFS:1_last_failure_0 on cluster1: unknown exec error (-2)
Oct 31 11:55:15 cluster2 pengine[1947]: warning: unpack_rsc_op: Processing failed op WebFS:0_last_failure_0 on cluster2: unknown exec error (-2)
Oct 31 12:10:15 cluster2 pengine[1947]: warning: unpack_rsc_op: Processing failed op WebFS:1_last_failure_0 on cluster1: unknown exec error (-2)
Oct 31 12:10:15 cluster2 pengine[1947]: warning: unpack_rsc_op: Processing failed op WebFS:0_last_failure_0 on cluster2: unknown exec error (-2)

> There is another strange thing: the 1st server (cluster1) cannot reboot. It
> hangs with the message 'please standby while rebooting the system'. During the
> reboot process there are 2 failed actions, both related to fencing. I
> haven't configured any fencing yet. One of the failed actions is:
> 'stopping cluster
> leaving fence domain .... found dlm lockspace /sys/kernel/dlm/web
> fence_tool : cannot leave due to active system [FAILED]'
>
> Please help me with this problem.
>
> --
> Best Regards,
>
> Soni Maula Harriz
> Database Administrator
> PT. Data Aksara Sangkuriang
>
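
On the fencing part of the question: DLM and GFS2 assume working fencing, and 'Cluster from Scratch' configures STONITH before the GFS2 steps, so running with stonith-enabled="false" is likely part of the problem (the hang in "leaving fence domain" is consistent with that). Purely as an illustration, an IPMI-based setup via the crm shell might look like the sketch below; the agent choice, addresses and credentials are placeholders that have to match the actual hardware, and on the cman stack cluster.conf also needs its fence devices pointed at fence_pcmk:

# illustrative only -- fence_ipmilan assumes each node has an IPMI BMC
crm configure primitive fence-cluster1 stonith:fence_ipmilan \
        params pcmk_host_list="cluster1" ipaddr="10.0.0.241" login="admin" passwd="secret" \
        op monitor interval="60s"
crm configure primitive fence-cluster2 stonith:fence_ipmilan \
        params pcmk_host_list="cluster2" ipaddr="10.0.0.242" login="admin" passwd="secret" \
        op monitor interval="60s"
# keep each fence device off the node it is meant to fence
crm configure location l-fence-cluster1 fence-cluster1 -inf: cluster1
crm configure location l-fence-cluster2 fence-cluster2 -inf: cluster2
crm configure property stonith-enabled="true"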
_______________________________________________
Pacemaker mailing list: Pacemaker@oss.clusterlabs.org
http://oss.clusterlabs.org/mailman/listinfo/pacemaker

Project Home: http://www.clusterlabs.org
Getting started: http://www.clusterlabs.org/doc/Cluster_from_Scratch.pdf
Bugs: http://bugs.clusterlabs.org

--
Best Regards,

Soni Maula Harriz
Database Administrator
PT. Data Aksara Sangkuriang