<tt><font size=2><br>

> On Fri, May 20, 2011 at 7:09 AM, Eamon Roque &lt;Eamon.Roque@lex-com.net&gt; wrote:<br>

> <br>

> ><br>

> > ><br>

> > > On Fri, May 20, 2011 at 3:42 AM, Eamon Roque &lt;Eamon.Roque@lex-com.net<br>

> > >wrote:<br>

> > ><br>

> > > > Hi,<br>

> > > ><br>

> > > ><br>

> > > > >> On Thu, May 19, 2011 at 5:05 AM, Eamon Roque

<<br>

> > Eamon.Roque@lex-com.net<br>

> > > > >wrote:<br>

> > > ><br>

> > > > >> Hi,<br>

> > > > >><br>

> > > > >> I've put together a cluster of two nodes running

a databank without<br>

> > > > shared<br>

> > > > >> storage. Both nodes replicate data between

them, which is taken care<br>

> > of<br>

> > > > by<br>

> > > > >> the databank itself.<br>

> > > > >><br>

> > > > >> I have a resource for the databank and ip.

I then created a stateful<br>

> > > > clone<br>

> > > > >> from the databank resource. I created colocation

rules joining the<br>

> > > > >> databank-ms-clone and ip:<br>

> > > > >><br>

> > > > >> node pgsqltest1<br>

> > > > >> node pgsqltest2<br>

> > > > >> primitive Postgres-IP ocf:heartbeat:IPaddr2

\<br>

> > > > >>         params ip="10.19.57.234"

cidr_netmask="32" \<br>

> > > > >>         op monitor interval="30s"

\<br>

> > > > >>         meta is-managed="false"<br>

> > > > >> primitive resPostgres ocf:heartbeat:pgsql

\<br>

> > > > >>         params pgctl="/opt/PostgreSQL/9.0/bin/pg_ctl"<br>

> > > > >>pgdata="/opt/PostgreSQL/9.0/data"

psql="/opt/PostgreSQL/9.0/bin/psql"<br>

> > > > >> pgdba="postgres" \<br>

> > > > >>         op monitor interval="1min"

\<br>

> > > > >>         meta is-managed="false"<br>

> > > > >> ms msPostgres resPostgres \<br>

> > > > >>         meta master-max="1"

master-node-max="1" clone-max="2"<br>

> > > > >> clone-node-max="1" notify="true"

target-role="started"<br>

> > > > >> colocation colPostgres inf: Postgres-IP msPostgres:Master<br>

> > > > >> order ordPostgres inf: msPostgres:promote

Postgres-IP:start<br>

> > > > >> property $id="cib-bootstrap-options"

\<br>

> > > > >>         dc-version="1.1.2-2e096a41a5f9e184a1c1537c82c6da1093698eb5"<br>

> > \<br>

> > > > >>         cluster-infrastructure="openais"

\<br>

> > > > >>         expected-quorum-votes="2"

\<br>

> > > > >>        stonith-enabled="false"

\<br>

> > > > >>        no-quorum-policy="ignore"

\<br>

> > > > >>         last-lrm-refresh="1302707146"<br>

> > > > >> rsc_defaults $id="rsc-options" \<br>

> > > > >>         resource-stickiness="200"<br>

> > > > >> op_defaults $id="op_defaults-options"

\<br>

> > > > >>         record-pending="false"<br>

> > > > >><br>

> > > > >> The normal postgres agent doesn't support

this functionality, but<br>

> > I've<br>

> > > > put<br>

> > > > >> together my own using the mysql agent as a

model. Before running the<br>

> > > > script<br>

> > > > >> through ocf-tester, I unmanage the postgres

resource.<br>

> > > > >><br>

> > > ><br>

> > > > > Could you show how you implemented promote/demote

for pgsql?<br>

> > > ><br>

> > > > Sure, let's start with the ultra-simple "promote"

function:<br>

> > > ><br>

> > > > #<br>

> > > > # These variables are higher up in the file, but they

will probably<br>

> > help<br>

> > > > with understanding the error of<br>

> > > > # my ways.<br>

> > > ><br>

> > > > CRM_MASTER="${HA_SBIN_DIR}/crm_master"<br>

> > > > ATTRD_UPDATER="${HA_SBIN_DIR}/attrd_updater"<br>

> > > ><br>

> > > > pgsql_promote() {<br>

> > > >         local output<br>

> > > >         local rc<br>

> > > >         local CHECK_PG_SQL<br>

> > > >         local COMPLETE_STANDBY_QUERY<br>

> > > >         local PROMOTE_SCORE_HIGH<br>

> > > >         local MOD_PSQL_M_FORMAT<br>

> > > ><br>

> > > ><br>

> > > >         PROMOTE_SCORE_HIGH=1000<br>

> > > >         CHECK_PG_SQL="SELECT

pg_is_in_recovery()"<br>

> > > >         MOD_PSQL_M_FORMAT="$OCF_RESKEY_psql

-Atc"<br>

> > > >         COMPLETE_STANDBY_QUERY="$MOD_PSQL_M_FORMAT

\"$CHECK_PG_SQL\""<br>

> > > ><br>

> > > >         output=$(su - $OCF_RESKEY_pgdba

-c "$COMPLETE_STANDBY_QUERY"<br>

> > 2>&1)<br>

> > > >         echo $output<br>

> > > ><br>

> > > >         rc=$?<br>

> > > ><br>

> > > >         case $output in<br>

> > > >                

f)<br>

> > > >                

        ocf_log debug "PostgreSQL Node is running

in<br>

> > Master<br>

> > > > mode..."<br>

> > > >                

        return $OCF_RUNNING_MASTER<br>

> > > >                

;;<br>

> > > ><br>

> > > >                

t)<br>

> > > >                

        ocf_log debug "PostgreSQL Node is in<br>

> > Hot_Standby<br>

> > > > mode..."<br>

> > > >                

        return $OCF_SUCCESS<br>

> > > >                

;;<br>

> > > ><br>

> > > >                

*)<br>

> > > >                

        ocf_log err "Critical error in $CHECK_PG_SQL:<br>

> > > > $output"<br>

> > > >                

        return $OCF_ERR_GENERIC<br>

> > > >                

;;<br>

> > > >         esac<br>

> > > ><br>

> > > > #<br>

> > > > # "Real" promotion is handled here.<br>

> > > > # The trigger file is created and we check for "recovery.conf"

on the<br>

> > host.<br>

> > > > # If we can't find it, then the file will be copied

from the HA-Config<br>

> > into<br>

> > > > postgres' data folder.<br>

> > > > #<br>

> > > ><br>

> > > > if ! touch $OCF_RESKEY_trigger_file; then<br>

> > > >         ocf_log err "$OCF_RESKEY_trigger_file

could not be created!"<br>

> > > >         return $OCF_ERR_GENERIC<br>

> > > > fi<br>

> > > ><br>

> > > > if [ ! -f $OCF_RESKEY_recovery_conf ]; then<br>

> > > >         ocf_log err "$OCF_RESKEY_recovery_conf

doesn't exist!"<br>

> > > >         cp $OCF_RESKEY_recovery_conf_ersatz

$OCF_RESKEY_pgdata<br>

> > > >         return $OCF_SUCCESS<br>

> > > > fi<br>

> > ><br>

> > ><br>

> > > Why do you need this? As far as I know when you switch standby

database<br>

> > to<br>

> > > primary using trigger file recovery.conf gets renamed to

recovery.done.<br>

> > If<br>

> > > you rename it back DB will be put into standby mode after

restart. We are<br>

> > > talking about streaming replication, right?<br>

> > ><br>

> > ><br>

> > Right. The order is wrong. According to the Binary Replication

tutorial on<br>

> > the postgres wiki, when I perform a failover with a trigger file,

it wants<br>

> > to find a "recovery.conf", which it then processes

(checking the archive for<br>

> > missing updates etc.) and renames (after noticing the trigger

file).<br>

> ><br>

> > I assumed that this would work in exactly the same way with Streaming<br>

> > Replication.<br>

> ><br>

> > Am I wrong?<br>

> <br>

> <br>

> I think so. You have to have recovery.conf when you start your standby,

not<br>

> master. Actually instance that has recovery.conf always tries to start

as<br>

> standby. You have to have master's IP address there and path to archived

log<br>

> files.<br>

> <br>

> </font></tt>

<br>

<br><tt><font size=2>So the failover behavior in binary replication and

streaming replication is different? Or is the wiki entry just antiquated?</font></tt>

<br>

<br><tt><font size=2><br>

> ><br>

> ><br>

> > > ><br>

> > > ><br>

> > > > # If both file exist or can be created, then the failover

fun can<br>

> > start.<br>

> > > ><br>

> > > > ocf_log info "$OCF_RESKEY_trigger_file was created."<br>

> > > > ocf_log info "$OCF_RESKEY_recovery_conf exists

and can be copied to the<br>

> > > > correct location."<br>

> > > ><br>

> > > > # Sometimes, the master needs a bit of time to take

the reins. So...<br>

> > > ><br>

> > > > while :<br>

> > > > do<br>

> > > >         pgsql_monitor warn<br>

> > > >         rc=$?<br>

> > > ><br>

> > > >         if [ $rc -eq $OCF_RUNNING_MASTER

]; then<br>

> > > >                

break;<br>

> > > >         fi<br>

> > > ><br>

> > > >         ocf_log debug "Postgres

Server could not be promoted. Please<br>

> > > > wait..."<br>

> > > ><br>

> > > >         sleep 1<br>

> > > ><br>

> > > > done<br>

> > > ><br>

> > > > ocf_log info "Postgres Server has been promoted.

Please check on the<br>

> > > > previous master."<br>

> > > ><br>

> > > > #################################<br>

> > > > #Attributes Update:          

  #<br>

> > > > #################################<br>

> > > ><br>

> > > > $ATTRD_UPDATER -n $PGSQL_STATUS_NAME -v \"PRI\"

|| exit $(echo "Eh!<br>

> > > > Attrd_updater is not working!")<br>

> > > ><br>

> > > > #############################################<br>

> > > > # Resource stickiness pumped up to 1000 :  #<br>

> > > > #############################################<br>

> > > ><br>

> > > > $CRM_MASTER -v $PROMOTE_WERT_HOCH || exit $(echo "crm_master

could not<br>

> > > > change the Master's status!")<br>

> > > ><br>

> > > > ############<br>

> > > > # Success! #<br>

> > > > ############<br>

> > > ><br>

> > > > return $OCF_SUCCESS<br>

> > > ><br>

> > > > }<br>

> > > ><br>

> > > ><br>

> > > ><br>

> > ><br>

> > <br>

> ######################################################################################################<br>

> > > ><br>

> > > > Thanks!<br>

> > > ><br>

> > > ><br>

> > > And what about demote? Switching standby into primary using

trigger files<br>

> > > changes TIMELINE in the DB and that invalidates all other

standby<br>

> > databases<br>

> > > as well as previous master database. After that you have

to restore them<br>

> > > from a fresh backup made on new master. This particular

behavior stopped<br>

> > me<br>

> > > from implementing Master/Slave functionality in pgsql RA

so far.<br>

> > ><br>

> > > BTW, why pgsql is set to is-managed="false" in

your configuration. With<br>

> > this<br>

> > > setting cluster will keep monitoring it but won't take any

other actions<br>

> > > AFAIK.<br>

> ><br>

> > Demote? Well, seeing as neither promote nor demote actually worked

for me,<br>

> > I thought I would start small.<br>

> ><br>

> <br>

> It doesn't work because you have it in unmanaged state I think.<br>

> </font></tt>

<br>

<br><tt><font size=2>I'm using the ocf-tester utility to test the agent.

Won't there be a conflict if I try and have the cluster manage the resources

and then try and wrest its control away with my own testing agent?</font></tt>

<br>

<br><tt><font size=2><br>

> <br>

> ><br>

> > As far as the trigger file switching goes, you're of course completely<br>

> > right. This behavior isn't really a big deal in my environment,

as it's<br>

> > meant as more of a test and we want to bring back the demoted servers

up<br>

> > manually, but I can see that it would cause a lot of problems

in a more<br>

> <br>

> <br>

> That means that demote operation should stop master server which isn't

the<br>

> best behavior IMHO.<br>

> </font></tt>

<br>

<br><tt><font size=2>I don't disagree. This was the policy that was "agreed"

upon, so it's more of a political issue, really.</font></tt>

<br>

<br><tt><font size=2>Would you prefer putting it into RO mode?</font></tt>

<br><tt><font size=2><br>

> <br>

> <br>

> > complex environment. When I tested the failover functionality

without<br>

> > pacemaker, I have to perform a fresh backup even if I waited

less than 30s<br>

> > to bring the old master back up as a standby.<br>

> ><br>

> > I guess that with 9.1 this will be easier...<br>

> ><br>

> > I unmanaged the resources so that my test agent would handle

them. Is this<br>

> > incorrect?<br>

> ><br>

> <br>

> Again I think you are wrong. In this mode pacemaker won't call your

RA to<br>

> promote/demote or failover your resource.<br>

> <br>

> <br>

> ><br>

> ><br>

> > ><br>

> > ><br>

> > > &Eacute;amon<br>

> > > ><br>

> > > ><br>

> > > ><br>

> > > > >> Unfortunately, promote/demote doesn't work.

ocf-tester tries to use<br>

> > the<br>

> > > > >> "crm_attribute -N pgsql1 -n master-pgrql-replication-agent

-l reboot<br>

> > -v<br>

> > > > >> 100", but the (unmanaged) resources don't

accept the score change.<br>

> > > > >><br>

> > > > >> I'm pretty sure that I just need to be hit

with a clue stick and<br>

> > would<br>

> > > > be<br>

> > > > >> grateful for any help.<br>

> > > > >><br>

> > > > >> Thanks,<br>

> > > > >><br>

> > > > >> &Eacute;amon<br>

> > > > >><br>

> > > ><br>

> > > ><br>

> > > ><br>

> > > > --<br>

> > > > Serge Dubrouski.<br>

> > > > _______________________________________________<br>

> > > > Pacemaker mailing list: Pacemaker@oss.clusterlabs.org<br>

> > > > </font></tt><a href=http://oss.clusterlabs.org/mailman/listinfo/pacemaker><tt><font size=2>http://oss.clusterlabs.org/mailman/listinfo/pacemaker</font></tt></a><tt><font size=2><br>

> > > ><br>

> > > > Project Home: </font></tt><a href=http://www.clusterlabs.org/><tt><font size=2>http://www.clusterlabs.org</font></tt></a><tt><font size=2><br>

> > > > Getting started:<br>

> > </font></tt><a href=http://www.clusterlabs.org/doc/Cluster_from_Scratch.pdf><tt><font size=2>http://www.clusterlabs.org/doc/Cluster_from_Scratch.pdf</font></tt></a><tt><font size=2><br>

> > > > Bugs:<br>

> > > ><br>

> > </font></tt><a href="http://developerbugs.linux-foundation.org/enter_bug.cgi?product=Pacemaker"><tt><font size=2>http://developerbugs.linux-foundation.org/enter_bug.cgi?product=Pacemaker</font></tt></a><tt><font size=2><br>

> ><br>

> > > ><br>

> > > ><br>

> > ><br>

> > ><br>

> > > --<br>

> > > Serge Dubrouski.<br>

> > > -------------- next part --------------<br>

> > > An HTML attachment was scrubbed...<br>

> > > URL: <</font></tt><a href=http://oss.clusterlabs.org/pipermail/pacemaker/attachments/><tt><font size=2>http://oss.clusterlabs.org/pipermail/pacemaker/attachments/</font></tt></a><tt><font size=2><br>

> > > 20110520/e1f26230/attachment.html><br>

> > ><br>

> > > ------------------------------<br>

> ><br>

> > ><br>

> > > _______________________________________________<br>

> > > Pacemaker mailing list<br>

> > > Pacemaker@oss.clusterlabs.org<br>

> > ><br>

> > </font></tt><a href=http://oss.clusterlabs.org/mailman/listinfo/pacemaker><tt><font size=2>http://oss.clusterlabs.org/mailman/listinfo/pacemaker</font></tt></a><tt><font size=2><br>

> > ><br>

> > ><br>

> > > End of Pacemaker Digest, Vol 42, Issue 53<br>

> > > *****************************************<br>

> ><br>

> > _______________________________________________<br>

> > Pacemaker mailing list: Pacemaker@oss.clusterlabs.org<br>

> > </font></tt><a href=http://oss.clusterlabs.org/mailman/listinfo/pacemaker><tt><font size=2>http://oss.clusterlabs.org/mailman/listinfo/pacemaker</font></tt></a><tt><font size=2><br>

> ><br>

> > Project Home: </font></tt><a href=http://www.clusterlabs.org/><tt><font size=2>http://www.clusterlabs.org</font></tt></a><tt><font size=2><br>

> > Getting started: </font></tt><a href=http://www.clusterlabs.org/doc/Cluster_from_Scratch.pdf><tt><font size=2>http://www.clusterlabs.org/doc/Cluster_from_Scratch.pdf</font></tt></a><tt><font size=2><br>

> > Bugs:<br>

> > </font></tt><a href="http://developerbugs.linux-foundation.org/enter_bug.cgi?product=Pacemaker"><tt><font size=2>http://developerbugs.linux-foundation.org/enter_bug.cgi?product=Pacemaker</font></tt></a><tt><font size=2><br>

> ><br>

> ><br>

> <br>

> <br>

> -- <br>

> Serge Dubrouski.<br>

> -------------- next part --------------<br>

> An HTML attachment was scrubbed...<br>

> URL: <</font></tt><a href=http://oss.clusterlabs.org/pipermail/pacemaker/attachments/><tt><font size=2>http://oss.clusterlabs.org/pipermail/pacemaker/attachments/</font></tt></a><tt><font size=2><br>

> 20110520/19777245/attachment.html><br>

> <br>

> ------------------------------<br>

> <br>

> _______________________________________________<br>

> Pacemaker mailing list<br>

> Pacemaker@oss.clusterlabs.org<br>

> </font></tt><a href=http://oss.clusterlabs.org/mailman/listinfo/pacemaker><tt><font size=2>http://oss.clusterlabs.org/mailman/listinfo/pacemaker</font></tt></a><tt><font size=2><br>

> <br>

> <br>

> End of Pacemaker Digest, Vol 42, Issue 55<br>

> *****************************************<br>

</font></tt>