[Pacemaker] Required resources for stateful clones
Eamon Roque
Eamon.Roque at lex-com.net
Fri May 20 05:42:44 EDT 2011
Hi,
>> On Thu, May 19, 2011 at 5:05 AM, Eamon Roque
<Eamon.Roque at lex-com.net>wrote:
>> Hi,
>>
>> I've put together a cluster of two nodes running a databank without
shared
>> storage. Both nodes replicate data between them, which is taken care of
by
>> the databank itself.
>>
>> I have a resource for the databank and ip. I then created a stateful
clone
>> from the databank resource. I created colocation rules joining the
>> databank-ms-clone and ip:
>>
>> node pgsqltest1
>> node pgsqltest2
>> primitive Postgres-IP ocf:heartbeat:IPaddr2 \
>> params ip="10.19.57.234" cidr_netmask="32" \
>> op monitor interval="30s" \
>> meta is-managed="false"
>> primitive resPostgres ocf:heartbeat:pgsql \
>> params pgctl="/opt/PostgreSQL/9.0/bin/pg_ctl"
>>pgdata="/opt/PostgreSQL/9.0/data" psql="/opt/PostgreSQL/9.0/bin/psql"
>> pgdba="postgres" \
>> op monitor interval="1min" \
>> meta is-managed="false"
>> ms msPostgres resPostgres \
>> meta master-max="1" master-node-max="1" clone-max="2"
>> clone-node-max="1" notify="true" target-role="started"
>> colocation colPostgres inf: Postgres-IP msPostgres:Master
>> order ordPostgres inf: msPostgres:promote Postgres-IP:start
>> property $id="cib-bootstrap-options" \
>> dc-version="1.1.2-2e096a41a5f9e184a1c1537c82c6da1093698eb5" \
>> cluster-infrastructure="openais" \
>> expected-quorum-votes="2" \
>> stonith-enabled="false" \
>> no-quorum-policy="ignore" \
>> last-lrm-refresh="1302707146"
>> rsc_defaults $id="rsc-options" \
>> resource-stickiness="200"
>> op_defaults $id="op_defaults-options" \
>> record-pending="false"
>>
>> The normal postgres agent doesn't support this functionality, but I've
put
>> together my own using the mysql agent as a model. Before running the
script
>> through ocf-tester, I unmanage the postgres resource.
>>
> Could you show how you implemented promote/demote for pgsql?
Sure, let's start with the ultra-simple "promote" function:
#
# These variables are higher up in the file, but they will probably help
with understanding the error of
# my ways.
CRM_MASTER="${HA_SBIN_DIR}/crm_master"
ATTRD_UPDATER="${HA_SBIN_DIR}/attrd_updater"
pgsql_promote() {
local output
local rc
local CHECK_PG_SQL
local COMPLETE_STANDBY_QUERY
local PROMOTE_SCORE_HIGH
local MOD_PSQL_M_FORMAT
PROMOTE_SCORE_HIGH=1000
CHECK_PG_SQL="SELECT pg_is_in_recovery()"
MOD_PSQL_M_FORMAT="$OCF_RESKEY_psql -Atc"
COMPLETE_STANDBY_QUERY="$MOD_PSQL_M_FORMAT \"$CHECK_PG_SQL\""
output=$(su - $OCF_RESKEY_pgdba -c "$COMPLETE_STANDBY_QUERY" 2>&1)
echo $output
rc=$?
case $output in
f)
ocf_log debug "PostgreSQL Node is running in
Master mode..."
return $OCF_RUNNING_MASTER
;;
t)
ocf_log debug "PostgreSQL Node is in Hot_Standby
mode..."
return $OCF_SUCCESS
;;
*)
ocf_log err "Critical error in $CHECK_PG_SQL:
$output"
return $OCF_ERR_GENERIC
;;
esac
#
# "Real" promotion is handled here.
# The trigger file is created and we check for "recovery.conf" on the
host.
# If we can't find it, then the file will be copied from the HA-Config
into postgres' data folder.
#
if ! touch $OCF_RESKEY_trigger_file; then
ocf_log err "$OCF_RESKEY_trigger_file could not be created!"
return $OCF_ERR_GENERIC
fi
if [ ! -f $OCF_RESKEY_recovery_conf ]; then
ocf_log err "$OCF_RESKEY_recovery_conf doesn't exist!"
cp $OCF_RESKEY_recovery_conf_ersatz $OCF_RESKEY_pgdata
return $OCF_SUCCESS
fi
# If both file exist or can be created, then the failover fun can start.
ocf_log info "$OCF_RESKEY_trigger_file was created."
ocf_log info "$OCF_RESKEY_recovery_conf exists and can be copied to the
correct location."
# Sometimes, the master needs a bit of time to take the reins. So...
while :
do
pgsql_monitor warn
rc=$?
if [ $rc -eq $OCF_RUNNING_MASTER ]; then
break;
fi
ocf_log debug "Postgres Server could not be promoted. Please
wait..."
sleep 1
done
ocf_log info "Postgres Server has been promoted. Please check on the
previous master."
#################################
#Attributes Update: #
#################################
$ATTRD_UPDATER -n $PGSQL_STATUS_NAME -v \"PRI\" || exit $(echo "Eh!
Attrd_updater is not working!")
#############################################
# Resource stickiness pumped up to 1000 : #
#############################################
$CRM_MASTER -v $PROMOTE_WERT_HOCH || exit $(echo "crm_master could not
change the Master's status!")
############
# Success! #
############
return $OCF_SUCCESS
}
######################################################################################################
Thanks!
Éamon
>> Unfortunately, promote/demote doesn't work. ocf-tester tries to use the
>> "crm_attribute -N pgsql1 -n master-pgrql-replication-agent -l reboot -v
>> 100", but the (unmanaged) resources don't accept the score change.
>>
>> I'm pretty sure that I just need to be hit with a clue stick and would
be
>> grateful for any help.
>>
>> Thanks,
>>
>> ?amon
>>
--
Serge Dubrouski.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.clusterlabs.org/pipermail/pacemaker/attachments/20110520/dd801d1f/attachment-0003.html>
More information about the Pacemaker
mailing list