[Pacemaker] new RA: http_ping
Dejan Muhamedagic
dejanmm at fastmail.fm
Thu Aug 16 05:14:32 EDT 2012
Hi,
On Wed, Aug 15, 2012 at 12:53:33PM +0000, Nicolai Langfeldt wrote:
> Hi,
>
> I've written a new RA based on what I learnt from the ping and nginx RAs
> for monitoring frontend-proxy-stacks.
>
> It is attached here for your consideration - and indeed - critique. I
> am hopeful that it makes it into the pacemaker distribution some time.
Did you consider using the existing monitor facility in the
apache RA? It can be sourced from
/usr/lib/ocf/lib/heartbeat/http-mon.sh
Somebody was already up to this, but it seems like they gave up.
More details here:
https://github.com/ClusterLabs/resource-agents/pull/22
Thanks,
Dejan
> Regards,
> Nicolai
> #!/bin/sh
> #
> # High-Availability httpd daemon monitoring OCF agent
> #
> # nginx
> #
> # Description: monitors http servers (no start, no stop).
> #
> # Author: Nicolai Langfeldt, Broadnet AS
> #
> # Started out as nginx agent. Heavily repurposed.
> #
> # Nginx RA lists these authors:
> # Alan Robertson
> # Dejan Muhamedagic
>
> #
> # Support: linux-ha at lists.linux-ha.org
> #
> # License: GNU General Public License (GPL)
> #
> # Copyright:
> # Some parts (C) 2012 Broadnet AS
> # Some other parts (C) 2002-2010 International Business Machines
> #
> #
> # Patches are being accepted ;-)
> #
> # Requires *curl*, wget and GET are not sane/flexible enough.
> #
> # Usage example:
> #
> # N-node proxy cluster. Pacemaker manages production virtual IP
> # (vip). HAproxy started by init script on all N nodes. HAproxy is
> # used several times in the frontend stack and is needed on all nodes
> # at all times for load distribution between the proxies.
> #
> # Production VIP must never be started on a node where HAproxy is not
> # running but can run on any node where HAproxy does run.
> #
> # My solution: Create this monitoring agent inspired by the ping and
> # nginx agents and use it the same way as the ping agent to control
> # where the VIP agent can be run.
>
> # NOTE: This agent will not start or stop the resource. It is assumed
> # that the resource is managed by an init script and warnings about
> # failures are sent by something else (like nagios).
>
> # 1. Configure status URL in haproxy using a randomized URL to hide
> # the status page from random probers (I wanted the status to be
> # available over the network too). "pwgen" is useful for generating
> # a random url.
> #
> # listen httpsservice 0.0.0.0:80
> # ...
> # stats uri /phei1SaeIevoh4eM
> #
> # 2. Check if working by directing a browser there
> #
> # 3. Configure pacemaker
> #
> # primitive vip ocf:heartbeat:IPaddr \
> # params ip="192.168.5.8"
> #
> # primitive happing ocf:pacemaker:http_ping \
> # params name="happing" testurl="http://localhost/phei1SaeIevoh4eM" \
> # op monitor interval="1s" depth="0"
> #
> # clone happingall happing \
> # meta target-role="Started"
> #
> # location locVip vip \
> # rule $id="locVipRule" -INF: not_defined happing
> #
> # If your frontend runs for example a
> # haproxy/nginx/varnish/whatever mix: set up http pings for all of
> # the ones that _have_ to be running and combine in the location
> # rule like this:
> #
> # location locVip vip \
> # rule $id="locVipRule" -INF: not_defined happing or not_defined nxping
> #
> # 4. Use crm_mon -A to monitor the vip and the happing token. Document that the
> # token is supposed to be defined on all nodes during normal operation.
> #
> #
> # OCF parameters:
> # OCF_RESKEY_testurl
> # OCF_RESKEY_bindaddr
> # OCF_RESKEY_testregex
> # OCF_RESKEY_name
> # OCF_RESKEY_timeout
> # OCF_RESKEY_dampen
> # OCF_RESKEY_multiplier
> # OCF_RESKEY_curlopts
> # OCF_RESKEY_auth
> # OCF_RESKEY_curl
> #
>
> # Locate the OCF shell function library. Both variables may be preset
> # in the environment, in which case the preset value is kept.
> : "${OCF_ROOT:=/usr/lib/ocf}"
> : "${OCF_FUNCTIONS_DIR=$OCF_ROOT/lib/heartbeat}"
>
> # No defaults: $OCF_RESKEY_testurl
>
> # Defaults for the optional instance parameters. The expansions are
> # quoted so that a configured value is never word-split or glob-expanded
> # while the default is being applied.
> : "${OCF_RESKEY_bindaddr:=lo}"
> : "${OCF_RESKEY_testregex:=}"
> : "${OCF_RESKEY_name:=httpping}"
> : "${OCF_RESKEY_timeout:=1s}"
> : "${OCF_RESKEY_dampen:=5s}"
> : "${OCF_RESKEY_multiplier:=1000}"
> : "${OCF_RESKEY_curlopts:=}"
> : "${OCF_RESKEY_auth:=}"
> : "${OCF_RESKEY_curl:=curl}"
>
> # Quoted so that a library directory containing whitespace still resolves.
> . "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs"
> HA_VARRUNDIR=${HA_VARRUN}
>
> # Convert OCF_RESKEY_timeout (for example "1s", "500ms", "2min", "1h")
> # into whole seconds and shave 10% off it. The result is what CURLOPTS
> # later passes to curl as --max-time.
> # This kind of check/recalculation should be provided by ocf-shellfuncs
> #
> # Only the first run of digits is used. A value without any digits falls
> # back to 1, the numeric part of the "1s" default.
> integer=$(printf '%s\n' "${OCF_RESKEY_timeout}" | grep -Eo '[0-9]+' | head -n 1)
> : "${integer:=1}"
> case ${OCF_RESKEY_timeout} in
>     # Round milliseconds up so a sub-second value does not collapse to 0.
>     *[0-9]ms|*[0-9]msec) OCF_RESKEY_timeout=$(( (integer + 999) / 1000 ));;
>     *[0-9]m|*[0-9]min)   OCF_RESKEY_timeout=$(( integer * 60 ));;
>     *[0-9]h|*[0-9]hr)    OCF_RESKEY_timeout=$(( integer * 60 * 60 ));;
>     *)                   OCF_RESKEY_timeout=$integer;;
> esac
>
> # Reduce timeout by 10%
> NEW=$(( OCF_RESKEY_timeout * 9 / 10 ))
>
> # A zero timeout means "infinite", so never let the result drop below
> # one second. For every input of two seconds or more the integer
> # division above already yields a value smaller than the original.
> if [ "$NEW" -lt 1 ]; then
>     NEW=1
> fi
> OCF_RESKEY_timeout=$NEW
>
> #######################################################################
> #
> # Configuration options - usually you don't need to change these
> #
> #######################################################################
>
> # default options for http clients
> # NB: We _always_ test a local resource, so it should be
> # safe to connect from the local interface.
> #
> # CURLOPTS is deliberately one flat string: it is expanded unquoted at
> # the call sites so that it splits into separate curl arguments.
>
> CURLOPTS="-Ssk --interface ${OCF_RESKEY_bindaddr} --max-time ${OCF_RESKEY_timeout} ${OCF_RESKEY_curlopts}"
>
> #
> # End of Configuration options
> #######################################################################
>
> # Name this agent was invoked as; used in log messages.
> CMD=$(basename "$0")
>
> # Print the list of supported actions on stdout and terminate the
> # agent. $1 is used as the exit status, so callers pass an OCF return
> # code ($OCF_SUCCESS for help requests, an error code otherwise).
> usage() {
> cat <<EOM
> usage: $0 action
>
> action:
> start "start" http_ping agent(or rather, if it's running report it as such)
>
> stop "stop" http_ping agent
>
> status human readable web server status
>
> monitor return TRUE if the http server appears to be working.
> A testurl must be given and this URL must be configured
> and working.
>
> meta-data show meta data message
>
> validate-all validate the instance parameters
> EOM
> exit $1
> }
>
> #
> # run the http client
> #
> # Runs $OCF_RESKEY_curl with the given arguments and returns its exit
> # status. When OCF_RESKEY_auth is set, the credentials are handed to
> # curl as a "-u" line on stdin (read through "-K -") rather than as a
> # command-line argument.
> #
> curl_func() {
>     if [ -z "$OCF_RESKEY_auth" ]; then
>         $OCF_RESKEY_curl "$@"
>     else
>         # printf '%s' passes the credentials through verbatim; the echo
>         # builtin of some /bin/sh implementations expands backslash
>         # escapes and would corrupt such a password.
>         printf '%s\n' "-u $OCF_RESKEY_auth" |
>             $OCF_RESKEY_curl -K - "$@"
>     fi
> }
>
>
> silent_status() {
>
> case $OCF_RESKEY_testregex in
> '') HTTP_CODE=$(curl_func -o/dev/null $CURLOPTS \
> --write-out '%{http_code}\n' \
> "$OCF_RESKEY_testurl" 2>/dev/null)
> curlexit=$?
> # Check headers file since we don't have any RE. The last header should
> # be a 200. There can be redirects before that.
> case $curlexit in
> 0) case $HTTP_CODE in
> 200) return 0;;
> esac
> return 1;;
> *) curlexit=$OCF_ERR_GENERIC;;
> esac
> ;;
>
> *) curl_func -o- $CURLOPTS "$OCF_RESKEY_testurl" |
> grep -Eiq i"$OCF_RESKEY_testregex" >/dev/null
> curlexit=$?
> ;;
> esac
>
> return $curlexit
>
> }
>
>
> start() {
> silent_status
> rc=$?
> case $rc in
>
> 0) attrd_updater -U $OCF_RESKEY_multiplier -n $OCF_RESKEY_name -d $OCF_RESKEY_dampen
> ocf_log info "start: test worked, set token."
> # return $OCF_SUCCESS
> ;;
>
> *) attrd_updater -D -n $OCF_RESKEY_name -d $OCF_RESKEY_dampen
> ocf_log info "start: test failed, deleting token."
> # return $OCF_ERR_GENERIC
> ;;
>
> esac
>
> return $OCF_SUCCESS
> }
>
>
> stop() {
> ocf_log info "http_ping stoping"
> attrd_updater -D -n $OCF_RESKEY_name -d $OCF_RESKEY_dampen
> return $OCF_SUCCESS
> }
>
>
> status() {
> silent_status
> rc=$?
> case $rc in
> 0) ocf_log info "test ($OCF_RESKEY_testurl) worked";;
> *) ocf_log info "test ($OCF_RESKEY_testurl) failed"
> esac
>
> return $OCF_SUCCESS
> }
>
>
> monitor() {
> # Monitor action always succeeds. It just adds or removes the named attribute.
>
> silent_status
> if
> [ $? -ne 0 ]
> then
> ocf_log info "$CMD not running"
> attrd_updater -D -n $OCF_RESKEY_name -d $OCF_RESKEY_dampen
> return $OCF_SUCCESS # $OCF_ERR_GENERIC
> fi
>
> attrd_updater -q -U $OCF_RESKEY_multiplier -n $OCF_RESKEY_name -d $OCF_RESKEY_dampen
> return $OCF_SUCCESS
> }
>
> metadata(){
> cat <<END
> <?xml version="1.0"?>
> <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
> <resource-agent name="http_ping">
> <version>1.0</version>
> <longdesc lang="en">
> This is the resource _monitor_ agent for any httpd by polling a status
> page.
>
> It provides only one level of testing, get a URL and optionaly look
> for a regular expression. The HTTP GET should be on this side of
> instant, the default timeout is one second. We allow a monitoring
> interval down to one second.
> </longdesc>
> <shortdesc lang="en">Monitors a http server</shortdesc>
>
> <parameters>
>
> <parameter name="testurl">
> <longdesc lang="en">
> URL to test. There is no default. You will need to configure a
> status or "ping" url in your http server.
> </longdesc>
> <shortdesc lang="en">test url</shortdesc>
> <content type="string" />
> </parameter>
>
> <parameter name="testregex">
> <longdesc lang="en">
> Regular expression (egrep) to match in the output of testurl. Case
> insensitive. If no testregex is given then the HTTP status code is
> used. It must be 200 otherwise the test fails.
>
> If you want the test to succeed as long as the server responds in any
> way set testregex to ".".
>
> </longdesc>
> <shortdesc lang="en">monitor regular expression</shortdesc>
> <content type="string" default=""/>
> </parameter>
>
> <parameter name="bindaddr">
> <longdesc lang="en">
> By default curl is run with "--interface lo". If you can't reach the
> web server from the loopback (URL containing "localhost") specify the
> interface name or address to bind to with this option. Try
> 'bindaddr="0.0.0.0"' if the URL is not a localhost URL.
> </longdesc>
> <shortdesc lang="en">network bind</shortdesc>
> <content type="string" default="lo"/>
> </parameter>
>
> <parameter name="name" unique="0">
> <longdesc lang="en">
> The name of the attribute to set. This is the name to be used in the
> constraints.
> </longdesc>
> <shortdesc lang="en">Attribute name</shortdesc>
> <content type="string" default="httpping"/>
> </parameter>
>
> <parameter name="multiplier" unique="0">
> <longdesc lang="en">
> The number by which to set if the httpd is up.
> </longdesc>
> <shortdesc lang="en">Value multiplier</shortdesc>
> <content type="integer" default="1000"/>
> </parameter>
>
> <parameter name="timeout" unique="0">
> <longdesc lang="en">
> How long (in seconds) to wait before declaring a test lost
> </longdesc>
> <shortdesc lang="en">test timeout in seconds</shortdesc>
> <content type="integer" default="1s"/>
> </parameter>
>
> <parameter name="dampen" unique="0">
> <longdesc lang="en">
> Amount of time to wait (dampen) before setting any new value.
> </longdesc>
> <shortdesc lang="en">Dampening interval</shortdesc>
> <content type="integer" default="5s"/>
> </parameter>
>
> </parameters>
>
> <actions>
> <action name="start" timeout="1s" />
> <action name="stop" timeout="1s" />
> <action name="status" timeout="1s" />
> <action name="monitor" timeout="1s" depth="0" interval="1s" />
> <action name="meta-data" timeout="5" />
> <action name="validate-all" timeout="5" />
> </actions>
> </resource-agent>
> END
>
> exit $OCF_SUCCESS
> }
>
> # #####################################################################
>
> validate_all() {
> if
> [ -z $STATUSURL ]
> then
> ocf_log err "No testurl given!"
> exit $OCF_ERR_PARAM
> fi
>
> case $STATUSURL in
> http://*/*) ;;
> https://*/*) ;;
> *) ocf_log err "Invalid STATUSURL $STATUSURL"
> exit $OCF_ERR_ARGS ;;
> esac
>
> if ! $OCF_RESKEY_curl --help >/dev/null 2>/dev/null; then
> ocf_log err "curl ($OCF_RESKEY_curl) binary not found! Please verify that you've installed it"
> exit $OCF_ERR_INSTALLED
> fi
>
> }
>
> # ########################### MAIN ###########################
>
> if [ $# -eq 1 ]; then
> COMMAND=$1
> else
> usage $OCF_ERR_ARGS
> fi
>
> STATUSURL="$OCF_RESKEY_testurl"
>
> case $COMMAND in
> meta-data) metadata; exit 0;;
> validate-all) validate_all; exit 0;;
> start|stop|status|monitor) validate_all; eval $COMMAND; exit 0;;
> *usage|*help) usage $OCF_SUCCESS;; # "help" as well as "--help"
> *) usage $OCF_ERR_UNIMPLEMENTED;;
> esac
>
> ocf_log err "$0: Running off end of script?!"
>
> exit $OCF_ERR_GENERIC
> _______________________________________________
> Pacemaker mailing list: Pacemaker at oss.clusterlabs.org
> http://oss.clusterlabs.org/mailman/listinfo/pacemaker
>
> Project Home: http://www.clusterlabs.org
> Getting started: http://www.clusterlabs.org/doc/Cluster_from_Scratch.pdf
> Bugs: http://bugs.clusterlabs.org
More information about the Pacemaker
mailing list