#!/bin/sh
#
# High-Availability httpd daemon monitoring OCF agent
#
# nginx
#
# Description: monitors http servers (no start, no stop).
#
# Author: Nicolai Langfeldt, Broadnet AS
#
# Started out as nginx agent. Heavily repurposed.
#
# Nginx RA lists these authors:
# Alan Robertson
# Dejan Muhamedagic
#
# Support: linux-ha@lists.linux-ha.org
#
# License: GNU General Public License (GPL)
#
# Copyright:
# Some parts (C) 2012 Broadnet AS
# Some other parts (C) 2002-2010 International Business Machines
#
#
# Patches are being accepted ;-)
#
# Requires *curl*; wget and GET are not sane/flexible enough.
#
# Usage example:
#
# N-node proxy cluster. Pacemaker manages production virtual IP
# (vip). HAproxy started by init script on all N nodes. HAproxy is
# used several times in the frontend stack and is needed on all nodes
# at all times for load distribution between the proxies.
#
# Production VIP must never be started on a node where HAproxy is not
# running but can run on any node where HAproxy does run.
#
# My solution: Create this monitoring agent inspired by the ping and
# nginx agents and use it the same way as the ping agent to control
# where the VIP agent can be run.
# NOTE: This agent will not start or stop the resource. It is assumed
# that the resource is managed by init script and warnings about
# failures are sent by something else (like nagios).
# 1. Configure status URL in haproxy using a randomized URL to hide
# the status page from random probers (I wanted the status to be
# available over the network too). "pwgen" is useful for generating
# a random url.
#
# listen httpsservice 0.0.0.0:80
# ...
# stats uri /phei1SaeIevoh4eM
#
# 2. Check if working by directing a browser there
#
# 3.
Configure pacemaker # # primitive vip ocf:heartbeat:IPaddr \ # params ip="192.168.5.8" # # primitive happing ocf:pacemaker:http_ping \ # params name="happing" testurl="http://localhost/phei1SaeIevoh4eM" \ # op monitor interval="1s" depth="0" # # clone happingall happing \ # meta target-role="Started" # # location locVip vip \ # rule $id="locVipRule" -INF: not_defined happing # # If your frontend runs for example a # haproxy/nginx/varnish/whatever mix: set up http pings for all of # the ones that _have_ to be running and combine in the location # rule like this: # # location locVip vip \ # rule $id="locVipRule" -INF: not_defined happing or not_defined nxping # # 4. Use crm_mon -A to monitor the vip and the happing token. Document that the # token is supposed to be defined on all nodes during normal operation. # # # OCF parameters: # OCF_RESKEY_testurl # OCF_RESKEY_bindaddr # OCF_RESKEY_testregex # OCF_RESKEY_name # OCF_RESKEY_timeout # OCF_RESKEY_dampen # OCF_RESKEY_multiplier # OCF_RESKEY_curlopts # OCF_RESKEY_auth # OCF_RESKEY_curl # : ${OCF_ROOT:="/usr/lib/ocf"} : ${OCF_FUNCTIONS_DIR=$OCF_ROOT/lib/heartbeat} # No defaults: $OCF_RESKEY_testurl : ${OCF_RESKEY_bindaddr:=lo} : ${OCF_RESKEY_testregex:=""} : ${OCF_RESKEY_name:="httpping"} : ${OCF_RESKEY_timeout:="1s"} : ${OCF_RESKEY_dampen:="5s"} : ${OCF_RESKEY_multiplier:="1000"} : ${OCF_RESKEY_curlopts:=""} : ${OCF_RESKEY_auth:=""} : ${OCF_RESKEY_curl:="curl"} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs HA_VARRUNDIR=${HA_VARRUN} # This kind of check/recalculation should be provided by ocf-shellfuncs integer=$(echo ${OCF_RESKEY_timeout} | egrep -o '[0-9]*') case ${OCF_RESKEY_timeout} in *[0-9]ms|*[0-9]msec) OCF_RESKEY_timeout=$(( $integer / 1000 ));; *[0-9]m|*[0-9]min) OCF_RESKEY_timeout=$(( $integer * 60 ));; *[0-9]h|*[0-9]hr) OCF_RESKEY_timeout=$(( $integer * 60 * 60 ));; *) OCF_RESKEY_timeout=$integer;; esac # Reduce timeout by 10% NEW=$(($OCF_RESKEY_timeout * 9 / 10)) # Check the result to avoid a zero timeout (=inifinite), and see if we still can't # make sure it's less than the original. case $NEW:$OCF_RESKEY_timeout in 0:0) :;; 0:1) OCF_RESKEY_timeout=1;; 0:*) OCF_RESKEY_timeout=$(( $OCF_RESKEY_timeout - 1 ));; $OCF_RESKEY_timeout:$NEW) OCF_RESKEY_timeout=$(( $OCF_RESKEY_timeout - 1 ));; *) OCF_RESKEY_timeout=$NEW;; esac ####################################################################### # # Configuration options - usually you don't need to change these # ####################################################################### # default options for http clients # NB: We _always_ test a local resource, so it should be # safe to connect from the local interface. CURLOPTS="-Ssk --interface ${OCF_RESKEY_bindaddr} --max-time ${OCF_RESKEY_timeout} ${OCF_RESKEY_curlopts}" # # End of Configuration options ####################################################################### CMD=`basename $0` # The config-file-pathname is the pathname to the configuration # file for this web server. Various appropriate defaults are # assumed if no config file is specified. usage() { cat </dev/null) curlexit=$? # Check headers file since we don't have any RE. The last header should # be a 200. There can be redirects before that. case $curlexit in 0) case $HTTP_CODE in 200) return 0;; esac return 1;; *) curlexit=$OCF_ERR_GENERIC;; esac ;; *) curl_func -o- $CURLOPTS "$OCF_RESKEY_testurl" | grep -Eiq i"$OCF_RESKEY_testregex" >/dev/null curlexit=$? 
;; esac return $curlexit } start() { silent_status rc=$? case $rc in 0) attrd_updater -U $OCF_RESKEY_multiplier -n $OCF_RESKEY_name -d $OCF_RESKEY_dampen ocf_log info "start: test worked, set token." # return $OCF_SUCCESS ;; *) attrd_updater -D -n $OCF_RESKEY_name -d $OCF_RESKEY_dampen ocf_log info "start: test failed, deleting token." # return $OCF_ERR_GENERIC ;; esac return $OCF_SUCCESS } stop() { ocf_log info "http_ping stoping" attrd_updater -D -n $OCF_RESKEY_name -d $OCF_RESKEY_dampen return $OCF_SUCCESS } status() { silent_status rc=$? case $rc in 0) ocf_log info "test ($OCF_RESKEY_testurl) worked";; *) ocf_log info "test ($OCF_RESKEY_testurl) failed" esac return $OCF_SUCCESS } monitor() { # Monitor action always succeeds. It just adds or removes the named attribute. silent_status if [ $? -ne 0 ] then ocf_log info "$CMD not running" attrd_updater -D -n $OCF_RESKEY_name -d $OCF_RESKEY_dampen return $OCF_SUCCESS # $OCF_ERR_GENERIC fi attrd_updater -q -U $OCF_RESKEY_multiplier -n $OCF_RESKEY_name -d $OCF_RESKEY_dampen return $OCF_SUCCESS } metadata(){ cat < 1.0 This is the resource _monitor_ agent for any httpd by polling a status page. It provides only one level of testing, get a URL and optionaly look for a regular expression. The HTTP GET should be on this side of instant, the default timeout is one second. We allow a monitoring interval down to one second. Monitors a http server URL to test. There is no default. You will need to configure a status or "ping" url in your http server. test url Regular expression (egrep) to match in the output of testurl. Case insensitive. If no testregex is given then the HTTP status code is used. It must be 200 otherwise the test fails. If you want the test to succeed as long as the server responds in any way set testregex to ".". monitor regular expression By default curl is run with "--interface lo". 
If you can't reach the web server from the loopback (URL containing "localhost") specify the interface name or address to bind to with this option. Try 'bindaddr="0.0.0.0"' if the URL is not a localhost URL. network bind The name of the attribute to set. This is the name to be used in the constraints. Attribute name The number by which to set if the httpd is up. Value multiplier How long (in seconds) to wait before declaring a test lost test timeout in seconds Amount of time to wait (dampen) before setting any new value. Dampening interval END exit $OCF_SUCCESS } # ##################################################################### validate_all() { if [ -z $STATUSURL ] then ocf_log err "No testurl given!" exit $OCF_ERR_PARAM fi case $STATUSURL in http://*/*) ;; https://*/*) ;; *) ocf_log err "Invalid STATUSURL $STATUSURL" exit $OCF_ERR_ARGS ;; esac if ! $OCF_RESKEY_curl --help >/dev/null 2>/dev/null; then ocf_log err "curl ($OCF_RESKEY_curl) binary not found! Please verify that you've installed it" exit $OCF_ERR_INSTALLED fi } # ########################### MAIN ########################### if [ $# -eq 1 ]; then COMMAND=$1 else usage $OCF_ERR_ARGS fi STATUSURL="$OCF_RESKEY_testurl" case $COMMAND in meta-data) metadata; exit 0;; validate-all) validate_all; exit 0;; start|stop|status|monitor) validate_all; eval $COMMAND; exit 0;; *usage|*help) usage $OCF_SUCCESS;; # "help" as well as "--help" *) usage $OCF_ERR_UNIMPLEMENTED;; esac ocf_log err "$0: Running off end of script?!" exit $OCF_ERR_GENERIC