#!/bin/bash
#
# This STONITH agent is designed to be used in clusters that run on virtual
# machines which are managed by ganeti.
# It talks to the ganeti api to shutdown or restart virtual machines.
#
# Author: Dominik Klein
# Copyright: 2016 Dominik Klein
# License: GNU General Public License (GPL)
#
# Invocation: <agent> <action> [<vm>]
#   ${1} - action (gethosts, on, off, reset, monitor, status, getconfignames,
#          getinfo-devid, getinfo-devname, getinfo-devdescr, getinfo-devurl,
#          getinfo-xml)
#   ${2} - name of the virtual machine to act on (needed by on/off/reset)
#
# Configuration is read from these environment variables:
#   ganeti_cluster                 api host (mandatory)
#   ganeti_auth_user               basic auth user (optional)
#   ganeti_auth_password           basic auth password (optional)
#   ganeti_api_port                api port, default 5080
#   ganeti_api_transport           http or https, default https
#   ganeti_stop_timeout            seconds to wait for a job, default 120
#   ganeti_curl_ignore_ssl_errors  call curl with -k when true
#
# Known issues:
# - ocf_is_true is taken from ocf-shellfuncs. A stonith agent does not
#   necessarily know OCF_ROOT, so /usr/lib/ocf is assumed unless OCF_ROOT is
#   set. If the library cannot be read, a minimal local ocf_is_true is used.

: "${OCF_ROOT:=/usr/lib/ocf}"
ocf_shellfuncs="${OCF_ROOT}/lib/heartbeat/ocf-shellfuncs"
if [[ -r "${ocf_shellfuncs}" ]]; then
  # shellcheck source=/dev/null
  . "${ocf_shellfuncs}"
fi

# Fallback so that a missing ocf-shellfuncs does not silently turn every
# boolean parameter into "false" via a "command not found" error.
if ! declare -F ocf_is_true >/dev/null; then
  ocf_is_true() {
    case "${1}" in
      yes|true|1|YES|TRUE|True|ja|on|ON) return 0 ;;
      *) return 1 ;;
    esac
  }
fi

for binary in jq curl; do
  if ! command -v "${binary}" &>/dev/null; then
    echo "${0} requires ${binary}."
    exit 1
  fi
done

apihost=${ganeti_cluster}
apiuser=${ganeti_auth_user}
apipass=${ganeti_auth_password}
apiport=${ganeti_api_port:-5080}
apitransport=${ganeti_api_transport:-https}
stoptimeout=${ganeti_stop_timeout:-120}
vm=${2}

ganetiapi="${apitransport}://${apihost}:${apiport}"

# curl options are kept in an array so that every option stays one word,
# no matter which characters it contains.
curl_options=(-s)
if [[ -n "${apiuser}" && -n "${apipass}" ]]; then
  # Credentials are passed with --user instead of being embedded in the URL,
  # so passwords containing characters such as '@', '/' or ':' keep working.
  curl_options+=(--user "${apiuser}:${apipass}")
fi
if ocf_is_true "${ganeti_curl_ignore_ssl_errors}"; then
  curl_options+=(-k)
fi

# Run curl against the ganeti api.
# Arguments: ${1} - api path, starting with a slash (e.g. /2/info)
#            remaining arguments - extra curl options for this request
# Outputs:   the response body on stdout
# Returns:   the exit status of curl
ganeti_api() {
  local path=${1}
  shift
  curl "${curl_options[@]}" "$@" "${ganetiapi}${path}"
}

# Check that the api answers a simple query.
# --fail makes curl return non-zero on HTTP errors (e.g. 401), which it
# would otherwise report as success.
# Returns: 0 if the request succeeded, non-zero otherwise
check_api_auth() {
  ganeti_api /2/info --fail >/dev/null
}

# Check the configuration taken from the environment.
# Prints a message and exits 1 on the first problem found.
validate_config() {
  if [[ -z "${apihost}" ]]; then
    echo Missing mandatory parameter: ganeti_cluster
    exit 1
  fi
  if [[ -n "${apiuser}" && -z "${apipass}" ]]; then
    echo We need user and password, not just user.
    exit 1
  fi
  if [[ -z "${apiuser}" && -n "${apipass}" ]]; then
    echo We need user and password, not just password.
    exit 1
  fi
  if [[ ! "${stoptimeout}" =~ ^[0-9]+$ ]]; then
    echo "ganeti_stop_timeout must be a number of seconds, got: ${stoptimeout}"
    exit 1
  fi
}

# Make sure a vm name was passed as second argument; exits 1 otherwise.
require_vm() {
  if [[ -z "${vm}" ]]; then
    echo "Action ${1} needs the name of a vm as second argument."
    exit 1
  fi
}

# Make sure the answer to a job submission looks like a job id.
# Without this check a failed submission would be polled until the timeout.
# Arguments: ${1} - response body of the submission
#            ${2} - name of the submitting function, for the message
# Exits 1 if the response is not a plain number.
require_job_id() {
  if [[ ! "${1}" =~ ^[0-9]+$ ]]; then
    echo "${2}: ganeti did not return a job id: ${1:-<empty response>}"
    exit 1
  fi
}

# ganeti may queue new jobs depending on what it is currently doing and
# therefore not run them instantly, so before reporting back, we need to make
# sure the command we sent actually ran and worked.
# Arguments: ${1} - job id
#            ${2} - job name, only used in messages
# Returns:   0 once the job reports success
# Exits 1 if the job fails, is canceled, or does not succeed within
# ${stoptimeout} polls (one poll per second).
wait_for_job_to_finish() {
  local jobid=${1}
  local jobname=${2}
  local jobstatus
  local i

  for ((i = 1; i <= stoptimeout; i++)); do
    # status of the first opcode of the job
    jobstatus=$(ganeti_api "/2/jobs/${jobid}" | jq -r '.opstatus[0]')
    case "${jobstatus}" in
      success)
        return 0
        ;;
      error|canceled)
        echo "job ${jobid} (${jobname}) is in ${jobstatus} state after ${i} seconds"
        # get error description from job
        ganeti_api "/2/jobs/${jobid}" | jq ".oplog | .[0] | .[] | .[3]"
        exit 1
        ;;
      *)
        sleep 1
        ;;
    esac
  done

  echo "job ${jobid} (${jobname}) did not finish within ${stoptimeout} seconds"
  exit 1
}

# Submit a shutdown job for ${vm} (with a timeout of 0 in the request body)
# and wait for it to finish.
poweroff_vm() {
  local jobid
  jobid=$(ganeti_api "/2/instances/${vm}/shutdown" \
    -X PUT -H 'Content-Type: application/json' -d '{ "timeout":0 }')
  require_job_id "${jobid}" "${FUNCNAME[0]}"
  wait_for_job_to_finish "${jobid}" "${FUNCNAME[0]}"
}

# Submit a startup job for ${vm} and wait for it to finish.
poweron_vm() {
  local jobid
  jobid=$(ganeti_api "/2/instances/${vm}/startup" -X PUT)
  require_job_id "${jobid}" "${FUNCNAME[0]}"
  wait_for_job_to_finish "${jobid}" "${FUNCNAME[0]}"
}

# Compare the state ganeti reports for ${vm} with an expected state.
# Arguments: ${1} - "started" or "stopped"
# Returns:   0 if the vm is in the expected state, 1 otherwise
check_if_vm_is() {
  local wanted=${1}
  local vmstate
  vmstate=$(ganeti_api "/2/instances/${vm}" | jq -r .status)

  case "${wanted}" in
    started)
      if [[ "${vmstate}" == "running" ]]; then
        return 0
      fi
      return 1
      ;;
    stopped)
      case "${vmstate}" in
        # An unknown state (no or unusable answer from the api) must never
        # be reported as "stopped": the cluster would consider the node
        # fenced. ERROR_up is the state ganeti reports for an instance that
        # is up although it should be down.
        ''|null|running|ERROR_up)
          return 1
          ;;
        *)
          return 0
          ;;
      esac
      ;;
    *)
      return 1
      ;;
  esac
}

# Main code
case "${1}" in
  gethosts)
    printf '%s\n' "${vm}"
    exit 0
    ;;
  on)
    validate_config
    require_vm "${1}"
    poweron_vm
    check_if_vm_is started
    exit ${?}
    ;;
  off)
    validate_config
    require_vm "${1}"
    poweroff_vm
    check_if_vm_is stopped
    exit ${?}
    ;;
  reset)
    validate_config
    require_vm "${1}"
    poweroff_vm
    # Do not report a successful reset for a vm that never went down.
    if ! check_if_vm_is stopped; then
      echo "${vm} is not stopped after shutdown, not starting it again."
      exit 1
    fi
    poweron_vm
    check_if_vm_is started
    exit ${?}
    ;;
  monitor|status)
    validate_config
    if check_api_auth; then
      exit 0
    else
      exit 1
    fi
    ;;
  getconfignames)
    echo "ganeti_cluster ganeti_auth_user ganeti_auth_password ganeti_stop_timeout ganeti_api_port ganeti_api_transport ganeti_curl_ignore_ssl_errors"
    exit 0
    ;;
  getinfo-devid)
    echo "ganeti device"
    exit 0
    ;;
  getinfo-devname)
    echo "ganeti device"
    exit 0
    ;;
  getinfo-devdescr)
    echo "Allows STONITH to control vms managed by a ganeti cluster."
    exit 0
    ;;
  getinfo-devurl)
    echo "http://does.not.exist"
    exit 0
    ;;
  getinfo-xml)
    # NOTE(review): the XML markup below was reconstructed around the
    # original description texts, using the parameter names printed by
    # getconfignames. Confirm attributes (unique/required/content type)
    # against the stonith external plugin metadata format.
    cat << 'GANETIXML'
<parameters>
<parameter name="ganeti_cluster" unique="0" required="1">
<content type="string" />
<shortdesc lang="en">
Ganeti Cluster Name
</shortdesc>
<longdesc lang="en">
The fqdn (or ip address) the ganeti Cluster can be reached and managed at.
</longdesc>
</parameter>

<parameter name="ganeti_stop_timeout" unique="0" required="0">
<content type="string" />
<shortdesc lang="en">
Stop timeout
</shortdesc>
<longdesc lang="en">
The timeout, in seconds, for which to wait for ganeti to report that the vm has been stopped, before aborting with a failure.
Default: 120
</longdesc>
</parameter>

<parameter name="ganeti_auth_user" unique="0" required="0">
<content type="string" />
<shortdesc lang="en">
Ganeti auth user
</shortdesc>
<longdesc lang="en">
The user account to use for basic auth on ganeti api connection.
</longdesc>
</parameter>

<parameter name="ganeti_auth_password" unique="0" required="0">
<content type="string" />
<shortdesc lang="en">
Ganeti auth password
</shortdesc>
<longdesc lang="en">
The password to use for basic auth on ganeti api connection.
</longdesc>
</parameter>

<parameter name="ganeti_api_port" unique="0" required="0">
<content type="string" />
<shortdesc lang="en">
The apis TCP port
</shortdesc>
<longdesc lang="en">
The TCP port to connect to the api
Default: 5080
</longdesc>
</parameter>

<parameter name="ganeti_api_transport" unique="0" required="0">
<content type="string" />
<shortdesc lang="en">
http or https
</shortdesc>
<longdesc lang="en">
The transport to use when connecting to the api
Default: https
</longdesc>
</parameter>

<parameter name="ganeti_curl_ignore_ssl_errors" unique="0" required="0">
<content type="string" />
<shortdesc lang="en">
ignore certificate errors when using https
</shortdesc>
<longdesc lang="en">
If you use self signed certificates and curl complains about that you might set this to true, so curl is called with -k
</longdesc>
</parameter>
</parameters>
GANETIXML
    exit 0
    ;;
  *)
    exit 1
    ;;
esac