#!/bin/bash
#
# OCF resource agent which manages upstart jobs.
#
# Copyright (c) 2011 Vladislav Bogdanov
#
# OCF instance parameters:
#   OCF_RESKEY_job_name:      name of upstart job (optionally followed by
#                             instance arguments, e.g. "my_job INSTANCE=1")
#   OCF_RESKEY_process_name:  name of process the job is expected to launch
#   OCF_RESKEY_check_command: optional extra command run by monitor
#   OCF_RESKEY_check_timeout: seconds to wait for check_command (default 5)
#   OCF_RESKEY_check_action:  optional command run when check_command fails
#

# Initialization:
: "${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}"
. "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs"

# Defaults

#######################################
# Print the OCF meta-data XML for this agent on stdout.
#
# NOTE(review): the XML markup of this here-document was lost in the copy of
# the file this was restored from; only the description texts survived.
# Element layout, parameter attributes and the advertised action timeouts
# below are reconstructed from OCF conventions and from the parameters the
# code actually reads -- confirm against the upstream agent before shipping.
#######################################
meta_data() {
    cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="upstart">
<version>1.0</version>

<longdesc lang="en">
This RA manages upstart jobs as HA resources.
</longdesc>
<shortdesc lang="en">Manage upstart job</shortdesc>

<parameters>
<parameter name="job_name" unique="1" required="1">
<longdesc lang="en">
The name of the upstart job.
Can also contain job instance appended after space.
Example: job_name="my_job INSTANCE=1"
</longdesc>
<shortdesc lang="en">Job name</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="process_name" unique="0" required="1">
<longdesc lang="en">
The name of the process which is to be launched by upstart job.
</longdesc>
<shortdesc lang="en">Process name</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="check_command" unique="0" required="0">
<longdesc lang="en">
Additional command to run on monitor.
</longdesc>
<shortdesc lang="en">Additional monitor command</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="check_timeout" unique="0" required="0">
<longdesc lang="en">
How many seconds to wait for check command to finish.
</longdesc>
<shortdesc lang="en">Monitor command timeout</shortdesc>
<content type="integer" default="5" />
</parameter>

<parameter name="check_action" unique="0" required="0">
<longdesc lang="en">
What to run if monitor command fails or times out.
</longdesc>
<shortdesc lang="en">Monitor failure action</shortdesc>
<content type="string" default="" />
</parameter>
</parameters>

<actions>
<action name="start" timeout="60" />
<action name="stop" timeout="60" />
<action name="reload" timeout="120" />
<action name="monitor" depth="0" timeout="60" interval="30" />
<action name="validate-all" timeout="20" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}

#######################################
# Print a short usage message on stdout.
#
# NOTE(review): the body of this here-document was lost in the copy of the
# file this was restored from; the text below is the conventional OCF usage
# message listing the actions dispatched at the bottom of this script.
#######################################
usage() {
    cat <<END
usage: $0 {start|stop|monitor|reload|validate-all|meta-data}

Expects to have a fully populated OCF RA-compliant environment set.
END
}

#######################################
# Start the upstart job and wait until monitor() reports it running.
# Arguments: $1 - current "initctl status" output for the job
# Returns:   $OCF_SUCCESS if the job is (or becomes) running,
#            $OCF_ERR_GENERIC otherwise
#
# NOTE(review): the opening of this function (everything before the
# "initctl start" call) was lost in the restored copy; the "already running"
# short-circuit below mirrors the one visible in stop() -- confirm.
#######################################
start() {
    local status=$1
    local ret

    monitor "${status}"
    if [ $? -eq "$OCF_SUCCESS" ]; then
        return "$OCF_SUCCESS"
    fi

    # job_name may carry instance arguments ("my_job INSTANCE=1"), so it is
    # deliberately left unquoted to be word-split into separate arguments.
    # shellcheck disable=SC2086
    status=$( initctl start ${OCF_RESKEY_job_name} 2>&1 )

    monitor "${status}"
    case $? in
        "$OCF_SUCCESS")
            ocf_log info "Upstart job ${OCF_RESKEY_job_name} started successfully."
            ret=$OCF_SUCCESS
            ;;
        *)
            ocf_log err "Failed to start upstart job ${OCF_RESKEY_job_name}."
            ret=$OCF_ERR_GENERIC
            ;;
    esac

    return "${ret}"
}

#######################################
# Stop the upstart job.
# Arguments: $1 - current "initctl status" output for the job
# Returns:   $OCF_SUCCESS if the job is (or becomes) not running,
#            $OCF_ERR_GENERIC otherwise
#######################################
stop() {
    local status=$1
    local ret

    # Nothing to do when the job is already down.
    monitor "${status}"
    if [ $? -eq "$OCF_NOT_RUNNING" ]; then
        return "$OCF_SUCCESS"
    fi

    # Intentional word splitting, see start().
    # shellcheck disable=SC2086
    status=$( initctl stop ${OCF_RESKEY_job_name} 2>&1 )

    monitor "${status}"
    case $? in
        "$OCF_NOT_RUNNING")
            ocf_log info "Upstart job ${OCF_RESKEY_job_name} stopped successfully."
            ret=$OCF_SUCCESS
            ;;
        *)
            ocf_log err "Failed to stop upstart job ${OCF_RESKEY_job_name}."
            ret=$OCF_ERR_GENERIC
            ;;
    esac

    return "${ret}"
}

#######################################
# Query upstart for the job status and hand back its first line.
# Arguments: $1 - NAME of the caller's variable that receives the status line
# Exits:     $OCF_ERR_INSTALLED when upstart does not know the job
#######################################
get_status() {
    local _output

    # Intentional word splitting, see start().
    # shellcheck disable=SC2086
    _output=$( initctl status ${OCF_RESKEY_job_name} 2>&1 )

    if [[ "${_output}" == *"Unknown job"* ]] ; then
        ocf_log err "Unknown upstart job ${OCF_RESKEY_job_name}"
        exit "$OCF_ERR_INSTALLED"
    fi

    # Leave only first line (main process)
    _output=${_output%%$'\n'*}

    # Store job status for later consumption. printf -v assigns to the
    # caller's variable without re-evaluating the value as shell code.
    printf -v "$1" '%s' "${_output}"
}

#######################################
# Decide whether the job's main process is really running.
#
# Parses an upstart status line; while upstart claims the job is started but
# the expected process is not there yet, re-polls once per second for up to
# (operation timeout - 5s - check_timeout) seconds. On success optionally
# runs check_command and, if that fails, check_action; their outcome is only
# logged and does not change the return value.
#
# Arguments: $1 - upstart status line to evaluate first
# Returns:   $OCF_SUCCESS or $OCF_NOT_RUNNING
#######################################
monitor() {
    local status=$1
    local pid
    local ret=$OCF_NOT_RUNNING
    local process
    local state
    # Operation timeout minus 5 seconds. The timeout is absent when the agent
    # is invoked by hand; treat that as "do not wait" instead of tripping an
    # arithmetic syntax error.
    local attempts=$(( (${OCF_RESKEY_CRM_meta_timeout:-0} / 1000) - 5 ))
    local i=0

    # Leave room for check_command to run within the operation timeout.
    if ocf_is_decimal "${OCF_RESKEY_check_timeout}" ; then
        attempts=$(( attempts - OCF_RESKEY_check_timeout ))
    fi
    if [ "${attempts}" -le 0 ] ; then
        attempts=0
    fi

    # We first receive output from outside, then re-poll for it
    while [ "${ret}" -eq "$OCF_NOT_RUNNING" ] ; do
        # upstart can report:
        # <job> (instance) start/[running|pre-start], process (item0) pid
        # The outer parentheses let the pattern contain literal spaces;
        # they make the state group 3 and the pid group 5.
        if [[ "${status}" =~ (^${OCF_RESKEY_job_name}( \(.+\)){0,1} start/([a-z-]+), process (\(.+\) ){0,1}([0-9]+)) ]] ; then
            state=${BASH_REMATCH[3]}
            case ${state} in
                running)
                    pid=${BASH_REMATCH[5]}
                    if [ -n "${pid}" ] ; then
                        if kill -0 "${pid}" 2>/dev/null ; then
                            # The process may vanish between the two checks;
                            # an empty name simply counts as a mismatch.
                            process=$( awk '/^Name:/ {print $2}' "/proc/${pid}/status" 2>/dev/null )
                            if [ "${process}" != "${OCF_RESKEY_process_name}" ] ; then
                                # job is started, but it did not yet launched process itself
                                (( i == 0 )) && ocf_log info "pid ${pid} corresponds to process ${process} instead of ${OCF_RESKEY_process_name}, waiting."
                                ret=$OCF_NOT_RUNNING
                            else
                                ret=$OCF_SUCCESS
                            fi
                        else
                            # This will cause resource to be marked as 'Started FAILED'
                            # with subsequent stop and start
                            (( i == 0 )) && ocf_log info "upstart reports process ${pid} is running, but it really isn't, waiting."
                            ret=$OCF_NOT_RUNNING
                        fi
                    fi
                    ;;
                pre-start)
                    : Just waiting
                    ;;
                *)
                    : Ditto
                    ;;
            esac

            if [ "${ret}" -eq "$OCF_NOT_RUNNING" ] ; then
                # Wait for upstart to recover started job
                if (( i++ >= attempts )) ; then
                    ocf_log err "Timed out waiting for process ${OCF_RESKEY_process_name} pid ${pid} to appear."
                    break
                fi
                sleep 1
                get_status status
            fi
        else
            # Job is not launched
            ocf_log info "${status}"
            ret=$OCF_NOT_RUNNING
            break
        fi
    done

    if [ "${ret}" = "$OCF_SUCCESS" ] && [ -n "${OCF_RESKEY_check_command}" ] ; then
        local rc

        if ! ocf_is_decimal "${OCF_RESKEY_check_timeout}" ; then
            OCF_RESKEY_check_timeout=5
        fi

        # check_command / check_action are command lines, so they are
        # deliberately word-split.
        # shellcheck disable=SC2086
        timeout -s KILL "${OCF_RESKEY_check_timeout}" ${OCF_RESKEY_check_command} >/dev/null 2>&1
        rc=$?
        if [ ${rc} -ne 0 ] ; then
            ocf_log warn "check_command '${OCF_RESKEY_check_command}' exited with status ${rc}."
            if [ -n "${OCF_RESKEY_check_action}" ] ; then
                ocf_log warn "Running repair command '${OCF_RESKEY_check_action}'."
                # shellcheck disable=SC2086
                ${OCF_RESKEY_check_action} >/dev/null 2>&1
            fi
        fi
    fi

    return "${ret}"
}

#######################################
# Check that initctl is available and the mandatory parameters are set.
# Returns: $OCF_SUCCESS
# Exits:   $OCF_ERR_CONFIGURED when job_name or process_name is empty
#######################################
validate() {
    check_binary initctl

    # Check the interface parameter
    if [ -z "${OCF_RESKEY_job_name}" ]; then
        ocf_log err "Empty job_name parameter."
        exit "$OCF_ERR_CONFIGURED"
    fi
    if [ -z "${OCF_RESKEY_process_name}" ]; then
        ocf_log err "Empty process_name parameter."
        exit "$OCF_ERR_CONFIGURED"
    fi

    return "$OCF_SUCCESS"
}

# Actions that must work without a configured resource or upstart present.
case $__OCF_ACTION in
    meta-data)
        meta_data
        exit "$OCF_SUCCESS"
        ;;
    usage|help)
        usage
        exit "$OCF_SUCCESS"
        ;;
esac

if [ "$(uname)" != "Linux" ] ; then
    ocf_log err "This RA works only on linux."
    exit "$OCF_ERR_INSTALLED"
fi

validate || exit $?

status=""
get_status status

case $__OCF_ACTION in
    start)
        start "${status}"
        ;;
    stop)
        stop "${status}"
        ;;
    monitor)
        monitor "${status}"
        ;;
    reload)
        if monitor "${status}" ; then
            # stop() must be given the current status: with an empty argument
            # it concludes the job is not running and never calls initctl stop.
            if stop "${status}" ; then
                # Re-poll job status
                get_status status
                start "${status}"
            else
                exit "$OCF_ERR_GENERIC"
            fi
        else
            start "${status}"
        fi
        ;;
    validate-all)
        ;;
    *)
        usage
        exit "$OCF_ERR_UNIMPLEMENTED"
        ;;
esac

exit $?