#! /bin/sh

#
#   Resource Agent for managing the Generic SCSI Target Subsystem
#   for Linux (SCST) and related daemons.
#
#   License: GNU General Public License (GPL)
#   (c) 2012-2014 Marc A. Smith
#

PATH=$PATH:/usr/local/sbin
# Initialization
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#MODULES="scst qla2x00tgt iscsi_scst ib_srpt \
#scst_disk scst_vdisk scst_tape scst_changer fcst"
MODULES="scst iscsi_scst scst_disk scst_vdisk"
SCST_CFG="/etc/scst.conf"
PRE_SCST_CONF="/etc/pre-scst_xtra_conf"
POST_SCST_CONF="/etc/post-scst_xtra_conf"
SCST_SYSFS="/sys/kernel/scst_tgt"
ALUA_STATES="active nonoptimized standby unavailable offline transitioning"
NO_CLOBBER="/tmp/scst_ra-no_clobber"

# For optional SCST modules
if [ -f "/lib/modules/$(uname -r)/extra/ocs_fc_scst.ko" ]; then
    MODULES="${MODULES} ocs_fc_scst"
fi
if [ -f "/lib/modules/$(uname -r)/extra/chfcoe.ko" ]; then
    MODULES="${MODULES} chfcoe"
fi


scst_start() {
    # Exit immediately if configuration is not valid
    scst_validate_all || exit ${?}

    # If resource is already running, bail out early
    if scst_monitor; then
        ocf_log info "Resource is already running."
        return ${OCF_SUCCESS}
    fi

    # If our pre-SCST file exists, run it
    if [ -f "${PRE_SCST_CONF}" ]; then
        ocf_log info "Pre-SCST user config. file found; running..."
        ocf_run -warn sh "${PRE_SCST_CONF}"
    fi

    # Load all modules
    ocf_log info "Loading kernel modules..."
    for i in ${MODULES}; do
        ocf_log debug "scst_start() -> Module: ${i}"
        if [ -d /sys/module/${i} ]; then
            ocf_log warn "The ${i} module is already loaded!"
        else
            ocf_run modprobe ${i} || exit ${OCF_ERR_GENERIC}
        fi
    done

    # Start the daemons
    ocf_log info "Starting daemons..."
    if pidof iscsi-scstd > /dev/null 2>&1; then
        ocf_log warn "The iscsi-scstd daemon is already running!"
    else
        ocf_run iscsi-scstd || exit ${OCF_ERR_GENERIC}
    fi
    if pidof lldpad > /dev/null 2>&1; then
        ocf_log warn "The lldpad daemon is already running!"
    else
        ocf_run lldpad -d || exit ${OCF_ERR_GENERIC}
    fi
    #if pidof fcoemon > /dev/null 2>&1; then
    #    ocf_log warn "The fcoemon daemon is already running!"
    #else
    #    ocf_run fcoemon -s || exit ${OCF_ERR_GENERIC}
    #fi

    # Configure SCST
    if [ -f "${SCST_CFG}" ]; then
        ocf_log info "Applying SCST configuration..."
        ocf_run scstadmin -config "${SCST_CFG}"
        # Prevent scst_stop() from clobbering the configuration file
        if [ ${?} -ne 0 ]; then
            ocf_log err "Something is wrong with the SCST configuration!"
            ocf_run touch "${NO_CLOBBER}"
            exit ${OCF_ERR_GENERIC}
        else
            if [ -f "${NO_CLOBBER}" ]; then
                ocf_run rm -f "${NO_CLOBBER}"
            fi
        fi
    fi

    # If our post-SCST file exists, run it
    if [ -f "${POST_SCST_CONF}" ]; then
        ocf_log info "Post-SCST user config. file found; running..."
        ocf_run -warn sh "${POST_SCST_CONF}"
    fi

    # If we are using ALUA, be sure we are using the "Slave" state initially
    if ocf_is_true ${OCF_RESKEY_alua}; then
        check_alua
        # Set the local target group ALUA state
        ocf_log debug "scst_start() -> Setting target group" \
            "'${OCF_RESKEY_local_tgt_grp}' ALUA state to" \
            "'${OCF_RESKEY_s_alua_state}'..."
        ocf_run scstadmin -noprompt -set_tgrp_attr \
            ${OCF_RESKEY_local_tgt_grp} -dev_group \
            ${OCF_RESKEY_device_group} -attributes \
            state\=${OCF_RESKEY_s_alua_state} || exit ${OCF_ERR_GENERIC}
        # For now, we simply assume the other node is the Master
        ocf_log debug "scst_start() -> Setting target group" \
            "'${OCF_RESKEY_remote_tgt_grp}' ALUA state to" \
            "'${OCF_RESKEY_m_alua_state}'..."
        ocf_run scstadmin -noprompt -set_tgrp_attr \
            ${OCF_RESKEY_remote_tgt_grp} -dev_group \
            ${OCF_RESKEY_device_group} -attributes \
            state\=${OCF_RESKEY_m_alua_state} || exit ${OCF_ERR_GENERIC}
    fi

    # Make sure the resource started correctly
    while ! scst_monitor; do
        ocf_log debug "scst_start() -> Resource has not started yet, waiting..."
        sleep 1
    done

    # Only return $OCF_SUCCESS if _everything_ succeeded as expected
    return ${OCF_SUCCESS}
}


scst_stop() {
    # Exit immediately if configuration is not valid
    scst_validate_all || exit ${?}

    # Check the current resource state
    scst_monitor
    local rc=${?}
    case "${rc}" in
    "${OCF_SUCCESS}")
        # Currently running; normal, expected behavior
        ocf_log info "Resource is currently running."
        ;;
    "${OCF_RUNNING_MASTER}")
        # Running as a Master; need to demote before stopping
        ocf_log info "Resource is currently running as Master."
        scst_demote || ocf_log warn "Demote failed, trying to stop anyway..."
        ;;
    "${OCF_NOT_RUNNING}")
        # Currently not running; nothing to do
        ocf_log info "Resource is already stopped."
        return ${OCF_SUCCESS}
        ;;
    esac

    # Save the SCST configuration, just in case
    ocf_log info "Saving SCST configuration..."
    if [ -f "${NO_CLOBBER}" ]; then
        ocf_log warn "The '${NO_CLOBBER}' file exists," \
            "not writing configuration!"
    else
        ocf_run scstadmin -force -nonkey -write_config \
            ${SCST_CFG} || exit ${OCF_ERR_GENERIC}
    fi

    # Stop all of the daemons and wait for each
    ocf_log info "Stopping daemons..."
    #for i in "fcoemon lldpad iscsi-scstd"; do
    for i in "lldpad iscsi-scstd"; do
        pidof ${i} > /dev/null 2>&1
        if [ ${?} -ne 0 ]; then
            ocf_log warn "The ${i} daemon is not running!"
            continue
        fi
        ocf_log debug "scst_stop() -> Sending the TERM signal to ${i}..."
        ocf_run killall -TERM ${i} || exit ${OCF_ERR_GENERIC}
        while pidof ${i} > /dev/null 2>&1; do
            ocf_log debug "scst_stop() -> The ${i} daemon is" \
                "still running, waiting..."
            sleep 1
        done
    done

    # Unload the modules (in reverse)
    ocf_log info "Unloading kernel modules..."
    for i in $(echo ${MODULES} | tr ' ' '\n' | tac | tr '\n' ' '); do
        ocf_log debug "scst_stop() -> Module: ${i}"
        if [ -d /sys/module/${i} ]; then
            ocf_run rmmod -w ${i} || exit ${OCF_ERR_GENERIC}
        else
            ocf_log warn "The ${i} module is not loaded!"
        fi
    done

    # Make sure the resource stopped correctly
    while scst_monitor; do
        ocf_log debug "scst_stop() -> Resource has not stopped yet, waiting..."
        sleep 1
    done

    # Only return $OCF_SUCCESS if _everything_ succeeded as expected
    return ${OCF_SUCCESS}
}


scst_monitor() {
    # Exit immediately if configuration is not valid
    scst_validate_all || exit ${?}

    # Check if SCST is loaded
    local rc
    if [ -e "${SCST_SYSFS}/version" ]; then
        ocf_log debug "scst_monitor() -> SCST version:" \
            "$(cat ${SCST_SYSFS}/version)"
        ocf_log debug "scst_monitor() -> Resource is running."
        crm_master -l reboot -v 100
        rc=${OCF_SUCCESS}
    else
        ocf_log debug "scst_monitor() -> Resource is not running."
        crm_master -l reboot -D
        rc=${OCF_NOT_RUNNING}
        return ${rc}
    fi

    # If we are using ALUA, then we can test if we are Master or not
    if ocf_is_true ${OCF_RESKEY_alua}; then
        dev_grp_path="${SCST_SYSFS}/device_groups/${OCF_RESKEY_device_group}"
        tgt_grp_path="${dev_grp_path}/target_groups/${OCF_RESKEY_local_tgt_grp}"
        tgt_grp_state="$(head -1 ${tgt_grp_path}/state)"
        ocf_log debug "scst_monitor() -> SCST local target" \
            "group state: ${tgt_grp_state}"
        if [ "x${tgt_grp_state}" = "x${OCF_RESKEY_m_alua_state}" ]; then
            rc=${OCF_RUNNING_MASTER}
        fi
    fi

    return ${rc}
}


scst_validate_all() {
    # Test for required binaries
    check_binary scstadmin

    # There can only be one instance of SCST running per node
    if [ ! -z "${OCF_RESKEY_CRM_meta_clone_node_max}" ] &&
        [ "${OCF_RESKEY_CRM_meta_clone_node_max}" -ne 1 ]; then
        ocf_log err "The 'clone-node-max' parameter must equal '1'."
        exit ${OCF_ERR_CONFIGURED}
    fi

    # If ALUA support is enabled, we need to check the parameters
    if ocf_is_true ${OCF_RESKEY_alua}; then
        # Make sure they are set to something
        if [ -z "${OCF_RESKEY_device_group}" ]; then
            ocf_log err "The 'device_group' parameter is not set!"
            exit ${OCF_ERR_CONFIGURED}
        fi
        if [ -z "${OCF_RESKEY_local_tgt_grp}" ]; then
            ocf_log err "The 'local_tgt_grp' parameter is not set!"
            exit ${OCF_ERR_CONFIGURED}
        fi
        if [ -z "${OCF_RESKEY_remote_tgt_grp}" ]; then
            ocf_log err "The 'remote_tgt_grp' parameter is not set!"
            exit ${OCF_ERR_CONFIGURED}
        fi
        if [ -z "${OCF_RESKEY_m_alua_state}" ]; then
            ocf_log err "The 'm_alua_state' parameter is not set!"
            exit ${OCF_ERR_CONFIGURED}
        fi
        if [ -z "${OCF_RESKEY_s_alua_state}" ]; then
            ocf_log err "The 's_alua_state' parameter is not set!"
            exit ${OCF_ERR_CONFIGURED}
        fi
        #  Currently, we only support using one Master with this RA
        if [ ! -z "${OCF_RESKEY_CRM_meta_master_max}" ] &&
            [ "${OCF_RESKEY_CRM_meta_master_max}" -ne 1 ]; then
            ocf_log err "The 'master-max' parameter must equal '1'."
            exit ${OCF_ERR_CONFIGURED}
        fi
        if [ ! -z "${OCF_RESKEY_CRM_meta_master_node_max}" ] &&
            [ "${OCF_RESKEY_CRM_meta_master_node_max}" -ne 1 ]; then
            ocf_log err "The 'master-node-max' parameter must equal '1'."
            exit ${OCF_ERR_CONFIGURED}
        fi
    fi

    return ${OCF_SUCCESS}
}


scst_meta_data() {
	cat <<-EOF
	<?xml version="1.0"?>
	<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
	<resource-agent name="scst" version="0.1">
	  <version>0.1</version>
	  <longdesc lang="en">The SCST OCF resource agent for ESOS; includes SCST ALUA support.</longdesc>
	  <shortdesc lang="en">SCST OCF RA script for ESOS.</shortdesc>
	  <parameters>
	    <parameter name="alua" unique="0" required="0">
	      <longdesc lang="en">Use to enable/disable updating ALUA status in SCST.</longdesc>
	      <shortdesc lang="en">The 'alua' parameter.</shortdesc>
	      <content type="boolean" default="false" />
	    </parameter>
	    <parameter name="device_group" unique="0" required="0">
	      <longdesc lang="en">The name of the SCST device group (used with ALUA support).</longdesc>
	      <shortdesc lang="en">The 'device_group' parameter.</shortdesc>
	      <content type="string" default="" />
	    </parameter>
	    <parameter name="local_tgt_grp" unique="0" required="0">
	      <longdesc lang="en">The name of the SCST local target group (used with ALUA support).</longdesc>
	      <shortdesc lang="en">The 'local_tgt_grp' parameter.</shortdesc>
	      <content type="string" default="" />
	    </parameter>
	    <parameter name="remote_tgt_grp" unique="0" required="0">
	      <longdesc lang="en">The name of the SCST remote target group (used with ALUA support).</longdesc>
	      <shortdesc lang="en">The 'remote_tgt_grp' parameter.</shortdesc>
	      <content type="string" default="" />
	    </parameter>
	    <parameter name="m_alua_state" unique="0" required="0">
	      <longdesc lang="en">The ALUA state (eg, active) for a Master node (used with ALUA support).</longdesc>
	      <shortdesc lang="en">The 'm_alua_state' parameter.</shortdesc>
	      <content type="string" default="active" />
	    </parameter>
	    <parameter name="s_alua_state" unique="0" required="0">
	      <longdesc lang="en">The ALUA state (eg, nonoptimized) for a Slave node (used with ALUA support).</longdesc>
	      <shortdesc lang="en">The 's_alua_state' parameter.</shortdesc>
	      <content type="string" default="nonoptimized" />
	    </parameter>
	  </parameters>
	  <actions>
	    <action name="meta-data" timeout="5" />
	    <action name="start" timeout="120" />
	    <action name="stop" timeout="60" />
	    <action name="monitor" timeout="20" depth="0" interval="10" role="Master" />
	    <action name="monitor" timeout="20" depth="0" interval="20" role="Slave" />
	    <action name="notify" timeout="20" />
	    <action name="promote" timeout="20" />
	    <action name="demote" timeout="20" />
	    <action name="reload" timeout="20" />
	    <action name="validate-all" timeout="20" />
	  </actions>
	</resource-agent>
	EOF
}


scst_usage() {
    echo "usage: ${0} {start|stop|monitor|validate-all|promote|demote|reload|notify|meta-data}"
    echo ""
    echo "Expects to have a fully populated OCF RA-compliant environment set."
}


scst_promote() {
    # Exit immediately if configuration is not valid
    scst_validate_all || exit ${?}

    # Test the resource's current state
    scst_monitor
    local rc=${?}
    case "${rc}" in
    "${OCF_SUCCESS}")
        # Running as Slave; normal, expected behavior
        ocf_log debug "scst_promote() -> Resource is" \
            "currently running as Slave."
        ;;
    "${OCF_RUNNING_MASTER}")
        # Already a Master; unexpected, but not a problem
        ocf_log info "Resource is already running as Master."
        return ${OCF_SUCCESS}
        ;;
    "${OCF_NOT_RUNNING}")
        # Currently not running; need to start before promoting
        ocf_log info "Resource is currently not running."
        scst_start
        ;;
    *)
        # Failed resource; let the cluster manager recover
        ocf_log err "Unexpected error, cannot promote."
        exit ${rc}
        ;;
    esac

    # Promote only makes sense if we are using ALUA
    if ocf_is_true ${OCF_RESKEY_alua}; then
        check_alua
        # Set the local target group to the "Master" ALUA state
        ocf_log debug "scst_promote() -> Setting target group" \
            "'${OCF_RESKEY_local_tgt_grp}' ALUA state to" \
            "'${OCF_RESKEY_m_alua_state}'..."
        ocf_run scstadmin -noprompt -set_tgrp_attr \
            ${OCF_RESKEY_local_tgt_grp} -dev_group \
            ${OCF_RESKEY_device_group} -attributes \
            state\=${OCF_RESKEY_m_alua_state} || exit ${OCF_ERR_GENERIC}
        # Since there can only be one Master, set the remote target group
        ocf_log debug "scst_promote() -> Setting target group" \
            "'${OCF_RESKEY_remote_tgt_grp}' ALUA state to" \
            "'${OCF_RESKEY_s_alua_state}'..."
        ocf_run scstadmin -noprompt -set_tgrp_attr \
            ${OCF_RESKEY_remote_tgt_grp} -dev_group \
            ${OCF_RESKEY_device_group} -attributes \
            state\=${OCF_RESKEY_s_alua_state} || exit ${OCF_ERR_GENERIC}
    else
        ocf_log err "The ALUA parameters need to be configured before using MS."
        exit ${OCF_ERR_CONFIGURED}
    fi

    # After the resource has been promoted, check whether the promotion worked
    while true; do
        scst_monitor
        if [ ${?} -eq ${OCF_RUNNING_MASTER} ]; then
            ocf_log info "Resource was promoted successfully."
            break
        else
            ocf_log debug "scst_promote() -> Resource still" \
                "awaiting promotion."
            sleep 1
        fi
    done

    # Only return $OCF_SUCCESS if _everything_ succeeded as expected
    return ${OCF_SUCCESS}
}


scst_demote() {
    # Exit immediately if configuration is not valid
    scst_validate_all || exit ${?}

    # Test the resource's current state
    scst_monitor
    local rc=${?}
    case "${rc}" in
    "${OCF_RUNNING_MASTER}")
        # Running as Master; normal, expected behavior
        ocf_log debug "scst_demote() -> Resource is" \
            "currently running as Master."
        ;;
    "${OCF_SUCCESS}")
        # Already running as Slave; nothing to do
        ocf_log debug "scst_demote() -> Resource is" \
            "currently running as Slave."
        return ${OCF_SUCCESS}
        ;;
    "${OCF_NOT_RUNNING}")
        # Not running; getting a demote action in this state is unexpected
        ocf_log err "Resource is currently not running."
        exit ${OCF_ERR_GENERIC}
        ;;
    *)
        # Failed resource; let the cluster manager recover
        ocf_log err "Unexpected error, cannot demote."
        exit ${rc}
        ;;
    esac

    # Demote only makes sense if we are using ALUA
    if ocf_is_true ${OCF_RESKEY_alua}; then
        check_alua
        # Set the local target group to the "Slave" ALUA state
        ocf_log debug "scst_demote() -> Setting target group" \
            "'${OCF_RESKEY_local_tgt_grp}' ALUA state to" \
            "'${OCF_RESKEY_s_alua_state}'..."
        ocf_run scstadmin -noprompt -set_tgrp_attr \
            ${OCF_RESKEY_local_tgt_grp} -dev_group \
            ${OCF_RESKEY_device_group} -attributes \
            state\=${OCF_RESKEY_s_alua_state} || exit ${OCF_ERR_GENERIC}
        # If we're a Slave, we assume the remote side is the Master
        ocf_log debug "scst_demote() -> Setting target group" \
            "'${OCF_RESKEY_remote_tgt_grp}' ALUA state to" \
            "'${OCF_RESKEY_m_alua_state}'..."
        ocf_run scstadmin -noprompt -set_tgrp_attr \
            ${OCF_RESKEY_remote_tgt_grp} -dev_group \
            ${OCF_RESKEY_device_group} -attributes \
            state\=${OCF_RESKEY_m_alua_state} || exit ${OCF_ERR_GENERIC}
    else
        ocf_log err "The ALUA parameters need to be configured before using MS."
        exit ${OCF_ERR_CONFIGURED}
    fi

    # After the resource has been demoted, check whether the demotion worked
    while true; do
        scst_monitor
        if [ ${?} -eq ${OCF_RUNNING_MASTER} ]; then
            ocf_log debug "scst_demote() -> Resource still" \
                "awaiting demotion."
            sleep 1
        else
            ocf_log info "Resource was demoted successfully."
            break
        fi
    done

    # Only return $OCF_SUCCESS if _everything_ succeeded as expected
    return ${OCF_SUCCESS}
}


scst_notify() {
    # We're currently not using this
    ocf_log debug "scst_notify() -> Received a" \
        "'${OCF_RESKEY_CRM_meta_notify_type}' /" \
        "'${OCF_RESKEY_CRM_meta_notify_operation}' notification."

    return ${OCF_SUCCESS}
}


check_alua() {
    # Make sure the directories exist in the SCST sysfs structure
    if [ ! -d "${SCST_SYSFS}/device_groups/${OCF_RESKEY_device_group}" ]; then
        ocf_log err "The '${OCF_RESKEY_device_group}' device group does not exist!"
        exit ${OCF_ERR_INSTALLED}
    fi
    target_groups="${SCST_SYSFS}/device_groups/${OCF_RESKEY_device_group}/target_groups"
    if [ ! -d "${target_groups}/${OCF_RESKEY_local_tgt_grp}" ]; then
        ocf_log err "The '${OCF_RESKEY_local_tgt_grp}' target group does not exist!"
        exit ${OCF_ERR_INSTALLED}
    fi
    if [ ! -d "${target_groups}/${OCF_RESKEY_remote_tgt_grp}" ]; then
        ocf_log err "The '${OCF_RESKEY_remote_tgt_grp}' target group does not exist!"
        exit ${OCF_ERR_INSTALLED}
    fi

    # Check that the given ALUA states are valid
    local valid_m_alua_state=0
    local valid_s_alua_state=0
    for i in ${ALUA_STATES}; do
        if [ "x${OCF_RESKEY_m_alua_state}" = "x${i}" ]; then
            valid_m_alua_state=1
        fi
        if [ "x${OCF_RESKEY_s_alua_state}" = "x${i}" ]; then
            valid_s_alua_state=1
        fi
    done
    if [ ${valid_m_alua_state} -eq 0 ]; then
        ocf_log err "The 'm_alua_state' value is not valid: ${OCF_RESKEY_m_alua_state}"
        exit ${OCF_ERR_INSTALLED}
    fi
    if [ ${valid_s_alua_state} -eq 0 ]; then
        ocf_log err "The 's_alua_state' value is not valid: ${OCF_RESKEY_s_alua_state}"
        exit ${OCF_ERR_INSTALLED}
    fi
}


# Make sure meta-data and usage always succeed
case ${__OCF_ACTION} in
meta-data)
    scst_meta_data
    exit ${OCF_SUCCESS}
    ;;
usage|help)
    scst_usage
    exit ${OCF_SUCCESS}
    ;;
esac

# Anything other than meta-data and usage must pass validation
scst_validate_all || exit ${?}

# Translate each action into the appropriate function call
case ${__OCF_ACTION} in
start)
    scst_start
    ;;
stop)
    scst_stop
    ;;
status|monitor)
    scst_monitor
    ;;
notify)
    scst_notify
    ;;
promote)
    scst_promote
    ;;
demote)
    scst_demote
    ;;
reload)
    ocf_log info "Reloading..."
    scst_start
    ;;
validate-all)
    ;;
migrate_to|migrate_from)
    scst_usage
    exit ${OCF_ERR_UNIMPLEMENTED}
    ;;
*)
    scst_usage
    exit ${OCF_ERR_UNIMPLEMENTED}
    ;;
esac

# Log a debug message and exit
rc=${?}
ocf_log debug "${OCF_RESOURCE_INSTANCE} ${__OCF_ACTION} returned: ${rc}"
exit ${rc}

