Files
secondo/Algebras/Hadoop/clusterManagement/ps-cluster-queryMonitorStatus
2026-01-23 17:03:45 +08:00

98 lines
2.1 KiB
Bash

#!/bin/bash
# Locate the directory this script lives in and load the shared helper
# library that sits next to it. Later parts of the script call
# get_slaveIPs and read PS_PIPE_Width, PARALLEL_SECONDO_MASTER and
# PARALLEL_SECONDO_SLAVES; presumably ps-functions provides them
# (NOTE(review): confirm, ps-functions is not visible from here).
bin=$(dirname -- "$0")
# Turn the possibly relative directory into an absolute path.
bin=$(cd -- "$bin" && pwd)
# Quoted so that an installation path containing whitespace still works.
source "$bin/ps-functions"
# Process the parameters
# The loop keeps calling getopts until OPTIND has moved past the last
# argument ($# + 1). getopts reports "?" both for an unknown option and
# when it meets a word that is not an option; "?" is mapped to "h", so
# passing any argument at all ends with the usage text and an exit.
# NOTE(review): the usage text advertises -s, but the option string is
# only "h", so -s merely prints this help. Confirm whether -s is meant
# to be implemented.
while (( $# + 1 != OPTIND )); do
  getopts "h" optKey
  [[ "$optKey" == "?" ]] && optKey="h"
  if [[ "$optKey" == "h" ]]; then
    echo -e "List all processes involving Secondo inside the cluster.\n"
    echo -e " -h Print this message and exit.\n"
    echo -e " -s List a Short status for the current cluster.\n"
    exit
  fi
done
# aJobs holds one background PID per dispatch slot; it starts out empty.
aJobs=()
# Per-user scratch file to which every checkNode run appends one line.
# NOTE(review): the name under /tmp is predictable; kept unchanged in case
# other tools read this path.
Status="/tmp/.ps-cluster-queryMonitorStatus-${USER}.data"
# Create or truncate the file. The target is quoted so that a user name
# containing whitespace does not cause an "ambiguous redirect".
: > "$Status"
#######################################
# Check every DataServer configured for one node and append a one-line
# summary of the outcome to the status file.
# Globals:
#   PARALLEL_SECONDO_MASTER, PARALLEL_SECONDO_SLAVES (read)
#       Files whose lines are colon-separated with the port in the third
#       field. The node address is expected in the first field.
#       NOTE(review): confirm against the documented file layout.
#   USER (read)   - name looked for in the remote lsof output.
#   Status (read) - path of the file the summary line is appended to.
# Arguments:
#   $1 - address of the node to check.
#   $2 - index of the node; stored but not otherwise used here.
# Outputs:
#   Appends "<addr>\t<#DataServers>\t\t<#started>\t<#failed>(<:port...>)"
#   to $Status.
#######################################
function checkNode
{
  local IPAddr=$1
  local NIdx=$2
  local FPORT=""
  local -i START=0
  local -i FAILED=0
  local -a DSList=()
  local DataServer Port

  # Select this node's entries by exact comparison of the first field and
  # drop duplicate lines. A plain "grep $IPAddr" would also pick up
  # 10.0.0.11 when asked for 10.0.0.1 and treats "." as a wildcard.
  while IFS= read -r DataServer; do
    DSList+=("$DataServer")
  done < <(awk -F: -v ip="$IPAddr" '$1 == ip && !seen[$0]++' \
    "$PARALLEL_SECONDO_MASTER" "$PARALLEL_SECONDO_SLAVES")

  for DataServer in "${DSList[@]}"; do
    # The port is the third colon-separated field of the entry.
    IFS=: read -r _ _ Port _ <<< "$DataServer"
    # A DataServer counts as started when lsof on the node shows something
    # matching "Secondo" and the current user on that port.
    if ssh -o LogLevel=quiet "$IPAddr" "lsof -i :${Port}" \
      | grep Secondo | grep -- "$USER" >/dev/null 2>&1; then
      START=$(( START + 1 ))
    else
      FAILED=$(( FAILED + 1 ))
      FPORT="$FPORT:$Port"
    fi
  done

  printf '%s\t%s\t\t%s\t%s(%s)\n' \
    "$IPAddr" "${#DSList[@]}" "$START" "$FAILED" "$FPORT" >> "$Status"
}
# Run checkNode for every address returned by "get_slaveIPs -m", keeping at
# most PS_PIPE_Width - 1 checks in flight (slots 1 .. PS_PIPE_Width - 1).
IPLIST=($(get_slaveIPs -m))
NDNUM=${#IPLIST[*]}
declare -i NDIDX=0
# Guarantee at least one usable slot: an unset or too small PS_PIPE_Width
# would otherwise leave no slot at all and the dispatch could never finish.
declare -i slotLimit=${PS_PIPE_Width:-2}
if (( slotLimit < 2 )); then
  slotLimit=2
fi
while (( NDIDX < NDNUM )); do
  node=${IPLIST[$NDIDX]}
  launched=0
  for ((token = 1; token < slotLimit; token++)); do
    # A slot is free when it was never used or its job has already exited.
    if [ -z "${aJobs[$token]}" ] || ! kill -0 "${aJobs[$token]}" 2>/dev/null; then
      # Call the function directly in the background. Wrapping it in $( )
      # would run whatever checkNode prints as a command.
      checkNode "$node" "$NDIDX" &
      aJobs[$token]=$!
      NDIDX=$(( NDIDX + 1 ))
      launched=1
      break
    fi
  done
  # Every slot is busy: pause briefly instead of spinning at full speed.
  if (( ! launched )); then
    sleep 0.2 2>/dev/null || sleep 1
  fi
done
# Block until every background check has exited, so the status file is
# complete before it is read. The wait builtin replaces a busy loop that
# polled each slot with "kill -0" and depended on PS_PIPE_Width being set.
wait
# Print the per-node table: the status lines sorted and numbered.
echo -e "\tIPAddr\t\tDataServer\tStarted\tFailed"
sort "$Status" | cat -n
echo -e "\n\n"
# Conclude the status
# A node failed when its last column is "<count>(<ports>)" with a non-zero
# count. The count is matched with [0-9]; "\d" is not a digit class in
# grep's basic syntax, so counts such as 10 or 20 used to be missed.
failedPattern='[1-9][0-9]*\([0-9,:]*\)'
FAILCNT=$(grep -c -E -- "$failedPattern" "$Status")
# grep prints nothing when the file cannot be read; treat that as zero.
FAILCNT=${FAILCNT:-0}
SUCCCNT=$(( NDNUM - FAILCNT ))
echo -e "In total, $SUCCCNT nodes start completely, while $FAILCNT nodes fail."
if (( FAILCNT > 0 )); then
  echo -e "Failed nodes include:"
  grep -E -- "$failedPattern" "$Status" | cut -f1
fi
# Exit with the number of failed nodes. Exit statuses wrap modulo 256, so
# the value is capped at 255 to keep 256 failures from reading as success.
exit $(( FAILCNT > 255 ? 255 : FAILCNT ))