# --- Export metadata (repository viewer header, kept as comments) ---
# File:     secondo/Algebras/Hadoop/clusterManagement/ps-cluster-format
# Size:     758 lines, 24 KiB, plaintext
# Exported: 2026-01-23 17:03:45 +08:00
#!/bin/bash
# Resolve the absolute directory containing this script, then load the shared
# helper functions it ships with (PSEDOPT, oneOf, get_localIPs, ... are
# presumably defined in ps-functions -- verify there).
bin=`dirname "$0"`
bin=`cd "$bin"; pwd`
source $bin/ps-functions
# This script is used to initialize the environment for Parallel Secondo
# Before running this script, following steps must be done first:
# * First, all required computers are connected to be a cluster,
# also the Secondo SDK has been installed on every node already.
# * Second, the hadoop archieve file hadoop-0.20.2.tar.gz is kept in
# $SECONDO_BUILD_DIR/bin .
# * Third, a Configuration file named ParallelSecondoConfig.ini
# is kept in $SECONDO_BUILD_DIR/bin too.
# # In this script, following steps will be done:
# # 1. Check the existence of all required files
# # 2. Unpack the Hadoop distribution, and set its configurations based on the ParallelSecondoConfig.ini
# # 3. Create the .parasecrc file, which set up needed environment variables on every cluster node
# # before the parallel Secondo starts
# # 4. Prepare the masters and slaves files required by every node
# # 5. Distribute .parasecrc file to every cluster node, and source it in each $HOME/.bashrc
# # 6. Create data server folders on every cluster machine
# 0. Process the arguments.
WARNINFO="Warning !! " # Prefix for warning messages
ERRORINFO="ERROR !! " # Prefix for fatal error messages
declare -a NODESARRAY # The list for hadoop nodes
declare -a DSEVSARRAY # The list for data servers
HDVERSION="hadoop-0.20.2" # Hadoop release name; also the archive basename
DSCONFNAME="conf" # Per-data-server configuration folder name
DSMASTERNAME="master" # File listing the master data server
DSSLAVESNAME="slaves" # File listing the slave data servers
DSPSFSNAME="PSFS" # Parallel Secondo File System folder name
DSMSECNAME="msec" # Mini Secondo folder name
DSMSDBNAME="msec-databases" # Mini Secondo databases folder name
DMHADPNAME="hadoop" # Hadoop folder on a main data server
DMEBINNAME="bin" # Folder holding helper scripts on each data server
RCFILENAME=".parasecrc" # Per-node environment file, sourced from .bashrc
MDSBINLIST="ds_scriptList" # File listing the scripts copied into bin/
PSCONFILE=$SECONDO_BUILD_DIR/bin/ParallelSecondoConfig.ini
PSDKLOC=$SECONDO_BUILD_DIR/bin/ps_SDK
### ----------------------------------------------------------------------------------------
# Auxiliary Functions
# Strip leading and trailing whitespace from $1 and print the result.
#
# BUG FIX: the original implementation ('echo $(echo $1 | sed ...)') expanded
# the argument unquoted, so the shell word-split it and glob-expanded any
# '*', '?' or '[...]' in the value against the current directory, silently
# corrupting configuration values.  Pure parameter expansion trims the ends
# without word-splitting, globbing, or spawning two sed processes per call.
function trim
{
local s="$1"
s="${s#"${s%%[![:space:]]*}"}" # remove leading whitespace
s="${s%"${s##*[![:space:]]}"}" # remove trailing whitespace
printf '%s\n' "$s"
}
# Read configurations in $PSCONFILE
# Read values from one [section] of $PSCONFILE (INI-style).
#
# Arguments:
#   $1 - EnvName : the [section] name to look for
#   $2 - TitName : optional key name; when empty, every non-comment line of
#                  the section is collected verbatim
#   $3 - DlmSign : delimiter between key and value (e.g. "=" or "+=");
#                  must be non-empty whenever TitName is given
# Outputs:
#   Prints the collected values as "${values[*]}" (space-joined by default);
#   callers set IFS=$'\n' around the call so values containing spaces survive.
# NOTE(review): with any delimiter other than "+=" the value index vnum is
# never advanced, so only the LAST match is kept; "+=" accumulates all
# matches.  findTitle is assigned but never read.
function readPSConf
{
EnvName=$(trim "$1")
TitName=$(trim "$2")
DlmSign=$(trim "$3")
# A key without a delimiter cannot be parsed -- refuse the combination.
if [ "$TitName" != "" -a "$DlmSign" = "" ]; then
echo "${WARNINFO}The delimeter cannot be empty while the title is defined." >&2
return
fi
findEnvName=false
findTitle=false
local values=()
vnum=${#values[*]}
while read LINE; do
LINE=$(trim "$LINE")
if [ "$LINE" != "" ]; then
# Ignore comments
if [ "$(echo $LINE | grep '^#' )" != "" ]; then
continue
fi
# A "[...]" line is a section header.
if [ $(echo $LINE | grep '^\[\w*\]$') ]; then
# Read title
if ! $findEnvName ; then
if [ $(echo $LINE | tr -d '[]') = "$EnvName" ]; then
findEnvName=true;
fi
else
# Return when finds another environment
echo "${values[*]}"
return
fi
else
# Read Parameter values
if $findEnvName ; then
if [ "$TitName" != "" ]; then
# Keep only lines containing the delimiter whose key equals TitName.
if [ "$(echo $LINE | grep ${DlmSign})" != "" ]; then
iTit=$(trim ${LINE%%${DlmSign}*})
iValue=$(trim ${LINE#*${DlmSign}})
if [ "$TitName" = "$iTit" ]; then
values[${vnum}]="$iValue"
if [ "$DlmSign" = "+=" ]; then
let vnum++
fi
fi
fi
else
# No key requested: collect the whole line.
values[${vnum}]="$LINE"
let vnum++
fi
fi
fi
fi
done < ${PSCONFILE}
# Section ran to end of file -- emit whatever was collected.
echo "${values[*]}"
}
# ------ Example of Reading ParallelSecondoConfig.ini File -----
#OLDIFS=$IFS
#IFS=$'\n'
#ClusterInfo=$(readPSConf "Cluster" "Slaves" "+=")
#for item in $ClusterInfo; do
# echo "item: $item"
#done
#IFS=$OLDIFS
# ------ Example of Reading ParallelSecondoConfig.ini File -----
#Set a SecondoConfig parameter
#If the position line is set, then only add the new parameter after that
#Or else, remove old setting before inserting the new value.
# Set a parameter in a SecondoConfig-style file, editing it in place.
#
# Arguments:
#   $1 - configuration file to edit
#   $2 - parameter name
#   $3 - parameter value
#   $4 - optional line number; when given (non-zero) the new "name=value"
#        line is appended after that line WITHOUT deleting old settings;
#        when absent/zero, all existing (possibly commented) occurrences of
#        the parameter are deleted first and the new line is inserted at
#        the position of the first old occurrence.
# NOTE(review): PSEDOPT is presumably defined in ps-functions and yields the
# platform-specific sed in-place flag (GNU '-i' vs BSD '-i .bak') -- verify.
# NOTE(review): when several occurrences exist, DPos becomes e.g. "3,7" and
# "sed 3,7d" deletes the whole RANGE, not just the matching lines.
function setSecConf
{
DSDBCONF=$1 #Configure File
ParaName=$2
ParaValue=$3
declare -i PPos=$4
if [ $PPos -eq 0 ]; then
DPos=($(grep -n "^[#]*[ ]*$ParaName" $DSDBCONF | cut -d':' -f1))
declare -i APos=$(( ${DPos[0]} - 1 )) # Append position
DPos=$( echo ${DPos[*]} | tr ' ' ',' )
sed $(PSEDOPT) "${DPos}d" $DSDBCONF # Delete old parameter
else
declare -i APos=$PPos
fi
NewPara="$ParaName=$ParaValue"
# sed's 'a' command inserts after the addressed line APos.
sed $(PSEDOPT) "${APos}a \\
$NewPara
" $DSDBCONF
}
# Check the existence of specific utility
# Abort the installation when the utility named in $1 is not available.
#
# FIXES: 'which' is an external, non-portable tool whose output format and
# exit behaviour vary between platforms; the POSIX shell builtin
# 'command -v' is the reliable replacement.  'exit -1' is also not a valid
# POSIX exit status (bash maps it to 255); use the conventional 'exit 1'.
# NOTE: callers must invoke this function directly (not via command
# substitution) for the exit to terminate the script.
function isExist
{
UName=$1
UPath=$(command -v "${UName}")
if [ -z "$UPath" ]; then
echo "${ERRORINFO}The utility ${UName} is not installed" >&2
exit 1
fi
}
### ----------------------------------------------------------------------------------------
# 1. Check the existence of all required files
# a. Check the availability of Secondo
# The whole installation is anchored at $SECONDO_BUILD_DIR; abort early when
# it is unset or points to a non-existing directory.
# NOTE(review): 'exit -1' is non-standard; bash delivers it as status 255.
if [ "$SECONDO_BUILD_DIR" = "" ]; then
echo "${ERRORINFO}The Secondo database is not correctly installed in this machine."
echo "Installation for Parallel Secondo Fails. "
exit -1
elif [ ! -d ${SECONDO_BUILD_DIR} ]; then
echo "${ERRORINFO}The \$SECONDO_BUILD_DIR: $SECONDO_BUILD_DIR does not exist."
echo "Installation for Parallel Secondo Fails. "
exit -1
fi
# Create the staging folder that will hold all generated installation files.
if [ ! -d $PSDKLOC ]; then
mkdir -p $PSDKLOC
fi
# b. Check the exist of the Hadoop archieve
HADOOPARV=$SECONDO_BUILD_DIR/bin/${HDVERSION}.tar.gz
HADOOPATH=$SECONDO_BUILD_DIR/bin/${HDVERSION}
if [ ! -f $HADOOPARV ]; then
echo "${ERRORINFO}The Hadoop archieve is not prepared in this machine."
echo "Looking for $HADOOPARV ... "
echo "Installation for Parallel Secondo Fails. "
exit -1
else
#Extract the Hadoop archieve to $SECONDO_BUILD_DIR
# Remove any leftover extraction from a previous run so the unpack always
# starts from a pristine tree.
if [ -d $HADOOPATH -o -f $HADOOPATH ]; then
echo "${WARNINFO}The last extraction of Hadoop $HADOOPATH is deleted."
rm -rf $HADOOPATH
fi
echo "Unpacking $HADOOPARV to $HADOOPATH ... "
tar -xzf $HADOOPARV -C $SECONDO_BUILD_DIR/bin/
fi
# The exist of hadoop archieve file is put in $SECONDO_BUILD_DIR/bin
# It may better be put at $SECONDO_SDK, which is not set at least in linux64 platform
# c. Check the exist of ParallelSecondoConfig.ini
if [ ! -f $PSCONFILE ]; then
echo -e "The configuration file for installing Parallel Secondo \n
$PSCONFILE \n does not exist."
echo "Installation for Parallel Secondo Fails. "
exit -1
fi
# d. Check the availability of Java,
# JAVA_HOME is taken from ParallelSecondoConfig.ini first, falling back to
# the caller's $JAVA_HOME; the selected JVM must exist and be at least Java 6.
OLDIFS=$IFS
IFS=$'\n'
ParaInfo=($(readPSConf "Hadoop" "hadoop-env.sh:JAVA_HOME" "="))
javaHome=$(trim ${ParaInfo[0]})
IFS=$OLDIFS
if [ "$javaHome" = "" ]; then
if [ "$JAVA_HOME" = "" ]; then
echo "${ERRORINFO}The Java runtime is not correctly installed in this machine."
echo "Installation for Parallel Secondo Fails. "
exit -1
else
javaHome=${JAVA_HOME}
fi
fi
if [ ! -d "$javaHome" ]; then
echo "${ERRORINFO}The \$javaHome: $javaHome does not exist."
echo "Installation for Parallel Secondo Fails. "
exit -1
fi
javaRun=$javaHome/bin/java
# Extract the minor version from a 'java version "1.x.y"' banner line.
JVersion=$($javaRun -version 2>&1 | grep "java version" | cut -d' ' -f 3 | tr -d \" | cut -d'.' -f2)
# BUG FIX: the original test was
#   [[ "$JVersion" =~ "^[0-9]+$" || $JVersion < 6 ]]
# A QUOTED right-hand side of =~ is matched as a literal string (so the
# first clause was effectively always false), and '<' inside [[ ]] is a
# lexicographic string comparison, not numeric.  The intended check is:
# reject when the parsed version is not a number, or is numerically < 6.
if ! [[ "$JVersion" =~ ^[0-9]+$ ]] || (( JVersion < 6 )); then
echo "${ERRORINFO}The Java runtime version must be greater than 5."
echo "Installation for Parallel Secondo Fails. "
exit -1
fi
# e. Check the availability of $MDSBINLIST
if [ ! -f ${bin}/$MDSBINLIST ]; then
echo "${ERRORINFO}The file listing required data server scripts does not exist."
echo "Installation for Parallel Secondo Fails. "
exit -1
fi
# f. Check prerequisites on the current computer, includes:
# passphraseless ssh, screen, ifconfig, etc
# BUG FIX: these checks were invoked as '$(isExist ssh)'.  Command
# substitution runs the function in a subshell, so the 'exit' inside
# isExist only terminated that subshell and a missing utility never aborted
# the installation.  Calling the function directly restores the intended
# fail-fast behaviour.
isExist ssh
isExist screen
isExist ifconfig
# 2. Unpack the Hadoop distribution, and set its configurations based on the ParallelSecondoConfig.ini
# Set Hadoop configurations based on the current node.
# The configuration files of Hadoop includes:
# a hadoop/conf/hadoop-env.sh
# b hadoop/conf/masters
# c hadoop/conf/slaves
# d hadoop/conf/core-site.xml
# e hadoop/conf/hdfs-site.xml
# f hadoop/conf/mapreduce-site.xml
HADOOPCONF=$HADOOPATH/conf
# a hadoop/conf/hadoop-env.sh
# ** Set Java_Home
HPENV=$HADOOPCONF/hadoop-env.sh
Pattern="^[#]*[ ]*export[ ]*JAVA_HOME"
NJHOME="export JAVA_HOME=$javaHome"
# Line number of the last (possibly commented-out) JAVA_HOME export.
JHPos=$(grep -n "$Pattern" $HPENV | tail -1 | cut -d':' -f1)
# NOTE(review): grep -n yields an EMPTY string (never a negative number)
# when nothing matches; '<' in [[ ]] compares lexicographically, so this
# branch catches the empty string only by accident ("" sorts before "0").
if [[ $JHPos < 0 ]]; then
JHPos=0
fi
# remove the old set up for JAVA_HOME
sed $(PSEDOPT) '/'"$Pattern"'/d' $HPENV
# Insert the fresh export where the old one was (sed 'a' appends after the
# addressed line, hence the decrement).
let JHPos--
sed $(PSEDOPT) "${JHPos}a \\
$NJHOME
" $HPENV
# ** Set HADOOP_OPTS
# Collect all HADOOP_OPTS fragments from the [Hadoop] section and join them
# into a single quoted assignment.
OLDIFS=$IFS
IFS=$'\n'
Opts=($(readPSConf "Hadoop" "hadoop-env.sh:HADOOP_OPTS" "+="))
IFS=$OLDIFS
HPOPTS="HADOOP_OPTS=\""
for item in ${Opts[*]}; do
HPOPTS="$HPOPTS$item "
done
HPOPTS="$HPOPTS\""
Pattern="^[#]*[ ]*HADOOP_OPTS"
LLPos=$(grep -n "$Pattern" $HPENV | head -1 | cut -d':' -f1)
# NOTE(review): same accidental lexical comparison as above -- empty LLPos
# falls back to the last line of the file.
if [[ $LLPos < 0 ]]; then
LLPos=$(wc -l $HPENV | awk '{print $1}')
fi
#sed $(PSEDOPT) '/'"$Pattern"'/d' $HPENV
# Replace the first existing HADOOP_OPTS line with the new assignment.
sed $(PSEDOPT) "${LLPos}d" $HPENV
let LLPos--
sed $(PSEDOPT) "${LLPos}a \\
$HPOPTS
" $HPENV
# b hadoop/conf/masters
# c hadoop/conf/slaves
# Cluster entries use the format "IP:dataServerPath:port"; the IP part feeds
# Hadoop's masters/slaves files, and every node must be reachable over
# passphraseless ssh.
HPMaster=$HADOOPCONF/masters
HPSlaves=$HADOOPCONF/slaves
OLDIFS=$IFS
IFS=$'\n'
CMInfo=($(readPSConf "Cluster" "Master" "="))
MasterDS=$(trim ${CMInfo[0]})
IFS=$OLDIFS
# A well-formed entry contains exactly two ':' separators.
if [ $(echo $MasterDS | grep -o ":" | wc -l) -ne 2 ]; then
echo "${ERRORINFO}Format of Cluster:Master Configuration is wrong. "
echo "Installation for Parallel Secondo Fails. "
exit -1
fi
MIP=$(echo $MasterDS | cut -d':' -f1)
echo $MIP > $HPMaster
# check the passphraseless ssh to the master
MHost=$(ssh -q -o PasswordAuthentication=no $MIP hostname)
if [ "${MHost}" = "" ]; then
echo "${ERRORINFO}Can't ssh to ${MIP} without a passphrase."
exit -1
fi
OLDIFS=$IFS
IFS=$'\n'
CSInfo=($(readPSConf "Cluster" "Slaves" "+="))
IFS=$OLDIFS
cat /dev/null > $HPSlaves
for slave in ${CSInfo[*]}; do
if [ $(echo $slave | grep -o ":" | wc -l) -ne 2 ]; then
echo "${ERRORINFO}Format of a Cluster:Slave Configuration is wrong. "
echo "Installation for Parallel Secondo Fails. "
exit -1
fi
SIP=$(echo $slave | cut -d':' -f1)
echo $SIP >> $HPSlaves
done
# Use sort + uniq commands to remove duplicated lines.
sort $HPSlaves | uniq > ${HPSlaves}.tmp
mv ${HPSlaves}.tmp $HPSlaves
if [ ! -s $HPSlaves ]; then
echo "${ERRORINFO}No slaves are defined."
echo "Installation for Parallel Secondo Fails. "
exit -1
fi
# check the passphraseless ssh to slaves
while read Slave; do
SIP=$(echo $Slave | cut -d':' -f1)
SHost=$(ssh -q -o PasswordAuthentication=no $SIP hostname)
if [ "${SHost}" = "" ]; then
echo "${ERRORINFO}Can't ssh to ${SIP} without a passphrase."
exit -1
fi
done < $HPSlaves
# Get all configurations about Hadoop
OLDIFS=$IFS
IFS=$'\n'
HXCInfo=($(readPSConf "Hadoop" "" ""))
IFS=$OLDIFS
# d hadoop/conf/core-site.xml
# e hadoop/conf/hdfs-site.xml
# f hadoop/conf/mapreduce-site.xml
# Entries of the [Hadoop] section shaped "file.xml:name=value" become
# <property> elements of the matching XML file.  Everything after the
# opening <configuration> tag is discarded first, then regenerated.
XMLFile=(core-site.xml hdfs-site.xml mapred-site.xml)
for xfileName in ${XMLFile[*]}; do
xfile=$HADOOPCONF/$xfileName
ENDPos=$(cat -n $xfile | grep "<configuration>" | cut -f1)
sed $(PSEDOPT) "$(( $ENDPos + 1)),\$d" $xfile #Remove all exist configurations
ITNum=0
while [ $ITNum -lt ${#HXCInfo[*]} ]; do
item=${HXCInfo[${ITNum}]}
if [ "$(echo $item | grep "^${xfileName}:")" != "" ]; then
# Split "file:name=value" into the property name and value.
ititle=$(trim ${item%=*})
ititle=$(trim ${ititle#*:})
ivalue=$(trim ${item#*=})
PROPERTY="\n\t<property>\n\t\t<name>$ititle</name>\n
\t\t<value>$ivalue</value>\n\t</property>\n"
echo -e $PROPERTY >> $xfile
fi
let ITNum++
done
# Re-close the configuration element that the sed above removed.
echo "</configuration>" >> $xfile
done
# 3. Create the .parasecrc file, which set up needed environment variables on every cluster node
# before the parallel Secondo starts
PSRCFILE=$PSDKLOC/psrc_tmp      # template for every node's .parasecrc
CLSTFILE=$PSDKLOC/clusterNodes  # one IP per line, master + slaves deduplicated
DSRVFILE=$PSDKLOC/dataServers   # one "IP:path[:path...]" line per node
cat /dev/null > $PSRCFILE
cat /dev/null > $CLSTFILE
cat /dev/null > $DSRVFILE
#Merge master and slaves to $CLSTFILE
cat $HPMaster $HPSlaves | sort | uniq > ${CLSTFILE}
# Deduplicate the combined master+slave entries while PRESERVING order
# (awk '!x[$0]++' keeps the first occurrence of each line).
CNInfo=(${CMInfo[*]} ${CSInfo[*]})
CNInfo=($(echo ${CNInfo[*]} | tr ' ' '\n' | awk '!x[$0]++' | tr '\n' ' ' ))
#Find all cluster nodes and data servers
# For every node IP, append each data-server path configured for that IP,
# producing one "IP:path1:path2..." line in $DSRVFILE.
while read Node; do
DSLOC="$Node"
ITNum=0
while [ $ITNum -lt ${#CNInfo[*]} ]; do
ivalue=${CNInfo[${ITNum}]}
ivalue=${ivalue#*=}
if [ "$(echo $ivalue | grep "^$Node:" )" != "" ]; then
loc=$( echo $ivalue | cut -d':' -f2)
DSLOC="$DSLOC:$loc"
fi
let ITNum++
done
echo "$DSLOC" >> $DSRVFILE
done < $CLSTFILE
#Prepare the template script for .parasecrc
# All '$' signs below are escaped so they are expanded on the TARGET node
# when .parasecrc is sourced, not here.
echo "export PARALLEL_SECONDO_BUILD_DIR=\$SECONDO_BUILD_DIR" >> $PSRCFILE
echo "export PARALLEL_SECONDO_DATASERVER_NAME=\$USER" >> $PSRCFILE
echo "PARALLEL_SECONDO_MAINDS=\$(echo \${PARALLEL_SECONDO} | cut -d':' -f 1)" >> $PSRCFILE
echo "export PARALLEL_SECONDO_MAINDS=\$PARALLEL_SECONDO_MAINDS/\$PARALLEL_SECONDO_DATASERVER_NAME" >> $PSRCFILE
echo "export PARALLEL_SECONDO_CONF=\$PARALLEL_SECONDO_MAINDS/${DSCONFNAME}" >> $PSRCFILE
echo "export PARALLEL_SECONDO_MASTER=\$PARALLEL_SECONDO_CONF/${DSMASTERNAME}" >> $PSRCFILE
echo "export PARALLEL_SECONDO_SLAVES=\$PARALLEL_SECONDO_CONF/${DSSLAVESNAME}" >> $PSRCFILE
echo "export PARALLEL_SECONDO_PSFSNAME=\"${DSPSFSNAME}\"" >> $PSRCFILE
echo "export PARALLEL_SECONDO_MINI_NAME=\"${DSMSECNAME}\"" >> $PSRCFILE
echo "export PARALLEL_SECONDO_MINIDB_NAME=\"${DSMSDBNAME}\"" >> $PSRCFILE
echo "export HADOOP_HOME=\$PARALLEL_SECONDO_MAINDS/hadoop" >> $PSRCFILE
echo "export PATH=\$PATH:\$HADOOP_HOME/bin" >> $PSRCFILE
echo "export PATH=\$PATH:\$PARALLEL_SECONDO_MAINDS/${DMEBINNAME}" >> $PSRCFILE
# 4. Prepare the masters and slaves files required by every node
# Pass 1 (IDX=1) writes the master entry, pass 2 the slave entries.  Each
# output line is "IP:<path>/$USER/PSFS:port" -- the PSFS folder is nested
# under the invoking user's name so several users can share one cluster.
PSMFILE=$PSDKLOC/ps_master_tmp
PSSFILE=$PSDKLOC/ps_slaves_tmp
for IDX in {1..2}; do
if [ $IDX -eq 1 ]; then
ARRAY=$MasterDS
FILE=$PSMFILE
else
ARRAY=${CSInfo[*]}
FILE=$PSSFILE
fi
cat /dev/null > $FILE
for item in ${ARRAY[*]};do
IPAddr=$(echo $item | cut -d':' -f1)
PSFSPT=$(echo $item | cut -d':' -f2)
MSPORT=$(echo $item | cut -d':' -f3)
PSFSPT="$PSFSPT/$USER/$DSPSFSNAME"
echo "$IPAddr:$PSFSPT:$MSPORT" >> $FILE
done
done
# 5. Prepare each data server's SecondoConfig.ini file
MSECLIST=$PSDKLOC/miniSecList #List of all data servers
DBCONFILE="SecondoConfig.ini"
# Following parameters need to be changed.
PT_SECHOME="SecondoHome"
PT_SECPORT="SecondoPort"
PT_PARASEC="\[ParallelSecondo\]" #Add parameters instead of change values
PT_PSFSNME="SecondoFilePath"
PT_LIPADDR="localIP"
MDBCONF=$SECONDO_BUILD_DIR/bin/$DBCONFILE
# 5a. Clean all Parallel Secondo configuration in $MDBCONF
# PSCST = line number of the [ParallelSecondo] section header (0 if absent).
declare -i PSCST=$(grep -n "$PT_PARASEC" $MDBCONF| cut -d':' -f1)
if [ $PSCST -gt 0 ]; then
# PSCED = last line of the section: either the line before the next
# "[...]" header, or the end of the file when no further section exists.
declare -i PSCED=$(sed -n "$(( $PSCST + 1 )),$ p" $MDBCONF | grep -n "^[[]" | head -1 | cut -d':' -f1)
if [ $PSCED -eq 0 ]; then
PSCED=$(wc -l $MDBCONF | cut -d' ' -f1)
else
PSCED=$(( $PSCST + $PSCED - 1 ))
fi
# Blank out all non-comment lines of the section, keeping the header.
sed $(PSEDOPT) "$(( $PSCST + 1 )),${PSCED}s/^\([^#]\)*//" $MDBCONF
# Delete the backup file in MacOSX
if [ "$SECONDO_PLATFORM" = "mac_osx" ]; then
rm "${MDBCONF}.bak"
fi
else
# No section yet: append the (unescaped) "[ParallelSecondo]" header and
# remember its line number for later insertions.
echo "$PT_PARASEC" | sed 's/\\//g' >> $MDBCONF
PSCST=$(grep -n "$PT_PARASEC" $MDBCONF| cut -d':' -f1)
fi
# Build the deduplicated list of all data servers (master first), then
# derive one SecondoConfig.ini per data server from the master template.
cat $PSMFILE $PSSFILE | awk '!x[$0]++' > $MSECLIST
NodeIPAddr=""
while read dataServ; do
# Each entry is "IP:PSFS-path:port"; the database home is the PSFS path
# with the PSFS folder name swapped for the msec-databases folder name.
IPAddre=$(echo $dataServ | cut -d':' -f1)
PSFSLoc=$(echo $dataServ | cut -d':' -f2)
SecHome=$(echo $PSFSLoc | sed "s/$DSPSFSNAME/$DSMSDBNAME/")
PortNum=$(echo $dataServ | cut -d':' -f3)
DSDBCONF=$PSDKLOC/$DBCONFILE.$IPAddre.$PortNum
cp $MDBCONF $DSDBCONF
# Change SecondoHome & SecondoPort
ChgPName=($PT_SECHOME $PT_SECPORT)
ChgValue=($SecHome $PortNum)
Cnt=0
while [ $Cnt -lt ${#ChgPName[*]} ]; do
ParaName=${ChgPName[$Cnt]}
ParaVale=${ChgValue[$Cnt]}
setSecConf $DSDBCONF $ParaName $ParaVale
let Cnt++
done
# In case the parallel Secondo environment is not initialized
# if [ "$(grep "$PT_PARASEC" $DSDBCONF )" = "" ]; then
# echo "$PT_PARASEC" | sed 's/\\//g' >> $DSDBCONF
# fi
# Add PSFS & LocalIP to $PT_PARASEC environment
AddPName=($PT_PSFSNME $PT_LIPADDR)
AddValue=($PSFSLoc $IPAddre)
Cnt=0
while [ $Cnt -lt ${#AddPName[*]} ]; do
ParaName=${AddPName[$Cnt]}
ParaVale=${AddValue[$Cnt]}
# PPos=$(grep -n "$PT_PARASEC" $DSDBCONF | tail -1 | cut -d':' -f1)
setSecConf $DSDBCONF $ParaName $ParaVale $PSCST
let Cnt++
done
# Delete the backup file in MacOSX
if [ "$SECONDO_PLATFORM" = "mac_osx" ]; then
rm "${DSDBCONF}.bak"
fi
done < $MSECLIST
# Generate one parasecrc file per node: PARALLEL_SECONDO carries the node's
# data-server paths, and PARALLEL_SECONDO_DBCONFIG lists the per-port
# SecondoConfig files, colon-separated.
ANODES=($(cat $DSRVFILE | tr '\n' ' '))
for Node in ${ANODES[*]}; do
nodeAddr=${Node%%:*}
nodeRCFile=$PSDKLOC/parasecrc_${nodeAddr}
echo "export PARALLEL_SECONDO=${Node#*:}" > $nodeRCFile
cat $PSRCFILE >>$nodeRCFile
ConfPath=$(echo $Node | cut -d':' -f2)
#Get all port numbers of current node
portArr=($(grep $nodeAddr $MSECLIST | cut -d':' -f3))
DBCONFIG="export PARALLEL_SECONDO_DBCONFIG="
for port in ${portArr[*]}; do
DBCONFIG="${DBCONFIG}${ConfPath}/${USER}/conf/${DBCONFILE}.${port}:"
done
# Strip the trailing ':' left by the loop above.
DBCONFIG=$(echo $DBCONFIG | sed 's/:$//')
echo $DBCONFIG >> $nodeRCFile
done
# 6. Distribute files and folders to every cluster node, including:
# # a. $HOME/.parasecrc , and source it in each $HOME/.bashrc
# # b. msec , msec-databases, PSFS folders
# # c. conf, hadoop, HDFS (only on the main data server of each node)
SDSFLODER=$PSDKLOC/sdbFolder #Normal (slave) data server folder
MDSFLODER=$PSDKLOC/mdbFolder #Main data server folder (also gets conf/hadoop/bin)
# Prepare everything that a master and slave data server need on current machine
# Then copy these folders to remote nodes, instead of creating files and folders one by one
echo "Preparing all needed files and folder for master and slave data servers ..."
mkdir -p $SDSFLODER/$DSMSECNAME #msec
mkdir -p $SDSFLODER/$DSMSDBNAME #msec-databases
mkdir -p $SDSFLODER/$DSPSFSNAME #PSFS
cp -r $SDSFLODER $MDSFLODER
mkdir -p $MDSFLODER/$DSCONFNAME #conf
mkdir -p $MDSFLODER/$DMHADPNAME #hadoop
mkdir -p $MDSFLODER/$DMEBINNAME #bin
# use compression to speed up copying files
# SLEVEL = number of path components to strip so the archive content lands
# directly inside the target folder.
declare -i SLEVEL=$(echo $HADOOPATH | tr '/' '\n' | sed '/^$/d' | wc -l)
tar -cf - $HADOOPATH | tar -xf - -C $MDSFLODER/hadoop --strip=$SLEVEL
cp $PSMFILE $MDSFLODER/$DSCONFNAME/$DSMASTERNAME
cp $PSSFILE $MDSFLODER/$DSCONFNAME/$DSSLAVESNAME
cat ${bin}/$MDSBINLIST | xargs -i cp ${bin}/{} $MDSFLODER/$DMEBINNAME
# NOTE(review): SLEVEL is redefined here and later reused by formatNode when
# unpacking the archives on remote nodes.
declare -i SLEVEL=$(echo $MDSFLODER | tr '/' '\n' | sed '/^$/d' | wc -l)
tar -czf ${MDSFLODER}.tar.gz ${MDSFLODER} &> /dev/null
tar -czf ${SDSFLODER}.tar.gz ${SDSFLODER} &> /dev/null
#echo "---------------------"
#echo "The Data Server List"
#echo "---------------------"
#cat $DSRVFILE
#echo "---------------------"
# Set up one cluster node over ssh/scp.
#
# Arguments:
#   $1 - node entry "IP:dataServerPath[:dataServerPath...]"
#   $2 - normalized platform name ("linux" or "mac_osx")
# Side effects on the remote node:
#   * installs $HOME/.parasecrc and hooks it into $HOME/.bashrc
#   * creates every data-server folder and unpacks the prepared archive
#     (main data server gets mdbFolder, the rest get sdbFolder)
#   * copies the per-port SecondoConfig.ini files to the main data server
# Globals read: PSDKLOC, RCFILENAME, MDSFLODER, SDSFLODER, SLEVEL, USER,
#   DBCONFILE, HOME.
function formatNode
{
node=$1
curPlat=$2
nodeIP=${node%%:*}
dataServers=($(echo ${node#*:} | tr ':' ' '))
nodeRCFile=$PSDKLOC/parasecrc_${nodeIP}
scp -o LogLevel=quiet $nodeRCFile $nodeIP:$RCFILENAME
# Delete exist commands if necessary
CLNUM=$(ssh -o LogLevel=quiet $nodeIP "grep -n \"source \\\$HOME/${RCFILENAME}\" \$HOME/.bashrc" | cut -d':' -f1)
if [ "$CLNUM" != "" ]; then
ssh -o LogLevel=quiet $nodeIP "sed -i \"${CLNUM}d\" \$HOME/.bashrc"
fi
#source .parasecrc file in the .bashrc, to set all required environment
case ${curPlat} in
# Process linux and linux64 with same statements
linux )
# Execute the .parasecrc directly after processing .secondorc
SCNUM=$(ssh -o LogLevel=quiet $nodeIP "grep -n \"source [\\\$A-Z_.\\\/]*secondorc [\\\$a-zA-Z_.\\\/]*\" \$HOME/.bashrc" | cut -d':' -f1)
if [ "$SCNUM" == "" ]; then
SCNUM=1
else
let SCNUM++
fi
ssh -o LogLevel=quiet $nodeIP "sed -i \"${SCNUM}isource \\\$HOME/${RCFILENAME}\" \$HOME/.bashrc"
;;
mac_osx )
ssh -o LogLevel=quiet $nodeIP "echo \"source \\\$HOME/$RCFILENAME\" >> \$HOME/.bashrc"
echo "Note!!The $HOME/.bashrc file must be executed in Mac OS X shell prompt" >&2
echo -e "This step has to be done manually by users.\n\n" >&2
;;
esac
# The FIRST path of a node entry is its main data server; it receives the
# full mdbFolder archive (conf/hadoop/bin included), all others sdbFolder.
declare -i sidx=0
for dserv in ${dataServers[*]}; do
dserv="$dserv/$USER" #Use $USER to different users' data servers on a same cluster node
ssh -o LogLevel=quiet $nodeIP "mkdir -p $dserv"
if [ $sidx -eq 0 ]; then
# This is the main data server on the current node
ARVPATH=${MDSFLODER}
else
# This is a normal data server on the current node
ARVPATH=${SDSFLODER}
fi
ARVNAME=${ARVPATH##*/}
scp -o LogLevel=quiet ${ARVPATH}.tar.gz $nodeIP:$dserv
ssh -o LogLevel=quiet $nodeIP "tar -xmf $dserv/${ARVNAME}.tar.gz -C $dserv/ --strip=$SLEVEL "
#Copy SecondoConfig.ini files
if [ $sidx -eq 0 ]; then
FILES=($(ls $PSDKLOC/$DBCONFILE.$nodeIP.*))
for file in ${FILES[*]}; do
port=${file##*.}
scp -o LogLevel=quiet $PSDKLOC/$DBCONFILE.$nodeIP.$port $nodeIP:$dserv/conf/${DBCONFILE}.$port
done
fi
let sidx++
done
}
# Normalize the platform name: trailing digits are stripped, so "linux" and
# "linux64" are both treated as "linux".
CURPLAT=$(echo $SECONDO_PLATFORM | sed 's/[0-9]*$//')
if [ "$CURPLAT" != "linux" -a "$CURPLAT" != "mac_osx" ]; then
echo "${ERRORINFO}Unknown Platform, Check your \$SECONDO_PLATFORM definition."
echo "Installation for Parallel Secondo Fails. "
exit 1
fi
# Distribute the prepared data-server folders to every cluster node.
# At most $PS_PIPE_Width formatNode jobs run concurrently: each background
# job occupies a "token" slot, and a slot is reused once the job that held
# it has terminated (kill -0 probes whether the PID is still alive).
ANODES=($(cat $DSRVFILE | tr '\n' ' '))
NDNUM=${#ANODES[*]}
aJobs=()
echo "Start to distribute data servers on the cluster ..."
for ((NDIDX=0;NDIDX<$NDNUM;));do
node=${ANODES[$NDIDX]}
# BUG FIX: the slot counter was misspelled in the update clause
# ('toekn++'), so 'token' was stuck at 1 forever -- only one slot was ever
# used and the script busy-spun on it, serializing the whole distribution.
for ((token=1;token<=$PS_PIPE_Width;token++));do
if [ $NDIDX -ge $NDNUM ];then
break
fi
if [ ! "${aJobs[$token]}" ] || ! kill -0 ${aJobs[$token]} 2>/dev/null; then
formatNode $node $CURPLAT &
aJobs[$token]=$!
let NDIDX++
break
fi
done
done
# BUG FIX: the original tail loop busy-spun on 'kill -0' (its counter only
# advanced once a job had already died) and skipped the last slot by using
# '<' instead of '<='.  A single 'wait' blocks until every background
# formatNode job has finished.
wait
# 7. Set up Master Mini Secondo Configuration if NS4Master is set
# When Options:NS4Master=true, the locally built Secondo doubles as the
# master's mini Secondo: its SecondoConfig.ini is pointed at the master
# data server and the msec folder is replaced by a symlink to the build.
NS4Master=($(readPSConf "Options" "NS4Master" "="))
NS4Master=$(trim ${NS4Master[0]})
if [ "$NS4Master" = "true" ]; then
# Make sure the current node is the master node
# NOTE(review): oneOf and get_localIPs are presumably provided by the
# sourced ps-functions file -- verify there.
if ! $(oneOf "$MIP" $(get_localIPs)) ; then
echo "${ERRORINFO}The current node is not the master node,
cannot set its Secondo as the master mini Secondo."
echo "Installation for Parallel Secondo Fails. "
exit -1
fi
#Set up Secondo Configurations
# a. Change Secondo Home and Port
# Take the LAST uncommented SecondoHome line as the current setting.
SHPos=($(grep -n "^[ ]*$PT_SECHOME[ ]*=" $MDBCONF | cut -d':' -f1))
SHPos=${SHPos[$(( ${#SHPos[@]} - 1 ))]}
SHConf=$(sed -n "${SHPos}p" $MDBCONF)
# Candidate database homes, tried in order; the first existing wins.
PSHLoc=("$(trim ${SHConf#*=})" "$SECONDO_PARAM_SecondoHome" "$HOME/secondo-databases")
for loc in ${PSHLoc[*]}; do
if [ -d $loc ]; then
MSHLoc=$loc
echo "The master Secondo Home is set as $MSHLoc"
break
fi
done
if [ "$MSHLoc" = "" ]; then
echo "${ERRORINFO}Cannot find the master Secondo Home."
echo "Installation for Parallel Secondo Fails. "
exit -1
fi
setSecConf $MDBCONF $PT_SECHOME $MSHLoc
# The master mini Secondo must listen on the port configured for the
# master data server; warn when it overrides a different existing port.
MSPtNum=$(echo $MasterDS | cut -d':' -f3)
SPtPos=($(grep -n "^[ ]*$PT_SECPORT[ ]*=" $MDBCONF | cut -d':' -f1))
SPtPos=${SPtPos[$(( ${#SPtPos[@]} - 1 ))]}
CPtConf=$(sed -n "${SPtPos}p" $MDBCONF)
CptNum=$(trim ${CPtConf#*=})
if [ "$CptNum" != "" ]; then
if [ $CptNum -ne $MSPtNum ]; then
echo "${WARNINFO}The current Secondo port number $CptNum will be
overlapped by the master Secondo port $MSPtNum."
fi
fi
setSecConf $MDBCONF $PT_SECPORT $MSPtNum
# b. Add Parallel Secondo Configurations
setSecConf $MDBCONF $PT_LIPADDR $MIP $PSCST
MPSFSLOC=$(head -1 $PSMFILE | cut -d':' -f2)
setSecConf $MDBCONF $PT_PSFSNME $MPSFSLOC $PSCST
# Delete the backup file in MacOSX
if [ "$SECONDO_PLATFORM" = "mac_osx" ]; then
rm "${MDBCONF}.bak"
fi
#Replace the msec with a link
MMSECLOC=${MPSFSLOC%/*}/$DSMSECNAME
rm -rf $MMSECLOC
ln -s $SECONDO_BUILD_DIR $MMSECLOC
echo "The $SECONDO_BUILD_DIR is used to be the master mini Secondo of the parallel Secondo."
fi
echo -e "\n\n\n\n"
#Set the environment variables in .parasecrc
# Re-source .bashrc so the freshly installed .parasecrc takes effect in
# this shell (needed for the hadoop command below).
source $HOME/.bashrc
#Format the namenode of Hadoop
hadoop namenode -format
echo -e "\n\n\n************************************************"
echo "Congratulations! The parallel Secondo has been correctly set up on your cluster."
cat $DSRVFILE
echo "************************************************"
exit 0