#!/bin/bash

# Resolve the absolute directory that contains this script and load the
# shared helper functions kept next to it.
# NOTE(review): ps-functions presumably provides PSEDOPT, PS_PIPE_Width,
# oneOf and get_localIPs, which are used below but not defined in this file.
bin=$(cd "$(dirname "$0")" && pwd)
if [ -z "$bin" ] || ! source "$bin/ps-functions"; then
  echo "ERROR !! Cannot load the helper file ps-functions from the script directory." >&2
  exit 1
fi

# This script is used to initialize the environment for Parallel Secondo.
# Before running this script, the following steps must be done first:
# * First, all required computers are connected as a cluster,
#   and the Secondo SDK has already been installed on every node.
# * Second, the Hadoop archive file hadoop-0.20.2.tar.gz is kept in
#   $SECONDO_BUILD_DIR/bin .
# * Third, a configuration file named ParallelSecondoConfig.ini
#   is kept in $SECONDO_BUILD_DIR/bin too.

# # In this script, the following steps will be done:
# # 1. Check the existence of all required files
# # 2. Unpack the Hadoop distribution, and set its configuration based on the ParallelSecondoConfig.ini
# # 3. Create the .parasecrc file, which sets up the needed environment variables on every cluster node
# #    before the parallel Secondo starts
# # 4. Prepare the masters and slaves files required by every node
# # 5. Distribute the .parasecrc file to every cluster node, and source it in each $HOME/.bashrc
# # 6. Create data server folders on every cluster machine

# 0. Process the arguments.
# (No command line arguments are read here; this part only defines the
#  names and locations used by the rest of the script.)

# Prefixes for diagnostic messages
WARNINFO="Warning !! "
ERRORINFO="ERROR !! "

declare -a NODESARRAY               # The list for hadoop nodes
declare -a DSEVSARRAY               # The list for data servers

HDVERSION="hadoop-0.20.2"           # Hadoop distribution that gets unpacked

# Names of files and folders inside a data server
DSCONFNAME="conf"
DSMASTERNAME="master"
DSSLAVESNAME="slaves"
DSPSFSNAME="PSFS"
DSMSECNAME="msec"
DSMSDBNAME="msec-databases"
DMHADPNAME="hadoop"
DMEBINNAME="bin"

RCFILENAME=".parasecrc"             # rc file created in the remote home directories
MDSBINLIST="ds_scriptList"          # file listing the scripts copied to main data servers

# Both locations depend on $SECONDO_BUILD_DIR, which is validated in step 1.
PSCONFILE=$SECONDO_BUILD_DIR/bin/ParallelSecondoConfig.ini
PSDKLOC=$SECONDO_BUILD_DIR/bin/ps_SDK

### ----------------------------------------------------------------------------------------
# Auxiliary Functions

# Print $1 without its leading and trailing whitespace.
# Arguments: $1 - the text to trim (only the first argument is used)
# Outputs:   the trimmed text followed by a newline on stdout
# The text is handled with parameter expansion only, so glob characters,
# backslashes and a leading "-n"/"-e" are passed through untouched.
function trim
{
  local text=$1
  # Remove the longest prefix / suffix that consists only of whitespace.
  text="${text#"${text%%[![:space:]]*}"}"
  text="${text%"${text##*[![:space:]]}"}"
  printf '%s\n' "$text"
}

# Read configurations in $PSCONFILE
#
# Arguments:
#   $1 - name of the [section] to read
#   $2 - title (key) to look for inside the section; when empty, every
#        non-comment line of the section is returned as it is
#   $3 - delimiter between title and value ("=" or "+="); must not be
#        empty when a title is given
# Globals:   PSCONFILE (read), WARNINFO (read), IFS (read)
# Outputs:   the collected values on stdout, joined with the first character
#            of the caller's IFS. With "+=" every matching line adds a value,
#            with any other delimiter the last matching line wins.
# Relies on: trim
function readPSConf
{
  local EnvName TitName DlmSign
  EnvName=$(trim "$1")
  TitName=$(trim "$2")
  DlmSign=$(trim "$3")

  if [ -n "$TitName" ] && [ -z "$DlmSign" ]; then
    echo "${WARNINFO}The delimeter cannot be empty while the title is defined." >&2
    return
  fi

  # A section header is a line like [Name]; kept in a variable so that the
  # regular expression is not subject to shell quoting rules inside [[ ]].
  local sectionRE='^\[[[:alnum:]_]*\]$'
  local findEnvName=false
  local LINE section iTit iValue
  local values=()
  local -i vnum=0

  # "|| [ -n ... ]" also processes a last line without a trailing newline.
  while IFS= read -r LINE || [ -n "$LINE" ]; do
    LINE=$(trim "$LINE")

    # Ignore empty lines and comments
    if [ -z "$LINE" ] || [ "${LINE:0:1}" = "#" ]; then
      continue
    fi

    if [[ $LINE =~ $sectionRE ]]; then
      # Stop when another section starts after the requested one
      if $findEnvName; then
        break
      fi
      section=${LINE#\[}
      section=${section%\]}
      if [ "$section" = "$EnvName" ]; then
        findEnvName=true
      fi
      continue
    fi

    # Parameter lines outside the requested section are skipped
    $findEnvName || continue

    if [ -z "$TitName" ]; then
      values[vnum]=$LINE
      vnum=$(( vnum + 1 ))
    elif [[ $LINE == *"$DlmSign"* ]]; then
      # The delimiter is matched literally, not as a pattern.
      iTit=$(trim "${LINE%%"$DlmSign"*}")
      iValue=$(trim "${LINE#*"$DlmSign"}")
      if [ "$TitName" = "$iTit" ]; then
        values[vnum]=$iValue
        if [ "$DlmSign" = "+=" ]; then
          vnum=$(( vnum + 1 ))
        fi
      fi
    fi
  done < "${PSCONFILE}"

  echo "${values[*]}"
}

# ------ Example of Reading ParallelSecondoConfig.ini File -----
#OLDIFS=$IFS
#IFS=$'\n'
#ClusterInfo=$(readPSConf "Cluster" "Slaves" "+=")
#for item in $ClusterInfo; do
#  echo "item: $item"
#done
#IFS=$OLDIFS
# ------ Example of Reading ParallelSecondoConfig.ini File -----

#Set a SecondoConfig parameter
#If the position line is set, then only add the new parameter after that
#Or else, remove old setting before inserting the new value.
#
# Arguments:
#   $1 - configuration file that is edited in place
#   $2 - parameter name
#   $3 - parameter value
#   $4 - optional line number; when given (and not 0) "name=value" is simply
#        added after that line
# Without $4 every line starting with the (optionally commented out)
# parameter name is removed and the new setting is put where the first of
# them was. When no such line exists the setting is appended to the file.
# Relies on: PSEDOPT (defined outside this file; presumably prints the
#            platform specific in-place option of sed - TODO confirm)
function setSecConf
{
  local confFile=$1
  local paraName=$2
  local paraValue=$3
  local -i PPos="$4"
  local -i APos                    # the new line is appended after this line
  local -a DPos
  local NewPara="$paraName=$paraValue"
  local delScript=""
  local lineNo

  if [ $PPos -eq 0 ]; then
    DPos=($(grep -n "^[#]*[ ]*$paraName" "$confFile" | cut -d':' -f1))
    if [ ${#DPos[@]} -eq 0 ]; then
      # Nothing to replace. An empty address would make sed delete every line.
      printf '%s\n' "$NewPara" >> "$confFile"
      return
    fi
    APos=$(( DPos[0] - 1 ))
    # Delete each old line on its own ("3d;7d"); "3,7d" would also remove
    # all the unrelated lines in between.
    for lineNo in "${DPos[@]}"; do
      delScript="${delScript:+$delScript;}${lineNo}d"
    done
    # $(PSEDOPT) is left unquoted on purpose, it may expand to several words.
    sed $(PSEDOPT) "$delScript" "$confFile"      # Delete old parameter
  else
    APos=$PPos
  fi

  if [ $APos -ge 1 ]; then
    sed $(PSEDOPT) "${APos}a \\
$NewPara
" "$confFile"
  elif [ -s "$confFile" ]; then
    # The old setting was on the first line; line 0 is not a valid address.
    sed $(PSEDOPT) "1i \\
$NewPara
" "$confFile"
  else
    # sed cannot insert anything into an empty file
    printf '%s\n' "$NewPara" >> "$confFile"
  fi
}

# Check the existence of specific utility
# Arguments: $1 - name of the utility
# Prints an error on stderr and terminates the calling shell when the
# utility cannot be found. It must therefore be called directly, not inside
# a command substitution, to stop the script.
function isExist
{
  local UName=$1

  # "command -v" is a shell builtin, unlike "which" it needs no extra tool.
  if ! command -v "${UName}" > /dev/null 2>&1; then
    echo "${ERRORINFO}The utility ${UName} is not installed" >&2
    exit -1
  fi
}

### ----------------------------------------------------------------------------------------
# 1. Check the existence of all required files
# a. Check the availability of Secondo
if [ -z "$SECONDO_BUILD_DIR" ]; then
  echo "${ERRORINFO}The Secondo database is not correctly installed in this machine."
  echo "Installation for Parallel Secondo Fails. "
  exit -1
elif [ ! -d "${SECONDO_BUILD_DIR}" ]; then
  echo "${ERRORINFO}The \$SECONDO_BUILD_DIR: $SECONDO_BUILD_DIR does not exist."
  echo "Installation for Parallel Secondo Fails. "
  exit -1
fi

# All intermediate files of the installation are kept in $PSDKLOC
if [ ! -d "$PSDKLOC" ]; then
  if ! mkdir -p "$PSDKLOC"; then
    echo "${ERRORINFO}Cannot create the folder $PSDKLOC."
    echo "Installation for Parallel Secondo Fails. "
    exit -1
  fi
fi

# b. Check the existence of the Hadoop archive
HADOOPARV=$SECONDO_BUILD_DIR/bin/${HDVERSION}.tar.gz
HADOOPATH=$SECONDO_BUILD_DIR/bin/${HDVERSION}
if [ ! -f "$HADOOPARV" ]; then
  echo "${ERRORINFO}The Hadoop archieve is not prepared in this machine."
  echo "Looking for $HADOOPARV ... "
  echo "Installation for Parallel Secondo Fails. "
  exit -1
fi

#Extract the Hadoop archive to $SECONDO_BUILD_DIR/bin, always start from a fresh copy
if [ -d "$HADOOPATH" ] || [ -f "$HADOOPATH" ]; then
  echo "${WARNINFO}The last extraction of Hadoop $HADOOPATH is deleted."
  # ":?" refuses to run when the path is unexpectedly empty
  rm -rf -- "${HADOOPATH:?}"
fi
echo "Unpacking $HADOOPARV to $HADOOPATH ... "
if ! tar -xzf "$HADOOPARV" -C "$SECONDO_BUILD_DIR/bin/"; then
  echo "${ERRORINFO}Cannot unpack $HADOOPARV."
  echo "Installation for Parallel Secondo Fails. "
  exit -1
fi
# The Hadoop archive file is expected in $SECONDO_BUILD_DIR/bin
# It may better be put at $SECONDO_SDK, which is not set at least in linux64 platform

# c. Check the existence of ParallelSecondoConfig.ini
if [ ! -f "$PSCONFILE" ]; then
  echo -e "The configuration file for installing Parallel Secondo \n
$PSCONFILE \n does not exist."
  echo "Installation for Parallel Secondo Fails. "
  exit -1
fi

# d. Check the availability of Java,
# The Java home set in the configuration file has priority over $JAVA_HOME.
OLDIFS=$IFS
IFS=$'\n'
ParaInfo=($(readPSConf "Hadoop" "hadoop-env.sh:JAVA_HOME" "="))
javaHome=$(trim "${ParaInfo[0]}")
IFS=$OLDIFS
if [ -z "$javaHome" ]; then
  if [ -z "$JAVA_HOME" ]; then
    echo "${ERRORINFO}The Java runtime is not correctly installed in this machine."
    echo "Installation for Parallel Secondo Fails. "
    exit -1
  fi
  javaHome=${JAVA_HOME}
fi
if [ ! -d "$javaHome" ]; then
  echo "${ERRORINFO}The \$javaHome: $javaHome does not exist."
  echo "Installation for Parallel Secondo Fails. "
  exit -1
fi
javaRun=$javaHome/bin/java

# Take the quoted version string of "java -version", for example
#   java version "1.6.0_45"   or   openjdk version "11.0.2" 2019-01-15
JVerStr=$("$javaRun" -version 2>&1 | sed -n 's/.*version "\([^"]*\)".*/\1/p' | head -1)
# The release number is the first component, or the second one in the old
# "1.x" numbering scheme.
JVersion=${JVerStr%%[!0-9]*}
if [ "$JVersion" = "1" ]; then
  JVersion=${JVerStr#1.}
  JVersion=${JVersion%%[!0-9]*}
fi
# The regular expression must stay unquoted inside [[ ]], hence the variable.
NumberRE='^[0-9]+$'
if ! [[ "$JVersion" =~ $NumberRE ]] || [ "$JVersion" -lt 6 ]; then
  echo "${ERRORINFO}The Java runtime version must be greater than 5."
  echo "Installation for Parallel Secondo Fails. "
  exit -1
fi

# e. Check the availability of $MDSBINLIST
if [ ! -f "${bin}/$MDSBINLIST" ]; then
  echo "${ERRORINFO}The file listing required data server scripts does not exist."
  echo "Installation for Parallel Secondo Fails. "
  exit -1
fi

# f. Check prerequisites on the current computer, includes:
# passphraseless ssh, screen, ifconfig, etc
# isExist is called directly: inside $( ) its exit would only leave the
# sub-shell and the installation would go on without the utility.
isExist ssh
isExist screen
isExist ifconfig

# 2. Unpack the Hadoop distribution, and set its configurations based on the ParallelSecondoConfig.ini
# Set Hadoop configurations based on the current node.
# The configuration files of Hadoop includes:
# a hadoop/conf/hadoop-env.sh
# b hadoop/conf/masters
# c hadoop/conf/slaves
# d hadoop/conf/core-site.xml
# e hadoop/conf/hdfs-site.xml
# f hadoop/conf/mapreduce-site.xml

HADOOPCONF=$HADOOPATH/conf

# Put the text $3 into the file $1 so that it becomes line number $2.
# The text is appended to the end of the file when $2 is smaller than 1 or
# larger than the number of lines.
# Relies on PSEDOPT, which is defined outside this file (presumably it prints
# the in-place option of sed for the current platform - TODO confirm).
function hpenvPutLine
{
  local file=$1
  local text=$3
  local -i pos="$2"
  local -i total
  total=$(awk 'END { print NR }' "$file")

  if [ $pos -ge 1 ] && [ $pos -le $total ]; then
    sed $(PSEDOPT) "${pos}i \\
$text
" "$file"
  else
    printf '%s\n' "$text" >> "$file"
  fi
}

# a hadoop/conf/hadoop-env.sh
# ** Set Java_Home
HPENV=$HADOOPCONF/hadoop-env.sh
Pattern="^[#]*[ ]*export[ ]*JAVA_HOME"
NJHOME="export JAVA_HOME=$javaHome"
JHLines=($(grep -n "$Pattern" "$HPENV" | cut -d':' -f1))
JHPos=0     # 0: no old setting exists, the new one is appended to the file
if [ ${#JHLines[@]} -gt 0 ]; then
  # The new setting takes the place of the last old one. All old settings are
  # deleted first, which moves that place up by the number of the other ones.
  JHPos=$(( ${JHLines[${#JHLines[@]} - 1]} - ${#JHLines[@]} + 1 ))
  # remove the old set up for JAVA_HOME
  sed $(PSEDOPT) '/'"$Pattern"'/d' "$HPENV"
fi
hpenvPutLine "$HPENV" "$JHPos" "$NJHOME"

# ** Set HADOOP_OPTS
OLDIFS=$IFS
IFS=$'\n'
Opts=($(readPSConf "Hadoop" "hadoop-env.sh:HADOOP_OPTS" "+="))
IFS=$OLDIFS
HPOPTS="HADOOP_OPTS=\""
for item in "${Opts[@]}"; do
  HPOPTS="$HPOPTS$item "
done
HPOPTS="$HPOPTS\""
Pattern="^[#]*[ ]*HADOOP_OPTS"
LLPos=$(grep -n "$Pattern" "$HPENV" | head -1 | cut -d':' -f1)
if [ -n "$LLPos" ]; then
  # Replace the first old setting
  sed $(PSEDOPT) "${LLPos}d" "$HPENV"
else
  # No old setting: append the new one, do not delete any other line
  LLPos=0
fi
hpenvPutLine "$HPENV" "$LLPos" "$HPOPTS"

# b hadoop/conf/masters
# c hadoop/conf/slaves
# Every Cluster entry is expected in the form  IP:path:port
HPMaster=$HADOOPCONF/masters
HPSlaves=$HADOOPCONF/slaves
OLDIFS=$IFS
IFS=$'\n'
CMInfo=($(readPSConf "Cluster" "Master" "="))
MasterDS=$(trim "${CMInfo[0]}")
IFS=$OLDIFS
Colons=${MasterDS//[!:]/}       # keep only the colons to count them
if [ ${#Colons} -ne 2 ]; then
  echo "${ERRORINFO}Format of Cluster:Master Configuration is wrong. "
  echo "Installation for Parallel Secondo Fails. "
  exit -1
fi
MIP=${MasterDS%%:*}
echo "$MIP" > "$HPMaster"
# check the passphraseless ssh to the master
MHost=$(ssh -n -q -o PasswordAuthentication=no "$MIP" hostname)
if [ -z "${MHost}" ]; then
  echo "${ERRORINFO}Can't ssh to ${MIP} without a passphrase."
  exit -1
fi

OLDIFS=$IFS
IFS=$'\n'
CSInfo=($(readPSConf "Cluster" "Slaves" "+="))
IFS=$OLDIFS
cat /dev/null > "$HPSlaves"
for slave in "${CSInfo[@]}"; do
  Colons=${slave//[!:]/}
  if [ ${#Colons} -ne 2 ]; then
    echo "${ERRORINFO}Format of a Cluster:Slave Configuration is wrong. "
    echo "Installation for Parallel Secondo Fails. "
    exit -1
  fi
  SIP=$(trim "${slave%%:*}")
  echo "$SIP" >> "$HPSlaves"
done
# Use sort + uniq commands to remove duplicated lines.
sort "$HPSlaves" | uniq > "${HPSlaves}.tmp"
mv "${HPSlaves}.tmp" "$HPSlaves"
if [ ! -s "$HPSlaves" ]; then
  echo "${ERRORINFO}No slaves are defined."
  echo "Installation for Parallel Secondo Fails. "
  exit -1
fi

# check the passphraseless ssh to slaves
while read -r Slave; do
  SIP=${Slave%%:*}
  # -n keeps ssh away from stdin. Otherwise it swallows the rest of
  # $HPSlaves and only the first slave gets checked.
  SHost=$(ssh -n -q -o PasswordAuthentication=no "$SIP" hostname)
  if [ -z "${SHost}" ]; then
    echo "${ERRORINFO}Can't ssh to ${SIP} without a passphrase."
    exit -1
  fi
done < "$HPSlaves"

# Get all configurations about Hadoop
OLDIFS=$IFS
IFS=$'\n'
HXCInfo=($(readPSConf "Hadoop" "" ""))
IFS=$OLDIFS

# d hadoop/conf/core-site.xml
# e hadoop/conf/hdfs-site.xml
# f hadoop/conf/mapreduce-site.xml
# Every line  <file>:<name> = <value>  of the Hadoop section becomes one
# property of the named file.
XMLFile=(core-site.xml hdfs-site.xml mapred-site.xml)
for xfileName in "${XMLFile[@]}"; do
  xfile=$HADOOPCONF/$xfileName
  ENDPos=$(grep -n "<configuration>" "$xfile" | head -1 | cut -d':' -f1)
  if [ -n "$ENDPos" ]; then
    #Remove all exist configurations
    sed $(PSEDOPT) "$(( ENDPos + 1 )),\$d" "$xfile"
  else
    # Without the opening tag the deletion above would start at line 1 and
    # leave a file without a root element, so a new header is written.
    printf '%s\n' '<?xml version="1.0"?>' '<configuration>' > "$xfile"
  fi

  for item in "${HXCInfo[@]}"; do
    # Only the settings of the current file
    case $item in
      "${xfileName}:"*) ;;
      *) continue ;;
    esac

    # Name and value are both split at the first "=", so that a value may
    # contain further "=" characters and blanks.
    ititle=${item%%=*}
    ititle=$(trim "${ititle#*:}")
    ivalue=$(trim "${item#*=}")
    printf '\n\t<property>\n\t\t<name>%s</name>\n\t\t<value>%s</value>\n\t</property>\n\n' \
      "$ititle" "$ivalue" >> "$xfile"
  done
  echo "</configuration>" >> "$xfile"
done

# 3. Create the .parasecrc file, which set up needed environment variables on every cluster node
# before the parallel Secondo starts
PSRCFILE=$PSDKLOC/psrc_tmp          # common part of every node's .parasecrc
CLSTFILE=$PSDKLOC/clusterNodes      # one line per cluster node
DSRVFILE=$PSDKLOC/dataServers       # one line per node: node:path1:path2:...
cat /dev/null > "$DSRVFILE"

#Merge master and slaves to $CLSTFILE
cat "$HPMaster" "$HPSlaves" | sort | uniq > "${CLSTFILE}"
# All data server definitions without duplicates, in their first-seen order
CNInfo=($(printf '%s\n' ${CMInfo[*]} ${CSInfo[*]} | awk '!x[$0]++'))

#Find all cluster nodes and data servers
while read -r Node; do
  DSLOC="$Node"
  for ivalue in "${CNInfo[@]}"; do
    ivalue=${ivalue#*=}
    # Compare the node name literally; as a regular expression the dots of
    # an IP address would match any character.
    if [[ $ivalue == "$Node":* ]]; then
      loc=${ivalue#*:}
      loc=${loc%%:*}            # second field: the data server path
      DSLOC="$DSLOC:$loc"
    fi
  done
  echo "$DSLOC" >> "$DSRVFILE"
done < "$CLSTFILE"

#Prepare the template script for .parasecrc
# Escaped dollars are written literally and get expanded on the node that
# sources the file; the ${...} names are replaced right here.
cat > "$PSRCFILE" <<EOF
export PARALLEL_SECONDO_BUILD_DIR=\$SECONDO_BUILD_DIR
export PARALLEL_SECONDO_DATASERVER_NAME=\$USER
PARALLEL_SECONDO_MAINDS=\$(echo \${PARALLEL_SECONDO} | cut -d':' -f 1)
export PARALLEL_SECONDO_MAINDS=\$PARALLEL_SECONDO_MAINDS/\$PARALLEL_SECONDO_DATASERVER_NAME
export PARALLEL_SECONDO_CONF=\$PARALLEL_SECONDO_MAINDS/${DSCONFNAME}
export PARALLEL_SECONDO_MASTER=\$PARALLEL_SECONDO_CONF/${DSMASTERNAME}
export PARALLEL_SECONDO_SLAVES=\$PARALLEL_SECONDO_CONF/${DSSLAVESNAME}
export PARALLEL_SECONDO_PSFSNAME="${DSPSFSNAME}"
export PARALLEL_SECONDO_MINI_NAME="${DSMSECNAME}"
export PARALLEL_SECONDO_MINIDB_NAME="${DSMSDBNAME}"
export HADOOP_HOME=\$PARALLEL_SECONDO_MAINDS/hadoop
export PATH=\$PATH:\$HADOOP_HOME/bin
export PATH=\$PATH:\$PARALLEL_SECONDO_MAINDS/${DMEBINNAME}
EOF

# 4. Prepare the masters and slaves files required by every node
PSMFILE=$PSDKLOC/ps_master_tmp
PSSFILE=$PSDKLOC/ps_slaves_tmp

# Write the data server list file $1 from the definitions given as the
# remaining arguments. Every definition IP:path:port becomes the line
#   IP:path/$USER/$DSPSFSNAME:port
function psWriteServerList
{
  local listFile=$1
  local item rest IPAddr PSFSPT MSPORT
  shift

  cat /dev/null > "$listFile"
  for item in "$@"; do
    IPAddr=${item%%:*}
    rest=${item#*:}
    PSFSPT=${rest%%:*}
    MSPORT=${rest#*:}
    PSFSPT="$PSFSPT/$USER/$DSPSFSNAME"
    echo "$IPAddr:$PSFSPT:$MSPORT" >> "$listFile"
  done
}

psWriteServerList "$PSMFILE" "$MasterDS"
psWriteServerList "$PSSFILE" "${CSInfo[@]}"

# 5. Prepare each data server's SecondoConfig.ini file
MSECLIST=$PSDKLOC/miniSecList #List of all data servers
DBCONFILE="SecondoConfig.ini"
# Following parameters need to be changed.
PT_SECHOME="SecondoHome"
PT_SECPORT="SecondoPort"
PT_PARASEC="\[ParallelSecondo\]" #Add parameters instead of change values
PT_PSFSNME="SecondoFilePath"
PT_LIPADDR="localIP"
MDBCONF=$SECONDO_BUILD_DIR/bin/$DBCONFILE

# 5a. Clean all Parallel Secondo configuration in $MDBCONF
# PSCST: line number of the [ParallelSecondo] header (0 when it is missing)
declare -i PSCST=$(grep -n "$PT_PARASEC" "$MDBCONF" | head -1 | cut -d':' -f1)
if [ $PSCST -gt 0 ]; then
  # PSCED: at first the distance from the header to the next section header,
  # afterwards the number of the last line of the section
  declare -i PSCED=$(sed -n "$(( PSCST + 1 )),\$p" "$MDBCONF" | grep -n "^[[]" | head -1 | cut -d':' -f1)
  if [ $PSCED -eq 0 ]; then
    PSCED=$(awk 'END { print NR }' "$MDBCONF")
  else
    PSCED=$(( PSCST + PSCED - 1 ))
  fi
  # An empty section must be skipped: with an end before the start sed would
  # still process the start line, which is the header of the next section.
  if [ $PSCED -gt $PSCST ]; then
    # Blank every line of the section up to its first '#'
    sed $(PSEDOPT) "$(( PSCST + 1 )),${PSCED}s/^\([^#]\)*//" "$MDBCONF"
    # Delete the backup file in MacOSX
    if [ "$SECONDO_PLATFORM" = "mac_osx" ]; then
      rm -f "${MDBCONF}.bak"
    fi
  fi
else
  echo "$PT_PARASEC" | sed 's/\\//g' >> "$MDBCONF"
  PSCST=$(grep -n "$PT_PARASEC" "$MDBCONF" | head -1 | cut -d':' -f1)
fi

# Create one SecondoConfig.ini for every data server of the cluster, named
# $PSDKLOC/SecondoConfig.ini.<IP>.<port>
cat "$PSMFILE" "$PSSFILE" | awk '!x[$0]++' > "$MSECLIST"
NodeIPAddr=""
# Every line of $MSECLIST is  IP:PSFS-path:port
while IFS=':' read -r IPAddre PSFSLoc PortNum; do
  if [ -z "$IPAddre" ]; then
    continue
  fi
  # The database folder lies next to the PSFS folder. Only the last path
  # component is replaced, "PSFS" may also occur earlier in the path.
  SecHome=${PSFSLoc%/*}/$DSMSDBNAME

  DSDBCONF=$PSDKLOC/$DBCONFILE.$IPAddre.$PortNum
  cp "$MDBCONF" "$DSDBCONF"

  # Change SecondoHome & SecondoPort
  setSecConf "$DSDBCONF" "$PT_SECHOME" "$SecHome"
  setSecConf "$DSDBCONF" "$PT_SECPORT" "$PortNum"

  # Add PSFS & LocalIP to $PT_PARASEC environment
  # The header is looked up again in the new file, because the changes above
  # may have moved it away from line $PSCST.
  PSPos=$(grep -n "$PT_PARASEC" "$DSDBCONF" | head -1 | cut -d':' -f1)
  setSecConf "$DSDBCONF" "$PT_PSFSNME" "$PSFSLoc" "${PSPos:-$PSCST}"
  setSecConf "$DSDBCONF" "$PT_LIPADDR" "$IPAddre" "${PSPos:-$PSCST}"

  # Delete the backup file in MacOSX
  if [ "$SECONDO_PLATFORM" = "mac_osx" ]; then
    rm -f "${DSDBCONF}.bak"
  fi
done < "$MSECLIST"

# Create the rc file of every node: $PSDKLOC/parasecrc_<node>
# Every line of $DSRVFILE is  node:path1:path2:...
ANODES=($(cat "$DSRVFILE" | tr '\n' ' '))
for Node in "${ANODES[@]}"; do
  nodeAddr=${Node%%:*}
  nodeRCFile=$PSDKLOC/parasecrc_${nodeAddr}
  {
    echo "export PARALLEL_SECONDO=${Node#*:}"
    cat "$PSRCFILE"
  } > "$nodeRCFile"

  # The configuration files are kept in the first (main) data server
  ConfPath=${Node#*:}
  ConfPath=${ConfPath%%:*}
  #Get all port numbers of current node
  # The first field must be equal to the node name. A plain grep would also
  # pick up the ports of e.g. 10.0.0.11 for the node 10.0.0.1.
  portArr=($(awk -F':' -v ip="$nodeAddr" '$1 == ip { print $3 }' "$MSECLIST"))
  DBCONFIG="export PARALLEL_SECONDO_DBCONFIG="
  for port in "${portArr[@]}"; do
    DBCONFIG="${DBCONFIG}${ConfPath}/${USER}/conf/${DBCONFILE}.${port}:"
  done
  DBCONFIG=${DBCONFIG%:}
  echo "$DBCONFIG" >> "$nodeRCFile"
done

# 6. Distribute files and folders to every cluster node, including:
# # a. $HOME/.parasecrc , and source it in each $HOME/.bashrc
# # b. msec , msec-databases, PSFS folders
# # c. conf, hadoop, HDFS (only on the main data server of each node)
SDSFLODER=$PSDKLOC/sdbFolder #Folder of a normal data server
MDSFLODER=$PSDKLOC/mdbFolder #Folder of the main data server of a node
# Prepare everything that a main and a normal data server need on current machine
# Then copy these folders to remote nodes, instead of creating files and folders one by one
echo "Preparing all needed files and folder for master and slave data servers ..."
# Start from scratch. If the folders of an earlier run were kept, "cp -r"
# would put the copy inside of the existing $MDSFLODER.
rm -rf -- "${SDSFLODER:?}" "${MDSFLODER:?}"
mkdir -p "$SDSFLODER/$DSMSECNAME" #msec
mkdir -p "$SDSFLODER/$DSMSDBNAME" #msec-databases
mkdir -p "$SDSFLODER/$DSPSFSNAME" #PSFS
cp -r "$SDSFLODER" "$MDSFLODER"
mkdir -p "$MDSFLODER/$DSCONFNAME" #conf
mkdir -p "$MDSFLODER/$DMHADPNAME" #hadoop
mkdir -p "$MDSFLODER/$DMEBINNAME" #bin

# Copy the content of the unpacked Hadoop through a tar pipe
tar -cf - -C "$HADOOPATH" . | tar -xf - -C "$MDSFLODER/$DMHADPNAME"
cp "$PSMFILE" "$MDSFLODER/$DSCONFNAME/$DSMASTERNAME"
cp "$PSSFILE" "$MDSFLODER/$DSCONFNAME/$DSSLAVESNAME"
# Copy every script that is listed in $MDSBINLIST, one name per line
while read -r dsScript || [ -n "$dsScript" ]; do
  if [ -n "$dsScript" ]; then
    cp "${bin}/$dsScript" "$MDSFLODER/$DMEBINNAME"
  fi
done < "${bin}/$MDSBINLIST"

# use compression to speed up copying files
# SLEVEL is the number of path components of $MDSFLODER. The archives keep
# the complete paths, formatNode strips that many components again.
declare -i SLEVEL=$(echo "$MDSFLODER" | tr '/' '\n' | sed '/^$/d' | wc -l)
# The output is dropped, tar reports the removed leading "/" on every run.
if ! tar -czf "${MDSFLODER}.tar.gz" "${MDSFLODER}" &> /dev/null ||
   ! tar -czf "${SDSFLODER}.tar.gz" "${SDSFLODER}" &> /dev/null; then
  echo "${ERRORINFO}Cannot create the archives of the data server folders."
  echo "Installation for Parallel Secondo Fails. "
  exit -1
fi

#echo "---------------------"
#echo "The Data Server List"
#echo "---------------------"
#cat $DSRVFILE
#echo "---------------------"

# Set up all data servers of one cluster node.
# Arguments:
#   $1 - node description  node:path1:path2:...  (a line of $DSRVFILE); the
#        first path is the main data server of the node
#   $2 - platform of the cluster, "linux" or "mac_osx"
# Globals (read): PSDKLOC, RCFILENAME, MDSFLODER, SDSFLODER, SLEVEL,
#                 DBCONFILE, USER
# The function copies the rc file to the home directory of the node, makes
# its .bashrc source that file, unpacks the prepared folder archive into
# every data server path and copies the SecondoConfig.ini files into the
# conf folder of the main data server.
function formatNode
{
  local node=$1
  local curPlat=$2
  local nodeIP=${node%%:*}
  local nodeRCFile=$PSDKLOC/parasecrc_${nodeIP}
  local -a dataServers
  local -i sidx=0
  local SCNUM dserv ARVPATH ARVNAME file port

  dataServers=($(echo ${node#*:} | tr ':' ' '))
  scp -o LogLevel=quiet "$nodeRCFile" "$nodeIP:$RCFILENAME"

  # Delete exist commands if necessary
  # The lines are removed by their pattern, so that any number of them is
  # handled. The suffix attached to -i is accepted by GNU and BSD sed, the
  # backup is deleted right away.
  ssh -n -o LogLevel=quiet "$nodeIP" "sed -i.psbak '/source \\\$HOME\\/${RCFILENAME}/d' \$HOME/.bashrc && rm -f \$HOME/.bashrc.psbak"

  #source .parasecrc file in the .bashrc, to set all required environment
  case ${curPlat} in
    # Process linux and linux64 with same statements
    linux )
      # Execute the .parasecrc directly after processing .secondorc
      SCNUM=$(ssh -n -o LogLevel=quiet "$nodeIP" "grep -n \"source [\\\$A-Z_.\\\/]*secondorc [\\\$a-zA-Z_.\\\/]*\" \$HOME/.bashrc" | tail -1 | cut -d':' -f1)
      if [ "$SCNUM" == "" ]; then
        # No .secondorc is sourced: use the first line. sed cannot insert
        # into an empty file, then the line is simply appended.
        ssh -n -o LogLevel=quiet "$nodeIP" "if [ -s \$HOME/.bashrc ]; then sed -i \"1i source \\\$HOME/${RCFILENAME}\" \$HOME/.bashrc; else echo \"source \\\$HOME/${RCFILENAME}\" >> \$HOME/.bashrc; fi"
      else
        # Append after the (last) .secondorc line. Inserting before the
        # following line does nothing when .secondorc is the last line.
        ssh -n -o LogLevel=quiet "$nodeIP" "sed -i \"${SCNUM}a source \\\$HOME/${RCFILENAME}\" \$HOME/.bashrc"
      fi
      ;;
    mac_osx )
      ssh -n -o LogLevel=quiet "$nodeIP" "echo \"source \\\$HOME/$RCFILENAME\" >> \$HOME/.bashrc"
      echo "Note!!The $HOME/.bashrc file must be executed in Mac OS X shell prompt" >&2
      echo -e "This step has to be done manually by users.\n\n" >&2
      ;;
  esac

  for dserv in "${dataServers[@]}"; do
    dserv="$dserv/$USER" #Use $USER to different users' data servers on a same cluster node
    ssh -n -o LogLevel=quiet "$nodeIP" "mkdir -p $dserv"

    if [ $sidx -eq 0 ]; then
      # This is the main data server on the current node
      ARVPATH=${MDSFLODER}
    else
      # This is a normal data server on the current node
      ARVPATH=${SDSFLODER}
    fi
    ARVNAME=${ARVPATH##*/}
    scp -o LogLevel=quiet "${ARVPATH}.tar.gz" "$nodeIP:$dserv"
    # The archive contains the complete local path, which is stripped here
    ssh -n -o LogLevel=quiet "$nodeIP" "tar -xmf $dserv/${ARVNAME}.tar.gz -C $dserv/ --strip-components=$SLEVEL"

    #Copy SecondoConfig.ini files
    if [ $sidx -eq 0 ]; then
      for file in "$PSDKLOC/$DBCONFILE.$nodeIP".*; do
        # The pattern itself is left over when no file matches it
        [ -f "$file" ] || continue
        port=${file##*.}
        scp -o LogLevel=quiet "$file" "$nodeIP:$dserv/conf/${DBCONFILE}.$port"
      done
    fi

    sidx=$(( sidx + 1 ))
  done
}

# The platform name without the trailing word size, e.g. linux64 -> linux
CURPLAT=$(echo "$SECONDO_PLATFORM" | sed 's/[0-9]*$//')
if [ "$CURPLAT" != "linux" -a "$CURPLAT" != "mac_osx" ]; then
  echo "${ERRORINFO}Unknown Platform, Check your \$SECONDO_PLATFORM definition."
  echo "Installation for Parallel Secondo Fails. "
  exit 1
fi

ANODES=($(cat "$DSRVFILE" | tr '\n' ' '))
NDNUM=${#ANODES[*]}
aJobs=()        # process ids of the running formatNode jobs, one per slot
# Number of nodes that are set up at the same time.
# NOTE(review): PS_PIPE_Width is not defined in this file, presumably it
# comes from ps-functions; a single slot is used when it is not set.
declare -i PipeWidth=${PS_PIPE_Width:-1}
if [ $PipeWidth -lt 1 ]; then
  PipeWidth=1
fi

echo "Start to distribute data servers on the cluster ..."
NDIDX=0
while [ $NDIDX -lt $NDNUM ]; do
  launched=false
  # Give the next node to the first slot that is empty or has finished
  for (( token = 1; token <= PipeWidth; token++ )); do
    if [ -z "${aJobs[$token]}" ] || ! kill -0 "${aJobs[$token]}" 2>/dev/null; then
      formatNode "${ANODES[$NDIDX]}" "$CURPLAT" &
      aJobs[$token]=$!
      NDIDX=$(( NDIDX + 1 ))
      launched=true
      break
    fi
  done
  # All slots are busy: wait a moment instead of spinning
  if ! $launched; then
    sleep 1
  fi
done

# Wait until every node is set up
wait

# 7. Set up Master Mini Secondo Configuration if NS4Master is set
# With NS4Master = true the Secondo system of the current (master) node is
# used as the mini Secondo of the master data server.
NS4Master=($(readPSConf "Options" "NS4Master" "="))
NS4Master=$(trim "${NS4Master[0]}")
if [ "$NS4Master" = "true" ]; then
  # Make sure the current node is the master node
  # NOTE(review): oneOf and get_localIPs are not defined in this file;
  # oneOf presumably prints true or false - TODO confirm.
  if ! $(oneOf "$MIP" $(get_localIPs)) ; then
    echo "${ERRORINFO}The current node is not the master node,
cannot set its Secondo as the master mini Secondo."
    echo "Installation for Parallel Secondo Fails. "
    exit -1
  fi

  #Set up Secondo Configurations

  # a. Change Secondo Home and Port
  # Candidates for the Secondo home: the last active setting of the
  # configuration file, $SECONDO_PARAM_SecondoHome and the default folder.
  # The first existing folder is taken.
  SHConf=$(grep "^[ ]*$PT_SECHOME[ ]*=" "$MDBCONF" | tail -1)
  PSHLoc=("$(trim "${SHConf#*=}")" "$SECONDO_PARAM_SecondoHome" "$HOME/secondo-databases")
  MSHLoc=""
  for loc in "${PSHLoc[@]}"; do
    if [ -n "$loc" ] && [ -d "$loc" ]; then
      MSHLoc=$loc
      echo "The master Secondo Home is set as $MSHLoc"
      break
    fi
  done
  if [ "$MSHLoc" = "" ]; then
    echo "${ERRORINFO}Cannot find the master Secondo Home."
    echo "Installation for Parallel Secondo Fails. "
    exit -1
  fi
  setSecConf "$MDBCONF" "$PT_SECHOME" "$MSHLoc"

  MSPtNum=$(echo "$MasterDS" | cut -d':' -f3)
  CPtConf=$(grep "^[ ]*$PT_SECPORT[ ]*=" "$MDBCONF" | tail -1)
  CptNum=$(trim "${CPtConf#*=}")
  if [ -n "$CptNum" ] && [ "$CptNum" != "$MSPtNum" ]; then
    echo "${WARNINFO}The current Secondo port number $CptNum will be
overlapped by the master Secondo port $MSPtNum."
  fi
  setSecConf "$MDBCONF" "$PT_SECPORT" "$MSPtNum"

  # b. Add Parallel Secondo Configurations
  MPSFSLOC=$(head -1 "$PSMFILE" | cut -d':' -f2)
  if [ -z "$MPSFSLOC" ]; then
    # Without this path the msec location below would be "/msec"
    echo "${ERRORINFO}Cannot find the PSFS path of the master data server in $PSMFILE."
    echo "Installation for Parallel Secondo Fails. "
    exit -1
  fi
  # Look up the header again, the changes above may have moved it
  PSPos=$(grep -n "$PT_PARASEC" "$MDBCONF" | head -1 | cut -d':' -f1)
  setSecConf "$MDBCONF" "$PT_LIPADDR" "$MIP" "${PSPos:-$PSCST}"
  setSecConf "$MDBCONF" "$PT_PSFSNME" "$MPSFSLOC" "${PSPos:-$PSCST}"

  # Delete the backup file in MacOSX
  if [ "$SECONDO_PLATFORM" = "mac_osx" ]; then
    rm -f "${MDBCONF}.bak"
  fi

  #Replace the msec with a link
  MMSECLOC=${MPSFSLOC%/*}/$DSMSECNAME
  rm -rf -- "$MMSECLOC"
  ln -s "$SECONDO_BUILD_DIR" "$MMSECLOC"
  echo "The $SECONDO_BUILD_DIR is used to be the master mini Secondo of the parallel Secondo."
fi

echo -e "\n\n\n\n"
#Set the environment variables in .parasecrc
source "$HOME/.bashrc"

#Format the namenode of Hadoop
# The result is checked, so that a failed or aborted format does not pass
# unnoticed in front of the final message.
if ! hadoop namenode -format; then
  echo "${WARNINFO}Formatting the Hadoop namenode did not succeed, check the messages above." >&2
fi

echo -e "\n\n\n************************************************"
echo "Congratulations! The parallel Secondo has been correctly set up on your cluster."
cat "$DSRVFILE"
echo "************************************************"

exit 0