#!/bin/bash
bin=`dirname "$0"`
bin=`cd "$bin"; pwd`

source $bin/ps-functions

# This script is used to initialize the environment for Parallel Secondo.
# Before running this script, the following steps must be done first:
# * First, all required computers are connected as a cluster,
#   and the Secondo SDK has been installed on every node already.
# * Second, the hadoop archive file hadoop-0.20.2.tar.gz is kept in
#   $SECONDO_BUILD_DIR/bin .
# * Third, a configuration file named ParallelSecondoConfig.ini
#   is kept in $SECONDO_BUILD_DIR/bin too.
#
# In this script, the following steps will be done:
#
# 1. Check the existence of all required files
# 2. Unpack the Hadoop distribution, and set its configurations based on the ParallelSecondoConfig.ini
# 3. Create the .parasecrc file, which sets up needed environment variables on every cluster node
#    before the parallel Secondo starts
# 4. Prepare the masters and slaves files required by every node
# 5. Distribute the .parasecrc file to every cluster node, and source it in each $HOME/.bashrc
# 6. Create data server folders on every cluster machine

# 0. Process the arguments.
WARNINFO="Warning !! "
ERRORINFO="ERROR !! "
declare -a NODESARRAY    # The list of hadoop nodes
declare -a DSEVSARRAY    # The list of data servers
HDVERSION="hadoop-0.20.2"
DSCONFNAME="conf"
DSMASTERNAME="master"
DSSLAVESNAME="slaves"
DSPSFSNAME="PSFS"
DSMSECNAME="msec"
DSMSDBNAME="msec-databases"
DMHADPNAME="hadoop"
DMEBINNAME="bin"
RCFILENAME=".parasecrc"
MDSBINLIST="ds_scriptList"
PSCONFILE=$SECONDO_BUILD_DIR/bin/ParallelSecondoConfig.ini
PSDKLOC=$SECONDO_BUILD_DIR/bin/ps_SDK

### ----------------------------------------------------------------------------------------
# Auxiliary Functions

# Strip leading and trailing blanks
function trim {
  echo $(echo $1 | sed 's/^ *//' | sed 's/ *$//')
}

# Read configurations in $PSCONFILE.
# $1 is the section (environment) name, $2 an optional title (key) inside
# the section, and $3 the delimiter separating the title from its value.
function readPSConf {
  EnvName=$(trim "$1")
  TitName=$(trim "$2")
  DlmSign=$(trim "$3")

  if [ "$TitName" != "" -a "$DlmSign" = "" ]; then
    echo "${WARNINFO}The delimiter cannot be empty while the title is defined." >&2
    return
  fi

  findEnvName=false
  findTitle=false
  local values=()
  vnum=${#values[*]}

  while read LINE; do
    LINE=$(trim "$LINE")
    if [ "$LINE" != "" ]; then
      # Ignore comments
      if [ "$(echo $LINE | grep '^#' )" != "" ]; then
        continue
      fi

      if [ $(echo $LINE | grep '^\[\w*\]$') ]; then
        # Read the section title
        if ! $findEnvName ; then
          if [ $(echo $LINE | tr -d '[]') = "$EnvName" ]; then
            findEnvName=true;
          fi
        else
          # Return when another section starts
          echo "${values[*]}"
          return
        fi
      else
        # Read parameter values
        if $findEnvName ; then
          if [ "$TitName" != "" ]; then
            if [ "$(echo $LINE | grep ${DlmSign})" != "" ]; then
              iTit=$(trim ${LINE%%${DlmSign}*})
              iValue=$(trim ${LINE#*${DlmSign}})
              if [ "$TitName" = "$iTit" ]; then
                values[${vnum}]="$iValue"
                if [ "$DlmSign" = "+=" ]; then
                  let vnum++
                fi
              fi
            fi
          else
            values[${vnum}]="$LINE"
            let vnum++
          fi
        fi
      fi
    fi
  done < ${PSCONFILE}

  echo "${values[*]}"
}

# ------ Example of Reading ParallelSecondoConfig.ini File -----
#OLDIFS=$IFS
#IFS=$'\n'
#ClusterInfo=$(readPSConf "Cluster" "Slaves" "+=")
#for item in $ClusterInfo; do
#  echo "item: $item"
#done
#IFS=$OLDIFS
# ------ Example of Reading ParallelSecondoConfig.ini File -----
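
# ------ Illustrative fragment of ParallelSecondoConfig.ini (assumed values) -----
# readPSConf matches the [Section] name, an optional "Title" key inside it,
# and the delimiter ("=" or "+=") passed as its arguments. The IP addresses,
# paths, and ports below are examples only, not values shipped with Secondo.
#
# [Cluster]
# Master = 192.168.1.1:/opt/psec:11234
# Slaves += 192.168.1.2:/opt/psec:11234
# Slaves += 192.168.1.3:/opt/psec:11234
#
# [Hadoop]
# hadoop-env.sh:JAVA_HOME = /usr/lib/jvm/java-6-openjdk
# core-site.xml:fs.default.name = hdfs://192.168.1.1:49000
# ------ Illustrative fragment of ParallelSecondoConfig.ini -----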
# Set a SecondoConfig parameter.
# If the position line $4 is set, only add the new parameter after that line;
# or else, remove the old setting before inserting the new value.
function setSecConf {
  DSDBCONF=$1      # Configuration file
  ParaName=$2
  ParaValue=$3
  declare -i PPos=$4

  if [ $PPos -eq 0 ]; then
    DPos=($(grep -n "^[#]*[ ]*$ParaName" $DSDBCONF | cut -d':' -f1))
    declare -i APos=$(( ${DPos[0]} - 1 ))      # Append position
    DPos=$( echo ${DPos[*]} | tr ' ' ',' )
    sed $(PSEDOPT) "${DPos}d" $DSDBCONF        # Delete the old parameter
  else
    declare -i APos=$PPos
  fi

  NewPara="$ParaName=$ParaValue"
  sed $(PSEDOPT) "${APos}a \\
$NewPara
" $DSDBCONF
}

# Check the existence of a specific utility
function isExist {
  UName=$1
  UPath=$(which ${UName})
  if [ ! -f "$UPath" ]; then
    echo "${ERRORINFO}The utility ${UName} is not installed" >&2
    exit -1
  fi
}

### ----------------------------------------------------------------------------------------
# 1. Check the existence of all required files

# a. Check the availability of Secondo
if [ "$SECONDO_BUILD_DIR" = "" ]; then
  echo "${ERRORINFO}The Secondo database is not correctly installed on this machine."
  echo "Installation for Parallel Secondo fails."
  exit -1
elif [ ! -d ${SECONDO_BUILD_DIR} ]; then
  echo "${ERRORINFO}The \$SECONDO_BUILD_DIR: $SECONDO_BUILD_DIR does not exist."
  echo "Installation for Parallel Secondo fails."
  exit -1
fi

if [ ! -d $PSDKLOC ]; then
  mkdir -p $PSDKLOC
fi

# b. Check the existence of the Hadoop archive
HADOOPARV=$SECONDO_BUILD_DIR/bin/${HDVERSION}.tar.gz
HADOOPATH=$SECONDO_BUILD_DIR/bin/${HDVERSION}
if [ ! -f $HADOOPARV ]; then
  echo "${ERRORINFO}The Hadoop archive is not prepared on this machine."
  echo "Looking for $HADOOPARV ..."
  echo "Installation for Parallel Secondo fails."
  exit -1
else
  # Extract the Hadoop archive to $SECONDO_BUILD_DIR
  if [ -d $HADOOPATH -o -f $HADOOPATH ]; then
    echo "${WARNINFO}The last extraction of Hadoop $HADOOPATH is deleted."
    rm -rf $HADOOPATH
  fi
  echo "Unpacking $HADOOPARV to $HADOOPATH ..."
  tar -xzf $HADOOPARV -C $SECONDO_BUILD_DIR/bin/
fi
# The Hadoop archive file is expected in $SECONDO_BUILD_DIR/bin.
# It might better be put at $SECONDO_SDK, which is not set, at least on the linux64 platform.

# c. Check the existence of ParallelSecondoConfig.ini
if [ ! -f $PSCONFILE ]; then
  echo -e "The configuration file for installing Parallel Secondo \n $PSCONFILE \n does not exist."
  echo "Installation for Parallel Secondo fails."
  exit -1
fi

# d. Check the availability of Java
OLDIFS=$IFS
IFS=$'\n'
ParaInfo=($(readPSConf "Hadoop" "hadoop-env.sh:JAVA_HOME" "="))
javaHome=$(trim ${ParaInfo[0]})
IFS=$OLDIFS
if [ "$javaHome" = "" ]; then
  if [ "$JAVA_HOME" = "" ]; then
    echo "${ERRORINFO}The Java runtime is not correctly installed on this machine."
    echo "Installation for Parallel Secondo fails."
    exit -1
  else
    javaHome=${JAVA_HOME}
  fi
fi

if [ ! -d "$javaHome" ]; then
  echo "${ERRORINFO}The \$javaHome: $javaHome does not exist."
  echo "Installation for Parallel Secondo fails."
  exit -1
fi
javaRun=$javaHome/bin/java
JVersion=$($javaRun -version 2>&1 | grep "java version" | cut -d' ' -f 3 | tr -d \" | cut -d'.' -f2)
# Fail if the minor version is not a number, or is below 6
if [[ ! "$JVersion" =~ ^[0-9]+$ || $JVersion -lt 6 ]]; then
  echo "${ERRORINFO}The Java runtime version must be greater than 5."
  echo "Installation for Parallel Secondo fails."
  exit -1
fi

# e. Check the availability of $MDSBINLIST
if [ ! -f ${bin}/$MDSBINLIST ]; then
  echo "${ERRORINFO}The file listing required data server scripts does not exist."
  echo "Installation for Parallel Secondo fails."
  exit -1
fi

# f. Check prerequisites on the current computer, including:
#    passphraseless ssh, screen, ifconfig, etc.
# Note: call the checks directly; running them inside a command substitution
# would let their "exit -1" terminate only the subshell, not this script.
isExist ssh
isExist screen
isExist ifconfig
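
# ------ Example of the Java version check above (illustrative) -----
# The first line of a typical "java -version" output looks like:
#   java version "1.6.0_27"
# The pipeline above takes the third blank-separated field, strips the
# quotes, and keeps the second dot-separated component, here 6, so the
# runtime passes the "greater than 5" requirement.
# ------ Example of the Java version check -----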
# 2. Unpack the Hadoop distribution, and set its configurations based on the ParallelSecondoConfig.ini

# Set Hadoop configurations based on the current node.
# The configuration files of Hadoop include:
#   a hadoop/conf/hadoop-env.sh
#   b hadoop/conf/masters
#   c hadoop/conf/slaves
#   d hadoop/conf/core-site.xml
#   e hadoop/conf/hdfs-site.xml
#   f hadoop/conf/mapred-site.xml
HADOOPCONF=$HADOOPATH/conf

# a hadoop/conf/hadoop-env.sh
# ** Set JAVA_HOME
HPENV=$HADOOPCONF/hadoop-env.sh
Pattern="^[#]*[ ]*export[ ]*JAVA_HOME"
NJHOME="export JAVA_HOME=$javaHome"
JHPos=$(grep -n "$Pattern" $HPENV | tail -1 | cut -d':' -f1)
if [ "$JHPos" = "" ]; then
  JHPos=0
fi
# Remove the old setting of JAVA_HOME
sed $(PSEDOPT) '/'"$Pattern"'/d' $HPENV
let JHPos--
sed $(PSEDOPT) "${JHPos}a \\
$NJHOME
" $HPENV

# ** Set HADOOP_OPTS
OLDIFS=$IFS
IFS=$'\n'
Opts=($(readPSConf "Hadoop" "hadoop-env.sh:HADOOP_OPTS" "+="))
IFS=$OLDIFS
HPOPTS="HADOOP_OPTS=\""
for item in ${Opts[*]}; do
  HPOPTS="$HPOPTS$item "
done
HPOPTS="$HPOPTS\""
Pattern="^[#]*[ ]*HADOOP_OPTS"
LLPos=$(grep -n "$Pattern" $HPENV | head -1 | cut -d':' -f1)
if [ "$LLPos" = "" ]; then
  LLPos=$(wc -l $HPENV | awk '{print $1}')
fi
#sed $(PSEDOPT) '/'"$Pattern"'/d' $HPENV
sed $(PSEDOPT) "${LLPos}d" $HPENV
let LLPos--
sed $(PSEDOPT) "${LLPos}a \\
$HPOPTS
" $HPENV

# b hadoop/conf/masters
# c hadoop/conf/slaves
HPMaster=$HADOOPCONF/masters
HPSlaves=$HADOOPCONF/slaves

OLDIFS=$IFS
IFS=$'\n'
CMInfo=($(readPSConf "Cluster" "Master" "="))
MasterDS=$(trim ${CMInfo[0]})
IFS=$OLDIFS
# A data server is described as IP:path:port, hence exactly two colons
if [ $(echo $MasterDS | grep -o ":" | wc -l) -ne 2 ]; then
  echo "${ERRORINFO}The format of the Cluster:Master configuration is wrong."
  echo "Installation for Parallel Secondo fails."
  exit -1
fi
MIP=$(echo $MasterDS | cut -d':' -f1)
echo $MIP > $HPMaster

# Check the passphraseless ssh to the master
MHost=$(ssh -q -o PasswordAuthentication=no $MIP hostname)
if [ "${MHost}" = "" ]; then
  echo "${ERRORINFO}Can't ssh to ${MIP} without a passphrase."
  exit -1
fi

OLDIFS=$IFS
IFS=$'\n'
CSInfo=($(readPSConf "Cluster" "Slaves" "+="))
IFS=$OLDIFS
cat /dev/null > $HPSlaves
for slave in ${CSInfo[*]}; do
  if [ $(echo $slave | grep -o ":" | wc -l) -ne 2 ]; then
    echo "${ERRORINFO}The format of a Cluster:Slaves configuration is wrong."
    echo "Installation for Parallel Secondo fails."
    exit -1
  fi
  SIP=$(echo $slave | cut -d':' -f1)
  echo $SIP >> $HPSlaves
done
# Use sort + uniq to remove duplicated lines.
sort $HPSlaves | uniq > ${HPSlaves}.tmp
mv ${HPSlaves}.tmp $HPSlaves
if [ ! -s $HPSlaves ]; then
  echo "${ERRORINFO}No slaves are defined."
  echo "Installation for Parallel Secondo fails."
  exit -1
fi
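
# ------ Setting up passphraseless ssh (illustrative, not executed here) -----
# The checks below only verify that every node is reachable; a typical way
# to establish passphraseless access beforehand is:
#   ssh-keygen -t rsa -N "" -f $HOME/.ssh/id_rsa
#   ssh-copy-id <node-ip>    # repeat for the master and every slave
# ------ Setting up passphraseless ssh -----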
# Check the passphraseless ssh to the slaves
while read Slave; do
  SIP=$(echo $Slave | cut -d':' -f1)
  SHost=$(ssh -q -o PasswordAuthentication=no $SIP hostname)
  if [ "${SHost}" = "" ]; then
    echo "${ERRORINFO}Can't ssh to ${SIP} without a passphrase."
    exit -1
  fi
done < $HPSlaves

# Get all configurations about Hadoop
OLDIFS=$IFS
IFS=$'\n'
HXCInfo=($(readPSConf "Hadoop" "" ""))
IFS=$OLDIFS

# d hadoop/conf/core-site.xml
# e hadoop/conf/hdfs-site.xml
# f hadoop/conf/mapred-site.xml
XMLFile=(core-site.xml hdfs-site.xml mapred-site.xml)
for xfileName in ${XMLFile[*]}; do
  xfile=$HADOOPCONF/$xfileName
  ENDPos=$(cat -n $xfile | grep "<configuration>" | head -1 | cut -f1)
  # Remove all existing configurations after the opening <configuration> tag
  sed $(PSEDOPT) "$(( $ENDPos + 1)),\$d" $xfile
  ITNum=0
  while [ $ITNum -lt ${#HXCInfo[*]} ]; do
    item=${HXCInfo[${ITNum}]}
    if [ "$(echo $item | grep "^${xfileName}:")" != "" ]; then
      ititle=$(trim ${item%=*})
      ititle=$(trim ${ititle#*:})
      ivalue=$(trim ${item#*=})
      PROPERTY="\n\t<property>\n\t\t<name>$ititle</name>\n\t\t<value>$ivalue</value>\n\t</property>\n"
      echo -e $PROPERTY >> $xfile
    fi
    let ITNum++
  done
  echo "</configuration>" >> $xfile
done

# 3. Create the .parasecrc file, which sets up needed environment variables on every cluster node
#    before the parallel Secondo starts
PSRCFILE=$PSDKLOC/psrc_tmp
CLSTFILE=$PSDKLOC/clusterNodes
DSRVFILE=$PSDKLOC/dataServers
cat /dev/null > $PSRCFILE
cat /dev/null > $CLSTFILE
cat /dev/null > $DSRVFILE

# Merge master and slaves into $CLSTFILE
cat $HPMaster $HPSlaves | sort | uniq > ${CLSTFILE}
CNInfo=(${CMInfo[*]} ${CSInfo[*]})
CNInfo=($(echo ${CNInfo[*]} | tr ' ' '\n' | awk '!x[$0]++' | tr '\n' ' ' ))

# Find all cluster nodes and data servers
while read Node; do
  DSLOC="$Node"
  ITNum=0
  while [ $ITNum -lt ${#CNInfo[*]} ]; do
    ivalue=${CNInfo[${ITNum}]}
    ivalue=${ivalue#*=}
    if [ "$(echo $ivalue | grep "^$Node:" )" != "" ]; then
      loc=$( echo $ivalue | cut -d':' -f2)
      DSLOC="$DSLOC:$loc"
    fi
    let ITNum++
  done
  echo "$DSLOC" >> $DSRVFILE
done < $CLSTFILE

# Prepare the template script for .parasecrc
echo "export PARALLEL_SECONDO_BUILD_DIR=\$SECONDO_BUILD_DIR" >> $PSRCFILE
echo "export PARALLEL_SECONDO_DATASERVER_NAME=\$USER" >> $PSRCFILE
echo "PARALLEL_SECONDO_MAINDS=\$(echo \${PARALLEL_SECONDO} | cut -d':' -f 1)" >> $PSRCFILE
echo "export PARALLEL_SECONDO_MAINDS=\$PARALLEL_SECONDO_MAINDS/\$PARALLEL_SECONDO_DATASERVER_NAME" >> $PSRCFILE
echo "export PARALLEL_SECONDO_CONF=\$PARALLEL_SECONDO_MAINDS/${DSCONFNAME}" >> $PSRCFILE
echo "export PARALLEL_SECONDO_MASTER=\$PARALLEL_SECONDO_CONF/${DSMASTERNAME}" >> $PSRCFILE
echo "export PARALLEL_SECONDO_SLAVES=\$PARALLEL_SECONDO_CONF/${DSSLAVESNAME}" >> $PSRCFILE
echo "export PARALLEL_SECONDO_PSFSNAME=\"${DSPSFSNAME}\"" >> $PSRCFILE
echo "export PARALLEL_SECONDO_MINI_NAME=\"${DSMSECNAME}\"" >> $PSRCFILE
echo "export PARALLEL_SECONDO_MINIDB_NAME=\"${DSMSDBNAME}\"" >> $PSRCFILE
echo "export HADOOP_HOME=\$PARALLEL_SECONDO_MAINDS/hadoop" >> $PSRCFILE
echo "export PATH=\$PATH:\$HADOOP_HOME/bin" >> $PSRCFILE
echo "export PATH=\$PATH:\$PARALLEL_SECONDO_MAINDS/${DMEBINNAME}" >> $PSRCFILE

# 4. Prepare the masters and slaves files required by every node
PSMFILE=$PSDKLOC/ps_master_tmp
PSSFILE=$PSDKLOC/ps_slaves_tmp
for IDX in {1..2}; do
  if [ $IDX -eq 1 ]; then
    ARRAY=$MasterDS
    FILE=$PSMFILE
  else
    ARRAY=${CSInfo[*]}
    FILE=$PSSFILE
  fi
  cat /dev/null > $FILE
  for item in ${ARRAY[*]}; do
    IPAddr=$(echo $item | cut -d':' -f1)
    PSFSPT=$(echo $item | cut -d':' -f2)
    MSPORT=$(echo $item | cut -d':' -f3)
    PSFSPT="$PSFSPT/$USER/$DSPSFSNAME"
    echo "$IPAddr:$PSFSPT:$MSPORT" >> $FILE
  done
done
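
# ------ Example of the rewriting above (illustrative) -----
# With USER=psec, a Slaves entry "192.168.1.2:/opt/psec:11234" is stored in
# $PSSFILE as:
#   192.168.1.2:/opt/psec/psec/PSFS:11234
# i.e. the PSFS location of that data server replaces the plain path.
# ------ Example of the rewriting above -----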
# 5. Prepare each data server's SecondoConfig.ini file
MSECLIST=$PSDKLOC/miniSecList   # List of all data servers
DBCONFILE="SecondoConfig.ini"
# The following parameters need to be changed
PT_SECHOME="SecondoHome"
PT_SECPORT="SecondoPort"
PT_PARASEC="\[ParallelSecondo\]"
# Parameters added instead of changed
PT_PSFSNME="SecondoFilePath"
PT_LIPADDR="localIP"
MDBCONF=$SECONDO_BUILD_DIR/bin/$DBCONFILE

# 5a. Clean all Parallel Secondo configuration in $MDBCONF
declare -i PSCST=$(grep -n "$PT_PARASEC" $MDBCONF | cut -d':' -f1)
if [ $PSCST -gt 0 ]; then
  declare -i PSCED=$(sed -n "$(( $PSCST + 1 )),$ p" $MDBCONF | grep -n "^[[]" | head -1 | cut -d':' -f1)
  if [ $PSCED -eq 0 ]; then
    PSCED=$(wc -l $MDBCONF | cut -d' ' -f1)
  else
    PSCED=$(( $PSCST + $PSCED - 1 ))
  fi
  sed $(PSEDOPT) "$(( $PSCST + 1 )),${PSCED}s/^\([^#]\)*//" $MDBCONF
  # Delete the backup file on Mac OS X
  if [ "$SECONDO_PLATFORM" = "mac_osx" ]; then
    rm "${MDBCONF}.bak"
  fi
else
  echo "$PT_PARASEC" | sed 's/\\//g' >> $MDBCONF
  PSCST=$(grep -n "$PT_PARASEC" $MDBCONF | cut -d':' -f1)
fi

cat $PSMFILE $PSSFILE | awk '!x[$0]++' > $MSECLIST
NodeIPAddr=""
while read dataServ; do
  IPAddre=$(echo $dataServ | cut -d':' -f1)
  PSFSLoc=$(echo $dataServ | cut -d':' -f2)
  SecHome=$(echo $PSFSLoc | sed "s/$DSPSFSNAME/$DSMSDBNAME/")
  PortNum=$(echo $dataServ | cut -d':' -f3)
  DSDBCONF=$PSDKLOC/$DBCONFILE.$IPAddre.$PortNum
  cp $MDBCONF $DSDBCONF

  # Change SecondoHome & SecondoPort
  ChgPName=($PT_SECHOME $PT_SECPORT)
  ChgValue=($SecHome $PortNum)
  Cnt=0
  while [ $Cnt -lt ${#ChgPName[*]} ]; do
    ParaName=${ChgPName[$Cnt]}
    ParaVale=${ChgValue[$Cnt]}
    setSecConf $DSDBCONF $ParaName $ParaVale
    let Cnt++
  done

  # In case the parallel Secondo environment is not initialized
  # if [ "$(grep "$PT_PARASEC" $DSDBCONF )" = "" ]; then
  #   echo "$PT_PARASEC" | sed 's/\\//g' >> $DSDBCONF
  # fi

  # Add PSFS & localIP to the $PT_PARASEC environment
  AddPName=($PT_PSFSNME $PT_LIPADDR)
  AddValue=($PSFSLoc $IPAddre)
  Cnt=0
  while [ $Cnt -lt ${#AddPName[*]} ]; do
    ParaName=${AddPName[$Cnt]}
    ParaVale=${AddValue[$Cnt]}
    # PPos=$(grep -n "$PT_PARASEC" $DSDBCONF | tail -1 | cut -d':' -f1)
    setSecConf $DSDBCONF $ParaName $ParaVale $PSCST
    let Cnt++
  done

  # Delete the backup file on Mac OS X
  if [ "$SECONDO_PLATFORM" = "mac_osx" ]; then
    rm "${DSDBCONF}.bak"
  fi
done < $MSECLIST

ANODES=($(cat $DSRVFILE | tr '\n' ' '))
for Node in ${ANODES[*]}; do
  nodeAddr=${Node%%:*}
  nodeRCFile=$PSDKLOC/parasecrc_${nodeAddr}
  echo "export PARALLEL_SECONDO=${Node#*:}" > $nodeRCFile
  cat $PSRCFILE >> $nodeRCFile
  ConfPath=$(echo $Node | cut -d':' -f2)
  # Get all port numbers of the current node
  portArr=($(grep $nodeAddr $MSECLIST | cut -d':' -f3))
  DBCONFIG="export PARALLEL_SECONDO_DBCONFIG="
  for port in ${portArr[*]}; do
    DBCONFIG="${DBCONFIG}${ConfPath}/${USER}/conf/${DBCONFILE}.${port}:"
  done
  DBCONFIG=$(echo $DBCONFIG | sed 's/:$//')
  echo $DBCONFIG >> $nodeRCFile
done

# 6. Distribute files and folders to every cluster node, including:
#    a. $HOME/.parasecrc, and source it in each $HOME/.bashrc
#    b. msec, msec-databases, PSFS folders
#    c. conf, hadoop, HDFS (only on the main data server of each node)
SDSFLODER=$PSDKLOC/sdbFolder   # Slave data server folder
MDSFLODER=$PSDKLOC/mdbFolder   # Main (master) data server folder

# Prepare everything that a master and a slave data server need on the current
# machine, then copy these folders to the remote nodes, instead of creating
# files and folders one by one.
echo "Preparing all needed files and folders for master and slave data servers ..."
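
# The resulting layout under each data server path is (illustrative):
#   <DSPath>/$USER/msec               mini Secondo
#   <DSPath>/$USER/msec-databases     mini Secondo databases
#   <DSPath>/$USER/PSFS               Parallel Secondo File System
#   <DSPath>/$USER/{conf,hadoop,bin}  main data server of a node only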
mkdir -p $SDSFLODER/$DSMSECNAME   # msec
mkdir -p $SDSFLODER/$DSMSDBNAME   # msec-databases
mkdir -p $SDSFLODER/$DSPSFSNAME   # PSFS
cp -r $SDSFLODER $MDSFLODER
mkdir -p $MDSFLODER/$DSCONFNAME   # conf
mkdir -p $MDSFLODER/$DMHADPNAME   # hadoop
mkdir -p $MDSFLODER/$DMEBINNAME   # bin

# Use compressed archives to speed up copying files
declare -i SLEVEL=$(echo $HADOOPATH | tr '/' '\n' | sed '/^$/d' | wc -l)
tar -cf - $HADOOPATH | tar -xf - -C $MDSFLODER/hadoop --strip=$SLEVEL
cp $PSMFILE $MDSFLODER/$DSCONFNAME/$DSMASTERNAME
cp $PSSFILE $MDSFLODER/$DSCONFNAME/$DSSLAVESNAME
cat ${bin}/$MDSBINLIST | xargs -i cp ${bin}/{} $MDSFLODER/$DMEBINNAME
declare -i SLEVEL=$(echo $MDSFLODER | tr '/' '\n' | sed '/^$/d' | wc -l)
tar -czf ${MDSFLODER}.tar.gz ${MDSFLODER} &> /dev/null
tar -czf ${SDSFLODER}.tar.gz ${SDSFLODER} &> /dev/null

#echo "---------------------"
#echo "The Data Server List"
#echo "---------------------"
#cat $DSRVFILE
#echo "---------------------"

# Prepare one node of the cluster: install .parasecrc, hook it into .bashrc,
# and unpack the data server folders on that node.
function formatNode {
  node=$1
  curPlat=$2
  nodeIP=${node%%:*}
  dataServers=($(echo ${node#*:} | tr ':' ' '))
  nodeRCFile=$PSDKLOC/parasecrc_${nodeIP}
  scp -o LogLevel=quiet $nodeRCFile $nodeIP:$RCFILENAME

  # Delete existing source commands if necessary
  CLNUM=$(ssh -o LogLevel=quiet $nodeIP "grep -n \"source \\\$HOME/${RCFILENAME}\" \$HOME/.bashrc" | cut -d':' -f1)
  if [ "$CLNUM" != "" ]; then
    ssh -o LogLevel=quiet $nodeIP "sed -i \"${CLNUM}d\" \$HOME/.bashrc"
  fi

  # Source the .parasecrc file in the .bashrc, to set all required environment variables
  case ${curPlat} in
    # Process linux and linux64 with the same statements
    linux )
      # Execute the .parasecrc directly after processing .secondorc
      SCNUM=$(ssh -o LogLevel=quiet $nodeIP "grep -n \"source [\\\$A-Z_.\\\/]*secondorc [\\\$a-zA-Z_.\\\/]*\" \$HOME/.bashrc" | cut -d':' -f1)
      if [ "$SCNUM" == "" ]; then
        SCNUM=1
      else
        let SCNUM++
      fi
      ssh -o LogLevel=quiet $nodeIP "sed -i \"${SCNUM}isource \\\$HOME/${RCFILENAME}\" \$HOME/.bashrc"
      ;;
    mac_osx )
      ssh -o LogLevel=quiet $nodeIP "echo \"source \\\$HOME/$RCFILENAME\" >> \$HOME/.bashrc"
      echo "Note!! The $HOME/.bashrc file must be executed in a Mac OS X shell prompt." >&2
      echo -e "This step has to be done manually by users.\n\n" >&2
      ;;
  esac

  declare -i sidx=0
  for dserv in ${dataServers[*]}; do
    dserv="$dserv/$USER"   # Use $USER to distinguish different users' data servers on a same cluster node
    ssh -o LogLevel=quiet $nodeIP "mkdir -p $dserv"
    if [ $sidx -eq 0 ]; then
      # This is the main data server on the current node
      ARVPATH=${MDSFLODER}
    else
      # This is a normal data server on the current node
      ARVPATH=${SDSFLODER}
    fi
    ARVNAME=${ARVPATH##*/}
    scp -o LogLevel=quiet ${ARVPATH}.tar.gz $nodeIP:$dserv
    ssh -o LogLevel=quiet $nodeIP "tar -xmf $dserv/${ARVNAME}.tar.gz -C $dserv/ --strip=$SLEVEL "
    # Copy SecondoConfig.ini files
    if [ $sidx -eq 0 ]; then
      FILES=($(ls $PSDKLOC/$DBCONFILE.$nodeIP.*))
      for file in ${FILES[*]}; do
        port=${file##*.}
        scp -o LogLevel=quiet $PSDKLOC/$DBCONFILE.$nodeIP.$port $nodeIP:$dserv/conf/${DBCONFILE}.$port
      done
    fi
    let sidx++
  done
}

CURPLAT=$(echo $SECONDO_PLATFORM | sed 's/[0-9]*$//')
if [ "$CURPLAT" != "linux" -a "$CURPLAT" != "mac_osx" ]; then
  echo "${ERRORINFO}Unknown platform, check your \$SECONDO_PLATFORM definition."
  echo "Installation for Parallel Secondo fails."
  exit 1
fi

ANODES=($(cat $DSRVFILE | tr '\n' ' '))
NDNUM=${#ANODES[*]}
aJobs=()
echo "Start to distribute data servers on the cluster ..."
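
# The loop below keeps at most $PS_PIPE_Width formatNode jobs running
# concurrently, reusing a token slot as soon as its job has exited.
# ------ A simpler sequential alternative (illustrative, not used) -----
# for node in ${ANODES[*]}; do
#   formatNode $node $CURPLAT
# done
# ------ A simpler sequential alternative -----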
for ((NDIDX=0; NDIDX<$NDNUM; )); do
  node=${ANODES[$NDIDX]}
  for ((token=1; token<=$PS_PIPE_Width; token++)); do
    if [ $NDIDX -ge $NDNUM ]; then
      break
    fi
    # A token is free if it has no job yet, or its job has already exited
    if [ ! "${aJobs[$token]}" ] ||
       ! kill -0 ${aJobs[$token]} 2>/dev/null; then
      formatNode $node $CURPLAT &
      aJobs[$token]=$!
      let NDIDX++
      break
    fi
  done
done

# Wait until all distribution jobs have finished
for ((token=1; token<=$PS_PIPE_Width; )); do
  if ! kill -0 ${aJobs[$token]} 2>/dev/null; then
    let token++
  fi
done

# 7. Set up the master mini Secondo configuration if NS4Master is set
NS4Master=($(readPSConf "Options" "NS4Master" "="))
NS4Master=$(trim ${NS4Master[0]})
if [ "$NS4Master" = "true" ]; then
  # Make sure the current node is the master node
  if ! $(oneOf "$MIP" $(get_localIPs)) ; then
    echo "${ERRORINFO}The current node is not the master node, hence its Secondo cannot be set as the master mini Secondo."
    echo "Installation for Parallel Secondo fails."
    exit -1
  fi

  # Set up the Secondo configurations
  # a. Change Secondo Home and Port
  SHPos=($(grep -n "^[ ]*$PT_SECHOME[ ]*=" $MDBCONF | cut -d':' -f1))
  SHPos=${SHPos[$(( ${#SHPos[@]} - 1 ))]}
  SHConf=$(sed -n "${SHPos}p" $MDBCONF)
  PSHLoc=("$(trim ${SHConf#*=})" "$SECONDO_PARAM_SecondoHome" "$HOME/secondo-databases")
  for loc in ${PSHLoc[*]}; do
    if [ -d $loc ]; then
      MSHLoc=$loc
      echo "The master Secondo Home is set as $MSHLoc"
      break
    fi
  done
  if [ "$MSHLoc" = "" ]; then
    echo "${ERRORINFO}Cannot find the master Secondo Home."
    echo "Installation for Parallel Secondo fails."
    exit -1
  fi
  setSecConf $MDBCONF $PT_SECHOME $MSHLoc

  MSPtNum=$(echo $MasterDS | cut -d':' -f3)
  SPtPos=($(grep -n "^[ ]*$PT_SECPORT[ ]*=" $MDBCONF | cut -d':' -f1))
  SPtPos=${SPtPos[$(( ${#SPtPos[@]} - 1 ))]}
  CPtConf=$(sed -n "${SPtPos}p" $MDBCONF)
  CptNum=$(trim ${CPtConf#*=})
  if [ "$CptNum" != "" ]; then
    if [ $CptNum -ne $MSPtNum ]; then
      echo "${WARNINFO}The current Secondo port number $CptNum will be overwritten by the master Secondo port $MSPtNum."
    fi
  fi
  setSecConf $MDBCONF $PT_SECPORT $MSPtNum

  # b. Add the Parallel Secondo configurations
  setSecConf $MDBCONF $PT_LIPADDR $MIP $PSCST
  MPSFSLOC=$(head -1 $PSMFILE | cut -d':' -f2)
  setSecConf $MDBCONF $PT_PSFSNME $MPSFSLOC $PSCST

  # Delete the backup file on Mac OS X
  if [ "$SECONDO_PLATFORM" = "mac_osx" ]; then
    rm "${MDBCONF}.bak"
  fi

  # Replace the msec folder with a link
  MMSECLOC=${MPSFSLOC%/*}/$DSMSECNAME
  rm -rf $MMSECLOC
  ln -s $SECONDO_BUILD_DIR $MMSECLOC
  echo "The $SECONDO_BUILD_DIR is used as the master mini Secondo of the parallel Secondo."
fi

echo -e "\n\n\n\n"
# Set the environment variables in .parasecrc
source $HOME/.bashrc
# Format the namenode of Hadoop
hadoop namenode -format

echo -e "\n\n\n************************************************"
echo "Congratulations! Parallel Secondo has been correctly set up on your cluster."
cat $DSRVFILE
echo "************************************************"
exit 0
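
# ------ Illustrative excerpt of a generated $HOME/.parasecrc -----
# The concrete paths depend on the Cluster section of the configuration
# file; the first line below is an example value only.
#
# export PARALLEL_SECONDO=/opt/psec
# export PARALLEL_SECONDO_BUILD_DIR=$SECONDO_BUILD_DIR
# export PARALLEL_SECONDO_DATASERVER_NAME=$USER
# PARALLEL_SECONDO_MAINDS=$(echo ${PARALLEL_SECONDO} | cut -d':' -f 1)
# export PARALLEL_SECONDO_MAINDS=$PARALLEL_SECONDO_MAINDS/$PARALLEL_SECONDO_DATASERVER_NAME
# export HADOOP_HOME=$PARALLEL_SECONDO_MAINDS/hadoop
#
# A new login shell sources this file from .bashrc; running
# "echo $PARALLEL_SECONDO_MAINDS" on a node is a quick installation check.
# ------ Illustrative excerpt of a generated $HOME/.parasecrc -----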