199 lines
5.6 KiB
Bash
199 lines
5.6 KiB
Bash
#!/bin/bash

# This script generates a BerlinMOD data set in parallel on a cluster.
# It can only run after Parallel Secondo has been correctly installed.
# In addition, it needs the following prerequisites:
#   * Distribute the data files streets, homeRegions and workRegions to the cluster.
#   * A Hadoop program named GenMOD.jar
#   * A set of Secondo scripts, including:
#     - BerlinMOD_DataGenerator_map.SEC     (generates data on the slaves in the Map stage)
#     - BerlinMOD_DataGenerator_reduce.SEC  (generates data on the slaves in the Reduce stage)
#     - BerlinMOD_DataGenerator_master1.SEC (sets global parameters on the master database)
#     - BerlinMOD_DataGenerator_master2.SEC (finally collects the distributed data on the master)
#   * This script must be run on the master node of the cluster.

# Absolute path of the directory that contains this script. The Secondo
# scripts and GenMOD.jar are looked up relative to it.
# `&&` (instead of `;`) keeps a failed `cd` from silently yielding the
# current working directory.
bin=$(cd -- "$(dirname -- "$0")" > /dev/null 2>&1 && pwd)
if [[ -z "${bin}" ]]; then
  printf 'ERROR !! Cannot determine the directory of %s.\n\n' "$0" >&2
  exit 255
fi

# Prefixes prepended to warning and error messages.
WARNINFO="Warning !! "
ERRORINFO="ERROR !! "

# Default settings; each one can be overridden by a command-line option.
DBNAME="berlinmod"   # -d: name of the database that is created
SCALEFACTOR=0.01     # -s: scale factor of the data set
P_NUMDAYS=0          # -p: simulation period in days; 0 means "not given"
GALONE=false         # -l: true = generate the data on this computer only

# 0. Process the arguments.

#######################################
# Print the usage message to stdout.
# Globals: DBNAME, SCALEFACTOR, GALONE (read; printed as the "default" values)
#######################################
bmod_print_usage() {
  printf 'Usage of %s:\n\n' "${0##*/}"
  printf ' -h Print this message and exit. \n\n'
  printf ' -d Set the created Database name (default) %s \n\n' "${DBNAME}"
  printf ' -s Set the Scale factor of the data set (default) %s \n\n' "${SCALEFACTOR}"
  printf ' -p Set the Period of simulation by days (default) NULL \n\n'
  printf ' -l Generate the data set Lonely on one computer (default) %s \n\n ' "${GALONE}"
}

#######################################
# Parse and validate the command-line options.
# Globals:   DBNAME, SCALEFACTOR, P_NUMDAYS, GALONE (written)
#            ERRORINFO (read)
# Arguments: the script's command-line arguments ("$@")
# Outputs:   usage text on stdout for -h; diagnostics on stderr
# Exits:     0 after -h; 255 (the status bash reports for the historical
#            `exit -1`) on any invalid option, argument or value
#######################################
bmod_parse_args() {
  local opt stripped
  # Local OPTIND/OPTARG so that the function can be called more than once.
  local OPTIND=1 OPTARG
  local -r digits_re='^[0-9]+$'

  while getopts "hd:s:p:l" opt; do
    case "${opt}" in
      h)
        bmod_print_usage
        exit 0
        ;;
      d)
        DBNAME="${OPTARG}"
        if [[ -z "${DBNAME}" ]]; then
          printf '%s\n\n' "${ERRORINFO}The database name cannot be empty. " >&2
          exit 255
        fi
        ;;
      s)
        SCALEFACTOR="${OPTARG}"
        # Dropping one '.' must leave nothing but digits ...
        stripped="${SCALEFACTOR/./}"
        if [[ ! "${stripped}" =~ ${digits_re} ]]; then
          printf '%s\n\n' "${ERRORINFO}The input scale factor ${SCALEFACTOR} is invalid." >&2
          exit 255
        # ... and the '.' must actually be there: plain integers are rejected.
        elif [[ "${stripped}" == "${SCALEFACTOR}" ]]; then
          printf '%s\n\n' "${ERRORINFO}The scale factor ${SCALEFACTOR} should be a real number." >&2
          exit 255
        fi
        ;;
      p)
        P_NUMDAYS="${OPTARG}"
        stripped="${P_NUMDAYS/./}"
        if [[ ! "${stripped}" =~ ${digits_re} ]]; then
          printf '%s\n\n' "${ERRORINFO}The simulate period ${P_NUMDAYS} is invalid." >&2
          exit 255
        elif [[ "${stripped}" != "${P_NUMDAYS}" ]]; then
          printf '%s\n\n' "${ERRORINFO}The simulate period ${P_NUMDAYS} should be an integer." >&2
          exit 255
        # 10# forces base 10 so that values such as 08 are not read as octal.
        elif (( 10#${P_NUMDAYS} <= 0 )); then
          printf '%s\n\n' "${ERRORINFO}The simulate period ${P_NUMDAYS} should at least be one day." >&2
          exit 255
        fi
        ;;
      l)
        GALONE=true
        ;;
      *)
        # Unknown option or missing option argument; getopts has already
        # printed the reason. Report failure instead of exiting with 0.
        bmod_print_usage >&2
        exit 255
        ;;
    esac
  done

  # Anything left after the options is not understood by this script.
  if (( OPTIND <= $# )); then
    bmod_print_usage >&2
    exit 255
  fi
}

bmod_parse_args "$@"

# Check the installation of Parallel SECONDO.
# PARALLEL_SECONDO_MAINDS is tested for emptiness explicitly: if it were unset,
# "${PARALLEL_SECONDO_MAINDS}/bin" would collapse to "/bin", which normally
# exists and would let the check pass by accident.
if [[ -z "${PARALLEL_SECONDO_MAINDS:-}" || ! -d "${PARALLEL_SECONDO_MAINDS}/bin" ]]; then
  printf '%s\n\n' "${ERRORINFO}The Parallel Secondo is not correctly installed. " >&2
  exit 255
fi

# Shell helpers of Parallel Secondo.
# NOTE(review): get_secPort, used further down, is presumably defined here -
# confirm against ps-functions.
if [[ ! -r "${PARALLEL_SECONDO_MAINDS}/bin/ps-functions" ]]; then
  printf '%s\n\n' "${ERRORINFO}The Parallel Secondo is not correctly installed. " >&2
  exit 255
fi
source "${PARALLEL_SECONDO_MAINDS}/bin/ps-functions"

# Database name handed to the Hadoop program GenMOD.jar: DBNAME prefixed
# with "ps". It must not be longer than 16 characters.
# NOTE(review): 16 is presumably Secondo's limit for database names - confirm.
SUBDBNAME="ps${DBNAME}"
if (( ${#SUBDBNAME} > 16 )); then
  printf '%s\n\n' "${ERRORINFO}The database name ${DBNAME} is too long. " >&2
  exit 255
fi

# ps-startTTYCS must be available on PATH; tcmd records where it was found.
# `command -v` is the shell builtin replacement for the external `which`.
if ! tcmd=$(command -v ps-startTTYCS); then
  printf '%s\n\n' "${ERRORINFO}The Parallel Secondo is not correctly installed. " >&2
  exit 255
fi

# The Secondo scripts and the Hadoop program must sit next to this script.
for required_file in \
  "BerlinMOD_DataGenerator_master1.SEC" \
  "BerlinMOD_DataGenerator_master2.SEC" \
  "BerlinMOD_DataGenerator_map.SEC" \
  "BerlinMOD_DataGenerator_reduce.SEC" \
  "GenMOD.jar"; do
  if [[ ! -f "${bin}/${required_file}" ]]; then
    printf '%s\n\n' "${ERRORINFO}Cannot find SECONDO scripts and the hadoop program. " >&2
    # Name the file so the user does not have to guess which one is missing.
    printf 'Missing file: %s\n\n' "${bin}/${required_file}" >&2
    exit 255
  fi
done
unset required_file

# Configuration file of the Secondo mini installation on this node.
CONFFILE="${PARALLEL_SECONDO_MAINDS}/${PARALLEL_SECONDO_MINI_NAME}/bin/SecondoConfig.ini"
if [[ ! -f "${CONFFILE}" ]]; then
  printf '%s\n\n' "${ERRORINFO}The configuration of the Parallel Secondo is wrong. " >&2
  exit 255
fi

# File whose presence is used below to tell whether the monitors are running.
# NOTE(review): get_secPort presumably extracts the port configured in
# CONFFILE - confirm against ps-functions.
LFNAME="/tmp/SECMON_$(get_secPort "${CONFFILE}")"

#######################################
# Run Secondo commands in a local ps-startTTY session.
# Arguments: one Secondo command per argument; they are written to the
#            session's stdin, one per line.
# Returns:   exit status of ps-startTTY
#######################################
bmod_run_tty() {
  # $(...) drops the final newline and <<< adds it back, so stdin ends with
  # exactly one newline after the last command.
  ps-startTTY -s 1 <<< "$(printf '%s\n' "$@")"
}

# -l: generate the whole data set on this computer, without the cluster.
if [[ "${GALONE}" == true ]]; then
  # LFNAME exists while monitors are running; they have to be stopped first.
  if [[ -f "${LFNAME}" ]]; then
    printf '%s\n\n' "${ERRORINFO}Stop the running monitors first if you like generate data on a single computer. " >&2
    exit 255
  fi

  # (Re)create the database and store the scale factor in it.
  bmod_run_tty \
    "delete database ${DBNAME};" \
    "create database ${DBNAME};" \
    "open database ${DBNAME};" \
    "let SCALEFACTOR = ${SCALEFACTOR};" \
    "close database;" \
    "q;"

  # The simulation period is only stored when -p was given (default is 0).
  if [ "${P_NUMDAYS}" -gt 0 ]; then
    bmod_run_tty \
      "open database ${DBNAME};" \
      "let P_NUMDAYS = ${P_NUMDAYS};" \
      "close database;" \
      "q;"
  fi

  # Run both generator stages one after the other in the same database.
  bmod_run_tty \
    "open database ${DBNAME};" \
    "@@${bin}/BerlinMOD_DataGenerator_map.SEC" \
    "@@${bin}/BerlinMOD_DataGenerator_reduce.SEC" \
    "close database;" \
    "q;"

  exit 0
fi

# Everything below generates the data on the cluster, which requires LFNAME
# to exist; otherwise the monitors are reported as not started.
if [[ ! -f "${LFNAME}" ]]; then
  printf '%s\n\n' "${ERRORINFO}The monitors are not started. " >&2
  exit 255
fi

#######################################
# Run Secondo commands in a ps-startTTYCS session.
# Arguments: one Secondo command per argument; they are written to the
#            session's stdin, one per line.
# Returns:   exit status of ps-startTTYCS
#######################################
bmod_run_ttycs() {
  ps-startTTYCS -s 1 <<< "$(printf '%s\n' "$@")"
}

# (Re)create the master database and store the scale factor in it.
bmod_run_ttycs \
  "delete database ${DBNAME};" \
  "create database ${DBNAME};" \
  "open database ${DBNAME};" \
  "let SCALEFACTOR = ${SCALEFACTOR};" \
  "close database;" \
  "q;"

# The simulation period is only stored when -p was given (default is 0).
if [ "${P_NUMDAYS}" -gt 0 ]; then
  bmod_run_ttycs \
    "open database ${DBNAME};" \
    "let P_NUMDAYS = ${P_NUMDAYS};" \
    "close database;" \
    "q;"
fi

# Set the global parameters on the master database. This is the only one of
# the three sessions whose exit status is checked.
if ! bmod_run_ttycs \
  "open database ${DBNAME};" \
  "@@${bin}/BerlinMOD_DataGenerator_master1.SEC" \
  "close database;" \
  "q;"; then
  # Uses ERRORINFO; the previously referenced ${ERROR} is never defined.
  printf '%s\n\n' "${ERRORINFO}Initialization on the master DS fails." >&2
  exit 255
fi

# Run the Hadoop program, passing it the sub-database name, the generator
# parameters and the map/reduce Secondo scripts.
if ! hadoop jar "${bin}/GenMOD.jar" GenMOD \
  "${SUBDBNAME}" "${SCALEFACTOR}" "${P_NUMDAYS}" \
  "${bin}/BerlinMOD_DataGenerator_map.SEC" \
  "${bin}/BerlinMOD_DataGenerator_reduce.SEC"; then
  # Uses ERRORINFO; the previously referenced ${ERROR} is never defined.
  printf '%s\n\n' "${ERRORINFO}The Hadoop program fails." >&2
  exit 255
fi

# Fetch the output of the Hadoop program from HDFS into a local file.
RESULTFILE="${bin}/.genResult"
# The glob is quoted so that HDFS, not the local shell, expands it.
if ! hadoop dfs -cat 'OUTPUT-MOD/p*' > "${RESULTFILE}"; then
  printf '%s\n\n' "${ERRORINFO}Cannot read the result of the Hadoop program." >&2
  exit 255
fi

# Turn every "<row> <ds> <column>" line into one tuple of a Secondo nested
# list; the Path attribute is left empty ('').
RESULT="("
# `|| [[ -n ... ]]` also processes a last line without a trailing newline.
while read -r ROW DS COLUMN || [[ -n "${ROW}" ]]; do
  # Blank lines would otherwise produce a malformed tuple.
  [[ -n "${ROW}" ]] || continue
  RESULT="${RESULT}(${ROW} ${DS} ${COLUMN} '') "
done < "${RESULTFILE}"
RESULT="${RESULT})"

# Store the tuples as relation locRel on the master and run the final
# collection script. This is the last command, so its exit status becomes
# the exit status of the script.
ps-startTTYCS -s 1 <<< "open database ${DBNAME};
let locRel = [const rel(tuple([Row:int, DS:int, Column:int, Path:text])) value ${RESULT} ];
@@${bin}/BerlinMOD_DataGenerator_master2.SEC
close database;
q;"