#!/bin/bash

# This script generates a BerlinMOD data set in parallel on a cluster.
# It can only run after Parallel Secondo has been correctly installed.
# In addition, it needs the following prerequisites:
#   * The data files streets, homeRegions and workRegions are distributed to the cluster.
#   * A Hadoop program named GenMOD.jar.
#   * A set of Secondo scripts, including:
#       - BerlinMOD_DataGenerator_map.SEC      (generates data on the slaves in the Map stage)
#       - BerlinMOD_DataGenerator_reduce.SEC   (generates data on the slaves in the Reduce stage)
#       - BerlinMOD_DataGenerator_master1.SEC  (sets global parameters on the master database)
#       - BerlinMOD_DataGenerator_master2.SEC  (finally collects the distributed data on the master)
#   * This script must run on the master node of the cluster.

# Absolute directory that contains this script; the companion SECONDO
# scripts and GenMOD.jar are looked up relative to it.
bin=$(dirname "$0")
bin=$(cd "$bin"; pwd)

# Prefixes put in front of diagnostic messages.
WARNINFO='Warning !! '
ERRORINFO='ERROR !! '

# Defaults, overridable with -d, -s, -p and -l respectively.
# A period of 0 days means that no P_NUMDAYS object is written.
DBNAME='berlinmod'
SCALEFACTOR='0.01'
P_NUMDAYS='0'
GALONE='false'

# 0. Process the arguments.

# Print the usage text to stdout. The "(default)" values shown are the
# current values of DBNAME, SCALEFACTOR and GALONE.
print_usage() {
  echo -en "Usage of ${0##*/}:\n\n"
  echo -en "\t-h Print this message and exit. \n\n"
  echo -en "\t-d Set the created Database name (default) ${DBNAME} \n\n"
  echo -en "\t-s Set the Scale factor of the data set (default) ${SCALEFACTOR} \n\n"
  echo -en "\t-p Set the Period of simulation by days (default) NULL \n\n"
  echo -en "\t-l Generate the data set Lonely on one computer (default) ${GALONE} \n\n "
}

# Parse the command line options and validate their values.
# Globals:   DBNAME, SCALEFACTOR, P_NUMDAYS, GALONE (written)
#            ERRORINFO (read)
# Arguments: the script's command line ("$@")
# Exits:     0 after -h; non-zero on an invalid value, an unknown option,
#            a missing option argument or an unexpected positional argument.
parse_args() {
  local OPTIND=1 OPTARG optKey digits

  while getopts "hd:s:p:l" optKey; do
    case "$optKey" in
      h)
        print_usage
        exit 0
        ;;
      d)
        DBNAME="${OPTARG}"
        if [ -z "${DBNAME}" ]; then
          echo -en "${ERRORINFO}The database name cannot be empty. \n\n"
          exit -1
        fi
        ;;
      s)
        SCALEFACTOR="${OPTARG}"
        # Drop the first dot; what remains must consist of digits only.
        # The check is done inside the shell, so the value is never
        # word-split or glob-expanded.
        digits="${SCALEFACTOR/./}"
        if [[ ! "$digits" =~ ^[0-9]+$ ]]; then
          echo -en "${ERRORINFO}The input scale factor ${SCALEFACTOR} is invalid.\n\n"
          exit -1
        elif [[ "$digits" == "$SCALEFACTOR" ]]; then
          # No dot was present: the value is written as an integer.
          echo -en "${ERRORINFO}The scale factor ${SCALEFACTOR} should be a real number.\n\n"
          exit -1
        fi
        ;;
      p)
        P_NUMDAYS="${OPTARG}"
        digits="${P_NUMDAYS/./}"
        if [[ ! "$digits" =~ ^[0-9]+$ ]]; then
          echo -en "${ERRORINFO}The simulate period ${P_NUMDAYS} is invalid.\n\n"
          exit -1
        elif [[ "$digits" != "$P_NUMDAYS" ]]; then
          # A dot was removed: the value is not an integer.
          echo -en "${ERRORINFO}The simulate period ${P_NUMDAYS} should be an integer.\n\n"
          exit -1
        elif [ "${P_NUMDAYS}" -le 0 ]; then
          echo -en "${ERRORINFO}The simulate period ${P_NUMDAYS} should at least be one day.\n\n"
          exit -1
        fi
        ;;
      l)
        GALONE=true
        ;;
      *)
        # Unknown option or missing option argument (getopts has already
        # printed its own diagnostic): show the usage and report failure.
        print_usage
        exit -1
        ;;
    esac
  done

  # This script takes no positional arguments.
  if [ "$OPTIND" -le "$#" ]; then
    print_usage
    exit -1
  fi
}

parse_args "$@"

# Check the installation of Parallel SECONDO.
# PARALLEL_SECONDO_MAINDS is tested for emptiness first: when it is unset the
# directory test would collapse to "/bin", which normally exists and would let
# the check pass by mistake.
if [ -z "${PARALLEL_SECONDO_MAINDS}" ] \
  || [ ! -d "${PARALLEL_SECONDO_MAINDS}/bin" ] \
  || [ ! -f "${PARALLEL_SECONDO_MAINDS}/bin/ps-functions" ]; then
  echo -en "${ERRORINFO}The Parallel Secondo is not correctly installed. \n\n"
  exit -1
fi
# Load the Parallel SECONDO shell helpers (presumably the source of
# get_secPort, which this script calls but does not define).
source "${PARALLEL_SECONDO_MAINDS}/bin/ps-functions"


# Name handed to the Hadoop job: the database name with a "ps" prefix.
# The prefixed name may be at most 16 characters long.
SUBDBNAME="ps${DBNAME}"
(( ${#SUBDBNAME} <= 16 )) || {
  echo -en "${ERRORINFO}The database name ${DBNAME} is too long. \n\n"
  exit -1
}

# Make sure the ps-startTTYCS launcher can be found through PATH.
# `command -v` is a shell builtin, so the lookup does not depend on an
# external `which` program being installed or on how it reports failure.
if ! tcmd=$(command -v ps-startTTYCS); then
  echo -en "${ERRORINFO}The Parallel Secondo is not correctly installed. \n\n"
  exit -1
fi

# The four SECONDO scripts and the Hadoop program must all sit in the same
# directory as this script; stop at the first one that is missing.
for required in \
  BerlinMOD_DataGenerator_master1.SEC \
  BerlinMOD_DataGenerator_master2.SEC \
  BerlinMOD_DataGenerator_map.SEC \
  BerlinMOD_DataGenerator_reduce.SEC \
  GenMOD.jar
do
  if [ ! -f "${bin}/${required}" ]; then
    echo -en "${ERRORINFO}Cannot find SECONDO scripts and the hadoop program. \n\n"
    exit -1
  fi
done

# Configuration file of the mini SECONDO that belongs to the main data server.
CONFFILE="${PARALLEL_SECONDO_MAINDS}/${PARALLEL_SECONDO_MINI_NAME}/bin/SecondoConfig.ini"
# The path is quoted: unquoted, a path containing whitespace makes `[` fail
# with a syntax error, which would silently skip this check.
if [ ! -f "${CONFFILE}" ]; then
  echo -en "${ERRORINFO}The configuration of the Parallel Secondo is wrong. \n\n"
  exit -1
fi
# File whose existence is used below to decide whether the monitors are
# running; its name is derived from the port reported by get_secPort.
LFNAME="/tmp/SECMON_$(get_secPort "${CONFFILE}")"

# Stand-alone mode (-l): generate the whole data set on this computer with
# ps-startTTY, without Hadoop. The flag is compared as a string instead of
# being executed as a command.
if [ "${GALONE}" = "true" ]; then
  # An existing ${LFNAME} is taken as "the monitors are running".
  if [ -f "${LFNAME}" ]; then
    echo -en "${ERRORINFO}Stop the running monitors first if you like generate data on a single computer. \n\n"
    exit -1
  fi

  # (Re)create the database and store the scale factor in it. The status is
  # deliberately not checked here: the leading "delete database" is issued
  # unconditionally (NOTE(review): it presumably fails on a first run - confirm).
  ps-startTTY -s 1 <<< "delete database ${DBNAME};
create database ${DBNAME};
open database ${DBNAME};
let SCALEFACTOR = ${SCALEFACTOR};
close database;
q;"

  # Store the simulation period only when one was given with -p.
  if [ "${P_NUMDAYS}" -gt 0 ]; then
    ps-startTTY -s 1 <<< "open database ${DBNAME};
let P_NUMDAYS = ${P_NUMDAYS};
close database;
q;"
  fi

  # Run the map script and then the reduce script in the local database.
  ps-startTTY -s 1 <<< "open database ${DBNAME};
@@${bin}/BerlinMOD_DataGenerator_map.SEC
@@${bin}/BerlinMOD_DataGenerator_reduce.SEC
close database;
q;"

  # Report a failed generation run instead of always exiting with 0,
  # in line with the status check done after the master initialization.
  if [ $? -ne 0 ]; then
    echo -en "${ERRORINFO}Generating the data set on a single computer fails.\n\n"
    exit -1
  fi

  exit 0
fi

# Cluster mode: ${LFNAME} must exist, which is taken as "the monitors are
# running".
if [ ! -f "${LFNAME}" ]; then
  echo -en "${ERRORINFO}The monitors are not started. \n\n"
  exit -1
fi

# (Re)create the master database and store the scale factor in it.
ps-startTTYCS -s 1 <<< "delete database ${DBNAME};
create database ${DBNAME};
open database ${DBNAME};
let SCALEFACTOR = ${SCALEFACTOR};
close database;
q;"

# Store the simulation period only when one was given with -p.
if [ "${P_NUMDAYS}" -gt 0 ]; then
  ps-startTTYCS -s 1 <<< "open database ${DBNAME};
let P_NUMDAYS = ${P_NUMDAYS};
close database;
q;"
fi

# Run the first master script (sets the global parameters on the master
# database).
ps-startTTYCS -s 1 <<< "open database ${DBNAME};
@@${bin}/BerlinMOD_DataGenerator_master1.SEC
close database;
q;"

# Only the status of the master1 run above is checked. The message uses
# ERRORINFO; the previously referenced ERROR variable is never defined,
# so the prefix was silently dropped.
if [ $? -ne 0 ]; then
  echo -en "${ERRORINFO}Initialization on the master DS fails.\n\n"
  exit -1
fi

# Run the Hadoop program, handing it the sub-database name, the scale
# factor, the simulation period and the map/reduce SECONDO scripts.
# All arguments are quoted so that a script directory containing whitespace
# is passed on as a single argument.
if ! hadoop jar "${bin}/GenMOD.jar" GenMOD "${SUBDBNAME}" "${SCALEFACTOR}" "${P_NUMDAYS}" \
  "${bin}/BerlinMOD_DataGenerator_map.SEC" "${bin}/BerlinMOD_DataGenerator_reduce.SEC"; then
  # ERRORINFO is the prefix defined by this script; ERROR was never defined.
  echo -en "${ERRORINFO}The Hadoop program fails.\n\n"
  exit -1
fi

# Fetch the output of the Hadoop program into a local file. The glob is
# quoted so that it is resolved by Hadoop against the distributed file
# system and never expanded by the local shell.
RESULTFILE="${bin}/.genResult"
if ! hadoop dfs -cat 'OUTPUT-MOD/p*' > "${RESULTFILE}"; then
  echo -en "${ERRORINFO}Cannot read the result of the Hadoop program.\n\n"
  exit -1
fi

# Turn every "<row> <ds> <column>" line into one tuple of a nested-list
# relation value; the Path attribute is left empty.
#   -r            keeps backslashes literal,
#   || [ -n ... ] also handles a last line without trailing newline,
#   blank lines are skipped because they would yield a malformed tuple.
RESULT="("
while read -r ROW DS COLUMN || [ -n "${ROW}" ]; do
  [ -n "${ROW}" ] || continue
  RESULT="${RESULT}(${ROW} ${DS} ${COLUMN} '') "
done < "${RESULTFILE}"
RESULT="${RESULT})"

# Store the locations as relation locRel on the master and run the second
# master script (collects the distributed data). The status of this call is
# the exit status of the script.
ps-startTTYCS -s 1 <<< "open database ${DBNAME};
let locRel = [const rel(tuple([Row:int, DS:int, Column:int, Path:text])) value ${RESULT} ];
@@${bin}/BerlinMOD_DataGenerator_master2.SEC
close database;
q;"