# This file is part of SECONDO.
#
# Copyright (C) 2004, University in Hagen, Department of Computer Science,
# Database Systems for New Applications.
#
# SECONDO is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# SECONDO is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with SECONDO; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

# --- Parallel Secondo Configuration ---
#
# This file is specially prepared for configuring Parallel Secondo.
#
# Note: every parameter explained below can be overruled by an environment
# variable whose name is SECONDO_PARAM_ followed by the parameter name,
# e.g.: export SECONDO_PARAM_SecondoHome="/tmp/databases"
#
# If a parameter is written with an equal sign (=), it is set to that single value.
# If it is written with a += sign, it is set to a sequence of values,
# one value per += line.

[Secondo]
# Settings in this environment are not provided by Parallel Secondo yet.
#
# Set here the Secondo parameters that apply to all data servers.
# All values listed here will be inserted into every data server's
# SecondoConfig.ini file and distributed later by the ps-secondo-buildMini script.
# Each parameter is set as: [Environment]:[title] = [value]
# e.g.: if BerkeleyDB:CacheSize = 65536 is set here, then the Secondo databases
# of all data servers are given a cache space of 64MB.
# Nonetheless, parameters concerning Parallel Secondo itself, including
# SecondoFilePath, Environment:SecondoPort, Environment:SecondoHome etc.,
# must not be set here; otherwise data servers that share the same cluster node
# may interfere with each other.

[Hadoop]
# This section sets the configuration of Hadoop,
# on which Parallel Secondo is built.
# An option is made up of two parts: [fileName]:[title] = [value]
# At present, four files in $HADOOP_HOME/conf/ can be recognized and edited:
# hadoop-env.sh, core-site.xml, hdfs-site.xml and mapred-site.xml
# On every cluster node, $HADOOP_HOME and the HDFS path are always set at
# the node's first data server path, $PARALLEL_SECONDO_MAINDS

# e.g.: on Mac OS X 10.5, it should be set as
# hadoop-env.sh:JAVA_HOME = /System/Library/Frameworks/JavaVM.framework/Versions/1.6.0/Home
hadoop-env.sh:JAVA_HOME = /usr/lib/jvm/java-6-openjdk

# The following parameters are specially prepared for Parallel Secondo.
# Unless you are an advanced user, there is no need to change them.
hadoop-env.sh:HADOOP_OPTS += -Dhostname=$HOSTNAME
hadoop-env.sh:HADOOP_OPTS += -Dhome=$HOME
hadoop-env.sh:HADOOP_OPTS += -Duser=$USER
hadoop-env.sh:HADOOP_OPTS += -Dmpath=$PARALLEL_SECONDO_MAINDS
hadoop-env.sh:HADOOP_OPTS += -Djava.net.preferIPv4Stack=true
hadoop-env.sh:HADOOP_OPTS += -Djavax.xml.parsers.DocumentBuilderFactory=com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderFactoryImpl
# NOTE(review): ${hostname}, ${user} and ${mpath} below appear to refer to the
# -Dhostname, -Duser and -Dmpath properties added to HADOOP_OPTS above - confirm.
core-site.xml:hadoop.tmp.dir = /tmp/hadoop-${hostname}-${user}
hdfs-site.xml:dfs.name.dir = ${mpath}/HDFS/NAME
hdfs-site.xml:dfs.data.dir = ${mpath}/HDFS/DATA

# By default, the following parameters are prepared for
# deploying Parallel Secondo on a single computer with an Ubuntu system.
# If you need to deploy it on a computer cluster, change the host name localhost
# to the IP address of the node designated as the master node of Parallel Secondo.
# If the cluster is shared by several users,
# set different service port numbers for each of them.
core-site.xml:fs.default.name = hdfs://localhost:49000
hdfs-site.xml:dfs.http.address = localhost:50070
hdfs-site.xml:dfs.secondary.http.address = localhost:50090
mapred-site.xml:mapred.job.tracker = localhost:49001
mapred-site.xml:mapred.job.tracker.http.address = localhost:50030

# Sets how many map/reduce tasks can run in parallel on one node of the cluster.
# Normally this is twice the number of processor cores;
# it is set to 12 here since every computer in our cluster has six processor cores.
mapred-site.xml:mapred.tasktracker.map.tasks.maximum = 12
mapred-site.xml:mapred.tasktracker.reduce.tasks.maximum = 12
mapred-site.xml:mapred.reduce.parallel.copies = 12
# NOTE(review): Hadoop documents mapred.task.timeout in milliseconds, which would
# make this value 100 minutes - confirm for the Hadoop version in use.
mapred-site.xml:mapred.task.timeout = 6000000
# hdfs-site.xml:dfs.replication = 1

# The following parameters are needed only when several users share a cluster.
# In that case, different users are required to set different service port numbers.
# hdfs-site.xml:dfs.datanode.address = 0.0.0.0:50010
# hdfs-site.xml:dfs.datanode.http.address = 0.0.0.0:50075
# hdfs-site.xml:dfs.datanode.ipc.address = 0.0.0.0:50020
# hdfs-site.xml:dfs.datanode.https.address = 0.0.0.0:50475
# hdfs-site.xml:dfs.https.address = 0.0.0.0:50470
# mapred-site.xml:mapred.task.tracker.http.address = 0.0.0.0:51060

[Cluster]
# Each node is defined by three parameters: IP address, server path and Secondo port,
# delimited by colons.
# The IP address cannot be replaced by a host name or the loopback address,
# and the server path must be an absolute path.
# Ports and data server folders must be different from each other.
# e.g.: Master = 192.168.0.1:/Home/DataServer:1234

# Only one node can be set as the master data server in Parallel Secondo;
# if Master is set several times, only the latest setting is used.
# Slave nodes are set with the '+=' sign, which can be used several times.
# In principle, slaves should be different from each other.
# A master can also be set as a slave.

# Master = 192.168.0.1:/Home/dataServer1:11234
# Slaves += 192.168.0.1:/Home/dataServer2:12234
# Slaves += 192.168.0.1:/Home/dataServer3:13234

[Options]
# Special options for Parallel Secondo are defined here.
# Each one is defined as: [title] = [value]

# NS4Master is the abbreviation for "Normal Secondo for Master Mini Secondo".
# If this feature is set to true, then the user's normal Secondo database will be
# used as the master in Parallel Secondo.
# NS4Master = true