/*
 * File: secondo/Algebras/Hadoop/Java/ParallelSecondo/PS_HadoopReduce2_HDJ_QMap.java
 * Snapshot: 2026-01-23 17:03:45 +08:00 (400 lines, 13 KiB, Java)
 */
package ParallelSecondo;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.InetAddress;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.Scanner;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import sj.lang.ListExpr;
/**
 * Map task of the Parallel Secondo ~hadoopReduce2~ operator (hash-based
 * distributed join, query-map variant).
 *
 * <p>Each map task connects to one Secondo slave database, first runs a
 * "type" query per input side (to materialize the intermediate result's type
 * file via ~fconsume~), then assembles one combined ~doubleexport~ + ~send~
 * query over both sides. The combined query is executed in a background
 * Secondo session while this task receives the exported tuples over a
 * socket and writes them to the Hadoop shuffle, keyed by the tuples'
 * partition attribute.
 *
 * <p>Not thread-safe: {@link #paraNames} accumulates state across calls to
 * {@link #rdupParameters(ListExpr)} within one task.
 */
public class PS_HadoopReduce2_HDJ_QMap
    extends Mapper<LongWritable, Text, IntWritable, BytesWritable>
    implements Constant {

  // Parameter names already seen inside "fun" sub-lists of the combined
  // query; rdupParameters consults and updates this map in order to rename
  // duplicated parameter names. Created lazily on first use.
  Map<String, Integer> paraNames;

  /**
   * Processes one driver-prepared parameter record.
   *
   * @param key ignored (input file offset)
   * @param value a delimiter-separated parameter record; fixed positions
   *     hold this mapper's index, the database name, and the per-side
   *     (0 and 1) input descriptions
   * @param context Hadoop output collector; receives (partition, tuple
   *     block) pairs
   * @throws IOException on malformed parameters or unexpected stream data
   * @throws InterruptedException if interrupted while writing output or
   *     joining the sender thread
   */
  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    String[] parameters = value.toString().split(inDim);
    int mapperIdx = Integer.parseInt(parameters[0]);
    String databaseName = parameters[1];
    // parameters[6] holds the accepted file-name list; it is unused here.
    String AcceptFileLocList = parameters[7];
    String AcceptDLOName = parameters[8];
    String AcceptDLOLoc = parameters[9];

    // ------------------------------------------------------------------
    // Per-side settings for the two joined input streams.
    String[] InputObjectName = { parameters[12], parameters[21] };
    // Parsed for fail-fast validation of the record; not otherwise used.
    int[] slaveIdx =
        { Integer.parseInt(parameters[13]), Integer.parseInt(parameters[22]) };
    int[] duplicateTimes =
        { Integer.parseInt(parameters[14]), Integer.parseInt(parameters[23]) };
    String[] PAName = { parameters[15], parameters[24] };
    String[] mapCreateQuery = { parameters[16], parameters[25] };
    String[] mapFileName = { parameters[17], parameters[26] };
    String[] mapFileLoc = { parameters[18], parameters[27] };

    ListExpr recvFileList = new ListExpr();
    recvFileList.readFromString(AcceptFileLocList);

    // Names of the intermediate results produced for each side.
    String[] interResultName = {
        "P" + 1 + "_" + context.getJobName(),
        "P" + 2 + "_" + context.getJobName() };

    String slFile = System.getenv().get("PARALLEL_SECONDO_SLAVES");
    if (slFile == null)
      throw new RuntimeException(
          "Undefined PARALLEL_SECONDO_SLAVES in "
              + InetAddress.getLocalHost().getHostAddress());

    List<PSNode> slaves = new ArrayList<PSNode>();
    // FIX: the scanner was never closed (file-descriptor leak per task),
    // and a missing slave file was silently swallowed, which later showed
    // up as an obscure IndexOutOfBoundsException on slaves.get(mapperIdx).
    try (Scanner scanner = new Scanner(new FileInputStream(slFile))) {
      int lineNum = 0;
      while (scanner.hasNextLine()) {
        String[] line = scanner.nextLine().split(sysDim);
        slaves.add(new PSNode(lineNum++, line[0], line[1],
            Integer.parseInt(line[2])));
      }
    } catch (FileNotFoundException e1) {
      throw new IOException("Cannot read the slave list file: " + slFile, e1);
    }

    String mapperIPAddr = slaves.get(mapperIdx).getIpAddr();
    int mapperPortNum = slaves.get(mapperIdx).getPortNum();

    QuerySecondo secEntity = new QuerySecondo();
    ListExpr resultList = new ListExpr();
    try {
      // First, process the map query like the QMap class: output the type
      // file, then use the ~doubleexport~ and ~send~ operators to deliver
      // the result into HDFS.
      String[] SIDESTREAMNL = { "XXXSIDESTREAM1", "XXXSIDESTREAM2" };
      String[] SIDEKeyNAME = { "XXXSIDEKEY1", "XXXSIDEKEY2" };

      // An empty placeholder stream, in case one side's tuple stream
      // doesn't exist on this slave.
      ListExpr emptyStream = ListExpr.threeElemList(
          ListExpr.symbolAtom("head"),
          ListExpr.twoElemList(ListExpr.symbolAtom("transformstream"),
              ListExpr.threeElemList(ListExpr.symbolAtom("intstream"),
                  ListExpr.intAtom(0), ListExpr.intAtom(0))),
          ListExpr.intAtom(0));
      ListExpr emptyStreamPA = ListExpr.symbolAtom("Elem");

      int sendPort = HPA_AuxFunctions.getPort();

      // Template of the combined query; the XXXSIDE* placeholders are
      // substituted per side inside the loop below.
      ListExpr comMapQuery = ListExpr.twoElemList(
          ListExpr.symbolAtom("query"),
          ListExpr.fourElemList(ListExpr.symbolAtom("send"),
              ListExpr.fiveElemList(ListExpr.symbolAtom("doubleexport"),
                  ListExpr.symbolAtom(SIDESTREAMNL[0]),
                  ListExpr.symbolAtom(SIDESTREAMNL[1]),
                  ListExpr.symbolAtom(SIDEKeyNAME[0]),
                  ListExpr.symbolAtom(SIDEKeyNAME[1])),
              ListExpr.intAtom(sendPort),
              ListExpr.symbolAtom("KeyT")));

      ListExpr InterSymbol = ListExpr.symbolAtom(QUERYNLSTR);
      secEntity.open(mapperIPAddr, databaseName, mapperPortNum, true);

      for (int side = 0; side < 2; side++) {
        // Template query: consume 0 tuples via ~fconsume~, only to write
        // the type file of this side's intermediate result.
        ListExpr typeMapQuery = ListExpr.twoElemList(
            ListExpr.symbolAtom("query"),
            ListExpr.fiveElemList(ListExpr.symbolAtom("fconsume"),
                ListExpr.threeElemList(ListExpr.symbolAtom("head"),
                    InterSymbol,
                    ListExpr.intAtom(0)),
                ListExpr.twoElemList(
                    ListExpr.stringAtom(interResultName[side]),
                    ListExpr.textAtom("")),
                ListExpr.oneElemList(ListExpr.intAtom(0)),
                ListExpr.theEmptyList()));

        ListExpr inputStream = null;
        ListExpr mapQueryList = new ListExpr(),
            mapDLFNameList = new ListExpr(),
            mapDLFLocsList = new ListExpr();
        mapQueryList.readFromString(mapCreateQuery[side]);
        mapDLFNameList.readFromString(mapFileName[side]);
        mapDLFLocsList.readFromString(mapFileLoc[side]);

        if (!mapQueryList.isEmpty()) {
          // Embed the unexecuted map query.
          if (!mapDLFNameList.isEmpty()) {
            inputStream = mapDLFNameList.first();
            typeMapQuery =
                ExtListExpr.replace(typeMapQuery, InterSymbol, mapQueryList);
            typeMapQuery = HPA_AuxFunctions.loc2Ffeed(typeMapQuery,
                ListExpr.oneElemList(inputStream), mapDLFLocsList,
                duplicateTimes[side]);
          } else {
            // A map query always reads DLF files; reaching here means the
            // job driver produced inconsistent parameters.
            throw new IOException("Error in an unexpected branch.");
          }
        } else {
          // Follow the old way: the input is either a DLF file or a
          // database object.
          if (InputObjectName[side].matches(INDLFPattern)) {
            inputStream = ListExpr.stringAtom(InputObjectName[side]);
            typeMapQuery =
                ExtListExpr.replace(typeMapQuery, InterSymbol, inputStream);
            ListExpr mapFileLocList =
                (side == 0) ? recvFileList.first() : recvFileList.second();
            typeMapQuery = HPA_AuxFunctions.loc2Ffeed(
                typeMapQuery, ListExpr.oneElemList(inputStream),
                ListExpr.oneElemList(mapFileLocList),
                duplicateTimes[side]);
          } else {
            inputStream = ListExpr.twoElemList(ListExpr.symbolAtom("feed"),
                ListExpr.symbolAtom(InputObjectName[side]));
            ListExpr omnList = new ListExpr();
            ListExpr omlList = new ListExpr();
            omnList.readFromString(AcceptDLOName);
            omlList.readFromString(AcceptDLOLoc);
            boolean isObjExist = HPA_AuxFunctions.objectExist(
                InputObjectName[side], omnList, omlList);
            if (isObjExist) {
              typeMapQuery =
                  ExtListExpr.replace(typeMapQuery, InterSymbol, inputStream);
            } else {
              // The object does not exist on this slave; mark this side
              // as empty so the placeholder stream is used below.
              typeMapQuery = ListExpr.theEmptyList();
            }
          }
        }

        if (!typeMapQuery.isEmpty()) {
          // Run the type query, then splice this side's real input stream
          // and partition attribute into the combined query.
          secEntity.query(typeMapQuery.toString(), resultList);
          comMapQuery = ExtListExpr.replace(comMapQuery,
              ListExpr.symbolAtom(SIDESTREAMNL[side]),
              typeMapQuery.second().second().second());
          comMapQuery = ExtListExpr.replace(comMapQuery,
              ListExpr.symbolAtom(SIDEKeyNAME[side]),
              ListExpr.symbolAtom(PAName[side]));
        } else {
          // No input on this side; substitute the empty stream.
          comMapQuery = ExtListExpr.replace(comMapQuery,
              ListExpr.symbolAtom(SIDESTREAMNL[side]),
              emptyStream);
          comMapQuery = ExtListExpr.replace(comMapQuery,
              ListExpr.symbolAtom(SIDEKeyNAME[side]),
              emptyStreamPA);
        }
      }
      secEntity.close();

      // ----------------------------------------------------------------
      // Remove duplicated parameter names, since the two input streams
      // were created independently.
      comMapQuery = rdupParameters(comMapQuery);

      // Second, run the combined query in a background Secondo session and
      // receive the ~doubleexport~-ed result over a socket.
      String[] queries = { comMapQuery.toString() };
      SecExRunnable sender =
          new SecExRunnable(mapperIPAddr, databaseName, mapperPortNum, queries);
      if (!sender.isInitialized())
        throw new RemoteStreamException(
            "Error! Exception while setting up sender in : " + mapperIdx);
      Thread senderThread = new Thread(sender);
      senderThread.start();

      RemoteStream receiver =
          new RemoteStream("client", mapperIPAddr, sendPort);
      receiver.Connect();
      if (receiver.getConnected()) {
        // Read the typeInfo, especially to get the key attribute type.
        String typeInfo = receiver.readLine();
        receiver.writeLine("<GET TYPE/>");
        int keyLoc = typeInfo.indexOf("APPEND");
        if (keyLoc < 0) {
          System.err.println("Get typeInfo: " + typeInfo);
          throw new IOException("Error: Expect appended key attribute");
        }
        // +7 skips "APPEND" plus its separator character.
        String keyType = typeInfo.substring(keyLoc + 7);
        if (!keyType.equals("string"))
          throw new IOException("Error: Expect string type key attribute");
        receiver.setKeyType(keyType);

        int loadTupleNum = 0;
        int sockCount = 0;
        int loadTupleSize = 0;
        while (!receiver.getTheLastSocket()) {
          byte[] tupleBuffer = new byte[RemoteStream.MAX_TUPLESIZE];
          int tbOffset = 0;
          // Fill the buffer one socket packet at a time.
          while (receiver.receiveSocket(tupleBuffer, tbOffset)) {
            tbOffset += RemoteStream.SOCKTUP_SIZE;
            sockCount++;
          }
          if (!receiver.getTheLastSocket()) {
            // Account for the terminating packet, then unpack the tuples.
            sockCount++;
            tbOffset += RemoteStream.SOCKTUP_SIZE;
            loadTupleNum += LoadTuples(tupleBuffer, tbOffset, context);
            loadTupleSize += tbOffset;
          }
        }
        senderThread.join(); // wait until the Secondo thread closes

        String hostName = InetAddress.getLocalHost().getHostName();
        System.out.print(hostName + " through port " + sendPort);
        System.out.println(" total got " + loadTupleNum + " tuples");
        System.out.println(" total got " + loadTupleSize / (1024 * 1024)
            + "MB tuples");
        receiver.close();
        receiver = null;
      } else {
        throw new IOException("Error! Unconnected to sender "
            + mapperIPAddr + ":" + sendPort);
      }
    } catch (IOException e) {
      e.printStackTrace();
      // FIX: keep the original failure chained as the cause instead of
      // discarding it.
      RemoteStreamException rse =
          new RemoteStreamException("Catch IOException in Map task");
      rse.initCause(e);
      throw rse;
    }
  }

  /**
   * Parses the tuples contained in a received byte buffer and writes each
   * to the Hadoop context.
   *
   * <p>Buffer layout per tuple: a 4-byte key size, the key bytes (a string
   * holding the integer partition number), then a tuple block whose first
   * 4 bytes are its own size (as output by Tuple::WriteToBin).
   *
   * @param srcBuf buffer filled by the socket receiver
   * @param curPos number of valid bytes in srcBuf
   * @param context Hadoop output collector
   * @return the number of tuples emitted
   * @throws IOException if a negative key size is found
   * @throws InterruptedException if interrupted while writing output
   */
  private int LoadTuples(byte[] srcBuf, int curPos, Context context)
      throws IOException, InterruptedException {
    int offset = 0;
    int loadTupleNum = 0;
    while (offset < curPos) {
      // Key attribute size; 0 marks the end of the valid data.
      int keySize = RemoteStream.Byte2Int(srcBuf, offset);
      if (keySize < 0)
        throw new IOException("Error: invalid key attribute size");
      else if (keySize == 0)
        break;
      offset += 4;
      // Key attribute value: the reduce partition number as text.
      String keyValue = RemoteStream.Byte2String(srcBuf, offset, keySize);
      offset += keySize;
      // Tuple block size; the block is forwarded verbatim, including this
      // size prefix, so the reducer can rebuild the tuple.
      int tupleSize = RemoteStream.Byte2Int(srcBuf, offset);
      BytesWritable byteValue = new BytesWritable();
      byteValue.set(srcBuf, offset, tupleSize);
      offset += tupleSize;
      context.write(new IntWritable(Integer.parseInt(keyValue)), byteValue);
      loadTupleNum++;
    }
    return loadTupleNum;
  }

  /**
   * Scans the given nested list; whenever a sub-list starts with ~fun~,
   * collects the function's parameter names and renames every name that was
   * already used by an earlier function. Needed because the two input
   * streams' queries are created independently and may reuse parameter
   * names.
   *
   * @param queryList the query as a nested list
   * @return the query with duplicated parameter names made unique
   */
  private ListExpr rdupParameters(ListExpr queryList) {
    if (paraNames == null) {
      paraNames = new HashMap<String, Integer>();
    }
    if (queryList.isEmpty())
      return ListExpr.theEmptyList();
    if (queryList.isAtom())
      return queryList;
    if (queryList.first().isAtom()) {
      if (queryList.first().equals(ListExpr.symbolAtom("fun"))) {
        // A function list: every element but the last is a parameter
        // declaration of the form (name type).
        ListExpr rest = queryList.rest();
        List<String[]> dupPNPairs = new ArrayList<String[]>();
        while (rest.listLength() > 1) {
          ListExpr parameter = rest.first();
          String pName = parameter.first().symbolValue();
          if (paraNames.containsKey(pName)) {
            // Seen before: derive a fresh name by appending a counter.
            int cnt = paraNames.get(pName) + 1;
            String newPName = pName + cnt;
            paraNames.put(pName, cnt);
            dupPNPairs.add(new String[] { pName, newPName });
          } else {
            paraNames.put(pName, 0);
          }
          rest = rest.rest();
        }
        // Apply the collected renamings over the whole function list.
        for (String[] pnPairs : dupPNPairs) {
          queryList = ExtListExpr.replace(queryList,
              ListExpr.symbolAtom(pnPairs[0]),
              ListExpr.symbolAtom(pnPairs[1]));
        }
        return queryList;
      }
    }
    // Not a function head: recurse into head and tail.
    return ListExpr.cons(rdupParameters(queryList.first()),
        rdupParameters(queryList.rest()));
  }
}