Files
secondo/Tools/Converter/Csv2Secondo/CSV2Secondo.java
2026-01-23 17:03:45 +08:00

362 lines
10 KiB
Java

//This file is part of SECONDO.
//Copyright (C) 2004, University in Hagen, Department of Computer Science,
//Database Systems for New Applications.
//SECONDO is free software; you can redistribute it and/or modify
//it under the terms of the GNU General Public License as published by
//the Free Software Foundation; either version 2 of the License, or
//(at your option) any later version.
//SECONDO is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//GNU General Public License for more details.
//You should have received a copy of the GNU General Public License
//along with SECONDO; if not, write to the Free Software
//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import java.io.*;
import java.util.*;
/**
 * Converts a character-separated text file into a SECONDO relation object
 * that is written to standard output in nested list format.
 *
 * The conversion is driven by a Java properties file with these keys:
 *   Separator       (required) only the first character is used as field delimiter
 *   Scheme          (required) tab-separated list of "Name Type" pairs; known types
 *                   are int, real, string, text, point and spoint
 *   Object          (optional) name of the created object; defaults to the base
 *                   name of the source file up to its first dot
 *   IgnoreLines     (optional) comma-separated list of zero-based line numbers to skip
 *   Comment         (optional) lines starting with this prefix are skipped
 *   checknumeric    (optional) if present, numeric values are parsed for validation
 *   allowMoreValues (optional) if present, surplus values in a line are tolerated
 *
 * Any problem found in the configuration or the data is reported on standard
 * error and terminates the JVM with exit code 1 (see error).
 */
public class CSV2Secondo{

  /** String values longer than this only trigger a warning on standard error. */
  private static final int MAXSTRINGLENGTH=48;

  /** Lower-case type names of the scheme, in attribute order. */
  private Vector Types;
  /** Attribute names of the scheme, parallel to Types. */
  private Vector Names;
  /** Value of the Separator property; only charAt(0) is used. */
  private String Separators;
  /** Zero-based line numbers (kept as trimmed strings) that are skipped. */
  private Vector LinesToIgnore;
  /** True if numeric values are validated by parsing them. */
  private boolean checknumeric;
  /** Scratch list holding the values of the line currently being converted. */
  private Vector Values;
  /** True if a line may carry more values than the scheme requires. */
  private boolean allowMoreValues;
  /** Number of values a line must provide (point and spoint count twice). */
  private int neededValues;
  /** True if the object is closed with the additional "()" of the old format. */
  private boolean oldStyle=false;

  /**
   * Parses the scheme, stores it for the subsequent printLine calls and
   * prints the relation type to standard output.
   *
   * @param Line tab-separated attribute list, each attribute being "Name Type"
   * @return always true; malformed schemes terminate the program via error
   */
  public boolean printType(String Line){
    neededValues=0;
    Types = new Vector();
    Names = new Vector();
    StringTokenizer ST1 = new StringTokenizer(Line,"\t");
    while (ST1.hasMoreTokens()){
      String Attr = ST1.nextToken();
      StringTokenizer ST2 = new StringTokenizer(Attr," ");
      String Name="";
      String Type="";
      if(ST2.hasMoreTokens()){
        Name = ST2.nextToken().trim();
      } else {
        error("no name in attr:"+Attr);
      }
      if(ST2.hasMoreTokens()){
        // fixed locale: the default locale must not influence type recognition
        Type = ST2.nextToken().trim().toLowerCase(Locale.ENGLISH);
      } else {
        error("Missing Type in Attr"+Attr);
      }
      if(!isKnownType(Type)){
        error("unknown type "+Type);
      }
      Types.add(Type);
      Names.add(Name);
      // point and spoint consume two values of a line, all other types one
      neededValues += isPointType(Type) ? 2 : 1;
    }
    // print the type
    System.out.println("(rel (tuple ( ");
    for(int i=0;i<Types.size();i++){
      // spoint is only an input variant (swapped values); the emitted type is point
      String OutType = Types.get(i).equals("spoint") ? "point" : (String) Types.get(i);
      System.out.println(" ("+Names.get(i)+" "+OutType+" )");
    }
    System.out.println(" )))");
    Values = new Vector(Types.size());
    return true;
  }

  /** Returns true if Type is one of the attribute types this converter can emit. */
  private static boolean isKnownType(String Type){
    return Type.equals("int") || Type.equals("text") || Type.equals("real")
        || Type.equals("string") || isPointType(Type);
  }

  /** Returns true for the two types that are built from two consecutive values. */
  private static boolean isPointType(String Type){
    return Type.equals("point") || Type.equals("spoint");
  }

  /** Dumps the current scheme to standard error as context for an error message. */
  private void printScheme(){
    System.err.println("Scheme:");
    for(int i=0;i<Types.size();i++){
      System.err.println(i+" "+Types.get(i)+" : "+Names.get(i));
    }
    System.err.println("-------------------");
  }

  /**
   * Converts a single line into a SECONDO relation tuple and prints it.
   *
   * @param Line the raw input line
   * @return false if Line is null, true after the tuple has been printed;
   *         invalid content terminates the program via error
   */
  private boolean printLine(String Line){
    if(Line==null){
      return false;
    }
    // first collect all values of the line
    MyStringTokenizer ST = new MyStringTokenizer(Line,Separators.charAt(0));
    Values.clear();
    while(ST.hasMoreTokens()){
      Values.add(ST.nextToken());
    }
    if(Values.size()<neededValues){
      printScheme();
      error("too little values for the given scheme in Line \n"+Line);
    }
    if(!allowMoreValues && Values.size()>neededValues){
      printScheme();
      System.err.println("Values="+Values);
      System.err.println("neededValue ="+neededValues);
      System.err.println("foundValues ="+Values.size());
      error("too much values for the given schema in line \n"+Line);
    }
    System.out.print(" ( "); // open the tuple
    int ValuePos = 0;
    String Value1,Value2;
    for(int i=0;i<Types.size();i++){
      String Type = (String) Types.get(i);
      if(isPointType(Type)){ // we need the next entry as well
        Value1 = ((String)Values.get(ValuePos)).trim();
        ValuePos++;
        Value2 = ((String) Values.get(ValuePos)).trim();
        ValuePos++;
        if(Type.equals("point")){
          System.out.print("("+Value1+" "+Value2+" ) ");
        } else { // spoint: the two values are emitted in swapped order
          System.out.print("("+Value2+" "+Value1+" ) ");
        }
        if(checknumeric){
          try{
            Double.parseDouble(Value1);
            Double.parseDouble(Value2);
          }catch(NumberFormatException e){
            error("error in numric check for coordinate of a point in Line \n"+Line);
          }
        }
      } else if(Type.equals("int")){
        // keep only digits and signs
        Value1 = ((String)Values.get(ValuePos)).replaceAll("[^0-9+-]", "");
        ValuePos++;
        System.out.print(" "+Value1+" ");
        if(checknumeric){
          try{
            Integer.parseInt(Value1);
          }catch(NumberFormatException e){
            error("wrong format for an integer in Line\n"+Line);
          }
        }
      } else if(Type.equals("string")){
        // embedded double quotes would terminate the string in the output
        Value1=((String)Values.get(ValuePos)).replaceAll("\"", "");
        ValuePos++;
        if(Value1.length()>MAXSTRINGLENGTH){
          System.err.println("Warning: String too long");
          System.err.println("Line :"+Line);
          System.err.println("Attr :"+Names.get(i));
        }
        System.out.print(" \""+Value1+"\" ");
      } else if(Type.equals("text")){
        Value1 = (String)Values.get(ValuePos);
        ValuePos++;
        System.out.print(" (<text>"+Value1+"</text--->) ");
      } else if(Type.equals("real")){
        // keep only digits, signs and the decimal point; the '-' is escaped so
        // that it is a literal and does not form a range that lets ',' through
        Value1=((String)Values.get(ValuePos)).replaceAll("[^0-9+.\\-]", "");
        ValuePos++;
        if(checknumeric){
          try{
            Double.parseDouble(Value1);
          }catch(NumberFormatException e){
            error("wrong format for a real in Line\n"+Line);
          }
        }
        System.out.print(" "+Value1+" ");
      } else {
        error("unknow type found : "+Type);
      }
    }
    System.out.println(" )"); // close tuple
    return true;
  }

  /** Prints Message to standard error and terminates the JVM with exit code 1. */
  private static void error(String Message){
    System.err.println(Message);
    System.exit(1);
  }

  /**
   * Derives the default object name from the source file: the file's base
   * name (without directories) cut at its first dot. A name that starts with
   * a dot or contains none is used unchanged.
   */
  private static String defaultObjectName(String SourceFile){
    String Base = new File(SourceFile).getName();
    int iodot = Base.indexOf('.');
    return iodot>0 ? Base.substring(0,iodot) : Base;
  }

  /**
   * Reads the configuration and converts the source file, writing the
   * resulting object to standard output and progress dots to standard error.
   *
   * Configuration and data errors terminate the program via error. Exceptions
   * raised while reading the source file are only reported as a stack trace;
   * the method then returns normally with incomplete output.
   *
   * @param CfgFile    path of the properties file described in the class comment
   * @param SourceFile path of the character-separated input file
   */
  public void convert(String CfgFile,String SourceFile){
    Properties Cfg = new Properties();
    try{
      FileInputStream CfgIn = new FileInputStream(CfgFile);
      try{
        Cfg.load(CfgIn);
      } finally {
        CfgIn.close(); // also release the handle if loading fails
      }
    } catch(Exception e){
      error("Error in loading configuration");
    }

    // required entries
    Separators = Cfg.getProperty("Separator");
    String Scheme = Cfg.getProperty("Scheme");
    if(Separators==null){
      error("Missing Separator entry in config file");
    }
    if(Scheme==null){
      error("Missing Scheme entry in config file");
    }
    if(Separators.equals("")){
      error("empty separator specification in configuration file ");
    }
    if(Scheme.equals("")){
      error("empty scheme specification in configuration file ");
    }

    // optional entries
    String OName = Cfg.getProperty("Object");
    if(OName==null){
      OName = defaultObjectName(SourceFile);
    } else if(OName.equals("")){
      error("empty object specification in configuration file ");
    }

    LinesToIgnore = new Vector();
    String NoLines = Cfg.getProperty("IgnoreLines");
    if(NoLines!=null){
      // every listed line number counts, not only the first one
      StringTokenizer ST = new StringTokenizer(NoLines,",");
      while(ST.hasMoreTokens()){
        String No = ST.nextToken().trim();
        if(No.length()>0){
          LinesToIgnore.add(No);
        }
      }
    }

    String Comment = Cfg.getProperty("Comment");
    if(Comment!=null && Comment.length()==0){
      Comment = null; // an empty prefix would match, and thus skip, every line
    }
    checknumeric = Cfg.getProperty("checknumeric")!=null;
    allowMoreValues = Cfg.getProperty("allowMoreValues")!=null;

    BufferedReader CSVin = null;
    try{
      CSVin = new BufferedReader(new FileReader(SourceFile));
      System.out.println("( OBJECT "+OName+" () "); // open object
      if(!printType(Scheme)){
        error("type analyse failed");
      }
      System.out.println("("); // open value list , tuple
      int LNo = 0;
      String Line;
      // readLine() returning null is the only reliable end-of-input signal
      while((Line = CSVin.readLine())!=null){
        boolean Ignored = LinesToIgnore.contains(String.valueOf(LNo));
        boolean Commented = Comment!=null && Line.startsWith(Comment);
        if(!Ignored && !Commented){
          if(!printLine(Line)){
            System.err.println("Error in processing line "+LNo+" = "+Line);
          }
        }
        LNo++;
        if((LNo%5000)==0){
          System.err.print("."); // progress indicator
        }
      }
      if(oldStyle){
        System.out.println(") () )"); // close valueList and object
      } else {
        System.out.println(") )");
      }
      System.err.print("\n");
    }catch(Exception e){
      e.printStackTrace();
    }finally{
      if(CSVin!=null){
        try{
          CSVin.close();
        }catch(IOException e){
          System.err.println("Warning: could not close "+SourceFile+": "+e.getMessage());
        }
      }
    }
  }

  /**
   * Command line entry point.
   * Usage: java CSV2Secondo [--oldstyle] ConfigFile SourceFile [>TargetFile]
   */
  public static void main(String[] args){
    int start=0;
    CSV2Secondo C = new CSV2Secondo();
    if(args.length>0 && args[0].equals("--oldstyle")){
      C.oldStyle=true;
      start++;
    }
    if(args.length<2+start){
      error("missing parameter\nusage: java CSV2Secondo [--oldstyle] ConfigFile SourceFile [>TargetFile]");
    }
    File CfgFile = new File(args[start]);
    if(!CfgFile.exists()){
      error("Config file not found");
    }
    File SourceFile = new File(args[1+start]);
    if(!SourceFile.exists()){
      error("Source file not found ");
    }
    C.convert(args[start],args[start+1]);
  }

  /**
   * Splits a string at a single delimiter character. Unlike
   * java.util.StringTokenizer, consecutive delimiters yield empty tokens.
   * Tokenizing stops as soon as the remaining text is empty, so an empty
   * field after a trailing delimiter is not reported.
   */
  private static class MyStringTokenizer{

    /** The part of the input that has not been handed out yet. */
    private String MyString;
    private final char delim;

    public MyStringTokenizer(String S,char delim){
      MyString = S;
      this.delim=delim;
    }

    /** Returns true while unconsumed text is left. */
    public boolean hasMoreTokens(){
      return MyString.length()>0;
    }

    /** Returns the text up to the next delimiter, or "" if nothing is left. */
    public String nextToken(){
      if(MyString.length()==0){
        return "";
      }
      int index = MyString.indexOf(delim);
      String res;
      if(index<0){ // last token
        res = MyString;
        MyString="";
      } else {
        res = MyString.substring(0,index);
        MyString = MyString.substring(index+1);
      }
      return res;
    }
  }
}