secondo/Algebras/Web/Web.cpp


/*
----
This file is part of SECONDO.

Copyright (C) 2004, University in Hagen, Department of Computer Science,
Database Systems for New Applications.

SECONDO is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

SECONDO is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with SECONDO; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
----

[1] /Web Algebra

November 2006

1 Preliminaries

1.1 Includes

*/


// __POS__ streams the current file, function signature and line number.
#undef __POS__
#define __POS__ __FILE__ << ".." << __PRETTY_FUNCTION__ << "@" << __LINE__
// Enable TRACEON to let every __TRACE__ statement print __POS__ to cout;
// without it __TRACE__ expands to nothing.
//#define TRACEON
#ifdef TRACEON
#define __TRACE__ cout << __POS__ << endl;
#else
#define __TRACE__
#endif

//#define _DEBUG_JPS  //Enables Debug output used by Joerg Siegel
//#define _DEBUG_JPS_2  //Enables Debug output used by Joerg Siegel
//#define _DEBUG_JPS_3  //Enables Debug output used by Joerg Siegel

#include "Algebra.h"
#include "NestedList.h"
#include "QueryProcessor.h"
#include "StandardTypes.h"
#include "Algebras/FText/FTextAlgebra.h"
#include "Algebras/BinaryFile/BinaryFileAlgebra.h"
#include "Algebras/Relation-C++/RelationAlgebra.h"
#include "Attribute.h"
#include "DateTime.h"
#include "Tools/Flob/DbArray.h"
#include "Tools/Flob/Flob.h"
#include "web.h"

#include "SocketIO.h"      //used for web access
#include "Base64.h"        //to en-/ decode binary data
#include <stack>
#include <string>

#ifdef SECONDO_WIN32
#include "ClientServer/Win32Socket.h"
#else //Linux
#include "ClientServer/UnixSocket.h"
#endif

// Nested list storage and query processor; both are only declared here
// and defined elsewhere.
extern NestedList* nl;
extern QueryProcessor *qp;
using namespace datetime;
using namespace std;

/*
1.2 Dummy Functions

No dummy function needed.

*/
/*
2.0 needed definitions

*/

/*


2.1 Implementation of WebLex

*/

/*
Creates a lexer reading from ~is~. ~switchState~ starts at -1, the same
value ~nextToken~ resets it to after each scan.

*/
WebLex::WebLex(std::istream *is) : yyFlexLexer (is) {
  myin = is;
  switchState = -1;
}

/*
Scans the next token and stores its text in ~tokenVal~. The pending
start condition is handed to the scanner once and then reset to -1.
If the text is enclosed in double quotes, the quotes are removed.
Returns the symbol delivered by the scanner.

*/
int WebLex::nextToken(){
  const int symbol = yylex(switchState);
  switchState = -1;

  tokenVal = YYText();
  const string::size_type len = tokenVal.length();

  // strip one pair of enclosing double quotes; a lone quote or an
  // empty quoted string becomes the empty string
  if (len != 0 && tokenVal[0] == '"' && tokenVal[len-1] == '"'){
    tokenVal = (len > 2) ? tokenVal.substr(1, len-2) : string();
  }

  return symbol;
}

/*
Remembers ~ns~ as the start condition passed to the scanner by the
next call of ~nextToken~.

*/
void WebLex::switchStartCond(int ns){
  switchState = ns;
}

/*
Returns a copy of the text of the most recently scanned token.

*/
string WebLex::getVal() {
  string current(tokenVal);
  return current;
}

// Parameterless yylex: does no scanning and always returns 0.
int WebLex::yylex(){
  return 0;
}

 int WebLex::startElement (string& element){
  // Switches the scanner to FINDELEMSTART and skips every
  // SEARCH_ELEMENT_START token. The text of the first other token is
  // stored in element; its symbol is returned (0 at end of input).
  switchStartCond(FINDELEMSTART);

  int symbol;
  do {
    symbol = nextToken();
  } while (symbol == SEARCH_ELEMENT_START);

  element = getVal();
  return symbol;
}

/*

in: attribute
out: value
return: true if ~attribute~ was found in the input stream, false otherwise

Looking for the attribute in the input stream of WebLex. Param ~value~ contains the value of the attribute

*/
bool WebLex::findAttribute(string attribute, string& value){

  value="";
  int symbol;


  __TRACE__
  symbol=nextToken();

  //__TRACE__
  while (symbol && symbol != CLOSE_TAG){

    if (symbol == ERROR){
      cout << "findAttribute Es ist ein Fehler aufgetreten" << endl;
      return false;
    }
    //__TRACE__
    //we found an attribute identifier
    if (symbol == EIDENTIFIER){
//__TRACE__
      //is this the attribute we are looking for?
      if (isEqual(getVal(),attribute)){
        //cout << "findAttribute Atribut gefunden " << endl;
        if (symbol == ERROR){
          cout << "Fehler: " << getVal() << endl;
          return false;
        }
//__TRACE__
        symbol=nextToken();
        if (symbol == ATTVALUE){
          value = getVal();

          return true;

        }else{
          return true;
        }
      }
    }
    //__TRACE__
    symbol=nextToken();
    //__TRACE__
  }
  return false;


}


/*

in: attributes
out: value
return: true if one of the elements of ~attributes~ was found in the input stream, false otherwise

Looking for the attribute in the input stream of WebLex. Param ~value~ contains the value of the attribute

*/

bool WebLex::findAttribute(vector<string>& attributes,
                           string& value, string& attribute){
  // Reads tokens until CLOSE_TAG or end of input. On the first
  // identifier equal (ignoring case) to one of attributes, that entry
  // is stored in attribute and the following token is read; if it is
  // an ATTVALUE its text is stored in value.
  // Returns false on a scanner error or if no attribute matches.
  value = "";

  int symbol = nextToken();

  while (symbol && symbol != CLOSE_TAG){
    if (symbol == ERROR){
      cout << "findAttribute Es ist ein Fehler aufgetreten" << endl;
      return false;
    }

    if (symbol == EIDENTIFIER){
      const string name = getVal();
      vector<string>::const_iterator it = attributes.begin();
      for (; it != attributes.end(); ++it){
        if (isEqual(*it, name)){
          attribute = *it;
          if (nextToken() == ATTVALUE){
            value = getVal();
          }
          return true;
        }
      }
    }

    symbol = nextToken();
  }
  return false;
}

/*

Find Position of ~value~ in ~content~ and return flobindex Object

*/
flobindex WebLex::setPos(string value, const string& content){
  // Searches value in content, starting at the member pos. On success
  // pos is moved to the match and the result holds its offset and
  // length; otherwise pos is unchanged and offset and len are 0.
  // string::find replaces the former strstr call, which went through
  // pointer-to-unsigned-long casts and read unchecked from
  // c_str()+pos.
  flobindex i;
  i.offset = 0;
  i.len = 0;

  const string::size_type start = pos;
  const string::size_type found = content.find(value, start);
  if (found == string::npos)
    return i;

  pos = found;
  i.offset = pos;
  i.len = value.length();

  return i;
}

/*

read content of a html element

*/
int WebLex::readContent(){
  // Concatenates the text of consecutive CONTENT tokens. The text of
  // the first non-CONTENT token is appended as well unless it signals
  // end of input (0). The result replaces tokenVal.
  string collected;

  int symbol = nextToken();
  for (; symbol == CONTENT; symbol = nextToken()){
    collected += getVal();
  }

  if (symbol != 0){
    collected += getVal();
    symbol = CONTENT;
  }

  tokenVal = collected;
  return symbol;
}

int WebLex::readContentTmp(){
  int symbol=0;
  string v="";


  __TRACE__
//cout << "**********TMP **************" << endl;


  symbol= nextToken();
  //cout << getVal()  << " " << symbol << endl;
  v += getVal();


  while (symbol == CONTENT){
    symbol= nextToken();
    v += getVal();
    //cout << ":" << getVal()  << " " << symbol << "  " << v << endl;
  }

  //cout << "---" << v << endl;
  return 0;


}

/*

2.2 Helping Functions

*/

// Case-insensitive string comparison based on ::tolower.
bool isEqual (string s1, string s2){
  if (s1.length() != s2.length())
    return false;

  for (string::size_type k = 0; k < s1.length(); ++k){
    if (::tolower(s1[k]) != ::tolower(s2[k]))
      return false;
  }
  return true;
}

//Taken from http://www.codeproject.com/string/stringsplit.asp
// Splits input at every occurrence of delimiter and appends the pieces
// to results. Empty pieces are only appended if includeEmpties is set.
// Returns the number of delimiters found. If input or delimiter is
// empty, or input contains no delimiter, 0 is returned and results is
// left untouched.
// Based on http://www.codeproject.com/string/stringsplit.asp; rewritten
// to avoid the read of positions[-1] in the first loop iteration.
int SplitString(const std::string& input,
       const std::string& delimiter, std::vector<std::string>& results,
       bool includeEmpties)
{
    typedef std::string::size_type size_type;

    if( input.empty() || delimiter.empty() )
    {
        return 0;
    }

    // collect the start positions of all non-overlapping delimiters
    std::vector<size_type> positions;
    size_type found = input.find(delimiter, 0);
    while( found != std::string::npos )
    {
        positions.push_back(found);
        found = input.find(delimiter, found + delimiter.size());
    }

    if( positions.empty() )
    {
        return 0;
    }

    // pieces in front of each delimiter
    size_type start = 0;
    for( size_type k = 0; k < positions.size(); ++k )
    {
        const std::string piece = input.substr(start, positions[k] - start);
        if( includeEmpties || !piece.empty() )
        {
            results.push_back(piece);
        }
        start = positions[k] + delimiter.size();
    }

    // remainder behind the last delimiter (may be empty)
    const std::string tail = input.substr(start);
    if( includeEmpties || !tail.empty() )
    {
        results.push_back(tail);
    }

    return static_cast<int>(positions.size());
}

// True for blank, newline and tab; every other character is false.
bool isWhite(char c){
  switch (c){
    case ' ':
    case '\n':
    case '\t':
      return true;
    default:
      return false;
  }
}


/*
3 l Definitions of ~URL, HTML, Page~

3.1 Class ~URL~

----
Example to create an object:
let url1 = [const url value ("http" <text>//www.google.de</text--->
<text>/</text--->)]
----

*/
// Attribute type "url": a protocol string plus host and path, the
// latter two stored as NUL-terminated texts in Flobs.
class URL : public IndexableAttribute
{
 public:
  // The standard constructor leaves all members uninitialised.
  URL();
  ~URL();
  // Parses a complete URL; the object is undefined if parsing fails.
  URL(const string&);
  URL(const URL&);
  // Builds the URL from protocol, host and path; the object is
  // undefined if prot is too long or the combination is not valid.
  URL(const string &prot, const string &h, const string &pp);
  // Case-insensitive comparison of protocol, host and path.
  bool operator== (const URL& url) const;
  // The setters do nothing and the getters return "" while the object
  // is undefined.
  void setProtocol(string);
  string getProtocol() const;
  void setPath(string);
  string getPath() const;
  void setHost(string);
  string getHost() const;
  URL* Clone() const;
  friend ostream& operator<<(ostream& s, URL u);
  // Nested list (protocol host path), optionally wrapped with the
  // type symbol.
  ListExpr ToListExpr(bool typeincluded)const;
  /* Returns whether this object is defined or not. */
  bool IsDefined() const;
  /* Sets this object as defined or undefined. */
  void SetDefined( bool Defined);
  size_t Sizeof() const;
  // NOTE(review): Compare and Adjacent currently always return 0.
  int Compare(const Attribute*) const;
  bool Adjacent(const Attribute*) const;
  //void operator=(const URL&);
  void Set( bool d, URL& u);
  // Destroys the host and path Flobs.
  void destroy(void);
  // Parses url into myurl; returns false and marks myurl undefined if
  // url is not valid.
  static bool urlFromString(const string& url,URL& myurl);
  // Two Flobs: index 0 is host, index 1 is path (see GetFLOB).
   inline virtual int NumOfFLOBs() const {__TRACE__ return 2;}
  Flob *GetFLOB(const int);
  void WriteTo (char*)const;
  void ReadFrom(const char*);
  SmiSize SizeOfChars(void) const;
  size_t HashValue(void) const;
  void CopyFrom(const Attribute *arg);
  static const string BasicType() { return "url"; }
  static const bool checkType(const ListExpr type){
    return listutils::isSymbol(type, BasicType());
  }
 private:
  STRING_T protocol;  // written with strcpy after a MAX_STRINGSIZE check
  Flob host;          // host text including the terminating NUL
  Flob path;          // path text including the terminating NUL
  bool defined;
  static bool isValidURL(const string&);
  // Validates the URL and returns protocol, host and path separately.
  static bool isValidURL(const string&, string&, string&, string&);
};

/*
3.1.1 Implementation of Class-Operations of ~URL~

*/
// Standard constructor: initialises no member.
// NOTE(review): presumably this is required by the attribute storage
// mechanism - confirm before adding initialisation here.
URL::URL()
{
  __TRACE__
}


// Nothing to release explicitly.
URL::~URL()
{
}

// Creates a URL from its textual form. If u is not a valid URL the
// object is undefined and host and path stay empty.
URL::URL(const string& u)
:IndexableAttribute(true),host(0),path(0)
{
  string scheme;
  string authority;
  string location;

  defined = isValidURL(u, scheme, authority, location);
  if (!defined){
  __TRACE__
    return;
  }

  setProtocol(scheme);
  setHost(authority);
  setPath(location);
}

// Creates a URL from its three components. The object is undefined if
// prot exceeds MAX_STRINGSIZE or prot + "://" + h + p is not valid.
URL::URL(const string &prot, const string &h, const string &p)
: IndexableAttribute(true),host(h.length()+1), path(p.length()+2)
{
   __TRACE__

  // stays false on every early exit below
  defined = false;

  if (prot.length() > MAX_STRINGSIZE)
    return;

  __TRACE__

  if (!isValidURL(prot + "://" + h + p))
    return;

  __TRACE__
  defined = true;
  setProtocol(prot);
  setHost(h);
  setPath(p);
}

// Copy constructor: takes over the defined flag and, for a defined
// source, protocol, host and path.
URL::URL(const URL& u)
:IndexableAttribute(u.IsDefined()),host(u.getHost().length()+1),
 path(u.getPath().length()+1)
{
  defined = u.IsDefined();
  if (defined){
    setProtocol(u.getProtocol());
    setHost(u.getHost());
    setPath(u.getPath());
  }
}

// Returns a new heap-allocated URL built from this object's protocol,
// host and path.
URL* URL::Clone() const
{
  __TRACE__

  return new URL(getProtocol(), getHost(), getPath());
}

// Returns the protocol, or "" for an undefined URL.
string URL::getProtocol() const
{
  return defined ? string(protocol) : string("");
}


// Stores p as protocol. Nothing happens if the URL is undefined or p
// is longer than MAX_STRINGSIZE.
void URL::setProtocol(string p)
{
  if (!defined || p.length() > MAX_STRINGSIZE)
    return;

  strcpy(protocol, p.c_str());
}

// Returns the host, or "" for an undefined URL or an empty Flob.
string URL::getHost() const
{
  if (!defined)
    return "";

  const size_t size = host.getSize();
  if (size == 0)
    return "";

  // A std::string buffer replaces the former variable length array:
  // it is standard C++ and always NUL-terminated.
  string buffer(size, '\0');
  host.read(&buffer[0], size);

  // the stored text ends at its first NUL
  return string(buffer.c_str());
}

// Stores h including its terminating NUL in the host Flob. Does
// nothing for an undefined URL.
void URL::setHost(string h)
{
  if (defined){
    host.resize(h.length() + 1);
    host.write(h.c_str(), h.length() + 1);
  }
}

// Returns the path, or "" for an undefined URL or an empty Flob.
string URL::getPath() const
{
  if (!defined)
    return "";

  const size_t size = path.getSize();
  if (size == 0)
    return "";

  // A std::string buffer replaces the former variable length array:
  // it is standard C++ and always NUL-terminated.
  string buffer(size, '\0');
  path.read(&buffer[0], size);

  // the stored text ends at its first NUL
  return string(buffer.c_str());
}

// Stores p including its terminating NUL in the path Flob. An empty
// path becomes "/", and a path not starting with '/' gets one
// prepended. Does nothing for an undefined URL.
void URL::setPath(string p)
{
  if (!defined)
    return;

  if (p.empty()){
    p = "/";
  } else if (p[0] != '/'){
    p = "/" + p;
  }

  path.resize(p.length() + 1);
  path.write(p.c_str(), p.length() + 1);
}

// Prints protocol, host and path of u, or a fixed text if u is
// undefined.
ostream& operator<<(ostream& s, URL u)
{
  if (u.IsDefined()){
    s << "URL: [Protocol: " << u.getProtocol() << endl
      << "Host: " << u.getHost() << endl
      << "Path: " << u.getPath() << "]" << endl;
  } else {
    s << "Value is Undefined";
  }
  return s;
}

// Returns the list (protocol host path); all three entries are empty
// for an undefined URL. With typeincluded the list is wrapped as
// (url (protocol host path)).
ListExpr URL::ToListExpr(bool typeincluded)const {
  __TRACE__

  // the getters already deliver "" for an undefined URL
  const ListExpr value = nl->ThreeElemList(
    nl->StringAtom(getProtocol()),
    nl->TextAtom(getHost()),
    nl->TextAtom(getPath()));

  if (!typeincluded)
    return value;

  return nl->TwoElemList(nl->SymbolAtom(URL::BasicType()),value);
}

// Reports the defined flag of this URL.
bool URL::IsDefined() const
{
  return defined;
}

// Sets the defined flag; protocol, host and path are not touched.
void URL::SetDefined( bool def)
{
  defined = def;
}

// Size of the URL object itself in bytes.
size_t URL::Sizeof() const
{
  __TRACE__
  return sizeof(URL);
}

int URL::Compare(const Attribute*) const
{
  __TRACE__
  return 0;
}

// Always returns false, regardless of the argument.
bool URL::Adjacent(const Attribute*) const
{
  __TRACE__
  return false;
}

// Sets the defined flag to d. If d is true and u is defined,
// protocol, host and path are copied from u; otherwise they are left
// as they are.
void URL::Set( bool d, URL& u)
{
  __TRACE__
  defined = d;

  if (d && u.IsDefined()){
    const string newProtocol = u.getProtocol();
    const string newHost = u.getHost();
    const string newPath = u.getPath();
    __TRACE__

    strcpy(protocol, newProtocol.c_str());

    // both texts are stored including the terminating NUL
    host.resize(newHost.length() + 1);
    host.write(newHost.c_str(), newHost.length() + 1);
    path.resize(newPath.length() + 1);
    path.write(newPath.c_str(), newPath.length() + 1);
  }
}


// Destroys both Flobs of this URL, host first and then path.
void URL::destroy()
{
  __TRACE__
  host.destroy();
  path.destroy();
}

// Parses url and stores the result in myurl. myurl is marked defined
// and filled on success, and marked undefined on failure.
// Returns whether url is valid.
bool URL::urlFromString (const string& url,URL& myurl){
  string scheme;
  string authority;
  string location;

  const bool ok = isValidURL(url, scheme, authority, location);
  myurl.SetDefined(ok);

  if (ok){
    myurl.setPath(location);
    myurl.setProtocol(scheme);
    myurl.setHost(authority);
  }
  return ok;
}


// Checks url with the lexer in start condition MSCHEME. A SCHEME
// token followed by an AUTHORITY token is required. The last
// character of the scheme token and the first two characters of the
// authority token are dropped. path is the text of a following PATH
// token, or "" if there is none.
// The out parameters may be partly written when false is returned.
bool URL::isValidURL(const string& url, string& protocol,
                     string& host, string& path){
  stringstream is (url);
  WebLex lexer(&is);

  lexer.switchStartCond(MSCHEME);

  if (lexer.nextToken() != SCHEME)
    return false;

  protocol = lexer.getVal();
  protocol.erase(protocol.length()-1);

  if (lexer.nextToken() != AUTHORITY)
    return false;

  host = lexer.getVal();
  host.erase(0,2);

  const bool hasPath = (lexer.nextToken() == PATH);
  path = hasPath ? lexer.getVal() : string("");

  return true;
}

// Validity check for callers not interested in the components.
bool URL::isValidURL(const string& url){
  string scheme;
  string authority;
  string location;

  __TRACE__
  return isValidURL(url, scheme, authority, location);
}

// Returns the host Flob for i == 0, the path Flob for i == 1 and NULL
// for every other index.
Flob *URL::GetFLOB(const int i){
  switch (i){
    case 0:
      return &host;
    case 1:
      return &path;
    default:
      return NULL;
  }
}

// Copies protocol, host and path, concatenated without separators,
// as a C string to dest.
// NOTE(review): no "://" is written between protocol and host -
// confirm that ReadFrom can parse this form.
void URL::WriteTo ( char* dest ) const {
  __TRACE__
  strcpy(dest, (getProtocol() + getHost() + getPath()).c_str());
}

// Length of the protocol plus the sizes of the host and path Flobs.
SmiSize URL::SizeOfChars()const {
  __TRACE__
  const SmiSize flobBytes = host.getSize() + path.getSize();
  return strlen(protocol) + flobBytes;
}

void URL::ReadFrom ( const char *src){
  __TRACE__
  int erg;
  string url (src);
  stringstream is (url);

  WebLex lexer (&is);
  lexer.switchStartCond(MURI);

  string protocol;
  string host;
  string path;

  erg= lexer.nextToken();
  if (erg==ERROR)
    return;

  protocol= lexer.getVal();

  erg= lexer.nextToken();
  if (erg==ERROR)
    return;

  host= lexer.getVal();

  erg= lexer.nextToken();
  if (erg==ERROR)
    return;

  path= lexer.getVal();

  setProtocol ( protocol);
  setHost (host);
  setPath (path);
}


size_t URL::HashValue(void) const{
  __TRACE__
  return SizeOfChars();
}

void URL::CopyFrom(const Attribute *arg){
  __TRACE__
  URL *url = (URL*) arg;
  setProtocol ( url->getProtocol());
  setHost ( url->getHost());
  setPath ( url->getPath());
}

// Two URLs are equal if protocol, host and path match ignoring case.
bool URL::operator== (const URL& url) const{
  if (!isEqual(url.getProtocol(), getProtocol()))
    return false;
  if (!isEqual(url.getHost(), getHost()))
    return false;
  return isEqual(url.getPath(), getPath());
}

/*
3.2 Class ~HTML~

----
Example to create an object:
let html1 = [const html value ((instant (10 10 2006 10 27 18)) <file>/home/sopra/secondo/Algebras/Web/bilder.htm</file---> (url ("http" <text>www.mybilder.de</text---> <text>/</text---> )))]
----

*/
// Attribute type "html": the text of a page together with its source
// URL, the date of its last change and index arrays pointing at the
// URLs and meta information found in the text.
class HTML : public Attribute
{
 public:
  HTML(){}
  ~HTML(){}
  // Page text only; the source URL is built from "http://".
  HTML(const string& s);
  // Date of last change, page text and source URL.
  HTML(const DateTime &d, const string &s, const URL &u);
  HTML(const HTML&);
  // Compares content, source URL and date of last change.
  bool operator== (const HTML& h) const;
  URL getSource() const;
  // Complete stored page text ("" if undefined).
  string getContent() const;
  // Text of the CONTENT tokens of the page.
  string getText() const;
  int getNumberOfUrls() const;
  URL getUrl(const int i) ;
  int getNumberOfEmbUrls() const;
  URL  getEmbUrl (const int i);
  // Returns getUrl(i); contains reports whether its host is listed in
  // the comma separated text hosts.
  URL getUrlHosts(int i, string hosts, bool& contains);
  bool containsURL( const URL*);
  datetime::DateTime getLastModified() const;
  // Content of the meta information with the given name, or "".
  string getMetaInfo(string name);
  int getNumberOfMetainfos() const;
  // Key of meta information number ii; pContent receives its content.
  string getMetainfo( int ii, string& pContent) const;
  int getNumberOf(string);
  double similar(HTML*, int, bool);
  HTML* Clone() const;
  ListExpr ToListExpr(bool typeincluded)const;
  bool IsDefined() const;
  void SetDefined(bool d) ;
  void Set(const HTML &h);
  Flob* GetFLOB(const int i);
  int NumOfFLOBs() const;
  size_t Sizeof() const;
  int Compare(const Attribute*) const;
  bool Adjacent (const Attribute*)const;
  const DbArray<FlobIndex>* getURLS()const;
  const DbArray<FlobIndex>* getMetainfoKeys()const;
  const DbArray<FlobIndex>* getMetainfoContents()const;
  const DbArray<FlobIndex>* getEmbededURLS() const;

  bool IsValid() const;
  void CopyFrom(const Attribute *arg);
  size_t HashValue(void) const;

  static const string BasicType() { return "html"; }
  static const bool checkType(const ListExpr type){
    return listutils::isSymbol(type, BasicType());
  }


 private:
  DateTime lastChange;
  // page text including the terminating NUL
  Flob source;
  // offset/length pairs into source
  DbArray<flobindex> urls;
  DbArray<flobindex> emburls;
  DbArray<flobindex> metainfoKeys;
  DbArray<flobindex> metainfoContents;
  URL sourceURL;

  bool defined;
  // "Tiefe" is German for depth; the constructors initialise it to 0.
  int tiefe;

  // Advances lexer to the next URL; fills i and url.
   URL findNextURI(WebLex& lexer, flobindex& i, const string&, URL& url);

  void analyseStructure(WebLex& lexer, int maxdepth, int& depth,
                              AnalyseList& al, int& error, int& symbol);
  // Turns value into url, resolving it against sourceURL if needed.
  bool checkURI(string value,URL& url);
  void getMetaInfos(const string&);
  void filterEmbUrls(URL& u, flobindex& f);
  void getUrls(const string&);
  bool valid;
};


/*
3.2.1 Implementation of Class-Operations of ~HTML~

*/
// Creates a defined, valid HTML object from the page text s. The
// source URL is built from "http://"; lastChange is constructed with
// instanttype. The text is stored including its terminating NUL, then
// the meta information and URL indexes are built.
HTML::HTML(const string& s)
:lastChange(instanttype),source(s.length()+1),
 urls(0), emburls(0),metainfoKeys(0),metainfoContents(0),
 sourceURL("http://"),defined(true),
tiefe(0), valid(true)
{
  __TRACE__
  const size_t bytes = s.length() + 1;
  source.resize(bytes);
  source.write(s.c_str(), bytes);

  getMetaInfos(s);
  getUrls(s);

  __TRACE__
}

// Creates a defined, valid HTML object with date of last change d,
// page text s and source URL u. The text is stored including its
// terminating NUL, then the meta information and URL indexes are
// built.
HTML::HTML(const DateTime &d, const string &s, const URL &u)
: lastChange(d),
source(s.length()+1),urls(0),emburls(0),metainfoKeys(0),
metainfoContents(0), sourceURL(u),defined(true),
tiefe(0),valid(true)
{
  __TRACE__
  const size_t bytes = s.length() + 1;
  source.resize(bytes);
  source.write(s.c_str(), bytes);

  getMetaInfos(s);
  getUrls(s);

  __TRACE__
}

// Copy constructor. Copies the page text and the index arrays urls,
// metainfoKeys and metainfoContents entry by entry. emburls is not
// copied and stays empty.
HTML::HTML(const HTML& h)
:lastChange(h.getLastModified()),
source(0), urls(0), emburls(0),metainfoKeys(0),
metainfoContents(0), sourceURL(h.getSource()),
defined(h.IsDefined()),tiefe(0),valid(h.IsValid())
{
  __TRACE__

  // page text including its terminating NUL
  const string text = h.getContent();
  source.resize(text.length() + 1);
  source.write(text.c_str(), text.length() + 1);

  FlobIndex entry;
  const DbArray<FlobIndex> *from = 0;

  from = h.getURLS();
  for (int k = 0; k < from->Size(); ++k){
    from->Get(k, entry);
    urls.Put(k, entry);
  }

  from = h.getMetainfoKeys();
  for (int k = 0; k < from->Size(); ++k){
    from->Get(k, entry);
    metainfoKeys.Put(k, entry);
  }

  from = h.getMetainfoContents();
  for (int k = 0; k < from->Size(); ++k){
    from->Get(k, entry);
    metainfoContents.Put(k, entry);
  }
}


// Returns a new heap-allocated copy made with the copy constructor.
HTML* HTML::Clone() const
{
  __TRACE__
  HTML* copy = new HTML(*this);
  return copy;
}

// Two HTML objects are equal if content, source URL and date of last
// change are equal.
bool HTML::operator== (const HTML& h) const
{
  __TRACE__
  if (!(h.getContent() == this->getContent()))
    return false;
  if (!(h.getSource() == this->getSource()))
    return false;
  return h.getLastModified() == this->getLastModified();
}

// Returns a copy of the stored date of last change.
datetime::DateTime HTML::getLastModified() const
{
  __TRACE__
  datetime::DateTime modified(lastChange);
  return modified;
}

/*

returns the source - code of the html object

*/

// Returns the stored page text, or "" for an undefined object or an
// empty Flob.
string HTML::getContent() const
{
  __TRACE__
  if (!defined)
    return "";

  const size_t size = source.getSize();
  if (size == 0)
    return "";

  // A std::string buffer replaces the former variable length array:
  // it lives on the heap (pages can be large), is standard C++ and is
  // always NUL-terminated.
  string buffer(size, '\0');
  source.read(&buffer[0], size);

  // the stored text ends at its first NUL
  return string(buffer.c_str());
}

/*
  returns the content of the html - elements

*/

string HTML::getText() const
{
  //returns the content without tags, only text


  __TRACE__
  if (!valid)
  return "";


  int symbol=0;
  string content;

  WebLex lexer(0);
  content= getContent();
  //char out[content.length()+1];
  string out="";
  stringstream is (getContent());


  lexer.yyrestart(&is);

  lexer.switchStartCond (RELEM_WA);
  symbol = lexer.nextToken();

  while (symbol){
    //cout << lexer.getVal() << endl;
    if (symbol == ERROR){
      cout << "Fehler" << endl;
      return "";
    }

    if (symbol == CONTENT){
      out += lexer.getVal();
    } else{
      //cout << "Token: " << symbol << ": " << lexer.getVal() << endl;
    }

    if (symbol == ELEMENT){
      if (isEqual(lexer.getVal(), "script") ||
          isEqual(lexer.getVal(), "style")){
        symbol = lexer.nextToken();
        while (symbol == CONTENT)
          symbol= lexer.nextToken();
      }
    }
    symbol= lexer.nextToken();
  }

  //cout << "*******" << content << endl;
  return out;
}

// Returns a copy of the source URL.
URL HTML::getSource() const
{
  URL origin(sourceURL);
  return origin;
}

// Returns the list (lastModified content sourceURL), where content is
// the Base64 encoded page text. With typeincluded the list is wrapped
// as (html (...)). An undefined object delivers the list of HTML("").
ListExpr HTML::ToListExpr(bool typeincluded)const {

   __TRACE__
  if (!defined)
    return HTML("").ToListExpr(typeincluded);
  __TRACE__

  const string content = getContent();
  string textBytes;
  Base64 b;
  b.encode( content.c_str(), content.size(), textBytes );

  const ListExpr value = nl->ThreeElemList(
    getLastModified().ToListExpr(true),
    nl->TextAtom(textBytes),
    sourceURL.ToListExpr(true));

  if (!typeincluded)
    return value;

  return nl->TwoElemList(nl->SymbolAtom(HTML::BasicType()),value);
}

// Reports the defined flag of this object.
bool HTML::IsDefined() const
{
   __TRACE__
  return defined;
}

// Scans content for URLs and appends the index of every URL found by
// findNextURI to urls. Scanning stops at the first call that leaves
// url undefined.
void HTML::getUrls(const string& content){
  WebLex lexer(0);
  stringstream ss (content);
  lexer.yyrestart(&ss);
  flobindex i;
  URL url("");

   __TRACE__

  for (findNextURI(lexer, i, content, url);
       url.IsDefined();
       findNextURI(lexer, i, content, url)){
     __TRACE__
    urls.Append(i);
  }

   __TRACE__
}


/*
  Checks whether the URL u is an embedded URL.
  If so, the flobindex is appended to emburls.

*/
void HTML::filterEmbUrls (URL& u, flobindex& i){
  // Appends i to emburls if the path of u ends in one of the image
  // extensions listed below (compared case-sensitively).
  __TRACE__
  const string location = u.getPath();

  const int dot = location.rfind(".");
  if (dot <= 0)
    return;

  const string extension = location.substr(dot + 1);
  const char* const kinds[] = {"jpg", "jpeg", "gif", "bmp", "png", "tif"};
  const int count = sizeof(kinds) / sizeof(kinds[0]);

  for (int k = 0; k < count; ++k){
    if (extension == kinds[k]){
      emburls.Append(i);
      return;
    }
  }
}

int HTML::getNumberOfUrls() const
{
  __TRACE__
  return urls.Size();
  //cout << urls.Size() << endl;
}

// Returns URL number i of the page. The text addressed by the stored
// index is turned into a URL by checkURI. An undefined URL is
// returned if i is out of range, the index does not lie inside the
// stored text, or checkURI rejects the text.
URL HTML::getUrl( int i)
{
  __TRACE__
  URL url("");

  if (i < 0 || i >= urls.Size())
    return URL("");

  const size_t size = source.getSize();
  if (size == 0)
    return URL("");

  // heap buffer instead of a variable length array
  string buffer(size, '\0');
  source.read(&buffer[0], size);

  flobindex ind;
  urls.Get(i, ind);

  // reject an index pointing outside of the stored text
  const string::size_type offset = ind.offset;
  const string::size_type length = ind.len;
  if (offset > buffer.size() || length > buffer.size() - offset)
    return URL("");

  const string content(buffer, offset, length);
  if (checkURI(content, url))
    return URL(url);

  return URL("");
}

int HTML::getNumberOfEmbUrls() const{
  __TRACE__
  return emburls.Size();
}

// Returns embedded URL number i of the page. The text addressed by
// the stored index is turned into a URL by checkURI. An undefined URL
// is returned if i is out of range, the index does not lie inside the
// stored text, or checkURI rejects the text.
URL HTML::getEmbUrl( int i)
{
  __TRACE__
  URL url("");

  if (i < 0 || i >= emburls.Size())
    return URL("");

  const size_t size = source.getSize();
  if (size == 0)
    return URL("");

  // heap buffer instead of a variable length array
  string buffer(size, '\0');
  source.read(&buffer[0], size);

  flobindex ind;
  emburls.Get(i, ind);

  // reject an index pointing outside of the stored text
  const string::size_type offset = ind.offset;
  const string::size_type length = ind.len;
  if (offset > buffer.size() || length > buffer.size() - offset)
    return URL("");

  const string content(buffer, offset, length);
  if (checkURI(content, url))
    return URL(url);

  return URL("");
}

/*
checks whether the host of getUrl(i) is equal to
one of the hosts in the parameter ~hosts~

*/

URL HTML::getUrlHosts(int i, string hosts, bool& contains){
  // Returns getUrl(i). contains is set to true if hosts is empty (no
  // restriction) or if the host of the URL equals, ignoring case, one
  // of the comma separated entries of hosts. It is false otherwise,
  // including when the URL is undefined or this object is not valid.

  URL url= getUrl (i);

  // This test has to precede the append below; otherwise hosts is
  // never empty and the branch cannot be reached.
  if( hosts.empty() )
  {
    contains = true;
    return url;
  }

  contains=false;
  if (!url.IsDefined() || !valid)
    return url;

  // SplitString delivers nothing for a text without delimiter, so a
  // trailing one is added to handle a single host as well.
  hosts+=",";

  vector<string> vhosts;
  SplitString( hosts,",",vhosts,false);

  const string host = url.getHost();
  vector<string>::const_iterator it = vhosts.begin();
  for (; it != vhosts.end(); ++it){
    if (isEqual(host, *it)){
      contains = true;
      break;
    }
  }

  return url;
}


bool HTML::containsURL(const URL *url){
  string href;
  int i=0;

  __TRACE__

  while (i < getNumberOfUrls()){
    if (*url == getUrl (i))
      return true;

    i++;
  }

  return false;
}

/*
  checks whether value is a valid URL. Returns true if so, false otherwise

*/
bool HTML::checkURI(string value,URL& url){
  // Turns value into url. A complete URL is taken as it is. Everything
  // else is treated as a path and combined with protocol and host of
  // the source URL of this page. Returns false only if a "../" segment
  // finds no '/' left in the base path to strip.
  WebLex lexer(0);
  stringstream ss;

//  __TRACE__
  //cout << "Prfe URL " << value << endl;
  url.SetDefined(false);

  //check if this is a complete url
  if (URL::urlFromString(value,url))
      return true;


   __TRACE__
  //match a URL
  // NOTE(review): the token read here is not used afterwards.
  lexer.switchStartCond (MURI);
  ss << value;
  lexer.yyrestart(&ss);
  lexer.nextToken();


  // we have the Path from a URL ~value~ and the source URL
  //Now we try to build a valid url with protocol, host and path
   __TRACE__
  string path= getSource().getPath();
  string urlpath=value;
  string myurl="";
  string mypath="";
  int pos=0;

  // determine the base directory mypath
  pos = urlpath.find("./");
  if (pos == 0){
    // "./": relative to the directory of the source url
    pos= path.rfind("/");
    mypath=path.substr(0,pos );

    urlpath= urlpath.substr (2);
  } else{
    pos= urlpath.find("/");
    if (pos == 0){
      // leading "/": relative to the root directory of the web server
      mypath="";
    } else {
      // anything else: relative to the directory of the source url
      pos= path.rfind("/");
      mypath= path.substr(0,pos );
    }
  }

  // drop one leading '/' of the remaining path
  if (urlpath.find("/") == 0){
    urlpath= urlpath.substr(1);
  }

  myurl=urlpath;
  //cout << myurl << " ---  " << mypath << endl;
  // Move the directory segments of myurl over to mypath one by one;
  // the loop ends when myurl contains no '/' any more.
  while (true){
    pos = myurl.find("../");
    //parent directory
    if (pos == 0){
      myurl= myurl.substr(3);
      pos= mypath.rfind("/");
      //cout << "1:" << mypath << endl;
      if (pos < 0){
        //cout << "error parsing url" << endl;
        return false;
      } else {
        mypath= mypath.substr(0, pos );
        //cout << "2. " << mypath << endl;
      }
    }else {
      pos= myurl.find("/");
      if (pos == -1)
        break;

      mypath= mypath  + "/" + myurl.substr(0,pos);
      myurl= myurl.substr(pos +1);
    }
  }
  //__TRACE__
  // protocol and host are taken from the source url of this page
  url.SetDefined(true);
  url.setProtocol (getSource().getProtocol());
  url.setHost(getSource().getHost());

  //cout << "checkuri " << mypath << "  " << myurl;
  url.setPath(mypath + "/" + myurl);

  //cout << "Neue URL :" << url.getPath() << endl;
  //__TRACE__
  return true;
}

/*

in:lexer
in:content (content of the HTML object)
out:i (FlobIndex for the URL)
out:url (the found URL Object)


find NextUri in the stream of ~lexer~

*/


URL HTML::findNextURI(WebLex& lexer, flobindex& i,
                      const string& content, URL& url ){
  // Walks through the elements delivered by lexer until a URL is
  // found: the src attribute of an img or script element, or the href
  // attribute of any element. The URL is stored in url and returned;
  // i receives its position in content. url is undefined if the input
  // ends without a further URL.
  string element, value;
  int symbol=0;

  //URL url("");

//  __TRACE__
  url.SetDefined(false);
  //vector<string> attributes;

  //attributes.push_back("src");
  //attributes.push_back("href");


  symbol= lexer.startElement(element);
  while (symbol){
    __TRACE__


    if (isEqual(element, "img")){
      if (lexer.findAttribute("src",value)){
        if (checkURI(value,url)){
          i= lexer.setPos(value,content);
          return url;
        }
      }
    }

    /*if (!isEqual(element,"script")){

//      __TRACE__
      if (lexer.findAttribute(attributes,value)){
        __TRACE__
        if(checkURI(value,url)){
          __TRACE__
          i=lexer.setPos(value, content);
          //cout << "StartKopie" << url.getPath() << endl;
          return url;
        }
      }
    }*/


//    __TRACE__
    // href is searched in every element, whatever its name
    if (lexer.findAttribute("href",value)){
        //__TRACE__
        if(checkURI(value,url)){
          //__TRACE__
          i=lexer.setPos(value, content);
          return url;
        }
      }

    if (isEqual(element,"script")){
      __TRACE__
      //cout << element << endl;
      if (lexer.findAttribute("src",value)){
        if (checkURI(value,url)){
          i= lexer.setPos(value,content);

        }
      }

//      __TRACE__
      // consume the CONTENT tokens of the script; the text of the
      // first other token becomes the next element
      symbol= lexer.nextToken();
      element=lexer.getVal();
      while(symbol == CONTENT){
        symbol= lexer.nextToken();
        element= lexer.getVal();
      }
      //cout << "------------" << lexer.getVal() << symbol << endl;
      // a URL found in the src attribute is reported only now
      if (url.IsDefined())
        return url;

    }else{
      __TRACE__
      symbol=lexer.startElement(element);
    }

  }

  return url;
}

int HTML::getNumberOfMetainfos() const
{
   __TRACE__
  //cout << metainfoKeys.Size() << endl;
  return metainfoKeys.Size();
}

// Returns the key of meta information number i and stores the matching
// content in pContent.
//
//   i         index of the entry, valid range 0 .. getNumberOfMetainfos()-1
//   pContent  receives the content text; left untouched for an invalid index
//
// Returns the empty string if i is out of range or the source is empty.
string HTML::getMetainfo( int i, string& pContent) const
{
   __TRACE__
  // Validate the index first so that an invalid request does not read the
  // whole document (negative indices were not rejected before).
  if (i < 0 || i >= metainfoKeys.Size())
    return "";

  const size_t size = source.getSize();
  if (size == 0)
    return "";

  // Heap buffer instead of a variable length array on the stack: documents
  // may be large and VLAs are not standard C++.
  vector<char> content(size);
  source.read(&content[0], size);

  flobindex ind;
  metainfoContents.Get (i, ind);
  pContent= string (&content[0] + ind.offset, ind.len);

  metainfoKeys.Get( i, ind);
  return string (&content[0] + ind.offset, ind.len);
}

// Looks up the meta information whose key equals `name` (compared with
// isEqual) and returns its content; returns "" if no key matches.
string HTML::getMetaInfo(string name){
  __TRACE__
  string content;
  int idx = 0;
  while (idx < getNumberOfMetainfos()){
    const string key = getMetainfo(idx, content);
    if (isEqual(key, name)){
      return content;
    }
    ++idx;
  }

  return "";
}


/*

find all Metainfos in ~content~ and append them to
the attributes ~metainfoContents~ and ~metainfoKeys~

*/

// Scans `content` for <meta> elements and records, for every element in
// which two of the attributes "name"/"content" are found, the position of
// the name value in metainfoKeys and of the content value in
// metainfoContents. Scanning stops at the "/head" element or when the lexer
// reports no further element.
void HTML::getMetaInfos(const string& content){
//  __TRACE__
  string attname;
  flobindex ikey, icontent;
  int symbol=0;
  string value("");;
  stringstream ss (content);
  WebLex lexer (&ss);
  // attribute names searched for inside a <meta> element
  vector<string> attributes;
  attributes.push_back("content");
  attributes.push_back("name");

  //cout << "getMeta Content " << content << endl;

  symbol=lexer.startElement(attname);
//   __TRACE__
  while (symbol){
    //cout << "getMeta Content " << attname << endl;
    // nothing is collected past the end of the head section
    if (isEqual (attname, "/head"))
      return;
    if (symbol== EIDENTIFIER && isEqual (attname, "meta")){
//      __TRACE__

      // tmp receives the name of the attribute that was actually matched
      string tmp("");
      if (lexer.findAttribute(attributes,value,tmp)){
          //cout << "--" << value << endl;


          // first attribute found: either the key or the content
          if (isEqual(tmp,"name")){
            ikey= lexer.setPos(value, content);
          }else{
            icontent= lexer.setPos(value, content);
          }

          // second attribute; the pair is stored only if it is found too
          if (lexer.findAttribute(attributes,value,tmp)){

            if (isEqual(tmp,"name")){
              ikey= lexer.setPos(value, content);

            }else{
              icontent= lexer.setPos(value, content);
            }

            metainfoContents.Append (icontent);
            metainfoKeys.Append (ikey);


          }
      }
    }


    if (isEqual(attname,"script")){
      //cout << "******* Treffer **********" << endl;
      // switch the lexer to RSCRIPT and skip the CONTENT tokens that follow
      lexer.switchStartCond(RSCRIPT);
      symbol= lexer.nextToken();
      attname= lexer.getVal();
      while(symbol == CONTENT){
        symbol= lexer.nextToken();
        attname=lexer.getVal();
      }
    }else{
      symbol=lexer.startElement(attname);
    }
  }


}

/*

  Returns the number of occurrences of the given element in this object

*/
// Counts how many element tokens of this document are equal (isEqual) to
// `element`. Returns 0 if the object is not valid.
int HTML::getNumberOf(string element){
  __TRACE__
  stringstream input (getContent());
  WebLex lexer (&input);

  if (!valid)
    return 0;

  int hits = 0;
  lexer.switchStartCond(RELEM_WA);

  for (int token = lexer.nextToken(); token; token = lexer.nextToken()){
    // remember the element name before its content is consumed
    const string current = lexer.getVal();

    __TRACE__
    lexer.readContent();

    if (isEqual (current, element))
      ++hits;
  }

  return hits;
}

/*
  analyse Structure of html object
*/
// Recursively walks the element tokens delivered by `lexer` and appends the
// element names to `al`.
//
//   lexer     tokenizer; switched to RELEM_WA on entry and advanced
//   maxdepth  elements are recorded only while depth <= maxdepth;
//             a negative value disables the limit
//   depth     current nesting level; incremented on entry and decremented
//             on the regular exit at the end of the function
//   al        list receiving the element names
//   error     set to -1 when an unexpected token is met
//   symbol    last token read; set to 0 when "/html" is reached
void HTML::analyseStructure(WebLex& lexer, int maxdepth, int& depth,
                            AnalyseList& al, int& error, int& symbol){

//  __TRACE__
  int sym1=0;
  string element;
  lexer.switchStartCond (RELEM_WA);


  //cout << "*****  Rein " << tiefe << " ********* " << endl;
  depth++;

  //cout << "nextToken 1" << endl;
  symbol= lexer.nextToken();
  //cout << "analyse: " << symbol << endl;


  // skip tokens with the value 10000
  // NOTE(review): the meaning of 10000 is not visible here - confirm
  // against the lexer definition
  while (symbol == 10000){
    symbol= lexer.nextToken();
    //cout << "analyse: " << symbol << endl;
  }


  while (symbol && !error){
    //cout << "TAG Name: " << lexer.getVal() << "  " << symbol <<  endl;
    // only element, comment, stand-alone element and closing tokens are
    // acceptable at this point
    if (symbol != ELEMENT && symbol !=COMMENT &&
        symbol != ELEMENT_SA && symbol !=ELEMENT_CLOSE){
      error=-1;
      cout << "1 ERROR " << lexer.getVal() << symbol << endl;
      return ;
    }

    // a name starting with '/' must have been reported as ELEMENT_CLOSE
    if (symbol != ELEMENT_CLOSE && lexer.getVal()[0] == '/'){
      cout << " 2 ERROR " << lexer.getVal() << "  " << symbol << endl;
      error=-1;
      return ;
    }


    element= lexer.getVal();

    // end of the document: signal "no more tokens" to all callers
    if (isEqual(element, "/html")){
      symbol=0;
      return;
    }else{
      //cout << "endetest:" << element << endl;
    }

    //Read content of current element <tag>content<....
    //  it may be empty <tag><tag>
    //cout << "nextToken 2" << endl;
    if ((sym1=lexer.readContent()) != CONTENT){
      symbol=sym1;
      // a zero result ends the analysis without flagging an error
      if (!symbol)
        return;
      cout << "3 ERROR CONTENT" << lexer.getVal() << symbol << endl;
      error =-1;
      return ;
    }

    //cout << "Content " << lexer.getVal() << endl;

    // closing tag: fetch the next token and leave this nesting level
    if (symbol == ELEMENT_CLOSE){
      //cout << "nextToken 3" << endl;
      symbol=lexer.nextToken();
      //cout << "Element_close " << element << endl;
      element= element.substr (1);

      break;
    }

    if (symbol == ELEMENT){


      //we have to check every single standalone html attribute
      // these elements do not open a nesting level and are not recorded
      if (isEqual (element,"area") || isEqual (element,"base") ||
          isEqual (element,"basefont") || isEqual (element,"br") ||
          isEqual (element,"col") || isEqual (element,"frame") ||
          isEqual (element,"hr") || isEqual (element,"img") ||
          isEqual (element,"img") || isEqual (element,"input") ||
          isEqual (element,"isindex") || isEqual (element,"link") ||
          isEqual (element,"meta") || isEqual (element,"param") ||
          isEqual (element,"param")){

        //cout << "SA Element " << element << endl;
        //cout << "nextToken 4" << endl;
        symbol= lexer.nextToken();


      }else{
        // record the element (subject to the depth limit) and descend
        if ((depth <= maxdepth) ||maxdepth < 0)
          al.push_back ( element );
        analyseStructure(lexer, maxdepth, depth, al, error, symbol);
        //cout << "Zurck " << symbol << endl;

      }
    } else if (symbol == ELEMENT_SA || symbol == COMMENT){
      //cout << "SA Element " << element << endl;
      // stand-alone elements and comments are recorded without descending
      if ((depth <= maxdepth) || maxdepth < 0)
        al.push_back ( element );
      //cout << "nextToken 5" << endl;
      symbol= lexer.nextToken();
    }
    else {
      cout << "5 Error" << element << "  " << symbol << endl;
      error=-1;
      return;
    }
  }

  depth--;
  return;
}


// Computes a structural similarity between this document and `html`.
//
//   html          document to compare with; a null pointer yields 0
//   maxdepth      nesting limit handed to analyseStructure
//   respectOrder  true:  the element lists are compared as produced by
//                        analyseStructure
//                 false: both lists are first rebuilt with
//                        AnalyseList::add and then compared the same way
//
// The result is (number of matches) / (size of the longer list), or 0 if
// either document is invalid or the longer list is empty.
//
// The lists are local objects; the previous version allocated up to four
// lists with new and never released them.
double HTML::similar(HTML *html, int maxdepth, bool respectOrder){
  __TRACE__
  if (!html || !valid || !html->IsValid())
    return 0;

  int counter=0;
  int depth=0;
  int error=0;
  int symbol=0;

  // structure of this document
  AnalyseList structure1;
  string tmp1=getContent();
  stringstream ss1(tmp1);
  WebLex lexer (&ss1);
  analyseStructure(lexer, maxdepth, depth, structure1, error, symbol);

  // structure of the other document, reusing the lexer
  depth=0;
  symbol=0;
  error=0;
  AnalyseList structure2;
  string tmp2 = html->getContent();
  stringstream ss2(tmp2);
  lexer.yyrestart(&ss2);
  analyseStructure(lexer, maxdepth, depth, structure2, error, symbol);

  AnalyseList *al1 = &structure1;
  AnalyseList *al2 = &structure2;
  AnalyseList::const_iterator it1,it2;

  // order-independent comparison: rebuild both lists through add()
  AnalyseList rebuilt1;
  AnalyseList rebuilt2;
  if (!respectOrder){
    for (it1 = structure1.begin(); it1 != structure1.end(); it1++){
      rebuilt1.add(it1->getElement());
    }
    for (it2 = structure2.begin(); it2 != structure2.end(); it2++){
      rebuilt2.add(it2->getElement());
    }
    al1 = &rebuilt1;
    al2 = &rebuilt2;
  }

  // al1 always refers to the longer list
  if (al2->size() > al1->size()){
    __TRACE__
    AnalyseList *al = al2;
    al2 = al1;
    al1 = al;
  }

  it1= al1->begin();
  it2= al2->begin();

  while ((it1 != al1->end() && it2 !=al2->end())){
    if (isEqual(it1->getElement(), it2->getElement())){
      counter++;
      it1++;
      it2++;
    }else {
      // The iterators are re-checked after find() exactly as before, in
      // case find() repositions it1.
      if (!al1->find( it1, it2->getElement())){
        if (!(it1 != al1->end() && it2 !=al2->end())){
          break;
        }
        it2++;
      }else {
        if (!(it1 != al1->end() && it2 !=al2->end())){
          break;
        }
        it1++;
      }
    }
  }

  __TRACE__

  if (al1->size() == 0)
    return (double) 0;

  return (double) counter / (double) al1->size();
}

void HTML::Set(const HTML &h)
{
  FlobIndex tmp;
  const DbArray<FlobIndex> *tmpArray=0;

  int i=0;


  __TRACE__
  if (!h.IsDefined())
    return;
  valid= h.IsValid();
  defined=true;
  DateTime d = h.getLastModified();
  lastChange.SetType(instanttype);
  lastChange.Set(d.GetYear(),d.GetMonth(), d.GetGregDay(), d.GetHour(),
                 d.GetMinute(), d.GetSecond(),d.GetMillisecond());

  URL u(h.getSource());
  sourceURL.Set(true,u);
  string s = h.getContent();
  source.resize( s.length() + 1 );
  source.write(s.c_str(), s.length() + 1);


  string c = h.getContent();
  source.resize (c.length() +1 );
  source.write(c.c_str(),c.length()+1);


  tmpArray=h.getURLS();
  for (i=0; i < tmpArray->Size();i++){
    tmpArray->Get(i,tmp);
    urls.Put(i,  tmp);
  }

  tmpArray=h.getMetainfoKeys();
  for (i=0; i < tmpArray->Size();i++){
    tmpArray->Get(i,tmp);
    metainfoKeys.Put( i, tmp);
  }

  tmpArray=h.getMetainfoContents();
  for (i=0; i < tmpArray->Size();i++){
    tmpArray->Get(i,tmp);
    metainfoContents.Put( i, tmp);
  }
}


int HTML::NumOfFLOBs() const{
  __TRACE__
    return 7;
}

// Maps a FLOB index to the corresponding member:
//   0 source, 1 urls, 2 metainfoKeys, 3 metainfoContents, 4 emburls,
//   5 and 6 the two FLOBs of sourceURL.
// Any other index yields NULL.
Flob *HTML::GetFLOB(const int i){
  switch (i){
    case 0:  return &source;
    case 1:  return &urls;
    case 2:  return &metainfoKeys;
    case 3:  return &metainfoContents;
    case 4:  return &emburls;
    case 5:  return sourceURL.GetFLOB(0);
    case 6:  return sourceURL.GetFLOB(1);
    default: return NULL;
  }
}

// Size in bytes of an HTML object (sizeof(HTML)).
size_t HTML::Sizeof() const{
  return sizeof(HTML);
}

// No ordering is implemented: the argument is ignored and 0 is returned.
int HTML::Compare(const Attribute*) const{
  return 0;
}

// Adjacency is not defined for HTML objects: always false.
bool HTML::Adjacent (const Attribute*)const{
  return false;
}

// Stores the defined flag.
void HTML::SetDefined(bool d) {
  __TRACE__
  this->defined = d;
}

// Read-only access to the index array `urls`.
const DbArray<FlobIndex>* HTML::getURLS() const{
  return &urls;

}

// Read-only access to the index array `metainfoKeys`.
const DbArray<FlobIndex>* HTML::getMetainfoKeys()const{
  return &metainfoKeys;
}

// Read-only access to the index array `metainfoContents`.
const DbArray<FlobIndex>* HTML::getMetainfoContents() const{
  return &metainfoContents;
}

// Returns the `valid` flag of this object.
bool HTML::IsValid() const{
  return valid;
}

void HTML::CopyFrom(const Attribute* right)
{
  __TRACE__
  const HTML *r = (const HTML *)right;
  lastChange = r->getLastModified();
  source.resize( r->source.getSize() );
  char bin[r->source.getSize()];
  r->source.read(bin, r->source.getSize() );
  source.write( bin, r->source.getSize());

  sourceURL.setProtocol( r->getSource().getProtocol());
  sourceURL.setHost( r->getSource().getHost());
  sourceURL.setPath( r->getSource().getPath());
  defined = r->IsDefined();
  valid=true;
  tiefe=0;
  urls.clean();
  metainfoKeys.clean();
  metainfoContents.clean();
  getMetaInfos(bin);
  getUrls(bin);
}

// No hashing is implemented: every object reports the hash value 0.
size_t HTML::HashValue(void) const
{
  return 0;
}


// Read-only access to the index array `emburls`.
const DbArray<FlobIndex>* HTML::getEmbededURLS() const{
  return &emburls;
}

/*

3.3 Class ~Page~

----
Example to create an object:
let page1 = [const page value ((html ((instant (10 10 2006 10 27 18)) <file>/home/sopra/secondo/Algebras/Web/bilder.htm</file---> (url ("http" <text>www.myimages.de</text---> <text>/</text---> )))) ((url ("http" <text>Garten-1.jpg</text---> <text>/</text---> )) <file>/home/sopra/secondo/Algebras/Web/Garten-1.jpg</file---> "image/jpeg")( (url ("http" <text>Garten-2.jpg</text---> <text>/</text---> )) <file>/home/sopra/secondo/Algebras/Web/Garten-2.jpg</file---> "image/jpeg"))]
----

*/


// A Page is an HTML document plus a list of embedded objects. Every
// embedded object consists of a URL, its data and a mime type string.
class Page : public HTML
{
  public:
    Page(){}
    ~Page(){}
    Page(const string &s);
    Page(const HTML &);
    Page(const Page &);
    // builds the page from already fetched HTML and downloads the embedded
    // images it refers to
    Page(const URL &url, string &mime, string &binFile, DateTime &dt);

    bool operator== (const Page& h) const;
    HTML extractHTML();
    // number of embedded objects
    int numOfFiles() const;
    // accessors for embedded object number i
    URL getUrl(int i) const;
    string getText( int i) const;
    string getMime( int i) const;
    void addEmbObject(const URL &u, const string &mime, const string &s);

    bool IsDefined() const;
    void SetDefined(bool d) ;
    Flob *GetFLOB(const int i);
    int NumOfFLOBs() const;
    // NOTE(review): HTML defines Sizeof() (lower case 'o'); this member is
    // spelled SizeOf() and therefore is a different function - confirm
    // which spelling the Attribute interface expects.
    size_t SizeOf() const;
    int Compare(const Attribute*) const;
    bool Adjacent (const Attribute*)const;
    void CopyFrom(const Attribute *arg);
    Page* Clone() const;

    static const string BasicType() { return "page"; }
    static const bool checkType(const ListExpr type){
      return listutils::isSymbol(type, BasicType());
    }

  private:

    /*Class ~HTTPSocket~

      This inner class of Page encapsulates all details of the socket
      implementation and of the page request, which depends on the
      http protocol. It is a private inner class because, up to now, the
      page class is the only object connecting to the web.

     */
    class HTTPSocket
    {
      public:
        enum HTTPProtocol {HTTP_10, HTTP_11};
        // connects to webAddr:port; see the constructor definition
        HTTPSocket(string webAddr, string filePath, HTTPProtocol proto,
                   string port);
        inline const string getServerAddress() {return WebAddr;}

        //returns the string representation of a valid http get request
        const string getGetRequest();
        inline Socket * getSocket() {return s;}
        bool parseHTTPResponse(vector<string> serverResponse);
        inline string getContentType() {return contentType;}
        inline int getContentLength() {return contentLength;}
        inline DateTime getLastModified() {return lastModified;}
        inline bool getSuccessResponded() {return successResponded;}
          inline bool Close() { return s->Close();}
        inline bool getChunked(){ return isChunked;}
      private:
        // request parameters
        string WebAddr;
        string FilePath;
        HTTPProtocol Protocol;
        string Port;

        // values extracted from the server response
        string contentType;
        int contentLength;
        DateTime lastModified;
        DateTime responseDate;
        bool successResponded;
        bool isChunked;

        // NOTE(review): no destructor is declared here, so nothing in this
        // class deletes the socket - confirm who owns it.
        Socket *s;
        bool setLastModified(string s);
        bool setResponseDate(string s);
        DateTime setDateTime(string s);
        string getMonthNumFromName(string monthName);
    };
  public:
    // fetches `url` via http; see the definition for the result encoding
    static string getFromWeb(URL url, string &mime, bool &MimeIsEqual,
                             DateTime &dt, bool onlyHtml = false);
  private:
    // position (offset, length in bytes) of one entry inside a data FLOB
    struct FLOBIndex
    {
      int offset;
      int len;
    };
    int numOfEmbeddedObjects;
    // each index array describes the entries of the FLOB declared after it
    DbArray<FLOBIndex> embUrlIds;
    Flob embUrls;
    DbArray<FLOBIndex> binIDs;
    Flob binFiles;
    DbArray<FLOBIndex> mimeIDs;
    Flob mimeTypes;

    bool allocateOneElem(int BytesOfData, int BytesOfURL, int BytesOfMime);
    bool allocateSpaceInArray(DbArray<FLOBIndex> *dba, int numOfBytes);
    URL getURLFromString(string &s) const;
    bool checkEmbUrl(URL &u);
    static const int MAXBUFFERSIZE = 1000000;
};

/********************OVERWRITING ATTRIBUTE************************/

// Forwards to HTML::IsDefined().
bool Page::IsDefined() const
{
  return HTML::IsDefined();
}

// Forwards to HTML::SetDefined().
void Page::SetDefined(bool d)
{
  HTML::SetDefined(d);
}

// Maps a FLOB index to a member. The page's own six FLOBs come first
// (0 embUrlIds, 1 embUrls, 2 binIDs, 3 binFiles, 4 mimeIDs, 5 mimeTypes);
// higher indices are shifted down and delegated to HTML::GetFLOB.
// Indices outside the range yield NULL.
Flob* Page::GetFLOB(const int i)
{
  #ifdef _DEBUG_JPS
    cout << "FLOB* Page::GetFLOB(const int i):"  << i << endl;
    cout << HTML::NumOfFLOBs() << endl;
    cout << NumOfFLOBs() << endl;
   #endif
  const int total = NumOfFLOBs();
  const int ownFlobs = total - HTML::NumOfFLOBs();

  if (i < ownFlobs){
    if (i == 0) return &embUrlIds;
    if (i == 1) return &embUrls;
    if (i == 2) return &binIDs;
    if (i == 3) return &binFiles;
    if (i == 4) return &mimeIDs;
    if (i == 5) return &mimeTypes;
    return NULL;
  }

  if (i >= total){
    __TRACE__
    return NULL;
  }

  // remaining indices belong to the HTML base class
  return HTML::GetFLOB(i - ownFlobs);
}

// Six FLOBs of the page itself plus those of the HTML base class.
int Page::NumOfFLOBs() const
{
  __TRACE__
  return 6 + HTML::NumOfFLOBs();
}

// Size in bytes of a Page object (sizeof(Page)).
// NOTE(review): the HTML counterpart is spelled Sizeof() - confirm that
// the different capitalisation is intended.
size_t Page::SizeOf() const
{
  return sizeof(Page);
}

// No ordering is implemented: the argument is ignored and 0 is returned.
int Page::Compare(const Attribute*) const
{
  return 0;
}

// Adjacency is not defined for pages: always false.
bool Page::Adjacent (const Attribute*)const
{
  return false;
}

// Returns a heap-allocated copy created with the copy constructor.
Page* Page::Clone() const
{
  __TRACE__
  return new Page( *this );
}

void Page::CopyFrom(const Attribute* right)
{
  __TRACE__
  const Page *r = (const Page *)right;
  HTML::CopyFrom(right);

  numOfEmbeddedObjects = 0;
  for( int ii = 0; ii < r->numOfFiles(); ++ii)
  {
    addEmbObject(r->getUrl(ii), r->getMime(ii), r->getText(ii));
  }
}

/*
3.2.1 Implementation of Class-Operations of ~Page~

*/
// Builds a page from the string `s`, which is handed to the HTML
// constructor. The page starts without embedded objects.
Page::Page(const string &s)
: HTML(s), numOfEmbeddedObjects(0), embUrlIds(0), embUrls(0),
  binIDs(0), binFiles(0), mimeIDs(0), mimeTypes(0)
{
    #ifdef _DEBUG_JPS_3
  cout << "Page::Page(const string &s)" << endl;
  #endif
  __TRACE__
}

// Builds a page from an HTML object. No embedded objects are loaded: the
// counter and all embedded-object containers start empty.
Page::Page(const HTML &h)
: HTML(h), numOfEmbeddedObjects(0), embUrlIds(0), embUrls(0),
  binIDs(0), binFiles(0), mimeIDs(0), mimeTypes(0)
{
  //NOT USED!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  #ifdef _DEBUG_JPS_3
  cout << "Page::Page(const HTML &h)" << endl;
  #endif
  //generate a page object without embedded urls
  //the number of embedded objects has to be set to 0
 __TRACE__
}

// Copy constructor: the HTML part is copied by the base class, the
// embedded objects are re-added one by one from `p`.
Page::Page(const Page &p)
: HTML(p),
  numOfEmbeddedObjects(0),
  embUrlIds(0), embUrls(0),
  binIDs(0), binFiles(0),
  mimeIDs(0), mimeTypes(0)
{
 __TRACE__
  int idx = 0;
  while (idx < p.numOfFiles())
  {
    addEmbObject(p.getUrl(idx), p.getMime(idx), p.getText(idx));
    ++idx;
  }
}

// Builds a page from already fetched HTML (`binFile`, located at `url`,
// last modified at `dt`) and then downloads every URL found in the
// document that checkEmbUrl accepts, storing each as an embedded object.
// URLs whose host is "error" are skipped.
Page::Page(const URL &url, string &mime, string &binFile, DateTime &dt)
: HTML(dt, binFile, url),
  numOfEmbeddedObjects(0),
  embUrlIds(0), embUrls(0),
  binIDs(0), binFiles(0),
  mimeIDs(0), mimeTypes(0)
{
  __TRACE__
  #ifdef _DEBUG_JPS
    cout << "Page::Page(const URL &url, string &mime,"
            " string &binFile, DateTime &dt) "
         << HTML::getNumberOfUrls() << endl;
    #endif
  for (int i= 0; i < HTML::getNumberOfUrls(); i++)
  {
    #ifdef _DEBUG_JPS
    cout << "Page::Page(const URL &url, string &mime,"
            " string &binFile, DateTime &dt) " << i<< endl;
    #endif
    URL embUrl(HTML::getUrl(i));//getEmbUrl(i);
    if( !checkEmbUrl(embUrl) )
    {
      continue;
    }

    // per-object outputs of getFromWeb
    DateTime fetchedDate;
    string fetchedMime;
    bool mimeMustMatch = false;
    if (embUrl.getHost() != "error")
    {
      string payload = getFromWeb(embUrl, fetchedMime, mimeMustMatch,
                                  fetchedDate);
      addEmbObject(embUrl, fetchedMime, payload);
    }
  }
}

// True if the path of `u` ends in one of the image extensions
// jpg, jpeg, gif, bmp, png or tif (exact, case-sensitive match).
// A dot at position 0 or no dot at all yields false.
bool Page::checkEmbUrl(URL &u)
{
  const string path = u.getPath();
  const int dot = path.rfind(".");
  if (dot <= 0)
    return false;

  const string ext = path.substr(dot + 1);
  static const char* const imageExt[] =
    { "jpg", "jpeg", "gif", "bmp", "png", "tif" };
  const size_t count = sizeof(imageExt) / sizeof(imageExt[0]);
  for (size_t k = 0; k < count; ++k)
  {
    if (ext == imageExt[k])
      return true;
  }
  return false;
}

// Two pages are equal if they have the same number of embedded objects
// and every object agrees in URL, mime type and data. The HTML part is
// not compared.
bool Page::operator== (const Page& h) const
{
  __TRACE__
  if (numOfFiles() != h.numOfFiles())
    return false;

  for (int i = 0; i < numOfFiles(); i++)
  {
    const bool sameUrl  = (getUrl(i) == h.getUrl(i));
    if (!sameUrl) return false;
    const bool sameMime = (getMime(i) == h.getMime(i));
    if (!sameMime) return false;
    const bool sameData = (getText(i) == h.getText(i));
    if (!sameData) return false;
  }
  return true;
}

// Returns the HTML part of this page as a copy (the object is converted
// to its base class by value).
HTML Page::extractHTML()
{
  __TRACE__
  return *this;
}

// Number of embedded objects stored in this page.
int Page::numOfFiles() const
{
  __TRACE__
  #ifdef _DEBUG_JPS_3
  cout << "Page::numOfFiles()" << numOfEmbeddedObjects <<endl;
  #endif
  return numOfEmbeddedObjects;
}

// Returns the URL of embedded object number i.
// For an index outside 0 .. numOfFiles()-1 the placeholder
// URL("http", "error", "error") is returned.
//
// The stored text is read into a heap buffer with a guaranteed zero
// terminator; the previous version used a variable length array on the
// stack and did not reject negative indices.
URL Page::getUrl(int i) const
{
  __TRACE__
  if(i >= 0 && i < numOfEmbeddedObjects)
  {
    //Get the right url flobindex..
    FLOBIndex getThisUrl;
    embUrlIds.Get(i, getThisUrl);

    //..and get the url..
    const int len = (getThisUrl.len > 0) ? getThisUrl.len : 0;
    vector<char> buf(len + 1, '\0');
    if (len > 0)
      embUrls.read(&buf[0], len, getThisUrl.offset);
    string result(&buf[0]);
    return getURLFromString(result);
  }
  return URL("http", "error", "error"); //TODO  Handle this!
}


// Returns the data of embedded object number i as stored by addEmbObject
// (the text up to the first zero byte).
// For an index outside 0 .. numOfFiles()-1 an error text is returned.
//
// The data is read into a heap buffer with a guaranteed zero terminator;
// the previous version placed it in a variable length array on the stack,
// which can overflow the stack for large objects, and did not reject
// negative indices.
string Page::getText( int i) const
{
  __TRACE__
  if(i >= 0 && i < numOfEmbeddedObjects)
  {
    //Get the right bin index..
    FLOBIndex getThisBin;
    binIDs.Get(i, getThisBin);

    //..and get the bin data..
    const int len = (getThisBin.len > 0) ? getThisBin.len : 0;
    vector<char> buf(len + 1, '\0');
    if (len > 0)
      binFiles.read(&buf[0], len, getThisBin.offset);
    return string(&buf[0]);
  }
  return "Error Page::GetText wrong Index!"; //TODO Handle this!
}

// Returns the mime type string of embedded object number i.
// For an index outside 0 .. numOfFiles()-1 an error text is returned.
//
// The text is read into a heap buffer with a guaranteed zero terminator;
// the previous version used a variable length array on the stack and did
// not reject negative indices.
string Page::getMime( int i) const
{
  __TRACE__
  if(i >= 0 && i < numOfEmbeddedObjects)
  {
    //Get the right mime index..
    FLOBIndex getThisMime;
    mimeIDs.Get(i, getThisMime);

    //..and get the mime text..
    const int len = (getThisMime.len > 0) ? getThisMime.len : 0;
    vector<char> buf(len + 1, '\0');
    if (len > 0)
      mimeTypes.read(&buf[0], len, getThisMime.offset);
    return string(&buf[0]);
  }
  return "Error Page::GetMime wrong Index!"; //TODO Handle this!
}

//Stores embedded object, containing an url, the binaries and the mime-type
// Stores one embedded object: its URL (as "protocol://host/path" text),
// its mime type and its data `s`, each including the terminating zero.
// Objects with an undefined URL, empty data or empty mime type are ignored.
//
// Each slot reserved by allocateOneElem has the length size()+1, which is
// exactly what c_str() provides. The previous version wrote len+1 bytes,
// i.e. it read one byte beyond the end of every string buffer.
void Page::addEmbObject(const URL &u, const string &mime, const string &s)
{
  __TRACE__

  //If the new object is valid..
  if (u.IsDefined() && (s.size() > 0) && (mime.size() > 0))
  {
    //Create an easy to use string representation of the url
    string s_url = u.getProtocol() + "://" + u.getHost() + u.getPath();

    if (allocateOneElem(s.size() +1, s_url.size()+1, mime.size()+1))
    {
      /******************URL**********************/
      FLOBIndex insertUrlHere;
      embUrlIds.Get(numOfEmbeddedObjects - 1, insertUrlHere);
      embUrls.write(s_url.c_str(),
                    insertUrlHere.len,
                    insertUrlHere.offset);

      /******************MIME**********************/
      FLOBIndex insertMimeHere;
      mimeIDs.Get(numOfEmbeddedObjects - 1, insertMimeHere);
      mimeTypes.write(mime.c_str(),
                      insertMimeHere.len,
                      insertMimeHere.offset);

      /******************BINARY**********************/
      FLOBIndex insertBinHere;
      binIDs.Get(numOfEmbeddedObjects - 1, insertBinHere);
      binFiles.write(s.c_str(),
                     insertBinHere.len,
                     insertBinHere.offset);
    }
  }
}

bool Page::allocateOneElem(int BytesOfData, int BytesOfURL, int BytesOfMime)
{
  //Inc the number of embedded objects
  __TRACE__
  ++numOfEmbeddedObjects;

  //Prepare the bin and url DBArrays to take the new object..
  __TRACE__
  if (allocateSpaceInArray(&binIDs, BytesOfData)
    && allocateSpaceInArray(&embUrlIds, BytesOfURL)
    && allocateSpaceInArray(&mimeIDs, BytesOfMime))
  {
    //.. and allocate the right amount of memory in the flobs!
    FLOBIndex resizeUrlIndex;
    embUrlIds.Get(numOfEmbeddedObjects - 1, resizeUrlIndex);
    embUrls.resize(embUrls.getSize() + resizeUrlIndex.len + 1);

    FLOBIndex resizeBinIndex;
    binIDs.Get(numOfEmbeddedObjects - 1, resizeBinIndex);
    binFiles.resize(binFiles.getSize() + resizeBinIndex.len + 1);

    FLOBIndex resizeMimeIndex;
    mimeIDs.Get(numOfEmbeddedObjects - 1, resizeMimeIndex);
    mimeTypes.resize(mimeTypes.getSize() + resizeMimeIndex.len + 1);

    return true;
  }

  //Something went wrong - no element can be added (should not occur)!
  --numOfEmbeddedObjects;
  return false;
}


// Appends an index entry of numOfBytes bytes to `dba`. The entry starts
// where the previous one ends (offset + len of the entry at position
// numOfEmbeddedObjects - 2), or at offset 0 for the first object.
// Always returns true.
bool Page::allocateSpaceInArray(DbArray<FLOBIndex> *dba, int numOfBytes)
{
  __TRACE__
  int startOffset = 0;
  if (numOfEmbeddedObjects > 1)
  {
  __TRACE__
    //Continue directly behind the previous element..
    FLOBIndex previous;
    dba->Get(numOfEmbeddedObjects - 2, previous);
    startOffset = previous.offset + previous.len;
  }
  else
  {
  __TRACE__
    //..or start at the beginning if the element is the first!
  }

  __TRACE__
  FLOBIndex entry;
  entry.offset = startOffset;
  entry.len = numOfBytes;

  dba->Append(entry);
  __TRACE__
  return true;
}

// Splits a string of the form <protocol>://<host>/<path> into a URL.
// The "://" separator is searched from position 1 on. Without a '/' after
// the host the whole remainder is the host and the path is empty.
// If the separator is missing, a default-constructed URL is returned.
//
// The fallback used to be `*(new URL())`, which leaked one URL object per
// call; a temporary gives the same result without the allocation.
URL Page::getURLFromString(string &s) const
{
  int pos1 = s.find("://", 1);
  if (pos1 != (int)string::npos)
  {
    string s_prot(""), s_myHost(""), s_path("");
    s_prot.append(s, 0, pos1);
    int pos2 = s.find("/", pos1 + 3);
    if (pos2 != (int)string::npos)
    {
      s_myHost.append(s, pos1+3, pos2 - (pos1 + 3));
      s_path.append(s, pos2, s.size());
    }
    else s_myHost.append(s, pos1+3, s.size());
    return URL(s_prot, s_myHost, s_path);
  }
  return URL();
}


/*
3.2.1.1 If the Page as HTML instance is not defined and the content type is text/html,
the data will be used to fill the instance as an html object. Otherwise everything is
interpreted as an embedded object of the page instance itself and is added as such.
TODO: The return type must be defined - it will not be a string!

*/
// Fetches `url` with an HTTP/1.1 GET request on port 80.
//
//   url          resource to fetch (host and path are used)
//   mime         in:  if non-empty, the expected content type
//                out: content type reported by the server, or "error"
//   MimeIsEqual  in:  if true, a content type differing from `mime`
//                     aborts the transfer and "" is returned
//                out: true if the final mime type contains
//                     HTML::BasicType(), false otherwise
//   dt           receives the last-modified time reported by the server
//   onlyHtml     if true, a content type that does not contain
//                HTML::BasicType() aborts the transfer and "" is returned
//
// Returns the body unchanged when MimeIsEqual ends up true; otherwise the
// body (or the text "not found" if it is empty) is returned Base64 encoded.
//
// Compared with the previous version every read from the socket stream is
// checked. Before, a failed getline()/get() went unnoticed (the test was a
// literal `if (true)`), so after end of stream the loop kept appending the
// last byte or the last header line for as long as the socket reported ok.
string Page::getFromWeb(URL url, string &mime, bool &MimeIsEqual,
                       DateTime &dt, bool onlyHtml)
{
  __TRACE__

  //Set the HTTP Protocol
  HTTPSocket::HTTPProtocol httpProt;
  httpProt = HTTPSocket::HTTP_11;

  //Get an Instance of the HTTPSocket class..
  HTTPSocket httpSock(url.getHost(), url.getPath(), httpProt, "80");
  //TODO: only http supported!

  //..and use the os independent socket!
  Socket *s = httpSock.getSocket();

  //Get the corresponding http GET request as a string..
  string req = httpSock.getGetRequest();
  string result("");

  if (s && s->IsOk())
  {

  //..and write it to the socket!
  iostream& io = s->GetSocketStream();
  io << req << endl;

  string line("");
  bool readyForBinData = false;
  vector<string> serverResponse;
  int size = 0;
  int packetsize = 0;
  char byte = 0x00;

  while(s->IsOk())
  {
    if (!readyForBinData) //Server http response not completely received yet..
    {
      if (!getline(io,line))
      {
        break; //stream ended inside the header
      }
      //..response finalized..
      if (line.find("\r") == 0) //..parse it!
      {
        readyForBinData = httpSock.parseHTTPResponse(serverResponse);
        if (!readyForBinData)
        {
          result = "not ready for response";
          mime = "error";
          Base64 b;
          string binBytes;
          b.encode( result.c_str(), result.size(), binBytes );
          httpSock.Close();
          return binBytes;
        }
        if (mime.size() > 0) //stops and returns false if different mime types
        {
          if((mime.find(httpSock.getContentType(), 0) == string::npos))
          {
            if (MimeIsEqual)
            {
              MimeIsEqual = false;
              httpSock.Close();
              return "";
            }
            MimeIsEqual = false;
          }
        }
        if( onlyHtml )
        {
          mime = httpSock.getContentType();
          if((mime.find(HTML::BasicType()) == string::npos)){
            MimeIsEqual = false;
            httpSock.Close();
            return "";
          }
          onlyHtml = false;
        }
        if( !httpSock.getChunked())
        {
          result.reserve(httpSock.getContentLength()+1);
        }
      }
      else //..append the line to the server's response!
      {
        serverResponse.push_back(line);
      }
    }
    else //..receive the binary data!
    {
      if(httpSock.getChunked() && packetsize<=0)
      {
        if (!getline(io,line))
        {
          break; //stream ended between two chunks
        }
        if(line.length()>1)  //perhaps empty line
        {
          //files come in packets of n bytes, the size is given in hex
          packetsize = (int)strtol(line.c_str(),NULL,16);
          if(!packetsize){break;}
          result.reserve(result.size() + packetsize);
        }
      }
      else
      {
        if (!io.get(byte))
        {
          break; //no more data; the socket is closed below
        }
        result += byte;
        size++;
        if(httpSock.getChunked()) --packetsize;

        if ((httpSock.getContentLength() > 0) &&
          (size >= httpSock.getContentLength())) {break;}
      }
    }
   }
  mime = httpSock.getContentType();
  dt = httpSock.getLastModified();
  httpSock.Close();
  __TRACE__
  }
  MimeIsEqual = false;
  if( mime.find(HTML::BasicType()) != string::npos)
  {
    MimeIsEqual = true;
  }
  if( !MimeIsEqual )
  {
    //binary data encode base64
    if( !result.size() )
    {
      result = "not found";
      mime = "error";
    }
    Base64 b;
    string binBytes;
    b.encode( result.c_str(), result.size(), binBytes );
    return binBytes;
  }
  else { return result;}
}


/*
3.2.1 Implementation of Class-Operations of ~HTTPSocket~ - private inner class of Page

*/

/*
3.2.1.1 Allocates an os dependent socket and offers an instance of
abstract Socket type, hiding the os dependancy.

*/
// Stores the request parameters, resets the response fields
// (no content type, content length -1, not successful, not chunked),
// marks both date members as instants and connects to webAddr:port.
Page::HTTPSocket::HTTPSocket(string webAddr, string filePath,
                            HTTPProtocol proto, string port)
  : WebAddr(webAddr),
    FilePath(filePath),
    Protocol(proto),
    Port(port),
    contentType(""),
    contentLength(-1),
    successResponded(false),
    isChunked(false)
{
  lastModified.SetType(instanttype);
  responseDate.SetType(instanttype);

  // the connection is opened right away
  s = Socket::Connect(webAddr , port);
}

/*
3.2.1.2 Returns the http get request as const string.

*/
// Builds the request text
//   GET <FilePath> HTTP/1.x\r\nHost: <WebAddr>:<Port>\r\n
// with HTTP/1.0 for HTTP_10 and HTTP/1.1 otherwise.
const string Page::HTTPSocket::getGetRequest()
{
  string version;
  if (Protocol == HTTP_10)
    version = " HTTP/1.0";
  else
    version = " HTTP/1.1";

  string request("GET ");
  request += FilePath;
  request += version;
  request += "\r\nHost: " + WebAddr + ":" + Port + "\r\n";
  return request;
}

/*
3.2.1.3 Extracts the relevant items out of the strings given
by the vector. Will return true if there is no error transmitted
by the server.

Example:
HTTP/1.1 200 OK
Server: Apache/1.3.29 (Unix) PHP/4.3.4
Content-Length: (Größe von infotext.html in Byte)
Last-Modified: Sat, 28 Oct 2006 18:40:44 GMT
Content-Language: de
Content-Type: text/html
Connection: close

*/
// Scans the header lines of a server response and stores the items of
// interest in the members: success flag, content length, chunked transfer
// encoding, content type, last-modified date and response date.
//
// Returns true if a line containing both "200" and "OK" was seen, a content
// type was found (or the transfer is chunked) and a Date header was read.
// If no Last-Modified header was present, the response date is used for
// lastModified instead.
//
// Header values are extracted with bounds checks, so a truncated
// "Content-Length:" or "Content-Type:" line cannot raise std::out_of_range.
bool Page::HTTPSocket::parseHTTPResponse(vector<string> serverResponse)
{
  bool gotLastMod = false;
  bool gotDate = false;

  for (vector<string>::iterator iter = serverResponse.begin();
    iter != serverResponse.end(); ++iter)
  {
    const string& line = *iter;

    #ifdef _DEBUG_JPS
    //Protocol version, only reported for debugging
    if (line.find("HTTP/1.0", 0) != string::npos)
    {
      cout << "found HTTP/1.0 " << endl;
    }
    else if (line.find("HTTP/1.1", 0) != string::npos)
    {
      cout << "found HTTP/1.1 " << endl;
    }
    #endif

    if ((line.find("200", 0) != string::npos) &&
      (line.find("OK", 0) != string::npos))
    {
      successResponded = true;
      #ifdef _DEBUG_JPS
      cout << "success " << endl;
      #endif
    }

    else if (line.find("Content-Length:", 0) != string::npos)
    {
      const string::size_type colon = line.find(":", 14);
      if (colon != string::npos)
      {
        // Everything behind the colon; strtol skips leading blanks itself,
        // so "Content-Length:123" and "Content-Length: 123" both work.
        const string numStr = line.substr(colon + 1);
        contentLength = strtol(numStr.c_str(), 0, 10);
        #ifdef _DEBUG_JPS
        cout << "contentLength: " << contentLength << endl;
        #endif
      }
    }

    else if (line.find("Transfer-Encoding: chunked", 0) != string::npos)
    {
      isChunked = true;
      contentLength = -1;
      #ifdef _DEBUG_JPS
      cout << "CHUNKED: contentLength: " << contentLength << endl;
      #endif
    }

    else if (line.find("Content-Type:", 0) != string::npos)
    {
      if (line.find("text/html", 13) != string::npos)
      {
        contentType = "text/html";
        #ifdef _DEBUG_JPS
        cout << "contentType = text/html" << endl;
        #endif
      }
      else if (line.size() > 14)
      {
        //save the Content Type without deeper interpretation!
        contentType.assign(line, 14, line.size() - 14);
      }
      else
      {
        // header without a value: nothing to store
        contentType = "";
      }
    }

    else if (line.find("Connection:", 0) != string::npos)
    {
      //TODO: "close" / "keep-alive" are recognised but not evaluated yet
    }

    else if (line.find("Last-Modified: ", 0) != string::npos)
    {
      gotLastMod = setLastModified(line);
    }

    else if (line.find("Date: ", 0) != string::npos)
    {
      gotDate = setResponseDate(line);
    }
  }

  if (successResponded && ((contentType.size() > 0) || isChunked) && gotDate)
  {
    if (!gotLastMod) lastModified = responseDate;
    __TRACE__
    #ifdef _DEBUG_JPS
    cout << "parseHTTPResponse E N D E true!" << endl;
    #endif
    return true;
  }
  #ifdef _DEBUG_JPS
  cout << "parseHTTPResponse E N D E false!" << endl;
  #endif
  return false;
}

// Converts the given header line with setDateTime and stores the result as
// the response date. Always returns true.
bool Page::HTTPSocket::setResponseDate(string s)
{
  DateTime parsed = setDateTime(s);
  responseDate = parsed;
  #ifdef _DEBUG_JPS
    cout << "responseDate: " << responseDate.ToString() << endl;
  #endif
  return true;
}

// Converts a header line of the form
//   <Name>: DayName, DD Mon YYYY hh:mm:ss GMT
// into the string YEAR-MONTH-DAY-hh:mm:ss and reads that string into a
// DateTime instant.
//
// All substrings are extracted with bounds checks. If the line does not
// have the expected layout, the missing parts stay empty and the resulting
// (malformed) string is still passed to DateTime::ReadFrom, instead of
// std::out_of_range being thrown from string::assign.
DateTime Page::HTTPSocket::setDateTime(string s)
{
  DateTime result;
  result.SetType(instanttype);

  // Cut out the part between ", " and " GMT".
  string dtStr("");
  const string::size_type commaPos = s.find(",", 0);
  const string::size_type gmtPos = s.find("GMT", 0);
  if ((commaPos != string::npos) && (gmtPos != string::npos)
      && (gmtPos >= commaPos + 3))
  {
    dtStr.assign(s, commaPos + 2, gmtPos - commaPos - 3);
  }
  #ifdef _DEBUG_JPS_4
  cout << "dtStr.assign: |" << dtStr << "|" << endl;
  #endif

  //will be used to create a DateTime string!
  string dtFormattedString("");

  //..3rd the year (offset 7, four characters)..
  string dtElem = "";
  if (dtStr.size() > 7)
  {
    dtElem.assign(dtStr, 7, 4);
  }
  dtFormattedString += dtElem + "-";
  #ifdef _DEBUG_JPS_4
  cout << "year: |" << dtElem << "|" << endl;
  #endif

  //..2nd the month, looked up by its abbreviated name..
  dtElem = getMonthNumFromName(dtStr);
  dtFormattedString += dtElem + "-";
  #ifdef _DEBUG_JPS_4
  cout << "month: |" << dtElem << "|" << endl;
  #endif

  //1st store the day (first two characters)..
  dtElem = "";
  dtElem.assign(dtStr, 0, 2);
  dtFormattedString += dtElem + "-";
  #ifdef _DEBUG_JPS_4
  cout << "day: |" << dtElem << "|" << endl;
  #endif

  //..4th the hour:minutes:seconds (offset 12, eight characters)
  dtElem = "";
  if (dtStr.size() > 12)
  {
    dtElem.assign(dtStr, 12, 8);
  }
  dtFormattedString += dtElem;
  result.ReadFrom(dtFormattedString);
  #ifdef _DEBUG_JPS_4
  cout << "h:m:s: |" << dtElem << "|" << endl;
  cout << "secondo datetime: |" << dtFormattedString << "|" << endl;
  cout << "dateTime: " << result.ToString() << endl;
  #endif
  return result;
}

// Converts the given header line with setDateTime and stores the result as
// the last-modified date. Always returns true.
bool Page::HTTPSocket::setLastModified(string s)
{
  DateTime parsed = setDateTime(s);
  lastModified = parsed;
  #ifdef _DEBUG_JPS
  cout << "lastModified: " << lastModified.ToString() << endl;
  #endif
  return true;
}

// Returns the month number ("1" .. "12") of the first English three-letter
// month abbreviation, tested in calendar order, that occurs anywhere in
// monthName; returns "" if none occurs.
string Page::HTTPSocket::getMonthNumFromName(string monthName)
{
  static const char* const abbreviations[12] =
    { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
      "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
  static const char* const numbers[12] =
    { "1", "2", "3", "4", "5", "6",
      "7", "8", "9", "10", "11", "12" };

  for (int idx = 0; idx < 12; ++idx)
  {
    if (monthName.find(abbreviations[idx], 0) != std::string::npos)
    {
      return numbers[idx];
    }
  }
  return "";
}


/*
4 In/Out, Checking Functions and Type Construction of URL

4.1 List Representation and In/Out Functions of ~URL~

Example: The list representation of a URL is

STRING First, text Second, text Third
where First  is the protocol, e.g. http or ftp
      Second is the host, e.g. "//www.google.de"
      Third  is the path, e.g. /


*/

// Out function of ~url~: returns the list representation of the URL stored
// in value, obtained from URL::ToListExpr(false).
ListExpr
OutURL( ListExpr typeInfo, Word value )
{
  __TRACE__
  URL* urlValue = static_cast<URL*>(value.addr);
  return urlValue->ToListExpr(false);
}

// In function of ~url~. Expects a list (protocol host path) where protocol
// is a string atom and host and path are text atoms. A leading "//" of the
// host is removed. On success correct is set to true and a new URL is
// returned; otherwise the problems are reported via ErrorReporter, correct
// is set to false and a null address is returned.
Word
InURL( const ListExpr typeInfo, const ListExpr instance,
              const int errorPos, ListExpr& errorInfo, bool& correct )
{
  __TRACE__
  if ( nl->ListLength( instance ) != 3 )
  {
    ErrorReporter::ReportError("Wrong number of"
                               " params, expecting protocol,host,path");
    correct = false;
    return SetWord(Address(0));
  }

  ListExpr protItem = nl->First(instance);
  ListExpr hostItem = nl->Second(instance);
  ListExpr pathItem = nl->Third(instance);

  const bool wellTyped =
       nl->IsAtom(protItem) && nl->AtomType(protItem) == StringType
    && nl->IsAtom(hostItem) && nl->AtomType(hostItem) == TextType
    && nl->IsAtom(pathItem) && nl->AtomType(pathItem) == TextType;

  if ( !wellTyped )
  {
    // report every single violation, then give up
    if( !nl->IsAtom(protItem))
      ErrorReporter::ReportError("First not an atom");
    if( !nl->IsAtom(hostItem))
      ErrorReporter::ReportError("Second not an atom");
    if( !nl->IsAtom(pathItem))
      ErrorReporter::ReportError("Third not an atom");
    if ( nl->AtomType(protItem) != StringType )
      ErrorReporter::ReportError("First not a StringType");
    if ( nl->AtomType(hostItem) != TextType )
      ErrorReporter::ReportError("Second not a TextType");
    if ( nl->AtomType(pathItem) != TextType )
      ErrorReporter::ReportError("Third not a TextType");
    correct = false;
    return SetWord(Address(0));
  }

  string prot = nl->StringValue(protItem);
  string host = nl->Text2String(hostItem);
  string path = nl->Text2String(pathItem);

  // strip a leading "//" from the host part
  if( host.length() >= 2 && host[0] == '/' && host[1] == '/')
  {
    host.erase(0, 2);
  }

  correct = true;
  return SetWord(new URL(prot, host, path));
}

// Create function of ~url~: returns a new URL constructed from "http://".
Word
CreateURL( const ListExpr typeInfo )
{
  __TRACE__
  URL* fresh = new URL( "http://" );
  return SetWord( fresh );
}

void
DeleteURL( const ListExpr typeInfo, Word& w )
{
  __TRACE__
//  ((URL*)w.addr)->destroy();
  delete (URL *)w.addr;
  w.addr = 0;
}

void
CloseURL( const ListExpr typeInfo, Word& w )
{
  __TRACE__
  delete (URL *)w.addr;
//  w.addr = 0;
}

// Clone function of ~url~: returns the result of URL::Clone for the URL
// held by w.
Word
CloneURL( const ListExpr typeInfo, const Word& w )
{
  __TRACE__
  URL* original = static_cast<URL*>(w.addr);
  return SetWord( original->Clone() );
}

int
SizeOfURL()
{
  __TRACE__
  return sizeof(URL);
}

/*

4.2 Kind Checking Function and Property of ~URL~

This function checks whether the type constructor is applied correctly.

*/
// Kind check of ~url~: true iff type equals the symbol URL::BasicType().
bool
CheckURL( ListExpr type, ListExpr& errorInfo )
{
  __TRACE__
  const bool isUrlType = nl->IsEqual( type, URL::BasicType() );
  return isUrlType;
}

// Property function of ~url~: returns a two-element list consisting of the
// five column headings and the five matching descriptions of the type.
// The remark literals are joined with a blank so that the text reads
// "host, path type text." instead of "host, pathtype text.".
ListExpr
URLProperty()
{
  __TRACE__
  ListExpr headings =
    nl->FiveElemList(nl->StringAtom("Signature"),
                     nl->StringAtom("Example Type List"),
                     nl->StringAtom("List Rep"),
                     nl->StringAtom("Example List"),
                     nl->StringAtom("Remarks"));
  ListExpr descriptions =
    nl->FiveElemList(nl->StringAtom("-> DATA"),
                     nl->StringAtom(URL::BasicType()),
                     nl->StringAtom("(<protocol> <host> <path>)"),
                     nl->StringAtom("(http //dict.leo.org /)"),
                     nl->StringAtom("prot.: STRING<46 bytes, host, path "
                                    "type text."));
  return nl->TwoElemList(headings, descriptions);
}

// Cast function of ~url~: default-constructs a URL in place at addr with
// placement new and returns the resulting pointer.
void* CastURL( void* addr )
{
  return (new (addr) URL);
}

/*
4.3 Creation of the Type Constructor Instance of ~URL~

*/
// Type constructor object for ~url~. It is built from the type name
// returned by URL::BasicType() and the support functions defined above.
TypeConstructor url(   URL::BasicType(),
                URLProperty,                 // property function
                OutURL, InURL,               // out and in function
                0, 0,                        // two slots left empty (null)
                CreateURL, DeleteURL,        // create and delete function
                OpenAttribute<URL>, SaveAttribute<URL>, // generic open/save
                CloseURL, CloneURL,          // close and clone function
                CastURL, SizeOfURL,          // cast and sizeof function
                CheckURL );                  // kind checking function


/*
5 In/Out, Checking Functions and Type Construction of HTML

5.1 List Representation and In/Out Functions of ~HTML~

Example: The list representation of a HTML is

List format: ( datetime text url )
Attributes: LastChange, source, sourceURL
Example:

----
let html1 = [const html value ((instant (10 10 2006 10 27 18)) <text>test</text---> (url ("http" <text>www.xx.de</text---> <text>/</text---> )))]
----

*/

// Out function of ~html~: returns the list representation of the HTML
// object stored in value, obtained from HTML::ToListExpr(false).
ListExpr
OutHTML( ListExpr typeInfo, Word value )
{
  __TRACE__
  HTML* htmlValue = static_cast<HTML*>(value.addr);
  return htmlValue->ToListExpr(false);
}

// In function of ~html~. Expects a list
//   ((instant <value>) <text> (url <value>))
// where the text atom holds the base64 encoded page source. On success
// correct is true and a new HTML object is returned; otherwise an error is
// reported via ErrorReporter and a null address is returned.
//
// The decode buffer is a vector, so an allocation failure is not silently
// dereferenced, and the temporary URL created by InURL is released after
// the HTML object has been built from it.
Word
InHTML( const ListExpr typeInfo, const ListExpr instance,
              const int errorPos, ListExpr& errorInfo, bool& correct )
{
  __TRACE__
  if ( nl->ListLength( instance ) != 3 )
  {
    __TRACE__
    ErrorReporter::ReportError("Wrong number of params, expecting"
                               " lastModified,source,sourceUrl");
    correct = false;
    return SetWord(Address(0));
  }

  ListExpr First = nl->First(instance);    //DateTime
  ListExpr Second = nl->Second(instance);  //Text (FLOB)
  ListExpr Third = nl->Third(instance);    //URL

  const bool wellFormed =
       nl->ListLength( First ) == 2
    && nl->IsEqual(nl->First(First), Instant::BasicType())
    && nl->IsAtom(Second) && nl->AtomType(Second) == TextType
    && nl->ListLength( Third ) == 2
    && nl->IsEqual(nl->First(Third), URL::BasicType());

  if ( !wellFormed )
  {
    __TRACE__
    // report the first violation found
    if( nl->ListLength( First ) != 2 )
      ErrorReporter::ReportError("First not an list of length 2");
    else if( !nl->IsAtom(Second))
      ErrorReporter::ReportError("Second not an atom");
    else if( nl->ListLength( Third ) != 2)
      ErrorReporter::ReportError("Third not a list of length 2");
    else if (!(nl->IsEqual(nl->First(First), Instant::BasicType())))
      ErrorReporter::ReportError("First not an instant");
    else if (!(nl->AtomType(Second) == TextType))
      ErrorReporter::ReportError("Second not a TextType");
    else
      ErrorReporter::ReportError("Third not a url");
    correct = false;
    return SetWord(Address(0));
  }

  DateTime date(instanttype);
  date.ReadFrom(First,true);
  string text = nl->Text2String(Second);
  __TRACE__

  // The text atom is base64 encoded; decode it into a zero-terminated
  // buffer and continue with the decoded source.
  Base64 b;
  int sizeDecoded = b.sizeDecoded( text.size() );
  vector<char> decoded(sizeDecoded + 1, 0);
  int result = b.decode( text, &decoded[0] );
  assert( result <= sizeDecoded );
  decoded[result] = 0;
  text = &decoded[0];
  __TRACE__

  correct = true;
  Word u = InURL( Third, nl->Second(Third),errorPos,errorInfo, correct );
  if( !correct )
  {
    ErrorReporter::ReportError("Error in reading url in InHTML");
    return SetWord(Address(0));
  }

  URL *url = (URL*)u.addr;
  HTML* newHtml = new HTML(date, text, *url);
  // NOTE(review): assumes the HTML constructor copies the URL, as
  // addEmbObject is assumed to do in InPage, which deletes its URL too.
  delete url;
  return SetWord(newHtml);
}

// Create function of ~html~: returns a new HTML object built from "".
Word
CreateHTML( const ListExpr typeInfo )
{
  __TRACE__
  HTML* fresh = new HTML( "" );
  return SetWord( fresh );
}

void
DeleteHTML( const ListExpr typeInfo, Word& w )
{
  __TRACE__
  delete (HTML *)w.addr;
  w.addr = 0;
}

void
CloseHTML( const ListExpr typeInfo, Word& w )
{
  __TRACE__
  delete (HTML *)w.addr;
  w.addr = 0;
}

// Clone function of ~html~: returns the result of HTML::Clone for the
// object held by w.
Word
CloneHTML( const ListExpr typeInfo, const Word& w )
{
  __TRACE__
  HTML* original = static_cast<HTML*>(w.addr);
  return SetWord( original->Clone() );
}

int
SizeOfHTML()
{
  __TRACE__
  return sizeof(HTML);
}

/*

5.2 Kind Checking Function and Property of ~HTML~

This function checks whether the type constructor is applied correctly.

*/
// Kind check of ~html~: true iff type equals the symbol HTML::BasicType().
bool
CheckHTML( ListExpr type, ListExpr& errorInfo )
{
  __TRACE__
  const bool isHtmlType = nl->IsEqual( type, HTML::BasicType() );
  return isHtmlType;
}

// Property function of ~html~: returns a two-element list consisting of
// the five column headings and the five matching descriptions of the type.
ListExpr
HTMLProperty()
{
  __TRACE__
  ListExpr headings =
    nl->FiveElemList(nl->StringAtom("Signature"),
                     nl->StringAtom("Example Type List"),
                     nl->StringAtom("List Rep"),
                     nl->StringAtom("Example List"),
                     nl->StringAtom("Remarks"));
  ListExpr descriptions =
    nl->FiveElemList(nl->StringAtom("-> DATA"),
                     nl->StringAtom(HTML::BasicType()),
                     nl->StringAtom(
                       "(<datetime: lastchange><text source> <url>)"),
                     nl->StringAtom("(list representation)"),
                     nl->StringAtom("url has the type url"));
  return nl->TwoElemList(headings, descriptions);
}

// Cast function of ~html~: default-constructs an HTML object in place at
// addr with placement new and returns the resulting pointer.
void* CastHTML( void* addr )
{
  return (new (addr) HTML);
}

/*
5.3 Creation of the Type Constructor Instance of ~HTML~

*/
// Type constructor object for ~html~. It is built from the type name
// returned by HTML::BasicType() and the support functions defined above.
TypeConstructor html(   HTML::BasicType(),
                HTMLProperty,                // property function
                OutHTML, InHTML,             // out and in function
                0, 0,                        // two slots left empty (null)
                CreateHTML, DeleteHTML,      // create and delete function
                OpenAttribute<HTML>, SaveAttribute<HTML>, // generic open/save
                CloseHTML, CloneHTML,        // close and clone function
                CastHTML, SizeOfHTML,        // cast and sizeof function
                CheckHTML );                 // kind checking function


/*
6 In/Out, Checking Functions and Type Construction of Page

5.1 List Representation and In/Out Functions of ~Page~

Example: The list representation of a Page is

List format: (html (url text string)*)
Attributes: the html part is inherited, followed by (EmbeddedURL binFile mime)*
Example:

----
see at the top of the class Page
----

*/

// Out function of ~page~: the result list starts with the list form of the
// HTML part (ToListExpr(true)) and continues with one (url text mime)
// triple for each embedded file of the page.
ListExpr
OutPage( ListExpr typeInfo, Word value )
{
  __TRACE__
  Page* pPage = (Page*)(value.addr);
  const int noObjects = pPage->numOfFiles();

  ListExpr pageStart = nl->OneElemList(((HTML*)pPage)->ToListExpr(true));
  ListExpr lastElem = pageStart;

  int idx = 0;
  while( idx < noObjects )
  {
    __TRACE__
    lastElem = nl->Append( lastElem,
                 nl->ThreeElemList(
                   pPage->getUrl(idx).ToListExpr(true),
                   nl->TextAtom(pPage->getText(idx)),
                   nl->StringAtom(pPage->getMime(idx))));
    ++idx;
  }
  __TRACE__
  return pageStart;
}

// In function of ~page~. Expects a list
//   ((html <value>) ((url <value>) <text> <string>)*)
// i.e. the HTML part followed by any number of embedded objects, each given
// as url, text content and mime type. On success a new Page is returned; on
// failure a null address is returned.
//
// Temporary objects are released on every path: the HTML object produced by
// InHTML is deleted once the Page has been built from it, and the partially
// filled Page is deleted when an embedded object turns out to be invalid.
Word
InPage( const ListExpr typeInfo, const ListExpr instance,
              const int errorPos, ListExpr& errorInfo, bool& correct )
{
  __TRACE__
  const bool startsWithHtml =
       nl->ListLength( instance ) >= 1
    && nl->ListLength( nl->First(instance) ) == 2
    && nl->IsEqual(nl->First(nl->First(instance)), HTML::BasicType());

  if ( !startsWithHtml )
  {
    __TRACE__
    ErrorReporter::ReportError("Wrong number of params or not a html"
                             " as first, expecting html,(url,text, string)*");
    correct = false;
    return SetWord(Address(0));
  }

  ListExpr htmlList = nl->First(instance);    //html
  const int nrOfEmb = nl->ListLength(instance) - 1;
  correct = true;
  Word h = InHTML( htmlList, nl->Second(htmlList),
                   errorPos, errorInfo, correct );
  if( !correct )
  {
    __TRACE__
    ErrorReporter::ReportError("page has no correct html as first element");
    return SetWord(Address(0));
  }

  HTML *html = (HTML*)h.addr;
  Page *newpage = new Page(*html);
  // NOTE(review): assumes Page(const HTML&) copies its argument, just as
  // addEmbObject is assumed to copy the URL that is deleted below.
  delete html;

  //now lists of (url text string)
  ListExpr remaining = nl->Rest(instance);
  for( int ii=0; ii < nrOfEmb; ii++)
  {
    ListExpr emblist = nl->First(remaining);
    remaining = nl->Rest(remaining);

    const bool wellFormed =
         nl->ListLength( emblist ) == 3
      && nl->IsEqual(nl->First(nl->First(emblist)), URL::BasicType())
      && nl->IsAtom(nl->Second(emblist))
      && nl->AtomType(nl->Second(emblist)) == TextType
      && nl->IsAtom(nl->Third(emblist))
      && nl->AtomType(nl->Third(emblist)) == StringType;

    if ( !wellFormed )
    {
      __TRACE__
      delete newpage;
      correct = false;
      return SetWord(Address(0));
    }

    Word u = InURL( nl->First(emblist),
      nl->Second(nl->First(emblist)),errorPos,errorInfo, correct );
    if( !correct )
    {
      __TRACE__
      ErrorReporter::ReportError("emb obj has not"
                                 " the right list structure");
      delete newpage;
      return SetWord(Address(0));
    }

    URL *url = (URL*)u.addr;
    string text = nl->Text2String(nl->Second(emblist));
    string mime = nl->StringValue(nl->Third(emblist));
    newpage->addEmbObject(*url,mime,text);
    delete url;
  }
  return SetWord(newpage);
}

// Create function of ~page~: returns a new Page built from "".
Word
CreatePage( const ListExpr typeInfo )
{
  __TRACE__
  Page* fresh = new Page( "" );
  return SetWord( fresh );
}

void
DeletePage( const ListExpr typeInfo, Word& w )
{
  __TRACE__
  delete (Page *)w.addr;
  w.addr = 0;
}

void
ClosePage( const ListExpr typeInfo, Word& w )
{
  __TRACE__
  delete (Page *)w.addr;
  w.addr = 0;
}

// Clone function of ~page~: returns the result of Page::Clone for the
// object held by w.
Word
ClonePage( const ListExpr typeInfo, const Word& w )
{
  __TRACE__
  Page* original = static_cast<Page*>(w.addr);
  return SetWord( original->Clone() );
}

int
SizeOfPage()
{
  __TRACE__
  return sizeof(Page);
}

/*

5.2 Kind Checking Function and Property of ~Page~

This function checks whether the type constructor is applied correctly.

*/
// Kind check of ~page~: true iff type equals the symbol Page::BasicType().
bool
CheckPage( ListExpr type, ListExpr& errorInfo )
{
  __TRACE__
  const bool isPageType = nl->IsEqual( type, Page::BasicType() );
  return isPageType;
}

// Property function of ~page~: returns a two-element list consisting of
// the five column headings and the five matching descriptions of the type.
ListExpr
PageProperty()
{
  __TRACE__
  ListExpr headings =
    nl->FiveElemList(nl->StringAtom("Signature"),
                     nl->StringAtom("Example Type List"),
                     nl->StringAtom("List Rep"),
                     nl->StringAtom("Example List"),
                     nl->StringAtom("Remarks"));
  ListExpr descriptions =
    nl->FiveElemList(nl->StringAtom("-> DATA"),
                     nl->StringAtom(Page::BasicType()),
                     nl->StringAtom("(<html>(<url text string>)*)"),
                     nl->StringAtom("(list representation)"),
                     nl->StringAtom(
                       "<url text mimetype> are the embedded objects"));
  return nl->TwoElemList(headings, descriptions);
}

// Cast function of ~page~: default-constructs a Page in place at addr with
// placement new and returns the resulting pointer.
void* CastPage( void* addr )
{
  return (new (addr) Page);
}

/*
5.3 Creation of the Type Constructor Instance of ~Page~

*/
// Type constructor object for ~page~. It is built from the type name
// returned by Page::BasicType() and the support functions defined above.
TypeConstructor page(   Page::BasicType(),
                PageProperty,                // property function
                OutPage, InPage,             // out and in function
                0, 0,                        // two slots left empty (null)
                CreatePage, DeletePage,      // create and delete function
                OpenAttribute<Page>, SaveAttribute<Page>, // generic open/save
                ClosePage, ClonePage,        // close and clone function
                CastPage, SizeOfPage,        // cast and sizeof function
                CheckPage );                 // kind checking function


/*
6 Creating Operators

6.1.1 Type Mapping of Operator ~protocol,host,filename~

*/
// Type mapping shared by ~protocol~, ~host~ and ~filename~:
//   url -> text
// Any other argument list yields the type error symbol.
ListExpr
protocolHostFilenameTypeMap( ListExpr args)
{
  __TRACE__
  const bool accepted = nl->ListLength(args) == 1
                     && nl->IsEqual(nl->First(args), URL::BasicType());
  if ( !accepted )
  {
    return nl->SymbolAtom(Symbol::TYPEERROR());
  }
  return nl->SymbolAtom(FText::BasicType());
}

/*
6.1.1 Type Mapping of Operator ~source~

*/
// Type mapping of ~source~:
//   html -> url,  page -> url
// Any other argument list yields the type error symbol.
ListExpr
sourceTypeMap( ListExpr args)
{
  __TRACE__
  if ( nl->ListLength(args) != 1 )
  {
    return nl->SymbolAtom(Symbol::TYPEERROR());
  }

  ListExpr docType = nl->First(args);
  const bool accepted = nl->IsEqual(docType, HTML::BasicType())
                     || nl->IsEqual(docType, Page::BasicType());
  if ( !accepted )
  {
    return nl->SymbolAtom(Symbol::TYPEERROR());
  }
  return nl->SymbolAtom(URL::BasicType());
}

/*
6.1.2 Type Mapping of Operator ~createurl~

*/
// Type mapping of ~createurl~:
//   text -> url
// Any other argument list yields the type error symbol.
ListExpr
createurlTypeMap( ListExpr args)
{
  __TRACE__
  const bool accepted = nl->ListLength(args) == 1
                     && nl->IsEqual(nl->First(args), FText::BasicType());
  if ( !accepted )
  {
    return nl->SymbolAtom(Symbol::TYPEERROR());
  }
  return nl->SymbolAtom(URL::BasicType());
}

/*
6.1.3 Type Mapping of Operator ~content~

*/
// Type mapping of ~content~:
//   html -> text
// Any other argument list yields the type error symbol.
ListExpr
contentTypeMap( ListExpr args)
{
  __TRACE__
  const bool accepted = nl->ListLength(args) == 1
                     && nl->IsEqual(nl->First(args), HTML::BasicType());
  if ( !accepted )
  {
    return nl->SymbolAtom(Symbol::TYPEERROR());
  }
  return nl->SymbolAtom(FText::BasicType());
}

/*
6.1.4 Type Mapping of Operator ~urls~

*/
// Type mapping of ~urls~:
//   html -> (stream url),  page -> (stream url)
// Any other argument list yields the type error symbol.
ListExpr
urlsTypeMap( ListExpr args)
{
  __TRACE__
  if ( nl->ListLength(args) != 1 )
  {
    return nl->SymbolAtom(Symbol::TYPEERROR());
  }

  ListExpr docType = nl->First(args);
  const bool accepted = nl->IsEqual(docType, HTML::BasicType())
                     || nl->IsEqual(docType, Page::BasicType());
  if ( !accepted )
  {
    return nl->SymbolAtom(Symbol::TYPEERROR());
  }
  return nl->TwoElemList(nl->SymbolAtom(Symbol::STREAM()),
                         nl->SymbolAtom(URL::BasicType()));
}

/*
6.1.5 Type Mapping of Operator ~containsurl~

*/
// Type mapping of ~containsurl~:
//   html x url -> bool,  page x url -> bool
// Any other argument list yields the type error symbol.
ListExpr
containsurlTypeMap( ListExpr args)
{
  __TRACE__
  if ( nl->ListLength(args) != 2 )
  {
    return nl->SymbolAtom(Symbol::TYPEERROR());
  }

  ListExpr docType = nl->First(args);
  ListExpr urlType = nl->Second(args);
  const bool docOk = nl->IsEqual(docType, HTML::BasicType())
                  || nl->IsEqual(docType, Page::BasicType());
  if ( docOk && nl->IsEqual(urlType, URL::BasicType()) )
  {
    return nl->SymbolAtom(CcBool::BasicType());
  }
  return nl->SymbolAtom(Symbol::TYPEERROR());
}

/*
6.1.6 Type Mapping of Operator ~last_modified~
----
----

*/
// Type mapping of ~last_modified~:
//   html -> instant
// Any other argument list yields the type error symbol.
ListExpr
lastmodifiedTypeMap( ListExpr args)
{
  __TRACE__
  const bool accepted = nl->ListLength(args) == 1
                     && nl->IsEqual(nl->First(args), HTML::BasicType());
  if ( !accepted )
  {
    return nl->SymbolAtom(Symbol::TYPEERROR());
  }
  return nl->SymbolAtom(Instant::BasicType());
}

/*
6.1.7 Type Mapping of Operator ~metainfo~

*/
// Type mapping of ~metainfo~:
//   html x string -> text
// Any other argument list yields the type error symbol.
ListExpr
metainfoTypeMap( ListExpr args)
{
  __TRACE__
  if ( nl->ListLength(args) != 2 )
  {
    return nl->SymbolAtom(Symbol::TYPEERROR());
  }

  ListExpr docType = nl->First(args);
  ListExpr keyType = nl->Second(args);
  const bool accepted = nl->IsEqual(docType, HTML::BasicType())
                     && nl->IsEqual(keyType, CcString::BasicType());
  if ( !accepted )
  {
    return nl->SymbolAtom(Symbol::TYPEERROR());
  }
  return nl->SymbolAtom(FText::BasicType());
}

/*
6.1.8 Type Mapping of Operator ~metainfos~

*/
// Type mapping of ~metainfos~:
//   html -> (stream (tuple ((Key string) (Content text))))
// Any other argument list yields the type error symbol.
ListExpr
metainfosTypeMap( ListExpr args)
{
  __TRACE__
  if ( nl->ListLength(args) != 1
       || !nl->IsEqual(nl->First(args), HTML::BasicType()) )
  {
    return nl->SymbolAtom(Symbol::TYPEERROR());
  }

  // attribute list of the result tuples: Key, Content
  ListExpr attrList = nl->OneElemList(
      nl->TwoElemList(nl->SymbolAtom("Key"),
                      nl->SymbolAtom(CcString::BasicType())));
  nl->Append(attrList,
      nl->TwoElemList(nl->SymbolAtom("Content"),
                      nl->SymbolAtom(FText::BasicType())));

  ListExpr tupleType =
      nl->TwoElemList(nl->SymbolAtom(Tuple::BasicType()), attrList);
  return nl->TwoElemList(nl->SymbolAtom(Symbol::STREAM()), tupleType);
}

/*
6.1.9 Type Mapping of Operator ~number_of~

*/
// Type mapping of ~number_of~:
//   html x string -> int
// Any other argument list yields the type error symbol.
ListExpr
numberofTypeMap( ListExpr args)
{
  __TRACE__
  if ( nl->ListLength(args) != 2 )
  {
    return nl->SymbolAtom(Symbol::TYPEERROR());
  }

  ListExpr docType = nl->First(args);
  ListExpr tagType = nl->Second(args);
  const bool accepted = nl->IsEqual(docType, HTML::BasicType())
                     && nl->IsEqual(tagType, CcString::BasicType());
  if ( !accepted )
  {
    return nl->SymbolAtom(Symbol::TYPEERROR());
  }
  return nl->SymbolAtom(CcInt::BasicType());
}

/*
6.1.10 Type Mapping of Operator ~similar~

*/
// Type mapping of ~similar~:
//   html x html x int x bool -> real
// Any other argument list yields the type error symbol.
ListExpr
similarTypeMap( ListExpr args)
{
  __TRACE__
  if ( nl->ListLength(args) != 4 )
  {
    return nl->SymbolAtom(Symbol::TYPEERROR());
  }

  ListExpr firstDoc = nl->First(args);
  ListExpr secondDoc = nl->Second(args);
  ListExpr intArg = nl->Third(args);
  ListExpr boolArg = nl->Fourth(args);

  const bool accepted = nl->IsEqual(firstDoc, HTML::BasicType())
                     && nl->IsEqual(secondDoc, HTML::BasicType())
                     && nl->IsEqual(intArg, CcInt::BasicType())
                     && nl->IsEqual(boolArg, CcBool::BasicType());
  if ( !accepted )
  {
    return nl->SymbolAtom(Symbol::TYPEERROR());
  }
  return nl->SymbolAtom(CcReal::BasicType());
}

/*
6.1.11 Type Mapping of Operator ~extracthtml~

*/
// Type mapping of ~extracthtml~:
//   page -> html
// Any other argument list yields the type error symbol.
ListExpr
extracthtmlTypeMap( ListExpr args)
{
  __TRACE__
  const bool accepted = nl->ListLength(args) == 1
                     && nl->IsEqual(nl->First(args), Page::BasicType());
  if ( !accepted )
  {
    return nl->SymbolAtom(Symbol::TYPEERROR());
  }
  return nl->SymbolAtom(HTML::BasicType());
}

/*
6.1.12 Type Mapping of Operator ~numoffiles~

*/
// Type mapping of ~numoffiles~:
//   page -> int
// Any other argument list yields the type error symbol.
ListExpr
numoffilesTypeMap( ListExpr args)
{
  __TRACE__
  const bool accepted = nl->ListLength(args) == 1
                     && nl->IsEqual(nl->First(args), Page::BasicType());
  if ( !accepted )
  {
    return nl->SymbolAtom(Symbol::TYPEERROR());
  }
  return nl->SymbolAtom(CcInt::BasicType());
}

/*
6.1.13 Type Mapping of Operator ~getfiles~

*/
// Type mapping of ~getfiles~:
//   page -> (stream (tuple ((Source url) (Type string) (File binfile))))
// where the type of File is the symbol BinaryFile::BasicType().
// Any other argument list yields the type error symbol.
ListExpr
getfilesTypeMap( ListExpr args)
{
  __TRACE__
  if ( nl->ListLength(args) != 1
       || !nl->IsEqual(nl->First(args), Page::BasicType()) )
  {
    return nl->SymbolAtom(Symbol::TYPEERROR());
  }

  // attribute list of the result tuples: Source, Type, File
  ListExpr attrList = nl->OneElemList(
      nl->TwoElemList(nl->SymbolAtom("Source"),
                      nl->SymbolAtom(URL::BasicType())));
  ListExpr tail = nl->Append(attrList,
      nl->TwoElemList(nl->SymbolAtom("Type"),
                      nl->SymbolAtom(CcString::BasicType())));
  nl->Append(tail,
      nl->TwoElemList(nl->SymbolAtom("File"),
                      nl->SymbolAtom(BinaryFile::BasicType())));

  ListExpr tupleType =
      nl->TwoElemList(nl->SymbolAtom(Tuple::BasicType()), attrList);
  return nl->TwoElemList(nl->SymbolAtom(Symbol::STREAM()), tupleType);
}

/*
6.1.14 Type Mapping of Operator ~wget~

*/
// Type mapping of ~wget~:
//   url x bool x int x text [x (map url bool)]
//     -> (stream (tuple ((Source url) (Type string) (File binfile))))
// where the type of File is the symbol BinaryFile::BasicType().
// The optional fifth argument must be a function from url to bool. If it
// has another type, an error naming the fifth argument is reported. Any
// other mismatch yields the type error symbol.
ListExpr
wgetTypeMap( ListExpr args)
{
  __TRACE__
  const int argCount = nl->ListLength(args);
  if ( argCount != 4 && argCount != 5 )
  {
    __TRACE__
    return nl->SymbolAtom(Symbol::TYPEERROR());
  }

  ListExpr arg1 = nl->First(args);
  ListExpr arg2 = nl->Second(args);
  ListExpr arg3 = nl->Third(args);
  ListExpr arg4 = nl->Fourth(args);

  if( argCount == 5 )
  {
    ListExpr arg5 = nl->Fifth(args);
    if (nl->IsAtom(arg5)
        || nl->ListLength(arg5) != 3
        || !nl->IsEqual(nl->First(arg5), Symbol::MAP())
        || !nl->IsEqual(nl->Second(arg5), URL::BasicType())
        || !nl->IsEqual(nl->Third(arg5), CcBool::BasicType()) )
    {
      string out;
      nl->WriteToString(out, arg5);
      ErrorReporter::ReportError("Operator wget expects a "
        "(map -> bool) as its fifth argument. "
        "The fifth argument provided "
        "has type '" + out + "' instead.");
      return nl->SymbolAtom(Symbol::TYPEERROR());
    }
  }

  __TRACE__

  if ( nl->IsEqual(arg1, URL::BasicType())
       && nl->IsEqual(arg2, CcBool::BasicType())
       && nl->IsEqual(arg3, CcInt::BasicType())
       && nl->IsEqual(arg4, FText::BasicType()) )
  {
    __TRACE__
    // attribute list of the result tuples: Source, Type, File
    ListExpr attrList = nl->OneElemList(
        nl->TwoElemList(nl->SymbolAtom("Source"),
                        nl->SymbolAtom(URL::BasicType())));
    ListExpr lastAttrList = nl->Append(attrList,
        nl->TwoElemList(nl->SymbolAtom("Type"),
                        nl->SymbolAtom(CcString::BasicType())));
    nl->Append(lastAttrList,
        nl->TwoElemList(nl->SymbolAtom("File"),
                        nl->SymbolAtom(BinaryFile::BasicType())));

    return nl->TwoElemList(nl->SymbolAtom(Symbol::STREAM()),
        nl->TwoElemList(nl->SymbolAtom(Tuple::BasicType()), attrList));
  }

  __TRACE__
  return nl->SymbolAtom(Symbol::TYPEERROR());
}

/*
6.1.15 Type Mapping of Operator ~pageget~

*/
// Type mapping of ~pageget~:
//   url x bool x int x text [x (map url bool)]
//     -> (stream (tuple ((Source url) (Page page))))
// The optional fifth argument must be a function from url to bool. If it
// has another type, an error naming the fifth argument is reported. Any
// other mismatch yields the type error symbol.
ListExpr
pagegetTypeMap( ListExpr args)
{
  __TRACE__
  const int argCount = nl->ListLength(args);
  if ( argCount != 4 && argCount != 5 )
  {
    __TRACE__
    return nl->SymbolAtom(Symbol::TYPEERROR());
  }

  ListExpr arg1 = nl->First(args);
  ListExpr arg2 = nl->Second(args);
  ListExpr arg3 = nl->Third(args);
  ListExpr arg4 = nl->Fourth(args);

  if( argCount == 5 )
  {
    ListExpr arg5 = nl->Fifth(args);
    if (nl->IsAtom(arg5)
        || nl->ListLength(arg5) != 3
        || !nl->IsEqual(nl->First(arg5), Symbol::MAP())
        || !nl->IsEqual(nl->Second(arg5), URL::BasicType())
        || !nl->IsEqual(nl->Third(arg5), CcBool::BasicType()) )
    {
      string out;
      nl->WriteToString(out, arg5);
      ErrorReporter::ReportError("Operator pageget expects a "
        "(map -> bool) as its fifth argument. "
        "The fifth argument provided "
        "has type '" + out + "' instead.");
      return nl->SymbolAtom(Symbol::TYPEERROR());
    }
  }

  __TRACE__

  if ( nl->IsEqual(arg1, URL::BasicType())
       && nl->IsEqual(arg2, CcBool::BasicType())
       && nl->IsEqual(arg3, CcInt::BasicType())
       && nl->IsEqual(arg4, FText::BasicType()) )
  {
    __TRACE__
    // attribute list of the result tuples: Source, Page
    ListExpr attrList = nl->OneElemList(
        nl->TwoElemList(nl->SymbolAtom("Source"),
                        nl->SymbolAtom(URL::BasicType())));
    nl->Append(attrList,
        nl->TwoElemList(nl->SymbolAtom("Page"),
                        nl->SymbolAtom(Page::BasicType())));

    return nl->TwoElemList(nl->SymbolAtom(Symbol::STREAM()),
        nl->TwoElemList(nl->SymbolAtom(Tuple::BasicType()), attrList));
  }

  __TRACE__
  return nl->SymbolAtom(Symbol::TYPEERROR());
}

/*
6.1.15 Type Mapping of Operator ~htmlget~

*/
// Type mapping of ~htmlget~:
//   url x bool x int x text [x (map url bool)]
//     -> (stream (tuple ((Source url) (Html html))))
// The optional fifth argument must be a function from url to bool. If it
// has another type, an error naming the fifth argument is reported. Any
// other mismatch yields the type error symbol.
ListExpr
htmlgetTypeMap( ListExpr args)
{
  __TRACE__
  const int argCount = nl->ListLength(args);
  if ( argCount != 4 && argCount != 5 )
  {
    __TRACE__
    return nl->SymbolAtom(Symbol::TYPEERROR());
  }

  ListExpr arg1 = nl->First(args);
  ListExpr arg2 = nl->Second(args);
  ListExpr arg3 = nl->Third(args);
  ListExpr arg4 = nl->Fourth(args);

  if( argCount == 5 )
  {
    ListExpr arg5 = nl->Fifth(args);
    if (nl->IsAtom(arg5)
        || nl->ListLength(arg5) != 3
        || !nl->IsEqual(nl->First(arg5), Symbol::MAP())
        || !nl->IsEqual(nl->Second(arg5), URL::BasicType())
        || !nl->IsEqual(nl->Third(arg5), CcBool::BasicType()) )
    {
      string out;
      nl->WriteToString(out, arg5);
      ErrorReporter::ReportError("Operator htmlget expects a "
        "(map -> bool) as its fifth argument. "
        "The fifth argument provided "
        "has type '" + out + "' instead.");
      return nl->SymbolAtom(Symbol::TYPEERROR());
    }
  }

  __TRACE__

  if ( nl->IsEqual(arg1, URL::BasicType())
       && nl->IsEqual(arg2, CcBool::BasicType())
       && nl->IsEqual(arg3, CcInt::BasicType())
       && nl->IsEqual(arg4, FText::BasicType()) )
  {
    __TRACE__
    // attribute list of the result tuples: Source, Html
    ListExpr attrList = nl->OneElemList(
        nl->TwoElemList(nl->SymbolAtom("Source"),
                        nl->SymbolAtom(URL::BasicType())));
    nl->Append(attrList,
        nl->TwoElemList(nl->SymbolAtom("Html"),
                        nl->SymbolAtom(HTML::BasicType())));

    return nl->TwoElemList(nl->SymbolAtom(Symbol::STREAM()),
        nl->TwoElemList(nl->SymbolAtom(Tuple::BasicType()), attrList));
  }

  __TRACE__
  return nl->SymbolAtom(Symbol::TYPEERROR());
}

/*
6.1.16 Type Mapping of Operator ~webequal =:~

*/
// Type mapping of ~webequal~: both arguments must have the same type, and
// that type must be url, html or page; the result type is bool.
// Any other argument list yields the type error symbol.
ListExpr
webequalTypeMap( ListExpr args)
{
  __TRACE__
  if ( nl->ListLength(args) != 2 )
  {
    return nl->SymbolAtom(Symbol::TYPEERROR());
  }

  ListExpr left = nl->First(args);
  ListExpr right = nl->Second(args);

  const bool bothUrl = nl->IsEqual(left, URL::BasicType())
                    && nl->IsEqual(right, URL::BasicType());
  const bool bothHtml = !bothUrl
                    && nl->IsEqual(left, HTML::BasicType())
                    && nl->IsEqual(right, HTML::BasicType());
  const bool bothPage = !bothUrl && !bothHtml
                    && nl->IsEqual(left, Page::BasicType())
                    && nl->IsEqual(right, Page::BasicType());

  if ( bothUrl || bothHtml || bothPage )
  {
    return nl->SymbolAtom(CcBool::BasicType());
  }
  return nl->SymbolAtom(Symbol::TYPEERROR());
}

/*
6.2 Value Mapping and Selection Functions

6.2.1 Value Mapping Function for Operator ~protocol~

*/
// Value mapping of ~protocol~: writes the protocol of the URL argument
// into the text result object provided by the query processor.
int
protocolFun (Word* args, Word& result, int message, Word& local, Supplier s)
{
  __TRACE__
  // the query processor supplies the object that takes the result
  result = qp->ResultStorage(s);

  URL* urlArg = (URL*)args[0].addr;
  FText* textResult = (FText*)result.addr;

  // first argument: the value is defined; second argument: the value itself
  textResult->Set(true, urlArg->getProtocol().c_str());
  return 0;
}

/*
6.2.2 Value Mapping Function for Operator ~host~

*/
// Value mapping for ~host~: stores the host of the URL in args[0]
// in the text result object.
int
hostFun (Word* args, Word& result, int message, Word& local, Supplier s)
{
  __TRACE__
  URL* url = static_cast<URL*>(args[0].addr);

  // The query processor supplies the instance that receives the result.
  result = qp->ResultStorage(s);
  FText* out = static_cast<FText*>(result.addr);

  // Set(defined, value): the result is always marked as defined.
  out->Set(true, url->getHost().c_str());
  return 0;
}

/*
6.2.3 Value Mapping Function for Operator ~filename~

*/
// Value mapping for ~filename~: stores the path of the URL in args[0]
// (as returned by URL::getPath) in the text result object.
int
filenameFun (Word* args, Word& result, int message, Word& local, Supplier s)
{
  __TRACE__
  URL* url = static_cast<URL*>(args[0].addr);

  // The query processor supplies the instance that receives the result.
  result = qp->ResultStorage(s);
  FText* out = static_cast<FText*>(result.addr);

  // Set(defined, value): the result is always marked as defined.
  out->Set(true, url->getPath().c_str());
  return 0;
}

/*
6.2.4 Value Mapping Function for Operator ~source~

*/
// Value mapping for ~source~: copies the source URL of the HTML object
// in args[0] into the url result object.
int
sourceFun (Word* args, Word& result, int message, Word& local, Supplier s)
{
  __TRACE__
  HTML* h = ((HTML*)args[0].addr);

  // The query processor supplies the instance that receives the result.
  result = qp->ResultStorage(s);

  // Use an automatic object for the temporary copy (as createurlFun
  // does) instead of new/delete, so nothing can leak if Set throws.
  URL src(h->getSource());
  __TRACE__
  // Set(defined, value): the result is always marked as defined.
  ((URL*)result.addr)->Set(true, src);
  __TRACE__
  return 0;
}

/*
6.2.5 Value Mapping Function for Operator ~createurl~

*/
// Value mapping for ~createurl~: parses the text in args[0] into a URL.
// The result's defined flag is whatever URL::urlFromString returns.
int
createurlFun (Word* args, Word& result, int message, Word& local, Supplier s)
{
  __TRACE__
  FText* input = static_cast<FText*>(args[0].addr);

  // The query processor supplies the instance that receives the result.
  result = qp->ResultStorage(s);
  URL* out = static_cast<URL*>(result.addr);

  const char *raw = input->Get();
  URL parsed("");
  string urlText = raw;

  // urlFromString fills 'parsed' and reports whether parsing succeeded.
  // NOTE(review): the original comment stated that every string has to
  // yield a valid url, yet the parse status is used as defined flag.
  bool ok = URL::urlFromString(urlText, parsed);
  out->Set(ok, parsed);
  return 0;
}

/*
6.2.6 Value Mapping Function for Operator ~content~

*/
// Value mapping for ~content~: stores the text returned by
// HTML::getText for the HTML object in args[0] in the text result.
int
contentFun (Word* args, Word& result, int message, Word& local, Supplier s)
{
  __TRACE__
  HTML* html = static_cast<HTML*>(args[0].addr);

  // The query processor supplies the instance that receives the result.
  result = qp->ResultStorage(s);
  FText* out = static_cast<FText*>(result.addr);

  // Set(defined, value): the result is always marked as defined.
  out->Set(true, html->getText().c_str());
  return 0;
}

/*
6.2.7 Value Mapping Function for Operator ~urls~

*/
// Value mapping for ~urls~: stream operator that yields one URL object
// per url of the HTML object in args[0].
//
// OPEN    allocates the iteration state and stores it in local.addr.
// REQUEST yields the next URL (ownership passes to the caller through
//         result.addr) or CANCEL once all urls have been delivered.
// CLOSE   releases the iteration state.
int
urlsFun (Word* args, Word& result, int message, Word& local, Supplier s)
{
  __TRACE__
  HTML* h = ((HTML*)args[0].addr);
  struct UrlAdvance {int numberOf, current;};
  UrlAdvance* urladvance = 0;

  switch( message )
  {
    case OPEN:

      urladvance = new UrlAdvance;
      urladvance->current = 0;
      urladvance->numberOf =  h->getNumberOfUrls();

      local.addr = urladvance;

      return 0;

    case REQUEST:

      urladvance = static_cast<UrlAdvance*>(local.addr);

      // No state (e.g. REQUEST after CLOSE) means there is nothing left.
      if ( !urladvance || urladvance->current >= urladvance->numberOf )
        return CANCEL;

      result.addr = new URL((h->getUrl(urladvance->current++)));
      return YIELD;

    case CLOSE:

      urladvance = static_cast<UrlAdvance*>(local.addr);
      if ( urladvance )
      {
        delete urladvance;
        // Clear the slot so that a repeated CLOSE cannot double-free.
        local.addr = 0;
      }
      return 0;
  }
  /* should not happen */
  return -1;
}

/*
6.2.8 Value Mapping Function for Operator ~containsurl~

*/
// Value mapping for ~containsurl~: reports whether the HTML object in
// args[0] contains the URL in args[1] (as decided by HTML::containsURL).
int
containsurlFun (Word* args, Word& result, int message,
                 Word& local, Supplier s)
{
  __TRACE__
  HTML* html = static_cast<HTML*>(args[0].addr);
  URL* wanted = static_cast<URL*>(args[1].addr);

  // The query processor supplies the instance that receives the result.
  result = qp->ResultStorage(s);
  CcBool* out = static_cast<CcBool*>(result.addr);

  // Set(defined, value): the result is always marked as defined.
  out->Set(true, html->containsURL(wanted));
  return 0;
}

/*
6.2.9 Value Mapping Function for Operator ~lastmodified~

*/
// Value mapping for ~lastmodified~: copies the last-modified instant of
// the HTML object in args[0] field by field into the result instant.
int
lastmodifiedFun (Word* args, Word& result, int message,
                 Word& local, Supplier s)
{
  __TRACE__
  HTML* html = static_cast<HTML*>(args[0].addr);

  // The query processor supplies the instance that receives the result.
  result = qp->ResultStorage(s);
  DateTime* out = static_cast<DateTime*>(result.addr);

  DateTime modified = html->getLastModified();
  out->Set(modified.GetYear(), modified.GetMonth(), modified.GetGregDay(),
           modified.GetHour(), modified.GetMinute(), modified.GetSecond(),
           modified.GetMillisecond());
  return 0;
}

/*
6.2.10 Value Mapping Function for Operator ~metainfo~

*/
// Value mapping for ~metainfo~: looks up the meta information stored
// under the key in args[1] in the HTML object in args[0] and writes it
// to the text result.
int
metainfoFun (Word* args, Word& result, int message, Word& local, Supplier s)
{
  __TRACE__
  HTML* html = static_cast<HTML*>(args[0].addr);
  string wantedKey = StdTypes::GetString(args[1]);

  // The query processor supplies the instance that receives the result.
  result = qp->ResultStorage(s);
  FText* out = static_cast<FText*>(result.addr);

  // Set(defined, value): the result is always marked as defined.
  out->Set(true, html->getMetaInfo(wantedKey).c_str());
  return 0;
}

/*
6.2.11 Value Mapping Function for Operator ~metainfos~

*/
// Value mapping for ~metainfos~: stream operator that yields one tuple
// [Key: string, Content: text] per meta information entry of the HTML
// object in args[0].
//
// OPEN    allocates the iteration state (counters and the tuple type of
//         the result stream) and stores it in local.addr.
// REQUEST yields the next tuple or CANCEL when all entries are delivered.
// CLOSE   releases the iteration state.
int
metainfosFun (Word* args, Word& result, int message, Word& local, Supplier s)
{
  __TRACE__
  HTML* h = ((HTML*)args[0].addr);
  struct MiAdvance {int numberOf, current;
                    TupleType *resultTupleType;};
  MiAdvance* miAdvance = 0;

  switch( message )
  {
    case OPEN:
    {
      miAdvance = new MiAdvance;
      miAdvance->current = 0;
      miAdvance->numberOf =  h->getNumberOfMetainfos();
      ListExpr resultType = GetTupleResultType( s );
      miAdvance->resultTupleType = new TupleType( nl->Second( resultType ));
      local.addr = miAdvance;

      return 0;
    }

    case REQUEST:
    {
      miAdvance = static_cast<MiAdvance*>(local.addr);

      // No state (e.g. REQUEST after CLOSE) means there is nothing left.
      if ( !miAdvance || miAdvance->current >= miAdvance->numberOf )
        return CANCEL;

      string content;
      string key = h->getMetainfo(miAdvance->current++,content);

      //make tuple [Key: string, Content: text]
      Tuple *elem = new Tuple( miAdvance->resultTupleType );

      // STRING_T is used as a fixed-size char buffer, while the key comes
      // from the document and is not length-checked: copy with a bound
      // and terminate explicitly instead of an unbounded strcpy.
      STRING_T skey;
      strncpy(skey, key.c_str(), sizeof(skey) - 1);
      skey[sizeof(skey) - 1] = '\0';
      CcString* cckey = new CcString(true,&skey);
      elem->PutAttribute(0,cckey);

      FText *t = new FText(true,content.c_str());
      elem->PutAttribute(1,t);
      result.addr = elem;
      return YIELD;
    }

    case CLOSE:
    {
      miAdvance = static_cast<MiAdvance*>(local.addr);
      if ( miAdvance )
      {
        miAdvance->resultTupleType->DeleteIfAllowed();
        delete miAdvance;
        // Clear the slot so that a repeated CLOSE cannot double-free.
        local.addr = 0;
      }
      return 0;
    }
  }
  /* should not happen */
  return -1;
}

/*
6.2.12 Value Mapping Function for Operator ~numberof~

*/
// Value mapping for ~numberof~: writes HTML::getNumberOf for the string
// in args[1], evaluated on the HTML object in args[0], to the int result.
int
numberofFun (Word* args, Word& result, int message, Word& local, Supplier s)
{
  __TRACE__
  HTML* html = static_cast<HTML*>(args[0].addr);
  string what = StdTypes::GetString(args[1]);

  // The query processor supplies the instance that receives the result.
  result = qp->ResultStorage(s);
  CcInt* out = static_cast<CcInt*>(result.addr);

  // Set(defined, value): the result is always marked as defined.
  out->Set(true, html->getNumberOf(what));
  return 0;
}

/*
6.2.13 Value Mapping Function for Operator ~similar~

*/
// Value mapping for ~similar~: compares the HTML objects in args[0] and
// args[1] via HTML::similar, passing on the int in args[2] and the bool
// in args[3], and writes the outcome to the real result.
int
similarFun (Word* args, Word& result, int message, Word& local, Supplier s)
{
  __TRACE__
  HTML* left = static_cast<HTML*>(args[0].addr);
  HTML* right = static_cast<HTML*>(args[1].addr);
  int depth = StdTypes::GetInt(args[2]);
  bool followOrder = StdTypes::GetBool(args[3]);

  // The query processor supplies the instance that receives the result.
  result = qp->ResultStorage(s);
  CcReal* out = static_cast<CcReal*>(result.addr);

  // Set(defined, value): the result is always marked as defined.
  out->Set(true, left->similar(right, depth, followOrder));
  __TRACE__
  return 0;
}

/*
6.2.14 Value Mapping Function for Operator ~extracthtml~

*/
// Value mapping for ~extracthtml~: copies the HTML part of the page in
// args[0] into the html result object.
int
extracthtmlFun (Word* args, Word& result, int message,
                Word& local, Supplier s)
{
  __TRACE__
  Page* page = static_cast<Page*>(args[0].addr);

  // The query processor supplies the instance that receives the result.
  result = qp->ResultStorage(s);
  HTML* out = static_cast<HTML*>(result.addr);

  // Keep the extracted object in a named local before handing it to Set.
  HTML extracted( page->extractHTML());
  out->Set(extracted);
  return 0;
}

/*
6.2.15 Value Mapping Function for Operator ~numoffiles~

*/
// Value mapping for ~numoffiles~: writes Page::numOfFiles of the page in
// args[0] to the int result.
int
numoffilesFun (Word* args, Word& result, int message,
               Word& local, Supplier s)
{
  __TRACE__
  Page* page = static_cast<Page*>(args[0].addr);

  // The query processor supplies the instance that receives the result.
  result = qp->ResultStorage(s);
  CcInt* out = static_cast<CcInt*>(result.addr);

  // Set(defined, value): the result is always marked as defined.
  out->Set(true, page->numOfFiles());
  return 0;
}

/*
6.2.16 Value Mapping Function for Operator ~getfiles~

*/
// Value mapping for ~getfiles~: stream operator that yields one tuple
// [Source: url, Type: string, File: binfile] per file of the page in
// args[0].
//
// OPEN    allocates the iteration state (counters and the tuple type of
//         the result stream) and stores it in local.addr.
// REQUEST yields the next tuple or CANCEL when all files are delivered.
// CLOSE   releases the iteration state.
int
getfilesFun (Word* args, Word& result, int message,
             Word& local, Supplier s)
{
  __TRACE__
  Page* p = ((Page*)args[0].addr);
  struct EmbAdvance {int numberOf, current;
                     TupleType *resultTupleType;};
  EmbAdvance* embAdvance = 0;

  switch( message )
  {
    case OPEN:
    {
      embAdvance = new EmbAdvance;
      embAdvance->current = 0;
      embAdvance->numberOf =  p->numOfFiles();
      ListExpr resultType = GetTupleResultType( s );
      embAdvance->resultTupleType =
        new TupleType( nl->Second( resultType ) );

      local.addr = embAdvance;

      return 0;
    }

    case REQUEST:
    {
      embAdvance = static_cast<EmbAdvance*>(local.addr);

      // No state (e.g. REQUEST after CLOSE) means there is nothing left.
      if ( !embAdvance || embAdvance->current >= embAdvance->numberOf )
        return CANCEL;

      URL *u = new URL((p->getUrl(embAdvance->current)));
      string type = p->getMime( embAdvance->current);
      string src = p->getText( embAdvance->current++);

      //make tuple [Source: url, Type: string, File: binfile]
      Tuple *elem = new Tuple( embAdvance->resultTupleType );
      elem->PutAttribute(0,u);

      // STRING_T is used as a fixed-size char buffer, while the MIME type
      // is not length-checked: copy with a bound and terminate explicitly
      // instead of an unbounded strcpy.
      STRING_T stype;
      strncpy(stype, type.c_str(), sizeof(stype) - 1);
      stype[sizeof(stype) - 1] = '\0';
      CcString* cctype = new CcString(true,&stype);
      elem->PutAttribute(1,cctype);

      // The file content is handed to BinaryFile::Decode, which fills
      // the initially empty binary file.
      BinaryFile *file = new BinaryFile( 0 );
      file->Decode(src);
      elem->PutAttribute(2,file);
      result.addr = elem;
      return YIELD;
    }

    case CLOSE:
    {
      embAdvance = static_cast<EmbAdvance*>(local.addr);
      if ( embAdvance )
      {
        embAdvance->resultTupleType->DeleteIfAllowed();
        delete embAdvance;
        // Clear the slot so that a repeated CLOSE cannot double-free.
        local.addr = 0;
      }
      return 0;
    }
  }
  /* should not happen */
  return -1;
}

/*
6.2.17.1 class definitions for hashtable for operators wget and pageget

*/
// Case-insensitive set of URL strings, used by wget/pageget to recognise
// URLs that were already visited.
//
// Strings are upper-cased and distributed over a fixed number of buckets
// by the sum of their character codes. All storage is held by value, so
// the class needs no destructor and can be copied safely.
class HashUrl
{
private:
  static const size_t NO_BUCKETS = 50;//255;
  size_t nBuckets;

  std::vector<std::vector<std::string> > bucketsU;

  // Bucket index: sum of the (unsigned) character codes modulo nBuckets.
  size_t GetHashVal(const std::string& s) const
  {
    size_t sum = 0;
    for( std::string::size_type i = 0; i < s.length(); i++)
    {
      sum += static_cast<unsigned char>(s[i]);
    }
    return sum % nBuckets;
  }

public:
  HashUrl() : nBuckets(NO_BUCKETS), bucketsU(NO_BUCKETS)
  {
  }

  // Checks whether s (compared case-insensitively) is already stored.
  // Returns true if it is; otherwise stores it and returns false.
  bool IsDuplicate( std::string &s)
  {
    // Normalise to upper case. toupper must not be fed negative char
    // values, hence the detour via unsigned char.
    std::string key(s);
    for( std::string::size_type i = 0; i < key.length(); i++)
    {
      key[i] = static_cast<char>(
                 toupper(static_cast<unsigned char>(key[i])));
    }

    std::vector<std::string>& bucket = bucketsU[GetHashVal(key)];
    for( std::vector<std::string>::const_iterator it = bucket.begin();
         it != bucket.end(); ++it)
    {
      if( *it == key )
      {
        return true; // an equal string is already stored
      }
    }
    // nothing equal found: remember the string
    bucket.push_back(key);
    return false;
  }
};

/*
6.2.17.2 Selection Function for Operators ~wget, pageget, htmlget~

*/
// Selection function for wget, pageget and htmlget: picks the value
// mapping by the number of arguments (4 -> index 0, 5 -> index 1).
int webwget_pagegetSelect( ListExpr args)
{
  switch ( nl->ListLength(args) )
  {
    case 4:
      return(0);
    case 5:
      return(1);
    default:
      return(-1); //This point should never be reached
  }
}


/*
6.2.17 Value Mapping Functions for Operator ~wget~

*/

// Iteration state for one loaded page on the depth stack of
// wget/pageget: how many embedded objects and links the page has and
// which of them is processed next.
struct PageAdvance
{
  int numberOfEmb;    // number of embedded objects to deliver
  int currentEmb;     // index of the next embedded object
  int numberOfLinks;  // number of links to follow
  int currentLink;    // index of the next link
  Page *p;            // the page itself
};

// Value mapping shared by both ~wget~ variants. Stream operator yielding
// tuples [Source: url, Type: string, File: binfile].
//
// args[0] : url  - start URL
// args[1] : bool - whether the links of a loaded HTML page are followed
// args[2] : int  - link depth limit; a negative value means unlimited
// args[3] : text - further hosts; appended with a comma to the host of
//                  the start URL and passed to Page::getUrlHosts
// args[4] : predicate on URLs, only evaluated if hasFunction is true
//
// OPEN    allocates the traversal state (visited-URL hash, stack of pages
//         being processed, result tuple type) and stores it in local.addr.
// REQUEST first delivers pending embedded objects of the page on top of
//         the stack, then looks for the next link to load; the loaded
//         file is returned and, if it is HTML, pushed on the stack.
// CLOSE   releases the traversal state.
int
wgetFun (Word* args, Word& result, int message, Word& local, Supplier s,
  bool hasFunction)
{
  struct GetAdvance {stack<PageAdvance*>* myDepthStack;
                HashUrl *myHash; TupleType *resultTupleType;
              int depth; bool isnew; string *host;};
  GetAdvance* getAdvance = 0;
  __TRACE__

  switch( message )
  {
    case OPEN:
    {
  __TRACE__
      getAdvance = new GetAdvance;
      getAdvance->myHash = new HashUrl;
      getAdvance->myDepthStack = new stack<PageAdvance*>;
      ListExpr resultType = GetTupleResultType( s );
      getAdvance->resultTupleType = new TupleType( nl->Second( resultType ) );
      getAdvance->depth = 0;
      getAdvance->isnew = true;
      FText* t = ((FText*)args[3].addr);
      URL* u = ((URL*)args[0].addr);
      // Host list: the host of the start URL, plus the hosts given in
      // the fourth argument if there are any.
      string extraHosts = t->Get();
      if( extraHosts.length() > 0)
      {
        getAdvance->host = new string(u->getHost() + "," + extraHosts);
      }
      else
      {
        getAdvance->host = new string(u->getHost());
      }

      local.addr = getAdvance;
    }
      return 0;

    case REQUEST:
  __TRACE__
    {
      getAdvance = static_cast<GetAdvance*>(local.addr);
      // No state (e.g. REQUEST after CLOSE) means there is nothing left.
      if( !getAdvance )
        return CANCEL;

      PageAdvance *pa = NULL;
      bool extLinks = StdTypes::GetBool(args[1]);
      int depth = StdTypes::GetInt(args[2]);
      bool isUnlimited = (depth < 0);
      URL *exturl = NULL;
      if( !getAdvance->myDepthStack->empty() )
      {
        pa = getAdvance->myDepthStack->top();
      }
      while( !exturl && pa)
      {
  __TRACE__
        // 1. deliver the embedded objects of the current page
        while ( pa && pa->currentEmb < pa->numberOfEmb )
        {
  __TRACE__
          URL *u = new URL((pa->p->getUrl(pa->currentEmb)));
          string type = pa->p->getMime( pa->currentEmb);
          string src = pa->p->getText( pa->currentEmb++);

          // NOTE(review): this key uses ":" while the keys built for
          // links and for the start URL use "://", so an embedded
          // object and a link with the same URL are not recognised as
          // duplicates of each other - confirm whether that is intended.
          string hashstring = u->getProtocol() + ":"
            + u->getHost() + u->getPath();
          if( !getAdvance->myHash->IsDuplicate(hashstring) )
          {
            cout << *u << endl;
            //make tuple [Source: url, Type: string, File: binfile]
            Tuple *elem = new Tuple( getAdvance->resultTupleType );
            elem->PutAttribute(0,u);
            // STRING_T is used as a fixed-size char buffer and the MIME
            // type is not length-checked: bounded copy, explicit
            // terminator.
            STRING_T stype;
            strncpy(stype, type.c_str(), sizeof(stype) - 1);
            stype[sizeof(stype) - 1] = '\0';
            CcString* cctype = new CcString(true,&stype);
            elem->PutAttribute(1,cctype);
            BinaryFile *file = new BinaryFile( 0 );
            if( src.length() )
              file->Decode(src);
            elem->PutAttribute(2,file);
            result.addr = elem;
            return YIELD;
          }
          else
          {
            delete u;
            u = 0;
          }
        }
        // 2. once the embedded objects are handled, look for a link
        //    (a href) to load
        while( !exturl && pa && pa->currentLink < pa->numberOfLinks  )
        {
          // A link is accepted if its host is allowed, it was not
          // loaded before (hash) and, if given, the predicate function
          // accepts it.
          bool hostOk = true;
          URL *checkUrl = new URL((pa->p->getUrlHosts(pa->currentLink++,
              *getAdvance->host,hostOk)));
          cout << *checkUrl << endl;


          if( checkUrl->IsDefined() && hostOk)
          {
  __TRACE__
            string hashstring = checkUrl->getProtocol() + "://"
              + checkUrl->getHost() + checkUrl->getPath();
            if(!getAdvance->myHash->IsDuplicate(hashstring))
            {
              cout << "Defined and host o.k. and not duplicate" << endl;
              if( hasFunction )
              {
                ArgVectorPointer funargs = qp->Argument(args[4].addr);
                (*funargs)[0] = SetWord(checkUrl);
                Word funresult;
                qp->Request(args[4].addr, funresult);
                // an undefined predicate result counts as false
                bool funerg;
                if (((Attribute*)funresult.addr)->IsDefined())
                {
                  funerg = ((CcBool*)funresult.addr)->GetBoolval();
                }
                else
                  funerg = false;

                if( funerg)
                {
                  exturl = checkUrl;
                }
                else
                {
                  delete checkUrl;
                  checkUrl = NULL;
                }
              }
              else
                exturl = checkUrl;
            }
            else
            {
              delete checkUrl;
              checkUrl = NULL;
            }
          }
          else {
            delete checkUrl;
            checkUrl = NULL;
          }
        }
        // 3. page exhausted: drop it and continue with its parent
        if( !exturl )
        {
          delete pa->p;
          delete pa;
          pa = 0;
          getAdvance->myDepthStack->pop();
          --getAdvance->depth;
          if( !getAdvance->myDepthStack->empty() )
          {
            pa = getAdvance->myDepthStack->top();
          }
        }
      }
      if(getAdvance->isnew || exturl)
      {
  __TRACE__
        // Load the URL; if it is HTML, additionally create a Page object
        // for the traversal. The loaded file is returned in any case.

        URL* u;
        if(getAdvance->isnew)
        {
  __TRACE__
          // first request: start with the URL given as first argument
          u = ((URL*)args[0].addr);

          string hashstring = u->getProtocol() + "://"
            + u->getHost() + u->getPath();
          getAdvance->myHash->IsDuplicate(hashstring);
          getAdvance->isnew = false;
          exturl = new URL(*u);
        }
        u = exturl;
        string type;// = "text/html";
        bool isHtml = false;
        // NOTE(review): pagegetFun constructs this as
        // DateTime dt(instanttype) - confirm the default constructor is
        // adequate here.
        DateTime dt;
        cout << "load url from web" << endl;
        string src = Page::getFromWeb(*u, type, isHtml, dt);
        //cout << "ready loading url" << endl;
        #ifdef _DEBUG_JPS_2
        cout << "DEBUG_JPS_2" << src  << "DEBUG_JPS_2 ends"<< endl;
        #endif
        Tuple *elem = new Tuple( getAdvance->resultTupleType );
        // the tuple takes over the URL object
        elem->PutAttribute(0,u);
        // bounded copy of the MIME type, see above
        STRING_T stype;
        strncpy(stype, type.c_str(), sizeof(stype) - 1);
        stype[sizeof(stype) - 1] = '\0';
        CcString* cctype = new CcString(true,&stype);
        elem->PutAttribute(1,cctype);
        if( !isHtml && type.find(HTML::BasicType()) != string::npos)
          isHtml = true;
        cout << "isHTML: " << isHtml << ", " << type << endl;
        BinaryFile *file;
        if( isHtml )
        {
          // HTML is stored as plain text including the terminating 0
          file = new BinaryFile( src.length()+1 );
          file->Put(0,src.length()+1,src.c_str());
        }
        else
        {
          file = new BinaryFile( 0 );
          if( src.length() )
            file->Decode(src);
        }
        elem->PutAttribute(2,file);
        result.addr = elem;

        if( isHtml)
        {
  __TRACE__
          // Make a page object of the html data. It gets the instant
          // that getFromWeb filled in above; previously a second, local
          // 'dt' shadowed that value here.
          Page *p = new Page(*u, type, src, dt);
          PageAdvance *newPa = new PageAdvance();
          newPa->numberOfEmb = p->numOfFiles();
          // links are only followed if requested and the depth allows it
          if( extLinks && (isUnlimited || getAdvance->depth < depth ))
            newPa->numberOfLinks = p->getNumberOfUrls();
          else
            newPa->numberOfLinks = 0;
          cout << "Links: " << newPa->numberOfLinks << endl;
          newPa->currentEmb = 0;
          newPa->currentLink = 0;
          newPa->p = p;
          ++getAdvance->depth;
          getAdvance->myDepthStack->push(newPa);
        }
        return YIELD;
      }
      else
        return CANCEL;

    }

    case CLOSE:
  __TRACE__
    {
      getAdvance = static_cast<GetAdvance*>(local.addr);
      if( getAdvance )
      {
        delete getAdvance->myHash;
        getAdvance->myHash = 0;
        delete getAdvance->host;
        getAdvance->host = 0;
        // release the pages that are still on the stack
        while( !getAdvance->myDepthStack->empty())
        {
          PageAdvance *pa = getAdvance->myDepthStack->top();
          if( pa->p)
            delete pa->p;
          delete pa;
          pa = 0;
          getAdvance->myDepthStack->pop();
        }
        delete getAdvance->myDepthStack;
        getAdvance->myDepthStack = 0;
        getAdvance->resultTupleType->DeleteIfAllowed();
        delete getAdvance;
        getAdvance = 0;
        // Clear the slot so that a repeated CLOSE cannot double-free.
        local.addr = 0;
      }
      return 0;
    }
  }
  /* should not happen */
  return -1;
}

// wget value mapping for the four-argument form: no predicate function.
int
ISWebWgetFourParam (Word* args, Word& result, int message,
                    Word& local, Supplier s)
{
  const bool withPredicate = false;
  return wgetFun(args, result, message, local, s, withPredicate);
}
// wget value mapping for the five-argument form: the fifth argument is
// a predicate function that wgetFun evaluates for each link.
int
ISWebWgetFiveParam (Word* args, Word& result, int message,
                    Word& local, Supplier s)
{
  const bool withPredicate = true;
  return wgetFun(args, result, message, local, s, withPredicate);
}

/*
6.2.18 Value Mapping Function for Operator ~pageget, htmlget~

*/
// Value mapping shared by the ~pageget~ and ~htmlget~ variants. Stream
// operator yielding two-attribute tuples: the URL and, depending on
// onlyhtml, a copy of the page as HTML object or as Page object.
//
// args[0] : url  - start URL
// args[1] : bool - whether the links of a loaded HTML page are followed
// args[2] : int  - link depth limit; a negative value means unlimited
// args[3] : text - further hosts; appended with a comma to the host of
//                  the start URL and passed to Page::getUrlHosts
// args[4] : predicate on URLs, only evaluated if hasFunction is true
//
// OPEN    allocates the traversal state and stores it in local.addr.
// REQUEST searches for the next URL that loads as HTML and yields it;
//         non-HTML results are discarded and the search continues.
// CLOSE   releases the traversal state.
int
pagegetFun (Word* args, Word& result, int message, Word& local, Supplier s,
  bool hasFunction, bool onlyhtml)
{
//to check with map not ready
  ListExpr resultType;

  // Traversal state kept in local.addr between the calls.
  struct GetAdvance {stack<PageAdvance*>* myDepthStack;
                HashUrl *myHash; TupleType *resultTupleType;
              int depth; bool isnew; string *host;}* getAdvance;
  __TRACE__

  switch( message )
  {
    case OPEN:
    {
  __TRACE__
      getAdvance = new GetAdvance;
      getAdvance->myHash = new HashUrl;
      getAdvance->myDepthStack = new stack<PageAdvance*>;
      resultType = GetTupleResultType( s );
      getAdvance->resultTupleType = new TupleType( nl->Second( resultType ) );
      getAdvance->depth = 0;
      getAdvance->isnew = true;
      FText* t = ((FText*)args[3].addr);
      URL* u = ((URL*)args[0].addr);
      // Note: this local string 's' hides the Supplier parameter 's'
      // for the rest of the OPEN block.
      string s = t->Get();
      // Host list: the host of the start URL, plus the hosts given in
      // the fourth argument if there are any.
      if( s.length() > 0)
      {
        getAdvance->host = new string(u->getHost() + "," + t->Get());
      }
      else
      {
        getAdvance->host = new string(u->getHost());
      }

      local.addr = getAdvance;
    }
      return 0;

    case REQUEST:
  __TRACE__
    {
      // NOTE(review): local.addr is dereferenced without a null check.
      getAdvance = ((GetAdvance*) local.addr);
    PageAdvance *pa = NULL;
    bool extLinks = StdTypes::GetBool(args[1]);
    int depth = StdTypes::GetInt(args[2]);
    bool isUnlimited = (depth < 0);
    URL *exturl = NULL;
    // Keep searching while the start URL is still unprocessed or there
    // are pages left on the stack.
    while( getAdvance->isnew || !getAdvance->myDepthStack->empty() )
    {
  __TRACE__
      if( !getAdvance->myDepthStack->empty() )
        pa = getAdvance->myDepthStack->top();

      while( !exturl && pa)
      {
    __TRACE__
        // look for a link (a href) of the current page to load
        while( !exturl && pa->currentLink < pa->numberOfLinks)
        {
          // A link is accepted if its host is allowed, it was not
          // loaded before (hash) and, if given, the predicate function
          // accepts it.
          bool hostOk = true;
          URL *checkUrl = new URL((pa->p->getUrlHosts(pa->currentLink++,
              *getAdvance->host,hostOk)));
          //cout << *checkUrl << endl;
          cout << ".";
          if( checkUrl->IsDefined() && hostOk)
          {
            string hashstring = checkUrl->getProtocol() + "://"
              + checkUrl->getHost() + checkUrl->getPath();
            if(!getAdvance->myHash->IsDuplicate(hashstring))
            {
              //cout << "Defined and host o.k. and not duplicate" << endl;
              cout << hashstring << endl;
              if( hasFunction )
              {
                ArgVectorPointer funargs = qp->Argument(args[4].addr);
                (*funargs)[0] = SetWord(checkUrl);
                Word funresult;
                qp->Request(args[4].addr, funresult);
                // an undefined predicate result counts as false
                bool funerg;
                if (((Attribute*)funresult.addr)->IsDefined())
                {
                  funerg = ((CcBool*)funresult.addr)->GetBoolval();
                }
                else
                  funerg = false;

                if( funerg)
                {
                  exturl = checkUrl;
                }
                else
                {
                  delete checkUrl;
                  checkUrl = NULL;
                }
              }
              else
                exturl = checkUrl;
            }
            else
            {
              delete checkUrl;
              checkUrl = NULL;
            }
          }
          else
          {
            delete checkUrl;
            checkUrl = NULL;
          }
        }
        // page exhausted: drop it and continue with its parent
        if( !exturl )
        {
          // NOTE(review): the page is released with delete here, while
          // CLOSE uses DeleteIfAllowed() - confirm which is intended.
          delete pa->p;
          delete pa;
          pa = 0;
          getAdvance->myDepthStack->pop();
          --getAdvance->depth;
          if( !getAdvance->myDepthStack->empty() )
          {
            pa = getAdvance->myDepthStack->top();
          }
        }
      }
      if(getAdvance->isnew || exturl)
      {
  __TRACE__
        // load the URL and make a Page object if it is HTML,
        // otherwise discard it and continue searching
        URL* u;
        if(getAdvance->isnew)
        {
  __TRACE__
          // first request: start with the URL given as first argument
          u = ((URL*)args[0].addr);
          /*if( hasFunction )
          {
            ArgVectorPointer funargs = qp->Argument(args[4].addr);
            (*funargs)[0] = args[0];
            Word funresult;
            qp->Request(args[4].addr, funresult);
            bool funerg;
            if (((Attribute*)funresult.addr)->IsDefined())
            {
              funerg = ((CcBool*)funresult.addr)->GetBoolval();
            }
            else
              funerg = false;

            if( !funerg)
            {
  __TRACE__
              return CANCEL;
            }
          }*/

          // register the start URL as visited; the return value is
          // deliberately ignored
          string hashstring = u->getProtocol() + "://"
            + u->getHost() + u->getPath();
          getAdvance->myHash->IsDuplicate(hashstring);
          getAdvance->isnew = false;
          exturl = new URL(*u);
          cout << *u << endl;
        }
        u = exturl;
        string type;// = "text/html";
        bool isHtml = false;
          DateTime dt(instanttype);
    __TRACE__
        cout << "load url from web" << endl;
        string src = Page::getFromWeb(*u, type, isHtml, dt, true);
        //cout << "ready loading url" << endl;

//    __TRACE__
        // also treat the result as HTML if the reported type contains
        // the HTML type name
        if( !isHtml && (int)type.find(HTML::BasicType()) != -1)
          isHtml = true;
        cout << "isHTML: " << isHtml << ", " << type << endl;

        if( isHtml)
        {
    __TRACE__
          // make a page or html object of the html data, depending on
          // the value of onlyhtml
          Page *p;
          // this 'pa' hides the outer 'pa' within the block
          PageAdvance *pa = new PageAdvance();
          if( onlyhtml )
          {
            HTML h(dt, src, *u);

            p = new Page( h );
            //cout << "Inhalt" << p->getContent() << endl;
             pa->numberOfEmb = 0;
          }
          else
          {
            p = new Page(*u, type, src, dt);
            pa->numberOfEmb = p->numOfFiles();
          }
          // links are only followed if requested and the depth allows it
          if( extLinks && (isUnlimited || getAdvance->depth < depth ))
            pa->numberOfLinks = p->getNumberOfUrls();
          else
            pa->numberOfLinks = 0;
          cout << "Links: " << pa->numberOfLinks << endl << endl;
          pa->currentEmb = 0;
          pa->currentLink = 0;
          pa->p = p;
          ++getAdvance->depth;
          getAdvance->myDepthStack->push(pa);

          // Result tuple: the URL object itself plus a copy of the page;
          // the original page stays on the stack for the traversal.
          Tuple *elem = new Tuple( getAdvance->resultTupleType );
          if( onlyhtml )
          {
            // NOTE(review): relies on Page* being convertible to HTML*.
            HTML *hh = (HTML*)p;
            elem->PutAttribute(0,u);
            elem->PutAttribute(1,new HTML(*hh));
          }
          else
          {
            elem->PutAttribute(0,u);
            elem->PutAttribute(1,new Page(*p));
          }

          result.addr = elem;
          return YIELD;
        }
        else
        {
          // not HTML: drop the URL and continue with the next candidate
          pa = NULL;
          delete exturl;
          exturl = NULL;
        }
      }
    }

    return CANCEL;

    }

    case CLOSE:
  __TRACE__
    {
      // NOTE(review): local.addr is neither checked for null nor reset
      // after the state has been deleted.
      getAdvance = ((GetAdvance*) local.addr);
    delete getAdvance->myHash;
    getAdvance->myHash = 0;
    delete getAdvance->host;
    getAdvance->host = 0;
    // release the pages that are still on the stack
    while( !getAdvance->myDepthStack->empty())
    {
      PageAdvance *pa = getAdvance->myDepthStack->top();
      if( pa->p)
        pa->p->DeleteIfAllowed();
      delete pa;
      pa = 0;
      getAdvance->myDepthStack->pop();
    }
    delete getAdvance->myDepthStack;
    getAdvance->myDepthStack = 0;
    getAdvance->resultTupleType->DeleteIfAllowed();
      delete getAdvance;
    getAdvance = 0;
      return 0;
    }
  }
  /* should not happen */
  __TRACE__
  return -1;
}

// pageget value mapping, four-argument form: no predicate function,
// tuples carry Page objects.
int
ISWebPagegetFourParam (Word* args, Word& result, int message,
                       Word& local, Supplier s)
{
  const bool withPredicate = false;
  const bool htmlOnly = false;
  return pagegetFun(args, result, message, local, s,
                    withPredicate, htmlOnly);
}
// pageget value mapping, five-argument form: with predicate function,
// tuples carry Page objects.
int
ISWebPagegetFiveParam (Word* args, Word& result, int message,
                       Word& local, Supplier s)
{
  const bool withPredicate = true;
  const bool htmlOnly = false;
  return pagegetFun(args, result, message, local, s,
                    withPredicate, htmlOnly);
}
// htmlget value mapping, four-argument form: no predicate function,
// tuples carry HTML objects.
int
ISWebHtmlgetFourParam (Word* args, Word& result, int message,
                       Word& local, Supplier s)
{
  const bool withPredicate = false;
  const bool htmlOnly = true;
  return pagegetFun(args, result, message, local, s,
                    withPredicate, htmlOnly);
}
// htmlget value mapping, five-argument form: with predicate function,
// tuples carry HTML objects.
int
ISWebHtmlgetFiveParam (Word* args, Word& result, int message,
                       Word& local, Supplier s)
{
  const bool withPredicate = true;
  const bool htmlOnly = true;
  return pagegetFun(args, result, message, local, s,
                    withPredicate, htmlOnly);
}

/*
6.2.19 Selection functions  for Operator ~webequal~

*/
// Selection function for ~webequal~: returns the index into webequalMap
// (0 = url, 1 = html, 2 = page) according to the two argument types.
int webequalSelect( ListExpr args)
{
  ListExpr lhs = nl->First( args);
  ListExpr rhs = nl->Second( args);

  const bool bothUrl = nl->IsEqual(lhs, URL::BasicType()) &&
                       nl->IsEqual(rhs, URL::BasicType());
  if ( bothUrl )
    return(0);

  const bool bothHtml = nl->IsEqual(lhs, HTML::BasicType()) &&
                        nl->IsEqual(rhs, HTML::BasicType());
  if ( bothHtml )
    return(1);

  const bool bothPage = nl->IsEqual(lhs, Page::BasicType()) &&
                        nl->IsEqual(rhs, Page::BasicType());
  if ( bothPage )
    return(2);

  return(-1); //This point should never be reached
}

/*
6.2.20 Value Mapping Functions for Operators ~webequal~

*/
// webequal for two URLs: compares them with URL's operator== and writes
// the outcome to the bool result.
int
ISWebequalUrlFun (Word* args, Word& result, int message,
                  Word& local, Supplier s)
{
  __TRACE__
  URL* lhs = static_cast<URL*>(args[0].addr);
  URL* rhs = static_cast<URL*>(args[1].addr);

  // The query processor supplies the instance that receives the result.
  result = qp->ResultStorage(s);
  CcBool* out = static_cast<CcBool*>(result.addr);

  // Set(defined, value): the result is always marked as defined.
  out->Set(true, *lhs == *rhs);
  return 0;
}

// webequal for two HTML objects: compares them with HTML's operator==
// and writes the outcome to the bool result.
int
ISWebequalHtmlFun (Word* args, Word& result, int message,
                   Word& local, Supplier s)
{
  __TRACE__
  HTML* lhs = static_cast<HTML*>(args[0].addr);
  HTML* rhs = static_cast<HTML*>(args[1].addr);

  // The query processor supplies the instance that receives the result.
  result = qp->ResultStorage(s);
  CcBool* out = static_cast<CcBool*>(result.addr);

  // Set(defined, value): the result is always marked as defined.
  out->Set(true, *lhs == *rhs);
  return 0;
}

// webequal for two pages: compares them with Page's operator== and
// writes the outcome to the bool result.
int
ISWebequalPageFun (Word* args, Word& result, int message,
                   Word& local, Supplier s)
{
  __TRACE__
  Page* lhs = static_cast<Page*>(args[0].addr);
  Page* rhs = static_cast<Page*>(args[1].addr);

  // The query processor supplies the instance that receives the result.
  result = qp->ResultStorage(s);
  CcBool* out = static_cast<CcBool*>(result.addr);

  // Set(defined, value): the result is always marked as defined.
  out->Set(true, *lhs == *rhs);
  return 0;
}

/*
6.2.21 Value Mapping Array for Operators ~webequal, wget, pageget,htmlget~

*/
// Indexed by the value returned from webequalSelect.
ValueMapping webequalMap[] = {
  ISWebequalUrlFun,    // 0: url  x url
  ISWebequalHtmlFun,   // 1: html x html
  ISWebequalPageFun    // 2: page x page
};

// The following arrays hold the four-argument variant at index 0 and
// the five-argument variant at index 1.
ValueMapping webwgetMap[] = {
  ISWebWgetFourParam,
  ISWebWgetFiveParam
};

ValueMapping webpagegetMap[] = {
  ISWebPagegetFourParam,
  ISWebPagegetFiveParam
};

ValueMapping webhtmlgetMap[] = {
  ISWebHtmlgetFourParam,
  ISWebHtmlgetFiveParam
};


/*
6.3 Specifications

6.3.1 Specification of Operator ~protocol~

*/

// Operator description for ~protocol~: header list followed by
// signature, syntax, meaning and example.
const std::string protocolSpec =
    "( ( \"Signature\" \"Syntax\" \"Meaning\" \"Example\" ) "
    "( "
    "<text>(url) -> text</text--->"
    "<text>protocol( url )</text--->"
    "<text>Returns the protocol of the url</text--->"
    "<text>protocol( url1 )</text--->"
    ") )";

/*
6.3.2 Specification of Operator ~host~

*/
// Specification of operator ~host~: column header, then the signature,
// syntax, meaning and example text atoms in that order.
const string hostSpec =
  "( ( \"Signature\" \"Syntax\" \"Meaning\" \"Example\" ) "
  "( "
    "<text>(url) -> text</text--->"
    "<text>host( url )</text--->"
    "<text>Returns the host of the url</text--->"
    "<text>host( url1 )</text--->"
  ") )";

/*
6.3.3 Specification of Operator ~filename~

*/
// Specification of the operator registered under the name "webfilename":
// column header, then the signature, syntax, meaning and example text
// atoms. The syntax and example texts use the registered operator name
// ("webfilename"), since that is the name a query has to use.
const string filenameSpec =
  "( ( \"Signature\" \"Syntax\" \"Meaning\" \"Example\" ) "
  "( "
    "<text>(url) -> text</text--->"
    "<text>webfilename( url )</text--->"
    "<text>Returns the filename with path</text--->"
    "<text>webfilename( url1 )</text--->"
  ") )";

/*
6.3.4 Specification of Operator ~source~

*/
// Specification of operator ~source~: column header, then the signature,
// syntax, meaning and example text atoms in that order.
const string sourceSpec =
  "( ( \"Signature\" \"Syntax\" \"Meaning\" \"Example\" ) "
  "( "
    "<text>(html or page) -> url</text--->"
    "<text>source( html/page )</text--->"
    "<text>Returns the url of the html/page</text--->"
    "<text>source( html1 )</text--->"
  ") )";

/*
6.3.5 Specification of Operator ~createurl~

*/
// Specification of operator ~createurl~: column header, then the
// signature, syntax, meaning and example text atoms in that order.
const string createurlSpec =
  "( ( \"Signature\" \"Syntax\" \"Meaning\" \"Example\" ) "
  "( "
    "<text>(text) -> url</text--->"
    "<text>createurl( text )</text--->"
    "<text>Creates an url of the given text</text--->"
    "<text>createurl(text.../text--- )</text--->"
  ") )";

/*
6.3.6 Specification of Operator ~content~

*/
// Specification of operator ~content~: column header, then the signature,
// syntax, meaning and example text atoms in that order.
const string contentSpec =
  "( ( \"Signature\" \"Syntax\" \"Meaning\" \"Example\" ) "
  "( "
    "<text>(html) -> text</text--->"
    "<text>content( html )</text--->"
    "<text>Returns the content without tags</text--->"
    "<text>content(html1)</text--->"
  ") )";

/*
6.3.7 Specification of Operator ~urls~

*/
// Specification of operator ~urls~: column header, then the signature,
// syntax, meaning and example text atoms in that order.
const string urlsSpec =
  "( ( \"Signature\" \"Syntax\" \"Meaning\" \"Example\" ) "
  "( "
    "<text>(html or page) -> stream(url)</text--->"
    "<text>urls( html/page )</text--->"
    "<text>Returns all urls of the given object</text--->"
    "<text>urls(html1)</text--->"
  ") )";

/*
6.3.8 Specification of Operator ~containsurl~

*/
// Specification of operator ~containsurl~: column header, then the
// signature, syntax, meaning and example text atoms in that order.
const string containsurlSpec =
  "( ( \"Signature\" \"Syntax\" \"Meaning\" \"Example\" ) "
  "( "
    "<text>(html or page x url) -> bool</text--->"
    "<text>containsurl( html/page, url )</text--->"
    "<text>Checks if the given html contains the given url</text--->"
    "<text>containsurl(html1,url1)</text--->"
  ") )";

/*
6.3.9 Specification of Operator ~lastmodified~

*/
// Specification of operator ~lastmodified~: column header, then the
// signature, syntax, meaning and example text atoms in that order.
const string lastmodifiedSpec =
  "( ( \"Signature\" \"Syntax\" \"Meaning\" \"Example\" ) "
  "( "
    "<text>(html) -> instant</text--->"
    "<text>lastmodified( html )</text--->"
    "<text>Returns the last modified date of the given html</text--->"
    "<text>lastmodified(html1)</text--->"
  ") )";

/*
6.3.10 Specification of Operator ~metainfo~

*/
// Specification of operator ~metainfo~: column header, then the
// signature, syntax, meaning and example text atoms in that order.
const string metainfoSpec =
  "( ( \"Signature\" \"Syntax\" \"Meaning\" \"Example\" ) "
  "( "
    "<text>(html x string) -> text</text--->"
    "<text>metainfo( html, string )</text--->"
    "<text>Returns the metainfo for the key or an empty string</text--->"
    "<text>metainfo(html1, \"content\")</text--->"
  ") )";

/*
6.3.11 Specification of Operator ~metainfos~

*/
// Specification of operator ~metainfos~: column header, then the
// signature, syntax, meaning and example text atoms in that order.
const string metainfosSpec =
  "( ( \"Signature\" \"Syntax\" \"Meaning\" \"Example\" ) "
  "( "
    "<text>(html) -> stream(tuple([Key:string,Content:text]))</text--->"
    "<text>metainfos( html )</text--->"
    "<text>Returns all metainfos of the given html with key</text--->"
    "<text>metainfos(html1)</text--->"
  ") )";

/*
6.3.12 Specification of Operator ~numberof~

*/
// Specification of operator ~numberof~: column header, then the
// signature, syntax, meaning and example text atoms in that order.
const string numberofSpec =
  "( ( \"Signature\" \"Syntax\" \"Meaning\" \"Example\" ) "
  "( "
    "<text>(html x string)-> int</text--->"
    "<text>numberof( html, string )</text--->"
    "<text>counts the given string in the html</text--->"
    "<text>numberof(html1,\"test\")</text--->"
  ") )";

/*
6.3.13 Specification of Operator ~similar~

*/
// Specification of operator ~similar~: column header, then the signature,
// syntax, meaning and example text atoms in that order.
// The example spells the bool constant as TRUE, matching the examples of
// ~wget~, ~pageget~ and ~htmlget~ in this file.
const string similarSpec =
  "( ( \"Signature\" \"Syntax\" \"Meaning\" \"Example\" ) "
  "( "
    "<text>(html x html x int x bool) -> real</text--->"
    "<text>similar( html,html,depth,follow order )</text--->"
    "<text>calc.how similar the two htmls are to the given depth</text--->"
    "<text>similar(html1,html2,0,TRUE)</text--->"
  ") )";


/*
6.3.14 Specification of Operator ~extracthtml~

*/
// Specification of operator ~extracthtml~: column header, then the
// signature, syntax, meaning and example text atoms in that order.
const string extracthtmlSpec =
  "( ( \"Signature\" \"Syntax\" \"Meaning\" \"Example\" ) "
  "( "
    "<text>page -> html</text--->"
    "<text>extracthtml( page )</text--->"
    "<text>returns the html file of the given page</text--->"
    "<text>extracthtml(page1)</text--->"
  ") )";

/*
6.3.15 Specification of Operator ~numoffiles~

*/
// Specification of operator ~numoffiles~: column header, then the
// signature, syntax, meaning and example text atoms in that order.
const string numoffilesSpec =
  "( ( \"Signature\" \"Syntax\" \"Meaning\" \"Example\" ) "
  "( "
    "<text>page -> int</text--->"
    "<text>numoffiles( page )</text--->"
    "<text>returns the number of the embedded objects</text--->"
    "<text>numoffiles(page1)</text--->"
  ") )";

/*
6.3.16 Specification of Operator ~getfiles~

*/
// Specification of operator ~getfiles~: column header, then the
// signature, syntax, meaning and example text atoms in that order.
const string getfilesSpec =
  "( ( \"Signature\" \"Syntax\" \"Meaning\" \"Example\" ) "
  "( "
    "<text>page -> "
    "stream(tuple([Source:url, Type:string, File:binfile]))</text--->"
    "<text>getfiles( page1 )</text--->"
    "<text>returns a stream of tuples with all embedded files</text--->"
    "<text>getfiles(page1)</text--->"
  ") )";

/*
6.3.17 Specification of Operator ~wget~

*/
// Specification of operator ~wget~: column header, then the signature,
// syntax, meaning and example text atoms in that order. The example text
// spans two lines (embedded newline).
const string wgetSpec =
  "( ( \"Signature\" \"Syntax\" \"Meaning\" \"Example\" ) "
  "( "
    "<text>(url x bool x int x text x map:url->bool) "
    "-> stream(tuple([Source:url, Type:string, File:binfile]))</text--->"
    "<text>wget( url,extLinks,depth,hosts[,filterFkt] )</text--->"
    "<text>loads the given url and dependent files to depth d</text--->"
    "<text>wget(url1,TRUE,2, <text...</text...,\n"
    "fun(u:url) host(u) contains \"www\") consume</text--->"
  ") )";

/*
6.3.18 Specification of Operator ~pageget~

*/
// Specification of operator ~pageget~: column header, then the signature,
// syntax, meaning and example text atoms in that order.
const string pagegetSpec =
  "( ( \"Signature\" \"Syntax\" \"Meaning\" \"Example\" ) "
  "( "
    "<text>(url x bool x int x text x map:url->bool) "
    "-> stream(tuple([Source:url, Page:page]))</text--->"
    "<text>pageget( url,extLinks,depth,hosts[,filterFkt] )</text--->"
    "<text>loads the given html-url and dependent html pages</text--->"
    "<text>pageget(url1,TRUE,2, <text...</text...) consume</text--->"
  ") )";

/*
6.3.19 Specification of Operator ~htmlget~

*/
// Specification of operator ~htmlget~: column header, then the signature,
// syntax, meaning and example text atoms in that order.
const string htmlgetSpec =
  "( ( \"Signature\" \"Syntax\" \"Meaning\" \"Example\" ) "
  "( "
    "<text>(url x bool x int x text x map:url->bool) "
    "-> stream(tuple([Source:url, Html:html]))</text--->"
    "<text>htmlget( url,extLinks,depth,hosts[,filterFkt] )</text--->"
    "<text>loads the given html-url and dependent html pages</text--->"
    "<text>htmlget(url1,TRUE,2, <text...</text...) consume</text--->"
  ") )";

/*
6.3.20 Specification of Operator ~webequal~

*/
// Specification of operator ~webequal~: column header, then the
// signature, syntax, meaning and example text atoms in that order.
// The signature documents what the value mappings implement: two
// arguments of the same type (url, html or page) and a bool result.
const string webequalSpec =
  "( ( \"Signature\" \"Syntax\" \"Meaning\" \"Example\" ) "
  "( "
    "<text>(t x t) -> bool, t element of {url,html,page}</text--->"
    "<text>webequal( t, t )</text--->"
    "<text>returns true if the params equal else false</text--->"
    "<text>webequal(html1, html2)</text--->"
  ") )";

/*
6.4 Definition of Operators

6.4.1 Definition of Operator ~protocol~

*/

// Operator ~protocol~, registered with one value mapping function and the
// trivial selection function.
Operator webprotocol(
  "protocol",                    // operator name
  protocolSpec,                  // specification list
  protocolFun,                   // value mapping function
  Operator::SimpleSelect,        // trivial selection function
  protocolHostFilenameTypeMap    // type mapping function
);

/*
6.4.2 Definition of Operator ~host~

*/

// Operator ~host~, registered with one value mapping function and the
// trivial selection function.
Operator webhost(
  "host",                        // operator name
  hostSpec,                      // specification list
  hostFun,                       // value mapping function
  Operator::SimpleSelect,        // trivial selection function
  protocolHostFilenameTypeMap    // type mapping function
);

/*
6.4.3 Definition of Operator ~filename~

*/

// Operator registered under the name "webfilename", with one value
// mapping function and the trivial selection function.
Operator webfilename(
  "webfilename",                 // operator name
  filenameSpec,                  // specification list
  filenameFun,                   // value mapping function
  Operator::SimpleSelect,        // trivial selection function
  protocolHostFilenameTypeMap    // type mapping function
);

/*
6.4.4 Definition of Operator ~source~

*/

// Operator ~source~, registered with one value mapping function and the
// trivial selection function.
Operator websource(
  "source",                  // operator name
  sourceSpec,                // specification list
  sourceFun,                 // value mapping function
  Operator::SimpleSelect,    // trivial selection function
  sourceTypeMap              // type mapping function
);

/*
6.4.5 Definition of Operator ~createurl~

*/

// Operator ~createurl~, registered with one value mapping function and
// the trivial selection function.
Operator webcreateurl(
  "createurl",               // operator name
  createurlSpec,             // specification list
  createurlFun,              // value mapping function
  Operator::SimpleSelect,    // trivial selection function
  createurlTypeMap           // type mapping function
);

/*
6.4.6 Definition of Operator ~content~

*/

// Operator ~content~, registered with one value mapping function and the
// trivial selection function.
Operator webcontent(
  "content",                 // operator name
  contentSpec,               // specification list
  contentFun,                // value mapping function
  Operator::SimpleSelect,    // trivial selection function
  contentTypeMap             // type mapping function
);

/*
6.4.7 Definition of Operator ~urls~

*/

// Operator ~urls~, registered with one value mapping function and the
// trivial selection function.
Operator weburls(
  "urls",                    // operator name
  urlsSpec,                  // specification list
  urlsFun,                   // value mapping function
  Operator::SimpleSelect,    // trivial selection function
  urlsTypeMap                // type mapping function
);

/*
6.4.8 Definition of Operator ~containsurl~

*/

// Operator ~containsurl~, registered with one value mapping function and
// the trivial selection function.
Operator webcontainsurl(
  "containsurl",             // operator name
  containsurlSpec,           // specification list
  containsurlFun,            // value mapping function
  Operator::SimpleSelect,    // trivial selection function
  containsurlTypeMap         // type mapping function
);

/*
6.4.9 Definition of Operator ~lastmodified~

*/

// Operator ~lastmodified~, registered with one value mapping function and
// the trivial selection function.
Operator weblastmodified(
  "lastmodified",            // operator name
  lastmodifiedSpec,          // specification list
  lastmodifiedFun,           // value mapping function
  Operator::SimpleSelect,    // trivial selection function
  lastmodifiedTypeMap        // type mapping function
);

/*
6.4.10 Definition of Operator ~metainfo~

*/

// Operator ~metainfo~, registered with one value mapping function and the
// trivial selection function.
Operator webmetainfo(
  "metainfo",                // operator name
  metainfoSpec,              // specification list
  metainfoFun,               // value mapping function
  Operator::SimpleSelect,    // trivial selection function
  metainfoTypeMap            // type mapping function
);

/*
6.4.11 Definition of Operator ~metainfos~

*/

// Operator ~metainfos~, registered with one value mapping function and
// the trivial selection function.
Operator webmetainfos(
  "metainfos",               // operator name
  metainfosSpec,             // specification list
  metainfosFun,              // value mapping function
  Operator::SimpleSelect,    // trivial selection function
  metainfosTypeMap           // type mapping function
);

/*
6.4.12 Definition of Operator ~numberof~

*/

// Operator ~numberof~, registered with one value mapping function and the
// trivial selection function.
Operator webnumberof(
  "numberof",                // operator name
  numberofSpec,              // specification list
  numberofFun,               // value mapping function
  Operator::SimpleSelect,    // trivial selection function
  numberofTypeMap            // type mapping function
);

/*
6.4.13 Definition of Operator ~similar~

*/

// Operator ~similar~, registered with one value mapping function and the
// trivial selection function.
Operator websimilar(
  "similar",                 // operator name
  similarSpec,               // specification list
  similarFun,                // value mapping function
  Operator::SimpleSelect,    // trivial selection function
  similarTypeMap             // type mapping function
);

/*
6.4.14 Definition of Operator ~extracthtml~

*/

// Operator ~extracthtml~, registered with one value mapping function and
// the trivial selection function.
Operator webextracthtml(
  "extracthtml",             // operator name
  extracthtmlSpec,           // specification list
  extracthtmlFun,            // value mapping function
  Operator::SimpleSelect,    // trivial selection function
  extracthtmlTypeMap         // type mapping function
);

/*
6.4.15 Definition of Operator ~numoffiles~

*/

// Operator ~numoffiles~, registered with one value mapping function and
// the trivial selection function.
Operator webnumoffiles(
  "numoffiles",              // operator name
  numoffilesSpec,            // specification list
  numoffilesFun,             // value mapping function
  Operator::SimpleSelect,    // trivial selection function
  numoffilesTypeMap          // type mapping function
);

/*
6.4.16 Definition of Operator ~getfiles~

*/

// Operator ~getfiles~, registered with one value mapping function and the
// trivial selection function.
Operator webgetfiles(
  "getfiles",                // operator name
  getfilesSpec,              // specification list
  getfilesFun,               // value mapping function
  Operator::SimpleSelect,    // trivial selection function
  getfilesTypeMap            // type mapping function
);

/*
6.4.17 Definition of Operator ~wget~

*/

// Operator ~wget~, registered with the two value mappings of webwgetMap
// and a dedicated selection function.
Operator webwget(
  "wget",                    // operator name
  wgetSpec,                  // specification list
  2,                         // number of value mapping functions
  webwgetMap,                // value mapping array
  webwget_pagegetSelect,     // selection function
  wgetTypeMap                // type mapping function
);

/*
6.4.18 Definition of Operator ~pageget~

*/

// Operator ~pageget~, registered with the two value mappings of
// webpagegetMap and the selection function it shares with ~wget~.
Operator webpageget(
  "pageget",                 // operator name
  pagegetSpec,               // specification list
  2,                         // number of value mapping functions
  webpagegetMap,             // value mapping array
  webwget_pagegetSelect,     // selection function
  pagegetTypeMap             // type mapping function
);

/*
6.4.19 Definition of Operator ~htmlget~

*/

// Operator ~htmlget~, registered with the two value mappings of
// webhtmlgetMap and the selection function it shares with ~wget~.
Operator webhtmlget(
  "htmlget",                 // operator name
  htmlgetSpec,               // specification list
  2,                         // number of value mapping functions
  webhtmlgetMap,             // value mapping array
  webwget_pagegetSelect,     // selection function
  htmlgetTypeMap             // type mapping function
);

/*
6.4.20 Definition of Operator ~webequal~

*/

// Operator ~webequal~, registered with the three value mappings of
// webequalMap (url, html, page) and a dedicated selection function.
Operator webequal(
  "webequal",                // operator name
  webequalSpec,              // specification list
  3,                         // number of value mapping functions
  webequalMap,               // value mapping array
  webequalSelect,            // selection function
  webequalTypeMap            // type mapping function
);

/*
7. Algebra

*/
// Algebra class of the Web algebra. Its constructor registers the type
// constructors url, html and page (each associated with kind DATA) and
// all operators defined in this file.
class WebAlgebra : public Algebra
{
 public:
  WebAlgebra() : Algebra()
  {
    // --- type constructors, each associated with kind DATA ---
    AddTypeConstructor(&url);
    url.AssociateKind(Kind::DATA());

    AddTypeConstructor(&html);
    html.AssociateKind(Kind::DATA());

    AddTypeConstructor(&page);
    page.AssociateKind(Kind::DATA());

    // --- operators on url ---
    AddOperator(&webprotocol);
    AddOperator(&webhost);
    AddOperator(&webfilename);
    AddOperator(&websource);
    AddOperator(&webcreateurl);

    // --- operators on html / page content ---
    AddOperator(&webcontent);
    AddOperator(&weburls);
    AddOperator(&webcontainsurl);
    AddOperator(&weblastmodified);
    AddOperator(&webmetainfo);
    AddOperator(&webmetainfos);
    AddOperator(&webnumberof);
    AddOperator(&websimilar);
    AddOperator(&webextracthtml);
    AddOperator(&webnumoffiles);
    AddOperator(&webgetfiles);

    // --- download operators and equality ---
    AddOperator(&webwget);
    AddOperator(&webpageget);
    AddOperator(&webhtmlget);
    AddOperator(&webequal);
  }

  ~WebAlgebra() {}
};

/*
8. Initialization

Each algebra module needs an initialization function. The algebra manager
has a reference to this function if this algebra is included in the list
of required algebras, thus forcing the linker to include this module.

The algebra manager invokes this function to get a reference to the instance
of the algebra class and to provide references to the global nested list
container (used to store constructor, type, operator and object information)
and to the query processor.

The function has a C interface to make it possible to load the algebra
dynamically at runtime.

*/

// Initialization function of the Web algebra (C linkage).
// Stores the given query processor and nested list references in the
// globals qp and nl and returns a newly created WebAlgebra instance.
extern "C" Algebra*
InitializeWebAlgebra( NestedList* nlRef, QueryProcessor* qpRef )
{
  qp = qpRef;
  nl = nlRef;

  Algebra* const algebra = new WebAlgebra();
  return algebra;
}