secondo/Tools/NestedLists/NLLex.l

/*
----
This file is part of SECONDO.

Copyright (C) 2004, University in Hagen, Department of Computer Science,
Database Systems for New Applications.

SECONDO is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

SECONDO is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with SECONDO; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
----

//paragraph [1] Title: [{\Large \bf ] [}]

[1] Scanner of Stable Nested Lists

Copyright (C) 1996 Gral Support Team

February 1996 Holger Schenk

December 2003 Markus Spiekermann, A new {file} pattern and Scanner states have been introduced.

December 2004 M. Spiekermann. The pattern otherchar was redefined; ANSI C backslash codes were
removed. Moreover, a calculation of the input position has been implemented.

The {file} pattern can be used in lists for reading in (binary) data from a file. Since the
lists are designed as a textual representation of data, the content of these files will be encoded
into a format called base64 which maps 24 bits of input to 4 letters of an alphabet of
64 characters. An interface for encoding and decoding is provided by the class Base64.


*/


%{
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include <fstream>
#include <sstream>
#include <string>

using namespace std;
#include "Base64.h"
#include "NestedList.h"
#include "NLParser.h"
#include "NLParser.tab.hpp"
#include "StringUtils.h"

#include "CharTransform.h"
#include "LogMsg.h"


// variables only visible for this
// compilation unit
// static char buffer[MAX_STRINGSIZE+1];
//static char *s = buffer;

// Buffer collecting the content of the text atom that is currently being
// scanned. It is allocated by the rules that open a text atom and deleted
// by the rules that close it.
static ostringstream* text = 0;

// Receives the value extracted by the {number} rule before it is turned
// into a real atom.
static double f;

// global variables
// Position bookkeeping updated by the rule actions through the two macros
// below: a line counter, a column counter and the text matched since the
// last reset. None of them is given an explicit initial value here.
int scanNL_lines;
int scanNL_cols;
string scanNL_str;

// UPDATE_COL advances the column by the length of the current match and
// appends the matched text to scanNL_str.
// RESET_COL clears column and text and advances the line counter by one.
// NOTE: both macros expand to several statements and are written without a
// trailing semicolon in the actions, so they must not be used as the single
// statement of an unbraced if or loop.
#define UPDATE_COL scanNL_cols += yyleng; scanNL_str += string(yytext);
#define RESET_COL scanNL_cols = 0; scanNL_str = ""; scanNL_lines++;

// the yylex() function that's actually used
// (the generated scanning routine is emitted as a member of NLScanner)
#define YY_DECL int NLScanner::yylex()

#include "NLScanner.h"        // The interface of the derived class


%}
/* Names NLScanner as the scanner class; see the flex documentation of the
   yyclass option. */
%option yyclass="NLScanner"

/* otherChar: any character that is not a parenthesis, a double quote, an
   apostrophe, a letter, a digit, an underscore, a blank or one of the
   listed control and whitespace escapes. */
otherChar       [^\(\)\"\'A-Za-z0-9_ \a\b\f\n\r\t\v]
letter          [a-zA-Z]
digit           [0-9]
/* ident: a letter followed by any number of letters, digits or underscores. */
ident           {letter}({letter}|{digit}|_)*
/* math: one or more otherChar characters. */
math            {otherChar}{otherChar}*
/* symbol: either a math sequence or an identifier. */
symbol          {math}|{ident}
/* num1: optional sign, digits, optional dot, optional exponent.
   num2: optional sign, optional digits, a dot, digits, optional exponent. */
num1	        [-+]?{digit}+\.?([eE][-+]?{digit}+)?
num2	        [-+]?{digit}*\.{digit}+([eE][-+]?{digit}+)?
number	        {num1}|{num2}

/* Delimiters that open and close a comment inside a list. */
commentstart	"_!"
commentend		"!_"

/* string: a double-quoted sequence; a backslash followed by a double quote
   is accepted inside the quotes. */
string          \"([^\"]|\\\")*\"

/* file: the opening file tag, at least one character other than the
   less-than sign, and the closing file tag. */
file		"<file>"[^<]+"</file--->"


/* The scanner does not call yywrap at the end of input. */
%option noyywrap

/* Exclusive start conditions: rules without a start condition prefix are
   not active while the scanner is in one of these states. */
%x TEXT
%x TEXTSIMPLE
%x COMMENT

%%

<INITIAL>\r\n|\n|\r       {
                            /* A line break outside of atoms. A carriage return
                               followed by a line feed is matched as ONE break, so
                               files with CR LF line ends are not counted twice.
                               RESET_COL clears column and line text and advances
                               the line counter. */
                            RESET_COL
                          }

<INITIAL>[ \a\b\f\t\v]+   {
                            /* Runs of blanks and control characters produce no
                               token; only the position bookkeeping advances. */
                            UPDATE_COL
                          }

<INITIAL>-?[0-9]+       { UPDATE_COL
                        /* Convert the digits with a checked stream extraction.
                           The extraction fails when the value does not fit into
                           an int, so an oversized literal is reported as an
                           error instead of being converted with undefined
                           behaviour. */
                        std::istringstream digits( yytext );
                        int value = 0;
                        if ( !(digits >> value) )
                        {
                          cmsg.error() << "Integer " << yytext
                                       << " is out of range!" << endl;
                          cmsg.send();
                          return (ZZERROR);
                        }
                        yylval = lexnl->IntAtom( value );
                        return (ZZINTEGER);
                        }

<INITIAL>{number}       {
                          UPDATE_COL
                          /* Extract the matched text into the file-static
                             double f and wrap it in a real atom. */
                          std::istringstream numberText( yytext );
                          numberText >> f;
                          yylval = lexnl->RealAtom( f );
                          return ZZREAL;
                        }

<INITIAL>"TRUE"         {
                          /* The literal TRUE yields a boolean atom holding true. */
                          UPDATE_COL
                          yylval = lexnl->BoolAtom( true );
                          return ZZBOOLEAN;
                        }

<INITIAL>"FALSE"        {
                          /* The literal FALSE yields a boolean atom holding false. */
                          UPDATE_COL
                          yylval = lexnl->BoolAtom( false );
                          return ZZBOOLEAN;
                        }


<INITIAL>{commentstart} {
                          /* The opening delimiter switches to the exclusive
                             COMMENT state; no token is returned. */
                          UPDATE_COL
                          BEGIN(COMMENT);
                        }
<COMMENT>{commentend}   {
                          /* The closing delimiter returns to normal scanning. */
                          UPDATE_COL
                          BEGIN(INITIAL);
                        }
<COMMENT>\n             {
                          /* A line break inside a comment. RESET_COL already
                             advances scanNL_lines, so no further increment is
                             made here; a second one would count every comment
                             line twice. */
                          RESET_COL
                        }

<COMMENT>.              {
                          /* Comment content is skipped; only the position
                             bookkeeping advances. */
                          UPDATE_COL
                        }


<INITIAL>"<text>"       {
                          UPDATE_COL
                          /* Enter the exclusive TEXT state and allocate a fresh
                             buffer that collects the content of the atom. */
                          BEGIN(TEXT);
                          text = new ostringstream();
                        }

<TEXT>.|\n              {
                          /* Any single character of the text content, line
                             breaks included, is copied into the buffer. */
                          const char current = yytext[0];
                          UPDATE_COL
                          if ( current == '\n' )
                          {
                            /* A line break advances the line counter and
                               restarts the column count. */
                            scanNL_cols = 0;
                            scanNL_lines++;
                          }
                          text->put( current );
                        }

<TEXT>"\\</text--->"     {
                           /* A backslash in front of the end tag makes the tag
                              part of the content; the backslash is dropped. */
                           UPDATE_COL
                           *text << "</text--->";
                         }

<TEXT>"\\\\"     {
                   /* A doubled backslash stands for one backslash. */
                   UPDATE_COL
                   *text << "\\";
                 }


<TEXT>"</text--->"      { UPDATE_COL
                        /* The end tag closes the text atom: return to normal
                           scanning and move the collected content into a new
                           text atom. */
                        BEGIN(INITIAL);
                        ListExpr newText = lexnl->TextAtom();
                        lexnl->AppendText( newText, text->str() );
                        /* Release the buffer and clear the file-static pointer
                           so that it does not dangle until the next text atom
                           is opened. */
                        delete text;
                        text = 0;
                        /* pass text node and token to the parser */
                        yylval = newText;
                        return (ZZTEXT);
                        }

<INITIAL>"'"       {
                     UPDATE_COL
                     /* An apostrophe opens a simple text atom: enter the
                        exclusive TEXTSIMPLE state and allocate a fresh buffer
                        for the content. */
                     BEGIN(TEXTSIMPLE);
                     text = new ostringstream();
                   }

<TEXTSIMPLE>[^']|\n              {
                          /* Every character other than the closing delimiter,
                             line breaks included, is copied into the buffer. */
                          const char current = yytext[0];
                          UPDATE_COL
                          if ( current == '\n' )
                          {
                            /* A line break advances the line counter and
                               restarts the column count. */
                            scanNL_cols = 0;
                            scanNL_lines++;
                          }
                          text->put( current );
                        }

<TEXTSIMPLE>"\\'"     {
                        /* A backslash in front of the delimiter makes the
                           delimiter part of the content; the backslash is
                           dropped. */
                        UPDATE_COL
                        *text << "'";
                      }

<TEXTSIMPLE>"\\\\"     {
                         /* A doubled backslash stands for one backslash. */
                         UPDATE_COL
                         *text << "\\";
                       }


<TEXTSIMPLE>"'"      { UPDATE_COL
                        /* The closing delimiter ends the simple text atom:
                           return to normal scanning and move the collected
                           content into a new text atom. */
                        BEGIN(INITIAL);
                        ListExpr newText = lexnl->TextAtom();
                        lexnl->AppendText( newText, text->str() );
                        /* Release the buffer and clear the file-static pointer
                           so that it does not dangle until the next text atom
                           is opened. */
                        delete text;
                        text = 0;
                        /* pass text node and token to the parser */
                        yylval = newText;
                        return (ZZTEXT);
                        }

<INITIAL>{string}       { UPDATE_COL
      /* Resolve the escape sequences in the matched text, which still
         carries the enclosing double quotes. */
      string s(yytext);
      s = stringutils::replaceAll(s,"\\\"","\"");
      s = stringutils::replaceAll(s,"\\\\","\\");

      /* Length of the content between the quotes. It is kept in a local
         variable; yyleng belongs to the scanner and is left untouched. */
      string::size_type contentLen = s.length() - 2;
      if ( contentLen > MAX_STRINGSIZE )
      {
        /* Over-long strings are cut to MAX_STRINGSIZE characters. */
        contentLen = MAX_STRINGSIZE;
        cmsg.warning() << "Warning: string " << yytext << " was truncated!" << endl;
        cmsg.send();
      }

      /* Drop the opening quote and keep contentLen characters. */
      yylval = lexnl->StringAtom( s.substr(1, contentLen) );
      return (ZZSTRING);
      }


<INITIAL>{symbol}  {
                     UPDATE_COL
                     /* Symbols of at most MAX_STRINGSIZE characters become
                        symbol atoms; longer ones are reported as an error. */
                     if ( yyleng <= MAX_STRINGSIZE )
                     {
                       yylval = lexnl->SymbolAtom( yytext );
                       return ZZSYMBOL;
                     }
                     cmsg.error() << "Symbol " << yytext << " too long!" << endl;
                     cmsg.send();
                     return ZZERROR;
                   }

<INITIAL>{file}  { /* files are expanded to base64 encoded text atoms */
                 UPDATE_COL

                 /* Cut the file name out from between the two tags, strip
                    surrounding blanks and pass it through expandVar. */
                 static const int textStartLen = string("<file>").size();
                 static const int textEndLen = string("</file--->").size();
                 const int contentLen = yyleng - textStartLen - textEndLen;

                 string fileNameStr(yytext);
                 fileNameStr = trim( fileNameStr.substr(textStartLen,contentLen) );
                 fileNameStr = expandVar(fileNameStr);

                 /* Open the file in binary mode. On failure an error is
                    reported and no text atom is created. */
                 ifstream inFile( fileNameStr.c_str(), ios::binary );
                 if ( !inFile.is_open() ) {
                    cmsg.error() << "Can not open file '" << fileNameStr << "'" << endl;
                    cmsg.send();
                    return (ZZERROR);
                 }

                 /* Encode the file content. The encoder is a local object,
                    so it is destroyed on every way out of this action. */
                 stringstream textStream;
                 Base64 encoder;
                 encoder.encodeStream(inFile, textStream);

                 /* The encoded content becomes the value of a new text atom. */
                 ListExpr newText = lexnl->TextAtom();
                 lexnl->AppendText(newText,textStream.str());

                 /* pass text node and token to the parser */
                 yylval = newText;
                 return (ZZTEXT);
                 }


<INITIAL>"("   {
                 /* An opening parenthesis starts a list. */
                 UPDATE_COL
                 return ZZOPEN;
               }

<INITIAL>")"   {
                 /* A closing parenthesis ends a list. */
                 UPDATE_COL
                 return ZZCLOSE;
               }

.              {
                 /* Fallback for any single character that no other rule
                    accepts: the position advances and an error token is
                    returned. The rule has no start condition prefix, so it is
                    not active in the exclusive states. */
                 UPDATE_COL
                 return ZZERROR;
               }
%%