Files
secondo/Tools/NestedLists/NLLex.l
2026-01-23 17:03:45 +08:00

300 lines
8.4 KiB
Plaintext

/*
----
This file is part of SECONDO.
Copyright (C) 2004, University in Hagen, Department of Computer Science,
Database Systems for New Applications.
SECONDO is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
SECONDO is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with SECONDO; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
----
//paragraph [1] Title: [{\Large \bf ] [}]
[1] Scanner of Stable Nested Lists
Copyright (C) 1996 Gral Support Team
February 1996 Holger Schenk
December 2003 Markus Spiekermann, A new {file} pattern and Scanner states have been introduced.
December 2004 M. Spiekermann. The pattern otherChar was redefined; ANSI C backslash codes are
removed. Moreover, a calculation of the input position has been implemented.
The {file} pattern can be used in lists for reading in (binary) data from a file. Since the
lists are designed as textual representation of data, the content of these files will be encoded
into a format called base64 which maps 24 bits of input to 4 letters from an alphabet of
64 characters. An interface for encoding and decoding is provided by the class Base64.
*/
%{
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include <fstream>
#include <sstream>
#include <string>
using namespace std;
#include "Base64.h"
#include "NestedList.h"
#include "NLParser.h"
#include "NLParser.tab.hpp"
#include "StringUtils.h"
#include "CharTransform.h"
#include "LogMsg.h"
// Variables only visible within this compilation unit.
// (An earlier fixed-size character buffer is kept below, commented out.)
// static char buffer[MAX_STRINGSIZE+1];
//static char *s = buffer;
// Stream collecting the content of the text atom currently being scanned.
// It is allocated by the rules that open a text atom and deleted by the
// rules that close it.
static ostringstream* text = 0;
// Receives the value extracted by the {number} rule.
static double f;
// Global variables (external linkage) describing the input position.
// scanNL_lines: line counter, incremented by the newline handling rules.
int scanNL_lines;
// scanNL_cols: column counter, advanced by yyleng for every matched token.
int scanNL_cols;
// scanNL_str: accumulates the matched text; cleared by RESET_COL and by the
// INITIAL newline rule.
string scanNL_str;
// UPDATE_COL advances the column by the length of the current match and
// appends the matched text to scanNL_str. It expands to two statements and is
// used without a trailing semicolon, so it must not be the sole body of an
// unbraced if/else.
#define UPDATE_COL scanNL_cols += yyleng; scanNL_str += string(yytext);
// RESET_COL starts a new line: it clears the column and the collected text
// and increments the line counter (callers must not increment it again).
#define RESET_COL scanNL_cols = 0; scanNL_str = ""; scanNL_lines++;
// the yylex() function that's actually used
#define YY_DECL int NLScanner::yylex()
#include "NLScanner.h" // The interface of the derived class
%}
/* Name of the scanner class derived from the generated lexer base class. */
%option yyclass="NLScanner"
/* otherChar: any character that is not a parenthesis, a quote, a letter, a digit, an underscore, a blank or one of the listed control characters. */
otherChar [^\(\)\"\'A-Za-z0-9_ \a\b\f\n\r\t\v]
letter [a-zA-Z]
digit [0-9]
/* ident: a letter followed by any number of letters, digits or underscores. */
ident {letter}({letter}|{digit}|_)*
/* math: one or more otherChar characters. */
math {otherChar}{otherChar}*
/* symbol: either a math sequence or an identifier. */
symbol {math}|{ident}
/* num1: digits with an optional trailing dot. num2: optional digits, a dot and at least one digit. Both accept an optional sign and an optional exponent. */
num1 [-+]?{digit}+\.?([eE][-+]?{digit}+)?
num2 [-+]?{digit}*\.{digit}+([eE][-+]?{digit}+)?
number {num1}|{num2}
/* Delimiters of comments inside a list. */
commentstart "_!"
commentend "!_"
/* string: text in double quotes; a quote preceded by a backslash does not terminate it. */
string \"([^\"]|\\\")*\"
/* file: a non-empty name without the less-than character, enclosed in the file tags. */
file "<file>"[^<]+"</file--->"
/* The scanner does not call yywrap at the end of the input. */
%option noyywrap
/* Exclusive start conditions: rules without a start condition prefix are not active in them. */
%x TEXT
%x TEXTSIMPLE
%x COMMENT
%%
<INITIAL>\r\n|\n|\r { /* Line break written as LF, CR or CRLF. A CRLF pair is matched as a
     whole (longest match) so that it counts as one line, not two.
     RESET_COL clears column and collected text and increments the
     line counter. */
  RESET_COL
}
<INITIAL>[ \a\b\f\t\v]+ { /* Blanks and the listed control characters produce no token; only the position bookkeeping is updated. */ UPDATE_COL }
<INITIAL>-?[0-9]+ { UPDATE_COL
  /* Integer literal with optional minus sign. This rule is listed before
     the number rule, so it wins when both match the same text.
     NOTE(review): atoi has undefined behavior for values outside the
     range of int; very long digit sequences are not checked here. */
  const int intValue = atoi( yytext );
  yylval = lexnl->IntAtom( intValue );
  return ZZINTEGER;
}
<INITIAL>{number} { UPDATE_COL
  /* Real literal: the matched text is converted with a string stream into
     the file level variable f, which is then wrapped into a real atom. */
  std::stringstream numberText( yytext );
  numberText >> f;
  yylval = lexnl->RealAtom( f );
  return ZZREAL;
}
<INITIAL>"TRUE" { UPDATE_COL
  /* Boolean literal TRUE. Listed before the symbol rule, so the exact
     word TRUE becomes a boolean atom instead of a symbol. */
  const bool value = true;
  yylval = lexnl->BoolAtom( value );
  return ZZBOOLEAN;
}
<INITIAL>"FALSE" { UPDATE_COL
  /* Boolean literal FALSE, handled like TRUE above. */
  const bool value = false;
  yylval = lexnl->BoolAtom( value );
  return ZZBOOLEAN;
}
<INITIAL>{commentstart} { UPDATE_COL
/* Comment opener: switch to the exclusive COMMENT state. No token is returned. */
BEGIN(COMMENT);
}
<COMMENT>{commentend} { UPDATE_COL
/* Comment closer: resume scanning in the INITIAL state. */
BEGIN(INITIAL);
}
<COMMENT>\n { /* Line break inside a comment. RESET_COL already increments
     scanNL_lines, so the counter must not be incremented a second
     time here; otherwise every comment line is counted twice. */
  RESET_COL
}
<COMMENT>. { UPDATE_COL
/* Any other character of a comment is skipped; only the position is tracked. */
}
<INITIAL>"<text>" { UPDATE_COL
/* Opening tag of a text atom: change to state TEXT and allocate a fresh
   stream that collects the content until the closing tag is matched. */
BEGIN(TEXT);
text = new ostringstream();
}
<TEXT>.|\n { /* One ordinary character of a text atom is copied to the stream.
     A line break starts a new input line, so the position is reset
     with RESET_COL exactly as in the INITIAL state. This also clears
     scanNL_str, which was previously left holding the text of earlier
     lines while the column was already reset to zero. */
  if ( yytext[0] == '\n' ) {
    RESET_COL
  } else {
    UPDATE_COL
  }
  text->put( yytext[0] );
}
<TEXT>"\\</text--->" { UPDATE_COL
  /* Escaped closing tag: the tag itself is stored without the leading
     backslash and the scanner stays in state TEXT. */
  *text << "</text--->";
}
<TEXT>"\\\\" { UPDATE_COL
  /* Escaped backslash: a single backslash is stored. */
  text->put( '\\' );
}
<TEXT>"</text--->" { UPDATE_COL
  /* Closing tag of a text atom: return to the INITIAL state, move the
     collected content into a new text atom and release the stream. */
  BEGIN(INITIAL);
  ListExpr newText = lexnl->TextAtom();
  lexnl->AppendText( newText, text->str() );
  delete text;
  /* Do not leave the file level pointer dangling after the delete. */
  text = 0;
  /* pass text node and token to the parser */
  yylval = newText;
  return (ZZTEXT);
}
<INITIAL>"'" { UPDATE_COL
/* Opening single quote of a text atom: change to state TEXTSIMPLE (not TEXT)
   and allocate a fresh stream that collects the content. */
BEGIN(TEXTSIMPLE);
text = new ostringstream();
}
<TEXTSIMPLE>[^']|\n { /* One ordinary character of a single quoted text atom is copied
     to the stream. A line break starts a new input line, so the
     position is reset with RESET_COL exactly as in the INITIAL state.
     This also clears scanNL_str, which was previously left holding
     the text of earlier lines while the column was already zero. */
  if ( yytext[0] == '\n' ) {
    RESET_COL
  } else {
    UPDATE_COL
  }
  text->put( yytext[0] );
}
<TEXTSIMPLE>"\\'" { UPDATE_COL
  /* Escaped single quote: the quote is stored without the backslash and
     does not terminate the text atom. */
  *text << "'";
}
<TEXTSIMPLE>"\\\\" { UPDATE_COL
  /* Escaped backslash: a single backslash is stored. */
  text->put( '\\' );
}
<TEXTSIMPLE>"'" { UPDATE_COL
  /* Closing single quote: return to the INITIAL state, move the collected
     content into a new text atom and release the stream. */
  BEGIN(INITIAL);
  ListExpr newText = lexnl->TextAtom();
  lexnl->AppendText( newText, text->str() );
  delete text;
  /* Do not leave the file level pointer dangling after the delete. */
  text = 0;
  /* pass text node and token to the parser */
  yylval = newText;
  return (ZZTEXT);
}
<INITIAL>{string} { UPDATE_COL
/* String atom: the matched literal is unescaped, the enclosing quotes are
   removed and the result is cut to at most MAX_STRINGSIZE characters. */
string s(yytext);
/* First every backslash-quote pair becomes a quote, then every double
   backslash becomes a single backslash. */
s = stringutils::replaceAll(s,"\\\"","\"");
s = stringutils::replaceAll(s,"\\\\","\\");
/* From here on yyleng no longer describes yytext: it holds the length of
   the unescaped literal including both quotes. */
yyleng = s.length();
/* NOTE(review): the subtraction is unsigned and relies on s still holding
   at least the two enclosing quotes. */
if ( s.length() - 2 > MAX_STRINGSIZE )
{
/* Too long: keep MAX_STRINGSIZE characters plus the two quotes and warn.
   The warning prints the original, still escaped, input text. */
yyleng = MAX_STRINGSIZE + 2;
cmsg.warning() << "Warning: string " << yytext << " was truncated!" << endl;
cmsg.send();
}
/* Drop the opening quote and take yyleng-2 characters of content. */
s = s.substr(1,yyleng-2);
yylval = lexnl->StringAtom( s );
return (ZZSTRING);
}
<INITIAL>{symbol} { UPDATE_COL
  /* Symbol atom: an identifier or a sequence of operator characters.
     Symbols longer than MAX_STRINGSIZE are reported and rejected with an
     error token. */
  const bool tooLong = ( yyleng > MAX_STRINGSIZE );
  if ( !tooLong ) {
    yylval = lexnl->SymbolAtom( yytext );
    return ZZSYMBOL;
  }
  cmsg.error() << "Symbol " << yytext << " too long!" << endl;
  cmsg.send();
  return ZZERROR;
}
<INITIAL>{file} { /* files are expanded to base64 encoded text atoms */
  UPDATE_COL
  /* Extract the file name between the opening and the closing tag, strip
     surrounding blanks and expand variables in it. */
  static const int textStartLen = string("<file>").size();
  static const int textEndLen = string("</file--->").size();
  const int contentLen = yyleng - textStartLen - textEndLen;
  string fileNameStr(yytext);
  fileNameStr = trim( fileNameStr.substr(textStartLen,contentLen) );
  fileNameStr = expandVar(fileNameStr);

  /* Open the file in binary mode. On failure report the error and return
     before any list node has been allocated. */
  ifstream inFile( fileNameStr.c_str(), ios::binary );
  if ( !inFile.is_open() ) {
    cmsg.error() << "Can not open file '" << fileNameStr << "'" << endl;
    cmsg.send();
    return (ZZERROR);
  }

  /* Encode the file content. The encoder is an automatic object, so it is
     released on every exit path without a manual delete. */
  stringstream textStream;
  Base64 encoder;
  encoder.encodeStream(inFile, textStream);

  /* The text atom is created only now that the content is available. */
  ListExpr newText = lexnl->TextAtom();
  lexnl->AppendText(newText,textStream.str());
  /* pass text node and token to the parser */
  yylval = newText;
  return (ZZTEXT);
}
<INITIAL>"(" { UPDATE_COL
/* Opening parenthesis token. */
return (ZZOPEN);
}
<INITIAL>")" { UPDATE_COL
/* Closing parenthesis token. */
return (ZZCLOSE);
}
. { UPDATE_COL
/* Rule without a start condition: it is not active in the exclusive states
   TEXT, TEXTSIMPLE and COMMENT. Any single character that no earlier rule
   matched yields an error token. */
return (ZZERROR);
}
%%