Files
secondo/Algebras/NestedRelation2/XmlFileReader.cpp
2026-01-23 17:03:45 +08:00

411 lines
9.8 KiB
C++

/*
----
This file is part of SECONDO.
Copyright (C) 2004, University in Hagen, Department of Computer Science,
Database Systems for New Applications.
SECONDO is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
SECONDO is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with SECONDO; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
----
The content of this file is based on the OSM algebra of SECONDO
created by Thomas Uchdorf.
The implemented file reader depends on libxml2. If the dependency is not
fulfilled it writes an error message to the cerr output.
*/
// [...]
#undef __TRACE__
//#define __TRACE__ cout << __FILE__ << "::" << __LINE__;
#define __TRACE__
// --- Including header-files
#include "XmlFileReader.h"
#include "XmlParserInterface.h"
#include <iostream>
#include <cassert>
#include <libxml/xmlreader.h>
#include <libxml/parser.h>
#include <Nr2aException.h>
#include "DblpImportLocalInfo.h"
namespace nr2a{
XmlFileReader::XmlFileReader()
: m_fileName(), m_parser(NULL), m_elements(), m_reader(NULL)
{
// empty
}
XmlFileReader::XmlFileReader(const std::string &fileName,
XmlParserInterface *parser, DblpImportLocalInfo *info)
: m_fileName(fileName), m_parser(parser), m_elements(), m_reader(NULL),
m_info(info)
{
open();
}
XmlFileReader::~XmlFileReader()
{
close();
}
/*
Getter and setter for the filename of the XML file to process.
*/
void XmlFileReader::setFileName(const std::string &fileName)
{
m_fileName = fileName;
}
const std::string & XmlFileReader::getFileName() const
{
return m_fileName;
}
/*
To attach a parser to this reader use the following function.
*/
void XmlFileReader::setXmlParser(XmlParserInterface *parser)
{
// Do not call this function twice (otherwise please manage the memory
// yourself)!
assert(m_parser == NULL);
m_parser = parser;
}
#ifdef WITH_LIBXML2_SUPPORT
/*
This fucntions open and close an XML file.
*/
void XmlFileReader::open()
{
assert(!m_reader);
xmlInitParser();
m_reader = new xmlTextReaderPtr();
const char *fileName = getFileName().c_str();
(*m_reader) = xmlReaderForFile(fileName, NULL, 0);
assert(m_reader);
}
void XmlFileReader::close()
{
assert(m_reader);
xmlFreeTextReader(*m_reader);
delete m_reader;
m_reader = NULL;
xmlCleanupParser();
assert(!m_reader);
}
/*
This function starts reading an XML file. It configures a reader through the
libxml2 API.
*/
int XmlFileReader::readXmlFile()
{
int result = 0;
const char *fileName = getFileName().c_str();
const int readSuccess = 1;
xmlTextReaderPtr reader = NULL;
int ret = 1;
m_errorCounter = 0;
m_errorString = "";
xmlGenericErrorFunc handler = (xmlGenericErrorFunc)errorHandler;
xmlSetGenericErrorFunc(this, handler);
xmlInitParser();
reader = xmlReaderForFile(fileName, NULL,
XML_PARSE_RECOVER | //Relax about errors (e.g. missing mandatory fields)
XML_PARSE_HUGE | // Very big input is no reason to complain
XML_PARSE_NONET | // Do not attempt to download schema etc.
XML_PARSE_DTDLOAD | // Load the DTD
//XML_PARSE_DTDVALID | // Validate XML against the DTD
XML_PARSE_NOENT // Map entities as specified in DTD
//XML_PARSE_NOERROR | // Do not output errors to cout
//XML_PARSE_NOWARNING // Do not output warnings to cout
);
if (reader != NULL)
{
m_info->SetReader(reader);
//int res = xmlTextReaderGetParserProp(reader, XML_PARSER_LOADDTD);
while ((ret == readSuccess) && (m_errorCounter < c_maxErrorLines))
{
ret = xmlTextReaderRead(reader);
try
{
processXmlNode(reader);
}
catch (Nr2aException& e)
{
int lineNumber = xmlTextReaderGetParserLineNumber(reader);
throw Nr2aParserException(e.what(), lineNumber);
}
}
if (m_errorCounter > 0)
{
int lineNumber = xmlTextReaderGetParserLineNumber(reader);
throw Nr2aParserException(getErrorMessages(), lineNumber);
}
xmlFreeTextReader(reader);
xmlCleanupParser();
}
else
{
result = c_fileOpenError;
xmlCleanupParser();
}
if (m_errorCounter > 0)
{
result = c_processingError;
}
return result;
}
/*
Iterates the XML nodes.
*/
void XmlFileReader::getNext()
{
assert(m_reader != NULL);
const char *fileName = getFileName().c_str();
int ret;
bool found = false;
if ((*m_reader) != NULL)
{
ret = xmlTextReaderRead(*m_reader);
while (!found && ret == 1)
{
processXmlNode(*m_reader);
ret = xmlTextReaderRead(*m_reader);
found = foundInterestingElement();
}
if (!found && ret != 0)
{
std::cerr << "Could not parse \"" << fileName << "\"" << std::endl;
}
}
else
{
std::cerr << "Could not open \"" << fileName << "\"" << std::endl;
}
}
/*
This function processes an XML node by calling methods handling several
abstract events.
*/
void XmlFileReader::processXmlNode(xmlTextReaderPtr reader)
{
std::string elementName = "";
std::string elementValue = "";
std::string attributeName = "";
std::string attributeValue = "";
int elementLevel = xmlTextReaderDepth(reader);
const int line = xmlTextReaderGetParserLineNumber(reader);
int nodeType = xmlTextReaderNodeType(reader);
std::vector<std::string> attributeNames;
std::vector<std::string> attributeValues;
if (nodeType == (int) XML_READER_TYPE_ELEMENT)
{
elementName = (char *) xmlTextReaderConstName(reader);
// Fetching the attributes
while (xmlTextReaderMoveToNextAttribute(reader))
{
attributeName = (char *) xmlTextReaderConstName(reader);
attributeValue = (char *) xmlTextReaderConstValue(reader);
attributeNames.push_back(attributeName);
attributeValues.push_back(attributeValue);
}
xmlTextReaderMoveToElement(reader);
// Checking whether the tag looks like this:
// <sometag attrib_1 = "" ... attrib_n="" />
if (xmlTextReaderIsEmptyElement(reader))
{
// found empty element
Element element(elementName, attributeNames, attributeValues,
elementValue, elementLevel, line);
pushEmptyElementToStack(element);
attributeNames.clear();
attributeValues.clear();
}
else
{
// found start of element that may be empty but consists of both a
// start and an end tag
Element element(elementName, attributeNames, attributeValues,
elementValue, elementLevel, line);
pushElementToStack(element);
attributeNames.clear();
attributeValues.clear();
}
}
else if (nodeType == (int) XML_READER_TYPE_TEXT)
{
// found text
elementValue = (char *) xmlTextReaderConstValue(reader);
processText(elementValue);
}
else if (nodeType == (int) XML_READER_TYPE_END_ELEMENT)
{
// found end of element
elementName = (char *) xmlTextReaderConstName(reader);
Element element(elementName, attributeNames, attributeValues, elementValue,
elementLevel, line);
popElementFromStack(element);
attributeNames.clear();
attributeValues.clear();
}
else if (nodeType == (int) XML_READER_TYPE_ENTITY_REFERENCE)
{
elementName = (char *) xmlTextReaderConstName(reader);
processEntityReference(elementName);
}
}
#else
void XmlFileReader::open ()
{
// empty
}
void XmlFileReader::close ()
{
// empty
}
void XmlFileReader::readXmlFile ()
{
std::cerr << "libxml2 is not supported!" << std::endl;
}
void XmlFileReader::getNext ()
{
std::cerr << "libxml2 is not supported!" << std::endl;
}
#endif /* WITH_LIBXML2_SUPPORT*/
/*
Methods for handling abstract events. They mostly just send them to the parser.
*/
void XmlFileReader::pushEmptyElementToStack(const Element &element)
{
if (isElementInteresting(element))
{
pushElementToStack(element);
popElementFromStack(element);
}
}
void XmlFileReader::pushElementToStack(const Element &element)
{
if (isElementInteresting(element))
{
m_elements.push(element);
assert(m_parser != NULL);
m_parser->pushedElementToStack(element);
}
}
void XmlFileReader::popElementFromStack(const Element &element)
{
if (isElementInteresting(element))
{
Element top = m_elements.top();
m_elements.pop();
assert(top.getName() == element.getName());
assert(m_parser != NULL);
m_parser->poppedElementFromStack(top);
}
}
void XmlFileReader::processText(const std::string &text)
{
m_parser->processedText(text);
}
void XmlFileReader::processEntityReference(const std::string &name)
{
m_parser->processedEntityReference(name);
}
bool XmlFileReader::isElementInteresting(const Element &element) const
{
assert(m_parser != NULL);
return m_parser->isElementInteresting(element);
}
bool XmlFileReader::foundInterestingElement() const
{
assert(m_parser != NULL);
return m_parser->foundInterestingElement();
}
/*
Function used for receiving errors from libxml2.
*/
/*static*/
void XmlFileReader::errorHandler(void *ctx, const char *msg, ...)
{
char buf[2048];
va_list arg_ptr;
va_start(arg_ptr, msg);
vsnprintf(buf, 2048, msg, arg_ptr);
va_end(arg_ptr);
XmlFileReader *self = (XmlFileReader*)ctx;
self->m_errorCounter++;
self->m_errorString.append(buf);
return;
}
/*
Returns the error messages received so far.
*/
std::string XmlFileReader::getErrorMessages()
{
std::string result = "\n" + m_errorString;
if (m_errorCounter >= c_maxErrorLines)
{
result += "\nThere might be more errors existing in the document, "
"than the ones mentioned here. \n";
}
return result;
}
} // end of namespace nr2a