/* ---- This file is part of SECONDO. Copyright (C) 2004, University in Hagen, Department of Computer Science, Database Systems for New Applications. SECONDO is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. SECONDO is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with SECONDO; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ---- The content of this file is based on the OSM algebra of SECONDO created by Thomas Uchdorf. The implemented file reader depends on libxml2. If the dependency is not fulfilled it writes an error message to the cerr output. */ // [...] #undef __TRACE__ //#define __TRACE__ cout << __FILE__ << "::" << __LINE__; #define __TRACE__ // --- Including header-files #include "XmlFileReader.h" #include "XmlParserInterface.h" #include #include #include #include #include #include "DblpImportLocalInfo.h" namespace nr2a{ XmlFileReader::XmlFileReader() : m_fileName(), m_parser(NULL), m_elements(), m_reader(NULL) { // empty } XmlFileReader::XmlFileReader(const std::string &fileName, XmlParserInterface *parser, DblpImportLocalInfo *info) : m_fileName(fileName), m_parser(parser), m_elements(), m_reader(NULL), m_info(info) { open(); } XmlFileReader::~XmlFileReader() { close(); } /* Getter and setter for the filename of the XML file to process. */ void XmlFileReader::setFileName(const std::string &fileName) { m_fileName = fileName; } const std::string & XmlFileReader::getFileName() const { return m_fileName; } /* To attach a parser to this reader use the following function. 
*/ void XmlFileReader::setXmlParser(XmlParserInterface *parser) { // Do not call this function twice (otherwise please manage the memory // yourself)! assert(m_parser == NULL); m_parser = parser; } #ifdef WITH_LIBXML2_SUPPORT /* This fucntions open and close an XML file. */ void XmlFileReader::open() { assert(!m_reader); xmlInitParser(); m_reader = new xmlTextReaderPtr(); const char *fileName = getFileName().c_str(); (*m_reader) = xmlReaderForFile(fileName, NULL, 0); assert(m_reader); } void XmlFileReader::close() { assert(m_reader); xmlFreeTextReader(*m_reader); delete m_reader; m_reader = NULL; xmlCleanupParser(); assert(!m_reader); } /* This function starts reading an XML file. It configures a reader through the libxml2 API. */ int XmlFileReader::readXmlFile() { int result = 0; const char *fileName = getFileName().c_str(); const int readSuccess = 1; xmlTextReaderPtr reader = NULL; int ret = 1; m_errorCounter = 0; m_errorString = ""; xmlGenericErrorFunc handler = (xmlGenericErrorFunc)errorHandler; xmlSetGenericErrorFunc(this, handler); xmlInitParser(); reader = xmlReaderForFile(fileName, NULL, XML_PARSE_RECOVER | //Relax about errors (e.g. missing mandatory fields) XML_PARSE_HUGE | // Very big input is no reason to complain XML_PARSE_NONET | // Do not attempt to download schema etc. 
XML_PARSE_DTDLOAD | // Load the DTD //XML_PARSE_DTDVALID | // Validate XML against the DTD XML_PARSE_NOENT // Map entities as specified in DTD //XML_PARSE_NOERROR | // Do not output errors to cout //XML_PARSE_NOWARNING // Do not output warnings to cout ); if (reader != NULL) { m_info->SetReader(reader); //int res = xmlTextReaderGetParserProp(reader, XML_PARSER_LOADDTD); while ((ret == readSuccess) && (m_errorCounter < c_maxErrorLines)) { ret = xmlTextReaderRead(reader); try { processXmlNode(reader); } catch (Nr2aException& e) { int lineNumber = xmlTextReaderGetParserLineNumber(reader); throw Nr2aParserException(e.what(), lineNumber); } } if (m_errorCounter > 0) { int lineNumber = xmlTextReaderGetParserLineNumber(reader); throw Nr2aParserException(getErrorMessages(), lineNumber); } xmlFreeTextReader(reader); xmlCleanupParser(); } else { result = c_fileOpenError; xmlCleanupParser(); } if (m_errorCounter > 0) { result = c_processingError; } return result; } /* Iterates the XML nodes. */ void XmlFileReader::getNext() { assert(m_reader != NULL); const char *fileName = getFileName().c_str(); int ret; bool found = false; if ((*m_reader) != NULL) { ret = xmlTextReaderRead(*m_reader); while (!found && ret == 1) { processXmlNode(*m_reader); ret = xmlTextReaderRead(*m_reader); found = foundInterestingElement(); } if (!found && ret != 0) { std::cerr << "Could not parse \"" << fileName << "\"" << std::endl; } } else { std::cerr << "Could not open \"" << fileName << "\"" << std::endl; } } /* This function processes an XML node by calling methods handling several abstract events. 
*/ void XmlFileReader::processXmlNode(xmlTextReaderPtr reader) { std::string elementName = ""; std::string elementValue = ""; std::string attributeName = ""; std::string attributeValue = ""; int elementLevel = xmlTextReaderDepth(reader); const int line = xmlTextReaderGetParserLineNumber(reader); int nodeType = xmlTextReaderNodeType(reader); std::vector attributeNames; std::vector attributeValues; if (nodeType == (int) XML_READER_TYPE_ELEMENT) { elementName = (char *) xmlTextReaderConstName(reader); // Fetching the attributes while (xmlTextReaderMoveToNextAttribute(reader)) { attributeName = (char *) xmlTextReaderConstName(reader); attributeValue = (char *) xmlTextReaderConstValue(reader); attributeNames.push_back(attributeName); attributeValues.push_back(attributeValue); } xmlTextReaderMoveToElement(reader); // Checking whether the tag looks like this: // if (xmlTextReaderIsEmptyElement(reader)) { // found empty element Element element(elementName, attributeNames, attributeValues, elementValue, elementLevel, line); pushEmptyElementToStack(element); attributeNames.clear(); attributeValues.clear(); } else { // found start of element that may be empty but consists of both a // start and an end tag Element element(elementName, attributeNames, attributeValues, elementValue, elementLevel, line); pushElementToStack(element); attributeNames.clear(); attributeValues.clear(); } } else if (nodeType == (int) XML_READER_TYPE_TEXT) { // found text elementValue = (char *) xmlTextReaderConstValue(reader); processText(elementValue); } else if (nodeType == (int) XML_READER_TYPE_END_ELEMENT) { // found end of element elementName = (char *) xmlTextReaderConstName(reader); Element element(elementName, attributeNames, attributeValues, elementValue, elementLevel, line); popElementFromStack(element); attributeNames.clear(); attributeValues.clear(); } else if (nodeType == (int) XML_READER_TYPE_ENTITY_REFERENCE) { elementName = (char *) xmlTextReaderConstName(reader); 
processEntityReference(elementName); } } #else void XmlFileReader::open () { // empty } void XmlFileReader::close () { // empty } void XmlFileReader::readXmlFile () { std::cerr << "libxml2 is not supported!" << std::endl; } void XmlFileReader::getNext () { std::cerr << "libxml2 is not supported!" << std::endl; } #endif /* WITH_LIBXML2_SUPPORT*/ /* Methods for handling abstract events. They mostly just send them to the parser. */ void XmlFileReader::pushEmptyElementToStack(const Element &element) { if (isElementInteresting(element)) { pushElementToStack(element); popElementFromStack(element); } } void XmlFileReader::pushElementToStack(const Element &element) { if (isElementInteresting(element)) { m_elements.push(element); assert(m_parser != NULL); m_parser->pushedElementToStack(element); } } void XmlFileReader::popElementFromStack(const Element &element) { if (isElementInteresting(element)) { Element top = m_elements.top(); m_elements.pop(); assert(top.getName() == element.getName()); assert(m_parser != NULL); m_parser->poppedElementFromStack(top); } } void XmlFileReader::processText(const std::string &text) { m_parser->processedText(text); } void XmlFileReader::processEntityReference(const std::string &name) { m_parser->processedEntityReference(name); } bool XmlFileReader::isElementInteresting(const Element &element) const { assert(m_parser != NULL); return m_parser->isElementInteresting(element); } bool XmlFileReader::foundInterestingElement() const { assert(m_parser != NULL); return m_parser->foundInterestingElement(); } /* Function used for receiving errors from libxml2. */ /*static*/ void XmlFileReader::errorHandler(void *ctx, const char *msg, ...) { char buf[2048]; va_list arg_ptr; va_start(arg_ptr, msg); vsnprintf(buf, 2048, msg, arg_ptr); va_end(arg_ptr); XmlFileReader *self = (XmlFileReader*)ctx; self->m_errorCounter++; self->m_errorString.append(buf); return; } /* Returns the error messages received so far. 
*/
// The collected messages are prefixed with a newline. Once the number of
// recorded errors has reached c_maxErrorLines a hint is appended, because
// reading stops at that limit and further errors may be unreported.
std::string XmlFileReader::getErrorMessages()
{
  std::string messages("\n");
  messages += m_errorString;

  const bool limitReached = !(m_errorCounter < c_maxErrorLines);
  if (limitReached)
  {
    messages.append("\nThere might be more errors existing in the document, "
        "than the ones mentioned here. \n");
  }
  return messages;
}

} // end of namespace nr2a