📄 dtdparser.cpp
字号:
/*
 * ===========================================================================
 * PRODUCTION $Log: dtdparser.cpp,v $
 * PRODUCTION Revision 1000.2  2004/06/01 19:42:52  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.19
 * PRODUCTION
 * ===========================================================================
 */

/*  $Id: dtdparser.cpp,v 1000.2 2004/06/01 19:42:52 gouriano Exp $
* ===========================================================================
*
*                            PUBLIC DOMAIN NOTICE
*               National Center for Biotechnology Information
*
*  This software/database is a "United States Government Work" under the
*  terms of the United States Copyright Act.  It was written as part of
*  the author's official duties as a United States Government employee and
*  thus cannot be copyrighted.  This software/database is freely available
*  to the public for use. The National Library of Medicine and the U.S.
*  Government have not placed any restriction on its use or reproduction.
*
*  Although all reasonable efforts have been taken to ensure the accuracy
*  and reliability of the software and data, the NLM and the U.S.
*  Government do not and cannot warrant the performance or results that
*  may be obtained by using this software or data. The NLM and the U.S.
*  Government disclaim all warranties, express or implied, including
*  warranties of performance, merchantability or fitness for any particular
*  purpose.
*
*  Please cite the author in any work or product based on this material.
*
* ===========================================================================
*
* Author: Andrei Gourianov
*
* File Description:
*   DTD parser
*
* ===========================================================================
*/

#include <ncbi_pch.hpp>
#include <serial/datatool/exceptions.hpp>
#include <serial/datatool/dtdparser.hpp>
#include <serial/datatool/tokens.hpp>
#include <serial/datatool/module.hpp>
#include <serial/datatool/moduleset.hpp>
#include <serial/datatool/type.hpp>
#include <serial/datatool/statictype.hpp>
#include <serial/datatool/enumtype.hpp>
#include <serial/datatool/reftype.hpp>
#include <serial/datatool/unitype.hpp>
#include <serial/datatool/blocktype.hpp>
#include <serial/datatool/choicetype.hpp>
#include <serial/datatool/value.hpp>
#include <algorithm>
#include <corelib/ncbifile.hpp>

BEGIN_NCBI_SCOPE

/////////////////////////////////////////////////////////////////////////////
// DTDParser

/// Construct the parser on top of @a lexer.
/// The lexer is also pushed onto m_StackLexer as its first entry.
DTDParser::DTDParser(DTDLexer& lexer)
    : AbstractParser(lexer)
{
    m_StackLexer.push(&lexer);
}

/// Destructor; performs no explicit cleanup.
DTDParser::~DTDParser(void)
{
}

/// Parse the input into a CFileModules container created for @a fileName.
///
/// While the next token is not T_EOF, the directory part of @a fileName is
/// pushed onto m_StackPath, one module named after the base name of
/// @a fileName is parsed with Module() and added to the container, and the
/// path entry is popped again.
///
/// @param fileName  Name of the file being parsed.
/// @return          The container holding the parsed module(s).
AutoPtr<CFileModules> DTDParser::Modules(const string& fileName)
{
    AutoPtr<CFileModules> modules(new CFileModules(fileName));

    while( Next() != T_EOF ) {
        CDirEntry entry(fileName);
        m_StackPath.push(entry.GetDir());
        modules->AddModule(Module(entry.GetBase()));
        m_StackPath.pop();
    }
//    CopyComments(modules->LastComments());
    return modules;
}

/// Parse one module.
///
/// Creates a CDataTypeModule called @a name, builds the document tree from
/// the token stream, and then generates the data tree into the new module.
/// When NCBI_DTDPARSER_TRACE is defined, the document tree is printed
/// between these two steps.
///
/// @param name  Name given to the new module.
/// @return      The populated module.
AutoPtr<CDataTypeModule> DTDParser::Module(const string& name)
{
    AutoPtr<CDataTypeModule> module(new CDataTypeModule(name));

    BuildDocumentTree();

#if defined(NCBI_DTDPARSER_TRACE)
    PrintDocumentTree();
#endif

    GenerateDataTree(*module);

/*
    string moduleName = ModuleReference();
    AutoPtr<CDataTypeModule> module(new CDataTypeModule(moduleName));

    Consume(K_DEFINITIONS, "DEFINITIONS");
    Consume(T_DEFINE, "::=");
    Consume(K_BEGIN, "BEGIN");

    Next();

    CopyComments(module->Comments());

    ModuleBody(*module);
    Consume(K_END, "END");

    CopyComments(module->LastComments());
*/
    return module;
}

/// Read top-level declarations until the end of the document.
///
/// Dispatches on the next token:
///  - K_ELEMENT / K_ATTLIST / K_ENTITY: consume the keyword and hand over to
///    the matching Begin...Content() routine;
///  - T_ENTITY: push a lexer for the referenced entity;
///  - T_EOF: if PopEntityLexer() reports that an entity lexer was popped,
///    consume and keep going; otherwise the document is finished;
///  - anything else: report a parse error and stop.
///
/// A CException is re-thrown through NCBI_RETHROW_SAME with an added
/// message; any other `exception` is posted with ERR_POST and re-thrown.
void DTDParser::BuildDocumentTree(void)
{
    for (;;) {
        try {
            switch ( Next() ) {
            case K_ELEMENT:
                Consume();
                BeginElementContent();
                break;
            case K_ATTLIST:
                Consume();
                BeginAttributesContent();
                break;
            case K_ENTITY:
                Consume();
                BeginEntityContent();
                break;
            case T_ENTITY:
                // must be external entity
                PushEntityLexer(NextToken().GetText());
                break;
            case T_EOF:
                if (PopEntityLexer()) {
                    // was external entity
                    Consume();
                    break;
                } else {
                    // end of doc
                    return;
                }
            default:
                ParseError("Invalid keyword", "keyword");
                return;
            }
        }
        catch (CException& e) {
            NCBI_RETHROW_SAME(e,"DTDParser::BuildDocumentTree: failed");
        }
        catch (exception& e) {
            ERR_POST(e.what());
            throw;
        }
    }
}

/////////////////////////////////////////////////////////////////////////////
// DTDParser - elements

/// Start parsing an element declaration: take the text of the next token as
/// the element name, consume it, and parse the content as a non-embedded
/// element.
void DTDParser::BeginElementContent(void)
{
    // element name
    string name = NextToken().GetText();
    Consume();
    ParseElementContent(name, false);
}

/// Parse the content specification of the element called @a name.
///
/// The element is looked up in m_MapElement (operator[] creates the entry if
/// it does not exist yet) and given its name.  The next token then selects
/// the category: ANY, EMPTY, an explicit content model, or an entity
/// reference that is expanded through a pushed lexer.
///
/// @param name      Element name (a generated name for embedded content).
/// @param embedded  When true and the content is an explicit model, the
///                  element is marked as embedded and the function returns
///                  without consuming the closing '>'.
void DTDParser::ParseElementContent(const string& name, bool embedded)
{
    DTDElement& node = m_MapElement[ name];
    node.SetName(name);
    switch (Next()) {
    default:
    case T_IDENTIFIER:
        ParseError("incorrect format","element category");
//        _ASSERT(0);
        break;
    case K_ANY:     // category
        node.SetType(DTDElement::eAny);
        Consume();
        break;
    case K_EMPTY:   // category
        node.SetType(DTDElement::eEmpty);
        Consume();
        break;
    case T_SYMBOL:     // contents. the symbol must be '('
        ConsumeElementContent(node);
        if (embedded) {
            node.SetEmbedded();
            return;
        }
        break;
    case T_ENTITY:
        PushEntityLexer(NextToken().GetText());
        ConsumeElementContent(node);
        PopEntityLexer();
        Consume();
        break;
    }
    // element description is ended
    ConsumeSymbol('>');
}

/// Consume a parenthesized content model and record it in @a node.
///
/// The current token must be '('; otherwise a parse error is reported.
/// Tokens are then processed one by one until the matching ')' is reached,
/// at which point the last pending name is added and EndElementContent()
/// finishes the group.
///
/// `skip` suppresses the Consume() at the top of the next iteration; it is
/// set after a nested group has been parsed and after an entity lexer has
/// been pushed.
void DTDParser::ConsumeElementContent(DTDElement& node)
{
// Element content:
// http://www.w3.org/TR/2000/REC-xml-20001006#sec-element-content

    string id_name;
    char symbol;
    int emb=0;
    bool skip;

    if(NextToken().GetSymbol() != '(') {
        ParseError("Incorrect format","(");
    }
//    _ASSERT(NextToken().GetSymbol() == '(');

    for (skip = false; ;) {
        if (skip) {
            skip=false;
        } else {
            Consume();
        }
        switch (Next()) {
        default:
            ParseError("Unrecognized token","token");
//            _ASSERT(0);
            break;
        case T_IDENTIFIER:
            id_name = NextToken().GetText();
            if(id_name.empty()) {
                ParseError("Incorrect format","identifier");
            }
//            _ASSERT(!id_name.empty());
            break;
        case K_PCDATA:
            node.SetType(DTDElement::eString);
            break;
        case T_SYMBOL:
            switch (symbol = NextToken().GetSymbol()) {
            case '(':
                // embedded content
                // The nested group is parsed as a separate element whose
                // name is built from the parent name, "__emb#__" and a
                // running counter.
                id_name = node.GetName();
                id_name += "__emb#__";
                id_name += NStr::IntToString(emb++);
                ParseElementContent(id_name, true);
                skip = true;
                break;
            case ')':
                AddElementContent(node, id_name);
                EndElementContent( node);
                return;
            case ',':
            case '|':
                AddElementContent(node, id_name, symbol);
                break;
            case '+':
            case '*':
            case '?':
                if(id_name.empty()) {
                    ParseError("Incorrect format","identifier");
                }
//                _ASSERT(!id_name.empty());
                node.SetOccurrence(id_name,
                    symbol == '+' ? DTDElement::eOneOrMore :
                        (symbol == '*' ? DTDElement::eZeroOrMore :
                            DTDElement::eZeroOrOne));
                break;
            default:
                ParseError("Unrecognized symbol","symbol");
//                _ASSERT(0);
                break;
            }
            break;
        case T_ENTITY:
            id_name = NextToken().GetText();
            PushEntityLexer(id_name);
            skip = true;
            break;
        case T_EOF:
            PopEntityLexer();
            break;
        }
    }
}

/// Append the pending child name @a id_name to the content of @a node.
///
/// Does nothing when @a id_name is empty.  Otherwise the name is added to
/// the node's content, the node type is set from @a separator (',' selects
/// a sequence, any other non-zero value a choice; zero sets a sequence only
/// if the type is still unknown), the referenced element is marked as
/// referenced in m_MapElement, and @a id_name is cleared.
///
/// @param node       Element whose content is being built.
/// @param id_name    In/out: pending child name; erased after being added.
/// @param separator  Separator that followed the name, or 0 for none.
///                   NOTE(review): callers here also pass two arguments, so
///                   the declaration presumably defaults this to 0 - confirm
///                   in dtdparser.hpp.
void DTDParser::AddElementContent(DTDElement& node, string& id_name,
    char separator)
{
    // id_name could be empty if the prev token was K_PCDATA
    if (!id_name.empty()) {
        node.AddContent(id_name);
        if (separator != 0) {
            node.SetType(separator == ',' ?
                DTDElement::eSequence : DTDElement::eChoice);
        } else {
            node.SetTypeIfUnknown(DTDElement::eSequence);
        }
        m_MapElement[ id_name].SetReferenced();
        id_name.erase();
    }
}

/// Finish a content group of @a node.
///
/// The current token must be ')' (a parse error is reported otherwise) and
/// is consumed.  If the following token is one of the symbols '+', '*' or
/// '?', it is applied to the node as its occurrence and consumed.  Finally
/// the names of embedded children are fixed up.
void DTDParser::EndElementContent(DTDElement& node)
{
    if (NextToken().GetSymbol() != ')') {
        ParseError("Incorrect format", ")");
    }
//    _ASSERT(NextToken().GetSymbol() == ')');
    Consume();
// occurrence
    char symbol;
    switch (Next()) {
    default:
        break;
    case T_SYMBOL:
        switch (symbol = NextToken().GetSymbol()) {
        default:
            break;
        case '+':
        case '*':
        case '?':
            node.SetOccurrence(
                symbol == '+' ? DTDElement::eOneOrMore :
                    (symbol == '*' ? DTDElement::eZeroOrMore :
                        DTDElement::eZeroOrOne));
            Consume();
            break;
        }
        break;
    }
    FixEmbeddedNames(node);
}

// NOTE(review): only the beginning of FixEmbeddedNames is present in this
// part of the file; the visible statements are kept unchanged and the
// function is intentionally left open here.
void DTDParser::FixEmbeddedNames(DTDElement& node)
{
    const list<string>& refs = node.GetContent();
    list<string> fixed;
    for (list<string>::const_iterator i= refs.begin(); i != refs.end(); ++i) {
        DTDElement& refNode = m_MapElement[*i];
        if (refNode.IsEmbedded()) {
            for ( int depth=1; depth<100; ++depth) {
                string testName = refNode.CreateEmbeddedName(depth);
                if (find(refs.begin(),refs.end(),testName) == refs.end()) {
                    if (find(fixed.begin(),fixed.end(),testName) == fixed.end()) {
                        fixed.push_back(testName);
                        refNode.SetName(testName);
                        break;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -