📄 dtdlexer.cpp
字号:
/* * =========================================================================== * PRODUCTION $Log: dtdlexer.cpp,v $ * PRODUCTION Revision 1000.2 2004/06/01 19:42:49 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.7 * PRODUCTION * =========================================================================== *//* $Id: dtdlexer.cpp,v 1000.2 2004/06/01 19:42:49 gouriano Exp $* ===========================================================================** PUBLIC DOMAIN NOTICE* National Center for Biotechnology Information** This software/database is a "United States Government Work" under the* terms of the United States Copyright Act. It was written as part of* the author's official duties as a United States Government employee and* thus cannot be copyrighted. This software/database is freely available* to the public for use. The National Library of Medicine and the U.S.* Government have not placed any restriction on its use or reproduction.** Although all reasonable efforts have been taken to ensure the accuracy* and reliability of the software and data, the NLM and the U.S.* Government do not and cannot warrant the performance or results that* may be obtained by using this software or data. The NLM and the U.S.* Government disclaim all warranties, express or implied, including* warranties of performance, merchantability or fitness for any particular* purpose.** Please cite the author in any work or product based on this material.** ===========================================================================** Author: Andrei Gourianov** File Description:* DTD lexer** ===========================================================================*/#include <ncbi_pch.hpp>#include <serial/datatool/dtdlexer.hpp>#include <serial/datatool/tokens.hpp>BEGIN_NCBI_SCOPEDTDLexer::DTDLexer(CNcbiIstream& in) : AbstractLexer(in){ m_CharsToSkip = 0;}DTDLexer::~DTDLexer(void){}TToken DTDLexer::LookupToken(void){ TToken tok; char c = Char(); switch (c) { case '<': if (Char(1)=='!') { SkipChars(2); if (isalpha(Char())) { return LookupIdentifier(); } else { LexerError("name must start with a letter (alpha)");// _ASSERT(0); } } else { // not allowed in DTD LexerError("Incorrect format");// _ASSERT(0); } break; case '#': tok = LookupIdentifier(); if (tok == T_IDENTIFIER) { LexerError("Unknown keyword"); }// _ASSERT(tok != T_IDENTIFIER); return tok; case '%': tok = LookupEntity(); return tok; case '\"': case '\'': if (!EndPrevToken()) { tok = LookupString(); return tok; } break; default: if (isalpha(c)) { tok = LookupIdentifier(); return tok; } break; } return T_SYMBOL;}// find all comments and insert them into Lexervoid DTDLexer::LookupComments(void){ EndPrevToken(); char c; for (;;) { c = Char(); switch (c) { case ' ': case '\t': case '\r': SkipChar(); break; case '\n': SkipChar(); NextLine(); break; case '<': if ((Char(1) == '!') && (Char(2) == '-') && (Char(3) == '-')) { // comment started SkipChars(4); while (ProcessComment()) ; break; } return; // if it is not comment, it is token default: return; } }}bool DTDLexer::ProcessComment(void){ CComment& comment = AddComment(); for (;;) { char c = Char(); switch ( c ) { case '\r': SkipChar(); break; case '\n': SkipChar(); NextLine(); return true; // comment not ended - there is more case 0: if ( Eof() ) return false; break; case '-': if ((Char(1) == '-') && (Char(2) == '>')) { // end of the comment SkipChars(3); return false; } // no break here default: comment.AddChar(c); SkipChar(); break; } } return false;}TToken DTDLexer::LookupIdentifier(void){ StartToken();// something (not comment) started// find where it ends for (char c = Char(); c != 0; c = Char()) {// complete specification is here:// http://www.w3.org/TR/2000/REC-xml-20001006#sec-common-syn if (isalnum(c) || strchr("#._-:", c)) { AddChar(); } else { break; } } return LookupKeyword();}#define CHECK(keyword, t, length) \ if ( memcmp(token, keyword, length) == 0 ) return tTToken DTDLexer::LookupKeyword(void){ const char* token = CurrentTokenStart();// check identifier against known keywords switch ( CurrentTokenLength() ) { default: break; case 2: CHECK("ID",K_ID,2); break; case 3: CHECK("ANY", K_ANY, 3); break; case 5: CHECK("EMPTY", K_EMPTY, 5); CHECK("CDATA", K_CDATA, 5); CHECK("IDREF", K_IDREF, 5); break; case 6: CHECK("ENTITY", K_ENTITY, 6); CHECK("SYSTEM", K_SYSTEM, 6); CHECK("PUBLIC", K_PUBLIC, 6); CHECK("IDREFS", K_IDREFS, 6); CHECK("#FIXED", K_FIXED, 6); break; case 7: CHECK("ELEMENT", K_ELEMENT, 7); CHECK("ATTLIST", K_ATTLIST, 7); CHECK("#PCDATA", K_PCDATA, 7); CHECK("NMTOKEN", K_NMTOKEN, 7); break; case 8: CHECK("NMTOKENS", K_NMTOKENS, 8); CHECK("ENTITIES", K_ENTITIES, 8); CHECK("NOTATION", K_NOTATION, 8); CHECK("#DEFAULT", K_DEFAULT, 8); CHECK("#IMPLIED", K_IMPLIED, 8); break; case 9: CHECK("#REQUIRED", K_REQUIRED, 9); break; } return T_IDENTIFIER;}TToken DTDLexer::LookupEntity(void){// Entity declaration:// http://www.w3.org/TR/2000/REC-xml-20001006#sec-entity-decl char c = Char(); if (c != '%') { LexerError("Unexpected symbol: %"); }// _ASSERT(c == '%'); if (isspace(Char(1))) { return T_SYMBOL; } else if (isalpha(Char(1))) { SkipChar(); StartToken(); for (c = Char(); c != ';'; c = Char()) { AddChar(); } m_CharsToSkip = 1; } else { LexerError("Unexpected symbol"); } return T_ENTITY;}TToken DTDLexer::LookupString(void){// Entity value:// http://www.w3.org/TR/2000/REC-xml-20001006#NT-EntityValue _ASSERT(m_CharsToSkip==0); char c0 = Char(); if(c0 != '\"' && c0 != '\'') { LexerError("Unexpected symbol"); }// _ASSERT(c0 == '\"' || c0 == '\''); SkipChar(); StartToken(); m_CharsToSkip = 1; for (char c = Char(); c != c0; c = Char()) { AddChar(); } return T_STRING;}bool DTDLexer::EndPrevToken(void){ if (m_CharsToSkip != 0) { SkipChars(m_CharsToSkip); m_CharsToSkip = 0; return true; } return false;}END_NCBI_SCOPE/* * ========================================================================== * $Log: dtdlexer.cpp,v $ * Revision 1000.2 2004/06/01 19:42:49 gouriano * PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.7 * * Revision 1.7 2004/05/17 21:03:14 gorelenk * Added include of PCH ncbi_pch.hpp * * Revision 1.6 2004/01/16 19:56:52 gouriano * Minor correction when parsing end-of-line * * Revision 1.5 2002/12/17 16:24:43 gouriano * replaced _ASSERTs by throwing an exception * * Revision 1.4 2002/11/14 21:05:27 gouriano * added support of XML attribute lists * * Revision 1.3 2002/10/21 16:09:46 gouriano * added more DTD tokens * * Revision 1.2 2002/10/18 14:38:56 gouriano * added parsing of internal parsed entities * * Revision 1.1 2002/10/15 13:54:01 gouriano * DTD lexer and parser, first version * * * ========================================================================== */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -