📄 parser.cpp
字号:
int more_digit = 0;
ACEXML_UCS4 sum = 0;
while (1)
{
ACEXML_Char ch = this->get ();
switch (ch)
{
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
sum = sum * (hex ? 16 : 10) + (ch - '0');
break;
case 'a':
case 'A':
if (!hex)
return -1;
sum = sum * 16 + 10;
break;
case 'b':
case 'B':
if (!hex)
return -1;
sum = sum * 16 + 11;
break;
case 'c':
case 'C':
if (!hex)
return -1;
sum = sum * 16 + 12;
break;
case 'd':
case 'D':
if (!hex)
return -1;
sum = sum * 16 + 13;
break;
case 'e':
case 'E':
if (!hex)
return -1;
sum = sum * 16 + 14;
break;
case 'f':
case 'F':
if (!hex)
return -1;
sum = sum * 16 + 15;
break;
case ';':
if (more_digit == 0) // no digit exist???
return -1;
int clen;
#if defined (ACE_USES_WCHAR) // UTF-16
if ((clen = ACEXML_Transcoder::ucs42utf16 (sum, buf, len)) < 0)
return -1;
#elif 1 // or UTF-8
if ((clen = ACEXML_Transcoder::ucs42utf8 (sum, buf, len)) < 0)
return -1;
// #elif 0 // UCS 4, not likely
// buf [0] = sum;
// buf [1] = 0;
#endif
buf [clen] = 0;
return 0;
default:
return -1;
}
more_digit = 1;
}
ACE_NOTREACHED (return -1);
}
const ACEXML_String *
ACEXML_Parser::parse_reference (void)
{
// @@ We'll use a temporary buffer here as the Obstack is most likely in
// use when we are here. This puts a limit on the max length of a
// reference.
ACEXML_Char ref[MAXPATHLEN];
size_t loc = 0;
while (loc < MAXPATHLEN -1)
{
ACEXML_Char ch = this->get ();
if (ch == ';')
{
ref[loc] = 0;
break;
}
else
ref[loc++] = ch;
}
return this->entities_.resolve_entity (ref);
}
int
ACEXML_Parser::parse_cdata (ACEXML_ENV_SINGLE_ARG_DECL)
{
if (this->parse_token (ACE_TEXT ("[CDATA[")) < 0)
{
this->report_fatal_error(ACE_TEXT ("'[CDATA[' expected") ACEXML_ENV_ARG_PARAMETER);
return -1;
}
int parse_state = 0;
size_t datalen = 0;
while (1)
{
ACEXML_Char ch;
ACEXML_Char *cdata;
ch = this->get ();
// Anything goes except the sequence "]]>".
switch (parse_state)
{
case 2:
if (ch == ']')
{
parse_state = 3;
continue;
}
break;
case 3:
if (ch == '>') // Yay!
{
cdata = this->obstack_.freeze ();
this->content_handler_->characters (cdata,
0,
datalen ACEXML_ENV_ARG_PARAMETER);
// ACEXML_CHECK_RETURN (-1);
this->obstack_.unwind(cdata);
return 0;
}
break;
default:
if (ch == ']')
{
parse_state = 2;
continue;
}
else
parse_state = 1;
}
while (parse_state > 0)
{
if (this->try_grow_cdata (1, datalen ACEXML_ENV_ARG_PARAMETER) < 0)
return -1;
if (parse_state != 1)
this->obstack_.grow (']');
else
{
if (ch == 0x0D)
ch = (this->peek () == 0x0A ? this->get () : 0x0A);
this->obstack_.grow (ch);
}
++datalen;
--parse_state;
}
};
ACE_NOTREACHED (return -1);
}
int
ACEXML_Parser::try_grow_cdata (size_t size, size_t &len ACEXML_ENV_ARG_DECL)
{
if (this->obstack_.request (size) != 0)
{
if (len != 0)
{
ACEXML_Char *cdata = this->obstack_.freeze ();
if (cdata == 0)
{
this->report_fatal_error(ACE_TEXT ("Internal Error growing CDATA buffer") ACEXML_ENV_ARG_PARAMETER);
return -1;
}
this->content_handler_->characters (cdata,
0,
len ACEXML_ENV_ARG_PARAMETER);
ACEXML_CHECK_RETURN (-1);
len = 0; // reset counter
if (this->obstack_.request (size) == 0)
return 0;
}
this->report_fatal_error(ACE_TEXT ("Internal Error, buffer overflowed") ACEXML_ENV_ARG_PARAMETER);
return -1;
}
return 0;
}
int
ACEXML_Parser::parse_internal_dtd (ACEXML_ENV_SINGLE_ARG_DECL)
{
ACEXML_Char nextch = this->skip_whitespace (0);
do {
switch (nextch)
{
case '<': // Start of markup Decl.
nextch = this->peek ();
switch (nextch)
{
case '!':
this->get (); // Discard '!'
nextch = this->peek ();
switch (nextch)
{
case 'E': // An ELEMENT or ENTITY decl
this->get ();
nextch = this->peek ();
switch (nextch)
{
case 'L':
if (this->parse_element_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER) < 0)
return -1;
break;
case 'N':
if (this->parse_entity_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER) < 0)
return -1;
break;
default:
this->report_fatal_error(ACE_TEXT ("Invalid keyword in decl spec") ACEXML_ENV_ARG_PARAMETER);
return -1;
}
break;
case 'A': // An ATTLIST decl
if (this->parse_attlist_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER) < 0)
return -1;
break;
case 'N': // A NOTATION decl
if (this->parse_notation_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER) < 0)
return -1;
break;
case '-': // a comment.
if (this->grok_comment () < 0)
{
this->report_fatal_error(ACE_TEXT ("Error parsing comment") ACEXML_ENV_ARG_PARAMETER);
return -1;
}
break;
case 0:
this->report_fatal_error (ACE_TEXT ("Unexpected EOF") ACEXML_ENV_ARG_PARAMETER);
return -1;
default:
this->report_fatal_error (ACE_TEXT ("Invalid char. follows '<!' in markupdecl") ACEXML_ENV_ARG_PARAMETER);
return -1;
}
break;
case '?': // PI
this->parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_PARAMETER);
ACEXML_CHECK_RETURN (-1);
break;
case 0:
this->report_fatal_error (ACE_TEXT ("Unexpected EOF") ACEXML_ENV_ARG_PARAMETER);
return -1;
default:
this->report_fatal_error (ACE_TEXT ("Invalid char. follows '<!' in markupdecl") ACEXML_ENV_ARG_PARAMETER);
return -1;
}
break;
case '%': // DeclSep. Define new PEreference...
break;
case ']': // End of internal definitions.
return 0; // Not applicable when parsing external DTD spec.
case 0: // This may not be an error if we decide
// to generalize this function to handle both
// internal and external DTD definitions.
this->report_fatal_error (ACE_TEXT ("Unexpected EOF") ACEXML_ENV_ARG_PARAMETER);
return -1;
default:
this->report_fatal_error (ACE_TEXT ("Expecting markupdecl or DecSep") ACEXML_ENV_ARG_PARAMETER);
return -1;
};
// To fully conform with the spec., whitespaces are only allowed
// following a 'DeclSep' section. However, I found it
// hard/impossible to eliminate all the whitespaces between
// markupdecls.
nextch = this->skip_whitespace (0);
} while (1);
ACE_NOTREACHED (return -1);
}
int
ACEXML_Parser::parse_element_decl (ACEXML_ENV_SINGLE_ARG_DECL)
{
if ((this->parse_token (ACE_TEXT ("LEMENT")) < 0) ||
this->skip_whitespace_count () == 0)
{
this->report_fatal_error (ACE_TEXT ("Expecting keyword `ELEMENT'") ACEXML_ENV_ARG_PARAMETER);
return -1;
}
ACEXML_Char *element_name = this->read_name ();
if (element_name == 0)
{
this->report_fatal_error (ACE_TEXT ("Error reading element name while defining ELEMENT.") ACEXML_ENV_ARG_PARAMETER);
return -1;
}
ACEXML_Char nextch ;
this->skip_whitespace_count (&nextch);
switch (nextch)
{
case 'E': // EMPTY
if (this->parse_token (ACE_TEXT ("EMPTY")) < 0)
{
this->report_fatal_error (ACE_TEXT ("Expecting keyword `EMPTY' in ELEMENT definition.") ACEXML_ENV_ARG_PARAMETER);
return -1;
}
break;
case 'A': // ANY
if (this->parse_token (ACE_TEXT ("ANY")) < 0)
{
this->report_fatal_error (ACE_TEXT ("Expecting keyword `ANY' in ELEMENT definition.") ACEXML_ENV_ARG_PARAMETER);
return -1;
}
break;
case '(': // children
this->parse_children_definition (ACEXML_ENV_SINGLE_ARG_PARAMETER);
ACEXML_CHECK_RETURN (-1);
break;
default: // error
this->report_fatal_error (ACE_TEXT ("Error reading ELEMENT definition.") ACEXML_ENV_ARG_PARAMETER);
return -1;
}
if (this->skip_whitespace (0) != '>')
{
this->report_fatal_error (ACE_TEXT ("Expecting '>' in ELEMENT definition.") ACEXML_ENV_ARG_PARAMETER);
return -1;
}
return 0;
}
int
ACEXML_Parser::parse_entity_decl (ACEXML_ENV_SINGLE_ARG_DECL)
{
ACEXML_Char nextch;
if ((this->parse_token (ACE_TEXT ("NTITY")) < 0) ||
this->skip_whitespace_count (&nextch) == 0)
{
this->report_fatal_error (ACE_TEXT ("Expecting keyword `ENTITY'") ACEXML_ENV_ARG_PARAMETER);
return -1;
}
int is_GEDecl = 1;
if (nextch == '%') // This is a PEDecl.
{
is_GEDecl = 0;
this->get (); // consume the '%'
if (this->skip_whitespace_count (&nextch) == 0)
{
this->report_fatal_error (ACE_TEXT ("Can't use a reference when defining entity name") ACEXML_ENV_ARG_PARAMETER);
return -1;
}
}
ACEXML_Char *entity_name = this->read_name ();
if (entity_name == 0)
{
this->report_fatal_error (ACE_TEXT ("Error reading ENTITY name.") ACEXML_ENV_ARG_PARAMETER);
return -1;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -