📄 xpath_syntax.cpp
字号:
/*
www.sourceforge.net/projects/tinyxpath
Copyright (c) 2002-2004 Yves Berquin (yvesb@users.sourceforge.net)
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any
damages arising from the use of this software.
Permission is granted to anyone to use this software for any
purpose, including commercial applications, and to alter it and
redistribute it freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must
not claim that you wrote the original software. If you use this
software in a product, an acknowledgment in the product documentation
would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and
must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source
distribution.
*/
/**
\file xpath_syntax.cpp
\author Yves Berquin
XPath Syntax analyzer for tinyxpath project
*/
/*
@history
Modified on 16 December 2006 by Aman Aggarwal
Added support for Expressions like ( Expr or Expr or Expr)
*/
#include <stdio.h>
#include "xpath_syntax.h"
namespace aux { namespace xml
{
/// Runs the syntactic analysis of an XPath expression. The lexical analysis is expected
/// to have already produced the list of basic tokens this method works on.
/// \n Throws : syntax_error, syntax_overflow
void token_syntax_decoder::v_syntax_decode ()
{
   // merge the two-part tokens ('!=', '::', ...) into single ones
   v_tokenize_expression ();

   // rewind to the head of the token list and restart the recursion count
   v_set_current_top ();
   u_nb_recurs = 0;

   // the expression as a whole has to be recognized as an xpath_expr
   if (! o_recognize (xpath_expr, true))
      throw syntax_error ("main level");

#ifdef DUMP_SYNTAX
   // debug trace : report whether a token is still pending after the parse
   if (! ltp_get (0))
      printf ("Completely parsed\n");
   else
      printf ("!!! Still to decode : %s !!!\n", cp_disp_class_lex (ltp_get (0) -> lex_get_value ()));
   printf ("%d recursions\n", u_nb_recurs);
#endif
}
/// Recognize one XPath construction
/// \n This function throws exceptions every time there's a failure in a backtracking attempt.
/// This should only happen when o_final is false, otherwise we have a syntax error
bool token_syntax_decoder::o_recognize (
xpath_construct xc_current, ///< XPath construction to recognize
bool o_final) ///< true if we need to go on, false if it's just a trial
/// in the backtracking
{
lex_token * ltp_freeze;
bool o_empty, o_found, o_location_path, o_qname, o_temp;
unsigned u_nb_argument, u_nb_predicate;
int i_action_counter;
bool o_test_more;
u_nb_recurs++;
if (u_nb_recurs > 10000)
throw syntax_overflow ();
ltp_freeze = NULL;
switch (xc_current)
{
case xpath_location_path :
//
// [1] LocationPath ::= RelativeLocationPath
// | AbsoluteLocationPath
//
if (! ltp_get (0))
return false;
switch (ltp_get (0) -> lex_get_value ())
{
case lex_slash :
case lex_2_slash :
if (! o_recognize (xpath_absolute_location_path, o_final))
return false;
if (o_final)
v_action (xpath_location_path, xpath_location_path_abs);
break;
default :
if (! o_recognize (xpath_relative_location_path, o_final))
return false;
if (o_final)
v_action (xpath_location_path, xpath_location_path_rel);
break;
}
break;
case xpath_absolute_location_path :
// [2] AbsoluteLocationPath ::= '/' RelativeLocationPath?
// | AbbreviatedAbsoluteLocationPath
if (! ltp_get (0))
return false;
i_action_counter = i_get_action_counter ();
switch (ltp_get (0) -> lex_get_value ())
{
case lex_slash :
v_inc_current (1);
ltp_freeze = ltp_get (0);
o_temp = o_recognize (xpath_relative_location_path, false);
if (o_temp)
{
v_set_current (ltp_freeze);
o_recognize (xpath_relative_location_path, o_final);
if (o_final)
v_action (xpath_absolute_location_path, xpath_absolute_location_path_slash_rel, i_action_counter);
}
else
{
v_set_current (ltp_freeze);
if (o_final)
v_action (xpath_absolute_location_path, xpath_absolute_location_path_slash, i_action_counter);
}
break;
case lex_2_slash :
if (! o_recognize (xpath_abbreviated_absolute_location_path, o_final))
return false;
if (o_final)
v_action (xpath_absolute_location_path, xpath_absolute_location_path_abbrev, i_action_counter);
break;
default :
return false;
}
break;
case xpath_relative_location_path :
//
// [3] RelativeLocationPath ::= Step
// | RelativeLocationPath '/' Step
// | AbbreviatedRelativeLocationPath
// [11] AbbreviatedRelativeLocationPath ::= RelativeLocationPath '//' Step
//
i_action_counter = i_get_action_counter ();
if (! o_recognize (xpath_step, o_final))
return false;
if (ltp_get (0) && ltp_get (0) -> lex_get_value () == lex_slash)
{
v_inc_current (1);
if (! o_recognize (xpath_relative_location_path, o_final))
return false;
if (o_final)
v_action (xpath_relative_location_path, xpath_relative_location_path_rel_step, i_action_counter);
}
else
if (ltp_get (0) && ltp_get (0) -> lex_get_value () == lex_2_slash)
{
v_inc_current (1);
if (! o_recognize (xpath_relative_location_path, o_final))
return false;
if (o_final)
v_action (xpath_relative_location_path, xpath_relative_location_path_rel_double_slash_step, i_action_counter);
}
else
{
if (o_final)
v_action (xpath_relative_location_path, xpath_relative_location_path_step, i_action_counter);
}
break;
case xpath_step :
// [4] Step ::= AxisSpecifier NodeTest Predicate*
// | AbbreviatedStep
if (! ltp_get (0))
return false;
switch (ltp_get (0) -> lex_get_value ())
{
case lex_dot :
case lex_2_dot :
if (! o_recognize (xpath_abbrieviated_step, o_final))
return false;
if (o_final)
v_action (xpath_step, xpath_step_abbrev);
break;
default :
if (! o_recognize (xpath_axis_specifier, o_final))
return false;
if (! o_recognize (xpath_node_test, o_final))
return false;
o_found = true;
u_nb_predicate = 0;
while (o_found && ltp_get (0) && ltp_get (0) -> lex_get_value () == lex_obrack)
{
ltp_freeze = ltp_get (0);
if (! o_recognize (xpath_predicate, false))
o_found = false;
else
{
v_set_current (ltp_freeze);
o_recognize (xpath_predicate, o_final);
u_nb_predicate++;
}
}
if (o_final)
v_action (xpath_step, xpath_step_full, u_nb_predicate);
break;
}
break;
case xpath_axis_specifier :
//
// [5] AxisSpecifier ::= AxisName '::'
// | AbbreviatedAxisSpecifier
//
// [13] AbbreviatedAxisSpecifier ::= '@'?
o_empty = false;
if (ltp_get (0))
{
switch (ltp_get (0) -> lex_get_value ())
{
case lex_at :
v_inc_current (1);
if (o_final)
v_action (xpath_axis_specifier, xpath_axis_specifier_at);
break;
default :
if (o_is_axis_name (ltp_get (0) -> lex_get_value ()))
{
if (! o_recognize (xpath_axis_name, o_final))
return false;
if (! ltp_get (0))
return false;
if (ltp_get (0) -> lex_get_value () != lex_2_colon)
return false;
v_inc_current (1);
if (o_final)
v_action (xpath_axis_specifier, xpath_axis_specifier_axis_name);
}
else
o_empty = true;
break;
}
}
else
o_empty = true;
if (o_empty)
{
if (o_final)
{
v_action (xpath_abbreviated_axis_specifier, 1);
v_action (xpath_axis_specifier, xpath_axis_specifier_empty);
}
}
break;
case xpath_axis_name :
// [6] AxisName ::= 'ancestor'
// | 'ancestor-or-self'
// | 'attribute_'
// | 'child_'
// | 'descendant'
// | 'descendant-or-self'
// | 'following'
// | 'following-sibling'
// | 'namespace'
// | 'parent_'
// | 'preceding'
// | 'preceding-sibling'
// | 'self'
if (! ltp_get (0))
return false;
if (! o_is_axis_name (ltp_get (0) -> lex_get_value ()))
return false;
if (o_final)
v_action (xpath_axis_name, 0, ltp_get (0) -> lex_get_value ());
v_inc_current (1);
break;
case xpath_node_test :
// [7] NodeTest ::= NameTest
// | node_type '(' ')'
// | 'processing-instruction' '(' Literal ')'
// [38] node_type ::= 'comment_'
// | 'text_'
// | 'processing-instruction'
// | 'node_'
if (! ltp_get (0))
return false;
switch (ltp_get (0) -> lex_get_value ())
{
case lex_comment :
case lex_text :
case lex_node :
if (o_final)
v_action (xpath_node_test, xpath_node_test_reserved_keyword, ltp_get (0) -> lex_get_value ());
v_inc_current (3);
break;
case lex_processing_instruction :
if (ltp_get (2) && ltp_get (2) -> lex_get_value () == lex_cparen)
{
// single
v_inc_current (3);
if (o_final)
v_action (xpath_node_test, xpath_node_test_pi, lex_processing_instruction);
}
else
{
// with literal
v_inc_current (3);
if (o_final)
v_action (xpath_node_test, xpath_node_test_pi_lit, lex_processing_instruction, ltp_get (0) -> cp_get_literal ());
v_inc_current (1);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -