📄 xpath.js
字号:
// Copyright 2005 Google Inc.
// All Rights Reserved
//
// An XPath parser and evaluator written in JavaScript. The
// implementation is complete except for functions handling
// namespaces.
//
// Reference: [XPATH] XPath Specification
// <http://www.w3.org/TR/1999/REC-xpath-19991116>.
//
//
// The API of the parser has several parts:
//
// 1. The parser function xpathParse() that takes a string and returns
// an expession object.
//
// 2. The expression object that has an evaluate() method to evaluate the
// XPath expression it represents. (It is actually a hierarchy of
// objects that resembles the parse tree, but an application will call
// evaluate() only on the top node of this hierarchy.)
//
// 3. The context object that is passed as an argument to the evaluate()
// method, which represents the DOM context in which the expression is
// evaluated.
//
// 4. The value object that is returned from evaluate() and represents
// values of the different types that are defined by XPath (number,
// string, boolean, and node-set), and allows to convert between them.
//
// These parts are near the top of the file, the functions and data
// that are used internally follow after them.
//
//
// TODO(mesch): add jsdoc comments. Use more coherent naming.
//
//
// Author: Steffen Meschkat <mesch@google.com>
// The entry point for the parser.
//
// @param expr a string that contains an XPath expression.
// @return an expression object that can be evaluated with an
// expression context.
function xpathParse(expr) {
if (xpathdebug) {
Log.write('XPath parse ' + expr);
}
xpathParseInit();
var cached = xpathCacheLookup(expr);
if (cached) {
if (xpathdebug) {
Log.write(' ... cached');
}
return cached;
}
// Optimize for a few common cases: simple attribute node tests
// (@id), simple element node tests (page), variable references
// ($address), numbers (4), multi-step path expressions where each
// step is a plain element node test
// (page/overlay/locations/location).
if (expr.match(/^(\$|@)?\w+$/i)) {
var ret = makeSimpleExpr(expr);
xpathParseCache[expr] = ret;
if (xpathdebug) {
Log.write(' ... simple');
}
return ret;
}
if (expr.match(/^\w+(\/\w+)*$/i)) {
var ret = makeSimpleExpr2(expr);
xpathParseCache[expr] = ret;
if (xpathdebug) {
Log.write(' ... simple 2');
}
return ret;
}
var cachekey = expr; // expr is modified during parse
if (xpathdebug) {
Timer.start('XPath parse', cachekey);
}
var stack = [];
var ahead = null;
var previous = null;
var done = false;
var parse_count = 0;
var lexer_count = 0;
var reduce_count = 0;
while (!done) {
parse_count++;
expr = expr.replace(/^\s*/, '');
previous = ahead;
ahead = null;
var rule = null;
var match = '';
for (var i = 0; i < xpathTokenRules.length; ++i) {
var result = xpathTokenRules[i].re.exec(expr);
lexer_count++;
if (result && result.length > 0 && result[0].length > match.length) {
rule = xpathTokenRules[i];
match = result[0];
break;
}
}
// Special case: allow operator keywords to be element and
// variable names.
// NOTE(mesch): The parser resolves conflicts by looking ahead,
// and this is the only case where we look back to
// disambiguate. So this is indeed something different, and
// looking back is usually done in the lexer (via states in the
// general case, called "start conditions" in flex(1)). Also,the
// conflict resolution in the parser is not as robust as it could
// be, so I'd like to keep as much off the parser as possible (all
// these precedence values should be computed from the grammar
// rules and possibly associativity declarations, as in bison(1),
// and not explicitly set.
if (rule &&
(rule == TOK_DIV ||
rule == TOK_MOD ||
rule == TOK_AND ||
rule == TOK_OR) &&
(!previous ||
previous.tag == TOK_AT ||
previous.tag == TOK_DSLASH ||
previous.tag == TOK_SLASH ||
previous.tag == TOK_AXIS ||
previous.tag == TOK_DOLLAR)) {
rule = TOK_QNAME;
}
if (rule) {
expr = expr.substr(match.length);
if (xpathdebug) {
Log.write('token: ' + match + ' -- ' + rule.label);
}
ahead = {
tag: rule,
match: match,
prec: rule.prec ? rule.prec : 0, // || 0 is removed by the compiler
expr: makeTokenExpr(match)
};
} else {
if (xpathdebug) {
Log.write('DONE');
}
done = true;
}
while (xpathReduce(stack, ahead)) {
reduce_count++;
if (xpathdebug) {
Log.write('stack: ' + stackToString(stack));
}
}
}
if (xpathdebug) {
Log.write(stackToString(stack));
}
if (stack.length != 1) {
throw 'XPath parse error ' + cachekey + ':\n' + stackToString(stack);
}
var result = stack[0].expr;
xpathParseCache[cachekey] = result;
if (xpathdebug) {
Timer.end('XPath parse', cachekey);
}
if (xpathdebug) {
Log.write('XPath parse: ' + parse_count + ' / ' +
lexer_count + ' / ' + reduce_count);
}
return result;
}
var xpathParseCache = {};
function xpathCacheLookup(expr) {
return xpathParseCache[expr];
}
function xpathReduce(stack, ahead) {
var cand = null;
if (stack.length > 0) {
var top = stack[stack.length-1];
var ruleset = xpathRules[top.tag.key];
if (ruleset) {
for (var i = 0; i < ruleset.length; ++i) {
var rule = ruleset[i];
var match = xpathMatchStack(stack, rule[1]);
if (match.length) {
cand = {
tag: rule[0],
rule: rule,
match: match
};
cand.prec = xpathGrammarPrecedence(cand);
break;
}
}
}
}
var ret;
if (cand && (!ahead || cand.prec > ahead.prec ||
(ahead.tag.left && cand.prec >= ahead.prec))) {
for (var i = 0; i < cand.match.matchlength; ++i) {
stack.pop();
}
if (xpathdebug) {
Log.write('reduce ' + cand.tag.label + ' ' + cand.prec +
' ahead ' + (ahead ? ahead.tag.label + ' ' + ahead.prec +
(ahead.tag.left ? ' left' : '')
: ' none '));
}
var matchexpr = mapExpr(cand.match, function(m) { return m.expr; });
cand.expr = cand.rule[3].apply(null, matchexpr);
stack.push(cand);
ret = true;
} else {
if (ahead) {
if (xpathdebug) {
Log.write('shift ' + ahead.tag.label + ' ' + ahead.prec +
(ahead.tag.left ? ' left' : '') +
' over ' + (cand ? cand.tag.label + ' ' +
cand.prec : ' none'));
}
stack.push(ahead);
}
ret = false;
}
return ret;
}
function xpathMatchStack(stack, pattern) {
// NOTE(mesch): The stack matches for variable cardinality are
// greedy but don't do backtracking. This would be an issue only
// with rules of the form A* A, i.e. with an element with variable
// cardinality followed by the same element. Since that doesn't
// occur in the grammar at hand, all matches on the stack are
// unambiguous.
var S = stack.length;
var P = pattern.length;
var p, s;
var match = [];
match.matchlength = 0;
var ds = 0;
for (p = P - 1, s = S - 1; p >= 0 && s >= 0; --p, s -= ds) {
ds = 0;
var qmatch = [];
if (pattern[p] == Q_MM) {
p -= 1;
match.push(qmatch);
while (s - ds >= 0 && stack[s - ds].tag == pattern[p]) {
qmatch.push(stack[s - ds]);
ds += 1;
match.matchlength += 1;
}
} else if (pattern[p] == Q_01) {
p -= 1;
match.push(qmatch);
while (s - ds >= 0 && ds < 2 && stack[s - ds].tag == pattern[p]) {
qmatch.push(stack[s - ds]);
ds += 1;
match.matchlength += 1;
}
} else if (pattern[p] == Q_1M) {
p -= 1;
match.push(qmatch);
if (stack[s].tag == pattern[p]) {
while (s - ds >= 0 && stack[s - ds].tag == pattern[p]) {
qmatch.push(stack[s - ds]);
ds += 1;
match.matchlength += 1;
}
} else {
return [];
}
} else if (stack[s].tag == pattern[p]) {
match.push(stack[s]);
ds += 1;
match.matchlength += 1;
} else {
return [];
}
reverseInplace(qmatch);
qmatch.expr = mapExpr(qmatch, function(m) { return m.expr; });
}
reverseInplace(match);
if (p == -1) {
return match;
} else {
return [];
}
}
function xpathTokenPrecedence(tag) {
return tag.prec || 2;
}
function xpathGrammarPrecedence(frame) {
var ret = 0;
if (frame.rule) { /* normal reduce */
if (frame.rule.length >= 3 && frame.rule[2] >= 0) {
ret = frame.rule[2];
} else {
for (var i = 0; i < frame.rule[1].length; ++i) {
var p = xpathTokenPrecedence(frame.rule[1][i]);
ret = Math.max(ret, p);
}
}
} else if (frame.tag) { /* TOKEN match */
ret = xpathTokenPrecedence(frame.tag);
} else if (frame.length) { /* Q_ match */
for (var j = 0; j < frame.length; ++j) {
var p = xpathGrammarPrecedence(frame[j]);
ret = Math.max(ret, p);
}
}
return ret;
}
function stackToString(stack) {
var ret = '';
for (var i = 0; i < stack.length; ++i) {
if (ret) {
ret += '\n';
}
ret += stack[i].tag.label;
}
return ret;
}
// XPath expression evaluation context. An XPath context consists of a
// DOM node, a list of DOM nodes that contains this node, a number
// that represents the position of the single node in the list, and a
// current set of variable bindings. (See XPath spec.)
//
// The interface of the expression context:
//
// Constructor -- gets the node, its position, the node set it
// belongs to, and a parent context as arguments. The parent context
// is used to implement scoping rules for variables: if a variable
// is not found in the current context, it is looked for in the
// parent context, recursively. Except for node, all arguments have
// default values: default position is 0, default node set is the
// set that contains only the node, and the default parent is null.
//
// Notice that position starts at 0 at the outside interface;
// inside XPath expressions this shows up as position()=1.
//
// clone() -- creates a new context with the current context as
// parent. If passed as argument to clone(), the new context has a
// different node, position, or node set. What is not passed is
// inherited from the cloned context.
//
// setVariable(name, expr) -- binds given XPath expression to the
// name.
//
// getVariable(name) -- what the name says.
//
// setNode(node, position) -- sets the context to the new node and
// its corresponding position. Needed to implement scoping rules for
// variables in XPath. (A variable is visible to all subsequent
// siblings, not only to its children.)
function ExprContext(node, position, nodelist, parent) {
this.node = node;
this.position = position || 0;
this.nodelist = nodelist || [ node ];
this.variables = {};
this.parent = parent || null;
this.root = parent ? parent.root : node.ownerDocument;
}
ExprContext.prototype.clone = function(node, position, nodelist) {
return new
ExprContext(node || this.node,
typeof position != 'undefined' ? position : this.position,
nodelist || this.nodelist, this);
};
ExprContext.prototype.setVariable = function(name, value) {
this.variables[name] = value;
};
ExprContext.prototype.getVariable = function(name) {
if (typeof this.variables[name] != 'undefined') {
return this.variables[name];
} else if (this.parent) {
return this.parent.getVariable(name);
} else {
return null;
}
}
ExprContext.prototype.setNode = function(node, position) {
this.node = node;
this.position = position;
}
// XPath expression values. They are what XPath expressions evaluate
// to. Strangely, the different value types are not specified in the
// XPath syntax, but only in the semantics, so they don't show up as
// nonterminals in the grammar. Yet, some expressions are required to
// evaluate to particular types, and not every type can be coerced
// into every other type. Although the types of XPath values are
// similar to the types present in JavaScript, the type coercion rules
// are a bit peculiar, so we explicitly model XPath types instead of
// mapping them onto JavaScript types. (See XPath spec.)
//
// The four types are:
//
// StringValue
//
// NumberValue
//
// BooleanValue
//
// NodeSetValue
//
// The common interface of the value classes consists of methods that
// implement the XPath type coercion rules:
//
// stringValue() -- returns the value as a JavaScript String,
//
// numberValue() -- returns the value as a JavaScript Number,
//
// booleanValue() -- returns the value as a JavaScript Boolean,
//
// nodeSetValue() -- returns the value as a JavaScript Array of DOM
// Node objects.
//
function StringValue(value) {
this.value = value;
this.type = 'string';
}
StringValue.prototype.stringValue = function() {
return this.value;
}
StringValue.prototype.booleanValue = function() {
return this.value.length > 0;
}
StringValue.prototype.numberValue = function() {
return this.value - 0;
}
StringValue.prototype.nodeSetValue = function() {
throw this + ' ' + Error().stack;
}
function BooleanValue(value) {
this.value = value;
this.type = 'boolean';
}
BooleanValue.prototype.stringValue = function() {
return '' + this.value;
}
BooleanValue.prototype.booleanValue = function() {
return this.value;
}
BooleanValue.prototype.numberValue = function() {
return this.value ? 1 : 0;
}
BooleanValue.prototype.nodeSetValue = function() {
throw this + ' ' + Error().stack;
}
function NumberValue(value) {
this.value = value;
this.type = 'number';
}
NumberValue.prototype.stringValue = function() {
return '' + this.value;
}
NumberValue.prototype.booleanValue = function() {
return !!this.value;
}
NumberValue.prototype.numberValue = function() {
return this.value - 0;
}
NumberValue.prototype.nodeSetValue = function() {
throw this + ' ' + Error().stack;
}
function NodeSetValue(value) {
this.value = value;
this.type = 'node-set';
}
NodeSetValue.prototype.stringValue = function() {
if (this.value.length == 0) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -