📄 xpath.js
字号:
// Copyright 2005 Google Inc.// All Rights Reserved//// An XPath parser and evaluator written in JavaScript. The// implementation is complete except for functions handling// namespaces.//// Reference: [XPATH] XPath Specification// <http://www.w3.org/TR/1999/REC-xpath-19991116>.////// The API of the parser has several parts://// 1. The parser function xpathParse() that takes a string and returns// an expession object.//// 2. The expression object that has an evaluate() method to evaluate the// XPath expression it represents. (It is actually a hierarchy of// objects that resembles the parse tree, but an application will call// evaluate() only on the top node of this hierarchy.)//// 3. The context object that is passed as an argument to the evaluate()// method, which represents the DOM context in which the expression is// evaluated.//// 4. The value object that is returned from evaluate() and represents// values of the different types that are defined by XPath (number,// string, boolean, and node-set), and allows to convert between them.//// These parts are near the top of the file, the functions and data// that are used internally follow after them.////// TODO(mesch): add jsdoc comments. Use more coherent naming.////// Author: Steffen Meschkat <mesch@google.com>// The entry point for the parser.//// @param expr a string that contains an XPath expression.// @return an expression object that can be evaluated with an// expression context.function xpathParse(expr) { if (xpathdebug) { Log.write('XPath parse ' + expr); } xpathParseInit(); var cached = xpathCacheLookup(expr); if (cached) { if (xpathdebug) { Log.write(' ... cached'); } return cached; } // Optimize for a few common cases: simple attribute node tests // (@id), simple element node tests (page), variable references // ($address), numbers (4), multi-step path expressions where each // step is a plain element node test // (page/overlay/locations/location). if (expr.match(/^(\$|@)?\w+$/i)) { var ret = makeSimpleExpr(expr); xpathParseCache[expr] = ret; if (xpathdebug) { Log.write(' ... simple'); } return ret; } if (expr.match(/^\w+(\/\w+)*$/i)) { var ret = makeSimpleExpr2(expr); xpathParseCache[expr] = ret; if (xpathdebug) { Log.write(' ... simple 2'); } return ret; } var cachekey = expr; // expr is modified during parse if (xpathdebug) { Timer.start('XPath parse', cachekey); } var stack = []; var ahead = null; var previous = null; var done = false; var parse_count = 0; var lexer_count = 0; var reduce_count = 0; while (!done) { parse_count++; expr = expr.replace(/^\s*/, ''); previous = ahead; ahead = null; var rule = null; var match = ''; for (var i = 0; i < xpathTokenRules.length; ++i) { var result = xpathTokenRules[i].re.exec(expr); lexer_count++; if (result && result.length > 0 && result[0].length > match.length) { rule = xpathTokenRules[i]; match = result[0]; break; } } // Special case: allow operator keywords to be element and // variable names. // NOTE(mesch): The parser resolves conflicts by looking ahead, // and this is the only case where we look back to // disambiguate. So this is indeed something different, and // looking back is usually done in the lexer (via states in the // general case, called "start conditions" in flex(1)). Also,the // conflict resolution in the parser is not as robust as it could // be, so I'd like to keep as much off the parser as possible (all // these precedence values should be computed from the grammar // rules and possibly associativity declarations, as in bison(1), // and not explicitly set. if (rule && (rule == TOK_DIV || rule == TOK_MOD || rule == TOK_AND || rule == TOK_OR) && (!previous || previous.tag == TOK_AT || previous.tag == TOK_DSLASH || previous.tag == TOK_SLASH || previous.tag == TOK_AXIS || previous.tag == TOK_DOLLAR)) { rule = TOK_QNAME; } if (rule) { expr = expr.substr(match.length); if (xpathdebug) { Log.write('token: ' + match + ' -- ' + rule.label); } ahead = { tag: rule, match: match, prec: rule.prec ? rule.prec : 0, // || 0 is removed by the compiler expr: makeTokenExpr(match) }; } else { if (xpathdebug) { Log.write('DONE'); } done = true; } while (xpathReduce(stack, ahead)) { reduce_count++; if (xpathdebug) { Log.write('stack: ' + stackToString(stack)); } } } if (xpathdebug) { Log.write(stackToString(stack)); } if (stack.length != 1) { throw 'XPath parse error ' + cachekey + ':\n' + stackToString(stack); } var result = stack[0].expr; xpathParseCache[cachekey] = result; if (xpathdebug) { Timer.end('XPath parse', cachekey); } if (xpathdebug) { Log.write('XPath parse: ' + parse_count + ' / ' + lexer_count + ' / ' + reduce_count); } return result;}var xpathParseCache = {};function xpathCacheLookup(expr) { return xpathParseCache[expr];}function xpathReduce(stack, ahead) { var cand = null; if (stack.length > 0) { var top = stack[stack.length-1]; var ruleset = xpathRules[top.tag.key]; if (ruleset) { for (var i = 0; i < ruleset.length; ++i) { var rule = ruleset[i]; var match = xpathMatchStack(stack, rule[1]); if (match.length) { cand = { tag: rule[0], rule: rule, match: match }; cand.prec = xpathGrammarPrecedence(cand); break; } } } } var ret; if (cand && (!ahead || cand.prec > ahead.prec || (ahead.tag.left && cand.prec >= ahead.prec))) { for (var i = 0; i < cand.match.matchlength; ++i) { stack.pop(); } if (xpathdebug) { Log.write('reduce ' + cand.tag.label + ' ' + cand.prec + ' ahead ' + (ahead ? ahead.tag.label + ' ' + ahead.prec + (ahead.tag.left ? ' left' : '') : ' none ')); } var matchexpr = mapExpr(cand.match, function(m) { return m.expr; }); cand.expr = cand.rule[3].apply(null, matchexpr); stack.push(cand); ret = true; } else { if (ahead) { if (xpathdebug) { Log.write('shift ' + ahead.tag.label + ' ' + ahead.prec + (ahead.tag.left ? ' left' : '') + ' over ' + (cand ? cand.tag.label + ' ' + cand.prec : ' none')); } stack.push(ahead); } ret = false; } return ret;}function xpathMatchStack(stack, pattern) { // NOTE(mesch): The stack matches for variable cardinality are // greedy but don't do backtracking. This would be an issue only // with rules of the form A* A, i.e. with an element with variable // cardinality followed by the same element. Since that doesn't // occur in the grammar at hand, all matches on the stack are // unambiguous. var S = stack.length; var P = pattern.length; var p, s; var match = []; match.matchlength = 0; var ds = 0; for (p = P - 1, s = S - 1; p >= 0 && s >= 0; --p, s -= ds) { ds = 0; var qmatch = []; if (pattern[p] == Q_MM) { p -= 1; match.push(qmatch); while (s - ds >= 0 && stack[s - ds].tag == pattern[p]) { qmatch.push(stack[s - ds]); ds += 1; match.matchlength += 1; } } else if (pattern[p] == Q_01) { p -= 1; match.push(qmatch); while (s - ds >= 0 && ds < 2 && stack[s - ds].tag == pattern[p]) { qmatch.push(stack[s - ds]); ds += 1; match.matchlength += 1; } } else if (pattern[p] == Q_1M) { p -= 1; match.push(qmatch); if (stack[s].tag == pattern[p]) { while (s - ds >= 0 && stack[s - ds].tag == pattern[p]) { qmatch.push(stack[s - ds]); ds += 1; match.matchlength += 1; } } else { return []; } } else if (stack[s].tag == pattern[p]) { match.push(stack[s]); ds += 1; match.matchlength += 1; } else { return []; } reverseInplace(qmatch); qmatch.expr = mapExpr(qmatch, function(m) { return m.expr; }); } reverseInplace(match); if (p == -1) { return match; } else { return []; }}function xpathTokenPrecedence(tag) { return tag.prec || 2;}function xpathGrammarPrecedence(frame) { var ret = 0; if (frame.rule) { /* normal reduce */ if (frame.rule.length >= 3 && frame.rule[2] >= 0) { ret = frame.rule[2]; } else { for (var i = 0; i < frame.rule[1].length; ++i) { var p = xpathTokenPrecedence(frame.rule[1][i]); ret = Math.max(ret, p); } } } else if (frame.tag) { /* TOKEN match */ ret = xpathTokenPrecedence(frame.tag); } else if (frame.length) { /* Q_ match */ for (var j = 0; j < frame.length; ++j) { var p = xpathGrammarPrecedence(frame[j]); ret = Math.max(ret, p); } } return ret;}function stackToString(stack) { var ret = ''; for (var i = 0; i < stack.length; ++i) { if (ret) { ret += '\n'; } ret += stack[i].tag.label; } return ret;}// XPath expression evaluation context. An XPath context consists of a// DOM node, a list of DOM nodes that contains this node, a number// that represents the position of the single node in the list, and a// current set of variable bindings. (See XPath spec.)//// The interface of the expression context://// Constructor -- gets the node, its position, the node set it// belongs to, and a parent context as arguments. The parent context// is used to implement scoping rules for variables: if a variable// is not found in the current context, it is looked for in the// parent context, recursively. Except for node, all arguments have// default values: default position is 0, default node set is the// set that contains only the node, and the default parent is null.//// Notice that position starts at 0 at the outside interface;// inside XPath expressions this shows up as position()=1.//// clone() -- creates a new context with the current context as// parent. If passed as argument to clone(), the new context has a// different node, position, or node set. What is not passed is// inherited from the cloned context.//// setVariable(name, expr) -- binds given XPath expression to the// name.//// getVariable(name) -- what the name says.//// setNode(node, position) -- sets the context to the new node and// its corresponding position. Needed to implement scoping rules for// variables in XPath. (A variable is visible to all subsequent// siblings, not only to its children.)function ExprContext(node, position, nodelist, parent) { this.node = node; this.position = position || 0; this.nodelist = nodelist || [ node ]; this.variables = {}; this.parent = parent || null; this.root = parent ? parent.root : node.ownerDocument;}ExprContext.prototype.clone = function(node, position, nodelist) { return new ExprContext(node || this.node, typeof position != 'undefined' ? position : this.position, nodelist || this.nodelist, this);};ExprContext.prototype.setVariable = function(name, value) { this.variables[name] = value;};ExprContext.prototype.getVariable = function(name) { if (typeof this.variables[name] != 'undefined') { return this.variables[name]; } else if (this.parent) { return this.parent.getVariable(name); } else { return null; }}ExprContext.prototype.setNode = function(node, position) { this.node = node; this.position = position;}// XPath expression values. They are what XPath expressions evaluate// to. Strangely, the different value types are not specified in the// XPath syntax, but only in the semantics, so they don't show up as// nonterminals in the grammar. Yet, some expressions are required to// evaluate to particular types, and not every type can be coerced// into every other type. Although the types of XPath values are// similar to the types present in JavaScript, the type coercion rules// are a bit peculiar, so we explicitly model XPath types instead of// mapping them onto JavaScript types. (See XPath spec.)//// The four types are://// StringValue//// NumberValue//// BooleanValue//// NodeSetValue//// The common interface of the value classes consists of methods that// implement the XPath type coercion rules://// stringValue() -- returns the value as a JavaScript String,//// numberValue() -- returns the value as a JavaScript Number,//// booleanValue() -- returns the value as a JavaScript Boolean,//// nodeSetValue() -- returns the value as a JavaScript Array of DOM// Node objects.//function StringValue(value) { this.value = value; this.type = 'string';}StringValue.prototype.stringValue = function() { return this.value;}StringValue.prototype.booleanValue = function() { return this.value.length > 0;}StringValue.prototype.numberValue = function() { return this.value - 0;}StringValue.prototype.nodeSetValue = function() { throw this + ' ' + Error().stack;}function BooleanValue(value) { this.value = value; this.type = 'boolean';}BooleanValue.prototype.stringValue = function() { return '' + this.value;}BooleanValue.prototype.booleanValue = function() { return this.value;}BooleanValue.prototype.numberValue = function() { return this.value ? 1 : 0;}BooleanValue.prototype.nodeSetValue = function() { throw this + ' ' + Error().stack;}function NumberValue(value) { this.value = value; this.type = 'number';}NumberValue.prototype.stringValue = function() { return '' + this.value;}NumberValue.prototype.booleanValue = function() { return !!this.value;}NumberValue.prototype.numberValue = function() { return this.value - 0;}NumberValue.prototype.nodeSetValue = function() { throw this + ' ' + Error().stack;}function NodeSetValue(value) { this.value = value; this.type = 'node-set';}NodeSetValue.prototype.stringValue = function() { if (this.value.length == 0) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -