⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 feedprocessor.js

📁 从国外的站点上淘的个人财务管理系统.开源.
💻 JS
📖 第 1 页 / 共 4 页
字号:
/**
 * ElementInfo is a simple container object that describes
 * some characteristics of a feed element. For example, it
 * says whether an element can be expected to appear more
 * than once inside a given entry or feed.
 *
 * @param fieldName       name stored on the instance as `fieldName`
 * @param containerClass  class entry (e.g. Cc[ENTRY_CONTRACTID]) or null;
 *                        stored as-is. NOTE(review): presumably instantiated
 *                        when the element is opened -- confirm against
 *                        _processComplexElement, which is not in this view.
 * @param closeFunc       function or null; stored as-is. NOTE(review):
 *                        presumably run when the element closes -- confirm
 *                        against _closeComplexElement.
 * @param isArray         true when the element may occur more than once
 */
function ElementInfo(fieldName, containerClass, closeFunc, isArray) {
  this.fieldName = fieldName;
  this.containerClass = containerClass;
  this.closeFunc = closeFunc;
  this.isArray = isArray;
  // Plain elements are never wrappers; startElement branches on this flag.
  this.isWrapper = false;
}

/**
 * FeedElementInfo represents a feed element, usually the root.
 *
 * @param fieldName    name stored as `fieldName`; startElement derives the
 *                     next parser state from it ("IN_" + upper-cased name)
 * @param feedVersion  feed version identifier such as "rss2" or "atom"
 */
function FeedElementInfo(fieldName, feedVersion) {
  this.isWrapper = false;
  this.fieldName = fieldName;
  this.feedVersion = feedVersion;
}

/**
 * Some feed formats include vestigial wrapper elements that we don't
 * want to include in our object model, but we do need to keep track
 * of during parsing.
 *
 * @param fieldName  name stored as `fieldName`; startElement derives the
 *                   next parser state from it ("IN_" + upper-cased name)
 */
function WrapperElementInfo(fieldName) {
  this.isWrapper = true;
  this.fieldName = fieldName;
}

/***** The Processor *****/

/**
 * Creates a feed processor: a SAX reader, the property bag that receives
 * the feed fields, the bookkeeping stacks, and the two lookup tables used
 * while parsing (`_textConstructs` and `_trans`).
 *
 * Relies on names defined outside this block: Cc, Ci, the *_CONTRACTID
 * constants, and the close functions rssAuthor, rssCatTerm, rssGuid,
 * rssArrayElement and atomGenerator.
 */
function FeedProcessor() {
  this._reader = Cc[SAX_CONTRACTID].createInstance(Ci.nsISAXXMLReader);
  this._buf = "";
  this._feed = Cc[BAG_CONTRACTID].createInstance(Ci.nsIWritablePropertyBag2);
  this._handlerStack = [];
  this._xmlBaseStack = []; // sparse array keyed to nesting depth
  this._depth = 0;
  this._state = "START";
  this._result = null;
  this._extensionHandler = null;
  this._xhtmlHandler = null;

  // The nsIFeedResultListener waiting for the parse results
  this.listener = null;

  // These elements can contain (X)HTML or plain text.
  // We keep a table here that contains their default treatment
  this._textConstructs = {"atom:title":"text",
                          "atom:summary":"text",
                          "atom:rights":"text",
                          "atom:content":"text",
                          "atom:subtitle":"text",
                          "description":"html",
                          "rss1:description":"html",
                          "dc:description":"html",
                          "content:encoded":"html",
                          "title":"text",
                          "rss1:title":"text",
                          "atom03:title":"text",
                          "atom03:tagline":"text",
                          "atom03:summary":"text",
                          "atom03:content":"text"};
  this._stack = [];

  // State-transition table: first key is the current parser state, second
  // key is the canonical "prefix:localName" of the element being opened.
  this._trans = {
    "START": {
      // If we hit a root RSS element, treat as RSS2.
      "rss": new FeedElementInfo("RSS2", "rss2"),

      // If we hit an RDF element, it could be RSS1, but we can't
      // verify that until we hit a rss1:channel element.
      "rdf:RDF": new WrapperElementInfo("RDF"),

      // If we hit an Atom 1.0 element, treat as Atom 1.0.
      "atom:feed": new FeedElementInfo("Atom", "atom"),

      // Treat as Atom 0.3
      "atom03:feed": new FeedElementInfo("Atom03", "atom03"),
    },

    /********* RSS2 **********/
    "IN_RSS2": {
      "channel": new WrapperElementInfo("channel")
    },

    "IN_CHANNEL": {
      "item": new ElementInfo("items", Cc[ENTRY_CONTRACTID], null, true),
      "managingEditor": new ElementInfo("authors", Cc[PERSON_CONTRACTID],
                                        rssAuthor, true),
      "dc:creator": new ElementInfo("authors", Cc[PERSON_CONTRACTID],
                                    rssAuthor, true),
      "dc:author": new ElementInfo("authors", Cc[PERSON_CONTRACTID],
                                   rssAuthor, true),
      "dc:contributor": new ElementInfo("contributors", Cc[PERSON_CONTRACTID],
                                         rssAuthor, true),
      "category": new ElementInfo("categories", null, rssCatTerm, true),
      "cloud": new ElementInfo("cloud", null, null, false),
      "image": new ElementInfo("image", null, null, false),
      "textInput": new ElementInfo("textInput", null, null, false),
      "skipDays": new ElementInfo("skipDays", null, null, false),
      "skipHours": new ElementInfo("skipHours", null, null, false),
      "generator": new ElementInfo("generator", Cc[GENERATOR_CONTRACTID],
                                   atomGenerator, false),
    },

    "IN_ITEMS": {
      "author": new ElementInfo("authors", Cc[PERSON_CONTRACTID],
                                rssAuthor, true),
      "dc:creator": new ElementInfo("authors", Cc[PERSON_CONTRACTID],
                                    rssAuthor, true),
      "dc:author": new ElementInfo("authors", Cc[PERSON_CONTRACTID],
                                   rssAuthor, true),
      "dc:contributor": new ElementInfo("contributors", Cc[PERSON_CONTRACTID],
                                         rssAuthor, true),
      "category": new ElementInfo("categories", null, rssCatTerm, true),
      "enclosure": new ElementInfo("enclosure", null, null, true),
      "guid": new ElementInfo("guid", null, rssGuid, false)
    },

    "IN_SKIPDAYS": {
      "day": new ElementInfo("days", null, rssArrayElement, true)
    },

    "IN_SKIPHOURS":{
      "hour": new ElementInfo("hours", null, rssArrayElement, true)
    },

    /********* RSS1 **********/
    "IN_RDF": {
      // If we hit a rss1:channel, we can verify that we have RSS1
      "rss1:channel": new FeedElementInfo("rdf_channel", "rss1"),
      "rss1:image": new ElementInfo("image", null, null, false),
      "rss1:textinput": new ElementInfo("textInput", null, null, false),
      "rss1:item": new ElementInfo("items", Cc[ENTRY_CONTRACTID], null, true),
    },

    "IN_RDF_CHANNEL": {
      "admin:generatorAgent": new ElementInfo("generator",
                                              Cc[GENERATOR_CONTRACTID],
                                              null, false),
      "dc:creator": new ElementInfo("authors", Cc[PERSON_CONTRACTID],
                                    rssAuthor, true),
      "dc:author": new ElementInfo("authors", Cc[PERSON_CONTRACTID],
                                   rssAuthor, true),
      "dc:contributor": new ElementInfo("contributors", Cc[PERSON_CONTRACTID],
                                         rssAuthor, true),
    },

    /********* ATOM 1.0 **********/
    "IN_ATOM": {
      "atom:author": new ElementInfo("authors", Cc[PERSON_CONTRACTID],
                                     null, true),
      "atom:generator": new ElementInfo("generator", Cc[GENERATOR_CONTRACTID],
                                        atomGenerator, false),
      "atom:contributor": new ElementInfo("contributors", Cc[PERSON_CONTRACTID],
                                          null, true),
      "atom:link": new ElementInfo("links", null, null, true),
      "atom:entry": new ElementInfo("entries", Cc[ENTRY_CONTRACTID],
                                    null, true)
    },

    "IN_ENTRIES": {
      "atom:author": new ElementInfo("authors", Cc[PERSON_CONTRACTID],
                                     null, true),
      "atom:contributor": new ElementInfo("contributors", Cc[PERSON_CONTRACTID],
                                          null, true),
      "atom:link": new ElementInfo("links", null, null, true),
    },

    /********* ATOM 0.3 **********/
    "IN_ATOM03": {
      "atom03:author": new ElementInfo("authors", Cc[PERSON_CONTRACTID],
                                       null, true),
      "atom03:contributor": new ElementInfo("contributors",
                                            Cc[PERSON_CONTRACTID],
                                            null, true),
      "atom03:link": new ElementInfo("links", null, null, true),
      "atom03:entry": new ElementInfo("atom03_entries", Cc[ENTRY_CONTRACTID],
                                      null, true),
      "atom03:generator": new ElementInfo("generator", Cc[GENERATOR_CONTRACTID],
                                          atomGenerator, false),
    },

    "IN_ATOM03_ENTRIES": {
      "atom03:author": new ElementInfo("authors", Cc[PERSON_CONTRACTID],
                                       null, true),
      "atom03:contributor": new ElementInfo("contributors",
                                            Cc[PERSON_CONTRACTID],
                                            null, true),
      "atom03:link": new ElementInfo("links", null, null, true),
      "atom03:entry": new ElementInfo("atom03_entries", Cc[ENTRY_CONTRACTID],
                                      null, true)
    }
  };
}

// NOTE(review): the line below is the still-collapsed opening of
// FeedProcessor.prototype, kept verbatim. That definition continues past
// this block (and past the visible part of the file), so it is not
// reformatted here.
// See startElement for a long description of how feeds are processed.FeedProcessor.prototype = {     // Set ourselves as the SAX handler, and set the base URI  _init: function FP_init(uri) {    this._reader.contentHandler = this;    this._reader.errorHandler = this;    this._result = Cc[FR_CONTRACTID].createInstance(Ci.nsIFeedResult);    if (uri) {      this._result.uri = uri;      this._reader.baseURI = uri;      this._xmlBaseStack[0] = uri;    }  },  // This function is called once we figure 
out what type of feed  // we're dealing with. Some feed types require digging a bit further  // than the root.  _docVerified: function FP_docVerified(version) {    this._result.doc = Cc[FEED_CONTRACTID].createInstance(Ci.nsIFeed);    this._result.doc.baseURI =       this._xmlBaseStack[this._xmlBaseStack.length - 1];    this._result.doc.fields = this._feed;    this._result.version = version;  },  // When we're done with the feed, let the listener know what  // happened.  _sendResult: function FP_sendResult() {    try {      // Can be null when a non-feed is fed to us      if (this._result.doc)        this._result.doc.normalize();    }    catch (e) {      LOG("FIXME: " + e);    }    try {      if (this.listener != null)        this.listener.handleResult(this._result);    }    finally {      this._result = null;      this._reader = null;    }  },  // Parsing functions  parseFromStream: function FP_parseFromStream(stream, uri) {    this._init(uri);    this._reader.parseFromStream(stream, null, stream.available(),                                  "application/xml");    this._reader = null;  },  parseFromString: function FP_parseFromString(inputString, uri) {    this._init(uri);    this._reader.parseFromString(inputString, "application/xml");    this._reader = null;  },  parseAsync: function FP_parseAsync(requestObserver, uri) {    this._init(uri);    this._reader.parseAsync(requestObserver);  },  // nsIStreamListener   // The XMLReader will throw sensible exceptions if these get called  // out of order.  
onStartRequest: function FP_onStartRequest(request, context) {    this._reader.onStartRequest(request, context);  },  onStopRequest: function FP_onStopRequest(request, context, statusCode) {    this._reader.onStopRequest(request, context, statusCode);  },  onDataAvailable:  function FP_onDataAvailable(request, context, inputStream, offset, count) {    this._reader.onDataAvailable(request, context, inputStream, offset, count);  },  // nsISAXErrorHandler  // We only care about fatal errors. When this happens, we may have  // parsed through the feed metadata and some number of entries. The  // listener can still show some of that data if it wants, and we'll  // set the bozo bit to indicate we were unable to parse all the way  // through.  fatalError: function FP_reportError() {    this._result.bozo = true;    //XXX need to QI to FeedProgressListener    this._sendResult();  },  // nsISAXContentHandler  startDocument: function FP_startDocument() {    //LOG("----------");  },  endDocument: function FP_endDocument() {    this._sendResult();  },  // The transitions defined above identify elements that contain more  // than just text. For example RSS items contain many fields, and so  // do Atom authors. The only commonly used elements that contain  // mixed content are Atom Text Constructs of type="xhtml", which we  // delegate to another handler for cleaning. That leaves a couple  // different types of elements to deal with: those that should occur  // only once, such as title elements, and those that can occur  // multiple times, such as the RSS category element and the Atom  // link element. Most of the RSS1/DC elements can occur multiple  // times in theory, but in practice, the only ones that do have  // analogues in Atom.   //  // Some elements are also groups of attributes or sub-elements,  // while others are simple text fields. 
For the most part, we don't  // have to pay explicit attention to the simple text elements,  // unless we want to post-process the resulting string to transform  // it into some richer object like a Date or URI.  //  // Elements that have more sophisticated content models still end up  // being dictionaries, whether they are based on attributes like RSS  // cloud, sub-elements like Atom author, or even items and  // entries. These elements are treated as "containers". It's  // theoretically possible for a container to have an attribute with   // the same universal name as a sub-element, but none of the feed  // formats allow this by default, and I don't of any extension that  // works this way.  //  startElement: function FP_startElement(uri, localName, qName, attributes) {    this._buf = "";    ++this._depth;    var elementInfo;    //LOG("<" + localName + ">");    // Check for xml:base    var base = attributes.getValueFromName(XMLNS, "base");    if (base) {      this._xmlBaseStack[this._depth] =        strToURI(base, this._xmlBaseStack[this._xmlBaseStack.length - 1]);    }    // To identify the element we're dealing with, we look up the    // namespace URI in our gNamespaces dictionary, which will give us    // a "canonical" prefix for a namespace URI. For example, this    // allows Dublin Core "creator" elements to be consistently mapped    // to "dc:creator", for easy field access by consumer code. This    // strategy also happens to shorten up our state table.    var key =  this._prefixForNS(uri) + localName;    // Check to see if we need to hand this off to our XHTML handler.    // The elements we're dealing with will look like this:    //     // <title type="xhtml">    //   <div xmlns="http://www.w3.org/1999/xhtml">    //     A title with <b>bold</b> and <i>italics</i>.    
//   </div>    // </title>    //    // When it returns in returnFromXHTMLHandler, the handler should    // give us back a string like this:     //     //    "A title with <b>bold</b> and <i>italics</i>."    //    // The Atom spec explicitly says the div is not part of the content,    // and explicitly allows whitespace collapsing.    //     if ((this._result.version == "atom" || this._result.version == "atom03") &&        this._textConstructs[key] != null) {      var type = attributes.getValueFromName("","type");      if (type != null && type.indexOf("xhtml") >= 0) {        this._xhtmlHandler =           new XHTMLHandler(this, (this._result.version == "atom"));        this._reader.contentHandler = this._xhtmlHandler;        return;      }    }    // Check our current state, and see if that state has a defined    // transition. For example, this._trans["atom:entry"]["atom:author"]    // will have one, and it tells us to add an item to our authors array.    if (this._trans[this._state] && this._trans[this._state][key]) {      elementInfo = this._trans[this._state][key];    }    else {      // If we don't have a transition, hand off to extension handler      this._extensionHandler = new ExtensionHandler(this);      this._reader.contentHandler = this._extensionHandler;      this._extensionHandler.startElement(uri, localName, qName, attributes);      return;    }          // This distinguishes wrappers like 'channel' from elements    // we'd actually like to do something with (which will test true).    
this._handlerStack[this._depth] = elementInfo;     if (elementInfo.isWrapper) {      this._state = "IN_" + elementInfo.fieldName.toUpperCase();      this._stack.push([this._feed, this._state]);    }     else if (elementInfo.feedVersion) {      this._state = "IN_" + elementInfo.fieldName.toUpperCase();      // Check for the older RSS2 variants      if (elementInfo.feedVersion == "rss2")        elementInfo.feedVersion = this._findRSSVersion(attributes);      else if (uri == RSS090NS)        elementInfo.feedVersion = "rss090";      this._docVerified(elementInfo.feedVersion);      this._stack.push([this._feed, this._state]);      this._mapAttributes(this._feed, attributes);    }    else {      this._state = this._processComplexElement(elementInfo, attributes);    }  },  // In the endElement handler, we decrement the stack and look  // for cleanup/transition functions to execute. The second part  // of the state transition works as above in startElement, but  // the state we're looking for is prefixed with an underscore  // to distinguish endElement events from startElement events.  endElement:  function FP_endElement(uri, localName, qName) {    var elementInfo = this._handlerStack[this._depth];    //LOG("</" + localName + ">");    if (elementInfo && !elementInfo.isWrapper)      this._closeComplexElement(elementInfo);      // cut down xml:base context    if (this._xmlBaseStack.length == this._depth + 1)      this._xmlBaseStack = this._xmlBaseStack.slice(0, this._depth);    // our new state is whatever is at the top of the stack now    if (this._stack.length > 0)      this._state = this._stack[this._stack.length - 1][1];    this._handlerStack = this._handlerStack.slice(0, this._depth);    --this._depth;  },  // Buffer up character data. The buffer is cleared with every  // opening element.  characters: function FP_characters(data) {    this._buf += data;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -