📄 feed-parser.js

📁 现在很火的邮件客户端软件 Thunderbird 的源码
💻 JS
📖 第 1 页 / 共 2 页
字号:
12 下一页
/* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- * ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License * for the specific language governing rights and limitations under the * License. * * The Original Code is the RSS Parsing Engine * * The Initial Developer of the Original Code is * The Mozilla Foundation. * Portions created by the Initial Developer are Copyright (C) 2004 * the Initial Developer. All Rights Reserved. * * Contributor(s): * * Alternatively, the contents of this file may be used under the terms of * either the GNU General Public License Version 2 or later (the "GPL"), or * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), * in which case the provisions of the GPL or the LGPL are applicable instead * of those above. If you wish to allow use of your version of this file only * under the terms of either the GPL or the LGPL, and not to allow others to * use your version of this file under the terms of the MPL, indicate your * decision by deleting the provisions above and replace them with the notice * and other provisions required by the GPL or the LGPL. 
If you do not delete * the provisions above, a recipient may use your version of this file under * the terms of any one of the MPL, the GPL or the  * ***** END LICENSE BLOCK ***** */// The feed parser depends on FeedItems.js, Feed.js.var rdfcontainer =  Components.classes["@mozilla.org/rdf/container-utils;1"].getService(Components.interfaces.nsIRDFContainerUtils);var rdfparser = Components.classes["@mozilla.org/rdf/xml-parser;1"].createInstance(Components.interfaces.nsIRDFXMLParser);var serializer = Components.classes["@mozilla.org/xmlextras/xmlserializer;1"].createInstance(Components.interfaces.nsIDOMSerializer);function FeedParser() {}FeedParser.prototype = {  // parseFeed returns an array of parsed items ready for processing  // it is currently a synchronous operation. If there was an error parsing the feed,   // parseFeed returns an empty feed in addition to calling aFeed.onParseError  parseFeed: function (aFeed, aSource, aDOM, aBaseURI)  {    if (!aSource || !(aDOM instanceof Components.interfaces.nsIDOMXMLDocument))    {      aFeed.onParseError(aFeed);         return new Array();    }    else if((aDOM.documentElement.namespaceURI == "http://www.w3.org/1999/02/22-rdf-syntax-ns#")            && (aDOM.documentElement.getElementsByTagNameNS("http://purl.org/rss/1.0/", "channel")[0]))    {      debug(aFeed.url + " is an RSS 1.x (RDF-based) feed");      // aSource can be misencoded (XMLHttpRequest converts to UTF-8 by default),       // but the DOM is almost always right because it uses the hints in the XML file.      // This is slower, but not noticably so. Mozilla doesn't have the       // XMLHttpRequest.responseBody property that IE has, which provides access       // to the unencoded response.      
var xmlString=serializer.serializeToString(aDOM.documentElement);      return this.parseAsRSS1(aFeed, xmlString, aBaseURI);    }    else if (aDOM.documentElement.namespaceURI == ATOM_03_NS)    {      debug(aFeed.url + " is an Atom 0.3 feed");      return this.parseAsAtom(aFeed, aDOM);    }    else if (aDOM.documentElement.namespaceURI == ATOM_IETF_NS)    {      debug(aFeed.url + " is an IETF Atom feed");      return this.parseAsAtomIETF(aFeed, aDOM);    }    else if (aSource.search(/"http:\/\/my\.netscape\.com\/rdf\/simple\/0\.9\/"/) != -1)    {      debug(aFeed.url + " is an 0.90 feed");      return this.parseAsRSS2(aFeed, aDOM);    }    // XXX Explicitly check for RSS 2.0 instead of letting it be handled by the    // default behavior (who knows, we may change the default at some point).    else     {      // We don't know what kind of feed this is; let's pretend it's RSS 0.9x      // and hope things work out for the best.  In theory even RSS 1.0 feeds      // could be parsed by the 0.9x parser if the RSS namespace was the default.      debug(aFeed.url + " is of unknown format; assuming an RSS 0.9x feed");      return this.parseAsRSS2(aFeed, aDOM);    }  },  parseAsRSS2: function (aFeed, aDOM)   {    // Get the first channel (assuming there is only one per RSS File).    
var parsedItems = new Array();    var channel = aDOM.getElementsByTagName("channel")[0];    if (!channel)      return aFeed.onParseError(aFeed);    //usually the empty string, unless this is RSS .90    var nsURI = channel.namespaceURI || "";    debug("channel NS: '" + nsURI +"'");    aFeed.title = aFeed.title || getNodeValue(this.childrenByTagNameNS(channel, nsURI, "title")[0]);    aFeed.description = getNodeValue(this.childrenByTagNameNS(channel, nsURI, "description")[0]);    aFeed.link = getNodeValue(this.childrenByTagNameNS(channel, nsURI, "link")[0]);    if (!aFeed.parseItems)      return parsedItems;    aFeed.invalidateItems();    // XXX use getElementsByTagNameNS for now    // childrenByTagNameNS would be better, but RSS .90 is still with us    var itemNodes = aDOM.getElementsByTagNameNS(nsURI,"item");    for (var i=0; i < itemNodes.length; i++)     {      var itemNode = itemNodes[i];      var item = new FeedItem();      item.feed = aFeed;      item.characterSet = "UTF-8";      var link = getNodeValue(this.childrenByTagNameNS(itemNode, nsURI, "link")[0]);      var guidNode = this.childrenByTagNameNS(itemNode, nsURI, "guid")[0];      var guid;      var isPermaLink;      if (guidNode)       {        guid = getNodeValue(guidNode);        isPermaLink = guidNode.getAttribute('isPermaLink') == 'false' ? false : true;      }      item.isStoredWithId = true;      item.url = link ? link : (guid && isPermaLink) ? guid : null;      item.id = guid;      item.description = getNodeValue(this.childrenByTagNameNS(itemNode, nsURI, "description")[0]);      item.title = getNodeValue(this.childrenByTagNameNS(itemNode, nsURI, "title")[0])                   || (item.description ? 
(this.stripTags(item.description).substr(0, 150)) : null)                   || item.title;      item.author = getNodeValue(this.childrenByTagNameNS(itemNode, nsURI, "author")[0]                                 || this.childrenByTagNameNS(itemNode, DC_NS, "creator")[0])                                 || aFeed.title                                 || item.author;      item.date = getNodeValue(this.childrenByTagNameNS(itemNode, nsURI, "pubDate")[0]                               || this.childrenByTagNameNS(itemNode, DC_NS, "date")[0])                               || item.date;          // If the date is invalid, users will see the beginning of the epoch      // unless we reset it here, so they'll see the current time instead.      // This is typical aggregator behavior.      if(item.date)      {        item.date = trimString(item.date);        if(!isValidRFC822Date(item.date))        {          // XXX Use this on the other formats as well          item.date = dateRescue(item.date);        }      }      var content = getNodeValue(this.childrenByTagNameNS(itemNode, RSS_CONTENT_NS, "encoded")[0]);      if(content)        item.content = content;      // Handle an enclosure (if present)      var enclosureNode = this.childrenByTagNameNS(itemNode, nsURI, "enclosure")[0];      if (enclosureNode)        item.enclosure = new FeedEnclosure(enclosureNode.getAttribute("url"),                                           enclosureNode.getAttribute("type"),                                          enclosureNode.getAttribute("length"));      parsedItems[i] = item;    }    return parsedItems;  },  parseAsRSS1 : function(aFeed, aSource, aBaseURI)   {    var parsedItems = new Array();    // RSS 1.0 is valid RDF, so use the RDF parser/service to extract data.    // Create a new RDF data source and parse the feed into it.    
var ds = Components.classes["@mozilla.org/rdf/datasource;1?name=in-memory-datasource"]             .createInstance(Components.interfaces.nsIRDFDataSource);    rdfparser.parseString(ds, aBaseURI, aSource);        // Get information about the feed as a whole.    var channel = ds.GetSource(RDF_TYPE, RSS_CHANNEL, true);        aFeed.title = aFeed.title || getRDFTargetValue(ds, channel, RSS_TITLE) || aFeed.url;    aFeed.description = getRDFTargetValue(ds, channel, RSS_DESCRIPTION) || "";    aFeed.link = getRDFTargetValue(ds, channel, RSS_LINK) || aFeed.url;    if (!aFeed.parseItems)      return parsedItems;    aFeed.invalidateItems();    var items = ds.GetTarget(channel, RSS_ITEMS, true);    if (items)      items = rdfcontainer.MakeSeq(ds, items).GetElements();      // If the channel doesn't list any items, look for resources of type "item"    // (a hacky workaround for some buggy feeds).    if (!items || !items.hasMoreElements())      items = ds.GetSources(RDF_TYPE, RSS_ITEM, true);    var index = 0;     while (items.hasMoreElements())     {      var itemResource = items.getNext().QueryInterface(Components.interfaces.nsIRDFResource);      var item = new FeedItem();      item.feed = aFeed;      item.characterSet = "UTF-8";      // Prefer the value of the link tag to the item URI since the URI could be      // a relative URN.      var uri = itemResource.Value;      var link = getRDFTargetValue(ds, itemResource, RSS_LINK);      // XXX      // check for bug258465 -- entities appear escaped       // in the value returned by getRDFTargetValue when they shouldn't      //debug("link comparison\n" + " uri: " + uri + "\nlink: " + link);      item.url = link || uri;      item.id = item.url;      item.description = getRDFTargetValue(ds, itemResource, RSS_DESCRIPTION);      item.title = getRDFTargetValue(ds, itemResource, RSS_TITLE)                                     || getRDFTargetValue(ds, itemResource, DC_SUBJECT)                                     || (item.description ? 
(this.stripTags(item.description).substr(0, 150)) : null)                                     || item.title;      item.author = getRDFTargetValue(ds, itemResource, DC_CREATOR)                                      || getRDFTargetValue(ds, channel, DC_CREATOR)                                      || aFeed.title                                      || item.author;            item.date = getRDFTargetValue(ds, itemResource, DC_DATE) || item.date;      item.content = getRDFTargetValue(ds, itemResource, RSS_CONTENT_ENCODED);      parsedItems[index++] = item;    }      return parsedItems;  },  parseAsAtom: function(aFeed, aDOM)   {    var parsedItems = new Array();    // Get the first channel (assuming there is only one per Atom File).    var channel = aDOM.getElementsByTagName("feed")[0];    if (!channel)    {
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -