📄 feedforall_xmlparser.inc.php
字号:
<?php
//
// rssFilter.php Filter RSS feeds
//
// Copyright 2007 NotePage, Inc.
// http://www.feedforall.com
//
// NotePage, Inc. grants registered users of our FeedForAll and/or
// FeedForAll Mac product(s) the right to install and use the
// rssFilter.php script free of charge.
// Please refer to the EULA included in the download for full license
// terms and conditions.
//
// $Id: FeedForAll_XMLParser.inc.php,v 3.0 2007/04/16 14:23:03 housley Exp $
//
// $Log: FeedForAll_XMLParser.inc.php,v $
// Revision 3.0 2007/04/16 14:23:03 housley
// Release version 3.0 of the scripts
//
// Revision 2.30 2007/04/13 18:30:10 housley
// * Atom:content might need whole string so always make it available
// * atom:content of type xhtml is in a div that needs to be stripped and
// then used as is.
//
// Revision 2.29 2007/04/11 12:11:11 housley
// * Add more debug messages
// * Reorder the add item code some
//
// Revision 2.28 2007/04/11 10:40:38 housley
// Add some debug messages
//
// Revision 2.27 2007/04/06 11:18:03 housley
// Since <dc:creator> isn't specified to be an email, we can't move it to
// <author>
//
// Revision 2.26 2007/04/06 11:08:58 housley
// Add support for the Dublin Core (dc) namespace
//
// Revision 2.25 2007/04/05 11:37:05 housley
// Rename DcCreator so it can't interfere with a DublinCore extension
//
// Revision 2.24 2007/04/04 20:55:46 housley
// Add the ability to set CURLOPT_CONNECTTIMEOUT
//
// Revision 2.23 2007/04/04 18:43:26 housley
// * Update rssMesh to properly pass content:encoded through
// * Make sure <description> is always populated
// * Don't populate content:encoded from description in rssMesh
//
// Revision 2.22 2007/03/30 13:14:00 housley
// Move where pubDate_t and pubDate are manipulated to the beginning of the
// code that processes an item. This removes redundant calculations of
// pubDate_t
//
// Revision 2.21 2007/03/30 01:35:16 housley
// Use pubDate_t for the pubDateAsNumber since it is already there
//
// Revision 2.20 2007/03/30 01:34:12 housley
// Move the very specific rssFilter code to rssFilter.php
//
// Revision 2.19 2007/03/28 23:23:11 housley
// Add support for Atom <author><email> into RSS 2.0 <author>
//
// Revision 2.18 2007/03/28 13:26:30 housley
// Support atom:content, at least in a basic form
//
// Revision 2.17 2007/03/27 23:49:02 housley
// For non-RSS 2.0 formats create a valid pubDate from the appropriate date
//
// Revision 2.16 2007/03/27 23:16:31 housley
// Add support for Atom 1.0 atom:updated date field
//
// Revision 2.15 2007/03/25 11:24:21 housley
// Only do the replace on the one array value that we care about
//
// Revision 2.14 2007/03/19 14:13:24 housley
// Fix some small bugs in the new code, and test
//
// Revision 2.13 2007/03/15 18:37:32 housley
// Fix filter ordering
//
// Revision 2.12 2007/03/15 13:50:34 housley
// * Clear the current tag, in endElement()
// * Trim the feed level items at end of channel
//
// Revision 2.11 2007/03/15 01:21:24 housley
// Changes needed for when the extra parsing files aren't included
//
// Revision 2.10 2007/03/14 17:55:05 housley
// Support atom's id as guid
//
// Revision 2.9 2007/03/07 00:23:12 housley
// Add isEmpty, notEmpty, alphaBefore and alphaAfter
//
// Revision 2.8 2007/03/06 13:31:05 housley
// Change from ignoreCase to matchCase
//
// Revision 2.7 2007/03/05 21:13:11 housley
// * Add support for working with pubDate as a date or time
// * Show which items will and won't be used
//
// Revision 2.6 2007/03/05 15:10:07 housley
// Add "Ends With"
//
// Revision 2.5 2007/03/05 01:12:16 housley
// Move FeedForAll_scripts_convertEncoding and FeedForAll_scripts_readFile
// into FeedForAll_XMLParser.inc.php, because they are used in every file
//
// Revision 2.4 2007/03/04 22:54:03 housley
// Add methods to get the filter capabilities
//
// Revision 2.3 2007/03/04 13:41:53 housley
// * 
//   Pass the parsing mode to the item class
// * Cleanup the feed level processing
// * rss2html uses the separate parser too
//
// Revision 2.2 2007/03/04 12:13:52 housley
// If the feed is atom, check the link type if any
//
// Revision 2.1 2007/03/04 02:10:08 housley
// Move the parser used by the paid scripts into its own file.
//
//
//
// ==========================================================================
// Below this point of the file there are no user editable options. You
// are welcome to make any modifications that you wish to any of the code
// below, but that is not necessary for normal use.
// ==========================================================================
if (function_exists("FeedForAll_scripts_getRFDdate") === FALSE) {
  // Convert a date-time string laid out as
  //   YYYY-MM-DDThh:mm:ss[.fraction][Z | +hh:mm | -hh:mm]
  // into a Unix timestamp.
  //
  // $datestring  The date text. The date and time fields are read from fixed
  //              character positions, so the separators themselves are not
  //              inspected.
  //
  // Returns the value of gmmktime() for that moment expressed in UTC. A
  // string that carries no zone designator is treated as already being UTC,
  // and any field missing from a short string counts as 0.
  Function FeedForAll_scripts_getRFDdate($datestring) {
    $length = strlen($datestring);

    // Cast every field so that a short or empty slice becomes 0 instead of
    // feeding a non-numeric string into the arithmetic below.
    $year   = (int) substr($datestring, 0, 4);
    $month  = (int) substr($datestring, 5, 2);
    $day    = (int) substr($datestring, 8, 2);
    $hour   = (int) substr($datestring, 11, 2);
    $minute = (int) substr($datestring, 14, 2);
    $second = (int) substr($datestring, 17, 2);

    // The zone designator normally starts right after the seconds. When
    // fractional seconds are present, walk past them; the length is checked
    // before every character access so the scan cannot run off the end.
    $startTZ = 19;
    if (($startTZ < $length) && ($datestring[$startTZ] == ".")) {
      while (($startTZ < $length) && (strpos("Z+-", $datestring[$startTZ]) === FALSE)) {
        $startTZ++;
      }
    }

    // "Z", an absent designator, or anything unrecognised means no offset.
    $offset_hour = 0;
    $offset_minute = 0;
    if (($startTZ < $length)
        && preg_match('/^([+-])(\d{2}):?(\d{2})?/', substr($datestring, $startTZ), $zone)) {
      // A "-hh:mm" zone is behind UTC, so its offset is added to reach UTC;
      // a "+hh:mm" zone is ahead of UTC, so its offset is subtracted.
      $direction = ($zone[1] == "-") ? 1 : -1;
      $offset_hour = $direction * (int) $zone[2];
      $offset_minute = isset($zone[3]) ? $direction * (int) $zone[3] : 0;
    }

    return gmmktime($hour + $offset_hour, $minute + $offset_minute, $second, $month, $day, $year);
  }
}
if (function_exists("FeedForAll_scripts_convertEncoding") === FALSE) {
  // Re-encode an XML document: the encoding named in its XML declaration
  // (or $missingEncodingDefault when none is named) is converted to
  // $destinationEncoding with mb_convert_encoding() or iconv(), and the
  // declaration is rewritten to match. Returns the converted string, the
  // original string when it already declares $destinationEncoding, or FALSE.
  Function FeedForAll_scripts_convertEncoding($XMLstring, $missingEncodingDefault="ISO-8859-1", $destinationEncoding="UTF-8") {
    $results = NULL;
    $inputEncoding = $missingEncodingDefault;
    $workString = $XMLstring;
    if
(!function_exists("mb_convert_encoding") && !function_exists("iconv")) {
      // Neither conversion backend is available, so nothing can be done.
      return FALSE;
    }

    // Locate the XML declaration. The match is non-greedy so that it stops at
    // the first "?>" even when the whole document sits on a single line and
    // contains further processing instructions.
    if (preg_match("/<\?xml(.*?)\?>/", $XMLstring, $results) !== 1) {
      return FALSE;
    }
    $initialXMLHeader = $results[0];

    // Work on a normalised copy (no blanks, double quotes only) so the
    // encoding attribute can be picked out with a plain string split.
    $normalizedHeader = str_replace("'", "\"", str_replace(" ", "", $initialXMLHeader));

    $location = stristr($normalizedHeader, "encoding=");
    if ($location !== FALSE) {
      // explode() replaces split(), which was removed in PHP 7.0; the
      // delimiter is a literal double quote, so the result is the same.
      $parts = explode("\"", $location);
      if (isset($parts[1]) && ($parts[1] != "")) {
        if (strcasecmp($parts[1], $destinationEncoding) == 0) {
          // Already declared in the requested encoding: hand it back untouched.
          return $XMLstring;
        }
        $inputEncoding = $parts[1];
        $modifiedXMLHeader = str_replace($inputEncoding, $destinationEncoding, $initialXMLHeader);
      } else {
        // The attribute has no quoted value to read. Keep the default input
        // encoding and leave the declaration as it is.
        $modifiedXMLHeader = $initialXMLHeader;
      }
    } else {
      // No encoding attribute at all: add one naming the destination.
      $modifiedXMLHeader = str_replace("?>", " encoding=\"$destinationEncoding\" ?>", $initialXMLHeader);
    }
    $workString = str_replace($initialXMLHeader, $modifiedXMLHeader, $workString);

    // Prefer mbstring; fall back to iconv when mbstring is missing or fails.
    if (function_exists("mb_convert_encoding") !== FALSE) {
      if (($newResult = mb_convert_encoding($workString, $destinationEncoding, $inputEncoding)) !== FALSE) {
        return $newResult;
      }
    }
    if (function_exists("iconv") !== FALSE) {
      if (($newResult = iconv($inputEncoding, "$destinationEncoding//TRANSLIT", $workString)) !== FALSE) {
        return $newResult;
      }
    }
    return FALSE;
  }
}
if (function_exists("FeedForAll_scripts_readFile") === FALSE) {
  Function FeedForAll_scripts_readFile($filename, $useFopenURL, $useCaching = 0) {
    GLOBAL $connectTimeoutLimit;
    if ($useCaching);
$GLOBALS["ERRORSTRING"] = "";
    $result = "";

    // Fetch the contents of $filename and return them as a string, or FALSE
    // when the source cannot be opened or the transfer fails. For a cURL
    // failure the message is left in $GLOBALS["ERRORSTRING"].
    $looksLikeURL = stristr($filename, "://");

    if (!$looksLikeURL || $useFopenURL) {
      // Local paths, and URLs when the caller asked for it, go through
      // fopen() and are read in 4096-byte requests.
      $stream = @fopen($filename, "rb");
      if ($stream === FALSE) {
        return FALSE;
      }
      for ($chunk = fread($stream, 4096); $chunk != ""; $chunk = fread($stream, 4096)) {
        $result .= $chunk;
      }
      fclose($stream);
      return $result;
    }

    // Any other URL is fetched with cURL.
    $curl = curl_init();
    curl_setopt($curl, CURLOPT_URL, $filename);
    curl_setopt($curl, CURLOPT_HEADER, 0);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($curl, CURLOPT_USERAGENT, "FeedForAll rssFilter.php v2");
    // curl_setopt($curl, CURLOPT_AUTOREFERER, 1);
    curl_setopt($curl, CURLOPT_REFERER, $filename);
    // Redirects are only followed when neither safe_mode nor open_basedir is set.
    if (!ini_get("safe_mode") && !ini_get("open_basedir")) {
      curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1);
    }
    // A connect timeout is applied only when the global holds a non-zero value.
    if (isset($connectTimeoutLimit) && $connectTimeoutLimit != 0) {
      curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, $connectTimeoutLimit);
    }
    curl_setopt($curl, CURLOPT_MAXREDIRS, 10);

    $result = curl_exec($curl);
    $transferFailed = curl_errno($curl);
    if ($transferFailed) {
      // Capture the message before the handle is closed.
      $GLOBALS["ERRORSTRING"] = curl_error($curl);
    }
    curl_close($curl);

    if ($transferFailed) {
      return FALSE;
    }
    return $result;
  }
}
class rootItemClass {
  var $operateAs;
  var $title;
  var $description;
  var $contentEncoded;
  var $link;
  var $pubDate;
  var $pubDate_t;
  var $pubDateDC;
  var $enclosureURL;
  var $enclosureLength;
  var $enclosureType;
  var $categoryArray;
  var $category;
  var $categoryDomain;
  var $guid;
  var $guidIsPermaLink;
  var $author;
  var $comments;
  var $source;
  var $sourceURL;
  var $creativeCommons;
  var $rssMeshExtra;
  var $rssMeshFeedImageTitle;
  var $rssMeshFeedImageUrl;
  var $rssMeshFeedImageLink;
  var $rssMeshFeedImageDescription;
  var $rssMeshFeedImageHeight;
  var $rssMeshFeedImageWidth;
  var $atomID;
  var $atomUpdated;
  var $atomContent;
  var $atomContentStartPos;
  var $atomAuthorEmail;
  var
$contentEncodedUsed; var $itemStartPos; var $itemFullText; // Constructor Function rootItemClass($operateAs) { $this->operateAs = $operateAs; $this->title = ""; $this->description = ""; $this->contentEncoded = ""; $this->link = ""; $this->pubDate = ""; $this->pubDate_t = 0; $this->pubDateDC = ""; $this->enclosureURL = ""; $this->enclosureLength = ""; $this->enclosureType = ""; $this->categoryArray = Array(); $this->category = ""; $this->categoryDomain = ""; $this->guid = ""; $this->guidIsPermaLink = ""; $this->author = ""; $this->comments = ""; $this->source = ""; $this->sourceURL = ""; $this->creativeCommons = ""; $this->rssMeshExtra = ""; $this->rssMeshFeedImageTitle = ""; $this->rssMeshFeedImageUrl = ""; $this->rssMeshFeedImageLink = ""; $this->rssMeshFeedImageDescription = ""; $this->rssMeshFeedImageHeight = ""; $this->rssMeshFeedImageWidth = ""; $this->atomID = ""; $this->atomUpdated = ""; $this->atomContent = ""; $this->atomContentStartPos = 0; $this->atomAuthorEmail = ""; $this->contentEncodedUsed = 0; $this->itemStartPos = 0; $this->itemFullText = ""; } Function getValueOf($elementName) { if ($elementName == "~~~ItemTitle~~~") { return $this->title; } elseif ($elementName == "~~~ItemDescription~~~") { return $this->description; } elseif ($elementName == "~~~ItemContentEncoded~~~") { return $this->contentEncoded; } elseif ($elementName == "~~~ItemLink~~~") { return $this->link; } elseif ($elementName == "~~~ItemPubDate~~~") { return $this->pubDate; } elseif ($elementName == "~~~ItemPubDateAsNumber~~~") { return $this->pubDate_t; } elseif ($elementName == "~~~ItemEnclosureUrl~~~") { return $this->enclosureURL; } elseif ($elementName == "~~~ItemEnclosureType~~~") { return $this->enclosureType; } elseif ($elementName == "~~~ItemEnclosureLength~~~") { return $this->enclosureLength; } elseif ($elementName == "~~~ItemGuid~~~") { return $this->guid; } elseif ($elementName == "~~~ItemAuthor~~~") { return $this->author; } elseif ($elementName == "~~~ItemComments~~~") 
{ return $this->comments; } elseif ($elementName == "~~~ItemSource~~~") { return $this->source; } elseif ($elementName == "~~~ItemSourceUrl~~~") { return $this->sourceURL; } elseif ($elementName == "~~~ItemCategory~~~") { if (count($this->categoryArray)) { return $this->categoryArray[0]["Category"]; } } elseif ($elementName == "~~~ItemCategoryDomain~~~") { if (count($this->categoryArray)) { return $this->categoryArray[0]["Domain"]; } } elseif ($elementName == "~~~ItemCreativeCommons~~~") { return $this->creativeCommons; } elseif ($elementName == "~~~ItemRssMeshExtra~~~") { return $this->rssMeshExtra; } elseif ($elementName == "~~~ItemRssMeshFeedImageTitle~~~") { return $this->rssMeshFeedImageTitle; } elseif ($elementName == "~~~ItemRssMeshFeedImageUrl~~~") { return $this->rssMeshFeedImageUrl; } elseif ($elementName == "~~~ItemRssMeshFeedImageLink~~~") { return $this->rssMeshFeedImageLink;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -