📄 feedforall_xmlparser.inc.php
字号:
<?php//// rssFilter.php Filter RSS feeds//// Copyright 2007 NotePage, Inc.// http://www.feedforall.com//// NotePage, Inc. grants registerd users of our FeedForAll and/or// FeedForAll Mac product(s) the right to install and use the// rssFilter.php script free of charge.// Please refer to the EULA included in the download for full license// terms and conditions.//// $Id: FeedForAll_XMLParser.inc.php,v 3.1 2007/04/25 12:33:47 housley Exp $//// $Log: FeedForAll_XMLParser.inc.php,v $// Revision 3.1 2007/04/25 12:33:47 housley// Some feeds use <dc:date> with dates in a completely wrong format,// try and get a date from the value.//// Revision 3.0 2007/04/16 14:23:03 housley// Release version 3.0 of the scripts//// Revision 2.30 2007/04/13 18:30:10 housley// * Atom:content might need whole string so always make it available// * atom:content of type xhtml is in a div that needs to be stripped and// then used as is.//// Revision 2.29 2007/04/11 12:11:11 housley// * Add more debug messages// * Reorder the add item code some//// Revision 2.28 2007/04/11 10:40:38 housley// Add some debug messages//// Revision 2.27 2007/04/06 11:18:03 housley// Since <dc:creator> isn't specified to be an email, we can't move it to// <author>//// Revision 2.26 2007/04/06 11:08:58 housley// Add support for the Dublin Core (dc) namespace//// Revision 2.25 2007/04/05 11:37:05 housley// Rename DcCreator so it can't interfer with a DublinCore extension//// Revision 2.24 2007/04/04 20:55:46 housley// Add the ability to set CURLOPT_CONNECTTIMEOUT//// Revision 2.23 2007/04/04 18:43:26 housley// * Update rssMesh to properly pass content:encoded through// * Make sure <description> is always populated// * Don't populate content:encoded from description in rssMesh//// Revision 2.22 2007/03/30 13:14:00 housley// Move where pubDate_t and pubDate are manipulated to the beging of the// code that processes an item. 
This removes redundant caculations of// pubDate_t//// Revision 2.21 2007/03/30 01:35:16 housley// Use pubDate_t for the pubDateAsNumber since it already there//// Revision 2.20 2007/03/30 01:34:12 housley// Move the very specific rssFilter code to rssFilter.php//// Revision 2.19 2007/03/28 23:23:11 housley// Add support for Atom <author><email> into RSS 2.0 <author>//// Revision 2.18 2007/03/28 13:26:30 housley// Support atom:content, at least in a basic form//// Revision 2.17 2007/03/27 23:49:02 housley// For non-RSS 2.0 formats create a valid pubDate from the appropiate date//// Revision 2.16 2007/03/27 23:16:31 housley// Add support for Atom 1.0 atom:updated date field//// Revision 2.15 2007/03/25 11:24:21 housley// Only to the replace on the one array value that we care about//// Revision 2.14 2007/03/19 14:13:24 housley// Fix some small bugs in the new code, and test//// Revision 2.13 2007/03/15 18:37:32 housley// Fix filter ordering//// Revision 2.12 2007/03/15 13:50:34 housley// * Clear the current tag, in endElement()// * Trim the feed level items at end of channel//// Revision 2.11 2007/03/15 01:21:24 housley// Changes needed for when there extra parsing files aren't included//// Revision 2.10 2007/03/14 17:55:05 housley// Support atom's id as guid//// Revision 2.9 2007/03/07 00:23:12 housley// Add isEmpty, notEmpty, alphaBefore and alphaAfter//// Revision 2.8 2007/03/06 13:31:05 housley// Change from ignoreCase to matchCase//// Revision 2.7 2007/03/05 21:13:11 housley// * Add support for working with pubDate as a date or time// * Show which items will and won't be used//// Revision 2.6 2007/03/05 15:10:07 housley// Add "Ends With"//// Revision 2.5 2007/03/05 01:12:16 housley// Move FeedForAll_scripts_convertEncoding and FeedForAll_scripts_readFile// into FeedForAll_XMLParser.inc.php, because they are used in every file//// Revision 2.4 2007/03/04 22:54:03 housley// Add methods to get the filter capabilities//// Revision 2.3 2007/03/04 13:41:53 housley// * 
//   Pass the parsing mode to the item class
// * Cleanup the feed level processing
// * rss2html uses the separate parser too
//
// Revision 2.2 2007/03/04 12:13:52 housley
// If the feed is atom, check the link type if any
//
// Revision 2.1 2007/03/04 02:10:08 housley
// Move the parser used by the paid scripts into its own file.
//
//
//
// ==========================================================================
// Below this point of the file there are no user editable options. You
// are welcome to make any modifications that you wish to any of the code
// below, but that is not necessary for normal use.
// ==========================================================================

if (function_exists("FeedForAll_scripts_getRFDdate") === FALSE) {
    //
    // Convert a feed date string into a Unix timestamp.
    //
    // Two layouts are handled:
    //   * Strings that have a "T" at offset 10 are read positionally as
    //     YYYY-MM-DDTHH:MM:SS[.fraction][Z|+HH:MM|-HH:MM|+HHMM|-HHMM].
    //   * Anything else is handed to strtotime() (e.g. RFC 822 dates).
    //
    // $datestring  The date text taken from the feed.
    //
    // Returns the timestamp as an integer, or FALSE when the string has no
    // "T" at offset 10 and strtotime() cannot parse it.  Components that are
    // missing from a "T" style string are read as 0, and a missing zone
    // designator is treated as UTC.
    //
    function FeedForAll_scripts_getRFDdate($datestring)
    {
        $datestring = (string) $datestring;

        // substr() instead of $datestring[10] so short strings do not raise
        // an "uninitialized string offset" warning.
        if (substr($datestring, 10, 1) !== "T") {
            // Might be a RFC 822 date.  strtotime() reports failure as FALSE
            // (not -1) on every supported PHP version, so its result is
            // returned as is.
            return strtotime($datestring);
        }

        $year = (int) substr($datestring, 0, 4);
        $month = (int) substr($datestring, 5, 2);
        $day = (int) substr($datestring, 8, 2);
        $hour = (int) substr($datestring, 11, 2);
        $minute = (int) substr($datestring, 14, 2);
        $second = (int) substr($datestring, 17, 2);

        // The zone designator normally starts right after the seconds; when
        // fractional seconds are present, skip them first.  The bounds check
        // comes before the character read so the scan never runs off the end.
        $length = strlen($datestring);
        $startTZ = 19;
        if (substr($datestring, $startTZ, 1) === ".") {
            while (($startTZ < $length) && (strpos("Z+-", $datestring[$startTZ]) === FALSE)) {
                $startTZ++;
            }
        }

        // "Z", a missing designator, or anything unrecognised means UTC.
        $offset_hour = 0;
        $offset_minute = 0;
        $sign = substr($datestring, $startTZ, 1);
        if (($sign === "+") || ($sign === "-")) {
            // A zone of -05:00 is five hours behind UTC, so the offset is
            // added to reach UTC; a "+" zone is subtracted.
            $direction = ($sign === "-") ? 1 : -1;
            // Accept both "+HH:MM" and the colon-less "+HHMM".
            $minutePos = (substr($datestring, $startTZ + 3, 1) === ":") ? $startTZ + 4 : $startTZ + 3;
            $offset_hour = $direction * (int) substr($datestring, $startTZ + 1, 2);
            $offset_minute = $direction * (int) substr($datestring, $minutePos, 2);
        }

        // gmmktime() normalises out-of-range hours/minutes produced by the
        // offset arithmetic.
        return gmmktime($hour + $offset_hour, $minute + $offset_minute, $second, $month, $day, $year);
    }
}
if (function_exists("FeedForAll_scripts_convertEncoding") === FALSE) {
    //
    // Convert an XML document to another character encoding and rewrite the
    // encoding named in its <?xml ... ?> declaration to match.
    //
    // $XMLstring               The XML document text.
    // $missingEncodingDefault  Encoding to assume when the declaration does
    //                          not name one.
    // $destinationEncoding     Encoding to convert the document to.
    //
    // Returns the converted document, or $XMLstring unchanged when the
    // declaration already names $destinationEncoding.  Returns FALSE when no
    // <?xml ... ?> declaration is found, when the declaration has an
    // "encoding=" without a quoted value, when neither mbstring nor iconv is
    // available, or when both conversions fail.
    //
    function FeedForAll_scripts_convertEncoding($XMLstring, $missingEncodingDefault = "ISO-8859-1", $destinationEncoding = "UTF-8")
    {
        // Non-greedy so that a second processing instruction on the same
        // line is not swallowed into the declaration.
        $results = array();
        if (preg_match("/<\?xml(.*?)\?>/", $XMLstring, $results) !== 1) {
            return FALSE;
        }
        $initialXMLHeader = $results[0];

        // Normalise the declaration (no spaces, double quotes only) so the
        // encoding value is always the text between the first pair of quotes
        // following "encoding=".
        $compactHeader = str_replace("'", "\"", str_replace(" ", "", $initialXMLHeader));
        $inputEncoding = $missingEncodingDefault;

        if (($location = stristr($compactHeader, "encoding=")) !== FALSE) {
            // explode() replaces split(), which was removed in PHP 7.0.
            $parts = explode("\"", $location);
            if (!isset($parts[1]) || ($parts[1] === "")) {
                // "encoding=" without a quoted value: the source encoding
                // cannot be determined.
                return FALSE;
            }
            if (strcasecmp($parts[1], $destinationEncoding) == 0) {
                // Already in the requested encoding.
                return $XMLstring;
            }
            $inputEncoding = $parts[1];
            $modifiedXMLHeader = str_replace($inputEncoding, $destinationEncoding, $initialXMLHeader);
        } else {
            $modifiedXMLHeader = str_replace("?>", " encoding=\"$destinationEncoding\" ?>", $initialXMLHeader);
        }

        $workString = str_replace($initialXMLHeader, $modifiedXMLHeader, $XMLstring);

        // Prefer mbstring, fall back to iconv.
        if (function_exists("mb_convert_encoding") !== FALSE) {
            try {
                $newResult = mb_convert_encoding($workString, $destinationEncoding, $inputEncoding);
            } catch (ValueError $e) {
                // PHP 8 throws for an encoding mbstring does not know;
                // treat that as a failed attempt so iconv still gets a turn.
                $newResult = FALSE;
            }
            if ($newResult !== FALSE) {
                return $newResult;
            }
        }

        if (function_exists("iconv") !== FALSE) {
            if (($newResult = iconv($inputEncoding, "$destinationEncoding//TRANSLIT", $workString)) !== FALSE) {
                return $newResult;
            }
        }

        return FALSE;
    }
}
if (function_exists("FeedForAll_scripts_readFile") === FALSE) {
    //
    // Read a local file or a URL and return its contents as a string.
    //
    // $filename     Path or URL; it is treated as a URL when it contains "://".
    // $useFopenURL  For URLs only: when true the URL is read with fopen(),
    //               otherwise it is fetched with cURL.
    // $useCaching   Accepted but not used by this function.
    //
    // Returns the contents, or FALSE when the file/URL cannot be opened or
    // cURL reports an error.  $GLOBALS["ERRORSTRING"] is cleared on entry and
    // set to the cURL error text when a cURL transfer fails.
    //
    function FeedForAll_scripts_readFile($filename, $useFopenURL, $useCaching = 0)
    {
        global $connectTimeoutLimit;

        // Empty statement: evaluates the otherwise unused parameter and does
        // nothing with it.
        if ($useCaching);

        $GLOBALS["ERRORSTRING"] = "";
        $result = "";

        $isRemote = stristr($filename, "://");

        // Local files, and URLs when fopen() was requested, are read through
        // a plain stream in 4 KB chunks.
        if (!$isRemote || $useFopenURL) {
            $handle = @fopen($filename, "rb");
            if ($handle === FALSE) {
                return FALSE;
            }
            for (;;) {
                $chunk = fread($handle, 4096);
                if ($chunk == "") {
                    break;
                }
                $result .= $chunk;
            }
            fclose($handle);
            return $result;
        }

        // This is a URL so use CURL
        $curlHandle = curl_init();
        curl_setopt($curlHandle, CURLOPT_URL, $filename);
        curl_setopt($curlHandle, CURLOPT_HEADER, 0);
        curl_setopt($curlHandle, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($curlHandle, CURLOPT_USERAGENT, "FeedForAll rssFilter.php v2");
        // curl_setopt($curlHandle, CURLOPT_AUTOREFERER, 1);
        curl_setopt($curlHandle, CURLOPT_REFERER, $filename);

        // Redirect following is only switched on when neither safe_mode nor
        // open_basedir is set.
        $restricted = ini_get("safe_mode") || ini_get("open_basedir");
        if (!$restricted) {
            curl_setopt($curlHandle, CURLOPT_FOLLOWLOCATION, 1);
        }

        // Optional connect timeout taken from the global setting.
        if (isset($connectTimeoutLimit) && $connectTimeoutLimit != 0) {
            curl_setopt($curlHandle, CURLOPT_CONNECTTIMEOUT, $connectTimeoutLimit);
        }
        curl_setopt($curlHandle, CURLOPT_MAXREDIRS, 10);

        $result = curl_exec($curlHandle);
        $failed = curl_errno($curlHandle);
        if ($failed) {
            $GLOBALS["ERRORSTRING"] = curl_error($curlHandle);
            curl_close($curlHandle);
            return FALSE;
        }
        curl_close($curlHandle);

        return $result;
    }
}
$rssMeshFeedImageDescription; var $rssMeshFeedImageHeight; var $rssMeshFeedImageWidth; var $atomID; var $atomUpdated; var $atomContent; var $atomContentStartPos;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -