microsummary.php.svn-base
来自「j2me is based on j2mepolish, client & se」· SVN-BASE 代码 · 共 406 行
SVN-BASE
406 行
<?php
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is Microsummary Engine.
 *
 * The Initial Developer of the Original Code is
 * Dietrich Ayala.
 * Portions created by the Initial Developer are Copyright (C) 2007
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Doug Turner <dougt@meer.net>
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */

//error_reporting(E_ERROR);
//ini_set('display_errors', true);

/**
 * Loads a microsummary generator (an XML document that embeds an XSLT
 * template) and applies its first <value-of select="..."> XPath expression
 * to a fetched HTML page.
 *
 * Unlike a classic microsummary, the result is not plain text: it is the
 * serialized markup of the first node matched by the XPath expression,
 * wrapped in <body>...</body>.
 */
class microsummary
{
    var $msdoc;     // microsummary DOM document
    var $xsldoc;    // DOM document of the stylesheet embedded in the generator (null if absent)
    var $result;    // markup produced by the last successful execute()
    var $hintXPATH; // XPath derived from the hint; "" until one has been computed
    var $hint;      // decoded contents of the generator's <hint> element, or ""

    /**
     * Load and parse a microsummary generator.
     *
     * @param string $generator XML source of the generator.
     * @return void
     */
    function load($generator)
    {
        // Start from a known state so that save()/execute() never see
        // leftovers from a previous load or uninitialised properties.
        $this->xsldoc = null;
        $this->hint = "";
        $this->hintXPATH = "";

        $this->msdoc = new DOMDocument();
        $this->msdoc->loadXML($generator);

        $xpath = new DOMXPath($this->msdoc);

        // Bind the prefix "ms" to whatever namespace the first element uses,
        // so the queries below work regardless of the generator's namespace URI.
        $namespace = $xpath->evaluate('namespace-uri(//*)');
        $xpath->registerNamespace('ms', $namespace);

        // Bind the prefix "xsl" to the XSLT namespace.
        $xpath->registerNamespace("xsl", "http://www.w3.org/1999/XSL/Transform");

        // Import the embedded stylesheet, if there is one. query() yields
        // false on an XPath error and an (object) empty list when nothing
        // matches, so both cases have to be tested explicitly; empty() on a
        // DOMNodeList is never true.
        $xsltransform = $xpath->query('/ms:generator/ms:template/xsl:transform');
        $templateNode = $xsltransform ? $xsltransform->item(0) : null;
        if ($templateNode) {
            // loadXML() is an instance method; calling it statically is a
            // fatal error on PHP 8.
            $this->xsldoc = new DOMDocument();
            $this->xsldoc->loadXML($this->msdoc->saveXML($templateNode));
        }

        // Read the hint. execute() only needs $msdoc, so a generator without
        // an <xsl:transform> template is still usable and its hint is kept.
        $hintNodes = $xpath->query('/ms:generator/ms:hint');
        $hintelm = $hintNodes ? $hintNodes->item(0) : null;
        if ($hintelm && !empty($hintelm->nodeValue)) {
            $this->hint = $this->fromXMLString($hintelm->nodeValue);
        }
    }

    /**
     * Serialize the generator, writing the computed hint XPath (if any) into
     * the "select" attribute of the first <value-of> element.
     *
     * @return string|false XML of the generator (false if serialization fails).
     */
    function save()
    {
        if ($this->hintXPATH != "") {
            $valueof = $this->msdoc->getElementsByTagName("value-of");
            // Nothing to update when the template has no <value-of>.
            if ($valueof->length > 0) {
                $valueof->item(0)->setAttribute("select", $this->hintXPATH);
            }
        }

        return $this->msdoc->saveXML();
    }

    /**
     * Decode the five predefined XML entities in a string.
     *
     * strtr() makes a single pass over the input, so "&amp;lt;" decodes to
     * "&lt;" rather than being decoded twice into "<".
     *
     * @param string $in Text that may contain &amp; &lt; &gt; &apos; &quot;.
     * @return string Decoded text.
     */
    function fromXMLString($in)
    {
        return strtr((string) $in, array(
            '&lt;'   => '<',
            '&gt;'   => '>',
            '&apos;' => "'",
            '&quot;' => '"',
            '&amp;'  => '&',
        ));
    }

    /**
     * Build an XPath expression that addresses the given DOM node.
     *
     * The path is assembled from the node up to the root as /name[index]
     * steps. Climbing stops early at the first element that carries an id
     * attribute, which becomes an id('...') anchor. Text, comment and
     * processing-instruction nodes, and the temporary wrapper element with
     * id "microsummary_hint_id", contribute no step.
     *
     * @param DOMNode|null $in_node Node to describe.
     * @return string|null XPath expression, or null when no node is given.
     */
    function getNodePath($in_node)
    {
        if (!$in_node) {
            return null;
        }

        $buffer = '';

        for ($cur = $in_node; $cur; $cur = $cur->parentNode) {
            $type = $cur->nodeType;

            if ($type == XML_DOCUMENT_NODE) {
                // Reached the top: make sure the path is absolute and stop.
                if ($buffer === '' || $buffer[0] != '/') {
                    $buffer = '/' . $buffer;
                }
                break;
            }

            $sep = '/';
            $name = '';
            $occur = 0;
            $ignore = false;

            if ($type == XML_ATTRIBUTE_NODE) {
                $sep .= '@';
                $name = $cur->nodeName;
            } else {
                if ($type == XML_ELEMENT_NODE) {
                    $id = $cur->getAttribute('id');
                    if (!empty($id)) {
                        if ($id == "microsummary_hint_id") {
                            // Wrapper injected around the hint; it does not
                            // exist in the real page, so leave it out.
                            $ignore = true;
                        } else {
                            // Found an id: use it as the base reference and stop climbing.
                            return "id('" . $id . "')" . $buffer;
                        }
                    }
                }

                $name = $cur->nodeName;

                // 1-based position among preceding siblings of the same name.
                for ($sib = $cur->previousSibling; $sib; $sib = $sib->previousSibling) {
                    if ($sib->nodeName == $name) {
                        $occur++;
                    }
                }
                $occur++;

                switch ($type) {
                    case XML_ELEMENT_NODE:
                        break;
                    // Node kinds whose XPath node test differs from the DOM node name.
                    case XML_COMMENT_NODE:
                        $ignore = true;
                        $name = 'comment()';
                        break;
                    case XML_PI_NODE:
                        $ignore = true;
                        $name = 'processing-instruction()';
                        break;
                    case XML_TEXT_NODE:
                        $ignore = true;
                        $name = 'text()';
                        break;
                    default:
                        // Anything else has not been coded yet (CDATA is broken).
                        $name = '';
                        $sep = '';
                        $occur = 0;
                }
            }

            if (!$ignore) {
                if ($occur == 0) {
                    $buffer = $sep . $name . $buffer;
                } else {
                    $buffer = $sep . $name . '[' . $occur . ']' . $buffer;
                }
            }
        }

        return $buffer;
    }

    /**
     * Parse an HTML string into a DOM document without emitting parser warnings.
     *
     * @param string $html HTML source.
     * @return DOMDocument Parsed document (left empty when $html is "").
     */
    function loadHTMLDocument($html)
    {
        $doc = new DOMDocument();
        $doc->preserveWhiteSpace = false;
        $doc->resolveExternals = true; // for character entities

        // loadHTML() rejects an empty string on PHP 8.
        if ($html !== '') {
            // Real-world HTML is rarely well formed; collect libxml errors
            // instead of printing them, then restore the caller's setting.
            $previous = libxml_use_internal_errors(true);
            $doc->loadHTML($html);
            libxml_clear_errors();
            libxml_use_internal_errors($previous);
        }

        return $doc;
    }

    /**
     * Fetch a page and extract the node selected by the generator.
     *
     * On success the markup is stored in $this->result. The XSLT template is
     * deliberately not executed (that would yield plain text only); the
     * "select" XPath of the first <value-of> is evaluated directly against
     * the fetched page instead.
     *
     * Terminates the script via die() when the page cannot be fetched.
     *
     * @param string $applyTo URL of the page to summarize.
     * @param bool   $useHint When true and nothing matches, report the failed
     *                        XPath on output and return without setting a result.
     * @return int|null 1 on completion; null when nothing matched and
     *                  $useHint is true.
     */
    function execute($applyTo, $useHint)
    {
        $rv = 1;

        // fetch
        if (!$str = $this->fetch($applyTo)) {
            die("unable to fetch $applyTo");
        }

        // Strip line breaks before parsing.
        // NOTE(review): the original literal was a raw line break inside the
        // quotes; confirm it was LF and not a carriage return.
        $str = str_replace("\n", "", $str);

        $d = $this->loadHTMLDocument($str);

        // XPath expression taken from the generator's first <value-of>.
        $select = "";
        if ($this->msdoc) {
            $valueof = $this->msdoc->getElementsByTagName("value-of");
            if ($valueof->length > 0) {
                $select = $valueof->item(0)->getAttribute("select");
            }
        }

        $summary = "";
        if ($select !== "") {
            $docXpath = new DOMXPath($d);
            $docXpath->registerNamespace("html", "http://www.w3.org/1999/xhtml");
            $result = $docXpath->query($select);
            if ($result) {
                $node = $result->item(0);
                if ($node && $node->ownerDocument) {
                    $summary = "<body>" . $node->ownerDocument->saveXML($node) . "</body>";
                }
            }
        }

        if ($summary === "" && $useHint) {
            echo "Not Found:\n";
            echo $select;
            // Hint-based recovery is switched off for now ("lets ignore the
            // hint"); see recoverFromHint() for the disabled logic.
            return null;
        }

        $this->result = $summary;
        return $rv;
    }

    /**
     * Try to regenerate the generator's XPath from its hint.
     *
     * NOTE: not called anywhere at the moment. This is the recovery logic
     * that sat, unreachable, behind an unconditional return in execute(); it
     * is kept as a method so that it can be re-enabled deliberately.
     *
     * The hint (a fragment of page markup) is searched for in the page,
     * wrapped in a marker <div>, and the XPath of the wrapped node becomes
     * the new "select" expression. The page is then summarized again with
     * the updated generator.
     *
     * TODO: make optional closing tags (e.g. </li>) optional in the search,
     * and turn the hint into an escaped pattern for preg_match().
     * TODO: handle character entities inside the hint.
     *
     * @param string $applyTo URL of the page being summarized.
     * @param string $str     Source of that page, as already fetched.
     * @return int 2 when the XPath was updated ($this->result is set),
     *             -1 when the hint cannot be located in the page.
     */
    function recoverFromHint($applyTo, $str)
    {
        $hint = (string) $this->hint;

        if ($hint === "" || strpos($str, $hint) === false) {
            // No direct match. The hint may need massaging into something
            // less Firefox-like; currently that only means dropping </li>.
            $hint = str_replace("</li>", "", $hint);
            $this->hint = $hint;

            if ($hint === "" || strpos($str, $hint) === false) {
                // Can't figure out what to do: no updating, no microsummary.
                echo "failing here";
                return -1;
            }
        }

        // NOTE(review): str_ireplace() is case-insensitive and wraps every
        // occurrence, so the marker id may end up duplicated -- confirm that
        // using the first one is acceptable.
        $str = str_ireplace(
            $hint,
            "<div id=\"microsummary_hint_id\">" . $hint . "</div>",
            $str
        );

        // Parsed with the same settings as execute(), which is where the
        // generated path will eventually be evaluated.
        $d = $this->loadHTMLDocument($str);

        $marker = $d->getElementById("microsummary_hint_id");
        $child = $marker ? $marker->childNodes->item(0) : null;
        $this->hintXPATH = $this->getNodePath($child);

        // Verify the xpath actually worked.
        echo $this->hintXPATH;
        $verify = new DOMXPath($d);
        $result = ($this->hintXPATH != "") ? $verify->query($this->hintXPATH) : null;
        if ($result == null) {
            echo "Generated XPATH could not be found in Document.";
        }

        // TODO: refactor so that after an update it is easy to get the summary.
        $me_serialized = $this->save();
        $ms = new microsummary();
        $ms->load($me_serialized);
        $ms->execute($applyTo, false);

        $this->result = $ms->result;
        return 2;
    }

    /**
     * Download a URL with cURL.
     *
     * @param string $url Address to fetch.
     * @return string|false Response body, or false on any cURL failure.
     */
    function fetch($url)
    {
        $ch = curl_init();
        if ($ch === false) {
            return false;
        }

        curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (X11; U; FreeBSD i386; en-US; rv:1.2a) Gecko/20021021");
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // return into a variable
        curl_setopt($ch, CURLOPT_HTTPHEADER, array('Accept-Charset:utf-8'));
        curl_setopt($ch, CURLOPT_TIMEOUT, 5);

        $result = curl_exec($ch);
        $failed = curl_errno($ch) != 0;

        // Release the handle on the failure path as well.
        curl_close($ch);

        return $failed ? false : $result;
    }
}
?>
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?