📄 googlebase.py.txt
字号:
#!/usr/bin/python2.3
# Copyright (c) 2006 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Extracts meta information from Google Base feeds.

This module makes an unauthenticated HTTP connection to Google Base to
get meta information and make it available in a convenient form.
"""

import logging
from xml.dom import minidom

try:
    # Python 2 module layout, which this file was originally written for.
    import urllib
    import urllib2
    import urlparse
except ImportError:
    # Python 3 moved the same functions under urllib.parse / urllib.request.
    import urllib.parse
    import urllib.request
    _urlencode = urllib.parse.urlencode
    _urljoin = urllib.parse.urljoin
    _build_opener = urllib.request.build_opener
else:
    _urlencode = urllib.urlencode
    _urljoin = urlparse.urljoin
    _build_opener = urllib2.build_opener

BASE_URL = "http://base.google.com/"
GM_NAMESPACE_URI = "http://base.google.com/ns-metadata/1.0"
G_NAMESPACE_URI = "http://base.google.com/ns/1.0"
ATOM_NAMESPACE_URI = "http://www.w3.org/2005/Atom"

_LOG = logging.getLogger(__name__)

# toxml('utf-8') yields byte strings, so the pieces must be joined with an
# empty byte string.  "".encode() is `str` on Python 2 and `bytes` on Python 3
# and avoids the b"" literal, which very old interpreters do not accept.
_EMPTY_BYTES = "".encode("utf-8")


class GoogleBaseService(object):
    """Communicates with Google Base feeds."""

    def __init__(self, developer_key, base_url=BASE_URL):
        """Creates a service object and initializes it with the developer key.

        Args:
          developer_key: developer key issued for this application
          base_url: Google Base server and path to connect to, used as the
                    base url for connecting to the feeds.
                    (default: http://base.google.com/)
        """
        # "/base/feeds/" is an absolute path, so urljoin keeps only the
        # scheme and host of base_url and replaces whatever path it had.
        self._base_url = _urljoin(base_url, "/base/feeds/")
        self._opener = _build_opener()
        # Assigning (rather than appending) replaces the opener's default
        # header list; every request carries the developer key.
        self._opener.addheaders = [("X-Google-Key", "key=" + developer_key)]

    def listMostCommonItemTypeAttributes(self, item_type, max_results=25,
                                         max_values=5):
        """Gets a list of the attributes most commonly used for an item type.

        Args:
          item_type: item type name
          max_results: maximum number of attributes to query for
                       (25 by default)
          max_values: maximum number of example values to query for
                      (5 by default)

        Returns:
          most commonly used attributes and their values, an object of type
          MostCommonAttributes.  Each example value is the UTF-8 encoded XML
          serialization of the content of one <value> element.
        """
        feed = self.run_query("attributes",
                              bq="[item type: %s]" % (item_type),
                              max_results=max_results,
                              max_values=max_values,
                              refine="true")
        # Merge text nodes, to make it easier to work with
        feed.normalize()

        retval = MostCommonAttributes()
        for attribute in feed.getElementsByTagNameNS(GM_NAMESPACE_URI,
                                                     "attribute"):
            textValues = []
            for value in attribute.getElementsByTagNameNS(GM_NAMESPACE_URI,
                                                          "value"):
                # Serialize every child (text or markup) and glue the
                # encoded pieces back together.
                text = _EMPTY_BYTES.join(
                    [c.toxml('utf-8') for c in value.childNodes])
                textValues.append(text)
            retval.addAttribute((attribute.getAttribute("name"),
                                 attribute.getAttribute("type")),
                                textValues)
        return retval

    def run_query(self, feed="snippets", **kargs):
        """Run a query on a feed.

        Args:
          feed: Google Base feed name (snippets, items, ...).
                "snippets" by default
          **kargs: feed parameters as keyword arguments
                   (q, bq, max_results, ...); different feeds take
                   different parameters. Replace - by _ in the parameter
                   names.

        Returns:
          answer, as a DOM tree

        Errors raised while opening the URL or parsing the response are not
        caught here and propagate to the caller.
        """
        # Python identifiers cannot contain '-', so callers spell feed
        # parameters with '_' and they are translated back here.
        parameters = [(key.replace('_', '-'), kargs[key]) for key in kargs]
        relative_url = "%s?%s" % (feed, _urlencode(parameters))
        url = _urljoin(self._base_url, relative_url)
        # Library code should not write to stdout; emit the URL as a debug
        # log message instead.
        _LOG.debug("Google Base query: %s", url)
        handle = self._opener.open(url)
        try:
            return minidom.parse(handle)
        finally:
            # Always release the connection, even when parsing fails.
            handle.close()


class MostCommonAttributes(object):
    """Keeps the most common attributes and their examples.

    This class works very much like a map of attribute (name, type) to
    values that would keep the keys in order. The attributes are ordered
    from the most common to the least common.
    """

    def __init__(self):
        # Keys in insertion order; each key appears at most once.
        self._attributes = []
        # (name, type) -> list of example values.
        self._values = {}

    def getExamplesFor(self, attribute):
        """Returns the example values stored for an attribute.

        Args:
          attribute: a (name, type) tuple

        Returns:
          the values passed to addAttribute for that attribute

        Raises:
          KeyError: the attribute was never added
        """
        return self._values[attribute]

    def addAttribute(self, attribute, values):
        """Registers an attribute together with its example values.

        Adding an attribute that is already present replaces its values but
        keeps its original position, so the key list never holds duplicates.

        Args:
          attribute: a (name, type) tuple
          values: example values for the attribute
        """
        # This attribute is always there, but it's not interesting
        if attribute == ("item type", "text"):
            return
        if attribute not in self._values:
            self._attributes.append(attribute)
        self._values[attribute] = values

    def __getAttributes(self):
        return self._attributes

    # Read-only view of the (name, type) keys, in insertion order.
    attributes = property(__getAttributes)


_TOESCAPE = '|:="[]()*#<>\\+-'


def escape(to_escape):
    """Escape special characters in a string for inclusion in a Google Base
    query.

    Args:
      to_escape: a string

    Returns:
      the same string with a backslash in front of some special characters
    """
    def escapeChar(c):
        if c in _TOESCAPE:
            return "\\" + c
        else:
            return c

    return ''.join([escapeChar(c) for c in to_escape])
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -