📄 __init__.py
字号:
"""Yahoo Search Web ServicesThis module implements a set of classes and functions to work with theYahoo Search Web Services. All results from these services are properlyformatted XML, and this package facilitates for proper parsing of theseresult sets. Some of the features include: * Extendandable API, with replaceable backend XML parsers, and I/O interface. * Type and value checking on search parameters, including automatic type conversion (when appropriate and possible) * Flexible return format, including DOM objects, or fully parsed result objectsYou can either instantiate a search object directly, or use the factoryfunction create_search() from the factory module. The supported classesof searches are: NewsSearch - News article search VideoSearch - Video and movie search ImageSearch - Image search LocalSearch - Local area search WebSearch - Web search ContextSearch - Web search with a context RelatedSuggestion - Web search Related Suggestion SpellingSuggestion - Web search Spelling Suggestion TermExtraction - Term Extraction service AlbumSearch - Find information about albums ArtistSearch - Information on a particular musical performer SongDownload - Find links to various song providers of a song PodcastSearch - Search for a Podcast site/feed SongSearch - Provide information about songs PageData - Shows a list of all pages belonging to a domain InlinkData - Shows the pages from other sites linking to a pageThe different sub-classes of search supports different sets of queryparameters. For details on all allowed parameters, please consult thespecific module documentation.Each of these parameter is implemented as an attribute of eachrespective class. For example, you can set parameters like: from yahoo.search.web import WebSearch app_id = "YahooDemo" srch = WebSearch(app_id) srch.query = "Leif Hedstrom" srch.results = 40or, if you are using the factory function: from yahoo.search.factory import create_search app_id = "YahooDemo" srch = create_search("Web", app_id, query="Leif Hedstrom", results=40) if srch is not None: # srch object ready to use ... else: print "error"or, the last alternative, a combination of the previous two: import yahoo.search.web app_id = "YahooDemo" srch = web.WebSearch(app_id, query="Leif Hedstrom", results=40)To retrieve a certain parameter value, simply access it as any normalattribute: print "Searched for ", srch.queryFor more information on these parameters, and their allowed values, pleasesee the official Yahoo Search Services documentation available at http://developer.yahoo.net/Once the webservice object has been created, you can retrieve a parsedobject (typically a DOM object) using the get_results() method: dom = srch.get_results()This DOM object contains all results, and can be used as is. For easieruse of the results, you can use the built-in results factory, which willtraverse the entire DOM object, and create a list of results objects. results = srch.parse_results(dom)or, by using the implicit call to get_results(): results = srch.parse_results() The default XML parser and results factories should be adequate for mostusers, so use the parse_results() when possible. However, both the XMLparser and the results parser can easily be overriden. See the examplesbelow for details. More information about the DOM parsers are availablein the yahoo.search.dom module, and it's subclasses.EXAMPLES:This simple application will create a search object using the firstcommand line argument as the "type" (e.g. "web" or "news"), and allsubsequent arguments forms the query string: #!/usr/bin/python import sys from yahoo.search.factory import create_search service = sys.argv[1] query = " ".join(sys.argv[2:]) app_id = "YahooDemo" srch = create_search(service, app_id, query=query, results=5) if srch is None: srch = create_search("Web", app_id, query=query, results=5) dom = srch.get_results() results = srch.parse_results(dom) for res in results: url = res.Url summary = res['Summary'] print "%s -> %s" (summary, url)The same example using the PyXML 4DOM parser: #!/usr/bin/python import sys from yahoo.search.factory import create_search from xml.dom.ext.reader import Sax2 query = " ".join(sys.argv[2:]) srch = create_search(sys.argv[1], "YahooDemo", query=query, results=5) if srch is not None: reader = Sax2.Reader() srch.install_xml_parser(reader.fromStream) . . .The last example will produce the same query, but uses an HTTP proxyfor the request: #!/usr/bin/python import sys from yahoo.search.factory import create_search import urllib2 query = " ".join(sys.argv[2:]) srch = create_search(sys.argv[1], "YahooDemo", query=query, results=5) if srch is not None: proxy = urllib2.ProxyHandler({"http" : "http://octopus:3128"}) opener = urllib2.build_opener(proxy) srch.install_opener(opener) . . .You can obviously "mix and match" as necessary here. I'm using theinstaller methods above for clarity, the APIs allows you to pass thosecustom handlers as arguments as well (see the documentation below)."""__revision__ = "$Id: __init__.py,v 1.19 2007/09/11 21:38:43 zwoop Exp $"__version__ = "$Revision: 1.19 $"__author__ = "Leif Hedstrom <leif@ogre.com>"__date__ = "Tue Sep 11 15:32:26 MDT 2007"import urllibimport urllib2import typesimport reimport timefrom yahoo.search import debug__revision__ = "$Id: __init__.py,v 1.19 2007/09/11 21:38:43 zwoop Exp $"__version__ = "$Revision: 1.19 $"__author__ = "Leif Hedstrom <leif@ogre.com>"__date__ = "Thu Jul 7 14:22:16 PDT 2005"## List of all sub-packages that we expose directly#__all__ = ["web", "news", "video", "image", "local", "term", "audio", "site"]## List of all supported languages.#LANGUAGES = {'default':"english", 'ar':"arabic", 'bg':"bulgarian", 'ca':"catalan", 'szh':"chinese-simplified", 'tzh':"chinese-traditional", 'hr':"croatian", 'cs':"czech", 'da':"danish", 'nl':"dutch", 'en':"english", 'et':"estonian", 'fi':"finnish", 'fr':"french", 'de':"german", 'el':"greek", 'he':"hebrew", 'hu':"hungarian", 'is':"icelandic", 'id':"indonesian", 'it':"italian", 'ja':"japanese", 'ko':"korean", 'lv':"latvian", 'lt':"lithuanian", 'no':"norwegian", 'fa':"persian", 'pl':"polish", 'pt':"portuguese", 'ro':"romanian", 'ru':"russian", 'sk':"slovak", 'sr':"serbian", 'sl':"slovenian", 'es':"spanish", 'sv':"swedish", 'th':"thai", 'tr':"turkish"}## List of all supported countries.#COUNTRIES = {'default':"any", 'any':"any", 'ar':"Argentina", 'au':"Australia", 'at':"Austria", 'be':"Belgium", 'br':"Brazil", 'ca':"Canada", 'cn':"China", 'cz':"Czechoslovakia", 'dk':"Denmark", 'fi':"Finland", 'fr':"France", 'de':"Germany", 'it':"Italy", 'jp':"Japan", 'kr':"Korea", 'nl':"Netherlands", 'no':"Norway", 'pl':"Poland", 'rf':"Russian Federation", 'es':"Spain",'se':"Sweden", 'ch':"Switzerland", 'tw':"Taiwan", 'uk':"United Kingdom", 'us':"United States"}## List of all supported regions.#REGIONS = { 'default':"us", 'ar':"Argentina", 'au':"Australia", 'at':"Austria", 'br':"Brazil", 'ca':"Canada", 'ct':"Catalan", 'dk':"Denmark", 'fi':"Finland", 'fr':"France", 'de':"Germany", 'in':"India", 'id':"Indonesia", 'it':"Italy", 'my':"Malaysia", 'mx':"Mexico", 'nl':"Netherlands", 'no':"Norway", 'ph':"Phillipines", 'ru':"Russian Federation", 'sg':"Singapore", 'es':"Spain", 'se':"Sweden", 'ch':"Switzerland", 'th':"Thailand", 'uk':"United Kingdom & Ireland", 'us':"United States (yahoo.com)"}## List of all Creative Content licenses.#CC_LICENSES = {'cc_any':"Any", 'cc_commercial':"Commercial", 'cc_modifiable':"Modifiable"}## List of all subscription types#SUBSCRIPTIONS = {'cr':"Consumer Reports", 'ft':"FT.com", 'forrester':"Forrester Research", 'ieee':"IEEE publications", 'nejm':"New England Journal of Medicine", 'thestreet':"TheStreet.com", 'wsj':"Wall Street Journal"}## Regular expressions#CALLBACK_REGEX = re.compile("^[a-zA-Z0-9\.\[\]\_]+$")## Exceptions and error handling#class Error(Exception): """Base class for all Yahoo Web Services exceptions."""class ParameterError(Error): """A parameter is missing, or has bad value""" passclass ServerError(Error): """The Yahoo server is unavailable.""" passclass ClassError(Error): """This can only occur if the APIs aren't installed or configured properly. If it happens, please contact the author."""class SearchError(Error): """An exception/error occured in the search.""" def __init__(self, err): Error.__init__(self, err) self.msg = "unknown error" for line in err.readlines(): start = line.find("<Message>") if start > -1: stop = line.find("</Message>") if stop > -1: self.msg = line[start+9:stop] def __str__(self): return self.msg## First a couple of base classes for the Search services. Most of them# are almost identical, so good candidates to sub-class one of these.#class _Search(debug.Debuggable, object): """Yahoo Search WebService - base class This class implements the core functionality of all Yahoo Search Services. """ NAME = "Search" SERVICE = "Search" PROTOCOL = "http" SERVER = "search.yahooapis.com" VERSION = "V1" METHOD = "GET" _NEXT_QID = 1 _RESULT_FACTORY = None def __init__(self, app_id, opener=None, xml_parser=None, result_factory=None, debug_level=0, **args): """The app_id is a required argument, the Yahoo search services will not accept requests without a proper app_id. A valid app_id is a combination of 8 - 40 characters, matching the regexp "^[a-zA-Z0-9 _()\[\]*+\-=,.:\\\@]{8,40}$" Please visit http://developer.yahoo.net/search/ to request an App ID for your own software or application. Four optional arguments can also be passed to the constructor: opener - Opener for urllib2 xml_parser - Function to parse XML (default: minidom) result_factory - Result factory class (default: none) debug_devel - Debug level (if any) All other "named" arguments are passed into as a dictionary to the set_params() method. The result factory is specific to the particular web service used, e.g. the different Yahoo Search services will each implement their own factory class. Both of these settings can be controlled via their respective install method (see below). """ super(_Search, self).__init__(debug_level) self._service = {'name' : self.NAME, 'protocol' : self.PROTOCOL, 'server' : self.SERVER, 'version' : self.VERSION, 'service' : self.SERVICE} self._app_id = None self.app_id = app_id self._require_oneof_params = [] self._urllib_opener = opener self._xml_parser = xml_parser if result_factory: self._result_factory = result_factory else: self._result_factory = self._RESULT_FACTORY if self._xml_parser is None: import xml.dom.minidom self._xml_parser = xml.dom.minidom.parse self._default_xml_parser = self._xml_parser self._qid = self._NEXT_QID self._NEXT_QID += 1 # All Search APIs now supports "output" and "callback". self._valid_params = { "output" : (types.StringTypes, "xml", str.lower, ("xml", "json", "php"), None, False), "callback" : (types.StringTypes, None, None, lambda x: CALLBACK_REGEX.match(x) is not None, "the characters A-Z a-z 0-9 . [] and _.", False), } self._init_valid_params() self._params = {} if args: self.set_params(args) # Implement the attribute handlers, to avoid confusion def __setattr__(self, name, value): if (hasattr(getattr(self.__class__, name, None), '__set__') or name[0] == '_'): super(_Search, self).__setattr__(name, value) else: # Special case for "output", since we need to disable the # XML parser as well. if (name == "output"): if (value in ("json", "php")): self._xml_parser = None else: self._xml_parser = self._default_xml_parser self.set_param(name, value) def __getattr__(self, name): if (hasattr(getattr(self.__class__, name, None), '__get__') or name[0] == '_'): return super(_Search, self).__getattr__(name) else: return self.get_param(name) def _init_valid_params(self):
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -