feedparser.py

来自「Harvestman-最新版本」· Python 代码 · 共 1,630 行 · 第 1/5 页

PY
1,630
字号
        # categories/tags/keywords/whatever are handled in _end_category        if element == 'category':            return output                # store output in appropriate place(s)        if self.inentry and not self.insource:            if element == 'content':                self.entries[-1].setdefault(element, [])                contentparams = copy.deepcopy(self.contentparams)                contentparams['value'] = output                self.entries[-1][element].append(contentparams)            elif element == 'link':                self.entries[-1][element] = output                if output:                    self.entries[-1]['links'][-1]['href'] = output            else:                if element == 'description':                    element = 'summary'                self.entries[-1][element] = output                if self.incontent:                    contentparams = copy.deepcopy(self.contentparams)                    contentparams['value'] = output                    self.entries[-1][element + '_detail'] = contentparams        elif (self.infeed or self.insource) and (not self.intextinput) and (not self.inimage):            context = self._getContext()            if element == 'description':                element = 'subtitle'            context[element] = output            if element == 'link':                context['links'][-1]['href'] = output            elif self.incontent:                contentparams = copy.deepcopy(self.contentparams)                contentparams['value'] = output                context[element + '_detail'] = contentparams        return output    def pushContent(self, tag, attrsD, defaultContentType, expectingText):        self.incontent += 1        self.contentparams = FeedParserDict({            'type': self.mapContentType(attrsD.get('type', defaultContentType)),            'language': self.lang,            'base': self.baseuri})        self.contentparams['base64'] = self._isBase64(attrsD, self.contentparams)        self.push(tag, expectingText)    def popContent(self, tag):        value = self.pop(tag)        self.incontent -= 1        self.contentparams.clear()        return value            def _mapToStandardPrefix(self, name):        colonpos = name.find(':')        if colonpos <> -1:            prefix = name[:colonpos]            suffix = name[colonpos+1:]            prefix = self.namespacemap.get(prefix, prefix)            name = prefix + ':' + suffix        return name            def _getAttribute(self, attrsD, name):        return attrsD.get(self._mapToStandardPrefix(name))    def _isBase64(self, attrsD, contentparams):        if attrsD.get('mode', '') == 'base64':            return 1        if self.contentparams['type'].startswith('text/'):            return 0        if self.contentparams['type'].endswith('+xml'):            return 0        if self.contentparams['type'].endswith('/xml'):            return 0        return 1    def _itsAnHrefDamnIt(self, attrsD):        href = attrsD.get('url', attrsD.get('uri', attrsD.get('href', None)))        if href:            try:                del attrsD['url']            except KeyError:                pass            try:                del attrsD['uri']            except KeyError:                pass            attrsD['href'] = href        return attrsD        def _save(self, key, value):        context = self._getContext()        context.setdefault(key, value)    def _start_rss(self, attrsD):        versionmap = {'0.91': 'rss091u',                      '0.92': 'rss092',                      '0.93': 'rss093',                      '0.94': 'rss094'}        if not self.version:            attr_version = attrsD.get('version', '')            version = versionmap.get(attr_version)            if version:                self.version = version            elif attr_version.startswith('2.'):                self.version = 'rss20'            else:                self.version = 'rss'        def _start_dlhottitles(self, attrsD):        self.version = 'hotrss'    def _start_channel(self, attrsD):        self.infeed = 1        self._cdf_common(attrsD)    _start_feedinfo = _start_channel    def _cdf_common(self, attrsD):        if attrsD.has_key('lastmod'):            self._start_modified({})            self.elementstack[-1][-1] = attrsD['lastmod']            self._end_modified()        if attrsD.has_key('href'):            self._start_link({})            self.elementstack[-1][-1] = attrsD['href']            self._end_link()        def _start_feed(self, attrsD):        self.infeed = 1        versionmap = {'0.1': 'atom01',                      '0.2': 'atom02',                      '0.3': 'atom03'}        if not self.version:            attr_version = attrsD.get('version')            version = versionmap.get(attr_version)            if version:                self.version = version            else:                self.version = 'atom'    def _end_channel(self):        self.infeed = 0    _end_feed = _end_channel        def _start_image(self, attrsD):        self.inimage = 1        self.push('image', 0)        context = self._getContext()        context.setdefault('image', FeedParserDict())                def _end_image(self):        self.pop('image')        self.inimage = 0    def _start_textinput(self, attrsD):        self.intextinput = 1        self.push('textinput', 0)        context = self._getContext()        context.setdefault('textinput', FeedParserDict())    _start_textInput = _start_textinput        def _end_textinput(self):        self.pop('textinput')        self.intextinput = 0    _end_textInput = _end_textinput    def _start_author(self, attrsD):        self.inauthor = 1        self.push('author', 1)    _start_managingeditor = _start_author    _start_dc_author = _start_author    _start_dc_creator = _start_author    _start_itunes_author = _start_author    def _end_author(self):        self.pop('author')        self.inauthor = 0        self._sync_author_detail()    _end_managingeditor = _end_author    _end_dc_author = _end_author    _end_dc_creator = _end_author    _end_itunes_author = _end_author    def _start_itunes_owner(self, attrsD):        self.inpublisher = 1        self.push('publisher', 0)    def _end_itunes_owner(self):        self.pop('publisher')        self.inpublisher = 0        self._sync_author_detail('publisher')    def _start_contributor(self, attrsD):        self.incontributor = 1        context = self._getContext()        context.setdefault('contributors', [])        context['contributors'].append(FeedParserDict())        self.push('contributor', 0)    def _end_contributor(self):        self.pop('contributor')        self.incontributor = 0    def _start_dc_contributor(self, attrsD):        self.incontributor = 1        context = self._getContext()        context.setdefault('contributors', [])        context['contributors'].append(FeedParserDict())        self.push('name', 0)    def _end_dc_contributor(self):        self._end_name()        self.incontributor = 0    def _start_name(self, attrsD):        self.push('name', 0)    _start_itunes_name = _start_name    def _end_name(self):        value = self.pop('name')        if self.inpublisher:            self._save_author('name', value, 'publisher')        elif self.inauthor:            self._save_author('name', value)        elif self.incontributor:            self._save_contributor('name', value)        elif self.intextinput:            context = self._getContext()            context['textinput']['name'] = value    _end_itunes_name = _end_name    def _start_width(self, attrsD):        self.push('width', 0)    def _end_width(self):        value = self.pop('width')        try:            value = int(value)        except:            value = 0        if self.inimage:            context = self._getContext()            context['image']['width'] = value    def _start_height(self, attrsD):        self.push('height', 0)    def _end_height(self):        value = self.pop('height')        try:            value = int(value)        except:            value = 0        if self.inimage:            context = self._getContext()            context['image']['height'] = value    def _start_url(self, attrsD):        self.push('href', 1)    _start_homepage = _start_url    _start_uri = _start_url    def _end_url(self):        value = self.pop('href')        if self.inauthor:            self._save_author('href', value)        elif self.incontributor:            self._save_contributor('href', value)        elif self.inimage:            context = self._getContext()            context['image']['href'] = value        elif self.intextinput:            context = self._getContext()            context['textinput']['link'] = value    _end_homepage = _end_url    _end_uri = _end_url    def _start_email(self, attrsD):        self.push('email', 0)    _start_itunes_email = _start_email    def _end_email(self):        value = self.pop('email')        if self.inpublisher:            self._save_author('email', value, 'publisher')        elif self.inauthor:            self._save_author('email', value)        elif self.incontributor:            self._save_contributor('email', value)    _end_itunes_email = _end_email    def _getContext(self):        if self.insource:            context = self.sourcedata        elif self.inentry:            context = self.entries[-1]        else:            context = self.feeddata        return context    def _save_author(self, key, value, prefix='author'):        context = self._getContext()        context.setdefault(prefix + '_detail', FeedParserDict())        context[prefix + '_detail'][key] = value        self._sync_author_detail()    def _save_contributor(self, key, value):        context = self._getContext()        context.setdefault('contributors', [FeedParserDict()])        context['contributors'][-1][key] = value    def _sync_author_detail(self, key='author'):        context = self._getContext()        detail = context.get('%s_detail' % key)        if detail:            name = detail.get('name')            email = detail.get('email')            if name and email:                context[key] = '%s (%s)' % (name, email)            elif name:                context[key] = name            elif email:                context[key] = email        else:            author = context.get(key)            if not author: return            emailmatch = re.search(r'''(([a-zA-Z0-9\_\-\.\+]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?))''', author)

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?