htmldocument.py

来自「Python Development Environment (Python I」· Python 代码 · 共 356 行
356 行
########################################################################
#
# File Name:            HTMLDocument.py
#
# Documentation:        http://docs.4suite.com/4DOM/HTMLDocument.py.html
#
"""
WWW: http://4suite.com/4DOM         e-mail: support@4suite.com

Copyright (c) 2000 Fourthought Inc, USA.   All Rights Reserved.
See  http://4suite.com/COPYRIGHT  for license and copyright information
"""

from xml.dom import Node
from xml.dom import NotSupportedErr

from xml.dom.Document import Document
from xml.dom import implementation
from xml.dom import ext

import string, sys

from xml.dom.html import HTML_DTD

class HTMLDocument(Document):
    """Document subclass exposing the HTML-specific document interface.

    On top of ``Document`` this class provides:

    * computed attributes (``title``, ``body``, ``images``, ``links``,
      ...) wired up through ``_readComputedAttrs`` /
      ``_writeComputedAttrs`` at the bottom of the class;
    * ``open`` / ``write`` / ``writeln`` / ``close``;
    * ``createElement`` that instantiates the matching
      ``HTML<Name>Element`` class from the ``xml.dom.html`` package.

    ``createCDATASection``, ``createEntityReference``,
    ``createProcessingInstruction``, ``_4dom_createEntity`` and
    ``_4dom_createNotation`` always raise ``NotSupportedErr``.
    """

    def __init__(self):
        Document.__init__(self, None)
        # These only make sense in a browser environment, therefore
        # they never change
        self.__dict__['__referrer'] = ''
        self.__dict__['__domain'] = None
        self.__dict__['__URL'] = ''

        self.__dict__['__cookie'] = ''
        # write() is a no-op until open() sets this flag
        self.__dict__['__writable'] = 0
        # Namespace of the xml.dom.html package; used to look up the
        # HTML*Element modules in _4dom_createHTMLElement
        self.__dict__['_html'] = vars(sys.modules['xml.dom.html'])

    ### Attribute Methods ###

    def _get_URL(self):
        """Return the stored document URL."""
        return self.__dict__['__URL']

    def _get_anchors(self):
        """Return a collection of the A elements whose _get_name() is true."""
        anchors = self.getElementsByTagName('A')
        anchors = filter(lambda x: x._get_name(), anchors)
        return implementation._4dom_createHTMLCollection(anchors)

    def _get_applets(self):
        """Return a collection of all APPLET elements followed by the
        OBJECT elements whose _get_code() is true."""
        al = self.getElementsByTagName('APPLET')
        ol = self.getElementsByTagName('OBJECT')
        ol = filter(lambda x: x._get_code(), ol)
        return implementation._4dom_createHTMLCollection(al+ol)

    def _get_body(self):
        """Return the first FRAMESET element, else the first BODY element.

        When neither exists a new BODY element is created, appended to
        the document element and returned.
        """
        #Try to find the body or FRAMESET
        elements = self.getElementsByTagName('FRAMESET')
        if not elements:
            elements = self.getElementsByTagName('BODY')
        if elements:
            return elements[0]
        #Create a body
        body = self.createElement('BODY')
        self.documentElement.appendChild(body)
        return body

    def _set_body(self, newBody):
        """Replace the first FRAMESET (else first BODY) with newBody, or
        append newBody to the document element when there is none."""
        elements = self.getElementsByTagName('FRAMESET')
        if not elements:
            elements = self.getElementsByTagName('BODY')
        if elements:
            # Replace the existing one
            elements[0].parentNode.replaceChild(newBody, elements[0])
        else:
            # Add it
            self.documentElement.appendChild(newBody)

    def _get_cookie(self):
        """Return the stored cookie string."""
        return self.__dict__['__cookie']

    def _set_cookie(self, cookie):
        """Store cookie as the document's cookie value."""
        self.__dict__['__cookie'] = cookie

    def _get_domain(self):
        """Return the stored domain (None unless set on a clone source)."""
        return self.__dict__['__domain']

    def _get_forms(self):
        """Return a collection of all FORM elements."""
        forms = self.getElementsByTagName('FORM')
        return implementation._4dom_createHTMLCollection(forms)

    def _get_images(self):
        """Return a collection of all IMG elements."""
        images = self.getElementsByTagName('IMG')
        return implementation._4dom_createHTMLCollection(images)

    def _get_links(self):
        """Return a collection of the AREA elements followed by the A
        elements, keeping only those whose _get_href() is true."""
        areas = self.getElementsByTagName('AREA')
        anchors = self.getElementsByTagName('A')
        links = filter(lambda x: x._get_href(), areas+anchors)
        return implementation._4dom_createHTMLCollection(links)

    def _get_referrer(self):
        """Return the stored referrer string."""
        return self.__dict__['__referrer']

    def _get_title(self):
        """Return the text of the first TITLE element.

        The element is normalized first and the data of its first child
        is returned.  Returns '' when there is no TITLE element or the
        element has no children.
        """
        elements = self.getElementsByTagName('TITLE')
        if elements:
            #Take the first
            title = elements[0]
            title.normalize()
            if title.firstChild:
                return title.firstChild.data
        return ''

    def _set_title(self, title):
        """Set the document title to title.

        An existing first TITLE element is normalized and the data of
        its first child is overwritten.  If that element has no children
        a new text node is appended to it.  If there is no TITLE element
        one is created inside the head (see _4dom_getHead).
        """
        # See if we can find the title
        title_nodes = self.getElementsByTagName('TITLE')
        if title_nodes:
            title_node = title_nodes[0]
            title_node.normalize()
            if title_node.firstChild:
                title_node.firstChild.data = title
                return
        else:
            title_node = self.createElement('TITLE')
            self._4dom_getHead().appendChild(title_node)
        # Reached for a freshly created TITLE or an existing empty one
        text = self.createTextNode(title)
        title_node.appendChild(text)

    ### Methods ###

    def close(self):
        """Clear the writable flag so that write() becomes a no-op."""
        self.__dict__['__writable'] = 0

    def getElementsByName(self, elementName):
        """Return a collection of the elements whose NAME attribute
        equals elementName (any non-empty NAME when elementName is None)."""
        return self._4dom_getElementsByAttribute('*', 'NAME', elementName)

    def open(self):
        """Reset referrer, domain, URL and cookie and make the document
        writable."""
        #Clear out the doc
        self.__dict__['__referrer'] = ''
        self.__dict__['__domain'] = None
        # The key must be '__URL' (as in __init__ / _get_URL); the former
        # '__url' spelling left the real URL untouched.
        self.__dict__['__URL'] = ''
        self.__dict__['__cookie'] = ''
        self.__dict__['__writable'] = 1

    def write(self, st):
        """Parse the HTML string st into this document.

        Does nothing unless open() has been called (and close() has not
        been called since).  If the parser returns a node other than
        this document, that node is appended to the document.
        """
        if not self.__dict__['__writable']:
            return
        #We need to parse the string here
        # The imported name must match the name that is called; the
        # previous code imported 'FromHTML' but called 'FromHtml'.
        # NOTE(review): HtmlLib is expected to export FromHtml -- confirm
        # against the installed reader package.
        from xml.dom.ext.reader.HtmlLib import FromHtml
        d = FromHtml(st, self)
        if d != self:
            self.appendChild(d)

    def writeln(self, st):
        """Same as write(), with a newline appended to st."""
        st = st + '\n'
        self.write(st)


    def getElementByID(self, ID):
        """Return the first element whose ID attribute equals ID, or
        None when there is no such element."""
        hc = self._4dom_getElementsByAttribute('*','ID',ID)
        if hc.length != 0:
            return hc[0]
        return None

    ### Overridden Methods ###

    def createElement(self, tagName):
        """Create an HTML element for tagName (see
        _4dom_createHTMLElement)."""
        return self._4dom_createHTMLElement(tagName)

    def createAttribute(self, name):
        """Create an attribute whose name is upper-cased first."""
        return Document.createAttribute(self, string.upper(name))

    def createCDATASection(*args, **kw):
        """Always raises NotSupportedErr."""
        raise NotSupportedErr()

    def createEntityReference(*args, **kw):
        """Always raises NotSupportedErr."""
        raise NotSupportedErr()

    def createProcessingInstruction(*args, **kw):
        """Always raises NotSupportedErr."""
        raise NotSupportedErr()

    def _4dom_createEntity(*args, **kw):
        """Always raises NotSupportedErr."""
        raise NotSupportedErr()

    def _4dom_createNotation(*args, **kw):
        """Always raises NotSupportedErr."""
        raise NotSupportedErr()

    ### Internal Methods ###

    def _4dom_getElementsByAttribute(self, tagName, attribute, attrValue=None):
        """Return an HTML collection of elements filtered by attribute.

        tagName   -- passed to getElementsByTagName to select candidates
        attribute -- name of the attribute to inspect
        attrValue -- required attribute value; when None, every element
                     whose attribute value is not '' is selected
        """
        nl = self.getElementsByTagName(tagName)
        hc = implementation._4dom_createHTMLCollection()
        for elem in nl:
            attr = elem.getAttribute(attribute)
            if attrValue is None and attr != '':
                hc.append(elem)
            elif attr == attrValue:
                hc.append(elem)
        return hc

    def _4dom_getHead(self):
        """Return the first HEAD element, creating one when missing.

        A newly created HEAD is inserted into the document element in
        front of the node returned by _get_body() (which may itself
        create a BODY).
        """
        nl = self.getElementsByTagName('HEAD')
        if not nl:
            head = self.createElement('HEAD')
            #The head goes in front of the body
            body = self._get_body()
            self.documentElement.insertBefore(head, body)
        else:
            head = nl[0]
        return head

    def _4dom_createHTMLElement(self, tagName):
        """Instantiate the element class that corresponds to tagName.

        Raises TypeError when the lower-cased tag is not a key of
        HTML_DTD.  Tags listed in NoClassTags get a plain HTMLElement;
        every other tag is mapped (through HTMLTagMap where needed) to a
        class named 'HTML<Name>Element' living in the module of the same
        name inside the xml.dom.html package.
        """
        lowered = string.lower(tagName)
        if not HTML_DTD.has_key(lowered):
            raise TypeError('Unknown HTML Element: %s' % tagName)

        if lowered in NoClassTags:
            from HTMLElement import HTMLElement
            return HTMLElement(self, tagName)

        #FIXME: capitalize() broken with unicode in Python 2.0
        #normTagName = string.capitalize(tagName)
        capitalized = string.upper(tagName[0]) + lowered[1:]
        element = HTMLTagMap.get(capitalized, capitalized)
        module = 'HTML%sElement' % element
        if not self._html.has_key(module):
            #Try to import it (should never fail)
            __import__('xml.dom.html.%s' % module)
        # Class and module have the same name
        klass = getattr(self._html[module], module)
        return klass(self, tagName)

    def cloneNode(self, deep):
        """Return a new HTMLDocument carrying this document's referrer,
        domain, URL and cookie.

        The writable flag is not copied; the clone keeps the value set
        by __init__.  When deep is true the doctype (shallow clone) and
        the document element (deep clone owned by the new document) are
        copied as well.
        """
        clone = HTMLDocument()
        clone.__dict__['__referrer'] = self._get_referrer()
        clone.__dict__['__domain'] = self._get_domain()
        clone.__dict__['__URL'] = self._get_URL()
        clone.__dict__['__cookie'] = self._get_cookie()
        if deep:
            if self.doctype is not None:
                # Cannot have any children, no deep needed
                dt = self.doctype.cloneNode(0)
                clone._4dom_setDocumentType(dt)
            if self.documentElement is not None:
                # The root element can have children, duh
                root = self.documentElement.cloneNode(1, newOwner=clone)
                clone.appendChild(root)
        return clone

    def isXml(self):
        """Return 0: this is not an XML document."""
        return 0

    def isHtml(self):
        """Return 1: this is an HTML document."""
        return 1

    ### Attribute Access Mappings ###

    # attribute name -> getter; extends the table inherited from Document
    _readComputedAttrs = Document._readComputedAttrs.copy()
    _readComputedAttrs.update ({
         'title'         : _get_title,
         'referrer'      : _get_referrer,
         'domain'        : _get_domain,
         'URL'           : _get_URL,
         'body'          : _get_body,
         'images'        : _get_images,
         'applets'       : _get_applets,
         'links'         : _get_links,
         'forms'         : _get_forms,
         'anchors'       : _get_anchors,
         'cookie'        : _get_cookie
      })

    # attribute name -> setter; extends the table inherited from Document
    _writeComputedAttrs = Document._writeComputedAttrs.copy()
    _writeComputedAttrs.update ({
         'title'         : _set_title,
         'body'          : _set_body,
         'cookie'        : _set_cookie,
      })

    # Create the read-only list of attributes
    # (the mapping is bound as a lambda default because class-level names
    # are not visible from inside the lambda body)
    _readOnlyAttrs = filter(lambda k,m=_writeComputedAttrs: not m.has_key(k),
                            Document._readOnlyAttrs + _readComputedAttrs.keys())

# Lookup table for tags whose class name cannot be derived from the tag
# itself.  Keys are tag names with only the first letter upper-cased;
# values are the <Name> part of the 'HTML<Name>Element' class/module to
# use.  Tags that are absent from this table use the capitalized tag
# name unchanged.
HTMLTagMap = {
    'Isindex': 'IsIndex',
    'Optgroup': 'OptGroup',
    'Textarea': 'TextArea',
    'Fieldset': 'FieldSet',
    'Ul': 'UList',
    'Ol': 'OList',
    'Dl': 'DList',
    'Dir': 'Directory',
    'Li': 'LI',
    'P': 'Paragraph',
    # All six heading levels share one class
    'H1': 'Heading',
    'H2': 'Heading',
    'H3': 'Heading',
    'H4': 'Heading',
    'H5': 'Heading',
    'H6': 'Heading',
    'Q': 'Quote',
    'Blockquote': 'Quote',
    'Br': 'BR',
    'Basefont': 'BaseFont',
    'Hr': 'HR',
    'A': 'Anchor',
    'Img': 'Image',
    'Caption': 'TableCaption',
    'Col': 'TableCol',
    'Colgroup': 'TableCol',
    'Td': 'TableCell',
    'Th': 'TableCell',
    'Tr': 'TableRow',
    'Thead': 'TableSection',
    'Tbody': 'TableSection',
    'Tfoot': 'TableSection',
    'Frameset': 'FrameSet',
    'Iframe': 'IFrame',
    'Form': 'Form',
    'Ins': 'Mod',
    'Del': 'Mod',
}

# Lower-case tag names that have no dedicated DOM interface of their
# own; membership is tested against the lower-cased tag name.
NoClassTags = [
    'sub', 'sup', 'span', 'bdo',
    'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small',
    'em', 'strong', 'dfn', 'code', 'samp', 'kbd', 'var', 'cite',
    'acronym', 'abbr',
    'dd', 'dt',
    'noframes', 'noscript',
    'address', 'center',
]
htmldocument.py - 源码说明

本页面展示了「Python Development Environment (Python IDE plugin for Eclipse). Features editor, code completion, re」中的 htmldocument.py 源码文件，采用 Python 编程语言编写，共 356 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与Python相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?