mimetypes.py

来自「mallet是自然语言处理、机器学习领域的一个开源项目。」· Python 代码 · 共 436 行
436 行
"""Guess the MIME type of a file.

This module defines two useful functions:

guess_type(url, strict=1) -- guess the MIME type and encoding of a URL.

guess_extension(type, strict=1) -- guess the extension for a given MIME type.

It also contains the following, for tuning the behavior:

Data:

knownfiles -- list of files to parse
inited -- flag set when init() has been called
suffix_map -- dictionary mapping suffixes to suffixes
encodings_map -- dictionary mapping suffixes to encodings
types_map -- dictionary mapping suffixes to types

Functions:

init([files]) -- parse a list of files, default knownfiles
read_mime_types(file) -- parse one file, return a dictionary or None
"""

import os
import posixpath
import re
import urllib  # kept for backward compatibility; scheme splitting is done by _splittype()

__all__ = ["guess_type","guess_extension","read_mime_types","init"]

knownfiles = [
    "/usr/local/etc/httpd/conf/mime.types",
    "/usr/local/lib/netscape/mime.types",
    "/usr/local/etc/httpd/conf/mime.types",     # Apache 1.2
    "/usr/local/etc/mime.types",                # Apache 1.3
    ]

inited = 0

# A leading "scheme:" prefix; the scheme may not contain '/' or ':'.
_scheme_prog = re.compile('^([^/:]+):')


def _splittype(url):
    """Split 'scheme:rest' into (scheme.lower(), rest).

    Returns (None, url) when url carries no scheme prefix.  This mirrors the
    Python 2 urllib.splittype() helper, which is not available as
    urllib.splittype on Python 3.
    """
    match = _scheme_prog.match(url)
    if match is None:
        return None, url
    scheme = match.group(1)
    return scheme.lower(), url[len(scheme) + 1:]


class MimeTypes:
    """MIME-types datastore.

    This datastore can handle information from mime.types-style files
    and supports basic determination of MIME type from a filename or
    URL, and can guess a reasonable extension given a MIME type.

    Each instance works on private copies of the module-level tables
    (encodings_map, suffix_map, types_map, common_types), so changes made
    to an instance do not affect the module defaults.
    """

    def __init__(self, filenames=()):
        """Create a datastore and load each mime.types file in `filenames'.

        The module is initialized first (via init()) if that has not
        happened yet, so the copied tables reflect the known system files.
        """
        if not inited:
            init()
        self.encodings_map = encodings_map.copy()
        self.suffix_map = suffix_map.copy()
        self.types_map = types_map.copy()
        self.common_types = common_types.copy()
        for name in filenames:
            self.read(name)

    def guess_type(self, url, strict=1):
        """Guess the type of a file based on its URL.

        Return value is a tuple (type, encoding) where type is None if
        the type can't be guessed (no or unknown suffix) or a string
        of the form type/subtype, usable for a MIME Content-type
        header; and encoding is None for no encoding or the name of
        the program used to encode (e.g. compress or gzip).  The
        mappings are table driven.  Encoding suffixes are case
        sensitive; type suffixes are first tried case sensitive, then
        case insensitive.

        The suffixes .tgz, .taz and .tz (case sensitive!) are all
        mapped to '.tar.gz'.  (This is table-driven too, using the
        dictionary suffix_map.)

        Optional `strict' argument when false adds a bunch of commonly found,
        but non-standard types.
        """
        scheme, url = _splittype(url)
        if scheme == 'data':
            # syntax of data URLs:
            # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
            # mediatype := [ type "/" subtype ] *( ";" parameter )
            # data      := *urlchar
            # parameter := attribute "=" value
            # type/subtype defaults to "text/plain"
            comma = url.find(',')
            if comma < 0:
                # bad data URL
                return None, None
            semi = url.find(';', 0, comma)
            if semi >= 0:
                mimetype = url[:semi]
            else:
                mimetype = url[:comma]
            # A bare parameter ("charset=...") or a missing type/subtype
            # means the media type was omitted, so the default applies.
            if '=' in mimetype or '/' not in mimetype:
                mimetype = 'text/plain'
            return mimetype, None       # never compressed, so encoding is None

        base, ext = posixpath.splitext(url)
        # Expand alias suffixes (e.g. '.tgz' -> '.tar.gz') until none is left.
        while ext in self.suffix_map:
            base, ext = posixpath.splitext(base + self.suffix_map[ext])
        # Peel off one encoding suffix; the type comes from what remains.
        if ext in self.encodings_map:
            encoding = self.encodings_map[ext]
            base, ext = posixpath.splitext(base)
        else:
            encoding = None

        types_map = self.types_map
        common_types = self.common_types
        lowered = ext.lower()
        if ext in types_map:
            return types_map[ext], encoding
        if lowered in types_map:
            return types_map[lowered], encoding
        if strict:
            return None, encoding
        if ext in common_types:
            return common_types[ext], encoding
        if lowered in common_types:
            return common_types[lowered], encoding
        return None, encoding

    def guess_extension(self, type, strict=1):
        """Guess the extension for a file based on its MIME type.

        Return value is a string giving a filename extension,
        including the leading dot ('.').  The extension is not
        guaranteed to have been associated with any particular data
        stream, but would be mapped to the MIME type `type' by
        guess_type().  If no extension can be guessed for `type', None
        is returned.

        Optional `strict' argument when false adds a bunch of commonly found,
        but non-standard types.
        """
        wanted = type.lower()
        for ext, stype in self.types_map.items():
            if wanted == stype:
                return ext
        if not strict:
            # Use this instance's table (not the module global) so that
            # per-instance additions are honoured, as in guess_type().
            for ext, stype in self.common_types.items():
                if wanted == stype:
                    return ext
        return None

    def read(self, filename):
        """Read a single mime.types-format file, specified by pathname."""
        fp = open(filename)
        try:
            self.readfp(fp)
        finally:
            # Close the file even if parsing raises.
            fp.close()

    def readfp(self, fp):
        """Read a single mime.types-format file.

        `fp' is any object with a readline() method.  Each non-empty line
        has the form "type suffix [suffix ...]"; everything from the first
        word starting with '#' onwards is ignored.  Every suffix is stored
        in self.types_map with a leading dot.
        """
        table = self.types_map
        while 1:
            line = fp.readline()
            if not line:
                break
            words = line.split()
            # Drop a trailing comment: the first word beginning with '#'
            # and everything after it.
            for i in range(len(words)):
                if words[i][0] == '#':
                    del words[i:]
                    break
            if not words:
                continue
            mimetype, suffixes = words[0], words[1:]
            for suff in suffixes:
                table['.' + suff] = mimetype


def guess_type(url, strict=1):
    """Guess the type of a file based on its URL.

    Return value is a tuple (type, encoding) where type is None if the
    type can't be guessed (no or unknown suffix) or a string of the
    form type/subtype, usable for a MIME Content-type header; and
    encoding is None for no encoding or the name of the program used
    to encode (e.g. compress or gzip).  The mappings are table
    driven.  Encoding suffixes are case sensitive; type suffixes are
    first tried case sensitive, then case insensitive.

    The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
    to ".tar.gz".  (This is table-driven too, using the dictionary
    suffix_map).

    Optional `strict' argument when false adds a bunch of commonly found, but
    non-standard types.
    """
    # init() rebinds the module-level name guess_type to the bound method
    # of the shared datastore, so the call below reaches that method and
    # does not recurse into this bootstrap function.
    init()
    return guess_type(url, strict)


def guess_extension(type, strict=1):
    """Guess the extension for a file based on its MIME type.

    Return value is a string giving a filename extension, including the
    leading dot ('.').  The extension is not guaranteed to have been
    associated with any particular data stream, but would be mapped to the
    MIME type `type' by guess_type().  If no extension can be guessed for
    `type', None is returned.

    Optional `strict' argument when false adds a bunch of commonly found,
    but non-standard types.
    """
    # See guess_type(): init() replaces this bootstrap function.
    init()
    return guess_extension(type, strict)


def init(files=None):
    """(Re)build the module-level tables from mime.types files.

    `files' is a sequence of pathnames; it defaults to knownfiles.  Entries
    that are not existing regular files are skipped.  Afterwards the module
    globals suffix_map, encodings_map, types_map and common_types refer to
    the tables of a fresh MimeTypes datastore, and the module-level
    guess_type / guess_extension names are bound to its methods.
    """
    global guess_extension, guess_type
    global suffix_map, types_map, encodings_map, common_types
    global inited
    # Set the flag first: MimeTypes.__init__ calls init() while it is unset.
    inited = 1
    db = MimeTypes()
    if files is None:
        files = knownfiles
    for filename in files:
        if os.path.isfile(filename):
            # read() closes the file once it has been parsed.
            db.read(filename)
    encodings_map = db.encodings_map
    suffix_map = db.suffix_map
    types_map = db.types_map
    guess_extension = db.guess_extension
    guess_type = db.guess_type
    common_types = db.common_types


def read_mime_types(file):
    """Parse one mime.types-format file and return a suffix -> type dict.

    Returns None if `file' cannot be opened.  The returned dictionary is
    the types_map of a fresh MimeTypes datastore, i.e. the current module
    defaults plus the entries read from `file'.
    """
    try:
        f = open(file)
    except IOError:
        return None
    try:
        db = MimeTypes()
        db.readfp(f)
    finally:
        f.close()
    return db.types_map


suffix_map = {
    '.tgz': '.tar.gz',
    '.taz': '.tar.gz',
    '.tz': '.tar.gz',
    }

encodings_map = {
    '.gz': 'gzip',
    '.Z': 'compress',
    }

# Before adding new types, make sure they are either registered with IANA, at
# http://www.isi.edu/in-notes/iana/assignments/media-types
# or extensions, i.e. using the x- prefix

# If you add to these, please keep them sorted!
#
# '.cdf' and '.xls' used to be listed twice ('application/x-cdf' and
# 'application/excel' came first).  In a dict literal the last duplicate key
# wins, so only the effective entries are kept here.
types_map = {
    '.a'      : 'application/octet-stream',
    '.ai'     : 'application/postscript',
    '.aif'    : 'audio/x-aiff',
    '.aifc'   : 'audio/x-aiff',
    '.aiff'   : 'audio/x-aiff',
    '.au'     : 'audio/basic',
    '.avi'    : 'video/x-msvideo',
    '.bat'    : 'text/plain',
    '.bcpio'  : 'application/x-bcpio',
    '.bin'    : 'application/octet-stream',
    '.bmp'    : 'image/x-ms-bmp',
    '.c'      : 'text/plain',
    '.cdf'    : 'application/x-netcdf',
    '.cpio'   : 'application/x-cpio',
    '.csh'    : 'application/x-csh',
    '.css'    : 'text/css',
    '.dll'    : 'application/octet-stream',
    '.doc'    : 'application/msword',
    '.dot'    : 'application/msword',
    '.dvi'    : 'application/x-dvi',
    '.eml'    : 'message/rfc822',
    '.eps'    : 'application/postscript',
    '.etx'    : 'text/x-setext',
    '.exe'    : 'application/octet-stream',
    '.gif'    : 'image/gif',
    '.gtar'   : 'application/x-gtar',
    '.h'      : 'text/plain',
    '.hdf'    : 'application/x-hdf',
    '.htm'    : 'text/html',
    '.html'   : 'text/html',
    '.ief'    : 'image/ief',
    '.jpe'    : 'image/jpeg',
    '.jpeg'   : 'image/jpeg',
    '.jpg'    : 'image/jpeg',
    '.js'     : 'application/x-javascript',
    '.ksh'    : 'text/plain',
    '.latex'  : 'application/x-latex',
    '.m1v'    : 'video/mpeg',
    '.man'    : 'application/x-troff-man',
    '.me'     : 'application/x-troff-me',
    '.mht'    : 'message/rfc822',
    '.mhtml'  : 'message/rfc822',
    '.mif'    : 'application/x-mif',
    '.mov'    : 'video/quicktime',
    '.movie'  : 'video/x-sgi-movie',
    '.mp2'    : 'audio/mpeg',
    '.mp3'    : 'audio/mpeg',
    '.mpa'    : 'video/mpeg',
    '.mpe'    : 'video/mpeg',
    '.mpeg'   : 'video/mpeg',
    '.mpg'    : 'video/mpeg',
    '.ms'     : 'application/x-troff-ms',
    '.nc'     : 'application/x-netcdf',
    '.nws'    : 'message/rfc822',
    '.o'      : 'application/octet-stream',
    '.obj'    : 'application/octet-stream',
    '.oda'    : 'application/oda',
    '.p12'    : 'application/x-pkcs12',
    '.p7c'    : 'application/pkcs7-mime',
    '.pbm'    : 'image/x-portable-bitmap',
    '.pdf'    : 'application/pdf',
    '.pfx'    : 'application/x-pkcs12',
    '.pgm'    : 'image/x-portable-graymap',
    '.pl'     : 'text/plain',
    '.png'    : 'image/png',
    '.pnm'    : 'image/x-portable-anymap',
    '.pot'    : 'application/vnd.ms-powerpoint',
    '.ppa'    : 'application/vnd.ms-powerpoint',
    '.ppm'    : 'image/x-portable-pixmap',
    '.pps'    : 'application/vnd.ms-powerpoint',
    '.ppt'    : 'application/vnd.ms-powerpoint',
    '.ps'     : 'application/postscript',
    '.pwz'    : 'application/vnd.ms-powerpoint',
    '.py'     : 'text/x-python',
    '.pyc'    : 'application/x-python-code',
    '.pyo'    : 'application/x-python-code',
    '.qt'     : 'video/quicktime',
    '.ra'     : 'audio/x-pn-realaudio',
    '.ram'    : 'application/x-pn-realaudio',
    '.ras'    : 'image/x-cmu-raster',
    '.rdf'    : 'application/xml',
    '.rgb'    : 'image/x-rgb',
    '.roff'   : 'application/x-troff',
    '.rtx'    : 'text/richtext',
    '.sgm'    : 'text/x-sgml',
    '.sgml'   : 'text/x-sgml',
    '.sh'     : 'application/x-sh',
    '.shar'   : 'application/x-shar',
    '.snd'    : 'audio/basic',
    '.so'     : 'application/octet-stream',
    '.src'    : 'application/x-wais-source',
    '.sv4cpio': 'application/x-sv4cpio',
    '.sv4crc' : 'application/x-sv4crc',
    '.t'      : 'application/x-troff',
    '.tar'    : 'application/x-tar',
    '.tcl'    : 'application/x-tcl',
    '.tex'    : 'application/x-tex',
    '.texi'   : 'application/x-texinfo',
    '.texinfo': 'application/x-texinfo',
    '.tif'    : 'image/tiff',
    '.tiff'   : 'image/tiff',
    '.tr'     : 'application/x-troff',
    '.tsv'    : 'text/tab-separated-values',
    '.txt'    : 'text/plain',
    '.ustar'  : 'application/x-ustar',
    '.vcf'    : 'text/x-vcard',
    '.wav'    : 'audio/x-wav',
    '.wiz'    : 'application/msword',
    '.xbm'    : 'image/x-xbitmap',
    '.xlb'    : 'application/vnd.ms-excel',
    '.xls'    : 'application/vnd.ms-excel',
    '.xml'    : 'text/xml',
    '.xpm'    : 'image/x-xpixmap',
    '.xsl'    : 'application/xml',
    '.xwd'    : 'image/x-xwindowdump',
    '.zip'    : 'application/zip',
    }

# These are non-standard types, commonly found in the wild.  They will only
# match if strict=0 flag is given to the API methods.

# Please sort these too
common_types = {
    '.jpg' : 'image/jpg',
    '.mid' : 'audio/midi',
    '.midi': 'audio/midi',
    '.pct' : 'image/pict',
    '.pic' : 'image/pict',
    '.pict': 'image/pict',
    '.rtf' : 'application/rtf',
    '.xul' : 'text/xul'
    }


if __name__ == '__main__':
    import sys
    import getopt

    USAGE = """\
Usage: mimetypes.py [options] type

Options:
    --help / -h       -- print this message and exit
    --lenient / -l    -- additionally search of some common, but non-standard
                         types.
    --extension / -e  -- guess extension instead of type

More than one type argument may be given.
"""

    def usage(code, msg=''):
        """Print the usage text (and optional message), then exit with `code'."""
        # Single-argument print(...) behaves the same whether print is a
        # statement or a function.
        print(USAGE)
        if msg:
            print(msg)
        sys.exit(code)

    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hle',
                                   ['help', 'lenient', 'extension'])
    except getopt.error:
        # sys.exc_info() avoids the version-specific "except X, e" /
        # "except X as e" spellings.
        usage(1, sys.exc_info()[1])

    strict = 1
    extension = 0
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-l', '--lenient'):
            strict = 0
        elif opt in ('-e', '--extension'):
            extension = 1
    for gtype in args:
        if extension:
            guess = guess_extension(gtype, strict)
            if not guess:
                print("I don't know anything about type %s" % gtype)
            else:
                print(guess)
        else:
            guess, encoding = guess_type(gtype, strict)
            if not guess:
                print("I don't know anything about type %s" % gtype)
            else:
                print('type: %s encoding: %s' % (guess, encoding))
mimetypes.py - 源码说明

本页面展示了「mallet是自然语言处理、机器学习领域的一个开源项目。」中的 mimetypes.py 源码文件，采用 Python 编程语言编写，共 436 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫开发者社区收录了大量与Mallet相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码：Ctrl + C
搜索代码：Ctrl + F
全屏模式：F11
增大字号：Ctrl + =
减小字号：Ctrl + -
显示快捷键：?