mhlib.py
来自「mallet是自然语言处理、机器学习领域的一个开源项目。」· Python 代码 · 共 1,004 行 · 第 1/3 页
PY
1,004 行
mimetools.Message.__init__(self, fp) def __repr__(self): """String representation.""" return 'Message(%s, %s)' % (repr(self.folder), self.number) def getheadertext(self, pred = None): """Return the message's header text as a string. If an argument is specified, it is used as a filter predicate to decide which headers to return (its argument is the header name converted to lower case).""" if not pred: return ''.join(self.headers) headers = [] hit = 0 for line in self.headers: if not line[0].isspace(): i = line.find(':') if i > 0: hit = pred(line[:i].lower()) if hit: headers.append(line) return ''.join(headers) def getbodytext(self, decode = 1): """Return the message's body text as string. This undoes a Content-Transfer-Encoding, but does not interpret other MIME features (e.g. multipart messages). To suppress decoding, pass 0 as an argument.""" self.fp.seek(self.startofbody) encoding = self.getencoding() if not decode or encoding in ('', '7bit', '8bit', 'binary'): return self.fp.read() from StringIO import StringIO output = StringIO() mimetools.decode(self.fp, output, encoding) return output.getvalue() def getbodyparts(self): """Only for multipart messages: return the message's body as a list of SubMessage objects. Each submessage object behaves (almost) as a Message object.""" if self.getmaintype() != 'multipart': raise Error, 'Content-Type is not multipart/*' bdry = self.getparam('boundary') if not bdry: raise Error, 'multipart/* without boundary param' self.fp.seek(self.startofbody) mf = multifile.MultiFile(self.fp) mf.push(bdry) parts = [] while mf.next(): n = str(self.number) + '.' + `1 + len(parts)` part = SubMessage(self.folder, n, mf) parts.append(part) mf.pop() return parts def getbody(self): """Return body, either a string or a list of messages.""" if self.getmaintype() == 'multipart': return self.getbodyparts() else: return self.getbodytext()class SubMessage(Message): def __init__(self, f, n, fp): """Constructor.""" Message.__init__(self, f, n, fp) if self.getmaintype() == 'multipart': self.body = Message.getbodyparts(self) else: self.body = Message.getbodytext(self) self.bodyencoded = Message.getbodytext(self, decode=0) # XXX If this is big, should remember file pointers def __repr__(self): """String representation.""" f, n, fp = self.folder, self.number, self.fp return 'SubMessage(%s, %s, %s)' % (f, n, fp) def getbodytext(self, decode = 1): if not decode: return self.bodyencoded if type(self.body) == type(''): return self.body def getbodyparts(self): if type(self.body) == type([]): return self.body def getbody(self): return self.bodyclass IntSet: """Class implementing sets of integers. This is an efficient representation for sets consisting of several continuous ranges, e.g. 1-100,200-400,402-1000 is represented internally as a list of three pairs: [(1,100), (200,400), (402,1000)]. The internal representation is always kept normalized. The constructor has up to three arguments: - the string used to initialize the set (default ''), - the separator between ranges (default ',') - the separator between begin and end of a range (default '-') The separators must be strings (not regexprs) and should be different. The tostring() function yields a string that can be passed to another IntSet constructor; __repr__() is a valid IntSet constructor itself. """ # XXX The default begin/end separator means that negative numbers are # not supported very well. # # XXX There are currently no operations to remove set elements. def __init__(self, data = None, sep = ',', rng = '-'): self.pairs = [] self.sep = sep self.rng = rng if data: self.fromstring(data) def reset(self): self.pairs = [] def __cmp__(self, other): return cmp(self.pairs, other.pairs) def __hash__(self): return hash(self.pairs) def __repr__(self): return 'IntSet(%s, %s, %s)' % (`self.tostring()`, `self.sep`, `self.rng`) def normalize(self): self.pairs.sort() i = 1 while i < len(self.pairs): alo, ahi = self.pairs[i-1] blo, bhi = self.pairs[i] if ahi >= blo-1: self.pairs[i-1:i+1] = [(alo, max(ahi, bhi))] else: i = i+1 def tostring(self): s = '' for lo, hi in self.pairs: if lo == hi: t = `lo` else: t = `lo` + self.rng + `hi` if s: s = s + (self.sep + t) else: s = t return s def tolist(self): l = [] for lo, hi in self.pairs: m = range(lo, hi+1) l = l + m return l def fromlist(self, list): for i in list: self.append(i) def clone(self): new = IntSet() new.pairs = self.pairs[:] return new def min(self): return self.pairs[0][0] def max(self): return self.pairs[-1][-1] def contains(self, x): for lo, hi in self.pairs: if lo <= x <= hi: return 1 return 0 def append(self, x): for i in range(len(self.pairs)): lo, hi = self.pairs[i] if x < lo: # Need to insert before if x+1 == lo: self.pairs[i] = (x, hi) else: self.pairs.insert(i, (x, x)) if i > 0 and x-1 == self.pairs[i-1][1]: # Merge with previous self.pairs[i-1:i+1] = [ (self.pairs[i-1][0], self.pairs[i][1]) ] return if x <= hi: # Already in set return i = len(self.pairs) - 1 if i >= 0: lo, hi = self.pairs[i] if x-1 == hi: self.pairs[i] = lo, x return self.pairs.append((x, x)) def addpair(self, xlo, xhi): if xlo > xhi: return self.pairs.append((xlo, xhi)) self.normalize() def fromstring(self, data): new = [] for part in data.split(self.sep): list = [] for subp in part.split(self.rng): s = subp.strip() list.append(int(s)) if len(list) == 1: new.append((list[0], list[0])) elif len(list) == 2 and list[0] <= list[1]: new.append((list[0], list[1])) else: raise ValueError, 'bad data passed to IntSet' self.pairs = self.pairs + new self.normalize()# Subroutines to read/write entries in .mh_profile and .mh_sequencesdef pickline(file, key, casefold = 1): try: f = open(file, 'r') except IOError: return None pat = re.escape(key) + ':' prog = re.compile(pat, casefold and re.IGNORECASE) while 1: line = f.readline() if not line: break if prog.match(line): text = line[len(key)+1:] while 1: line = f.readline() if not line or not line[0].isspace(): break text = text + line return text.strip() return Nonedef updateline(file, key, value, casefold = 1): try: f = open(file, 'r') lines = f.readlines() f.close() except IOError: lines = [] pat = re.escape(key) + ':(.*)\n' prog = re.compile(pat, casefold and re.IGNORECASE) if value is None: newline = None else: newline = '%s: %s\n' % (key, value) for i in range(len(lines)): line = lines[i] if prog.match(line): if newline is None: del lines[i] else: lines[i] = newline break else: if newline is not None: lines.append(newline) tempfile = file + "~" f = open(tempfile, 'w') for line in lines: f.write(line) f.close() os.rename(tempfile, file)# Test programdef test(): global mh, f os.system('rm -rf $HOME/Mail/@test') mh = MH() def do(s): print s; print eval(s) do('mh.listfolders()') do('mh.listallfolders()') testfolders = ['@test', '@test/test1', '@test/test2', '@test/test1/test11', '@test/test1/test12', '@test/test1/test11/test111'] for t in testfolders: do('mh.makefolder(%s)' % `t`) do('mh.listsubfolders(\'@test\')') do('mh.listallsubfolders(\'@test\')') f = mh.openfolder('@test') do('f.listsubfolders()') do('f.listallsubfolders()') do('f.getsequences()') seqs = f.getsequences() seqs['foo'] = IntSet('1-10 12-20', ' ').tolist() print seqs f.putsequences(seqs) do('f.getsequences()') testfolders.reverse() for t in testfolders: do('mh.deletefolder(%s)' % `t`) do('mh.getcontext()') context = mh.getcontext() f = mh.openfolder(context) do('f.getcurrent()') for seq in ['first', 'last', 'cur', '.', 'prev', 'next', 'first:3', 'last:3', 'cur:3', 'cur:-3', 'prev:3', 'next:3', '1:3', '1:-3', '100:3', '100:-3', '10000:3', '10000:-3', 'all']: try: do('f.parsesequence(%s)' % `seq`) except Error, msg: print "Error:", msg stuff = os.popen("pick %s 2>/dev/null" % `seq`).read() list = map(int, stuff.split()) print list, "<-- pick" do('f.listmessages()')if __name__ == '__main__': test()
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?