📄 score-nbest.py.svn-base
字号:
#!/usr/bin/python
# $Id$
"""Convert an n-best list in mert.perl format to the format required by
Venugopal's MER trainer.  This entails calculating the BLEU component scores.

usage: score-nbest.py [-c] [-a|-s|-e] [-n] <reffile>+ <outprefix>

The input should be sorted by sentence number and piped into stdin.
Run it like this:

    sort -mnk 1,1 *.nbest | score-nbest.py ...

Each input line has four '|||'-separated fields: sentence number,
candidate sentence, feature vector (numbers, possibly interleaved with
score labels) and one further field that is read but not used.

Options (each one only sets an attribute of the ``bleu`` module):
    -c  bleu.preserve_case = True
    -a  bleu.eff_ref_len = "average"
    -s  bleu.eff_ref_len = "shortest"
    -e  bleu.eff_ref_len = "closest"
    -n  bleu.nonorm = 1

Two files are written:
    <outprefix>cands.opt  one "<sentnum> <number of candidates>" line
                          per sentence
    <outprefix>feats.opt  one line per distinct candidate: the negated
                          feature values, the (correct, guess) count pairs
                          returned by bleu.cook_test, and the reference
                          length
"""

import sys
import itertools
import re

import bleu

# Python 2.3 has no builtin set type; only there fall back to the sets module.
try:
    set
except NameError:
    from sets import Set as set

# Recognizes the numeric tokens of a feature vector.  Score labels do not
# match and are therefore filtered out by parse_line().
re_float = re.compile(r'^-?[-0-9.e]+$')
is_float = re_float.match


def parse_line(line):
    """Split one n-best line into (sentnum, sent, vector).

    sentnum is converted to int, sent has its whitespace normalized to
    single spaces, and vector is a tuple holding the negated value of every
    token of the third field that looks like a float.

    Raises ValueError when the line does not have exactly four
    '|||'-separated fields, when the sentence number is not an integer, or
    when a token passes the (loose) float pattern but float() rejects it.
    """
    ##Changed to add a further field - AA 29/11/05
    #(sentnum, sent, vector) = line.split('|||')
    (sentnum, sent, vector, prob) = line.split('|||')
    sentnum = int(sentnum)
    sent = " ".join(sent.split())
    # filter out score labels (keep only floats) and convert numbers to floats
    vector = tuple([-float(s) for s in vector.split() if is_float(s)])
    return (sentnum, sent, vector)


def process(sentnum, testsents):
    """Write all candidates of sentence `sentnum` to the two output files.

    sentnum   -- sentence number; also the index into `cookedrefs`
    testsents -- collection of (sent, vector) pairs for that sentence

    Relies on the module-level `candsfile`, `featsfile` and `cookedrefs`
    that the script sets up before calling this function.
    """
    # Use the parameter rather than the global cur_sentnum: both callers
    # pass the same value, and this keeps the function self-contained.
    candsfile.write("%d %d\n" % (sentnum, len(testsents)))
    for (sent, vector) in testsents:
        comps = bleu.cook_test(sent, cookedrefs[sentnum])
        if comps['testlen'] != comps['guess'][0]:
            sys.stderr.write("ERROR: test length != guessed 1-grams\n")
        features = " ".join([str(v) for v in vector])
        counts = " ".join(["%d %d" % (c, g)
                           for (c, g) in zip(comps['correct'], comps['guess'])])
        featsfile.write("%s %s %d\n" % (features, counts, comps['reflen']))


if __name__ == "__main__":
    import os
    machtype = os.environ.get("MACHTYPE")
    if machtype == "i386":
        #import psyco
        #psyco.full()
        sys.stderr.write("psyco library is NOT imported. Uncomment code in score-nbest.py if you wish to enable it\n")
    else:
        sys.stderr.write("psyco library is not imported because it is not available for %s \n" % machtype)

    import getopt
    (opts, args) = getopt.getopt(sys.argv[1:], "casen", [])
    for (opt, parm) in opts:
        if opt == "-c":
            bleu.preserve_case = True
        if opt == "-a":
            bleu.eff_ref_len = "average"
        if opt == "-s":
            bleu.eff_ref_len = "shortest"
        if opt == "-e":
            bleu.eff_ref_len = "closest"
        if opt == "-n":
            bleu.nonorm = 1

    # At least one reference file plus the output prefix are required;
    # without them the code below would fail with an IndexError.
    if len(args) < 2:
        sys.stderr.write("usage: score-nbest.py <reffile>+ <outprefix>\n")
        sys.exit(1)

    # print(x) with a single argument behaves like the print statement.
    print(args)

    # Read the reference files in lockstep: entry i of cookedrefs holds the
    # cooked form of line i of every reference file.
    izip = getattr(itertools, "izip", zip)
    cookedrefs = []
    reffiles = [open(name) for name in args[:-1]]
    print(reffiles)
    try:
        for refs in izip(*reffiles):
            cookedrefs.append(bleu.cook_refs(refs))
    finally:
        for reffile in reffiles:
            reffile.close()

    outprefix = args[-1]

    featsfile = open(outprefix+"feats.opt", "w")
    candsfile = open(outprefix+"cands.opt", "w")
    try:
        cur_sentnum = None
        testsents = set()
        infile = sys.stdin

        for (progress, line) in enumerate(infile):
            # One progress dot per 10000 input lines.
            if progress % 10000 == 0:
                sys.stdout.write(".")
                sys.stdout.flush()

            try:
                (sentnum, sent, vector) = parse_line(line)
            except ValueError:
                sys.stderr.write("ERROR: bad input line %s\n" % line)
                # Skip the line; carrying on would reuse the fields of the
                # previous line (or undefined names on the first line).
                continue

            # A change of sentence number closes the current group; this is
            # why the input has to be sorted by sentence number.
            if sentnum != cur_sentnum:
                if cur_sentnum is not None:
                    process(cur_sentnum, testsents)
                cur_sentnum = sentnum
                testsents = set()
            # A set, so duplicate (sentence, vector) candidates collapse.
            testsents.add((sent, vector))

        # Flush the last group, unless there was no usable input at all.
        if cur_sentnum is not None:
            process(cur_sentnum, testsents)
        sys.stdout.write("\n")
    finally:
        featsfile.close()
        candsfile.close()
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -