score-nbest.py

来自「moses开源的机器翻译系统」· Python 代码 · 共 110 行

110 行

#!/usr/bin/python

# $Id: score-nbest.py 1354 2007-04-12 17:48:48Z hieuhoang1972 $
"""Convert n-best list in mert.perl format to format required by
Venugopal's MER trainer. This entails calculating the BLEU component scores."""

"""usage: score-nbest.py <reffile>+ <outprefix>

   The input should be sorted by sentence number and piped into stdin
   Run it like this: sort -mnk 1,1 *.nbest | score-nbest.py ...
"""

import sys, itertools, re
import bleu
#Comment out this line when moving to python 2.4
from sets import Set as set

def process(sentnum, testsents):
    """Write the n-best candidates of one sentence to the two output files.

    One header line, "<sentnum> <number of candidates>", is written to the
    module-level ``candsfile``.  Then, for every candidate, one line is
    written to the module-level ``featsfile`` containing, space separated:
    the feature values, the interleaved (correct, guess) n-gram counts
    returned by ``bleu.cook_test``, and the reference length.

    sentnum   -- integer sentence number; also used to index ``cookedrefs``
    testsents -- collection of (sentence string, feature vector) pairs

    Relies on the globals ``candsfile``, ``featsfile`` and ``cookedrefs``
    being set up by the script's main section before it is called.
    """
    # Use the parameter, not the caller's loop variable, for the header.
    candsfile.write("%d %d\n" % (sentnum, len(testsents)))
    for (sent, vector) in testsents:
        comps = bleu.cook_test(sent, cookedrefs[sentnum])

        # Sanity check only: a mismatch is reported but the candidate is
        # still written out.
        if comps['testlen'] != comps['guess'][0]:
            sys.stderr.write("ERROR: test length != guessed 1-grams\n")

        feats = " ".join([str(v) for v in vector])
        ngram_counts = " ".join(["%d %d" % (c, g)
                                 for (c, g) in zip(comps['correct'], comps['guess'])])
        featsfile.write("%s %s %d\n" % (feats, ngram_counts, comps['reflen']))

if __name__ == "__main__":

    import os
    machtype=os.environ.get("MACHTYPE")
    if machtype == "i386":
        #import psyco
        #psyco.full()
        sys.stderr.write("psyco library is NOT imported. Uncomment code in score-nbest.py if you wish to enable it\n")
    else:
        sys.stderr.write("psyco library is not imported because it is not available for %s \n" % machtype)

    import getopt
    (opts,args) = getopt.getopt(sys.argv[1:], "casen", [])

    for (opt,parm) in opts:
        if opt == "-c":
            bleu.preserve_case = True
        if opt == "-a":
            bleu.eff_ref_len = "average"
        if opt == "-s":
            bleu.eff_ref_len = "shortest"
        if opt == "-e":
            bleu.eff_ref_len = "closest"
        if opt == "-n":
            bleu.nonorm = 1

    print args    
    cookedrefs = []
    reffiles = [file(name) for name in args[:-1]]
    print reffiles
    for refs in itertools.izip(*reffiles):
        cookedrefs.append(bleu.cook_refs(refs))
    
    outprefix = args[-1]

    featsfile = file(outprefix+"feats.opt", "w")
    candsfile = file(outprefix+"cands.opt", "w")

    cur_sentnum = None
    testsents = set()
    progress = 0

    infile = sys.stdin

    # function that recognizes floats
    re_float=re.compile(r'^-?[-0-9.e]+$')
    is_float=lambda(x):re_float.match(x)

    for line in infile:
        try:
            ##Changed to add a further field - AA 29/11/05
            #(sentnum, sent, vector) = line.split('|||')
            (sentnum, sent, vector, prob ) = line.split('|||')
        except:
            sys.stderr.write("ERROR: bad input line %s\n" % line)
        sentnum = int(sentnum)
        sent = " ".join(sent.split())
	# filter out score labels (keep only floats) and convert numbers to floats
        vector = tuple(map(lambda(s): -float(s), filter(is_float, vector.split())))

        if sentnum != cur_sentnum:
            if cur_sentnum is not None:
                process(cur_sentnum, testsents)
            cur_sentnum = sentnum
            testsents = set()
        testsents.add((sent,vector))

        if progress % 10000 == 0:
            sys.stdout.write(".")
            sys.stdout.flush()

        progress += 1
    process(cur_sentnum, testsents)

    sys.stdout.write("\n")
    featsfile.close()
    candsfile.close()

score-nbest.py - 源码说明

本页面展示了「moses开源的机器翻译系统」中的 score-nbest.py 源码文件，采用 Python 编程语言编写，共 110 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。

虫虫下载站收录了大量与moses相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。

⌨️ 快捷键说明

复制代码Ctrl + C

搜索代码Ctrl + F

全屏模式F11

增大字号Ctrl + =

减小字号Ctrl + -

显示快捷键?