⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 paper.ps

📁 这是一款很好用的工具包
💻 PS
📖 第 1 页 / 共 4 页
字号:
(N-gram)f(LM.)g(The)h(script)1975 4424 y(eliminates)20b(N-grams)f(that)g(ha)o(v)o(e)g(become)i(OO)l(V)e(\(using)g(the)h(te)o(xtual)f(LM)1975 4511 y(format\))f(and)h(then)f(lets)gFh(ngram)f Fj(\002ll)f(in)i(ne)n(w)h(unigrams)f(and)h(renormalize)19754597 y(the)25 b(back)o(of)n(f)g(weights.)39 b(Other)24b(scripts)g(parse)h(the)f(diagnostic)h(output)g(of)19754684 y(the)g(tools,)g(such)h(as)e Fh(compute-best-mix)pFj(,)e(which)j(computes)h(opti-)1975 4771 y(mal)19 b(interpolation)h(weights)f(from)g Fh(ngram)44 b(-ppl)18 b Fj(output.)p1975 4880 789 4 v 2066 4936 a Ff(1)2095 4959 y Fe(Often)j(a)eFh(read)g Fe(function)j(is)d(also)i(needed,)g(b)o(ut)f(can)h(be)f(borro)n(wed)h(from)e(an)1975 5034 y(e)o(xisting)24 b(class)e(if)f(the)h(same)g(e)o(xternal)i(representation)h(is)c(used,)h(as)g(is)f(frequently)1975 5109 y(the)d(case)g(with)g(N-gram)f(based)h(models.)2066 5165 y Ff(2)2095 5189 y Fe(W)-5 b(e)18 b(considered)j(switching)g(to)e(the)h(Standard)g(T)-5 b(emplate)21 b(Library)f(\(STL\))d(for)19755264 y(containers,)k(b)o(ut)d(found)h(that)g(this)g(w)o(ould)g(incur)g(a)f(signi\002cant)j(loss)d(of)g(both)h(speed)1975 5339y(and)f(compactness.)p eop%%Page: 4 44 3 bop -186 83 a Fl(4.4.)45 b(File)18 b(f)n(ormats)-184203 y Fk(S)t(R)t(I)t(L)t(M)29 b Fj(uses)h(standard)g(\002le)f(formats)g(where)h(possible\227in)f(particular)m(,)-186 290 y(the)15b(ARP)-7 b(A)14 b(format)h(for)g(N-gram)h(back)o(of)n(f)g(LMs.)22b(W)-6 b(ord)15 b(graphs)h(use)f(SRI')l(s)-186 377 y(probabilistic)30b(\002nite-state)f(grammar)i(\(PFSG\))d(format,)k(which)f(can)f(be)-186464 y(con)m(v)o(erted)d(to)f(and)h(from)f(that)g(used)h(by)f(A)-8b(T&T')l(s)25 b(\002nite)h(state)f(machine)-186 550 y(toolkit)c([22)q(].)29 b(Where)21 b(ne)n(w)g(\002le)f(formats)h(were)g(needed)i(we)e(chose)h(easy-)-186 637 y(to-parse)j(te)o(xtual)g(representations.)42b(All)24 b(the)g(main)h(tools)g(can)g(read)h(and)-186724 y(write)f(compressed)i(\002les,)g(as)f(lar)o(ge)g(amounts)g(of)g(data)g(and)h(models)f(are)-186 811 y(often)g(associated)h(with)e(LM)h(w)o(ork.)45 b(W)-6 b(e)25 b(a)o(v)o(oided)h(binary)h(\002le)e(formats)-186 897 y(because)18 b(of)f(their)g(lack)g(of)g(portability)g(and)g(\003e)o(xibility)-5 b(,)17 b(and)g(prefer)g(to)g(use)-186984 y(compressed)j(te)o(xtual)f(formats)g(instead.)-1381153 y Fl(5.)45 b(SHOR)m(TCOMINGS)17 b(AND)i(FUTURE)f(DEVELOPMENTS)-1861307 y Fj(Man)o(y)36 b(well-established)g(LM)g(techniques)h(are)e(not)h(implemented)g(in)-184 1394 y Fk(S)t(R)t(I)t(L)t(M)rFj(,)17 b(such)j(as)f(deleted)g(interpolation)h(or)f(maximum)h(entrop)o(y)h(model-)-186 1480 y(ing,)c(mainly)h(because)h(these)e(ha)o(v)o(e)g(not)h(pro)o(v)o(en)g(essential)g(in)f(our)g(w)o(ork)h(so)-1861567 y(f)o(ar)l(.)k(One)15 b(candidate)h(for)f(future)g(addition)h(is)e(a)h(more)g(\003e)o(xible)g(class-based)-186 1654 y(model,)25b(since)f(re\002nements)g(of)g(class-based)g(LMs)g(seem)g(to)f(pro)o(vide)i(an)-186 1741 y(ef)n(fecti)n(v)o(e)17 b(and)h(ef)n(\002cient)e(w)o(ay)i(to)f(incorporate)g(grammatical)h(information)-1861827 y(into)30 b(the)f(LM)h([23].)55 b(The)30 b(lo)n(w-le)n(v)o(el)f(implementation)i(of)e(data)h(struc-)-186 1914 y(tures)18b(is)g(currently)h(biased)g(to)n(w)o(ard)f(speed)h(and)g(con)m(v)o(enience)i(rather)d(than)-186 2001 y(memory)27 b(usage;)32b(it)26 b(might)h(be)g(w)o(orthwhile)g(to)f(ree)n(v)n(aluate)i(this)e(choice)-186 2088 y(to)19 b(accommodate)j(e)n(v)o(er)o(-lar)o(ger)d(training)g(corpora.)28 b Fk(S)t(R)t(I)t(L)t(M)19 b Fj(currently)h(as-)-186 2174 y(sumes)15 b(single-byte)h(character)g(encoding)g(and)g(uses)f(only)h(whitespace)f(for)-186 2261 y(tok)o(enization;)32b(it)26 b(w)o(ould)i(be)f(desirable)g(to)g(include)g(support)h(for)f(multi-)-186 2348 y(byte)22 b(character)h(sets)f(and)g(SGML-tagged)h(formats)f(at)g(some)g(point.)33 b(Ul-)-186 2435 y(timately)-5b(,)24 b(ho)n(we)n(v)o(er)m(,)i(de)n(v)o(elopment)f(of)e(the)h(toolkit)f(will)g(continue)i(to)e(be)-186 2521 y(dri)n(v)o(en)d(by)f(research)h(needs,)f(and)h(is)e(therefore)h(hard)h(to)f(predict.)-612608 y Fl(A)-7 b(v)o(ailability)i(.)96 b Fk(S)t(R)t(I)t(L)t(M)43b Fj(is)g(freely)g(a)o(v)n(ailable)g(for)g(noncommer)o(-)-1862695 y(cial)28 b(users)h(under)g(an)g(Open)f(Source)h(Community)g(License,)i(designed)-186 2782 y(to)46 b(ensure)g(that)f(enhancements)j(by)e(others)g(\002nd)g(their)f(w)o(ay)h(back)-186 2868y(into)36 b(the)f(user)h(community)-5 b(.)75 b(Licensing)36b(for)g(commercial)g(purposes)-186 2955 y(is)j(also)h(a)o(v)n(ailable.)85 b(Documentation)41 b(and)f(softw)o(are)g(are)f(online)h(at)-1863042 y(http://www)-5 b(.speech.sri.com/projects/srilm/.)3903190 y Fd(6.)40 b(A)l(CKNO)m(WLEDGMENTS)-186 3324 y Fe(Fuliang)32b(W)-5 b(eng)31 b(wrote)g(the)h(initial)h(v)o(ersion)f(of)f(the)g(lattice)j(rescoring)e(tool)g(in)-184 3399 y Fc(S)t(R)t(I)t(L)t(M)rFe(;)15 b(Dimitra)20 b(V)-7 b(er)o(gyri)18 b(de)n(v)o(eloped)j(the)e(score)f(combination)j(optimizer)f(based)-186 3474 y(on)32b(simple)o(x)h(search;)42 b(Anand)33 b(V)-7 b(enkataraman)34b(contrib)o(uted)h(N-best)e(decoding)-186 3549 y(and)24b(other)h(enhancements)h(to)e(the)g(statistical)j(tagging)e(tools.)41b(De)n(v)o(elopment)26 b(of)-184 3624 y Fc(S)t(R)t(I)t(L)t(M)hFe(has)h(bene\002ted)j(greatly)f(from)e(its)h(use)g(and)g(constructi)n(v)o(e)j(criticism)e(by)-186 3699 y(man)o(y)23 b(colleagues)j(at)d(SRI,)f(the)i(Johns)f(Hopkins)g(summer)f(w)o(orkshops,)j(and)e(the)-1863774 y(lar)o(ger)30 b(research)h(community)l(.)57 b(Barbara)31b(Peskin)e(helped)i(impro)o(v)o(e)e(this)h(paper)-1863849 y(with)18 b(v)n(aluable)j(suggestions.)j(The)17b(w)o(ork)h(described)i(here)e(w)o(as)g(in)g(part)g(supported)-1863924 y(by)g(D)m(ARP)-6 b(A)19 b(under)g(contract)i(N66001-97-C-8544)f(and)f(by)g(NSF-STIMULA)-7 b(TE)-186 3999 y(grant)24b(IRI-9619921.)38 b(The)22 b(vie)n(ws)i(herein)g(are)f(those)g(of)f(the)h(author)h(and)f(do)g(not)-186 4074 y(re\003ect)c(the)e(policies)j(of)d(the)g(funding)h(agencies.)532 4221 y Fd(7.)40 b(REFERENCES)-1534356 y Fe([1])i(P)-7 b(.)18 b(Clarkson)j(and)f(R.)f(Rosenfeld,)34b(\223Statistical)24 b(language)e(modeling)f(using)-344431 y(the)h(CMU-Cambridge)g(toolkit\224,)40 b(in)21b(G.)e(K)n(okkinakis,)k(N.)d(F)o(ak)o(otakis,)k(and)-344506 y(E.)c(Dermatas,)i(editors,)h Fb(Pr)m(oc.)d(EUR)m(OSPEECH)pFe(,)e(v)o(ol.)j(1,)g(pp.)f(2707\2262710,)-34 4581 y(Rhodes,)e(Greece,)g(Sep.)f(1997.)-153 4684 y([2])42 b(F)-5 b(.)19 b(Jelinek,)35b(\223Up)20 b(from)f(trigrams!)30 b(The)20 b(struggle)h(for)f(impro)o(v)o(ed)h(language)-34 4759 y(models\224,)50 b Fb(in)25b(Pr)m(oc.)f(EUR)m(OSPEECH)p Fe(,)e(pp.)i(1037\2261040,)j(Geno)o(v)n(a,)g(Italy)l(,)-34 4833 y(Sep.)17 b(1991.)-153 4936y([3])42 b(R.)14 b(Rosenfeld,)22 b(\223T)-5 b(w)o(o)15b(decades)h(of)f(statistical)j(language)g(modeling:)j(Where)-345011 y(do)c(we)g(go)g(from)g(here?\224,)26 b Fb(Pr)m(oceedings)18b(of)g(the)f(IEEE)p Fe(,)f(v)o(ol.)g(88,)h(2000.)-1535114 y([4])42 b(D.)17 b(Jurafsk)o(y)i(and)g(J.)e(H.)g(Martin,)29b Fb(Speec)o(h)20 b(and)f(Langua)o(g)o(e)h(Pr)m(ocessing:)k(An)-345189 y(Intr)m(oduction)k(to)e(Natur)o(al)h(Langua)o(g)o(e)g(Pr)m(ocessing)o(,)h(Computational)h(Lin-)-34 5264 y(guistics,)16b(and)e(Speec)o(h)i(Reco)o(gnition)p Fe(,)21 b(Prentice-Hall,)d(Upper)c(Saddle)i(Ri)n(v)o(er)m(,)-34 5339 y(NJ,)g(2000.)200983 y([5])42 b(J.)18 b(J.)h(Odell,)33 b Fb(Lattice)22b(and)d(Langua)o(g)o(e)j(Model)e(T)-6 b(oolkit)22 b(Refer)n(ence)f(Manual)p Fe(,)2128 158 y(Entropic)d(Cambridge)h(Research)g(Laboratories,)g(Inc.,)d(1995.)2009 260 y([6])42 b(A.)22b(Stolck)o(e,)47 b Fb(Bayesian)24 b(Learning)h(of)e(Pr)m(obabilistic)j(Langua)o(g)o(e)f(Models)p Fe(,)2128 335 y(PhD)16 b(thesis,)i(Uni)n(v)o(ersity)h(of)e(California,)j(Berk)o(ele)o(y)l(,)f(CA,)d(July)h(1994.)2009 437 y([7])42 b(H.)18 b(Murv)o(eit,)i(J.)e(Butzber)o(ger)m(,)k(V)-9b(.)19 b(Digalakis,)i(and)e(M.)f(W)-5 b(eintraub,)33b(\223Lar)o(ge-)2128 512 y(v)o(ocab)o(ulary)15 b(dictation)h(using)d(SRI')l(s)h(DECIPHER)e(speech)j(recognition)h(sys-)2128587 y(tem:)32 b(Progressi)n(v)o(e)24 b(search)f(techniques\224,)46b Fb(in)22 b(Pr)m(oc.)g(ICASSP)p Fe(,)f(v)o(ol.)h(II,)g(pp.)2128661 y(319\226322,)17 b(Minneapolis,)j(Apr)l(.)c(1993.)2009763 y([8])42 b(M.)20 b(W)-5 b(eintraub,)24 b(Y)-9 b(.)21b(Aksu,)h(S.)e(Dharanipragada,)26 b(S.)20 b(Khudanpur)m(,)k(H.)d(Ne)o(y)l(,)2128 838 y(J.)h(Prange,)27 b(A.)c(Stolck)o(e,)k(F)-5b(.)22 b(Jelinek,)27 b(and)e(E.)d(Shriber)o(g,)49 b(\223LM95)24b(Project)2128 913 y(Report:)30 b(F)o(ast)21 b(training)i(and)f(portability\224,)40 b(Research)23 b(Note)f(1,)f(Center)h(for)2128988 y(Language)g(and)g(Speech)h(Processing,)g(Johns)f(Hopkins)g(Uni)n(v)o(ersity)l(,)i(Balti-)2128 1063 y(more,)16 b(Feb)m(.)h(1996.)20091165 y([9])42 b(S.)16 b(F)-5 b(.)16 b(Chen)i(and)g(J.)e(Goodman,)25b(\223)-5 b(An)17 b(empirical)j(study)e(of)f(smoothing)h(tech-)21281240 y(niques)k(for)f(language)j(modeling\224,)40 b(T)-5b(echnical)24 b(Report)f(TR-10-98,)g(Com-)2128 1315 y(puter)18b(Science)h(Group,)e(Harv)n(ard)h(Uni)n(v)o(ersity)l(,)h(Aug.)d(1998.)1975 1417 y([10])43 b(A.)22 b(Stolck)o(e,)48 b(\223Entrop)o(y-based)27b(pruning)d(of)g(back)o(of)n(f)h(language)h(models\224,)21281492 y Fb(in)f(Pr)m(oceedings)i(D)n(ARP)-6 b(A)23 b(Br)m(oadcast)j(Ne)o(ws)f(T)l(r)o(anscription)j(and)d(Under)o(-)2128 1567y(standing)c(W)-6 b(orkshop)p Fe(,)21 b(pp.)e(270\226274,)i(Lansdo)n(wne,)g(V)-9 b(A,)19 b(Feb)m(.)h(1998.)g(Mor)o(-)21281641 y(gan)d(Kaufmann.)1975 1743 y([11])43 b(P)-7 b(.)22b(F)-5 b(.)22 b(Bro)n(wn,)j(V)-9 b(.)23 b(J.)f(Della)j(Pietra,)h(P)-7b(.)22 b(V)-9 b(.)23 b(deSouza,)j(J.)d(C.)f(Lai,)j(and)f(R.)f(L.)21281818 y(Mercer)m(,)j(\223Class-based)20 b Fa(n)p Fe(-gram)d(models)h(of)g(natural)h(language\224,)28 b Fb(Compu-)2128 1893 y(tational)19b(Linguistics)p Fe(,)g(v)o(ol.)d(18,)h(pp.)g(467\226479,)g(1992.)19751995 y([12])43 b(R.)18 b(K)o(uhn)h(and)g(R.)f(de)h(Mori,)30b(\223)-5 b(A)19 b(cache-base)i(natural)g(language)g(model)e(for)21282070 y(speech)f(recognition\224,)27 b Fb(IEEE)16 b(P)-6b(AMI)p Fe(,)16 b(v)o(ol.)h(12,)f(pp.)h(570\226583,)g(June)h(1990.)19752172 y([13])43 b(A.)24 b(Stolck)o(e,)29 b(E.)c(Shriber)o(g,)j(D.)d(Hakkani-T)6 b(\250)-28 b(ur)m(,)29 b(and)d(G.)e(T)6b(\250)-28 b(ur)m(,)53 b(\223Modeling)2128 2247 y(the)26b(prosody)g(of)g(hidden)h(e)n(v)o(ents)g(for)f(impro)o(v)o(ed)h(w)o(ord)f(recognition\224,)57 b Fb(in)2128 2322 y(Pr)m(oc.)16b(EUR)m(OSPEECH)p Fe(,)f(v)o(ol.)i(1,)f(pp.)h(307\226310,)g(Budapest,)i(Sep.)d(1999.)1975 2424 y([14])43 b(A.)12 b(Stolck)o(e)j(and)f(E.)e(Shriber)o(g,)18 b(\223Statistical)f(language)f(modeling)f(for)e(speech)2128 2499 y(dis\003uencies\224,)44 b Fb(in)22b(Pr)m(oc.)g(ICASSP)p Fe(,)f(v)o(ol.)h(1,)h(pp.)f(405\226408,)i(Atlanta,)h(May)2128 2574 y(1996.)1975 2675 y([15])43b(R.)15 b(Iyer)m(,)i(M.)f(Ostendorf,)h(and)f(J.)g(R.)f(Rohlicek,)25b(\223Language)18 b(modeling)g(with)2128 2750 y(sentence-le)n(v)o(el)k(mixtures\224,)28 b Fb(in)18 b(Pr)m(oc.)f(ARP)-6 b(A)16b(HL)o(T)h(W)-6 b(orkshop)p Fe(,)18 b(pp.)f(82\22686,)21282825 y(Plainsboro,)h(NJ,)e(Mar)l(.)h(1994.)1975 2927y([16])43 b(K.)17 b(W)-6 b(.)16 b(Ma,)i(G.)f(Za)o(v)n(aliagk)o(os,)j(and)f(M.)e(Meteer)m(,)29 b(\223Sub-sentence)21 b(discourse)21283002 y(models)27 b(for)f(con)m(v)o(ersational)31 b(speech)d(recognition\224,)60 b Fb(in)26 b(Pr)m(oc.)g(ICASSP)pFe(,)2128 3077 y(v)o(ol.)16 b(II,)h(pp.)f(693\226696,)i(Seattle,)h(W)-8b(A,)16 b(May)h(1998.)1975 3179 y([17])43 b(A.)14 b(Stolck)o(e,)24b(\223Modeling)18 b(linguistic)g(se)o(gment)f(and)f(turn)g(boundaries)i(for)e(N-)2128 3254 y(best)21 b(rescoring)h(of)f(spontaneous)i(speech\224,)39 b(in)21 b(G.)f(K)n(okkinakis,)j(N.)d(F)o(ak)o(o-)21283329 y(takis,)29 b(and)e(E.)e(Dermatas,)k(editors,)hFb(Pr)m(oc.)c(EUR)m(OSPEECH)p Fe(,)e(v)o(ol.)i(5,)i(pp.)21283404 y(2779\2262782,)18 b(Rhodes,)f(Greece,)h(Sep.)f(1997.)19753506 y([18])43 b(A.)19 b(Stolck)o(e,)j(Y)-9 b(.)20 b(K)n(onig,)g(and)h(M.)e(W)-5 b(eintraub,)35 b(\223Explicit)23 b(w)o(ord)d(error)g(min-)2128 3581 y(imization)i(in)e(N-best)h(list)g(rescoring\224,)36b(in)20 b(G.)g(K)n(okkinakis,)h(N.)f(F)o(ak)o(otakis,)21283656 y(and)15 b(E.)g(Dermatas,)h(editors,)h Fb(Pr)m(oc.)d(EUR)m(OSPEECH)p Fe(,)g(v)o(ol.)h(1,)g(pp.)g(163\226166,)21283730 y(Rhodes,)i(Greece,)h(Sep.)f(1997.)1975 3832 y([19])43b(L.)21 b(Mangu,)k(E.)d(Brill,)j(and)e(A.)f(Stolck)o(e,)47b(\223Finding)24 b(consensus)g(in)f(speech)2128 3907y(recognition:)j(W)-5 b(ord)17 b(error)h(minimization)j(and)d(other)h(applications)i(of)d(con-)2128 3982 y(fusion)d(netw)o(orks\224,)21b Fb(Computer)15 b(Speec)o(h)i(and)d(Langua)o(g)o(e)pFe(,)j(v)o(ol.)d(14,)h(pp.)f(373\226)2128 4057 y(400,)i(Oct.)i(2000.)1975 4159 y([20])43 b(J.)22 b(G.)g(Fiscus,)46 b(\223)-5b(A)23 b(post-processing)i(system)f(to)f(yield)i(reduced)g(w)o(ord)e(er)o(-)2128 4234 y(ror)g(rates:)36 b(Recognizer)26 b(output)f(v)o(oting)g(error)f(reduction)i(\(R)m(O)m(VER\)\224,)45b Fb(in)2128 4309 y(Pr)m(oceedings)24 b(IEEE)d(A)o(utomatic)j(Speec)o(h)g(Reco)o(gnition)i(and)d(Under)o(stand-)2128 4384y(ing)17 b(W)-6 b(orkshop)p Fe(,)17 b(pp.)g(347\226352,)h(Santa)g(Barbara,)g(CA,)f(1997.)1975 4486 y([21])43 b(A.)25 b(Stolck)o(e,)31b(H.)26 b(Bratt,)k(J.)25 b(Butzber)o(ger)m(,)31 b(H.)26b(Franco,)k(V)-9 b(.)26 b(R.)g(Rao)h(Gadde,)2128 4561y(M.)32 b(Plauch)t(\264)-26 b(e,)38 b(C.)33 b(Riche)o(y)l(,)38b(E.)32 b(Shriber)o(g,)38 b(K.)32 b(S)6 b(\250)-28 b(onmez,)37b(F)-5 b(.)32 b(W)-5 b(eng,)37 b(and)2128 4636 y(J.)19b(Zheng,)34 b(\223The)20 b(SRI)g(March)h(2000)f(Hub-5)g(con)m(v)o(ersational)k(speech)d(tran-)2128 4710 y(scription)f(system\224,)27b Fb(in)18 b(Pr)m(oceedings)i(NIST)e(Speec)o(h)i(T)l(r)o(anscription)g(W)-6 b(ork-)2128 4785 y(shop)p Fe(,)16 b(Colle)o(ge)j(P)o(ark,)e(MD,)g(May)g(2000.)1975 4887 y([22])43 b(M.)54 b(Mohri,)64b(F)-5 b(.)54 b(Pereira,)65 b(and)56 b(M.)e(Rile)o(y)l(,)155b(\223FSM)55 b(Library\227)2128 4962 y(general-purpose)34b(\002nite-state)h(machine)e(softw)o(are)g(tools,)i(v)o(ersion)e(3.6\224,)2128 5037 y(http://www)l(.research.att.com/sw/tool)q(s/fsm/,)22 b(1998.)1975 5139 y([23])43 b(W)-6 b(.)25 b(W)-5 b(ang,)29b(Y)-9 b(.)26 b(Liu,)j(and)e(M.)f(P)-7 b(.)26 b(Harper)m(,)59b(\223Rescoring)29 b(ef)n(fecti)n(v)o(eness)i(of)21285214 y(language)22 b(models)f(using)g(dif)n(ferent)j(le)n(v)o(els)e(of)e(kno)n(wledge)j(and)e(their)h(inte-)2128 5289 y(gration\224,)kFb(in)17 b(Pr)m(oc.)f(ICASSP)p Fe(,)h(Orlando,)g(FL,)f(May)h(2002.)peop%%Trailerenduserdict /end-hook known{end-hook}if%%EOF

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -