📄 gram2sapixml.pl.in
字号:
#!@PERL@
#
# Copyright (c) 2002 Takashi Sumiyoshi
# ------------------------------------------------------------
# Converts a Julian-format grammar (.grammar, .voca) into a SAPI XML grammar.
# Run it without arguments to see the usage.
# The Jcode module is required.
# The external iconv command is used to convert the output to UTF-8.
# ------------------------------------------------------------
# Note: Julian grammars cannot use right recursion; conversely, SAPI XML
# grammars cannot use left recursion.  This tool does not perform that
# transformation, so a grammar containing left recursion has to be fixed by
# hand after conversion.
# ------------------------------------------------------------
# The generated SAPI XML grammar is a straightforward conversion that turns
# every non-terminal and terminal symbol of the original grammar into a rule,
# so it has to be edited by hand if something more elegant is wanted.
# ------------------------------------------------------------
# NOTE(review): the kana literals in this file are written to the iconv pipe
# unchanged, and $iconv declares the pipe input to be EUC-JP, so this file
# presumably has to be stored as EUC-JP -- confirm before re-encoding it.
# ------------------------------------------------------------

use strict;
use warnings;
use Jcode;

my $iconv = "iconv -f eucJP -t UTF-8"; # iconv command line

my $removesps = 1; # 1 = drop the sp, silB and silE entries

############################################################
# Tables used by vowel and convertphone
############################################################

# Position of each vowel in the argument list of vowel().
my %VOWEL_COLUMN = ("a" => 1, "i" => 2, "u" => 3, "e" => 4, "o" => 5);

# Prolonged sound mark appended for the long vowels (a:, i:, u:, e:, o:).
my $LONG_VOWEL_MARK = "ー";

# Kana for a bare vowel, in a/i/u/e/o order.
my @VOWEL_ROW = ("あ", "い", "う", "え", "お");

# Kana for each consonant followed by a/i/u/e/o.  Entries such as "kyi?" are
# placeholders carried over unchanged for combinations with no kana.
# NOTE(review): "ts" and "f" reuse the "t" and "h" rows; that looks
# questionable but is kept as it was -- confirm the intended kana.
my %KANA_ROW = (
    "k"  => ["か",   "き",   "く",   "け",   "こ"],
    "ky" => ["きゃ", "kyi?", "きゅ", "kye?", "きょ"],
    "s"  => ["さ",   "し",   "す",   "せ",   "そ"],
    "sy" => ["しゃ", "syi?", "しゅ", "しぇ", "しょ"],
    "sh" => ["しゃ", "し",   "しゅ", "しぇ", "しょ"],
    "t"  => ["た",   "ち",   "つ",   "て",   "と"],
    "ts" => ["た",   "ち",   "つ",   "て",   "と"],
    "ty" => ["ちゃ", "tyi?", "ちゅ", "ちぇ", "ちょ"],
    "ch" => ["ちゃ", "ち",   "ちゅ", "ちぇ", "ちょ"],
    "n"  => ["な",   "に",   "ぬ",   "ね",   "の"],
    "ny" => ["にゃ", "nyi?", "にゅ", "にぇ", "にょ"],
    "h"  => ["は",   "ひ",   "ふ",   "へ",   "ほ"],
    "hy" => ["ひゃ", "hyi?", "ひゅ", "ひぇ", "ひょ"],
    "f"  => ["は",   "ひ",   "ふ",   "へ",   "ほ"],
    "m"  => ["ま",   "み",   "む",   "め",   "も"],
    "my" => ["みゃ", "myi?", "みゅ", "みぇ", "みょ"],
    "y"  => ["や",   "い",   "ゆ",   "え",   "よ"],
    "r"  => ["ら",   "り",   "る",   "れ",   "ろ"],
    "ry" => ["りゃ", "ryi?", "りゅ", "りぇ", "りょ"],
    "w"  => ["わ",   "うぃ", "wu?",  "うぇ", "を"],
    "g"  => ["が",   "ぎ",   "ぐ",   "げ",   "ご"],
    "gy" => ["ぎゃ", "gyi?", "ぎゅ", "ぎぇ", "ぎょ"],
    "z"  => ["ざ",   "じ",   "ず",   "ぜ",   "ぞ"],
    "zy" => ["じゃ", "zyi?", "じゅ", "じぇ", "じょ"],
    "j"  => ["じゃ", "じ",   "じゅ", "じぇ", "じょ"],
    "d"  => ["だ",   "ぢ",   "づ",   "で",   "ど"],
    "dy" => ["ぢゃ", "dyi?", "ぢゅ", "ぢぇ", "ぢょ"],
    "b"  => ["ば",   "び",   "ぶ",   "べ",   "ぼ"],
    "by" => ["びゃ", "byi?", "びゅ", "びぇ", "びょ"],
    "p"  => ["ぱ",   "ぴ",   "ぷ",   "ぺ",   "ぽ"],
    "py" => ["ぴゃ", "pyi?", "ぴゅ", "ぴぇ", "ぴょ"],
);

# Phonemes that map to a fixed string and take no following vowel.
my %KANA_LITERAL = (
    "N"    => "ん",
    "q"    => "っ",
    "sp"   => '@sp',
    "silB" => '@silB',
    "silE" => '@silE',
);

############################################################
# Used by convertphone.
#
# vowel($phoneme, $a, $i, $u, $e, $o)
#   Returns the argument that corresponds to the vowel $phoneme
#   ("a", "i", "u", "e", "o"); for a long vowel ("a:" ... "o:") the
#   prolonged sound mark is appended.  Returns 0 when $phoneme is not a
#   vowel (or is undefined).
############################################################
sub vowel {
    my ($phoneme) = @_;

    return 0 unless defined $phoneme;

    if ($phoneme =~ /\A([aiueo])(:?)\z/) {
        my $kana = $_[ $VOWEL_COLUMN{$1} ];
        return $2 ? $kana . $LONG_VOWEL_MARK : $kana;
    }
    return 0;
}

############################################################
# Subroutine: builds a kana string from a list of phonemes.
# The input is assumed to be well formed.
#
# convertphone(@phonemes)
#   A consonant consumes the phoneme that follows it as its vowel; if that
#   phoneme is not a vowel the pair produces nothing.  Unknown phonemes are
#   skipped.  Returns the concatenated kana string.
############################################################
sub convertphone {
    my @phones = @_;
    my $kana   = "";

    # Loop on the remaining count, not on the value of the shifted element,
    # so that a false-looking token ("0" or "") cannot end the conversion.
    while (@phones) {
        my $c = shift @phones;
        next unless defined $c;

        if (my $row = $KANA_ROW{$c}) {
            my $r = vowel(shift @phones, @$row);
            $kana .= $r if $r;
        }
        elsif (exists $KANA_LITERAL{$c}) {
            $kana .= $KANA_LITERAL{$c};
        }
        elsif (my $r = vowel($c, @VOWEL_ROW)) {
            $kana .= $r;
        }
    }
    return $kana;
}

############################################################
# xml_escape($text)
#   Returns $text with &, <, > and " replaced by XML entities so that it can
#   be used as element text or inside a double-quoted attribute.
############################################################
sub xml_escape {
    my ($text) = @_;

    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

############################################################
# load_voca($vocafile)
#   Reads a voca file.  A line starting with "%" names the current category;
#   every other non-comment line is "<display> <phoneme> <phoneme> ...".
#   Returns two hash references keyed by category name: the display strings
#   and the kana readings, in matching order.  Dies if the file cannot be
#   opened.
############################################################
sub load_voca {
    my ($vocafile) = @_;

    my %lexicon_disp;
    my %lexicon_yomi;
    my $vocaword = "";

    open(my $voca_fh, '<', $vocafile) or die "Cannot open $vocafile: $!";
    while (my $line = <$voca_fh>) {
        $line =~ s/\s+\z//;    # newline, and the CR of CRLF files
        next if $line =~ /^#/;

        if ($line =~ /^%/) {
            $line =~ s/#.*$//;
            $vocaword = substr($line, 1);    # drop the leading %
            $vocaword =~ s/^[ \t]+//;
            $vocaword =~ s/[ \t]+$//;
            next;
        }

        my ($disp, @phones) = split /[ \t]+/, $line;
        next unless defined $disp;           # blank line
        $disp = Jcode->new($disp)->euc;
        next unless defined $disp && $disp ne "";
        next if $removesps == 1
            && ($disp eq "sp" || $disp eq "silB" || $disp eq "silE");

        # Convert the phoneme notation into a kana string and store both
        # the display form and the reading.
        push @{ $lexicon_disp{$vocaword} }, $disp;
        push @{ $lexicon_yomi{$vocaword} }, convertphone(@phones);
    }
    close($voca_fh);

    return (\%lexicon_disp, \%lexicon_yomi);
}

############################################################
# load_grammar($grammarfile)
#   Reads a grammar file whose lines look like "LEFT: SYM SYM ...".
#   Returns a reference to a hash that maps each left-hand symbol to an
#   array of array references, one per alternative.  Dies if the file cannot
#   be opened.
############################################################
sub load_grammar {
    my ($grammarfile) = @_;

    my %grammar_left;

    open(my $grammar_fh, '<', $grammarfile)
        or die "Cannot open $grammarfile: $!";
    while (my $line = <$grammar_fh>) {
        $line =~ s/\s+\z//;    # newline, and the CR of CRLF files
        next if $line =~ /^#/;
        $line =~ s/#.*$//;
        next unless $line =~ /\S/;

        my ($left, @right) = split /[ \t:]+/, $line;
        next unless defined $left;

        # Store a copy of the right-hand side so that every alternative
        # owns its own array.
        push @{ $grammar_left{$left} }, [@right];
    }
    close($grammar_fh);

    return \%grammar_left;
}

############################################################
# open_iconv_pipe($path)
#   Starts the iconv command with its standard output going to $path and
#   returns a write handle connected to its standard input.  Dies if the
#   file cannot be opened or iconv cannot be started.
############################################################
sub open_iconv_pipe {
    my ($path) = @_;

    # The output file is opened here and handed to iconv as its STDOUT, so
    # the file name never passes through the shell.  This script itself
    # writes nothing to STDOUT, so nothing of ours can leak into the file.
    open(my $saved_stdout, '>&', \*STDOUT)
        or die "Cannot save STDOUT: $!";
    open(STDOUT, '>', $path)
        or die "Cannot open $path: $!";

    my $pid        = open(my $pipe, '|-', $iconv);
    my $exec_error = $!;

    open(STDOUT, '>&', $saved_stdout)
        or die "Cannot restore STDOUT: $!";
    close($saved_stdout);

    $pid or die "Cannot exec iconv ($iconv): $exec_error";
    return $pipe;
}

############################################################
# write_sapixml($sapixmlfile, $grammar_left, $lexicon_disp, $lexicon_yomi)
#   Writes the SAPI XML grammar through iconv: first one rule per grammar
#   symbol, then one rule per voca category.  Rules are emitted in sorted
#   name order so that the output is reproducible.  Dies if iconv cannot be
#   started or reports a failure.
############################################################
sub write_sapixml {
    my ($sapixmlfile, $grammar_left, $lexicon_disp, $lexicon_yomi) = @_;

    my $xml = open_iconv_pipe($sapixmlfile);

    print {$xml} "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
    print {$xml} "<GRAMMAR>\n";

    # First the rules of the grammar file: each alternative becomes a list
    # of RULEREFs.
    for my $rule (sort keys %$grammar_left) {
        my $toplevel = $rule eq "S" ? "ACTIVE" : "INACTIVE";
        my $name     = xml_escape($rule);

        print {$xml} "<RULE name=\"$name\" toplevel=\"$toplevel\">\n";
        print {$xml} " <L>\n";
        for my $alternative (@{ $grammar_left->{$rule} }) {
            print {$xml} " <P>\n";
            for my $symbol (@$alternative) {
                # A symbol that is neither a grammar rule nor a voca
                # category with at least one kept word has no rule to refer
                # to (e.g. a category holding only sp/silB/silE).
                next if $removesps == 1
                    && !exists $lexicon_disp->{$symbol}
                    && !exists $grammar_left->{$symbol};
                my $ref = xml_escape($symbol);
                print {$xml} " <RULEREF name=\"$ref\"/>\n";
            }
            print {$xml} " </P>\n";
        }
        print {$xml} " </L>\n";
        print {$xml} "</RULE>\n";
    }

    # Then the voca file: category name -> words.
    for my $category (sort keys %$lexicon_disp) {
        my $disps = $lexicon_disp->{$category};
        my $yomis = $lexicon_yomi->{$category};
        my $name  = xml_escape($category);

        print {$xml} "<RULE name=\"$name\" toplevel=\"INACTIVE\">\n";
        print {$xml} " <L>\n";
        # Iterate by index so that a display string such as "0" does not
        # cut the list short.
        for my $idx (0 .. $#{$disps}) {
            my $disp = xml_escape($disps->[$idx]);
            my $yomi = xml_escape($yomis->[$idx]);
            if ($disp eq $yomi) {
                print {$xml} " <P>$yomi</P>\n";
            }
            else {
                print {$xml} " <P>/$disp/$yomi;</P>\n";
            }
        }
        print {$xml} " </L>\n";
        print {$xml} "</RULE>\n";
    }

    print {$xml} "</GRAMMAR>\n";

    # Closing a pipe waits for the command, so this is where an iconv
    # failure becomes visible.
    close($xml)
        or die $!
            ? "Error closing pipe to iconv for $sapixmlfile: $!"
            : "iconv failed while writing $sapixmlfile (wait status $?)";
    return;
}

############################################################
# Main
############################################################
if (@ARGV == 0) {
    print STDERR <<'EOF';
gram2sapixml.pl by Takashi Sumiyoshi 2002
usage: gram2sapixml.pl [basename] ...
  input files:
    <basename>.grammar (Julian grammar file)
    <basename>.voca (Julian voca file)
  output file:
    <basename>.xml (SAPI Grammar XML file in UTF-8 Format)
This script uses the iconv command to convert the encoding.
EOF
    exit;
}

for my $filebase (@ARGV) {
    my $grammarfile = $filebase . ".grammar";
    my $vocafile    = $filebase . ".voca";
    my $sapixmlfile = $filebase . ".xml";

    print STDERR "Processing $vocafile, $grammarfile...\n";

    my ($lexicon_disp, $lexicon_yomi) = load_voca($vocafile);
    my $grammar_left = load_grammar($grammarfile);

    write_sapixml($sapixmlfile, $grammar_left, $lexicon_disp, $lexicon_yomi);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -