📄 affixmgr.cxx
字号:
{
if (! compound ) return NULL;
return mystrdup(compound);
}
// Root-word lookup helper: forwards the query to the hash manager (pHMgr).
// Yields NULL when no hash manager is attached; otherwise returns whatever
// pHMgr->lookup(word) returns.
struct hentry * AffixMgr::lookup(const char * word)
{
    return pHMgr ? pHMgr->lookup(word) : NULL;
}
// Accessor: reports the current value of the nosplitsugs member.
bool AffixMgr::get_nosplitsugs(void)
{
    const bool current = nosplitsugs;
    return current;
}
/* Parse the TRY line of the affix file.
 *
 * The line is split on single spaces; empty tokens are ignored.  The first
 * non-empty token is only counted (it is not validated here) and the second
 * one is duplicated into trystring.  Any further tokens are ignored.
 *
 * Returns 0 on success, 1 if trystring was already set or if fewer than
 * two non-empty tokens were found.
 */
int AffixMgr::parse_try(char * line)
{
    if (trystring != NULL) {
        fprintf(stderr,"error: duplicate TRY strings\n");
        return 1;
    }

    char * cursor = line;
    int field = 0;   // index of the current non-empty token
    int found = 0;   // how many of the two expected fields were seen

    // every token handed back by mystrsep is released here with free()
    for (char * token = mystrsep(&cursor, ' ');
         token != NULL;
         token = mystrsep(&cursor, ' ')) {
        if (*token != '\0') {
            if (field == 0) {
                found++;
            } else if (field == 1) {
                trystring = mystrdup(token);
                found++;
            }
            field++;
        }
        free(token);
    }

    if (found == 2) return 0;

    fprintf(stderr,"error: missing TRY information\n");
    return 1;
}
/* Parse the SET line, which names the character set used by the
 * .dict and .aff files.
 *
 * Tokens are separated by single spaces and empty tokens are skipped.
 * The first non-empty token is merely counted; the second is copied into
 * encoding.  Anything after that is ignored.
 *
 * Returns 0 on success, 1 when encoding is already set or when the line
 * does not contain two non-empty tokens.
 */
int AffixMgr::parse_set(char * line)
{
    if (encoding != NULL) {
        fprintf(stderr,"error: duplicate SET strings\n");
        return 1;
    }

    char * rest = line;
    char * tok = NULL;
    int index = 0;     // position of the current non-empty token
    int matched = 0;   // number of expected fields consumed

    while ((tok = mystrsep(&rest, ' ')) != NULL) {
        const bool empty = (*tok == '\0');
        if (!empty) {
            if (index == 0) {
                matched++;
            } else if (index == 1) {
                encoding = mystrdup(tok);
                matched++;
            }
            index++;
        }
        free(tok);   // tokens from mystrsep are released by the caller
    }

    if (matched != 2) {
        fprintf(stderr,"error: missing SET information\n");
        return 1;
    }
    return 0;
}
/* Parse the line that defines the flag used by controlled compound words.
 *
 * The line is tokenized on single spaces (empty tokens skipped).  The first
 * non-empty token is counted only; the second is duplicated into compound.
 * Later tokens are ignored.
 *
 * Returns 0 on success, 1 if compound was already set or the line holds
 * fewer than two non-empty tokens.
 */
int AffixMgr::parse_cpdflag(char * line)
{
    if (compound != NULL) {
        fprintf(stderr,"error: duplicate compound flags used\n");
        return 1;
    }

    char * scan = line;
    int slot = 0;    // which non-empty token we are looking at
    int fields = 0;  // expected fields found so far

    for (;;) {
        char * part = mystrsep(&scan, ' ');
        if (part == NULL) break;

        if (*part != '\0') {
            if (slot == 1) {
                compound = mystrdup(part);
                fields++;
            } else if (slot == 0) {
                fields++;
            }
            slot++;
        }
        free(part);   // each token is released once it has been examined
    }

    if (fields == 2) return 0;

    fprintf(stderr,"error: missing compound flag information\n");
    return 1;
}
/* Parse the line that gives the minimum compound word length.
 *
 * The line is tokenized on single spaces (empty tokens skipped).  The first
 * non-empty token is counted only; the second is converted with atoi() and
 * stored in cpdmin.  Later tokens are ignored.
 *
 * After a successful parse, a cpdmin outside 1..50 is replaced by 3.
 *
 * Returns 0 on success, 1 when fewer than two non-empty tokens are present.
 */
int AffixMgr::parse_cpdmin(char * line)
{
    char * cursor = line;
    int field = 0;   // index of the current non-empty token
    int found = 0;   // number of expected fields seen

    for (char * token = mystrsep(&cursor, ' ');
         token != NULL;
         token = mystrsep(&cursor, ' ')) {
        if (*token != '\0') {
            if (field == 0) {
                found++;
            } else if (field == 1) {
                cpdmin = atoi(token);
                found++;
            }
            field++;
        }
        free(token);   // tokens from mystrsep are released by the caller
    }

    if (found != 2) {
        fprintf(stderr,"error: missing compound min information\n");
        return 1;
    }

    // fall back to 3 when the configured value is not within 1..50
    const bool in_range = (cpdmin >= 1) && (cpdmin <= 50);
    if (!in_range) cpdmin = 3;
    return 0;
}
/* Parse the typical-fault correcting (REP) table.
 *
 * line - header line; its second non-empty token is the number of entries.
 *        The same buffer is reused to read the entry lines, so it is
 *        presumably at least MAXLNLEN bytes long (confirm against caller).
 * af   - open affix file positioned just after the header line.
 *
 * Each of the following numrep lines must start with a token beginning with
 * "REP", followed by a pattern token and a replacement token; both strings
 * are duplicated into reptable[j].
 *
 * Returns 0 on success and 1 on any error (duplicate table, bad count,
 * allocation failure, premature end of file, malformed entry).
 */
int AffixMgr::parse_reptable(char * line, FILE * af)
{
    if (numrep != 0) {
        fprintf(stderr,"error: duplicate REP tables used\n");
        return 1;
    }
    char * tp = line;
    char * piece;
    int i = 0;
    int np = 0;
    while ((piece=mystrsep(&tp,' '))) {
        if (*piece != '\0') {
            switch(i) {
                case 0: { np++; break; }
                case 1: {
                    numrep = atoi(piece);
                    if (numrep < 1) {
                        fprintf(stderr,"incorrect number of entries in replacement table\n");
                        free(piece);
                        return 1;
                    }
                    // calloc also guards the count * size multiplication
                    reptable = (replentry *) calloc(numrep, sizeof(struct replentry));
                    if (!reptable) {
                        fprintf(stderr,"error: out of memory for replacement table\n");
                        numrep = 0;   // keep count and table pointer consistent
                        free(piece);
                        return 1;
                    }
                    // Give every entry a defined state up front, so that any
                    // code walking all numrep entries after a failed parse
                    // sees NULL rather than indeterminate pointers.
                    for (int k = 0; k < numrep; k++) {
                        reptable[k].pattern = NULL;
                        reptable[k].replacement = NULL;
                    }
                    np++;
                    break;
                }
                default: break;
            }
            i++;
        }
        free(piece);
    }
    if (np != 2) {
        fprintf(stderr,"error: missing replacement table information\n");
        return 1;
    }

    /* now parse the numrep lines to read in the remainder of the table */
    char * nl = line;
    for (int j=0; j < numrep; j++) {
        // A short file must not be mistaken for another entry: without this
        // check the previous buffer contents would be parsed again.
        if (!fgets(nl,MAXLNLEN,af)) {
            fprintf(stderr,"error: replacement table is corrupt\n");
            return 1;
        }
        mychomp(nl);
        tp = nl;
        i = 0;
        while ((piece=mystrsep(&tp,' '))) {
            if (*piece != '\0') {
                switch(i) {
                    case 0: {
                        // only the first three characters are compared
                        if (strncmp(piece,"REP",3) != 0) {
                            fprintf(stderr,"error: replacement table is corrupt\n");
                            free(piece);
                            return 1;
                        }
                        break;
                    }
                    case 1: { reptable[j].pattern = mystrdup(piece); break; }
                    case 2: { reptable[j].replacement = mystrdup(piece); break; }
                    default: break;
                }
                i++;
            }
            free(piece);
        }
        // both fields are mandatory for every entry
        if ((!(reptable[j].pattern)) || (!(reptable[j].replacement))) {
            fprintf(stderr,"error: replacement table is corrupt\n");
            return 1;
        }
    }
    return 0;
}
/* Parse the character map (MAP) table.
 *
 * line - header line; its second non-empty token is the number of entries.
 *        The same buffer is reused to read the entry lines, so it is
 *        presumably at least MAXLNLEN bytes long (confirm against caller).
 * af   - open affix file positioned just after the header line.
 *
 * Each of the following nummap lines must start with a token beginning with
 * "MAP", followed by a token holding the character set; that token is
 * duplicated into maptable[j].set and its length stored in maptable[j].len.
 *
 * Returns 0 on success and 1 on any error (duplicate table, bad count,
 * allocation failure, premature end of file, malformed entry).
 */
int AffixMgr::parse_maptable(char * line, FILE * af)
{
    if (nummap != 0) {
        fprintf(stderr,"error: duplicate MAP tables used\n");
        return 1;
    }
    char * tp = line;
    char * piece;
    int i = 0;
    int np = 0;
    while ((piece=mystrsep(&tp,' '))) {
        if (*piece != '\0') {
            switch(i) {
                case 0: { np++; break; }
                case 1: {
                    nummap = atoi(piece);
                    if (nummap < 1) {
                        fprintf(stderr,"incorrect number of entries in map table\n");
                        free(piece);
                        return 1;
                    }
                    // calloc also guards the count * size multiplication
                    maptable = (mapentry *) calloc(nummap, sizeof(struct mapentry));
                    if (!maptable) {
                        fprintf(stderr,"error: out of memory for map table\n");
                        nummap = 0;   // keep count and table pointer consistent
                        free(piece);
                        return 1;
                    }
                    // Give every entry a defined state up front, so that any
                    // code walking all nummap entries after a failed parse
                    // sees NULL/0 rather than indeterminate values.
                    for (int k = 0; k < nummap; k++) {
                        maptable[k].set = NULL;
                        maptable[k].len = 0;
                    }
                    np++;
                    break;
                }
                default: break;
            }
            i++;
        }
        free(piece);
    }
    if (np != 2) {
        fprintf(stderr,"error: missing map table information\n");
        return 1;
    }

    /* now parse the nummap lines to read in the remainder of the table */
    char * nl = line;
    for (int j=0; j < nummap; j++) {
        // A short file must not be mistaken for another entry: without this
        // check the previous buffer contents would be parsed again.
        if (!fgets(nl,MAXLNLEN,af)) {
            fprintf(stderr,"error: map table is corrupt\n");
            return 1;
        }
        mychomp(nl);
        tp = nl;
        i = 0;
        while ((piece=mystrsep(&tp,' '))) {
            if (*piece != '\0') {
                switch(i) {
                    case 0: {
                        // only the first three characters are compared
                        if (strncmp(piece,"MAP",3) != 0) {
                            fprintf(stderr,"error: map table is corrupt\n");
                            free(piece);
                            return 1;
                        }
                        break;
                    }
                    case 1: {
                        maptable[j].set = mystrdup(piece);
                        // guard strlen in case mystrdup could not produce a
                        // copy; the check below then reports the entry
                        maptable[j].len = maptable[j].set ? strlen(maptable[j].set) : 0;
                        break;
                    }
                    default: break;
                }
                i++;
            }
            free(piece);
        }
        // a non-empty set is mandatory for every entry
        if ((!(maptable[j].set)) || (!(maptable[j].len))) {
            fprintf(stderr,"error: map table is corrupt\n");
            return 1;
        }
    }
    return 0;
}
int AffixMgr::parse_affix(char * line, const char at, FILE * af)
{
int numents = 0; // number of affentry structures to parse
char achar='\0'; // affix char identifier
short ff=0;
struct affentry * ptr= NULL;
struct affentry * nptr= NULL;
char * tp = line;
char * nl = line;
char * piece;
int i = 0;
// split affix header line into pieces
int np = 0;
while ((piece=mystrsep(&tp,' '))) {
if (*piece != '\0') {
switch(i) {
// piece 1 - is type of affix
case 0: { np++; break; }
// piece 2 - is affix char
case 1: { np++; achar = *piece; break; }
// piece 3 - is cross product indicator
case 2: { np++; if (*piece == 'Y') ff = XPRODUCT; break; }
// piece 4 - is number of affentries
case 3: {
np++;
numents = atoi(piece);
ptr = (struct affentry *) malloc(numents * sizeof(struct affentry));
ptr->xpflg = ff;
ptr->achar = achar;
break;
}
default: break;
}
i++;
}
free(piece);
}
// check to make sure we parsed enough pieces
if (np != 4) {
fprintf(stderr, "error: affix %c header has insufficient data in line %s\n",achar,nl);
free(ptr);
return 1;
}
// store away ptr to first affentry
nptr = ptr;
// now parse numents affentries for this affix
for (int j=0; j < numents; j++) {
fgets(nl,MAXLNLEN,af);
mychomp(nl);
tp = nl;
i = 0;
np = 0;
// split line into pieces
while ((piece=mystrsep(&tp,' '))) {
if (*piece != '\0') {
switch(i) {
// piece 1 - is type
case 0: {
np++;
if (nptr != ptr) nptr->xpflg = ptr->xpflg;
break;
}
// piece 2 - is affix char
case 1: {
np++;
if (*piece != achar) {
fprintf(stderr, "error: affix %c is corrupt near line %s\n",achar,nl);
fprintf(stderr, "error: possible incorrect count\n");
free(piece);
return 1;
}
if (nptr != ptr) nptr->achar = ptr->achar;
break;
}
// piece 3 - is string to strip or 0 for null
case 2: {
np++;
nptr->strip = mystrdup(piece);
nptr->stripl = strlen(nptr->strip);
if (strcmp(nptr->strip,"0") == 0) {
free(nptr->strip);
nptr->strip=mystrdup("");
nptr->stripl = 0;
}
break;
}
// piece 4 - is affix string or 0 for null
case 3: {
np++;
nptr->appnd = mystrdup(piece);
nptr->appndl = strlen(nptr->appnd);
if (strcmp(nptr->appnd,"0") == 0) {
free(nptr->appnd);
nptr->appnd=mystrdup("");
nptr->appndl = 0;
}
break;
}
// piece 5 - is the conditions descriptions
case 4: { np++; encodeit(nptr,piece); }
default: break;
}
i++;
}
free(piece);
}
// check to make sure we parsed enough pieces
if (np != 5) {
fprintf(stderr, "error: affix %c is corrupt near line %s\n",achar,nl);
free(ptr);
return 1;
}
nptr++;
}
// now create SfxEntry or PfxEntry objects and use links to
// build an ordered (sorted by affix string) list
nptr = ptr;
for (int k = 0; k < numents; k++) {
if (at == 'P') {
PfxEntry * pfxptr = new PfxEntry(this,nptr);
build_pfxtree((AffEntry *)pfxptr);
} else {
SfxEntry * sfxptr = new SfxEntry(this,nptr);
build_sfxtree((AffEntry *)sfxptr);
}
nptr++;
}
free(ptr);
return 0;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -