// File: affixmgr.cxx
// (code-viewer page residue, translated: "Font size:")
}
// now clean up by adding smart search termination strings:
// if you are already a superset of the previous prefix
// but not a subset of the next, search can end here
// so set NextNE properly
ptr = (PfxEntry *) pStart[i];
for (; ptr != NULL; ptr = ptr->getNext()) {
PfxEntry * nptr = ptr->getNext();
PfxEntry * mptr = NULL;
for (; nptr != NULL; nptr = nptr->getNext()) {
if (! isSubset(ptr->getKey(),nptr->getKey())) break;
mptr = nptr;
}
if (mptr) mptr->setNextNE(NULL);
}
}
return 0;
}
// reinitialize the SfxEntry links NextEQ and NextNE to speed searching
// using the idea of leading subsets this time
int AffixMgr::process_sfx_order()
{
SfxEntry* ptr;
// loop through each prefix list starting point
for (int i=1; i < SETSIZE; i++) {
ptr = (SfxEntry *) sStart[i];
// look through the remainder of the list
// and find next entry with affix that
// the current one is not a subset of
// mark that as destination for NextNE
// use next in list that you are a subset
// of as NextEQ
for (; ptr != NULL; ptr = ptr->getNext()) {
SfxEntry * nptr = ptr->getNext();
for (; nptr != NULL; nptr = nptr->getNext()) {
if (! isSubset(ptr->getKey(),nptr->getKey())) break;
}
ptr->setNextNE(nptr);
ptr->setNextEQ(NULL);
if ((ptr->getNext()) && isSubset(ptr->getKey(),(ptr->getNext())->getKey()))
ptr->setNextEQ(ptr->getNext());
}
// now clean up by adding smart search termination strings:
// if you are already a superset of the previous suffix
// but not a subset of the next, search can end here
// so set NextNE properly
ptr = (SfxEntry *) sStart[i];
for (; ptr != NULL; ptr = ptr->getNext()) {
SfxEntry * nptr = ptr->getNext();
SfxEntry * mptr = NULL;
for (; nptr != NULL; nptr = nptr->getNext()) {
if (! isSubset(ptr->getKey(),nptr->getKey())) break;
mptr = nptr;
}
if (mptr) mptr->setNextNE(NULL);
}
}
return 0;
}
// takes aff file condition string and creates the
// conds array - please see the appendix at the end of the
// file affentry.cxx which describes what is going on here
// in much more detail
void AffixMgr::encodeit(struct affentry * ptr, char * cs)
{
unsigned char c;
int i, j, k;
unsigned char mbr[MAXLNLEN];
// now clear the conditions array */
for (i=0;i<SETSIZE;i++) ptr->conds[i] = (unsigned char) 0;
// now parse the string to create the conds array */
int nc = strlen(cs);
int neg = 0; // complement indicator
int grp = 0; // group indicator
int n = 0; // number of conditions
int ec = 0; // end condition indicator
int nm = 0; // number of member in group
// if no condition just return
if (strcmp(cs,".")==0) {
ptr->numconds = 0;
return;
}
i = 0;
while (i < nc) {
c = *((unsigned char *)(cs + i));
// start group indicator
if (c == '[') {
grp = 1;
c = 0;
}
// complement flag
if ((grp == 1) && (c == '^')) {
neg = 1;
c = 0;
}
// end goup indicator
if (c == ']') {
ec = 1;
c = 0;
}
// add character of group to list
if ((grp == 1) && (c != 0)) {
*(mbr + nm) = c;
nm++;
c = 0;
}
// end of condition
if (c != 0) {
ec = 1;
}
if (ec) {
if (grp == 1) {
if (neg == 0) {
// set the proper bits in the condition array vals for those chars
for (j=0;j<nm;j++) {
k = (unsigned int) mbr[j];
ptr->conds[k] = ptr->conds[k] | (1 << n);
}
} else {
// complement so set all of them and then unset indicated ones
for (j=0;j<SETSIZE;j++) ptr->conds[j] = ptr->conds[j] | (1 << n);
for (j=0;j<nm;j++) {
k = (unsigned int) mbr[j];
ptr->conds[k] = ptr->conds[k] & ~(1 << n);
}
}
neg = 0;
grp = 0;
nm = 0;
} else {
// not a group so just set the proper bit for this char
// but first handle special case of . inside condition
if (c == '.') {
// wild card character so set them all
for (j=0;j<SETSIZE;j++) ptr->conds[j] = ptr->conds[j] | (1 << n);
} else {
ptr->conds[(unsigned int) c] = ptr->conds[(unsigned int)c] | (1 << n);
}
}
n++;
ec = 0;
}
i++;
}
ptr->numconds = n;
return;
}
// check word for prefixes
struct hentry * AffixMgr::prefix_check (const char * word, int len)
{
struct hentry * rv= NULL;
// first handle the special case of 0 length prefixes
PfxEntry * pe = (PfxEntry *) pStart[0];
while (pe) {
rv = pe->check(word,len);
if (rv) return rv;
pe = pe->getNext();
}
// now handle the general case
unsigned char sp = *((const unsigned char *)word);
PfxEntry * pptr = (PfxEntry *)pStart[sp];
while (pptr) {
if (isSubset(pptr->getKey(),word)) {
rv = pptr->check(word,len);
if (rv) return rv;
pptr = pptr->getNextEQ();
} else {
pptr = pptr->getNextNE();
}
}
return NULL;
}
// Check whether word can be split into pieces that all carry compound_flag.
//
//   word          - candidate compound word
//   len           - its length
//   compound_flag - affix flag every accepted piece must have
//
// Every split position from cpdmin up to (but excluding) len - (cpdmin - 1)
// is tried.  The left piece must be found by lookup() or affix_check() and
// carry the flag; the right piece is then accepted if lookup(), affix_check()
// (both with the flag) or a recursive compound_check() succeeds.
// Returns the entry found for the right piece, or NULL.
struct hentry * AffixMgr::compound_check (const char * word, int len, char compound_flag)
{
    // too short to be split into two pieces of at least cpdmin characters
    if (len < cpdmin) return NULL;

    // private, writable copy: it is cut at each split position in turn
    char * head = mystrdup(word);
    struct hentry * found = NULL;

    for (int cut = cpdmin; cut < (len - (cpdmin - 1)); cut++) {
        const char saved = head[cut];
        head[cut] = '\0';

        // left piece: dictionary word first, affixed form second
        struct hentry * left = lookup(head);
        if (!left) left = affix_check(head, cut);

        if ((left) && (TESTAFF(left->astr, compound_flag, left->alen))) {
            const char * tail = word + cut;

            // right piece as a plain dictionary word
            struct hentry * right = lookup(tail);
            if ((right) && (TESTAFF(right->astr, compound_flag, right->alen))) {
                found = right;
                break;
            }

            // right piece as an affixed word
            right = affix_check(tail, strlen(tail));
            if ((right) && (TESTAFF(right->astr, compound_flag, right->alen))) {
                found = right;
                break;
            }

            // right piece as a compound of its own
            right = compound_check(tail, strlen(tail), compound_flag);
            if (right) {
                found = right;
                break;
            }
        }

        // undo the cut before moving to the next split position
        head[cut] = saved;
    }

    free(head);
    return found;
}
// check word for suffixes
//
//   word    - candidate word
//   len     - number of characters of word to consider
//   sfxopts - handed on unchanged to SfxEntry::check
//   ppfx    - handed on unchanged to SfxEntry::check
//
// Returns the first non-NULL result of an entry's check(), or NULL when no
// entry accepts the word.
struct hentry * AffixMgr::suffix_check (const char * word, int len,
                       int sfxopts, AffEntry * ppfx)
{
    struct hentry * rv = NULL;

    // first handle the special case of 0 length suffixes
    SfxEntry * se = (SfxEntry *) sStart[0];
    while (se) {
        rv = se->check(word,len, sfxopts, ppfx);
        if (rv) return rv;
        se = se->getNext();
    }

    // The general case is keyed on the last character of the word.  With no
    // characters at all, word + len - 1 would point before the start of the
    // buffer, so there is nothing further to test.
    if (len <= 0) return NULL;

    // now handle the general case
    unsigned char sp = *((const unsigned char *)(word + len - 1));
    SfxEntry * sptr = (SfxEntry *) sStart[sp];
    while (sptr) {
        if (isRevSubset(sptr->getKey(),(word+len-1), len)) {
            rv = sptr->check(word,len, sfxopts, ppfx);
            if (rv) {
                return rv;
            }
            // key matched but the entry rejected the word: try the next
            // entry on the "equal" chain
            sptr = sptr->getNextEQ();
        } else {
            // key does not end the word: follow the "not equal" shortcut
            sptr = sptr->getNextNE();
        }
    }
    return NULL;
}
// Check whether word is a correctly spelled affixed form.
// Prefixes are tried first (per the original note this also covers prefixes
// crossed with suffixes where allowed); suffixes are tried only when no
// prefix matched.  Returns the matching entry or NULL.
struct hentry * AffixMgr::affix_check (const char * word, int len)
{
    struct hentry * hit = prefix_check(word, len);
    if (!hit) {
        // plain suffix pass: no options, no accompanying prefix entry
        hit = suffix_check(word, len, 0, NULL);
    }
    return hit;
}
int AffixMgr::expand_rootword(struct guessword * wlst, int maxn,
const char * ts, int wl, const char * ap, int al)
{
int nh=0;
// first add root word to list
if (nh < maxn) {
wlst[nh].word = mystrdup(ts);
wlst[nh].allow = (1 == 0);
nh++;
}
// handle suffixes
for (int i = 0; i < al; i++) {
unsigned char c = (unsigned char) ap[i];
SfxEntry * sptr = (SfxEntry *)sFlag[c];
while (sptr) {
char * newword = sptr->add(ts, wl);
if (newword) {
if (nh < maxn) {
wlst[nh].word = newword;
wlst[nh].allow = sptr->allowCross();
nh++;
} else {
free(newword);
}
}
sptr = (SfxEntry *)sptr ->getFlgNxt();
}
}
int n = nh;
// handle cross products of prefixes and suffixes
for (int j=1;j<n ;j++)
if (wlst[j].allow) {
for (int k = 0; k < al; k++) {
unsigned char c = (unsigned char) ap[k];
PfxEntry * cptr = (PfxEntry *) pFlag[c];
while (cptr) {
if (cptr->allowCross()) {
int l1 = strlen(wlst[j].word);
char * newword = cptr->add(wlst[j].word, l1);
if (newword) {
if (nh < maxn) {
wlst[nh].word = newword;
wlst[nh].allow = cptr->allowCross();
nh++;
} else {
free(newword);
}
}
}
cptr = (PfxEntry *)cptr ->getFlgNxt();
}
}
}
// now handle pure prefixes
for (int m = 0; m < al; m ++) {
unsigned char c = (unsigned char) ap[m];
PfxEntry * ptr = (PfxEntry *) pFlag[c];
while (ptr) {
char * newword = ptr->add(ts, wl);
if (newword) {
if (nh < maxn) {
wlst[nh].word = newword;
wlst[nh].allow = ptr->allowCross();
nh++;
} else {
free(newword);
}
}
ptr = (PfxEntry *)ptr ->getFlgNxt();
}
}
return nh;
}
// Return numrep; per the original comment this is the length (number of
// entries) of the replacement table handed out by get_reptable().
int AffixMgr::get_numrep()
{
return numrep;
}
// Hand out the replacement table.
// The stored pointer is returned as is, so the result is NULL when no
// table is set.
struct replentry * AffixMgr::get_reptable()
{
    return reptable;
}
// Return nummap; per the original comment this is the length (number of
// entries) of the character map table handed out by get_maptable().
int AffixMgr::get_nummap()
{
return nummap;
}
// Hand out the character map table.
// The stored pointer is returned as is, so the result is NULL when no
// table is set.
struct mapentry * AffixMgr::get_maptable()
{
    return maptable;
}
// Return the text encoding of the dictionary as a fresh mystrdup() copy.
// When no encoding has been recorded yet, "ISO8859-1" is stored as the
// encoding first and then copied.
// NOTE(review): the copy presumably has to be released by the caller
// (mystrdup() results are free()d elsewhere in this file) - confirm.
char * AffixMgr::get_encoding()
{
    if (encoding == NULL) encoding = mystrdup("ISO8859-1");
    return mystrdup(encoding);
}
// Return a mystrdup() copy of the preferred try string used for
// suggestions, or NULL when no try string is set.
char * AffixMgr::get_try_string()
{
    return trystring ? mystrdup(trystring) : NULL;
}
// return the compound words control flag
char * AffixMgr::get_compound()
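// --- code-viewer page residue, not part of the source (translated) ---
// Keyboard shortcuts:
//   Copy code:        Ctrl + C
//   Search code:      Ctrl + F
//   Full screen:      F11
//   Toggle theme:     Ctrl + Shift + D
//   Show shortcuts:   ?
//   Larger font:      Ctrl + =
//   Smaller font:     Ctrl + -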