⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 porter_english.dct

📁 PostgreSQL7.4.6 for Linux
💻 DCT
📖 第 1 页 / 共 2 页
字号:
        case 'u':            if (ends(z, "ous", 3)) break; return;        case 'v':            if (ends(z, "ive", 3)) break; return;        case 'z':            if (ends(z, "ize", 3)) break; return;        default:            return;    }    if (m(z) > 1) z->k = z->j;}/* step_5(z) removes a final -e if m(z) > 1, and changes -ll to -l if   m(z) > 1.*/static void step_5(struct english_stemmer * z){   z->j = z->k;    if (z->p[z->k] == 'e')    {   int a = m(z);        if (a > 1 || (a == 1 && !cvc(z, z->k - 1))) z->k--;    }    if (z->p[z->k] == 'l' && doublec(z, z->k) && m(z) > 1) z->k--;}static const char * english_stem(void * z_, const char * q, int i0, int i1){    struct english_stemmer * z = (struct english_stemmer *) z_;    int p_size = z->p_size;    if (i1 - i0 + 50 > p_size)    {   free(z->p);        p_size = i1 - i0 + 75; /* ample */ z->p_size = p_size;        z->p = (char *) malloc(p_size);    }    memmove(z->p, q + i0, i1 - i0 + 1);    z->k = i1 - i0;    {   const char * t = search_pool(z->irregulars, z->k + 1, z->p);        if (t != 0)  {		z->k = strlen(t) - 1;			return t;	}    }    if (z->k > 1) /*-DEPARTURE-*/   /* With this line, strings of length 1 or 2 don't go through the      stemming process, although no mention is made of this in the      published algorithm. Remove the line to match the published      algorithm. */    {   step_1ab(z); step_1c(z);        step_2(z);        step_3(z);        step_4(z);        step_5(z);    }    z->p[z->k + 1] = 0; /* C string form for now */    return z->p;}/* -NEW-   This is a table of irregular forms. It is quite short, but still   reflects the errors actually drawn to Martin Porter's attention over   a 20 year period!   Extend it as necessary.   The form of the table is:     "p1" "s11/s12/s13/ ... /"     "p2" "s21/s22/s23/ ... /"     ...     "pn" "sn1/sn2/sn3/ ... /"     0, 0   String sij is mapped to paradigm form pi, and the main stemming   process is then bypassed.*/static const char * irregular_forms[] = {    "sky",     "sky/skies/",    "die",     "dying/",    "lie",     "lying/",    "tie",     "tying/",    "news",    "news/",    "inning",  "innings/inning/",    "outing",  "outings/outing/",    "canning", "cannings/canning/",    "howe",    "howe/",    /*-NEW-*/    "proceed", "proceed/",    "exceed",  "exceed/",    "succeed", "succeed/",  /* Hiranmay Ghosh */    0, 0  /* terminator */};/* * is_stopword part */typedef struct {	unsigned char	val;	unsigned char	flag;	unsigned char	right;	unsigned char	child;} ESWNODE;/* is exists left tree ? */#define L	0x01/* finish word flag */#define F	0x02#define ISLEFT(x)	(((ESWNODE*)x)->flag & L)#define ISFINISH(x)	(((ESWNODE*)x)->flag & F)static ESWNODE engstoptree[] = {	{'m',L,9,126},	{'d',L,4,71},	{'b',L,2,40},	{'a',F,0,14},	{'c',0,0,62},	{'f',L,2,79},	{'e',0,0,75},	{'h',0,1,90},	{'i',F,0,108},	{'t',L,4,177},	{'o',L,2,135},	{'n',0,0,131},	{'s',0,0,156},	{'v',L,2,210},	{'u',0,0,201},	{'w',0,1,211},	{'y',0,0,237},	{'m',L|F,5,0},	{'f',L,2,12},	{'b',0,0,7},	{'g',0,1,13},	{'l',0,0,17},	{'r',L,2,19},	{'n',F,0,16},	{'s',F,1,0},	{'t',F,0,0},	{'o',0,0,1},	{'u',0,1,2},	{'v',F,0,0},	{'t',F,0,0},	{'t',0,0,1},	{'e',0,0,1},	{'r',F,0,0},	{'a',0,0,1},	{'i',0,0,1},	{'n',F,0,1},	{'s',0,0,1},	{'t',F,0,0},	{'l',F,0,0},	{'d',F,1,0},	{'i',F,0,0},	{'e',F,0,0},	{'o',L,2,21},	{'e',F,0,3},	{'u',0,1,21},	{'y',F,0,0},	{'f',L,3,9},	{'c',0,1,4},	{'e',0,0,6},	{'l',0,1,8},	{'t',0,0,9},	{'a',0,0,1},	{'u',0,0,1},	{'s',F,0,0},	{'n',F,0,0},	{'o',0,0,1},	{'r',F,0,0},	{'o',0,0,1},	{'w',F,0,0},	{'w',0,0,1},	{'e',0,0,1},	{'e',0,0,1},	{'n',F,0,0},	{'t',0,0,1},	{'h',F,0,0},	{'t',F,0,0},	{'a',0,1,2},	{'o',0,0,2},	{'n',F,0,0},	{'u',0,0,1},	{'l',0,0,1},	{'d',F,0,0},	{'o',L|F,2,4},	{'i',0,0,2},	{'u',0,0,5},	{'d',F,0,0},	{'e',F,1,0},	{'w',0,0,1},	{'n',F,0,0},	{'r',0,0,1},	{'e',F,0,0},	{'a',0,0,1},	{'c',0,0,1},	{'h',F,0,0},	{'o',L,2,5},	{'e',0,0,3},	{'r',0,1,4},	{'u',0,0,5},	{'w',F,0,0},	{'r',F,0,0},	{'o',0,0,1},	{'m',F,0,0},	{'r',0,0,1},	{'t',0,0,1},	{'h',0,0,1},	{'e',0,0,1},	{'r',F,0,0},	{'e',L|F,2,7},	{'a',F,0,3},	{'i',F,1,11},	{'o',0,0,15},	{'d',F,1,0},	{'v',0,0,1},	{'e',F,0,0},	{'r',F,0,1},	{'e',F,1,0},	{'s',0,0,1},	{'e',0,0,1},	{'l',0,0,1},	{'f',F,0,0},	{'m',F,0,1},	{'s',0,0,1},	{'e',0,0,1},	{'l',0,0,1},	{'f',F,0,0},	{'w',F,0,0},	{'n',L|F,2,4},	{'f',F,0,0},	{'s',F,1,0},	{'t',F,0,3},	{'t',0,0,1},	{'o',F,0,0},	{'s',0,0,1},	{'e',0,0,1},	{'l',0,0,1},	{'f',F,0,0},	{'o',L,3,6},	{'a',0,1,4},	{'e',F,0,0},	{'u',0,1,7},	{'y',F,0,8},	{'y',F,0,0},	{'r',0,1,2},	{'s',0,0,2},	{'e',F,0,0},	{'t',F,0,0},	{'s',0,0,1},	{'t',F,0,0},	{'s',0,0,1},	{'e',0,0,1},	{'l',0,0,1},	{'f',F,0,0},	{'o',F,0,1},	{'r',F,1,0},	{'t',F,0,0},	{'t',L,4,11},	{'n',L|F,2,7},	{'f',F,0,5},	{'r',F,0,0},	{'v',L,2,16},	{'u',0,0,9},	{'w',0,0,16},	{'f',F,0,0},	{'c',F,1,0},	{'l',0,0,1},	{'i',F,0,0},	{'h',0,0,1},	{'e',0,0,1},	{'r',F,0,0},	{'r',F,1,2},	{'t',F,0,0},	{'s',0,0,1},	{'e',0,0,1},	{'l',0,0,1},	{'v',F,0,0},	{'e',0,0,1},	{'r',F,0,0},	{'n',F,0,0},	{'h',L,2,6},	{'a',0,0,3},	{'o',F,1,12},	{'u',0,0,13},	{'m',0,0,1},	{'e',F,0,0},	{'e',L|F,2,0},	{'a',0,0,2},	{'o',0,0,3},	{'l',0,0,1},	{'l',F,0,0},	{'u',0,0,1},	{'l',0,0,1},	{'d',F,0,0},	{'m',0,0,1},	{'e',F,0,0},	{'c',0,0,1},	{'h',F,0,0},	{'h',0,1,2},	{'o',F,0,27},	{'i',L|F,3,0},	{'a',0,1,4},	{'e',F,0,5},	{'o',0,1,17},	{'r',0,0,18},	{'n',F,1,0},	{'t',F,0,0},	{'n',L|F,3,0},	{'i',0,1,5},	{'m',F,0,5},	{'s',L,2,9},	{'r',0,0,7},	{'y',F,0,0},	{'r',F,0,0},	{'s',0,0,1},	{'e',0,0,1},	{'l',0,0,1},	{'v',F,0,0},	{'e',F,0,0},	{'e',F,0,0},	{'s',0,0,1},	{'e',F,0,0},	{'o',0,0,1},	{'u',0,0,1},	{'g',0,0,1},	{'h',F,0,0},	{'o',F,0,0},	{'n',0,1,2},	{'p',F,0,0},	{'d',0,1,2},	{'t',0,0,3},	{'e',0,0,1},	{'r',F,0,0},	{'i',0,0,1},	{'l',F,0,0},	{'e',0,0,1},	{'r',0,0,1},	{'i',F,0,0},	{'h',L,3,7},	{'a',F,1,0},	{'e',F,0,3},	{'i',0,1,17},	{'o',0,0,20},	{'r',0,0,1},	{'e',F,0,0},	{'e',L,2,5},	{'a',0,0,3},	{'i',F,1,6},	{'o',F,0,9},	{'t',F,0,0},	{'n',F,1,0},	{'r',0,0,1},	{'e',F,0,0},	{'c',0,1,2},	{'l',0,0,2},	{'h',F,0,0},	{'e',F,0,0},	{'m',F,0,0},	{'l',0,1,2},	{'t',0,0,2},	{'l',F,0,0},	{'h',F,0,0},	{'u',0,0,1},	{'l',0,0,1},	{'d',F,0,0},	{'o',0,0,1},	{'u',F,0,1},	{'r',F,0,1},	{'s',0,0,1},	{'e',0,0,1},	{'l',0,0,1},	{'f',F,1,0},	{'v',F,0,0}};static unsigned intfind_english_stopword( unsigned char *buf, int len ) {	ESWNODE    *ptr = engstoptree;	int     result = 0;	unsigned char *cur = buf;	while( cur - buf < len ) {		if ( ptr->val == *cur ) {			cur++;			if ( ISFINISH(ptr) ) result = cur - buf;			if ( ! ptr->child ) break;			ptr += ptr->child;		} else if ( ptr->val > *cur ) {			if ( ISLEFT(ptr) )				ptr++;			else				break;		} else {			if ( ptr->right ) 				ptr += ptr->right;			else				break;		}	}	return result;} #undef L#undef F#undef ISLEFT#undef ISFINISHstatic intis_stopengword(void* obj,char* word,int len) {	return ( len == find_english_stopword((unsigned char*)word, len) ) ? 1 : 0;}static void * setup_english_stemmer(){    struct english_stemmer * z = (struct english_stemmer *) malloc(sizeof(struct english_stemmer));    z->p = 0; z->p_size = 0;    z->irregulars = create_pool(irregular_forms);    return (void *) z;}static void closedown_english_stemmer(void * z_){    struct english_stemmer * z = (struct english_stemmer *) z_;    free_pool(z->irregulars);    free(z->p);    free(z);}static char*engstemming(void* obj, char *word, int *len) {	struct english_stemmer * z = (struct english_stemmer *) obj;	const char* stemmed_word;	char *result = word;	while(result-word < *len) {		*result = tolower((unsigned char) *result);		result++;	}	stemmed_word = english_stem(obj, word, 0, *len-1);	*len = z->k + 1;	result = (char*)palloc( *len );	memcpy((void*)result, (void*)stemmed_word, *len);	return result;}#endif /* DICT_BODY */#ifdef DICT_TABLETABLE_DICT_START	"C",	setup_english_stemmer,	closedown_english_stemmer,	engstemming,	NULL,	is_stopengwordTABLE_DICT_END#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -