📄 zvrank.c
字号:
} /* else: tfs==0 && ds->terms[i].wt==0 */ return;}static void norm_max(void *rsi, void *dsi) { DS ds=(DS)dsi; int i, veclen; double tfm=0.0; /**/ veclen=ds->veclen; for (i=0; i < veclen; i++) { ds->terms[i].wt=ds->terms[i].tf*ds->terms[i].idf; if (ds->terms[i].wt > tfm) tfm=ds->terms[i].wt; } if (tfm > 0.0) for (i=0; i < veclen; i++) { ds->terms[i].wt=ds->terms[i].wt/tfm; } /* else: tfs==0 && ds->terms[i].wt==0 */ return;}/* add: norm_pivot, ... */static double sim_cosine(void *dsi1, void *dsi2) { DS ds1=(DS)dsi1; DS ds2=(DS)dsi2; int i, veclen; double smul=0.0, sdiv=0.0, sqr11=0.0, sqr22=0.0; double v1, v2; /**/ veclen=ds1->veclen; /* and ds2->veclen */ for (i=0; i < veclen; i++) { v1=ds1->terms[i].wt; v2=ds2->terms[i].wt; smul +=(v1*v2); sqr11+=(v1*v1); sqr22+=(v2*v2); } sdiv=sqrt(sqr11*sqr22); if (sdiv==0.0) return 0.0; return (smul/sdiv);}/* add: norm_jaccard, norm_dice, ... *//* end weighting functions *//* *** */static void zv_init_scheme(RS rs, const char *sname) { int slen; char c0, c1, c2, c3, c4, c5, c6; const char *def_rscheme="ntc-atn"; /* a good default */ /**/ yaz_log(LOG_DEBUG, "zv_init_scheme"); slen=strlen(sname); if (slen < 7) yaz_log(LOG_LOG, "zvrank: invalid weighting-scheme \"%s\"", sname); if (slen > 0) c0=sname[0]; else c0=def_rscheme[0]; if (slen > 1) c1=sname[1]; else c1=def_rscheme[1]; if (slen > 2) c2=sname[2]; else c2=def_rscheme[2]; c3='-'; if (slen > 4) c4=sname[4]; else c4=def_rscheme[4]; if (slen > 5) c5=sname[5]; else c5=def_rscheme[5]; if (slen > 6) c6=sname[6]; else c6=def_rscheme[6]; /**/ /* assign doc functions */ switch (c0) { case 'b': rs->d_tf_fct=tf_binary; rs->rscheme[0]='b'; break; case 'm': rs->d_tf_fct=tf_max_norm; rs->rscheme[0]='m'; yaz_log(LOG_DEBUG, "tf_max_norm: d_f_max required"); break; case 'a': rs->d_tf_fct=tf_aug_norm; rs->rscheme[0]='a'; yaz_log(LOG_DEBUG, "tf_aug_norm: d_f_max required"); break; case 's': rs->d_tf_fct=tf_square; rs->rscheme[0]='s'; break; case 'l': rs->d_tf_fct=tf_log; rs->rscheme[0]='l'; break; default: /* 'n' */ rs->d_tf_fct=tf_none; rs->rscheme[0]='n'; } switch (c1) { case 't': rs->d_idf_fct=idf_tfidf; rs->rscheme[1]='t'; yaz_log(LOG_DEBUG, "idf_tfidf: db_docs required"); break; case 'p': rs->d_idf_fct=idf_prob; rs->rscheme[1]='p'; yaz_log(LOG_DEBUG, "idf_prob: db_docs required"); break; case 'f': rs->d_idf_fct=idf_freq; rs->rscheme[1]='f'; yaz_log(LOG_DEBUG, "idf_freq: db_docs required"); break; case 's': rs->d_idf_fct=idf_squared; rs->rscheme[1]='s'; yaz_log(LOG_DEBUG, "idf_squared: db_docs required"); break; default: /* 'n' */ rs->d_idf_fct=idf_none; rs->rscheme[1]='n'; } switch (c2) { case 's': rs->d_norm_fct=norm_sum; rs->rscheme[2]='s'; break; case 'c': rs->d_norm_fct=norm_cosine; rs->rscheme[2]='c'; break; case 'f': rs->d_norm_fct=norm_fourth; rs->rscheme[2]='t'; break; case 'm': rs->d_norm_fct=norm_max; rs->rscheme[2]='m'; break; default: /* 'n' */ rs->d_norm_fct=norm_none; rs->rscheme[2]='n'; } /**/ rs->rscheme[3]='-'; /* assign query functions */ switch (c4) { case 'b': rs->q_tf_fct=tf_binary; rs->rscheme[4]='b'; break; case 'm': rs->q_tf_fct=tf_max_norm; yaz_log(LOG_DEBUG, "tf_max_norm: d_f_max required"); rs->rscheme[4]='m'; break; case 'a': rs->q_tf_fct=tf_aug_norm; rs->rscheme[4]='a'; yaz_log(LOG_DEBUG, "tf_aug_norm: d_f_max required"); break; case 's': rs->q_tf_fct=tf_square; rs->rscheme[4]='s'; break; case 'l': rs->q_tf_fct=tf_log; rs->rscheme[4]='l'; break; default: /* 'n' */ rs->q_tf_fct=tf_none; rs->rscheme[4]='n'; } switch (c5) { case 't': rs->q_idf_fct=idf_tfidf; rs->rscheme[5]='t'; yaz_log(LOG_DEBUG, "idf_tfidf: db_docs required"); break; case 'p': rs->q_idf_fct=idf_prob; rs->rscheme[5]='p'; yaz_log(LOG_DEBUG, "idf_prob: db_docs required"); break; case 'f': rs->q_idf_fct=idf_freq; rs->rscheme[5]='f'; yaz_log(LOG_DEBUG, "idf_freq: db_docs required"); break; case 's': rs->q_idf_fct=idf_squared; rs->rscheme[5]='s'; yaz_log(LOG_DEBUG, "idf_squared: db_docs required"); break; default: /* 'n' */ rs->q_idf_fct=idf_none; rs->rscheme[5]='n'; } switch (c6) { case 's': rs->q_norm_fct=norm_sum; rs->rscheme[6]='s'; break; case 'c': rs->q_norm_fct=norm_cosine; rs->rscheme[6]='c'; break; case 'f': rs->q_norm_fct=norm_fourth; rs->rscheme[6]='f'; break; case 'm': rs->q_norm_fct=norm_max; rs->rscheme[6]='m'; break; default: /* 'n' */ rs->q_norm_fct=norm_none; rs->rscheme[6]='n'; } rs->rscheme[7]='\0'; /**/ rs->sim_fct=sim_cosine; yaz_log(LOG_DEBUG, "zv_scheme %s", rs->rscheme); return;}static void zv_init(RS rs, const char *rscheme) { yaz_log(LOG_DEBUG, "zv_init"); /**/ rs->db_docs=100000; /* assign correct value here */ rs->db_terms=500000; /* assign correct value here (for debugging) */ rs->db_f_max=50; /* assign correct value here */ rs->db_f_max_str="a"; /* assign correct value here (for debugging) */ zv_init_scheme(rs, rscheme); return;}/******//* * zv_create: Creates/Initialises this rank handler. This routine is * called exactly once. The routine returns the class_handle. */static void *zv_create (ZebraHandle zh) { int i; Res res = zh->res; const char *wscheme; struct rank_class_info *ci = (struct rank_class_info *) xmalloc (sizeof(*ci)); yaz_log(LOG_DEBUG, "zv_create"); wscheme=res_get(res, "zvrank.weighting-scheme"); for (i=0; (i < strlen(wscheme)) && (i < 8); i++) ci->rscheme[i]=wscheme[i]; return ci;}/* * zv_destroy: Destroys this rank handler. This routine is called * when the handler is no longer needed - i.e. when the server * dies. The class_handle was previously returned by create. */static void zv_destroy (struct zebra_register *reg, void *class_handle) { struct rank_class_info *ci = (struct rank_class_info *) class_handle; yaz_log(LOG_DEBUG, "zv_destroy"); xfree (ci);}/* * zv_begin: Prepares beginning of "real" ranking. Called once for * each result set. The returned handle is a "set handle" and * will be used in each of the handlers below. */static void *zv_begin(struct zebra_register *reg, void *class_handle, RSET rset){ struct rs_info *rs=(struct rs_info *)xmalloc(sizeof(*rs)); struct rank_class_info *ci=(struct rank_class_info *)class_handle; int i; int veclen, gocc; /**/ yaz_log(LOG_DEBUG, "zv_begin"); veclen=rset->no_rset_terms; /* smaller vector here */ zv_init(rs, ci->rscheme); rs->veclen=veclen; prn_rs(rs); rs->qdoc=(struct ds_info *)xmalloc(sizeof(*rs->qdoc)); rs->qdoc->terms=(struct ts_info *)xmalloc(sizeof(*rs->qdoc->terms)*rs->veclen); rs->qdoc->veclen=veclen; rs->qdoc->d_f_max=1; /* no duplicates */ rs->qdoc->d_f_max_str=""; rs->rdoc=(struct ds_info *)xmalloc(sizeof(*rs->rdoc)); rs->rdoc->terms=(struct ts_info *)xmalloc(sizeof(*rs->rdoc->terms)*rs->veclen); rs->rdoc->veclen=veclen; rs->rdoc->d_f_max=10; /* just a guess */ rs->rdoc->d_f_max_str=""; /* yaz_log(LOG_DEBUG, "zv_begin_init"); */ for (i = 0; i < rs->veclen; i++) { gocc=rset->rset_terms[i]->nn; /* yaz_log(LOG_DEBUG, "zv_begin_init i=%d gocc=%d", i, gocc); */ rs->qdoc->terms[i].gocc=gocc; rs->qdoc->terms[i].locc=1; /* assume query has no duplicate terms */ rs->rdoc->terms[i].gocc=gocc; rs->rdoc->terms[i].locc=0; } (*rs->q_tf_fct)(rs, rs->qdoc); /* we do this once only */ (*rs->q_idf_fct)(rs, rs->qdoc); (*rs->q_norm_fct)(rs, rs->qdoc); return rs;}/* * zv_end: Terminates ranking process. Called after a result set * has been ranked. */static void zv_end (struct zebra_register *reg, void *rsi){ RS rs=(RS)rsi; yaz_log(LOG_DEBUG, "zv_end"); xfree(rs->qdoc->terms); xfree(rs->rdoc->terms); xfree(rs->qdoc); xfree(rs->rdoc); xfree(rs); return;}/* * zv_add: Called for each word occurence in a result set. This routine * should be as fast as possible. This routine should "incrementally" * update the score. */static void zv_add (void *rsi, int seqno, int i) { RS rs=(RS)rsi; /* yaz_log(LOG_DEBUG, "zvrank zv_add seqno=%d term_index=%d", seqno, term_index);*/ rs->rdoc->terms[i].locc++;}/* * zv_calc: Called for each document in a result. This handler should * produce a score based on previous call(s) to the add handler. The * score should be between 0 and 1000. If score cannot be obtained * -1 should be returned. */static int zv_calc (void *rsi, int sysno){ int i, veclen; int score=0; double dscore=0.0; RS rs=(RS)rsi; /* yaz_log(LOG_DEBUG, "zv_calc"); */ /**/ veclen=rs->veclen; if (veclen==0) return -1; for (i = 0; i < veclen; i++) { /* qdoc weight has already been calculated */ (*rs->d_tf_fct)(rs, rs->rdoc); (*rs->d_idf_fct)(rs, rs->rdoc); (*rs->d_norm_fct)(rs, rs->rdoc); dscore=rs->sim_fct(rs->qdoc, rs->rdoc); } score = dscore * 1000; yaz_log (LOG_LOG, "sysno=%d score=%d", sysno, score); if (score > 1000) /* should not happen */ score = 1000; return score;}/* * Pseudo-meta code with sequence of calls as they occur in a * server. Handlers are prefixed by --: * * server init * -- create * foreach search * rank result set * -- begin * foreach record * foreach word * -- add * -- calc * -- end * -- destroy * server close */static struct rank_control rank_control_vsm = { "zvrank", zv_create, zv_destroy, zv_begin, zv_end, zv_calc, zv_add,}; struct rank_control *rankzv_class = &rank_control_vsm;/* EOF */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -