📄 booleanscorer.pm
字号:
package KinoSearch::Search::BooleanScorer;use strict;use warnings;use KinoSearch::Util::ToolSet;use base qw( KinoSearch::Search::Scorer );BEGIN { __PACKAGE__->init_instance_vars() }our %instance_vars;sub new { my $self = shift->SUPER::new; confess kerror() unless verify_args( \%instance_vars, @_ ); my %args = ( %instance_vars, @_ ); $self->set_similarity( $args{similarity} ); $self->_init_child; return $self;}1;__END____XS__MODULE = KinoSearch PACKAGE = KinoSearch::Search::BooleanScorervoid_init_child(scorer) Scorer *scorer;PPCODE: Kino_BoolScorer_init_child(scorer);=for commentAdd a scorer for a sub-query of the BooleanQuery.=cutvoid add_subscorer(scorer, subscorer_sv, occur) Scorer *scorer; SV *subscorer_sv; char *occur;PREINIT: BoolScorerChild* child; Scorer *subscorer; SV *subscorer_sv_copy;PPCODE: child = (BoolScorerChild*)scorer->child; Kino_extract_struct(subscorer_sv, subscorer, Scorer*, "KinoSearch::Search::Scorer"); subscorer_sv_copy = newSVsv(subscorer_sv); av_push(child->subscorers_av, subscorer_sv_copy); Kino_BoolScorer_add_subscorer(scorer, subscorer, occur);SV*_boolean_scorer_set_or_get(scorer, ...) Scorer* scorer;ALIAS: _get_subscorer_storage = 2CODE:{ BoolScorerChild* child = (BoolScorerChild*)scorer->child; KINO_START_SET_OR_GET_SWITCH case 2: RETVAL = newRV((SV*)child->subscorers_av); break; KINO_END_SET_OR_GET_SWITCH}OUTPUT: RETVALvoidDESTROY(scorer) Scorer *scorer;PPCODE: Kino_BoolScorer_destroy(scorer);__H__#ifndef H_KINO_BOOLEAN_SCORER#define H_KINO_BOOLEAN_SCORER 1#include "EXTERN.h"#include "perl.h"#include "XSUB.h"#include "KinoSearchSearchScorer.h"#include "KinoSearchUtilMemManager.h"#define KINO_MATCH_BATCH_SIZE (1 << 11)#define KINO_MATCH_BATCH_DOC_MASK (KINO_MATCH_BATCH_SIZE - 1)/* A MatchBatch can hold scoring data for 2048 documents. */typedef struct matchbatch { U32 count; float *scores; U32 *matcher_counts; U32 *bool_masks; U32 *recent_docs;} MatchBatch;typedef struct boolsubscorer { Scorer *scorer; U32 bool_mask; bool done; struct boolsubscorer *next_subscorer;} BoolSubScorer;typedef struct boolscorerchild { U32 doc; U32 end; U32 max_coord; float *coord_factors; U32 required_mask; U32 prohibited_mask; U32 next_mask; MatchBatch *mbatch; BoolSubScorer *subscorers; /* linked list */ AV *subscorers_av;} BoolScorerChild;void Kino_BoolScorer_init_child(Scorer*);MatchBatch* Kino_BoolScorer_new_mbatch();void Kino_BoolScorer_clear_mbatch(MatchBatch*);void Kino_BoolScorer_compute_coord_factors(Scorer*);void Kino_BoolScorer_add_subscorer(Scorer*, Scorer*, char*);bool Kino_BoolScorer_next(Scorer*);float Kino_BoolScorer_score(Scorer*);U32 Kino_BoolScorer_doc(Scorer*);void Kino_BoolScorer_destroy(Scorer*);#endif /* include guard */__C__#include "KinoSearchSearchBooleanScorer.h"voidKino_BoolScorer_init_child(Scorer *scorer) { BoolScorerChild *child; Kino_New(0, child, 1, BoolScorerChild); scorer->child = child; /* define Scorer's abstract methods */ scorer->next = Kino_BoolScorer_next; scorer->doc = Kino_BoolScorer_doc; scorer->score = Kino_BoolScorer_score; /* init */ child->end = 0; child->max_coord = 1; child->coord_factors = NULL; child->required_mask = 0; child->prohibited_mask = 0; child->next_mask = 1; child->mbatch = Kino_BoolScorer_new_mbatch(); child->subscorers = NULL; child->subscorers_av = newAV();}MatchBatch*Kino_BoolScorer_new_mbatch() { MatchBatch* mbatch; /* allocate and init */ Kino_New(0, mbatch, 1, MatchBatch); Kino_New(0, mbatch->scores, KINO_MATCH_BATCH_SIZE, float); Kino_New(0, mbatch->matcher_counts, KINO_MATCH_BATCH_SIZE, U32); Kino_New(0, mbatch->bool_masks, KINO_MATCH_BATCH_SIZE, U32); Kino_New(0, mbatch->recent_docs, KINO_MATCH_BATCH_SIZE, U32); mbatch->count = 0; return mbatch;}/* Return a MatchBatch to a "zeroed" state. Only the matcher_counts and the * count are actually cleared; the rest get initialized the next time a doc * gets captured. */voidKino_BoolScorer_clear_mbatch(MatchBatch *mbatch) { Zero(mbatch->matcher_counts, KINO_MATCH_BATCH_SIZE, U32); mbatch->count = 0;}/* BooleanScorers award bonus points to documents which match multiple * subqueries. This routine calculates the size of the bonuses. */voidKino_BoolScorer_compute_coord_factors(Scorer *scorer) { BoolScorerChild *child; float *coord_factors; U32 i; child = (BoolScorerChild*)scorer->child; Kino_New(0, child->coord_factors, (child->max_coord + 1), float); coord_factors = child->coord_factors; for (i = 0; i <= child->max_coord; i++) { *coord_factors++ = scorer->sim->coord(scorer->sim, i, child->max_coord); }}voidKino_BoolScorer_add_subscorer(Scorer* main_scorer, Scorer* subscorer, char *occur) { BoolScorerChild *child; BoolSubScorer *bool_subscorer; child = (BoolScorerChild*)main_scorer->child; Kino_New(0, bool_subscorer, 1, BoolSubScorer); bool_subscorer->scorer = subscorer; /* if this scorer is required or negated, assign it a unique mask bit. */ if (strnEQ(occur, "SHOULD", 6)) { bool_subscorer->bool_mask = 0; child->max_coord++; } else { if (child->next_mask == 0) { Kino_confess("more than 32 required or prohibited clauses"); } bool_subscorer->bool_mask = child->next_mask; child->next_mask <<= 1; if (strnEQ(occur, "MUST_NOT", 8)) { child->prohibited_mask |= bool_subscorer->bool_mask; } else { /* "MUST" occur */ child->max_coord++; child->required_mask |= bool_subscorer->bool_mask; } } /* prime the pump */ bool_subscorer->done = !subscorer->next(subscorer); /* link up the linked list of subscorers */ bool_subscorer->next_subscorer = child->subscorers; child->subscorers = bool_subscorer;}boolKino_BoolScorer_next(Scorer* scorer) { BoolScorerChild *child; MatchBatch *mbatch; bool more; U32 doc; U32 masked_doc; U32 bool_mask; BoolSubScorer *sub; child = (BoolScorerChild*)scorer->child; mbatch = child->mbatch; do { while (mbatch->count-- > 0) { /* check to see if the doc is prohibited */ doc = mbatch->recent_docs[ mbatch->count ]; masked_doc = doc & KINO_MATCH_BATCH_DOC_MASK; bool_mask = mbatch->bool_masks[masked_doc]; if ( (bool_mask & child->prohibited_mask) == 0 && (bool_mask & child->required_mask) == child->required_mask ) { /* it's not prohibited, so next() was successful */ child->doc = doc; return 1; } } /* refill the queue, processing all docs within the next range */ Kino_BoolScorer_clear_mbatch(mbatch); more = 0; child->end += KINO_MATCH_BATCH_SIZE; /* iterate through subscorers, caching results to the MatchBatch */ for (sub = child->subscorers; sub != NULL; sub = sub->next_subscorer) { Scorer *scorer = sub->scorer; while (!sub->done && scorer->doc(scorer) < child->end) { doc = scorer->doc(scorer); masked_doc = doc & KINO_MATCH_BATCH_DOC_MASK; if (mbatch->matcher_counts[masked_doc] == 0) { /* first scorer to hit this doc */ mbatch->recent_docs[mbatch->count] = doc; mbatch->count++; mbatch->matcher_counts[masked_doc] = 1; mbatch->scores[masked_doc] = scorer->score(scorer); mbatch->bool_masks[masked_doc] = sub->bool_mask; } else { mbatch->matcher_counts[masked_doc]++; mbatch->scores[masked_doc] += scorer->score(scorer); mbatch->bool_masks[masked_doc] |= sub->bool_mask; } /* check whether this scorer is exhausted */ sub->done = !scorer->next(scorer); } /* if at least one scorer succeeded, loop back */ if (!sub->done) { more = 1; } } } while (mbatch->count > 0 || more); /* out of docs! we're done. */ return 0;}floatKino_BoolScorer_score(Scorer* scorer){ BoolScorerChild *child = (BoolScorerChild*)scorer->child; MatchBatch *mbatch = child->mbatch; U32 masked_doc; float score; if (child->coord_factors == NULL) { Kino_BoolScorer_compute_coord_factors(scorer); } /* retrieve the docs accumulated score from the MatchBatch */ masked_doc = child->doc & KINO_MATCH_BATCH_DOC_MASK; score = mbatch->scores[masked_doc]; /* assign bonus for multi-subscorer matches */ score *= child->coord_factors[ mbatch->matcher_counts[masked_doc] ]; return score;}U32Kino_BoolScorer_doc(Scorer* scorer) { BoolScorerChild *child = (BoolScorerChild*)scorer->child; return child->doc;}voidKino_BoolScorer_destroy(Scorer * scorer) { BoolSubScorer *sub, *next_sub; BoolScorerChild *child; child = (BoolScorerChild*)scorer->child; if (child->mbatch != NULL) { Kino_Safefree(child->mbatch->scores); Kino_Safefree(child->mbatch->matcher_counts); Kino_Safefree(child->mbatch->bool_masks); Kino_Safefree(child->mbatch->recent_docs); Kino_Safefree(child->mbatch); } sub = child->subscorers; while (sub != NULL) { next_sub = sub->next_subscorer; Kino_Safefree(sub); sub = next_sub; /* individual scorers will be GC'd on their own by Perl */ } Kino_Safefree(child->coord_factors); SvREFCNT_dec((SV*)child->subscorers_av); Kino_Safefree(child); Kino_Scorer_destroy(scorer);}__POD__=begin devdocs=head1 NAMEKinoSearch::Search::BooleanScorer - scorer for BooleanQuery=head1 DESCRIPTION Implementation of Scorer for BooleanQuery.=head1 COPYRIGHTCopyright 2005-2007 Marvin Humphrey=head1 LICENSE, DISCLAIMER, BUGS, etc.See L<KinoSearch|KinoSearch> version 0.163.=end devdocs=cut
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -