📄 blast_lookup.c
字号:
/* compute the index value */ _ComputeIndexIncremental(lookup->wordsize,lookup->charsize,lookup->mask, s, &index); /* if there are hits... */ if (PV_TEST(lookup, index)) { numhits = lookup->thick_backbone[index].num_used; ASSERT(numhits != 0); /* ...and there is enough space in the destination array, */ if ( numhits <= (array_size - totalhits) ) /* ...then copy the hits to the destination */ { Int4* src; Int4 i; if ( numhits <= HITS_ON_BACKBONE ) /* hits live in thick_backbone */ src = lookup->thick_backbone[index].payload.entries; else /* hits live in overflow array */ src = & (lookup->overflow [ lookup->thick_backbone[index].payload.overflow_cursor ] ); /* copy the hits. */ for(i=0;i<numhits;i++) { query_offsets[i + totalhits] = src[i]; subject_offsets[i + totalhits] = s - subject->sequence; } totalhits += numhits; } else /* not enough space in the destination array; return early */ { break; } } else /* no hits found */ { } } /* if we get here, we fell off the end of the sequence */ *offset = s - subject->sequence; return totalhits;}Int4 BlastRPSScanSubject(const LookupTableWrap* lookup_wrap, const BLAST_SequenceBlk *sequence, Int4* offset, Uint4 * table_offsets, Uint4 * sequence_offsets, Int4 array_size ){ Int4 index=0; Int4 table_correction; Uint1* s=NULL; Uint1* s_first=NULL; Uint1* s_last=NULL; Int4 numhits = 0; /* number of hits found for a given subject offset */ Int4 totalhits = 0; /* cumulative number of hits found */ RPSLookupTable* lookup = (RPSLookupTable *)lookup_wrap->lut; RPSBackboneCell *cell; s_first = sequence->sequence + *offset; s_last = sequence->sequence + sequence->length - lookup->wordsize; /* Calling code expects the returned sequence offsets to refer to the *first letter* in a word. The legacy RPS blast lookup table stores offsets to the *last* letter in each word, and so a correction is needed */ table_correction = lookup->wordsize - 1; _ComputeIndex(lookup->wordsize - 1, /* prime the index */ lookup->charsize, lookup->mask, s_first, &index); for(s=s_first; s <= s_last; s++) { /* compute the index value */ _ComputeIndexIncremental(lookup->wordsize,lookup->charsize,lookup->mask, s, &index); /* if there are hits... */ if (PV_TEST(lookup, index)) { cell = &lookup->rps_backbone[index]; numhits = cell->num_used; ASSERT(numhits != 0); if ( numhits <= (array_size - totalhits) ) { Int4* src; Int4 i; if ( numhits <= RPS_HITS_PER_CELL ) { /* hits live in thick_backbone */ for(i=0;i<numhits;i++) { table_offsets[i + totalhits] = cell->entries[i] - table_correction; sequence_offsets[i + totalhits] = s - sequence->sequence; } } else { /* hits (past the first) live in overflow array */ src = lookup->overflow + (cell->entries[1] / sizeof(Int4)); table_offsets[totalhits] = cell->entries[0] - table_correction; sequence_offsets[totalhits] = s - sequence->sequence; for(i=0;i<(numhits-1);i++) { table_offsets[i+totalhits+1] = src[i] - table_correction; sequence_offsets[i+totalhits+1] = s - sequence->sequence; } } totalhits += numhits; } else /* not enough space in the destination array; return early */ { break; } } else /* no hits found */ { } } /* if we get here, we fell off the end of the sequence */ *offset = s - sequence->sequence; return totalhits;}Int4 BlastAaLookupIndexQueries(LookupTable* lookup, Int4 ** matrix, BLAST_SequenceBlk* query, ListNode* locations, Int4 num_queries){ Int4 i; /* create neighbor array */ if (lookup->threshold>0) { MakeAllWordSequence(lookup); } /* index queries */ for(i=0;i<num_queries;i++) { _BlastAaLookupIndexQuery(lookup, matrix, (lookup->use_pssm == TRUE) ? NULL : &(query[i]), &(locations[i]), 0); } /* free neighbor array*/ if (lookup->neighbors != NULL) sfree(lookup->neighbors); return 0;}Int4 _BlastAaLookupIndexQuery(LookupTable* lookup, Int4 ** matrix, BLAST_SequenceBlk* query, ListNode* location, Int4 query_bias){ ListNode* loc; Int4 from, to; Int4 w; for(loc=location; loc; loc=loc->next) { from = ((SSeqRange*) loc->ptr)->left; to = ((SSeqRange*) loc->ptr)->right - lookup->wordsize + 1; for(w=from;w<=to;w++) { AddNeighboringWords(lookup, matrix, query, w, query_bias); } } return 0;}Int4 MakeAllWordSequence(LookupTable* lookup){ Int4 k,n; Int4 i; Int4 len; k=lookup->alphabet_size; n=lookup->wordsize; /* need k^n bytes for the de Bruijn sequence and n-1 to unwrap it. */ len = iexp(k,n) + (n-1); lookup->neighbors_length = len; lookup->neighbors = (Uint1*) malloc( len ); ASSERT(lookup->neighbors != NULL); /* generate the de Bruijn sequence */ debruijn(n,k,lookup->neighbors,NULL); /* unwrap it */ for(i=0;i<(n-1);i++) lookup->neighbors[len-n+1+i] = lookup->neighbors[i]; return 0;}Int4 AddNeighboringWords(LookupTable* lookup, Int4 ** matrix, BLAST_SequenceBlk* query, Int4 offset, Int4 query_bias){ if (lookup->use_pssm) { AddPSSMWordHits(lookup, matrix, offset, query_bias); } else { Uint1* w = NULL; ASSERT(query != NULL); w = query->sequence + offset; if (lookup->threshold == 0) { BlastAaLookupAddWordHit(lookup, w, query_bias + offset); lookup->exact_matches++; } else { AddWordHits(lookup, matrix, w, offset, query_bias); } } return 0;}static void AddWordHits(LookupTable* lookup, Int4** matrix, Uint1* word, Int4 offset, Int4 query_bias){ Uint1* s = lookup->neighbors; Uint1* s_end=s + lookup->neighbors_length - lookup->wordsize + 1; Uint1* w = word; Uint1 different; Int4 score; Int4 threshold = lookup->threshold; Int4 i; Int4* p0; Int4* p1; Int4* p2; Int4 corrected_offset = offset + query_bias; /* For each group of 'wordsize' bytes starting at 'w', * add the group to the lookup table at each offset 's' if * either * - w matches s, or * - the score derived from aligning w and s is high enough */ switch (lookup->wordsize) { case 1: p0 = matrix[w[0]]; while (s < s_end) { if ( (s[0] == w[0]) || (p0[s[0]] >= threshold) ) { BlastAaLookupAddWordHit(lookup,s,corrected_offset); if (s[0] == w[0]) lookup->exact_matches++; else lookup->neighbor_matches++; } s++; } break; case 2: p0 = matrix[w[0]]; p1 = matrix[w[1]]; while (s < s_end) { score = p0[s[0]] + p1[s[1]]; different = (w[0] ^ s[0]) | (w[1] ^ s[1]); if ( !different || (score >= threshold) ) { BlastAaLookupAddWordHit(lookup,s,corrected_offset); if (!different) lookup->exact_matches++; else lookup->neighbor_matches++; } s++; } break; case 3: p0 = matrix[w[0]]; p1 = matrix[w[1]]; p2 = matrix[w[2]]; while (s < s_end) { score = p0[s[0]] + p1[s[1]] + p2[s[2]]; different = (w[0] ^ s[0]) | (w[1] ^ s[1]) | (w[2] ^ s[2]); if ( !different || (score >= threshold) ) { BlastAaLookupAddWordHit(lookup,s,corrected_offset); if (!different) lookup->exact_matches++; else lookup->neighbor_matches++; } s++; } break; default: while (s < s_end) { score = 0; different = 0; for (i = 0; i < lookup->wordsize; i++) { score += matrix[w[i]][s[i]]; different |= w[i] ^ s[i]; } if ( !different || (score >= threshold) ) { BlastAaLookupAddWordHit(lookup,s,corrected_offset); if (!different) lookup->exact_matches++; else lookup->neighbor_matches++; } s++; } break; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -