📄 blast_lookup.c
字号:
}static void AddPSSMWordHits(LookupTable* lookup, Int4** matrix, Int4 offset, Int4 query_bias){ Uint1* s = lookup->neighbors; Uint1* s_end=s + lookup->neighbors_length - lookup->wordsize; Int4 score; Int4 threshold = lookup->threshold; Int4 i; Int4* p0; Int4* p1; Int4* p2; Int4 corrected_offset = offset + query_bias; /* Equivalent to AddWordHits(), except that the word score * is derived from a Position-Specific Scoring Matrix (PSSM) */ switch (lookup->wordsize) { case 1: p0 = matrix[offset]; while (s < s_end) { if (p0[s[0]] >= threshold) BlastAaLookupAddWordHit(lookup,s,corrected_offset); s++; } break; case 2: p0 = matrix[offset]; p1 = matrix[offset+1]; while (s < s_end) { score = p0[s[0]] + p1[s[1]]; if (score >= threshold) BlastAaLookupAddWordHit(lookup,s,corrected_offset); s++; } break; case 3: p0 = matrix[offset]; p1 = matrix[offset + 1]; p2 = matrix[offset + 2]; while (s < s_end) { score = p0[s[0]] + p1[s[1]] + p2[s[2]]; if (score >= threshold) BlastAaLookupAddWordHit(lookup,s,corrected_offset); s++; } break; default: while (s < s_end) { score = 0; for(i=0;i<lookup->wordsize;i++) score += matrix[offset + i][s[i]]; if (score >= threshold) BlastAaLookupAddWordHit(lookup,s,offset); s++; } break; }}/****************************************************** * * Nucleotide BLAST specific functions and definitions * ******************************************************//* Description in na_lookup.h */Int4 BlastNaScanSubject_AG(const LookupTableWrap* lookup_wrap, const BLAST_SequenceBlk* subject, Int4 start_offset, Uint4* NCBI_RESTRICT q_offsets, Uint4* NCBI_RESTRICT s_offsets, Int4 max_hits, Int4* end_offset){ LookupTable* lookup = (LookupTable*) lookup_wrap->lut; Uint1* s; Uint1* abs_start; Int4 index=0, s_off; Int4* lookup_pos; Int4 num_hits; Int4 q_off; PV_ARRAY_TYPE *pv_array = lookup->pv; Int4 total_hits = 0; Int4 compressed_wordsize, compressed_scan_step; Boolean full_byte_scan = (lookup->scan_step % COMPRESSION_RATIO == 0); Int4 i; abs_start = subject->sequence; s = abs_start + start_offset/COMPRESSION_RATIO; compressed_scan_step = lookup->scan_step / COMPRESSION_RATIO; compressed_wordsize = lookup->reduced_wordsize; index = 0; /* NB: s in this function always points to the start of the word! */ if (full_byte_scan) { Uint1* s_end = abs_start + subject->length/COMPRESSION_RATIO - compressed_wordsize; for ( ; s <= s_end; s += compressed_scan_step) { index = 0; for (i = 0; i < compressed_wordsize; ++i) index = ((index)<<FULL_BYTE_SHIFT) | s[i]; if (NA_PV_TEST(pv_array, index, PV_ARRAY_BTS)) { num_hits = lookup->thick_backbone[index].num_used; ASSERT(num_hits != 0); if (num_hits > (max_hits - total_hits)) break; if ( num_hits <= HITS_ON_BACKBONE ) /* hits live in thick_backbone */ lookup_pos = lookup->thick_backbone[index].payload.entries; else /* hits live in overflow array */ lookup_pos = & ( lookup->overflow[lookup->thick_backbone[index].payload.overflow_cursor] ); s_off = (s - abs_start)*COMPRESSION_RATIO; while (num_hits) { q_off = *((Uint4 *) lookup_pos); /* get next query offset */ lookup_pos++; num_hits--; q_offsets[total_hits] = q_off; s_offsets[total_hits++] = s_off; } } } *end_offset = (s - abs_start)*COMPRESSION_RATIO; } else { Int4 reduced_word_length = compressed_wordsize*COMPRESSION_RATIO; Int4 last_offset = subject->length - reduced_word_length; Uint1 bit; Int4 adjusted_index; for (s_off = start_offset; s_off <= last_offset; s_off += lookup->scan_step) { s = abs_start + (s_off / COMPRESSION_RATIO); bit = 2*(s_off % COMPRESSION_RATIO); /* Compute index for a word made of full bytes */ index = 0; for (i = 0; i < compressed_wordsize; ++i) index = ((index)<<FULL_BYTE_SHIFT) | (*s++); adjusted_index = BlastNaLookupAdjustIndex(s, index, lookup->mask, bit); if (NA_PV_TEST(pv_array, adjusted_index, PV_ARRAY_BTS)) { num_hits = lookup->thick_backbone[adjusted_index].num_used; ASSERT(num_hits != 0); if (num_hits > (max_hits - total_hits)) break; if ( num_hits <= HITS_ON_BACKBONE ) /* hits live in thick_backbone */ lookup_pos = lookup->thick_backbone[adjusted_index].payload.entries; else /* hits live in overflow array */ lookup_pos = & (lookup->overflow [ lookup->thick_backbone[adjusted_index].payload.overflow_cursor]); while (num_hits) { q_off = *((Uint4 *) lookup_pos); /* get next query offset */ lookup_pos++; num_hits--; q_offsets[total_hits] = q_off; s_offsets[total_hits++] = s_off; } } } *end_offset = s_off; } return total_hits;}/* Description in na_lookup.h */Int4 BlastNaScanSubject(const LookupTableWrap* lookup_wrap, const BLAST_SequenceBlk* subject, Int4 start_offset, Uint4* q_offsets, Uint4* s_offsets, Int4 max_hits, Int4* end_offset){ Uint1* s; Uint1* abs_start,* s_end; Int4 index=0, s_off; LookupTable* lookup = (LookupTable*) lookup_wrap->lut; Int4* lookup_pos; Int4 num_hits; Int4 q_off; PV_ARRAY_TYPE *pv_array = lookup->pv; Int4 total_hits = 0; Int4 reduced_word_length = lookup->reduced_wordsize*COMPRESSION_RATIO; Int4 i; abs_start = subject->sequence; s = abs_start + start_offset/COMPRESSION_RATIO; /* s_end points to the place right after the last full sequence byte */ s_end = abs_start + (*end_offset + reduced_word_length)/COMPRESSION_RATIO; index = 0; /* Compute the first index */ for (i = 0; i < lookup->reduced_wordsize; ++i) index = ((index)<<FULL_BYTE_SHIFT) | *s++; /* s points to the byte right after the end of the current word */ while (s <= s_end) { if (NA_PV_TEST(pv_array, index, PV_ARRAY_BTS)) { num_hits = lookup->thick_backbone[index].num_used; ASSERT(num_hits != 0); if (num_hits > (max_hits - total_hits)) break; if ( num_hits <= HITS_ON_BACKBONE ) /* hits live in thick_backbone */ lookup_pos = lookup->thick_backbone[index].payload.entries; else /* hits live in overflow array */ lookup_pos = & (lookup->overflow[lookup->thick_backbone[index].payload.overflow_cursor]); /* Save the hits offsets */ s_off = (s - abs_start)*COMPRESSION_RATIO - reduced_word_length; while (num_hits) { q_off = *((Uint4 *) lookup_pos); /* get next query offset */ lookup_pos++; num_hits--; q_offsets[total_hits] = q_off; s_offsets[total_hits++] = s_off; } } /* Compute the next value of the index */ index = (((index)<<FULL_BYTE_SHIFT) & lookup->mask) | (*s++); } /* Ending offset should point to the start of the word that ends at s */ *end_offset = (s - abs_start)*COMPRESSION_RATIO - reduced_word_length; return total_hits;}LookupTable* LookupTableDestruct(LookupTable* lookup){ sfree(lookup->thick_backbone); sfree(lookup->overflow); sfree(lookup->pv); sfree(lookup); return NULL;}RPSLookupTable* RPSLookupTableDestruct(RPSLookupTable* lookup){ /* The following will only free memory that was allocated by RPSLookupTableNew. */ sfree(lookup->rps_pssm); sfree(lookup->pv); sfree(lookup); return NULL;}/** Add a word information to the lookup table * @param lookup Pointer to the lookup table structure [in] [out] * @param w Pointer to the start of a word [in] * @param query_offset Offset into the query sequence where this word ends [in] */static Int4 BlastNaLookupAddWordHit(LookupTable* lookup, Uint1* w, Int4 query_offset){ Int4 index=0; Int4 chain_size = 0; /* Total number of elements in the chain */ Int4 hits_in_chain = 0; /* Number of occupied elements in the chain, not including the zeroth and first positions */ Int4* chain = NULL; /* compute its index */ if (Na_LookupComputeIndex(lookup, w, &index) == -1) /* Word contains ambiguities, skip it */ return 0; ASSERT(index < lookup->backbone_size); /* If backbone cell is null, initialize a new chain */ if (lookup->thin_backbone[index] == NULL) { chain_size = 8; hits_in_chain = 0; chain = calloc( chain_size, sizeof(Int4) ); ASSERT(chain != NULL); chain[0] = chain_size; chain[1] = hits_in_chain; lookup->thin_backbone[index] = chain; } else /* Otherwise, use the existing chain */ { chain = lookup->thin_backbone[index]; chain_size = chain[0]; hits_in_chain = chain[1]; } /* If the chain is full, allocate more room */ if ( (hits_in_chain + 2) == chain_size ) { chain_size = chain_size * 2; chain = realloc(chain, chain_size * sizeof(Int4) ); ASSERT(chain != NULL); lookup->thin_backbone[index] = chain; chain[0] = chain_size; } /* Add the hit */ chain[ chain[1] + 2 ] = query_offset; chain[1] += 1; return 0;}/* See description in blast_lookup.h */Int4 BlastNaLookupIndexQuery(LookupTable* lookup, BLAST_SequenceBlk* query, ListNode* location){ ListNode* loc; Int4 from, to; Int4 offset; Uint1* sequence; for(loc=location; loc; loc=loc->next) { from = ((SSeqRange*) loc->ptr)->left; to = ((SSeqRange*) loc->ptr)->right + 1; sequence = query->sequence + from; /* Last offset is such that full word fits in the sequence */ to -= lookup->reduced_wordsize*COMPRESSION_RATIO; for(offset = from; offset <= to; offset++) { BlastNaLookupAddWordHit(lookup, sequence, offset); ++sequence; } } return 0;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -