📄 blast_util.c
字号:
}BlastQueryInfo* BlastQueryInfoFree(BlastQueryInfo* query_info){ sfree(query_info->context_offsets); sfree(query_info->length_adjustments); sfree(query_info->eff_searchsp_array); sfree(query_info); return NULL;}/** Convert a sequence in ncbi4na or blastna encoding into a packed sequence * in ncbi2na encoding. Needed for 2 sequences BLASTn comparison. */Int2 BLAST_PackDNA(Uint1* buffer, Int4 length, Uint1 encoding, Uint1** packed_seq){ Int4 new_length = length/COMPRESSION_RATIO + 1; Uint1* new_buffer = (Uint1*) malloc(new_length); Int4 index, new_index; Uint1 shift; /* bit shift to pack bases */ for (index=0, new_index=0; new_index < new_length-1; ++new_index, index += COMPRESSION_RATIO) { if (encoding == BLASTNA_ENCODING) new_buffer[new_index] = ((buffer[index]&NCBI2NA_MASK)<<6) | ((buffer[index+1]&NCBI2NA_MASK)<<4) | ((buffer[index+2]&NCBI2NA_MASK)<<2) | (buffer[index+3]&NCBI2NA_MASK); else new_buffer[new_index] = ((NCBI4NA_TO_BLASTNA[buffer[index]]&NCBI2NA_MASK)<<6) | ((NCBI4NA_TO_BLASTNA[buffer[index+1]]&NCBI2NA_MASK)<<4) | ((NCBI4NA_TO_BLASTNA[buffer[index+2]]&NCBI2NA_MASK)<<2) | (NCBI4NA_TO_BLASTNA[buffer[index+3]]&NCBI2NA_MASK); } /* Handle the last byte of the compressed sequence. Last 2 bits of the last byte tell the number of valid packed sequence bases in it. */ new_buffer[new_index] = length % COMPRESSION_RATIO; for (; index < length; index++) { switch (index%COMPRESSION_RATIO) { case 0: shift = 6; break; case 1: shift = 4; break; case 2: shift = 2; break; default: abort(); /* should never happen */ } if (encoding == BLASTNA_ENCODING) new_buffer[new_index] |= ((buffer[index]&NCBI2NA_MASK)<<shift); else new_buffer[new_index] |= ((NCBI4NA_TO_BLASTNA[buffer[index]]&NCBI2NA_MASK)<<shift); } *packed_seq = new_buffer; return 0;}Int2 BLAST_InitDNAPSequence(BLAST_SequenceBlk* query_blk, BlastQueryInfo* query_info){ Uint1* buffer,* seq,* tmp_seq; Int4 total_length, index, offset, i, context; Int4 length[CODON_LENGTH]; Int4* context_offsets = query_info->context_offsets; total_length = context_offsets[query_info->last_context+1] + 1; buffer = (Uint1*) malloc(total_length); for (index = 0; index <= query_info->last_context; index += CODON_LENGTH) { seq = &buffer[context_offsets[index]]; for (i = 0; i < CODON_LENGTH; ++i) { *seq++ = NULLB; length[i] = BLAST_GetQueryLength(query_info, index + i); } for (i = 0; ; ++i) { context = i % 3; offset = i / 3; if (offset >= length[context]) { /* Once one frame is past its end, we are done */ break; } tmp_seq = &query_blk->sequence[context_offsets[index+context]]; *seq++ = tmp_seq[offset]; } } /* The mixed-frame protein sequence buffer will be saved in 'sequence_start' */ query_blk->oof_sequence = buffer; query_blk->oof_sequence_allocated = TRUE; return 0;}/* Gets the translation array for a given genetic code. * This array is optimized for the NCBI2na alphabet. * The reverse complement can also be spcified. * @param genetic_code Genetic code string in ncbistdaa encoding [in] * @param reverse_complement Get translation table for the reverse strand? [in] * @return The translation table.*/static Uint1*BLAST_GetTranslationTable(const Uint1* genetic_code, Boolean reverse_complement){ Int2 index1, index2, index3, bp1, bp2, bp3; Int2 codon; Uint1* translation; /* The next array translate between the ncbi2na rep's and the rep's used by the genetic_code tables. The rep used by the genetic code arrays is in mapping: T=0, C=1, A=2, G=3 */ static Uint1 mapping[4] = {2, /* A in ncbi2na */ 1, /* C in ncbi2na. */ 3, /* G in ncbi2na. */ 0 /* T in ncbi2na. */ }; if (genetic_code == NULL) return NULL; translation = calloc(64, sizeof(Uint1)); if (translation == NULL) return NULL; for (index1=0; index1<4; index1++) { for (index2=0; index2<4; index2++) { for (index3=0; index3<4; index3++) { /* The reverse complement codon is saved in it's orginal (non-complement) form AND with the high-order bits reversed from the non-complement form, as this is how they appear in the sequence. */ if (reverse_complement) { bp1 = 3 - index1; bp2 = 3 - index2; bp3 = 3 - index3; codon = (mapping[bp1]<<4) + (mapping[bp2]<<2) + (mapping[bp3]); translation[(index3<<4) + (index2<<2) + index1] = genetic_code[codon]; } else { codon = (mapping[index1]<<4) + (mapping[index2]<<2) + (mapping[index3]); translation[(index1<<4) + (index2<<2) + index3] = genetic_code[codon]; } } } } return translation;}Int2 BLAST_GetAllTranslations(const Uint1* nucl_seq, Uint1 encoding, Int4 nucl_length, const Uint1* genetic_code, Uint1** translation_buffer_ptr, Int4** frame_offsets_ptr, Uint1** mixed_seq_ptr){ Uint1* translation_buffer,* mixed_seq; Uint1* translation_table = NULL,* translation_table_rc; Uint1* nucl_seq_rev; Int4 offset = 0, length; Int4 context; Int4* frame_offsets; Int2 frame; if (encoding != NCBI2NA_ENCODING && encoding != NCBI4NA_ENCODING) return -1; if ((translation_buffer = (Uint1*) malloc(2*(nucl_length+1)+1)) == NULL) return -1; if (encoding == NCBI4NA_ENCODING) { /* First produce the reverse strand of the nucleotide sequence */ GetReverseNuclSequence(nucl_seq, nucl_length, &nucl_seq_rev); } else { translation_table = BLAST_GetTranslationTable(genetic_code, FALSE); translation_table_rc = BLAST_GetTranslationTable(genetic_code, TRUE); } frame_offsets = (Int4*) malloc((NUM_FRAMES+1)*sizeof(Int4)); frame_offsets[0] = 0; for (context = 0; context < NUM_FRAMES; ++context) { frame = BLAST_ContextToFrame(blast_type_blastx, context); if (encoding == NCBI2NA_ENCODING) { if (frame > 0) { length = BLAST_TranslateCompressedSequence(translation_table, nucl_length, nucl_seq, frame, translation_buffer+offset); } else { length = BLAST_TranslateCompressedSequence(translation_table_rc, nucl_length, nucl_seq, frame, translation_buffer+offset); } } else { length = BLAST_GetTranslation(nucl_seq, nucl_seq_rev, nucl_length, frame, translation_buffer+offset, genetic_code); } /* Increment offset by 1 extra byte for the sentinel NULLB between frames. */ offset += length + 1; frame_offsets[context+1] = offset; } if (encoding == NCBI4NA_ENCODING) { sfree(nucl_seq_rev); } else { free(translation_table); sfree(translation_table_rc); } /* All frames are ready. For the out-of-frame gapping option, allocate and fill buffer with the mixed frame sequence */ if (mixed_seq_ptr) { Uint1* seq; Int4 index, i; *mixed_seq_ptr = mixed_seq = (Uint1*) malloc(2*(nucl_length+1)); seq = mixed_seq; for (index = 0; index < NUM_FRAMES; index += CODON_LENGTH) { for (i = 0; i <= nucl_length; ++i) { context = i % CODON_LENGTH; offset = i / CODON_LENGTH; *seq++ = translation_buffer[frame_offsets[index+context]+offset]; } } } if (translation_buffer_ptr) *translation_buffer_ptr = translation_buffer; else sfree(translation_buffer); if (frame_offsets_ptr) *frame_offsets_ptr = frame_offsets; else sfree(frame_offsets); return 0;}int GetPartialTranslation(const Uint1* nucl_seq, Int4 nucl_length, Int2 frame, const Uint1* genetic_code, Uint1** translation_buffer_ptr, Int4* protein_length, Uint1** mixed_seq_ptr){ Uint1* translation_buffer; Uint1* nucl_seq_rev = NULL; Int4 length; if ((translation_buffer = (Uint1*) malloc(2*(nucl_length+1)+1)) == NULL) return -1; if (translation_buffer_ptr) *translation_buffer_ptr = translation_buffer; if (frame < 0) { /* First produce the reverse strand of the nucleotide sequence */ GetReverseNuclSequence(nucl_seq, nucl_length, &nucl_seq_rev); } if (!mixed_seq_ptr) { length = BLAST_GetTranslation(nucl_seq, nucl_seq_rev, nucl_length, frame, translation_buffer, genetic_code); if (protein_length) *protein_length = length; } else { Int2 index; Int2 frame_sign = ((frame < 0) ? -1 : 1); Int4 offset = 0; Int4 frame_offsets[3]; Uint1* seq; for (index = 1; index <= 3; ++index) { length = BLAST_GetTranslation(nucl_seq, nucl_seq_rev, nucl_length, (short)(frame_sign*index), translation_buffer+offset, genetic_code); frame_offsets[index-1] = offset; offset += length + 1; } *mixed_seq_ptr = (Uint1*) malloc(2*(nucl_length+1)); if (protein_length) *protein_length = 2*nucl_length - 1; for (index = 0, seq = *mixed_seq_ptr; index <= nucl_length; ++index, ++seq) { *seq = translation_buffer[frame_offsets[index%3]+(index/3)]; } } sfree(nucl_seq_rev); if (!translation_buffer_ptr) sfree(translation_buffer); return 0;}Int4 FrameToContext(Int2 frame) { if (frame > 0) return frame - 1; else return 2 - frame;}Int4 BSearchInt4(Int4 n, Int4* A, Int4 size){ Int4 m, b, e; b = 0; e = size; while (b < e - 1) { m = (b + e) / 2; if (A[m] > n) e = m; else b = m; } return b;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -