📄 blast_setup_cxx.cpp
字号:
} else { pair<AutoPtr<Uint1, CDeleter<Uint1> >, TSeqPos> seqbuf( GetSequence(*itr->seqloc, encoding, itr->scope, eNa_strand_unknown, eSentinels)); int offset = qinfo->context_offsets[ctx_index]; memcpy(&buf[offset], seqbuf.first.get(), seqbuf.second); } ctx_index += nframes; if (mask) { if ( !last_mask ) head_mask = last_mask = mask; else { last_mask->next = mask; last_mask = last_mask->next; } } } if (BlastSeqBlkNew(seqblk) < 0) { NCBI_THROW(CBlastException, eOutOfMemory, "Query sequence block"); } BlastSeqBlkSetSequence(*seqblk, buf, buflen - 2); (*seqblk)->lcase_mask = head_mask; (*seqblk)->lcase_mask_allocated = TRUE; return;}voidSetupSubjects(const TSeqLocVector& subjects, EProgram prog, vector<BLAST_SequenceBlk*>* seqblk_vec, unsigned int* max_subjlen){ ASSERT(seqblk_vec); ASSERT(max_subjlen); ASSERT(subjects.size() != 0); // Nucleotide subject sequences are stored in ncbi2na format, but the // uncompressed format (ncbi4na/blastna) is also kept to re-evaluate with // the ambiguities bool subj_is_na = (prog == eBlastn || prog == eTblastn || prog == eTblastx); ESentinelType sentinels = eSentinels; if (prog == eTblastn || prog == eTblastx) { sentinels = eNoSentinels; } Uint1 encoding = GetSubjectEncoding(prog); // TODO: Should strand selection on the subject sequences be allowed? //ENa_strand strand = options->GetStrandOption(); int index = 0; // Needed for lower case masks only. *max_subjlen = 0; ITERATE(TSeqLocVector, itr, subjects) { BLAST_SequenceBlk* subj = NULL; pair<AutoPtr<Uint1, CDeleter<Uint1> >, TSeqPos> seqbuf( GetSequence(*itr->seqloc, encoding, itr->scope, eNa_strand_plus, sentinels)); if (BlastSeqBlkNew(&subj) < 0) { NCBI_THROW(CBlastException, eOutOfMemory, "Subject sequence block"); } /* Set the lower case mask, if it exists */ if (itr->mask) subj->lcase_mask = CSeqLoc2BlastMaskLoc(*itr->mask, index); ++index; if (subj_is_na) { BlastSeqBlkSetSequence(subj, seqbuf.first.release(), (sentinels == eSentinels) ? seqbuf.second - 2 : seqbuf.second); try { // Get the compressed sequence pair<AutoPtr<Uint1, CDeleter<Uint1> >, TSeqPos> comp_seqbuf( GetSequence(*itr->seqloc, NCBI2NA_ENCODING, itr->scope, eNa_strand_plus, eNoSentinels)); BlastSeqBlkSetCompressedSequence(subj, comp_seqbuf.first.release()); } catch (const CSeqVectorException&) { BlastSequenceBlkFree(subj); NCBI_THROW(CBlastException, eInvalidCharacter, "Gaps found in subject sequence"); } } else { BlastSeqBlkSetSequence(subj, seqbuf.first.release(), seqbuf.second - 2); } seqblk_vec->push_back(subj); (*max_subjlen) = MAX((*max_subjlen), sequence::GetLength(*itr->seqloc, itr->scope)); }}TSeqPos CalculateSeqBufferLength(TSeqPos sequence_length, Uint1 encoding, ENa_strand strand, ESentinelType sentinel) THROWS((CBlastException)){ TSeqPos retval = 0; switch (encoding) { // Strand and sentinels are irrelevant in this encoding. // Strand is always plus and sentinels cannot be represented case NCBI2NA_ENCODING: ASSERT(sentinel == eNoSentinels); ASSERT(strand == eNa_strand_plus); retval = sequence_length / COMPRESSION_RATIO + 1; break; case NCBI4NA_ENCODING: if (sentinel == eSentinels) { if (strand == eNa_strand_both) { retval = sequence_length * 2; retval += 3; } else { retval = sequence_length + 2; } } else { if (strand == eNa_strand_both) { retval = sequence_length * 2; } else { retval = sequence_length; } } break; case BLASTP_ENCODING: ASSERT(sentinel == eSentinels); ASSERT(strand == eNa_strand_unknown); retval = sequence_length + 2; break; default: NCBI_THROW(CBlastException, eBadParameter, "Unsupported encoding"); } return retval;}// Compresses sequence data on vector to buffer, which should have been// allocated and have the right size.static void CompressDNA(const CSeqVector& vec, Uint1* buffer, const int buflen){ TSeqPos i; // loop index of original sequence TSeqPos ci; // loop index for compressed sequence ASSERT(vec.GetCoding() == CSeq_data::e_Ncbi2na); for (ci = 0, i = 0; ci < (TSeqPos) buflen-1; ci++, i += COMPRESSION_RATIO) { buffer[ci] = ((vec[i+0] & NCBI2NA_MASK)<<6) | ((vec[i+1] & NCBI2NA_MASK)<<4) | ((vec[i+2] & NCBI2NA_MASK)<<2) | ((vec[i+3] & NCBI2NA_MASK)<<0); } buffer[ci] = 0; for (; i < vec.size(); i++) { Uint1 bit_shift = 0; switch (i%COMPRESSION_RATIO) { case 0: bit_shift = 6; break; case 1: bit_shift = 4; break; case 2: bit_shift = 2; break; default: abort(); // should never happen } buffer[ci] |= ((vec[i] & NCBI2NA_MASK)<<bit_shift); } // Set the number of bases in the last byte. buffer[ci] |= vec.size()%COMPRESSION_RATIO;}Uint1 GetSentinelByte(Uint1 encoding) THROWS((CBlastException)){ switch (encoding) { case BLASTP_ENCODING: return NULLB; case NCBI4NA_ENCODING: case BLASTNA_ENCODING: return 0xF; default: NCBI_THROW(CBlastException, eBadParameter, "Unsupported encoding"); }}pair<AutoPtr<Uint1, CDeleter<Uint1> >, TSeqPos>GetSequence(const CSeq_loc& sl, Uint1 encoding, CScope* scope, ENa_strand strand, ESentinelType sentinel) THROWS((CBlastException, CException)){ Uint1* buf = NULL; // buffer to write sequence Uint1* buf_var = NULL; // temporary pointer to buffer TSeqPos buflen; // length of buffer allocated TSeqPos i; // loop index of original sequence AutoPtr<Uint1, CDeleter<Uint1> > safe_buf; // contains buf to ensure // exception safety CBioseq_Handle handle = scope->GetBioseqHandle(sl); // might throw exception // Retrieves the correct strand (plus or minus), but not both CSeqVector sv = handle.GetSequenceView(sl, CBioseq_Handle::eViewConstructed, CBioseq_Handle::eCoding_Ncbi); switch (encoding) { // Protein sequences (query & subject) always have sentinels around sequence case BLASTP_ENCODING: sv.SetCoding(CSeq_data::e_Ncbistdaa); buflen = CalculateSeqBufferLength(sv.size(), BLASTP_ENCODING); buf = buf_var = (Uint1*) malloc(sizeof(Uint1)*buflen); safe_buf.reset(buf); *buf_var++ = GetSentinelByte(encoding); for (i = 0; i < sv.size(); i++) *buf_var++ = sv[i]; *buf_var++ = GetSentinelByte(encoding); break; case NCBI4NA_ENCODING: case BLASTNA_ENCODING: // Used for nucleotide blastn queries sv.SetCoding(CSeq_data::e_Ncbi4na); buflen = CalculateSeqBufferLength(sv.size(), NCBI4NA_ENCODING, strand, sentinel); buf = buf_var = (Uint1*) malloc(sizeof(Uint1)*buflen); safe_buf.reset(buf); if (sentinel == eSentinels) *buf_var++ = GetSentinelByte(encoding); if (encoding == BLASTNA_ENCODING) { for (i = 0; i < sv.size(); i++) { *buf_var++ = NCBI4NA_TO_BLASTNA[sv[i]]; } } else { for (i = 0; i < sv.size(); i++) { *buf_var++ = sv[i]; } } if (sentinel == eSentinels) *buf_var++ = GetSentinelByte(encoding); if (strand == eNa_strand_both) { // Get the minus strand if both strands are required sv = handle.GetSequenceView(sl, CBioseq_Handle::eViewConstructed, CBioseq_Handle::eCoding_Ncbi, eNa_strand_minus); sv.SetCoding(CSeq_data::e_Ncbi4na); if (encoding == BLASTNA_ENCODING) { for (i = 0; i < sv.size(); i++) { *buf_var++ = NCBI4NA_TO_BLASTNA[sv[i]]; } } else { for (i = 0; i < sv.size(); i++) { *buf_var++ = sv[i]; } } if (sentinel == eSentinels) { *buf_var++ = GetSentinelByte(encoding); } } break; /* Used only in Blast2Sequences for the subject sequence. * No sentinels can be used. As in readdb, remainder * (sv.size()%COMPRESSION_RATIO != 0) goes in the last 2 bits of the * last byte. */ case NCBI2NA_ENCODING: ASSERT(sentinel == eNoSentinels); sv.SetCoding(CSeq_data::e_Ncbi2na); buflen = CalculateSeqBufferLength(sv.size(), sv.GetCoding(), eNa_strand_plus, eNoSentinels); buf = (Uint1*) malloc(sizeof(Uint1)*buflen); safe_buf.reset(buf); CompressDNA(sv, buf, buflen); break; default: NCBI_THROW(CBlastException, eBadParameter, "Invalid encoding"); } return make_pair(safe_buf, buflen);}#if 0// Not used right now, need to complete implementationvoidBLASTGetTranslation(const Uint1* seq, const Uint1* seq_rev, const int nucl_length, const short frame, Uint1* translation){ TSeqPos ni = 0; // index into nucleotide sequence TSeqPos pi = 0; // index into protein sequence const Uint1* nucl_seq = frame >= 0 ? seq : seq_rev; translation[0] = NULLB; for (ni = ABS(frame)-1; ni < (TSeqPos) nucl_length-2; ni += CODON_LENGTH) { Uint1 residue = CGen_code_table::CodonToIndex(nucl_seq[ni+0], nucl_seq[ni+1], nucl_seq[ni+2]); if (IS_residue(residue)) translation[pi++] = residue; } translation[pi++] = NULLB; return;}#endifAutoPtr<Uint1, ArrayDeleter<Uint1> >FindGeneticCode(int genetic_code){ Uint1* retval = NULL; CSeq_data gc_ncbieaa(CGen_code_table::GetNcbieaa(genetic_code), CSeq_data::e_Ncbieaa); CSeq_data gc_ncbistdaa; TSeqPos nconv = CSeqportUtil::Convert(gc_ncbieaa, &gc_ncbistdaa, CSeq_data::e_Ncbistdaa); ASSERT(gc_ncbistdaa.IsNcbistdaa()); ASSERT(nconv == gc_ncbistdaa.GetNcbistdaa().Get().size()); try { retval = new Uint1[nconv]; } catch (const bad_alloc&) { return NULL; } for (unsigned int i = 0; i < nconv; i++) retval[i] = gc_ncbistdaa.GetNcbistdaa().Get()[i]; return retval;}stringFindMatrixPath(const char* matrix_name, bool is_prot){ string retval; string full_path; // full path to matrix file if (!matrix_name) return retval; string mtx(matrix_name); mtx = NStr::ToUpper(mtx); // Look for matrix file in local directory full_path = mtx; if (CFile(full_path).Exists()) { return retval; } // Obtain the matrix path from the ncbi configuration file CMetaRegistry::SEntry sentry; sentry = CMetaRegistry::Load("ncbi", CMetaRegistry::eName_RcOrIni); string path = sentry.registry ? sentry.registry->Get("NCBI", "Data") : ""; full_path = CFile::MakePath(path, mtx); if (CFile(full_path).Exists()) { retval = full_path; retval.erase(retval.size() - mtx.size()); return retval; } // Try appending "aa" or "nt" full_path = path; full_path += CFile::AddTrailingPathSeparator(full_path); full_path += is_prot ? "aa" : "nt"; full_path += CFile::AddTrailingPathSeparator(full_path); full_path += mtx;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -