⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 blast_setup_cxx.cpp

📁 ncbi源码
💻 CPP
📖 第 1 页 / 共 3 页
字号:
        } else {            pair<AutoPtr<Uint1, CDeleter<Uint1> >, TSeqPos> seqbuf(                GetSequence(*itr->seqloc, encoding, itr->scope,                            eNa_strand_unknown, eSentinels));            int offset = qinfo->context_offsets[ctx_index];            memcpy(&buf[offset], seqbuf.first.get(), seqbuf.second);        }        ctx_index += nframes;                if (mask) {            if ( !last_mask )                head_mask = last_mask = mask;            else {                last_mask->next = mask;                last_mask = last_mask->next;            }        }    }    if (BlastSeqBlkNew(seqblk) < 0) {        NCBI_THROW(CBlastException, eOutOfMemory, "Query sequence block");    }    BlastSeqBlkSetSequence(*seqblk, buf, buflen - 2);    (*seqblk)->lcase_mask = head_mask;    (*seqblk)->lcase_mask_allocated = TRUE;    return;}voidSetupSubjects(const TSeqLocVector& subjects,               EProgram prog,              vector<BLAST_SequenceBlk*>* seqblk_vec,               unsigned int* max_subjlen){    ASSERT(seqblk_vec);    ASSERT(max_subjlen);    ASSERT(subjects.size() != 0);    // Nucleotide subject sequences are stored in ncbi2na format, but the    // uncompressed format (ncbi4na/blastna) is also kept to re-evaluate with    // the ambiguities    bool subj_is_na = (prog == eBlastn  ||                       prog == eTblastn ||                       prog == eTblastx);    ESentinelType sentinels = eSentinels;    if (prog == eTblastn || prog == eTblastx) {        sentinels = eNoSentinels;    }    Uint1 encoding = GetSubjectEncoding(prog);           // TODO: Should strand selection on the subject sequences be allowed?    //ENa_strand strand = options->GetStrandOption();     int index = 0; // Needed for lower case masks only.    *max_subjlen = 0;    ITERATE(TSeqLocVector, itr, subjects) {        BLAST_SequenceBlk* subj = NULL;        pair<AutoPtr<Uint1, CDeleter<Uint1> >, TSeqPos> seqbuf(            GetSequence(*itr->seqloc, encoding, itr->scope,                        eNa_strand_plus, sentinels));        if (BlastSeqBlkNew(&subj) < 0) {            NCBI_THROW(CBlastException, eOutOfMemory, "Subject sequence block");        }        /* Set the lower case mask, if it exists */        if (itr->mask)            subj->lcase_mask = CSeqLoc2BlastMaskLoc(*itr->mask, index);        ++index;        if (subj_is_na) {            BlastSeqBlkSetSequence(subj, seqbuf.first.release(),                (sentinels == eSentinels) ? seqbuf.second - 2 : seqbuf.second);            try {                // Get the compressed sequence                pair<AutoPtr<Uint1, CDeleter<Uint1> >, TSeqPos> comp_seqbuf(                    GetSequence(*itr->seqloc, NCBI2NA_ENCODING, itr->scope,                                 eNa_strand_plus, eNoSentinels));                BlastSeqBlkSetCompressedSequence(subj,                                                  comp_seqbuf.first.release());            } catch (const CSeqVectorException&) {                BlastSequenceBlkFree(subj);                NCBI_THROW(CBlastException, eInvalidCharacter,                            "Gaps found in subject sequence");            }        } else {            BlastSeqBlkSetSequence(subj, seqbuf.first.release(),                                    seqbuf.second - 2);        }        seqblk_vec->push_back(subj);        (*max_subjlen) = MAX((*max_subjlen),                sequence::GetLength(*itr->seqloc, itr->scope));    }}TSeqPos CalculateSeqBufferLength(TSeqPos sequence_length, Uint1 encoding,                                 ENa_strand strand, ESentinelType sentinel)                                 THROWS((CBlastException)){    TSeqPos retval = 0;    switch (encoding) {    // Strand and sentinels are irrelevant in this encoding.    // Strand is always plus and sentinels cannot be represented    case NCBI2NA_ENCODING:        ASSERT(sentinel == eNoSentinels);        ASSERT(strand == eNa_strand_plus);        retval = sequence_length / COMPRESSION_RATIO + 1;        break;    case NCBI4NA_ENCODING:        if (sentinel == eSentinels) {            if (strand == eNa_strand_both) {                retval = sequence_length * 2;                retval += 3;            } else {                retval = sequence_length + 2;            }        } else {            if (strand == eNa_strand_both) {                retval = sequence_length * 2;            } else {                retval = sequence_length;            }        }        break;    case BLASTP_ENCODING:        ASSERT(sentinel == eSentinels);        ASSERT(strand == eNa_strand_unknown);        retval = sequence_length + 2;        break;    default:        NCBI_THROW(CBlastException, eBadParameter, "Unsupported encoding");    }    return retval;}// Compresses sequence data on vector to buffer, which should have been// allocated and have the right size.static void CompressDNA(const CSeqVector& vec, Uint1* buffer, const int buflen){    TSeqPos i;                  // loop index of original sequence    TSeqPos ci;                 // loop index for compressed sequence    ASSERT(vec.GetCoding() == CSeq_data::e_Ncbi2na);    for (ci = 0, i = 0; ci < (TSeqPos) buflen-1; ci++, i += COMPRESSION_RATIO) {        buffer[ci] = ((vec[i+0] & NCBI2NA_MASK)<<6) |                     ((vec[i+1] & NCBI2NA_MASK)<<4) |                     ((vec[i+2] & NCBI2NA_MASK)<<2) |                     ((vec[i+3] & NCBI2NA_MASK)<<0);    }    buffer[ci] = 0;    for (; i < vec.size(); i++) {            Uint1 bit_shift = 0;            switch (i%COMPRESSION_RATIO) {               case 0: bit_shift = 6; break;               case 1: bit_shift = 4; break;               case 2: bit_shift = 2; break;               default: abort();   // should never happen            }            buffer[ci] |= ((vec[i] & NCBI2NA_MASK)<<bit_shift);    }    // Set the number of bases in the last byte.    buffer[ci] |= vec.size()%COMPRESSION_RATIO;}Uint1 GetSentinelByte(Uint1 encoding) THROWS((CBlastException)){    switch (encoding) {    case BLASTP_ENCODING:        return NULLB;    case NCBI4NA_ENCODING:    case BLASTNA_ENCODING:        return 0xF;    default:        NCBI_THROW(CBlastException, eBadParameter, "Unsupported encoding");    }}pair<AutoPtr<Uint1, CDeleter<Uint1> >, TSeqPos>GetSequence(const CSeq_loc& sl, Uint1 encoding, CScope* scope,            ENa_strand strand, ESentinelType sentinel)             THROWS((CBlastException, CException)){    Uint1* buf = NULL;          // buffer to write sequence    Uint1* buf_var = NULL;      // temporary pointer to buffer    TSeqPos buflen;             // length of buffer allocated    TSeqPos i;                  // loop index of original sequence    AutoPtr<Uint1, CDeleter<Uint1> > safe_buf; // contains buf to ensure                                                // exception safety    CBioseq_Handle handle = scope->GetBioseqHandle(sl); // might throw exception    // Retrieves the correct strand (plus or minus), but not both    CSeqVector sv =        handle.GetSequenceView(sl, CBioseq_Handle::eViewConstructed,                               CBioseq_Handle::eCoding_Ncbi);    switch (encoding) {    // Protein sequences (query & subject) always have sentinels around sequence    case BLASTP_ENCODING:        sv.SetCoding(CSeq_data::e_Ncbistdaa);        buflen = CalculateSeqBufferLength(sv.size(), BLASTP_ENCODING);        buf = buf_var = (Uint1*) malloc(sizeof(Uint1)*buflen);        safe_buf.reset(buf);        *buf_var++ = GetSentinelByte(encoding);        for (i = 0; i < sv.size(); i++)            *buf_var++ = sv[i];        *buf_var++ = GetSentinelByte(encoding);        break;    case NCBI4NA_ENCODING:    case BLASTNA_ENCODING: // Used for nucleotide blastn queries        sv.SetCoding(CSeq_data::e_Ncbi4na);        buflen = CalculateSeqBufferLength(sv.size(), NCBI4NA_ENCODING,                                          strand, sentinel);        buf = buf_var = (Uint1*) malloc(sizeof(Uint1)*buflen);        safe_buf.reset(buf);        if (sentinel == eSentinels)            *buf_var++ = GetSentinelByte(encoding);        if (encoding == BLASTNA_ENCODING) {            for (i = 0; i < sv.size(); i++) {                *buf_var++ = NCBI4NA_TO_BLASTNA[sv[i]];            }        } else {            for (i = 0; i < sv.size(); i++) {                *buf_var++ = sv[i];            }        }        if (sentinel == eSentinels)            *buf_var++ = GetSentinelByte(encoding);        if (strand == eNa_strand_both) {            // Get the minus strand if both strands are required            sv = handle.GetSequenceView(sl, CBioseq_Handle::eViewConstructed,                                        CBioseq_Handle::eCoding_Ncbi,                                         eNa_strand_minus);            sv.SetCoding(CSeq_data::e_Ncbi4na);            if (encoding == BLASTNA_ENCODING) {                for (i = 0; i < sv.size(); i++) {                    *buf_var++ = NCBI4NA_TO_BLASTNA[sv[i]];                }            } else {                for (i = 0; i < sv.size(); i++) {                    *buf_var++ = sv[i];                }            }            if (sentinel == eSentinels) {                *buf_var++ = GetSentinelByte(encoding);            }        }        break;    /* Used only in Blast2Sequences for the subject sequence.      * No sentinels can be used. As in readdb, remainder      * (sv.size()%COMPRESSION_RATIO != 0) goes in the last 2 bits of the      * last byte.     */    case NCBI2NA_ENCODING:        ASSERT(sentinel == eNoSentinels);        sv.SetCoding(CSeq_data::e_Ncbi2na);        buflen = CalculateSeqBufferLength(sv.size(), sv.GetCoding(),                                          eNa_strand_plus, eNoSentinels);        buf = (Uint1*) malloc(sizeof(Uint1)*buflen);        safe_buf.reset(buf);        CompressDNA(sv, buf, buflen);        break;    default:        NCBI_THROW(CBlastException, eBadParameter, "Invalid encoding");    }    return make_pair(safe_buf, buflen);}#if 0// Not used right now, need to complete implementationvoidBLASTGetTranslation(const Uint1* seq, const Uint1* seq_rev,        const int nucl_length, const short frame, Uint1* translation){    TSeqPos ni = 0;     // index into nucleotide sequence    TSeqPos pi = 0;     // index into protein sequence    const Uint1* nucl_seq = frame >= 0 ? seq : seq_rev;    translation[0] = NULLB;    for (ni = ABS(frame)-1; ni < (TSeqPos) nucl_length-2; ni += CODON_LENGTH) {        Uint1 residue = CGen_code_table::CodonToIndex(nucl_seq[ni+0],                                                       nucl_seq[ni+1],                                                      nucl_seq[ni+2]);        if (IS_residue(residue))            translation[pi++] = residue;    }    translation[pi++] = NULLB;    return;}#endifAutoPtr<Uint1, ArrayDeleter<Uint1> >FindGeneticCode(int genetic_code){    Uint1* retval = NULL;    CSeq_data gc_ncbieaa(CGen_code_table::GetNcbieaa(genetic_code),            CSeq_data::e_Ncbieaa);    CSeq_data gc_ncbistdaa;    TSeqPos nconv = CSeqportUtil::Convert(gc_ncbieaa, &gc_ncbistdaa,            CSeq_data::e_Ncbistdaa);    ASSERT(gc_ncbistdaa.IsNcbistdaa());    ASSERT(nconv == gc_ncbistdaa.GetNcbistdaa().Get().size());    try {        retval = new Uint1[nconv];    } catch (const bad_alloc&) {        return NULL;    }    for (unsigned int i = 0; i < nconv; i++)        retval[i] = gc_ncbistdaa.GetNcbistdaa().Get()[i];    return retval;}stringFindMatrixPath(const char* matrix_name, bool is_prot){    string retval;    string full_path;       // full path to matrix file    if (!matrix_name)        return retval;    string mtx(matrix_name);    mtx = NStr::ToUpper(mtx);    // Look for matrix file in local directory    full_path = mtx;    if (CFile(full_path).Exists()) {        return retval;    }    // Obtain the matrix path from the ncbi configuration file    CMetaRegistry::SEntry sentry;    sentry = CMetaRegistry::Load("ncbi", CMetaRegistry::eName_RcOrIni);    string path = sentry.registry ? sentry.registry->Get("NCBI", "Data") : "";    full_path = CFile::MakePath(path, mtx);    if (CFile(full_path).Exists()) {        retval = full_path;        retval.erase(retval.size() - mtx.size());        return retval;    }    // Try appending "aa" or "nt"     full_path = path;    full_path += CFile::AddTrailingPathSeparator(full_path);    full_path += is_prot ? "aa" : "nt";    full_path += CFile::AddTrailingPathSeparator(full_path);    full_path += mtx;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -