⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 blast_setup_cxx.cpp

📁 ncbi源码
💻 CPP
📖 第 1 页 / 共 3 页
字号:
/* * =========================================================================== * PRODUCTION $Log: blast_setup_cxx.cpp,v $ * PRODUCTION Revision 1000.7  2004/06/01 18:05:58  gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.68 * PRODUCTION * =========================================================================== *//*  $Id: blast_setup_cxx.cpp,v 1000.7 2004/06/01 18:05:58 gouriano Exp $* ===========================================================================**                            PUBLIC DOMAIN NOTICE*               National Center for Biotechnology Information**  This software/database is a "United States Government Work" under the*  terms of the United States Copyright Act.  It was written as part of*  the author's official duties as a United States Government employee and*  thus cannot be copyrighted.  This software/database is freely available*  to the public for use. The National Library of Medicine and the U.S.*  Government have not placed any restriction on its use or reproduction.**  Although all reasonable efforts have been taken to ensure the accuracy*  and reliability of the software and data, the NLM and the U.S.*  Government do not and cannot warrant the performance or results that*  may be obtained by using this software or data. The NLM and the U.S.*  Government disclaim all warranties, express or implied, including*  warranties of performance, merchantability or fitness for any particular*  purpose.**  Please cite the author in any work or product based on this material.** ===========================================================================** Author:  Christiam Camacho** ===========================================================================*//// @file blast_setup_cxx.cpp/// Auxiliary setup functions for Blast objects interface.#include <ncbi_pch.hpp>#include <corelib/ncbiapp.hpp>#include <corelib/ncbireg.hpp>#include <corelib/metareg.hpp>#include <corelib/ncbifile.hpp>#include <objmgr/object_manager.hpp>#include <objmgr/scope.hpp>#include <objmgr/seq_vector.hpp>#include <objmgr/util/sequence.hpp>#include <objects/seqloc/Seq_loc.hpp>#include <objects/seq/seqport_util.hpp>#include <objects/seqfeat/Genetic_code_table.hpp>#include <objects/seq/Seq_data.hpp>#include <objects/seq/NCBIstdaa.hpp>#include <algo/blast/api/blast_options.hpp>#include "blast_setup.hpp"#include <algo/blast/core/blast_util.h>#include <algo/blast/core/blast_encoding.h>#include <algorithm>/** @addtogroup AlgoBlast * * @{ */BEGIN_NCBI_SCOPEBEGIN_SCOPE(blast)USING_SCOPE(ncbi::objects);/// Now allows query concatenationvoidSetupQueryInfo(const TSeqLocVector& queries, const CBlastOptions& options,                BlastQueryInfo** qinfo){    ASSERT(qinfo);    // Allocate and initialize the query info structure    if ( !((*qinfo) = (BlastQueryInfo*) calloc(1, sizeof(BlastQueryInfo)))) {        NCBI_THROW(CBlastException, eOutOfMemory, "Query info");    }    EProgram prog = options.GetProgram();    unsigned int nframes = GetNumberOfFrames(prog);    (*qinfo)->num_queries = static_cast<int>(queries.size());    (*qinfo)->first_context = 0;    (*qinfo)->last_context = (*qinfo)->num_queries * nframes - 1;    // Allocate the various arrays of the query info structure    int* context_offsets = NULL;    if ( !(context_offsets = (int*)           malloc(sizeof(int) * ((*qinfo)->last_context + 2)))) {        NCBI_THROW(CBlastException, eOutOfMemory, "Context offsets array");    }    if ( !((*qinfo)->eff_searchsp_array =            (Int8*) calloc((*qinfo)->last_context + 1, sizeof(Int8)))) {        NCBI_THROW(CBlastException, eOutOfMemory, "Search space array");    }    if ( !((*qinfo)->length_adjustments =            (int*) calloc((*qinfo)->last_context + 1, sizeof(int)))) {        NCBI_THROW(CBlastException, eOutOfMemory, "Length adjustments array");    }    (*qinfo)->context_offsets = context_offsets;    context_offsets[0] = 0;    bool is_na = (prog == eBlastn) ? true : false;    bool translate =         ((prog == eBlastx) || (prog == eTblastx)) ? true : false;    // Adjust first context depending on the first query strand    // Unless the strand option is set to single strand, the actual    // CSeq_locs dictate which strand to examine during the search.    ENa_strand strand_opt = options.GetStrandOption();    ENa_strand strand;    if (is_na || translate) {        if (strand_opt == eNa_strand_both ||             strand_opt == eNa_strand_unknown) {            strand = sequence::GetStrand(*queries.front().seqloc,                                          queries.front().scope);        } else {            strand = strand_opt;        }        if (strand == eNa_strand_minus) {            if (translate) {                (*qinfo)->first_context = 3;            } else {                (*qinfo)->first_context = 1;            }        }    }    // Set up the context offsets into the sequence that will be added    // to the sequence block structure.    unsigned int ctx_index = 0;      // index into context_offsets array    // Longest query length, to be saved in the query info structure    Uint4 max_length = 0;    ITERATE(TSeqLocVector, itr, queries) {        TSeqPos length = sequence::GetLength(*itr->seqloc, itr->scope);        ASSERT(length != numeric_limits<TSeqPos>::max());        strand = sequence::GetStrand(*itr->seqloc, itr->scope);        if (strand_opt == eNa_strand_minus || strand_opt == eNa_strand_plus) {            strand = strand_opt;        }        if (translate) {            for (unsigned int i = 0; i < nframes; i++) {                unsigned int prot_length =                     (length - i % CODON_LENGTH) / CODON_LENGTH;                max_length = MAX(max_length, prot_length);                switch (strand) {                case eNa_strand_plus:                    if (i < 3) {                        context_offsets[ctx_index + i + 1] =                             context_offsets[ctx_index + i] + prot_length + 1;                    } else {                        context_offsets[ctx_index + i + 1] =                             context_offsets[ctx_index + i];                    }                    break;                case eNa_strand_minus:                    if (i < 3) {                        context_offsets[ctx_index + i + 1] =                             context_offsets[ctx_index + i];                    } else {                        context_offsets[ctx_index + i + 1] =                            context_offsets[ctx_index + i] + prot_length + 1;                    }                    break;                case eNa_strand_both:                case eNa_strand_unknown:                    context_offsets[ctx_index + i + 1] =                         context_offsets[ctx_index + i] + prot_length + 1;                    break;                default:                    abort();                }            }        } else {            max_length = MAX(max_length, length);            if (is_na) {                switch (strand) {                case eNa_strand_plus:                    context_offsets[ctx_index + 1] =                        context_offsets[ctx_index] + length + 1;                    context_offsets[ctx_index + 2] =                        context_offsets[ctx_index + 1];                    break;                case eNa_strand_minus:                    context_offsets[ctx_index + 1] =                        context_offsets[ctx_index];                    context_offsets[ctx_index + 2] =                        context_offsets[ctx_index + 1] + length + 1;                    break;                case eNa_strand_both:                case eNa_strand_unknown:                    context_offsets[ctx_index + 1] =                        context_offsets[ctx_index] + length + 1;                    context_offsets[ctx_index + 2] =                        context_offsets[ctx_index + 1] + length + 1;                    break;                default:                    abort();                }            } else {    // protein                context_offsets[ctx_index + 1] =                     context_offsets[ctx_index] + length + 1;            }        }        ctx_index += nframes;    }    (*qinfo)->max_length = max_length;}Uint1GetQueryEncoding(EProgram program){    Uint1 retval = 0;    switch (program) {    case eBlastn:         retval = BLASTNA_ENCODING;         break;    case eBlastp:     case eTblastn:    case eRPSBlast:         retval = BLASTP_ENCODING;         break;    case eBlastx:    case eTblastx:    case eRPSTblastn:        retval = NCBI4NA_ENCODING;        break;    default:        NCBI_THROW(CBlastException, eBadParameter, "Query Encoding");    }    return retval;}Uint1GetSubjectEncoding(EProgram program){    Uint1 retval = 0;    switch (program) {    case eBlastn:         retval = BLASTNA_ENCODING;         break;    case eBlastp:     case eBlastx:        retval = BLASTP_ENCODING;         break;    case eTblastn:    case eTblastx:        retval = NCBI4NA_ENCODING;        break;    default:        NCBI_THROW(CBlastException, eBadParameter, "Subject Encoding");    }    return retval;}voidSetupQueries(const TSeqLocVector& queries, const CBlastOptions& options,             const CBlastQueryInfo& qinfo, BLAST_SequenceBlk** seqblk){    ASSERT(seqblk);    ASSERT(queries.size() != 0);    EProgram prog = options.GetProgram();    // Determine sequence encoding    Uint1 encoding = GetQueryEncoding(prog);    int buflen = qinfo->context_offsets[qinfo->last_context+1] + 1;    Uint1* buf = (Uint1*) calloc((buflen+1), sizeof(Uint1));    if ( !buf ) {        NCBI_THROW(CBlastException, eOutOfMemory, "Query sequence buffer");    }    bool is_na = (prog == eBlastn) ? true : false;    bool translate = ((prog == eBlastx) || (prog == eTblastx)) ? true : false;    unsigned int ctx_index = 0;      // index into context_offsets array    unsigned int nframes = GetNumberOfFrames(prog);    BlastMaskLoc* mask = NULL, *head_mask = NULL, *last_mask = NULL;    // Unless the strand option is set to single strand, the actual    // CSeq_locs dictacte which strand to examine during the search    ENa_strand strand_opt = options.GetStrandOption();    int index = 0;    ITERATE(TSeqLocVector, itr, queries) {        ENa_strand strand;        if ((is_na || translate) &&            (strand_opt == eNa_strand_unknown ||              strand_opt == eNa_strand_both))         {            strand = sequence::GetStrand(*itr->seqloc, itr->scope);        } else {            strand = strand_opt;        }        if (itr->mask)            mask = CSeqLoc2BlastMaskLoc(*itr->mask, index);        ++index;        if (translate) {            ASSERT(strand == eNa_strand_both ||                   strand == eNa_strand_plus ||                   strand == eNa_strand_minus);            // Get both strands of the original nucleotide sequence with            // sentinels            pair<AutoPtr<Uint1, CDeleter<Uint1> >, TSeqPos> seqbuf(                GetSequence(*itr->seqloc, encoding, itr->scope, strand,                            eSentinels));                        AutoPtr<Uint1, ArrayDeleter<Uint1> > gc =                 FindGeneticCode(options.GetQueryGeneticCode());            int na_length = sequence::GetLength(*itr->seqloc, itr->scope);            Uint1* seqbuf_rev = NULL;  // negative strand            if (strand == eNa_strand_both)               seqbuf_rev = seqbuf.first.get() + na_length + 1;            else if (strand == eNa_strand_minus)               seqbuf_rev = seqbuf.first.get() + 1;            // Populate the sequence buffer            for (unsigned int i = 0; i < nframes; i++) {                if (BLAST_GetQueryLength(qinfo, i) <= 0) {                    continue;                }                int offset = qinfo->context_offsets[ctx_index + i];                short frame = BLAST_ContextToFrame(prog, i);                BLAST_GetTranslation(seqbuf.first.get() + 1, seqbuf_rev,                   na_length, frame, &buf[offset], gc.get());            }            // Translate the lower case mask coordinates;            BlastMaskLocDNAToProtein(&mask, *itr->seqloc, itr->scope);        } else if (is_na) {            ASSERT(strand == eNa_strand_both ||                   strand == eNa_strand_plus ||                   strand == eNa_strand_minus);            pair<AutoPtr<Uint1, CDeleter<Uint1> >, TSeqPos> seqbuf(                GetSequence(*itr->seqloc, encoding, itr->scope, strand,                            eSentinels));            int idx = (strand == eNa_strand_minus) ?                 ctx_index + 1 : ctx_index;            int offset = qinfo->context_offsets[idx];            memcpy(&buf[offset], seqbuf.first.get(), seqbuf.second);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -