📄 blast_setup_cxx.cpp
字号:
/* * =========================================================================== * PRODUCTION $Log: blast_setup_cxx.cpp,v $ * PRODUCTION Revision 1000.7 2004/06/01 18:05:58 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.68 * PRODUCTION * =========================================================================== *//* $Id: blast_setup_cxx.cpp,v 1000.7 2004/06/01 18:05:58 gouriano Exp $* ===========================================================================** PUBLIC DOMAIN NOTICE* National Center for Biotechnology Information** This software/database is a "United States Government Work" under the* terms of the United States Copyright Act. It was written as part of* the author's official duties as a United States Government employee and* thus cannot be copyrighted. This software/database is freely available* to the public for use. The National Library of Medicine and the U.S.* Government have not placed any restriction on its use or reproduction.** Although all reasonable efforts have been taken to ensure the accuracy* and reliability of the software and data, the NLM and the U.S.* Government do not and cannot warrant the performance or results that* may be obtained by using this software or data. The NLM and the U.S.* Government disclaim all warranties, express or implied, including* warranties of performance, merchantability or fitness for any particular* purpose.** Please cite the author in any work or product based on this material.** ===========================================================================** Author: Christiam Camacho** ===========================================================================*//// @file blast_setup_cxx.cpp/// Auxiliary setup functions for Blast objects interface.#include <ncbi_pch.hpp>#include <corelib/ncbiapp.hpp>#include <corelib/ncbireg.hpp>#include <corelib/metareg.hpp>#include <corelib/ncbifile.hpp>#include <objmgr/object_manager.hpp>#include <objmgr/scope.hpp>#include <objmgr/seq_vector.hpp>#include <objmgr/util/sequence.hpp>#include <objects/seqloc/Seq_loc.hpp>#include <objects/seq/seqport_util.hpp>#include <objects/seqfeat/Genetic_code_table.hpp>#include <objects/seq/Seq_data.hpp>#include <objects/seq/NCBIstdaa.hpp>#include <algo/blast/api/blast_options.hpp>#include "blast_setup.hpp"#include <algo/blast/core/blast_util.h>#include <algo/blast/core/blast_encoding.h>#include <algorithm>/** @addtogroup AlgoBlast * * @{ */BEGIN_NCBI_SCOPEBEGIN_SCOPE(blast)USING_SCOPE(ncbi::objects);/// Now allows query concatenationvoidSetupQueryInfo(const TSeqLocVector& queries, const CBlastOptions& options, BlastQueryInfo** qinfo){ ASSERT(qinfo); // Allocate and initialize the query info structure if ( !((*qinfo) = (BlastQueryInfo*) calloc(1, sizeof(BlastQueryInfo)))) { NCBI_THROW(CBlastException, eOutOfMemory, "Query info"); } EProgram prog = options.GetProgram(); unsigned int nframes = GetNumberOfFrames(prog); (*qinfo)->num_queries = static_cast<int>(queries.size()); (*qinfo)->first_context = 0; (*qinfo)->last_context = (*qinfo)->num_queries * nframes - 1; // Allocate the various arrays of the query info structure int* context_offsets = NULL; if ( !(context_offsets = (int*) malloc(sizeof(int) * ((*qinfo)->last_context + 2)))) { NCBI_THROW(CBlastException, eOutOfMemory, "Context offsets array"); } if ( !((*qinfo)->eff_searchsp_array = (Int8*) calloc((*qinfo)->last_context + 1, sizeof(Int8)))) { NCBI_THROW(CBlastException, eOutOfMemory, "Search space array"); } if ( !((*qinfo)->length_adjustments = (int*) calloc((*qinfo)->last_context + 1, sizeof(int)))) { NCBI_THROW(CBlastException, eOutOfMemory, "Length adjustments array"); } (*qinfo)->context_offsets = context_offsets; context_offsets[0] = 0; bool is_na = (prog == eBlastn) ? true : false; bool translate = ((prog == eBlastx) || (prog == eTblastx)) ? true : false; // Adjust first context depending on the first query strand // Unless the strand option is set to single strand, the actual // CSeq_locs dictate which strand to examine during the search. ENa_strand strand_opt = options.GetStrandOption(); ENa_strand strand; if (is_na || translate) { if (strand_opt == eNa_strand_both || strand_opt == eNa_strand_unknown) { strand = sequence::GetStrand(*queries.front().seqloc, queries.front().scope); } else { strand = strand_opt; } if (strand == eNa_strand_minus) { if (translate) { (*qinfo)->first_context = 3; } else { (*qinfo)->first_context = 1; } } } // Set up the context offsets into the sequence that will be added // to the sequence block structure. unsigned int ctx_index = 0; // index into context_offsets array // Longest query length, to be saved in the query info structure Uint4 max_length = 0; ITERATE(TSeqLocVector, itr, queries) { TSeqPos length = sequence::GetLength(*itr->seqloc, itr->scope); ASSERT(length != numeric_limits<TSeqPos>::max()); strand = sequence::GetStrand(*itr->seqloc, itr->scope); if (strand_opt == eNa_strand_minus || strand_opt == eNa_strand_plus) { strand = strand_opt; } if (translate) { for (unsigned int i = 0; i < nframes; i++) { unsigned int prot_length = (length - i % CODON_LENGTH) / CODON_LENGTH; max_length = MAX(max_length, prot_length); switch (strand) { case eNa_strand_plus: if (i < 3) { context_offsets[ctx_index + i + 1] = context_offsets[ctx_index + i] + prot_length + 1; } else { context_offsets[ctx_index + i + 1] = context_offsets[ctx_index + i]; } break; case eNa_strand_minus: if (i < 3) { context_offsets[ctx_index + i + 1] = context_offsets[ctx_index + i]; } else { context_offsets[ctx_index + i + 1] = context_offsets[ctx_index + i] + prot_length + 1; } break; case eNa_strand_both: case eNa_strand_unknown: context_offsets[ctx_index + i + 1] = context_offsets[ctx_index + i] + prot_length + 1; break; default: abort(); } } } else { max_length = MAX(max_length, length); if (is_na) { switch (strand) { case eNa_strand_plus: context_offsets[ctx_index + 1] = context_offsets[ctx_index] + length + 1; context_offsets[ctx_index + 2] = context_offsets[ctx_index + 1]; break; case eNa_strand_minus: context_offsets[ctx_index + 1] = context_offsets[ctx_index]; context_offsets[ctx_index + 2] = context_offsets[ctx_index + 1] + length + 1; break; case eNa_strand_both: case eNa_strand_unknown: context_offsets[ctx_index + 1] = context_offsets[ctx_index] + length + 1; context_offsets[ctx_index + 2] = context_offsets[ctx_index + 1] + length + 1; break; default: abort(); } } else { // protein context_offsets[ctx_index + 1] = context_offsets[ctx_index] + length + 1; } } ctx_index += nframes; } (*qinfo)->max_length = max_length;}Uint1GetQueryEncoding(EProgram program){ Uint1 retval = 0; switch (program) { case eBlastn: retval = BLASTNA_ENCODING; break; case eBlastp: case eTblastn: case eRPSBlast: retval = BLASTP_ENCODING; break; case eBlastx: case eTblastx: case eRPSTblastn: retval = NCBI4NA_ENCODING; break; default: NCBI_THROW(CBlastException, eBadParameter, "Query Encoding"); } return retval;}Uint1GetSubjectEncoding(EProgram program){ Uint1 retval = 0; switch (program) { case eBlastn: retval = BLASTNA_ENCODING; break; case eBlastp: case eBlastx: retval = BLASTP_ENCODING; break; case eTblastn: case eTblastx: retval = NCBI4NA_ENCODING; break; default: NCBI_THROW(CBlastException, eBadParameter, "Subject Encoding"); } return retval;}voidSetupQueries(const TSeqLocVector& queries, const CBlastOptions& options, const CBlastQueryInfo& qinfo, BLAST_SequenceBlk** seqblk){ ASSERT(seqblk); ASSERT(queries.size() != 0); EProgram prog = options.GetProgram(); // Determine sequence encoding Uint1 encoding = GetQueryEncoding(prog); int buflen = qinfo->context_offsets[qinfo->last_context+1] + 1; Uint1* buf = (Uint1*) calloc((buflen+1), sizeof(Uint1)); if ( !buf ) { NCBI_THROW(CBlastException, eOutOfMemory, "Query sequence buffer"); } bool is_na = (prog == eBlastn) ? true : false; bool translate = ((prog == eBlastx) || (prog == eTblastx)) ? true : false; unsigned int ctx_index = 0; // index into context_offsets array unsigned int nframes = GetNumberOfFrames(prog); BlastMaskLoc* mask = NULL, *head_mask = NULL, *last_mask = NULL; // Unless the strand option is set to single strand, the actual // CSeq_locs dictacte which strand to examine during the search ENa_strand strand_opt = options.GetStrandOption(); int index = 0; ITERATE(TSeqLocVector, itr, queries) { ENa_strand strand; if ((is_na || translate) && (strand_opt == eNa_strand_unknown || strand_opt == eNa_strand_both)) { strand = sequence::GetStrand(*itr->seqloc, itr->scope); } else { strand = strand_opt; } if (itr->mask) mask = CSeqLoc2BlastMaskLoc(*itr->mask, index); ++index; if (translate) { ASSERT(strand == eNa_strand_both || strand == eNa_strand_plus || strand == eNa_strand_minus); // Get both strands of the original nucleotide sequence with // sentinels pair<AutoPtr<Uint1, CDeleter<Uint1> >, TSeqPos> seqbuf( GetSequence(*itr->seqloc, encoding, itr->scope, strand, eSentinels)); AutoPtr<Uint1, ArrayDeleter<Uint1> > gc = FindGeneticCode(options.GetQueryGeneticCode()); int na_length = sequence::GetLength(*itr->seqloc, itr->scope); Uint1* seqbuf_rev = NULL; // negative strand if (strand == eNa_strand_both) seqbuf_rev = seqbuf.first.get() + na_length + 1; else if (strand == eNa_strand_minus) seqbuf_rev = seqbuf.first.get() + 1; // Populate the sequence buffer for (unsigned int i = 0; i < nframes; i++) { if (BLAST_GetQueryLength(qinfo, i) <= 0) { continue; } int offset = qinfo->context_offsets[ctx_index + i]; short frame = BLAST_ContextToFrame(prog, i); BLAST_GetTranslation(seqbuf.first.get() + 1, seqbuf_rev, na_length, frame, &buf[offset], gc.get()); } // Translate the lower case mask coordinates; BlastMaskLocDNAToProtein(&mask, *itr->seqloc, itr->scope); } else if (is_na) { ASSERT(strand == eNa_strand_both || strand == eNa_strand_plus || strand == eNa_strand_minus); pair<AutoPtr<Uint1, CDeleter<Uint1> >, TSeqPos> seqbuf( GetSequence(*itr->seqloc, encoding, itr->scope, strand, eSentinels)); int idx = (strand == eNa_strand_minus) ? ctx_index + 1 : ctx_index; int offset = qinfo->context_offsets[idx]; memcpy(&buf[offset], seqbuf.first.get(), seqbuf.second);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -