📄 blast_psi_cxx.cpp
字号:
/* * =========================================================================== * PRODUCTION $Log: blast_psi_cxx.cpp,v $ * PRODUCTION Revision 1000.0 2004/06/01 18:13:18 gouriano * PRODUCTION PRODUCTION: IMPORTED [GCC34_MSVC7] Dev-tree R1.3 * PRODUCTION * =========================================================================== */static char const rcsid[] = "$Id: blast_psi_cxx.cpp,v 1000.0 2004/06/01 18:13:18 gouriano Exp $";/* =========================================================================== * * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information * * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's official duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular * purpose. * * Please cite the author in any work or product based on this material. * * =========================================================================== * * Author: Christiam Camacho * *//** @file blast_psi_cxx.cpp * Implementation of the C++ API for the PSI-BLAST PSSM generation engine. */#include <ncbi_pch.hpp>#include <algo/blast/api/blast_psi.hpp>#include <algo/blast/api/blast_exception.hpp>#include <corelib/ncbi_limits.hpp>// Object includes#include <objects/seqalign/Seq_align.hpp>#include <objects/seqalign/Seq_align_set.hpp>#include <objects/seqalign/Dense_seg.hpp>#include <objects/seqalign/Score.hpp>#include <objects/general/Object_id.hpp>#include <objects/seqloc/Seq_loc.hpp>#include <objects/seqloc/Seq_interval.hpp>// Object manager includes#include <objmgr/scope.hpp>#include <objmgr/seq_vector.hpp>#include <objects/seq/Seq_data.hpp>// Core includes#include <algo/blast/core/blast_options.h>#include <algo/blast/core/blast_encoding.h>#include <serial/serial.hpp>#include <serial/objostr.hpp>#include <sstream>#include "../core/blast_psi_priv.h"BEGIN_NCBI_SCOPEUSING_SCOPE(objects);BEGIN_SCOPE(blast)//////////////////////////////////////////////////////////////////////////////// Static function prototypesstatic double s_GetEvalue(const CScore& score);static Uint1* s_ExtractSequenceFromSeqVector(CSeqVector& sv);static doubles_GetLowestEvalue(const CDense_seg::TScores& scores);// Debugging functionsstatic void s_DBG_printSequence(const Uint1* seq, TSeqPos len, ostream& out, bool show_markers = true, TSeqPos chars_per_line = 80);// End static function prototypes//////////////////////////////////////////////////////////////////////////////CScoreMatrixBuilder::CScoreMatrixBuilder(const Uint1* query, TSeqPos query_sz, CRef<CSeq_align_set> sset, CRef<CScope> scope, const PSIBlastOptions& opts){ if (!query) { NCBI_THROW(CBlastException, eBadParameter, "NULL query"); } if (!sset || sset->Get().front()->GetDim() != 2) { NCBI_THROW(CBlastException, eNotSupported, "Only 2-dimensional alignments are supported"); } m_Scope.Reset(scope); m_SeqAlignSet.Reset(sset); m_Opts = const_cast<PSIBlastOptions&>(opts); m_PssmDimensions.query_sz = query_sz; m_PssmDimensions.num_seqs = sset->Get().size(); m_AlignmentData.reset(PSIAlignmentDataNew(query, &m_PssmDimensions));}voidCScoreMatrixBuilder::Run(){ // this method should just extract information from Seq-align set and // populate alignment data structure and delegate everything else to core // PSSM library SelectSequencesForProcessing(); ExtractAlignmentData(); // @todo Will need a function to populate the score block for psiblast // add a new program type psiblast //PsiMatrix* retval = PSICreatePSSM(m_AlignmentData.get(), //&m_Opts, sbp, NULL); // @todo populate Score-mat ASN.1 structure with retval and sbp contents }// Selects those sequences which have an evalue lower than the inclusion// threshold in PsiInfo structurevoidCScoreMatrixBuilder::x_SelectByEvalue(){ TSeqPos seq_index = 0; m_AlignmentData->use_sequences[seq_index++] = true; // always use query // For each discontinuous Seq-align corresponding to each query-subj pair ITERATE(CSeq_align_set::Tdata, i, m_SeqAlignSet->Get()) { ASSERT((*i)->GetSegs().IsDisc()); // For each HSP of this query-subj pair ITERATE(CSeq_align::C_Segs::TDisc::Tdata, hsp_itr, (*i)->GetSegs().GetDisc().Get()) { // Look for HSP with score less than inclusion_ethresh double e = s_GetLowestEvalue((*hsp_itr)->GetScore()); if (e < m_Opts.inclusion_ethresh) { m_AlignmentData->use_sequences[seq_index] = true; break; } } seq_index++; } ASSERT(seq_index == GetNumAlignedSequences()+1);}voidCScoreMatrixBuilder::x_SelectBySeqId(const vector< CRef<CSeq_id> >& /*ids*/){ NCBI_THROW(CBlastException, eNotSupported, "select by id not implemented");}// Finds sequences in alignment that warrant further processing. The criteria// to determine this includes:// 1) Include those that have evalues that are lower than the inclusion evalue // threshold// 2) Could use some functor object if more criteria are neededvoidCScoreMatrixBuilder::SelectSequencesForProcessing(){ // Reset the use_sequences array sequence vector std::fill_n(m_AlignmentData->use_sequences, GetNumAlignedSequences() + 1, false); x_SelectByEvalue();}voidCScoreMatrixBuilder::ExtractAlignmentData(){ // Query sequence already processed TSeqPos seq_index = 1; ITERATE(list< CRef<CSeq_align> >, itr, m_SeqAlignSet->Get()) { if ( !m_AlignmentData->use_sequences[seq_index] ) { seq_index++; continue; } const list< CRef<CSeq_align> >& hsp_list = (*itr)->GetSegs().GetDisc().Get(); list< CRef<CSeq_align> >::const_iterator hsp; // HSPs with the same query-subject pair for (hsp = hsp_list.begin(); hsp != hsp_list.end(); ++hsp) { // Note: Std-seg can be converted to Denseg, will need conversion // from Dendiag to Denseg too if ( !(*hsp)->GetSegs().IsDenseg() ) { NCBI_THROW(CBlastException, eNotSupported, "Segment type not supported"); } double evalue = s_GetLowestEvalue((*hsp)->GetScore()); // this could be refactored to support other segment types, will // need to pass portion of subj. sequence and evalue x_ProcessDenseg((*hsp)->GetSegs().GetDenseg(), seq_index, evalue); } seq_index++; } ASSERT(seq_index == GetNumAlignedSequences()+1);}/// Iterates over Dense-seg for a given HSP and extracts alignment information /// to PsiAlignmentData structure. voidCScoreMatrixBuilder::x_ProcessDenseg(const CDense_seg& denseg, TSeqPos seq_index, double e_value){ ASSERT(denseg.GetDim() == 2); const Uint1 GAP = AMINOACID_TO_NCBISTDAA[(Uint1)'-']; const vector<TSignedSeqPos>& starts = denseg.GetStarts(); const vector<TSeqPos>& lengths = denseg.GetLens(); TSeqPos query_index = 0; // index into starts vector TSeqPos subj_index = 1; // index into starts vector TSeqPos subj_seq_idx = 0; // index into subject sequence buffer // Get the portion of the subject sequence corresponding to this Dense-seg TSeqPair seq = x_GetSubjectSequence(denseg, *m_Scope); // Iterate over all segments for (int segmt_idx = 0; segmt_idx < denseg.GetNumseg(); segmt_idx++) { TSeqPos query_offset = starts[query_index]; TSeqPos subject_offset = starts[subj_index]; // advance the query and subject indices for next iteration query_index += denseg.GetDim(); subj_index += denseg.GetDim(); if (query_offset == GAP_IN_ALIGNMENT) { // gap in query, just skip residues on subject sequence subj_seq_idx += lengths[segmt_idx]; continue;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -