⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 blast_psi_cxx.cpp

📁 ncbi源码
💻 CPP
📖 第 1 页 / 共 2 页
字号:
/* * =========================================================================== * PRODUCTION $Log: blast_psi_cxx.cpp,v $ * PRODUCTION Revision 1000.0  2004/06/01 18:13:18  gouriano * PRODUCTION PRODUCTION: IMPORTED [GCC34_MSVC7] Dev-tree R1.3 * PRODUCTION * =========================================================================== */static char const rcsid[] =    "$Id: blast_psi_cxx.cpp,v 1000.0 2004/06/01 18:13:18 gouriano Exp $";/* =========================================================================== * *                            PUBLIC DOMAIN NOTICE *               National Center for Biotechnology Information * *  This software/database is a "United States Government Work" under the *  terms of the United States Copyright Act.  It was written as part of *  the author's official duties as a United States Government employee and *  thus cannot be copyrighted.  This software/database is freely available *  to the public for use. The National Library of Medicine and the U.S. *  Government have not placed any restriction on its use or reproduction. * *  Although all reasonable efforts have been taken to ensure the accuracy *  and reliability of the software and data, the NLM and the U.S. *  Government do not and cannot warrant the performance or results that *  may be obtained by using this software or data. The NLM and the U.S. *  Government disclaim all warranties, express or implied, including *  warranties of performance, merchantability or fitness for any particular *  purpose. * *  Please cite the author in any work or product based on this material. * * =========================================================================== * * Author:  Christiam Camacho * *//** @file blast_psi_cxx.cpp * Implementation of the C++ API for the PSI-BLAST PSSM generation engine. */#include <ncbi_pch.hpp>#include <algo/blast/api/blast_psi.hpp>#include <algo/blast/api/blast_exception.hpp>#include <corelib/ncbi_limits.hpp>// Object includes#include <objects/seqalign/Seq_align.hpp>#include <objects/seqalign/Seq_align_set.hpp>#include <objects/seqalign/Dense_seg.hpp>#include <objects/seqalign/Score.hpp>#include <objects/general/Object_id.hpp>#include <objects/seqloc/Seq_loc.hpp>#include <objects/seqloc/Seq_interval.hpp>// Object manager includes#include <objmgr/scope.hpp>#include <objmgr/seq_vector.hpp>#include <objects/seq/Seq_data.hpp>// Core includes#include <algo/blast/core/blast_options.h>#include <algo/blast/core/blast_encoding.h>#include <serial/serial.hpp>#include <serial/objostr.hpp>#include <sstream>#include "../core/blast_psi_priv.h"BEGIN_NCBI_SCOPEUSING_SCOPE(objects);BEGIN_SCOPE(blast)//////////////////////////////////////////////////////////////////////////////// Static function prototypesstatic double s_GetEvalue(const CScore& score);static Uint1* s_ExtractSequenceFromSeqVector(CSeqVector& sv);static doubles_GetLowestEvalue(const CDense_seg::TScores& scores);// Debugging functionsstatic void s_DBG_printSequence(const Uint1* seq, TSeqPos len, ostream& out,                     bool show_markers = true, TSeqPos chars_per_line = 80);// End static function prototypes//////////////////////////////////////////////////////////////////////////////CScoreMatrixBuilder::CScoreMatrixBuilder(const Uint1* query,                                         TSeqPos query_sz,                                         CRef<CSeq_align_set> sset,                                         CRef<CScope> scope,                                         const PSIBlastOptions& opts){    if (!query) {        NCBI_THROW(CBlastException, eBadParameter, "NULL query");    }    if (!sset || sset->Get().front()->GetDim() != 2) {        NCBI_THROW(CBlastException, eNotSupported,                    "Only 2-dimensional alignments are supported");    }    m_Scope.Reset(scope);    m_SeqAlignSet.Reset(sset);    m_Opts = const_cast<PSIBlastOptions&>(opts);    m_PssmDimensions.query_sz = query_sz;    m_PssmDimensions.num_seqs = sset->Get().size();    m_AlignmentData.reset(PSIAlignmentDataNew(query, &m_PssmDimensions));}voidCScoreMatrixBuilder::Run(){    // this method should just extract information from Seq-align set and    // populate alignment data structure and delegate everything else to core    // PSSM library    SelectSequencesForProcessing();    ExtractAlignmentData();    // @todo Will need a function to populate the score block for psiblast    // add a new program type psiblast    //PsiMatrix* retval = PSICreatePSSM(m_AlignmentData.get(),                                      //&m_Opts, sbp, NULL);    // @todo populate Score-mat ASN.1 structure with retval and sbp contents    }// Selects those sequences which have an evalue lower than the inclusion// threshold in PsiInfo structurevoidCScoreMatrixBuilder::x_SelectByEvalue(){    TSeqPos seq_index = 0;    m_AlignmentData->use_sequences[seq_index++] = true; // always use query    // For each discontinuous Seq-align corresponding to each query-subj pair    ITERATE(CSeq_align_set::Tdata, i, m_SeqAlignSet->Get()) {        ASSERT((*i)->GetSegs().IsDisc());        // For each HSP of this query-subj pair        ITERATE(CSeq_align::C_Segs::TDisc::Tdata, hsp_itr,                (*i)->GetSegs().GetDisc().Get()) {                        // Look for HSP with score less than inclusion_ethresh            double e = s_GetLowestEvalue((*hsp_itr)->GetScore());            if (e < m_Opts.inclusion_ethresh) {                m_AlignmentData->use_sequences[seq_index] = true;                break;            }        }        seq_index++;    }    ASSERT(seq_index == GetNumAlignedSequences()+1);}voidCScoreMatrixBuilder::x_SelectBySeqId(const vector< CRef<CSeq_id> >& /*ids*/){    NCBI_THROW(CBlastException, eNotSupported, "select by id not implemented");}// Finds sequences in alignment that warrant further processing. The criteria// to determine this includes:// 1) Include those that have evalues that are lower than the inclusion evalue //    threshold// 2) Could use some functor object if more criteria are neededvoidCScoreMatrixBuilder::SelectSequencesForProcessing(){    // Reset the use_sequences array sequence vector    std::fill_n(m_AlignmentData->use_sequences, GetNumAlignedSequences() + 1,                false);    x_SelectByEvalue();}voidCScoreMatrixBuilder::ExtractAlignmentData(){    // Query sequence already processed    TSeqPos seq_index = 1;      ITERATE(list< CRef<CSeq_align> >, itr, m_SeqAlignSet->Get()) {        if ( !m_AlignmentData->use_sequences[seq_index] ) {            seq_index++;            continue;        }        const list< CRef<CSeq_align> >& hsp_list =             (*itr)->GetSegs().GetDisc().Get();        list< CRef<CSeq_align> >::const_iterator hsp;        // HSPs with the same query-subject pair        for (hsp = hsp_list.begin(); hsp != hsp_list.end(); ++hsp) {            // Note: Std-seg can be converted to Denseg, will need conversion            // from Dendiag to Denseg too            if ( !(*hsp)->GetSegs().IsDenseg() ) {                NCBI_THROW(CBlastException, eNotSupported,                            "Segment type not supported");            }            double evalue = s_GetLowestEvalue((*hsp)->GetScore());            // this could be refactored to support other segment types, will            // need to pass portion of subj. sequence and evalue            x_ProcessDenseg((*hsp)->GetSegs().GetDenseg(), seq_index, evalue);        }        seq_index++;    }    ASSERT(seq_index == GetNumAlignedSequences()+1);}/// Iterates over Dense-seg for a given HSP and extracts alignment information /// to PsiAlignmentData structure. voidCScoreMatrixBuilder::x_ProcessDenseg(const CDense_seg& denseg,                                      TSeqPos seq_index, double e_value){    ASSERT(denseg.GetDim() == 2);    const Uint1 GAP = AMINOACID_TO_NCBISTDAA[(Uint1)'-'];    const vector<TSignedSeqPos>& starts = denseg.GetStarts();    const vector<TSeqPos>& lengths = denseg.GetLens();    TSeqPos query_index = 0;        // index into starts vector    TSeqPos subj_index = 1;         // index into starts vector    TSeqPos subj_seq_idx = 0;       // index into subject sequence buffer    // Get the portion of the subject sequence corresponding to this Dense-seg    TSeqPair seq = x_GetSubjectSequence(denseg, *m_Scope);    // Iterate over all segments    for (int segmt_idx = 0; segmt_idx < denseg.GetNumseg(); segmt_idx++) {        TSeqPos query_offset = starts[query_index];        TSeqPos subject_offset = starts[subj_index];        // advance the query and subject indices for next iteration        query_index += denseg.GetDim();        subj_index += denseg.GetDim();        if (query_offset == GAP_IN_ALIGNMENT) {            // gap in query, just skip residues on subject sequence            subj_seq_idx += lengths[segmt_idx];            continue;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -