⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 blast_seqalign.cpp

📁 ncbi源码
💻 CPP
📖 第 1 页 / 共 4 页
字号:
/* * =========================================================================== * PRODUCTION $Log: blast_seqalign.cpp,v $ * PRODUCTION Revision 1000.6  2004/06/01 18:05:52  gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.42 * PRODUCTION * =========================================================================== *//*  $Id: blast_seqalign.cpp,v 1000.6 2004/06/01 18:05:52 gouriano Exp $* ===========================================================================**                            PUBLIC DOMAIN NOTICE*               National Center for Biotechnology Information**  This software/database is a "United States Government Work" under the*  terms of the United States Copyright Act.  It was written as part of*  the author's official duties as a United States Government employee and*  thus cannot be copyrighted.  This software/database is freely available*  to the public for use. The National Library of Medicine and the U.S.*  Government have not placed any restriction on its use or reproduction.**  Although all reasonable efforts have been taken to ensure the accuracy*  and reliability of the software and data, the NLM and the U.S.*  Government do not and cannot warrant the performance or results that*  may be obtained by using this software or data. The NLM and the U.S.*  Government disclaim all warranties, express or implied, including*  warranties of performance, merchantability or fitness for any particular*  purpose.**  Please cite the author in any work or product based on this material.** ===========================================================================** Author:  Christiam Camacho** ===========================================================================*//// @file blast_seqalign.cpp/// Utility function to convert internal BLAST result structures into/// CSeq_align_set objects.#include <ncbi_pch.hpp>#include "blast_seqalign.hpp"#include <objects/seqloc/Seq_loc.hpp>#include <objects/seqloc/Seq_interval.hpp>#include <objects/seqalign/Seq_align.hpp>#include <objects/seqalign/Seq_align_set.hpp>#include <objects/seqalign/Dense_seg.hpp>#include <objects/seqalign/Dense_diag.hpp>#include <objects/seqalign/Std_seg.hpp>#include <objects/seqalign/Score.hpp>#include <objects/general/Object_id.hpp>#include <objmgr/util/sequence.hpp>#include <serial/iterator.hpp>/** @addtogroup AlgoBlast * * @{ */BEGIN_NCBI_SCOPEUSING_SCOPE(objects);BEGIN_SCOPE(blast)#ifndef SMALLEST_EVALUE#define SMALLEST_EVALUE 1.0e-180#endif#ifndef GAP_VALUE#define GAP_VALUE -1#endif// Converts a frame into the appropriate strandstatic ENa_strandx_Frame2Strand(short frame){    if (frame > 0)        return eNa_strand_plus;    else if (frame < 0)        return eNa_strand_minus;    else        return eNa_strand_unknown;}static int x_GetCurrPos(int& pos, int pos2advance){    int retval;    if (pos < 0)        retval = -(pos + pos2advance - 1);    else        retval = pos;    pos += pos2advance;    return retval;}static TSeqPosx_GetAlignmentStart(int& curr_pos, const GapEditScript* esp,         ENa_strand strand, bool translate, int length, int original_length,         short frame){    TSeqPos retval;    if (strand == eNa_strand_minus) {        if (translate)            retval = original_length -                 CODON_LENGTH*(x_GetCurrPos(curr_pos, esp->num) + esp->num)                + frame + 1;        else            retval = length - x_GetCurrPos(curr_pos, esp->num) - esp->num;    } else {        if (translate)            retval = frame - 1 + CODON_LENGTH*x_GetCurrPos(curr_pos, esp->num);        else            retval = x_GetCurrPos(curr_pos, esp->num);    }    return retval;}/// C++ version of GXECollectDataForSeqalign/// Note that even though the edit_block is passed in, data for seqalign is/// collected from the esp argument for nsegs segmentsstatic voidx_CollectSeqAlignData(const GapEditBlock* edit_block,         const GapEditScript* esp_head, unsigned int nsegs,        vector<TSignedSeqPos>& starts, vector<TSeqPos>& lengths,         vector<ENa_strand>& strands){    ASSERT(edit_block != NULL);    GapEditScript* esp = const_cast<GapEditScript*>(esp_head);    ENa_strand m_strand, s_strand;      // strands of alignment    TSignedSeqPos m_start, s_start;     // running starts of alignment    int start1 = edit_block->start1;    // start of alignment on master sequence    int start2 = edit_block->start2;    // start of alignment on slave sequence    m_strand = x_Frame2Strand(edit_block->frame1);    s_strand = x_Frame2Strand(edit_block->frame2);    for (unsigned int i = 0; esp && i < nsegs; esp = esp->next, i++) {        switch (esp->op_type) {        case eGapAlignDecline:        case eGapAlignSub:            m_start = x_GetAlignmentStart(start1, esp, m_strand,                    edit_block->translate1 != 0, edit_block->length1,                    edit_block->original_length1, edit_block->frame1);            s_start = x_GetAlignmentStart(start2, esp, s_strand,                    edit_block->translate2 != 0, edit_block->length2,                    edit_block->original_length2, edit_block->frame2);            if (edit_block->reverse) {                strands.push_back(s_strand);                strands.push_back(m_strand);                starts.push_back(s_start);                starts.push_back(m_start);            } else {                strands.push_back(m_strand);                strands.push_back(s_strand);                starts.push_back(m_start);                starts.push_back(s_start);            }            break;        // Insertion on the master sequence (gap on slave)        case eGapAlignIns:            m_start = x_GetAlignmentStart(start1, esp, m_strand,                    edit_block->translate1 != 0, edit_block->length1,                    edit_block->original_length1, edit_block->frame1);            s_start = GAP_VALUE;            if (edit_block->reverse) {                strands.push_back(i == 0 ? eNa_strand_unknown : s_strand);                strands.push_back(m_strand);                starts.push_back(s_start);                starts.push_back(m_start);            } else {                strands.push_back(m_strand);                strands.push_back(i == 0 ? eNa_strand_unknown : s_strand);                starts.push_back(m_start);                starts.push_back(s_start);            }            break;        // Deletion on master sequence (gap; insertion on slave)        case eGapAlignDel:            m_start = GAP_VALUE;            s_start = x_GetAlignmentStart(start2, esp, s_strand,                    edit_block->translate2 != 0, edit_block->length2,                    edit_block->original_length2, edit_block->frame2);            if (edit_block->reverse) {                strands.push_back(s_strand);                strands.push_back(i == 0 ? eNa_strand_unknown : m_strand);                starts.push_back(s_start);                starts.push_back(m_start);            } else {                strands.push_back(i == 0 ? eNa_strand_unknown : m_strand);                strands.push_back(s_strand);                starts.push_back(m_start);                starts.push_back(s_start);            }            break;        default:            break;        }        lengths.push_back(esp->num);    }    // Make sure the vectors have the right size    if (lengths.size() != nsegs)        lengths.resize(nsegs);    if (starts.size() != nsegs*2)        starts.resize(nsegs*2);    if (strands.size() != nsegs*2)        strands.resize(nsegs*2);}static CRef<CDense_seg>x_CreateDenseg(const CSeq_id* master, const CSeq_id* slave,        vector<TSignedSeqPos>& starts, vector<TSeqPos>& lengths,         vector<ENa_strand>& strands){    ASSERT(master);    ASSERT(slave);    CRef<CDense_seg> dense_seg(new CDense_seg());    // Pairwise alignment is 2 dimensional    dense_seg->SetDim(2);    // Set the sequence ids    CDense_seg::TIds& ids = dense_seg->SetIds();    CRef<CSeq_id> tmp(new CSeq_id(master->AsFastaString()));    ids.push_back(tmp);    tmp.Reset(new CSeq_id(slave->AsFastaString()));    ids.push_back(tmp);    ids.resize(dense_seg->GetDim());    dense_seg->SetLens() = lengths;    dense_seg->SetStrands() = strands;    dense_seg->SetStarts() = starts;    dense_seg->SetNumseg(lengths.size());    return dense_seg;}static CSeq_align::C_Segs::TStdx_CreateStdSegs(const CSeq_id* master, const CSeq_id* slave,         vector<TSignedSeqPos>& starts, vector<TSeqPos>& lengths,         vector<ENa_strand>& strands, bool reverse, bool translate_master, bool        translate_slave){    ASSERT(master);    ASSERT(slave);    CSeq_align::C_Segs::TStd retval;    int nsegs = lengths.size();         // number of segments in alignment    TSignedSeqPos m_start, m_stop;      // start and stop for master sequence    TSignedSeqPos s_start, s_stop;      // start and stop for slave sequence    CRef<CSeq_id> master_id(new CSeq_id(master->AsFastaString()));    CRef<CSeq_id> slave_id(new CSeq_id(slave->AsFastaString()));        for (int i = 0; i < nsegs; i++) {        CRef<CStd_seg> std_seg(new CStd_seg());        CRef<CSeq_loc> master_loc(new CSeq_loc());        CRef<CSeq_loc> slave_loc(new CSeq_loc());        // Pairwise alignment is 2 dimensional        std_seg->SetDim(2);        // Set master seqloc        if ( (m_start = starts[2*i]) != GAP_VALUE) {            master_loc->SetInt().SetId(*master_id);            master_loc->SetInt().SetFrom(m_start);            if (translate_master || (reverse && translate_slave))                m_stop = m_start + CODON_LENGTH*lengths[i] - 1;            else                m_stop = m_start + lengths[i] - 1;            master_loc->SetInt().SetTo(m_stop);            master_loc->SetInt().SetStrand(strands[2*i]);        } else {            master_loc->SetEmpty(*master_id);        }        // Set slave seqloc        if ( (s_start = starts[2*i+1]) != GAP_VALUE) {            slave_loc->SetInt().SetId(*slave_id);            slave_loc->SetInt().SetFrom(s_start);            if (translate_slave || (reverse && translate_master))                s_stop = s_start + CODON_LENGTH*lengths[i] - 1;            else                s_stop = s_start + lengths[i] - 1;            slave_loc->SetInt().SetTo(s_stop);            slave_loc->SetInt().SetStrand(strands[2*i+1]);        } else {            slave_loc->SetEmpty(*slave_id);        }        if (reverse) {            std_seg->SetIds().push_back(slave_id);            std_seg->SetIds().push_back(master_id);            std_seg->SetLoc().push_back(slave_loc);            std_seg->SetLoc().push_back(master_loc);        } else {            std_seg->SetIds().push_back(master_id);            std_seg->SetIds().push_back(slave_id);            std_seg->SetLoc().push_back(master_loc);            std_seg->SetLoc().push_back(slave_loc);        }        retval.push_back(std_seg);    }    return retval;}/// Clone of GXECorrectUASequence (tools/gapxdrop.c)/// Assumes eGapAlignDecline regions are to the right of eGapAlign{Ins,Del}./// This function swaps them (eGapAlignDecline ends to the right of the gap)static void x_CorrectUASequence(GapEditBlock* edit_block){    GapEditScript* curr = NULL,* curr_last = NULL;    GapEditScript* indel_prev = NULL; // pointer to node before the last            // insertion or deletion followed immediately by GAPALIGN_DECLINE    bool last_indel = false;    // last operation was an insertion or deletion    for (curr = edit_block->esp; curr; curr = curr->next) {        // if GAPALIGN_DECLINE immediately follows an insertion or deletion        if (curr->op_type == eGapAlignDecline && last_indel) {            /* This is invalid condition and regions should be               exchanged */            if (indel_prev != NULL)                indel_prev->next = curr;            else                edit_block->esp = curr; /* First element in a list */            // CC: If flaking gaps are allowed, curr_last could be NULL and the            // following statement would core dump...            curr_last->next = curr->next;            curr->next = curr_last;        }        last_indel = false;        if (curr->op_type == eGapAlignIns || curr->op_type == eGapAlignDel) {            last_indel = true;            indel_prev = curr_last;        }        curr_last = curr;    }    return;}/// C++ version of GXEMakeSeqAlign (tools/gapxdrop.c)/// Creates a Seq-align for a single HSPstatic CRef<CSeq_align>x_CreateSeqAlign(const CSeq_id* master, const CSeq_id* slave,    vector<TSignedSeqPos> starts, vector<TSeqPos> lengths,    vector<ENa_strand> strands, bool translate_master, bool translate_slave,    bool reverse){    CRef<CSeq_align> sar(new CSeq_align());    sar->SetType(CSeq_align::eType_partial);    sar->SetDim(2);         // BLAST only creates pairwise alignments    if (translate_master || translate_slave) {        sar->SetSegs().SetStd() =            x_CreateStdSegs(master, slave, starts, lengths, strands,                    reverse, translate_master, translate_slave);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -