📄 blast_seqalign.cpp
字号:
/* * =========================================================================== * PRODUCTION $Log: blast_seqalign.cpp,v $ * PRODUCTION Revision 1000.6 2004/06/01 18:05:52 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.42 * PRODUCTION * =========================================================================== *//* $Id: blast_seqalign.cpp,v 1000.6 2004/06/01 18:05:52 gouriano Exp $* ===========================================================================** PUBLIC DOMAIN NOTICE* National Center for Biotechnology Information** This software/database is a "United States Government Work" under the* terms of the United States Copyright Act. It was written as part of* the author's official duties as a United States Government employee and* thus cannot be copyrighted. This software/database is freely available* to the public for use. The National Library of Medicine and the U.S.* Government have not placed any restriction on its use or reproduction.** Although all reasonable efforts have been taken to ensure the accuracy* and reliability of the software and data, the NLM and the U.S.* Government do not and cannot warrant the performance or results that* may be obtained by using this software or data. The NLM and the U.S.* Government disclaim all warranties, express or implied, including* warranties of performance, merchantability or fitness for any particular* purpose.** Please cite the author in any work or product based on this material.** ===========================================================================** Author: Christiam Camacho** ===========================================================================*//// @file blast_seqalign.cpp/// Utility function to convert internal BLAST result structures into/// CSeq_align_set objects.#include <ncbi_pch.hpp>#include "blast_seqalign.hpp"#include <objects/seqloc/Seq_loc.hpp>#include <objects/seqloc/Seq_interval.hpp>#include <objects/seqalign/Seq_align.hpp>#include <objects/seqalign/Seq_align_set.hpp>#include <objects/seqalign/Dense_seg.hpp>#include <objects/seqalign/Dense_diag.hpp>#include <objects/seqalign/Std_seg.hpp>#include <objects/seqalign/Score.hpp>#include <objects/general/Object_id.hpp>#include <objmgr/util/sequence.hpp>#include <serial/iterator.hpp>/** @addtogroup AlgoBlast * * @{ */BEGIN_NCBI_SCOPEUSING_SCOPE(objects);BEGIN_SCOPE(blast)#ifndef SMALLEST_EVALUE#define SMALLEST_EVALUE 1.0e-180#endif#ifndef GAP_VALUE#define GAP_VALUE -1#endif// Converts a frame into the appropriate strandstatic ENa_strandx_Frame2Strand(short frame){ if (frame > 0) return eNa_strand_plus; else if (frame < 0) return eNa_strand_minus; else return eNa_strand_unknown;}static int x_GetCurrPos(int& pos, int pos2advance){ int retval; if (pos < 0) retval = -(pos + pos2advance - 1); else retval = pos; pos += pos2advance; return retval;}static TSeqPosx_GetAlignmentStart(int& curr_pos, const GapEditScript* esp, ENa_strand strand, bool translate, int length, int original_length, short frame){ TSeqPos retval; if (strand == eNa_strand_minus) { if (translate) retval = original_length - CODON_LENGTH*(x_GetCurrPos(curr_pos, esp->num) + esp->num) + frame + 1; else retval = length - x_GetCurrPos(curr_pos, esp->num) - esp->num; } else { if (translate) retval = frame - 1 + CODON_LENGTH*x_GetCurrPos(curr_pos, esp->num); else retval = x_GetCurrPos(curr_pos, esp->num); } return retval;}/// C++ version of GXECollectDataForSeqalign/// Note that even though the edit_block is passed in, data for seqalign is/// collected from the esp argument for nsegs segmentsstatic voidx_CollectSeqAlignData(const GapEditBlock* edit_block, const GapEditScript* esp_head, unsigned int nsegs, vector<TSignedSeqPos>& starts, vector<TSeqPos>& lengths, vector<ENa_strand>& strands){ ASSERT(edit_block != NULL); GapEditScript* esp = const_cast<GapEditScript*>(esp_head); ENa_strand m_strand, s_strand; // strands of alignment TSignedSeqPos m_start, s_start; // running starts of alignment int start1 = edit_block->start1; // start of alignment on master sequence int start2 = edit_block->start2; // start of alignment on slave sequence m_strand = x_Frame2Strand(edit_block->frame1); s_strand = x_Frame2Strand(edit_block->frame2); for (unsigned int i = 0; esp && i < nsegs; esp = esp->next, i++) { switch (esp->op_type) { case eGapAlignDecline: case eGapAlignSub: m_start = x_GetAlignmentStart(start1, esp, m_strand, edit_block->translate1 != 0, edit_block->length1, edit_block->original_length1, edit_block->frame1); s_start = x_GetAlignmentStart(start2, esp, s_strand, edit_block->translate2 != 0, edit_block->length2, edit_block->original_length2, edit_block->frame2); if (edit_block->reverse) { strands.push_back(s_strand); strands.push_back(m_strand); starts.push_back(s_start); starts.push_back(m_start); } else { strands.push_back(m_strand); strands.push_back(s_strand); starts.push_back(m_start); starts.push_back(s_start); } break; // Insertion on the master sequence (gap on slave) case eGapAlignIns: m_start = x_GetAlignmentStart(start1, esp, m_strand, edit_block->translate1 != 0, edit_block->length1, edit_block->original_length1, edit_block->frame1); s_start = GAP_VALUE; if (edit_block->reverse) { strands.push_back(i == 0 ? eNa_strand_unknown : s_strand); strands.push_back(m_strand); starts.push_back(s_start); starts.push_back(m_start); } else { strands.push_back(m_strand); strands.push_back(i == 0 ? eNa_strand_unknown : s_strand); starts.push_back(m_start); starts.push_back(s_start); } break; // Deletion on master sequence (gap; insertion on slave) case eGapAlignDel: m_start = GAP_VALUE; s_start = x_GetAlignmentStart(start2, esp, s_strand, edit_block->translate2 != 0, edit_block->length2, edit_block->original_length2, edit_block->frame2); if (edit_block->reverse) { strands.push_back(s_strand); strands.push_back(i == 0 ? eNa_strand_unknown : m_strand); starts.push_back(s_start); starts.push_back(m_start); } else { strands.push_back(i == 0 ? eNa_strand_unknown : m_strand); strands.push_back(s_strand); starts.push_back(m_start); starts.push_back(s_start); } break; default: break; } lengths.push_back(esp->num); } // Make sure the vectors have the right size if (lengths.size() != nsegs) lengths.resize(nsegs); if (starts.size() != nsegs*2) starts.resize(nsegs*2); if (strands.size() != nsegs*2) strands.resize(nsegs*2);}static CRef<CDense_seg>x_CreateDenseg(const CSeq_id* master, const CSeq_id* slave, vector<TSignedSeqPos>& starts, vector<TSeqPos>& lengths, vector<ENa_strand>& strands){ ASSERT(master); ASSERT(slave); CRef<CDense_seg> dense_seg(new CDense_seg()); // Pairwise alignment is 2 dimensional dense_seg->SetDim(2); // Set the sequence ids CDense_seg::TIds& ids = dense_seg->SetIds(); CRef<CSeq_id> tmp(new CSeq_id(master->AsFastaString())); ids.push_back(tmp); tmp.Reset(new CSeq_id(slave->AsFastaString())); ids.push_back(tmp); ids.resize(dense_seg->GetDim()); dense_seg->SetLens() = lengths; dense_seg->SetStrands() = strands; dense_seg->SetStarts() = starts; dense_seg->SetNumseg(lengths.size()); return dense_seg;}static CSeq_align::C_Segs::TStdx_CreateStdSegs(const CSeq_id* master, const CSeq_id* slave, vector<TSignedSeqPos>& starts, vector<TSeqPos>& lengths, vector<ENa_strand>& strands, bool reverse, bool translate_master, bool translate_slave){ ASSERT(master); ASSERT(slave); CSeq_align::C_Segs::TStd retval; int nsegs = lengths.size(); // number of segments in alignment TSignedSeqPos m_start, m_stop; // start and stop for master sequence TSignedSeqPos s_start, s_stop; // start and stop for slave sequence CRef<CSeq_id> master_id(new CSeq_id(master->AsFastaString())); CRef<CSeq_id> slave_id(new CSeq_id(slave->AsFastaString())); for (int i = 0; i < nsegs; i++) { CRef<CStd_seg> std_seg(new CStd_seg()); CRef<CSeq_loc> master_loc(new CSeq_loc()); CRef<CSeq_loc> slave_loc(new CSeq_loc()); // Pairwise alignment is 2 dimensional std_seg->SetDim(2); // Set master seqloc if ( (m_start = starts[2*i]) != GAP_VALUE) { master_loc->SetInt().SetId(*master_id); master_loc->SetInt().SetFrom(m_start); if (translate_master || (reverse && translate_slave)) m_stop = m_start + CODON_LENGTH*lengths[i] - 1; else m_stop = m_start + lengths[i] - 1; master_loc->SetInt().SetTo(m_stop); master_loc->SetInt().SetStrand(strands[2*i]); } else { master_loc->SetEmpty(*master_id); } // Set slave seqloc if ( (s_start = starts[2*i+1]) != GAP_VALUE) { slave_loc->SetInt().SetId(*slave_id); slave_loc->SetInt().SetFrom(s_start); if (translate_slave || (reverse && translate_master)) s_stop = s_start + CODON_LENGTH*lengths[i] - 1; else s_stop = s_start + lengths[i] - 1; slave_loc->SetInt().SetTo(s_stop); slave_loc->SetInt().SetStrand(strands[2*i+1]); } else { slave_loc->SetEmpty(*slave_id); } if (reverse) { std_seg->SetIds().push_back(slave_id); std_seg->SetIds().push_back(master_id); std_seg->SetLoc().push_back(slave_loc); std_seg->SetLoc().push_back(master_loc); } else { std_seg->SetIds().push_back(master_id); std_seg->SetIds().push_back(slave_id); std_seg->SetLoc().push_back(master_loc); std_seg->SetLoc().push_back(slave_loc); } retval.push_back(std_seg); } return retval;}/// Clone of GXECorrectUASequence (tools/gapxdrop.c)/// Assumes eGapAlignDecline regions are to the right of eGapAlign{Ins,Del}./// This function swaps them (eGapAlignDecline ends to the right of the gap)static void x_CorrectUASequence(GapEditBlock* edit_block){ GapEditScript* curr = NULL,* curr_last = NULL; GapEditScript* indel_prev = NULL; // pointer to node before the last // insertion or deletion followed immediately by GAPALIGN_DECLINE bool last_indel = false; // last operation was an insertion or deletion for (curr = edit_block->esp; curr; curr = curr->next) { // if GAPALIGN_DECLINE immediately follows an insertion or deletion if (curr->op_type == eGapAlignDecline && last_indel) { /* This is invalid condition and regions should be exchanged */ if (indel_prev != NULL) indel_prev->next = curr; else edit_block->esp = curr; /* First element in a list */ // CC: If flaking gaps are allowed, curr_last could be NULL and the // following statement would core dump... curr_last->next = curr->next; curr->next = curr_last; } last_indel = false; if (curr->op_type == eGapAlignIns || curr->op_type == eGapAlignDel) { last_indel = true; indel_prev = curr_last; } curr_last = curr; } return;}/// C++ version of GXEMakeSeqAlign (tools/gapxdrop.c)/// Creates a Seq-align for a single HSPstatic CRef<CSeq_align>x_CreateSeqAlign(const CSeq_id* master, const CSeq_id* slave, vector<TSignedSeqPos> starts, vector<TSeqPos> lengths, vector<ENa_strand> strands, bool translate_master, bool translate_slave, bool reverse){ CRef<CSeq_align> sar(new CSeq_align()); sar->SetType(CSeq_align::eType_partial); sar->SetDim(2); // BLAST only creates pairwise alignments if (translate_master || translate_slave) { sar->SetSegs().SetStd() = x_CreateStdSegs(master, slave, starts, lengths, strands, reverse, translate_master, translate_slave);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -