📄 nw_formatter.cpp
字号:
/* * =========================================================================== * PRODUCTION $Log: nw_formatter.cpp,v $ * PRODUCTION Revision 1000.2 2004/06/01 18:04:54 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.10 * PRODUCTION * =========================================================================== *//* $Id: nw_formatter.cpp,v 1000.2 2004/06/01 18:04:54 gouriano Exp $ * =========================================================================== * * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information * * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's official duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular * purpose. * * Please cite the author in any work or product based on this material. * * =========================================================================== * * Author: Yuri Kapustin * * =========================================================================== * */#include <ncbi_pch.hpp>#include <algo/align/nw_formatter.hpp>#include <algo/align/nw_aligner.hpp>#include <algo/align/align_exception.hpp>#include <objects/seqalign/Score.hpp>#include <objects/general/Object_id.hpp>#include <objects/seqalign/Dense_seg.hpp>#include <objects/seqloc/Seq_id.hpp>#include <objects/seqalign/Seq_align.hpp>#include <serial/objostrasn.hpp>#include <serial/serial.hpp>#include <iterator>BEGIN_NCBI_SCOPEUSING_SCOPE(objects);CNWFormatter::CNWFormatter (const CNWAligner& aligner): m_aligner(&aligner){}void CNWFormatter::AsSeqAlign(CSeq_align* seqalign) const{ if(seqalign == 0) { NCBI_THROW(CAlgoAlignException, eBadParameter, "Invalid address specified"); } const vector<CNWAligner::ETranscriptSymbol>& transcript = *(m_aligner->GetTranscript()); if(transcript.size() == 0) { NCBI_THROW(CAlgoAlignException, eNoData, "Zero size transcript (forgot to run the aligner?)"); } seqalign->Reset(); // the alignment is pairwise seqalign->SetDim(2); // this is a global alignment seqalign->SetType(CSeq_align::eType_global); // seq-ids CRef< CSeq_id > id1 ( new CSeq_id ); CRef< CObject_id > local_oid1 (new CObject_id); local_oid1->SetStr(m_Seq1Id); id1->SetLocal(*local_oid1); CRef< CSeq_id > id2 ( new CSeq_id ); CRef< CObject_id > local_oid2 (new CObject_id); local_oid2->SetStr(m_Seq2Id); id2->SetLocal(*local_oid2); // the score was calculated during the main process CRef< CScore > score (new CScore); CRef< CObject_id > id (new CObject_id); id->SetStr("score"); score->SetId(*id); CRef< CScore::C_Value > val (new CScore::C_Value); val->SetInt(m_aligner->GetScore()); score->SetValue(*val); CSeq_align::TScore& scorelist = seqalign->SetScore(); scorelist.push_back(score); // create segments and add them to this seq-align CRef< CSeq_align::C_Segs > segs (new CSeq_align::C_Segs); CDense_seg& ds = segs->SetDenseg(); ds.SetDim(2); CDense_seg::TIds& ids = ds.SetIds(); ids.push_back( id1 ); ids.push_back( id2 ); CDense_seg::TStarts& starts = ds.SetStarts(); CDense_seg::TLens& lens = ds.SetLens(); CDense_seg::TStrands& strands = ds.SetStrands(); // iterate through transcript size_t seg_count = 0; {{ const char * const S1 = m_aligner->GetSeq1(); const char * const S2 = m_aligner->GetSeq2(); const char *seq1 = S1, *seq2 = S2; const char *start1 = seq1, *start2 = seq2; vector<CNWAligner::ETranscriptSymbol>::const_reverse_iterator ib = transcript.rbegin(), ie = transcript.rend(), ii; CNWAligner::ETranscriptSymbol ts = *ib; bool intron = (ts == CNWAligner::eTS_Intron); char seg_type0 = ((ts == CNWAligner::eTS_Insert || intron )? 1: (ts == CNWAligner::eTS_Delete)? 2: 0); size_t seg_len = 0; for (ii = ib; ii != ie; ++ii) { ts = *ii; intron = (ts == CNWAligner::eTS_Intron); char seg_type = ((ts == CNWAligner::eTS_Insert || intron )? 1: (ts == CNWAligner::eTS_Delete)? 2: 0); if(seg_type0 != seg_type) { starts.push_back( (seg_type0 == 1)? -1: start1 - S1 ); starts.push_back( (seg_type0 == 2)? -1: start2 - S2 ); lens.push_back(seg_len); strands.push_back(eNa_strand_plus); strands.push_back(eNa_strand_plus); ++seg_count; start1 = seq1; start2 = seq2; seg_type0 = seg_type; seg_len = 1; } else { ++seg_len; } if(seg_type != 1) ++seq1; if(seg_type != 2) ++seq2; } // the last one starts.push_back( (seg_type0 == 1)? -1: start1 - S1 ); starts.push_back( (seg_type0 == 2)? -1: start2 - S2 ); lens.push_back(seg_len); strands.push_back(eNa_strand_plus); strands.push_back(eNa_strand_plus); ++seg_count; }} ds.SetNumseg(seg_count); ds.SetIds(); seqalign->SetSegs(*segs);}void CNWFormatter::AsText(string* output, ETextFormatType type, size_t line_width) const{ CNcbiOstrstream ss; const vector<CNWAligner::ETranscriptSymbol>& transcript = *(m_aligner->GetTranscript()); if(transcript.size() == 0) { NCBI_THROW(CAlgoAlignException, eNoData, "Zero size transcript (forgot to run the aligner?)"); } switch (type) { case eFormatType1: { if(m_Seq1Id.size() && m_Seq2Id.size()) { ss << '>' << m_Seq1Id << '\t' << m_Seq2Id << endl; } vector<char> v1, v2; size_t aln_size = x_ApplyTranscript(&v1, &v2); unsigned i1 = 0, i2 = 0; for (size_t i = 0; i < aln_size; ) { ss << i << '\t' << i1 << ':' << i2 << endl; int i0 = i; for (size_t jPos = 0; i < aln_size && jPos < line_width; ++i, ++jPos) { char c = v1[i0 + jPos]; ss << c; if(c != '-' && c != '+') i1++; } ss << endl; string marker_line(line_width, ' '); i = i0; for (size_t jPos = 0; i < aln_size && jPos < line_width; ++i, ++jPos) { char c1 = v1[i0 + jPos]; char c = v2[i0 + jPos]; ss << c; if(c != '-' && c != '+') i2++; if(c != c1 && c != '-' && c1 != '-' && c1 != '+') marker_line[jPos] = '^'; } ss << endl << marker_line << endl; } } break; case eFormatType2: { if(m_Seq1Id.size() && m_Seq2Id.size()) { ss << '>' << m_Seq1Id << '\t' << m_Seq2Id << endl; } vector<char> v1, v2; size_t aln_size = x_ApplyTranscript(&v1, &v2); unsigned i1 = 0, i2 = 0; for (size_t i = 0; i < aln_size; ) { ss << i << '\t' << i1 << ':' << i2 << endl; int i0 = i; for (size_t jPos = 0; i < aln_size && jPos < line_width; ++i, ++jPos) { char c = v1[i0 + jPos]; ss << c; if(c != '-' && c != '+') i1++; } ss << endl; string line2 (line_width, ' '); string line3 (line_width, ' '); i = i0; for (size_t jPos = 0; i < aln_size && jPos < line_width; ++i, ++jPos) { char c1 = v1[i0 + jPos]; char c2 = v2[i0 + jPos]; if(c2 != '-' && c2 != '+') i2++; if(c2 == c1) line2[jPos] = '|';
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -