首页 › 资源下载 › 生物技术 › ncbi源码 › 源码查看
nw_formatter.cpp

来自「ncbi源码」· C++ 代码 · 共 528 行 · 第 1/2 页
CPP
528 行
/* * =========================================================================== * PRODUCTION $Log: nw_formatter.cpp,v $ * PRODUCTION Revision 1000.2  2004/06/01 18:04:54  gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.10 * PRODUCTION * =========================================================================== *//* $Id: nw_formatter.cpp,v 1000.2 2004/06/01 18:04:54 gouriano Exp $ * =========================================================================== * *                            PUBLIC DOMAIN NOTICE                           *               National Center for Biotechnology Information *                                                                           *  This software/database is a "United States Government Work" under the    *  terms of the United States Copyright Act.  It was written as part of     *  the author's official duties as a United States Government employee and  *  thus cannot be copyrighted.  This software/database is freely available  *  to the public for use. The National Library of Medicine and the U.S.     *  Government have not placed any restriction on its use or reproduction.   *                                                                           *  Although all reasonable efforts have been taken to ensure the accuracy   *  and reliability of the software and data, the NLM and the U.S.            *  Government do not and cannot warrant the performance or results that     *  may be obtained by using this software or data. The NLM and the U.S.     *  Government disclaim all warranties, express or implied, including        *  warranties of performance, merchantability or fitness for any particular *  purpose.                                                                 *                                                                           *  Please cite the author in any work or product based on this material.    
* * =========================================================================== * * Author:  Yuri Kapustin * * =========================================================================== * */#include <ncbi_pch.hpp>#include <algo/align/nw_formatter.hpp>#include <algo/align/nw_aligner.hpp>#include <algo/align/align_exception.hpp>#include <objects/seqalign/Score.hpp>#include <objects/general/Object_id.hpp>#include <objects/seqalign/Dense_seg.hpp>#include <objects/seqloc/Seq_id.hpp>#include <objects/seqalign/Seq_align.hpp>#include <serial/objostrasn.hpp>#include <serial/serial.hpp>#include <iterator>BEGIN_NCBI_SCOPEUSING_SCOPE(objects);CNWFormatter::CNWFormatter (const CNWAligner& aligner):    m_aligner(&aligner){}void CNWFormatter::AsSeqAlign(CSeq_align* seqalign) const{    if(seqalign == 0) {        NCBI_THROW(CAlgoAlignException,                   eBadParameter,                   "Invalid address specified");    }        const vector<CNWAligner::ETranscriptSymbol>& transcript =         *(m_aligner->GetTranscript());    if(transcript.size() == 0) {        NCBI_THROW(CAlgoAlignException,                   eNoData,                   "Zero size transcript (forgot to run the aligner?)");    }    seqalign->Reset();    // the alignment is pairwise    seqalign->SetDim(2);    // this is a global alignment    seqalign->SetType(CSeq_align::eType_global);    // seq-ids    CRef< CSeq_id > id1 ( new CSeq_id );    CRef< CObject_id > local_oid1 (new CObject_id);    local_oid1->SetStr(m_Seq1Id);    id1->SetLocal(*local_oid1);    CRef< CSeq_id > id2 ( new CSeq_id );    CRef< CObject_id > local_oid2 (new CObject_id);    local_oid2->SetStr(m_Seq2Id);    id2->SetLocal(*local_oid2);        // the score was calculated during the main process    CRef< CScore > score (new CScore);    CRef< CObject_id > id (new CObject_id);    id->SetStr("score");    score->SetId(*id);    CRef< CScore::C_Value > val (new CScore::C_Value);    val->SetInt(m_aligner->GetScore());    score->SetValue(*val);    
CSeq_align::TScore& scorelist = seqalign->SetScore();    scorelist.push_back(score);    // create segments and add them to this seq-align    CRef< CSeq_align::C_Segs > segs (new CSeq_align::C_Segs);    CDense_seg& ds = segs->SetDenseg();    ds.SetDim(2);    CDense_seg::TIds& ids = ds.SetIds();    ids.push_back( id1 );    ids.push_back( id2 );    CDense_seg::TStarts&  starts  = ds.SetStarts();    CDense_seg::TLens&    lens    = ds.SetLens();    CDense_seg::TStrands& strands = ds.SetStrands();        // iterate through transcript    size_t seg_count = 0;    {{         const char * const S1 = m_aligner->GetSeq1();        const char * const S2 = m_aligner->GetSeq2();        const char *seq1 = S1, *seq2 = S2;        const char *start1 = seq1, *start2 = seq2;        vector<CNWAligner::ETranscriptSymbol>::const_reverse_iterator            ib = transcript.rbegin(),            ie = transcript.rend(),            ii;                CNWAligner::ETranscriptSymbol ts = *ib;        bool intron = (ts == CNWAligner::eTS_Intron);        char seg_type0 = ((ts == CNWAligner::eTS_Insert || intron )? 1:                          (ts == CNWAligner::eTS_Delete)? 2: 0);        size_t seg_len = 0;        for (ii = ib;  ii != ie; ++ii) {            ts = *ii;            intron = (ts == CNWAligner::eTS_Intron);            char seg_type = ((ts == CNWAligner::eTS_Insert || intron )? 1:                             (ts == CNWAligner::eTS_Delete)? 2: 0);            if(seg_type0 != seg_type) {                starts.push_back( (seg_type0 == 1)? -1: start1 - S1 );                starts.push_back( (seg_type0 == 2)? 
-1: start2 - S2 );                lens.push_back(seg_len);                strands.push_back(eNa_strand_plus);                strands.push_back(eNa_strand_plus);                ++seg_count;                start1 = seq1;                start2 = seq2;                seg_type0 = seg_type;                seg_len = 1;            }            else {                ++seg_len;            }            if(seg_type != 1) ++seq1;            if(seg_type != 2) ++seq2;        }        // the last one        starts.push_back( (seg_type0 == 1)? -1: start1 - S1 );        starts.push_back( (seg_type0 == 2)? -1: start2 - S2 );        lens.push_back(seg_len);        strands.push_back(eNa_strand_plus);        strands.push_back(eNa_strand_plus);        ++seg_count;    }}    ds.SetNumseg(seg_count);    ds.SetIds();    seqalign->SetSegs(*segs);}void CNWFormatter::AsText(string* output, ETextFormatType type,                          size_t line_width) const{    CNcbiOstrstream ss;    const vector<CNWAligner::ETranscriptSymbol>& transcript =        *(m_aligner->GetTranscript());    if(transcript.size() == 0) {        NCBI_THROW(CAlgoAlignException,                   eNoData,                   "Zero size transcript (forgot to run the aligner?)");    }    switch (type) {    case eFormatType1: {        if(m_Seq1Id.size() && m_Seq2Id.size()) {            ss << '>' << m_Seq1Id << '\t' << m_Seq2Id << endl;        }        vector<char> v1, v2;        size_t aln_size = x_ApplyTranscript(&v1, &v2);        unsigned i1 = 0, i2 = 0;        for (size_t i = 0;  i < aln_size; ) {            ss << i << '\t' << i1 << ':' << i2 << endl;            int i0 = i;            for (size_t jPos = 0;  i < aln_size  &&  jPos < line_width;                 ++i, ++jPos) {                char c = v1[i0 + jPos];                ss << c;                if(c != '-'  &&  c != '+')                    i1++;            }            ss << endl;                        string marker_line(line_width, ' ');            i = i0;            
for (size_t jPos = 0;  i < aln_size  &&  jPos < line_width;                 ++i, ++jPos) {                char c1 = v1[i0 + jPos];                char c  = v2[i0 + jPos];                ss << c;                if(c != '-' && c != '+')                    i2++;                if(c != c1  &&  c != '-'  &&  c1 != '-'  &&  c1 != '+')                    marker_line[jPos] = '^';            }            ss << endl << marker_line << endl;        }    }    break;    case eFormatType2: {        if(m_Seq1Id.size() && m_Seq2Id.size()) {            ss << '>' << m_Seq1Id << '\t' << m_Seq2Id << endl;        }        vector<char> v1, v2;        size_t aln_size = x_ApplyTranscript(&v1, &v2);        unsigned i1 = 0, i2 = 0;        for (size_t i = 0;  i < aln_size; ) {            ss << i << '\t' << i1 << ':' << i2 << endl;            int i0 = i;            for (size_t jPos = 0;  i < aln_size  &&  jPos < line_width;                 ++i, ++jPos) {                char c = v1[i0 + jPos];                ss << c;                if(c != '-'  &&  c != '+')                    i1++;            }            ss << endl;                        string line2 (line_width, ' ');            string line3 (line_width, ' ');            i = i0;            for (size_t jPos = 0;  i < aln_size  &&  jPos < line_width;                 ++i, ++jPos) {                char c1 = v1[i0 + jPos];                char c2  = v2[i0 + jPos];                if(c2 != '-' && c2 != '+')                    i2++;                if(c2 == c1)                    line2[jPos] = '|';
nw_formatter.cpp - 源码说明

本页面展示了「ncbi源码」中的 nw_formatter.cpp 源码文件，采用 C++ 编程语言编写，共 528 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与ncbi相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码：Ctrl + C
搜索代码：Ctrl + F
全屏模式：F11
增大字号：Ctrl + =
减小字号：Ctrl + -
显示快捷键：?