cav_alndisplay.cpp

来自「ncbi源码」· C++ 代码 · 共 1,340 行 · 第 1/4 页

CPP
1,340
字号
/*
 * ===========================================================================
 * PRODUCTION $Log: cav_alndisplay.cpp,v $
 * PRODUCTION Revision 1000.3  2004/06/01 19:41:15  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.5
 * PRODUCTION
 * ===========================================================================
 */

/*  $Id: cav_alndisplay.cpp,v 1000.3 2004/06/01 19:41:15 gouriano Exp $
* ===========================================================================
*
*                            PUBLIC DOMAIN NOTICE
*               National Center for Biotechnology Information
*
*  This software/database is a "United States Government Work" under the
*  terms of the United States Copyright Act.  It was written as part of
*  the author's official duties as a United States Government employee and
*  thus cannot be copyrighted.  This software/database is freely available
*  to the public for use. The National Library of Medicine and the U.S.
*  Government have not placed any restriction on its use or reproduction.
*
*  Although all reasonable efforts have been taken to ensure the accuracy
*  and reliability of the software and data, the NLM and the U.S.
*  Government do not and cannot warrant the performance or results that
*  may be obtained by using this software or data. The NLM and the U.S.
*  Government disclaim all warranties, express or implied, including
*  warranties of performance, merchantability or fitness for any particular
*  purpose.
*
*  Please cite the author in any work or product based on this material.
*
* ===========================================================================
*
* Authors:  Paul Thiessen
*
* File Description:
*      Classes to hold alignment display
*
* ===========================================================================
*/

#include <ncbi_pch.hpp>
#include <corelib/ncbistl.hpp>
#include <corelib/ncbiobj.hpp>

#include <map>
#include <deque>
#include <memory>
#include <iomanip>
#include <math.h>

#include <objtools/cddalignview/cav_alndisplay.hpp>
#include <objtools/cddalignview/cav_seqset.hpp>
#include <objtools/cddalignview/cav_alignset.hpp>
#include <objtools/cddalignview/cddalignview.h>


BEGIN_NCBI_SCOPE

const double AlignmentDisplay::SHOW_IDENTITY = 100000.0;

// HTML colors
static const int nBlockColors = 2;
static const string
    bgColor("#FFFFFF"), blockBGColors[nBlockColors] = { "#FFFFE0", "#FFE8FF" },
    rulerColor("#700777"), numColor("#229922"), featColor("#888811"),
    plainColor("#888888"), blockColor("#2233CC"), conservedColor("#FF4466");

// stream manipulator chains that switch an output stream's field justification
#define LEFT_JUSTIFY resetiosflags(IOS_BASE::right) << setiosflags(IOS_BASE::left)
#define RIGHT_JUSTIFY resetiosflags(IOS_BASE::left) << setiosflags(IOS_BASE::right)

// Return a copy of str with every character passed through tolower().
// Characters are converted to unsigned char first, because calling tolower()
// with a negative char value is undefined behavior.
static string StrToLower(const string& str)
{
    string newStr(str);
    for (string::size_type i=0; i<newStr.size(); ++i)
        newStr[i] = static_cast<char>(tolower(static_cast<unsigned char>(newStr[i])));
    return newStr;
}

// Describes one run of unaligned slave residues found while building a row:
//  - alnLocBefore / alnLocAfter: display columns of the aligned slave residues
//      on either side of the run (-1 / display width at the two ends)
//  - seqLocFrom / seqLocTo: inclusive slave sequence range of the run
class UnalignedInterval
{
public:
    int alnLocBefore, alnLocAfter, seqLocFrom, seqLocTo;
};

typedef list < UnalignedInterval > IntervalList;

// Build the display from a master sequence plus one row per alignment.
// 'status' starts as CAV_ERROR_DISPLAY and is only set to CAV_SUCCESS once
// construction has run to completion; on an early return the object is left
// in the error state.
AlignmentDisplay::AlignmentDisplay(const SequenceSet *sequenceSet, const AlignmentSet *alignmentSet) :
    status(CAV_ERROR_DISPLAY)
{
    // both inputs are dereferenced unconditionally below
    if (!sequenceSet || !alignmentSet) {
        ERR_POST(Error << "AlignmentDisplay construction requires non-null SequenceSet and AlignmentSet");
        return;
    }

    // start with master row at top, all in lowercase - will be capitalized as things are
    // aligned to it. Also, add an IndexAlnLocToSeqLocRow for the master (only).
    if (!sequenceSet->master) {
        ERR_POST(Error << "Need to know master sequence of SequenceSet for AlignmentDisplay construction");
        return;
    }
    textRows.push_back(new TextRow(StrToLower(sequenceSet->master->sequenceString)));

    // initialize the master index row
    indexAlnLocToSeqLocRows.push_back(
        new IndexAlnLocToSeqLocRow(sequenceSet->master, sequenceSet->master->sequenceString.size()));
    const int masterLength = static_cast<int>(sequenceSet->master->sequenceString.size());
    int l;
    for (l=0; l<masterLength; ++l)
        indexAlnLocToSeqLocRows[0]->SetSeqLocAt(l, l);

    // loop through alignments - one new row for each alignment
    AlignmentSet::AlignmentList::const_iterator a, ae = alignmentSet->alignments.end();
    for (a=alignmentSet->alignments.begin(); a!=ae; ++a) {

        const string& slaveSeq = (*a)->slave->sequenceString;
        const int slaveLength = static_cast<int>(slaveSeq.size());

        // add index row for each slave that contains only Sequence* for now
        indexAlnLocToSeqLocRows.push_back(new IndexAlnLocToSeqLocRow((*a)->slave));

        // first add blank row that will be filled in with slave
        textRows.push_back(new TextRow(GetWidth()));

        // during this pass through the master, do several things:
        //  - change aligned master residues to uppercase
        //  - fill in aligned slave residues with uppercase
        //  - make a list of all unaligned slave regions, and the current display
        //      coordinates of the aligned slave residues on either side
        int prevAlignedSlaveSeqLoc = -1, prevAlignedSlaveAlnLoc = -1;
        IntervalList intervalList;
        for (int alnLoc=0; alnLoc<=GetWidth(); ++alnLoc) {

            int masterSeqLoc = -1, alignedSlaveSeqLoc = -1;

            if (alnLoc < GetWidth()) {
                masterSeqLoc = indexAlnLocToSeqLocRows.front()->GetSeqLocAt(alnLoc);
                if (masterSeqLoc >= 0) {
                    alignedSlaveSeqLoc = (*a)->masterToSlave[masterSeqLoc];
                    if (alignedSlaveSeqLoc >= 0) {
                        textRows.front()->SetCharAt(alnLoc,
                            toupper(static_cast<unsigned char>(textRows.front()->GetCharAt(alnLoc))));
                        textRows.back()->SetCharAt(alnLoc,
                            toupper(static_cast<unsigned char>(slaveSeq[alignedSlaveSeqLoc])));
                    }
                }
            } else {
                // one-past-the-end pass: pretend there is an aligned residue just
                // beyond both sequences so the slave's right tail becomes an interval
                masterSeqLoc = static_cast<int>((*a)->master->sequenceString.size());
                alignedSlaveSeqLoc = slaveLength;
            }

            if (alignedSlaveSeqLoc >= 0) {
                // a jump of more than one slave residue means unaligned residues in between
                if (alignedSlaveSeqLoc - prevAlignedSlaveSeqLoc > 1) {
                    UnalignedInterval interval;
                    interval.alnLocBefore = prevAlignedSlaveAlnLoc;
                    interval.alnLocAfter = alnLoc;
                    interval.seqLocFrom = prevAlignedSlaveSeqLoc + 1;
                    interval.seqLocTo = alignedSlaveSeqLoc - 1;
                    intervalList.push_back(interval);
                }
                prevAlignedSlaveSeqLoc = alignedSlaveSeqLoc;
                prevAlignedSlaveAlnLoc = alnLoc;
            }
        }

        // now, put in the unaligned regions of the slave. If there is more space
        // than residues, then pad with spaces as necessary. If
        // there isn't enough space, then add gaps to all prior alignments.
        IntervalList::iterator i, ie = intervalList.end();
        int alnLocOffset = 0;   // to track # inserted columns
        for (i=intervalList.begin(); i!=ie; ++i) {

            // compensate for inserted columns in display
            i->alnLocBefore += alnLocOffset;
            i->alnLocAfter += alnLocOffset;

            int
                displaySpace = i->alnLocAfter - i->alnLocBefore - 1,
                unalignedLength = i->seqLocTo - i->seqLocFrom + 1,
                extraSpace = displaySpace - unalignedLength;

            // add gaps to make space
            if (extraSpace < 0) {

                // where to insert gaps if display space is too small
                int insertPos;
                if (i->seqLocFrom == 0) {   // left tail
                    insertPos = 0;
                } else if (i->seqLocTo == slaveLength - 1) { // right tail
                    insertPos = GetWidth();
                } else {    // between aligned blocks
                    insertPos = i->alnLocAfter - displaySpace / 2;
                }

                InsertGaps(-extraSpace, insertPos);
                alnLocOffset -= extraSpace;
                extraSpace = 0;
            }

            // fill in unaligned regions with lowercase, right-justifying only for left tail
            for (l=0; l<unalignedLength; ++l) {
                textRows.back()->SetCharAt(
                    i->alnLocBefore + 1 + ((i->seqLocFrom == 0) ? l + extraSpace : l),
                    tolower(static_cast<unsigned char>(slaveSeq[i->seqLocFrom + l])));
            }
        }
    }

    ERR_POST(Info << "initial alignment display size: " << GetWidth() << " x " << GetNRows());

    // the above algorithm introduces more gaps than are strictly necessary. This
    // will "squeeze" the alignment, deleting gaps wherever possible
    Squeeze();

    // finally, redistribute unaligned residues so that they are split equally between
    // flanking aligned residues
    SplitUnaligned();

    // The Squeeze and SplitUnaligned leave the master index row out of sync, so reindex it
    indexAlnLocToSeqLocRows[0]->ReIndex(*(textRows[0]));

    // find first and last aligned master residues (in alignment coords)
    firstAlnLoc = GetWidth();
    lastAlnLoc = -1;
    for (l=0; l<GetWidth(); ++l)
        if (IsAligned(textRows[0]->GetCharAt(l))) {
            firstAlnLoc = l;
            break;
        }
    for (l=GetWidth()-1; l>=0; --l)
        if (IsAligned(textRows[0]->GetCharAt(l))) {
            lastAlnLoc = l;
            break;
        }

    ERR_POST(Info << "final alignment display size: " << GetWidth() << " x " << GetNRows());
    ERR_POST(Info << "aligned region: " << firstAlnLoc << " through " << lastAlnLoc);
    status = CAV_SUCCESS;
}

// Free every index row and text row allocated with new in the constructor.
AlignmentDisplay::~AlignmentDisplay()
{
    size_t i;
    for (i=0; i<indexAlnLocToSeqLocRows.size(); ++i) delete indexAlnLocToSeqLocRows[i];
    for (i=0; i<textRows.size(); ++i) delete textRows[i];
}

// shift unaligned residues as far as possible to the left
void AlignmentDisplay::ShiftUnalignedLeft(void)
{
    int gapLoc, resLoc;

    for (int row=0; row<GetNRows(); ++row) {

        gapLoc = 0;
        while (gapLoc < GetWidth()) {

            // find a gap
            while (gapLoc < GetWidth() && !IsGap(textRows[row]->GetCharAt(gapLoc))) ++gapLoc;
            if (gapLoc == GetWidth()) break;

            // find the next unaligned residue
            resLoc = gapLoc + 1;
            while (resLoc < GetWidth() && IsGap(textRows[row]->GetCharAt(resLoc))) ++resLoc;
            if (resLoc == GetWidth()) break;
            if (!IsUnaligned(textRows[row]->GetCharAt(resLoc))) {
                // the gap run is followed by something that is not an unaligned
                // residue, so nothing can move here; resume the search after it
                gapLoc = resLoc + 1;
                continue;
            }

            // shift unaligned residues over
            while (resLoc < GetWidth() && IsUnaligned(textRows[row]->GetCharAt(resLoc))) {
                textRows[row]->SetCharAt(gapLoc++, textRows[row]->GetCharAt(resLoc));
                textRows[row]->SetCharAt(resLoc++, '-');
            }
        }
    }
}

// Delete gap columns that every row can spare. A squeeze at a column only
// happens when TextRow::IsSqueezable() succeeds for all rows; the number of
// gaps removed is the smallest nGaps reported by any row.
void AlignmentDisplay::Squeeze(void)
{
    // move all unaligned residues as far left as possible - makes it much simpler
    // to identify squeezable regions
    ShiftUnalignedLeft();

    typedef vector < int > SqueezeLocs;
    SqueezeLocs squeezeLocs(GetNRows());

    // find last aligned residue; stop search one after that
    int alnLoc, lastAlignedLoc;
    for (lastAlignedLoc=GetWidth()-2;
         lastAlignedLoc>=0 && !IsAligned(textRows[0]->GetCharAt(lastAlignedLoc));
         --lastAlignedLoc) ;
    ERR_POST(Info << "checking for squeeze up to " << (lastAlignedLoc+1));

    for (alnLoc=0; alnLoc<=lastAlignedLoc+1; ++alnLoc) {

        // check to see whether each row is squeezable at this location
        int row, nGaps, minNGaps = GetWidth();
        for (row=0; row<GetNRows(); ++row) {
            if (!textRows[row]->IsSqueezable(alnLoc, &nGaps, &(squeezeLocs[row]), minNGaps))
                break;
            if (nGaps < minNGaps) minNGaps = nGaps;
        }

        // if all rows are squeezable, then do the squeeze
        if (row == GetNRows()) {
            ERR_POST(Info << "squeezing " << minNGaps << " gaps at loc " << alnLoc);
            for (row=0; row<GetNRows(); ++row)
                textRows[row]->DeleteGaps(minNGaps, squeezeLocs[row]);
            lastAlignedLoc -= minNGaps; // account for shift of lastAlignedLoc
        }

        // after checking very first column, skip to first aligned loc to save time
        if (alnLoc == 0)
            while (alnLoc<=lastAlignedLoc && !IsAligned(textRows[0]->GetCharAt(alnLoc))) ++alnLoc;
    }
}

// redistribute unaligned residues so they're split left/right between aligned residues.
// This assumes ShiftUnalignedLeft() has already been called, so that all unaligned
// residues are on the left of the gaps.
void AlignmentDisplay::SplitUnaligned(void)
{
    // alnLocs of various key residues
    int firstAligned, prevAligned, nextAligned, firstUnaligned, lastUnaligned;
    int nGaps, nShift, shiftRes, shiftGap, i;

    for (int row=0; row<GetNRows(); ++row) {

        // find first aligned loc; count gaps up to there
        nGaps = 0;
        for (i=0; i<GetWidth()-2 && !IsAligned(textRows[row]->GetCharAt(i)); ++i)
             if (IsGap(textRows[row]->GetCharAt(i))) ++nGaps;
        firstAligned = i;

        // right-shift left tails
        if (nGaps > 0) {
            for (i=0; i<firstAligned-nGaps; ++i) {
                textRows[row]->SetCharAt(firstAligned-1-i, textRows[row]->GetCharAt(firstAligned-1-nGaps-i));
                textRows[row]->SetCharAt(firstAligned-1-nGaps-i, '-');
            }
        }

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?