cav_alndisplay.cpp
来自「ncbi源码」· C++ 代码 · 共 1,340 行 · 第 1/4 页
CPP
1,340 行
/*
 * ===========================================================================
 * PRODUCTION $Log: cav_alndisplay.cpp,v $
 * PRODUCTION Revision 1000.3  2004/06/01 19:41:15  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.5
 * PRODUCTION
 * ===========================================================================
 */

/*  $Id: cav_alndisplay.cpp,v 1000.3 2004/06/01 19:41:15 gouriano Exp $
* ===========================================================================
*
*                            PUBLIC DOMAIN NOTICE
*               National Center for Biotechnology Information
*
*  This software/database is a "United States Government Work" under the
*  terms of the United States Copyright Act.  It was written as part of
*  the author's official duties as a United States Government employee and
*  thus cannot be copyrighted.  This software/database is freely available
*  to the public for use. The National Library of Medicine and the U.S.
*  Government have not placed any restriction on its use or reproduction.
*
*  Although all reasonable efforts have been taken to ensure the accuracy
*  and reliability of the software and data, the NLM and the U.S.
*  Government do not and cannot warrant the performance or results that
*  may be obtained by using this software or data. The NLM and the U.S.
*  Government disclaim all warranties, express or implied, including
*  warranties of performance, merchantability or fitness for any particular
*  purpose.
*
*  Please cite the author in any work or product based on this material.
*
* ===========================================================================
*
* Authors:  Paul Thiessen
*
* File Description:
*      Classes to hold alignment display
*
* ===========================================================================
*/

#include <ncbi_pch.hpp>
#include <corelib/ncbistl.hpp>
#include <corelib/ncbiobj.hpp>

#include <map>
#include <deque>
#include <memory>
#include <iomanip>
#include <math.h>
#include <ctype.h>

#include <objtools/cddalignview/cav_alndisplay.hpp>
#include <objtools/cddalignview/cav_seqset.hpp>
#include <objtools/cddalignview/cav_alignset.hpp>
#include <objtools/cddalignview/cddalignview.h>


BEGIN_NCBI_SCOPE

const double AlignmentDisplay::SHOW_IDENTITY = 100000.0;

// HTML colors
static const int nBlockColors = 2;
static const string
    bgColor("#FFFFFF"), blockBGColors[nBlockColors] = { "#FFFFE0", "#FFE8FF" },
    rulerColor("#700777"), numColor("#229922"), featColor("#888811"),
    plainColor("#888888"), blockColor("#2233CC"), conservedColor("#FF4466");

// stream manipulator shorthands: switch the adjustment flag of an output stream
#define LEFT_JUSTIFY resetiosflags(IOS_BASE::right) << setiosflags(IOS_BASE::left)
#define RIGHT_JUSTIFY resetiosflags(IOS_BASE::left) << setiosflags(IOS_BASE::right)

// tolower()/toupper() are only defined for EOF and values representable as
// unsigned char; a plain (possibly signed) char must be converted first.
static inline char LowerCaseChar(char ch)
{
    return static_cast<char>(tolower(static_cast<unsigned char>(ch)));
}

static inline char UpperCaseChar(char ch)
{
    return static_cast<char>(toupper(static_cast<unsigned char>(ch)));
}

// return a copy of 'str' in which every character has been lowercased
static string StrToLower(const string& str)
{
    string newStr(str);
    for (string::size_type i=0; i<newStr.size(); ++i)
        newStr[i] = LowerCaseChar(newStr[i]);
    return newStr;
}

// One unaligned stretch of a slave sequence, as collected by the
// AlignmentDisplay constructor:
//  - seqLocFrom..seqLocTo (inclusive) are the sequence coordinates of the
//      unaligned residues
//  - alnLocBefore/alnLocAfter are the display columns of the aligned slave
//      residues on either side (-1 before the first aligned residue, and
//      the display width after the last one)
class UnalignedInterval
{
public:
    int alnLocBefore, alnLocAfter, seqLocFrom, seqLocTo;
};

typedef list < UnalignedInterval > IntervalList;

// Build the text display for a set of pairwise master/slave alignments: one
// row for the master followed by one row per alignment. 'status' stays at
// CAV_ERROR_DISPLAY unless construction runs to completion, in which case it
// is set to CAV_SUCCESS. Neither pointer argument is checked for NULL here;
// only a missing master sequence is reported.
AlignmentDisplay::AlignmentDisplay(const SequenceSet *sequenceSet, const AlignmentSet *alignmentSet) :
    status(CAV_ERROR_DISPLAY)
{
    // start with master row at top, all in lowercase - will be capitalized as things are
    // aligned to it. Also, add an IndexAlnLocToSeqLocRow for the master (only).
    if (!sequenceSet->master) {
        ERR_POST(Error << "Need to know master sequence of SequenceSet for AlignmentDisplay construction");
        return;
    }
    textRows.push_back(new TextRow(StrToLower(sequenceSet->master->sequenceString)));

    // initialize the master index row
    indexAlnLocToSeqLocRows.push_back(
        new IndexAlnLocToSeqLocRow(sequenceSet->master, sequenceSet->master->sequenceString.size()));
    const int masterLength = static_cast<int>(sequenceSet->master->sequenceString.size());
    int l;
    for (l=0; l<masterLength; ++l)
        indexAlnLocToSeqLocRows[0]->SetSeqLocAt(l, l);

    // loop through alignments - one new row for each alignment
    AlignmentSet::AlignmentList::const_iterator a, ae = alignmentSet->alignments.end();
    for (a=alignmentSet->alignments.begin(); a!=ae; ++a) {

        // add index row for each slave that contains only Sequence* for now
        indexAlnLocToSeqLocRows.push_back(new IndexAlnLocToSeqLocRow((*a)->slave));

        // first add blank row that will be filled in with slave
        textRows.push_back(new TextRow(GetWidth()));

        // during this pass through the master, do several things:
        //  - change aligned master residues to uppercase
        //  - fill in aligned slave residues with uppercase
        //  - make a list of all unaligned slave regions, and the current display
        //      coordinates of the aligned slave residues on either side
        int prevAlignedSlaveSeqLoc = -1, prevAlignedSlaveAlnLoc = -1;
        IntervalList intervalList;
        const int slaveLength = static_cast<int>((*a)->slave->sequenceString.size());

        // note the "<=": the extra iteration at alnLoc == GetWidth() acts as a
        // virtual aligned position one past the end of both sequences, so that
        // the slave's right tail is recorded as an interval too
        for (int alnLoc=0; alnLoc<=GetWidth(); ++alnLoc) {

            int masterSeqLoc = -1, alignedSlaveSeqLoc = -1;

            if (alnLoc < GetWidth()) {
                masterSeqLoc = indexAlnLocToSeqLocRows.front()->GetSeqLocAt(alnLoc);
                if (masterSeqLoc >= 0) {
                    alignedSlaveSeqLoc = (*a)->masterToSlave[masterSeqLoc];
                    if (alignedSlaveSeqLoc >= 0) {
                        textRows.front()->SetCharAt(alnLoc,
                            UpperCaseChar(textRows.front()->GetCharAt(alnLoc)));
                        textRows.back()->SetCharAt(alnLoc,
                            UpperCaseChar((*a)->slave->sequenceString[alignedSlaveSeqLoc]));
                    }
                }
            } else {
                masterSeqLoc = static_cast<int>((*a)->master->sequenceString.size());
                alignedSlaveSeqLoc = slaveLength;
            }

            if (alignedSlaveSeqLoc >= 0) {
                // a jump of more than one residue means unaligned slave residues in between
                if (alignedSlaveSeqLoc - prevAlignedSlaveSeqLoc > 1) {
                    intervalList.resize(intervalList.size() + 1);
                    intervalList.back().alnLocBefore = prevAlignedSlaveAlnLoc;
                    intervalList.back().alnLocAfter = alnLoc;
                    intervalList.back().seqLocFrom = prevAlignedSlaveSeqLoc + 1;
                    intervalList.back().seqLocTo = alignedSlaveSeqLoc - 1;
                }
                prevAlignedSlaveSeqLoc = alignedSlaveSeqLoc;
                prevAlignedSlaveAlnLoc = alnLoc;
            }
        }

        // now, put in the unaligned regions of the slave. If there is more space
        // than residues, then pad with spaces as necessary. If
        // there isn't enough space, then add gaps to all prior alignments.
        IntervalList::iterator i, ie = intervalList.end();
        int alnLocOffset = 0;   // to track # inserted columns
        for (i=intervalList.begin(); i!=ie; ++i) {

            // compensate for inserted columns in display
            i->alnLocBefore += alnLocOffset;
            i->alnLocAfter += alnLocOffset;

            int
                displaySpace = i->alnLocAfter - i->alnLocBefore - 1,
                unalignedLength = i->seqLocTo - i->seqLocFrom + 1,
                extraSpace = displaySpace - unalignedLength;

            // add gaps to make space
            if (extraSpace < 0) {

                // where to insert gaps if display space is too small
                int insertPos;
                if (i->seqLocFrom == 0) {   // left tail
                    insertPos = 0;
                } else if (i->seqLocTo == slaveLength - 1) { // right tail
                    insertPos = GetWidth();
                } else {    // between aligned blocks
                    insertPos = i->alnLocAfter - displaySpace / 2;
                }

                InsertGaps(-extraSpace, insertPos);
                alnLocOffset -= extraSpace;
                extraSpace = 0;
            }

            // fill in unaligned regions with lowercase, right-justifying only for left tail
            for (l=0; l<unalignedLength; ++l) {
                textRows.back()->SetCharAt(
                    i->alnLocBefore + 1 + ((i->seqLocFrom == 0) ? l + extraSpace : l),
                    LowerCaseChar((*a)->slave->sequenceString[i->seqLocFrom + l]));
            }
        }
    }

    ERR_POST(Info << "initial alignment display size: " << GetWidth() << " x " << GetNRows());

    // the above algorithm introduces more gaps than are strictly necessary. This
    // will "squeeze" the alignment, deleting gaps wherever possible
    Squeeze();

    // finally, redistribute unaligned residues so that they are split equally between
    // flanking aligned residues
    SplitUnaligned();

    // The Squeeze and SplitUnaligned leave the master index row out of sync, so reindex it
    indexAlnLocToSeqLocRows[0]->ReIndex(*(textRows[0]));

    // find first and last aligned master residues (in alignment coords); if the
    // master has no aligned residue these stay at GetWidth() and -1 respectively
    firstAlnLoc = GetWidth();
    lastAlnLoc = -1;
    for (l=0; l<GetWidth(); ++l)
        if (IsAligned(textRows[0]->GetCharAt(l))) {
            firstAlnLoc = l;
            break;
        }
    for (l=GetWidth()-1; l>=0; --l)
        if (IsAligned(textRows[0]->GetCharAt(l))) {
            lastAlnLoc = l;
            break;
        }

    ERR_POST(Info << "final alignment display size: " << GetWidth() << " x " << GetNRows());
    ERR_POST(Info << "aligned region: " << firstAlnLoc << " through " << lastAlnLoc);
    status = CAV_SUCCESS;
}

// release every index row and text row allocated with 'new' by this object
AlignmentDisplay::~AlignmentDisplay()
{
    size_t i;
    for (i=0; i<indexAlnLocToSeqLocRows.size(); ++i) delete indexAlnLocToSeqLocRows[i];
    for (i=0; i<textRows.size(); ++i) delete textRows[i];
}

// shift unaligned residues as far as possible to the left
void AlignmentDisplay::ShiftUnalignedLeft(void)
{
    int gapLoc, resLoc;

    for (int row=0; row<GetNRows(); ++row) {

        gapLoc = 0;
        while (gapLoc < GetWidth()) {

            // find a gap
            while (gapLoc < GetWidth() && !IsGap(textRows[row]->GetCharAt(gapLoc))) ++gapLoc;
            if (gapLoc == GetWidth()) break;

            // find the next unaligned residue
            resLoc = gapLoc + 1;
            while (resLoc < GetWidth() && IsGap(textRows[row]->GetCharAt(resLoc))) ++resLoc;
            if (resLoc == GetWidth()) break;
            // the gap run is followed by something that is not an unaligned
            // residue, so nothing can move into it; resume the search after it
            if (!IsUnaligned(textRows[row]->GetCharAt(resLoc))) {
                gapLoc = resLoc + 1;
                continue;
            }

            // shift unaligned residues over
            while (resLoc < GetWidth() && IsUnaligned(textRows[row]->GetCharAt(resLoc))) {
                textRows[row]->SetCharAt(gapLoc++, textRows[row]->GetCharAt(resLoc));
                textRows[row]->SetCharAt(resLoc++, '-');
            }
        }
    }
}

// Delete gaps from all rows wherever every row reports (via
// TextRow::IsSqueezable) that it can give some up at the same location.
// NOTE(review): the exact meaning of the values IsSqueezable returns is
// defined in TextRow, which is not visible here - confirm against it.
void AlignmentDisplay::Squeeze(void)
{
    // move all unaligned residues as far left as possible - makes it much simpler
    // to identify squeezable regions
    ShiftUnalignedLeft();

    typedef vector < int > SqueezeLocs;
    SqueezeLocs squeezeLocs(GetNRows());

    // find last aligned residue; stop search one after that
    int alnLoc, lastAlignedLoc;
    for (lastAlignedLoc=GetWidth()-2;
         lastAlignedLoc>=0 && !IsAligned(textRows[0]->GetCharAt(lastAlignedLoc));
         --lastAlignedLoc) ;
    ERR_POST(Info << "checking for squeeze up to " << (lastAlignedLoc+1));

    for (alnLoc=0; alnLoc<=lastAlignedLoc+1; ++alnLoc) {

        // check to see whether each row is squeezable at this location
        int row, nGaps, minNGaps = GetWidth();
        for (row=0; row<GetNRows(); ++row) {
            if (!textRows[row]->IsSqueezable(alnLoc, &nGaps, &(squeezeLocs[row]), minNGaps))
                break;
            if (nGaps < minNGaps) minNGaps = nGaps;
        }

        // if all rows are squeezable, then do the squeeze
        if (row == GetNRows()) {
            ERR_POST(Info << "squeezing " << minNGaps << " gaps at loc " << alnLoc);
            for (row=0; row<GetNRows(); ++row)
                textRows[row]->DeleteGaps(minNGaps, squeezeLocs[row]);
            lastAlignedLoc -= minNGaps; // account for shift of lastAlignedLoc
        }

        // after checking very first column, skip to first aligned loc to save time
        if (alnLoc == 0)
            while (alnLoc<=lastAlignedLoc && !IsAligned(textRows[0]->GetCharAt(alnLoc))) ++alnLoc;
    }
}

// redistribute unaligned residues so they're split left/right between aligned residues.
// This assumes ShiftUnalignedLeft() has already been called, so that all unaligned
// residues are on the left of the gaps.
void AlignmentDisplay::SplitUnaligned(void)
{
    // alnLocs of various key residues
    int firstAligned, prevAligned, nextAligned, firstUnaligned, lastUnaligned;
    int nGaps, nShift, shiftRes, shiftGap, i;

    for (int row=0; row<GetNRows(); ++row) {

        // find first aligned loc; count gaps up to there
        nGaps = 0;
        for (i=0; i<GetWidth()-2 && !IsAligned(textRows[row]->GetCharAt(i)); ++i)
            if (IsGap(textRows[row]->GetCharAt(i))) ++nGaps;
        firstAligned = i;

        // right-shift left tails
        if (nGaps > 0) {
            for (i=0; i<firstAligned-nGaps; ++i) {
                textRows[row]->SetCharAt(firstAligned-1-i,
                    textRows[row]->GetCharAt(firstAligned-1-nGaps-i));
                textRows[row]->SetCharAt(firstAligned-1-nGaps-i, '-');
            }
        }
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?