📄 su_block_multiple_alignment.cpp
字号:
VectorRemoveElements(m_rowDoubles, removeRows, slavesToRemove.size()); VectorRemoveElements(m_rowStrings, removeRows, slavesToRemove.size()); // delete row from all blocks, removing any zero-width blocks TRACE_MESSAGE("deleting alignment rows from blocks"); BlockList::iterator b = m_blocks.begin(), br, be = m_blocks.end(); while (b != be) { (*b)->DeleteRows(removeRows, slavesToRemove.size()); if ((*b)->m_width == 0) { br = b; ++b; TRACE_MESSAGE("deleting block resized to zero width"); RemoveBlock(*br); } else ++b; } // update total alignment m_width UpdateBlockMap(); InitCache(); return true;}bool BlockMultipleAlignment::MergeAlignment(const BlockMultipleAlignment *newAlignment){ // check to see if the alignment is compatible - must have same master sequence // and blocks of new alignment must contain m_blocks of this alignment; at same time, // build up map of aligned blocks in new alignment that correspond to aligned blocks // of this object, for convenient lookup later if (newAlignment->GetMaster() != GetMaster()) return false; const Block::Range *newRange, *thisRange; BlockList::const_iterator nb, nbe = newAlignment->m_blocks.end(); BlockList::iterator b, be = m_blocks.end(); typedef map < UngappedAlignedBlock *, const UngappedAlignedBlock * > AlignedBlockMap; AlignedBlockMap correspondingNewBlocks; for (b=m_blocks.begin(); b!=be; ++b) { if (!(*b)->IsAligned()) continue; for (nb=newAlignment->m_blocks.begin(); nb!=nbe; ++nb) { if (!(*nb)->IsAligned()) continue; newRange = (*nb)->GetRangeOfRow(0); thisRange = (*b)->GetRangeOfRow(0); if (newRange->from <= thisRange->from && newRange->to >= thisRange->to) { correspondingNewBlocks[dynamic_cast<UngappedAlignedBlock*>(b->GetPointer())] = dynamic_cast<const UngappedAlignedBlock*>(nb->GetPointer()); break; } } if (nb == nbe) return false; // no corresponding block found } // add slave sequences from new alignment; also copy scores/status unsigned int i, nNewRows = newAlignment->m_sequences.size() - 1; m_sequences.resize(m_sequences.size() + nNewRows); m_rowDoubles.resize(m_rowDoubles.size() + nNewRows); m_rowStrings.resize(m_rowStrings.size() + nNewRows); for (i=0; i<nNewRows; ++i) { m_sequences[m_sequences.size() + i - nNewRows] = newAlignment->m_sequences[i + 1]; SetRowDouble(NRows() + i - nNewRows, newAlignment->GetRowDouble(i + 1)); SetRowStatusLine(NRows() + i - nNewRows, newAlignment->GetRowStatusLine(i + 1)); } // now that we know blocks are compatible, add new rows at end of this alignment, containing // all rows (except master) from new alignment; only that part of the aligned blocks from // the new alignment that intersect with the aligned blocks from this object are added, so // that this object's block structure is unchanged // resize all aligned blocks for (b=m_blocks.begin(); b!=be; ++b) (*b)->AddRows(nNewRows); // set ranges of aligned blocks, and add them to the list AlignedBlockMap::const_iterator ab, abe = correspondingNewBlocks.end(); const Block::Range *thisMaster, *newMaster; for (ab=correspondingNewBlocks.begin(); ab!=abe; ++ab) { thisMaster = ab->first->GetRangeOfRow(0); newMaster = ab->second->GetRangeOfRow(0); for (i=0; i<nNewRows; ++i) { newRange = ab->second->GetRangeOfRow(i + 1); ab->first->SetRangeOfRow(NRows() + i - nNewRows, newRange->from + thisMaster->from - newMaster->from, newRange->to + thisMaster->to - newMaster->to); } } // delete then recreate the unaligned blocks, in case a merge requires the // creation of a new unaligned block for (b=m_blocks.begin(); b!=be; ) { if (!(*b)->IsAligned()) { BlockList::iterator bb(b); ++bb; m_blocks.erase(b); b = bb; } else ++b; } InitCache(); // update this alignment, but leave row scores/status alone if (!AddUnalignedBlocks() || !UpdateBlockMap(false)) { ERROR_MESSAGE("BlockMultipleAlignment::MergeAlignment() - internal update after merge failed"); return false; } return true;}template < class T >bool ReorderVector(T& v, const std::vector < unsigned int >& newOrder){ // check validity of new ordering if (newOrder.size() != v.size()) { ERROR_MESSAGE("ReorderVector() - wrong size newOrder"); return false; } vector < bool > isPresent(v.size(), false); unsigned int r; for (r=0; r<v.size(); r++) { if (isPresent[newOrder[r]]) { ERROR_MESSAGE("ReorderVector() - invalid newOrder: repeated/missing row"); return false; } isPresent[newOrder[r]] = true; } // not terribly efficient - makes a whole new copy with the new order, then re-copies back T reordered(v.size()); for (r=0; r<v.size(); r++) reordered[r] = v[newOrder[r]]; v = reordered; return true;}bool BlockMultipleAlignment::ReorderRows(const std::vector < unsigned int >& newOrder){ // can't reorder master if (newOrder[0] != 0) { ERROR_MESSAGE("ReorderRows() - can't move master row"); return false; } bool okay = (ReorderVector(m_sequences, newOrder) && ReorderVector(m_rowDoubles, newOrder) && ReorderVector(m_rowStrings, newOrder)); if (!okay) { ERROR_MESSAGE("reordering of sequences and status info failed"); return false; } BlockList::iterator b, be = m_blocks.end(); for (b=m_blocks.begin(); b!=be; ++b) okay = (okay && (*b)->ReorderRows(newOrder)); if (!okay) ERROR_MESSAGE("reordering of block ranges failed"); return okay;}CSeq_align * CreatePairwiseSeqAlignFromMultipleRow(const BlockMultipleAlignment *multiple, const BlockMultipleAlignment::UngappedAlignedBlockList& m_blocks, unsigned int slaveRow){ if (!multiple || slaveRow >= multiple->NRows()) { ERROR_MESSAGE("CreatePairwiseSeqAlignFromMultipleRow() - bad parameters"); return NULL; } CSeq_align *seqAlign = new CSeq_align(); seqAlign->SetType(CSeq_align::eType_partial); seqAlign->SetDim(2); CSeq_align::C_Segs::TDendiag& denDiags = seqAlign->SetSegs().SetDendiag(); denDiags.resize((m_blocks.size() > 0) ? m_blocks.size() : 1); CSeq_align::C_Segs::TDendiag::iterator d, de = denDiags.end(); BlockMultipleAlignment::UngappedAlignedBlockList::const_iterator b = m_blocks.begin(); const Block::Range *range; for (d=denDiags.begin(); d!=de; ++d, ++b) { CDense_diag *denDiag = new CDense_diag(); d->Reset(denDiag); denDiag->SetDim(2); denDiag->SetIds().resize(2); // master row CRef < CSeq_id > id(new CSeq_id()); id->Assign(multiple->GetSequenceOfRow(0)->GetPreferredIdentifier()); denDiag->SetIds().front() = id; if (m_blocks.size() > 0) { range = (*b)->GetRangeOfRow(0); denDiag->SetStarts().push_back(range->from); } else denDiag->SetStarts().push_back(0); // slave row id.Reset(new CSeq_id()); id->Assign(multiple->GetSequenceOfRow(slaveRow)->GetPreferredIdentifier()); denDiag->SetIds().back() = id; if (m_blocks.size() > 0) { range = (*b)->GetRangeOfRow(slaveRow); denDiag->SetStarts().push_back(range->from); } else denDiag->SetStarts().push_back(0); // block m_width denDiag->SetLen((m_blocks.size() > 0) ? (*b)->m_width : 0); } return seqAlign;}unsigned int BlockMultipleAlignment::NAlignedBlocks(void) const{ unsigned int n = 0; BlockList::const_iterator b, be = m_blocks.end(); for (b=m_blocks.begin(); b!=be; ++b) if ((*b)->IsAligned()) ++n; return n;}unsigned int BlockMultipleAlignment::GetAlignmentIndex(unsigned int row, unsigned int seqIndex, eUnalignedJustification justification){ if (row >= NRows() || seqIndex >= GetSequenceOfRow(row)->Length()) { ERROR_MESSAGE("BlockMultipleAlignment::GetAlignmentIndex() - coordinate out of range"); return eUndefined; } unsigned int alignmentIndex, blockColumn; const Block *block = NULL; const Block::Range *range; for (alignmentIndex=0; alignmentIndex<m_totalWidth; ++alignmentIndex) { // check each block to see if index is in range if (block != m_blockMap[alignmentIndex].block) { block = m_blockMap[alignmentIndex].block; range = block->GetRangeOfRow(row); if (seqIndex >= range->from && seqIndex <= range->to) { // override requested justification for end blocks if (block == m_blocks.back()) // also true if there's a single aligned block justification = eLeft; else if (block == m_blocks.front()) justification = eRight; // search block columns to find index (inefficient, but avoids having to write // inverse functions of Block::GetIndexAt() for (blockColumn=0; blockColumn<block->m_width; ++blockColumn) { if (seqIndex == block->GetIndexAt(blockColumn, row, justification)) return alignmentIndex + blockColumn; } ERROR_MESSAGE("BlockMultipleAlignment::GetAlignmentIndex() - can't find index in block"); return eUndefined; } } } // should never get here... ERROR_MESSAGE("BlockMultipleAlignment::GetAlignmentIndex() - confused"); return eUndefined;}bool Block::ReorderRows(const std::vector < unsigned int >& newOrder){ return ReorderVector(m_ranges, newOrder);}///// UngappedAlignedBlock methods /////char UngappedAlignedBlock::GetCharacterAt(unsigned int blockColumn, unsigned int row) const{ return m_parentAlignment->GetSequenceOfRow(row)->m_sequenceString[GetIndexAt(blockColumn, row)];}Block * UngappedAlignedBlock::Clone(const BlockMultipleAlignment *newMultiple) const{ UngappedAlignedBlock *copy = new UngappedAlignedBlock(newMultiple); const Block::Range *range; for (unsigned int row=0; row<NSequences(); ++row) { range = GetRangeOfRow(row); copy->SetRangeOfRow(row, range->from, range->to); } copy->m_width = m_width; return copy;}void UngappedAlignedBlock::DeleteRow(unsigned int row){ RangeList::iterator r = m_ranges.begin(); for (unsigned int i=0; i<row; ++i) ++r; m_ranges.erase(r);}void UngappedAlignedBlock::DeleteRows(vector < bool >& removeRows, unsigned int nToRemove){ VectorRemoveElements(m_ranges, removeRows, nToRemove);}///// UnalignedBlock methods /////unsigned int UnalignedBlock::GetIndexAt(unsigned int blockColumn, unsigned int row, BlockMultipleAlignment::eUnalignedJustification justification) const{ const Block::Range *range = GetRangeOfRow(row); unsigned int seqIndex = BlockMultipleAlignment::eUndefined, rangeWidth, rangeMiddle, extraSpace; switch (justification) { case BlockMultipleAlignment::eLeft: seqIndex = range->from + blockColumn; break; case BlockMultipleAlignment::eRight: seqIndex = range->to - m_width + blockColumn + 1; break; case BlockMultipleAlignment::eCenter: rangeWidth = (range->to - range->from + 1); extraSpace = (m_width - rangeWidth) / 2; if (blockColumn < extraSpace || blockColumn >= extraSpace + rangeWidth) seqIndex = BlockMultipleAlignment::eUndefined; else seqIndex = range->from + blockColumn - extraSpace; break; case BlockMultipleAlignment::eSplit: rangeWidth = (range->to - range->from + 1); rangeMiddle = (rangeWidth / 2) + (rangeWidth % 2); extraSpace = m_width - rangeWidth; if (blockColumn < rangeMiddle) seqIndex = range->from + blockColumn; else if (blockColumn >= extraSpace + rangeMiddle) seqIndex = range->to - m_width + blockColumn + 1; else seqIndex = BlockMultipleAlignment::eUndefined; break; } if (seqIndex < range->from || seqIndex > range->to) seqIndex = BlockMultipleAlignment::eUndefined; return seqIndex;}void UnalignedBlock::Resize(void){ m_width = 0; for (unsigned int i=0; i<NSequences(); ++i) { unsigned int blockWidth = m_ranges[i].to - m_ranges[i].from + 1; if (blockWidth > m_width) m_width = blockWidth; }}void UnalignedBlock::DeleteRow(unsigned int row){ RangeList::iterator r = m_ranges.begin(); for (unsigned int i=0; i<row; ++i) ++r; m_ranges.erase(r); Resize();}void UnalignedBlock::DeleteRows(vector < bool >& removeRows, unsigned int nToRemove){ VectorRemoveElements(m_ranges, removeRows, nToRemove); Resize();}Block * UnalignedBlock::Clone(const BlockMultipleAlignment *newMultiple) const{ UnalignedBlock *copy = new UnalignedBlock(newMultiple); const Block::Range *range; for (unsigned int row=0; row<NSequences(); ++row) { range = GetRangeOfRow(row); copy->SetRangeOfRow(row, range->from, range->to); } copy->m_width = m_width; return copy;}END_SCOPE(struct_util)/** ---------------------------------------------------------------------------* $Log: su_block_multiple_alignment.cpp,v $* Revision 1000.0 2004/06/01 18:14:28 gouriano* PRODUCTION: IMPORTED [GCC34_MSVC7] Dev-tree R1.9** Revision 1.9 2004/05/28 10:07:39 thiessen* fix GCC warning** Revision 1.8 2004/05/28 09:46:57 thiessen* restructure C-toolkit header usage ; move C Bioseq storage into su_sequence_set** Revision 1.7 2004/05/27 21:34:08 thiessen* add PSSM calculation (requires C-toolkit)** Revision 1.6 2004/05/26 14:49:59 thiessen* UNDEFINED -> eUndefined** Revision 1.5 2004/05/26 14:30:16 thiessen* adjust handling of alingment data ; add row ordering** Revision 1.4 2004/05/26 02:40:24 thiessen* progress towards LOO - all but PSSM and row ordering** Revision 1.3 2004/05/25 16:24:50 thiessen* remove WorkShop warnings** Revision 1.2 2004/05/25 16:12:30 thiessen* fix GCC warnings** Revision 1.1 2004/05/25 15:52:17 thiessen* add BlockMultipleAlignment, IBM algorithm**/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -