📄 cn3d_threader.cpp
字号:
/* * =========================================================================== * PRODUCTION $Log: cn3d_threader.cpp,v $ * PRODUCTION Revision 1000.3 2004/06/01 18:28:21 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.43 * PRODUCTION * =========================================================================== *//* $Id: cn3d_threader.cpp,v 1000.3 2004/06/01 18:28:21 gouriano Exp $* ===========================================================================** PUBLIC DOMAIN NOTICE* National Center for Biotechnology Information** This software/database is a "United States Government Work" under the* terms of the United States Copyright Act. It was written as part of* the author's official duties as a United States Government employee and* thus cannot be copyrighted. This software/database is freely available* to the public for use. The National Library of Medicine and the U.S.* Government have not placed any restriction on its use or reproduction.** Although all reasonable efforts have been taken to ensure the accuracy* and reliability of the software and data, the NLM and the U.S.* Government do not and cannot warrant the performance or results that* may be obtained by using this software or data. The NLM and the U.S.* Government disclaim all warranties, express or implied, including* warranties of performance, merchantability or fitness for any particular* purpose.** Please cite the author in any work or product based on this material.** ===========================================================================** Authors: Paul Thiessen** File Description:* class to isolate and run the threader** ===========================================================================*/#ifdef _MSC_VER#pragma warning(disable:4018) // disable signed/unsigned mismatch warning in MSVC#endif#include <ncbi_pch.hpp>#include <corelib/ncbistd.hpp> // must come first to avoid NCBI type clashes#include <corelib/ncbistl.hpp>#include <memory>#include "block_multiple_alignment.hpp"#include "cn3d_threader.hpp"#include "sequence_set.hpp"#include "molecule.hpp"#include "structure_set.hpp"#include "residue.hpp"#include "coord_set.hpp"#include "atom_set.hpp"#include "cn3d_tools.hpp"#include "molecule_identifier.hpp"// C-toolkit stuff#include <objseq.h>#include <objalign.h>#include <thrdatd.h>#include <thrddecl.h>#include <cddutil.h>#include <ncbistr.h>USING_NCBI_SCOPE;BEGIN_SCOPE(Cn3D)// define to include debugging output (threader structures to files)//#define DEBUG_THREADER// always do debugging in Debug build mode#if (!defined(DEBUG_THREADER) && defined(_DEBUG))#define DEBUG_THREADER#endif// default threading optionsThreaderOptions::ThreaderOptions(void) : weightPSSM(0.5), loopLengthMultiplier(1.5), nRandomStarts(1), nResultAlignments(1), terminalResidueCutoff(-1), mergeAfterEachSequence(true), freezeIsolatedBlocks(true){}ThreaderOptions globalThreaderOptions;// gives threader residue number for a character (-1 if non-standard aa)static int LookupThreaderResidueNumberFromCharacterAbbrev(char r){ typedef map < char, int > Char2Int; static Char2Int charMap; if (charMap.size() == 0) { for (int i=0; i<Threader::ThreaderResidues.size(); ++i) charMap[Threader::ThreaderResidues[i]] = i; } Char2Int::const_iterator c = charMap.find(toupper(r)); return ((c != charMap.end()) ? c->second : -1);}const int Threader::SCALING_FACTOR = 1000000;const string Threader::ThreaderResidues = "ARNDCQEGHILKMFPSTWYV";Threader::Threader(void){}Threader::~Threader(void){ ContactMap::iterator c, ce = contacts.end(); for (c=contacts.begin(); c!=ce; ++c) FreeFldMtf(c->second);}Seq_Mtf * Threader::CreateSeqMtf(const BlockMultipleAlignment *multiple, double weightPSSM, BLAST_KarlinBlkPtr karlinBlock){ // special case for "PSSM" of single-row "alignment" - just use BLOSUM62 score if (multiple->NRows() == 1) { Seq_Mtf *seqMtf = NewSeqMtf(multiple->GetMaster()->Length(), ThreaderResidues.size()); for (int res=0; res<multiple->GetMaster()->Length(); ++res) for (int aa=0; aa<ThreaderResidues.size(); ++aa) seqMtf->ww[res][aa] = ThrdRound( weightPSSM * SCALING_FACTOR * GetBLOSUM62Score(multiple->GetMaster()->sequenceString[res], ThreaderResidues[aa])); TRACEMSG("Created Seq_Mtf (PSSM) from BLOSUM62 scores"); return seqMtf; } // convert all sequences to Bioseqs multiple->GetMaster()->parentSet->CreateAllBioseqs(multiple); // create SeqAlign from this BlockMultipleAlignment SeqAlignPtr seqAlign = multiple->CreateCSeqAlign();#ifdef DEBUG_THREADER // dump for debugging SeqAlignPtr saChain = seqAlign; AsnIoPtr aip = AsnIoOpen("Seq-align.debug.txt", "w"); while (saChain) { SeqAlignAsnWrite(saChain, aip, NULL); saChain = saChain->next; } aip = AsnIoClose(aip);#endif // "spread" unaligned residues between aligned blocks, for PSSM construction CddDegapSeqAlign(seqAlign); Seq_Mtf *seqMtf = NULL; for (int i=11; i>=1; --i) { // first try auto-determined pseudocount (-1); if fails, find higest <= 10 that works int pseudocount = (i == 11) ? -1 : i; seqMtf = CddDenDiagCposComp2KBP( multiple->GetMaster()->parentSet->GetOrCreateBioseq(multiple->GetMaster()), pseudocount, seqAlign, NULL, NULL, weightPSSM, SCALING_FACTOR, NULL, karlinBlock ); if (seqMtf) break; else WARNINGMSG("Cannot use " << ((pseudocount == -1) ? "(empirical) " : "") << "pseudocount of " << pseudocount); } if (seqMtf) TRACEMSG("created Seq_Mtf (PSSM)"); else ERRORMSG("Cannot find any pseudocount that yields an acceptable PSSM!"); SeqAlignSetFree(seqAlign); return seqMtf;}Cor_Def * Threader::CreateCorDef(const BlockMultipleAlignment *multiple, double loopLengthMultiplier){ static const int MIN_LOOP_MAX = 2; static const int EXTENSION_MAX = 10; BlockMultipleAlignment::UngappedAlignedBlockList alignedBlocks; multiple->GetUngappedAlignedBlocks(&alignedBlocks); Cor_Def *corDef = NewCorDef(alignedBlocks.size()); // zero loop constraints for tails corDef->lll.llmn[0] = corDef->lll.llmn[alignedBlocks.size()] = corDef->lll.llmx[0] = corDef->lll.llmx[alignedBlocks.size()] = corDef->lll.lrfs[0] = corDef->lll.lrfs[alignedBlocks.size()] = 0; // loop constraints for unaligned regions between aligned blocks BlockMultipleAlignment::UngappedAlignedBlockList::const_iterator b = alignedBlocks.begin(), be = alignedBlocks.end(); int n, max; for (n=1, ++b; b!=be; ++n, ++b) { const UnalignedBlock *uaBlock = multiple->GetUnalignedBlockBefore(*b); if (uaBlock) { max = (int) (loopLengthMultiplier * uaBlock->width); if (max < MIN_LOOP_MAX) max = MIN_LOOP_MAX; } else max = MIN_LOOP_MAX; corDef->lll.llmn[n] = 0; corDef->lll.llmx[n] = max; corDef->lll.lrfs[n] = max; } // minimum block sizes (in coordinates of master) const Block::Range *range; int mid; for (n=0, b=alignedBlocks.begin(); b!=be; ++b, ++n) { range = (*b)->GetRangeOfRow(0); mid = (range->from + range->to) / 2; corDef->sll.rfpt[n] = mid; corDef->sll.nomn[n] = mid - range->from; corDef->sll.comn[n] = range->to - mid; } // left extension - trim to available residues corDef->sll.nomx[0] = corDef->sll.nomn[0] + EXTENSION_MAX; if (corDef->sll.rfpt[0] - corDef->sll.nomx[0] < 0) corDef->sll.nomx[0] = corDef->sll.rfpt[0]; // right extension - trim to available residues corDef->sll.comx[alignedBlocks.size() - 1] = corDef->sll.comn[alignedBlocks.size() - 1] + EXTENSION_MAX; if (corDef->sll.rfpt[alignedBlocks.size() - 1] + corDef->sll.comx[alignedBlocks.size() - 1] >= multiple->GetMaster()->Length()) corDef->sll.comx[alignedBlocks.size() - 1] = multiple->GetMaster()->Length() - corDef->sll.rfpt[alignedBlocks.size() - 1] - 1; // extensions into unaligned areas between blocks const Block::Range *prevRange = NULL; int nUnaligned, extN; for (n=0, b=alignedBlocks.begin(); b!=be; ++b, ++n) { range = (*b)->GetRangeOfRow(0); if (n > 0) { nUnaligned = range->from - prevRange->to - 1; extN = nUnaligned / 2; // N extension of right block gets smaller portion if nUnaligned is odd corDef->sll.nomx[n] = corDef->sll.nomn[n] + extN; corDef->sll.comx[n - 1] = corDef->sll.comn[n - 1] + nUnaligned - extN; } prevRange = range; } // no fixed segments corDef->fll.n = 0; return corDef;}Qry_Seq * Threader::CreateQrySeq(const BlockMultipleAlignment *multiple, const BlockMultipleAlignment *pairwise, int terminalCutoff){ const Sequence *slaveSeq = pairwise->GetSequenceOfRow(1); BlockMultipleAlignment::UngappedAlignedBlockList multipleABlocks, pairwiseABlocks; multiple->GetUngappedAlignedBlocks(&multipleABlocks); pairwise->GetUngappedAlignedBlocks(&pairwiseABlocks); // query has # constraints = # blocks in multiple alignment Qry_Seq *qrySeq = NewQrySeq(slaveSeq->Length(), multipleABlocks.size()); // fill in residue numbers int i; for (i=0; i<slaveSeq->Length(); ++i) qrySeq->sq[i] = LookupThreaderResidueNumberFromCharacterAbbrev(slaveSeq->sequenceString[i]); // if a block in the multiple is contained in the pairwise (looking at master coords), // then add a constraint to keep it there BlockMultipleAlignment::UngappedAlignedBlockList::const_iterator m, me = multipleABlocks.end(), p, pe = pairwiseABlocks.end(); const Block::Range *multipleRange, *pairwiseRange; for (i=0, m=multipleABlocks.begin(); m!=me; ++i, ++m) { multipleRange = (*m)->GetRangeOfRow(0); for (p=pairwiseABlocks.begin(); p!=pe; ++p) { pairwiseRange = (*p)->GetRangeOfRow(0); if (pairwiseRange->from <= multipleRange->from && pairwiseRange->to >= multipleRange->to) { int masterCenter = (multipleRange->from + multipleRange->to) / 2; // offset of master residue at center of multiple block int offset = masterCenter - pairwiseRange->from; pairwiseRange = (*p)->GetRangeOfRow(1); // slave residue in pairwise aligned to master residue at center of multiple block qrySeq->sac.mn[i] = qrySeq->sac.mx[i] = pairwiseRange->from + offset; break; } } } // if a terminal block is unconstrained (mn,mx == -1), set limits for how far the new // (religned) block is allowed to be from the edge of the next block or from the // aligned region set upon demotion if (terminalCutoff >= 0) { if (qrySeq->sac.mn[0] == -1) { if (pairwise->alignSlaveFrom >= 0) { qrySeq->sac.mn[0] = pairwise->alignSlaveFrom - terminalCutoff; } else if (pairwiseABlocks.size() > 0) { const Block::Range *nextQryBlock = pairwiseABlocks.front()->GetRangeOfRow(1); qrySeq->sac.mn[0] = nextQryBlock->from - 1 - terminalCutoff; } if (qrySeq->sac.mn[0] < 0) qrySeq->sac.mn[0] = 0; INFOMSG("new N-terminal block constrained to query loc >= " << qrySeq->sac.mn[0] + 1); } if (qrySeq->sac.mx[multipleABlocks.size() - 1] == -1) { if (pairwise->alignSlaveTo >= 0) { qrySeq->sac.mx[multipleABlocks.size() - 1] = pairwise->alignSlaveTo + terminalCutoff; } else if (pairwiseABlocks.size() > 0) { const Block::Range *prevQryBlock = pairwiseABlocks.back()->GetRangeOfRow(1); qrySeq->sac.mx[multipleABlocks.size() - 1] = prevQryBlock->to + 1 + terminalCutoff; } if (qrySeq->sac.mx[multipleABlocks.size() - 1] >= qrySeq->n || qrySeq->sac.mx[multipleABlocks.size() - 1] < 0) qrySeq->sac.mx[multipleABlocks.size() - 1] = qrySeq->n - 1; INFOMSG("new C-terminal block constrained to query loc <= " << qrySeq->sac.mx[multipleABlocks.size() - 1] + 1); } } return qrySeq;}/*---------------------------------------------------------------------------- * stuff to read in the contact potential. (code swiped from DDV) *---------------------------------------------------------------------------*/static Int4 CountWords(char* Str) { Int4 i, Count=0; Boolean InsideStr; InsideStr = FALSE; for (i=0; i<StrLen(Str); ++i) { if (!InsideStr && (Str[i] != ' ')) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -