⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 su_block_multiple_alignment.cpp

📁 ncbi源码
💻 CPP
📖 第 1 页 / 共 4 页
字号:
/* * =========================================================================== * PRODUCTION $Log: su_block_multiple_alignment.cpp,v $ * PRODUCTION Revision 1000.0  2004/06/01 18:14:28  gouriano * PRODUCTION PRODUCTION: IMPORTED [GCC34_MSVC7] Dev-tree R1.9 * PRODUCTION * =========================================================================== *//*  $Id: su_block_multiple_alignment.cpp,v 1000.0 2004/06/01 18:14:28 gouriano Exp $* ===========================================================================**                            PUBLIC DOMAIN NOTICE*               National Center for Biotechnology Information**  This software/database is a "United States Government Work" under the*  terms of the United States Copyright Act.  It was written as part of*  the author's official duties as a United States Government employee and*  thus cannot be copyrighted.  This software/database is freely available*  to the public for use. The National Library of Medicine and the U.S.*  Government have not placed any restriction on its use or reproduction.**  Although all reasonable efforts have been taken to ensure the accuracy*  and reliability of the software and data, the NLM and the U.S.*  Government do not and cannot warrant the performance or results that*  may be obtained by using this software or data. The NLM and the U.S.*  Government disclaim all warranties, express or implied, including*  warranties of performance, merchantability or fitness for any particular*  purpose.**  Please cite the author in any work or product based on this material.** ===========================================================================** Authors:  Paul Thiessen** File Description:*      Classes to hold alignment data** ===========================================================================*/#include <ncbi_pch.hpp>#include <corelib/ncbistd.hpp>#include <corelib/ncbi_limits.hpp>#include <util/tables/raw_scoremat.h>#include <objects/seqalign/Dense_diag.hpp>#include "su_block_multiple_alignment.hpp"#include "su_sequence_set.hpp"#include "su_private.hpp"// C-toolkit stuff for PSSM calculation#include <objalign.h>#include <blast.h>#include <blastkar.h>#include <cddutil.h>#include <thrdatd.h>#include <thrddecl.h>USING_NCBI_SCOPE;USING_SCOPE(objects);BEGIN_SCOPE(struct_util)// from su_sequence_set.cppextern BioseqPtr GetOrCreateBioseq(const Sequence *sequence);extern void AddCSeqId(SeqIdPtr *sid, const ncbi::objects::CSeq_id& cppid);static const int SCALING_FACTOR = 1000000;static const string ThreaderResidues = "ARNDCQEGHILKMFPSTWYV";static const string BLASTResidues = "-ABCDEFGHIKLMNPQRSTVWXYZU*";BlockMultipleAlignment::BlockMultipleAlignment(const SequenceList& sequenceList){    m_sequences = sequenceList;    m_pssm = NULL;    InitCache();    m_rowDoubles.resize(m_sequences.size(), 0.0);    m_rowStrings.resize(m_sequences.size());}void BlockMultipleAlignment::InitCache(void){    m_cachePrevRow = eUndefined;    m_cachePrevBlock = NULL;    m_cacheBlockIterator = m_blocks.begin();}BlockMultipleAlignment::~BlockMultipleAlignment(void){    RemovePSSM();}BlockMultipleAlignment * BlockMultipleAlignment::Clone(void) const{    BlockMultipleAlignment *copy = new BlockMultipleAlignment(m_sequences);    BlockList::const_iterator b, be = m_blocks.end();    for (b=m_blocks.begin(); b!=be; ++b)        copy->m_blocks.push_back(CRef<Block>((*b)->Clone(copy)));    copy->UpdateBlockMap();    copy->m_rowDoubles = m_rowDoubles;    copy->m_rowStrings = m_rowStrings;    return copy;}static inline unsigned int ScreenResidueCharacter(char original){    char ch = toupper(original);    switch (ch) {        case 'A': case 'R': case 'N': case 'D': case 'C':        case 'Q': case 'E': case 'G': case 'H': case 'I':        case 'L': case 'K': case 'M': case 'F': case 'P':        case 'S': case 'T': case 'W': case 'Y': case 'V':        case 'B': case 'Z':            break;        default:            ch = 'X'; // make all but natural aa's just 'X'    }    return ch;}static int GetBLOSUM62Score(char a, char b){    static SNCBIFullScoreMatrix Blosum62Matrix;    static bool unpacked = false;    if (!unpacked) {        NCBISM_Unpack(&NCBISM_Blosum62, &Blosum62Matrix);        unpacked = true;    }    return Blosum62Matrix.s[ScreenResidueCharacter(a)][ScreenResidueCharacter(b)];}// creates a SeqAlign from a BlockMultipleAlignmentstatic SeqAlignPtr CreateCSeqAlign(const BlockMultipleAlignment& multiple){    // one SeqAlign (chained into a linked list) for each slave row    SeqAlignPtr prevSap = NULL, firstSap = NULL;    for (unsigned int row=1; row<multiple.NRows(); ++row) {        SeqAlignPtr sap = SeqAlignNew();        if (prevSap) prevSap->next = sap;        prevSap = sap;        if (!firstSap) firstSap = sap;        sap->type = SAT_PARTIAL;        sap->dim = 2;        sap->segtype = SAS_DENDIAG;        DenseDiagPtr prevDd = NULL;        BlockMultipleAlignment::UngappedAlignedBlockList blocks;        multiple.GetUngappedAlignedBlocks(&blocks);        BlockMultipleAlignment::UngappedAlignedBlockList::const_iterator b, be = blocks.end();        for (b=blocks.begin(); b!=be; ++b) {            DenseDiagPtr dd = DenseDiagNew();            if (prevDd) prevDd->next = dd;            prevDd = dd;            if (b == blocks.begin()) sap->segs = dd;            dd->dim = 2;            AddCSeqId(&(dd->id), multiple.GetSequenceOfRow(0)->GetPreferredIdentifier());            AddCSeqId(&(dd->id), multiple.GetSequenceOfRow(row)->GetPreferredIdentifier());            dd->len = (*b)->m_width;            dd->starts = (Int4Ptr) MemNew(2 * sizeof(Int4));            const Block::Range *range = (*b)->GetRangeOfRow(0);            dd->starts[0] = range->from;            range = (*b)->GetRangeOfRow(row);            dd->starts[1] = range->from;        }    }    return firstSap;}static void CreateAllBioseqs(const BlockMultipleAlignment& multiple){    for (unsigned int row=0; row<multiple.NRows(); ++row)        GetOrCreateBioseq(multiple.GetSequenceOfRow(row));}static Seq_Mtf * CreateSeqMtf(const BlockMultipleAlignment& multiple,    double weightPSSM, BLAST_KarlinBlkPtr karlinBlock){    // special case for "PSSM" of single-row "alignment" - just use BLOSUM62 score    if (multiple.NRows() == 1) {        Seq_Mtf *seqMtf = NewSeqMtf(multiple.GetMaster()->Length(), ThreaderResidues.size());        for (unsigned int res=0; res<multiple.GetMaster()->Length(); ++res)            for (unsigned int aa=0; aa<ThreaderResidues.size(); ++aa)                seqMtf->ww[res][aa] = ThrdRound(weightPSSM * SCALING_FACTOR *                    GetBLOSUM62Score(multiple.GetMaster()->m_sequenceString[res], ThreaderResidues[aa]));        WARNING_MESSAGE("Created Seq_Mtf (PSSM) from BLOSUM62 scores");        return seqMtf;    }    // convert all sequences to Bioseqs    CreateAllBioseqs(multiple);    // create SeqAlign from this BlockMultipleAlignment    SeqAlignPtr seqAlign = CreateCSeqAlign(multiple);    // "spread" unaligned residues between aligned blocks, for PSSM construction    CddDegapSeqAlign(seqAlign);    Seq_Mtf *seqMtf = NULL;    for (int i=11; i>=1; --i) {        // first try auto-determined pseudocount (-1); if fails, find higest <= 10 that works        int pseudocount = (i == 11) ? -1 : i;        seqMtf = CddDenDiagCposComp2KBP(            GetOrCreateBioseq(multiple.GetMaster()),            pseudocount,            seqAlign,            NULL,            NULL,            weightPSSM,            SCALING_FACTOR,            NULL,            karlinBlock        );        if (seqMtf)            break;        else            WARNING_MESSAGE("Cannot use " << ((pseudocount == -1) ? "(empirical) " : "")                << "pseudocount of " << pseudocount);    }    if (seqMtf)        TRACE_MESSAGE("created Seq_Mtf (PSSM)");    else        ERROR_MESSAGE("Cannot find any pseudocount that yields an acceptable PSSM!");    SeqAlignSetFree(seqAlign);    return seqMtf;}// gives BLAST residue number for a character (or # for 'X' if char not found)int LookupBLASTResidueNumberFromCharacter(unsigned char r){    typedef map < unsigned char, int > Char2Int;    static Char2Int charMap;    if (charMap.size() == 0) {        for (unsigned int i=0; i<BLASTResidues.size(); ++i)            charMap[BLASTResidues[i]] = i;    }    Char2Int::const_iterator n = charMap.find(toupper(r));    if (n != charMap.end())        return n->second;    else        return charMap.find('X')->second;}// gives BLAST residue number for a threader residue number (or # for 'X' if char == -1)static int LookupBLASTResidueNumberFromThreaderResidueNumber(unsigned char r){    r = toupper(r);    return LookupBLASTResidueNumberFromCharacter(            (r < ThreaderResidues.size()) ? ThreaderResidues[r] : 'X');}static Int4 Round(double d){    if (d >= 0.0)        return (Int4) (d + 0.5);    else        return (Int4) (d - 0.5);}const BLAST_Matrix * BlockMultipleAlignment::GetPSSM(void) const{    if (m_pssm) return m_pssm;    // for now, use threader's SeqMtf    BLAST_KarlinBlkPtr karlinBlock = BlastKarlinBlkCreate();    Seq_Mtf *seqMtf = CreateSeqMtf(*this, 1.0, karlinBlock);    m_pssm = (BLAST_Matrix *) MemNew(sizeof(BLAST_Matrix));    m_pssm->is_prot = TRUE;    m_pssm->name = StringSave("BLOSUM62");    m_pssm->karlinK = karlinBlock->K;    m_pssm->rows = seqMtf->n + 1;    m_pssm->columns = 26;    int i, j;    m_pssm->matrix = (Int4 **) MemNew(m_pssm->rows * sizeof(Int4 *));    for (i=0; i<m_pssm->rows; ++i) {        m_pssm->matrix[i] = (Int4 *) MemNew(m_pssm->columns * sizeof(Int4));        // set scores from threader matrix        if (i < seqMtf->n) {            // initialize all rows with custom score, or BLAST_SCORE_MIN; to match what Aron's function creates            for (j=0; j<m_pssm->columns; ++j)                m_pssm->matrix[i][j] = (j == 21 ? -1 : (j == 25 ? -4 : BLAST_SCORE_MIN));            for (j=0; j<seqMtf->AlphabetSize; ++j) {                m_pssm->matrix[i][LookupBLASTResidueNumberFromThreaderResidueNumber(j)] =                    Round(((double) seqMtf->ww[i][j]) / SCALING_FACTOR);            }        } else {            // initialize last row with BLAST_SCORE_MIN            for (j=0; j<m_pssm->columns; ++j)                m_pssm->matrix[i][j] = BLAST_SCORE_MIN;        }    }    m_pssm->posFreqs = NULL;    FreeSeqMtf(seqMtf);    BlastKarlinBlkDestruct(karlinBlock);    return m_pssm;}void BlockMultipleAlignment::RemovePSSM(void) const{    if (m_pssm) {        BLAST_MatrixDestruct(m_pssm);        m_pssm = NULL;    }}bool BlockMultipleAlignment::CheckAlignedBlock(const Block *block) const{    if (!block || !block->IsAligned()) {        ERROR_MESSAGE("CheckAlignedBlock() - checks aligned blocks only");        return false;    }    if (block->NSequences() != m_sequences.size()) {        ERROR_MESSAGE("CheckAlignedBlock() - block size mismatch");        return false;    }    // make sure ranges are reasonable for each sequence    unsigned int row;    const Block        *prevBlock = GetBlockBefore(block),        *nextBlock = GetBlockAfter(block);    const Block::Range *range, *prevRange = NULL, *nextRange = NULL;    SequenceList::const_iterator sequence = m_sequences.begin();    for (row=0; row<block->NSequences(); ++row, ++sequence) {        range = block->GetRangeOfRow(row);        if (prevBlock) prevRange = prevBlock->GetRangeOfRow(row);        if (nextBlock) nextRange = nextBlock->GetRangeOfRow(row);        if (range->to - range->from + 1 != block->m_width ||       // check block m_width            (prevRange && range->from <= prevRange->to) ||          // check for range overlap            (nextRange && range->to >= nextRange->from) ||          // check for range overlap            range->from > range->to ||                              // check range values            range->to >= (*sequence)->Length())                     // check bounds of end        {            ERROR_MESSAGE("CheckAlignedBlock() - range error");            return false;        }    }    return true;}bool BlockMultipleAlignment::AddAlignedBlockAtEnd(UngappedAlignedBlock *newBlock){    m_blocks.push_back(CRef<Block>(newBlock));    return CheckAlignedBlock(newBlock);}UnalignedBlock * BlockMultipleAlignment::    CreateNewUnalignedBlockBetween(const Block *leftBlock, const Block *rightBlock){    if ((leftBlock && !leftBlock->IsAligned()) ||        (rightBlock && !rightBlock->IsAligned())) {        ERROR_MESSAGE("CreateNewUnalignedBlockBetween() - passed an unaligned block");        return NULL;    }    unsigned int row, from, to, length;    SequenceList::const_iterator s, se = m_sequences.end();    UnalignedBlock *newBlock = new UnalignedBlock(this);    newBlock->m_width = 0;    for (row=0, s=m_sequences.begin(); s!=se; ++row, ++s) {        if (leftBlock)            from = leftBlock->GetRangeOfRow(row)->to + 1;        else            from = 0;        if (rightBlock)            to = rightBlock->GetRangeOfRow(row)->from - 1;        else            to = (*s)->Length() - 1;        newBlock->SetRangeOfRow(row, from, to);        length = to - from + 1;        if ((to - from + 1) < 0) { // just to make sure...            ERROR_MESSAGE("CreateNewUnalignedBlockBetween() - unaligned length < 0");            return NULL;        }        if (length > newBlock->m_width)            newBlock->m_width = length;    }    if (newBlock->m_width == 0) {        delete newBlock;        return NULL;    } else        return newBlock;}bool BlockMultipleAlignment::AddUnalignedBlocks(void){    BlockList::iterator a, ae = m_blocks.end();    const Block *alignedBlock = NULL, *prevAlignedBlock = NULL;    Block *newUnalignedBlock;    // unaligned m_blocks to the left of each aligned block    for (a=m_blocks.begin(); a!=ae; ++a) {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -