⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cn3d_blast.cpp

📁 ncbi源码
💻 CPP
📖 第 1 页 / 共 2 页
字号:
/* * =========================================================================== * PRODUCTION $Log: cn3d_blast.cpp,v $ * PRODUCTION Revision 1000.2  2004/06/01 18:28:09  gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.34 * PRODUCTION * =========================================================================== *//*  $Id: cn3d_blast.cpp,v 1000.2 2004/06/01 18:28:09 gouriano Exp $* ===========================================================================**                            PUBLIC DOMAIN NOTICE*               National Center for Biotechnology Information**  This software/database is a "United States Government Work" under the*  terms of the United States Copyright Act.  It was written as part of*  the author's official duties as a United States Government employee and*  thus cannot be copyrighted.  This software/database is freely available*  to the public for use. The National Library of Medicine and the U.S.*  Government have not placed any restriction on its use or reproduction.**  Although all reasonable efforts have been taken to ensure the accuracy*  and reliability of the software and data, the NLM and the U.S.*  Government do not and cannot warrant the performance or results that*  may be obtained by using this software or data. The NLM and the U.S.*  Government disclaim all warranties, express or implied, including*  warranties of performance, merchantability or fitness for any particular*  purpose.**  Please cite the author in any work or product based on this material.** ===========================================================================** Authors:  Paul Thiessen** File Description:*      module for aligning with BLAST and related algorithms** ===========================================================================*/#ifdef _MSC_VER#pragma warning(disable:4018)   // disable signed/unsigned mismatch warning in MSVC#endif#include <ncbi_pch.hpp>#include <corelib/ncbistd.hpp>#include <objects/seqalign/Seq_align.hpp>#include <objects/seqalign/Dense_seg.hpp>#include <objects/seqalign/Score.hpp>#include <objects/general/Object_id.hpp>#ifdef __WXMSW__#include <windows.h>#include <wx/msw/winundef.h>#endif#include <wx/wx.h>// C stuff#include <objseq.h>#include <blast.h>#include <blastkar.h>#include "cn3d_blast.hpp"#include "structure_set.hpp"#include "sequence_set.hpp"#include "block_multiple_alignment.hpp"#include "cn3d_tools.hpp"#include "asn_converter.hpp"#include "molecule_identifier.hpp"#include "cn3d_threader.hpp"// hack so I can catch memory leaks specific to this module, at the line where allocation occurs#ifdef _DEBUG#ifdef MemNew#undef MemNew#endif#define MemNew(sz) memset(malloc(sz), 0, (sz))#endifUSING_NCBI_SCOPE;USING_SCOPE(objects);BEGIN_SCOPE(Cn3D)const string BLASTer::BLASTResidues = "-ABCDEFGHIKLMNPQRSTVWXYZU*";// gives BLAST residue number for a character (or # for 'X' if char not found)int LookupBLASTResidueNumberFromCharacter(char r){    typedef map < char, int > Char2Int;    static Char2Int charMap;    if (charMap.size() == 0) {        for (int i=0; i<BLASTer::BLASTResidues.size(); ++i)            charMap[BLASTer::BLASTResidues[i]] = i;    }    Char2Int::const_iterator n = charMap.find(toupper(r));    if (n != charMap.end())        return n->second;    else        return charMap.find('X')->second;}// gives BLAST residue number for a threader residue number (or # for 'X' if char == -1)int LookupBLASTResidueNumberFromThreaderResidueNumber(char r){    r = toupper(r);    return LookupBLASTResidueNumberFromCharacter(            (r >= 0 && r < Threader::ThreaderResidues.size()) ? Threader::ThreaderResidues[r] : 'X');}#ifdef _DEBUG#define PRINT_PSSM // for testing/debugging#endifstatic BLAST_OptionsBlkPtr CreateBlastOptionsBlk(void){    BLAST_OptionsBlkPtr bob = BLASTOptionNew("blastp", true);    bob->db_length = 2717223;    // size of CDD database v1.62    bob->scalingFactor = 1.0;    return bob;}static void SetEffectiveSearchSpaceSize(BLAST_OptionsBlkPtr bob, Int4 queryLength){    bob->searchsp_eff = BLASTCalculateSearchSpace(bob, 1, bob->db_length, queryLength);}void BLASTer::CreateNewPairwiseAlignmentsByBlast(const BlockMultipleAlignment *multiple,    const AlignmentList& toRealign, AlignmentList *newAlignments, bool usePSSM){    if (usePSSM && !multiple) {        ERRORMSG("usePSSM true, but NULL multiple alignment");        return;    }    // set up BLAST stuff    BLAST_OptionsBlkPtr options = CreateBlastOptionsBlk();    if (!options) {        ERRORMSG("BLASTOptionNew() failed");        return;    }    // create Seq-loc intervals    SeqLocPtr masterSeqLoc = (SeqLocPtr) MemNew(sizeof(SeqLoc));    masterSeqLoc->choice = SEQLOC_INT;    SeqIntPtr masterSeqInt = SeqIntNew();    masterSeqLoc->data.ptrvalue = masterSeqInt;    if (multiple) {        BlockMultipleAlignment::UngappedAlignedBlockList uaBlocks;        multiple->GetUngappedAlignedBlocks(&uaBlocks);        if (uaBlocks.size() > 0) {            int excess = 0;            if (!RegistryGetInteger(REG_ADVANCED_SECTION, REG_FOOTPRINT_RES, &excess))                WARNINGMSG("Can't get footprint excess residues from registry");            masterSeqInt->from = uaBlocks.front()->GetRangeOfRow(0)->from - excess;            if (masterSeqInt->from < 0)                masterSeqInt->from = 0;            masterSeqInt->to = uaBlocks.back()->GetRangeOfRow(0)->to + excess;            if (masterSeqInt->to >= multiple->GetMaster()->Length())                masterSeqInt->to = multiple->GetMaster()->Length() - 1;        } else {            masterSeqInt->from = 0;            masterSeqInt->to = multiple->GetMaster()->Length() - 1;        }    }    SeqLocPtr slaveSeqLoc = (SeqLocPtr) MemNew(sizeof(SeqLoc));    slaveSeqLoc->choice = SEQLOC_INT;    SeqIntPtr slaveSeqInt = SeqIntNew();    slaveSeqLoc->data.ptrvalue = slaveSeqInt;    // create BLAST_Matrix if using PSSM from multiple    const BLAST_Matrix *BLASTmatrix = NULL;    if (usePSSM) BLASTmatrix = multiple->GetPSSM();    string err;    AlignmentList::const_iterator s, se = toRealign.end();    for (s=toRealign.begin(); s!=se; ++s) {        if (multiple && (*s)->GetMaster() != multiple->GetMaster())            ERRORMSG("master sequence mismatch");        // get C Bioseq for master        const Sequence *masterSeq = multiple ? multiple->GetMaster() : (*s)->GetMaster();        Bioseq *masterBioseq = masterSeq->parentSet->GetOrCreateBioseq(masterSeq);        masterSeqInt->id = masterBioseq->id;        // override master alignment interval if specified        if (!multiple) {            if ((*s)->alignMasterFrom >= 0 && (*s)->alignMasterFrom < masterSeq->Length() &&                (*s)->alignMasterTo >= 0 && (*s)->alignMasterTo < masterSeq->Length() &&                (*s)->alignMasterFrom <= (*s)->alignMasterTo)            {                masterSeqInt->from = (*s)->alignMasterFrom;                masterSeqInt->to = (*s)->alignMasterTo;            } else {                masterSeqInt->from = 0;                masterSeqInt->to = masterSeq->Length() - 1;            }        }        // get C Bioseq for slave of each incoming (pairwise) alignment        const Sequence *slaveSeq = (*s)->GetSequenceOfRow(1);        Bioseq *slaveBioseq = slaveSeq->parentSet->GetOrCreateBioseq(slaveSeq);        // setup Seq-loc interval for slave        slaveSeqInt->id = slaveBioseq->id;        slaveSeqInt->from =            ((*s)->alignSlaveFrom >= 0 && (*s)->alignSlaveFrom < slaveSeq->Length()) ?                (*s)->alignSlaveFrom : 0;        slaveSeqInt->to =            ((*s)->alignSlaveTo >= 0 && (*s)->alignSlaveTo < slaveSeq->Length()) ?                (*s)->alignSlaveTo : slaveSeq->Length() - 1;        INFOMSG("for BLAST: master range " <<                (masterSeqInt->from + 1) << " to " << (masterSeqInt->to + 1) << ", slave range " <<                (slaveSeqInt->from + 1) << " to " << (slaveSeqInt->to + 1));        // set search space size        SetEffectiveSearchSpaceSize(options, slaveSeqInt->to - slaveSeqInt->from + 1);        INFOMSG("effective search space size: " << ((int) options->searchsp_eff));        // actually do the BLAST alignment        SeqAlign *salp = NULL;        if (usePSSM) {            INFOMSG("calling BlastTwoSequencesByLocWithCallback(); PSSM db_length "                << (int) options->db_length << ", K " << BLASTmatrix->karlinK);            salp = BlastTwoSequencesByLocWithCallback(                masterSeqLoc, slaveSeqLoc,                "blastp", options,                NULL, NULL, NULL,                const_cast<BLAST_Matrix*>(BLASTmatrix));        } else {            INFOMSG("calling BlastTwoSequencesByLoc()");            salp = BlastTwoSequencesByLoc(masterSeqLoc, slaveSeqLoc, "blastp", options);        }        // create new alignment structure        BlockMultipleAlignment::SequenceList *seqs = new BlockMultipleAlignment::SequenceList(2);        (*seqs)[0] = masterSeq;        (*seqs)[1] = slaveSeq;        BlockMultipleAlignment *newAlignment =            new BlockMultipleAlignment(seqs, slaveSeq->parentSet->alignmentManager);        newAlignment->SetRowDouble(0, kMax_Double);        newAlignment->SetRowDouble(1, kMax_Double);        // process the result        if (!salp) {            WARNINGMSG("BLAST failed to find a significant alignment");        } else {            // convert C SeqAlign to C++ for convenience#ifdef _DEBUG            AsnIoPtr aip = AsnIoOpen("seqalign.txt", "w");            SeqAlignAsnWrite(salp, aip, NULL);            AsnIoFree(aip, true);#endif            CSeq_align sa;            bool okay = ConvertAsnFromCToCPP(salp, (AsnWriteFunc) SeqAlignAsnWrite, &sa, &err);            SeqAlignSetFree(salp);            if (!okay) {                ERRORMSG("Conversion of SeqAlign to C++ object failed");                continue;            }            // make sure the structure of this SeqAlign is what we expect            if (!sa.IsSetDim() || sa.GetDim() != 2 ||                !sa.GetSegs().IsDenseg() || sa.GetSegs().GetDenseg().GetDim() != 2 ||                !masterSeq->identifier->MatchesSeqId(*(sa.GetSegs().GetDenseg().GetIds().front())) ||                !slaveSeq->identifier->MatchesSeqId(*(sa.GetSegs().GetDenseg().GetIds().back()))) {                ERRORMSG("Confused by BLAST result format");                continue;            }            // fill out with blocks from BLAST alignment            CDense_seg::TStarts::const_iterator iStart = sa.GetSegs().GetDenseg().GetStarts().begin();            CDense_seg::TLens::const_iterator iLen = sa.GetSegs().GetDenseg().GetLens().begin();#ifdef _DEBUG            int raw_pssm = 0, raw_bl62 = 0;#endif            for (int i=0; i<sa.GetSegs().GetDenseg().GetNumseg(); ++i) {                int masterStart = *(iStart++), slaveStart = *(iStart++), len = *(iLen++);                if (masterStart >= 0 && slaveStart >= 0 && len > 0) {                    UngappedAlignedBlock *newBlock = new UngappedAlignedBlock(newAlignment);                    newBlock->SetRangeOfRow(0, masterStart, masterStart + len - 1);                    newBlock->SetRangeOfRow(1, slaveStart, slaveStart + len - 1);                    newBlock->width = len;                    newAlignment->AddAlignedBlockAtEnd(newBlock);#ifdef _DEBUG                    // calculate score manually, to compare with that returned by BLAST                    for (int j=0; j<len; ++j) {                        if (usePSSM)                            raw_pssm += BLASTmatrix->matrix                                [masterStart + j]                                [LookupBLASTResidueNumberFromCharacter(                                    slaveSeq->sequenceString[slaveStart + j])];                        raw_bl62 += GetBLOSUM62Score(                            masterSeq->sequenceString[masterStart + j],                            slaveSeq->sequenceString[slaveStart + j]);                    }#endif                }#ifdef _DEBUG                else if ((masterStart < 0 || slaveStart < 0) && len > 0) {                    int gap = options->gap_open + options->gap_extend * len;                    if (usePSSM) raw_pssm -= gap;                    raw_bl62 -= gap;                }#endif            }#ifdef _DEBUG            TRACEMSG("Calculated raw score by PSSM: " << raw_pssm);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -