首页 › 资源下载 › 生物技术 › ncbi源码 › 源码查看
blast_seqalign.cpp

来自「ncbi源码」· C++ 代码 · 共 1,557 行 · 第 1/4 页
CPP
1,557 行
    } else {        CRef<CDense_seg> dense_seg =             x_CreateDenseg(master, slave, starts, lengths, strands);        sar->SetSegs().SetDenseg(*dense_seg);    }    return sar;}static CRef<CSeq_align>x_GapEditBlock2SeqAlign(GapEditBlock* edit_block,         const CSeq_id* id1, const CSeq_id* id2){    ASSERT(edit_block != NULL);    vector<TSignedSeqPos> starts;    vector<TSeqPos> lengths;    vector<ENa_strand> strands;    bool is_disc_align = false;    int nsegs = 0;      // number of segments in edit_block->esp    for (GapEditScript* t = edit_block->esp; t; t = t->next, nsegs++) {        if (t->op_type == eGapAlignDecline)            is_disc_align = true;    }    if (is_disc_align) {        /* By request of Steven Altschul - we need to have            the unaligned part being to the left if it is adjacent to the           gap (insertion or deletion) - so this function will do           shuffeling */        x_CorrectUASequence(edit_block);        CRef<CSeq_align> seqalign(new CSeq_align());        seqalign->SetType(CSeq_align::eType_partial);        seqalign->SetDim(2);         // BLAST only creates pairwise alignments        bool skip_region;        GapEditScript* curr = NULL,* curr_head = edit_block->esp;        while (curr_head) {            skip_region = false;            for (nsegs = 0, curr = curr_head; curr; curr = curr->next, nsegs++){                if (curr->op_type == eGapAlignDecline) {                    if (nsegs != 0) { // end of aligned region                        break;                    } else {                        while (curr && curr->op_type == eGapAlignDecline) {                            nsegs++;                            curr = curr->next;                        }                        skip_region = true;                        break;                    }                }            }            // build seqalign for required regions only            if (!skip_region) {                x_CollectSeqAlignData(edit_block, curr_head, nsegs, starts,                        lengths, strands);                CRef<CSeq_align> sa_tmp = x_CreateSeqAlign(id1, id2, starts,                        lengths, strands, edit_block->translate1 !=0,                        edit_block->translate2 != 0, edit_block->reverse != 0);                // Add this seqalign to the list                if (sa_tmp)                    seqalign->SetSegs().SetDisc().Set().push_back(sa_tmp);            }            curr_head = curr;        }        return seqalign;    } else {        x_CollectSeqAlignData(edit_block, edit_block->esp, nsegs,                 starts, lengths, strands);        return x_CreateSeqAlign(id1, id2, starts, lengths, strands,                edit_block->translate1 != 0, edit_block->translate2 != 0,                edit_block->reverse != 0);    }}/** Get the current position. */static Int4 get_current_pos(Int4* pos, Int4 length){    Int4 val;    if(*pos < 0)        val = -(*pos + length -1);    else        val = *pos;    *pos += length;    return val;}/** This function is used for Out-Of-Frame traceback conversion * Convert an OOF EditScript chain to a SeqAlign of type StdSeg. * Used for a non-simple interval (i.e., one without subs. or  * deletions).   * The first SeqIdPtr in the chain of subject_id and query_id is  * duplicated for the SeqAlign.*/static CRef<CSeq_align>x_OOFEditBlock2SeqAlign(EProgram program,     GapEditBlock* edit_block,     const CSeq_id* query_id, const CSeq_id* subject_id){    ASSERT(edit_block != NULL);    CRef<CSeq_align> seqalign(new CSeq_align());    Boolean reverse = FALSE;    GapEditScript* curr,* esp;    Int2 frame1, frame2;    Int4 start1, start2;    Int4 original_length1, original_length2;    CRef<CSeq_interval> seq_int1_last;    CRef<CSeq_interval> seq_int2_last;    CConstRef<CSeq_id> id1;    CConstRef<CSeq_id> id2;    CRef<CSeq_loc> slp1, slp2;    ENa_strand strand1, strand2;    bool first_shift;    Int4 from1, from2, to1, to2;    CRef<CSeq_id> tmp;    if (program == eBlastx) {       reverse = TRUE;       start1 = edit_block->start2;       start2 = edit_block->start1;       frame1 = edit_block->frame2;       frame2 = edit_block->frame1;       original_length1 = edit_block->original_length2;       original_length2 = edit_block->original_length1;       id1.Reset(subject_id);       id2.Reset(query_id);    } else {        start1 = edit_block->start1;       start2 = edit_block->start2;       frame1 = edit_block->frame1;       frame2 = edit_block->frame2;       original_length1 = edit_block->original_length1;       original_length2 = edit_block->original_length2;       id1.Reset(query_id);       id2.Reset(subject_id);    }     strand1 = x_Frame2Strand(frame1);    strand2 = x_Frame2Strand(frame2);        esp = edit_block->esp;        seqalign->SetDim(2); /**only two dimention alignment**/        seqalign->SetType(CSeq_align::eType_partial); /**partial for gapped translating search. */    esp = edit_block->esp;    first_shift = false;    for (curr=esp; curr; curr=curr->next) {        slp1.Reset(new CSeq_loc());        slp2.Reset(new CSeq_loc());                switch (curr->op_type) {        case eGapAlignDel: /* deletion of three nucleotides. */                        first_shift = false;            slp1->SetInt().SetFrom(get_current_pos(&start1, curr->num));            slp1->SetInt().SetTo(MIN(start1,original_length1) - 1);            tmp.Reset(new CSeq_id(id1->AsFastaString()));            slp1->SetInt().SetId(*tmp);            slp1->SetInt().SetStrand(strand1);                        /* Empty nucleotide piece */            tmp.Reset(new CSeq_id(id2->AsFastaString()));            slp2->SetEmpty(*tmp);            seq_int1_last.Reset(&slp1->SetInt());            /* Keep previous seq_int2_last, in case there is a frame shift               immediately after this gap */                        break;        case eGapAlignIns: /* insertion of three nucleotides. */            /* If gap is followed after frameshift - we have to               add this element for the alignment to be correct */                        if(first_shift) { /* Second frameshift in a row */                /* Protein coordinates */                slp1->SetInt().SetFrom(get_current_pos(&start1, 1));                to1 = MIN(start1,original_length1) - 1;                slp1->SetInt().SetTo(to1);                tmp.Reset(new CSeq_id(id1->AsFastaString()));                slp1->SetInt().SetId(*tmp);                slp1->SetInt().SetStrand(strand1);                                /* Nucleotide scale shifted by op_type */                from2 = get_current_pos(&start2, 3);                to2 = MIN(start2,original_length2) - 1;                slp2->SetInt().SetFrom(from2);                slp2->SetInt().SetTo(to2);                if (start2 > original_length2)                    slp1->SetInt().SetTo(to1 - 1);                                /* Transfer to DNA minus strand coordinates */                if(strand2 == eNa_strand_minus) {                    slp2->SetInt().SetTo(original_length2 - from2 - 1);                    slp2->SetInt().SetFrom(original_length2 - to2 - 1);                }                                tmp.Reset(new CSeq_id(id2->AsFastaString()));                slp2->SetInt().SetId(*tmp);                slp2->SetInt().SetStrand(strand2);                CRef<CStd_seg> seg(new CStd_seg());                seg->SetDim(2);                CStd_seg::TIds& ids = seg->SetIds();                if (reverse) {                    seg->SetLoc().push_back(slp2);                    seg->SetLoc().push_back(slp1);                    tmp.Reset(new CSeq_id(id2->AsFastaString()));                    ids.push_back(tmp);                    tmp.Reset(new CSeq_id(id1->AsFastaString()));                    ids.push_back(tmp);                } else {                    seg->SetLoc().push_back(slp1);                    seg->SetLoc().push_back(slp2);                    tmp.Reset(new CSeq_id(id1->AsFastaString()));                    ids.push_back(tmp);                    tmp.Reset(new CSeq_id(id2->AsFastaString()));                    ids.push_back(tmp);                }                ids.resize(seg->GetDim());                                seqalign->SetSegs().SetStd().push_back(seg);            }            first_shift = false;            /* Protein piece is empty */            tmp.Reset(new CSeq_id(id1->AsFastaString()));            slp1->SetEmpty(*tmp);                        /* Nucleotide scale shifted by 3, protein gapped */            from2 = get_current_pos(&start2, curr->num*3);            to2 = MIN(start2,original_length2) - 1;            slp2->SetInt().SetFrom(from2);            slp2->SetInt().SetTo(to2);            /* Transfer to DNA minus strand coordinates */            if(strand2 == eNa_strand_minus) {                slp2->SetInt().SetTo(original_length2 - from2 - 1);                slp2->SetInt().SetFrom(original_length2 - to2 - 1);            }            tmp.Reset(new CSeq_id(id2->AsFastaString()));            slp2->SetInt().SetId(*tmp);            slp2->SetInt().SetStrand(strand2);                        seq_int1_last.Reset(NULL);            seq_int2_last.Reset(&slp2->SetInt()); /* Will be used to adjust "to" value */                        break;        case eGapAlignSub: /* Substitution. */            first_shift = false;            /* Protein coordinates */            from1 = get_current_pos(&start1, curr->num);            to1 = MIN(start1, original_length1) - 1;            /* Adjusting last segment and new start point in               nucleotide coordinates */            from2 = get_current_pos(&start2, curr->num*((Uint1)curr->op_type));            to2 = start2 - 1;            /* Chop off three bases and one residue at a time.               Why does this happen, seems like a bug?            */            while (to2 >= original_length2) {                to2 -= 3;                to1--;            }            /* Transfer to DNA minus strand coordinates */            if(strand2 == eNa_strand_minus) {                int tmp_int;                tmp_int = to2;                to2 = original_length2 - from2 - 1;                from2 = original_length2 - tmp_int - 1;            }            slp1->SetInt().SetFrom(from1);            slp1->SetInt().SetTo(to1);            tmp.Reset(new CSeq_id(id1->AsFastaString()));            slp1->SetInt().SetId(*tmp);            slp1->SetInt().SetStrand(strand1);            slp2->SetInt().SetFrom(from2);            slp2->SetInt().SetTo(to2);            tmp.Reset(new CSeq_id(id2->AsFastaString()));            slp2->SetInt().SetId(*tmp);            slp2->SetInt().SetStrand(strand2);                       seq_int1_last.Reset(&slp1->SetInt()); /* Will be used to adjust "to" value */            seq_int2_last.Reset(&slp2->SetInt()); /* Will be used to adjust "to" value */                        break;        case eGapAlignDel2:	/* gap of two nucleotides. */        case eGapAlignDel1: /* Gap of one nucleotide. */        case eGapAlignIns1: /* Insertion of one nucleotide. */        case eGapAlignIns2: /* Insertion of two nucleotides. */            if(first_shift) { /* Second frameshift in a row */                /* Protein coordinates */                from1 = get_current_pos(&start1, 1);                to1 = MIN(start1,original_length1) - 1;                /* Nucleotide scale shifted by op_type */                from2 = get_current_pos(&start2, (Uint1)curr->op_type);                to2 = start2 - 1;                if (to2 >= original_length2) {                    to2 = original_length2 -1;                    to1--;                }                /* Transfer to DNA minus strand coordinates */                if(strand2 == eNa_strand_minus) {                    int tmp_int;                    tmp_int = to2;                    to2 = original_length2 - from2 - 1;                    from2 = original_length2 - tmp_int - 1;                }                slp1->SetInt().SetFrom(from1);                slp1->SetInt().SetTo(to1);                tmp.Reset(new CSeq_id(id1->AsFastaString()));                slp1->SetInt().SetId(*tmp);                slp1->SetInt().SetStrand(strand1);                slp2->SetInt().SetFrom(from2);                slp2->SetInt().SetTo(to2);                tmp.Reset(new CSeq_id(id2->AsFastaString()));                slp2->SetInt().SetId(*tmp);                slp2->SetInt().SetStrand(strand2);                seq_int1_last.Reset(&slp1->SetInt());                 seq_int2_last.Reset(&slp2->SetInt());                 break;            }                        first_shift = true;            /* If this substitution is following simple frameshift               we do not need to start new segment, but may continue               old one */            if(seq_int2_last) {                get_current_pos(&start2, curr->num*((Uint1)curr->op_type-3));                if(strand2 != eNa_strand_minus) {                    seq_int2_last->SetTo(start2 - 1);                } else {                    /* Transfer to DNA minus strand coordinates */                    seq_int2_last->SetFrom(original_length2 - start2);                }                /* Adjustment for multiple shifts - theoretically possible,                   but very improbable */                if(seq_int2_last->GetFrom() > seq_int2_last->GetTo()) {                                        if(strand2 != eNa_strand_minus) {                        seq_int2_last->SetTo(seq_int2_last->GetTo() + 3);                    } else {                        seq_int2_last->SetFrom(seq_int2_last->GetFrom() - 3);                    }                                        if (seq_int1_last.GetPointer() &&						seq_int1_last->GetTo() != 0)                        seq_int1_last->SetTo(seq_int1_last->GetTo() + 1);                }            } else if ((Uint1)curr->op_type > 3) {                /* Protein piece is empty */
blast_seqalign.cpp - 源码说明

本页面展示了「ncbi源码」中的 blast_seqalign.cpp 源码文件，采用 C++ 编程语言编写，共 1,557 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与ncbi相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?