📄 blast_seqalign.cpp
字号:
tmp.Reset(new CSeq_id(id1->AsFastaString())); slp1->SetEmpty(*tmp); /* Simulating insertion of nucleotides */ from2 = get_current_pos(&start2, curr->num*((Uint1)curr->op_type-3)); to2 = MIN(start2,original_length2) - 1; /* Transfer to DNA minus strand coordinates */ if(strand2 == eNa_strand_minus) { int tmp_int; tmp_int = to2; to2 = original_length2 - from2 - 1; from2 = original_length2 - tmp_int - 1; } slp2->SetInt().SetFrom(from2); slp2->SetInt().SetTo(to2); tmp.Reset(new CSeq_id(id2->AsFastaString())); slp2->SetInt().SetId(*tmp); seq_int1_last.Reset(NULL); seq_int2_last.Reset(&slp2->SetInt()); /* Will be used to adjust "to" value */ break; } else { continue; /* Main loop */ } continue; /* Main loop */ /* break; */ default: continue; /* Main loop */ /* break; */ } CRef<CStd_seg> seg(new CStd_seg()); seg->SetDim(2); CStd_seg::TIds& ids = seg->SetIds(); if (reverse) { seg->SetLoc().push_back(slp2); seg->SetLoc().push_back(slp1); tmp.Reset(new CSeq_id(id2->AsFastaString())); ids.push_back(tmp); tmp.Reset(new CSeq_id(id1->AsFastaString())); ids.push_back(tmp); } else { seg->SetLoc().push_back(slp1); seg->SetLoc().push_back(slp2); tmp.Reset(new CSeq_id(id1->AsFastaString())); ids.push_back(tmp); tmp.Reset(new CSeq_id(id2->AsFastaString())); ids.push_back(tmp); } ids.resize(seg->GetDim()); seqalign->SetSegs().SetStd().push_back(seg); } return seqalign;}/// Creates and initializes CScore with i (if it's non-zero) or dstatic CRef<CScore> x_MakeScore(const string& ident_string, double d = 0.0, int i = 0){ CRef<CScore> retval(new CScore()); CRef<CObject_id> id(new CObject_id()); id->SetStr(ident_string); retval->SetId(*id); CRef<CScore::C_Value> val(new CScore::C_Value()); if (i) val->SetInt(i); else val->SetReal(d); retval->SetValue(*val); return retval;}/// C++ version of GetScoreSetFromBlastHsp (tools/blastutl.c)static voidx_BuildScoreList(const BlastHSP* hsp, const BlastScoreBlk* sbp, const BlastScoringOptions* score_options, CSeq_align::TScore& scores, EProgram program){ string score_type; Blast_KarlinBlk* kbp = NULL; if (!hsp) return; score_type = "score"; if (hsp->score) scores.push_back(x_MakeScore(score_type, 0.0, hsp->score)); score_type = "sum_n"; if (hsp->num > 1) scores.push_back(x_MakeScore(score_type, 0.0, hsp->num)); // Set the E-Value double evalue = (hsp->evalue < SMALLEST_EVALUE) ? 0.0 : hsp->evalue; score_type = (hsp->num == 1) ? "e_value" : "sum_e"; if (evalue >= 0.0) scores.push_back(x_MakeScore(score_type, evalue)); // Calculate the bit score from the raw score score_type = "bit_score"; if (program == eBlastn || !score_options->gapped_calculation) kbp = sbp->kbp[hsp->context]; else kbp = sbp->kbp_gap[hsp->context]; double bit_score = ((hsp->score*kbp->Lambda) - kbp->logK)/NCBIMATH_LN2; if (bit_score >= 0.0) scores.push_back(x_MakeScore(score_type, bit_score)); // Set the identity score score_type = "num_ident"; if (hsp->num_ident > 0) scores.push_back(x_MakeScore(score_type, 0.0, hsp->num_ident)); if (hsp->splice_junction > 0) { // Splice junction(s) was (were) found between linked HSPs score_type = "splice_junction"; scores.push_back(x_MakeScore(score_type, 0.0, hsp->splice_junction)); } return;}static voidx_AddScoresToSeqAlign(CRef<CSeq_align>& seqalign, const BlastHSP* hsp, const BlastScoreBlk* sbp, EProgram program, const BlastScoringOptions* score_options){ // Add the scores for this HSP CSeq_align::TScore& score_list = seqalign->SetScore(); x_BuildScoreList(hsp, sbp, score_options, score_list, program);}CRef<CDense_diag>x_UngappedHSPToDenseDiag(BlastHSP* hsp, const CSeq_id *query_id, const CSeq_id *subject_id, const BlastScoreBlk* sbp, const BlastScoringOptions* score_options, EProgram program, Int4 query_length, Int4 subject_length){ CRef<CDense_diag> retval(new CDense_diag()); // Pairwise alignment is 2 dimensional retval->SetDim(2); // Set the sequence ids CDense_diag::TIds& ids = retval->SetIds(); CRef<CSeq_id> tmp(new CSeq_id(query_id->AsFastaString())); ids.push_back(tmp); tmp.Reset(new CSeq_id(subject_id->AsFastaString())); ids.push_back(tmp); ids.resize(retval->GetDim()); retval->SetLen(hsp->query.length); CDense_diag::TStrands& strands = retval->SetStrands(); strands.push_back(x_Frame2Strand(hsp->query.frame)); strands.push_back(x_Frame2Strand(hsp->subject.frame)); CDense_diag::TStarts& starts = retval->SetStarts(); if (hsp->query.frame >= 0) { starts.push_back(hsp->query.offset); } else { starts.push_back(query_length - hsp->query.offset - hsp->query.length); } if (hsp->subject.frame >= 0) { starts.push_back(hsp->subject.offset); } else { starts.push_back(subject_length - hsp->subject.offset - hsp->subject.length); } CSeq_align::TScore& score_list = retval->SetScores(); x_BuildScoreList(hsp, sbp, score_options, score_list, program); return retval;}CRef<CStd_seg>x_UngappedHSPToStdSeg(BlastHSP* hsp, const CSeq_id *query_id, const CSeq_id *subject_id, const BlastScoreBlk* sbp, const BlastScoringOptions* score_options, EProgram program, Int4 query_length, Int4 subject_length){ CRef<CStd_seg> retval(new CStd_seg()); // Pairwise alignment is 2 dimensional retval->SetDim(2); CRef<CSeq_loc> query_loc(new CSeq_loc()); CRef<CSeq_loc> subject_loc(new CSeq_loc()); // Set the sequence ids CStd_seg::TIds& ids = retval->SetIds(); CRef<CSeq_id> tmp(new CSeq_id(query_id->AsFastaString())); query_loc->SetInt().SetId(*tmp); ids.push_back(tmp); tmp.Reset(new CSeq_id(subject_id->AsFastaString())); subject_loc->SetInt().SetId(*tmp); ids.push_back(tmp); ids.resize(retval->GetDim()); query_loc->SetInt().SetStrand(x_Frame2Strand(hsp->query.frame)); subject_loc->SetInt().SetStrand(x_Frame2Strand(hsp->subject.frame)); if (hsp->query.frame == 0) { query_loc->SetInt().SetFrom(hsp->query.offset); query_loc->SetInt().SetTo(hsp->query.offset + hsp->query.length - 1); } else if (hsp->query.frame > 0) { query_loc->SetInt().SetFrom(CODON_LENGTH*(hsp->query.offset) + hsp->query.frame - 1); query_loc->SetInt().SetTo(CODON_LENGTH*(hsp->query.offset+ hsp->query.length) + hsp->query.frame - 2); } else { query_loc->SetInt().SetFrom(query_length - CODON_LENGTH*(hsp->query.offset+hsp->query.length) + hsp->query.frame + 1); query_loc->SetInt().SetTo(query_length - CODON_LENGTH*hsp->query.offset + hsp->query.frame); } if (hsp->subject.frame == 0) { subject_loc->SetInt().SetFrom(hsp->subject.offset); subject_loc->SetInt().SetTo(hsp->subject.offset + hsp->subject.length - 1); } else if (hsp->subject.frame > 0) { subject_loc->SetInt().SetFrom(CODON_LENGTH*(hsp->subject.offset) + hsp->subject.frame - 1); subject_loc->SetInt().SetTo(CODON_LENGTH*(hsp->subject.offset+ hsp->subject.length) + hsp->subject.frame - 2); } else { subject_loc->SetInt().SetFrom(subject_length - CODON_LENGTH*(hsp->subject.offset+hsp->subject.length) + hsp->subject.frame + 1); subject_loc->SetInt().SetTo(subject_length - CODON_LENGTH*hsp->subject.offset + hsp->subject.frame); } retval->SetLoc().push_back(query_loc); retval->SetLoc().push_back(subject_loc); CSeq_align::TScore& score_list = retval->SetScores(); x_BuildScoreList(hsp, sbp, score_options, score_list, program); return retval;}static CRef<CSeq_align>BLASTUngappedHspListToSeqAlign(EProgram program, BlastHSPList* hsp_list, const CSeq_id *query_id, const CSeq_id *subject_id, Int4 query_length, Int4 subject_length, const BlastScoringOptions* score_options, const BlastScoreBlk* sbp){ CRef<CSeq_align> retval(new CSeq_align()); BlastHSP** hsp_array; int index; retval->SetType(CSeq_align::eType_diags); hsp_array = hsp_list->hsp_array; /* All HSPs are put in one seqalign, containing a list of * DenseDiag for same molecule search, or StdSeg for translated searches. */ if (program == eBlastn || program == eBlastp || program == eRPSBlast) { for (index=0; index<hsp_list->hspcnt; index++) { BlastHSP* hsp = hsp_array[index]; retval->SetSegs().SetDendiag().push_back( x_UngappedHSPToDenseDiag(hsp, query_id, subject_id, sbp, score_options, program, query_length, subject_length)); } } else { // Translated search for (index=0; index<hsp_list->hspcnt; index++) { BlastHSP* hsp = hsp_array[index]; retval->SetSegs().SetStd().push_back( x_UngappedHSPToStdSeg(hsp, query_id, subject_id, sbp, score_options, program, query_length, subject_length)); } } return retval;}// This is called for each query and each subject in the BLAST search// We always return CSeq_aligns of type disc to allow multiple HSPs// corresponding to the same query-subject pair to be grouped in one CSeq_alignstatic CRef<CSeq_align>BLASTHspListToSeqAlign(EProgram program, BlastHSPList* hsp_list, const CSeq_id *query_id, const CSeq_id *subject_id, const BlastScoringOptions* score_options, const BlastScoreBlk* sbp){ CRef<CSeq_align> retval(new CSeq_align()); retval->SetType(CSeq_align::eType_disc); retval->SetDim(2); // BLAST only creates pairwise alignments // Process the list of HSPs corresponding to one subject sequence and // create one seq-align for each list of HSPs (use disc seqalign when // multiple HSPs are found). BlastHSP** hsp_array = hsp_list->hsp_array; for (int index = 0; index < hsp_list->hspcnt; index++) { BlastHSP* hsp = hsp_array[index]; CRef<CSeq_align> seqalign; if (score_options->is_ooframe) { seqalign = x_OOFEditBlock2SeqAlign(program, hsp->gap_info, query_id, subject_id); } else { seqalign = x_GapEditBlock2SeqAlign(hsp->gap_info, query_id, subject_id); } x_AddScoresToSeqAlign(seqalign, hsp, sbp, program, score_options); retval->SetSegs().SetDisc().Set().push_back(seqalign); } return retval;}static voidx_GetSequenceLengthAndId(const SSeqLoc* ss, // [in] const BlastSeqSrc* seq_src, // [in] int oid, // [in] CSeq_id** seqid, // [out] TSeqPos* length) // [out]{ ASSERT(ss || seq_src); ASSERT(seqid); ASSERT(length); if ( !seq_src ) { *seqid = const_cast<CSeq_id*>(&sequence::GetId(*ss->seqloc, ss->scope)); *length = sequence::GetLength(*ss->seqloc, ss->scope); } else { ListNode* seqid_wrap; seqid_wrap = BLASTSeqSrcGetSeqId(seq_src, (void*) &oid); ASSERT(seqid_wrap); if (seqid_wrap->choice == BLAST_SEQSRC_CPP_SEQID) { *seqid = (CSeq_id*)seqid_wrap->ptr; ListNodeFree(seqid_wrap); } else if (seqid_wrap->choice == BLAST_SEQSRC_CPP_SEQID_REF) { *seqid = ((CRef<CSeq_id>*)seqid_wrap->ptr)->GetPointer(); } else { /** FIXME!!! This is wrong, because the id created here will not be registered! However if sequence source returns a C object, we cannot handle it here. */ char* id = BLASTSeqSrcGetSeqIdStr(seq_src, (void*) &oid); string id_str(id); *seqid = new CSeq_id(id_str); sfree(id); } *length = BLASTSeqSrcGetSeqLen(seq_src, (void*) &oid); } return;}/// Constructs an empty seq-align-set containing an empty discontinuous/// seq-align.static CSeq_align_set*x_CreateEmptySeq_align_set(CSeq_align_set* sas){ CSeq_align_set* retval = NULL; if (!sas) { retval = new CSeq_align_set; } else { retval = sas; } CRef<CSeq_align> empty_seqalign(new CSeq_align); empty_seqalign->SetType(CSeq_align::eType_disc);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -