📄 blast_seqalign.cpp
字号:
empty_seqalign->SetSegs().SetDisc(*new CSeq_align_set); retval->Set().push_back(empty_seqalign); return retval;}/// Always remap the query, the subject is remapped if it's given (assumes/// alignment created by BLAST 2 Sequences API)./// Since the query strands were already taken into account when CSeq_align /// was created, only start position shifts in the CSeq_loc's are relevant in /// this function. However full remapping is necessary for the subject sequence/// if it is on a negative strand.static voidx_RemapAlignmentCoordinates(CRef<CSeq_align> sar, const SSeqLoc* query, const SSeqLoc* subject = NULL){ _ASSERT(sar); ASSERT(query); const int query_dimension = 0; const int subject_dimension = 1; // If subject is on a minus strand, we'll need to flip subject strands // and remap subject coordinates on all segments. // Otherwise we only need to shift query and/or subject coordinates, // if the respective location starts not from 0. bool remap_subject = (subject && subject->seqloc->IsInt() && subject->seqloc->GetInt().GetStrand() == eNa_strand_minus); TSeqPos q_shift = 0, s_shift = 0; if (query->seqloc->IsInt()) { q_shift = query->seqloc->GetInt().GetFrom(); } if (subject && subject->seqloc->IsInt()) { s_shift = subject->seqloc->GetInt().GetFrom(); } if (remap_subject || q_shift > 0 || s_shift > 0) { for (CTypeIterator<CDense_seg> itr(Begin(*sar)); itr; ++itr) { const vector<ENa_strand> strands = itr->GetStrands(); // Create temporary CSeq_locs with strands either matching // (for query and for subject if it is not on a minus strand), // or opposite to those in the segment, to force RemapToLoc to // behave in the correct way. if (q_shift > 0) { CSeq_loc q_seqloc; ENa_strand q_strand = strands[0]; q_seqloc.SetInt().SetFrom(q_shift); q_seqloc.SetInt().SetTo(query->seqloc->GetInt().GetTo()); q_seqloc.SetInt().SetStrand(q_strand); q_seqloc.SetInt().SetId().Assign(sequence::GetId(*query->seqloc, query->scope)); itr->RemapToLoc(query_dimension, q_seqloc); } if (remap_subject || s_shift > 0) { CSeq_loc s_seqloc; ENa_strand s_strand; if (remap_subject) { s_strand = ((strands[1] == eNa_strand_plus) ? eNa_strand_minus : eNa_strand_plus); } else { s_strand = strands[1]; } s_seqloc.SetInt().SetFrom(s_shift); s_seqloc.SetInt().SetTo(subject->seqloc->GetInt().GetTo()); s_seqloc.SetInt().SetStrand(s_strand); s_seqloc.SetInt().SetId().Assign(sequence::GetId(*subject->seqloc, subject->scope)); itr->RemapToLoc(subject_dimension, s_seqloc); } } }}CSeq_align_set*BLAST_HitList2CSeqAlign(const BlastHitList* hit_list, EProgram prog, SSeqLoc &query, const BlastSeqSrc* seq_src, const BlastScoringOptions* score_options, const BlastScoreBlk* sbp){ CSeq_align_set* seq_aligns = new CSeq_align_set(); bool is_gapped = score_options->gapped_calculation ? true : false; ASSERT(seq_src); if (!hit_list) { return x_CreateEmptySeq_align_set(seq_aligns); } TSeqPos query_length = 0; CSeq_id* qid = NULL; CConstRef<CSeq_id> query_id; x_GetSequenceLengthAndId(&query, NULL, 0, &qid, &query_length); query_id.Reset(qid); TSeqPos subj_length = 0; CSeq_id* sid = NULL; CConstRef<CSeq_id> subject_id; for (int index = 0; index < hit_list->hsplist_count; index++) { BlastHSPList* hsp_list = hit_list->hsplist_array[index]; if (!hsp_list) continue; x_GetSequenceLengthAndId(NULL, seq_src, hsp_list->oid, &sid, &subj_length); subject_id.Reset(sid); // Create a CSeq_align for each matching sequence CRef<CSeq_align> hit_align; if (is_gapped) { hit_align = BLASTHspListToSeqAlign(prog, hsp_list, query_id, subject_id, score_options, sbp); } else { hit_align = BLASTUngappedHspListToSeqAlign(prog, hsp_list, query_id, subject_id, query_length, subj_length, score_options, sbp); } ListNode* subject_loc_wrap = BLASTSeqSrcGetSeqLoc(seq_src, (void*)&hsp_list->oid); SSeqLoc* subject_loc = NULL; if (subject_loc_wrap && subject_loc_wrap->choice == BLAST_SEQSRC_CPP_SEQLOC) subject_loc = (SSeqLoc*) subject_loc_wrap->ptr; x_RemapAlignmentCoordinates(hit_align, &query, subject_loc); seq_aligns->Set().push_back(hit_align); } return seq_aligns;}TSeqAlignVectorBLAST_Results2CSeqAlign(const BlastHSPResults* results, EProgram prog, TSeqLocVector &query, const BlastSeqSrc* seq_src, const BlastScoringOptions* score_options, const BlastScoreBlk* sbp){ ASSERT(results->num_queries == (int)query.size()); ASSERT(seq_src); TSeqAlignVector retval; CConstRef<CSeq_id> query_id; // Process each query's hit list for (int index = 0; index < results->num_queries; index++) { BlastHitList* hit_list = results->hitlist_array[index]; CRef<CSeq_align_set> seq_aligns(BLAST_HitList2CSeqAlign(hit_list, prog, query[index], seq_src, score_options, sbp)); retval.push_back(seq_aligns); _TRACE("Query " << index << ": " << seq_aligns->Get().size() << " seqaligns"); } return retval;}TSeqAlignVectorBLAST_OneSubjectResults2CSeqAlign(const BlastHSPResults* results, EProgram prog, TSeqLocVector &query, const BlastSeqSrc* seq_src, Int4 subject_index, const BlastScoringOptions* score_options, const BlastScoreBlk* sbp){ ASSERT(results->num_queries == (int)query.size()); ASSERT(seq_src); TSeqAlignVector retval; CConstRef<CSeq_id> subject_id; CConstRef<CSeq_id> query_id; CSeq_id* sid = NULL; CSeq_id* qid = NULL; bool is_gapped = score_options->gapped_calculation ? true : false; TSeqPos subj_length = 0; TSeqPos query_length = 0; // Subject is the same for all queries, so retrieve its id right away x_GetSequenceLengthAndId(NULL, seq_src, subject_index, &sid, &subj_length); subject_id.Reset(sid); // Process each query's hit list for (int index = 0; index < results->num_queries; index++) { x_GetSequenceLengthAndId(&query[index], NULL, 0, &qid, &query_length); query_id.Reset(qid); CRef<CSeq_align_set> seq_aligns; BlastHitList* hit_list = results->hitlist_array[index]; BlastHSPList* hsp_list = NULL; // Find the HSP list corresponding to this subject, if it exists if (hit_list) { int result_index; for (result_index = 0; result_index < hit_list->hsplist_count; ++result_index) { hsp_list = hit_list->hsplist_array[result_index]; if (hsp_list->oid == subject_index) break; } } if (hsp_list) { CRef<CSeq_align> hit_align; if (is_gapped) { hit_align = BLASTHspListToSeqAlign(prog, hsp_list, query_id, subject_id, score_options, sbp); } else { hit_align = BLASTUngappedHspListToSeqAlign(prog, hsp_list, query_id, subject_id, query_length, subj_length, score_options, sbp); } ListNode* subject_loc_wrap = BLASTSeqSrcGetSeqLoc(seq_src, (void*)&hsp_list->oid); SSeqLoc* subject_loc = NULL; if (subject_loc_wrap && subject_loc_wrap->choice == BLAST_SEQSRC_CPP_SEQLOC) subject_loc = (SSeqLoc*) subject_loc_wrap->ptr; x_RemapAlignmentCoordinates(hit_align, &query[index], subject_loc); ListNodeFree(subject_loc_wrap); seq_aligns.Reset(new CSeq_align_set()); seq_aligns->Set().push_back(hit_align); } else { seq_aligns.Reset(x_CreateEmptySeq_align_set(NULL)); } retval.push_back(seq_aligns); } return retval;}END_SCOPE(blast)END_NCBI_SCOPE/* @} *//** ===========================================================================** $Log: blast_seqalign.cpp,v $* Revision 1000.6 2004/06/01 18:05:52 gouriano* PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.42** Revision 1.42 2004/05/21 21:41:02 gorelenk* Added PCH ncbi_pch.hpp** Revision 1.41 2004/05/19 15:26:04 dondosha* Edit script operations changed from macros to an enum** Revision 1.40 2004/05/05 14:42:25 dondosha* Correction in x_RemapAlignmentCoordinates for whole vs. interval Seq-locs** Revision 1.39 2004/04/28 19:40:45 dondosha* Fixed x_RemapAlignmentCoordinates function to work correctly with all strand combinations** Revision 1.38 2004/04/19 12:59:12 madden* Changed BLAST_KarlinBlk to Blast_KarlinBlk to avoid conflict with blastkar.h structure** Revision 1.37 2004/04/16 14:28:19 papadopo* add use of eRPSBlast program** Revision 1.36 2004/04/06 20:47:14 dondosha* Check if BLASTSeqSrcGetSeqId returns a pointer to CRef instead of a simple pointer to CSeq_id** Revision 1.35 2004/03/24 19:14:14 dondosha* BlastHSP structure does not have ordering_method field any more, but it does contain a splice_junction field** Revision 1.34 2004/03/23 18:22:06 dondosha* Minor memory leak fix** Revision 1.33 2004/03/23 14:10:50 camacho* Minor doxygen fix** Revision 1.32 2004/03/19 19:22:55 camacho* Move to doxygen group AlgoBlast, add missing CVS logs at EOF** Revision 1.31 2004/03/16 22:03:38 camacho* Remove dead code** Revision 1.30 2004/03/15 19:58:55 dondosha* Added BLAST_OneSubjectResults2CSeqalign function to retrieve single subject results from BlastHSPResults** Revision 1.29 2003/12/19 20:16:10 dondosha* Get length in x_GetSequenceLengthAndId regardless of whether search is gapped; do not call RemapToLoc for whole Seq-locs** Revision 1.28 2003/12/03 16:43:47 dondosha* Renamed BlastMask to BlastMaskLoc, BlastResults to BlastHSPResults** Revision 1.27 2003/12/01 20:02:39 coulouri* fix msvc warning** Revision 1.26 2003/11/24 20:59:12 ucko* Change one ASSERT to _ASSERT (probably more appropriate in general)* due to MSVC brokenness.** Revision 1.25 2003/11/24 17:14:58 camacho* Remap alignment coordinates to original Seq-locs** Revision 1.24 2003/11/04 18:37:36 dicuccio* Fix for brain-dead MSVC (operator && is ambiguous)** Revision 1.23 2003/11/04 17:13:31 dondosha* Implemented conversion of results to seqalign for out-of-frame search** Revision 1.22 2003/10/31 22:08:39 dondosha* Implemented conversion of BLAST results to seqalign for ungapped search** Revision 1.21 2003/10/31 00:05:15 camacho* Changes to return discontinuous seq-aligns for each query-subject pair** Revision 1.20 2003/10/30 21:40:57 dondosha* Removed unneeded extra argument from BLAST_Results2CSeqAlign** Revision 1.19 2003/10/28 20:53:59 camacho* Temporarily use exception for unimplemented function** Revision 1.18 2003/10/15 16:59:42 coulouri* type correctness fixes** Revision 1.17 2003/09/09 15:18:02 camacho* Fix includes** Revision 1.16 2003/08/19 20:27:51 dondosha* Rewrote the results-to-seqalign conversion slightly** Revision 1.15 2003/08/19 13:46:13 dicuccio* Added 'USING_SCOPE(objects)' to .cpp files for ease of reading implementation.** Revision 1.14 2003/08/18 22:17:36 camacho* Renaming of SSeqLoc members** Revision 1.13 2003/08/18 20:58:57 camacho* Added blast namespace, removed *__.hpp includes** Revision 1.12 2003/08/18 17:07:41 camacho* Introduce new SSeqLoc structure (replaces pair<CSeq_loc, CScope>).* Change in function to read seqlocs from files.** Revision 1.11 2003/08/15 15:56:32 dondosha* Corrections in implementation of results-to-seqalign function** Revision 1.10 2003/08/12 19:19:34 dondosha* Use TSeqLocVector type** Revision 1.9 2003/08/11 14:00:41 dicuccio* Indenting changes. Fixed use of C++ namespaces (USING_SCOPE(objects) inside of* BEGIN_NCBI_SCOPE block)** Revision 1.8 2003/08/08 19:43:07 dicuccio* Compilation fixes: #include file rearrangement; fixed use of 'list' and* 'vector' as variable names; fixed missing ostrea<< for __int64** Revision 1.7 2003/08/01 17:40:56 dondosha* Use renamed functions and structures from local blastkar.h** Revision 1.6 2003/07/31 19:45:33 camacho* Eliminate Ptr notation** Revision 1.5 2003/07/25 22:12:46 camacho* Use BLAST Sequence Source to retrieve sequence identifier** Revision 1.4 2003/07/25 13:55:58 camacho* Removed unnecessary #includes** Revision 1.3 2003/07/23 21:28:23 camacho* Use new local gapinfo structures** Revision 1.2 2003/07/14 21:40:22 camacho* Pacify compiler warnings** Revision 1.1 2003/07/10 18:34:19 camacho* Initial revision*** ===========================================================================*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -