📄 blast_hits.c
字号:
index1--) { if (q_off >= hsp_array[index1]->query.offset && s_off >= hsp_array[index1]->subject.offset && q_end <= hsp_array[index1]->query.end && s_end <= hsp_array[index1]->subject.end && evalue >= hsp_array[index1]->evalue) { hsp_array[index] = Blast_HSPFree(hsp_array[index]); break; } else if (q_off <= hsp_array[index1]->query.offset && s_off <= hsp_array[index1]->subject.offset && q_end >= hsp_array[index1]->query.end && s_end >= hsp_array[index1]->subject.end && evalue <= hsp_array[index1]->evalue) { hsp_array[index1] = Blast_HSPFree(hsp_array[index1]); shift_needed = TRUE; } } /* If some lower indexed HSPs have been removed, shift the subsequent HSPs */ if (shift_needed) { /* Find the first non-NULL HSP, going backwards */ while (index1 >= 0 && !hsp_array[index1]) index1--; /* Go forward, and shift any non-NULL HSPs */ for (index2 = ++index1; index1 <= new_hspcnt; index1++) { if (hsp_array[index1]) hsp_array[index2++] = hsp_array[index1]; } new_hspcnt = index2 - 1; shift_needed = FALSE; } if (hsp_array[index] != NULL) hsp_array[++new_hspcnt] = hsp_array[index]; } /* Set all HSP pointers that will not be used any more to NULL */ memset(&hsp_array[new_hspcnt+1], 0, (hsp_list->hspcnt - new_hspcnt - 1)*sizeof(BlastHSP*)); hsp_list->hspcnt = new_hspcnt + 1; return 0;}Int2 Blast_HSPListReevaluateWithAmbiguities(BlastHSPList* hsp_list, BLAST_SequenceBlk* query_blk, BLAST_SequenceBlk* subject_blk, const BlastHitSavingOptions* hit_options, BlastQueryInfo* query_info, BlastScoreBlk* sbp, const BlastScoringParameters* score_params, const BlastSeqSrc* seq_src){ BlastHSP** hsp_array,* hsp; Uint1* query_start,* subject_start = NULL; Int4 index, context, hspcnt; Boolean purge, delete_hsp; Int2 status = 0; GetSeqArg seq_arg; Boolean gapped_calculation = score_params->options->gapped_calculation; if (!hsp_list) return status; hspcnt = hsp_list->hspcnt; hsp_array = hsp_list->hsp_array; memset((void*) &seq_arg, 0, sizeof(seq_arg)); /* In case of no traceback, return without doing anything */ if (!hsp_list->traceback_done && gapped_calculation) { if (hsp_list->hspcnt > 1) status = Blast_HSPListUniqSort(hsp_list); return status; } if (hsp_list->hspcnt == 0) /* All HSPs have been deleted */ return status; /* Retrieve the unpacked subject sequence and save it in the sequence_start element of the subject structure. NB: for the BLAST 2 Sequences case, the uncompressed sequence must already be there */ if (seq_src) { seq_arg.oid = subject_blk->oid; seq_arg.encoding = BLASTNA_ENCODING; BLASTSeqSrcGetSequence(seq_src, (void*) &seq_arg); subject_blk->sequence_start = seq_arg.seq->sequence_start; subject_blk->sequence = seq_arg.seq->sequence_start + 1; } /* The sequence in blastna encoding is now stored in sequence_start */ subject_start = subject_blk->sequence_start + 1; purge = FALSE; for (index=0; index<hspcnt; index++) { if (hsp_array[index] == NULL) continue; else hsp = hsp_array[index]; context = hsp->context; query_start = query_blk->sequence + query_info->context_offsets[context]; delete_hsp = Blast_HSPReevaluateWithAmbiguities(hsp, query_start, subject_start, hit_options, score_params, query_info, sbp); if (delete_hsp) { /* This HSP is now below the cutoff */ hsp_array[index] = Blast_HSPFree(hsp_array[index]); purge = TRUE; } } if (purge) { Blast_HSPListPurgeNullHSPs(hsp_list); } /* Check for HSP inclusion once more */ if (hsp_list->hspcnt > 1) status = Blast_HSPListUniqSort(hsp_list); BlastSequenceBlkFree(seq_arg.seq); subject_blk->sequence = NULL; return status;}/** Combine two HSP lists, without altering the individual HSPs, and without * reallocating the HSP array. * @param hsp_list New HSP list [in] * @param combined_hsp_list Old HSP list, to which new HSPs are added [in] [out] * @param new_hspcnt How many HSPs to save in the combined list? The extra ones * are freed. The best scoring HSPs are saved. This argument * cannot be greater than the allocated size of the * combined list's HSP array. [in] */static void Blast_HSPListsCombineByScore(BlastHSPList* hsp_list, BlastHSPList* combined_hsp_list, Int4 new_hspcnt){ Int4 index, index1, index2; BlastHSP** new_hsp_array; ASSERT(new_hspcnt <= combined_hsp_list->allocated); if (new_hspcnt >= hsp_list->hspcnt + combined_hsp_list->hspcnt) { /* All HSPs from both arrays are saved */ for (index=combined_hsp_list->hspcnt, index1=0; index1<hsp_list->hspcnt; index1++) { if (hsp_list->hsp_array[index1] != NULL) combined_hsp_list->hsp_array[index++] = hsp_list->hsp_array[index1]; } } else { /* Not all HSPs are be saved; sort both arrays by score and save only the new_hspcnt best ones. For the merged set of HSPs, allocate array the same size as in the old HSP list. */ new_hsp_array = (BlastHSP**) malloc(combined_hsp_list->allocated*sizeof(BlastHSP*)); qsort(combined_hsp_list->hsp_array, combined_hsp_list->hspcnt, sizeof(BlastHSP*), score_compare_hsps); qsort(hsp_list->hsp_array, hsp_list->hspcnt, sizeof(BlastHSP*), score_compare_hsps); index1 = index2 = 0; for (index = 0; index < new_hspcnt; ++index) { if (index1 < combined_hsp_list->hspcnt && (index2 >= hsp_list->hspcnt || (combined_hsp_list->hsp_array[index1]->score >= hsp_list->hsp_array[index2]->score))) { new_hsp_array[index] = combined_hsp_list->hsp_array[index1]; ++index1; } else { new_hsp_array[index] = hsp_list->hsp_array[index2]; ++index2; } } /* Free the extra HSPs that could not be saved */ for ( ; index1 < combined_hsp_list->hspcnt; ++index1) { combined_hsp_list->hsp_array[index1] = Blast_HSPFree(combined_hsp_list->hsp_array[index1]); } for ( ; index2 < hsp_list->hspcnt; ++index2) { hsp_list->hsp_array[index2] = Blast_HSPFree(hsp_list->hsp_array[index2]); } /* Point combined_hsp_list's HSP array to the new one */ sfree(combined_hsp_list->hsp_array); combined_hsp_list->hsp_array = new_hsp_array; } combined_hsp_list->hspcnt = index; /* Second HSP list now does not own any HSPs */ hsp_list->hspcnt = 0;}Int2 Blast_HSPListAppend(BlastHSPList* hsp_list, BlastHSPList** combined_hsp_list_ptr, Int4 hsp_num_max){ BlastHSPList* combined_hsp_list = *combined_hsp_list_ptr; BlastHSP** new_hsp_array; Int4 new_hspcnt; if (!hsp_list || hsp_list->hspcnt == 0) return 0; /* If no previous HSP list, just return a copy of the new one. */ if (!combined_hsp_list) { *combined_hsp_list_ptr = Blast_HSPListDup(hsp_list); hsp_list->hspcnt = 0; return 0; } /* Just append new list to the end of the old list, in case of multiple frames of the subject sequence */ new_hspcnt = MIN(combined_hsp_list->hspcnt + hsp_list->hspcnt, hsp_num_max); if (new_hspcnt > combined_hsp_list->allocated && !combined_hsp_list->do_not_reallocate) { Int4 new_allocated = MIN(2*new_hspcnt, hsp_num_max); new_hsp_array = (BlastHSP**) realloc(combined_hsp_list->hsp_array, new_allocated*sizeof(BlastHSP*)); if (new_hsp_array) { combined_hsp_list->allocated = new_allocated; combined_hsp_list->hsp_array = new_hsp_array; } else { combined_hsp_list->do_not_reallocate = TRUE; new_hspcnt = combined_hsp_list->allocated; } } if (combined_hsp_list->allocated == hsp_num_max) combined_hsp_list->do_not_reallocate = TRUE; Blast_HSPListsCombineByScore(hsp_list, combined_hsp_list, new_hspcnt); return 0;}Int2 Blast_HSPListsMerge(BlastHSPList* hsp_list, BlastHSPList** combined_hsp_list_ptr, Int4 hsp_num_max, Int4 start, Boolean merge_hsps){ BlastHSPList* combined_hsp_list = *combined_hsp_list_ptr; BlastHSP* hsp, *hsp_var; BlastHSP** hspp1,** hspp2; Int4 index, index1; Int4 hspcnt1, hspcnt2, new_hspcnt = 0; BlastHSP** new_hsp_array; if (!hsp_list || hsp_list->hspcnt == 0) return 0; /* If no previous HSP list, just return a copy of the new one. */ if (!combined_hsp_list) { *combined_hsp_list_ptr = Blast_HSPListDup(hsp_list); hsp_list->hspcnt = 0; return 0; } /* Merge the two HSP lists for successive chunks of the subject sequence */ hspcnt1 = hspcnt2 = 0; /* Put all HSPs that intersect the overlap region at the front of the respective HSP arrays. */ for (index = 0; index < combined_hsp_list->hspcnt; index++) { hsp = combined_hsp_list->hsp_array[index]; if (hsp->subject.end > start) { /* At least part of this HSP lies in the overlap strip. */ hsp_var = combined_hsp_list->hsp_array[hspcnt1]; combined_hsp_list->hsp_array[hspcnt1] = hsp; combined_hsp_list->hsp_array[index] = hsp_var; ++hspcnt1; } } for (index = 0; index < hsp_list->hspcnt; index++) { hsp = hsp_list->hsp_array[index]; if (hsp->subject.offset < start + DBSEQ_CHUNK_OVERLAP) { /* At least part of this HSP lies in the overlap strip. */ hsp_var = hsp_list->hsp_array[hspcnt2]; hsp_list->hsp_array[hspcnt2] = hsp; hsp_list->hsp_array[index] = hsp_var; ++hspcnt2; } } hspp1 = combined_hsp_list->hsp_array; hspp2 = hsp_list->hsp_array; /* Sort HSPs from in the overlap region by diagonal */ qsort(hspp1, hspcnt1, sizeof(BlastHSP*), diag_compare_hsps); qsort(hspp2, hspcnt2, sizeof(BlastHSP*), diag_compare_hsps); for (index=0; index<hspcnt1; index++) { for (index1 = 0; index1<hspcnt2; index1++) { /* Skip already deleted HSPs */ if (!hspp2[index1]) continue; if (hspp1[index]->context == hspp2[index1]->context && ABS(diag_compare_hsps(&hspp1[index], &hspp2[index1])) < OVERLAP_DIAG_CLOSE) { if (merge_hsps) { if (Blast_HSPsMerge(hspp1[index], hspp2[index1], start)) { /* Free the second HSP. */ hspp2[index1] = Blast_HSPFree(hspp2[index1]); } } else { /* No gap information available */ if (Blast_HSPContained(hspp1[index], hspp2[index1])) { sfree(hspp1[index]); /* Point the first HSP to the new HSP; free the second HSP. */ hspp1[index] = hspp2[index1]; hspp2[index1] = NULL; /* This HSP has been removed, so break out of the inner loop */ break; } else if (Blast_HSPContained(hspp2[index1], hspp1[index])) { /* Just free the second HSP */ hspp2[index1] = Blast_HSPFree(hspp2[index1]); } } } else { /* This and remaining HSPs are too far from the one being checked */ break; } } } /* Purge the nulled out HSPs from the new HSP list */ Blast_HSPListPurgeNullHSPs(hsp_list); /* The new number of HSPs is now the sum of the remaining counts in the two lists, but if there is a restriction on the number of HSPs to keep, it might have to be reduced. */ new_hspcnt = MIN(hsp_list->hspcnt + combined_hsp_list->hspcnt, hsp_num_max); if (new_hspcnt >= combined_hsp_list->allocated-1 && combined_hsp_list->do_not_reallocate == FALSE) { Int4 new_allocated = MIN(2*new_hspcnt, hsp_num_max); if (new_allocated > combined_hsp_list->allocated) { new_hsp_array = (BlastHSP**) realloc(combined_hsp_list->hsp_array, new_allocated*sizeof(BlastHSP*)); if (new_hsp_array == NULL) { combined_hsp_list->do_not_reallocate = TRUE; } else { combined_hsp_list->hsp_array = new_hsp_array; combined_hsp_list->allocated = new_allocated; } } else { combined_hsp_list->do_not_reallocate = TRUE; } new_hspcnt = MIN(new_hspcnt, combined_hsp_list->allocated); } Blast_HSPListsCombineByScore(hsp_list, combined_hsp_list, new_hspcnt); return 0;}void Blast_HSPListAdjustOffsets(BlastHSPList* hsp_list, Int4 offset){ Int4 index; BlastHSP* hsp; for (index=0; index<hsp_list->hspcnt; index++) { hsp = hsp_list->hsp_array[index]; hsp->subject.offset += offset; hsp->subject.end += offset; hsp->subject.gapped_start += offset; if (hsp->gap_info) hsp->gap_info->start2 += offset; }}/** Callback for sorting hsp lists by their best evalue/score; * Evalues are compared only up to a relative error of * FUZZY_EVALUE_COMPARE_FACTOR. * It is assumed that the HSP arrays in each hit list are already sorted by * e-value/score.*/static intevalue_compare_hsp_lists(const void* v1, const void* v2){ BlastHSPList* h1,* h2; int retval = 0; h1 = *(BlastHSPList**) v1; h2 = *(BlastHSPList**) v2; /* If any of the HSP lists is empty, it is considered "worse" than the other, unless the other is also empty. */ if (h1->hspcnt == 0 && h2->hspcnt == 0) return 0; else if (h1->hspcnt == 0) return 1; else if (h2->hspcnt == 0) return -1; if ((retval = fuzzy_evalue_comp(h1->hsp_array[0]->evalue, h2->hsp_array[0]->evalue)) != 0) return retval; if (h1->hsp_array[0]->score > h2->hsp_array[0]->score) return -1; if (h1->hsp_array[0]->score < h2->hsp_array[0]->score) return 1; /* In case of equal best E-values and scores, order will be determined
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -