📄 blast_hits.c
字号:
by ordinal ids of the subject sequences */ if (h1->oid > h2->oid) return -1; if (h1->oid < h2->oid) return 1; return 0;}/******************************************************************************** Functions manipulating BlastHitList's********************************************************************************//* description in blast_hits.h */BlastHitList* Blast_HitListNew(Int4 hitlist_size){ BlastHitList* new_hitlist = (BlastHitList*) calloc(1, sizeof(BlastHitList)); new_hitlist->hsplist_max = hitlist_size; new_hitlist->low_score = INT4_MAX; return new_hitlist;}/* description in blast_hits.h*/BlastHitList* Blast_HitListFree(BlastHitList* hitlist){ if (!hitlist) return NULL; Blast_HitListHSPListsFree(hitlist); sfree(hitlist); return NULL;}/* description in blast_hits.h */Int2 Blast_HitListHSPListsFree(BlastHitList* hitlist){ Int4 index; if (!hitlist) return 0; for (index = 0; index < hitlist->hsplist_count; ++index) hitlist->hsplist_array[index] = Blast_HSPListFree(hitlist->hsplist_array[index]); sfree(hitlist->hsplist_array); hitlist->hsplist_count = 0; return 0;}static void Blast_HitListPurge(BlastHitList* hit_list){ Int4 index, hsplist_count; if (!hit_list) return; hsplist_count = hit_list->hsplist_count; for (index = 0; index < hsplist_count && hit_list->hsplist_array[index]->hspcnt > 0; ++index); hit_list->hsplist_count = index; /* Free all empty HSP lists */ for ( ; index < hsplist_count; ++index) { Blast_HSPListFree(hit_list->hsplist_array[index]); }}/** This is a copy of a static function from ncbimisc.c. * Turns array into a heap with respect to a given comparison function. */static voidheapify (char* base0, char* base, char* lim, char* last, size_t width, int (*compar )(const void*, const void* )){ size_t i; char ch; char* left_son,* large_son; left_son = base0 + 2*(base-base0) + width; while (base <= lim) { if (left_son == last) large_son = left_son; else large_son = (*compar)(left_son, left_son+width) >= 0 ? left_son : left_son+width; if ((*compar)(base, large_son) < 0) { for (i=0; i<width; ++i) { ch = base[i]; base[i] = large_son[i]; large_son[i] = ch; } base = large_son; left_son = base0 + 2*(base-base0) + width; } else break; }}/** The first part of qsort: create heap only, without sorting it * @param b An array [in] [out] * @param nel Number of elements in b [in] * @param width The size of each element [in] * @param compar Callback to compare two heap elements [in] */static void CreateHeap (void* b, size_t nel, size_t width, int (*compar )(const void*, const void* )) { char* base = (char*)b; size_t i; char* base0 = (char*)base,* lim,* basef; if (nel < 2) return; lim = &base[((nel-2)/2)*width]; basef = &base[(nel-1)*width]; i = nel/2; for (base = &base0[(i - 1)*width]; i > 0; base = base - width) { heapify(base0, base, lim, basef, width, compar); i--; }}/** Given a BlastHitList* with a heapified HSP list array, remove * the worst scoring HSP list and insert the new HSP list in the heap * @param hit_list Contains all HSP lists for a given query [in] [out] * @param hsp_list A new HSP list to be inserted into the hit list [in] */static void Blast_HitListInsertHSPListInHeap(BlastHitList* hit_list, BlastHSPList* hsp_list){ Blast_HSPListFree(hit_list->hsplist_array[0]); hit_list->hsplist_array[0] = hsp_list; if (hit_list->hsplist_count >= 2) { heapify((char*)hit_list->hsplist_array, (char*)hit_list->hsplist_array, (char*)&hit_list->hsplist_array[(hit_list->hsplist_count-1)/2], (char*)&hit_list->hsplist_array[hit_list->hsplist_count-1], sizeof(BlastHSPList*), evalue_compare_hsp_lists); } hit_list->worst_evalue = hit_list->hsplist_array[0]->hsp_array[0]->evalue; hit_list->low_score = hit_list->hsplist_array[0]->hsp_array[0]->score;}Int2 Blast_HitListUpdate(BlastHitList* hit_list, BlastHSPList* hsp_list){ if (hit_list->hsplist_count < hit_list->hsplist_max) { /* If the array of HSP lists for this query is not yet allocated, do it here */ if (!hit_list->hsplist_array) hit_list->hsplist_array = (BlastHSPList**) malloc(hit_list->hsplist_max*sizeof(BlastHSPList*)); /* Just add to the end; sort later */ hit_list->hsplist_array[hit_list->hsplist_count++] = hsp_list; hit_list->worst_evalue = MAX(hsp_list->hsp_array[0]->evalue, hit_list->worst_evalue); hit_list->low_score = MIN(hsp_list->hsp_array[0]->score, hit_list->low_score); } else { /* Compare e-values only with a certain precision */ int evalue_order = fuzzy_evalue_comp(hsp_list->hsp_array[0]->evalue, hit_list->worst_evalue); if (evalue_order > 0 || (evalue_order == 0 && (hsp_list->hsp_array[0]->score < hit_list->low_score))) { /* This hit list is less significant than any of those already saved; discard it. Note that newer hits with score and e-value both equal to the current worst will be saved, at the expense of some older hit. */ Blast_HSPListFree(hsp_list); } else { if (!hit_list->heapified) { CreateHeap(hit_list->hsplist_array, hit_list->hsplist_count, sizeof(BlastHSPList*), evalue_compare_hsp_lists); hit_list->heapified = TRUE; } Blast_HitListInsertHSPListInHeap(hit_list, hsp_list); } } return 0;}/******************************************************************************** Functions manipulating BlastHSPResults********************************************************************************/Int2 Blast_HSPResultsInit(Int4 num_queries, BlastHSPResults** results_ptr){ BlastHSPResults* results; Int2 status = 0; results = (BlastHSPResults*) malloc(sizeof(BlastHSPResults)); results->num_queries = num_queries; results->hitlist_array = (BlastHitList**) calloc(num_queries, sizeof(BlastHitList*)); *results_ptr = results; return status;}BlastHSPResults* Blast_HSPResultsFree(BlastHSPResults* results){ Int4 index; if (!results) return NULL; for (index = 0; index < results->num_queries; ++index) Blast_HitListFree(results->hitlist_array[index]); sfree(results->hitlist_array); sfree(results); return NULL;}Int2 Blast_HSPResultsSortByEvalue(BlastHSPResults* results){ Int4 index; BlastHitList* hit_list; for (index = 0; index < results->num_queries; ++index) { hit_list = results->hitlist_array[index]; if (hit_list && hit_list->hsplist_count > 1) { qsort(hit_list->hsplist_array, hit_list->hsplist_count, sizeof(BlastHSPList*), evalue_compare_hsp_lists); } Blast_HitListPurge(hit_list); } return 0;}Int2 Blast_HSPResultsSaveHitList(Uint1 program, BlastHSPResults* results, BlastHSPList* hsp_list, BlastHitSavingParameters* hit_parameters){ Int2 status = 0; BlastHSPList** hsp_list_array; BlastHSP* hsp; Int4 index; BlastHitSavingOptions* hit_options = hit_parameters->options; Uint1 context_factor; if (!hsp_list) return 0; if (program == blast_type_blastn) { context_factor = NUM_STRANDS; } else if (program == blast_type_blastx || program == blast_type_tblastx) { context_factor = NUM_FRAMES; } else { context_factor = 1; } /* Sort the HSPs by e-value/score. E-value has a priority here, because lower scoring HSPs in linked sets might have lower e-values, and must be moved higher in the list. */ if (hsp_list->hspcnt > 1) { qsort(hsp_list->hsp_array, hsp_list->hspcnt, sizeof(BlastHSP*), evalue_compare_hsps); } /* Rearrange HSPs into multiple hit lists if more than one query */ if (results->num_queries > 1) { BlastHSPList* tmp_hsp_list; hsp_list_array = calloc(results->num_queries, sizeof(BlastHSPList*)); for (index = 0; index < hsp_list->hspcnt; index++) { hsp = hsp_list->hsp_array[index]; tmp_hsp_list = hsp_list_array[hsp->context/context_factor]; if (!tmp_hsp_list) { hsp_list_array[hsp->context/context_factor] = tmp_hsp_list = Blast_HSPListNew(hit_options->hsp_num_max); tmp_hsp_list->oid = hsp_list->oid; tmp_hsp_list->traceback_done = hsp_list->traceback_done; } if (!tmp_hsp_list || tmp_hsp_list->do_not_reallocate) { tmp_hsp_list = NULL; } else if (tmp_hsp_list->hspcnt >= tmp_hsp_list->allocated) { BlastHSP** new_hsp_array; Int4 new_size = MIN(2*tmp_hsp_list->allocated, tmp_hsp_list->hsp_max); if (new_size == tmp_hsp_list->hsp_max) tmp_hsp_list->do_not_reallocate = TRUE; new_hsp_array = realloc(tmp_hsp_list->hsp_array, new_size*sizeof(BlastHSP*)); if (!new_hsp_array) { tmp_hsp_list->do_not_reallocate = TRUE; tmp_hsp_list = NULL; } else { tmp_hsp_list->hsp_array = new_hsp_array; tmp_hsp_list->allocated = new_size; } } if (tmp_hsp_list) { tmp_hsp_list->hsp_array[tmp_hsp_list->hspcnt++] = hsp; } else { /* Cannot add more HSPs; free the memory */ hsp_list->hsp_array[index] = Blast_HSPFree(hsp); } } /* Insert the hit list(s) into the appropriate places in the results structure */ for (index = 0; index < results->num_queries; index++) { if (hsp_list_array[index]) { if (!results->hitlist_array[index]) { results->hitlist_array[index] = Blast_HitListNew(hit_options->prelim_hitlist_size); } Blast_HitListUpdate(results->hitlist_array[index], hsp_list_array[index]); } } sfree(hsp_list_array); /* All HSPs from the hsp_list structure are now moved to the results structure, so set the HSP count back to 0 */ hsp_list->hspcnt = 0; Blast_HSPListFree(hsp_list); } else if (hsp_list->hspcnt > 0) { /* Single query; save the HSP list directly into the results structure */ if (!results->hitlist_array[0]) { results->hitlist_array[0] = Blast_HitListNew(hit_options->prelim_hitlist_size); } Blast_HitListUpdate(results->hitlist_array[0], hsp_list); } return status; }void Blast_HSPResultsRPSUpdate(BlastHSPResults *final_result, BlastHSPResults *init_result){ Int4 i, j, k; BlastHitList **hitlist; BlastHSPList **hsplist; BlastHSP **hsp; /* Allocate the (one) hitlist of the final result, if it hasn't been allocated already */ if (!final_result->hitlist_array[0]) final_result->hitlist_array[0] = Blast_HitListNew(500); /* For each DB sequence... */ hitlist = init_result->hitlist_array; for (i = 0; i < init_result->num_queries; i++) { if (hitlist[i] == NULL) continue; /* DB sequence i has HSPLists; for each HSPList (corresponding to a distinct DB sequence)... */ hsplist = hitlist[i]->hsplist_array; for (j = 0; j < hitlist[i]->hsplist_count; j++) { /* Change the OID to make this HSPList correspond to a distinct DB sequence */ hsplist[j]->oid = i; /* For each HSP in list j, there are no distinct contexts/frames anymore */ hsp = hsplist[j]->hsp_array; for (k = 0; k < hsplist[j]->hspcnt; k++) hsp[k]->context = 0; /* Add the modified HSPList to the new set of results */ if (hsplist[j]->hspcnt) Blast_HitListUpdate(final_result->hitlist_array[0], hsplist[j]); else hsplist[j] = Blast_HSPListFree(hsplist[j]); } /* Remove the original hit list (all of its contents have been moved) */ hitlist[i]->hsplist_count = 0; init_result->hitlist_array[i] = Blast_HitListFree(hitlist[i]); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -