📄 link_hsps.c
字号:
hp1 = (LinkHSPStruct**) v1; hp2 = (LinkHSPStruct**) v2; h1 = (*hp1)->hsp; h2 = (*hp2)->hsp; if (h1->context > h2->context) return 1; else if (h1->context < h2->context) return -1; if (h1->query.offset < h2->query.offset) return 1; if (h1->query.offset > h2->query.offset) return -1; return 0;}static intrev_compare_hsps_transl(const void *v1, const void *v2){ BlastHSP* h1,* h2; LinkHSPStruct** hp1,** hp2; Int4 context1, context2; hp1 = (LinkHSPStruct**) v1; hp2 = (LinkHSPStruct**) v2; h1 = (*hp1)->hsp; h2 = (*hp2)->hsp; context1 = h1->context/3; context2 = h2->context/3; if (context1 > context2) return 1; else if (context1 < context2) return -1; if (h1->query.offset < h2->query.offset) return 1; if (h1->query.offset > h2->query.offset) return -1; return 0;}static intrev_compare_hsps_tbn(const void *v1, const void *v2){ BlastHSP* h1,* h2; LinkHSPStruct** hp1,** hp2; hp1 = (LinkHSPStruct**) v1; hp2 = (LinkHSPStruct**) v2; h1 = (*hp1)->hsp; h2 = (*hp2)->hsp; if (h1->context > h2->context) return 1; else if (h1->context < h2->context) return -1; if (SIGN(h1->subject.frame) != SIGN(h2->subject.frame)) { if (h1->subject.frame > h2->subject.frame) return 1; else return -1; } if (h1->query.offset < h2->query.offset) return 1; if (h1->query.offset > h2->query.offset) return -1; return 0;}static intrev_compare_hsps_tbx(const void *v1, const void *v2){ BlastHSP* h1,* h2; LinkHSPStruct** hp1,** hp2; Int4 context1, context2; hp1 = (LinkHSPStruct**) v1; hp2 = (LinkHSPStruct**) v2; h1 = (*hp1)->hsp; h2 = (*hp2)->hsp; context1 = h1->context/3; context2 = h2->context/3; if (context1 > context2) return 1; else if (context1 < context2) return -1; if (SIGN(h1->subject.frame) != SIGN(h2->subject.frame)) { if (h1->subject.frame > h2->subject.frame) return 1; else return -1; } if (h1->query.offset < h2->query.offset) return 1; if (h1->query.offset > h2->query.offset) return -1; return 0;}/** The helper array contains the info used frequently in the inner * for loops of the HSP linking algorithm. * One array of helpers will be allocated for each thread. */typedef struct LinkHelpStruct { LinkHSPStruct* ptr; Int4 q_off_trim; Int4 s_off_trim; Int4 sum[BLAST_NUMBER_OF_ORDERING_METHODS]; Int4 maxsum1; Int4 next_larger;} LinkHelpStruct;static LinkHSPStruct* LinkHSPStructReset(LinkHSPStruct* lhsp){ BlastHSP* hsp; if (!lhsp) { lhsp = (LinkHSPStruct*) calloc(1, sizeof(LinkHSPStruct)); lhsp->hsp = (BlastHSP*) calloc(1, sizeof(BlastHSP)); } else { if (!lhsp->hsp) { hsp = (BlastHSP*) calloc(1, sizeof(BlastHSP)); } else { hsp = lhsp->hsp; memset(hsp, 0, sizeof(BlastHSP)); } memset(lhsp, 0, sizeof(LinkHSPStruct)); lhsp->hsp = hsp; } return lhsp;}static Int2link_hsps(Uint1 program_number, BlastHSPList* hsp_list, BlastQueryInfo* query_info, BLAST_SequenceBlk* subject, BlastScoreBlk* sbp, const BlastHitSavingParameters* hit_params, Boolean gapped_calculation){ LinkHSPStruct* H,* H2,* best[2],* first_hsp,* last_hsp,** hp_frame_start; LinkHSPStruct* hp_start = NULL; BlastHSP* hsp; BlastHSP** hsp_array; Blast_KarlinBlk** kbp; Int4 maxscore, cutoff[2]; Boolean linked_set, ignore_small_gaps; double gap_decay_rate, gap_prob, prob[2]; Int4 index, index1, num_links, frame_index; LinkOrderingMethod ordering_method; Int4 num_query_frames, num_subject_frames; Int4 *hp_frame_number; Int4 gap_size, number_of_hsps, total_number_of_hsps; Int4 query_length, subject_length, length_adjustment; LinkHSPStruct* link; Int4 H2_index,H_index; Int4 i; Int4 max_q_diff; Int4 path_changed; /* will be set if an element is removed that may change an existing path */ Int4 first_pass, use_current_max; LinkHelpStruct *lh_helper=0; Int4 lh_helper_size; Int4 query_context; /* AM: to support query concatenation. */ Boolean translated_query; LinkHSPStruct** link_hsp_array; if (hsp_list == NULL) return -1; hsp_array = hsp_list->hsp_array; lh_helper_size = MAX(1024,hsp_list->hspcnt+5); lh_helper = (LinkHelpStruct *) calloc(lh_helper_size, sizeof(LinkHelpStruct)); if (gapped_calculation) kbp = sbp->kbp_gap; else kbp = sbp->kbp; total_number_of_hsps = hsp_list->hspcnt; number_of_hsps = total_number_of_hsps; gap_size = hit_params->gap_size; gap_prob = hit_params->gap_prob; gap_decay_rate = hit_params->gap_decay_rate; translated_query = (program_number == blast_type_blastx || program_number == blast_type_tblastx); if (program_number == blast_type_tblastn || program_number == blast_type_tblastx) num_subject_frames = NUM_STRANDS; else num_subject_frames = 1; link_hsp_array = (LinkHSPStruct**) malloc(total_number_of_hsps*sizeof(LinkHSPStruct*)); for (index = 0; index < total_number_of_hsps; ++index) { link_hsp_array[index] = (LinkHSPStruct*) calloc(1, sizeof(LinkHSPStruct)); link_hsp_array[index]->hsp = hsp_array[index]; } /* Sort by (reverse) position. */ if (translated_query) { qsort(link_hsp_array,total_number_of_hsps,sizeof(LinkHSPStruct*), rev_compare_hsps_tbx); } else { qsort(link_hsp_array,total_number_of_hsps,sizeof(LinkHSPStruct*), rev_compare_hsps_tbn); } cutoff[0] = hit_params->cutoff_small_gap; cutoff[1] = hit_params->cutoff_big_gap; ignore_small_gaps = hit_params->ignore_small_gaps; if (translated_query) num_query_frames = NUM_STRANDS*query_info->num_queries; else num_query_frames = query_info->num_queries; hp_frame_start = calloc(num_subject_frames*num_query_frames, sizeof(LinkHSPStruct*)); hp_frame_number = calloc(num_subject_frames*num_query_frames, sizeof(Int4));/* hook up the HSP's */ hp_frame_start[0] = link_hsp_array[0]; /* Put entries with different frame parity into separate 'query_frame's. -cfj */ { Int4 cur_frame=0; for (index=0;index<number_of_hsps;index++) { H=link_hsp_array[index]; H->start_of_chain = FALSE; hp_frame_number[cur_frame]++; H->prev= index ? link_hsp_array[index-1] : NULL; H->next= index<(number_of_hsps-1) ? link_hsp_array[index+1] : NULL; if (H->prev != NULL && ((H->hsp->context/3) != (H->prev->hsp->context/3) || (SIGN(H->hsp->subject.frame) != SIGN(H->prev->hsp->subject.frame)))) { /* If frame switches, then start new list. */ hp_frame_number[cur_frame]--; hp_frame_number[++cur_frame]++; hp_frame_start[cur_frame] = H; H->prev->next = NULL; H->prev = NULL; } } num_query_frames = cur_frame+1; } /* max_q_diff is the maximum amount q.offset can differ from q.offset_trim */ /* This is used to break out of H2 loop early */ for (index=0;index<number_of_hsps;index++) { H = link_hsp_array[index]; hsp = H->hsp; H->q_offset_trim = hsp->query.offset + MIN(((hsp->query.length)/4), 5); H->q_end_trim = hsp->query.end - MIN(((hsp->query.length)/4), 5); H->s_offset_trim = hsp->subject.offset + MIN(((hsp->subject.length)/4), 5); H->s_end_trim = hsp->subject.end - MIN(((hsp->subject.length)/4), 5); } max_q_diff = 5; for (frame_index=0; frame_index<num_query_frames; frame_index++) { hp_start = LinkHSPStructReset(hp_start); hp_start->next = hp_frame_start[frame_index]; hp_frame_start[frame_index]->prev = hp_start; number_of_hsps = hp_frame_number[frame_index]; query_context = hp_start->next->hsp->context; length_adjustment = query_info->length_adjustments[query_context]; query_length = BLAST_GetQueryLength(query_info, query_context); query_length = MAX(query_length - length_adjustment, 1); subject_length = subject->length; /* in nucleotides even for tblast[nx] */ /* If subject is translated, length adjustment is given in nucleotide scale. */ if (program_number == blast_type_tblastn || program_number == blast_type_tblastx) { length_adjustment /= CODON_LENGTH; subject_length /= CODON_LENGTH; } subject_length = MAX(subject_length - length_adjustment, 1); lh_helper[0].ptr = hp_start; lh_helper[0].q_off_trim = 0; lh_helper[0].s_off_trim = 0; lh_helper[0].maxsum1 = -10000; lh_helper[0].next_larger = 0; /* lh_helper[0] = empty = additional end marker * lh_helper[1] = hsp_start = empty entry used in original code * lh_helper[2] = hsp_array->next = hsp_array[0] * lh_helper[i] = ... = hsp_array[i-2] (for i>=2) */ first_pass=1; /* do full search */ path_changed=1; for (H=hp_start->next; H!=NULL; H=H->next) H->hsp_link.changed=1; while (number_of_hsps > 0) { Int4 max[3]; max[0]=max[1]=max[2]=-10000; /* Initialize the 'best' parameter */ best[0] = best[1] = NULL; /* See if we can avoid recomputing all scores: * - Find the max paths (based on old scores). * - If no paths were changed by removal of nodes (ie research==0) * then these max paths are still the best. * - else if these max paths were unchanged, then they are still the best. */ use_current_max=0; if (!first_pass){ Int4 max0,max1; /* Find the current max sums */ if(!ignore_small_gaps){ max0 = -cutoff[0]; max1 = -cutoff[1]; for (H=hp_start->next; H!=NULL; H=H->next) { Int4 sum0=H->hsp_link.sum[0]; Int4 sum1=H->hsp_link.sum[1]; if(sum0>=max0) { max0=sum0; best[0]=H; } if(sum1>=max1) { max1=sum1; best[1]=H; } } } else { maxscore = -cutoff[1]; for (H=hp_start->next; H!=NULL; H=H->next) { Int4 sum=H->hsp_link.sum[1]; if(sum>=maxscore)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -