📄 blast_hits.c
字号:
s = &subject[hsp->subject.offset]; } else { s = &query[hsp->query.offset]; q = &subject[hsp->subject.offset]; } num_ident = 0; align_length = 0; for (esp = hsp->gap_info->esp; esp; esp = esp->next) { switch (esp->op_type) { case eGapAlignSub: /* Substitution */ align_length += esp->num; for (i=0; i<esp->num; i++) { if (*q == *s) num_ident++; ++q; s += CODON_LENGTH; } break; case eGapAlignIns: /* Insertion */ align_length += esp->num; s += esp->num * CODON_LENGTH; break; case eGapAlignDel: /* Deletion */ align_length += esp->num; q += esp->num; break; case eGapAlignDel2: /* Gap of two nucleotides. */ s -= 2; break; case eGapAlignDel1: /* Gap of one nucleotide. */ s -= 1; break; case eGapAlignIns1: /* Insertion of one nucleotide. */ s += 1; break; case eGapAlignIns2: /* Insertion of two nucleotides. */ s += 2; break; default: s += esp->num * CODON_LENGTH; q += esp->num; break; } } *align_length_ptr = align_length; *num_ident_ptr = num_ident; return 0;}/** TRUE if c is between a and b; f between d and f. Determines if the * coordinates are already in an HSP that has been evaluated. */#define CONTAINED_IN_HSP(a,b,c,d,e,f) (((a <= c && b >= c) && (d <= f && e >= f)) ? TRUE : FALSE)/** Checks if the first HSP is contained in the second. */static Boolean Blast_HSPContained(BlastHSP* hsp1, BlastHSP* hsp2){ Boolean hsp_start_is_contained=FALSE, hsp_end_is_contained=FALSE; if (hsp1->score > hsp2->score) return FALSE; if (CONTAINED_IN_HSP(hsp2->query.offset, hsp2->query.end, hsp1->query.offset, hsp2->subject.offset, hsp2->subject.end, hsp1->subject.offset) == TRUE) { hsp_start_is_contained = TRUE; } if (CONTAINED_IN_HSP(hsp2->query.offset, hsp2->query.end, hsp1->query.end, hsp2->subject.offset, hsp2->subject.end, hsp1->subject.end) == TRUE) { hsp_end_is_contained = TRUE; } return (hsp_start_is_contained && hsp_end_is_contained);}/** Comparison callback function for sorting HSPs by score */static intscore_compare_hsps(const void* v1, const void* v2){ BlastHSP* h1,* h2; h1 = *((BlastHSP**) v1); h2 = *((BlastHSP**) v2); /* No need to check e-values, because for the same subject sequence e-value is always inverse proportional to score. However e-values are less sensitive, since they both can be 0, when scores are large but different. */ if (h1->score > h2->score) return -1; else if (h1->score < h2->score) return 1; /* Tie-breakers: decreasing subject offsets; decreasing subject ends, decreasing query offsets, decreasing query ends */ else if (h1->subject.offset > h2->subject.offset) return -1; else if (h1->subject.offset < h2->subject.offset) return 1; else if (h1->subject.end > h2->subject.end) return -1; else if (h1->subject.end < h2->subject.end) return 1; else if (h1->query.offset > h2->query.offset) return -1; else if (h1->query.offset < h2->query.offset) return 1; else if (h1->query.end > h2->query.end) return -1; else if (h1->query.end < h2->query.end) return 1; return 0;}#define FUZZY_EVALUE_COMPARE_FACTOR 1e-6/** Compares 2 real numbers up to a fixed precision */static int fuzzy_evalue_comp(double evalue1, double evalue2){ if (evalue1 < (1-FUZZY_EVALUE_COMPARE_FACTOR)*evalue2) return -1; else if (evalue1 > (1+FUZZY_EVALUE_COMPARE_FACTOR)*evalue2) return 1; else return 0;}/** Comparison callback function for sorting HSPs by e-value and score, before saving BlastHSPList in a BlastHitList. E-value has priority over score, because lower scoring HSPs might have lower e-values, if they are linked with sum statistics. E-values are compared only up to a certain precision. */static intevalue_compare_hsps(const void* v1, const void* v2){ BlastHSP* h1,* h2; int retval = 0; h1 = *((BlastHSP**) v1); h2 = *((BlastHSP**) v2); if ((retval = fuzzy_evalue_comp(h1->evalue, h2->evalue)) != 0) return retval; return score_compare_hsps(v1, v2);}/** Comparison callback function for sorting HSPs by diagonal and flagging * the HSPs contained in or identical to other HSPs for future deletion.*/static intdiag_uniq_compare_hsps(const void* v1, const void* v2){ BlastHSP* h1,* h2; BlastHSP** hp1,** hp2; hp1 = (BlastHSP**) v1; hp2 = (BlastHSP**) v2; h1 = *hp1; h2 = *hp2; if (h1==NULL && h2==NULL) return 0; else if (h1==NULL) return 1; else if (h2==NULL) return -1; /* Separate different queries and/or strands */ if (h1->context < h2->context) return -1; else if (h1->context > h2->context) return 1; /* Check if one HSP is contained in the other, if so, leave only the longer one, given it has lower evalue */ if (h1->query.offset >= h2->query.offset && h1->query.end <= h2->query.end && h1->subject.offset >= h2->subject.offset && h1->subject.end <= h2->subject.end && h1->score <= h2->score) { (*hp1)->score = 0; } else if (h1->query.offset <= h2->query.offset && h1->query.end >= h2->query.end && h1->subject.offset <= h2->subject.offset && h1->subject.end >= h2->subject.end && h1->score >= h2->score) { (*hp2)->score = 0; } return (h1->query.offset - h1->subject.offset) - (h2->query.offset - h2->subject.offset);}static intnull_compare_hsps(const void* v1, const void* v2){ BlastHSP* h1,* h2; h1 = *((BlastHSP**) v1); h2 = *((BlastHSP**) v2); if ((h1 && h2) || (!h1 && !h2)) return 0; else if (!h1) return 1; else return -1;}/** Comparison callback for sorting HSPs by diagonal. Do not compare * diagonals for HSPs from different contexts. The absolute value of the * return is the diagonal difference between the HSPs. */static intdiag_compare_hsps(const void* v1, const void* v2){ BlastHSP* h1,* h2; h1 = *((BlastHSP**) v1); h2 = *((BlastHSP**) v2); if (h1->context < h2->context) return INT4_MIN; else if (h1->context > h2->context) return INT4_MAX; return (h1->query.offset - h1->subject.offset) - (h2->query.offset - h2->subject.offset);}/** An auxiliary structure used for merging HSPs */typedef struct BlastHSPSegment { Int4 q_start, q_end; Int4 s_start, s_end; struct BlastHSPSegment* next;} BlastHSPSegment;#define OVERLAP_DIAG_CLOSE 10/** Merge the two HSPs if they intersect. * @param hsp1 The first HSP; also contains the result of merge. [in] [out] * @param hsp2 The second HSP [in] * @param start The starting offset of the subject coordinates where the * intersection is possible [in]*/static BooleanBlast_HSPsMerge(BlastHSP* hsp1, BlastHSP* hsp2, Int4 start){ BlastHSPSegment* segments1,* segments2,* new_segment1,* new_segment2; GapEditScript* esp1,* esp2,* esp; Int4 end = start + DBSEQ_CHUNK_OVERLAP - 1; Int4 min_diag, max_diag, num1, num2, dist = 0, next_dist = 0; Int4 diag1_start, diag1_end, diag2_start, diag2_end; Int4 index; Uint1 intersection_found; EGapAlignOpType op_type = eGapAlignSub; if (!hsp1->gap_info || !hsp2->gap_info) { /* Assume that this is an ungapped alignment, hence simply compare diagonals. Do not merge if they are on different diagonals */ if (diag_compare_hsps(&hsp1, &hsp2) == 0 && hsp1->query.end >= hsp2->query.offset) { hsp1->query.end = hsp2->query.end; hsp1->subject.end = hsp2->subject.end; hsp1->query.length = hsp1->query.end - hsp1->query.offset; hsp1->subject.length = hsp1->subject.end - hsp1->subject.offset; return TRUE; } else return FALSE; } /* Find whether these HSPs have an intersection point */ segments1 = (BlastHSPSegment*) calloc(1, sizeof(BlastHSPSegment)); esp1 = hsp1->gap_info->esp; esp2 = hsp2->gap_info->esp; segments1->q_start = hsp1->query.offset; segments1->s_start = hsp1->subject.offset; while (segments1->s_start < start) { if (esp1->op_type == eGapAlignIns) segments1->q_start += esp1->num; else if (segments1->s_start + esp1->num < start) { if (esp1->op_type == eGapAlignSub) { segments1->s_start += esp1->num; segments1->q_start += esp1->num; } else if (esp1->op_type == eGapAlignDel) segments1->s_start += esp1->num; } else break; esp1 = esp1->next; } /* Current esp is the first segment within the overlap region */ segments1->s_end = segments1->s_start + esp1->num - 1; if (esp1->op_type == eGapAlignSub) segments1->q_end = segments1->q_start + esp1->num - 1; else segments1->q_end = segments1->q_start; new_segment1 = segments1; for (esp = esp1->next; esp; esp = esp->next) { new_segment1->next = (BlastHSPSegment*) calloc(1, sizeof(BlastHSPSegment)); new_segment1->next->q_start = new_segment1->q_end + 1; new_segment1->next->s_start = new_segment1->s_end + 1; new_segment1 = new_segment1->next; if (esp->op_type == eGapAlignSub) { new_segment1->q_end += esp->num - 1; new_segment1->s_end += esp->num - 1; } else if (esp->op_type == eGapAlignIns) { new_segment1->q_end += esp->num - 1; new_segment1->s_end = new_segment1->s_start; } else { new_segment1->s_end += esp->num - 1; new_segment1->q_end = new_segment1->q_start; } } /* Now create the second segments list */ segments2 = (BlastHSPSegment*) calloc(1, sizeof(BlastHSPSegment)); segments2->q_start = hsp2->query.offset; segments2->s_start = hsp2->subject.offset; segments2->q_end = segments2->q_start + esp2->num - 1; segments2->s_end = segments2->s_start + esp2->num - 1; new_segment2 = segments2; for (esp = esp2->next; esp && new_segment2->s_end < end; esp = esp->next) { new_segment2->next = (BlastHSPSegment*) calloc(1, sizeof(BlastHSPSegment)); new_segment2->next->q_start = new_segment2->q_end + 1; new_segment2->next->s_start = new_segment2->s_end + 1; new_segment2 = new_segment2->next; if (esp->op_type == eGapAlignIns) { new_segment2->s_end = new_segment2->s_start; new_segment2->q_end = new_segment2->q_start + esp->num - 1; } else if (esp->op_type == eGapAlignDel) { new_segment2->s_end = new_segment2->s_start + esp->num - 1; new_segment2->q_end = new_segment2->q_start; } else if (esp->op_type == eGapAlignSub) { new_segment2->s_end = new_segment2->s_start + esp->num - 1; new_segment2->q_end = new_segment2->q_start + esp->num - 1; } } new_segment1 = segments1; new_segment2 = segments2; intersection_found = 0; num1 = num2 = 0; while (new_segment1 && new_segment2 && !intersection_found) { if (new_segment1->s_end < new_segment2->s_start || new_segment1->q_end < new_segment2->q_start) { new_segment1 = new_segment1->next; num1++; continue; } if (new_segment2->s_end < new_segment1->s_start || new_segment2->q_end < new_segment1->q_start) { new_segment2 = new_segment2->next; num2++; continue; } diag1_start = new_segment1->s_start - new_segment1->q_start; diag2_start = new_segment2->s_start - new_segment2->q_start; diag1_end = new_segment1->s_end - new_segment1->q_end; diag2_end = new_segment2->s_end - new_segment2->q_end; if (diag1_start == diag1_end && diag2_start == diag2_end && diag1_start == diag2_start) { /* Both segments substitutions, on same diagonal */ intersection_found = 1; dist = new_segment2->s_end - new_segment1->s_start + 1; break; } else if (diag1_start != diag1_end && diag2_start != diag2_end) { /* Both segments gaps - must intersect */ intersection_found = 3; op_type = eGapAlignIns; dist = new_segment2->s_end - new_segment1->s_start + 1; next_dist = new_segment2->q_end - new_segment1->q_start - dist + 1; if (new_segment2->q_end - new_segment1->q_start < dist) { dist = new_segment2->q_end - new_segment1->q_start + 1; op_type = eGapAlignDel; next_dist = new_segment2->s_end - new_segment1->s_start - dist + 1; } break; } else if (diag1_start != diag1_end) { max_diag = MAX(diag1_start, diag1_end); min_diag = MIN(diag1_start, diag1_end); if (diag2_start >= min_diag && diag2_start <= max_diag) { intersection_found = 2; dist = diag2_start - min_diag + 1; if (new_segment1->s_end == new_segment1->s_start) next_dist = new_segment2->s_end - new_segment1->s_end + 1; else next_dist = new_segment2->q_end - new_segment1->q_end + 1; break; } } else if (diag2_start != diag2_end) { max_diag = MAX(diag2_start, diag2_end); min_diag = MIN(diag2_start, diag2_end); if (diag1_start >= min_diag && diag1_start <= max_diag) { intersection_found = 2; next_dist = max_diag - diag1_start + 1; if (new_segment2->s_end == new_segment2->s_start) dist = new_segment2->s_start - new_segment1->s_start + 1; else dist = new_segment2->q_start - new_segment1->q_start + 1; break; } } if (new_segment1->s_end <= new_segment2->s_end) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -