📄 greedy_align.c
字号:
/* * =========================================================================== * PRODUCTION $Log: greedy_align.c,v $ * PRODUCTION Revision 1000.2 2004/06/01 18:08:02 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.19 * PRODUCTION * =========================================================================== *//* $Id: greedy_align.c,v 1000.2 2004/06/01 18:08:02 gouriano Exp $ * =========================================================================== * * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information * * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's official duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular * purpose. * * Please cite the author in any work or product based on this material. * * =========================================================================== * * Author: Webb Miller and Co. Adopted for NCBI libraries by Sergey Shavirin * * Initial Creation Date: 10/27/1999 * *//** @file greedy_align.c * Greedy gapped alignment functions */static char const rcsid[] = "$Id: greedy_align.c,v 1000.2 2004/06/01 18:08:02 gouriano Exp $";#include <algo/blast/core/greedy_align.h>#include <algo/blast/core/blast_util.h> /* for NCBI2NA_UNPACK_BASE macros */enum { EDIT_OP_MASK = 0x3, EDIT_OP_ERR = 0x0, EDIT_OP_INS = 0x1, EDIT_OP_DEL = 0x2, EDIT_OP_REP = 0x3};enum { /* half of the (fixed) match score */ ERROR_FRACTION=2, /* 1/this */ MAX_SPACE=1000000, sC = 0, sI = 1, sD = 2, LARGE=100000000};/* -------- From original file edit.c ------------- */static Uint4 edit_val_get(edit_op_t op){ return op >> 2;}static Uint4 edit_opc_get(edit_op_t op){ return op & EDIT_OP_MASK;}static edit_op_t edit_op_cons(Uint4 op, Uint4 val){ return (val << 2) | (op & EDIT_OP_MASK);}static edit_op_t edit_op_inc(edit_op_t op, Uint4 n){ return edit_op_cons(edit_opc_get(op), edit_val_get(op) + n);}static edit_op_t edit_op_inc_last(MBGapEditScript *es, Uint4 n){ edit_op_t *last; ASSERT (es->num > 0); last = &(es->op[es->num-1]); *last = edit_op_inc(*last, n); return *last;}static Int4 edit_script_ready(MBGapEditScript *es, Uint4 n){ edit_op_t *p; Uint4 m = n + n/2; if (es->size <= n) { p = realloc(es->op, m*sizeof(edit_op_t)); if (p == 0) { return 0; } else { es->op = p; es->size = m; } } return 1;}static Int4 edit_script_readyplus(MBGapEditScript *es, Uint4 n){ if (es->size - es->num <= n) return edit_script_ready(es, n + es->num); return 1;}static Int4 edit_script_put(MBGapEditScript *es, Uint4 op, Uint4 n){ if (!edit_script_readyplus(es, 2)) return 0; es->last = op; ASSERT(op != 0); es->op[es->num] = edit_op_cons(op, n); es->num += 1; es->op[es->num] = 0; /* sentinal */ return 1;}static MBGapEditScript *edit_script_init(MBGapEditScript *es){ es->op = 0; es->size = es->num = 0; es->last = 0; edit_script_ready(es, 8); return es;}static edit_op_t *edit_script_first(MBGapEditScript *es){ return es->num > 0 ? &es->op[0] : 0;}static edit_op_t *edit_script_next(MBGapEditScript *es, edit_op_t *op){ /* XXX - assumes flat address space */ if (&es->op[0] <= op && op < &es->op[es->num-1]) return op+1; else return 0;}static Int4 edit_script_more(MBGapEditScript *data, Uint4 op, Uint4 k){ if (op == EDIT_OP_ERR) {#ifdef ERR_POST_EX_DEFINED ErrPostEx(SEV_FATAL, 1, 0, "edit_script_more: bad opcode %d:%d", op, k);#endif return -1; } if (edit_opc_get(data->last) == op) edit_op_inc_last(data, k); else edit_script_put(data, op, k); return 0;}MBGapEditScript *MBGapEditScriptAppend(MBGapEditScript *es, MBGapEditScript *et){ edit_op_t *op; for (op = edit_script_first(et); op; op = edit_script_next(et, op)) edit_script_more(es, edit_opc_get(*op), edit_val_get(*op)); return es;}MBGapEditScript *MBGapEditScriptNew(void){ MBGapEditScript *es = calloc(1, sizeof(*es)); if (!es) return 0; return edit_script_init(es);}MBGapEditScript *MBGapEditScriptFree(MBGapEditScript *es){ if (es) { if (es->op) sfree(es->op); memset(es, 0, sizeof(*es)); sfree(es); } return 0;}static Int4 edit_script_del(MBGapEditScript *data, Uint4 k){ return edit_script_more(data, EDIT_OP_DEL, k);}static Int4 edit_script_ins(MBGapEditScript *data, Uint4 k){ return edit_script_more(data, EDIT_OP_INS, k);}static Int4 edit_script_rep(MBGapEditScript *data, Uint4 k){ return edit_script_more(data, EDIT_OP_REP, k);}static MBGapEditScript *edit_script_reverse_inplace(MBGapEditScript *es){ Uint4 i; const Uint4 num = es->num; const Uint4 mid = num/2; const Uint4 end = num-1; for (i = 0; i < mid; ++i) { const edit_op_t t = es->op[i]; es->op[i] = es->op[end-i]; es->op[end-i] = t; } return es;}SMBSpace* MBSpaceNew(){ SMBSpace* p; Int4 amount; p = (SMBSpace*) malloc(sizeof(SMBSpace)); amount = MAX_SPACE; p->space_array = (SThreeVal*) malloc(sizeof(SThreeVal)*amount); if (p->space_array == NULL) { sfree(p); return NULL; } p->used = 0; p->size = amount; p->next = NULL; return p;}static void refresh_mb_space(SMBSpace* sp){ while (sp) { sp->used = 0; sp = sp->next; }}void MBSpaceFree(SMBSpace* sp){ SMBSpace* next_sp; while (sp) { next_sp = sp->next; sfree(sp->space_array); sfree(sp); sp = next_sp; }}static SThreeVal* get_mb_space(SMBSpace* S, Int4 amount){ SThreeVal* s; if (amount < 0) return NULL; while (S->used+amount > S->size) { if (S->next == NULL) if ((S->next = MBSpaceNew()) == NULL) {#ifdef ERR_POST_EX_DEFINED ErrPostEx(SEV_WARNING, 0, 0, "Cannot get new space for greedy extension");#endif return NULL; } S = S->next; } s = S->space_array+S->used; S->used += amount; return s;}/* ----- */static Int4 gcd(Int4 a, Int4 b){ Int4 c; if (a < b) { c = a; a = b; b = c; } while ((c = a % b) != 0) { a = b; b = c; } return b;}static Int4 gdb3(Int4* a, Int4* b, Int4* c){ Int4 g; if (*b == 0) g = gcd(*a, *c); else g = gcd(*a, gcd(*b, *c)); if (g > 1) { *a /= g; *b /= g; *c /= g; } return g;}static Int4 get_lastC(SThreeVal** flast_d, Int4* lower, Int4* upper, Int4* d, Int4 diag, Int4 Mis_cost, Int4* row1){ Int4 row; if (diag >= lower[(*d)-Mis_cost] && diag <= upper[(*d)-Mis_cost]) { row = flast_d[(*d)-Mis_cost][diag].C; if (row >= MAX(flast_d[*d][diag].I, flast_d[*d][diag].D)) { *d = *d-Mis_cost; *row1 = row; return sC; } } if (flast_d[*d][diag].I > flast_d[*d][diag].D) { *row1 = flast_d[*d][diag].I; return sI; } else { *row1 = flast_d[*d][diag].D; return sD; }}static Int4 get_last_ID(SThreeVal** flast_d, Int4* lower, Int4* upper, Int4* d, Int4 diag, Int4 GO_cost, Int4 GE_cost, Int4 IorD){ Int4 ndiag; Int4 row; if (IorD == sI) { ndiag = diag -1;} else ndiag = diag+1; if (ndiag >= lower[(*d)-GE_cost] && ndiag <=upper[(*d)-GE_cost]) row = (IorD == sI)? flast_d[(*d)-GE_cost][ndiag].I: flast_d[(*d)-GE_cost][ndiag].D; else row = -100; if (ndiag >= lower[(*d)-GO_cost-GE_cost] && ndiag <= upper[(*d)-GO_cost-GE_cost] && row < flast_d[(*d)-GO_cost-GE_cost][ndiag].C) { *d = (*d)-GO_cost-GE_cost; return sC; } *d = (*d)-GE_cost; return IorD;}static Int4 get_lastI(SThreeVal** flast_d, Int4* lower, Int4* upper, Int4* d, Int4 diag, Int4 GO_cost, Int4 GE_cost){ return get_last_ID(flast_d, lower, upper, d, diag, GO_cost, GE_cost, sI);}static int get_lastD(SThreeVal** flast_d, Int4* lower, Int4* upper, Int4* d, Int4 diag, Int4 GO_cost, Int4 GE_cost){ return get_last_ID(flast_d, lower, upper, d, diag, GO_cost, GE_cost, sD);}/* --- From file align.c --- *//* ----- */static Int4 get_last(Int4 **flast_d, Int4 d, Int4 diag, Int4 *row1){ if (flast_d[d-1][diag-1] > MAX(flast_d[d-1][diag], flast_d[d-1][diag+1])) { *row1 = flast_d[d-1][diag-1]; return diag-1; } if (flast_d[d-1][diag] > flast_d[d-1][diag+1]) { *row1 = flast_d[d-1][diag]; return diag; } *row1 = flast_d[d-1][diag+1]; return diag+1;}/* Version to search a (possibly) packed nucl. sequence againstan unpacked sequence. s2 is the packed nucl. sequence.s1 can be packed or unpacked. If rem == 4, then s1 is unpacked.len2 corresponds to the unpacked (true) length. * Basic O(ND) time, O(N) space, alignment function. * Parameters: * s1, len1 - first sequence and its length * s2, len2 - second sequence and its length * reverse - direction of alignment * xdrop_threshold - * mismatch_cost - * e1, e2 - endpoint of the computed alignment * edit_script - * rem - offset shift within a packed sequence */Int4 BLAST_GreedyAlign(const Uint1* s1, Int4 len1, const Uint1* s2, Int4 len2, Boolean reverse, Int4 xdrop_threshold, Int4 match_cost, Int4 mismatch_cost, Int4* e1, Int4* e2, SGreedyAlignMem* gamp, MBGapEditScript *S, Uint1 rem){#define ICEIL(x,y) ((((x)-1)/(y))+1) Int4 col, /* column number */ d, /* current distance */ k, /* current diagonal */ flower, fupper, /* boundaries for searching diagonals */ row, /* row number */ MAX_D, /* maximum cost */ ORIGIN, return_val = 0; Int4** flast_d = gamp->flast_d; /* rows containing the last d */ Int4* max_row; /* reached for cost d=0, ... len1. */ Int4 X_pen = xdrop_threshold; Int4 M_half = match_cost/2; Int4 Op_cost = mismatch_cost + M_half*2; Int4 D_diff = ICEIL(X_pen+M_half, Op_cost); Int4 x, cur_max, b_diag = 0, best_diag = INT4_MAX/2; Int4* max_row_free = gamp->max_row_free; char nlower = 0, nupper = 0; SMBSpace* space = gamp->space; Int4 max_len = len2; MAX_D = (Int4) (len1/ERROR_FRACTION + 1); ORIGIN = MAX_D + 2; *e1 = *e2 = 0; if (reverse) { if (!(rem & 4)) { for (row = 0; row < len2 && row < len1 && (s2[len2-1-row] == NCBI2NA_UNPACK_BASE(s1[(len1-1-row)/4], 3-(len1-1-row)%4)); row++) /*empty*/ ;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -