⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 greedy_align.c

📁 ncbi源码
💻 C
📖 第 1 页 / 共 2 页
字号:
/* * =========================================================================== * PRODUCTION $Log: greedy_align.c,v $ * PRODUCTION Revision 1000.2  2004/06/01 18:08:02  gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.19 * PRODUCTION * =========================================================================== *//* $Id: greedy_align.c,v 1000.2 2004/06/01 18:08:02 gouriano Exp $ * =========================================================================== * *                            PUBLIC DOMAIN NOTICE *               National Center for Biotechnology Information * *  This software/database is a "United States Government Work" under the *  terms of the United States Copyright Act.  It was written as part of *  the author's official duties as a United States Government employee and *  thus cannot be copyrighted.  This software/database is freely available *  to the public for use. The National Library of Medicine and the U.S. *  Government have not placed any restriction on its use or reproduction. * *  Although all reasonable efforts have been taken to ensure the accuracy *  and reliability of the software and data, the NLM and the U.S. *  Government do not and cannot warrant the performance or results that *  may be obtained by using this software or data. The NLM and the U.S. *  Government disclaim all warranties, express or implied, including *  warranties of performance, merchantability or fitness for any particular *  purpose. * *  Please cite the author in any work or product based on this material. * * =========================================================================== * * Author: Webb Miller and Co. Adopted for NCBI libraries by Sergey Shavirin * * Initial Creation Date: 10/27/1999 * *//** @file greedy_align.c * Greedy gapped alignment functions */static char const rcsid[] =     "$Id: greedy_align.c,v 1000.2 2004/06/01 18:08:02 gouriano Exp $";#include <algo/blast/core/greedy_align.h>#include <algo/blast/core/blast_util.h> /* for NCBI2NA_UNPACK_BASE macros */enum {    EDIT_OP_MASK = 0x3,    EDIT_OP_ERR  = 0x0,    EDIT_OP_INS  = 0x1,    EDIT_OP_DEL  = 0x2,    EDIT_OP_REP  = 0x3};enum {         /* half of the (fixed) match score */    ERROR_FRACTION=2,  /* 1/this */    MAX_SPACE=1000000,    sC = 0, sI = 1, sD = 2, LARGE=100000000};/* -------- From original file edit.c ------------- */static Uint4 edit_val_get(edit_op_t op){    return op >> 2;}static Uint4 edit_opc_get(edit_op_t op){    return op & EDIT_OP_MASK;}static edit_op_t edit_op_cons(Uint4 op, Uint4 val){    return (val << 2) | (op & EDIT_OP_MASK);}static edit_op_t edit_op_inc(edit_op_t op, Uint4 n){    return edit_op_cons(edit_opc_get(op), edit_val_get(op) + n);}static edit_op_t edit_op_inc_last(MBGapEditScript *es, Uint4 n){    edit_op_t *last;    ASSERT (es->num > 0);    last = &(es->op[es->num-1]);    *last = edit_op_inc(*last, n);    return *last;}static Int4 edit_script_ready(MBGapEditScript *es, Uint4 n){    edit_op_t *p;    Uint4 m = n + n/2;        if (es->size <= n) {        p = realloc(es->op, m*sizeof(edit_op_t));        if (p == 0) {            return 0;        } else {            es->op = p;            es->size = m;        }    }    return 1;}static Int4 edit_script_readyplus(MBGapEditScript *es, Uint4 n){    if (es->size - es->num <= n)        return edit_script_ready(es, n + es->num);    return 1;}static Int4 edit_script_put(MBGapEditScript *es, Uint4 op, Uint4 n){    if (!edit_script_readyplus(es, 2))        return 0;    es->last = op;    ASSERT(op != 0);    es->op[es->num] = edit_op_cons(op, n);    es->num += 1;    es->op[es->num] = 0; /* sentinal */    return 1;}static MBGapEditScript *edit_script_init(MBGapEditScript *es){	es->op = 0;	es->size = es->num = 0;	es->last = 0;	edit_script_ready(es, 8);	return es;}static edit_op_t *edit_script_first(MBGapEditScript *es){    return es->num > 0 ? &es->op[0] : 0;}static edit_op_t *edit_script_next(MBGapEditScript *es, edit_op_t *op){    /* XXX - assumes flat address space */    if (&es->op[0] <= op && op < &es->op[es->num-1])        return op+1;    else        return 0;}static Int4 edit_script_more(MBGapEditScript *data, Uint4 op, Uint4 k){    if (op == EDIT_OP_ERR) {#ifdef ERR_POST_EX_DEFINED        ErrPostEx(SEV_FATAL, 1, 0,                   "edit_script_more: bad opcode %d:%d", op, k);#endif        return -1;    }        if (edit_opc_get(data->last) == op)        edit_op_inc_last(data, k);    else        edit_script_put(data, op, k);    return 0;}MBGapEditScript *MBGapEditScriptAppend(MBGapEditScript *es, MBGapEditScript *et){    edit_op_t *op;        for (op = edit_script_first(et); op; op = edit_script_next(et, op))        edit_script_more(es, edit_opc_get(*op), edit_val_get(*op));    return es;}MBGapEditScript *MBGapEditScriptNew(void){    MBGapEditScript *es = calloc(1, sizeof(*es));    if (!es)        return 0;    return edit_script_init(es);}MBGapEditScript *MBGapEditScriptFree(MBGapEditScript *es){    if (es) {        if (es->op)            sfree(es->op);        memset(es, 0, sizeof(*es));        sfree(es);    }    return 0;}static Int4 edit_script_del(MBGapEditScript *data, Uint4 k){    return edit_script_more(data, EDIT_OP_DEL, k);}static Int4 edit_script_ins(MBGapEditScript *data, Uint4 k){    return edit_script_more(data, EDIT_OP_INS, k);}static Int4 edit_script_rep(MBGapEditScript *data, Uint4 k){    return edit_script_more(data, EDIT_OP_REP, k);}static MBGapEditScript *edit_script_reverse_inplace(MBGapEditScript *es){    Uint4 i;    const Uint4 num = es->num;    const Uint4 mid = num/2;    const Uint4 end = num-1;        for (i = 0; i < mid; ++i) {        const edit_op_t t = es->op[i];        es->op[i] = es->op[end-i];        es->op[end-i] = t;    }    return es;}SMBSpace* MBSpaceNew(){    SMBSpace* p;    Int4 amount;        p = (SMBSpace*) malloc(sizeof(SMBSpace));    amount = MAX_SPACE;    p->space_array = (SThreeVal*) malloc(sizeof(SThreeVal)*amount);    if (p->space_array == NULL) {       sfree(p);       return NULL;    }    p->used = 0;     p->size = amount;    p->next = NULL;    return p;}static void refresh_mb_space(SMBSpace* sp){   while (sp) {      sp->used = 0;      sp = sp->next;   }}void MBSpaceFree(SMBSpace* sp){   SMBSpace* next_sp;   while (sp) {      next_sp = sp->next;      sfree(sp->space_array);      sfree(sp);      sp = next_sp;   }}static SThreeVal* get_mb_space(SMBSpace* S, Int4 amount){    SThreeVal* s;    if (amount < 0)         return NULL;          while (S->used+amount > S->size) {       if (S->next == NULL)          if ((S->next = MBSpaceNew()) == NULL) {#ifdef ERR_POST_EX_DEFINED	     ErrPostEx(SEV_WARNING, 0, 0, "Cannot get new space for greedy extension");#endif	     return NULL;          }       S = S->next;    }    s = S->space_array+S->used;    S->used += amount;        return s;}/* ----- */static Int4 gcd(Int4 a, Int4 b){    Int4 c;    if (a < b) {        c = a;        a = b; b = c;    }    while ((c = a % b) != 0) {        a = b; b = c;    }    return b;}static Int4 gdb3(Int4* a, Int4* b, Int4* c){    Int4 g;    if (*b == 0) g = gcd(*a, *c);    else g = gcd(*a, gcd(*b, *c));    if (g > 1) {        *a /= g;        *b /= g;        *c /= g;    }    return g;}static Int4 get_lastC(SThreeVal** flast_d, Int4* lower, Int4* upper,                       Int4* d, Int4 diag, Int4 Mis_cost, Int4* row1){    Int4 row;        if (diag >= lower[(*d)-Mis_cost] && diag <= upper[(*d)-Mis_cost]) {        row = flast_d[(*d)-Mis_cost][diag].C;        if (row >= MAX(flast_d[*d][diag].I, flast_d[*d][diag].D)) {            *d = *d-Mis_cost;            *row1 = row;            return sC;        }    }    if (flast_d[*d][diag].I > flast_d[*d][diag].D) {        *row1 = flast_d[*d][diag].I;        return sI;    } else {        *row1 = flast_d[*d][diag].D;        return sD;    }}static Int4 get_last_ID(SThreeVal** flast_d, Int4* lower, Int4* upper,                         Int4* d, Int4 diag, Int4 GO_cost,                         Int4 GE_cost, Int4 IorD){    Int4 ndiag;     Int4 row;    if (IorD == sI) { ndiag = diag -1;} else ndiag = diag+1;    if (ndiag >= lower[(*d)-GE_cost] && ndiag <=upper[(*d)-GE_cost])         row = (IorD == sI)? flast_d[(*d)-GE_cost][ndiag].I: flast_d[(*d)-GE_cost][ndiag].D;    else row = -100;    if (ndiag >= lower[(*d)-GO_cost-GE_cost] && ndiag <= upper[(*d)-GO_cost-GE_cost] && row < flast_d[(*d)-GO_cost-GE_cost][ndiag].C) {        *d = (*d)-GO_cost-GE_cost;        return sC;    }    *d = (*d)-GE_cost;    return IorD;}static Int4 get_lastI(SThreeVal** flast_d, Int4* lower, Int4* upper,                       Int4* d, Int4 diag, Int4 GO_cost, Int4 GE_cost){    return get_last_ID(flast_d, lower, upper, d, diag, GO_cost, GE_cost, sI);}static int get_lastD(SThreeVal** flast_d, Int4* lower, Int4* upper,                      Int4* d, Int4 diag, Int4 GO_cost, Int4 GE_cost){    return get_last_ID(flast_d, lower, upper, d, diag, GO_cost, GE_cost, sD);}/* --- From file align.c --- *//* ----- */static Int4 get_last(Int4 **flast_d, Int4 d, Int4 diag, Int4 *row1){    if (flast_d[d-1][diag-1] > MAX(flast_d[d-1][diag], flast_d[d-1][diag+1])) {        *row1 = flast_d[d-1][diag-1];        return diag-1;    }     if (flast_d[d-1][diag] > flast_d[d-1][diag+1]) {        *row1 = flast_d[d-1][diag];        return diag;    }    *row1 = flast_d[d-1][diag+1];    return diag+1;}/*	Version to search a (possibly) packed nucl. sequence againstan unpacked sequence.  s2 is the packed nucl. sequence.s1 can be packed or unpacked. If rem == 4, then s1 is unpacked.len2 corresponds to the unpacked (true) length. * Basic O(ND) time, O(N) space, alignment function.  * Parameters: *   s1, len1        - first sequence and its length *   s2, len2        - second sequence and its length *   reverse         - direction of alignment *   xdrop_threshold -  *   mismatch_cost   - *   e1, e2          - endpoint of the computed alignment *   edit_script     - *   rem             - offset shift within a packed sequence */Int4 BLAST_GreedyAlign(const Uint1* s1, Int4 len1,			  const Uint1* s2, Int4 len2,			  Boolean reverse, Int4 xdrop_threshold, 			  Int4 match_cost, Int4 mismatch_cost,			  Int4* e1, Int4* e2, 			  SGreedyAlignMem* gamp, MBGapEditScript *S,                          Uint1 rem){#define ICEIL(x,y) ((((x)-1)/(y))+1)    Int4 col,			/* column number */        d,				/* current distance */        k,				/* current diagonal */        flower, fupper,            /* boundaries for searching diagonals */        row,		        /* row number */        MAX_D, 			/* maximum cost */        ORIGIN,        return_val = 0;    Int4** flast_d = gamp->flast_d; /* rows containing the last d */    Int4* max_row;		/* reached for cost d=0, ... len1.  */        Int4 X_pen = xdrop_threshold;    Int4 M_half = match_cost/2;    Int4 Op_cost = mismatch_cost + M_half*2;    Int4 D_diff = ICEIL(X_pen+M_half, Op_cost);        Int4 x, cur_max, b_diag = 0, best_diag = INT4_MAX/2;    Int4* max_row_free = gamp->max_row_free;    char nlower = 0, nupper = 0;    SMBSpace* space = gamp->space;    Int4 max_len = len2;     MAX_D = (Int4) (len1/ERROR_FRACTION + 1);    ORIGIN = MAX_D + 2;    *e1 = *e2 = 0;        if (reverse) {       if (!(rem & 4)) {          for (row = 0; row < len2 && row < len1 &&                   (s2[len2-1-row] ==                   NCBI2NA_UNPACK_BASE(s1[(len1-1-row)/4],                                         3-(len1-1-row)%4));                row++)             /*empty*/ ;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -