⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dfa.c

📁 harvest是一个下载html网页得机器人
💻 C
📖 第 1 页 / 共 3 页
字号:
    max_char = parse_info->charset->size;    pos = (short *) imalloc (sizeof(*pos) * (parse_info->position+1));    tran_set = cp_Set (parse_info->poset, parse_info->root->firstpos);    i = add_DFA_state (dfas, &tran_set, &dfa_from);    assert (i);    dfa_from->rule_no = 0;    poset = parse_info->poset;    followpos = parse_info->followpos;    while ((dfa_from = get_DFA_state (dfas)))    {        pos_i = pos;        j = i = 0;        for (tran_set = dfa_from->set; tran_set; tran_set = tran_set->next)            if ((c = posar[tran_set->value]->u.ch[0]) >= 0 && c <= max_char)                 *pos_i++ = tran_set->value;            else if (c < 0)            {                if (i == 0 || c > i)                    i = c;                c = posar[tran_set->value]->u.ch[1];                if (j == 0 || c > j)                    j = c;            }        *pos_i = -1;        dfa_from->rule_no = -i;        dfa_from->rule_nno = -j;        for (char_1 = 0; char_1 <= max_char; char_1++)        {            char_0 = max_char+1;            for (pos_i = pos; (i = *pos_i) != -1; ++pos_i)                if (posar[i]->u.ch[1] >= char_1                     && (c=posar[i]->u.ch[0]) < char_0)		{                    if (c < char_1)                        char_0 = char_1;                    else                        char_0 = c;		}            if (char_0 > max_char)                break;            char_1 = max_char;                            tran_set = mk_Set (poset);            for (pos_i = pos; (i = *pos_i) != -1; ++pos_i)            {                if ((c=posar[i]->u.ch[0]) > char_0 && c <= char_1)                    char_1 = c - 1;                /* forward chunk */                else if ((c=posar[i]->u.ch[1]) >= char_0 && c < char_1)                    char_1 = c;                    /* backward chunk */                if (posar[i]->u.ch[1] >= char_0 && posar[i]->u.ch[0] <= char_0)                    tran_set = union_Set (poset, tran_set, followpos[i]);            }            if (tran_set)            {                add_DFA_state (dfas, &tran_set, &dfa_to);                add_DFA_tran (dfas, dfa_from, char_0, char_1, dfa_to->no);            }        }    }    ifree (pos);    sort_DFA_states (dfas);}static void pr_tran (struct DFA_parse *parse_info, struct DFA_states *dfas){    struct DFA_state *s;    struct DFA_tran *tran;    int prev_no, i, c, no;    for (no=0; no < dfas->no; ++no)    {        s = dfas->sortarray[no];        assert (s->no == no);        printf ("DFA state %d", no);        if (s->rule_no)            printf ("#%d", s->rule_no);        if (s->rule_nno)            printf (" #%d", s->rule_nno);        putchar (':');        pr_Set (parse_info->poset, s->set);        prev_no = -1;        for (i=s->tran_no, tran=s->trans; --i >= 0; tran++)        {            if (prev_no != tran->to)            {                if (prev_no != -1)                    printf ("]\n");                printf (" goto %d on [", tran->to);                prev_no = tran->to;            }            for (c = tran->ch[0]; c <= tran->ch[1]; c++)                printf ("%s", str_char(c));        }        if (prev_no != -1)            printf ("]\n");        putchar ('\n');    }}static void pr_verbose (struct DFA_parse *parse_info, struct DFA_states *dfas){    long i, j;    int k;    printf ("%d/%d tree nodes used, %d bytes each\n",        parse_info->use_Tnode, parse_info->max_Tnode, sizeof (struct Tnode));    k = inf_BSetHandle (parse_info->charset, &i, &j);    printf ("%ld/%ld character sets, %d bytes each\n",            i/k, j/k, k*sizeof(BSetWord));    k = inf_SetType (parse_info->poset, &i, &j);    printf ("%ld/%ld poset items, %d bytes each\n", i, j, k);    printf ("%d DFA states\n", dfas->no);}static void pr_followpos (struct DFA_parse *parse_info){    struct Tnode **posar = parse_info->posar;    int i;    printf ("\nfollowsets:\n");    for (i=1; i <= parse_info->position; i++)    {        printf ("%3d:", i);        pr_Set (parse_info->poset, parse_info->followpos[i]);        putchar ('\t');        if (posar[i]->u.ch[0] < 0)            printf ("#%d", -posar[i]->u.ch[0]);        else if (posar[i]->u.ch[1] > posar[i]->u.ch[0])        {            putchar ('[');            out_char (posar[i]->u.ch[0]);            if (posar[i]->u.ch[1] > posar[i]->u.ch[0]+1)                putchar ('-');            out_char (posar[i]->u.ch[1]);            putchar (']');        }        else            out_char (posar[i]->u.ch[0]);        putchar ('\n');    }    putchar ('\n');}void dfa_parse_cmap_clean (struct DFA *d){    struct DFA_parse *dfa = d->parse_info;    assert (dfa);    if (!dfa->charMap)    {        dfa->charMapSize = 7;        dfa->charMap = (int *)	    imalloc (dfa->charMapSize * sizeof(*dfa->charMap));    }    dfa->charMap[0] = 0;}void dfa_parse_cmap_new (struct DFA *d, const int *cmap){    struct DFA_parse *dfa = d->parse_info;    const int *cp;    int size;    assert (dfa);    for (cp = cmap; *cp; cp += 2)        ;    size = cp - cmap + 1;    if (size > dfa->charMapSize)    {        if (dfa->charMap)            ifree (dfa->charMap);        dfa->charMapSize = size;        dfa->charMap = (int *) imalloc (size * sizeof(*dfa->charMap));    }    memcpy (dfa->charMap, cmap, size * sizeof(*dfa->charMap));}void dfa_parse_cmap_del (struct DFA *d, int from){    struct DFA_parse *dfa = d->parse_info;    int *cc;    assert (dfa);    for (cc = dfa->charMap; *cc; cc += 2)        if (*cc == from)        {            while ((cc[0] = cc[2]))            {                cc[1] = cc[3];                 cc += 2;            }            break;        }}void dfa_parse_cmap_add (struct DFA *d, int from, int to){    struct DFA_parse *dfa = d->parse_info;    int *cc;    int indx, size;    assert (dfa);    for (cc = dfa->charMap; *cc; cc += 2)        if (*cc == from)        {            cc[1] = to;            return ;        }    indx = cc - dfa->charMap;    size = dfa->charMapSize;    if (indx >= size)    {        int *cn = (int *) imalloc ((size+16) * sizeof(*dfa->charMap));        memcpy (cn, dfa->charMap, indx*sizeof(*dfa->charMap));        ifree (dfa->charMap);        dfa->charMap = cn;        dfa->charMapSize = size+16;    }    dfa->charMap[indx] = from;    dfa->charMap[indx+1] = to;    dfa->charMap[indx+2] = 0;}void dfa_parse_cmap_thompson (struct DFA *d){    static int thompson_chars[] =    {        '*',  L_CLOS0,        '+',  L_CLOS1,        '|',  L_ALT,        '^',  L_START,        '$',  L_END,        '?',  L_QUEST,        '.',  L_ANY,        '(',  L_LP,        ')',  L_RP,        ' ',  0,        '\t', 0,        '\n', 0,        0    };    dfa_parse_cmap_new (d, thompson_chars);}static struct DFA_parse *dfa_parse_init (void){    struct DFA_parse *parse_info = 	(struct DFA_parse *) imalloc (sizeof (struct DFA_parse));    parse_info->charset = mk_BSetHandle (255, 20);    parse_info->position = 0;    parse_info->rule = 0;    parse_info->root = NULL;    parse_info->anyset = mk_BSet (&parse_info->charset);    res_BSet (parse_info->charset, parse_info->anyset);    com_BSet (parse_info->charset, parse_info->anyset);    parse_info->use_Tnode = parse_info->max_Tnode = 0;    parse_info->start = parse_info->end = NULL;    parse_info->charMap = NULL;    parse_info->charMapSize = 0;    parse_info->cmap = NULL;    return parse_info;}static void rm_dfa_parse (struct DFA_parse **dfap){    struct DFA_parse *parse_info = *dfap;    assert (parse_info);    term_Tnode(parse_info);    rm_BSetHandle (&parse_info->charset);    ifree (parse_info->charMap);    ifree (parse_info);    *dfap = NULL;}static struct DFA_states *mk_dfas (struct DFA_parse *dfap, int poset_chunk){    struct DFA_states *dfas;    struct DFA_parse *parse_info = dfap;    assert (poset_chunk > 10);    assert (dfap);    parse_info->poset = mk_SetType (poset_chunk);    init_pos(parse_info);    init_followpos(parse_info);    assert (parse_info->root);    dfa_trav (parse_info, parse_info->root);    if (debug_dfa_followpos)        pr_followpos(parse_info);    init_DFA_states (&dfas, parse_info->poset, (int) (STATE_HASH));    mk_dfa_tran (parse_info, dfas);    if (debug_dfa_tran)    {        pr_tran (parse_info, dfas);    }    if (dfa_verbose)        pr_verbose (parse_info, dfas);    del_pos(parse_info);    del_followpos(parse_info);    rm_SetType (parse_info->poset);    return dfas;}struct DFA *dfa_init (void){    struct DFA *dfa;    dfa = (struct DFA *) imalloc (sizeof(*dfa));    dfa->parse_info = dfa_parse_init ();    dfa->state_info = NULL;    dfa->states = NULL;    dfa_parse_cmap_thompson (dfa);    return dfa;}void dfa_set_cmap (struct DFA *dfa, void *vp,		   const char **(*cmap)(void *vp, const char **from, int len)){    dfa->parse_info->cmap = cmap;    dfa->parse_info->cmap_data = vp;}int dfa_parse (struct DFA *dfa, const char **pattern){    struct Tnode *top;    struct DFA_parse *parse_info;    assert (dfa);    assert (dfa->parse_info);    parse_info = dfa->parse_info;    if (!parse_info->cmap)    {	res_BSet (parse_info->charset, parse_info->anyset);	add_BSet (parse_info->charset, parse_info->anyset, '\n');	com_BSet (parse_info->charset, parse_info->anyset);    }    do_parse (parse_info, pattern, &top);    if (parse_info->err_code)        return parse_info->err_code;    if ( !dfa->parse_info->root )        dfa->parse_info->root = top;    else    {        struct Tnode *n;        n = mk_Tnode (parse_info);        n->pos = OR;        n->u.p[0] = dfa->parse_info->root;        n->u.p[1] = top;        dfa->parse_info->root = n;    }    return 0;}void dfa_mkstate (struct DFA *dfa){    assert (dfa);    dfa->state_info = mk_dfas (dfa->parse_info, POSET_CHUNK);    dfa->no_states = dfa->state_info->no;    dfa->states = dfa->state_info->sortarray;    rm_dfa_parse (&dfa->parse_info);}void dfa_delete (struct DFA **dfap){    assert (dfap);    assert (*dfap);    if ((*dfap)->parse_info)        rm_dfa_parse (&(*dfap)->parse_info);    if ((*dfap)->state_info)        rm_DFA_states (&(*dfap)->state_info);    ifree (*dfap);    *dfap = NULL;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -