⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dfa.c

📁 harvest是一个下载html网页得机器人
💻 C
📖 第 1 页 / 共 3 页
字号:
		char mapfrom[2];		const char *mcp = mapfrom;		mapfrom[0] = ch0;		mapto = (*parse_info->cmap)(parse_info->cmap_data, &mcp, 1);		assert (mapto);		ch0 = mapto[0][0];	    }	    add_BSet (parse_info->charset, parse_info->look_chars, ch0);	    ch1 = nextchar_set (parse_info, &esc1);	}        if (!esc1 && ch1 == '-')        {            int open_range = 0;            if ((ch1 = nextchar_set (parse_info, &esc1)) == 0)                break;#if DFA_OPEN_RANGE            if (!esc1 && ch1 == ']')            {                ch1 = 255;                open_range = 1;            }#else            if (!esc1 && ch1 == ']')            {                add_BSet (parse_info->charset, parse_info->look_chars, '-');                break;            }#endif            if (!open_range && parse_info->cmap)            {                const char **mapto;		char mapfrom[2];                const char *mcp = mapfrom;                mapfrom[0] = ch1;                mapto = (*parse_info->cmap) (parse_info->cmap_data, &mcp, 1);                assert (mapto);                ch1 = mapto[0][0];            }            for (i=ch0; ++i<=ch1;)                add_BSet (parse_info->charset, parse_info->look_chars, i);            if (!open_range)                ch0 = nextchar_set (parse_info, &esc0);            else                break;        }        else        {            esc0 = esc1;            ch0 = ch1;        }    }    if (cc)        com_BSet (parse_info->charset, parse_info->look_chars);    return L_CHARS;}static int map_l_char (struct DFA_parse *parse_info){    const char **mapto;    const char *cp0 = (const char *) (parse_info->expr_ptr-1);    int i = 0, len = strlen(cp0);    if (cp0[0] == 1 && cp0[1])    {        parse_info->expr_ptr++;        parse_info->look_ch = ((unsigned char *) cp0)[1];        return L_CHAR;    }    if (!parse_info->cmap)        return L_CHAR;    mapto = (*parse_info->cmap) (parse_info->cmap_data, &cp0, len);    assert (mapto);        parse_info->expr_ptr = (const unsigned char *) cp0;    parse_info->look_ch = ((unsigned char **) mapto)[i][0];    logf (LOG_DEBUG, "map from %c to %d", parse_info->expr_ptr[-1], parse_info->look_ch);    return L_CHAR;}static int lex_sub(struct DFA_parse *parse_info){    int esc;    while ((parse_info->look_ch = nextchar (parse_info, &esc)) != 0)        if (parse_info->look_ch == '\"')        {            if (esc)                return map_l_char (parse_info);            parse_info->inside_string = !parse_info->inside_string;        }        else if (esc || parse_info->inside_string)            return map_l_char (parse_info);        else if (parse_info->look_ch == '[')            return read_charset(parse_info);        else         {            const int *cc;            for (cc = parse_info->charMap; *cc; cc += 2)                if (*cc == (int) (parse_info->look_ch))                {                    if (!cc[1])                        --parse_info->expr_ptr;                    return cc[1];                }            return map_l_char (parse_info);        }    return 0;}static void lex (struct DFA_parse *parse_info){    parse_info->lookahead = lex_sub (parse_info);}static const char *str_char (unsigned c){    static char s[6];    s[0] = '\\';    if (c < 32 || c >= 127)        switch (c)        {        case '\r':            strcpy (s+1, "r");            break;        case '\n':            strcpy (s+1, "n");            break;        case '\t':            strcpy (s+1, "t");            break;        default:            sprintf (s+1, "x%02x", c);            break;        }    else        switch (c)        {        case '\"':            strcpy (s+1, "\"");            break;        case '\'':            strcpy (s+1, "\'");            break;        case '\\':            strcpy (s+1, "\\");            break;        default:            s[0] = c;            s[1] = '\0';        }    return s;}static void out_char (int c){    printf ("%s", str_char (c));}static void term_Tnode (struct DFA_parse *parse_info){    struct Tblock *t, *t1;    for (t = parse_info->start; (t1 = t);)    {        t=t->next;        ifree (t1->tarray);        ifree (t1);    }}static struct Tnode *mk_Tnode (struct DFA_parse *parse_info){    struct Tblock *tnew;    if (parse_info->use_Tnode == parse_info->max_Tnode)    {        tnew = (struct Tblock *) imalloc (sizeof(struct Tblock));        tnew->tarray = (struct Tnode *) imalloc (TADD*sizeof(struct Tnode));        if (!tnew->tarray)            return NULL;        if (parse_info->use_Tnode == 0)            parse_info->start = tnew;        else            parse_info->end->next = tnew;        parse_info->end = tnew;        tnew->next = NULL;        parse_info->max_Tnode += TADD;    }    return parse_info->end->tarray+(parse_info->use_Tnode++ % TADD);}struct Tnode *mk_Tnode_cset (struct DFA_parse *parse_info, BSet charset){    struct Tnode *tn1, *tn0 = mk_Tnode(parse_info);    int ch1, ch0 = trav_BSet (parse_info->charset, charset, 0);    if (ch0 == -1)        tn0->pos = EPSILON;    else    {        tn0->u.ch[0] = ch0;        tn0->pos = ++parse_info->position;        ch1 = travi_BSet (parse_info->charset, charset, ch0+1) ;        if (ch1 == -1)            tn0->u.ch[1] = parse_info->charset->size;        else        {            tn0->u.ch[1] = ch1-1;            while ((ch0=trav_BSet (parse_info->charset, charset, ch1)) != -1)            {                tn1 = mk_Tnode(parse_info);                tn1->pos = OR;                tn1->u.p[0] = tn0;                tn0 = tn1;                tn1 = tn0->u.p[1] = mk_Tnode(parse_info);                tn1->u.ch[0] = ch0;                tn1->pos = ++(parse_info->position);                if ((ch1 = travi_BSet (parse_info->charset, charset,                                       ch0+1)) == -1)                {                    tn1->u.ch[1] = parse_info->charset->size;                    break;                }                tn1->u.ch[1] = ch1-1;            }        }    }    return tn0;}static void del_followpos (struct DFA_parse *parse_info){    ifree (parse_info->followpos);}static void init_pos (struct DFA_parse *parse_info){    parse_info->posar = (struct Tnode **) imalloc (sizeof(struct Tnode*)                                        * (1+parse_info->position));}static void del_pos (struct DFA_parse *parse_info){    ifree (parse_info->posar);}static void add_follow (struct DFA_parse *parse_info,			Set lastpos, Set firstpos){    while (lastpos)    {        parse_info->followpos[lastpos->value] = 	    union_Set (parse_info->poset,		       parse_info->followpos[lastpos->value], firstpos);        lastpos = lastpos->next;    }                                                            }static void dfa_trav (struct DFA_parse *parse_info, struct Tnode *n){    struct Tnode **posar = parse_info->posar;    SetType poset = parse_info->poset;        switch (n->pos)    {    case CAT:        dfa_trav (parse_info, n->u.p[0]);        dfa_trav (parse_info, n->u.p[1]);        n->nullable = n->u.p[0]->nullable & n->u.p[1]->nullable;        n->firstpos = mk_Set (poset);        n->firstpos = union_Set (poset, n->firstpos, n->u.p[0]->firstpos);        if (n->u.p[0]->nullable)            n->firstpos = union_Set (poset, n->firstpos, n->u.p[1]->firstpos);        n->lastpos = mk_Set (poset);        n->lastpos = union_Set (poset, n->lastpos, n->u.p[1]->lastpos);        if (n->u.p[1]->nullable)            n->lastpos = union_Set (poset, n->lastpos, n->u.p[0]->lastpos);        add_follow (parse_info, n->u.p[0]->lastpos, n->u.p[1]->firstpos);        n->u.p[0]->firstpos = rm_Set (poset, n->u.p[0]->firstpos);        n->u.p[0]->lastpos = rm_Set (poset, n->u.p[0]->lastpos);        n->u.p[1]->firstpos = rm_Set (poset, n->u.p[1]->firstpos);        n->u.p[1]->lastpos = rm_Set (poset, n->u.p[1]->lastpos);        if (debug_dfa_trav)            printf ("CAT");        break;    case OR:        dfa_trav (parse_info, n->u.p[0]);        dfa_trav (parse_info, n->u.p[1]);        n->nullable = n->u.p[0]->nullable | n->u.p[1]->nullable;        n->firstpos = merge_Set (poset, n->u.p[0]->firstpos,                                 n->u.p[1]->firstpos);        n->lastpos = merge_Set (poset, n->u.p[0]->lastpos,                                n->u.p[1]->lastpos);        n->u.p[0]->firstpos = rm_Set (poset, n->u.p[0]->firstpos);        n->u.p[0]->lastpos = rm_Set (poset, n->u.p[0]->lastpos);        n->u.p[1]->firstpos = rm_Set (poset, n->u.p[1]->firstpos);        n->u.p[1]->lastpos = rm_Set (poset, n->u.p[1]->lastpos);        if (debug_dfa_trav)            printf ("OR");        break;    case PLUS:        dfa_trav (parse_info, n->u.p[0]);        n->nullable = n->u.p[0]->nullable;        n->firstpos = n->u.p[0]->firstpos;        n->lastpos = n->u.p[0]->lastpos;        add_follow (parse_info, n->lastpos, n->firstpos);        if (debug_dfa_trav)            printf ("PLUS");        break;    case STAR:        dfa_trav (parse_info, n->u.p[0]);        n->nullable = 1;        n->firstpos = n->u.p[0]->firstpos;        n->lastpos = n->u.p[0]->lastpos;        add_follow (parse_info, n->lastpos, n->firstpos);        if (debug_dfa_trav)            printf ("STAR");        break;    case EPSILON:        n->nullable = 1;        n->lastpos = mk_Set (poset);        n->firstpos = mk_Set (poset);        if (debug_dfa_trav)            printf ("EPSILON");        break;    default:        posar[n->pos] = n;        n->nullable = 0;        n->firstpos = mk_Set (poset);        n->firstpos = add_Set (poset, n->firstpos, n->pos);        n->lastpos = mk_Set (poset);        n->lastpos = add_Set (poset, n->lastpos, n->pos);        if (debug_dfa_trav)	{            if (n->u.ch[0] < 0)                printf ("#%d (n#%d)", -n->u.ch[0], -n->u.ch[1]);            else if (n->u.ch[1] > n->u.ch[0])            {                putchar ('[');                out_char (n->u.ch[0]);                if (n->u.ch[1] > n->u.ch[0]+1)                    putchar ('-');                out_char (n->u.ch[1]);                putchar (']');            }            else                out_char (n->u.ch[0]);	}    }    if (debug_dfa_trav)    {        printf ("\n nullable : %c\n", n->nullable ? '1' : '0');        printf (" firstpos :");        pr_Set (poset, n->firstpos);        printf (" lastpos  :");        pr_Set (poset, n->lastpos);    }}static void init_followpos (struct DFA_parse *parse_info){    Set *fa;    int i;    parse_info->followpos = fa =	(Set *) imalloc (sizeof(Set) * (1+parse_info->position));    for (i = parse_info->position+1; --i >= 0; fa++)        *fa = mk_Set (parse_info->poset);}static void mk_dfa_tran (struct DFA_parse *parse_info, struct DFA_states *dfas){    int i, j, c;    int max_char;    short *pos, *pos_i;    Set tran_set;    int char_0, char_1;    struct DFA_state *dfa_from, *dfa_to;    struct Tnode **posar;    SetType poset;    Set *followpos;    assert (parse_info);    posar = parse_info->posar;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -