⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 d1_read.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 3 页
字号:
{    return data1_mk_comment_n (dh, mem, buf, strlen(buf), parent);}char *data1_insert_string_n (data1_handle dh, data1_node *res,                             NMEM m, const char *str, size_t len){    char *b;    if (len >= DATA1_LOCALDATA)        b = (char *) nmem_malloc (m, len+1);    else        b = res->lbuf;    memcpy (b, str, len);    b[len] = 0;    return b;}char *data1_insert_string (data1_handle dh, data1_node *res,                           NMEM m, const char *str){    return data1_insert_string_n (dh, res, m, str, strlen(str));}static data1_node *data1_add_insert_taggeddata(data1_handle dh,                                               data1_node *at,                                               const char *tagname, NMEM m,                                               int local_allowed,					       int insert_mode){    data1_node *root = at->root;    data1_node *partag = get_parent_tag (dh, at);    data1_element *e = NULL;    data1_node *datn = 0;    data1_node *tagn = 0;    if (!partag)        e = data1_getelementbytagname (dh, root->u.root.absyn, 0, tagname);    else     {	e = partag->u.tag.element;        if (e)            e = data1_getelementbytagname (dh, root->u.root.absyn, e, tagname);    }    if (local_allowed || e)    {        if (insert_mode)            tagn = data1_insert_node (dh, m, DATA1N_tag, at);        else            tagn = data1_append_node (dh, m, DATA1N_tag, at);        tagn->u.tag.tag = data1_insert_string (dh, tagn, m, tagname);        tagn->u.tag.element = e;        datn = data1_mk_node2 (dh, m, DATA1N_data, tagn);    }    return datn;}data1_node *data1_mk_tag_data(data1_handle dh, data1_node *at,                              const char *tagname, NMEM m){    return data1_add_insert_taggeddata (dh, at, tagname, m, 1, 0);}/* * Insert a tagged node into the record root as first child of the node at * which should be root or tag itself). Returns pointer to the data node, * which can then be modified. 
*/data1_node *data1_mk_tag_data_wd(data1_handle dh, data1_node *at,                                 const char *tagname, NMEM m){    return data1_add_insert_taggeddata (dh, at, tagname, m, 0, 1);}data1_node *data1_insert_taggeddata (data1_handle dh, data1_node *root,                                     data1_node *at, const char *tagname,                                     NMEM m){    return data1_add_insert_taggeddata (dh, at, tagname, m, 0, 1);}data1_node *data1_add_taggeddata (data1_handle dh, data1_node *root,                                  data1_node *at, const char *tagname,                                  NMEM m){    return data1_add_insert_taggeddata (dh, at, tagname, m, 1, 0);}data1_node *data1_mk_tag_data_int (data1_handle dh, data1_node *at,                                   const char *tag, int num,                                   NMEM nmem){    data1_node *node_data;        node_data = data1_mk_tag_data (dh, at, tag, nmem);    if (!node_data)	return 0;    node_data->u.data.what = DATA1I_num;    node_data->u.data.data = node_data->lbuf;    sprintf (node_data->u.data.data, "%d", num);    node_data->u.data.len = strlen (node_data->u.data.data);    return node_data;}data1_node *data1_mk_tag_data_oid (data1_handle dh, data1_node *at,                                   const char *tag, Odr_oid *oid,                                   NMEM nmem){    data1_node *node_data;    char str[128], *p = str;    Odr_oid *ii;        node_data = data1_mk_tag_data (dh, at, tag, nmem);    if (!node_data)	return 0;        for (ii = oid; *ii >= 0; ii++)    {	if (ii != oid)	    *p++ = '.';	sprintf (p, "%d", *ii);	p += strlen (p);    }    node_data->u.data.what = DATA1I_oid;    node_data->u.data.len = strlen (str);    node_data->u.data.data = data1_insert_string (dh, node_data, nmem, str);    return node_data;}data1_node *data1_mk_tag_data_text (data1_handle dh, data1_node *at,                                    const char *tag, const char *str,                             
       NMEM nmem){    data1_node *node_data;        node_data = data1_mk_tag_data (dh, at, tag, nmem);    if (!node_data)	return 0;    node_data->u.data.what = DATA1I_text;    node_data->u.data.len = strlen (str);    node_data->u.data.data = data1_insert_string (dh, node_data, nmem, str);    return node_data;}data1_node *data1_mk_tag_data_text_uni (data1_handle dh, data1_node *at,                                        const char *tag, const char *str,                                        NMEM nmem){    data1_node *node = data1_search_tag (dh, at->child, tag);    if (!node)        return data1_mk_tag_data_text (dh, at, tag, str, nmem);    else    {	data1_node *node_data = node->child;	node_data->u.data.what = DATA1I_text;	node_data->u.data.len = strlen (str);	node_data->u.data.data = data1_insert_string (dh, node_data,						      nmem, str);        node_data->child = node_data->last_child = 0;	return node_data;    }}static int ampr (int (*get_byte)(void *fh), void *fh, int *amp){#if 1    int c = (*get_byte)(fh);    *amp = 0;    return c;#else    int c = (*get_byte)(fh);    *amp = 0;    if (c == '&')    {        char ent[20];        int i = 0;               while (1)        {            c = (*get_byte)(fh);            if (c == ';')            {                ent[i] = 0;                                c = ' ';                if (!strcmp (ent, "quot"))                    c = '"';                if (!strcmp (ent, "apos"))                    c = '\'';                if (!strcmp (ent, "gt"))                    c = '>';                if (!strcmp (ent, "lt"))                    c = '<';                if (!strcmp (ent, "amp"))                    c = '&';                *amp = 1;                break;            }            else if (c == 0 || d1_isspace(c))                break;            if (i < 19)                ent[i++] = c;        }    }    return c;#endif}data1_xattr *data1_read_xattr (data1_handle dh, NMEM m,			       int (*get_byte)(void *fh), void *fh,			     
  WRBUF wrbuf, int *ch, int *amp){    data1_xattr *p_first = 0;    data1_xattr **pp = &p_first;    int c = *ch;    for (;;)    {	data1_xattr *p;	int len;	while (*amp || (c && d1_isspace(c)))	    c = ampr (get_byte, fh, amp);	if (*amp == 0 && (c == 0 || c == '>' || c == '/'))	    break;	*pp = p = (data1_xattr *) nmem_malloc (m, sizeof(*p));	p->next = 0;	pp = &p->next;	p->value = 0;        p->what = DATA1I_xmltext;		wrbuf_rewind(wrbuf);	while (c && c != '=' && c != '>' && c != '/' && !d1_isspace(c))	{	    wrbuf_putc (wrbuf, c);	    c = ampr (get_byte, fh, amp);	}	wrbuf_putc (wrbuf, '\0');	len = wrbuf_len(wrbuf);	p->name = (char*) nmem_malloc (m, len);	strcpy (p->name, wrbuf_buf(wrbuf));	if (c == '=')	{	    c = ampr (get_byte, fh, amp);	    if (*amp == 0 && c == '"')	    {		c = ampr (get_byte, fh, amp);			wrbuf_rewind(wrbuf);		while (*amp || (c && c != '"'))		{		    wrbuf_putc (wrbuf, c);		    c = ampr (get_byte, fh, amp);	        }	        if (c)		    c = ampr (get_byte, fh, amp);	    }	    else if (*amp == 0 && c == '\'')	    {		c = ampr (get_byte, fh, amp);			wrbuf_rewind(wrbuf);		while (*amp || (c && c != '\''))		{		    wrbuf_putc (wrbuf, c);		    c = ampr (get_byte, fh, amp);	        }	        if (c)		    c = ampr (get_byte, fh, amp);	    }	    else	    {	        wrbuf_rewind(wrbuf);	        while (*amp || (c && c != '>' && c != '/'))	        {		    wrbuf_putc (wrbuf, c);		    c = ampr (get_byte, fh, amp);	        }            }	    wrbuf_putc (wrbuf, '\0');	    len = wrbuf_len(wrbuf);	    p->value = (char*) nmem_malloc (m, len);	    strcpy (p->value, wrbuf_buf(wrbuf));	}    }    *ch = c;    return p_first;}/* * Ugh. Sometimes functions just grow and grow on you. This one reads a * 'node' and its children. 
*/data1_node *data1_read_nodex (data1_handle dh, NMEM m,			      int (*get_byte)(void *fh), void *fh, WRBUF wrbuf){    data1_node *d1_stack[256];    data1_node *res;    int c, amp;    int level = 0;    int line = 1;    d1_stack[level] = 0;    c = ampr (get_byte, fh, &amp);    while (c != '\0')    {	data1_node *parent = level ? d1_stack[level-1] : 0;	if (amp == 0 && c == '<') /* beginning of tag */	{	    data1_xattr *xattr;	    char tag[64];	    char args[256];	    int null_tag = 0;	    int end_tag = 0;	    size_t i = 0;	    c = ampr (get_byte, fh, &amp);	    if (amp == 0 && c == '/')	    {		end_tag = 1;		c = ampr (get_byte, fh, &amp);	    }	    else if (amp == 0 && c == '!')	    {                int c0, amp0;                              wrbuf_rewind(wrbuf);                                c0 = ampr (get_byte, fh, &amp0);                if (amp0 == 0 && c0 == '\0')                    break;                c = ampr (get_byte, fh, &amp);                                if (amp0 == 0 && c0 == '-' && amp == 0 && c == '-')                {                    /* COMMENT: <!-- ... 
--> */                    int no_dash = 0;                                        c = ampr (get_byte, fh, &amp);                    while (amp || c)                    {                        if (amp == 0 && c == '-')                            no_dash++;                        else if (amp == 0 && c == '>' && no_dash >= 2)                        {                            if (level > 0)                                d1_stack[level] =                                     data1_mk_comment_n (                                        dh, m,                                        wrbuf_buf(wrbuf), wrbuf_len(wrbuf)-2,                                        d1_stack[level-1]);                            c = ampr (get_byte, fh, &amp); /* skip > */                            break;                        }                        else                            no_dash = 0;                        wrbuf_putc (wrbuf, c);                        c = ampr (get_byte, fh, &amp);                    }                    continue;                }                else                {   /* DIRECTIVE: <! .. > */			    int blevel = 0;                    while (amp || c)                    {                        if (amp == 0 && c == '>' && blevel == 0)                        {                            c = ampr (get_byte, fh, &amp);			    break;                        }			if (amp == 0 && c == '[')			    blevel++;			if (amp == 0 && c == ']' && blevel > 0)			    blevel--;                        c = ampr (get_byte, fh, &amp);                    }                    continue;                }	    }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -