⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regxread.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 4 页
字号:
            }            else                t++;    }    return 0;}static int execTok (struct lexSpec *spec, const char **src,                    const char **tokBuf, int *tokLen){    const char *s = *src;    while (*s == ' ' || *s == '\t')        s++;    if (!*s)        return 0;    if (*s == '$' && s[1] >= '0' && s[1] <= '9')    {        int n = 0;        s++;        while (*s >= '0' && *s <= '9')            n = n*10 + (*s++ -'0');        if (spec->arg_no == 0)        {            *tokBuf = "";            *tokLen = 0;        }        else        {            if (n >= spec->arg_no)                n = spec->arg_no-1;            *tokBuf = f_win_get (spec, spec->arg_start[n], spec->arg_end[n],				 tokLen);        }    }    else if (*s == '\"')    {        *tokBuf = ++s;        while (*s && *s != '\"')            s++;        *tokLen = s - *tokBuf;        if (*s)            s++;        *src = s;    }    else if (*s == '\n' || *s == ';')    {        *src = s+1;        return 1;    }    else if (*s == '-')    {        *tokBuf = s++;        while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != '\r' &&               *s != ';')            s++;        *tokLen = s - *tokBuf;        *src = s;        return 3;    }    else    {        *tokBuf = s++;        while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != '\r' &&               *s != ';')            s++;        *tokLen = s - *tokBuf;    }    *src = s;    return 2;}static char *regxStrz (const char *src, int len, char *str){    if (len > 63)        len = 63;    memcpy (str, src, len);    str[len] = '\0';    return str;}#if HAVE_TCL_Hstatic int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp,			  int argc, char **argv){    struct lexSpec *spec = (struct lexSpec *) clientData;    if (argc < 2)	return TCL_ERROR;    if (!strcmp(argv[1], "record") && argc == 3)    {	char *absynName = argv[2];        data1_node *res;#if REGX_DEBUG	logf (LOG_LOG, "begin record %s", absynName);#endif        res = data1_mk_root 
(spec->dh, spec->m, absynName);                spec->d1_stack[spec->d1_level++] = res;        res = data1_mk_tag (spec->dh, spec->m, absynName, 0, res);        spec->d1_stack[spec->d1_level++] = res;        spec->d1_stack[spec->d1_level] = NULL;    }    else if (!strcmp(argv[1], "element") && argc == 3)    {	tagBegin (spec, argv[2], strlen(argv[2]));    }    else if (!strcmp (argv[1], "variant") && argc == 5)    {	variantBegin (spec, argv[2], strlen(argv[2]),		      argv[3], strlen(argv[3]),		      argv[4], strlen(argv[4]));    }    else if (!strcmp (argv[1], "context") && argc == 3)    {	struct lexContext *lc = spec->context;#if REGX_DEBUG	logf (LOG_LOG, "begin context %s",argv[2]);#endif	while (lc && strcmp (argv[2], lc->name))	    lc = lc->next;	if (lc)	{	    spec->context_stack[++(spec->context_stack_top)] = lc;	}	else	    logf (LOG_WARN, "unknown context %s", argv[2]);    }    else	return TCL_ERROR;    return TCL_OK;}static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp,			int argc, char **argv){    struct lexSpec *spec = (struct lexSpec *) clientData;    if (argc < 2)	return TCL_ERROR;        if (!strcmp (argv[1], "record"))    {	while (spec->d1_level)	{	    tagDataRelease (spec);	    (spec->d1_level)--;	}#if REGX_DEBUG	logf (LOG_LOG, "end record");#endif	spec->stop_flag = 1;    }    else if (!strcmp (argv[1], "element"))    {	int min_level = 1;	char *element = 0;	if (argc >= 3 && !strcmp(argv[2], "-record"))	{	    min_level = 0;	    if (argc == 4)		element = argv[3];	}	else	    if (argc == 3)		element = argv[2];	tagEnd (spec, min_level, element, (element ? 
strlen(element) : 0));	if (spec->d1_level == 0)	{#if REGX_DEBUG	    logf (LOG_LOG, "end element end records");#endif	    spec->stop_flag = 1;	}    }    else if (!strcmp (argv[1], "context"))    {#if REGX_DEBUG	logf (LOG_LOG, "end context");#endif	if (spec->context_stack_top)	    (spec->context_stack_top)--;    }    else	return TCL_ERROR;    return TCL_OK;}static int cmd_tcl_data (ClientData clientData, Tcl_Interp *interp,			 int argc, char **argv){    int argi = 1;    int textFlag = 0;    const char *element = 0;    struct lexSpec *spec = (struct lexSpec *) clientData;        while (argi < argc)    {	if (!strcmp("-text", argv[argi]))	{	    textFlag = 1;	    argi++;	}	else if (!strcmp("-element", argv[argi]))	{	    argi++;	    if (argi < argc)		element = argv[argi++];	}	else	    break;    }    if (element)	tagBegin (spec, element, strlen(element));    while (argi < argc)    {#if TCL_MAJOR_VERSION > 8 || (TCL_MAJOR_VERSION == 8 && TCL_MINOR_VERSION > 0)	Tcl_DString ds;	char *native = Tcl_UtfToExternalDString(0, argv[argi], -1, &ds);	execData (spec, native, strlen(native), textFlag);	Tcl_DStringFree (&ds);#else	execData (spec, argv[argi], strlen(argv[argi]), textFlag);#endif	argi++;    }    if (element)	tagEnd (spec, 1, NULL, 0);    return TCL_OK;}static int cmd_tcl_unread (ClientData clientData, Tcl_Interp *interp,			   int argc, char **argv){    struct lexSpec *spec = (struct lexSpec *) clientData;    int argi = 1;    int offset = 0;    int no;        while (argi < argc)    {	if (!strcmp("-offset", argv[argi]))	{	    argi++;	    if (argi < argc)	    {		offset = atoi(argv[argi]);		argi++;	    }	}	else	    break;    }    if (argi != argc-1)	return TCL_ERROR;    no = atoi(argv[argi]);    if (no >= spec->arg_no)	no = spec->arg_no - 1;    spec->ptr = spec->arg_start[no] + offset;    return TCL_OK;}static void execTcl (struct lexSpec *spec, struct regxCode *code){       int i;    int ret;    for (i = 0; i < spec->arg_no; i++)    {	char var_name[10], *var_buf;	int var_len, 
ch;		sprintf (var_name, "%d", i);	var_buf = f_win_get (spec, spec->arg_start[i], spec->arg_end[i],			     &var_len);		if (var_buf)	{	    ch = var_buf[var_len];	    var_buf[var_len] = '\0';	    Tcl_SetVar (spec->tcl_interp, var_name, var_buf, 0);	    var_buf[var_len] = ch;	}    }#if HAVE_TCL_OBJECTS    ret = Tcl_GlobalEvalObj(spec->tcl_interp, code->tcl_obj);#else    ret = Tcl_GlobalEval (spec->tcl_interp, code->str);#endif    if (ret != TCL_OK)    {    	const char *err = Tcl_GetVar(spec->tcl_interp, "errorInfo", 0);	logf(LOG_FATAL, "Tcl error, line=%d, \"%s\"\n%s", 	    spec->tcl_interp->errorLine,	    spec->tcl_interp->result,	    err ? err : "[NO ERRORINFO]");    }}/* HAVE_TCL_H */#endifstatic void execCode (struct lexSpec *spec, struct regxCode *code){    const char *s = code->str;    int cmd_len, r;    const char *cmd_str;        r = execTok (spec, &s, &cmd_str, &cmd_len);    while (r)    {        char *p, ptmp[64];                if (r == 1)        {            r = execTok (spec, &s, &cmd_str, &cmd_len);            continue;        }        p = regxStrz (cmd_str, cmd_len, ptmp);        if (!strcmp (p, "begin"))        {            r = execTok (spec, &s, &cmd_str, &cmd_len);            if (r < 2)	    {		logf (LOG_WARN, "missing keyword after 'begin'");                continue;	    }            p = regxStrz (cmd_str, cmd_len, ptmp);            if (!strcmp (p, "record"))            {                r = execTok (spec, &s, &cmd_str, &cmd_len);                if (r < 2)                    continue;                if (spec->d1_level == 0)                {                    static char absynName[64];                    data1_node *res;                    if (cmd_len > 63)                        cmd_len = 63;                    memcpy (absynName, cmd_str, cmd_len);                    absynName[cmd_len] = '\0';#if REGX_DEBUG                    logf (LOG_LOG, "begin record %s", absynName);#endif                    res = data1_mk_root (spec->dh, spec->m, absynName);      
                                  spec->d1_stack[spec->d1_level++] = res;                    res = data1_mk_tag (spec->dh, spec->m, absynName, 0, res);                    spec->d1_stack[spec->d1_level++] = res;                    spec->d1_stack[spec->d1_level] = NULL;                }                r = execTok (spec, &s, &cmd_str, &cmd_len);            }            else if (!strcmp (p, "element"))            {                r = execTok (spec, &s, &cmd_str, &cmd_len);                if (r < 2)                    continue;                tagBegin (spec, cmd_str, cmd_len);                r = execTok (spec, &s, &cmd_str, &cmd_len);            } 	    else if (!strcmp (p, "variant"))	    {		int class_len;		const char *class_str = NULL;		int type_len;		const char *type_str = NULL;		int value_len;		const char *value_str = NULL;		r = execTok (spec, &s, &cmd_str, &cmd_len);		if (r < 2)		    continue;		class_str = cmd_str;		class_len = cmd_len;		r = execTok (spec, &s, &cmd_str, &cmd_len);		if (r < 2)		    continue;		type_str = cmd_str;		type_len = cmd_len;		r = execTok (spec, &s, &cmd_str, &cmd_len);		if (r < 2)		    continue;		value_str = cmd_str;		value_len = cmd_len;                variantBegin (spec, class_str, class_len,			      type_str, type_len, value_str, value_len);						r = execTok (spec, &s, &cmd_str, &cmd_len);	    }	    else if (!strcmp (p, "context"))	    {		if (r > 1)		{		    struct lexContext *lc = spec->context;		    r = execTok (spec, &s, &cmd_str, &cmd_len);		    p = regxStrz (cmd_str, cmd_len, ptmp);#if REGX_DEBUG		    logf (LOG_LOG, "begin context %s", p);#endif		    while (lc && strcmp (p, lc->name))			lc = lc->next;		    if (lc)			spec->context_stack[++(spec->context_stack_top)] = lc;		    else			logf (LOG_WARN, "unknown context %s", p);		    		}		r = execTok (spec, &s, &cmd_str, &cmd_len);	    }	    else	    {		logf (LOG_WARN, "bad keyword '%s' after begin", p);	    }        }        else if (!strcmp (p, "end"))        {            r = execTok 
(spec, &s, &cmd_str, &cmd_len);            if (r < 2)	    {		logf (LOG_WARN, "missing keyword after 'end'");		continue;	    }	    p = regxStrz (cmd_str, cmd_len, ptmp);	    if (!strcmp (p, "record"))	    {		while (spec->d1_level)		{		    tagDataRelease (spec);		    (spec->d1_level)--;		}		r = execTok (spec, &s, &cmd_str, &cmd_len);#if REGX_DEBUG		logf (LOG_LOG, "end record");#endif		spec->stop_flag = 1;	    }	    else if (!strcmp (p, "element"))	    {                int min_level = 1;                while ((r = execTok (spec, &s, &cmd_str, &cmd_len)) == 3)                {                    if (cmd_len==7 && !memcmp ("-record", cmd_str, cmd_len))                        min_level = 0;                }		if (r > 2)		{		    tagEnd (spec, min_level, cmd_str, cmd_len);		    r = execTok (spec, &s, &cmd_str, &cmd_len);		}		else		    tagEnd (spec, min_level, NULL, 0);                if (spec->d1_level == 0)                {#if REGX_DEBUG		    logf (LOG_LOG, "end element end records");#endif		    spec->stop_flag = 1;                }	    }	    else if (!strcmp (p, "context"))	    {#if REGX_DEBUG		logf (LOG_LOG, "end context");#endif		if (spec->context_stack_top)		    (spec->context_stack_top)--;		r = execTok (spec, &s, &cmd_str, &cmd_len);	    }	    

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -