⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 seshigh.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 5 页
字号:
        char *charset = 0;        Z_SOAP *soap_package = 0;        static Z_SOAP_Handler soap_handlers[2] = {            {"http://www.loc.gov/zing/srw/v1.0/", 0,             (Z_SOAP_fun) yaz_srw_codec},            {0, 0, 0}        };#endif                if (*p0 == '/')            p0++;        p1 = strchr(p0, '?');        if (!p1)            p1 = p0 + strlen(p0);        if (p1 != p0)        {            db = odr_malloc(assoc->decode, p1 - p0 + 1);            memcpy (db, p0, p1 - p0);            db[p1 - p0] = '\0';        }#if HAVE_XML2        if (p1 && *p1 == '?' && p1[1])        {            Z_SRW_PDU *res = yaz_srw_get(o, Z_SRW_searchRetrieve_response);            Z_SRW_PDU *sr = yaz_srw_get(o, Z_SRW_searchRetrieve_request);            char *query = uri_val(p1, "query", o);            char *pQuery = uri_val(p1, "pQuery", o);            char *sortKeys = uri_val(p1, "sortKeys", o);                        if (query)            {                sr->u.request->query_type = Z_SRW_query_type_cql;                sr->u.request->query.cql = query;            }            if (pQuery)            {                sr->u.request->query_type = Z_SRW_query_type_pqf;                sr->u.request->query.pqf = pQuery;            }            if (sortKeys)            {                sr->u.request->sort_type = Z_SRW_sort_type_sort;                sr->u.request->sort.sortKeys = sortKeys;            }            sr->u.request->recordSchema = uri_val(p1, "recordSchema", o);            sr->u.request->recordPacking = uri_val(p1, "recordPacking", o);            if (!sr->u.request->recordPacking)                sr->u.request->recordPacking = "xml";            uri_val_int(p1, "maximumRecords", o,                         &sr->u.request->maximumRecords);            uri_val_int(p1, "startRecord", o,                        &sr->u.request->startRecord);            if (sr->u.request->startRecord)                yaz_log(LOG_LOG, "startRecord=%d", *sr->u.request->startRecord);            
sr->u.request->database = db;            srw_bend_search(assoc, req, sr->u.request, res->u.response);                        soap_package = odr_malloc(o, sizeof(*soap_package));            soap_package->which = Z_SOAP_generic;            soap_package->u.generic =                odr_malloc(o, sizeof(*soap_package->u.generic));            soap_package->u.generic->p = res;            soap_package->u.generic->ns = soap_handlers[0].ns;            soap_package->u.generic->no = 0;                        soap_package->ns = "SRU";            p = z_get_HTTP_Response(o, 200);            hres = p->u.HTTP_Response;            ret = z_soap_codec_enc(assoc->encode, &soap_package,                                   &hres->content_buf, &hres->content_len,                                   soap_handlers, charset);            if (!charset)                z_HTTP_header_add(o, &hres->headers, "Content-Type", "text/xml");            else            {                char ctype[60];                strcpy(ctype, "text/xml; charset=");                strcat(ctype, charset);                z_HTTP_header_add(o, &hres->headers, "Content-Type", ctype);            }        }        else        {            Z_SRW_PDU *res = yaz_srw_get(o, Z_SRW_explain_response);            Z_SRW_PDU *sr = yaz_srw_get(o, Z_SRW_explain_request);            srw_bend_explain(assoc, req, sr->u.explain_request,                            res->u.explain_response);            if (res->u.explain_response->explainData_buf)            {                soap_package = odr_malloc(o, sizeof(*soap_package));                soap_package->which = Z_SOAP_generic;                                soap_package->u.generic =                    odr_malloc(o, sizeof(*soap_package->u.generic));                                soap_package->u.generic->p = res;                soap_package->u.generic->ns = soap_handlers[0].ns;                soap_package->u.generic->no = 0;                                soap_package->ns = "SRU";                
                p = z_get_HTTP_Response(o, 200);                hres = p->u.HTTP_Response;                                ret = z_soap_codec_enc(assoc->encode, &soap_package,                                       &hres->content_buf, &hres->content_len,                                       soap_handlers, charset);                if (!charset)                    z_HTTP_header_add(o, &hres->headers, "Content-Type", "text/xml");                else                {                    char ctype[60];                    strcpy(ctype, "text/xml; charset=");                    strcat(ctype, charset);                    z_HTTP_header_add(o, &hres->headers, "Content-Type",                                      ctype);                }            }        }#endif#ifdef DOCDIR	if (strlen(hreq->path) >= 5 && strlen(hreq->path) < 80 &&			 !memcmp(hreq->path, "/doc/", 5))        {	    FILE *f;            char fpath[120];	    strcpy(fpath, DOCDIR);	    strcat(fpath, hreq->path+4);	    f = fopen(fpath, "rb");	    if (f) {                struct stat sbuf;                if (fstat(fileno(f), &sbuf) || !S_ISREG(sbuf.st_mode))                {                    fclose(f);                    f = 0;                }            }            if (f)            {		long sz;		fseek(f, 0L, SEEK_END);		sz = ftell(f);		if (sz >= 0 && sz < 500000)		{		    const char *ctype = "application/octet-stream";		    const char *cp;                    p = z_get_HTTP_Response(o, 200);                    hres = p->u.HTTP_Response;		    hres->content_buf = (char *) odr_malloc(o, sz + 1);		    hres->content_len = sz;		    fseek(f, 0L, SEEK_SET);		    fread(hres->content_buf, 1, sz, f);		    if ((cp = strrchr(fpath, '.'))) {			cp++;			if (!strcmp(cp, "png"))			    ctype = "image/png";			else if (!strcmp(cp, "gif"))			    ctype = "image/gif";			else if (!strcmp(cp, "xml"))			    ctype = "text/xml";			else if (!strcmp(cp, "html"))			    ctype = "text/html";		    }                    z_HTTP_header_add(o, 
&hres->headers, "Content-Type", ctype);		}		fclose(f);	    }	}#endif#if 0	if (!strcmp(hreq->path, "/"))         {#ifdef DOCDIR            struct stat sbuf;#endif            const char *doclink = "";            p = z_get_HTTP_Response(o, 200);            hres = p->u.HTTP_Response;            hres->content_buf = (char *) odr_malloc(o, 400);#ifdef DOCDIR            if (stat(DOCDIR "/yaz.html", &sbuf) == 0 && S_ISREG(sbuf.st_mode))                doclink = "<P><A HREF=\"/doc/yaz.html\">Documentation</A></P>";#endif            sprintf (hres->content_buf,                      "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n"                     "<HTML>\n"                     " <HEAD>\n"                     "  <TITLE>YAZ " YAZ_VERSION "</TITLE>\n"                     " </HEAD>\n"                     " <BODY>\n"                     "  <P><A HREF=\"http://www.indexdata.dk/yaz/\">YAZ</A> "                      YAZ_VERSION "</P>\n"                     "%s"                     " </BODY>\n"                     "</HTML>\n", doclink);            hres->content_len = strlen(hres->content_buf);            z_HTTP_header_add(o, &hres->headers, "Content-Type", "text/html");        }#endif        if (!p)        {            p = z_get_HTTP_Response(o, 404);        }    }    else if (!strcmp(hreq->method, "POST"))    {        const char *content_type = z_HTTP_header_lookup(hreq->headers,                                                        "Content-Type");        if (content_type && !yaz_strcmp_del("text/xml", content_type, "; "))        {            Z_SOAP *soap_package = 0;            int ret = -1;            int http_code = 500;            const char *charset_p = 0;            char *charset = 0;            static Z_SOAP_Handler soap_handlers[2] = {#if HAVE_XML2                {"http://www.loc.gov/zing/srw/v1.0/", 0,                 (Z_SOAP_fun) yaz_srw_codec},#endif                {0, 0, 0}            };            if ((charset_p = strstr(content_type, "; charset=")))          
  {                int i = 0;                charset_p += 10;                while (i < 20 && charset_p[i] &&                       !strchr("; \n\r", charset_p[i]))                    i++;                charset = odr_malloc(assoc->encode, i+1);                memcpy(charset, charset_p, i);                charset[i] = '\0';                yaz_log(LOG_LOG, "SOAP encoding %s", charset);            }            ret = z_soap_codec(assoc->decode, &soap_package,                                &hreq->content_buf, &hreq->content_len,                               soap_handlers);#if HAVE_XML2            if (!ret && soap_package->which == Z_SOAP_generic &&                soap_package->u.generic->no == 0)            {                /* SRW package */                Z_SRW_PDU *sr = soap_package->u.generic->p;                                if (sr->which == Z_SRW_searchRetrieve_request)                {                    Z_SRW_PDU *res =                        yaz_srw_get(assoc->encode,                                    Z_SRW_searchRetrieve_response);                    if (!sr->u.request->database)                    {                        const char *p0 = hreq->path, *p1;                        if (*p0 == '/')                            p0++;                        p1 = strchr(p0, '?');                        if (!p1)                            p1 = p0 + strlen(p0);                        if (p1 != p0)                        {                            sr->u.request->database =                                odr_malloc(assoc->decode, p1 - p0 + 1);                            memcpy (sr->u.request->database, p0, p1 - p0);                            sr->u.request->database[p1 - p0] = '\0';                        }                        else                            sr->u.request->database = "Default";                    }                    srw_bend_search(assoc, req, sr->u.request,                                    res->u.response);                                      
  soap_package->u.generic->p = res;                    http_code = 200;                }                else if (sr->which == Z_SRW_explain_request)                {                    Z_SRW_PDU *res =                        yaz_srw_get(assoc->encode, Z_SRW_explain_response);                    srw_bend_explain(assoc, req, sr->u.explain_request,                                     res->u.explain_response);                    if (!res->u.explain_response->explainData_buf)                    {                        z_soap_error(assoc->encode, soap_package,                                     "SOAP-ENV:Client", "Explain Not Supported", 0);                    }                    else                    {                        soap_package->u.generic->p = res;                        http_code = 200;                    }                }                else                {                    z_soap_error(assoc->encode, soap_package,                                 "SOAP-ENV:Client", "Bad method", 0);                 }            }#endif            p = z_get_HTTP_Response(o, 200);            hres = p->u.HTTP_Response;            ret = z_soap_codec_enc(assoc->encode, &soap_package,                                   &hres->content_buf, &hres->content_len,                                   soap_handlers, charset);            hres->code = http_code;            if (!charset)                z_HTTP_header_add(o, &hres->headers, "Content-Type", "text/xml");            else            {                char ctype[60];                strcpy(ctype, "text/xml; charset=");                strcat(ctype, charset);                z_HTTP_header_add(o, &hres->headers, "Content-Type", ctype);            }        }        if (!p) /* still no response ? 
*/            p = z_get_HTTP_Response(o, 500);    }    else    {        p = z_get_HTTP_Response(o, 405);        hres = p->u.HTTP_Response;        z_HTTP_header_add(o, &hres->headers, "Allow", "GET, POST");    }    hres = p->u.HTTP_Response;    if (!strcmp(hreq->version, "1.0"))     {        const char *v = z_HTTP_header_lookup(hreq->headers, "Connection");        if (v && !strcmp(v, "Keep-Alive"))            keepalive = 1;        else            keepalive = 0;        hres->version = "1.0";    }    else    {        const char *v = z_HTTP_header_lookup(hreq->headers, "Connection");        if (v && !strcmp(v, "close"))            keepalive = 0;        else            keepalive = 1;        hres->version = "1.1";    }    if (!keepalive)    {        z_HTTP_header_add(o, &hres->headers, "Connection", "close");        assoc->state = ASSOC_DEAD;    }    else    {        int t;        const char *alive = z_HTTP_header_lookup(hreq->headers, "Keep-Alive");        if (alive && isdigit(*alive))            t = atoi(alive);        else            t = 15;        if (t < 0 || t > 3600)            t = 3600;        iochan_settimeout(assoc->client_chan,t);        z_HTTP_header_add(o, &hres->headers, "Connection", "Keep-Alive");    }    process_gdu_response(assoc, req, p);}/*
 * process_gdu_request: hand a request to the handler selected by
 * req->gdu_request->which.
 *
 * assoc - passed through unchanged to whichever handler is selected.
 * req   - request whose gdu_request determines the handler.
 *
 * Z_GDU_Z3950: req->apdu_request is set to req->gdu_request->u.z3950 and
 *   process_z_request() is called; if it returns a negative value,
 *   do_close_req() is called with Z_Close_systemProblem and the message
 *   process_z_request() stored through &msg.
 * Z_GDU_HTTP_Request: process_http_request() is called.
 * Any other value: do_close_req() is called with Z_Close_systemProblem and
 *   the fixed text "bad protocol packet".
 *
 * Returns nothing.
 */static void process_gdu_request(association *assoc, request *req){    if (req->gdu_request->which == Z_GDU_Z3950)    {        char *msg = 0;        req->apdu_request = req->gdu_request->u.z3950;        if (process_z_request(assoc, req, &msg) < 0)            do_close_req(assoc, Z_Close_systemProblem, msg, req);    }    else if (req->gdu_request->which == Z_GDU_HTTP_Request)        process_http_request(assoc, req);    else    {        do_close_req(assoc, Z_Close_systemProblem, "bad protocol packet", req);    }}/* * Initiate request processing. 
*/static int process_z_request(association *assoc, request *req, char **msg){    int fd = -1;    Z_APDU *res;    int retval;        *msg = "Unknown Error";    assert(req && req->state == REQUEST_IDLE);    if (req->apdu_request->which != Z_APDU_initRequest && !assoc->init)    {	*msg = "Missing InitRequest";	return -1;    }    switch (req->apdu_request->which)    {    case Z_APDU_initRequest:

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -