⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 templateex.cpp

📁 概述:数据的纵向收集
💻 CPP
📖 第 1 页 / 共 2 页
字号:
#include "stdafx.h"
#include "TemplateEx.h"
#include "Template.h"
#include "StoreMgr.h"
#include "BloomFilter.h"
#include "PageStat.h"
#include "Priorizer.h"
#include "Crawler.h"
#include "Wininet.h"
#include <fstream>

//////////////////////////////////////////////////////////////////////////
// Lua debugging helpers
// 2007.4.17, sunwang

// Reports a message originating from the Lua side: writes it to the debug
// channel via DebugString and to the error log via LOGE.
// Always returns 1.
static int lua_message(const char* msg)
{
	DebugString("[PageCrawler]","lua_report:(%s)",msg);
	LOGE("lua_report:(%s)",msg);
	return 1;
}
// Reports the error object on top of the Lua stack.
// When `status` is non-zero and the top of the stack is not nil, the value is
// logged through lua_message() and popped. `status` is returned unchanged in
// every case.
static int lua_report(lua_State *L, int status)
{
	// Nothing to report on success, or when the top slot holds nil.
	if (status == 0 || lua_isnil(L, -1))
		return status;

	const char *text = lua_tostring(L, -1);
	lua_message(text != NULL ? text : "(error object is not a string)");
	lua_pop(L, 1);

	return status;
}

// JavaScript support
// <-- expression
// --> string

// Error reporter installed on the JS contexts created in this file, and also
// called directly by JsCall for its own failures. Writes `message` to the
// debug channel via DebugString and to the error log via LOGE.
// `cx` and `report` are not used.
static void
Js_ErrorReporter(JSContext *cx, const char *message, JSErrorReport *report)
{
	DebugString("[PageCrawler]","js_report:(%s)",message);
	LOGE("js_report:(%s)",message);
}

// JS native registered as "load": treats every argument as a script file
// name, compiles the file against `obj` and executes it.
// Returns JS_FALSE as soon as an argument cannot be converted to a string or
// a file fails to compile/execute; JS_TRUE once all arguments are processed.
// `rval` is not written.
static JSBool
Js_Load(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
{
	// Never changed in this block; when true, files are compiled but not run.
	static JSBool compileOnly = JS_FALSE;

	for (uintN idx = 0; idx < argc; ++idx) {
		JSString *nameStr = JS_ValueToString(cx, argv[idx]);
		if (nameStr == NULL)
			return JS_FALSE;

		// The converted string is stored back into argv
		// (presumably so it stays reachable for the GC - confirm).
		argv[idx] = STRING_TO_JSVAL(nameStr);
		const char *path = JS_GetStringBytes(nameStr);
		errno = 0;

		// Temporarily install our reporter and the compile-and-go option;
		// both are restored below before the result is inspected.
		JSErrorReporter prevReporter = JS_SetErrorReporter(cx, Js_ErrorReporter);
		uint32 prevOptions = JS_GetOptions(cx);
		JS_SetOptions(cx, prevOptions | JSOPTION_COMPILE_N_GO);

		JSBool succeeded = JS_FALSE;
		JSScript *compiled = JS_CompileFile(cx, obj, path);
		if (compiled != NULL) {
			if (compileOnly) {
				succeeded = JS_TRUE;
			} else {
				jsval ignored;
				succeeded = JS_ExecuteScript(cx, obj, compiled, &ignored);
			}
			JS_DestroyScript(cx, compiled);
		}

		JS_SetOptions(cx, prevOptions);
		JS_SetErrorReporter(cx, prevReporter);

		if (!succeeded)
			return JS_FALSE;
	}

	return JS_TRUE;
}

// Lua binding: evaluates a JavaScript expression and returns its value.
// <- s : JavaScript source text (top of the Lua stack)
// -> s : the result converted to a string, or nil when the argument is not a
//        string or any step of the evaluation fails
//
// A fresh runtime, context and global object are created for every call and
// destroyed before returning. A CIOLocker on s_csJsCall is held for the whole
// call (presumably a scoped lock serializing callers - confirm).
static int JsCall(lua_State* L)
{
	CIOLocker locker(&s_csJsCall);

	static JSFunctionSpec shell_functions[] = {
		{"load",            Js_Load,           1},
		{0}
	};

	const char* lpszJS = lua_tostring(L,-1);
	if(lpszJS==NULL)
	{
		Js_ErrorReporter(NULL,"invalid parameter arg1=null",0);
		// Push an explicit result; returning 1 without pushing would hand
		// back whatever happens to be on top of the stack.
		lua_pushnil(L);
		return 1;
	}

	JSClass global_class = {
		"global",0,
			JS_PropertyStub,JS_PropertyStub,JS_PropertyStub,JS_PropertyStub,
			JS_EnumerateStub,JS_ResolveStub,JS_ConvertStub,JS_FinalizeStub
	};

	/*
	* You always need:
	*        a runtime per process,
	*        a context per thread,
	*        a global object per context,
	*        standard classes (e.g. Date).
	*/
	bool bPushed = false;	// true once a result string is on the Lua stack
	JSRuntime *rt = JS_NewRuntime(0x100000);
	JSContext *cx = (rt != NULL) ? JS_NewContext(rt, 0x10000) : NULL;

	if (cx == NULL)
	{
		Js_ErrorReporter(NULL,"JS_NewRuntime/JS_NewContext error",0);
	}
	else
	{
		JS_SetErrorReporter(cx, Js_ErrorReporter);

		JSObject *global = JS_NewObject(cx, &global_class, NULL, NULL);
		if (global == NULL || !JS_InitStandardClasses(cx, global))
		{
			Js_ErrorReporter(NULL,"JS global object init error",0);
		}
		else if (!JS_DefineFunctions(cx, global, shell_functions))
		{
			Js_ErrorReporter(NULL,"JS_DefineFunctions error",0);
		}
		else
		{
			// rval is only meaningful when evaluation succeeds, so it is
			// initialized and converted only on success.
			jsval rval = JSVAL_VOID;
			if (JS_EvaluateScript(cx, global, lpszJS, strlen(lpszJS),
				__FILE__, __LINE__, &rval))
			{
				JSString *str = JS_ValueToString(cx, rval);
				if (str != NULL)
				{
					// lua_pushstring copies the bytes, so the result
					// survives the context teardown below.
					lua_pushstring(L, JS_GetStringBytes(str));
					bPushed = true;
				}
			}
		}
	}

	if (!bPushed)
	{
		lua_pushnil(L);
	}

	/* For each context you've created: */
	if (cx != NULL)
	{
		JS_DestroyContext(cx);
	}

	/* For each runtime: */
	if (rt != NULL)
	{
		JS_DestroyRuntime(rt);
	}

	/* And finally: */
	// NOTE(review): kept from the original, which shuts the engine down after
	// every call; confirm this is valid for the SpiderMonkey version in use.
	JS_ShutDown();

	return 1;
}

//////////////////////////////////////////////////////////////////////////
// Store debugging
// When non-zero, the AddTaskStore* bindings write their arguments to the
// debug output file through OutputDebugInfo instead of calling CStoreMgr.
// Assigned from Lua through SetDebugLua.
static int debug_template_file = 0;
// Returns the path of the debug output file: "crawler_output.txt" in the
// directory of the running module. The path is computed on the first call
// and cached in a static buffer.
// If the module path cannot be obtained, has no directory separator, or the
// result would not fit in MAX_PATH, the bare file name "crawler_output.txt"
// (relative to the current directory) is returned instead.
static const char* getoutputpath(void)
{
    static const char kFileName[] = "\\crawler_output.txt";
    static char app[MAX_PATH] = {0};

    if (*app == 0)
    {
        // Returns 0 on failure and the buffer size when the path was truncated.
        unsigned long len = GetModuleFileName(NULL, app, MAX_PATH);
        char* slash = NULL;
        if (len > 0 && len < MAX_PATH)
        {
            slash = strrchr(app, '\\');
        }

        // sizeof(kFileName) counts the terminating NUL.
        if (slash != NULL && (size_t)(slash - app) + sizeof(kFileName) <= MAX_PATH)
        {
            // Replace everything from the last backslash on with the file name.
            strcpy(slash, kFileName);
        }
        else
        {
            strcpy(app, kFileName + 1); // skip the leading backslash
        }
    }

    return app;
}

// Appends one <item> record to the debug output file (see getoutputpath()).
// `module` becomes the item's type attribute; value1..value5 are written as
// CDATA sections. A NULL argument is written as "(null)".
// Does nothing when the file cannot be opened.
static void OutputDebugInfo(const char* module, 
                            const char* value1, 
                            const char* value2, 
                            const char* value3, 
                            const char* value4, 
                            const char* value5)
{
    FILE* pfile = fopen(getoutputpath(), "a");
    if (pfile == NULL)
    {
        return;
    }

    // Format straight into the stream: the values can be arbitrarily long
    // page content, so no fixed-size intermediate buffer is used.
    fprintf(pfile,
        "\t<item type=\"%s\">\r\n"
        "\t\t<value1><![CDATA[%s]]></value1>\r\n"
        "\t\t<value2><![CDATA[%s]]></value2>\r\n"
        "\t\t<value3><![CDATA[%s]]></value3>\r\n"
        "\t\t<value4><![CDATA[%s]]></value4>\r\n"
        "\t\t<value5><![CDATA[%s]]></value5>\r\n"
        "\t</item>\r\n"
        , module ? module : "(null)"
        , value1 ? value1 : "(null)"
        , value2 ? value2 : "(null)"
        , value3 ? value3 : "(null)"
        , value4 ? value4 : "(null)"
        , value5 ? value5 : "(null)");

    fclose(pfile);
}

//////////////////////////////////////////////////////////////////////////
// Lua binding: forwards an integer to CTemplate::SetLog.
// <- d : value (top of the Lua stack)
// -> nothing
static int SetLog(lua_State *L)
{
	int logValue = static_cast<int>(lua_tointeger(L, -1));
	CTemplate::Instance()->SetLog(logValue);

	return 0;	// no values returned to Lua
}

// Lua binding: forwards an integer to CTemplate::SetSid.
// <- d : value (top of the Lua stack)
// -> nothing
static int SetSid(lua_State *L)
{
	int sidValue = static_cast<int>(lua_tointeger(L, -1));
	CTemplate::Instance()->SetSid(sidValue);

	return 0;	// no values returned to Lua
}

// Lua binding: forwards an integer to CTemplate::SetUtf8.
// <- d : value (top of the Lua stack)
// -> nothing
static int SetUtf8(lua_State *L)
{
	int utf8Value = static_cast<int>(lua_tointeger(L, -1));
	CTemplate::Instance()->SetUtf8(utf8Value);

	return 0;	// no values returned to Lua
}

// Lua binding: forwards an integer to CTemplate::SetThread.
// <- d : value (top of the Lua stack)
// -> nothing
static int SetThread(lua_State *L)
{
	int threadValue = static_cast<int>(lua_tointeger(L, -1));
	CTemplate::Instance()->SetThread(threadValue);

	return 0;	// no values returned to Lua
}

// Lua binding: forwards an integer to CTemplate::SetCTimeOut.
// <- d : value (top of the Lua stack)
// -> nothing
static int SetCTimeOut(lua_State *L)
{
	int timeoutValue = static_cast<int>(lua_tointeger(L, -1));
	CTemplate::Instance()->SetCTimeOut(timeoutValue);

	return 0;	// no values returned to Lua
}

// Lua binding: forwards an integer to CTemplate::SetConnect.
// <- d : value (top of the Lua stack)
// -> nothing
static int SetConnect(lua_State *L)
{
	int connectValue = static_cast<int>(lua_tointeger(L, -1));
	CTemplate::Instance()->SetConnect(connectValue);

	return 0;	// no values returned to Lua
}

// Lua binding: forwards a string to CTemplate::SetUserAgent.
// <- s : useragent (top of the Lua stack)
// -> nothing
static int SetUserAgent(lua_State *L)
{
	const char* userAgent = lua_tostring(L, -1);
	CTemplate::Instance()->SetUserAgent(userAgent);

	return 0;	// no values returned to Lua
}

// Lua binding: forwards a string to CTemplate::SetSeedPage.
// <- s : seedpage (top of the Lua stack)
// -> nothing
static int SetSeedPage(lua_State *L)
{
	const char* seedPage = lua_tostring(L, -1);
	CTemplate::Instance()->SetSeedPage(seedPage);

	return 0;	// no values returned to Lua
}

// Lua binding: forwards a seed page string and two integers to
// CTemplate::SetSeedPattern.
// <- s : seedpage
// <- d : start
// <- d : end (top of the Lua stack)
// -> nothing
static int SetSeedPattern(lua_State *L)
{
	// Arguments are read relative to the top of the stack.
	const char* seedPage = lua_tostring(L, -3);
	int rangeStart = static_cast<int>(lua_tointeger(L, -2));
	int rangeEnd = static_cast<int>(lua_tointeger(L, -1));

	CTemplate::Instance()->SetSeedPattern(seedPage, rangeStart, rangeEnd);

	return 0;	// no values returned to Lua
}

// Lua binding: forwards a string to CTemplate::SetConnentType.
// <- s : connenttype (top of the Lua stack)
// -> nothing
static int SetConnentType(lua_State *L)
{
	const char* typeText = lua_tostring(L, -1);
	CTemplate::Instance()->SetConnentType(typeText);

	return 0;	// no values returned to Lua
}

// Lua binding: registers a filter string with CTemplate::SetFilter under the
// FT_SKIP filter type.
// <- s : filter (top of the Lua stack)
// -> nothing
static int SetFilterSkip(lua_State *L)
{
	const char* filterText = lua_tostring(L, -1);
	CTemplate::Instance()->SetFilter(filterText, FT_SKIP);

	return 0;	// no values returned to Lua
}
// Lua binding: registers a filter string with CTemplate::SetFilter under the
// FT_PARSE filter type.
// <- s : filter (top of the Lua stack)
// -> nothing
static int SetFilterParse(lua_State *L)
{
	const char* filterText = lua_tostring(L, -1);
	CTemplate::Instance()->SetFilter(filterText, FT_PARSE);

	return 0;	// no values returned to Lua
}
// Lua binding: registers a filter string with CTemplate::SetFilter under the
// FT_STORE filter type.
// <- s : filter (top of the Lua stack)
// -> nothing
static int SetFilterStore(lua_State *L)
{
	const char* filterText = lua_tostring(L, -1);
	CTemplate::Instance()->SetFilter(filterText, FT_STORE);

	return 0;	// no values returned to Lua
}

// Lua binding: queues a URL as a crawl task.
// <- s : url (top of the Lua stack)
// -> nothing
//
// A non-string argument is reported through lua_message() and ignored.
// URLs for which CBloomFilter::find() returns true are skipped (presumably
// already seen - confirm). Otherwise the task is looked up with
// CPageStat::Get(), built with CPageStat::Build() when the lookup fails, and
// pushed to CPriorizer.
static int AddTask(lua_State *L)
{
	const char* strValue = lua_tostring(L,-1);
	if(strValue==NULL)
	{
		// lua_tostring yields NULL for values that are neither strings nor
		// numbers; the code below cannot take a NULL url.
		lua_message("invalid parameter arg1=null");
		return 0;
	}

	if(!CBloomFilter::Instance()->find(strValue))
	{
		CPageStat objTask;
		objTask.m_strUrl=strValue;
		if(!CPageStat::Get(objTask))
		{
			// No refer is available here, hence the NULL argument.
			CPageStat::Build(objTask,strValue,NULL,true,
				CTemplate::Instance()->GetTaskTypeX(strValue,f_strlen(strValue)));
		}
		CPriorizer::Push(objTask);
	}
	return 0;
}

// Lua binding: queues a URL as a crawl task, with a refer.
// <- s : url
// <- s : refer (top of the Lua stack)
// -> nothing
//
// A non-string url is reported through lua_message() and ignored; the refer
// is passed on as-is. URLs for which CBloomFilter::find() returns true are
// skipped (presumably already seen - confirm). Otherwise the task is looked
// up with CPageStat::Get(), built with CPageStat::Build() when the lookup
// fails, and pushed to CPriorizer.
static int AddTaskEx(lua_State *L)
{
	const char* strValue1 = lua_tostring(L,-2);
	const char* strValue2 = lua_tostring(L,-1);
	if(strValue1==NULL)
	{
		// lua_tostring yields NULL for values that are neither strings nor
		// numbers; the code below cannot take a NULL url.
		lua_message("invalid parameter arg1=null");
		return 0;
	}

	if(!CBloomFilter::Instance()->find(strValue1))
	{
		CPageStat objTask;
		objTask.m_strUrl=strValue1;
		if(!CPageStat::Get(objTask))
		{
			CPageStat::Build(objTask,strValue1,strValue2,true,
				CTemplate::Instance()->GetTaskTypeX(strValue1,f_strlen(strValue1)));
		}
		CPriorizer::Push(objTask);
	}
	return 0;
}

// Lua binding: passes a pattern and a function name to
// CTemplateEx::SetStoreCallback.
// <- s : pattern
// <- s : foo (function name, top of the Lua stack)
// -> nothing
static int RegisterStoreCallback(lua_State *L)
{
	const char* pattern = lua_tostring(L, -2);
	const char* callbackName = lua_tostring(L, -1);

	CTemplateEx::SetStoreCallback(pattern, callbackName);

	return 0;	// no values returned to Lua
}

// Lua binding: passes a task pattern, a content pattern and a function name
// to CTemplateEx::SetStoreCallbackEx.
// <- s : task pattern
// <- s : content pattern
// <- s : foo (function name, top of the Lua stack)
// -> nothing
static int RegisterStoreCallbackEx(lua_State *L)
{
	const char* taskPattern = lua_tostring(L, -3);
	const char* contentPattern = lua_tostring(L, -2);
	const char* callbackName = lua_tostring(L, -1);

	CTemplateEx::SetStoreCallbackEx(taskPattern, contentPattern, callbackName);

	return 0;	// no values returned to Lua
}

// Lua binding: passes a task pattern, a content pattern and a function name
// to CTemplateEx::SetStoreCallbackEx2.
// <- s : task pattern
// <- s : content pattern
// <- s : foo (function name, top of the Lua stack)
// -> nothing
static int RegisterStoreCallbackEx2(lua_State *L)
{
	const char* taskPattern = lua_tostring(L, -3);
	const char* contentPattern = lua_tostring(L, -2);
	const char* callbackName = lua_tostring(L, -1);

	CTemplateEx::SetStoreCallbackEx2(taskPattern, contentPattern, callbackName);

	return 0;	// no values returned to Lua
}

// Lua binding: hands an album record to CStoreMgr::AddTaskStoreAlbum, or,
// when debug_template_file is non-zero, writes it to the debug output file.
// <- s : refer
// <- s : url
// <- s : content (top of the Lua stack)
// -> nothing
static int AddTaskStoreAlbum(lua_State *L)
{
	const char* refer = lua_tostring(L, -3);
	const char* url = lua_tostring(L, -2);
	const char* content = lua_tostring(L, -1);

	// Debug mode: dump the record instead of storing it.
	if (debug_template_file != 0)
	{
		OutputDebugInfo("AddTaskStoreAlbum", refer, url, content, "", "");
		return 0;
	}

	CStoreMgr::AddTaskStoreAlbum(refer, url, content);
	return 0;
}

// Lua binding: hands a song record to CStoreMgr::AddTaskStoreSong, or, when
// debug_template_file is non-zero, writes it to the debug output file.
// <- s : refer
// <- s : url
// <- s : title
// <- s : singer (top of the Lua stack)
// -> nothing
static int AddTaskStoreSong(lua_State *L)
{
	const char* refer = lua_tostring(L, -4);
	const char* url = lua_tostring(L, -3);
	const char* title = lua_tostring(L, -2);
	const char* singer = lua_tostring(L, -1);

	// Debug mode: dump the record instead of storing it.
	if (debug_template_file != 0)
	{
		OutputDebugInfo("AddTaskStoreSong", refer, url, title, singer, "");
		return 0;
	}

	CStoreMgr::AddTaskStoreSong(refer, url, title, singer);
	return 0;
}

// Lua binding: hands a lyric record to CStoreMgr::AddTaskStoreLyric, or, when
// debug_template_file is non-zero, writes it to the debug output file.
// <- s : refer
// <- s : url
// <- s : lyric (top of the Lua stack)
// -> nothing
static int AddTaskStoreLyric(lua_State *L)
{
	const char* refer = lua_tostring(L, -3);
	const char* url = lua_tostring(L, -2);
	const char* lyric = lua_tostring(L, -1);

	// Debug mode: dump the record instead of storing it.
	if (debug_template_file != 0)
	{
		OutputDebugInfo("AddTaskStoreLyric", refer, url, lyric, "", "");
		return 0;
	}

	CStoreMgr::AddTaskStoreLyric(refer, url, lyric);
	return 0;
}

// Lua binding: hands a link record to CStoreMgr::AddTaskStoreLink, or, when
// debug_template_file is non-zero, writes it to the debug output file.
// <- s : furl-refer
// <- s : furl
// <- s : refer (top of the Lua stack)
// -> nothing
static int AddTaskStoreLink(lua_State *L)
{
	const char* furlRefer = lua_tostring(L, -3);
	const char* furl = lua_tostring(L, -2);
	const char* refer = lua_tostring(L, -1);

	// Debug mode: dump the record instead of storing it.
	if (debug_template_file != 0)
	{
		OutputDebugInfo("AddTaskStoreLink", furlRefer, furl, refer, "", "");
		return 0;
	}

	CStoreMgr::AddTaskStoreLink(furlRefer, furl, refer);
	return 0;
}

// Lua binding: hands an info record to CStoreMgr::AddTaskStoreInfo, or, when
// debug_template_file is non-zero, writes it to the debug output file.
// <- s : refer
// <- s : url
// <- s : title
// <- s : content
// <- s : subtype (top of the Lua stack)
// -> nothing
static int AddTaskStoreInfo(lua_State *L)
{
	const char* refer = lua_tostring(L, -5);
	const char* url = lua_tostring(L, -4);
	const char* title = lua_tostring(L, -3);
	const char* content = lua_tostring(L, -2);
	const char* subtype = lua_tostring(L, -1);

	// Debug mode: dump the record instead of storing it.
	if (debug_template_file != 0)
	{
		OutputDebugInfo("AddTaskStoreInfo", refer, url, title, content, subtype);
		return 0;
	}

	CStoreMgr::AddTaskStoreInfo(refer, url, title, content, subtype);
	return 0;
}

// Lua binding: stores an integer in debug_template_file. A non-zero value
// makes the AddTaskStore* bindings write to the debug output file instead of
// calling CStoreMgr.
// <- d : value (top of the Lua stack)
// -> nothing
static int SetDebugLua(lua_State *L)
{
	debug_template_file = static_cast<int>(lua_tointeger(L, -1));
	return 0;	// no values returned to Lua
}

//<--string
//--> null
static int LetMeSee(lua_State *L)
{
	const char* strValue1 = lua_tostring(L,-1);
	if(strValue1==NULL)
	{
		lua_message("invalid parameter arg1=null");
		return 0;
	};

	DebugString("[PageCrawler]","LetMeSee:(%s)",strValue1);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -