📄 templateex.cpp
字号:
#include "stdafx.h"
#include "TemplateEx.h"
#include "Template.h"
#include "StoreMgr.h"
#include "BloomFilter.h"
#include "PageStat.h"
#include "Priorizer.h"
#include "Crawler.h"
#include "Wininet.h"
#include <fstream>
//////////////////////////////////////////////////////////////////////////
// Lua debugging helpers
// 2007.4.17, sunwang
// Reports a message to both the debug output channel and the error log.
// msg : text to report; it is formatted with "%s" in both sinks.
// Returns 1 unconditionally.
static int lua_message(const char* msg)
{
    // Debug channel first, then the error log.
    DebugString("[PageCrawler]","lua_report:(%s)",msg);
    LOGE("lua_report:(%s)",msg);

    return 1;
}
// Reports the error object on top of the Lua stack when `status` is non-zero.
// L      : Lua state whose top slot may hold the error object.
// status : status code to inspect; 0 means nothing is reported.
// Returns `status` unchanged. When a report is made, the error object is
// popped from the stack.
static int lua_report (lua_State *L, int status) {
    // Nothing to report: success status, or no error object on the stack.
    if (status == 0 || lua_isnil(L, -1)) {
        return status;
    }

    const char *text = lua_tostring(L, -1);
    if (text == NULL) {
        text = "(error object is not a string)";
    }
    lua_message(text);
    lua_pop(L, 1);

    return status;
}
// JavaScript support
// <-- expression
// --> string
// JS error reporter: forwards `message` to the debug output channel and the
// error log. The `cx` and `report` arguments are not used here.
static void Js_ErrorReporter(JSContext *cx, const char *message, JSErrorReport *report)
{
    DebugString("[PageCrawler]","js_report:(%s)",message);
    LOGE("js_report:(%s)",message);
}
// JS native "load": compiles and (unless compileOnly is set) executes each
// file named by the arguments, in order.
// cx   : JS context used for compilation and execution.
// obj  : object the scripts are compiled/executed against.
// argc : number of arguments in argv.
// argv : arguments; each is converted to a string and used as a file name.
// rval : not written by this function.
// Returns JS_TRUE when every file succeeded, JS_FALSE at the first failure.
static JSBool
Js_Load(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
{
    static JSBool compileOnly = JS_FALSE;

    for (uintN idx = 0; idx < argc; ++idx) {
        JSString *nameStr = JS_ValueToString(cx, argv[idx]);
        if (nameStr == NULL) {
            return JS_FALSE;
        }
        // Store the converted string back into the argument slot.
        argv[idx] = STRING_TO_JSVAL(nameStr);
        const char *path = JS_GetStringBytes(nameStr);
        errno = 0;

        // Install our reporter and COMPILE_N_GO for the duration of this file;
        // both are restored below regardless of the outcome.
        JSErrorReporter prevReporter = JS_SetErrorReporter(cx, Js_ErrorReporter);
        uint32 prevOptions = JS_GetOptions(cx);
        JS_SetOptions(cx, prevOptions | JSOPTION_COMPILE_N_GO);

        JSBool succeeded = JS_FALSE;
        JSScript *compiled = JS_CompileFile(cx, obj, path);
        if (compiled != NULL) {
            if (compileOnly) {
                succeeded = JS_TRUE;
            } else {
                jsval ignored;
                succeeded = JS_ExecuteScript(cx, obj, compiled, &ignored);
            }
            JS_DestroyScript(cx, compiled);
        }

        JS_SetOptions(cx, prevOptions);
        JS_SetErrorReporter(cx, prevReporter);

        if (!succeeded) {
            return JS_FALSE;
        }
    }

    return JS_TRUE;
}
// Evaluates the JavaScript source given as the top Lua stack value and pushes
// the result, converted to a string, back onto the Lua stack.
// L : Lua state; the value at index -1 is the JS source text.
// Returns 1 (one Lua result). The result is the evaluation result as a string,
// or nil when the argument is not a string, the JS engine could not be set up,
// the script failed, or the result could not be converted to a string.
//
// A fresh runtime/context/global object is created for every call and torn
// down before returning; the whole call runs under the s_csJsCall lock.
static int JsCall(lua_State* L)
{
    CIOLocker locker(&s_csJsCall);
    static JSFunctionSpec shell_functions[] = {
        {"load", Js_Load, 1},
        {0}
    };

    const char* lpszJS = lua_tostring(L,-1);
    if (lpszJS == 0)
    {
        Js_ErrorReporter(NULL,"invalid parameter arg1=null",0);
        // Always push a defined result so "return 1" never exposes whatever
        // happened to be on the stack (or an empty stack).
        lua_pushnil(L);
        return 1;
    }

    JSClass global_class = {
        "global",0,
        JS_PropertyStub,JS_PropertyStub,JS_PropertyStub,JS_PropertyStub,
        JS_EnumerateStub,JS_ResolveStub,JS_ConvertStub,JS_FinalizeStub
    };

    bool pushedResult = false;

    /*
     * You always need:
     * a runtime per process,
     * a context per thread,
     * a global object per context,
     * standard classes (e.g. Date).
     */
    JSRuntime *rt = JS_NewRuntime(0x100000);
    JSContext *cx = (rt != NULL) ? JS_NewContext(rt, 0x10000) : NULL;

    if (cx == NULL)
    {
        Js_ErrorReporter(NULL,"JS runtime/context creation error",0);
    }
    else
    {
        JS_SetErrorReporter(cx, Js_ErrorReporter);
        JSObject *global = JS_NewObject(cx, &global_class, NULL, NULL);

        if (global == NULL || !JS_InitStandardClasses(cx, global))
        {
            Js_ErrorReporter(NULL,"JS global object init error",0);
        }
        else if (!JS_DefineFunctions(cx, global, shell_functions))
        {
            Js_ErrorReporter(NULL,"JS_DefineFunctions error",0);
        }
        else
        {
            jsval rval;
            // rval is only meaningful when evaluation succeeded; failures are
            // reported through the installed error reporter.
            if (JS_EvaluateScript(cx, global, lpszJS, strlen(lpszJS),
                                  __FILE__, __LINE__, &rval))
            {
                JSString *str = JS_ValueToString(cx, rval);
                if (str != NULL)
                {
                    // lua_pushstring copies the bytes, so the result is pushed
                    // before the context is destroyed.
                    lua_pushstring(L, JS_GetStringBytes(str));
                    pushedResult = true;
                }
            }
        }

        /* For each context you've created: */
        JS_DestroyContext(cx);
    }

    if (rt != NULL)
    {
        /* For each runtime: */
        JS_DestroyRuntime(rt);
        /* And finally: */
        // NOTE(review): JS_ShutDown is kept per call, as in the original code.
        // Confirm that the engine version in use supports creating a new
        // runtime after JS_ShutDown.
        JS_ShutDown();
    }

    if (!pushedResult)
    {
        lua_pushnil(L);
    }
    return 1;
}
//////////////////////////////////////////////////////////////////////////
// Storage debugging
// Debug switch for the AddTaskStore* functions: when 0 they store through
// CStoreMgr; when non-zero they write their arguments with OutputDebugInfo
// instead. Set from Lua via SetDebugLua.
static int debug_template_file = 0;
// Returns the path of "crawler_output.txt" located in the directory of the
// running module. The path is computed on first use and cached in a static
// buffer; the returned pointer refers to that buffer.
// If the module path cannot be obtained, is truncated, contains no backslash,
// or leaves no room for the file name, the bare relative name
// "crawler_output.txt" is returned instead.
static const char* getoutputpath(void)
{
    static char app[MAX_PATH] = {0};
    if (*app == 0)
    {
        static const char kFileName[] = "crawler_output.txt";

        const unsigned long len = GetModuleFileName(NULL, app, MAX_PATH);
        if (len == 0 || len >= MAX_PATH)
        {
            // Failure, or the path was truncated (possibly unterminated).
            app[0] = 0;
        }

        char* slash = strrchr(app, '\\');
        if (slash != NULL)
        {
            // Keep the directory including its trailing backslash.
            slash[1] = 0;
        }
        else
        {
            app[0] = 0;
        }

        // sizeof(kFileName) includes the terminating NUL.
        if (strlen(app) + sizeof(kFileName) > MAX_PATH)
        {
            app[0] = 0;
        }
        strcat(app, kFileName);
    }
    return app;
}
// Appends one <item> record to the debug output file (see getoutputpath()).
// module         : written as the item's "type" attribute.
// value1..value5 : written as CDATA sections <value1>..<value5>.
// Any NULL argument is written as "(null)" rather than being handed to "%s".
// Nothing is written if the file cannot be opened.
//
// The record is formatted straight into the file, so there is no limit on the
// length of the values (the previous fixed 64 KB stack buffer could overflow).
static void OutputDebugInfo(const char* module,
                            const char* value1,
                            const char* value2,
                            const char* value3,
                            const char* value4,
                            const char* value5)
{
    static const char kMissing[] = "(null)";

    FILE* pfile = fopen(getoutputpath(), "a");
    if (pfile == NULL)
    {
        return;
    }

    fprintf(pfile,
            "\t<item type=\"%s\">\r\n"
            "\t\t<value1><![CDATA[%s]]></value1>\r\n"
            "\t\t<value2><![CDATA[%s]]></value2>\r\n"
            "\t\t<value3><![CDATA[%s]]></value3>\r\n"
            "\t\t<value4><![CDATA[%s]]></value4>\r\n"
            "\t\t<value5><![CDATA[%s]]></value5>\r\n"
            "\t</item>\r\n",
            module != NULL ? module : kMissing,
            value1 != NULL ? value1 : kMissing,
            value2 != NULL ? value2 : kMissing,
            value3 != NULL ? value3 : kMissing,
            value4 != NULL ? value4 : kMissing,
            value5 != NULL ? value5 : kMissing);

    fclose(pfile);
}
//////////////////////////////////////////////////////////////////////////
//<-
//d : value
//->
//null
// Reads the integer at the top of the Lua stack and passes it to
// CTemplate::SetLog on the shared instance. Returns 0 (no Lua results).
static int SetLog(lua_State *L)
{
    const int logValue = static_cast<int>(lua_tointeger(L, -1));

    CTemplate::Instance()->SetLog(logValue);

    return 0;
}
//<-
//d : value
//->
//null
// Reads the integer at the top of the Lua stack and passes it to
// CTemplate::SetSid on the shared instance. Returns 0 (no Lua results).
static int SetSid(lua_State *L)
{
    const int sid = static_cast<int>(lua_tointeger(L, -1));

    CTemplate::Instance()->SetSid(sid);

    return 0;
}
//<-
//d : value
//->
//null
// Reads the integer at the top of the Lua stack and passes it to
// CTemplate::SetUtf8 on the shared instance. Returns 0 (no Lua results).
static int SetUtf8(lua_State *L)
{
    const int utf8Flag = static_cast<int>(lua_tointeger(L, -1));

    CTemplate::Instance()->SetUtf8(utf8Flag);

    return 0;
}
//<-
//d : value
//->
//null
// Reads the integer at the top of the Lua stack and passes it to
// CTemplate::SetThread on the shared instance. Returns 0 (no Lua results).
static int SetThread(lua_State *L)
{
    const int threadValue = static_cast<int>(lua_tointeger(L, -1));

    CTemplate::Instance()->SetThread(threadValue);

    return 0;
}
//<-
//d : value
//->
//null
// Reads the integer at the top of the Lua stack and passes it to
// CTemplate::SetCTimeOut on the shared instance. Returns 0 (no Lua results).
static int SetCTimeOut(lua_State *L)
{
    const int timeoutValue = static_cast<int>(lua_tointeger(L, -1));

    CTemplate::Instance()->SetCTimeOut(timeoutValue);

    return 0;
}
//<-
//d : value
//->
//null
// Reads the integer at the top of the Lua stack and passes it to
// CTemplate::SetConnect on the shared instance. Returns 0 (no Lua results).
static int SetConnect(lua_State *L)
{
    const int connectValue = static_cast<int>(lua_tointeger(L, -1));

    CTemplate::Instance()->SetConnect(connectValue);

    return 0;
}
//<-
//s : useragent
//->
//null
// Reads the string at the top of the Lua stack and passes it to
// CTemplate::SetUserAgent on the shared instance. Returns 0 (no Lua results).
static int SetUserAgent(lua_State *L)
{
    const char* userAgent = lua_tostring(L, -1);

    CTemplate::Instance()->SetUserAgent(userAgent);

    return 0;
}
//<-
//s : seedpage
//->
//null
// Reads the string at the top of the Lua stack and passes it to
// CTemplate::SetSeedPage on the shared instance. Returns 0 (no Lua results).
static int SetSeedPage(lua_State *L)
{
    const char* seedPage = lua_tostring(L, -1);

    CTemplate::Instance()->SetSeedPage(seedPage);

    return 0;
}
//<-
//s : seedpage
//d : start
//d : end
//->
//null
// Reads a string (stack index -3) and two integers (-2, -1) from the Lua
// stack and passes them, in that order, to CTemplate::SetSeedPattern on the
// shared instance. Returns 0 (no Lua results).
static int SetSeedPattern(lua_State *L)
{
    const char* seedPattern = lua_tostring(L, -3);
    const int rangeStart = static_cast<int>(lua_tointeger(L, -2));
    const int rangeEnd = static_cast<int>(lua_tointeger(L, -1));

    CTemplate::Instance()->SetSeedPattern(seedPattern, rangeStart, rangeEnd);

    return 0;
}
//<-
//s : connenttype
//->
//null
// Reads the string at the top of the Lua stack and passes it to
// CTemplate::SetConnentType on the shared instance. Returns 0 (no Lua results).
static int SetConnentType(lua_State *L)
{
    const char* typeText = lua_tostring(L, -1);

    CTemplate::Instance()->SetConnentType(typeText);

    return 0;
}
//<-
//s : filter
//->
//null
// Reads the string at the top of the Lua stack and registers it through
// CTemplate::SetFilter with filter type FT_SKIP. Returns 0 (no Lua results).
static int SetFilterSkip(lua_State *L)
{
    const char* filterText = lua_tostring(L, -1);

    CTemplate::Instance()->SetFilter(filterText, FT_SKIP);

    return 0;
}
//<-
//s : filter
//->
//null
// Reads the string at the top of the Lua stack and registers it through
// CTemplate::SetFilter with filter type FT_PARSE. Returns 0 (no Lua results).
static int SetFilterParse(lua_State *L)
{
    const char* filterText = lua_tostring(L, -1);

    CTemplate::Instance()->SetFilter(filterText, FT_PARSE);

    return 0;
}
//<-
//s : filter
//->
//null
// Reads the string at the top of the Lua stack and registers it through
// CTemplate::SetFilter with filter type FT_STORE. Returns 0 (no Lua results).
static int SetFilterStore(lua_State *L)
{
    const char* filterText = lua_tostring(L, -1);

    CTemplate::Instance()->SetFilter(filterText, FT_STORE);

    return 0;
}
//<-
//s : url
//->
//null
// Queues a crawl task for the URL given as the top Lua stack value.
// L : Lua state; index -1 holds the URL string.
// Returns 0 (no Lua results).
//
// A URL already found in the bloom filter is ignored. Otherwise the task is
// looked up with CPageStat::Get; if that fails it is built with
// CPageStat::Build (no referrer). The task is then pushed to CPriorizer.
// A missing or non-string argument is reported via lua_message and ignored.
static int AddTask(lua_State *L)
{
    const char* strValue = lua_tostring(L,-1);
    if (strValue == NULL)
    {
        // lua_tostring yields NULL for values that are not strings/numbers;
        // do not hand that to the filter, the task, or f_strlen.
        lua_message("invalid parameter arg1=null");
        return 0;
    }

    if (!CBloomFilter::Instance()->find(strValue))
    {
        CPageStat objTask;
        objTask.m_strUrl = strValue;
        if (!CPageStat::Get(objTask))
        {
            CPageStat::Build(objTask,strValue,NULL,true,
                CTemplate::Instance()->GetTaskTypeX(strValue,f_strlen(strValue)));
        }
        CPriorizer::Push(objTask);
    }
    return 0;
}
//<-
//s : url
//s : refer
//->
//null
// Queues a crawl task for a URL with an explicit referrer.
// L : Lua state; index -2 holds the URL string, index -1 the referrer string.
// Returns 0 (no Lua results).
//
// A URL already found in the bloom filter is ignored. Otherwise the task is
// looked up with CPageStat::Get; if that fails it is built with
// CPageStat::Build using the referrer. The task is then pushed to CPriorizer.
// A missing or non-string URL is reported via lua_message and ignored; the
// referrer is passed through as-is (AddTask passes NULL in the same position).
static int AddTaskEx(lua_State *L)
{
    const char* strValue1 = lua_tostring(L,-2);
    const char* strValue2 = lua_tostring(L,-1);
    if (strValue1 == NULL)
    {
        // lua_tostring yields NULL for values that are not strings/numbers;
        // do not hand that to the filter, the task, or f_strlen.
        lua_message("invalid parameter arg1=null");
        return 0;
    }

    if (!CBloomFilter::Instance()->find(strValue1))
    {
        CPageStat objTask;
        objTask.m_strUrl = strValue1;
        if (!CPageStat::Get(objTask))
        {
            CPageStat::Build(objTask,strValue1,strValue2,true,
                CTemplate::Instance()->GetTaskTypeX(strValue1,f_strlen(strValue1)));
        }
        CPriorizer::Push(objTask);
    }
    return 0;
}
//<-
//s : pattern
//s : foo
//->
//null
// Reads two strings from the Lua stack (index -2, then -1) and passes them,
// in that order, to CTemplateEx::SetStoreCallback. Returns 0 (no Lua results).
static int RegisterStoreCallback(lua_State *L)
{
    const char* pattern = lua_tostring(L, -2);
    const char* callbackName = lua_tostring(L, -1);

    CTemplateEx::SetStoreCallback(pattern, callbackName);

    return 0;
}
//<-
//s : task pattern
//s : content pattern
//s : foo
//->
//null
// Reads three strings from the Lua stack (index -3, -2, -1) and passes them,
// in that order, to CTemplateEx::SetStoreCallbackEx. Returns 0 (no Lua results).
static int RegisterStoreCallbackEx(lua_State *L)
{
    const char* taskPattern = lua_tostring(L, -3);     // task pattern
    const char* contentPattern = lua_tostring(L, -2);  // content pattern
    const char* callbackName = lua_tostring(L, -1);    // foo name

    CTemplateEx::SetStoreCallbackEx(taskPattern, contentPattern, callbackName);

    return 0;
}
//<-
//s : task pattern
//s : content pattern
//s : foo
//->
//null
// Reads three strings from the Lua stack (index -3, -2, -1) and passes them,
// in that order, to CTemplateEx::SetStoreCallbackEx2. Returns 0 (no Lua results).
static int RegisterStoreCallbackEx2(lua_State *L)
{
    const char* taskPattern = lua_tostring(L, -3);     // task pattern
    const char* contentPattern = lua_tostring(L, -2);  // content pattern
    const char* callbackName = lua_tostring(L, -1);    // foo name

    CTemplateEx::SetStoreCallbackEx2(taskPattern, contentPattern, callbackName);

    return 0;
}
//<-
//s : refer
//s : url
//s : content
//->
//null
// Reads three strings from the Lua stack (index -3, -2, -1). When
// debug_template_file is 0 they are passed to CStoreMgr::AddTaskStoreAlbum;
// otherwise they are written with OutputDebugInfo under the type
// "AddTaskStoreAlbum". Returns 0 (no Lua results).
static int AddTaskStoreAlbum(lua_State *L)
{
    const char* arg1 = lua_tostring(L, -3);
    const char* arg2 = lua_tostring(L, -2);
    const char* arg3 = lua_tostring(L, -1);

    // Debug mode: dump the arguments instead of storing them.
    if (debug_template_file != 0)
    {
        OutputDebugInfo("AddTaskStoreAlbum", arg1, arg2, arg3, "", "");
        return 0;
    }

    CStoreMgr::AddTaskStoreAlbum(arg1, arg2, arg3);
    return 0;
}
//<-
//s : refer
//s : url
//s : title
//s : singer
//->
//null
// Reads four strings from the Lua stack (index -4 .. -1). When
// debug_template_file is 0 they are passed to CStoreMgr::AddTaskStoreSong;
// otherwise they are written with OutputDebugInfo under the type
// "AddTaskStoreSong". Returns 0 (no Lua results).
static int AddTaskStoreSong(lua_State *L)
{
    const char* arg1 = lua_tostring(L, -4);
    const char* arg2 = lua_tostring(L, -3);
    const char* arg3 = lua_tostring(L, -2);
    const char* arg4 = lua_tostring(L, -1);

    // Debug mode: dump the arguments instead of storing them.
    if (debug_template_file != 0)
    {
        OutputDebugInfo("AddTaskStoreSong", arg1, arg2, arg3, arg4, "");
        return 0;
    }

    CStoreMgr::AddTaskStoreSong(arg1, arg2, arg3, arg4);
    return 0;
}
//<-
//s : refer
//s : url
//s : lyric
//->
//null
// Reads three strings from the Lua stack (index -3, -2, -1). When
// debug_template_file is 0 they are passed to CStoreMgr::AddTaskStoreLyric;
// otherwise they are written with OutputDebugInfo under the type
// "AddTaskStoreLyric". Returns 0 (no Lua results).
static int AddTaskStoreLyric(lua_State *L)
{
    const char* arg1 = lua_tostring(L, -3);
    const char* arg2 = lua_tostring(L, -2);
    const char* arg3 = lua_tostring(L, -1);

    // Debug mode: dump the arguments instead of storing them.
    if (debug_template_file != 0)
    {
        OutputDebugInfo("AddTaskStoreLyric", arg1, arg2, arg3, "", "");
        return 0;
    }

    CStoreMgr::AddTaskStoreLyric(arg1, arg2, arg3);
    return 0;
}
//<-
//s : furl-refer
//s : furl
//s : refer
//->
//null
// Reads three strings from the Lua stack (index -3, -2, -1). When
// debug_template_file is 0 they are passed to CStoreMgr::AddTaskStoreLink;
// otherwise they are written with OutputDebugInfo under the type
// "AddTaskStoreLink". Returns 0 (no Lua results).
static int AddTaskStoreLink(lua_State *L)
{
    const char* arg1 = lua_tostring(L, -3);
    const char* arg2 = lua_tostring(L, -2);
    const char* arg3 = lua_tostring(L, -1);

    // Debug mode: dump the arguments instead of storing them.
    if (debug_template_file != 0)
    {
        OutputDebugInfo("AddTaskStoreLink", arg1, arg2, arg3, "", "");
        return 0;
    }

    CStoreMgr::AddTaskStoreLink(arg1, arg2, arg3);
    return 0;
}
//<-
//s : refer
//s : url
//s : title
//s : content
//s : subtype
//->
//null
// Reads five strings from the Lua stack (index -5 .. -1). When
// debug_template_file is 0 they are passed to CStoreMgr::AddTaskStoreInfo;
// otherwise they are written with OutputDebugInfo under the type
// "AddTaskStoreInfo". Returns 0 (no Lua results).
static int AddTaskStoreInfo(lua_State *L)
{
    const char* arg1 = lua_tostring(L, -5);
    const char* arg2 = lua_tostring(L, -4);
    const char* arg3 = lua_tostring(L, -3);
    const char* arg4 = lua_tostring(L, -2);
    const char* arg5 = lua_tostring(L, -1);

    // Debug mode: dump the arguments instead of storing them.
    if (debug_template_file != 0)
    {
        OutputDebugInfo("AddTaskStoreInfo", arg1, arg2, arg3, arg4, arg5);
        return 0;
    }

    CStoreMgr::AddTaskStoreInfo(arg1, arg2, arg3, arg4, arg5);
    return 0;
}
//<-
//d : value
//->
//null
// Reads the integer at the top of the Lua stack and stores it in
// debug_template_file, the switch the AddTaskStore* functions test to choose
// between storing and debug output. Returns 0 (no Lua results).
static int SetDebugLua(lua_State *L)
{
    debug_template_file = static_cast<int>(lua_tointeger(L, -1));

    return 0;
}
//<--string
//--> null
static int LetMeSee(lua_State *L)
{
const char* strValue1 = lua_tostring(L,-1);
if(strValue1==NULL)
{
lua_message("invalid parameter arg1=null");
return 0;
};
DebugString("[PageCrawler]","LetMeSee:(%s)",strValue1);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -