⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 spidercore.cpp

📁 功能强大的网络蜘蛛软件
💻 CPP
📖 第 1 页 / 共 5 页
字号:
int SpiderCore::StartupThread(string thread_name){    for(vector<geturlParam *>::iterator it=thread_info.begin(); it != thread_info.end(); it++)    {        if((*it)->thread_name == thread_name)        {            geturlParam *lp = (geturlParam *) new geturlParam;            lp->thread_number = (*it)->thread_number;            lp->thread_name = (*it)->thread_name;            lp->mapname = (*it)->mapname;            lp->table = (*it)->mapname;            lp->sc = (*it)->sc;            lp->url_flag = (*it)->url_flag;            lp->num_per_time = FETCH_NUM_DATA_PER_TIME;            lp->pevent_handle = (*it)->pevent_handle;            lp->pFunc = (*it)->pFunc;            this->DoCreateThread((void *)lp);            return 0;        }    }    cout << "Sorry,can not find such thread:" << thread_name << ", please choose verify it" << endl;    return 1;}int SpiderCore::CancelThread(string thread_name){    for(vector<geturlParam *>::iterator it=thread_info.begin(); it != thread_info.end(); it++)    {        if((*it)->thread_name == thread_name)        {            int ret = 0;            ret = pthread_cancel(*((*it)->pthread_handle));            if(ret==0)            {                cout << "cancel succeed!" << endl;                return 0;            }        }    }    cout << "can not find such thread:" << thread_name << endl;    return 1;}int SpiderCore::ListRunningThreads(){    cout << "thread_name, thread_number\n" << endl;    for (map<string, unsigned int>::iterator it=thread_number.begin(); it != thread_number.end(); it++)    {        cout << it->first << "," << it->second << endl;    }    cout << endl;    return 0;}int SpiderCore::ListStartUpThreads(){    cout << "thread_name, thread_number\n" << endl;    for(vector<geturlParam *>::iterator it=thread_info.begin(); it != thread_info.end(); it++)    {        cout << (*it)->thread_name << "," << (*it)->thread_number << endl;    }    cout << endl;    return 0;}int SpiderCore::ThreadControleCenter(string action){    if(action=="help")    {     cout << "------------------" << endl;     cout << "PID:" << getpid() << endl;     cout << "type the words below:" << endl;     cout << "liststartup list start up threads, some maybe end up" << endl;     cout << "listrunning list the running threads" << endl;     cout << "stop end up the spider program" << endl;     cout << "startthread start up a thread" << endl;     cout << "cancelthread end up a thread" << endl;     cout << "help show the actions you could do" << endl;     cout << "------------------" << endl;    }else if(action=="startthread"){     string name="";     cout << "please enter the thread name:" << endl;     cin >> name;     if(name.size()>0)     {      this->StartupThread(name);     }    }else if(action=="cancelthread"){     string name="";     cout << "please enter the thread name:" << endl;     cin >> name;     if(name.size()>0)     {        this->CancelThread(name);     }    }else if(action == "stop"){       SPIDER_STOP = true;    }else if(action == "listrunning"){        this->ListRunningThreads();    }else if(action == "liststartup"){        this->ListStartUpThreads();    }else{        cout << "Unknown Option:" << action << endl;    }    return 0;}/* *Thread Controle above */void SpiderCore::StartSpiderThreads(){    vector<string> nodes;    string content = "";    string subContent = "";    string mapname  =  "main";    int ret = 0;    unsigned int depth = 1;    typedef multimap<int, string>::iterator multimap_it;    unsigned int handle_i = 0;    geturlParam* plparamThread = 0;    pthread_t *phandle = 0;    unsigned int t_number;    pthread_cond_t *pEhandle = 0;    pthread_mutex_t *pEMutex = 0;    pthread_mutex_init(&count_lock,NULL);    fo.WriteToFileLn(spider_log_file, "", 0);    fo.WriteToFileLn(spider_insert_error_log_file, "", 0);    string logstr = "the program start...";    cout << logstr << endl;    fo.WriteToFileLn(spider_log_file, logstr, 1);    //开始遍历带正则的节点    while (depth <= this->depth)    {        typedef multimap<int, string>::iterator mapname_it;        mapname_it	sta = this->regex_mapname.lower_bound(depth),end = this->regex_mapname.upper_bound(depth);        //sta->first:multimap iterator first , number type        //sta->second:.. .. second, string type, the map regex node        while (sta != end)        {            map<string, string>::iterator map_it;            map_it = this->config_map.find(sta->second + "_childnum");//map_it childnum node            mapname = sta->second;            if (map_it == this->config_map.end())            {                string logstr = "In function StartSpiderThreads:can not find:" + sta->second + "_childnum";                fo.WriteToFileLn(spider_log_file, logstr, 1);                return;//can not find mapname            }            if (map_it->second != "0") //有子节点            {                //配置抓链接线程参数                //创建事件对象                //sta++;continue;                string mapname_url_tmp = mapname+"_url";                pEhandle = (pthread_cond_t *) new pthread_cond_t;                pthread_cond_init(pEhandle, NULL);                thread_event.insert(make_pair(mapname_url_tmp, pEhandle));                pEMutex = (pthread_mutex_t *) new pthread_mutex_t;                pthread_mutex_init(pEMutex, NULL);                thread_event_lock.insert(make_pair(mapname_url_tmp, pEMutex));                t_number = Functions::PowUInt(2, handle_i);                plparamThread = ( geturlParam* )new geturlParam;                //set up param for geturl                plparamThread->thread_number = t_number;                plparamThread->thread_name = mapname_url_tmp;                plparamThread->mapname = mapname;                plparamThread->table = mapname;                plparamThread->sc = this;                plparamThread->url_flag = "0";                plparamThread->num_per_time = FETCH_NUM_DATA_PER_TIME;                plparamThread->pevent_handle = pEhandle;                //启动抓首页链接线程                phandle = (pthread_t *)new pthread_t;                if (depth == 1)                {                    plparamThread->pFunc = FunGetMainURLProc;                    ret = pthread_create(phandle, NULL, FunGetMainURLProc, (void *)plparamThread);                }                else                {                    plparamThread->pFunc = FunGetSubURLProc;                    ret = pthread_create(phandle, NULL, FunGetSubURLProc, (void *)plparamThread);                }                if (ret)                {                    string logstr = "In function StartSpiderThreads:CreateThread " + mapname + "_url" + "failed!";                    fo.WriteToFileLn(spider_log_file, logstr, 1);                }                plparamThread->pthread_handle = phandle;                thread_number.insert(make_pair(mapname+"_url", t_number));                thread_info.push_back(plparamThread);                handle_i++;                //配置抓内容链接参数                t_number = Functions::PowUInt(2, handle_i);                //创建事件对象                string mapname_content_tmp = mapname + "_content";                pEhandle = (pthread_cond_t *) new pthread_cond_t;                pthread_cond_init(pEhandle, NULL);                thread_event.insert(make_pair(mapname_content_tmp, pEhandle));                pEMutex = (pthread_mutex_t *) new pthread_mutex_t;                pthread_mutex_init(pEMutex, NULL);                thread_event_lock.insert(make_pair(mapname_content_tmp, pEMutex));                plparamThread = ( geturlParam* )new geturlParam;                //set up param for geturl                plparamThread->thread_number = t_number;                plparamThread->thread_name = mapname_content_tmp;                plparamThread->mapname = mapname;                plparamThread->table = mapname;                plparamThread->sc = this;                plparamThread->url_flag="0";                plparamThread->num_per_time=FETCH_NUM_DATA_PER_TIME;                plparamThread->pevent_handle = pEhandle;                plparamThread->pFunc = FunGetContentURLProc;                //启动抓内容链接线程                phandle = (pthread_t *)new pthread_t;                ret=pthread_create(phandle, NULL, FunGetContentURLProc, (void *)plparamThread);                if (ret)                {                    string logstr="In function StartSpiderThreads:CreateThread " + mapname_content_tmp + "failed!";                    fo.WriteToFileLn(spider_log_file, logstr, 1);                }                plparamThread->pthread_handle = phandle;                thread_number.insert(make_pair(mapname+"_content", t_number));                thread_info.push_back(plparamThread);                handle_i++;            }            else            {                //内容抓取线程从数据库中读取资料时对互斥操作                pEMutex = (pthread_mutex_t *) new pthread_mutex_t;                pthread_mutex_init(pEMutex, NULL);                thread_event_lock.insert(make_pair(mapname, pEMutex));                for (int content_thread_i=0; content_thread_i<FETCH_CONTENT_THREADS_NUM; content_thread_i++)                {                   t_number = Functions::PowUInt(2, handle_i);                   plparamThread = ( geturlParam* )new geturlParam;                   //set up param for geturl                   plparamThread->thread_number = t_number;                   plparamThread->thread_name = mapname;                   plparamThread->mapname = mapname;                   plparamThread->table = mapname;                   plparamThread->sc = this;                   plparamThread->url_flag = "1";                   plparamThread->num_per_time = FETCH_NUM_DATA_PER_TIME;                   plparamThread->pevent_handle = NULL;                   plparamThread->pFunc = FunGetContentProc;                   plparamThread->thread_type = 1;                   phandle = (pthread_t *)new pthread_t;                   ret = pthread_create(phandle, NULL, FunGetContentProc, (void *)plparamThread);                   if (ret)                   {                      string logstr = "In function StartSpiderThreads:CreateThread " + mapname + "failed!";                      fo.WriteToFileLn(spider_log_file, logstr, 1);                   }                    plparamThread->pthread_handle = phandle;                    thread_number.insert(make_pair(mapname, t_number));                    thread_info.push_back(plparamThread);                }                handle_i++;            }            sta++;        }        depth++;    }    string action="";    while(1)    {      cout << "you could enter words, type 'help' to get the detal" << endl;      cin >> action;      if(action == "quit" || action == "q")      {         break;      }      if(action.size()>0)      {         this->ThreadControleCenter(action);      }      action = "";    }    cout << "you have typed '" << action << "',to quit navigation" << endl;    while (1)    {        if (thread_number.empty())        {            string logstr="the program finilished the target and exit!";            cout << logstr << endl;            fo.WriteToFileLn(spider_log_file, logstr, 1);            break;        }        sleep(5);    }    //释放线程句柄开辟的空间    for(vector<geturlParam *>::iterator it = thread_info.begin(); it != thread_info.end(); it++)    {        delete (*it);    }    //释放条件变量开辟的空间    for(map<string, pthread_cond_t *>::iterator it = thread_event.begin(); it != thread_event.end(); it++)    {        delete it->second;    }    //释放互斥锁开辟的空间    for(map<string, pthread_mutex_t *>::iterator it = thread_event_lock.begin(); it != thread_event_lock.end(); it++)    {        pthread_mutex_destroy(it->second);        delete it->second;    }}void SpiderCore::print_config_map(){    map<string, string>::iterator it;    string context="";    fo.WriteToFileLn("spider_config_map.txt", "config_map", 0);    for (it=this->config_map.begin();it !=this->config_map.end(); it++)    {        context = it->first + ":" + it->second;        fo.WriteToFileLn("spider_config_map.txt", context, 1);        cout << context << endl;    }}void SpiderCore::print_spider_regex_node_tree(){    multimap<int, string>::iterator it;    char depth[12]={0};    string context="";    fo.WriteToFileLn("spider_regex_node_tree.txt", "spider_regex_node_tree", 0);    for (it=this->r

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -