⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 savesentencedlg.cpp

📁 将网页文件中的词汇提取出来
💻 CPP
📖 第 1 页 / 共 2 页
字号:
		strTemp = "";
		int i = strData.GetLength();
		strData.TrimLeft();
		strData.TrimRight(); 
		m_nLength = strData.GetLength();
		if(m_nLength == 0)
		{
			m_nLength = 0;
			continue;
		}
		if(strstr(strData,"<STYLE") || strstr(strData,"<style"))//格式过滤
		{
			if(strstr(strData,"</STYLE>") || strstr(strData,"</style>"))
				continue;
			while(inFile.ReadString(strData))
			{
				if(strstr(strData,"</STYLE>") || strstr(strData,"</style>"))
				{
					strData.Empty();
					break;
				}
			}
			continue;
		}

		if(strstr(strData,"<SCRIPT") || strstr(strData,"<script")) //JavaScript过滤
		{
			if(strstr(strData,"</SCRIPT>") || strstr(strData,"</script>"))
				continue;
			while(inFile.ReadString(strData))
			{
				if(strstr(strData,"</SCRIPT>") || strstr(strData,"</script>"))
				{
					strData.Empty();
					break;
				}
			}
			continue;
		}

		num1 = strData.Find("<!--");//注释过滤
		num2 = strData.Find("-->");
		if(!(num1 == -1) && !(num2 == -1))
		{
			strData = strData.Mid(num2+3);
			m_nLength = strData.GetLength();
			if(m_nLength == 0)
			{
				m_nLength = 0;
				continue;
			}
		}

		num3 = strData.Find("<");//标记过滤
		num4 = strData.Find(">");
		m_nLength = strData.GetLength();
		if(m_nLength == 0)
		{
			m_nLength = 0;
			continue;
		}

		while(!(num3 == -1) && !(num4 == -1))
		{
			if(num3 == 0)
			{
				strData = strData.Mid(num4+1);
				m_nLength = strData.GetLength();
				if(m_nLength == 0)
				{
					m_nLength = 0;
					break;
				}
				num3 = strData.Find("<");//标记过滤
				num4 = strData.Find(">");
				continue;
			}
			else
			{
				strContent = strData.Left(num3);
				strData = strData.Mid(num4+1);
			    temp = strContent.Replace("&nbsp;"," ");
				temp = strContent.Replace("&lt;","<");
				temp = strContent.Replace("&gt;",">");
				temp = 0;
      			strZong+=strContent;
				strContent = "";
				m_nLength = strData.GetLength();
				if(m_nLength == 0)
				{
					m_nLength = 0;
					break;
				}
				num3 = strData.Find("<");//标记过滤
				num4 = strData.Find(">");
				continue;
			}
		}	
		m_nLength = strData.GetLength();
		if(m_nLength == 0)
			{
				m_nLength = 0;
				continue;
			}
		if(!(num3 == -1) && (num4 == -1))
		{
			strTemp = strData;
			strTemp.TrimLeft();
			strTemp.TrimRight();
			continue;
		}
		else if(num3 == -1 && num4 == -1)//没有标记输出
		{
			strContent = strData;
			temp = strContent.Replace("&nbsp;"," ");
			temp = strContent.Replace("&lt;","<");
			temp = strContent.Replace("&gt;",">");
			temp = 0;
			strZong+=strContent;
			strContent = "";
			continue;
		}
	}
        inFile.Close();
	
	    //AfxMessageBox("解析完成!");
       
		strZong = strTemp+strZong;
		strTemp = "";
	
		strZong.TrimLeft();
		strZong.TrimRight(); 
	
	  	 
	    strContent = strZong;
	    strContent.Replace(" ",",");
    	strContent.Replace(",",",");
		strContent.Replace(";",",");
		strContent.Replace("。",",");
		strContent.Replace("?",",");
    	strContent.Replace("!",",");
		strContent.Replace(":",",");
				
		int weizhi;
        CString shuchu;
        weizhi=strContent.Find(",");
        while(!(weizhi==-1))
		{
		   shuchu=strContent.Left(weizhi);
           strContent=strContent.Mid(weizhi+2);
		       
	        if(shuchu.GetLength()>2)
			{ 

	             m_input=shuchu+"\r\n";
	             save.Save(m_input);
	    	     m_input="";
			};
				   
			shuchu="";
			weizhi=strContent.Find(",");
		}
        shuchu=strContent;
		if(shuchu.GetLength()>2)
		{ 
		    m_input=shuchu+"\r\n";
		    save.Save(m_input);
			m_input="";
		};
		shuchu="";    
		strContent = "";
        
        //AfxMessageBox("语句已经保存!");
        //CDialog::OnOK();
	

}

// Counts the files found below m_strOpenPath and appends the result text
// to m_strOpenPath itself (the member ends up as "<path>有 N个文件!").
//
// Side effects visible in this function:
//   - changes the process working directory to m_strOpenPath;
//   - wenjianshu is incremented by Search_Wenjian(); it is NOT reset here,
//     so the caller is presumably expected to zero it first -- TODO confirm.
void CSaveSentenceDlg::wenjiancount()
{
    // Search_Wenjian() scans relative to the current directory, so only run
    // it when the directory change succeeded; otherwise the count would come
    // from an unrelated directory.
    if (_chdir(m_strOpenPath) == 0)
    {
        Search_Wenjian();
    }

    // Append the message instead of using the path as part of the format
    // string: a '%' inside the directory name would otherwise be parsed as
    // a conversion specifier.
    m_strOpenPath.AppendFormat("有 %d个文件!", wenjianshu);
    //MessageBox(m_strOpenPath);
}


// Recursively walks the current working directory and increments
// wenjianshu once for every entry that is not a directory.
//
// The traversal works by changing the process working directory: each
// sub-directory is entered with _chdir(), scanned by a recursive call, and
// left again with _chdir(".."). Directories whose name starts with '.'
// (this covers "." and "..", and also any other dot-prefixed directory)
// are not descended into. Non-directory entries are counted regardless of
// their name.
void CSaveSentenceDlg::Search_Wenjian()
{
    struct _finddata_t filestruct;   // information about the current entry

    // "*" matches every file and sub-directory of the current directory.
    // _findfirst returns an intptr_t; keeping it in a long would truncate
    // the handle on 64-bit builds.
    intptr_t handle = _findfirst("*", &filestruct);

    // -1 means nothing could be enumerated: nothing to count here.
    if (handle == -1)
        return;

    // The first entry and all following ones are handled identically.
    do
    {
        if (::GetFileAttributes(filestruct.name) & FILE_ATTRIBUTE_DIRECTORY)
        {
            // NOTE: a failed GetFileAttributes call also lands in this
            // branch (its error value has every bit set); the _chdir check
            // below then skips such an entry.
            if (filestruct.name[0] != '.')
            {
                // Recurse only if the directory was really entered.
                // Otherwise the recursive call would rescan this same
                // directory forever, and the _chdir("..") afterwards would
                // leave the tree being scanned.
                if (_chdir(filestruct.name) == 0)
                {
                    Search_Wenjian();
                    _chdir("..");   // back to the directory being enumerated
                }
            }
        }
        else
        {
            // Not a directory: count it.
            wenjianshu++;
            //m_MyProgress.SetPos((int)(wenjianshu*100/2000));
        }
    } while (_findnext(handle, &filestruct) == 0);

    _findclose(handle);   // release the search handle
}

//DEL void CSaveSentenceDlg::OnLianjie() 
//DEL {
//DEL  
//DEL     //public:
//DEL 	// TODO: Add your control notification handler code here
//DEL 	CString m_strExePath;
//DEL 	//private:
//DEL     SQLHENV m_Henv;		//环境句柄
//DEL     SQLHDBC m_Hdbc;		//连接句柄
//DEL     SQLHSTMT m_Hstmt;		//语句句柄
//DEL     SQLRETURN m_Result;	//返回句柄
//DEL 	char path[MAX_PATH] = {'\0'};
//DEL     GetModuleFileName(NULL,path,MAX_PATH);//得到执行文件名
//DEL     m_strExePath.Format("%s", path);
//DEL     int iPosition;
//DEL     iPosition = m_strExePath.ReverseFind('\\');
//DEL     m_strExePath = m_strExePath.Left(iPosition + 1);
//DEL 	CString strAccessPath = m_strExePath + "sentence.mdb";
//DEL 	int iLen =strAccessPath.GetLength();
//DEL 	char cpConfig[MAX_PATH];
//DEL 
//DEL     strcpy(cpConfig,"DSN=sentence\0");
//DEL 	strcpy(cpConfig+13,"DBQ=");
//DEL 	strcpy(cpConfig+17,strAccessPath);
//DEL 	strcpy(cpConfig+17+iLen,"\0");
//DEL     strcpy(cpConfig+18+iLen,"DEFAULTDIR=");
//DEL 	strcpy(cpConfig+18+iLen+11,m_strExePath);
//DEL 	strcpy(cpConfig+28+iLen+m_strExePath.GetLength(),"\0\0");
//DEL 
//DEL 	if(!SQLConfigDataSource( NULL, ODBC_ADD_DSN,
//DEL         "Microsoft Access Driver (*.mdb)\0",cpConfig))
//DEL 	{
//DEL 	  AfxMessageBox("失败!");
//DEL 	}
//DEL 
//DEL 	//初始化环境
//DEL     SQLAllocHandle(SQL_HANDLE_ENV,SQL_NULL_HANDLE,&m_Henv);
//DEL     SQLSetEnvAttr(m_Henv,SQL_ATTR_ODBC_VERSION,(void*)SQL_OV_ODBC3,0);
//DEL     //建立连接
//DEL     SQLAllocHandle(SQL_HANDLE_DBC,m_Henv,&m_Hdbc);
//DEL     //m_Result=SQLConnect(m_Hdbc,(LPBYTE)"测试数据库",SQL_NTS,(LPBYTE)""/*用户名*/,SQL_NTS,(LPBYTE)""/*密码*/,SQL_NTS);
//DEL 
//DEL     m_Result=SQLConnect(m_Hdbc,(LPBYTE)"sentence",SQL_NTS,(LPBYTE)""/*用户名*/,SQL_NTS,(LPBYTE)""/*密码*/,SQL_NTS);
//DEL 
//DEL 
//DEL      //分配一个语句句柄
//DEL     SQLAllocHandle(SQL_HANDLE_STMT,m_Hdbc,&m_Hstmt);
//DEL 	
//DEL }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -