⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sea.java

📁 Code for classifying Tamil (Indian-language) web pages by topic
💻 JAVA
字号:
import java.util.regex.*;
import java.io.*;
import java.util.StringTokenizer;
/**
 * Keyword-based topic classifier for web pages written in transliterated Tamil.
 *
 * <p>The program asks for a file name on standard input, scans every line of
 * that file for the keywords of five topic lists (medicine, business, biotech,
 * music, education), prints one hit count per topic and then prints which
 * topic(s) the page is judged to belong to.
 */
public class sea{

	/** Keywords counted towards the "biotech" score. */
	private static final String[] AGRI = {
		"payO",
		"vilai",
		"nilam",
		"e1rivAyu",
		"vithai",
		"u1yir",
		"chakthi",
		"paoruL",
		"A1yvu",
		"varuvAy"
	};

	/** Keywords counted towards the "medicine" score. */
	private static final String[] MED = {
		"u1dampu",
		"A1rOkkiyam",
		"kAychchal",
		"e1thirppu chakthi",
		"vairas",
		"kirumikaL",
		"dAkdar",
		"pLU",
		"chuththam",
		"raththaO1ddam",
		"raththam"
	};

	/** Keywords counted towards the "edu" score. */
	private static final String[] EDU = {
		"mAnNavarkaL",
		"padi",
		"mAnNava",
		"mAnNaviyar",
		"kalvi",
		"padippu",
		"thErchchi",
		"pAdaththiddam",
		"palkalai",
		"A1yvu"
	};

	/** Keywords counted towards the "business" score. */
	private static final String[] BUSI = {
		"panGku", "chaNthai ", "charivu", "panGkukaL", "puLLi", "vanGki",
		"kadan", "E1RRumathi", "vaddi", "rEd", "chathavItham"
	};

	/** Keywords counted towards the "music" score. */
	private static final String[] MUSIC = {
		"ichai",
		"i1nnichai",
		"vithvAn",
		"viruthu",
		"kachchEri",
		"pAdal",
		"pAd",
		"rachi"
	};

	/** Two scores must both exceed this value for a "both X and Y" verdict. */
	private static final int THRESHOLD = 20;

	/**
	 * Reads a file name from standard input, scores the file against every
	 * keyword list and prints the scores followed by the classification.
	 *
	 * @param args ignored
	 * @throws IOException if no file name can be read from standard input, or
	 *         if the named file cannot be opened or read
	 */
	public static void main(String[] args) throws IOException{
		// System.in is deliberately left open: this program does not own it.
		BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
		System.out.print("Enter the webpage as *.html to find its class ");
		String filenam = in.readLine();
		if (filenam == null) {
			throw new IOException("no file name was supplied on standard input");
		}

		int medicine = 0;
		int business = 0;
		int biotech = 0;
		int music = 0;
		int education = 0;

		// Score the whole page line by line (an empty file simply scores zero
		// everywhere) and always release the file handle.
		BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(filenam)));
		try {
			String line;
			while ((line = br.readLine()) != null) {
				medicine += countKeywordHits(MED, line);
				business += countKeywordHits(BUSI, line);
				biotech += countKeywordHits(AGRI, line);
				music += countKeywordHits(MUSIC, line);
				education += countKeywordHits(EDU, line);
			}
		} finally {
			br.close();
		}

		System.out.println("medicine:" + medicine);
		System.out.println("business:" + business);
		System.out.println("biotech:" + biotech);
		System.out.println("music:" + music);
		System.out.println("edu:" + education);

		// Single-class verdict: a topic wins only when it strictly beats all
		// the others, so a tie for first place prints no single-class line.
		if (medicine > business && medicine > biotech && medicine > music && medicine > education) {
			System.out.println("The page belong to medicine");
		}
		if (business > medicine && business > biotech && business > music && business > education) {
			System.out.println("The page belong to business");
		}
		if (biotech > medicine && biotech > business && biotech > music && biotech > education) {
			System.out.println("The page belong to biotech");
		}
		if (music > medicine && music > business && music > biotech && music > education) {
			System.out.println("The page belong to music");
		}
		if (education > medicine && education > business && education > music && education > biotech) {
			System.out.println("The page belong to education");
		}

		// Either nothing matched at all, or report the first medicine pair
		// whose two scores both exceed the threshold.
		if (medicine == 0 && business == 0 && biotech == 0 && music == 0 && education == 0) {
			System.out.println("The page belong to unknown category");
		} else if (medicine > THRESHOLD && business > THRESHOLD) {
			System.out.println("The page belong to both medicine and business");
		} else if (medicine > THRESHOLD && biotech > THRESHOLD) {
			System.out.println("The page belong to both medicine and biotech");
		} else if (medicine > THRESHOLD && music > THRESHOLD) {
			System.out.println("The page belong to both medicine and music");
		} else if (medicine > THRESHOLD && education > THRESHOLD) {
			System.out.println("The page belong to both medicine and education");
		}

		// The pairs that do not involve medicine form a second, independent
		// chain: at most one of them is reported, in addition to whatever the
		// chain above printed.
		if (business > THRESHOLD && biotech > THRESHOLD) {
			System.out.println("The page belong to both biotech and business");
		} else if (business > THRESHOLD && music > THRESHOLD) {
			System.out.println("The page belong to both business and music");
		} else if (business > THRESHOLD && education > THRESHOLD) {
			System.out.println("The page belong to both business and education");
		} else if (biotech > THRESHOLD && music > THRESHOLD) {
			System.out.println("The page belong to both biotech and music");
		} else if (biotech > THRESHOLD && education > THRESHOLD) {
			System.out.println("The page belong to both biotech and education");
		} else if (music > THRESHOLD && education > THRESHOLD) {
			System.out.println("The page belong to both music and education");
		}
	}

	/**
	 * Counts the non-overlapping literal occurrences of every keyword in the text.
	 *
	 * <p>Each keyword is counted on its own, so a keyword that is a substring
	 * of another one in the same list (for example "pAd" inside "pAdal")
	 * contributes a hit of its own.
	 *
	 * @param keywords the keyword list to look for; every entry is visited
	 * @param text the text to search
	 * @return the total number of hits over all keywords
	 */
	private static int countKeywordHits(String[] keywords, String text) {
		int hits = 0;
		// Bound the loop by the array length, not by the length of a keyword.
		for (int k = 0; k < keywords.length; k++) {
			String keyword = keywords[k];
			if (keyword.length() == 0) {
				continue; // an empty keyword would match forever
			}
			int at = text.indexOf(keyword);
			while (at >= 0) {
				hits++;
				// Resume after the match so occurrences never overlap.
				at = text.indexOf(keyword, at + keyword.length());
			}
		}
		return hits;
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -