📄 documentclassifier.java

📁 常用机器学习算法,java编写源代码,内含常用分类算法,包括说明文档

💻 JAVA

字号:

/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).   http://www.cs.umass.edu/~mccallum/mallet   This software is provided under the terms of the Common Public License,   version 1.0, as published by http://www.opensource.org.  For further   information, see the file `LICENSE' included with this distribution. *//**	 Takes a list of directory names as arguments, (each directory	 should contain all the text files for each class), performs a random train/test split,	 trains a classifier, and outputs accuracy on the testing and training sets.   @author Andrew McCallum <a href="mailto:mccallum@cs.umass.edu">mccallum@cs.umass.edu</a> */package edu.umass.cs.mallet.base.classify.examples;import edu.umass.cs.mallet.base.classify.*;import edu.umass.cs.mallet.base.types.*;import edu.umass.cs.mallet.base.pipe.*;import edu.umass.cs.mallet.base.pipe.iterator.*;import java.io.*;public class DocumentClassifier{	static public void main (String[] args)	{		// Create Java File objects for each of the arguments		File[] directories = new File[args.length];		for (int i = 0; i < args.length; i++)			directories[i] = new File (args[i]);		// Create the pipeline that will take as input {data = File, target = String for classname}		// and turn them into {data = FeatureVector, target = Label}		Pipe instancePipe = new SerialPipes (new Pipe[] {			new Target2Label (),							  // Target String -> class label			new Input2CharSequence (),				  // Data File -> String containing contents			new CharSubsequence (CharSubsequence.SKIP_HEADER), // Remove UseNet or email header			new CharSequence2TokenSequence (),  // Data String -> TokenSequence			new TokenSequenceLowercase (),		  // TokenSequence words lowercased			new TokenSequenceRemoveStopwords (),// Remove stopwords from sequence			new TokenSequence2FeatureSequence(),// Replace each Token with a feature index			new FeatureSequence2FeatureVector(),// Collapse word order into a "feature vector"			new PrintInputAndTarget(),		});		// Create an empty list of the training instances		InstanceList ilist = new InstanceList (instancePipe);		// Add all the files in the directories to the list of instances.		// The Instance that goes into the beginning of the instancePipe		// will have a File in the "data" slot, and a string from args[] in the "target" slot.		ilist.add (new FileIterator (directories, FileIterator.STARTING_DIRECTORIES));		// Make a test/train split; ilists[0] will be for training; ilists[1] will be for testing		InstanceList[] ilists = ilist.split (new double[] {.5, .5});		// Create a classifier trainer, and use it to create a classifier		ClassifierTrainer naiveBayesTrainer = new NaiveBayesTrainer ();		Classifier classifier = naiveBayesTrainer.train (ilists[0]);		System.out.println ("The training accuracy is "+ classifier.getAccuracy (ilists[0]));		System.out.println ("The testing accuracy is "+ classifier.getAccuracy (ilists[1]));	}	}

💿 文件大小 5351 K

👤 上传用户 lihuitao1987

📂 所属分类数学计算

🏷️ 相关标签

#java #机器学习 #分类算法 #文档

⌨️ 快捷键说明

复制代码 Ctrl + C

搜索代码 Ctrl + F

全屏模式 F11

切换主题 Ctrl + Shift + D

显示快捷键 ?

增大字号 Ctrl + =

减小字号 Ctrl + -