⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 filelistiterator.java

📁 这是MALLET的Java实现。里面有许多内容。请大家慢慢琢磨。
💻 JAVA
字号:
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
   http://www.cs.umass.edu/~mccallum/mallet
   This software is provided under the terms of the Common Public License,
   version 1.0, as published by http://www.opensource.org.  For further
   information, see the file `LICENSE' included with this distribution. */

/**
   @author Gary Huang <a href="mailto:ghuang@cs.umass.edu">ghuang@cs.umass.edu</a>
 */

package edu.umass.cs.mallet.base.pipe.iterator;

import edu.umass.cs.mallet.base.pipe.Pipe;
import edu.umass.cs.mallet.base.types.Label;
import edu.umass.cs.mallet.base.types.Instance;
import edu.umass.cs.mallet.base.types.Alphabet;
import edu.umass.cs.mallet.base.util.Strings;
import java.util.ArrayList;
import java.util.Iterator;
import java.net.URI;
import java.util.regex.*;
import java.io.*;

/**
 * An iterator that generates instances for a pipe from a list of files.
 * Each generated instance carries the File as its data, the file's URI as
 * its name, and a target derived from the file's parent directory path by
 * a user-specified regular expression pattern.
 *
 *  @author Gary Huang <a href="mailto:ghuang@cs.umass.edu">ghuang@cs.umass.edu</a>
 */
public class FileListIterator extends AbstractPipeInputIterator
{
    /** Optional filter deciding which files are kept; null accepts every file. */
    FileFilter fileFilter;
    /** The accepted files (java.io.File objects), in the order they were supplied. */
    ArrayList fileArray;
    /** Iterator over fileArray; drives hasNext(), nextFile() and nextInstance(). */
    Iterator subIterator;
    /** Set target slot to string coming from 1st group of this Pattern. */
    Pattern targetPattern;
    /** Number of leading characters dropped from the path when the
     *  STARTING_DIRECTORIES pattern is in use; 0 when no prefix is removed. */
    int commonPrefixIndex;

    // xxx Note that these are specific to UNIX directory delimiter characters!  Fix this.

    /** Use as label names the directories of the given files,
     * optionally removing common prefix of all starting directories.
     * This is a sentinel: it is recognized by object identity in
     * nextInstance() and is never used for actual matching.
     */
    public static final Pattern STARTING_DIRECTORIES = Pattern.compile ("_STARTING_DIRECTORIES_");
    /** Use as label names the first directory in the filename. */
    public static final Pattern FIRST_DIRECTORY = Pattern.compile ("/?([^/]*)/.+");
    /** Use as label name the last directory in the filename. */
    public static final Pattern LAST_DIRECTORY = Pattern.compile(".*/([^/]+)/[^/]+"); // was ("([^/]*)/[^/]+");
    /** Use as label names all the directory names in the filename. */
    public static final Pattern ALL_DIRECTORIES = Pattern.compile ("^(.*)/[^/]+");

    /* Pass null as targetPattern to get null targets */

    /**
     * Construct an iterator over the given array of Files.
     *
     * The instances constructed from the files are returned in the same order
     * as they appear in the given array.
     *
     * @param files  Array of files from which to construct instances
     * @param fileFilter   class implementing interface FileFilter that will decide which names to accept.
     *                     May be null.
     * @param targetPattern  regex Pattern whose first parenthesized group on matching is taken
     *                       to be the target value of the generated instance.  It is applied with
     *                       matcher.find() to the file's parent directory path (File.getParent()).
     *                       NOTE(review): the predefined patterns end in a "/name" component, so
     *                       when matched against the parent path that component is the last
     *                       directory rather than the file name; confirm this is intended.
     *                       May be null, in which case every target is null.
     * @param removeCommonPrefix boolean that modifies the behavior of the STARTING_DIRECTORIES
     *                           pattern, removing the common prefix of all initially specified
     *                           files' paths, leaving the remainder of each parent path as the
     *                           target value.
     * @throws IllegalArgumentException if an entry is a directory or does not exist
     */
    public FileListIterator(File[] files, FileFilter fileFilter,
                            Pattern targetPattern, boolean removeCommonPrefix)
    {
        this.fileFilter = fileFilter;
        this.fileArray = new ArrayList();
        this.targetPattern = targetPattern;

        fillFileArrayAssignCommonPrefixIndexAndSubIterator(files, removeCommonPrefix);
    }

    /**
     * Same as the File[] constructor, with the files given by name.
     * The names are converted with FileIterator.stringArray2FileArray.
     */
    public FileListIterator(String[] filenames, FileFilter fileFilter,
                            Pattern targetPattern, boolean removeCommonPrefix)
    {
        this(FileIterator.stringArray2FileArray(filenames), fileFilter,
             targetPattern, removeCommonPrefix);
    }

    /**
     * Construct a FileListIterator with the file containing the list of files, which
     * contains one filename per line.  Each line is trimmed; reading stops at the
     * end of the list file or at the first blank line, whichever comes first.
     *
     * The instances constructed from the filelist are returned in the same order
     * as listed.
     *
     * @throws FileNotFoundException if the list file cannot be opened
     * @throws IOException if reading the list file fails
     * @throws IllegalArgumentException if a listed entry is a directory or does not exist
     */
    public FileListIterator(File filelist, FileFilter fileFilter,
                            Pattern targetPattern, boolean removeCommonPrefix) throws FileNotFoundException, IOException
    {
        this.fileFilter = fileFilter;
        this.fileArray = new ArrayList();
        this.targetPattern = targetPattern;

        ArrayList filenames = new ArrayList();
        BufferedReader reader = new BufferedReader(new FileReader(filelist));

        // Close the reader even when readLine() fails, so the handle is not leaked.
        try {
            String filename = reader.readLine();

            while (filename != null && filename.trim().length() > 0) {
                filenames.add(filename.trim());
                filename = reader.readLine();
            }
        }
        finally {
            reader.close();
        }

        // convert list of filenames to array of files
        File[] fa = new File[filenames.size()];
        for (int i = 0; i < filenames.size(); i++)
            fa[i] = new File((String) filenames.get(i));

        fillFileArrayAssignCommonPrefixIndexAndSubIterator(fa, removeCommonPrefix);
    }

    /** Same as the File-list constructor, with the list file given by name. */
    public FileListIterator(String filelistName, FileFilter fileFilter,
                            Pattern targetPattern, boolean removeCommonPrefix) throws FileNotFoundException, IOException
    {
        this (new File(filelistName), fileFilter, targetPattern, removeCommonPrefix);
    }

    /** Same as the File-list constructor, with no file filter and common-prefix removal enabled. */
    public FileListIterator(String filelistName, Pattern targetPattern) throws FileNotFoundException, IOException
    {
        this (new File(filelistName), null, targetPattern, true);
    }

    // The PipeInputIterator interface

    /**
     * Advance to the next file and wrap it in an Instance whose data is the File,
     * whose target is the name derived from the file's parent directory path
     * (or null), and whose name is the file's URI.
     *
     * The target is null when targetPattern is null, when the pattern does not
     * match, or when the file has no parent directory component.
     */
    public Instance nextInstance ()
    {
        File nextFile = (File) subIterator.next();
        // getParent() is null for a pathname without a parent (e.g. "a.txt")
        String path = nextFile.getParent();
        String targetName = null;

        if (path != null) {
            if (targetPattern == STARTING_DIRECTORIES) {
                // The prefix index was computed over full file paths, which are
                // longer than the parent path used here, so it may exceed
                // path.length(); clamp it instead of letting substring throw.
                int start = Math.min(commonPrefixIndex, path.length());
                targetName = path.substring(start);
            }
            else if (targetPattern != null) {
                Matcher m = targetPattern.matcher(path);
                if (m.find ()) {
                    targetName = m.group (1);
                }
            }
        }

        return new Instance (nextFile, targetName, nextFile.toURI(), null);
    }

    /** Advance to the next file and return it without building an Instance. */
    public File nextFile ()
    {
        return (File) subIterator.next();
    }

    /** @return true while files remain to be iterated over */
    public boolean hasNext ()
    {
        return subIterator.hasNext();
    }

    /** @return the internal list of accepted files (not a copy) */
    public ArrayList getFileArray()
    {
        return fileArray;
    }

    /**
     * Validate the given files, store those accepted by the filter in fileArray,
     * point subIterator at them, and set commonPrefixIndex.
     *
     * @param files candidate files; each must exist and must not be a directory
     * @param removeCommonPrefix when true, commonPrefixIndex is computed by
     *                           Strings.commonPrefixIndex over the accepted files' paths;
     *                           otherwise it is 0
     * @throws IllegalArgumentException if an entry is a directory or does not exist
     */
    private void fillFileArrayAssignCommonPrefixIndexAndSubIterator(File[] files, boolean removeCommonPrefix)
    {
        ArrayList filenames = new ArrayList();

        for (int i = 0; i < files.length; i++) {
            if (files[i].isDirectory())
                throw new IllegalArgumentException(files[i] + " is not a file.");
            else if (! files[i].exists())
                throw new IllegalArgumentException(files[i] + " does not exist.");

            if (this.fileFilter == null || this.fileFilter.accept(files[i])) {
                this.fileArray.add(files[i]);

                if (removeCommonPrefix)
                    filenames.add(files[i].getPath());
            }
        }

        this.subIterator = this.fileArray.iterator();

        // Only ask for a common prefix when at least one path was accepted: the
        // helper's behavior on an empty array is not visible from this file, and
        // with nothing to iterate the index is never used anyway.
        if (removeCommonPrefix && !filenames.isEmpty()) {
            String[] fn = (String[]) filenames.toArray(new String[filenames.size()]);
            this.commonPrefixIndex = Strings.commonPrefixIndex(fn);
        }
        else
            this.commonPrefixIndex = 0;
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -