📄 csv2array.java
字号:
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept. This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit). http://www.cs.umass.edu/~mccallum/mallet This software is provided under the terms of the Common Public License, version 1.0, as published by http://www.opensource.org. For further information, see the file `LICENSE' included with this distribution. */package edu.umass.cs.mallet.base.pipe;import edu.umass.cs.mallet.base.pipe.Pipe;import edu.umass.cs.mallet.base.types.FeatureVector;import edu.umass.cs.mallet.base.types.Alphabet;import edu.umass.cs.mallet.base.types.Labeling;import edu.umass.cs.mallet.base.types.Instance;import edu.umass.cs.mallet.base.util.MalletLogger;import edu.umass.cs.mallet.base.util.CharSequenceLexer;import java.util.logging.*;import java.lang.reflect.Array;/** Converts a string of comma separated values to an array. To be used prior to {@link Array2FeatureVector}. Note that this class assumes that each location of the line corresponds to a feature index (i.e. "dense" representation) eg: instance 1: 1,0,0,1,0,0,1 << feature alphabet size = 7 instance 2: 0,0,1,0,0,0,1 << feature alphabet size = 7 @author Aron Culotta */public class Csv2Array extends Pipe { CharSequenceLexer lexer; int numberFeatures = -1; private static Logger logger = MalletLogger.getLogger(Csv2Array.class.getName()); public Csv2Array () { this.lexer = new CharSequenceLexer ("([^,]+)"); } public Csv2Array (String regex) { this.lexer = new CharSequenceLexer (regex); } public Csv2Array (CharSequenceLexer l) { this.lexer = l; } /** Convert the data in an <CODE>Instance</CODE> from a CharSequence * of comma-separated-values to an array, where each index is the * feature name. */ public Instance pipe( Instance carrier ) { CharSequence c = (CharSequence)carrier.getData(); int nf = countNumberFeatures (c); if (numberFeatures == -1) // first instance seen numberFeatures = nf; else if (numberFeatures != nf) throw new IllegalArgumentException ("Instances must have same-length feature vectors. length_i: " + numberFeatures + " length_j: " + nf); double[] feats = new double[numberFeatures]; lexer.setCharSequence (c); int i=0; while (lexer.hasNext()) feats[i++] = Double.parseDouble ((String)lexer.next()); carrier.setData (feats); return carrier; } private int countNumberFeatures (CharSequence c) { String s = c.toString(); int ret = 0; int pos = 0; while ((pos = s.indexOf (",", pos) + 1) != 0) ret++; return ret+1; }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -