📄 miningexcelstream.java
字号:
}
// Row back to first row to insert data
rows = sheet.rowIterator();
firstRow = (HSSFRow) rows.next();
cells = firstRow.cellIterator();
}
}
// Add back all column names and data types
MiningAttribute attribute;
for (int k = 0; k < colName.size(); k += 2) {
int icell_key = ((Integer) colName.get(k)).intValue();
String fieldname = (String) colName.get(k + 1);
if (colType.containsKey(new Integer(icell_key))) {
Integer type = (Integer) colType
.get(new Integer(icell_key));
if (type.intValue() == HSSFCell.CELL_TYPE_NUMERIC)
attribute = new NumericAttribute();
else {
attribute = new CategoricalAttribute();
((CategoricalAttribute) attribute)
.setUnboundedCategories(true);
}
} else {
attribute = new CategoricalAttribute();
((CategoricalAttribute) attribute)
.setUnboundedCategories(true);
}
attribute.setName(fieldname);
header.addElement(attribute);
}
//****************************
// Calculate inverse mapping:
int ncell = cmax + 1;
int[] ihInv = new int[ncell];
for (int j = 0; j < ncell; j++)
ihInv[j] = -1;
for (int j = 0; j < ih.size(); j++)
ihInv[ih.IntegerAt(j)] = j;
// Add attributes to meta data:
for (int j = 0; j < ncell; j++) {
int icell = ihInv[j];
// System.out.println("icell = " + icell);
if (icell > -1) {
metaData.addMiningAttribute((MiningAttribute) header
.elementAt(icell));
}
}
// Now read data matrix:
int nAtt = metaData.getAttributesNumber();
Vector data = new Vector();
while (rows.hasNext()) {
HSSFRow row = (HSSFRow) rows.next();
cells = row.cellIterator();
// Vector objects = new Vector();
Vector objects = new Vector(cmax+1);
for(int i =0; i<=cmax; i++){
objects.add(i, new Double(Category.MISSING_VALUE));
}
int lastcell = 0;
while (cells.hasNext()) {
HSSFCell cell = (HSSFCell) cells.next();
int icell = cell.getCellNum();
int index =-1;
if (icell < 0)
throw new MiningException("illegal index of cell");
//<< added by Joyce 2005/02/25
//<<modified by tyleung 2005/4/8
index = mycellidex.indexOf(new Integer(icell));
if (mycellidex.contains(new Integer(icell)))
{
//
// if (icell != ((Integer) mycellidex.get(lastcell)).intValue())
// {
// int last = mycellidex.indexOf(new Integer(icell));
// for (int i = lastcell; i < last; i++)
// {
// objects.addElement(new Double(Category.MISSING_VALUE));
// }
// lastcell = last;
// }
// lastcell++;
//>>
if (cell.getCellType() == HSSFCell.CELL_TYPE_NUMERIC)
{
// double temp = cell.getNumericCellValue();
if (cell.getCellType() == HSSFCell.CELL_TYPE_ERROR
|| cell.getCellType() == HSSFCell.CELL_TYPE_BLANK)
{
// objects.addElement(new Double(Category.MISSING_VALUE));
objects.set(icell, new Double(Category.MISSING_VALUE));
}else
{
// Added by Kelvin Jor to get DateCellValue
Integer type = (Integer) colType.get(new Integer(cell.getCellNum()));
if (type.intValue() == Constants.CELL_TYPE_DATE)
{
// objects.addElement(cell.getDateCellValue());
objects.set(icell, cell.getDateCellValue());
}else
{
// objects.addElement(new Double(cell
// .getNumericCellValue()));
objects.set(icell, new Double(cell
.getNumericCellValue()));
}
}
} else
{
// String temp = cell.getStringCellValue();
if (cell.getCellType() == HSSFCell.CELL_TYPE_ERROR
//<<tyleung 30/3/2005 to accept space as missing values
|| cell.getCellType() == HSSFCell.CELL_TYPE_BLANK
|| cell.getStringCellValue().trim().equals("")) {
// objects.addElement(new Double(Category.MISSING_VALUE));
objects.set(icell, new Double(Category.MISSING_VALUE));
//tyleung 30/3/2005
} else if (cell.getCellType() == HSSFCell.CELL_TYPE_BOOLEAN){
// objects.addElement(new Boolean(cell.getBooleanCellValue()));
objects.set(icell, new Boolean(cell.getBooleanCellValue()));
} else {
// objects.addElement(cell.getStringCellValue());
objects.set(icell, cell.getStringCellValue());
}
}
}else
{
// If the cell exists outside the index, just skip this cell
}
}
// << added by Joyce 2005/02/25 for remaining missing values
// for (int i = lastcell; i < mycellidex.size(); i++)
// {
// objects.addElement(new Double(Category.MISSING_VALUE));
//
// }
//>>>
// Add data to row:
// Object[] rowArr = new Object[nAtt];
// for (int j = 0; j < nAtt; j++) {
// int icell = ihInv[j];
// if (icell > -1 && icell < objects.size())
// rowArr[j] = objects.elementAt(icell);
// else
// rowArr[j] = "";
// }
// ;
// data.addElement(rowArr);
data.addElement(objects.toArray());
}
;
//tyleung 2005/4/8>>
// Create object array:
int nVec = data.size();
Object[][] mObArr = new Object[nVec][nAtt];
for (int i = 0; i < nVec; i++) {
Object[] rowArr = (Object[]) data.elementAt(i);
for (int j = 0; j < nAtt; j++)
mObArr[i][j] = rowArr[j];
}
;
// Create data table:
dataTable = new MiningArrayStream(mObArr, metaData);
}
updateCategoricalAttrsType();
return metaData;
}
//<<Frank J. Xu, 16/02/2005
// determine if a categorical attribute is bounded
//Add method to reset the categorical attributes' type of the Excel stream.
/**
 * Re-derives the value list and the bounded/unbounded flag of every
 * categorical attribute from the rows currently readable from this stream.
 *
 * The stream is reset and read once from start to end. For each attribute
 * that is a CategoricalAttribute, the distinct non-null categories are
 * collected in the order produced by insertCatValByOrder. An attribute is
 * marked as bounded (setUnboundedCategories(false)) when it has at least
 * one distinct value and fewer than
 * MiningInputStream.CATEGORICAL_ATTRIBUTE_BOUND of them. The collected
 * values are then installed with setValues and the meta data is stored back.
 *
 * A stream without any rows is handled: each categorical attribute simply
 * receives an empty value list (previously this case ended in a
 * NullPointerException).
 *
 * The cursor is left wherever the read loop stopped; it is not reset again.
 *
 * @exception MiningException
 *                if the initial reset() fails. A MiningException raised
 *                while reading rows or updating the attributes is only
 *                printed, as in the original implementation.
 */
public void updateCategoricalAttrsType()throws MiningException{
    reset();
    MiningDataSpecification spec = this.getMetaData();
    MiningAttribute[] attrs = spec.getAttributesArray();

    // Pick out the categorical attributes, keeping their original order.
    Vector categoricalAttrs = new Vector();
    for (int i = 0; i < attrs.length; i++) {
        if (attrs[i] instanceof CategoricalAttribute) {
            categoricalAttrs.add(attrs[i]);
        }
    }
    if (categoricalAttrs.isEmpty()) {
        return;
    }

    // One value collector per categorical attribute. They are allocated up
    // front so that an empty stream leaves empty collectors, not nulls.
    final int nCategorical = categoricalAttrs.size();
    Vector[] categoricalVal = new Vector[nCategorical];
    for (int j = 0; j < nCategorical; j++) {
        categoricalVal[j] = new Vector();
    }

    try {
        while (this.next()) {
            MiningVector mv = this.read();
            for (int j = 0; j < nCategorical; j++) {
                Category catObj = mv.getValueCategory(
                        (CategoricalAttribute) categoricalAttrs.get(j));
                // The order of the categorical values affects the
                // correctness of assessment operations, so values are
                // inserted in order while importing (Frank J. Xu,
                // 13/04/2005). Null categories and duplicates are ignored
                // by insertCatValByOrder.
                insertCatValByOrder(categoricalVal[j], catObj);
            }
        }

        // Update each categorical attribute based on the threshold.
        for (int j = 0; j < nCategorical; j++) {
            CategoricalAttribute catAttr =
                    (CategoricalAttribute) categoricalAttrs.get(j);
            int nDistinct = categoricalVal[j].size();
            if (nDistinct > 0
                    && nDistinct < MiningInputStream.CATEGORICAL_ATTRIBUTE_BOUND) {
                catAttr.setUnboundedCategories(false);
            }
            catAttr.setValues(new ArrayList(categoricalVal[j]));
        }

        // The attribute objects were modified in place, so the array
        // already holds the updated attributes; store it back.
        spec.setAttributesArray(attrs);
        this.metaData = spec;
    } catch (MiningException ex) {
        // Kept from the original: a failure here is reported but not
        // propagated to the caller.
        ex.printStackTrace();
    }
}
/**
 * Inserts a category into a vector that is kept ordered by display value.
 *
 * A null category is ignored. The vector is scanned from the front: if an
 * element equal to the category is met, nothing is inserted; otherwise the
 * category is inserted in front of the first element whose display value
 * compares greater than its own, or appended if no such element exists.
 *
 * @param categoricalVal
 *            vector of Category objects to insert into
 * @param catObj
 *            category to insert, may be null
 */
private void insertCatValByOrder(Vector categoricalVal, Category catObj)
{
    if (catObj == null) {
        return;
    }
    final String newName = catObj.getDisplayValue();
    int pos = 0;
    while (pos < categoricalVal.size()) {
        Object existing = categoricalVal.get(pos);
        if (existing.equals(catObj)) {
            // Already present: leave the vector untouched.
            return;
        }
        String existingName = ((Category) existing).getDisplayValue();
        if (newName.compareTo(existingName) < 0) {
            // First element sorting after the new one: insert here.
            break;
        }
        pos++;
    }
    // pos is either the break position or the vector size (append).
    categoricalVal.insertElementAt(catObj, pos);
}
//Frank J. Xu, 16/02/2005>>
// -----------------------------------------------------------------------
// Methods of cursor positioning
// -----------------------------------------------------------------------
/**
 * Places the cursor before the first row by resetting the underlying
 * data table.
 *
 * @exception MiningException
 *                if the data table could not reset its cursor
 */
public void reset() throws MiningException {
    this.dataTable.reset();
}
/**
 * Advances the cursor of the underlying data table by one position.
 *
 * @return the result of the data table's next(): true if a next vector
 *         exists, else false
 * @exception MiningException
 *                if the data table could not advance its cursor
 */
public boolean next() throws MiningException {
    final boolean hasNext = dataTable.next();
    return hasNext;
}
/**
 * Moves the cursor of the underlying data table to the given position.
 *
 * @param position
 *            new position of the cursor
 * @return the result of the data table's move(): true if the cursor
 *         could be positioned, false if not
 * @exception MiningException
 *                if the data table could not move its cursor
 */
public boolean move(int position) throws MiningException {
    final boolean positioned = dataTable.move(position);
    return positioned;
}
// -----------------------------------------------------------------------
// Methods of reading from the stream
// -----------------------------------------------------------------------
/**
 * Reads the data vector at the current cursor position from the
 * underlying data table.
 *
 * @return data vector returned by the data table's read()
 * @exception MiningException
 *                if the data table could not read the mining vector
 */
public MiningVector read() throws MiningException {
    final MiningVector current = dataTable.read();
    return current;
}
// -----------------------------------------------------------------------
// Methods of getting num of sheets
// -----------------------------------------------------------------------
/**
 * Returns the number of sheets reported by the workbook held in wb.
 *
 * @return value of wb.getNumberOfSheets()
 */
public int getNumberOfSheets() {
    final int sheetCount = wb.getNumberOfSheets();
    return sheetCount;
}
// -----------------------------------------------------------------------
// Methods of getting sheet name
// -----------------------------------------------------------------------
/**
 * Returns the name of the sheet at the given index of the workbook
 * held in wb.
 *
 * @param a_index
 *            index of the sheet, passed unchanged to wb.getSheetName
 * @return value of wb.getSheetName(a_index)
 */
public String getSheetName(int a_index) {
    final String sheetName = wb.getSheetName(a_index);
    return sheetName;
}
// -----------------------------------------------------------------------
// Method for selecting the sheet to read from
// -----------------------------------------------------------------------
/**
 * Stores the sheet at the given index of the workbook held in wb into
 * the sheet field. Nothing else is updated by this method.
 *
 * @param a_index
 *            index of the sheet, passed unchanged to wb.getSheetAt
 */
public void setSheetAt(int a_index) {
    this.sheet = this.wb.getSheetAt(a_index);
}
// -----------------------------------------------------------------------
// Test
// -----------------------------------------------------------------------
/**
 * Test of the Excel stream: opens the sample workbook and prints the
 * resulting stream object to standard output. Any exception is printed
 * with its stack trace.
 *
 * @param args
 *            arguments (ignored)
 */
public static void main(String[] args) {
    final String workbookPath = "data/excel/vowel.xls";
    try {
        MiningExcelStream excelStream = new MiningExcelStream(
                workbookPath, null);
        System.out.println(excelStream);
    } catch (Exception ex) {
        ex.printStackTrace();
    }
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -