📄 mininginputstream.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/**
* Title: XELOPES Data Mining Library
* Description: The XELOPES library is an open platform-independent and data-source-independent library for Embedded Data Mining.
* Copyright: Copyright (c) 2002 Prudential Systems Software GmbH
* Company: ZSoft (www.zsoft.ru), Prudsys (www.prudsys.com)
* @author Valentine Stepanenko (valentine.stepanenko@zsoft.ru)
* @author Michael Thess
* @version 1.1
*/
package com.prudsys.pdm.Input;
import java.util.Enumeration;
import com.prudsys.pdm.Core.MiningAttribute;
import com.prudsys.pdm.Core.MiningDataSpecification;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Core.MiningMatrixElement;
import com.prudsys.pdm.Input.Records.Arff.MiningArffStream;
import com.prudsys.pdm.Input.Records.Csv.MiningCsvStream;
import com.prudsys.pdm.Input.Records.Transactions.TransactionStream;
/**
* A table of vectors representing input mining data, which is usually
* generated by reading a file or by executing a statement that queries
* a database. Each vector in this table is a MiningVector or an instance
* of one of its subclasses.<p>
*
* A <code>MiningInputStream</code> object maintains a cursor pointing to its
* current vector. Initially the cursor is positioned before the first row.
* The <code>next</code> method moves the cursor to the next row, and because it
* returns false when there are no more rows in the <code>MiningInputStream</code>
* object, it can be used in a while loop to iterate through the data.
* Use the <code>read()</code> method to retrieve the vector at the current cursor
* position. Depending on the nature of the input mining source we distinguish a
* few <code>MiningInputStream</code> types: forward only (default), resettable
* and navigable.<p>
*
* A default <code>MiningInputStream</code> object has a cursor that moves
* forward only. Thus, it is possible to iterate through it only once and
* only from the first row to the last row.
* A resettable <code>MiningInputStream</code> allows positioning the cursor back to
* the first vector and repeating the data reading.
* A navigable <code>MiningInputStream</code> allows positioning the cursor to
* any vector in the table.<p>
*
* In Version 1.1 an update mechanism for mining input streams was added.
* Updatable input streams allow editing the existing input stream. The names
* of all update methods start with 'update'.
*
* The meta data of a <code>MiningInputStream</code> is returned by the
* <code>getMetaData</code> method.<p>
*/
public abstract class MiningInputStream extends com.prudsys.pdm.Cwm.Core.Class implements MiningMatrixElement
{
// -----------------------------------------------------------------------
// Variables declarations
// -----------------------------------------------------------------------
//<<Frank J. Xu, 30/03/2005
//Add function to update the type of categorical data.
/**
* Bound related to updating the type of categorical data.
* NOTE(review): not referenced in the visible part of this file; presumably
* the number of distinct values up to which an attribute is treated as
* categorical - confirm against the code that uses it.
*/
public static final int CATEGORICAL_ATTRIBUTE_BOUND = 50;
//>>Frank J. Xu, 30/03/2005
/**
* Meta data with attributes description. Logical data model.
* Returned by getMetaData and getLogicalModel.
*/
protected MiningDataSpecification metaData;
/**
* Physical data model. Returned by getPhysicalModel; according to the
* documentation of findPhysicalModel it is assigned by that method.
*/
protected org.omg.cwm.objectmodel.core.Package physicalModel;
/**
* Current cursor position. Starts at -1, i.e. the cursor is initially
* positioned before the first row.
*/
protected int cursorPosition = -1;
/** MiningVector located on the current cursor position. */
protected MiningVector cursorVector;
/** True, if MiningInputStream contains missing values. Returned by isMissingValues. */
protected boolean missingValues;
// -----------------------------------------------------------------------
// Constructor
// -----------------------------------------------------------------------
/**
 * Default constructor. Performs no initialisation of its own; all fields
 * keep the values given at their declaration.
 */
public MiningInputStream()
{
    super();
}
// -----------------------------------------------------------------------
// Getter and setter methods
// -----------------------------------------------------------------------
/**
 * Provides the meta data (logical data model) of this stream.
 *
 * @return the meta data currently held in the <code>metaData</code> field
 * @exception MiningException could not retrieve meta data; not thrown by
 *            this implementation, which only returns the field
 */
public MiningDataSpecification getMetaData() throws MiningException
{
    return this.metaData;
}
/**
 * Provides the logical model of the Data Mining source. The logical model
 * is the meta data itself, so this returns the <code>metaData</code> field
 * viewed as a CWM Classifier.
 *
 * @return logical model in terms of CWM
 */
public org.omg.cwm.objectmodel.core.Classifier getLogicalModel()
{
    return this.metaData;
}
/**
 * Provides the physical model of the Data Mining source in terms of CWM. <p>
 *
 * The metaData of a MiningInputStream is the logical model of the source.
 * The data also has a physical representation: it is stored in memory,
 * files, databases, etc. That representation is modeled through extended
 * mining input streams like MiningArrayStream, MiningSqlStream, etc. <p>
 *
 * The physical model is delivered on the CWM Package level, i.e. the
 * catalog for databases, the record file for files, the schema for
 * multidimensional data, etc. The logical model (metaData), in contrast,
 * is only on the CWM Classifier level. The Package level is needed
 * because the data of the logical model may be distributed over several
 * Classifier-like structures, such as tables in a database. <p>
 *
 * Call findPhysicalModel to determine the physical model before calling
 * this method; this method only returns the <code>physicalModel</code>
 * field. <p>
 *
 * The mapping between the logical model (metaData) and the physical model
 * delivered here is obtained through
 * getPhysicalToLogicalModelTransformation.
 *
 * @return physical model in terms of CWM
 */
public org.omg.cwm.objectmodel.core.Package getPhysicalModel()
{
    return this.physicalModel;
}
/**
* Determines the physical model and assigns it to the variable physicalModel.
* Use getPhysicalModel to retrieve the model afterwards.
*
* @throws MiningException error while determining the physical model
*/
public abstract void findPhysicalModel() throws MiningException;
/**
* Returns the CWM mapping from the physical to the logical data model,
* i.e. from the model delivered by getPhysicalModel to the meta data.
*
* @return transformation of physical to logical data model
* @throws MiningException couldn't get transformation
*/
public abstract org.omg.cwm.analysis.transformation.TransformationMap getPhysicalToLogicalModelTransformation()
throws MiningException;
/**
* Returns the supported stream access methods of a mining input stream
* implementation. They are from the following list: "recognize", "reset",
* "move", "updateSetMetaData", "updateRemoveAllVectors",
* "updateSetMetaData". This allows a consumer of the stream to apply
* the supported methods dynamically. Notice that every stream
* must actively support the methods "next" and "read"; therefore
* they are not included in this list.
* NOTE(review): "updateSetMetaData" appears twice in the list above;
* one occurrence is presumably meant to be a different update method -
* confirm against the concrete stream implementations.
*
* @return list of supported stream methods
*/
public abstract Enumeration getSupportedStreamMethods();
/**
 * Reports where the cursor currently stands.
 *
 * @return the value of the <code>cursorPosition</code> field
 */
public int getCursorPosition()
{
    return this.cursorPosition;
}
/**
 * Determines the number of vectors. <p>
 *
 * In this most simple implementation the stream is rewound with
 * <code>reset</code> and traversed once with <code>next</code>, counting
 * the rows. Afterwards the stream is rewound again and advanced until
 * <code>cursorPosition</code> reaches the value it had on entry, so that
 * the caller's cursor is recovered. <p>
 *
 * The restore step compares against <code>cursorPosition</code> instead of
 * calling <code>next</code> a fixed number of times. Because the cursor
 * starts at -1 (before the first row), a fixed count of
 * <code>cursorPosition</code> calls leaves the cursor one row short
 * whenever <code>next</code> advances the position by one from -1.
 * NOTE(review): this assumes that the <code>reset</code> and
 * <code>next</code> implementations of the subclass keep
 * <code>cursorPosition</code> up to date - confirm against the concrete
 * streams. If they do not, the restore loop simply stops at the end of
 * the stream. <p>
 *
 * Most implementations of MiningInputStream will override this method.
 *
 * @return number of vectors
 * @exception MiningException if <code>reset</code> or <code>next</code>
 *            fails, e.g. because method reset is not implemented
 */
public int getVectorsNumber() throws MiningException
{
    final int savedCursorPosition = cursorPosition;

    // Count vectors with one full pass from the start of the stream.
    reset();
    int vectorCount = 0;
    while (next())
    {
        vectorCount++;
    }

    // Restore the cursor: rewind, then step forward until the saved
    // position is reached. Stops early if the stream ends first.
    reset();
    while (cursorPosition < savedCursorPosition && next())
    {
        // stepping only; next() moves the cursor
    }
    return vectorCount;
}
/**
 * Tells whether this stream contains missing values.
 *
 * @return the value of the <code>missingValues</code> flag; true if there
 *         are missing values
 */
public boolean isMissingValues()
{
    return this.missingValues;
}
// -----------------------------------------------------------------------
// General stream methods
// -----------------------------------------------------------------------
/**
* Opens the mining data stream.
*
* @exception MiningException if a mining source access error occurs
*/
public abstract void open() throws MiningException;
/**
* Closes the mining data stream.
*
* @exception MiningException if a mining source access error occurs
*/
public abstract void close() throws MiningException;
/**
* Recognizes the input stream's meta data by analyzing the input stream.
*
* @return the MiningDataSpecification
* @exception MiningException if an error occurs
*/
public abstract MiningDataSpecification recognize() throws MiningException;
// -----------------------------------------------------------------------
// Methods of cursor positioning
// -----------------------------------------------------------------------
/**
* Resets the cursor from its current position to the position before
* the first row. Some mining sources do not support resetting.
*
* @exception MiningException if a mining source access error occurs
*/
public abstract void reset() throws MiningException;
/**
* Moves the cursor down one row from its current position.
* A <code>MiningInputStream</code> cursor is initially positioned
* before the first row; the first call to the method
* <code>next</code> makes the first row the current row; the
* second call makes the second row the current row, and so on.
*
* @return <code>true</code> if the new current row is valid;
* <code>false</code> if there are no more rows
* @exception MiningException if a mining source access error occurs
*/
public abstract boolean next() throws MiningException;
/**
* Moves the cursor to the given row number in
* this <code>MiningInputStream</code> object.
*
* <p>If the row number is positive, the cursor moves to
* the given row number with respect to the beginning of the
* mining input stream. The first row is row 1, the second
* is row 2, and so on.
*
* <p>If the given row number is negative, the cursor moves to
* an absolute row position with respect to the end of the
* mining input stream.
*
* <p>An attempt to position the cursor beyond the first/last row in
* the mining input stream leaves the cursor before the first row or
* after the last row.
*
* <p>NOTE(review): no <code>TYPE_FORWARD_ONLY</code> constant is visible
* in this part of the file; the wording below appears to follow
* java.sql.ResultSet#absolute - confirm how forward-only streams are
* actually identified.
*
* @param position new cursor position
* @return <code>true</code> if the cursor is on the mining input stream;
* <code>false</code> otherwise
* @exception MiningException if a mining source access error
* occurs, the row is <code>0</code>, or the MiningInputStream type is
* <code>TYPE_FORWARD_ONLY</code>
*/
public abstract boolean move( int position ) throws MiningException;
// -----------------------------------------------------------------------
// Methods of reading from the stream
// -----------------------------------------------------------------------
/**
* Reads the MiningVector on the current cursor position from the
* input stream.
*
* <p> A subclass must provide an implementation of this method.
*
* @return the MiningVector
* @exception MiningException if an error occurs
*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -