📄 transactionaltonontransactional.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/**
* Title: XELOPES Data Mining Library
* Description: The XELOPES library is an open platform-independent and data-source-independent library for Embedded Data Mining.
* Copyright: Copyright (c) 2002 Prudential Systems Software GmbH
* Company: ZSoft (www.zsoft.ru), Prudsys (www.prudsys.com)
* @author Michael Thess
* @version 1.1
*/
package com.prudsys.pdm.Transform.Special;
import java.util.Vector;
import com.prudsys.pdm.Core.CategoricalAttribute;
import com.prudsys.pdm.Core.Category;
import com.prudsys.pdm.Core.MiningAttribute;
import com.prudsys.pdm.Core.MiningDataSpecification;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Core.NumericAttribute;
import com.prudsys.pdm.Input.MiningInputStream;
import com.prudsys.pdm.Input.MiningSparseVector;
import com.prudsys.pdm.Input.MiningVector;
import com.prudsys.pdm.Transform.MiningStreamTransformer;
/**
* Transforms a mining input stream from transactional into non-transactional
* format.
*/
public class TransactionalToNontransactional implements MiningStreamTransformer
{
    // -----------------------------------------------------------------------
    // Variables declarations
    // -----------------------------------------------------------------------
    /** Name of the transaction ID attribute in the source stream (default "vectorNrId"). */
    private String vectorNrIdAttName = "vectorNrId";

    /** Name of the position (attribute index) attribute in the source stream (default "attributeIndex"). */
    private String attributeIndexAttName = "attributeIndex";

    /** Name of the item (attribute value) attribute in the source stream (default "attributeValue"). */
    private String attributeValueAttName = "attributeValue";

    // -----------------------------------------------------------------------
    // Constructor
    // -----------------------------------------------------------------------
    /**
     * Empty constructor. The attribute names keep their defaults until
     * changed through the setters.
     */
    public TransactionalToNontransactional()
    {
    }

    // -----------------------------------------------------------------------
    // Getter and setter methods
    // -----------------------------------------------------------------------
    /**
     * Returns name of vector number ID attribute.
     *
     * @return name of vector number ID attribute
     */
    public String getVectorNrIdAttName()
    {
        return vectorNrIdAttName;
    }

    /**
     * Sets name of vector number ID attribute.
     *
     * @param vectorNrIdAttName new name of vector number ID attribute
     */
    public void setVectorNrIdAttName(String vectorNrIdAttName)
    {
        this.vectorNrIdAttName = vectorNrIdAttName;
    }

    /**
     * Returns name of attribute index attribute.
     *
     * @return name of attribute index attribute
     */
    public String getAttributeIndexAttName()
    {
        return attributeIndexAttName;
    }

    /**
     * Sets name of attribute index attribute.
     *
     * @param attributeIndexAttName new name of attribute index attribute
     */
    public void setAttributeIndexAttName(String attributeIndexAttName)
    {
        this.attributeIndexAttName = attributeIndexAttName;
    }

    /**
     * Returns name of attribute value attribute.
     *
     * @return name of attribute value attribute
     */
    public String getAttributeValueAttName()
    {
        return attributeValueAttName;
    }

    /**
     * Sets name of attribute value attribute.
     *
     * @param attributeValueAttName new name of attribute value attribute
     */
    public void setAttributeValueAttName(String attributeValueAttName)
    {
        this.attributeValueAttName = attributeValueAttName;
    }

    // -----------------------------------------------------------------------
    // Transformation methods
    // -----------------------------------------------------------------------
    /**
     * Transforms transactional into non-transactional mining input stream.
     * The targetStream must contain an updatable mining stream.
     * <p>
     * The source stream is reset and scanned twice. The first scan builds the
     * target meta data: one categorical attribute named "att_&lt;i&gt;" for
     * every index up to the largest index value encountered, each collecting
     * the categories seen at that index. The second scan groups consecutive
     * source rows that share a transaction ID into one sparse vector and
     * appends it to the target stream. A transaction is closed as soon as the
     * ID changes, so rows belonging to one transaction must be adjacent in
     * the source stream. Rows whose transaction ID is missing are ignored.
     *
     * @param sourceStream mining stream used as source to transformation
     * @param targetStream mining stream used as target of transformation
     * @return number of vectors written to the target stream
     * @exception MiningException can't transform stream, or the transaction
     *            ID / value attribute is not categorical, or the index
     *            attribute is not numeric
     */
    public int transform( MiningInputStream sourceStream, MiningInputStream targetStream )
        throws MiningException {

        // Transform metadata:
        MiningDataSpecification metaDataSource = sourceStream.getMetaData();
        MiningAttribute vectorNrId     = metaDataSource.getMiningAttribute(vectorNrIdAttName);
        MiningAttribute attributeValue = metaDataSource.getMiningAttribute(attributeValueAttName);
        MiningAttribute attributeIndex = metaDataSource.getMiningAttribute(attributeIndexAttName);

        // Both attributes are cast to CategoricalAttribute below, so reject
        // anything else (including null) here with a checked exception
        // instead of a ClassCastException in the middle of the scan.
        if ( !(vectorNrId instanceof CategoricalAttribute) ) {
            throw new MiningException("no valid attribute of transaction ID defined");
        }
        if ( !(attributeValue instanceof CategoricalAttribute) ) {
            throw new MiningException("no valid attribute of values defined");
        }
        // A missing or non-numeric index attribute cannot supply a target
        // position; fail fast rather than handing null to MiningVector.getValue.
        if ( !(attributeIndex instanceof NumericAttribute) ) {
            throw new MiningException("no valid attribute of indexes defined");
        }
        CategoricalAttribute transIdAtt = (CategoricalAttribute) vectorNrId;
        CategoricalAttribute valueAtt   = (CategoricalAttribute) attributeValue;

        MiningDataSpecification targetMetaData = new MiningDataSpecification();
        targetMetaData.setRelationName( "Non-Transactional: " + metaDataSource.getRelationName() );

        // First scan to transform meta data:
        sourceStream.reset();
        Vector targetAttributes = new Vector();
        while ( sourceStream.next() ) {
            // Read transactional vector:
            MiningVector mv = sourceStream.read();

            // Missing transaction ID => ignore line:
            if ( Category.isMissingValue( mv.getValue(vectorNrId) ) ) {
                continue;
            }

            // Get attribute value (null is tolerated and simply not registered):
            Category catValue = valueAtt.getCategory( mv.getValue(attributeValue) );

            // Get value index:
            int index = (int) mv.getValue(attributeIndex);

            // Grow the attribute list so that position 'index' exists:
            for (int i = targetAttributes.size(); i <= index; i++) {
                targetAttributes.addElement(
                    new CategoricalAttribute("att_" + String.valueOf(i)) );
            }

            // Register the category at its target attribute if not yet known:
            CategoricalAttribute catAtt = (CategoricalAttribute) targetAttributes.elementAt(index);
            if (catValue != null) {
                double key = catAtt.getKey(catValue);
                if ( Category.isMissingValue(key) ) {
                    catAtt.addCategory(catValue);
                }
            }
        }
        for (int i = 0; i < targetAttributes.size(); i++) {
            targetMetaData.addMiningAttribute( (MiningAttribute) targetAttributes.elementAt(i) );
        }
        targetStream.updateSetMetaData( targetMetaData );

        // Second scan to transform data:
        int nVec = 0;
        sourceStream.reset();
        boolean hasTransaction = false;   // becomes true once a row with a valid ID was read
        String oldTransId = "";
        Vector values  = new Vector();    // source category keys of the current transaction
        Vector indexes = new Vector();    // target attribute indexes of the current transaction
        while ( sourceStream.next() ) {
            // Read transactional vector:
            MiningVector mv = sourceStream.read();

            // Get transaction ID:
            double idKey = mv.getValue(vectorNrId);
            if ( Category.isMissingValue(idKey) ) {
                continue; // missing transaction ID => ignore line
            }
            String transId = transIdAtt.getCategory(idKey).toString();

            // Get attribute value (still a key of the SOURCE value attribute):
            double valueKey = mv.getValue(attributeValue);

            // Get value index:
            int index = (int) mv.getValue(attributeIndex);

            if (!hasTransaction) {
                oldTransId = transId;
                hasTransaction = true;
            }

            // New transaction => write old transaction to stream:
            if ( !transId.equals(oldTransId) ) {
                appendTransaction(targetStream, targetMetaData, valueAtt, indexes, values);
                nVec = nVec + 1;
                values.removeAllElements();
                indexes.removeAllElements();
                oldTransId = transId;
            }
            indexes.addElement( new Integer(index) );
            values.addElement( new Double(valueKey) );
        }

        // Write last transaction to stream; nothing to write if no row with
        // a valid transaction ID was read at all:
        if (hasTransaction) {
            appendTransaction(targetStream, targetMetaData, valueAtt, indexes, values);
            nVec = nVec + 1;
        }
        return nVec;
    }

    /**
     * Builds one sparse vector from the collected entries of a transaction
     * and appends it to the target stream.
     * <p>
     * Each stored value is a key of the source value attribute; unless it is
     * the missing value, it is translated via its category into the key of
     * the target attribute located at the corresponding index.
     *
     * @param targetStream   stream receiving the vector
     * @param targetMetaData meta data of the target stream
     * @param sourceValueAtt value attribute of the source stream
     * @param indexes        target attribute indexes (Integer elements)
     * @param values         source category keys (Double elements), parallel to indexes
     * @exception MiningException can't append the vector
     */
    private void appendTransaction( MiningInputStream targetStream,
                                    MiningDataSpecification targetMetaData,
                                    CategoricalAttribute sourceValueAtt,
                                    Vector indexes, Vector values )
        throws MiningException {

        int[] ind = new int[indexes.size()];
        for (int i = 0; i < ind.length; i++) {
            ind[i] = ((Integer) indexes.elementAt(i)).intValue();
        }

        double[] val = new double[values.size()];
        for (int i = 0; i < val.length; i++) {
            double key = ((Double) values.elementAt(i)).doubleValue();
            if ( !Category.isMissingValue(key) ) {
                // Re-key: source attribute key -> category -> target attribute key
                Category cat = sourceValueAtt.getCategory(key);
                key = ( (CategoricalAttribute) targetMetaData.getMiningAttribute( ind[i] ) ).getKey(cat);
            }
            val[i] = key;
        }

        MiningSparseVector msp = new MiningSparseVector(1.0, val, ind);
        msp.setMetaData(targetMetaData);
        targetStream.updateAppendVector(msp);
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -