📄 categoricalattribute.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/**
* Title: XELOPES Data Mining Library
* Description: The XELOPES library is an open platform-independent and data-source-independent library for Embedded Data Mining.
* Copyright: Copyright (c) 2002 Prudential Systems Software GmbH
* Company: ZSoft (www.zsoft.ru), Prudsys (www.prudsys.com)
* @author Valentine Stepanenko (valentine.stepanenko@zsoft.ru)
* @author Michael Thess
* @version 1.0
*/
package com.prudsys.pdm.Core;
import java.util.ArrayList;
import java.util.Hashtable;
import com.prudsys.pdm.Adapters.PmmlVersion20.DataField;
import com.prudsys.pdm.Adapters.PmmlVersion20.Taxonomy;
import com.prudsys.pdm.Adapters.PmmlVersion20.Value;
import com.prudsys.pdm.Utils.StringUtils;
/**
* An attribute with discrete values upon which
* performing numeric operations is typically not meaningful. <p>
*
* From CWM Data Mining. <p>
*
* Superclasses:
* <ul>
* <li> MiningAttribute
* </ul>
* Contained Elements:
* <ul>
* <li> Category
* </ul>
* Attributes:
* <ul>
* <li> <i>unboundedCategories</i>: Indicates that the set
* of categories is not fixed a priori. Hence the set of categories
* grows dynamically. Examples are names and identifiers. <br>
* - type: Boolean <br>
* - multiplicity: exactly one
* <li> <i>unstoredCategories</i>: Unbounded categories which are not
* stored at all. Examples are transaction identifiers. <br>
* - type: Boolean <br>
* - multiplicity: exactly one
* <li> <i>key</i>: Key value to reference category. <br>
* - type: Float <br>
* - multiplicity: exactly one
* </ul>
* References:
* <ul>
* <li> <i>taxonomy</i>: References the taxonomy. <br>
* - class: CategoryHierarchy <br>
* - defined by: UsesAsTaxonomy::taxonomy <br>
* - multiplicity: zero or one
* <li> <i>category</i>: References the Category. <br>
* - class: Category <br>
* - defined by: ContainsCategory::category <br>
* - multiplicity: zero or more
* </ul>
* Constraints:
* <ul>
* <li> Category values must be unique.
* </ul>
*
* Remark: Concerning the data types, a non-default data type should only be
* used when it appears in all categories of the attribute. Otherwise, for mixed
* categories, either the UNDEFINED data type or the DEFAULT (i.e. STRING) data
* type has to be applied, since all categories have a display value which is a String.
*
* @see Category
* @see CategoryHierarchy
*/
public class CategoricalAttribute extends MiningAttribute
{
// -----------------------------------------------------------------------
// Constants of data type of categorical attribute
// -----------------------------------------------------------------------
// Data type codes of a categorical attribute. Each code names the Java class
// used to hold a value of that type.
/** String (default type). */
public static final int STRING = 0;
/** Double, using Java Double wrapper class. */
public static final int DOUBLE = 10;
/** Float, using Java Float wrapper class. */
public static final int FLOAT = 11;
/** Integer, using Java Integer wrapper class. */
public static final int INTEGER = 12;
/** Boolean, using Java Boolean wrapper class. */
public static final int BOOLEAN = 13;
/** Datetime prudsys, using Java Double wrapper class. */
public static final int DATETIME_PRUDSYS = 100;
/** Datetime Unix, using Java Date class. */
public static final int DATETIME_UNIX = 101;
/** Any other class. */
public static final int USER_SPECIFIC = 1000;
// -----------------------------------------------------------------------
// Variables declarations
// -----------------------------------------------------------------------
/**
* The attribute's values. Represents mapping key -> category: the position
* of a category in this list is its key (see addCategory and setValues).
*/
protected ArrayList<Category> values = new ArrayList<Category>( 100 );
/**
* Mapping of values to indexes. Represents mapping category -> key; the key
* is the category's position in <code>values</code>, stored as a Double.
*/
protected Hashtable<Category, Double> values2indexes = new Hashtable<Category, Double>( 100 );
/** The attribute's categories. Not used; values is used instead. */
protected Category category[] = null;
/** Taxonomy of categories, if defined. */
protected CategoryHierarchy taxonomy = null;
/** The number of categories is not known a priori. */
protected boolean unboundedCategories = false;
/**
* Like unboundedCategories, but only the current category is stored:
* when set, addCategory discards the previously stored category first.
*/
protected boolean unstoredCategories = false;
//--------------- New for Meta Data Basis Trafos ------------------------
/** Reference to cat. attribute operations object owned by this attribute.*/
protected CategoricalAttributeOperations catAttOp = null;
// -----------------------------------------------------------------------
// Constructors
// -----------------------------------------------------------------------
/**
* Empty constructor.
*/
public CategoricalAttribute()
{
    // Delegate to the name-taking constructor, leaving the name unset.
    this( (String) null );
}
/**
* Create categorical attribute with given name.
*
* @param name of categorical attribute
*/
public CategoricalAttribute( String name )
{
    // Name first, then the operations object: the order is kept because the
    // operations object receives this attribute and may inspect it.
    setName( name );
    catAttOp = new CategoricalAttributeOperations( this );
}
// -----------------------------------------------------------------------
// Getter and setter methods
// -----------------------------------------------------------------------
/**
* Returns list of values (categories) of the attribute.
*
* @return list of values of the attribute
*/
public ArrayList<Category> getValues()
{
    // Hands out the internal list itself, not a copy.
    return this.values;
}
/**
* Sets list of values for the categorical attribute.
*
* Internally, keys are assigned to the values. The key-value mappings
* can be accessed via the methods getKey and getCategory.
*
* @param values list of values of the categorical attribute, values should be Category objects
* @throws MiningException if can't set these values
* @see Category
*/
public void setValues( ArrayList<Category> values ) throws MiningException
{
    if (values == null)
    {
        throw new MiningException( "Can't set null values list." );
    }
    if (unstoredCategories && values.size() > 1)
    {
        throw new MiningException( "Can't set multiple values for option unstoredCategories." );
    }

    // Build the category -> key index completely before touching any field,
    // so that a rejected list leaves the attribute in its previous,
    // consistent state.
    int n = values.size();
    Hashtable<Category, Double> newIndexes = new Hashtable<Category, Double>( Math.max(100, n) );
    for (int i = 0; i < n; i++)
    {
        Category current = values.get(i);
        if (current == null)
        {
            // Hashtable does not accept null keys; report the bad element
            // through the declared exception instead of a NullPointerException.
            throw new MiningException( "Can't set values list containing a null category at index " + i + "." );
        }
        // The key of a category is its position in the list. If the list
        // contains equal categories, the last occurrence determines the key.
        newIndexes.put( current, Double.valueOf(i) );
    }

    // Commit list and index together. The passed list is stored as is (no copy).
    this.values = values;
    this.values2indexes = newIndexes;
}
/**
* Removes all values (categories) of the attribute.
*/
public void removeValues()
{
    // Empty the category list and its reverse index together so that
    // keys and categories stay in sync.
    this.values.clear();
    this.values2indexes.clear();
}
/**
* Returns number of categories, i.e. number of values of the attribute.
*
* @return number of categories of the attribute
*/
public int getCategoriesNumber()
{
    // The number of categories is the length of the values list.
    final int count = this.values.size();
    return count;
}
/**
* Adds new category to list and creates its key.<p>
*
* Attention! For testing if the returned key is a missing value
* do not use <p>
* <code>
* double key = &lt;categorical attribute&gt;.addCategory(&lt;category&gt;); <br>
* if (key == Category.MISSING_VALUE) <br>
* ...; <br>
* </code> <p>
* but choose <p>
* <code>
* double key = &lt;categorical attribute&gt;.addCategory(&lt;category&gt;); <br>
* if (Category.isMissingValue(key)) <br>
* ...; <br>
* </code> <p>
* instead.
*
* @param category new category object
* @return key of new category, Category.MISSING_VALUE if category is
* already existing for this attribute or is null
* @see Category
*/
public double addCategory( Category category )
{
    // Null and already known categories are not added.
    if ( category == null || values2indexes.containsKey(category) )
    {
        return Category.MISSING_VALUE;
    }
    if (unstoredCategories)
    {
        // Only the current category is kept. Drop everything stored so far,
        // not just the first element, so that the list and the index cannot
        // get out of sync if more than one value was stored.
        values.clear();
        // Replace the table instead of clearing it (kept from the original).
        values2indexes = new Hashtable<Category, Double>();
    }
    // The key of the new category is its position in the values list.
    int nval = values.size();
    values.add(category);
    values2indexes.put(category, Double.valueOf(nval));
    return nval;
}
/**
* Returns key for a given category. <p>
*
* Attention! For testing if the returned key is a missing value
* do not use <p>
* <code>
* double key = &lt;categorical attribute&gt;.getKey(&lt;category&gt;); <br>
* if (key == Category.MISSING_VALUE) <br>
* ...; <br>
* </code> <p>
* but choose <p>
* <code>
* double key = &lt;categorical attribute&gt;.getKey(&lt;category&gt;); <br>
* if (Category.isMissingValue(key)) <br>
* ...; <br>
* </code> <p>
* instead.
*
* @param category category for accessing key
* @return key for given category, Category.MISSING_VALUE if no key for
* category found
* @see Category
*/
public double getKey( Category category )
{
    // A null category is never looked up (the index table does not accept
    // null keys); it is reported as missing like any unknown category.
    if (category != null)
    {
        Double storedKey = values2indexes.get(category);
        if (storedKey != null)
        {
            return storedKey.doubleValue();
        }
    }
    return Category.MISSING_VALUE;
}
/**
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -