⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 categoricalattribute.java

📁 一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

 /**
  * Title: XELOPES Data Mining Library
  * Description: The XELOPES library is an open platform-independent and data-source-independent library for Embedded Data Mining.
  * Copyright: Copyright (c) 2002 Prudential Systems Software GmbH
  * Company: ZSoft (www.zsoft.ru), Prudsys (www.prudsys.com)
  * @author Valentine Stepanenko (valentine.stepanenko@zsoft.ru)
  * @author Michael Thess
  * @version 1.0
  */

package com.prudsys.pdm.Core;

import java.util.ArrayList;
import java.util.Hashtable;

import com.prudsys.pdm.Adapters.PmmlVersion20.DataField;
import com.prudsys.pdm.Adapters.PmmlVersion20.Taxonomy;
import com.prudsys.pdm.Adapters.PmmlVersion20.Value;
import com.prudsys.pdm.Utils.StringUtils;

 /**
  * An attribute with discrete values upon which
  * performing numeric operations is typically not meaningful. <p>
  *
  * From CWM Data Mining. <p>
  *
  * Superclasses:
  * <ul>
  *   <li> MiningAttribute
  * </ul>
  * Contained Elements:
  * <ul>
  *   <li> Category
  * </ul>
  * Attributes:
  * <ul>
  *   <li> <i>unboundedCategories</i>: Indicates that the set
  *   of categories is not fixed a priori. Hence the set of categories
  *   grows dynamically. Examples are names and identifiers. <br>
  *     - type: Boolean <br>
  *     - multiplicity: exactly one
  *   <li> <i>unstoredCategories</i>: Unbounded categories which are not
  *   stored at all. Examples are transaction identifiers. <br>
  *     - type: Boolean <br>
  *     - multiplicity: exactly one
  *   <li> <i>key</i>: Key value to reference category. <br>
  *     - type: Float <br>
  *     - multiplicity: exactly one
  * </ul>
  * References:
  * <ul>
  *   <li> <i>taxonomy</i>: References the taxonomy. <br>
  *     - class: CategoryHierarchy <br>
  *     - defined by: UsesAsTaxonomy::taxonomy <br>
  *     - multiplicity: zero or one
  *   <li> <i>category</i>: References the Category. <br>
  *     - class: Category <br>
  *     - defined by: ContainsCategory::category <br>
  *     - multiplicity: zero or more
  * </ul>
  * Constraints:
  * <ul>
  *   <li> Category values must be unique.
  * </ul>
  *
  * Remark: Concerning the data types, a non-default data type should only be
  * used when it appears in all categories of the attribute. Otherwise, for mixed
  * categories, either the UNDEFINED data type or the DEFAULT (i.e. STRING) data
  * type has to be applied, since all Categories have a display value which is a String.
  *
  * @see Category
  * @see CategoryHierarchy
  */
public class CategoricalAttribute extends MiningAttribute
{
    // -----------------------------------------------------------------------
    //  Constants of data type of categorical attribute
    // -----------------------------------------------------------------------
    /** Data type code: String (the default type). */
    public static final int STRING = 0;

    /** Data type code: Double, using the Java Double wrapper class. */
    public static final int DOUBLE = 10;

    /** Data type code: Float, using the Java Float wrapper class. */
    public static final int FLOAT = 11;

    /** Data type code: Integer, using the Java Integer wrapper class. */
    public static final int INTEGER = 12;

    /** Data type code: Boolean, using the Java Boolean wrapper class. */
    public static final int BOOLEAN = 13;

    /** Data type code: prudsys datetime, using the Java Double wrapper class. */
    public static final int DATETIME_PRUDSYS = 100;

    /** Data type code: Unix datetime, using the Java Date class. */
    public static final int DATETIME_UNIX = 101;

    /** Data type code: any other (user-specific) class. */
    public static final int USER_SPECIFIC = 1000;

    // -----------------------------------------------------------------------
    //  Variables declarations
    // -----------------------------------------------------------------------
    /**
     * The attribute's values. Represents the mapping key -> category: the
     * position of a category in this list is the key that setValues and
     * addCategory assign to it.
     */
    protected ArrayList<Category> values = new ArrayList<Category>( 100 );

    /**
     * Mapping of values to indexes. Represents the mapping category -> key,
     * with the key stored as a Double. Updated together with values by
     * setValues, removeValues and addCategory.
     */
    protected Hashtable<Category, Double> values2indexes = new Hashtable<Category, Double>( 100 );

    /** The attribute's categories. Not used; values is used instead. */
    protected Category category[] = null;

    /** Taxonomy of categories, if defined; null otherwise. */
    protected CategoryHierarchy taxonomy = null;

    /** True if the number of categories is not known a priori. */
    protected boolean unboundedCategories = false;

    /**
     * Like unboundedCategories, but only the current category is stored:
     * setValues rejects lists with more than one element and addCategory
     * discards the previously stored category when this flag is set.
     */
    protected boolean unstoredCategories = false;

    //--------------- New for Meta Data Basis Trafos ------------------------
    /**
     * Reference to the categorical attribute operations object owned by this
     * attribute. Created in the constructor.
     */
    protected CategoricalAttributeOperations catAttOp = null;

    // -----------------------------------------------------------------------
    //  Constructors
    // -----------------------------------------------------------------------
    /**
     * Creates a categorical attribute without a name.
     *
     * Hands a null name to the constructor taking the attribute name.
     */
    public CategoricalAttribute()
    {
        this( (String) null );
    }

    /**
     * Creates a categorical attribute carrying the given name and attaches a
     * new operations object to it.
     *
     * @param name name of the categorical attribute
     */
    public CategoricalAttribute( String name )
    {
        setName( name );
        catAttOp = new CategoricalAttributeOperations( this );
    }

    // -----------------------------------------------------------------------
    //  Getter and setter methods
    // -----------------------------------------------------------------------
    /**
     * Gives access to the values (categories) held by this attribute.
     *
     * The internal list itself is returned, not a copy.
     *
     * @return list of values of the attribute
     */
    public ArrayList<Category> getValues()
    {
        return this.values;
    }

    /**
     * Sets list of values for the categorical attribute.
     *
     * Internally, keys are assigned to the values: the key of a value is its
     * position in the given list. The key-value mappings can be accessed via
     * the methods getKey and getCategory.
     *
     * The given list is stored as is (no copy is made). The attribute is only
     * modified if the whole list is accepted; on any exception the previous
     * values and keys remain in place. Duplicate categories are not rejected
     * here; for a duplicate the key of its last occurrence is kept.
     *
     * @param values list of values of the categorical attribute, values should be Category objects
     * @throws MiningException if the list is null, contains a null element, or
     * contains more than one element while unstoredCategories is set
     * @see Category
     */
    public void setValues( ArrayList<Category> values ) throws MiningException
    {
        if (values == null)
        {
            throw new MiningException( "Can't set null values list." );
        }
        if (unstoredCategories && values.size() > 1)
        {
            throw new MiningException( "Can't set multiple values for option unstoredCategories." );
        }

        // Build the category -> key table before touching any field, so that
        // an invalid element cannot leave list and table out of sync.
        int n = values.size();
        Hashtable<Category, Double> indexes = new Hashtable<Category, Double>( Math.max( 100, n ) );
        for (int i = 0; i < n; i++)
        {
            Category value = values.get(i);
            if (value == null)
            {
                // Hashtable does not accept null keys; report it as the declared exception.
                throw new MiningException( "Can't set values list containing a null category at position " + i + "." );
            }
            indexes.put( value, Double.valueOf( (double) i ) );
        }

        this.values = values;
        this.values2indexes = indexes;
    }

    /**
     * Discards every value (category) of the attribute together with the
     * category-to-key lookup, leaving the attribute without values.
     */
    public void removeValues()
    {
        this.values.clear();
        this.values2indexes.clear();
    }

    /**
     * Tells how many categories, i.e. values, the attribute currently holds.
     *
     * @return number of categories of the attribute
     */
    public int getCategoriesNumber()
    {
        final int count = this.values.size();
        return count;
    }

   /**
    * Adds new category to list and creates its key.<p>
    *
    * The key is the position the category gets in the internal value list.
    * If unstoredCategories is set, everything stored so far is discarded
    * first, so that the new category is the only one kept. <p>
    *
    * Attention! For testing if the returned key is a missing value
    * do not use <p>
    * <code>
    * double key = &lt;categorical attribute&gt;.addCategory(&lt;category&gt;); <br>
    * if (key == Category.MISSING_VALUE) <br>
    *   ...; <br>
    * </code> <p>
    * but choose <p>
    * <code>
    * double key = &lt;categorical attribute&gt;.addCategory(&lt;category&gt;); <br>
    * if (Category.isMissingValue(key)) <br>
    *   ...; <br>
    * </code> <p>
    * instead.
    *
    * @param category new category object
    * @return key of new category, Category.MISSING_VALUE if category is
    * already existing for this attribute or is null
    * @see Category
    */
   public double addCategory( Category category )
   {
        if ( category == null || values2indexes.containsKey(category) )
        {
          return Category.MISSING_VALUE;
        }

        if (unstoredCategories)
        {
          // Only the current category is kept. The whole list is cleared (not
          // just its first element) so that it always stays consistent with
          // the fresh, empty index table created below.
          values.clear();
          values2indexes = new Hashtable<Category, Double>();  // replaced instead of cleared, as before
        }

        int nval = values.size();
        values.add(category);
        values2indexes.put(category, Double.valueOf( (double) nval ));

        return nval;
   }

    /**
     * Looks up the key that belongs to a given category. <p>
     *
     * Attention! To test whether the returned key is a missing value
     * do not use <p>
     * <code>
     * double key = &lt;categorical attribute&gt;.getKey(&lt;category&gt;); <br>
     * if (key == Category.MISSING_VALUE) <br>
     *   ...; <br>
     * </code> <p>
     * but choose <p>
     * <code>
     * double key = &lt;categorical attribute&gt;.getKey(&lt;category&gt;); <br>
     * if (Category.isMissingValue(key)) <br>
     *   ...; <br>
     * </code> <p>
     * instead.
     *
     * @param category category for accessing key
     * @return key for given category, Category.MISSING_VALUE if the category
     * is null or no key for the category is found
     * @see Category
     */
    public double getKey( Category category )
    {
        // A null category is never looked up; it is treated like an unknown one.
        final Double key = (category == null) ? null : values2indexes.get(category);

        return (key == null) ? Category.MISSING_VALUE : key.doubleValue();
    }

    /**

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -