⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 miningexcelstream.java

📁 一个数据挖掘软件ALPHAMINER的整个过程的JAVA版源代码
💻 JAVA
📖 第 1 页 / 共 2 页
字号:

					}

					// Row back to first row to insert data
					rows = sheet.rowIterator();
					firstRow = (HSSFRow) rows.next();
					cells = firstRow.cellIterator();
				}
			}
			// Add back all column names and data types
			MiningAttribute attribute;
			for (int k = 0; k < colName.size(); k += 2) {
				int icell_key = ((Integer) colName.get(k)).intValue();
				String fieldname = (String) colName.get(k + 1);
				if (colType.containsKey(new Integer(icell_key))) {
					Integer type = (Integer) colType
							.get(new Integer(icell_key));
					if (type.intValue() == HSSFCell.CELL_TYPE_NUMERIC)
						attribute = new NumericAttribute();
					else {
						attribute = new CategoricalAttribute();
						((CategoricalAttribute) attribute)
								.setUnboundedCategories(true);

					}
				} else {
					attribute = new CategoricalAttribute();
					((CategoricalAttribute) attribute)
							.setUnboundedCategories(true);
				}

				attribute.setName(fieldname);
				header.addElement(attribute);

			}
			//****************************

			// Calculate inverse mapping:
			int ncell = cmax + 1;
			int[] ihInv = new int[ncell];
			for (int j = 0; j < ncell; j++)
				ihInv[j] = -1;
			for (int j = 0; j < ih.size(); j++)
				ihInv[ih.IntegerAt(j)] = j;

			// Add attributes to meta data:
			for (int j = 0; j < ncell; j++) {
				int icell = ihInv[j];
//				System.out.println("icell = " + icell);
				if (icell > -1) {
					metaData.addMiningAttribute((MiningAttribute) header
							.elementAt(icell));
				}
			}

			// Now read data matrix:
			int nAtt = metaData.getAttributesNumber();
			Vector data = new Vector();
			while (rows.hasNext()) {
				HSSFRow row = (HSSFRow) rows.next();
				cells = row.cellIterator();

//				Vector objects = new Vector();
				Vector objects = new Vector(cmax+1);
				for(int i =0; i<=cmax; i++){
				    objects.add(i, new Double(Category.MISSING_VALUE));
				}
				int lastcell = 0;

				while (cells.hasNext()) {

					HSSFCell cell = (HSSFCell) cells.next();
					int icell = cell.getCellNum();
					int index =-1;
					if (icell < 0)
						throw new MiningException("illegal index of cell");

					//<< added by Joyce 2005/02/25
					//<<modified by tyleung 2005/4/8
					index = mycellidex.indexOf(new Integer(icell));
					if (mycellidex.contains(new Integer(icell)))
					{
					    
//					    
//						if (icell != ((Integer) mycellidex.get(lastcell)).intValue()) 
//						{
//							int last = mycellidex.indexOf(new Integer(icell));
//							for (int i = lastcell; i < last; i++) 
//							{
//								objects.addElement(new Double(Category.MISSING_VALUE));
//							}
//							lastcell = last;
//						}
//						lastcell++;

					//>>

						if (cell.getCellType() == HSSFCell.CELL_TYPE_NUMERIC) 
						{
						  //  double temp = cell.getNumericCellValue();
							if (cell.getCellType() == HSSFCell.CELL_TYPE_ERROR
									|| cell.getCellType() == HSSFCell.CELL_TYPE_BLANK) 
							{
//							   	objects.addElement(new Double(Category.MISSING_VALUE));
							    objects.set(icell, new Double(Category.MISSING_VALUE));
							}else 
							{
								// Added by Kelvin Jor to get DateCellValue
								Integer type = (Integer) colType.get(new Integer(cell.getCellNum()));
								if (type.intValue() == Constants.CELL_TYPE_DATE)
								{
//									objects.addElement(cell.getDateCellValue());
									objects.set(icell, cell.getDateCellValue());
								}else
								{
//									objects.addElement(new Double(cell
//											.getNumericCellValue()));
									objects.set(icell, new Double(cell
											.getNumericCellValue()));
								}
							}
						} else 
						{
//						    String temp = cell.getStringCellValue();
							if (cell.getCellType() == HSSFCell.CELL_TYPE_ERROR
							//<<tyleung 30/3/2005 to accept space as missing values
									|| cell.getCellType() == HSSFCell.CELL_TYPE_BLANK
									|| cell.getStringCellValue().trim().equals("")) {
//							   			objects.addElement(new Double(Category.MISSING_VALUE));
							    		objects.set(icell, new Double(Category.MISSING_VALUE));
							    //tyleung 30/3/2005
							} else if (cell.getCellType() == HSSFCell.CELL_TYPE_BOOLEAN){
//								objects.addElement(new Boolean(cell.getBooleanCellValue()));
							    objects.set(icell, new Boolean(cell.getBooleanCellValue()));
							    							    
							} else {
//						        objects.addElement(cell.getStringCellValue());
							    objects.set(icell, cell.getStringCellValue());
							}
	
						}
					}else
					{
						// If the cell exists outside the index, just skip this cell
					}
				}
				//				<< added by Joyce 2005/02/25 for remaining missing values
//				for (int i = lastcell; i < mycellidex.size(); i++) 
//				{
//					objects.addElement(new Double(Category.MISSING_VALUE));
//					
//				}
				//>>>

				// Add data to row:
//				Object[] rowArr = new Object[nAtt];
//				for (int j = 0; j < nAtt; j++) {
//					int icell = ihInv[j];
//					if (icell > -1 && icell < objects.size())
//						rowArr[j] = objects.elementAt(icell);
//					else
//						rowArr[j] = "";
//				}
//				;
//				data.addElement(rowArr);
				data.addElement(objects.toArray());
			}
			;
			//tyleung 2005/4/8>>
			
			// Create object array:
			int nVec = data.size();
			Object[][] mObArr = new Object[nVec][nAtt];
			for (int i = 0; i < nVec; i++) {
				Object[] rowArr = (Object[]) data.elementAt(i);
				for (int j = 0; j < nAtt; j++)
					mObArr[i][j] = rowArr[j];
			}
			;

			// Create data table:
			dataTable = new MiningArrayStream(mObArr, metaData);
		}
		updateCategoricalAttrsType();
		return metaData;
	}

	  //<<Frank J. Xu, 16/02/2005
	  // determine if a categorical attribute is bounded 
	  //Add method to reset the categorical attributes' type of csv stream.
	  /**
	   * Re-derives the value list and the bounded/unbounded flag of every
	   * categorical attribute from the rows currently held by this stream.
	   * <p>
	   * The cursor is reset, every row is read once, and the distinct
	   * categories seen for each categorical attribute are collected in
	   * display-value order (see {@code insertCatValByOrder}). An attribute is
	   * marked bounded when it has at least one distinct category and fewer
	   * than {@code MiningInputStream.CATEGORICAL_ATTRIBUTE_BOUND} of them.
	   * The collected values are then stored on the attribute and the meta
	   * data is written back.
	   * <p>
	   * A stream without any data rows is handled: each categorical attribute
	   * simply receives an empty value list and stays unbounded.
	   *
	   * @exception MiningException
	   *                if the cursor cannot be reset; a MiningException raised
	   *                while scanning the rows is printed and swallowed, in
	   *                which case the meta data is left untouched
	   */
	  public void updateCategoricalAttrsType()throws MiningException{
	  	reset();
	  	MiningDataSpecification spec = this.getMetaData();
	  	MiningAttribute[] attrs = spec.getAttributesArray();

	  	// Pick out the categorical attributes, keeping their original order.
	  	Vector categoricalAttrs = new Vector();
	  	for(int i = 0; i < attrs.length; i++){
	  		if(attrs[i] instanceof CategoricalAttribute){
	  			categoricalAttrs.add(attrs[i]);
	  		}
	  	}
	  	if(categoricalAttrs.isEmpty()){
	  		return;
	  	}

	  	// One ordered vector of distinct categories per categorical attribute.
	  	// They are created up front so that a stream with no data rows leaves
	  	// empty vectors behind instead of null entries.
	  	int categoricalCount = categoricalAttrs.size();
	  	Vector[] categoricalVal = new Vector[categoricalCount];
	  	for(int j = 0; j < categoricalCount; j++){
	  		categoricalVal[j] = new Vector();
	  	}

	  	try{
	  		while (this.next()) {
	  			MiningVector mv = this.read();
	  			for(int j = 0; j < categoricalCount; j++){
	  				CategoricalAttribute attr = (CategoricalAttribute) categoricalAttrs.get(j);
	  				Category catObj = mv.getValueCategory(attr);
	  				// The order of categorical values affects the correctness of
	  				// assessment operations, so values are kept sorted while
	  				// importing; null categories and duplicates are ignored.
	  				insertCatValByOrder(categoricalVal[j], catObj);
	  			}
	  		}

	  		// Update each categorical attribute's type based on the threshold.
	  		for(int j = 0; j < categoricalCount; j++){
	  			CategoricalAttribute attr = (CategoricalAttribute) categoricalAttrs.get(j);
	  			int distinct = categoricalVal[j].size();
	  			if((distinct > 0) && (distinct < MiningInputStream.CATEGORICAL_ATTRIBUTE_BOUND)){
	  				attr.setUnboundedCategories(false);
	  			}
	  			attr.setValues(new ArrayList(categoricalVal[j]));
	  		}

	  		// The attribute objects were modified in place, so the array can be
	  		// handed back to the meta data as it is.
	  		spec.setAttributesArray(attrs);
	  		this.metaData = spec;
	  	}
	  	catch (MiningException ex){
	  		// Best effort: a failure while scanning leaves the meta data as it was.
	  		ex.printStackTrace();
	  	}
	  }

	  private void insertCatValByOrder(Vector categoricalVal, Category catObj)
	  {
	  	// A null category is never recorded.
	  	if(catObj == null)
	  		return;

	  	final String newLabel = catObj.getDisplayValue();
	  	final int count = categoricalVal.size();

	  	for(int pos = 0; pos < count; pos++)
	  	{
	  		Object existing = categoricalVal.get(pos);

	  		// Already present: nothing to insert.
	  		if(existing.equals(catObj))
	  			return;

	  		// First entry whose display value sorts after the new one marks
	  		// the insertion point.
	  		String existingLabel = ((Category)existing).getDisplayValue();
	  		if(newLabel.compareTo(existingLabel) < 0)
	  		{
	  			categoricalVal.insertElementAt(catObj, pos);
	  			return;
	  		}
	  	}

	  	// No larger entry found: the new category goes at the end.
	  	categoricalVal.insertElementAt(catObj, count);
	  }
	  //Frank J. Xu, 16/02/2005>>

	// -----------------------------------------------------------------------
	//  Methods of cursor positioning
	// -----------------------------------------------------------------------
	/**
	 * Places the cursor before first row.
	 * 
	 * @exception MiningException
	 *                couldn't reset cursor
	 */
	public void reset() throws MiningException {
		// Cursor handling is delegated to the backing data table.
		this.dataTable.reset();
	}

	/**
	 * Advance cursor by one position.
	 * 
	 * @return true if next vector exists, else false
	 * @exception MiningException
	 *                couldn't advance cursor
	 */
	public boolean next() throws MiningException {
		// Forward to the backing data table and hand back its answer.
		final boolean advanced = this.dataTable.next();
		return advanced;
	}

	/**
	 * Move cursor to given position.
	 * 
	 * @param position
	 *            new position of the cursor
	 * @return true if cursor could be positioned, false if not
	 * @exception MiningException
	 *                could not move cursor
	 */
	public boolean move(int position) throws MiningException {
		// Positioning is performed by the backing data table.
		final boolean positioned = this.dataTable.move(position);
		return positioned;
	}

	// -----------------------------------------------------------------------
	//  Methods of reading from the stream
	// -----------------------------------------------------------------------
	/**
	 * Reads current data vector.
	 * 
	 * @return data vector at current cursor position
	 * @exception MiningException
	 *                couldn't read mining vector
	 */
	public MiningVector read() throws MiningException {
		// The backing data table supplies the vector at its current cursor.
		final MiningVector current = this.dataTable.read();
		return current;
	}

	//  -----------------------------------------------------------------------
	//  Methods of getting the number of sheets
	// -----------------------------------------------------------------------
	/**
	 * Returns the number of sheets reported by {@code wb}.
	 * 
	 * @return result of {@code wb.getNumberOfSheets()}
	 */
	public int getNumberOfSheets() {
		final int sheetCount = this.wb.getNumberOfSheets();
		return sheetCount;
	}

	//  -----------------------------------------------------------------------
	//  Methods of getting sheet name
	// -----------------------------------------------------------------------
	/**
	 * Returns the name {@code wb} reports for the sheet at the given index.
	 * 
	 * @param a_index
	 *            index of the sheet, passed through to {@code wb}
	 * @return result of {@code wb.getSheetName(a_index)}
	 */
	public String getSheetName(int a_index) {
		final String sheetName = this.wb.getSheetName(a_index);
		return sheetName;
	}

	//  -----------------------------------------------------------------------
	//  Methods of selecting the sheet to read from
	// -----------------------------------------------------------------------
	/**
	 * Makes the sheet at the given index the current {@code sheet}.
	 * 
	 * @param a_index
	 *            index of the sheet, passed through to {@code wb.getSheetAt}
	 */
	public void setSheetAt(int a_index) {
		this.sheet = this.wb.getSheetAt(a_index);
	}

	// -----------------------------------------------------------------------
	//  Test
	// -----------------------------------------------------------------------
	/**
	 * Test of Excel stream.
	 * 
	 * @param args
	 *            arguments (ignored)
	 */
	public static void main(String[] args) {
		// Fixed sample workbook; the command-line arguments are not consulted.
		final String workbookPath = "data/excel/vowel.xls";
		try {
			final MiningExcelStream stream = new MiningExcelStream(
					workbookPath, null);
			System.out.println(stream);
		} catch (Exception failure) {
			failure.printStackTrace();
		}
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -