⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 filesystemsourcefactory.java

📁 dm s preparing process. In this case we use O distance.
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
		/**
		 * Loads the file in transposed form: every column of the file becomes one
		 * record, and every data line of the file contributes one attribute (keyed
		 * by the running {@code colNum}) to each of those records.
		 * <p>
		 * Lines that are empty or start with {@code #} are skipped. Each remaining
		 * line is split on runs of the characters returned by
		 * {@code formatter.getAttrSeperator()}; zero values are not stored.
		 * <p>
		 * After reading, the counts found in the file are compared with the ones in
		 * {@code sourceInfo} (when they were supplied) and a warning is logged on a
		 * mismatch. NOTE(review): the warnings state that the program "sets" the
		 * record/column number, but no write to {@code sourceInfo} happens in this
		 * method - confirm where, if anywhere, that adjustment is made.
		 * <p>
		 * (Original note: this method was added later to support transposition; when
		 * the supplied source info disagrees with the file, the file wins.)
		 *
		 * @param reader reader positioned at the start of the data file
		 * @throws RuntimeException      if a line cannot be split or {@code colNum}
		 *                               reaches {@code Integer.MAX_VALUE}
		 * @throws NumberFormatException if a token is not a parsable double
		 */
		private void init2(BufferedReader reader) {
			boolean firstLine = true;
			// Compiled on the first data line and reused afterwards instead of being
			// rebuilt for every line; assumes the separator does not change while the
			// file is being read.
			Pattern pattern = null;
			while (true) {
				String s = readLine(reader);
				if (null == s) {
					break;
				} else if (s.startsWith("#") || s.equals("")) {
					// XXX add code that reads the file-format header
					continue;
				}

				if (null == pattern) {
					pattern = Pattern.compile("[" + formatter.getAttrSeperator() + "]+");
				}
				// For the untransposed matrix this is the array of all attributes of one row
				String[] columns = null;
				try {
					columns = pattern.split(s);
				} catch (RuntimeException exob) {
					// Keep the original failure as the cause so it is not lost
					throw new RuntimeException(
							"SimpleFileSystemSource is not suitable for current source, there are too many columns in data source file "
									.concat(location),
							exob);
				}
				int len = columns.length;

				// First line of the untransposed matrix, i.e. the first attribute of
				// every record after transposition
				if (firstLine) {
					for (int i = 0; i < len; i++) {
						RecordImpl record = new RecordImpl();
						double d = Double.parseDouble(columns[i]);
						if (0 != d) {
							// Key 0 is used here rather than colNum; presumably colNum
							// is still 0 on the first data line - TODO confirm
							record.items.put(0, d);
						}
						records.add(Integer.valueOf(i), record);
					}
					// Row info was supplied and announces more records than this line
					// contains: the line omitted trailing entries, so pad with empty
					// records up to rowNum
					if (!isRowNull && rowNum > len) {
						while (rowNum > len) {
							records.add(Integer.valueOf(len), new RecordImpl());
							len++;
						}
					}
					// Either no row info was supplied, or it announced no more than
					// this line actually contains: take the count from the file
					else {
						rowNum = len;
					}

					firstLine = false;
				}
				// Later lines: add attribute number colNum to every record
				else {
					// This line is longer than any line seen so far: create the
					// records that earlier lines left out
					while (rowNum < len) {
						Record record = new RecordImpl();
						records.add(Integer.valueOf(rowNum++), record);
					}
					for (int i = 0; i < len; i++) {
						double d = Double.parseDouble(columns[i]);
						if (0 == d) {
							continue;
						}
						((RecordImpl) records.get(i)).items.put(Integer.valueOf(colNum), d);
					}
				}
				// Guard against overflowing the int counter on the increment below
				if (colNum == Integer.MAX_VALUE)
					throw new RuntimeException("SimpleFileSystemSource is not suitable for current source, "
							+ "there are too many records in data source file ".concat(location));
				colNum++;
			}
			if (!isColNull) {
				if (colNum > sourceInfo.getRecordNum()) {
					log.warn("There are " + colNum + " records in " + location
							+ " which is more than the record number[" + sourceInfo.getRecordNum()
							+ "] u predict in Method createSource or source file. "
							+ "Here, the program simply set the record number to " + colNum + ".[position:"
							+ new Exception().getStackTrace()[0].getLineNumber() + "].");
				} else if (sourceInfo.getRecordNum() > colNum) {
					log.warn("File[" + location + "] does not contains enough records. u predicted "
							+ sourceInfo.getRecordNum() + ", but actually there is only " + colNum
							+ ". Here, program simply set the record number to " + colNum + "[position:"
							+ new Exception().getStackTrace()[0].getLineNumber() + "].");
				}
			}
			if (!isRowNull) {
				if (rowNum > sourceInfo.getColumnNum()) {
					log.warn("There are " + rowNum + " columns in " + location
							+ " which is more than the column number[" + sourceInfo.getColumnNum()
							+ "] u predict in Method createSource or source file."
							+ "Here, the program simply set the column number to " + rowNum + ".[position:"
							+ new Exception().getStackTrace()[0].getLineNumber() + "].");
				}
				// When the announced attribute count exceeds the actual one, the extra
				// attributes still exist conceptually: they are assumed to be all zero
				// and therefore omitted. For the transposed matrix that corresponds to
				// all-zero rows that were never written.
			}
		}

		/**
		 * Fetches a single record from this data source.
		 *
		 * @param index position handed to {@code records.get}
		 * @return the record stored at {@code index}
		 */
		public Record getRecord(int index) {
			final Record found = records.get(index);
			return found;
		}

		/**
		 * Exposes the iterator of the backing {@code records} collection.
		 *
		 * @return an iterator over the records of this data source
		 */
		public Iterator<Record> iterator() {
			final Iterator<Record> cursor = records.iterator();
			return cursor;
		}

		/**
		 * Renders the whole data source as text: a header line with {@code rowNum}
		 * and {@code colNum}, followed by one section per record listing the first
		 * {@code colNum} items with three decimals, each followed by a tab.
		 *
		 * @return the textual dump of this data source
		 */
		@Override
		public String toString() {
			// Platform line separator, the same one println() emits
			final String eol = String.format("%n");
			StringBuilder text = new StringBuilder();
			text.append("There are ").append(rowNum);
			text.append(" records in file system source.Each record has ").append(colNum);
			text.append(" attributes.").append(eol);

			int index = 0;
			for (Iterator<Record> cursor = records.iterator(); cursor.hasNext(); index++) {
				text.append(eol);
				text.append("Record ").append(index).append(" :").append(eol);
				Record current = cursor.next();
				for (int attr = 0; attr < colNum; attr++) {
					text.append(String.format("%.3f\t", current.getItem(attr)));
				}
				text.append(eol);
			}
			return text.toString();
		}

		/**
		 * Sparse {@link Record} implementation: only non-zero items are stored, and
		 * any column without an entry reads as 0. The value 0 is hard-coded as the
		 * omitted value; a configurable default value could be added if that turns
		 * out to be too restrictive.
		 * <p>
		 * {@link #RecordImpl(String)} builds a record from one line of text (used
		 * when the file is not transposed); {@link #RecordImpl()} creates an empty
		 * record that is filled afterwards (used when transposing).
		 *
		 * @author Avon
		 */
		private class RecordImpl implements Record {
			private static final long serialVersionUID = 2910079439135958308L;

			// Column index -> value; the frequent value (0) of the sparse matrix is
			// never stored
			HashMap<Integer, Double> items;

			/** Creates an empty record; the caller fills {@code items} itself. */
			RecordImpl() {
				items = new HashMap<Integer, Double>();
			}

			/**
			 * Parses one line of the file into a record (non-transposed case) and
			 * raises the enclosing {@code colNum} to the number of tokens on this
			 * line when that is larger.
			 *
			 * @param record one line of text, items separated by runs of the
			 *               characters from {@code formatter.getAttrSeperator()}
			 * @throws RuntimeException      if the line cannot be split
			 * @throws NumberFormatException if a token other than {@code "0"} is
			 *                               not a parsable double
			 */
			RecordImpl(String record) {
				/*
				 * initCapacity=8 loadfactor=.9 is suitable for current case;
				 * initCapacity is a power of 2
				 */
				items = new HashMap<Integer, Double>(8, 0.9F);
				Pattern pattern = Pattern.compile("[" + formatter.getAttrSeperator() + "]+");
				String[] colums = null;
				try {
					colums = pattern.split(record);
				} catch (RuntimeException exob) {
					// Chain the cause and keep location / row number readable
					throw new RuntimeException(
							"SimpleFileSystemSource is not suitable for current source, there are too many columns in data source file "
									.concat(location).concat(" of record ").concat(Integer.toString(rowNum)),
							exob);
				}
				int i = 0;
				for (String item : colums) {
					// Only the literal token "0" is skipped without parsing
					if (!item.equals("0")) {
						items.put(i, Double.parseDouble(item));
					}
					i++;
				}
				/*
				 * The widest line seen so far defines the column count, whether or
				 * not a column count was supplied up front (isColNull): a line may
				 * omit trailing zeros, so it may be shorter than the final count,
				 * but a longer line always enlarges it.
				 */
				if (i > colNum) {
					colNum = i;
				}
			}

			/**
			 * Expands this sparse record into a dense array.
			 *
			 * @return an array of length {@code (int) sourceInfo.getColumnNum()}
			 *         holding the stored values, 0 elsewhere
			 */
			public double[] getAsArray() {
				// A newly allocated double[] is already filled with 0.0
				double[] doubles = new double[(int) sourceInfo.getColumnNum()];
				for (java.util.Map.Entry<Integer, Double> entry : items.entrySet()) {
					doubles[entry.getKey()] = entry.getValue();
				}
				return doubles;
			}

			/**
			 * Returns the item in the given column, or 0 when nothing is stored
			 * for it.
			 *
			 * @param column column index
			 * @return the stored value, or 0 if the column has no entry
			 * @throws RuntimeException if {@code column} is greater than
			 *                          {@code sourceInfo.getColumnNum()}
			 */
			public double getItem(long column) {
				// NOTE(review): column == getColumnNum() passes this check; if
				// columns are 0-based this looks like an off-by-one - confirm
				// before tightening, callers may rely on it.
				if (column > sourceInfo.getColumnNum())
					throw new RuntimeException("There are only " + sourceInfo.getColumnNum() + " columns. The column "
							+ column + " u access does not exist.");
				Double value = items.get((int) column);
				return (null == value) ? 0.0 : value.doubleValue();
			}

		}

	}

	/**
	 * Placeholder for a file-backed data source whose records are NOT all held
	 * in memory at once. It is intended for files with too many records to load
	 * completely, which means a memory-usage strategy has to be designed for it.
	 * The class currently adds nothing beyond forwarding its constructor
	 * arguments to {@code FileSystemSource}.
	 *
	 * @author Avon
	 */
	@SuppressWarnings("unused")
	private abstract class ComplexFileSystemSourceImpl extends FileSystemSource {

		/**
		 * Passes all arguments unchanged to the {@code FileSystemSource}
		 * constructor.
		 */
		public ComplexFileSystemSourceImpl(String location, Formatter formatter, SourceInfo sourceInfo,
				boolean transposable) {
			super(location, formatter, sourceInfo, transposable);
		}
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -