📄 filesystemsourcefactory.java
字号:
/**
 * Builds the data source from the transposed contents of the file (this method was added
 * after the non-transposed loader).
 *
 * <p>Each data line of the file supplies one attribute for every record: the value in column
 * {@code i} of the line is stored in {@code records.get(i)} under the running attribute index
 * {@code colNum}. Lines that are empty or start with {@code #} are skipped. Values equal to
 * zero are not stored.
 *
 * <p>While reading, {@code rowNum} (number of records) and {@code colNum} (number of data
 * lines read) follow what is actually found in the file. After the last line, the counts are
 * compared with {@code sourceInfo} and any mismatch is reported through {@code log.warn};
 * this method itself does not write to {@code sourceInfo}.
 *
 * @param reader reader passed to {@code readLine(reader)} until that call returns
 *               {@code null}
 * @throws RuntimeException if splitting a line fails, if {@code colNum} reaches
 *                          {@code Integer.MAX_VALUE}, or (as {@link NumberFormatException})
 *                          if a field cannot be parsed as a double
 */
private void init2(BufferedReader reader) {
    // Separator pattern: compiled on the first data line and reused for all later lines
    // instead of being recompiled per line.
    Pattern pattern = null;
    boolean firstLine = true;
    while (true) {
        String s = readLine(reader);
        if (null == s) {
            break;
        } else if (s.startsWith("#") || s.equals("")) {
            // XXX add code that reads the file-format information here
            continue;
        }
        if (pattern == null) {
            pattern = Pattern.compile("[" + formatter.getAttrSeperator() + "]+");
        }
        // Seen as the untransposed matrix, this array holds all attributes of one row.
        String[] columns;
        try {
            columns = pattern.split(s);
        } catch (RuntimeException exob) {
            // Keep the original failure as the cause so it is not lost.
            throw new RuntimeException(
                    "SimpleFileSystemSource is not suitable for current source, there are too many columns in data source file "
                            .concat(location),
                    exob);
        }
        int len = columns.length;
        if (firstLine) {
            // First row of the untransposed matrix = first attribute of every transposed record.
            // NOTE(review): the first line is stored under key 0 while later lines use colNum;
            // this assumes colNum is 0 on entry - confirm.
            for (int i = 0; i < len; i++) {
                RecordImpl record = new RecordImpl();
                double d = Double.parseDouble(columns[i]);
                if (0 != d) {
                    record.items.put(0, d);
                }
                records.add(i, record);
            }
            if (!isRowNull && rowNum > len) {
                // Row info was supplied and announces more entries than the first line
                // contains: add empty records for the omitted trailing entries.
                for (int idx = len; idx < rowNum; idx++) {
                    records.add(idx, new RecordImpl());
                }
            } else {
                // Either no row info was supplied, or it announced fewer entries than the
                // first line actually has: follow the file.
                rowNum = len;
            }
            firstLine = false;
        } else {
            // A later line is longer than anything seen so far: create the records that had
            // been omitted up to now.
            while (rowNum < len) {
                records.add(rowNum++, new RecordImpl());
            }
            // Store attribute number colNum of every record present on this line.
            for (int i = 0; i < len; i++) {
                double d = Double.parseDouble(columns[i]);
                if (0 == d) {
                    continue;
                }
                ((RecordImpl) records.get(i)).items.put(colNum, d);
            }
        }
        // Refuse to overflow the int line counter.
        if (colNum == Integer.MAX_VALUE) {
            throw new RuntimeException("SimpleFileSystemSource is not suitable for current source, "
                    + "there are too many records in data source file ".concat(location));
        }
        colNum++;
    }
    if (!isColNull) {
        if (colNum > sourceInfo.getRecordNum()) {
            log.warn("There are " + colNum + " records in " + location
                    + " which is more than the record number[" + sourceInfo.getRecordNum()
                    + "] u predict in Method createSource or source file. "
                    + "Here, the program simply set the record number to " + colNum + ".[position:"
                    + new Exception().getStackTrace()[0].getLineNumber() + "].");
        } else if (sourceInfo.getRecordNum() > colNum) {
            log.warn("File[" + location + "] does not contains enough records. u predicted "
                    + sourceInfo.getRecordNum() + ", but actually there is only " + colNum
                    + ". Here, program simply set the record number to " + colNum + "[position:"
                    + new Exception().getStackTrace()[0].getLineNumber() + "].");
        }
    }
    if (!isRowNull) {
        if (rowNum > sourceInfo.getColumnNum()) {
            log.warn("There are " + rowNum + " columns in " + location
                    + " which is more than the column number[" + sourceInfo.getColumnNum()
                    + "] u predict in Method createSource or source file."
                    + "Here, the program simply set the column number to " + rowNum + ".[position:"
                    + new Exception().getStackTrace()[0].getLineNumber() + "].");
        }
        // If the announced attribute count is larger than the actual one, nothing is reported:
        // the missing attributes are treated as zeros that were omitted from the file. For the
        // transposed matrix this means all-zero rows exist even though they were never written.
    }
}
/**
 * Fetches a single record from the data source.
 *
 * @param index position handed unchanged to {@code records.get}
 * @return whatever {@code records.get(index)} yields
 */
public Record getRecord(int index) {
    final Record requested = records.get(index);
    return requested;
}
/**
 * Exposes the iterator of the underlying {@code records} collection.
 *
 * @return the iterator obtained from {@code records.iterator()}
 */
public Iterator<Record> iterator() {
    final Iterator<Record> cursor = records.iterator();
    return cursor;
}
/**
 * Produces a textual dump of the source: one summary line with {@code rowNum} and
 * {@code colNum}, then for every record a header line followed by its first {@code colNum}
 * items, each printed with three decimals and a trailing tab.
 *
 * @return the dump as a single string
 */
@Override
public String toString() {
    CharArrayWriter buffer = new CharArrayWriter();
    PrintWriter printer = new PrintWriter(buffer);

    printer.print("There are " + rowNum + " records in file system source.Each record has " + colNum);
    printer.println(" attributes.");

    int recordIndex = 0;
    for (Iterator<Record> cursor = records.iterator(); cursor.hasNext(); recordIndex++) {
        // Blank separator line, then the record header.
        printer.println();
        printer.println("Record " + recordIndex + " :");

        Record current = cursor.next();
        int column = 0;
        while (column < colNum) {
            printer.printf("%.3f\t", current.getItem(column));
            column++;
        }
        printer.println();
    }

    printer.close();
    buffer.close();
    return buffer.toString();
}
/**
 * Sparse implementation of a record: zero elements are not stored. The value 0 is hard-coded
 * as the omitted value, which was considered reasonable; a configurable default value could
 * be added if it turns out not to be.
 *
 * <p>{@code RecordImpl(String)} creates a record when the file is not transposed;
 * {@code RecordImpl()} creates an empty record that is filled in when the file is transposed.
 *
 * @author Avon
 */
private class RecordImpl implements Record {
    private static final long serialVersionUID = 2910079439135958308L;
    // Column index -> stored value. Columns without an entry read as 0, the value that
    // dominates a sparse matrix.
    HashMap<Integer, Double> items;

    /** Creates an empty record (used for the transposed case). */
    RecordImpl() {
        items = new HashMap<Integer, Double>();
    }

    /**
     * Parses one line of the file into a record (used for the non-transposed case).
     *
     * <p>Fields that are exactly the text {@code "0"} are skipped but still occupy a column
     * index. Afterwards {@code colNum} of the enclosing source is raised to this line's field
     * count if the line is wider than anything seen so far.
     *
     * @param record one line of the file, fields separated by the formatter's separator
     * @throws RuntimeException if splitting the line fails, or (as
     *                          {@link NumberFormatException}) if a field cannot be parsed
     */
    RecordImpl(String record) {
        /*
         * initialCapacity=8 and loadFactor=0.9 suit the current case;
         * the initial capacity is a power of 2.
         */
        items = new HashMap<Integer, Double>(8, 0.9F);
        Pattern pattern = Pattern.compile("[" + formatter.getAttrSeperator() + "]+");
        String[] fields;
        try {
            fields = pattern.split(record);
        } catch (RuntimeException exob) {
            // Keep the cause, and separate location / "of record" / row number with spaces so
            // the message stays readable.
            throw new RuntimeException(
                    "SimpleFileSystemSource is not suitable for current source, there are too many columns in data source file "
                            .concat(location).concat(" of record ").concat(Integer.toString(rowNum)),
                    exob);
        }
        int i = 0;
        for (String item : fields) {
            if (!item.equals("0")) {
                items.put(i, Double.parseDouble(item));
            }
            i++;
        }
        /*
         * Whether or not the attribute count was supplied up front (isColNull), the count grows
         * to the widest line seen so far. A line may omit trailing zeros - which also speeds
         * things up - but every non-default value must be present.
         */
        if (i > colNum) {
            colNum = i;
        }
    }

    /**
     * Expands the sparse record into a dense array of length
     * {@code sourceInfo.getColumnNum()}; columns without a stored value are 0.
     *
     * @return a newly allocated dense copy of this record
     * @throws ArrayIndexOutOfBoundsException if a stored column index does not fit into the
     *                                        array
     */
    public double[] getAsArray() {
        // A fresh double[] is already zero-filled, so only stored values need to be copied.
        double[] doubles = new double[(int) sourceInfo.getColumnNum()];
        for (java.util.Map.Entry<Integer, Double> entry : items.entrySet()) {
            doubles[entry.getKey()] = entry.getValue();
        }
        return doubles;
    }

    /**
     * Returns the value stored for a column, or 0 when nothing is stored for it.
     *
     * @param column column index; narrowed to {@code int} for the lookup
     * @return the stored value, or 0 if the column has no entry
     * @throws RuntimeException if {@code column} is greater than
     *                          {@code sourceInfo.getColumnNum()}
     */
    public double getItem(long column) {
        // NOTE(review): the guard uses '>' so column == getColumnNum() is accepted and reads
        // as 0, although getAsArray() sizes its array to getColumnNum() - confirm whether
        // '>=' was intended before tightening it.
        if (column > sourceInfo.getColumnNum()) {
            throw new RuntimeException("There are only " + sourceInfo.getColumnNum() + " columns. The column "
                    + column + " u access does not exist.");
        }
        Double stored = items.get((int) column);
        return stored == null ? 0 : stored;
    }
}
}
/**
 * Per the original author's note, ComplexFileSystemSourceImpl is meant to wrap the contents of
 * a file as records forming a data source whose records are not all kept in memory at once,
 * because there are too many of them; a memory usage strategy therefore has to be considered.
 * The class as written only forwards its constructor arguments to {@code FileSystemSource}.
 *
 * @author Avon
 */
@SuppressWarnings("unused")
private abstract class ComplexFileSystemSourceImpl extends FileSystemSource {

    /**
     * Passes all four arguments, unchanged and in the same order, to the superclass
     * constructor.
     */
    public ComplexFileSystemSourceImpl(
            final String location,
            final Formatter formatter,
            final SourceInfo sourceInfo,
            final boolean transposable) {
        super(location, formatter, sourceInfo, transposable);
    }
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -