📄 textparse.java
字号:
/**
*
*/
import java.io.*;
import java.util.*;
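/**
 * Reads a text export line by line, filters and parses its per-site data
 * segments, and writes the parsed values back out as delimited text files.
 *
 * @author Administrator
 */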
public class TextParse {
/** Raw lines of the input, in file order; appended to by ReadFile and pruned in place by FilterNonMDAndWellSensor. */
public ArrayList<String> recordList = new ArrayList<String>();
/** One entry per "Data provided for site" segment found in recordList; appended to by FilterComment. */
public ArrayList<SiteData> siteDataList = new ArrayList<SiteData>();
/** Groups of site records; appended to by GroupData and written out by WriteGroupFile. */
public ArrayList<SiteData> groupDataList = new ArrayList<SiteData>();
/**
 * Reads a text file line by line and appends every line to {@link #recordList}.
 * Lines already present in {@code recordList} are kept; the new ones are added
 * after them. The file is decoded with {@link FileReader}'s default charset.
 *
 * @param filename path of the file to read
 * @throws IOException if the file cannot be opened or a read fails
 */
public void ReadFile(String filename)throws IOException {
    BufferedReader br = new BufferedReader(new FileReader(new File(filename)));
    try {
        String line;
        while ((line = br.readLine()) != null) {
            recordList.add(line);
        }
    } finally {
        // Closing the BufferedReader also closes the wrapped FileReader; doing it in
        // finally releases the file handle even when readLine() throws.
        br.close();
    }
}
/**
 * Removes everything that belongs to one site from {@link #recordList}
 * (used to filter out non-MD sensor data and well data).
 *
 * <p>Two kinds of lines are removed, in place:
 * <ul>
 *   <li>every line containing {@code "# USGS " + sensorID} (the site description line);</li>
 *   <li>once such a description line has been seen, every segment that starts with a
 *       line containing {@code "Data provided for site " + sensorID}: the segment's
 *       comment lines, its {@code agency_cd} column header, and all following lines up
 *       to (not including) the next line that starts with {@code '#'}.</li>
 * </ul>
 *
 * <p>Matching uses {@code String.contains}, so an id that is a prefix of another
 * site's id also matches that other site.
 *
 * @param sensorID site number whose description line and data segments are removed
 */
public void FilterNonMDAndWellSensor(String sensorID){
    final String siteLine = "# USGS " + sensorID;
    final String segmentLine = "Data provided for site " + sensorID;
    boolean siteLineSeen = false;
    int i = 0;
    while (i < recordList.size()) {
        String line = recordList.get(i);

        if (line.contains(siteLine)) {
            // Stay on the same index after removal: the next line has moved into slot i.
            // (Stepping back instead would read index -1 when the match is the first line.)
            recordList.remove(i);
            siteLineSeen = true;
            continue;
        }

        if (siteLineSeen && line.contains(segmentLine)) {
            int size = recordList.size();
            // The matched line itself is always part of the removed range.
            int end = i + 1;
            // Comment part: everything before the agency_cd column header.
            while (end < size && !recordList.get(end).contains("agency_cd")) {
                end++;
            }
            if (end < size) {
                // Data part: the agency_cd header plus every line up to the next '#' line.
                // startsWith is used so that an empty line is treated as data, not a crash.
                end++;
                while (end < size && !recordList.get(end).startsWith("#")) {
                    end++;
                }
            }
            // One range removal instead of shifting the list once per removed line.
            recordList.subList(i, end).clear();
            // Slot i now holds the line that terminated the segment; examine it next
            // rather than skipping over it.
            continue;
        }

        i++;
    }
}
/**
 * Parses every "Data provided for site" segment in {@link #recordList} into a
 * {@code SiteData} and appends it to {@link #siteDataList}.
 *
 * <p>Layout expected for each segment, as consumed by this method:
 * <ol>
 *   <li>a line containing {@code "Data provided for site"}; its last space-separated
 *       token becomes the site number;</li>
 *   <li>one caption line, which is skipped;</li>
 *   <li>one or more column description lines, split on single spaces, whose tokens
 *       1, 2 and 3 are taken as DD, parameter and statistic, ended by a line that is
 *       exactly {@code "#"};</li>
 *   <li>further comment lines up to the line containing {@code "agency_cd"};</li>
 *   <li>one format line directly after the {@code agency_cd} line (values such as
 *       {@code 5s 15s 16d}), which is skipped;</li>
 *   <li>tab-separated data rows up to the next line starting with {@code '#'}, an
 *       empty line, or the end of the list.</li>
 * </ol>
 *
 * <p>In a data row, fields 0..2 are agency, site number and date/time. The value for
 * described column {@code j} is read from field {@code 3 + 2*j}; the field after each
 * value (the {@code _cd} column) is skipped. Only statistics {@code 00001} (stored in
 * MaxVal), {@code 00002} (MinVal) and {@code 00003} (MeanVal) are kept; a missing
 * field is stored as an empty string.
 *
 * <p>A segment whose description part is cut off before the {@code agency_cd} line
 * still fails with an index-out-of-bounds exception.
 */
public void FilterComment(){
    int i = 0;
    // A plain while loop (not do/while) so that an empty recordList is a no-op.
    while (i < recordList.size()) {
        String str = recordList.get(i);
        if (!str.contains("Data provided for site")) {
            i++;
            continue;
        }

        String[] strings = str.split(" ");
        SiteData siteData = new SiteData();
        siteData.SiteNo = strings[strings.length - 1];
        ArrayList<SensorData> sensorDataList = new ArrayList<SensorData>();

        // Skip the site line and the caption line that follows it.
        i += 2;
        str = recordList.get(i);

        // Column descriptions; head.Value keeps them in file order.
        SensorData head = new SensorData();
        do {
            strings = str.split(" ");
            ValueDataItem described = new ValueDataItem();
            described.DD = strings[1].trim();
            described.Parameter = strings[2].trim();
            described.Statistic = strings[3].trim();
            head.Value.add(described);
            i++;
            str = recordList.get(i);
        } while (!str.equals("#"));

        // Skip the remaining comment lines up to the agency_cd column header.
        do {
            i++;
            str = recordList.get(i);
        } while (!str.contains("agency_cd"));

        // Skip the agency_cd header and the format line after it.
        i += 2;

        // Data rows. The bound is checked before every read so a segment that ends
        // the file (even with no rows at all) does not run past the list.
        while (i < recordList.size()) {
            str = recordList.get(i);
            if (str.length() == 0 || str.charAt(0) == '#') {
                break;
            }
            String[] fields = str.split("\\t");
            SensorData data = new SensorData();
            data.Agency = fields[0];
            data.SiteNo = fields[1];
            data.Datatime = fields[2];
            for (int j = 0; j < head.Value.size(); j++) {
                ValueDataItem described = head.Value.get(j);
                // split() drops trailing empty fields, so the column may be absent.
                int col = 3 + 2 * j;
                String cell = (fields.length <= col) ? "" : fields[col];

                ValueDataItem item = new ValueDataItem();
                item.DD = described.DD;
                item.Parameter = described.Parameter;
                if (described.Statistic.equals("00001")) {
                    item.Statistic = "00001";
                    item.MaxVal = cell;
                } else if (described.Statistic.equals("00002")) {
                    item.Statistic = "00002";
                    item.MinVal = cell;
                } else if (described.Statistic.equals("00003")) {
                    item.Statistic = "00003";
                    item.MeanVal = cell;
                } else {
                    // Any other statistic is not stored.
                    continue;
                }
                data.Value.add(item);
            }
            sensorDataList.add(data);
            i++;
        }

        siteData.SiteData = sensorDataList;
        siteDataList.add(siteData);
    }
}
/**
 * Writes the parsed site data in {@link #siteDataList} to a tab-separated text file.
 *
 * <p>For every site the output contains an optional header line, one line per record
 * ({@code site_no}, {@code datetime}, then the values), and two trailing line feeds.
 * Header column names have the form {@code DD_parameter_statistic} and are taken from
 * the first record of the site; the header is omitted for a site that has no records
 * or whose first record has no values.
 *
 * @param filename path of the file to create or overwrite
 * @param type     {@code 0} writes every stored value (max, min and mean); any other
 *                 value writes only the mean values (statistic {@code 00003})
 * @param head     whether to write the header line for each site
 * @throws IOException if the file cannot be opened or written
 */
public void WriteFilterFile(String filename, int type, boolean head) throws IOException{
    BufferedWriter bw = new BufferedWriter(new FileWriter(new File(filename)));
    try {
        for (int siteno = 0; siteno < siteDataList.size(); siteno++) {
            ArrayList<SensorData> sensorDataList = siteDataList.get(siteno).SiteData;

            // Only look at the first record when there is one: a site without data
            // rows must not abort the whole export.
            if (head && !sensorDataList.isEmpty()) {
                SensorData first = sensorDataList.get(0);
                if (first.Value.size() > 0) {
                    bw.write("site_no\tdatetime");
                    for (int j = 0; j < first.Value.size(); j++) {
                        ValueDataItem dataItem = first.Value.get(j);
                        if (type == 0 || dataItem.Statistic.equals("00003")) {
                            bw.write("\t" + dataItem.DD + "_" + dataItem.Parameter + "_" + dataItem.Statistic);
                        }
                    }
                    bw.write("\n");
                }
            }

            for (int i = 0; i < sensorDataList.size(); i++) {
                SensorData data = sensorDataList.get(i);
                bw.write(data.SiteNo);
                bw.write("\t");
                bw.write(data.Datatime);
                for (int j = 0; j < data.Value.size(); j++) {
                    ValueDataItem dataItem = data.Value.get(j);
                    boolean isMean = dataItem.Statistic.equals("00003");
                    if (type != 0 && !isMean) {
                        continue;
                    }
                    // Start from an empty cell so that an unrecognised statistic yields
                    // an empty column instead of repeating the previously written text.
                    String cell = "";
                    if (isMean) {
                        cell = dataItem.MeanVal;
                    } else if (dataItem.Statistic.equals("00001")) {
                        cell = dataItem.MaxVal;
                    } else if (dataItem.Statistic.equals("00002")) {
                        cell = dataItem.MinVal;
                    }
                    bw.write("\t");
                    bw.write(cell);
                }
                bw.write("\n");
            }
            bw.write("\n\n");
        }
    } finally {
        // Flushes and closes the underlying FileWriter as well, also on failure.
        bw.close();
    }
    System.out.println("finished write file!");
}
/**
 * Tells whether two records carry the same kind of sensor data.
 *
 * <p>Only values with statistic {@code 00003} are considered. The records match when
 * both have the same number of such values and, position by position, the same
 * parameter code. The DD number is intentionally not part of the comparison.
 *
 * @param data1 first record
 * @param data2 second record
 * @return {@code true} if the {@code 00003} values of both records line up by parameter
 */
private boolean CmpSensorData(SensorData data1, SensorData data2){
    ArrayList<ValueDataItem> meansOfFirst = new ArrayList<ValueDataItem>();
    ArrayList<ValueDataItem> meansOfSecond = new ArrayList<ValueDataItem>();

    // Collect the 00003 entries of each record, keeping their order.
    int pos = 0;
    while (pos < data1.Value.size()) {
        ValueDataItem candidate = data1.Value.get(pos);
        if (candidate.Statistic.equals("00003")) {
            meansOfFirst.add(candidate);
        }
        pos++;
    }
    pos = 0;
    while (pos < data2.Value.size()) {
        ValueDataItem candidate = data2.Value.get(pos);
        if (candidate.Statistic.equals("00003")) {
            meansOfSecond.add(candidate);
        }
        pos++;
    }

    // Different column counts can never match.
    if (meansOfFirst.size() != meansOfSecond.size()) {
        return false;
    }

    // Same count: every pair must agree on the parameter code.
    for (int k = 0; k < meansOfFirst.size(); k++) {
        String left = meansOfFirst.get(k).Parameter;
        String right = meansOfSecond.get(k).Parameter;
        if (!left.equals(right)) {
            return false;
        }
    }
    return true;
}
/**
 * Groups the sites in {@link #siteDataList} by the kind of data they carry and
 * appends the groups to {@link #groupDataList}.
 *
 * <p>A site is classified by its first record: it joins the first existing group
 * whose first record matches according to {@code CmpSensorData}; all of the site's
 * records are then appended to that group. If no group matches, a new group is
 * started with the site's number and a copy of its record list. Sites without any
 * records are skipped because there is nothing to classify them by.
 *
 * <p>Calling this method again appends to the groups already present.
 */
public void GroupData(){
    for (int i = 0; i < siteDataList.size(); i++) {
        SiteData site = siteDataList.get(i);
        ArrayList<SensorData> sensorDataList = site.SiteData;
        if (sensorDataList == null || sensorDataList.isEmpty()) {
            continue;
        }
        SensorData probe = sensorDataList.get(0);

        boolean found = false;
        for (int j = 0; j < groupDataList.size(); j++) {
            ArrayList<SensorData> groupSensorDataList = groupDataList.get(j).SiteData;
            if (CmpSensorData(probe, groupSensorDataList.get(0))) {
                // Same kind of data: merge this site's records into the group.
                groupSensorDataList.addAll(sensorDataList);
                found = true;
                break;
            }
        }

        if (!found) {
            // Start a new group with its own list. Reusing the site's SiteData object
            // would make later merges append other sites' records to the entry that
            // is still held in siteDataList.
            // NOTE(review): copies SiteNo and SiteData, the only SiteData members used
            // in this file — confirm the class has no other state to carry over.
            SiteData group = new SiteData();
            group.SiteNo = site.SiteNo;
            group.SiteData = new ArrayList<SensorData>(sensorDataList);
            groupDataList.add(group);
        }
    }
}
/**
 * Writes every group in {@link #groupDataList} to its own file, named
 * {@code group1.txt}, {@code group2.txt}, ... relative to the working directory.
 *
 * <p>Each line holds the site number, the date/time and the mean values
 * (statistic {@code 00003}) of one record, joined by {@code separate}. The optional
 * header names the value columns {@code parameter_statistic}, taken from the first
 * record of the group; for a group without records only the two fixed column names
 * are written.
 *
 * @param head     whether to write a header line at the top of each file
 * @param separate separator placed between columns
 * @throws IOException if a file cannot be opened or written
 */
public void WriteGroupFile(boolean head, String separate) throws IOException{
    for (int i = 0; i < groupDataList.size(); i++) {
        ArrayList<SensorData> records = groupDataList.get(i).SiteData;
        String filename = "group" + String.valueOf(i + 1) + ".txt";
        BufferedWriter bw = new BufferedWriter(new FileWriter(new File(filename)));
        try {
            if (head) {
                bw.write("site_no" + separate + "datetime");
                // Value column names come from the first record, when there is one.
                if (!records.isEmpty()) {
                    SensorData first = records.get(0);
                    for (int j = 0; j < first.Value.size(); j++) {
                        ValueDataItem dataItem = first.Value.get(j);
                        if (dataItem.Statistic.equals("00003")) {
                            bw.write(separate + dataItem.Parameter + "_" + dataItem.Statistic);
                        }
                    }
                }
                bw.write("\n");
            }

            for (int k = 0; k < records.size(); k++) {
                SensorData data = records.get(k);
                bw.write(data.SiteNo);
                bw.write(separate);
                bw.write(data.Datatime);
                for (int j = 0; j < data.Value.size(); j++) {
                    ValueDataItem dataItem = data.Value.get(j);
                    if (dataItem.Statistic.equals("00003")) {
                        bw.write(separate);
                        bw.write(dataItem.MeanVal);
                    }
                }
                bw.write("\n");
            }
        } finally {
            // Flushes and closes the underlying FileWriter as well, so a failure in one
            // group does not leave its file handle open.
            bw.close();
        }
    }
    System.out.println("finished write file!");
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -