⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 textparse.java

📁 文本文件数据提取
💻 JAVA
字号:
/**
 * 
 */
import java.io.*;
import java.util.*;
/**
 * Extracts per-site records from a text report whose comment lines start with
 * '#' and whose data rows are tab separated, and writes the result back out as
 * filtered or grouped tables.
 *
 * <p>Data dependencies between the methods: {@link #ReadFile} fills
 * {@link #recordList}; {@link #FilterNonMDAndWellSensor} and
 * {@link #FilterComment} read it; {@link #FilterComment} fills
 * {@link #siteDataList}; {@link #GroupData} fills {@link #groupDataList} from
 * {@link #siteDataList}; the two writers read those lists.
 *
 * @author Administrator
 */
public class TextParse {

	/** Start of a site description line; the sensor id is appended to it. */
	private static final String SITE_DESCRIPTION_PREFIX = "#    USGS ";
	/** Text that marks the first line of one site's section. */
	private static final String SECTION_MARKER = "Data provided for site";
	/** Text that marks the column-header line above the data rows. */
	private static final String COLUMN_HEADER_MARKER = "agency_cd";
	/** Statistic code whose value is stored in {@code MaxVal}. */
	private static final String STAT_MAX = "00001";
	/** Statistic code whose value is stored in {@code MinVal}. */
	private static final String STAT_MIN = "00002";
	/** Statistic code whose value is stored in {@code MeanVal}. */
	private static final String STAT_MEAN = "00003";

	/** Raw lines of the input file, in file order. */
	public ArrayList<String> recordList = new ArrayList<String>();
	/** One entry per site section found by {@link #FilterComment}. */
	public ArrayList<SiteData> siteDataList = new ArrayList<SiteData>();
	/** One entry per group built by {@link #GroupData}. */
	public ArrayList<SiteData> groupDataList = new ArrayList<SiteData>();

	/**
	 * Reads the file line by line and appends every line to {@link #recordList}.
	 *
	 * @param filename path of the file to read
	 * @throws IOException if the file cannot be opened or read
	 */
	public void ReadFile(String filename) throws IOException {
		BufferedReader reader = new BufferedReader(new FileReader(new File(filename)));
		try {
			String line;
			while ((line = reader.readLine()) != null) {
				recordList.add(line);
			}
		} finally {
			// Closing the BufferedReader also closes the wrapped FileReader.
			reader.close();
		}
	}

	/**
	 * Removes everything that belongs to the given sensor from
	 * {@link #recordList}: its description line ("#    USGS " + id) and, once a
	 * description line has been seen, every section that starts with
	 * "Data provided for site " + id. A section runs from that line through the
	 * column-header line and all following lines up to (excluding) the next
	 * line that starts with '#', or to the end of the list.
	 *
	 * @param sensorID id of the sensor whose lines are dropped
	 */
	public void FilterNonMDAndWellSensor(String sensorID) {
		String descriptionMarker = SITE_DESCRIPTION_PREFIX + sensorID;
		String sectionMarker = SECTION_MARKER + " " + sensorID;
		int total = recordList.size();
		ArrayList<String> kept = new ArrayList<String>(total);
		boolean descriptionSeen = false;

		int i = 0;
		while (i < total) {
			String line = recordList.get(i);
			if (line.contains(descriptionMarker)) {
				descriptionSeen = true;
				i++;
				continue;
			}
			if (descriptionSeen && line.contains(sectionMarker)) {
				// Drop the section header up to the column-header line.
				do {
					i++;
				} while (i < total && !recordList.get(i).contains(COLUMN_HEADER_MARKER));
				// Drop the column-header line and the data rows below it.
				if (i < total) {
					do {
						i++;
					} while (i < total && !recordList.get(i).startsWith("#"));
				}
				// Do not advance: the '#' line that ended the section may
				// itself open another section that has to be dropped.
				continue;
			}
			kept.add(line);
			i++;
		}

		// Replace the contents in place so holders of the list see the result.
		recordList.clear();
		recordList.addAll(kept);
	}

	/**
	 * Parses every "Data provided for site" section of {@link #recordList} into
	 * a {@code SiteData} and appends it to {@link #siteDataList}.
	 *
	 * <p>Expected section layout, in this order: the section line (its last
	 * space-separated word becomes the site number), one caption line, one
	 * parameter row per value column up to a line that is just "#", further
	 * comment lines, the "agency_cd" column-header line, one format line, and
	 * then the data rows up to the next line starting with '#'. Comment lines,
	 * the format line and the code column that follows each value column are
	 * not stored. Blank data lines are skipped. A section that ends before its
	 * column-header line is ignored.
	 *
	 * @throws ArrayIndexOutOfBoundsException if a parameter row has fewer than
	 *         four fields when split on three spaces
	 */
	public void FilterComment() {
		int total = recordList.size();
		int i = 0;
		while (i < total) {
			String line = recordList.get(i);
			if (!line.contains(SECTION_MARKER)) {
				i++;
				continue;
			}

			String[] words = line.split(" ");
			SiteData siteData = new SiteData();
			siteData.SiteNo = words[words.length - 1];

			// Skip the section line and the caption line below it.
			i += 2;

			// Parameter rows: DD, parameter and statistic of each value column.
			SensorData head = new SensorData();
			while (i < total && !"#".equals(recordList.get(i).trim())) {
				String[] cols = recordList.get(i).split("   ");
				ValueDataItem column = new ValueDataItem();
				column.DD = cols[1].trim();
				column.Parameter = cols[2].trim();
				column.Statistic = cols[3].trim();
				head.Value.add(column);
				i++;
			}

			// Skip the remaining comment lines up to the column-header line.
			while (i < total && !recordList.get(i).contains(COLUMN_HEADER_MARKER)) {
				i++;
			}
			if (i >= total) {
				break;
			}

			// Skip the column-header line and the format line below it.
			i += 2;

			ArrayList<SensorData> rows = new ArrayList<SensorData>();
			while (i < total) {
				String row = recordList.get(i);
				if (row.startsWith("#")) {
					break;
				}
				if (row.trim().length() > 0) {
					rows.add(parseDataRow(row, head));
				}
				i++;
			}

			siteData.SiteData = rows;
			siteDataList.add(siteData);
			// i now points at the terminating '#' line (or past the end); the
			// outer loop examines it because it may open the next section.
		}
	}

	/**
	 * Builds one record from a tab-separated data row. Fields 0..2 are agency,
	 * site number and timestamp; the value of column {@code j} is field
	 * {@code 3 + 2 * j} (the field after each value is skipped). Only columns
	 * whose statistic is 00001, 00002 or 00003 are stored. Missing fields are
	 * stored as empty strings.
	 */
	private SensorData parseDataRow(String row, SensorData head) {
		String[] fields = row.split("\\t");
		SensorData record = new SensorData();
		record.Agency = fieldAt(fields, 0);
		record.SiteNo = fieldAt(fields, 1);
		record.Datatime = fieldAt(fields, 2);

		for (int j = 0; j < head.Value.size(); j++) {
			ValueDataItem column = head.Value.get(j);
			String statistic = column.Statistic;
			boolean isMax = STAT_MAX.equals(statistic);
			boolean isMin = STAT_MIN.equals(statistic);
			boolean isMean = STAT_MEAN.equals(statistic);
			if (!isMax && !isMin && !isMean) {
				continue;
			}

			ValueDataItem item = new ValueDataItem();
			item.DD = column.DD;
			item.Parameter = column.Parameter;
			item.Statistic = statistic;
			String value = fieldAt(fields, 3 + 2 * j);
			if (isMax) {
				item.MaxVal = value;
			} else if (isMin) {
				item.MinVal = value;
			} else {
				item.MeanVal = value;
			}
			record.Value.add(item);
		}
		return record;
	}

	/** Returns {@code fields[index]}, or "" when the row has no such field. */
	private static String fieldAt(String[] fields, int index) {
		return index < fields.length ? fields[index] : "";
	}

	/** Returns the text itself, or "" for {@code null}. */
	private static String nullToEmpty(String text) {
		return text == null ? "" : text;
	}

	/** Tells whether the list exists and holds at least one record. */
	private static boolean hasRecords(ArrayList<SensorData> records) {
		return records != null && !records.isEmpty();
	}

	/** Returns the value that matches the item's statistic, or "" if none. */
	private static String valueFor(ValueDataItem item) {
		String value = null;
		if (STAT_MAX.equals(item.Statistic)) {
			value = item.MaxVal;
		} else if (STAT_MIN.equals(item.Statistic)) {
			value = item.MinVal;
		} else if (STAT_MEAN.equals(item.Statistic)) {
			value = item.MeanVal;
		}
		return nullToEmpty(value);
	}

	/**
	 * Writes all sites of {@link #siteDataList} to one tab-separated file. Each
	 * site is followed by two blank lines; sites without records are skipped.
	 *
	 * @param filename path of the file to create or overwrite
	 * @param type 0 writes every stored value column; any other value writes
	 *        only the columns whose statistic is 00003
	 * @param head whether a column-header line is written before each site
	 * @throws IOException if the file cannot be opened or written
	 */
	public void WriteFilterFile(String filename, int type, boolean head) throws IOException {
		BufferedWriter bw = new BufferedWriter(new FileWriter(new File(filename)));
		try {
			for (int s = 0; s < siteDataList.size(); s++) {
				ArrayList<SensorData> records = siteDataList.get(s).SiteData;
				if (!hasRecords(records)) {
					continue;
				}

				// The first record supplies the column names of the site.
				SensorData first = records.get(0);
				if (head && first.Value.size() > 0) {
					bw.write("site_no\tdatetime");
					for (int j = 0; j < first.Value.size(); j++) {
						ValueDataItem item = first.Value.get(j);
						if (type == 0 || STAT_MEAN.equals(item.Statistic)) {
							bw.write("\t" + item.DD + "_" + item.Parameter + "_" + item.Statistic);
						}
					}
					bw.write("\n");
				}

				for (int r = 0; r < records.size(); r++) {
					SensorData record = records.get(r);
					bw.write(nullToEmpty(record.SiteNo));
					bw.write("\t");
					bw.write(nullToEmpty(record.Datatime));
					for (int j = 0; j < record.Value.size(); j++) {
						ValueDataItem item = record.Value.get(j);
						if (type != 0 && !STAT_MEAN.equals(item.Statistic)) {
							continue;
						}
						bw.write("\t");
						bw.write(valueFor(item));
					}
					bw.write("\n");
				}
				bw.write("\n\n");
			}
		} finally {
			// Flushes and closes the underlying FileWriter as well.
			bw.close();
		}
		System.out.println("finished write file!");
	}

	/**
	 * Tells whether two records carry the same sequence of parameters among
	 * their 00003 value columns. DD is deliberately not compared.
	 */
	private boolean CmpSensorData(SensorData data1, SensorData data2) {
		return meanParameters(data1).equals(meanParameters(data2));
	}

	/** Collects, in order, the parameter of every 00003 value of the record. */
	private static ArrayList<String> meanParameters(SensorData data) {
		ArrayList<String> parameters = new ArrayList<String>();
		for (int i = 0; i < data.Value.size(); i++) {
			ValueDataItem item = data.Value.get(i);
			if (STAT_MEAN.equals(item.Statistic)) {
				parameters.add(item.Parameter);
			}
		}
		return parameters;
	}

	/**
	 * Distributes the sites of {@link #siteDataList} over
	 * {@link #groupDataList}: a site joins the first group whose first record
	 * has the same 00003 parameters as the site's first record, otherwise it
	 * starts a new group. Sites without records are skipped.
	 *
	 * <p>A new group gets its own {@code SiteData} holding a copy of the site's
	 * record list, so that appending other sites to the group does not modify
	 * the entry in {@link #siteDataList}. Only {@code SiteNo} and
	 * {@code SiteData} are copied (the two fields this class populates).
	 */
	public void GroupData() {
		for (int i = 0; i < siteDataList.size(); i++) {
			SiteData site = siteDataList.get(i);
			ArrayList<SensorData> records = site.SiteData;
			if (!hasRecords(records)) {
				continue;
			}
			SensorData representative = records.get(0);

			boolean grouped = false;
			for (int g = 0; g < groupDataList.size(); g++) {
				ArrayList<SensorData> groupRecords = groupDataList.get(g).SiteData;
				if (!hasRecords(groupRecords)) {
					continue;
				}
				if (CmpSensorData(representative, groupRecords.get(0))) {
					groupRecords.addAll(records);
					grouped = true;
					break;
				}
			}

			if (!grouped) {
				SiteData group = new SiteData();
				group.SiteNo = site.SiteNo;
				group.SiteData = new ArrayList<SensorData>(records);
				groupDataList.add(group);
			}
		}
	}

	/**
	 * Writes each group of {@link #groupDataList} to its own file named
	 * "group" + (1-based index) + ".txt" in the working directory. Only the
	 * 00003 values are written. A group without records yields an empty file.
	 *
	 * @param head whether a column-header line is written first
	 * @param separate separator placed between the columns
	 * @throws IOException if a file cannot be opened or written
	 */
	public void WriteGroupFile(boolean head, String separate) throws IOException {
		for (int g = 0; g < groupDataList.size(); g++) {
			ArrayList<SensorData> records = groupDataList.get(g).SiteData;
			String filename = "group" + String.valueOf(g + 1) + ".txt";
			BufferedWriter bw = new BufferedWriter(new FileWriter(new File(filename)));
			try {
				if (head && hasRecords(records)) {
					// The first record supplies the column names of the group.
					SensorData first = records.get(0);
					bw.write("site_no" + separate + "datetime");
					for (int j = 0; j < first.Value.size(); j++) {
						ValueDataItem item = first.Value.get(j);
						if (STAT_MEAN.equals(item.Statistic)) {
							bw.write(separate + item.Parameter + "_" + item.Statistic);
						}
					}
					bw.write("\n");
				}

				int count = records == null ? 0 : records.size();
				for (int r = 0; r < count; r++) {
					SensorData record = records.get(r);
					bw.write(nullToEmpty(record.SiteNo));
					bw.write(separate);
					bw.write(nullToEmpty(record.Datatime));
					for (int j = 0; j < record.Value.size(); j++) {
						ValueDataItem item = record.Value.get(j);
						if (STAT_MEAN.equals(item.Statistic)) {
							bw.write(separate);
							bw.write(nullToEmpty(item.MeanVal));
						}
					}
					bw.write("\n");
				}
			} finally {
				// Flushes and closes the underlying FileWriter as well.
				bw.close();
			}
		}

		System.out.println("finished write file!");
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -