📄 testsequencefileinputfilter.java
字号:
/**
 * Copyright 2005 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.mapred;

import java.io.*;
import java.util.*;

import junit.framework.TestCase;

import org.apache.commons.logging.*;

import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.conf.*;

/**
 * Exercises {@link SequenceFileInputFilter} with its {@code RegexFilter},
 * {@code PercentFilter} and {@code MD5Filter} filter classes.
 *
 * <p>Each test writes sequence files of varying record counts to the local
 * file system, reads them back through the filtering input format, and (for
 * the regex and percent filters) checks how many records were accepted.
 *
 * <p>All tests share one seeded {@link Random}, so the generated lengths,
 * payloads and split counts depend on the order in which the tests run.
 */
public class TestSequenceFileInputFilter extends TestCase {
  private static final Log LOG = InputFormatBase.LOG;

  /** Exclusive upper bound on the number of records written per file. */
  private static final int MAX_LENGTH = 15000;

  private static final Configuration conf = new Configuration();
  private static final JobConf job = new JobConf(conf);
  private static final FileSystem fs;
  private static final Path inDir =
      new Path(System.getProperty("test.build.data", ".") + "/mapred");
  private static final Path inFile = new Path(inDir, "test.seq");

  /** Fixed seed so that a given test order produces the same data every run. */
  private static final Random random = new Random(1);

  /** Reporter that ignores every status and progress notification. */
  private static final Reporter reporter = new Reporter() {
    public void setStatus(String status) throws IOException {}
    public void progress() throws IOException {}
  };

  static {
    job.setInputPath(inDir);
    try {
      fs = FileSystem.getNamed("local", conf);
    } catch (IOException e) {
      // Fail class initialization if the local file system cannot be obtained.
      e.printStackTrace();
      throw new RuntimeException(e);
    }
  }

  /**
   * Writes {@code numRecords} records to {@link #inFile}.
   *
   * <p>Keys are the decimal strings {@code "1"} through
   * {@code Integer.toString(numRecords)}; each value is a
   * {@link BytesWritable} holding between 0 and 9 random bytes.
   *
   * @param numRecords number of records to append; 0 produces a file with
   *        no records
   * @throws Exception if the file cannot be created or written
   */
  private static void createSequenceFile(int numRecords) throws Exception {
    // create a file with numRecords entries
    SequenceFile.Writer writer =
        new SequenceFile.Writer(fs, inFile, Text.class, BytesWritable.class);
    try {
      for (int i = 1; i <= numRecords; i++) {
        Text key = new Text(Integer.toString(i));
        byte[] data = new byte[random.nextInt(10)];
        random.nextBytes(data);
        BytesWritable value = new BytesWritable(data);
        writer.append(key, value);
      }
    } finally {
      writer.close();
    }
  }

  /**
   * Reads the input directory through a {@link SequenceFileInputFilter} and
   * counts the records the record readers return.
   *
   * @param numSplits requested number of splits; when 0, a random value in
   *        {@code [1, MAX_LENGTH / (SequenceFile.SYNC_INTERVAL / 20)]} is
   *        used instead
   * @return total number of records returned across all splits
   * @throws IOException if the splits cannot be computed or read
   */
  private int countRecords(int numSplits) throws IOException {
    InputFormat format = new SequenceFileInputFilter();
    Text key = new Text();
    BytesWritable value = new BytesWritable();
    if (numSplits == 0) {
      numSplits =
          random.nextInt(MAX_LENGTH / (SequenceFile.SYNC_INTERVAL / 20)) + 1;
    }
    FileSplit[] splits = format.getSplits(fs, job, numSplits);

    // check each split
    int count = 0;
    for (int j = 0; j < splits.length; j++) {
      RecordReader reader =
          format.getRecordReader(fs, splits[j], job, reporter);
      try {
        while (reader.next(key, value)) {
          LOG.info("Accept record " + key.toString());
          count++;
        }
      } finally {
        reader.close();
      }
    }
    return count;
  }

  /**
   * Checks the regex filter configured with the pattern {@code \A10*}.
   *
   * <p>For a file whose keys are 1..length, the expected number of accepted
   * records is the number of powers of ten in that range, i.e.
   * {@code floor(log10(length)) + 1}.
   *
   * @throws Exception if the test data cannot be written or read
   */
  public void testRegexFilter() throws Exception {
    // set the filter class
    LOG.info("Testing Regex Filter with patter: \\A10*");
    SequenceFileInputFilter.setFilterClass(job,
        SequenceFileInputFilter.RegexFilter.class);
    SequenceFileInputFilter.RegexFilter.setPattern(job, "\\A10*");

    // clean input dir
    fs.delete(inDir);

    try {
      // for a variety of lengths
      for (int length = 1; length < MAX_LENGTH;
           length += random.nextInt(MAX_LENGTH / 10) + 1) {
        LOG.info("******Number of records: " + length);
        createSequenceFile(length);
        int count = countRecords(0);
        int expectedCount = length == 0 ? 0 : (int) Math.log10(length) + 1;
        // JUnit's contract is assertEquals(expected, actual).
        assertEquals(expectedCount, count);
      }
    } finally {
      // clean up, even when an assertion above fails
      fs.delete(inDir);
    }
  }

  /**
   * Checks the percent filter configured with a frequency of 1000.
   *
   * <p>The expected number of accepted records is {@code length / 1000}
   * rounded up. The file is always read as a single requested split.
   *
   * @throws Exception if the test data cannot be written or read
   */
  public void testPercentFilter() throws Exception {
    LOG.info("Testing Percent Filter with frequency: 1000");
    // set the filter class
    SequenceFileInputFilter.setFilterClass(job,
        SequenceFileInputFilter.PercentFilter.class);
    SequenceFileInputFilter.PercentFilter.setFrequency(job, 1000);

    // clean input dir
    fs.delete(inDir);

    try {
      // for a variety of lengths
      for (int length = 0; length < MAX_LENGTH;
           length += random.nextInt(MAX_LENGTH / 10) + 1) {
        LOG.info("******Number of records: " + length);
        createSequenceFile(length);
        int count = countRecords(1);
        LOG.info("Accepted " + count + " records");
        int expectedCount = length / 1000;
        if (expectedCount * 1000 != length) {
          expectedCount++;
        }
        // JUnit's contract is assertEquals(expected, actual).
        assertEquals(expectedCount, count);
      }
    } finally {
      // clean up, even when an assertion above fails
      fs.delete(inDir);
    }
  }

  /**
   * Runs the MD5 filter configured with a frequency of 1000 over files of
   * varying lengths and logs how many records were accepted.
   *
   * <p>NOTE(review): this test makes no assertion on the accepted count; it
   * only verifies that reading completes without throwing.
   *
   * @throws Exception if the test data cannot be written or read
   */
  public void testMD5Filter() throws Exception {
    // set the filter class
    LOG.info("Testing MD5 Filter with frequency: 1000");
    SequenceFileInputFilter.setFilterClass(job,
        SequenceFileInputFilter.MD5Filter.class);
    SequenceFileInputFilter.MD5Filter.setFrequency(job, 1000);

    // clean input dir
    fs.delete(inDir);

    try {
      // for a variety of lengths
      for (int length = 0; length < MAX_LENGTH;
           length += random.nextInt(MAX_LENGTH / 10) + 1) {
        LOG.info("******Number of records: " + length);
        createSequenceFile(length);
        LOG.info("Accepted " + countRecords(0) + " records");
      }
    } finally {
      // clean up, even when reading fails
      fs.delete(inDir);
    }
  }

  /**
   * Command-line entry point; runs only {@link #testRegexFilter()}.
   *
   * @param args ignored
   * @throws Exception if the test fails
   */
  public static void main(String[] args) throws Exception {
    TestSequenceFileInputFilter filter = new TestSequenceFileInputFilter();
    filter.testRegexFilter();
  }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -