testtext.java

来自「hadoop:Nutch集群平台」· Java 代码 · 共 222 行
JAVA
222 行
/** * Copyright 2005 The Apache Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * *     http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */package org.apache.hadoop.io;import junit.framework.TestCase;import java.nio.ByteBuffer;import java.nio.charset.CharacterCodingException;import java.util.Random;import org.apache.commons.logging.Log;import org.apache.commons.logging.LogFactory;/** Unit tests for LargeUTF8. */public class TestText extends TestCase {  private static final Log LOG= LogFactory.getLog("org.apache.hadoop.io.TestText");  private static final int NUM_ITERATIONS = 100;  public TestText(String name) { super(name); }  private static final Random RANDOM = new Random(1);  private static final int RAND_LEN = -1;    // generate a valid java String  private static String getTestString(int len) throws Exception {    StringBuffer buffer = new StringBuffer();        int length = (len==RAND_LEN) ? 
RANDOM.nextInt(1000) : len;    while (buffer.length()<length) {        int codePoint = RANDOM.nextInt(Character.MAX_CODE_POINT);        char tmpStr[] = new char[2];        if(Character.isDefined(codePoint)) {            //unpaired surrogate            if(codePoint < Character.MIN_SUPPLEMENTARY_CODE_POINT &&                    !Character.isHighSurrogate((char)codePoint) &&                    !Character.isLowSurrogate((char)codePoint) ) {               Character.toChars(codePoint, tmpStr, 0);               buffer.append(tmpStr);            }        }    }    return buffer.toString();  }    public static String getTestString() throws Exception {    return getTestString(RAND_LEN);  }    public static String getLongString() throws Exception {      String str = getTestString();      int length = Short.MAX_VALUE+str.length();      StringBuffer buffer = new StringBuffer();      while(buffer.length()<length)          buffer.append(str);            return buffer.toString();  }  public void testWritable() throws Exception {    for (int i = 0; i < NUM_ITERATIONS; i++) {        String str;        if(i == 0 )            str = getLongString();        else            str = getTestString();        TestWritable.testWritable(new Text(str));    }  }  public void testCoding() throws Exception {      String before = "Bad \t encoding \t testcase";      Text text = new Text(before);      String after = text.toString();      assertTrue(before.equals(after));      for (int i = 0; i < NUM_ITERATIONS; i++) {          // generate a random string          if(i == 0 )              before = getLongString();          else              before = getTestString();              // test string to utf8          ByteBuffer bb = Text.encode(before);                    byte[] utf8Text = bb.array();          byte[] utf8Java = before.getBytes("UTF-8");          assertEquals(0, WritableComparator.compareBytes(                      utf8Text, 0, bb.limit(),                      utf8Java, 0, utf8Java.length));    
                    // test utf8 to string          after = Text.decode(utf8Java);          assertTrue(before.equals(after));      }  }      public void testIO() throws Exception {    DataOutputBuffer out = new DataOutputBuffer();    DataInputBuffer in = new DataInputBuffer();    for (int i = 0; i < NUM_ITERATIONS; i++) {        // generate a random string        String before;                  if(i == 0 )            before = getLongString();        else            before = getTestString();                // write it        out.reset();        Text.writeString(out, before);                // test that it reads correctly        in.reset(out.getData(), out.getLength());        String after = Text.readString(in);        assertTrue(before.equals(after));                // Test compatibility with Java's other decoder         int strLenSize = WritableUtils.getVIntSize(Text.utf8Length(before));        String after2 = new String(out.getData(), strLenSize,                 out.getLength()-strLenSize, "UTF-8");        assertTrue(before.equals(after2));      }  }  public void testCompare() throws Exception {      DataOutputBuffer out1 = new DataOutputBuffer();      DataOutputBuffer out2 = new DataOutputBuffer();      DataOutputBuffer out3 = new DataOutputBuffer();      Text.Comparator comparator = new Text.Comparator();      for (int i=0; i<NUM_ITERATIONS; i++ ) {          // reset output buffer          out1.reset();          out2.reset();          out3.reset();          // generate two random strings          String str1 = getTestString();          String str2 = getTestString();          if(i == 0 ) {              str1 = getLongString();              str2 = getLongString();          } else {              str1 = getTestString();              str2 = getTestString();          }                    // convert to texts          Text txt1 = new Text(str1);          Text txt2 = new Text(str2);          Text txt3 = new Text(str1);                    // serialize them          
txt1.write(out1);          txt2.write(out2);          txt3.write(out3);                    // compare two strings by looking at their binary formats          int ret1 = comparator.compare(out1.getData(), 0, out1.getLength(),                  out2.getData(), 0, out2.getLength());          // compare two strings          int ret2 = txt1.compareTo(txt2);                    assertEquals(ret1, ret2);                    // test equal          assertEquals(txt1.compareTo(txt3), 0);          assertEquals(comparator.compare(out1.getData(), 0, out3.getLength(),                  out3.getData(), 0, out3.getLength()), 0);      }  }        public void testFind() throws Exception {      Text text = new Text("abcd\u20acbdcd\u20ac");      assertTrue(text.find("abd")==-1);      assertTrue(text.find("ac")==-1);      assertTrue(text.find("\u20ac")==4);      assertTrue(text.find("\u20ac", 5)==11);  }  public void testValidate() throws Exception {      Text text = new Text("abcd\u20acbdcd\u20ac");      byte [] utf8 = text.getBytes();      int length = text.getLength();      Text.validateUTF8(utf8, 0, length);  }  public void testTextText() throws CharacterCodingException {    Text a=new Text("abc");    Text b=new Text("a");    b.set(a);    assertEquals("abc",b.toString());  }  public static void main(String[] args)  throws Exception  {    TestText test = new TestText("main");    test.testIO();    test.testCompare();    test.testCoding();    test.testWritable();    test.testFind();    test.testValidate();  }}
testtext.java - 源码说明

本页面展示了「hadoop:Nutch集群平台」中的 testtext.java 源码文件，采用 Java 编程语言编写，共 222 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与hadoop相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码：Ctrl + C
搜索代码：Ctrl + F
全屏模式：F11
增大字号：Ctrl + =
减小字号：Ctrl + -
显示快捷键：?