📄 unicodestring.java
字号:
/* ==================================================================== Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.==================================================================== */ package org.apache.poi.hssf.record;import org.apache.poi.util.BitField;import org.apache.poi.util.BitFieldFactory;import org.apache.poi.util.LittleEndian;import org.apache.poi.util.HexDump;import java.util.Iterator;import java.util.List;import java.util.ArrayList;import java.util.Collections;/** * Title: Unicode String<P> * Description: Unicode String record. We implement these as a record, although * they are really just standard fields that are in several records. * It is considered more desirable then repeating it in all of them.<P> * REFERENCE: PG 264 Microsoft Excel 97 Developer's Kit (ISBN: 1-57231-498-2)<P> * @author Andrew C. Oliver * @author Marc Johnson (mjohnson at apache dot org) * @author Glen Stampoultzis (glens at apache.org) * @version 2.0-pre */public class UnicodeString implements Comparable{ public final static short sid = 0xFFF; private short field_1_charCount; // = 0; private byte field_2_optionflags; // = 0; private String field_3_string; // = null; private List field_4_format_runs; private byte[] field_5_ext_rst; private static final BitField highByte = BitFieldFactory.getInstance(0x1); private static final BitField extBit = BitFieldFactory.getInstance(0x4); private static final BitField richText = BitFieldFactory.getInstance(0x8); public static class FormatRun implements Comparable { private short character; private short fontIndex; public FormatRun(short character, short fontIndex) { this.character = character; this.fontIndex = fontIndex; } public short getCharacterPos() { return character; } public short getFontIndex() { return fontIndex; } public boolean equals(Object o) { if ((o == null) || (o.getClass() != this.getClass())) { return false; } FormatRun other = ( FormatRun ) o; return ((character == other.character) && (fontIndex == other.fontIndex)); } public int compareTo(Object obj) { FormatRun r = (FormatRun)obj; if ((character == r.character) && (fontIndex == r.fontIndex)) return 0; if (character == r.character) return fontIndex - r.fontIndex; else return character - r.character; } public String toString() { return "character="+character+",fontIndex="+fontIndex; } } private UnicodeString() { //Used for clone method. } public UnicodeString(String str) { setString(str); } /** * construct a unicode string record and fill its fields, ID is ignored * @param id - ignored * @param size - size of the data * @param data - the bytes of the string/fields */ public UnicodeString(RecordInputStream in) { validateSid(in.getSid()); fillFields(in); } public int hashCode() { int stringHash = 0; if (field_3_string != null) stringHash = field_3_string.hashCode(); return field_1_charCount + stringHash; } /** * Our handling of equals is inconsistent with compareTo. The trouble is because we don't truely understand * rich text fields yet it's difficult to make a sound comparison. * * @param o The object to compare. * @return true if the object is actually equal. */ public boolean equals(Object o) { if ((o == null) || (o.getClass() != this.getClass())) { return false; } UnicodeString other = ( UnicodeString ) o; //Ok lets do this in stages to return a quickly, first check the actual string boolean eq = ((field_1_charCount == other.field_1_charCount) && (field_2_optionflags == other.field_2_optionflags) && field_3_string.equals(other.field_3_string)); if (!eq) return false; //Ok string appears to be equal but now lets compare formatting runs if ((field_4_format_runs == null) && (other.field_4_format_runs == null)) //Strings are equal, and there are not formtting runs. return true; if (((field_4_format_runs == null) && (other.field_4_format_runs != null)) || (field_4_format_runs != null) && (other.field_4_format_runs == null)) //Strings are equal, but one or the other has formatting runs return false; //Strings are equal, so now compare formatting runs. int size = field_4_format_runs.size(); if (size != other.field_4_format_runs.size()) return false; for (int i=0;i<size;i++) { FormatRun run1 = (FormatRun)field_4_format_runs.get(i); FormatRun run2 = (FormatRun)other.field_4_format_runs.get(i); if (!run1.equals(run2)) return false; } //Well the format runs are equal as well!, better check the ExtRst data //Which by the way we dont know how to decode! if ((field_5_ext_rst == null) && (other.field_5_ext_rst == null)) return true; if (((field_5_ext_rst == null) && (other.field_5_ext_rst != null)) || ((field_5_ext_rst != null) && (other.field_5_ext_rst == null))) return false; size = field_5_ext_rst.length; if (size != field_5_ext_rst.length) return false; //Check individual bytes! for (int i=0;i<size;i++) { if (field_5_ext_rst[i] != other.field_5_ext_rst[i]) return false; } //Phew!! After all of that we have finally worked out that the strings //are identical. return true; } /** * NO OP */ protected void validateSid(short id) { // included only for interface compliance } /** * called by the constructor, should set class level fields. Should throw * runtime exception for bad/icomplete data. * * @param data raw data * @param size size of data * @param offset of the records data (provided a big array of the file) */ protected void fillFields(RecordInputStream in) { field_1_charCount = in.readShort(); field_2_optionflags = in.readByte(); int runCount = 0; int extensionLength = 0; //Read the number of rich runs if rich text. if ( isRichText() ) { runCount = in.readShort(); } //Read the size of extended data if present. if ( isExtendedText() ) { extensionLength = in.readInt(); } //Now need to get the string data. //Turn off autocontinuation so that we can catch the continue boundary in.setAutoContinue(false); StringBuffer tmpString = new StringBuffer(field_1_charCount); int stringCharCount = field_1_charCount; boolean isCompressed = ((field_2_optionflags & 1) == 0); while (stringCharCount != 0) { if (in.remaining() == 0) { if (in.isContinueNext()) { in.nextRecord(); //Check if we are now reading, compressed or uncompressed unicode. byte optionflags = in.readByte(); isCompressed = ((optionflags & 1) == 0); } else throw new RecordFormatException("Expected continue record."); } if (isCompressed) { //Typecast direct to char from byte with high bit set causes all ones //in the high byte of the char (which is of course incorrect) char ch = (char)( (short)0xff & (short)in.readByte() ); tmpString.append(ch); } else { char ch = (char) in.readShort(); tmpString.append(ch); } stringCharCount --; } field_3_string = tmpString.toString(); //Turn back on autocontinuation in.setAutoContinue(true); if (isRichText() && (runCount > 0)) { field_4_format_runs = new ArrayList(runCount); for (int i=0;i<runCount;i++) { field_4_format_runs.add(new FormatRun(in.readShort(), in.readShort())); //read reserved //in.readInt(); } } if (isExtendedText() && (extensionLength > 0)) { field_5_ext_rst = new byte[extensionLength]; for (int i=0;i<extensionLength;i++) { field_5_ext_rst[i] = in.readByte(); } } } /** * get the number of characters in the string * * * @return number of characters * */ public short getCharCount() { return field_1_charCount; } /** * set the number of characters in the string * @param cc - number of characters */ public void setCharCount(short cc) { field_1_charCount = cc; } /** * get the option flags which among other things return if this is a 16-bit or * 8 bit string * * @return optionflags bitmask * */ public byte getOptionFlags() { return field_2_optionflags; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -