range.java
来自「EXCEL read and write」· Java 代码 · 共 1,078 行 · 第 1/3 页
JAVA
1,078 行
/* ==================================================================== Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.==================================================================== */package org.apache.poi.hwpf.usermodel;import org.apache.poi.util.LittleEndian;import org.apache.poi.hwpf.HWPFDocument;import org.apache.poi.hwpf.usermodel.CharacterRun;import org.apache.poi.hwpf.usermodel.Paragraph;import org.apache.poi.hwpf.usermodel.ParagraphProperties;import org.apache.poi.hwpf.usermodel.Section;import org.apache.poi.hwpf.model.CPSplitCalculator;import org.apache.poi.hwpf.model.FileInformationBlock;import org.apache.poi.hwpf.model.PropertyNode;import org.apache.poi.hwpf.model.StyleSheet;import org.apache.poi.hwpf.model.CHPX;import org.apache.poi.hwpf.model.PAPX;import org.apache.poi.hwpf.model.SEPX;import org.apache.poi.hwpf.model.TextPiece;import org.apache.poi.hwpf.model.ListTables;import org.apache.poi.hwpf.sprm.CharacterSprmCompressor;import org.apache.poi.hwpf.sprm.ParagraphSprmCompressor;import org.apache.poi.hwpf.sprm.SprmBuffer;import java.util.List;import java.util.NoSuchElementException;import java.lang.ref.WeakReference;/** * This class is the central class of the HWPF object model. All properties * that apply to a range of characters in a Word document extend this class. * * It is possible to insert text and/or properties at the beginning or end of a * range. * * Ranges are only valid if there hasn't been an insert in a prior Range since * the Range's creation. Once an element (text, paragraph, etc.) has been * inserted into a Range, subsequent Ranges become unstable. * * @author Ryan Ackley */public class Range{ public static final int TYPE_PARAGRAPH = 0; public static final int TYPE_CHARACTER= 1; public static final int TYPE_SECTION = 2; public static final int TYPE_TEXT = 3; public static final int TYPE_LISTENTRY = 4; public static final int TYPE_TABLE = 5; public static final int TYPE_UNDEFINED = 6; /** Needed so inserts and deletes will ripple up through containing Ranges */ private WeakReference _parent; /** The starting character offset of this range.*/ protected int _start; /** The ending character offset of this range.*/ protected int _end; /** The document this range blongs to.*/ protected HWPFDocument _doc; /** Have we loaded the section indexes yet*/ boolean _sectionRangeFound; /** All sections that belong to the document this Range belongs to.*/ protected List _sections; /** The start index in the sections list for this Range*/ protected int _sectionStart; /** The end index in the sections list for this Range.*/ protected int _sectionEnd; /** Have we loaded the paragraph indexes yet.*/ protected boolean _parRangeFound; /** All paragraphs that belong to the document this Range belongs to.*/ protected List _paragraphs; /** The start index in the paragraphs list for this Range*/ protected int _parStart; /** The end index in the paragraphs list for this Range.*/ protected int _parEnd; /** Have we loaded the characterRun indexes yet.*/ protected boolean _charRangeFound; /** All CharacterRuns that belong to the document this Range belongs to.*/ protected List _characters; /** The start index in the characterRuns list for this Range*/ protected int _charStart; /** The end index in the characterRuns list for this Range. */ protected int _charEnd; /** Have we loaded the Text indexes yet*/ protected boolean _textRangeFound; /** All text pieces that belong to the document this Range belongs to.*/ protected List _text; /** The start index in the text list for this Range.*/ protected int _textStart; /** The end index in the text list for this Range.*/ protected int _textEnd;// protected Range()// {//// } /** * Used to construct a Range from a document. This is generally used to * create a Range that spans the whole document, or at least one * whole part of the document (eg main text, header, comment) * * @param start Starting character offset of the range. * @param end Ending character offset of the range. * @param doc The HWPFDocument the range is based on. */ public Range(int start, int end, HWPFDocument doc) { _start = start; _end = end; _doc = doc; _sections = _doc.getSectionTable().getSections(); _paragraphs = _doc.getParagraphTable().getParagraphs(); _characters = _doc.getCharacterTable().getTextRuns(); _text = _doc.getTextTable().getTextPieces(); _parent = new WeakReference(null); sanityCheckStartEnd(); } /** * Used to create Ranges that are children of other Ranges. * * @param start Starting character offset of the range. * @param end Ending character offset of the range. * @param parent The parent this range belongs to. */ protected Range(int start, int end, Range parent) { _start = start; _end = end; _doc = parent._doc; _sections = parent._sections; _paragraphs = parent._paragraphs; _characters = parent._characters; _text = parent._text; _parent = new WeakReference(parent); sanityCheckStartEnd(); } /** * Constructor used to build a Range from indexes in one of its internal * lists. * * @param startIdx The starting index in the list. * @param endIdx The ending index in the list. * @param idxType The list type. * @param parent The parent Range this range belongs to. */ protected Range(int startIdx, int endIdx, int idxType, Range parent) { _doc = parent._doc; _sections = parent._sections; _paragraphs = parent._paragraphs; _characters = parent._characters; _text = parent._text; _parent = new WeakReference(parent); switch (idxType) { case TYPE_PARAGRAPH: _parStart = parent._parStart + startIdx; _parEnd = parent._parStart + endIdx; _start = ((PropertyNode)_paragraphs.get(_parStart)).getStart(); _end = ((PropertyNode)_paragraphs.get(_parEnd)).getEnd(); _parRangeFound = true; break; case TYPE_CHARACTER: _charStart = parent._charStart + startIdx; _charEnd = parent._charStart + endIdx; _start = ((PropertyNode)_characters.get(_charStart)).getStart(); _end = ((PropertyNode)_characters.get(_charEnd)).getEnd(); _charRangeFound = true; break; case TYPE_SECTION: _sectionStart = parent._sectionStart + startIdx; _sectionEnd = parent._sectionStart + endIdx; _start = ((PropertyNode)_sections.get(_sectionStart)).getStart(); _end = ((PropertyNode)_sections.get(_sectionEnd)).getEnd(); _sectionRangeFound = true; break; case TYPE_TEXT: _textStart = parent._textStart + startIdx; _textEnd = parent._textStart + endIdx; _start = ((PropertyNode)_text.get(_textStart)).getStart(); _end = ((PropertyNode)_text.get(_textEnd)).getEnd(); _textRangeFound = true; break; } sanityCheckStartEnd(); } /** * Ensures that the start and end were were given * are actually valid, to avoid issues later on * if they're not */ private void sanityCheckStartEnd() { if(_start < 0) { throw new IllegalArgumentException("Range start must not be negative. Given " + _start); } if(_end < _start) { throw new IllegalArgumentException("The end (" + _end + ") must not be before the start ("+_start+")"); } } /** * Does any <code>TextPiece</code> in this Range use unicode? * * @return true if it does and false if it doesn't */ public boolean usesUnicode() { initText(); for (int i = _textStart; i < _textEnd; i++) { TextPiece piece = (TextPiece)_text.get(i); if (piece.isUnicode()) return true; } return false; } /** * Gets the text that this Range contains. * * @return The text for this range. */ public String text() { initText(); StringBuffer sb = new StringBuffer(); for (int x = _textStart; x < _textEnd; x++) { TextPiece piece = (TextPiece)_text.get(x); // Figure out where in this piece the text // we're after lives int rStart = 0; int rEnd = piece.characterLength(); if(_start > piece.getStart()) { rStart = _start - piece.getStart(); } if(_end < piece.getEnd()) { rEnd -= (piece.getEnd() - _end); } // Luckily TextPieces work in characters, so we don't // need to worry about unicode here sb.append(piece.substring(rStart, rEnd)); } return sb.toString(); } /** * Removes any fields (eg macros, page markers etc) * from the string. * Normally used to make some text suitable for showing * to humans, and the resultant text should not normally * be saved back into the document! */ public static String stripFields(String text) { // First up, fields can be nested... // A field can be 0x13 [contents] 0x15 // Or it can be 0x13 [contents] 0x14 [real text] 0x15 // If there are no fields, all easy if(text.indexOf('\u0013') == -1) return text; // Loop over until they're all gone // That's when we're out of both 0x13s and 0x15s while( text.indexOf('\u0013') > -1 && text.indexOf('\u0015') > -1) { int first13 = text.indexOf('\u0013'); int next13 = text.indexOf('\u0013', first13+1); int first14 = text.indexOf('\u0014', first13+1); int last15 = text.lastIndexOf('\u0015'); // If they're the wrong way around, give up if(last15 < first13) { break; } // If no more 13s and 14s, just zap if(next13 == -1 && first14 == -1) { text = text.substring(0, first13) + text.substring(last15+1); break; } // If a 14 comes before the next 13, then // zap from the 13 to the 14, and remove // the 15 if(first14 != -1 && (first14 < next13 || next13 == -1)) { text = text.substring(0, first13) + text.substring(first14+1, last15) + text.substring(last15+1); continue; } // Another 13 comes before the next 14. // This means there's nested stuff, so we // can just zap the lot text = text.substring(0, first13) + text.substring(last15+1); continue; } return text; } /**
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?