⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 util.java

📁 HTML解析器是一个Java库
💻 JAVA
字号:
// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 3.0
// Copyright (C) 2007 Martin Jericho
// http://jerichohtml.sourceforge.net/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of either one of the following licences:
//
// 1. The Eclipse Public License (EPL) version 1.0,
// included in this distribution in the file licence-epl-1.0.html
// or available at http://www.eclipse.org/legal/epl-v10.html
//
// 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
// included in this distribution in the file licence-lgpl-2.1.txt
// or available at http://www.gnu.org/licenses/lgpl.txt
//
// This library is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the individual licence texts for more details.

package net.htmlparser.jericho;

import java.util.*;
import java.io.*;

/**
 * Collection of general-purpose static helpers that are used by, but not specific to, the HTML Parser library.
 * <p>
 * The class cannot be instantiated.
 */
public final class Util {
	/** Number of characters requested from a <code>Reader</code> per read call in {@link #getString(Reader)}. */
	private static final int READ_CHUNK_SIZE=2048;
	/** Record terminator written by {@link #outputCSVLine(Writer,String[])}; the platform line separator as read when this class is initialised. */
	private static final String CSV_LINE_TERMINATOR=System.getProperty("line.separator");

	private Util() {}

	/**
	 * Reads all remaining characters from the specified <code>Reader</code> and returns them as a single string.
	 * <p>
	 * A <code>null</code> argument results in an empty string.
	 * <p>
	 * The reader is always closed before this method returns, whether or not reading succeeded.
	 * <p>
	 * Text can be loaded from an <code>InputStream</code> by calling <code>getString(new InputStreamReader(inputStream,encoding))</code>.
	 *
	 * @param reader  the <code>java.io.Reader</code> to drain, may be <code>null</code>.
	 * @return the complete text obtained from the reader, or an empty string if the reader is <code>null</code>.
	 * @throws java.io.IOException if reading from or closing the reader fails.
	 */
	public static String getString(final Reader reader) throws IOException {
		if (reader==null) {
			return "";
		}
		try {
			final char[] chunk=new char[READ_CHUNK_SIZE];
			final StringBuilder text=new StringBuilder();
			while (true) {
				final int count=reader.read(chunk,0,READ_CHUNK_SIZE);
				if (count==-1) {
					break; // end of stream
				}
				text.append(chunk,0,count);
			}
			return text.toString();
		} finally {
			reader.close();
		}
	}

	/**
	 * Writes the specified array of strings to the specified <code>Writer</code> as one record (line) of a CSV file.
	 * <p>
	 * "CSV" stands for <i>Comma Separated Values</i>.
	 * As there is no single formal specification for the format, applications differ considerably in how they
	 * encode data types and special characters.
	 * <p>
	 * In general a CSV file is a list of records separated by line breaks, each record being a list of field values
	 * separated by commas.  Every record should have the same number of fields, and the field at a given position should
	 * hold the same kind of data in every record, so that the file can be read as a table of columns.
	 * The first line often holds the column labels.
	 * <p>
	 * Text values are commonly delimited with single (<code>'</code>) or double-quotes (<code>"</code>),
	 * particularly when they contain a comma, line feed or other syntactically significant character,
	 * while values of other types are generally left unquoted.
	 * <p>
	 * The output of this method follows these rules:
	 * <p>
	 * <ul>
	 *  <li>A value that is the same object as {@link Config#ColumnValueTrue} or {@link Config#ColumnValueFalse}
	 *   (compared by reference, not by content) is treated as a boolean and written as is, without quotes.
	 *  <li>Every other non-<code>null</code> value is treated as text and enclosed in double-quotes.
	 *  <li>Each double-quote character inside a text value is written as two consecutive double-quotes (<code>""</code>).
	 *  <li>A <code>null</code> value produces an empty field.
	 *  <li>Fields are separated by a single comma, with no comma after the last field.
	 *  <li>The record is terminated with the platform line separator (the <code>line.separator</code> system property).
	 *  <li>Characters other than double-quotes, including line breaks, are written unchanged.
	 * </ul>
	 *
	 * @param writer  the destination <code>java.io.Writer</code> for the output.
	 * @param values  the field values of the record, in column order; individual elements may be <code>null</code>.
	 * @throws java.io.IOException if an I/O error occurs.
	 * @see FormFields#getColumnLabels()
	 * @see FormFields#getColumnValues(Map)
	 */
	public static void outputCSVLine(final Writer writer, final String[] values) throws IOException {
		for (int index=0; index<values.length; index++) {
			if (index>0) {
				writer.write(','); // separator goes between fields only
			}
			final String field=values[index];
			if (field==null) {
				continue; // null is represented by an empty field
			}
			if (isBooleanColumnValue(field)) {
				writer.write(field); // assumes the boolean constants contain no double quotes
				continue;
			}
			writer.write('"');
			writeDoublingQuotes(writer,field);
			writer.write('"');
		}
		writer.write(CSV_LINE_TERMINATOR);
	}

	/**
	 * Indicates whether the specified value is one of the boolean marker constants.
	 * The test is by object identity so that ordinary text with the same content is still written as quoted text.
	 */
	private static boolean isBooleanColumnValue(final String value) {
		return value==Config.ColumnValueTrue || value==Config.ColumnValueFalse;
	}

	/**
	 * Writes the text one character at a time, emitting every double-quote character twice.
	 */
	private static void writeDoublingQuotes(final Writer writer, final String text) throws IOException {
		int position=0;
		while (position<text.length()) {
			final char current=text.charAt(position++);
			if (current=='"') {
				writer.write(current); // extra quote escapes the one written below
			}
			writer.write(current);
		}
	}

	/**
	 * Returns a newly allocated character array holding the characters of the first string
	 * immediately followed by the characters of the second string.
	 */
	static char[] getConcatenatedCharArray(final String string1, final String string2) {
		final int firstLength=string1.length();
		final int secondLength=string2.length();
		final char[] joined=new char[firstLength+secondLength];
		string1.getChars(0,firstLength,joined,0);
		string2.getChars(0,secondLength,joined,firstLength);
		return joined;
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -