⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 html2txt.java

📁 Java转换为html
💻 JAVA
字号:
/*
 * Copyright (c) 2001 Shiraz Kanga.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer. 
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */ 
 
 
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;

/**
 * Recurses through a directory tree and converts every HTML file found into
 * plain text. For each input file "name" the result is written next to it as
 * "name.txt".
 *
 * The conversion is deliberately simple: everything between a '<' and the
 * next '>' is discarded, and a small set of character entities is decoded.
 * Files are read and written with the platform default character encoding
 * (that is what FileReader and FileWriter use).
 *
 * All state is static, so the class is not safe for concurrent use.
 */
public class html2txt
{
  // True while the tag filter is outside of a <...> tag. It is kept between
  // calls to filterLine() so that a tag opened in one call and closed in a
  // later one is still removed when the input is fed in piece by piece.
  static boolean inText = true;
  // Separator used to build the paths of the files found by fileLister().
  static String fileSeparator = System.getProperty ("file.separator");
  // Number of files converted successfully so far.
  static int numFiles = 0;

  /**
   * Replaces every occurrence of oldString in currLine with newString.
   * The scan is a single left-to-right pass: text inserted by a replacement
   * is never searched again.
   *
   * @param currLine  text to search; returned unchanged when null
   * @param oldString text to look for; when null or empty nothing is replaced
   *                  (an empty pattern matches everywhere and used to make
   *                  this method loop forever)
   * @param newString replacement text; null is treated as the empty string
   * @return the text with all occurrences replaced
   */
  static String strReplace (String currLine, String oldString, String newString)
  {
    if (currLine == null || oldString == null || oldString.length () == 0)
      return currLine;

    int matchAt = currLine.indexOf (oldString);
    if (matchAt < 0)
      return currLine;

    String replacement = (newString == null) ? "" : newString;
    StringBuffer result = new StringBuffer (currLine.length ());
    int copiedUpTo = 0;

    while (matchAt >= 0)
    {
      // Copy the untouched text before the match, then the replacement.
      result.append (currLine.substring (copiedUpTo, matchAt));
      result.append (replacement);
      copiedUpTo = matchAt + oldString.length ();
      matchAt = currLine.indexOf (oldString, copiedUpTo);
    }
    result.append (currLine.substring (copiedUpTo));

    return result.toString ();
  }

  /**
   * The actual filter. Removes everything between a '<' and the next '>'
   * (both delimiters included) and then decodes the entities &nbsp;, &lt;,
   * &gt;, &quot; and &amp;.
   *
   * Whether the scan is currently inside a tag is remembered in the static
   * field inText, so a tag may span several consecutive calls. Callers that
   * start a new, unrelated document must set inText back to true first.
   * A '>' that appears outside of any tag is dropped as well.
   *
   * @param currLine the HTML text to filter, may be null
   * @return the filtered text, or null when currLine is null
   * @throws IOException never thrown by this implementation; declared so that
   *                     existing callers keep compiling
   */
  public static final String filterLine (String currLine) throws IOException
  {
    if (currLine == null)
      return null;

    StringBuffer currBuf = new StringBuffer (currLine.length ());

    for (int i = 0; i < currLine.length (); i++)
    {
      char c = currLine.charAt (i);
      if (c == '<')
        inText = false;
      else if (c == '>')
        inText = true;
      else if (inText)
        currBuf.append (c);
    }

    String text = currBuf.toString ();
    text = strReplace (text, "&nbsp;", " ");
    text = strReplace (text, "&lt;", "<");
    text = strReplace (text, "&gt;", ">");
    text = strReplace (text, "&quot;", "\"");
    // &amp; must be decoded last: doing it earlier would turn an escaped
    // entity such as "&amp;lt;" into "&lt;" and then wrongly into "<".
    text = strReplace (text, "&amp;", "&");

    return text;
  }

  /**
   * Converts one HTML file and writes the text to fileName + ".txt".
   * I/O errors are reported on standard output and do not stop the run.
   *
   * @param fileName path of the HTML file to convert
   */
  public static void doFile (String fileName)
  {
    try
    {
      String inputText = readFile (new File (fileName));

      // Every file is a new document: without this reset a file that ends
      // inside an unclosed tag would swallow the start of the next file.
      inText = true;
      String outputText = filterLine (inputText);

      writeFile (new File (fileName + ".txt"), outputText);

      // Count and report the file only once its output has been written.
      numFiles ++;
      System.out.println ("Processed file \"" + fileName + "\"");
    }
    catch (IOException e)
    {
      System.out.println ("ERROR: I/O Exception while processing file \"" + 
        fileName + "\"");
      e.printStackTrace ();
    }
  }

  /**
   * Writes a String to the file with the given name, replacing its contents.
   *
   * @param fileName path of the file to write
   * @param text     the text to write
   * @throws FileNotFoundException declared for callers; FileWriter reports
   *                               an unopenable file as an IOException
   * @throws IOException           if the file cannot be opened or written
   */
  public static void writeFile (String fileName, String text)
    throws IOException, FileNotFoundException
  {
    stringToWriter (new BufferedWriter (new FileWriter (fileName)), text);
  }

  /**
   * Writes a String to the given file, replacing its contents.
   *
   * @param fileHandle the file to write
   * @param text       the text to write
   * @throws FileNotFoundException declared for callers; FileWriter reports
   *                               an unopenable file as an IOException
   * @throws IOException           if the file cannot be opened or written
   */
  public static void writeFile (File fileHandle, String text)
    throws IOException, FileNotFoundException
  {
    stringToWriter (new BufferedWriter (new FileWriter (fileHandle)), text);
  }

  /**
   * Writes the entire contents of a String into a BufferedWriter and closes
   * the writer, even when writing fails.
   *
   * @param theWriter the writer to write to; closed on return
   * @param text      the text to write
   * @throws IOException if writing or closing fails
   */
  public static void stringToWriter (BufferedWriter theWriter, String text)
    throws IOException
  {
    try
    {
      theWriter.write (text, 0, text.length ());
    }
    finally
    {
      theWriter.close ();
    }
  }

  /**
   * Reads the file with the given name into a String.
   *
   * @param fileName path of the file to read
   * @return the file contents, every line terminated by "\n"
   * @throws FileNotFoundException if the file cannot be opened
   * @throws IOException           if reading fails
   */
  public static String readFile (String fileName)
    throws IOException, FileNotFoundException
  {
    return readerToString (new BufferedReader (new FileReader (fileName)));
  }

  /**
   * Reads the given file into a String.
   *
   * @param fileHandle the file to read
   * @return the file contents, every line terminated by "\n"
   * @throws FileNotFoundException if the file cannot be opened
   * @throws IOException           if reading fails
   */
  public static String readFile (File fileHandle)
    throws IOException, FileNotFoundException
  {
    return readerToString (new BufferedReader (new FileReader (fileHandle)));
  }

  /**
   * Reads the entire contents of a BufferedReader into a String and closes
   * the reader, even when reading fails. Line terminators are normalised:
   * every line read, including the last one, is followed by a single "\n".
   *
   * @param theReader the reader to drain; closed on return. A null reader
   *                  yields the empty string.
   * @return the text that was read
   * @throws IOException if reading or closing fails
   */
  public static String readerToString (BufferedReader theReader)
    throws IOException
  {
    if (theReader == null)
      return "";

    StringBuffer retVal = new StringBuffer (100000);
    try
    {
      String currLine;
      while ((currLine = theReader.readLine ()) != null)
      {
        retVal.append (currLine);
        // Always "\n". Note that fileSeparator is the path separator and
        // must not be used as a line terminator here.
        retVal.append ("\n");
      }
    }
    finally
    {
      theReader.close ();
    }

    return retVal.toString ();
  }

  // Main recursive routine: walks the tree below rootDir and converts every
  // file whose name ends, ignoring case, in "HTM" or "HTML". No leading dot
  // is required, so names such as "page.shtml" or "page.xhtml" match too.
  // A rootDir that cannot be listed (not a directory, unreadable) is skipped.
  private static void fileLister (String rootDir)
  {
    String [] fileList = new File (rootDir).list ();
    if (fileList == null)
      return;

    for (int i = fileList.length - 1; i >= 0; i--)
    {
      String name = fileList [i];
      String path = rootDir + fileSeparator + name;

      if (new File (rootDir, name).isDirectory ())
      {
        fileLister (path);
      }
      else
      {
        String upperName = name.toUpperCase ();
        if (upperName.endsWith ("HTML") || upperName.endsWith ("HTM"))
          doFile (path);
      }
    }
  }

  /**
   * Converts all HTML files found in rootDir and in its sub-directories.
   *
   * @param rootDir the directory to start from
   */
  public static void getFiles (String rootDir)
  {
    fileLister (rootDir);
  }

  /**
   * Command line entry point.
   *
   * @param args optional single argument: the directory to convert. The
   *             current directory is used when no argument is given.
   */
  public static void main (String [] args)
  {
    // No directory argument supplied? Use the current directory.
    String rootDir = (args.length == 0) ? "." : args [0];

    System.out.println ("Converting *.htm and *.html in directory \"" + 
      rootDir + "\" to text.");

    getFiles (rootDir);
    // All Done.
    System.out.println ("Done.");
  }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -