⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 format.c

📁 将HTML转换为TXT文件的程序
💻 C
📖 第 1 页 / 共 3 页
字号:
/* ------------------------------------------------------------------------- *//* * Copyright (c) 1999 *      GMRS Software GmbH, Innsbrucker Ring 159, 81669 Munich, Germany. *      http://www.gmrs.de *      All rights reserved. *      Author: Arno Unkrig (arno.unkrig@gmrs.de) * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright *    notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright *    notice, this list of conditions and the following disclaimer in the *    documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software *    must display the following acknowledgement: *      This product includes software developed by GMRS Software GmbH. * 4. The name of GMRS Software GmbH may not be used to endorse or promote *    products derived from this software without specific prior written *    permission. * * THIS SOFTWARE IS PROVIDED BY GMRS SOFTWARE GMBH ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GMRS SOFTWARE GMBH BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. *//* ------------------------------------------------------------------------- */#ident "$Id: format.C,v 1.27 1999/11/23 19:03:25 arno Exp $"#include <strstream.h>#include <stdlib.h>#include <ctype.h>#include <vector>#include <map>#include "html.h"#include "HTMLParser.h"#include "sgml.h"#include "cmp_nocase.h"#include "format.h"#include "Properties.h"#ifndef nelems#define nelems(array) (sizeof(array) / sizeof((array)[0]))#endif/* ------------------------------------------------------------------------- */static Line *line_format(const list<auto_ptr<Element> > *elements);static Area *make_up(const Line &line, Area::size_type w, int halign);static Area *format(  const list<auto_ptr<Element> > *elements,  Area::size_type                w,  int                            halign);static void format(  const list<auto_ptr<Element> > *elements,  Area::size_type                indent_left,  Area::size_type                w,  int                            halign,  ostream                        &os);/* ------------------------------------------------------------------------- *//* * Helper class that retrieves several block-formatting properties in one * go. */struct BlockFormat {  Area::size_type vspace_before;  Area::size_type vspace_after;  Area::size_type indent_left;  Area::size_type indent_right;  BlockFormat(    const char      *item_name,    Area::size_type default_vspace_before = 0,    Area::size_type default_vspace_after  = 0,    Area::size_type default_indent_left   = 0,    Area::size_type default_indent_right  = 0  );  Area::size_type effective_width(Area::size_type) const;};/* * Helper class that retrieves several list-formatting properties in one * go. */struct ListFormat {  Area::size_type           vspace_before;  Area::size_type           vspace_between;  Area::size_type           vspace_after;  auto_ptr<vector<int> >    indents;  auto_ptr<vector<string> > default_types;  ListFormat(    const char      *item_name,    Area::size_type default_vspace_before  = 0,    Area::size_type default_vspace_between = 0,    Area::size_type default_vspace_after   = 0,    const char      *default_indents       = "6",    const char      *default_default_types = "DISC CIRCLE SQUARE"  );  Area::size_type get_indent(int nesting) const;  const string    &get_default_type(int nesting) const;  int             get_type(    const list<TagAttribute> *attributes,    int                      nesting,    int                      default_default_type  ) const;};/* ------------------------------------------------------------------------- */// Attributes: VERSION (ignored)Area *Document::format(Area::size_type w, int halign) const{  static BlockFormat bf("DOCUMENT");  auto_ptr<Area> res(body.format(bf.effective_width(w), halign));  if (!res.get()) return 0;  *res >>= bf.indent_left;  res->prepend(bf.vspace_before);  res->append(bf.vspace_after);  return res.release();}voidDocument::format(  Area::size_type indent_left,  Area::size_type w,  int             halign,  ostream         &os) const{  static BlockFormat bf("DOCUMENT");  for (int i = 0; i < bf.vspace_before; ++i) os << endl;  body.format(    indent_left + bf.indent_left, bf.effective_width(w), halign,    os  );  for (int i = 0; i < bf.vspace_after; ++i) os << endl;}/* ------------------------------------------------------------------------- */// Attributes: BACKGROUND BGCOLOR TEXT LINK VLINK ALINK (ignored)Area *Body::format(Area::size_type w, int halign) const{  static BlockFormat bf("BODY");  auto_ptr<Area> res(    ::format(content.get(), bf.effective_width(w), halign)  );  if (!res.get()) return 0;  *res >>= bf.indent_left;  res->prepend(bf.vspace_before);  res->append(bf.vspace_after);  return res.release();}voidBody::format(  Area::size_type indent_left,  Area::size_type w,  int             halign,  ostream         &os) const{  static BlockFormat bf("BODY");  for (int i = 0; i < bf.vspace_before; ++i) os << endl;  ::format(    content.get(),    indent_left + bf.indent_left, bf.effective_width(w), halign,    os  );  for (int i = 0; i < bf.vspace_after; ++i) os << endl;}/* ------------------------------------------------------------------------- */enum {  NO_BULLET,  ARABIC_NUMBERS, LOWER_ALPHA, UPPER_ALPHA, LOWER_ROMAN, UPPER_ROMAN,  DISC, SQUARE, CIRCLE, CUSTOM1, CUSTOM2, CUSTOM3};// Attributes: TYPE (processed) COMPACT (ignored)Area *OrderedList::format(Area::size_type w, int /*halign*/ ) const{  if (!items.get()) return 0;  static ListFormat lf("OL", 0, 0, 0, "6", "1");  int            type = lf.get_type(attributes.get(), nesting, ARABIC_NUMBERS);  auto_ptr<Area> res;  const list<auto_ptr<ListItem> > &il(*items);  list<auto_ptr<ListItem> >::const_iterator i;  int number = 1;  for (i = il.begin(); i != il.end(); ++i) {    auto_ptr<Area> a((*i)->format(w, type, lf.get_indent(nesting), &number));    if (a.get()) {      if (res.get()) {        res->append(lf.vspace_between);      } else {        res.reset(new Area);        res->append(lf.vspace_before);      }      *res += *a;    }  }  if (res.get()) res->append(lf.vspace_after);  return res.release();}/* * <UL>, <DIR> and <MENU> are currently formatted totally identically, because * this is what Netscape does, and the HTML 3.2 spec and "HTML -- The * Definitive Guide" give no clear indication as to how to format them. */// Attributes: TYPE (processed) COMPACT (ignored)Area *UnorderedList::format(Area::size_type w, int /*halign*/ ) const{  if (!items.get()) return 0;  static ListFormat lf("UL");  int               type = lf.get_type(attributes.get(), nesting, SQUARE);  auto_ptr<Area>    res;  const list<auto_ptr<ListItem> > &il(*items);  list<auto_ptr<ListItem> >::const_iterator i;  for (i = il.begin(); i != il.end(); ++i) {    auto_ptr<Area> a((*i)->format(w, type, lf.get_indent(nesting)));    if (a.get()) {      if (res.get()) {        res->append(lf.vspace_between);      } else {        res.reset(new Area);        res->append(lf.vspace_before);      }      *res += *a;    }  }  if (res.get()) res->append(lf.vspace_after);  return res.release();}// Attributes: TYPE (extension, processed) COMPACT (ignored)Area *Dir::format(Area::size_type w, int /*halign*/ ) const{  if (!items.get()) return 0;  static ListFormat lf("DIR");  int               type = lf.get_type(attributes.get(), nesting, SQUARE);  auto_ptr<Area>    res;  const list<auto_ptr<ListItem> > &il(*items);  list<auto_ptr<ListItem> >::const_iterator i;  for (i = il.begin(); i != il.end(); ++i) {    auto_ptr<Area> a((*i)->format(w, type, lf.get_indent(nesting)));    if (a.get()) {      if (res.get()) {        res->append(lf.vspace_between);      } else {        res.reset(new Area);        res->append(lf.vspace_before);      }      *res += *a;    }  }  if (res.get()) res->append(lf.vspace_after);  return res.release();}// Attributes: TYPE (extension, processed) COMPACT (ignored)Area *Menu::format(Area::size_type w, int /*halign*/ ) const{  if (!items.get()) return 0;  static ListFormat lf("MENU", 0, 0, 0, "2", "NO_BULLET");  int               type = lf.get_type(attributes.get(), nesting, NO_BULLET);  auto_ptr<Area>    res;  const list<auto_ptr<ListItem> > &il(*items);  list<auto_ptr<ListItem> >::const_iterator i;  for (i = il.begin(); i != il.end(); ++i) {    auto_ptr<Area> a((*i)->format(w, type, lf.get_indent(nesting)));    if (a.get()) {      if (res.get()) {        res->append(lf.vspace_between);      } else {        res.reset(new Area);        res->append(lf.vspace_before);      }      *res += *a;    }  }  if (res.get()) res->append(lf.vspace_after);  return res.release();}// Attributes: TYPE VALUE (ignored)Area *ListNormalItem::format(  Area::size_type w,  int             type,  Area::size_type indent,  int             *number_in_out /*= 0*/) const{  int number = 0;  if (number_in_out) {    number = *number_in_out = get_attribute(      attributes.get(), "VALUE", *number_in_out    );  }  static const char *disc_bullet =                               Formatting::getString("LI.disc_bullet",    "*");  static const char *square_bullet =                               Formatting::getString("LI.square_bullet",  "#");  static const char *circle_bullet =                               Formatting::getString("LI.circle_bullet",  "o");  static const char *custom1_bullet =                               Formatting::getString("LI.custom1_bullet", "+");  static const char *custom2_bullet =                               Formatting::getString("LI.custom2_bullet", "-");  static const char *custom3_bullet =                               Formatting::getString("LI.custom3_bullet", "~");  string bullet;  switch (type) {  case NO_BULLET:                          break;  case DISC:      bullet = disc_bullet;    break;  case SQUARE:    bullet = square_bullet;  break;  case CIRCLE:    bullet = circle_bullet;  break;  case CUSTOM1:   bullet = custom1_bullet; break;  case CUSTOM2:   bullet = custom2_bullet; break;  case CUSTOM3:   bullet = custom3_bullet; break;  case ARABIC_NUMBERS:    {      ostrstream oss;      oss << number << '.' << ends;      bullet = oss.str();      oss.rdbuf()->freeze(0);    }    break;  case LOWER_ALPHA:    bullet = number <= 26 ? (char) (number - 1 + 'a') : 'z';    bullet += '.';    break;  case UPPER_ALPHA:    bullet = number <= 26 ? (char) (number - 1 + 'A') : 'Z';    bullet += '.';    break;  case LOWER_ROMAN:    {      static const char *lower_roman[] = {        "0", "i",  "ii",  "iii",  "iv",  "v",  "vi",  "vii",  "viii",  "ix",        "x", "xi", "xii", "xiii", "xiv", "xv", "xvi", "xvii", "xviii", "xix",        "xx","xxi","xxii","xxiii","xxiv","xxv","xxvi","xxvii","xxviii","xxix"      };      const char *p = (        number >= 0 && number < (int) nelems(lower_roman) ?        lower_roman[number] :        "???"      );      bullet = p;      bullet += '.';    }    break;  case UPPER_ROMAN:    {      static const char *upper_roman[] = {        "0", "I",  "II",  "III",  "IV",  "V",  "VI",  "VII",  "VIII",  "IX",        "X", "XI", "XII", "XIII", "XIV", "XV", "XVI", "XVII", "XVIII", "XIX",        "XX","XXI","XXII","XXIII","XXIV","XXV","XXVI","XXVII","XXVIII","XXIX"      };      const char *p = (        number >= 0 && number < (int) nelems(upper_roman) ?        upper_roman[number] :        "???"      );      bullet = p;      bullet += '.';    }    break;  }  if (bullet.length() >= indent) indent = bullet.length() + 1;  auto_ptr<Area> res(::format(flow.get(), w - indent, Area::LEFT));  // KLUDGE: Some people write "<UL> <B><LI>Bla</B>Bla </UL>", which actually  // defines a bold and empty list item before "Bla Bla". This is very  // difficult to handle... so... let's just ignore empty list items.  if (!res.get()) return 0;  *res >>= indent;  res->insert(bullet, indent - bullet.length() - 1, 0);  if (number_in_out) (*number_in_out)++;  return res.release();}Area *ListBlockItem::format(  Area::size_type w,  int             /*type*/,  Area::size_type indent,  int             * /*number_in_out*/ /*= 0*/) const{  if (!block.get()) return 0;  auto_ptr<Area> res(block->format(w - indent, Area::LEFT));  if (!res.get()) return 0;  /*   * Hm... shouldn't there be a bullet before the item?   */  *res >>= indent;  return res.release();}/* ------------------------------------------------------------------------- */// Attributes: COMPACT (ignored)Area *DefinitionList::format(Area::size_type w, int halign) const{  static struct DefinitionListFormat {    const Area::size_type vspace_before;    const Area::size_type vspace_between;    const Area::size_type vspace_after;    DefinitionListFormat() :      vspace_before (Formatting::getInt("DL.vspace.before",  0)),      vspace_between(Formatting::getInt("DL.vspace.between", 0)),      vspace_after  (Formatting::getInt("DL.vspace.after",   0))    {}  }              dlf;  auto_ptr<Area> res;  if (preamble.get()) {    res.reset(::format(preamble.get(), w, halign));    if (res.get()) res->prepend(dlf.vspace_before);  }  if (items.get()) {    const list<auto_ptr<DefinitionListItem> > &il(*items);    list<auto_ptr<DefinitionListItem> >::const_iterator i;    for (i = il.begin(); i != il.end(); ++i) {      auto_ptr<Area> a((*i)->format(w, halign));      if (!a.get()) continue;      if (res.get()) {        res->append(dlf.vspace_between);        *res += *a;      } else {        res = a;        res->prepend(dlf.vspace_before);      }    }  }  if (res.get()) res->append(dlf.vspace_after);  return res.release();}Area *TermName::format(Area::size_type w, int halign) const{  static BlockFormat bf("DT", 0, 0, 2);  auto_ptr<Area> res(::format(flow.get(), bf.effective_width(w), halign));  if (!res.get()) return 0;  *res >>= bf.indent_left;  res->prepend(bf.vspace_before);  res->append(bf.vspace_after);  return res.release();}Area *TermDefinition::format(Area::size_type w, int halign) const{  static BlockFormat bf("DD", 0, 0, 6);  auto_ptr<Area> res(::format(flow.get(), bf.effective_width(w), halign));  if (!res.get()) return 0;  *res >>= bf.indent_left;  res->prepend(bf.vspace_before);  res->append(bf.vspace_after);  return res.release();}/* ------------------------------------------------------------------------- */// Attributes: ALIGN NOSHADE SIZE WIDTH (ignored)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -