📄 bk_xhtml.c

📁 NullSofts criptable install system2.28源代码
💻 C
📖 第 1 页 / 共 4 页
字号:
12 3 4 下一页
/*
 * xhtml backend for Halibut
 * (initial implementation by James Aylett)
 *
 * Still to do:
 *
 *  +++ doesn't handle non-breaking hyphens. Not sure how to yet.
 *  +++ entity names (from a file -- ideally supply normal SGML files)
 *  +++ configuration directive to file split where the current layout
 *      code wouldn't. Needs changes to _ponder_layout() and _do_paras(),
 *      perhaps others.
 *
 * Limitations:
 *
 *  +++ biblio/index references target the nearest section marker, rather
 *   than having a dedicated target themselves. In large bibliographies
 *   this will cause problems. (The solution is to fake up a response
 *   from xhtml_find_section(), probably linking it into the sections
 *   chain just in case we need it again, and to make freeing it up
 *   easier.) docsrc.pl used to work as we do, however, and SGT agrees that
 *   this is acceptable for now.
 *  +++ can't cope with leaf-level == 0. It's all to do with the
 *   top-level file not being normal, probably not even having a valid
 *   section level, and stuff like that. I question whether this is an
 *   issue, frankly; small manuals that fit on one page should probably
 *   not be written in halibut at all.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "halibut.h"

/*
 * One document section as tracked by the xhtml backend.
 *
 * Sections are linked in two independent ways: a tree
 * (next/child/parent) whose links are NULL wherever the neighbouring
 * section lives in a different output file, and a flat `chain' list
 * which xhtml_find_section() walks starting from topsection.
 */
struct xhtmlsection_Struct {
  struct xhtmlsection_Struct *next;     /* next sibling (NULL if split across files) */
  struct xhtmlsection_Struct *child;    /* NULL if split across files */
  struct xhtmlsection_Struct *parent;   /* NULL if split across files */
  struct xhtmlsection_Struct *chain;    /* single structure independent of weird trees */
  /* Paragraph this section marks; xhtml_find_section() matches on it. */
  paragraph *para;
  struct xhtmlfile_Struct *file;        /* which file is this a part of? */
  char *fragment;               /* fragment id within the file */
  /* Section level, compared against conf.leaf_level by xhtml_new_file().
   * xhtml_new_section() initialises it to -1 ("end of chain" marker). */
  int level;
};

/*
 * One output file. Files are linked into their own tree through
 * next/child/parent; xhtml_fixup_layout() walks that tree.
 */
struct xhtmlfile_Struct {
  struct xhtmlfile_Struct *next;        /* next sibling file */
  struct xhtmlfile_Struct *child;       /* first child file (NULL if none) */
  struct xhtmlfile_Struct *parent;
  /* Heap-allocated name, built by xhtml_new_file(): a fixed name when the
   * file has no section, otherwise text from the section's paragraph
   * followed by ".html". */
  char *filename;
  struct xhtmlsection_Struct *sections; /* sections within this file (only one for non-leaf) */
  int is_leaf;                  /* is this file a leaf file, ie does it not have any children? */
};

/* Short names for the backend's structures. */
typedef struct xhtmlsection_Struct xhtmlsection;
typedef struct xhtmlfile_Struct xhtmlfile;
typedef struct xhtmlindex_Struct xhtmlindex;

/*
 * A collection of section pointers.
 * NOTE(review): the users of this structure are outside this view;
 * presumably `nsection' counts the entries in use and `size' is the
 * allocated capacity of `sections' -- confirm against the code that
 * fills it in.
 */
struct xhtmlindex_Struct {
  int nsection;
  int size;
  xhtmlsection **sections;
};

/*
 * Heading number format for chapters or for one section level.
 * Filled in by xhtml_configure() from the xhtml-chapter-* and
 * xhtml-section-* configuration directives.
 */
typedef struct {
  /* Boolean set from the "...-numeric" directive (default FALSE for
   * chapters and section level 0, TRUE for section level 1). */
  int just_numbers;
  /* Heap copy (ustrdup) of the "...-suffix" directive value; the
   * defaults are L": " and, for section level 1, L" ". */
  wchar_t *number_suffix;
} xhtmlheadfmt;

/*
 * Backend configuration, produced by xhtml_configure(). Each field
 * notes the configuration directive that sets it and its default.
 */
typedef struct {
  /* xhtml-contents-depth-0 .. xhtml-contents-depth-5; defaults 2..7. */
  int contents_depth[6];
  /* xhtml-leaf-contains-contents; default FALSE. */
  int leaf_contains_contents;
  /* xhtml-leaf-level; default 2. xhtml_new_file() marks a file as a
   * leaf when its section is at exactly this level. */
  int leaf_level;
  /* xhtml-leaf-smallest-contents; default 4. */
  int leaf_smallest_contents;
  /* xhtml-versionid; default TRUE. */
  int include_version_id;
  /* xhtml-author, xhtml-description; default NULL. */
  wchar_t *author, *description;
  /* xhtml-head-end, xhtml-body-tag, xhtml-body-start, xhtml-body-end,
   * xhtml-address-start, xhtml-address-end and
   * xhtml-navigation-attributes respectively; all default to NULL. */
  wchar_t *head_end, *body, *body_start, *body_end, *address_start,
      *address_end, *nav_attrs;
  /* xhtml-rlink-prefix, xhtml-rlink-suffix; default NULL. */
  wchar_t *rlink_prefix, *rlink_suffix;
  /* chm-toc-file, chm-ind-file; default NULL. */
  wchar_t *chm_toc_file, *chm_ind_file;
  /* xhtml-suppress-address; default FALSE. */
  int suppress_address;
  /* fchapter: xhtml-chapter-numeric / xhtml-chapter-suffix.
   * fsect: heap array with one entry per section level, set by
   * xhtml-section-numeric / xhtml-section-suffix. */
  xhtmlheadfmt fchapter, *fsect;
  /* Number of entries in fsect; starts at 2 and grows on demand. */
  int nfsect;
} xhtmlconfig;

/*static void xhtml_level(paragraph *, int);
static void xhtml_level_0(paragraph *);
static void xhtml_docontents(FILE *, paragraph *, int);
static void xhtml_dosections(FILE *, paragraph *, int);
static void xhtml_dobody(FILE *, paragraph *, int);*/

/*
 * Forward declarations of file-local helpers. Their definitions are
 * not in this part of the file; the grouping below follows the
 * original layout.
 */

/* Per-file header / footer / version-id output. */
static void xhtml_doheader(FILE *, word *);
static void xhtml_dofooter(FILE *);
static void xhtml_versionid(FILE *, word *, int);

/* xhtml_utostr() hands back a string through its second argument, which
 * xhtml_new_file() releases with sfree(). xhtml_para_level() returns -1
 * for a paragraph that is not a section marker (see xhtml_find_section()). */
static void xhtml_utostr(wchar_t *, char **);
static int xhtml_para_level(paragraph *);
static int xhtml_reservedchar(int);

/* Text conversion and paragraph/heading output. */
static int xhtml_convert(wchar_t *, char **, int);
static void xhtml_rdaddwc(rdstringc *, word *, word *);
static void xhtml_para(FILE *, word *);
static void xhtml_codepara(FILE *, word *);
static void xhtml_heading(FILE *, paragraph *);

/* CHM counterparts of the header/footer writers. */
static void chm_doheader(FILE *, word *);
static void chm_dofooter(FILE *);
/* File-global variables are much easier than passing these things
 * all over the place. Evil, but easier. We can replace this with a single
 * structure at some point.
 */
/* Backend configuration; xhtml_new_file() reads conf.leaf_level. */
static xhtmlconfig conf;
static keywordlist *keywords;
static indexdata *idx;
/* NOTE(review): presumably the root of the output file tree -- it is
 * assigned outside this view; confirm. */
static xhtmlfile *topfile;
/* Start of the section `chain' list walked by xhtml_find_section(). */
static xhtmlsection *topsection;
/* Head of the paragraph list scanned by xhtml_find_section(). */
static paragraph *sourceparas;
/* NOTE(review): lastfile and xhtml_last_file have near-identical names;
 * their uses are outside this view, so check both before touching either. */
static xhtmlfile *lastfile;
static xhtmlfile *xhtml_last_file = NULL;
static int last_level = -1;
static xhtmlsection *currentsection;
/* Presumably the CHM contents/index output streams (cf. the
 * chm-toc-file / chm-ind-file directives); xhtml_configure() resets
 * both to NULL. */
static FILE* chm_toc = NULL;
static FILE* chm_ind = NULL;


static xhtmlconfig xhtml_configure(paragraph * source)
{
  xhtmlconfig ret;

  /*
   * Defaults.
   */
  ret.contents_depth[0] = 2;
  ret.contents_depth[1] = 3;
  ret.contents_depth[2] = 4;
  ret.contents_depth[3] = 5;
  ret.contents_depth[4] = 6;
  ret.contents_depth[5] = 7;
  ret.leaf_level = 2;
  ret.leaf_smallest_contents = 4;
  ret.leaf_contains_contents = FALSE;
  ret.include_version_id = TRUE;
  ret.author = NULL;
  ret.description = NULL;
  ret.head_end = NULL;
  ret.body = NULL;
  ret.body_start = NULL;
  ret.body_end = NULL;
  ret.address_start = NULL;
  ret.address_end = NULL;
  ret.nav_attrs = NULL;
  ret.suppress_address = FALSE;
  ret.chm_toc_file = NULL;
  ret.chm_ind_file = NULL;
  chm_toc = NULL;
  chm_ind = NULL;
  ret.fchapter.just_numbers = FALSE;
  ret.fchapter.number_suffix = ustrdup(L": ");
  ret.nfsect = 2;
  ret.fsect = mknewa(xhtmlheadfmt, ret.nfsect);
  ret.fsect[0].just_numbers = FALSE;
  ret.fsect[0].number_suffix = ustrdup(L": ");
  ret.fsect[1].just_numbers = TRUE;
  ret.fsect[1].number_suffix = ustrdup(L" ");
  ret.rlink_prefix = NULL;
  ret.rlink_suffix = NULL;

  for (; source; source = source->next)
  {
    if (source->type == para_Config)
    {
      if (!ustricmp(source->keyword, L"xhtml-contents-depth-0"))
      {
        ret.contents_depth[0] = utoi(uadv(source->keyword));
      } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-1"))
      {
        ret.contents_depth[1] = utoi(uadv(source->keyword));
      } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-2"))
      {
        ret.contents_depth[2] = utoi(uadv(source->keyword));
      } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-3"))
      {
        ret.contents_depth[3] = utoi(uadv(source->keyword));
      } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-4"))
      {
        ret.contents_depth[4] = utoi(uadv(source->keyword));
      } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-5"))
      {
        ret.contents_depth[5] = utoi(uadv(source->keyword));
      } else if (!ustricmp(source->keyword, L"xhtml-leaf-level"))
      {
        ret.leaf_level = utoi(uadv(source->keyword));
      } else
          if (!ustricmp(source->keyword, L"xhtml-leaf-smallest-contents"))
      {
        ret.leaf_smallest_contents = utoi(uadv(source->keyword));
      } else if (!ustricmp(source->keyword, L"xhtml-versionid"))
      {
        ret.include_version_id = utob(uadv(source->keyword));
      } else
          if (!ustricmp(source->keyword, L"xhtml-leaf-contains-contents"))
      {
        ret.leaf_contains_contents = utob(uadv(source->keyword));
      } else if (!ustricmp(source->keyword, L"xhtml-suppress-address"))
      {
        ret.suppress_address = utob(uadv(source->keyword));
      } else if (!ustricmp(source->keyword, L"xhtml-author"))
      {
        ret.author = uadv(source->keyword);
      } else if (!ustricmp(source->keyword, L"chm-toc-file"))
      {
        ret.chm_toc_file = uadv(source->keyword);
      } else if (!ustricmp(source->keyword, L"chm-ind-file"))
      {
        ret.chm_ind_file = uadv(source->keyword);
      } else if (!ustricmp(source->keyword, L"xhtml-description"))
      {
        ret.description = uadv(source->keyword);
      } else if (!ustricmp(source->keyword, L"xhtml-head-end"))
      {
        ret.head_end = uadv(source->keyword);
      } else if (!ustricmp(source->keyword, L"xhtml-body-start"))
      {
        ret.body_start = uadv(source->keyword);
      } else if (!ustricmp(source->keyword, L"xhtml-body-tag"))
      {
        ret.body = uadv(source->keyword);
      } else if (!ustricmp(source->keyword, L"xhtml-body-end"))
      {
        ret.body_end = uadv(source->keyword);
      } else if (!ustricmp(source->keyword, L"xhtml-address-start"))
      {
        ret.address_start = uadv(source->keyword);
      } else if (!ustricmp(source->keyword, L"xhtml-address-end"))
      {
        ret.address_end = uadv(source->keyword);
      } else
          if (!ustricmp(source->keyword, L"xhtml-navigation-attributes"))
      {
        ret.nav_attrs = uadv(source->keyword);
      } else if (!ustricmp(source->keyword, L"xhtml-chapter-numeric"))
      {
        ret.fchapter.just_numbers = utob(uadv(source->keyword));
      } else if (!ustricmp(source->keyword, L"xhtml-chapter-suffix"))
      {
        ret.fchapter.number_suffix = ustrdup(uadv(source->keyword));
      } else if (!ustricmp(source->keyword, L"xhtml-rlink-prefix"))
      {
        ret.rlink_prefix = uadv(source->keyword);
      } else if (!ustricmp(source->keyword, L"xhtml-rlink-suffix"))
      {
        ret.rlink_suffix = uadv(source->keyword);
      } else if (!ustricmp(source->keyword, L"xhtml-section-numeric"))
      {
        wchar_t *p = uadv(source->keyword);
        int n = 0;
        if (uisdigit(*p))
        {
          n = utoi(p);
          p = uadv(p);
        }
        if (n >= ret.nfsect)
        {
          int i;
          ret.fsect = resize(ret.fsect, n + 1);
          for (i = ret.nfsect; i <= n; i++)
            ret.fsect[i] = ret.fsect[ret.nfsect - 1];
          ret.nfsect = n + 1;
        }
        ret.fsect[n].just_numbers = utob(p);
      } else if (!ustricmp(source->keyword, L"xhtml-section-suffix"))
      {
        wchar_t *p = uadv(source->keyword);
        int n = 0;
        if (uisdigit(*p))
        {
          n = utoi(p);
          p = uadv(p);
        }
        if (n >= ret.nfsect)
        {
          int i;
          ret.fsect = resize(ret.fsect, n + 1);
          for (i = ret.nfsect; i <= n; i++)
            ret.fsect[i] = ret.fsect[ret.nfsect - 1];
          ret.nfsect = n + 1;
        }
        ret.fsect[n].number_suffix = ustrdup(p);
      }
    }
  }

  /*  printf(" !!! leaf_level = %i\n", ret.leaf_level);
     printf(" !!! contentdepth-0 = %i\n", ret.contents_depth[0]);
     printf(" !!! contentdepth-1 = %i\n", ret.contents_depth[1]);
     printf(" !!! contentdepth-2 = %i\n", ret.contents_depth[2]);
     printf(" !!! contentdepth-3 = %i\n", ret.contents_depth[3]);
     printf(" !!! contentdepth-4 = %i\n", ret.contents_depth[4]);
     printf(" !!! contentdepth-5 = %i\n", ret.contents_depth[5]);
     printf(" !!! leaf_contains_contents = %i\n", ret.leaf_contains_contents); */
  return ret;
}

/*
 * Allocate a blank section record.
 *
 * Every link is cleared except `chain', which is pointed at `last'.
 * The paragraph, file and fragment are left unset and the level is
 * set to -1.
 */
static xhtmlsection *xhtml_new_section(xhtmlsection * last)
{
  xhtmlsection *sect = mknew(xhtmlsection);

  sect->level = -1;             /* marker: end of chain */
  sect->fragment = NULL;
  sect->file = NULL;
  sect->para = NULL;
  sect->chain = last;
  sect->next = sect->child = sect->parent = NULL;

  return sect;
}

/*
 * Find the section record that marks paragraph `p'.
 *
 * If `p' is not itself a section paragraph (xhtml_para_level() gives
 * -1), it is first replaced by the last section paragraph that comes
 * before it in the sourceparas list. The section chain is then
 * searched, starting at topsection, for the record whose `para' is
 * that paragraph.
 *
 * Returns the matching section, or NULL if there is none. In
 * particular NULL is returned when no section paragraph precedes `p',
 * which happens for a cross-reference to something before the first
 * chapter starts.
 */
static xhtmlsection *xhtml_find_section(paragraph * p)
{
  xhtmlsection *sect = topsection;

  if (xhtml_para_level(p) == -1)
  {
    paragraph *heading = NULL;  /* latest section paragraph seen so far */
    paragraph *scan;

    for (scan = sourceparas; scan && scan != p; scan = scan->next)
    {
      if (xhtml_para_level(scan) != -1)
        heading = scan;
    }
    if (!heading)
      return NULL;              /* nothing to hang this paragraph on */
    p = heading;
  }

  for (; sect && sect->para != p; sect = sect->chain)
    ;
  return sect;
}

#define FILENAME_MANUAL "Manual.html"
#define FILENAME_CONTENTS "Contents.html"

/*
 * Allocate a file record holding the section list `sect'.
 *
 * The tree links start out NULL. The file is flagged as a leaf only
 * when `sect' is non-NULL and its level equals conf.leaf_level.
 *
 * Naming: with no section the file gets a fixed name -- "Manual.html"
 * if conf.leaf_level is 0 (currently unused), "Contents.html"
 * otherwise. With a section, the name is the concatenation of the
 * word_Normal words of the section paragraph's kwtext (or of its
 * words, when there is no kwtext), each converted by xhtml_utostr(),
 * followed by ".html".
 *
 * Returns the new record; its filename is heap-allocated.
 */
static xhtmlfile *xhtml_new_file(xhtmlsection * sect)
{
  xhtmlfile *file = mknew(xhtmlfile);

  file->next = NULL;
  file->child = NULL;
  file->parent = NULL;
  file->filename = NULL;
  file->sections = sect;

  if (sect != NULL)
  {
    rdstringc name = { 0, 0, NULL };
    paragraph *para = sect->para;
    word *w;
    char *piece;

    file->is_leaf = (sect->level == conf.leaf_level);

    if (para->kwtext)
      w = para->kwtext;
    else
      w = para->words;

    for (; w; w = w->next)
    {
      /* Only plain words contribute to the name. */
      if ((removeattr(w->type)) == word_Normal)
      {
        xhtml_utostr(w->text, &piece);
        rdaddsc(&name, piece);
        sfree(piece);
      }
    }
    rdaddsc(&name, ".html");
    file->filename = rdtrimc(&name);
  } else
  {
    const char *fixed;

    file->is_leaf = 0;
    if (conf.leaf_level == 0)   /* currently unused */
      fixed = FILENAME_MANUAL;
    else
      fixed = FILENAME_CONTENTS;

    file->filename = smalloc(strlen(fixed) + 1);
    strcpy(file->filename, fixed);
  }

  return file;
}

/*
 * Walk the file tree fixing up files which are actually leaves (ie
 * have no children) but aren't at leaf level, so that they too have
 * the leaf flag set.
 *
 * `file' is the first file of a sibling list; every sibling and all
 * of their descendants are visited. A NULL `file' is accepted and
 * does nothing.
 *
 * Siblings are handled by iteration, so recursion depth is bounded by
 * the depth of the tree rather than by the number of files at one
 * level.
 */
void xhtml_fixup_layout(xhtmlfile * file)
{
  for (; file != NULL; file = file->next)
  {
    if (file->child == NULL)
    {
      file->is_leaf = TRUE;
    } else
    {
      xhtml_fixup_layout(file->child);
    }
  }
}

/*
 * Create the tree structure so we know where everything goes.
 * Method:
 *
 * Ignoring file splitting, we have three choices with each new section:
 * 
 * +-----------------+-----------------+
 * |                 |                 |
 * X            +----X----+           (1)
 *              |         |
 *              Y        (2)
 *              |
 *             (3)
 *
 * Y is the last section we added (currentsect).
 * If sect is the section we want to add, then:
 *
 * (1) if sect->level < currentsect->level
 * (2) if sect->level == currentsect->level
 * (3) if sect->level > currentsect->level
 *
 * This requires the constraint that you never skip section numbers
 * (so you can't have a.b.c.d without all of a, a.b and a.b.c existing).
 *
 * Note that you _can_ have 1.1.1.1 followed by 1.2 - you can change
 * more than one level at a time. Lots of asserts, and probably part of
 * the algorithm here, rely on this being true. (It currently isn't
 * enforced by halibut, however.)
 *
 * File splitting makes this harder. For instance, say we added at (3)
 * above and now need to add another section. We are splitting at level
 * 2, ie the level of Y. Z is the last section we added:
 *
 * +-----------------+-----------------+
 * |                 |                 |
 * X            +----X----+           (1)
 *              |         |
 *         +----Y----+   (1)
 *         |         |
 *         Z        (2)
 *         |
 *        (3)
 *
 * The (1) case is now split; we need to search upwards to find where
 * to actually link in. The other two cases remain the same (and will
 * always be like this).
 *
 * File splitting complicates the linking, but the decision of whether
 * to split to a new file is always made on the same condition: is
 * the level of this section higher than the leaf_level configuration
 * value or not?
 *
 * Treating the cases backwards:
 *
 * (3) same file if sect->level > conf.leaf_level, otherwise new file
 *
 *     if in the same file, currentsect->child points to sect
 *     otherwise the linking is done through the file tree (which works
 *     in more or less the same way, ie currentfile->child points to
 *     the new file)
 *
 * (2) same file if sect->level > conf.leaf_level, otherwise new file
 *
 *     if in the same file, currentsect->next points to sect
 *     otherwise file linking and currentfile->next points to the new
 *     file (we know that Z must have caused a new file to be created)
 *
 * (1) same file if sect->level > conf.leaf_level, otherwise new file
 *
 *     this is actually effectively the same case as (2) here,
 *     except that we first have to travel up the sections to figure
 *     out which section this new one will be a sibling of. In doing
 *     so, we may disappear off the top of a file and have to go up
12 3 4 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -