/* bk_xhtml.c */
/*
* xhtml backend for Halibut
* (initial implementation by James Aylett)
*
* Still to do:
*
* +++ doesn't handle non-breaking hyphens. Not sure how to yet.
* +++ entity names (from a file -- ideally supply normal SGML files)
* +++ configuration directive to file split where the current layout
* code wouldn't. Needs changes to _ponder_layout() and _do_paras(),
* perhaps others.
*
* Limitations:
*
* +++ biblio/index references target the nearest section marker, rather
* than having a dedicated target themselves. In large bibliographies
* this will cause problems. (The solution is to fake up a response
* from xhtml_find_section(), probably linking it into the sections
* chain just in case we need it again, and to make freeing it up
* easier.) docsrc.pl used to work as we do, however, and SGT agrees that
* this is acceptable for now.
* +++ can't cope with leaf-level == 0. It's all to do with the
* top-level file not being normal, probably not even having a valid
* section level, and stuff like that. I question whether this is an
* issue, frankly; small manuals that fit on one page should probably
* not be written in halibut at all.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "halibut.h"
/*
 * One section of the output document.
 *
 * Sections are linked in two independent ways: a tree (next / child /
 * parent), whose links are NULL where the tree is split across files, and
 * a flat list through `chain'.  xhtml_new_section() sets `chain' to the
 * section passed to it, and xhtml_find_section() walks that list starting
 * from the file-level `topsection'.
 */
struct xhtmlsection_Struct {
struct xhtmlsection_Struct *next; /* next sibling (NULL if split across files) */
struct xhtmlsection_Struct *child; /* NULL if split across files */
struct xhtmlsection_Struct *parent; /* NULL if split across files */
struct xhtmlsection_Struct *chain; /* single structure independent of weird trees */
paragraph *para; /* paragraph this section marks; compared by xhtml_find_section() */
struct xhtmlfile_Struct *file; /* which file is this a part of? */
char *fragment; /* fragment id within the file */
int level; /* xhtml_new_section() initialises this to -1 ("end of chain" marker) */
};
/*
 * One output file.  Files form their own tree (next / child / parent),
 * separate from the section tree.  Records are created by
 * xhtml_new_file(), which leaves all three tree links NULL.
 */
struct xhtmlfile_Struct {
struct xhtmlfile_Struct *next;
struct xhtmlfile_Struct *child;
struct xhtmlfile_Struct *parent;
char *filename; /* heap-allocated by xhtml_new_file() */
struct xhtmlsection_Struct *sections; /* sections within this file (only one for non-leaf) */
/*
 * Set by xhtml_new_file() when the file's section sits exactly at
 * conf.leaf_level, and forced to TRUE by xhtml_fixup_layout() for any
 * file that has no child.
 */
int is_leaf; /* is this file a leaf file, ie does it not have any children? */
};
/* Short names for the structures used throughout this backend. */
typedef struct xhtmlsection_Struct xhtmlsection;
typedef struct xhtmlfile_Struct xhtmlfile;
typedef struct xhtmlindex_Struct xhtmlindex;
/*
 * An array of section pointers.
 * NOTE(review): none of the code visible here uses this structure;
 * presumably `nsection' counts the entries in use and `size' is the
 * allocated capacity of `sections' -- confirm against the users.
 */
struct xhtmlindex_Struct {
int nsection;
int size;
xhtmlsection **sections;
};
/*
 * Heading format for one kind of heading (chapters, or sections at one
 * depth).  Filled in by xhtml_configure().
 */
typedef struct {
/* Boolean; set from xhtml-chapter-numeric / xhtml-section-numeric. */
int just_numbers;
/*
 * Heap copy (ustrdup) of the text given to xhtml-chapter-suffix /
 * xhtml-section-suffix; the built-in defaults are L": " and L" ".
 * Presumably emitted after the heading number -- confirm in the
 * heading-rendering code.
 */
wchar_t *number_suffix;
} xhtmlheadfmt;
/*
 * Backend configuration, as produced by xhtml_configure() from its
 * built-in defaults plus any recognised para_Config paragraphs.  Each
 * field is annotated with the directive that sets it and its default.
 */
typedef struct {
/* xhtml-contents-depth-0 .. xhtml-contents-depth-5; defaults 2, 3, 4, 5, 6, 7 */
int contents_depth[6];
/* xhtml-leaf-contains-contents; default FALSE */
int leaf_contains_contents;
/*
 * xhtml-leaf-level; default 2.  xhtml_new_file() flags a file as a leaf
 * when its section's level equals this value.
 */
int leaf_level;
/* xhtml-leaf-smallest-contents; default 4 */
int leaf_smallest_contents;
/* xhtml-versionid; default TRUE */
int include_version_id;
/* xhtml-author, xhtml-description; default NULL */
wchar_t *author, *description;
/*
 * xhtml-head-end, xhtml-body-tag, xhtml-body-start, xhtml-body-end,
 * xhtml-address-start, xhtml-address-end, xhtml-navigation-attributes;
 * all default to NULL
 */
wchar_t *head_end, *body, *body_start, *body_end, *address_start,
*address_end, *nav_attrs;
/* xhtml-rlink-prefix, xhtml-rlink-suffix; default NULL */
wchar_t *rlink_prefix, *rlink_suffix;
/* chm-toc-file, chm-ind-file; default NULL */
wchar_t *chm_toc_file, *chm_ind_file;
/* xhtml-suppress-address; default FALSE */
int suppress_address;
/*
 * fchapter: set by xhtml-chapter-numeric / xhtml-chapter-suffix.
 * fsect: heap array of nfsect entries (initially 2), indexed by the
 * optional number given to xhtml-section-numeric / xhtml-section-suffix
 * and grown on demand by xhtml_configure().
 */
xhtmlheadfmt fchapter, *fsect;
int nfsect;
} xhtmlconfig;
/*
 * Disabled prototypes (commented out in the source, kept for reference):
 *
 * static void xhtml_level(paragraph *, int);
 * static void xhtml_level_0(paragraph *);
 * static void xhtml_docontents(FILE *, paragraph *, int);
 * static void xhtml_dosections(FILE *, paragraph *, int);
 * static void xhtml_dobody(FILE *, paragraph *, int);
 */
/* Forward declarations of file-local helpers. */
static void xhtml_doheader(FILE *, word *);
static void xhtml_dofooter(FILE *);
static void xhtml_versionid(FILE *, word *, int);
static void xhtml_utostr(wchar_t *, char **);
static int xhtml_para_level(paragraph *);
static int xhtml_reservedchar(int);
static int xhtml_convert(wchar_t *, char **, int);
static void xhtml_rdaddwc(rdstringc *, word *, word *);
static void xhtml_para(FILE *, word *);
static void xhtml_codepara(FILE *, word *);
static void xhtml_heading(FILE *, paragraph *);
/* CHM counterparts of the header/footer writers. */
static void chm_doheader(FILE *, word *);
static void chm_dofooter(FILE *);
/* File-global variables are much easier than passing these things
* all over the place. Evil, but easier. We can replace this with a single
* structure at some point.
*/
/* Active configuration; xhtml_new_file() reads conf.leaf_level. */
static xhtmlconfig conf;
static keywordlist *keywords;
static indexdata *idx;
static xhtmlfile *topfile;
/* Start of the section chain that xhtml_find_section() walks. */
static xhtmlsection *topsection;
/* Head of the paragraph list that xhtml_find_section() scans. */
static paragraph *sourceparas;
/*
 * NOTE(review): both `lastfile' and `xhtml_last_file' exist; their
 * separate roles are not visible in this part of the file.
 */
static xhtmlfile *lastfile;
static xhtmlfile *xhtml_last_file = NULL;
static int last_level = -1;
static xhtmlsection *currentsection;
/*
 * CHM output streams; xhtml_configure() resets both to NULL.  Presumably
 * opened for the chm-toc-file / chm-ind-file outputs -- confirm where they
 * are assigned.
 */
static FILE* chm_toc = NULL;
static FILE* chm_ind = NULL;
static xhtmlconfig xhtml_configure(paragraph * source)
{
xhtmlconfig ret;
/*
* Defaults.
*/
ret.contents_depth[0] = 2;
ret.contents_depth[1] = 3;
ret.contents_depth[2] = 4;
ret.contents_depth[3] = 5;
ret.contents_depth[4] = 6;
ret.contents_depth[5] = 7;
ret.leaf_level = 2;
ret.leaf_smallest_contents = 4;
ret.leaf_contains_contents = FALSE;
ret.include_version_id = TRUE;
ret.author = NULL;
ret.description = NULL;
ret.head_end = NULL;
ret.body = NULL;
ret.body_start = NULL;
ret.body_end = NULL;
ret.address_start = NULL;
ret.address_end = NULL;
ret.nav_attrs = NULL;
ret.suppress_address = FALSE;
ret.chm_toc_file = NULL;
ret.chm_ind_file = NULL;
chm_toc = NULL;
chm_ind = NULL;
ret.fchapter.just_numbers = FALSE;
ret.fchapter.number_suffix = ustrdup(L": ");
ret.nfsect = 2;
ret.fsect = mknewa(xhtmlheadfmt, ret.nfsect);
ret.fsect[0].just_numbers = FALSE;
ret.fsect[0].number_suffix = ustrdup(L": ");
ret.fsect[1].just_numbers = TRUE;
ret.fsect[1].number_suffix = ustrdup(L" ");
ret.rlink_prefix = NULL;
ret.rlink_suffix = NULL;
for (; source; source = source->next)
{
if (source->type == para_Config)
{
if (!ustricmp(source->keyword, L"xhtml-contents-depth-0"))
{
ret.contents_depth[0] = utoi(uadv(source->keyword));
} else if (!ustricmp(source->keyword, L"xhtml-contents-depth-1"))
{
ret.contents_depth[1] = utoi(uadv(source->keyword));
} else if (!ustricmp(source->keyword, L"xhtml-contents-depth-2"))
{
ret.contents_depth[2] = utoi(uadv(source->keyword));
} else if (!ustricmp(source->keyword, L"xhtml-contents-depth-3"))
{
ret.contents_depth[3] = utoi(uadv(source->keyword));
} else if (!ustricmp(source->keyword, L"xhtml-contents-depth-4"))
{
ret.contents_depth[4] = utoi(uadv(source->keyword));
} else if (!ustricmp(source->keyword, L"xhtml-contents-depth-5"))
{
ret.contents_depth[5] = utoi(uadv(source->keyword));
} else if (!ustricmp(source->keyword, L"xhtml-leaf-level"))
{
ret.leaf_level = utoi(uadv(source->keyword));
} else
if (!ustricmp(source->keyword, L"xhtml-leaf-smallest-contents"))
{
ret.leaf_smallest_contents = utoi(uadv(source->keyword));
} else if (!ustricmp(source->keyword, L"xhtml-versionid"))
{
ret.include_version_id = utob(uadv(source->keyword));
} else
if (!ustricmp(source->keyword, L"xhtml-leaf-contains-contents"))
{
ret.leaf_contains_contents = utob(uadv(source->keyword));
} else if (!ustricmp(source->keyword, L"xhtml-suppress-address"))
{
ret.suppress_address = utob(uadv(source->keyword));
} else if (!ustricmp(source->keyword, L"xhtml-author"))
{
ret.author = uadv(source->keyword);
} else if (!ustricmp(source->keyword, L"chm-toc-file"))
{
ret.chm_toc_file = uadv(source->keyword);
} else if (!ustricmp(source->keyword, L"chm-ind-file"))
{
ret.chm_ind_file = uadv(source->keyword);
} else if (!ustricmp(source->keyword, L"xhtml-description"))
{
ret.description = uadv(source->keyword);
} else if (!ustricmp(source->keyword, L"xhtml-head-end"))
{
ret.head_end = uadv(source->keyword);
} else if (!ustricmp(source->keyword, L"xhtml-body-start"))
{
ret.body_start = uadv(source->keyword);
} else if (!ustricmp(source->keyword, L"xhtml-body-tag"))
{
ret.body = uadv(source->keyword);
} else if (!ustricmp(source->keyword, L"xhtml-body-end"))
{
ret.body_end = uadv(source->keyword);
} else if (!ustricmp(source->keyword, L"xhtml-address-start"))
{
ret.address_start = uadv(source->keyword);
} else if (!ustricmp(source->keyword, L"xhtml-address-end"))
{
ret.address_end = uadv(source->keyword);
} else
if (!ustricmp(source->keyword, L"xhtml-navigation-attributes"))
{
ret.nav_attrs = uadv(source->keyword);
} else if (!ustricmp(source->keyword, L"xhtml-chapter-numeric"))
{
ret.fchapter.just_numbers = utob(uadv(source->keyword));
} else if (!ustricmp(source->keyword, L"xhtml-chapter-suffix"))
{
ret.fchapter.number_suffix = ustrdup(uadv(source->keyword));
} else if (!ustricmp(source->keyword, L"xhtml-rlink-prefix"))
{
ret.rlink_prefix = uadv(source->keyword);
} else if (!ustricmp(source->keyword, L"xhtml-rlink-suffix"))
{
ret.rlink_suffix = uadv(source->keyword);
} else if (!ustricmp(source->keyword, L"xhtml-section-numeric"))
{
wchar_t *p = uadv(source->keyword);
int n = 0;
if (uisdigit(*p))
{
n = utoi(p);
p = uadv(p);
}
if (n >= ret.nfsect)
{
int i;
ret.fsect = resize(ret.fsect, n + 1);
for (i = ret.nfsect; i <= n; i++)
ret.fsect[i] = ret.fsect[ret.nfsect - 1];
ret.nfsect = n + 1;
}
ret.fsect[n].just_numbers = utob(p);
} else if (!ustricmp(source->keyword, L"xhtml-section-suffix"))
{
wchar_t *p = uadv(source->keyword);
int n = 0;
if (uisdigit(*p))
{
n = utoi(p);
p = uadv(p);
}
if (n >= ret.nfsect)
{
int i;
ret.fsect = resize(ret.fsect, n + 1);
for (i = ret.nfsect; i <= n; i++)
ret.fsect[i] = ret.fsect[ret.nfsect - 1];
ret.nfsect = n + 1;
}
ret.fsect[n].number_suffix = ustrdup(p);
}
}
}
/* printf(" !!! leaf_level = %i\n", ret.leaf_level);
printf(" !!! contentdepth-0 = %i\n", ret.contents_depth[0]);
printf(" !!! contentdepth-1 = %i\n", ret.contents_depth[1]);
printf(" !!! contentdepth-2 = %i\n", ret.contents_depth[2]);
printf(" !!! contentdepth-3 = %i\n", ret.contents_depth[3]);
printf(" !!! contentdepth-4 = %i\n", ret.contents_depth[4]);
printf(" !!! contentdepth-5 = %i\n", ret.contents_depth[5]);
printf(" !!! leaf_contains_contents = %i\n", ret.leaf_contains_contents); */
return ret;
}
/*
 * Allocate a blank section record and hook it onto the section chain.
 *
 * The new record's `chain' link is set to `last'; every other pointer is
 * NULL and the level is -1.  Returns the new record.
 */
static xhtmlsection *xhtml_new_section(xhtmlsection * last)
{
    xhtmlsection *sect = mknew(xhtmlsection);

    /* Not yet tied to a paragraph, a file or a fragment. */
    sect->para = NULL;
    sect->file = NULL;
    sect->fragment = NULL;
    sect->level = -1;           /* marker: end of chain */

    /* No place in the section tree yet. */
    sect->parent = NULL;
    sect->child = NULL;
    sect->next = NULL;

    sect->chain = last;

    return sect;
}
/*
 * Find the section record that marks paragraph `p'.
 *
 * If `p' is not itself a section paragraph (xhtml_para_level() gives -1),
 * the paragraph list starting at `sourceparas' is scanned up to `p' and
 * the last section paragraph seen before it is used in its place.  The
 * section chain starting at `topsection' is then searched for the record
 * whose `para' is that paragraph.
 *
 * Returns the matching section, or NULL if no section paragraph precedes
 * `p' (for example a cross-reference to something before the first
 * chapter -- so don't do that) or if the chain holds no match.
 */
static xhtmlsection *xhtml_find_section(paragraph * p)
{
    xhtmlsection *sect = topsection;

    if (xhtml_para_level(p) == -1) {
        /* Back-track to the nearest preceding section paragraph. */
        paragraph *heading = NULL;
        paragraph *scan;

        for (scan = sourceparas; scan && scan != p; scan = scan->next) {
            if (xhtml_para_level(scan) != -1)
                heading = scan;
        }

        if (!heading)
            return NULL;        /* nothing marks this paragraph */

        p = heading;
    }

    /* Walk the chain until the owning section turns up, or it runs out. */
    for (; sect && sect->para != p; sect = sect->chain)
        ;

    return sect;
}
/* Fixed names for the file created when there is no section (sect == NULL). */
#define FILENAME_MANUAL "Manual.html"
#define FILENAME_CONTENTS "Contents.html"

/*
 * Allocate a file record for section `sect' and choose its filename.
 *
 * sect == NULL: the file gets a fixed name -- FILENAME_MANUAL when
 *   conf.leaf_level is 0 (currently unused), FILENAME_CONTENTS otherwise.
 * sect != NULL: the name is the text of every word_Normal word of the
 *   section paragraph's kwtext (or of its words, when kwtext is NULL),
 *   converted with xhtml_utostr() and concatenated, followed by ".html".
 *
 * The record's tree links are NULL, `sections' is `sect', and is_leaf is
 * set when `sect' is non-NULL and sits exactly at conf.leaf_level.
 *
 * Returns the new record; its filename is heap-allocated.
 */
static xhtmlfile *xhtml_new_file(xhtmlsection * sect)
{
    xhtmlfile *ret = mknew(xhtmlfile);

    ret->next = NULL;
    ret->child = NULL;
    ret->parent = NULL;
    ret->filename = NULL;
    ret->sections = sect;
    ret->is_leaf = (sect != NULL && sect->level == conf.leaf_level);

    if (sect == NULL) {
        const char *fixed =
            (conf.leaf_level == 0) ? FILENAME_MANUAL : FILENAME_CONTENTS;

        /*
         * Copy the name verbatim; it must never be interpreted as a
         * printf-style format string.
         */
        ret->filename = smalloc(strlen(fixed) + 1);
        strcpy(ret->filename, fixed);
    } else {
        paragraph *p = sect->para;
        rdstringc fname_c = { 0, 0, NULL };
        char *c;
        word *w;

        for (w = (p->kwtext) ? (p->kwtext) : (p->words); w; w = w->next) {
            switch (removeattr(w->type)) {
            case word_Normal:
                /* word_Emph, word_Code and word_WeakCode are not used */
                xhtml_utostr(w->text, &c);
                rdaddsc(&fname_c, c);
                sfree(c);
                break;
            default:
                /* every other kind of word contributes nothing */
                break;
            }
        }
        rdaddsc(&fname_c, ".html");
        ret->filename = rdtrimc(&fname_c);
    }

    return ret;
}
/*
 * Mark every childless file in the tree as a leaf.
 *
 * A file can end up with no children without being at the configured
 * leaf level; this pass sets is_leaf on all such files.  `file' and each
 * of its following siblings are visited in order, and the subtree below a
 * file is handled before moving on to that file's next sibling.
 *
 * `file' must not be NULL.
 */
void xhtml_fixup_layout(xhtmlfile * file)
{
    do {
        if (file->child != NULL)
            xhtml_fixup_layout(file->child);
        else
            file->is_leaf = TRUE;

        file = file->next;
    } while (file);
}
/*
* Create the tree structure so we know where everything goes.
* Method:
*
* Ignoring file splitting, we have three choices with each new section:
*
*       +-----------------+-----------------+
*       |                 |                 |
*       X            +----X----+           (1)
*                    |         |
*                    Y        (2)
*                    |
*                   (3)
*
* Y is the last section we added (currentsect).
* If sect is the section we want to add, then:
*
* (1) if sect->level < currentsect->level
* (2) if sect->level == currentsect->level
* (3) if sect->level > currentsect->level
*
* This requires the constraint that you never skip section numbers
* (so you can't have a.b.c.d without all of a, a.b and a.b.c existing).
*
* Note that you _can_ have 1.1.1.1 followed by 1.2 - you can change
* more than one level at a time. Lots of asserts, and probably part of
* the algorithm here, rely on this being true. (It currently isn't
* enforced by halibut, however.)
*
* File splitting makes this harder. For instance, say we added at (3)
* above and now need to add another section. We are splitting at level
* 2, ie the level of Y. Z is the last section we added:
*
*       +-----------------+-----------------+
*       |                 |                 |
*       X            +----X----+           (1)
*                    |         |
*               +----Y----+   (1)
*               |         |
*               Z        (2)
*               |
*              (3)
*
* The (1) case is now split; we need to search upwards to find where
* to actually link in. The other two cases remain the same (and will
* always be like this).
*
* Although file splitting complicates the linking, the decision of
* whether to split to a new file always rests on the same condition:
* is the level of this section higher than the leaf_level configuration
* value or not?
*
* Treating the cases backwards:
*
* (3) same file if sect->level > conf.leaf_level, otherwise new file
*
* if in the same file, currentsect->child points to sect
* otherwise the linking is done through the file tree (which works
* in more or less the same way, ie currentfile->child points to
* the new file)
*
* (2) same file if sect->level > conf.leaf_level, otherwise new file
*
* if in the same file, currentsect->next points to sect
* otherwise file linking and currentfile->next points to the new
* file (we know that Z must have caused a new file to be created)
*
* (1) same file if sect->level > conf.leaf_level, otherwise new file
*
* this is actually effectively the same case as (2) here,
* except that we first have to travel up the sections to figure
* out which section this new one will be a sibling of. In doing
* so, we may disappear off the top of a file and have to go up
* [Non-source text removed here: keyboard-shortcut help from the web code
* viewer this file was copied from.  The remainder of this comment appears
* to be missing from this copy.]