📄 bk_xhtml.c
字号:
/* * xhtml backend for Halibut * (initial implementation by James Aylett) * * Still to do: * * +++ doesn't handle non-breaking hyphens. Not sure how to yet. * +++ entity names (from a file -- ideally supply normal SGML files) * +++ configuration directive to file split where the current layout * code wouldn't. Needs changes to _ponder_layout() and _do_paras(), * perhaps others. * * Limitations: * * +++ biblio/index references target the nearest section marker, rather * than having a dedicated target themselves. In large bibliographies * this will cause problems. (The solution is to fake up a response * from xhtml_find_section(), probably linking it into the sections * chain just in case we need it again, and to make freeing it up * easier.) docsrc.pl used to work as we do, however, and SGT agrees that * this is acceptable for now. * +++ can't cope with leaf-level == 0. It's all to do with the * top-level file not being normal, probably not even having a valid * section level, and stuff like that. I question whether this is an * issue, frankly; small manuals that fit on one page should probably * not be written in halibut at all. */#include <stdio.h>#include <stdlib.h>#include <string.h>#include <assert.h>#include "halibut.h"struct xhtmlsection_Struct { struct xhtmlsection_Struct *next; /* next sibling (NULL if split across files) */ struct xhtmlsection_Struct *child; /* NULL if split across files */ struct xhtmlsection_Struct *parent; /* NULL if split across files */ struct xhtmlsection_Struct *chain; /* single structure independent of weird trees */ paragraph *para; struct xhtmlfile_Struct *file; /* which file is this a part of? 
*/ char *fragment; /* fragment id within the file */ int level;};struct xhtmlfile_Struct { struct xhtmlfile_Struct *next; struct xhtmlfile_Struct *child; struct xhtmlfile_Struct *parent; char *filename; struct xhtmlsection_Struct *sections; /* sections within this file (only one for non-leaf) */ int is_leaf; /* is this file a leaf file, ie does it not have any children? */};typedef struct xhtmlsection_Struct xhtmlsection;typedef struct xhtmlfile_Struct xhtmlfile;typedef struct xhtmlindex_Struct xhtmlindex;struct xhtmlindex_Struct { int nsection; int size; xhtmlsection **sections;};typedef struct { int just_numbers; wchar_t *number_suffix;} xhtmlheadfmt;typedef struct { int contents_depth[6]; int leaf_contains_contents; int leaf_level; int leaf_smallest_contents; int include_version_id; wchar_t *author, *description; wchar_t *head_end, *body, *body_start, *body_end, *address_start, *address_end, *nav_attrs; wchar_t *rlink_prefix, *rlink_suffix; wchar_t *chm_toc_file, *chm_ind_file; int suppress_address; xhtmlheadfmt fchapter, *fsect; int nfsect;} xhtmlconfig;/*static void xhtml_level(paragraph *, int);static void xhtml_level_0(paragraph *);static void xhtml_docontents(FILE *, paragraph *, int);static void xhtml_dosections(FILE *, paragraph *, int);static void xhtml_dobody(FILE *, paragraph *, int);*/static void xhtml_doheader(FILE *, word *);static void xhtml_dofooter(FILE *);static void xhtml_versionid(FILE *, word *, int);static void xhtml_utostr(wchar_t *, char **);static int xhtml_para_level(paragraph *);static int xhtml_reservedchar(int);static int xhtml_convert(wchar_t *, char **, int);static void xhtml_rdaddwc(rdstringc *, word *, word *);static void xhtml_para(FILE *, word *);static void xhtml_codepara(FILE *, word *);static void xhtml_heading(FILE *, paragraph *);static void chm_doheader(FILE *, word *);static void chm_dofooter(FILE *);/* File-global variables are much easier than passing these things * all over the place. Evil, but easier. 
We can replace this with a single * structure at some point. */static xhtmlconfig conf;static keywordlist *keywords;static indexdata *idx;static xhtmlfile *topfile;static xhtmlsection *topsection;static paragraph *sourceparas;static xhtmlfile *lastfile;static xhtmlfile *xhtml_last_file = NULL;static int last_level = -1;static xhtmlsection *currentsection;static FILE* chm_toc = NULL;static FILE* chm_ind = NULL;static xhtmlconfig xhtml_configure(paragraph * source){ xhtmlconfig ret; /* * Defaults. */ ret.contents_depth[0] = 2; ret.contents_depth[1] = 3; ret.contents_depth[2] = 4; ret.contents_depth[3] = 5; ret.contents_depth[4] = 6; ret.contents_depth[5] = 7; ret.leaf_level = 2; ret.leaf_smallest_contents = 4; ret.leaf_contains_contents = FALSE; ret.include_version_id = TRUE; ret.author = NULL; ret.description = NULL; ret.head_end = NULL; ret.body = NULL; ret.body_start = NULL; ret.body_end = NULL; ret.address_start = NULL; ret.address_end = NULL; ret.nav_attrs = NULL; ret.suppress_address = FALSE; ret.chm_toc_file = NULL; ret.chm_ind_file = NULL; chm_toc = NULL; chm_ind = NULL; ret.fchapter.just_numbers = FALSE; ret.fchapter.number_suffix = ustrdup(L": "); ret.nfsect = 2; ret.fsect = mknewa(xhtmlheadfmt, ret.nfsect); ret.fsect[0].just_numbers = FALSE; ret.fsect[0].number_suffix = ustrdup(L": "); ret.fsect[1].just_numbers = TRUE; ret.fsect[1].number_suffix = ustrdup(L" "); ret.rlink_prefix = NULL; ret.rlink_suffix = NULL; for (; source; source = source->next) { if (source->type == para_Config) { if (!ustricmp(source->keyword, L"xhtml-contents-depth-0")) { ret.contents_depth[0] = utoi(uadv(source->keyword)); } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-1")) { ret.contents_depth[1] = utoi(uadv(source->keyword)); } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-2")) { ret.contents_depth[2] = utoi(uadv(source->keyword)); } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-3")) { ret.contents_depth[3] = utoi(uadv(source->keyword)); } 
else if (!ustricmp(source->keyword, L"xhtml-contents-depth-4")) { ret.contents_depth[4] = utoi(uadv(source->keyword)); } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-5")) { ret.contents_depth[5] = utoi(uadv(source->keyword)); } else if (!ustricmp(source->keyword, L"xhtml-leaf-level")) { ret.leaf_level = utoi(uadv(source->keyword)); } else if (!ustricmp(source->keyword, L"xhtml-leaf-smallest-contents")) { ret.leaf_smallest_contents = utoi(uadv(source->keyword)); } else if (!ustricmp(source->keyword, L"xhtml-versionid")) { ret.include_version_id = utob(uadv(source->keyword)); } else if (!ustricmp(source->keyword, L"xhtml-leaf-contains-contents")) { ret.leaf_contains_contents = utob(uadv(source->keyword)); } else if (!ustricmp(source->keyword, L"xhtml-suppress-address")) { ret.suppress_address = utob(uadv(source->keyword)); } else if (!ustricmp(source->keyword, L"xhtml-author")) { ret.author = uadv(source->keyword); } else if (!ustricmp(source->keyword, L"chm-toc-file")) { ret.chm_toc_file = uadv(source->keyword); } else if (!ustricmp(source->keyword, L"chm-ind-file")) { ret.chm_ind_file = uadv(source->keyword); } else if (!ustricmp(source->keyword, L"xhtml-description")) { ret.description = uadv(source->keyword); } else if (!ustricmp(source->keyword, L"xhtml-head-end")) { ret.head_end = uadv(source->keyword); } else if (!ustricmp(source->keyword, L"xhtml-body-start")) { ret.body_start = uadv(source->keyword); } else if (!ustricmp(source->keyword, L"xhtml-body-tag")) { ret.body = uadv(source->keyword); } else if (!ustricmp(source->keyword, L"xhtml-body-end")) { ret.body_end = uadv(source->keyword); } else if (!ustricmp(source->keyword, L"xhtml-address-start")) { ret.address_start = uadv(source->keyword); } else if (!ustricmp(source->keyword, L"xhtml-address-end")) { ret.address_end = uadv(source->keyword); } else if (!ustricmp(source->keyword, L"xhtml-navigation-attributes")) { ret.nav_attrs = uadv(source->keyword); } else if (!ustricmp(source->keyword, 
L"xhtml-chapter-numeric")) { ret.fchapter.just_numbers = utob(uadv(source->keyword)); } else if (!ustricmp(source->keyword, L"xhtml-chapter-suffix")) { ret.fchapter.number_suffix = ustrdup(uadv(source->keyword)); } else if (!ustricmp(source->keyword, L"xhtml-rlink-prefix")) { ret.rlink_prefix = uadv(source->keyword); } else if (!ustricmp(source->keyword, L"xhtml-rlink-suffix")) { ret.rlink_suffix = uadv(source->keyword); } else if (!ustricmp(source->keyword, L"xhtml-section-numeric")) { wchar_t *p = uadv(source->keyword); int n = 0; if (uisdigit(*p)) { n = utoi(p); p = uadv(p); } if (n >= ret.nfsect) { int i; ret.fsect = resize(ret.fsect, n + 1); for (i = ret.nfsect; i <= n; i++) ret.fsect[i] = ret.fsect[ret.nfsect - 1]; ret.nfsect = n + 1; } ret.fsect[n].just_numbers = utob(p); } else if (!ustricmp(source->keyword, L"xhtml-section-suffix")) { wchar_t *p = uadv(source->keyword); int n = 0; if (uisdigit(*p)) { n = utoi(p); p = uadv(p); } if (n >= ret.nfsect) { int i; ret.fsect = resize(ret.fsect, n + 1); for (i = ret.nfsect; i <= n; i++) ret.fsect[i] = ret.fsect[ret.nfsect - 1]; ret.nfsect = n + 1; } ret.fsect[n].number_suffix = ustrdup(p); } } } /* printf(" !!! leaf_level = %i\n", ret.leaf_level); printf(" !!! contentdepth-0 = %i\n", ret.contents_depth[0]); printf(" !!! contentdepth-1 = %i\n", ret.contents_depth[1]); printf(" !!! contentdepth-2 = %i\n", ret.contents_depth[2]); printf(" !!! contentdepth-3 = %i\n", ret.contents_depth[3]); printf(" !!! contentdepth-4 = %i\n", ret.contents_depth[4]); printf(" !!! contentdepth-5 = %i\n", ret.contents_depth[5]); printf(" !!! 
leaf_contains_contents = %i\n", ret.leaf_contains_contents); */ return ret;}static xhtmlsection *xhtml_new_section(xhtmlsection * last){ xhtmlsection *ret = mknew(xhtmlsection); ret->next = NULL; ret->child = NULL; ret->parent = NULL; ret->chain = last; ret->para = NULL; ret->file = NULL; ret->fragment = NULL; ret->level = -1; /* marker: end of chain */ return ret;}/* Returns NULL or the section that marks that paragraph */static xhtmlsection *xhtml_find_section(paragraph * p){ xhtmlsection *ret = topsection; if (xhtml_para_level(p) == -1) { /* first, we back-track to a section paragraph */ paragraph *p2 = sourceparas; paragraph *p3 = NULL; while (p2 && p2 != p) { if (xhtml_para_level(p2) != -1) { p3 = p2; } p2 = p2->next; } if (p3 == NULL) { /* for some reason, we couldn't find a section before this paragraph ... ? */ /* Note that this can happen, if you have a cross-reference to before the first chapter starts. * So don't do that, then. */ return NULL; } p = p3; } while (ret && ret->para != p) {/* printf(" xhtml_find_section(): checking %s for para @ %p\n", ret->fragment, p);*/ ret = ret->chain; } return ret;}static xhtmlfile *xhtml_new_file(xhtmlsection * sect){ xhtmlfile *ret = mknew(xhtmlfile); ret->next = NULL; ret->child = NULL; ret->parent = NULL; ret->filename = NULL; ret->sections = sect; ret->is_leaf = (sect != NULL && sect->level == conf.leaf_level); if (sect == NULL) { if (conf.leaf_level == 0) { /* currently unused */#define FILENAME_MANUAL "Manual.html"#define FILENAME_CONTENTS "Contents.html" ret->filename = smalloc(strlen(FILENAME_MANUAL) + 1); sprintf(ret->filename, FILENAME_MANUAL); } else { ret->filename = smalloc(strlen(FILENAME_CONTENTS) + 1); sprintf(ret->filename, FILENAME_CONTENTS); } } else { paragraph *p = sect->para; rdstringc fname_c = { 0, 0, NULL }; char *c; word *w; for (w = (p->kwtext) ? 
(p->kwtext) : (p->words); w; w = w->next) { switch (removeattr(w->type)) { case word_Normal: /*case word_Emph: case word_Code: case word_WeakCode: */ xhtml_utostr(w->text, &c); rdaddsc(&fname_c, c); sfree(c); break; } } rdaddsc(&fname_c, ".html"); ret->filename = rdtrimc(&fname_c); } /* printf(" ! new file '%s', is_leaf == %s\n", ret->filename, (ret->is_leaf)?("true"):("false")); */ return ret;}/* * Walk the tree fixing up files which are actually leaf (ie * have no children) but aren't at leaf level, so they have the * leaf flag set. */void xhtml_fixup_layout(xhtmlfile * file){ if (file->child == NULL) { file->is_leaf = TRUE; } else { xhtml_fixup_layout(file->child); } if (file->next) xhtml_fixup_layout(file->next);}/* * Create the tree structure so we know where everything goes. * Method: * * Ignoring file splitting, we have three choices with each new section: * * +-----------------+-----------------+ * | | | * X +----X----+ (1) * | | * Y (2) * | * (3) * * Y is the last section we added (currentsect). * If sect is the section we want to add, then: * * (1) if sect->level < currentsect->level * (2) if sect->level == currentsect->level * (3) if sect->level > currentsect->level * * This requires the constraint that you never skip section numbers * (so you can't have a.b.c.d without all of a, a.b and a.b.c existing). * * Note that you _can_ have 1.1.1.1 followed by 1.2 - you can change * more than one level at a time. Lots of asserts, and probably part of * the algorithm here, rely on this being true. (It currently isn't * enforced by halibut, however.) * * File splitting makes this harder. For instance, say we added at (3) * above and now need to add another section. We are splitting at level * 2, ie the level of Y. Z is the last section we added: * * +-----------------+-----------------+ * | | | * X +----X----+ (1) * | | * +----Y----+ (1) * | | * Z (2) * | * (3) * * The (1) case is now split; we need to search upwards to find where * to actually link in. 
 * The other two cases remain the same (and will
 * always be like this).
 *
 * File splitting complicates the linking. The decision of whether
 * to split to a new file always rests on the same condition, however
 * (is the level of this section higher than the leaf_level
 * configuration value or not).
 *
 * Treating the cases backwards:
 *
 * (3) same file if sect->level > conf.leaf_level, otherwise new file
 *
 *     if in the same file, currentsect->child points to sect
 *     otherwise the linking is done through the file tree (which works
 *     in more or less the same way, ie currentfile->child points to
 *     the new file)
 *
 * (2) same file if sect->level > conf.leaf_level, otherwise new file
 *
 *     if in the same file, currentsect->next points to sect
 *     otherwise file linking and currentfile->next points to the new
 *     file (we know that Z must have caused a new file to be created)
 *
 * (1) same file if sect->level > conf.leaf_level, otherwise new file
 *
 *     this is actually effectively the same case as (2) here,
 *     except that we first have to travel up the sections to figure
 *     out which section this new one will be a sibling of. In doing
 *     so, we may disappear off the top of a file and have to go up
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -