📄 input.c

📁 Nullsoft Scriptable Install System 2.28 源代码
💻 C
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
/*
 * input.c: read the source form
 */

#include <stdio.h>
#include <assert.h>
#include <time.h>
#include "halibut.h"

#define TAB_STOP 8              /* for column number tracking */

/*
 * Point the input's position tracker at line 1 of `fname'. The
 * column starts at 1 when column reporting is enabled, and is set
 * to -1 otherwise. The filename pointer is stored, not copied.
 */
static void setpos(input * in, char *fname)
{
  int startcol;

  if (in->reportcols)
    startcol = 1;
  else
    startcol = -1;

  in->pos.col = startcol;
  in->pos.line = 1;
  in->pos.filename = fname;
}

/*
 * Push the character `c', together with the position `*pos' it was
 * read from, onto the input's pushback stack so that a later get()
 * can hand it out again. When the stack is full it is grown to hold
 * 16 more entries than are currently in use.
 */
static void unget(input * in, int c, filepos * pos)
{
  int slot = in->npushback;

  if (slot >= in->pushbacksize)
  {
    in->pushbacksize = slot + 16;
    in->pushback = resize(in->pushback, in->pushbacksize);
  }

  in->pushback[slot].pos = *pos;        /* structure copy */
  in->pushback[slot].chr = c;
  in->npushback = slot + 1;
}

/* ---------------------------------------------------------------------- */
/*
 * Macro subsystem
 */
/* One macro definition, as stored in the macros tree. */
typedef struct macro_Tag macro;
struct macro_Tag {
  wchar_t *name;                /* macro name; the key macrocmp() compares */
  wchar_t *text;                /* text read in place of the macro */
};
/*
 * One entry on the stack of macro expansions currently being read
 * by get(). The innermost expansion is at in->stack.
 */
struct macrostack_Tag {
  macrostack *next;             /* expansion that was active before this one */
  wchar_t *text;                /* text being read; shared with the macro record */
  int ptr;                      /* index in text of the next character to return */
  int npushback;                /* value of in->npushback when expansion began */
  filepos pos;                  /* position handed to macrolookup() */
};
/*
 * Compare two macro records by name using ustrcmp(). Takes untyped
 * pointers; presumably this is the comparison callback given to the
 * macros tree234 (the tree's creation is not visible here).
 */
static int macrocmp(void *av, void *bv)
{
  macro *lhs = av;
  macro *rhs = bv;

  return ustrcmp(lhs->name, rhs->name);
}
/*
 * Define a macro called `name' that expands to `text'.
 *
 * Both strings are taken over by this function. On success they are
 * held by the new record added to `macros' (macrocleanup() frees
 * them). If the tree already holds a macro of the same name, an
 * err_macroexists error is reported at `fpos' and the strings and
 * the rejected record are all freed.
 */
static void
macrodef(tree234 * macros, wchar_t * name, wchar_t * text, filepos fpos)
{
  macro *m = mknew(macro);
  m->name = name;
  m->text = text;
  if (add234(macros, m) != m)
  {
    /*
     * add234() did not hand back our own record, so the tree has
     * not taken it: nothing else refers to m, and it must be
     * released here along with the strings it points to.
     */
    error(err_macroexists, &fpos, name);
    sfree(name);
    sfree(text);
    sfree(m);
  }
}
/*
 * Look `name' up in `macros'. If it is defined, push a new expansion
 * onto the input's macro stack so that subsequent get() calls read
 * the macro's text, and return TRUE; otherwise return FALSE and
 * leave the input untouched.
 *
 * `*pos' is copied into the new stack entry. The entry also records
 * the current pushback depth, which get() uses as the floor below
 * which it will not pop pushed-back characters while this expansion
 * is active.
 */
static int
macrolookup(tree234 * macros, input * in, wchar_t * name, filepos * pos)
{
  macro key;
  macro *found;
  macrostack *frame;

  /* Only the name is filled in: it is all macrocmp() looks at. */
  key.name = name;
  found = find234(macros, &key, NULL);
  if (!found)
    return FALSE;

  frame = mknew(macrostack);
  frame->text = found->text;    /* shared with the macro record, not copied */
  frame->ptr = 0;
  frame->pos = *pos;            /* structure copy */
  frame->npushback = in->npushback;
  frame->next = in->stack;
  in->stack = frame;
  return TRUE;
}
/*
 * Free every macro record in `macros' (its name, its text and the
 * record itself), then free the tree.
 */
static void macrocleanup(tree234 * macros)
{
  int idx = 0;
  macro *entry;

  /* Walk the tree by index until index234() runs off the end. */
  while ((entry = (macro *) index234(macros, idx)) != NULL)
  {
    sfree(entry->name);
    sfree(entry->text);
    sfree(entry);
    idx++;
  }
  freetree234(macros);
}

/*
 * Fetch the next input character.
 *
 * Sources are tried in this order:
 *  1. characters pushed back with unget() since the innermost active
 *     macro expansion began (or all of them, if no expansion is
 *     active);
 *  2. the text of the innermost active macro expansion;
 *  3. the current input file.
 *
 * If `pos' is non-NULL it receives the position associated with the
 * returned character: the position saved by unget(), the position
 * recorded for the macro expansion, or the file position before the
 * character was read.
 *
 * Can return EOF: when the current file runs out (it is then closed
 * and in->currfp is cleared), or when there is no current file.
 */
static int get(input * in, filepos * pos)
{
  int pushbackpt = in->stack ? in->stack->npushback : 0;
  if (in->npushback > pushbackpt)
  {
    --in->npushback;
    if (pos)
      *pos = in->pushback[in->npushback].pos;   /* structure copy */
    return in->pushback[in->npushback].chr;
  } else if (in->stack)
  {
    /*
     * NOTE(review): an expansion whose text is empty would return
     * the terminator and then read one element past it; this relies
     * on macro text never being empty - confirm against callers.
     */
    wchar_t c = in->stack->text[in->stack->ptr];
    /*
     * Report the expansion's recorded position, so that callers
     * never see an unset *pos for characters that come from a macro.
     */
    if (pos)
      *pos = in->stack->pos;    /* structure copy */
    /* Pop the expansion once its last character has been handed out */
    if (in->stack->text[++in->stack->ptr] == L'\0')
    {
      macrostack *tmp = in->stack;
      in->stack = tmp->next;
      sfree(tmp);
    }
    return c;
  } else if (in->currfp)
  {
    int c = getc(in->currfp);

    if (c == EOF)
    {
      fclose(in->currfp);
      in->currfp = NULL;
    }
    /* Track line numbers, for error reporting */
    if (pos)
      *pos = in->pos;
    if (in->reportcols)
    {
      switch (c)
      {
      case '\t':
        /*
         * Columns are 1-based with tab stops every TAB_STOP
         * columns: move to the column just after the next stop
         * (1..8 -> 9, 9..16 -> 17, ...).
         */
        in->pos.col += TAB_STOP - (in->pos.col - 1) % TAB_STOP;
        break;
      case '\n':
        in->pos.col = 1;
        in->pos.line++;
        break;
      default:
        in->pos.col++;
        break;
      }
    } else
    {
      in->pos.col = -1;
      if (c == '\n')
        in->pos.line++;
    }
    /* FIXME: do input charmap translation. We should be returning
     * Unicode here. */
    return c;
  } else
    return EOF;
}

/*
 * Lexical analysis of source files.
 */
/* A lexical token, as returned by get_token(). */
typedef struct token_Tag token;
struct token_Tag {
  int type;                     /* one of the tok_* values */
  int cmd;                      /* keyword id (c_*), filled in by match_kw() */
  int aux;                      /* numeric argument, set for \S and \u */
  wchar_t *text;                /* token text; NULL by default */
  filepos pos;                  /* position of the token's first character */
};
/* Token types, as stored in the `type' field of a token. */
enum {
  tok_eof,                      /* end of file */
  tok_eop,                      /* end of paragraph */
  tok_white,                    /* whitespace */
  tok_word,                     /* a word or word fragment */
  tok_cmd,                      /* \command */
  tok_lbrace,                   /* { */
  tok_rbrace                    /* } */
};

/*
 * Halibut command keywords, as stored in the `cmd' field of a token.
 * match_kw() maps command names onto these ids.
 */
enum {
  c__invalid,                   /* invalid command */
  c__comment,                   /* comment command (\#) */
  c__escaped,                   /* escaped character */
  c__nbsp,                      /* nonbreaking space */
  c_A,                          /* appendix heading */
  c_B,                          /* bibliography entry */
  c_BR,                         /* bibliography rewrite */
  c_C,                          /* chapter heading */
  c_H,                          /* heading */
  c_I,                          /* invisible index mark */
  c_IM,                         /* index merge/rewrite */
  c_K,                          /* capitalised cross-reference */
  c_S,                          /* \S<n>: aux field is n (1 if no digits are given) */
  c_U,                          /* unnumbered-chapter heading */
  c_W,                          /* Web hyperlink */
  c_L,                          /* Relative/local hyperlink */
  c_b,                          /* bulleted list */
  c_c,                          /* code */
  c_cfg,                        /* configuration directive */
  c_copyright,                  /* copyright statement */
  c_cw,                         /* weak code */
  c_date,                       /* document processing date */
  c_define,                     /* macro definition */
  c_e,                          /* emphasis */
  c_i,                          /* visible index mark */
  c_ii,                         /* uncapitalised visible index mark */
  c_k,                          /* uncapitalised cross-reference */
  c_R,                          /* free text cross-reference */
  c_n,                          /* numbered list */
  c_nocite,                     /* bibliography trickery */
  c_preamble,                   /* document preamble text */
  c_q,                          /* quote marks */
  c_rule,                       /* horizontal rule */
  c_title,                      /* document title */
  c_u,                          /* \u<hex>: aux field is the char code given in hex */
  c_versionid                   /* document RCS id */
};

/*
 * Character classification and digit conversion helpers for the
 * lexer. Several of these evaluate their argument more than once, so
 * the argument must be free of side effects. fromdec() and fromhex()
 * are only meaningful on characters accepted by isdec() and ishex().
 *
 * Perhaps whitespace should be defined in a more Unicode-friendly way?
 */
#define iswhite(c) ( (c)==0x20 || (c)==0x09 || (c)==0x0D || (c)==0x0A )
#define isnl(c) ( (c)==0x0A )
#define isdec(c) ( ('0'<=(c) && (c)<='9') )
#define fromdec(c) ( (c) - '0' )
#define ishex(c) ( ('0'<=(c) && (c)<='9') || ('A'<=(c) && (c)<='F') || ('a'<=(c) && (c)<='f') )
/* Masking with 0xDF folds 'a'..'f' onto 'A'..'F' before subtracting */
#define fromhex(c) ( (c)<='9' ? (c) - '0' : ((c) & 0xDF) - ('A' - 10) )
#define iscmd(c) ( ('0'<=(c) && (c)<='9') || ('A'<=(c) && (c)<='Z') || ('a'<=(c) && (c)<='z') )

/*
 * Keyword comparison function. Like strcmp, but with a wchar_t *
 * on the left and a char * on the right.
 *
 * Returns zero if the strings are equal; otherwise the difference
 * between the first pair of characters that differ (the left string's
 * character minus the right's), with a string that ends early
 * contributing its terminating zero.
 */
static int kwcmp(wchar_t const *p, char const *q)
{
  for (;; p++, q++)
  {
    int diff = *p - *q;

    /*
     * Stop at the first mismatch, or at the end of the left string;
     * if the characters match and the left one is the terminator,
     * then so is the right one.
     */
    if (diff != 0 || *p == 0)
      return diff;
  }
}

/*
 * Match a keyword.
 *
 * Examines tok->text (the command name) and stores the matching
 * command id in tok->cmd. For \S<digits> and \u<hexdigits> the
 * numeric part is also stored in tok->aux. Names that match nothing
 * get c__invalid.
 */
static void match_kw(token * tok)
{
  /*
   * FIXME. The ids are explicit in here so as to allow long-name
   * equivalents to the various very short keywords.
   *
   * This list must be kept sorted in the order kwcmp() defines
   * (plain character-code order): it is searched by binary search.
   */
  static const struct {
    char const *name;
    int id;
  } keywords[] = {
    {"#", c__comment},          /* comment command (\#) */
    {"-", c__escaped},          /* nonbreaking hyphen */
    {"A", c_A},                 /* appendix heading */
    {"B", c_B},                 /* bibliography entry */
    {"BR", c_BR},               /* bibliography rewrite */
    {"C", c_C},                 /* chapter heading */
    {"H", c_H},                 /* heading */
    {"I", c_I},                 /* invisible index mark */
    {"IM", c_IM},               /* index merge/rewrite */
    {"K", c_K},                 /* capitalised cross-reference */
    {"L", c_L},                 /* Relative/local hyperlink */
    {"R", c_R},                 /* free text cross-reference */
    {"U", c_U},                 /* unnumbered-chapter heading */
    {"W", c_W},                 /* Web hyperlink */
    {"\\", c__escaped},         /* escaped backslash (\\) */
    {"_", c__nbsp},             /* nonbreaking space (\_) */
    {"b", c_b},                 /* bulleted list */
    {"c", c_c},                 /* code */
    {"cfg", c_cfg},             /* configuration directive */
    {"copyright", c_copyright}, /* copyright statement */
    {"cw", c_cw},               /* weak code */
    {"date", c_date},           /* document processing date */
    {"define", c_define},       /* macro definition */
    {"e", c_e},                 /* emphasis */
    {"i", c_i},                 /* visible index mark */
    {"ii", c_ii},               /* uncapitalised visible index mark */
    {"k", c_k},                 /* uncapitalised cross-reference */
    {"n", c_n},                 /* numbered list */
    {"nocite", c_nocite},       /* bibliography trickery */
    {"preamble", c_preamble},   /* document preamble text */
    {"q", c_q},                 /* quote marks */
    {"rule", c_rule},           /* horizontal rule */
    {"title", c_title},         /* document title */
    {"versionid", c_versionid}, /* document RCS id */
    {"{", c__escaped},          /* escaped lbrace (\{) */
    {"}", c__escaped},          /* escaped rbrace (\}) */
  };
  int below, above;
  wchar_t *rest = tok->text + 1;
  int value;

  /*
   * Special cases: \S{0,1,2,...} and \uABCD. If the syntax
   * doesn't match correctly, we just fall through to the
   * binary-search phase.
   */
  switch (tok->text[0])
  {
  case 'S':
    /* A bare \S counts as 1; otherwise expect decimal digits. */
    if (*rest == 0)
      value = 1;
    else
    {
      for (value = 0; isdec(*rest); rest++)
        value = 10 * value + fromdec(*rest);
    }
    if (*rest == 0)
    {
      tok->cmd = c_S;
      tok->aux = value;
      return;
    }
    break;
  case 'u':
    /* Expect hex digits (possibly none) up to the end of the name. */
    for (value = 0; ishex(*rest); rest++)
      value = 16 * value + fromhex(*rest);
    if (*rest == 0)
    {
      tok->cmd = c_u;
      tok->aux = value;
      return;
    }
    break;
  default:
    break;
  }

  /*
   * Binary search. `below' and `above' are indices known to lie
   * strictly outside the range that could still hold the name.
   */
  below = -1;
  above = sizeof(keywords) / sizeof(*keywords);
  while (above - below > 1)
  {
    int probe = (below + above) / 2;
    int order = kwcmp(tok->text, keywords[probe].name);

    if (order == 0)
    {
      tok->cmd = keywords[probe].id;
      return;
    }
    if (order < 0)
      above = probe;
    else
      below = probe;
  }

  tok->cmd = c__invalid;
}


/*
 * Read a token from the input file, in the normal way (`normal' in
 * the sense that code paragraphs work a different way).
 */
token get_token(input * in)
{
  int c;
  int nls;
  token ret;
  rdstring rs = { 0, 0, NULL };
  filepos cpos;

  ret.cmd = c__invalid;
  ret.aux = FALSE;
  ret.text = NULL;              /* default */
  c = get(in, &cpos);
  ret.pos = cpos;
  if (iswhite(c))
  {                             /* tok_white or tok_eop */
    nls = 0;
    do
    {
      if (isnl(c))
        nls++;
    }
    while ((c = get(in, &cpos)) != EOF && iswhite(c));
    if (c == EOF)
    {
      ret.type = tok_eof;
      return ret;
    }
    unget(in, c, &cpos);
    ret.type = (nls > 1 ? tok_eop : tok_white);
    return ret;
  } else if (c == EOF)
  {                             /* tok_eof */
    ret.type = tok_eof;
    return ret;
  } else if (c == '\\')
  {                             /* tok_cmd */
    c = get(in, &cpos);
    if (c == '-' || c == '\\' || c == '_' ||
        c == '#' || c == '{' || c == '}')
    {
      /* single-char command */
      rdadd(&rs, (wchar_t)c);
    } else if (c == 'u')
    {
      int len = 0;
      do
      {
        rdadd(&rs, (wchar_t)c);
        len++;
        c = get(in, &cpos);
      }
      while (ishex(c) && len < 5);
      unget(in, c, &cpos);
    } else if (iscmd(c))
    {
      do
      {
        rdadd(&rs, (wchar_t)c);
        c = get(in, &cpos);
      }
12 3 下一页
💿 文件大小 1876 K
👤 上传用户 wp1111111
📂 所属分类 Internet/网络编程
🏷️ 相关标签

#NullSofts #criptable #install #system
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -