⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 token.cc

📁 早期freebsd实现
💻 CC
字号:
// -*- C++ -*-/* Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc.     Written by James Clark (jjc@jclark.com)This file is part of groff.groff is free software; you can redistribute it and/or modify it underthe terms of the GNU General Public License as published by the FreeSoftware Foundation; either version 2, or (at your option) any laterversion.groff is distributed in the hope that it will be useful, but WITHOUT ANYWARRANTY; without even the implied warranty of MERCHANTABILITY orFITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public Licensefor more details.You should have received a copy of the GNU General Public License alongwith groff; see the file COPYING.  If not, write to the Free SoftwareFoundation, 675 Mass Ave, Cambridge, MA 02139, USA. */#include "refer.h"#include "token.h"#define TOKEN_TABLE_SIZE 1009// I believe in Icelandic thorn sorts after z.#define THORN_SORT_KEY "{"struct token_table_entry {  const char *tok;  token_info ti;  token_table_entry();};token_table_entry token_table[TOKEN_TABLE_SIZE];int ntokens = 0;static void skip_name(const char **ptr, const char *end){  if (*ptr < end) {    switch (*(*ptr)++) {    case '(':      if (*ptr < end) {	*ptr += 1;	if (*ptr < end)	  *ptr += 1;      }      break;    case '[':      while (*ptr < end)	if (*(*ptr)++ == ']')	  break;      break;    }  }}int get_token(const char **ptr, const char *end){  if (*ptr >= end)    return 0;  char c = *(*ptr)++;  if (c == '\\' && *ptr < end) {    switch (**ptr) {    default:      *ptr += 1;      break;    case '(':    case '[':      skip_name(ptr, end);      break;    case '*':    case 'f':      *ptr += 1;      skip_name(ptr, end);      break;    }  }  return 1;}token_info::token_info(): type(TOKEN_OTHER), sort_key(0), other_case(0){}void token_info::set(token_type t, const char *sk, const char *oc){  assert(oc == 0 || t == TOKEN_UPPER || t == TOKEN_LOWER);  type = t;  sort_key = sk;  other_case = oc;}void token_info::sortify(const char *start, const char *end, string &result)     const{  if (sort_key)    result += sort_key;  else if (type == TOKEN_UPPER || type == TOKEN_LOWER) {    for (; start < end; start++)      if (csalpha(*start))	result += cmlower(*start);  }}int token_info::sortify_non_empty(const char *start, const char *end) const{  if (sort_key)    return *sort_key != '\0';  if (type != TOKEN_UPPER && type != TOKEN_LOWER)    return 0;  for (; start < end; start++)    if (csalpha(*start))      return 1;  return 0;}void token_info::lower_case(const char *start, const char *end,			    string &result) const{  if (type != TOKEN_UPPER) {    while (start < end)      result += *start++;  }  else if (other_case)    result += other_case;  else {    while (start < end)      result += cmlower(*start++);  }}void token_info::upper_case(const char *start, const char *end,			    string &result) const{  if (type != TOKEN_LOWER) {    while (start < end)      result += *start++;  }  else if (other_case)    result += other_case;  else {    while (start < end)      result += cmupper(*start++);  }}token_table_entry::token_table_entry(): tok(0){}static void store_token(const char *tok, token_type typ,			const char *sk = 0, const char *oc = 0){  unsigned n = hash_string(tok, strlen(tok)) % TOKEN_TABLE_SIZE;  while (n >= 0) {    if (token_table[n].tok == 0) {      if (++ntokens == TOKEN_TABLE_SIZE)	assert(0);      token_table[n].tok = tok;      break;    }    if (strcmp(tok, token_table[n].tok) == 0)      break;    if (--n < 0)      n = TOKEN_TABLE_SIZE - 1;  }  token_table[n].ti.set(typ, sk, oc);}token_info default_token_info;const token_info *lookup_token(const char *start, const char *end){  unsigned n = hash_string(start, end - start) % TOKEN_TABLE_SIZE;  while (n >= 0) {    if (token_table[n].tok == 0)      break;    if (strlen(token_table[n].tok) == end - start	&& memcmp(token_table[n].tok, start, end - start) == 0)      return &(token_table[n].ti);    if (--n < 0)      n = TOKEN_TABLE_SIZE - 1;  }  return &default_token_info;}static void init_ascii(){  for (const char *p = "abcdefghijklmnopqrstuvwxyz"; *p; p++) {    char buf[2];    buf[0] = *p;    buf[1] = '\0';    store_token(strsave(buf), TOKEN_LOWER);    buf[0] = cmupper(buf[0]);    store_token(strsave(buf), TOKEN_UPPER);  }  for (p = "0123456789"; *p; p++) {    char buf[2];    buf[0] = *p;    buf[1] = '\0';    const char *s = strsave(buf);    store_token(s, TOKEN_OTHER, s);  }  for (p = ".,:;?!"; *p; p++) {    char buf[2];    buf[0] = *p;    buf[1] = '\0';    store_token(strsave(buf), TOKEN_PUNCT);  }  store_token("-", TOKEN_HYPHEN);}static void store_letter(const char *lower, const char *upper,		  const char *sort_key = 0){  store_token(lower, TOKEN_LOWER, sort_key, upper);  store_token(upper, TOKEN_UPPER, sort_key, lower);}static void init_letter(unsigned char uc_code, unsigned char lc_code,		 const char *sort_key){  char lbuf[2];  lbuf[0] = lc_code;  lbuf[1] = 0;  char ubuf[2];  ubuf[0] = uc_code;  ubuf[1] = 0;  store_letter(strsave(lbuf), strsave(ubuf), sort_key);}static void init_latin1(){  init_letter(0xc0, 0xe0, "a");  init_letter(0xc1, 0xe1, "a");  init_letter(0xc2, 0xe2, "a");  init_letter(0xc3, 0xe3, "a");  init_letter(0xc4, 0xe4, "a");  init_letter(0xc5, 0xe5, "a");  init_letter(0xc6, 0xe6, "ae");  init_letter(0xc7, 0xe7, "c");  init_letter(0xc8, 0xe8, "e");  init_letter(0xc9, 0xe9, "e");  init_letter(0xca, 0xea, "e");  init_letter(0xcb, 0xeb, "e");  init_letter(0xcc, 0xec, "i");  init_letter(0xcd, 0xed, "i");  init_letter(0xce, 0xee, "i");  init_letter(0xcf, 0xef, "i");  init_letter(0xd0, 0xf0, "d");  init_letter(0xd1, 0xf1, "n");  init_letter(0xd2, 0xf2, "o");  init_letter(0xd3, 0xf3, "o");  init_letter(0xd4, 0xf4, "o");  init_letter(0xd5, 0xf5, "o");  init_letter(0xd6, 0xf6, "o");  init_letter(0xd8, 0xf8, "o");  init_letter(0xd9, 0xf9, "u");  init_letter(0xda, 0xfa, "u");  init_letter(0xdb, 0xfb, "u");  init_letter(0xdc, 0xfc, "u");  init_letter(0xdd, 0xfd, "y");  init_letter(0xde, 0xfe, THORN_SORT_KEY);  store_token("\337", TOKEN_LOWER, "ss", "SS");  store_token("\377", TOKEN_LOWER, "y", "Y");}static void init_two_char_letter(char l1, char l2, char u1, char u2,				 const char *sk = 0){  char buf[6];  buf[0] = '\\';  buf[1] = '(';  buf[2] = l1;  buf[3] = l2;  buf[4] = '\0';  const char *p = strsave(buf);  buf[2] = u1;  buf[3] = u2;  store_letter(p, strsave(buf), sk);  buf[1] = '[';  buf[4] = ']';  buf[5] = '\0';  p = strsave(buf);  buf[2] = l1;  buf[3] = l2;  store_letter(strsave(buf), p, sk);  }static void init_special_chars(){  for (const char *p = "':^`~"; *p; p++)    for (const char *q = "aeiouy"; *q; q++) {      // Use a variable to work around bug in gcc 2.0      char c = cmupper(*q);      init_two_char_letter(*p, *q, *p, c);    }  for (p = "/l/o~n,coeaeij"; *p; p += 2) {    // Use variables to work around bug in gcc 2.0    char c0 = cmupper(p[0]);    char c1 = cmupper(p[1]);    init_two_char_letter(p[0], p[1], c0, c1);  }  init_two_char_letter('v', 's', 'v', 'S', "s");  init_two_char_letter('v', 'z', 'v', 'Z', "z");  init_two_char_letter('o', 'a', 'o', 'A', "a");  init_two_char_letter('T', 'p', 'T', 'P', THORN_SORT_KEY);  init_two_char_letter('-', 'd', '-', 'D');    store_token("\\(ss", TOKEN_LOWER, 0, "SS");  store_token("\\[ss]", TOKEN_LOWER, 0, "SS");  store_token("\\(Sd", TOKEN_LOWER, "d", "\\(-D");  store_token("\\[Sd]", TOKEN_LOWER, "d", "\\[-D]");  store_token("\\(hy", TOKEN_HYPHEN);  store_token("\\[hy]", TOKEN_HYPHEN);}static void init_strings(){  char buf[6];  buf[0] = '\\';  buf[1] = '*';  for (const char *p = "'`^^,:~v_o./;"; *p; p++) {    buf[2] = *p;    buf[3] = '\0';    store_token(strsave(buf), TOKEN_ACCENT);    buf[2] = '[';    buf[3] = *p;    buf[4] = ']';    buf[5] = '\0';    store_token(strsave(buf), TOKEN_ACCENT);  }  // -ms special letters  store_letter("\\*(th", "\\*(Th", THORN_SORT_KEY);  store_letter("\\*[th]", "\\*[Th]", THORN_SORT_KEY);  store_letter("\\*(d-", "\\*(D-");  store_letter("\\*[d-]", "\\*[D-]");  store_letter("\\*(ae", "\\*(Ae", "ae");  store_letter("\\*[ae]", "\\*[Ae]", "ae");  store_letter("\\*(oe", "\\*(Oe", "oe");  store_letter("\\*[oe]", "\\*[Oe]", "oe");  store_token("\\*3", TOKEN_LOWER, "y", "Y");  store_token("\\*8", TOKEN_LOWER, "ss", "SS");  store_token("\\*q", TOKEN_LOWER, "o", "O");}struct token_initer {  token_initer();};static token_initer the_token_initer;token_initer::token_initer(){  init_ascii();  init_latin1();  init_special_chars();  init_strings();  default_token_info.set(TOKEN_OTHER);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -