📄 label.y
字号:
/* -*- C++ -*-
   Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc.
     Written by James Clark (jjc@jclark.com)

This file is part of groff.

groff is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

groff is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License along
with groff; see the file COPYING.  If not, write to the Free Software
Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */

%{

#include "refer.h"
#include "refid.h"
#include "ref.h"
#include "token.h"

int yylex();
void yyerror(const char *);
int yyparse();

static const char *format_serial(char c, int n);

struct label_info {
  int start;
  int length;
  int count;
  int total;
  label_info(const string &);
};

label_info *lookup_label(const string &label);

// Abstract node of a parsed label specification.  evaluate() appends
// the node's text to the string argument; analyze() reports which
// CONTAINS_* features occur in the subtree rooted at the node.

struct expression {
  enum {
    // Does the tentative label depend on the reference?
    CONTAINS_VARIABLE = 01,
    CONTAINS_STAR = 02,
    CONTAINS_FORMAT = 04,
    CONTAINS_AT = 010
  };
  virtual ~expression() { }
  virtual void evaluate(int, const reference &, string &,
                        substring_position &) = 0;
  virtual unsigned analyze() { return 0; }
};

// Built for `@' in a label specification.

class at_expr : public expression {
public:
  at_expr() { }
  void evaluate(int, const reference &, string &, substring_position &);
  unsigned analyze() { return CONTAINS_VARIABLE|CONTAINS_AT; }
};

// Built for `%' followed by a letter or by digits.  For the digit form
// the type is '0', width is the number of digits written and
// first_number is their value.

class format_expr : public expression {
  char type;
  int width;
  int first_number;
public:
  format_expr(char c, int w = 0, int f = 1)
    : type(c), width(w), first_number(f) { }
  void evaluate(int, const reference &, string &, substring_position &);
  unsigned analyze() { return CONTAINS_FORMAT; }
};

// Built for a letter optionally followed by a number; the grammar
// passes the written number minus one, or 0 when no number is given.

class field_expr : public expression {
  int number;
  char name;
public:
  // Initializers are listed in declaration order, which is the order
  // in which the members are actually initialized.
  field_expr(char nm, int num) : number(num), name(nm) { }
  void evaluate(int, const reference &, string &, substring_position &);
  unsigned analyze() { return CONTAINS_VARIABLE; }
};

// Built for a quoted literal; keeps its own copy of the text.

class literal_expr : public expression {
  string s;
public:
  literal_expr(const char *ptr, int len) : s(ptr, len) { }
  void evaluate(int, const reference &, string &, substring_position &);
};

// Base class for nodes with one operand.  The operand may be null and
// is deleted together with the node.

class unary_expr : public expression {
protected:
  expression *expr;
public:
  unary_expr(expression *e) : expr(e) { }
  ~unary_expr() { delete expr; }
  void evaluate(int, const reference &, string &, substring_position &) = 0;
  unsigned analyze() { return expr ? expr->analyze() : 0; }
};

// This caches the analysis of an expression.

class analyzed_expr : public unary_expr {
  unsigned flags;
public:
  analyzed_expr(expression *);
  void evaluate(int, const reference &, string &, substring_position &);
  unsigned analyze() { return flags; }
};

// Built for a trailing `*'.  Its analysis drops CONTAINS_VARIABLE from
// the operand's flags and adds CONTAINS_STAR.

class star_expr : public unary_expr {
public:
  star_expr(expression *e) : unary_expr(e) { }
  void evaluate(int, const reference &, string &, substring_position &);
  unsigned analyze() {
    return ((expr ? (expr->analyze() & ~CONTAINS_VARIABLE) : 0)
            | CONTAINS_STAR);
  }
};

typedef void map_t(const char *, const char *, string &);

// Built for the `.l', `.u', `.c', `.r' and `.a' suffixes; func is the
// mapping function chosen by the grammar.

class map_expr : public unary_expr {
  map_t *func;
public:
  map_expr(expression *e, map_t *f) : unary_expr(e), func(f) { }
  void evaluate(int, const reference &, string &, substring_position &);
};

typedef const char *extractor_t(const char *, const char *, const char **);

// Built for the `.y' and `.n' suffixes.  part is the value of the
// grammar's `flag' nonterminal: 1 for `+', 0 for none, -1 for `-',
// which are the values of BEFORE, MATCH and AFTER.

class extractor_expr : public unary_expr {
  int part;
  extractor_t *func;
public:
  enum { BEFORE = +1, MATCH = 0, AFTER = -1 };
  // Base first, then members in declaration order.
  extractor_expr(expression *e, extractor_t *f, int pt)
    : unary_expr(e), part(pt), func(f) { }
  void evaluate(int, const reference &, string &, substring_position &);
};

// Built for `+number' (n positive) and `-number' (n negated).

class truncate_expr : public unary_expr {
  int n;
public:
  // Base first, then members in declaration order.
  truncate_expr(expression *e, int i) : unary_expr(e), n(i) { }
  void evaluate(int, const reference &, string &, substring_position &);
};

// Built for `<...>'.

class separator_expr : public unary_expr {
public:
  separator_expr(expression *e) : unary_expr(e) { }
  void evaluate(int, const reference &, string &, substring_position &);
};

// Base class for nodes with two operands.  Either operand may be null;
// both are deleted together with the node.

class binary_expr : public expression {
protected:
  expression *expr1;
  expression *expr2;
public:
  binary_expr(expression *e1, expression *e2) : expr1(e1), expr2(e2) { }
  ~binary_expr() { delete expr1; delete expr2; }
  void evaluate(int, const reference &, string &, substring_position &) = 0;
  unsigned analyze() {
    return (expr1 ? expr1->analyze() : 0) | (expr2 ? expr2->analyze() : 0);
  }
};

// Built for `|'.

class alternative_expr : public binary_expr {
public:
  alternative_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { }
  void evaluate(int, const reference &, string &, substring_position &);
};

// Built for two adjacent operands.

class list_expr : public binary_expr {
public:
  list_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { }
  void evaluate(int, const reference &, string &, substring_position &);
};

// Built for `~'.

class substitute_expr : public binary_expr {
public:
  substitute_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { }
  void evaluate(int, const reference &, string &, substring_position &);
};

// Base class for nodes with three operands.  Any operand may be null;
// all are deleted together with the node.

class ternary_expr : public expression {
protected:
  expression *expr1;
  expression *expr2;
  expression *expr3;
public:
  ternary_expr(expression *e1, expression *e2, expression *e3)
    : expr1(e1), expr2(e2), expr3(e3) { }
  ~ternary_expr() { delete expr1; delete expr2; delete expr3; }
  void evaluate(int, const reference &, string &, substring_position &) = 0;
  unsigned analyze() {
    return ((expr1 ? expr1->analyze() : 0)
            | (expr2 ? expr2->analyze() : 0)
            | (expr3 ? expr3->analyze() : 0));
  }
};

// Built for `?:' and, with a null third operand, for `&'.

class conditional_expr : public ternary_expr {
public:
  conditional_expr(expression *e1, expression *e2, expression *e3)
    : ternary_expr(e1, e2, e3) { }
  void evaluate(int, const reference &, string &, substring_position &);
};

static expression *parsed_label = 0;
static expression *parsed_date_label = 0;
static expression *parsed_short_label = 0;

// Set by the top-level grammar rule when a parse succeeds.
static expression *parse_result;

// Text of every quoted literal seen by yylex() during the current
// parse; TOKEN_LITERAL values are (start, len) ranges into it.
string literals;

%}

%union {
  int num;
  expression *expr;
  struct { int ndigits; int val; } dig;
  struct { int start; int len; } str;
}

/* uppercase or lowercase letter */
%token <num> TOKEN_LETTER
/* literal characters */
%token <str> TOKEN_LITERAL
/* digit */
%token <num> TOKEN_DIGIT

%type <expr> conditional
%type <expr> alternative
%type <expr> list
%type <expr> string
%type <expr> substitute
%type <expr> optional_conditional
%type <num> number
%type <dig> digits
%type <num> optional_number
%type <num> flag

%%

expr:
        optional_conditional
                { parse_result = ($1 ? new analyzed_expr($1) : 0); }
        ;

conditional:
        alternative
                { $$ = $1; }
        | alternative '?' optional_conditional ':' conditional
                { $$ = new conditional_expr($1, $3, $5); }
        ;

optional_conditional:
        /* empty */
                { $$ = 0; }
        | conditional
                { $$ = $1; }
        ;

alternative:
        list
                { $$ = $1; }
        | alternative '|' list
                { $$ = new alternative_expr($1, $3); }
        | alternative '&' list
                { $$ = new conditional_expr($1, $3, 0); }
        ;

list:
        substitute
                { $$ = $1; }
        | list substitute
                { $$ = new list_expr($1, $2); }
        ;

substitute:
        string
                { $$ = $1; }
        | substitute '~' string
                { $$ = new substitute_expr($1, $3); }
        ;

string:
        '@'
                { $$ = new at_expr; }
        | TOKEN_LITERAL
                {
                  $$ = new literal_expr(literals.contents() + $1.start,
                                        $1.len);
                }
        | TOKEN_LETTER
                { $$ = new field_expr($1, 0); }
        | TOKEN_LETTER number
                { $$ = new field_expr($1, $2 - 1); }
        | '%' TOKEN_LETTER
                {
                  switch ($2) {
                  case 'I':
                  case 'i':
                  case 'A':
                  case 'a':
                    $$ = new format_expr($2);
                    break;
                  default:
                    command_error("unrecognized format `%1'", char($2));
                    $$ = new format_expr('a');
                    break;
                  }
                }
        | '%' digits
                {
                  $$ = new format_expr('0', $2.ndigits, $2.val);
                }
        | string '.' flag TOKEN_LETTER optional_number
                {
                  /* The optional_number ($5) is accepted by the grammar
                     but is not used by this action. */
                  switch ($4) {
                  case 'l':
                    $$ = new map_expr($1, lowercase);
                    break;
                  case 'u':
                    $$ = new map_expr($1, uppercase);
                    break;
                  case 'c':
                    $$ = new map_expr($1, capitalize);
                    break;
                  case 'r':
                    $$ = new map_expr($1, reverse_name);
                    break;
                  case 'a':
                    $$ = new map_expr($1, abbreviate_name);
                    break;
                  case 'y':
                    $$ = new extractor_expr($1, find_year, $3);
                    break;
                  case 'n':
                    $$ = new extractor_expr($1, find_last_name, $3);
                    break;
                  default:
                    $$ = $1;
                    command_error("unknown function `%1'", char($4));
                    break;
                  }
                }
        | string '+' number
                { $$ = new truncate_expr($1, $3); }
        | string '-' number
                { $$ = new truncate_expr($1, -$3); }
        | string '*'
                { $$ = new star_expr($1); }
        | '(' optional_conditional ')'
                { $$ = $2; }
        | '<' optional_conditional '>'
                { $$ = new separator_expr($2); }
        ;

optional_number:
        /* empty */
                { $$ = -1; }
        | number
                { $$ = $1; }
        ;

number:
        TOKEN_DIGIT
                { $$ = $1; }
        | number TOKEN_DIGIT
                { $$ = $1*10 + $2; }
        ;

digits:
        TOKEN_DIGIT
                { $$.ndigits = 1; $$.val = $1; }
        | digits TOKEN_DIGIT
                { $$.ndigits = $1.ndigits + 1; $$.val = $1.val*10 + $2; }
        ;

flag:
        /* empty */
                { $$ = 0; }
        | '+'
                { $$ = 1; }
        | '-'
                { $$ = -1; }
        ;

%%

/* bison defines const to be empty unless __STDC__ is defined, which it
isn't under cfront */

#ifdef const
#undef const
#endif

// Scanner state: spec_ptr is the next unread character, spec_end is the
// terminating null of the specification, and spec_cur is where the most
// recently scanned token began (used by yyerror()).
const char *spec_ptr;
const char *spec_end;
const char *spec_cur;

// Scanner for label specifications.  Skips leading space, then returns
// 0 at the end of the input, TOKEN_LETTER (yylval.num is the letter),
// TOKEN_DIGIT (yylval.num is the digit's value), TOKEN_LITERAL for a
// quoted string (yylval.str is its range within `literals'), or
// otherwise the character itself.

int yylex()
{
  while (spec_ptr < spec_end && csspace(*spec_ptr))
    spec_ptr++;
  spec_cur = spec_ptr;
  if (spec_ptr >= spec_end)
    return 0;
  unsigned char c = *spec_ptr++;
  if (csalpha(c)) {
    yylval.num = c;
    return TOKEN_LETTER;
  }
  if (csdigit(c)) {
    yylval.num = c - '0';
    return TOKEN_DIGIT;
  }
  if (c == '\'') {
    yylval.str.start = literals.length();
    for (; spec_ptr < spec_end; spec_ptr++) {
      if (*spec_ptr == '\'') {
        // A doubled quote stands for one literal quote; a single quote
        // ends the literal.
        if (++spec_ptr < spec_end && *spec_ptr == '\'')
          literals += '\'';
        else {
          yylval.str.len = literals.length() - yylval.str.start;
          return TOKEN_LITERAL;
        }
      }
      else
        literals += *spec_ptr;
    }
    // The input ended before a closing quote; what was collected is
    // still returned as a literal.
    yylval.str.len = literals.length() - yylval.str.start;
    return TOKEN_LITERAL;
  }
  return c;
}

// Parse LABEL_SPEC and, on success, replace the expression held in SLOT
// with the parse result.  Returns 1 on success; returns 0 and leaves
// SLOT unchanged when yyparse() reports an error.

static int parse_spec_into(const char *label_spec, expression *&slot)
{
  spec_cur = spec_ptr = label_spec;
  spec_end = strchr(label_spec, '\0');
  literals.clear();
  if (yyparse())
    return 0;
  delete slot;
  slot = parse_result;
  return 1;
}

// Each of the following installs a new specification and returns 1, or
// returns 0 and keeps the previous one if LABEL_SPEC does not parse.

int set_label_spec(const char *label_spec)
{
  return parse_spec_into(label_spec, parsed_label);
}

int set_date_label_spec(const char *label_spec)
{
  return parse_spec_into(label_spec, parsed_date_label);
}

int set_short_label_spec(const char *label_spec)
{
  return parse_spec_into(label_spec, parsed_short_label);
}

// Report a parse error, quoting the rest of the specification from the
// start of the offending token when there is one.

void yyerror(const char *message)
{
  if (spec_cur < spec_end)
    command_error("label specification %1 before `%2'", message, spec_cur);
  else
    command_error("label specification %1 at end of string", message);
}

// Tentative evaluation appends the canonicalized authors; otherwise the
// author text returned by get_authors() is appended, if there is any.

void at_expr::evaluate(int tentative, const reference &ref,
                       string &result, substring_position &)
{
  if (tentative)
    ref.canonicalize_authors(result);
  else {
    const char *end, *start = ref.get_authors(&end);
    if (start)
      result.append(start, end - start);
  }
}

// Appends nothing when tentative.  Otherwise the number comes from the
// reference's label_info count, or from the reference number when it
// has no label_info.  Letter formats go through format_serial(); the
// digit format is zero-padded on the left to `width' characters.

void format_expr::evaluate(int tentative, const reference &ref,
                           string &result, substring_position &)
{
  if (tentative)
    return;
  const label_info *lp = ref.get_label_ptr();
  int num = lp == 0 ? ref.get_number() : lp->count;
  if (type != '0')
    result += format_serial(type, num + 1);
  else {
    const char *ptr = itoa(num + first_number);
    int pad = width - int(strlen(ptr));
    while (--pad >= 0)
      result += '0';
    result += ptr;
  }
}

static const char *format_serial(char c, int n)
{
  assert(n > 0);
  static char buf[128]; // more than enough.
  switch (c) {
  case 'i':
  case 'I':
    {
      char *p = buf;
      // troff uses z and w to represent 10000 and 5000 in Roman
      // numerals; I can find no historical basis for this usage
      const char *s = c == 'i' ? "zwmdclxvi" : "ZWMDCLXVI";
      if (n >= 40000)
        return itoa(n);
      while (n >= 10000) {
        *p++ = s[0];
        n -= 10000;
      }
      for (int i = 1000; i > 0; i /= 10, s += 2) {
        int m = n/i;
        n -= m*i;
        switch (m) {
        case 3:
          *p++ = s[2];
          /* falls through */
        case 2:
          *p++ = s[2];
          /* falls through */
        case 1:
          *p++ = s[2];
          break;
        case 4:
          *p++ = s[2];
          *p++ = s[1];
          break;
        case 8:
          *p++ = s[1];
          *p++ = s[2];
          *p++ = s[2];
          *p++ = s[2];
          break;
        case 7:
          *p++ = s[1];
          *p++ = s[2];
          *p++ = s[2];
          break;
        case 6:
          *p++ = s[1];
          *p++ = s[2];
          break;
        case 5:
          *p++ = s[1];
          break;
        case 9:
          *p++ = s[2];
          *p++ = s[0];
        }
      }
      *p = 0;
      break;
    }
  case 'a':
  case 'A':
    {
      char *p = buf;
      // this is derived from troff/reg.c
      while (n > 0) {
        int d = n % 26;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -