📄 lex.c
字号:
/* Language lexer for the GNU compiler for the Java(TM) language.
   Copyright (C) 1997, 1998, 1999 Free Software Foundation, Inc.
   Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.

Java and all Java-based marks are trademarks or registered trademarks
of Sun Microsystems, Inc. in the United States and other countries.
The Free Software Foundation is independent of Sun Microsystems, Inc.  */

/* It defines java_lex (yylex) that reads a Java ASCII source file
possibly containing Unicode escape sequences or utf8 encoded characters
and returns a token for everything found but comments, white spaces
and line terminators. When necessary, it also fills the java_lval
(yylval) union. It's implemented to be called by a re-entrant parser
generated by Bison.

The lexical analysis conforms to the Java grammar described in "The
Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html)  */

#include "keyword.h"

#ifndef JC1_LITE
extern struct obstack *expression_obstack;
#endif

/* Function declarations  */
static int java_lineterminator PROTO ((unicode_t));
static char *java_sprint_unicode PROTO ((struct java_line *, int));
static void java_unicode_2_utf8 PROTO ((unicode_t));
static void java_lex_error PROTO ((char *, int));
#ifndef JC1_LITE
static int java_is_eol PROTO ((FILE *, int));
static tree build_wfl_node PROTO ((tree));
#endif
static void java_store_unicode PROTO ((struct java_line *, unicode_t, int));
static unicode_t java_parse_escape_sequence PROTO ((void));
static int java_letter_or_digit_p PROTO ((unicode_t));
static int java_parse_doc_section PROTO ((unicode_t));
static void java_parse_end_comment PROTO ((unicode_t));
static unicode_t java_get_unicode PROTO (());
static unicode_t java_read_unicode PROTO ((int, int *));
static unicode_t java_read_char PROTO (());
static void java_allocate_new_line PROTO (());
static void java_unget_unicode PROTO (());
static unicode_t java_sneak_unicode PROTO (());

/* Reset the lexer state kept in CTXP before a new input is lexed.
   Unless JC1_LITE is defined, this also creates the shared identifier
   and WFL nodes that are still null, pushes `java.lang' on the current
   context's import-on-demand list and clears the per-file parse
   state.  */

void
java_init_lex ()
{
#ifndef JC1_LITE
  /* Local and always zero on entry, so the import below is performed
     on every call.  */
  int java_lang_imported = 0;

  if (!java_lang_id)
    java_lang_id = get_identifier ("java.lang");
  if (!java_lang_cloneable)
    java_lang_cloneable = get_identifier ("java.lang.Cloneable");

  if (!java_lang_imported)
    {
      tree node = build_tree_list
        (build_expr_wfl (java_lang_id, NULL, 0, 0), NULL_TREE);
      read_import_dir (TREE_PURPOSE (node));
      TREE_CHAIN (node) = ctxp->import_demand_list;
      ctxp->import_demand_list = node;
      java_lang_imported = 1;
    }

  if (!wfl_operator)
    wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
  if (!label_id)
    label_id = get_identifier ("$L");
  if (!wfl_append)
    wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
  if (!wfl_string_buffer)
    wfl_string_buffer =
      build_expr_wfl (get_identifier ("java.lang.StringBuffer"), NULL, 0, 0);
  if (!wfl_to_string)
    wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);

  ctxp->static_initialized = ctxp->non_static_initialized =
    ctxp->incomplete_class = NULL_TREE;

  bzero ((PTR) ctxp->modifier_ctx, 11*sizeof (ctxp->modifier_ctx[0]));
  bzero ((PTR) current_jcf, sizeof (JCF));
  ctxp->current_parsed_class = NULL;
  ctxp->package = NULL_TREE;
#endif

  ctxp->filename = input_filename;
  ctxp->lineno = lineno = 0;
  ctxp->p_line = NULL;
  ctxp->c_line = NULL;
  ctxp->unget_utf8_value = 0;
  ctxp->minus_seen = 0;
  ctxp->java_error_flag = 0;
}

/* Return a printable form of the character at index I of LINE.  A
   character that came from a Unicode escape, or that is not ASCII, is
   rendered as a `\uXXXX' escape; any other character is returned as
   is.  The result lives in a static buffer that the next call
   overwrites.  */

static char *
java_sprint_unicode (line, i)
    struct java_line *line;
    int i;
{
  static char buffer [10];

  /* ASCII stops at 127: 128 and above must be escaped too, which also
     matches the `c < 128' test used by java_read_char.  */
  if (line->unicode_escape_p [i] || line->line [i] >= 128)
    sprintf (buffer, "\\u%04x", line->line [i]);
  else
    {
      buffer [0] = line->line [i];
      buffer [1] = '\0';
    }
  return buffer;
}

/* Return the character at the current position of the current line
   without consuming it.  */

static unicode_t
java_sneak_unicode ()
{
  return (ctxp->c_line->line [ctxp->c_line->current]);
}

/* Step back one character in the current line and adjust the column
   accordingly.  It is a fatal error to do so at the start of the
   line.  */

static void
java_unget_unicode ()
{
  if (!ctxp->c_line->current)
    fatal ("can't unget unicode - java_unget_unicode");
  ctxp->c_line->current--;
  ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
}

/* Prepare ctxp->c_line to receive a new line of input.  A current line
   that held something other than white space becomes the previous
   line (the former previous line is freed) and a fresh buffer is
   allocated; a white-space-only line is reused in place.  A look-ahead
   character pending on the old line becomes the first character of
   the new one.  */

static void
java_allocate_new_line ()
{
  /* Save the pending look-ahead before the line it hangs off is
     recycled.  */
  unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
  char ahead_escape_p = (ctxp->c_line ?
                         ctxp->c_line->unicode_escape_ahead_p : 0);

  if (ctxp->c_line && !ctxp->c_line->white_space_only)
    {
      if (ctxp->p_line)
        {
          free (ctxp->p_line->unicode_escape_p);
          free (ctxp->p_line->line);
          free (ctxp->p_line);
        }
      ctxp->p_line = ctxp->c_line;
      ctxp->c_line = NULL;              /* Reallocated */
    }

  if (!ctxp->c_line)
    {
      ctxp->c_line = (struct java_line *)xmalloc (sizeof (struct java_line));
      ctxp->c_line->max = JAVA_LINE_MAX;
      ctxp->c_line->line = (unicode_t *)xmalloc
        (sizeof (unicode_t)*ctxp->c_line->max);
      ctxp->c_line->unicode_escape_p =
        (char *)xmalloc (sizeof (char)*ctxp->c_line->max);
      ctxp->c_line->white_space_only = 0;
    }

  ctxp->c_line->line [0] = ctxp->c_line->size = 0;
  ctxp->c_line->char_col = ctxp->c_line->current = 0;
  if (ahead)
    {
      ctxp->c_line->line [ctxp->c_line->size] = ahead;
      ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p;
      ctxp->c_line->size++;
    }
  ctxp->c_line->ahead [0] = 0;
  ctxp->c_line->unicode_escape_ahead_p = 0;
  ctxp->c_line->lineno = ++lineno;
  ctxp->c_line->white_space_only = 1;
}

#define BAD_UTF8_VALUE 0xFFFE

/* Read one character from the input and return it.  A value pushed
   back in ctxp->unget_utf8_value is returned first.  Bytes below 128
   are returned unchanged, two and three byte UTF-8 sequences are
   decoded, UEOF is returned at end of file and BAD_UTF8_VALUE for a
   malformed sequence.  */

static unicode_t
java_read_char ()
{
  int c;
  int c1, c2;

  if (ctxp->unget_utf8_value)
    {
      int to_return = ctxp->unget_utf8_value;
      ctxp->unget_utf8_value = 0;
      return (to_return);
    }

  c = GETC ();

  /* EOF has to be tested before the ASCII range: being negative it
     would otherwise satisfy `c < 128' and be returned through an
     implicit conversion instead of as UEOF.  */
  if (c == EOF)
    return UEOF;
  if (c < 128)
    return (unicode_t)c;

  if ((c & 0xe0) == 0xc0)
    {
      /* Two byte sequence: 110xxxxx 10xxxxxx.  */
      c1 = GETC ();
      if ((c1 & 0xc0) == 0x80)
        return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f));
      c = c1;
    }
  else if ((c & 0xf0) == 0xe0)
    {
      /* Three byte sequence: 1110xxxx 10xxxxxx 10xxxxxx.  */
      c1 = GETC ();
      if ((c1 & 0xc0) == 0x80)
        {
          c2 = GETC ();
          if ((c2 & 0xc0) == 0x80)
            return (unicode_t)(((c & 0xf) << 12) +
                               (( c1 & 0x3f) << 6) + (c2 & 0x3f));
          else
            c = c2;
        }
      else
        c = c1;
    }

  /* We looked for a UTF8 multi-byte sequence (since we saw an initial
     byte with the high bit set), but found invalid bytes instead.
     If the most recent byte was Ascii (and not EOF), we should
     unget it, in case it was a comment terminator or other
     delimiter.  */
  if ((c & 0x80) == 0)
    UNGETC (c);
  return BAD_UTF8_VALUE;
}

/* Append character C to line L, recording in parallel whether it came
   from a Unicode escape (UNICODE_ESCAPE_P).  Both buffers of L grow by
   JAVA_LINE_MAX entries when full; running out of memory while
   growing them is a fatal error.  */

static void
java_store_unicode (l, c, unicode_escape_p)
    struct java_line *l;
    unicode_t c;
    int unicode_escape_p;
{
  if (l->size == l->max)
    {
      unicode_t *new_line;
      char *new_escape_p;

      l->max += JAVA_LINE_MAX;

      /* Go through temporaries so that a failed realloc is reported
         instead of being dereferenced below.  */
      new_line = (unicode_t *)realloc (l->line, sizeof (unicode_t)*l->max);
      if (!new_line)
        fatal ("virtual memory exhausted - java_store_unicode");
      l->line = new_line;

      new_escape_p = (char *)realloc (l->unicode_escape_p,
                                      sizeof (char)*l->max);
      if (!new_escape_p)
        fatal ("virtual memory exhausted - java_store_unicode");
      l->unicode_escape_p = new_escape_p;
    }
  l->line [l->size] = c;
  l->unicode_escape_p [l->size++] = unicode_escape_p;
}

/* Read the next character, translating a `\uXXXX' escape into the
   character it denotes.  *UNICODE_ESCAPE_P is set to 1 when the
   returned character came from such an escape and to 0 otherwise.
   When TERM_CONTEXT is zero, a character that java_lineterminator
   accepts is returned as '\n'.  A backslash that does not start a
   Unicode escape is returned as is; the character read after it, if
   any, is pushed back through ctxp->unget_utf8_value.  */

static unicode_t
java_read_unicode (term_context, unicode_escape_p)
    int term_context;
    int *unicode_escape_p;
{
  unicode_t c;
  long i, base;

  c = java_read_char ();
  *unicode_escape_p = 0;

  if (c != '\\')
    return ((term_context ? c :
             java_lineterminator (c) ? '\n' : (unicode_t)c));

  /* Count the number of preceding '\' by seeking backward in the
     input, then come back to where we were.  */
  for (base = ftell (finput), i = base-2; c == '\\';)
    {
      fseek (finput, i--, SEEK_SET);
      c = java_read_char ();    /* Will fail if reading utf8 stream. FIXME */
    }
  fseek (finput, base, SEEK_SET);
  if ((base-i-3)%2 == 0)        /* If odd number of \ seen */
    {
      c = java_read_char ();
      if (c == 'u')
        {
          unsigned short unicode = 0;
          int shift;

          /* Next should be 4 hex digits, otherwise it's an error.
             The hex value is converted into the unicode, pushed into
             the Unicode stream.  */
          for (shift = 12; shift >= 0; shift -= 4)
            {
              if ((c = java_read_char ()) == UEOF)
                return UEOF;
              if (c >= '0' && c <= '9')
                unicode |= (unicode_t)((c-'0') << shift);
              else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
                unicode |= (unicode_t)((10+(c | 0x20)-'a') << shift);
              else
                java_lex_error
                  ("Non hex digit in Unicode escape sequence", 0);
            }
          *unicode_escape_p = 1;
          /* NOTE(review): C holds the last hex digit here, not the
             decoded value, so this java_lineterminator test looks like
             it can never succeed; presumably UNICODE was meant.
             Confirm before changing.  */
          return (term_context ? unicode :
                  (java_lineterminator (c) ? '\n' : unicode));
        }
      ctxp->unget_utf8_value = c;
    }
  return (unicode_t)'\\';
}

/* Return the next character of the current line and advance past it.
   When there is no current line, or it has been entirely consumed, a
   new line is first read from the input, up to and including its
   '\n' or UEOF.  */

static unicode_t
java_get_unicode ()
{
  /* It's time to read a line when... */
  if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
    {
      unicode_t c;

      java_allocate_new_line ();
      /* The look-ahead carried over from the previous line may already
         be the whole line.  */
      if (ctxp->c_line->line[0] != '\n')
        for (;;)
          {
            int unicode_escape_p;

            c = java_read_unicode (0, &unicode_escape_p);
            java_store_unicode (ctxp->c_line, c, unicode_escape_p);
            if (ctxp->c_line->white_space_only
                && !JAVA_WHITE_SPACE_P (c) && c!='\n')
              ctxp->c_line->white_space_only = 0;
            if ((c == '\n') || (c == UEOF))
              break;
          }
    }
  ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
  JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
  return ctxp->c_line->line [ctxp->c_line->current++];
}

/* Return 1 if C starts a line terminator, 0 otherwise.  For a
   terminator, one more character is read: it is dropped when it is the
   other half of a two character terminator, otherwise it is saved as
   the look-ahead of the current line.  */

static int
java_lineterminator (c)
     unicode_t c;
{
  int unicode_escape_p;

  if (c == '\n')                /* LF */
    {
      if ((c = java_read_unicode (1, &unicode_escape_p)) != '\r')
        {
          ctxp->c_line->ahead [0] = c;
          ctxp->c_line->unicode_escape_ahead_p = unicode_escape_p;
        }
      return 1;
    }
  else if (c == '\r')           /* CR */
    {
      if ((c = java_read_unicode (1, &unicode_escape_p)) != '\n')
        {
          ctxp->c_line->ahead [0] = c;
          ctxp->c_line->unicode_escape_ahead_p = unicode_escape_p;
        }
      return 1;
    }
  else
    return 0;
}

/* Parse the end of a C style comment.
 * C is the first character following the '/' and '*'.
 * An error is reported, and parsing of the comment stops, if the end
 * of input is reached first.  */

static void
java_parse_end_comment (c)
     unicode_t c;
{
  for ( ;; c = java_get_unicode ())
    {
      switch (c)
        {
        case UEOF:
          java_lex_error ("Comment not terminated at end of input", 0);
          /* Stop here instead of falling into the '*' case, which
             would keep reading after the end of input.  */
          return;
        case '*':
          switch (c = java_get_unicode ())
            {
            case UEOF:
              java_lex_error ("Comment not terminated at end of input", 0);
              return;
            case '/':
              return;
            case '*':   /* reparse only '*' */
              java_unget_unicode ();
              break;
            default:
              break;
            }
          break;
        default:
          break;
        }
    }
}

/* Parse the documentation section. Keywords must be at the beginning
   of a documentation comment line (ignoring white space and any `*'
   character). Parsed keyword(s): @DEPRECATED.
   C is the first character to examine.  Return 1 when the comment
   terminator was found while skipping the leading characters, 0
   otherwise.  */

static int
java_parse_doc_section (c)
     unicode_t c;
{
  int valid_tag = 0, seen_star = 0;

  while (JAVA_WHITE_SPACE_P (c) || (c == '*') || c == '\n')
    {
      switch (c)
        {
        case '*':
          seen_star = 1;
          break;
        case '\n': /* ULT */
          valid_tag = 1;
          /* Fall through.  */
        default:
          seen_star = 0;
        }
      c = java_get_unicode();
    }

  if (c == UEOF)
    java_lex_error ("Comment not terminated at end of input", 0);

  if (seen_star && (c == '/'))
    return 1;                   /* Goto step1 in caller */

  /* We're parsing @deprecated */
  if (valid_tag && (c == '@'))
    {
      char tag [11];
      int  tag_index = 0;

      /* Collect at most 10 characters, the length of "deprecated".  */
      while (tag_index < 10 && c != UEOF && c != ' ' && c != '\n')
        {
          c = java_get_unicode ();
          tag [tag_index++] = c;
        }

      if (c == UEOF)
        java_lex_error ("Comment not terminated at end of input", 0);

      java_unget_unicode ();
      tag [tag_index] = '\0';

      if (!strcmp (tag, "deprecated"))
        ctxp->deprecated = 1;
    }
  return 0;
}

/* This function to be used only by JAVA_ID_CHAR_P (), otherwise it
   will return a wrong result.  */

static int
java_letter_or_digit_p (c)
     unicode_t c;
{
  return _JAVA_LETTER_OR_DIGIT_P (c);
}

static unicode_t
java_parse_escape_sequence ()
{
  unicode_t char_lit;
  unicode_t c;

  switch (c = java_get_unicode ())
    {
    case 'b':
      return (unicode_t)0x8;
    case 't':
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -