c-lex.c

来自「GCC编译器源代码」· C语言代码 · 共 2,195 行 · 第 1/4 页
2,195 行
/* Lexical analyzer for C and Objective C.
   Copyright (C) 1987, 88, 89, 92, 94-96, 1997 Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"

#include <stdio.h>
#include <errno.h>
#include <setjmp.h>

#include "rtl.h"
#include "tree.h"
#include "input.h"
#include "c-lex.h"
#include "c-tree.h"
#include "flags.h"
#include "c-parse.h"
#include "c-pragma.h"

#include <ctype.h>

/* MULTIBYTE_CHARS support only works for native compilers.
   ??? Ideally what we want is to model widechar support after
   the current floating point support.  */
#ifdef CROSS_COMPILE
#undef MULTIBYTE_CHARS
#endif

#ifdef MULTIBYTE_CHARS
#include <stdlib.h>
#include <locale.h>
#endif

#ifndef errno
extern int errno;
#endif

#if USE_CPPLIB
#include "cpplib.h"
/* Reader state and option block handed to cpplib; init_parse wires
   parse_options into parse_in.data.  */
cpp_reader parse_in;
cpp_options parse_options;
/* Kind of the token most recently returned by cpp_get_token.  */
static enum cpp_token cpp_token;
#endif

/* The elements of `ridpointers' are identifier nodes
   for the reserved type names and storage classes.
   It is indexed by a RID_... value.  */
tree ridpointers[(int) RID_MAX];

/* Cause the `yydebug' variable to be defined.  
*/#define YYDEBUG 1#if USE_CPPLIBstatic unsigned char *yy_cur, *yy_lim;intyy_get_token (){  for (;;)    {      parse_in.limit = parse_in.token_buffer;      cpp_token = cpp_get_token (&parse_in);      if (cpp_token == CPP_EOF)	return -1;      yy_lim = CPP_PWRITTEN (&parse_in);      yy_cur = parse_in.token_buffer;      if (yy_cur < yy_lim)	return *yy_cur++;    }}#define GETC() (yy_cur < yy_lim ? *yy_cur++ : yy_get_token ())#define UNGETC(c) ((c), yy_cur--)#else#define GETC() getc (finput)#define UNGETC(c) ungetc (c, finput)#endif/* the declaration found for the last IDENTIFIER token read in.   yylex must look this up to detect typedefs, which get token type TYPENAME,   so it is left around in case the identifier is not a typedef but is   used in a context which makes it a reference to a variable.  */tree lastiddecl;/* Nonzero enables objc features.  */int doing_objc_thang;extern tree is_class_name ();extern int yydebug;/* File used for outputting assembler code.  */extern FILE *asm_out_file;#ifndef WCHAR_TYPE_SIZE#ifdef INT_TYPE_SIZE#define WCHAR_TYPE_SIZE INT_TYPE_SIZE#else#define WCHAR_TYPE_SIZE	BITS_PER_WORD#endif#endif/* Number of bytes in a wide character.  */#define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT)static int maxtoken;		/* Current nominal length of token buffer.  */char *token_buffer;	/* Pointer to token buffer.			   Actual allocated length is maxtoken + 2.			   This is not static because objc-parse.y uses it.  */static int indent_level = 0;        /* Number of { minus number of }. *//* Nonzero if end-of-file has been seen on input.  */static int end_of_file;#if !USE_CPPLIB/* Buffered-back input character; faster than using ungetc.  */static int nextchar = -1;#endifstatic int skip_which_space		PROTO((int));static char *extend_token_buffer	PROTO((char *));static int readescape			PROTO((int *));int check_newline ();/* Do not insert generated code into the source, instead, include it.   
This allows us to build gcc automatically even for targets that   need to add or modify the reserved keyword lists.  */#include "c-gperf.h"/* Return something to represent absolute declarators containing a *.   TARGET is the absolute declarator that the * contains.   TYPE_QUALS is a list of modifiers such as const or volatile   to apply to the pointer type, represented as identifiers.   We return an INDIRECT_REF whose "contents" are TARGET   and whose type is the modifier list.  */treemake_pointer_declarator (type_quals, target)     tree type_quals, target;{  return build1 (INDIRECT_REF, type_quals, target);}voidforget_protocol_qualifiers (){  int i, n = sizeof wordlist / sizeof (struct resword);  for (i = 0; i < n; i++)    if ((int) wordlist[i].rid >= (int) RID_IN        && (int) wordlist[i].rid <= (int) RID_ONEWAY)      wordlist[i].name = "";}voidremember_protocol_qualifiers (){  int i, n = sizeof wordlist / sizeof (struct resword);  for (i = 0; i < n; i++)    if (wordlist[i].rid == RID_IN)      wordlist[i].name = "in";    else if (wordlist[i].rid == RID_OUT)      wordlist[i].name = "out";    else if (wordlist[i].rid == RID_INOUT)      wordlist[i].name = "inout";    else if (wordlist[i].rid == RID_BYCOPY)      wordlist[i].name = "bycopy";    else if (wordlist[i].rid == RID_ONEWAY)      wordlist[i].name = "oneway";   }#if USE_CPPLIBvoidinit_parse (filename)     char *filename;{  init_lex ();  yy_cur = "\n";  yy_lim = yy_cur+1;  cpp_reader_init (&parse_in);  parse_in.data = &parse_options;  cpp_options_init (&parse_options);  cpp_handle_options (&parse_in, 0, NULL); /* FIXME */  parse_in.show_column = 1;  if (! cpp_start_read (&parse_in, filename))    abort ();}voidfinish_parse (){  cpp_finish (&parse_in);}#endifvoidinit_lex (){  /* Make identifier nodes long enough for the language-specific slots.  */  set_identifier_size (sizeof (struct lang_identifier));  /* Start it at 0, because check_newline is called at the very beginning     and will increment it to 1.  
*/  lineno = 0;#ifdef MULTIBYTE_CHARS  /* Change to the native locale for multibyte conversions.  */  setlocale (LC_CTYPE, "");#endif  maxtoken = 40;  token_buffer = (char *) xmalloc (maxtoken + 2);  ridpointers[(int) RID_INT] = get_identifier ("int");  ridpointers[(int) RID_CHAR] = get_identifier ("char");  ridpointers[(int) RID_VOID] = get_identifier ("void");  ridpointers[(int) RID_FLOAT] = get_identifier ("float");  ridpointers[(int) RID_DOUBLE] = get_identifier ("double");  ridpointers[(int) RID_SHORT] = get_identifier ("short");  ridpointers[(int) RID_LONG] = get_identifier ("long");  ridpointers[(int) RID_UNSIGNED] = get_identifier ("unsigned");  ridpointers[(int) RID_SIGNED] = get_identifier ("signed");  ridpointers[(int) RID_INLINE] = get_identifier ("inline");  ridpointers[(int) RID_CONST] = get_identifier ("const");  ridpointers[(int) RID_VOLATILE] = get_identifier ("volatile");  ridpointers[(int) RID_AUTO] = get_identifier ("auto");  ridpointers[(int) RID_STATIC] = get_identifier ("static");  ridpointers[(int) RID_EXTERN] = get_identifier ("extern");  ridpointers[(int) RID_TYPEDEF] = get_identifier ("typedef");  ridpointers[(int) RID_REGISTER] = get_identifier ("register");  ridpointers[(int) RID_ITERATOR] = get_identifier ("iterator");  ridpointers[(int) RID_COMPLEX] = get_identifier ("complex");  ridpointers[(int) RID_ID] = get_identifier ("id");  ridpointers[(int) RID_IN] = get_identifier ("in");  ridpointers[(int) RID_OUT] = get_identifier ("out");  ridpointers[(int) RID_INOUT] = get_identifier ("inout");  ridpointers[(int) RID_BYCOPY] = get_identifier ("bycopy");  ridpointers[(int) RID_ONEWAY] = get_identifier ("oneway");  forget_protocol_qualifiers();  /* Some options inhibit certain reserved words.     Clear those words out of the hash table so they won't be recognized.  */#define UNSET_RESERVED_WORD(STRING) \  do { struct resword *s = is_reserved_word (STRING, sizeof (STRING) - 1); \       if (s) s->name = ""; } while (0)  if (! 
doing_objc_thang)    UNSET_RESERVED_WORD ("id");  if (flag_traditional)    {      UNSET_RESERVED_WORD ("const");      UNSET_RESERVED_WORD ("volatile");      UNSET_RESERVED_WORD ("typeof");      UNSET_RESERVED_WORD ("signed");      UNSET_RESERVED_WORD ("inline");      UNSET_RESERVED_WORD ("iterator");      UNSET_RESERVED_WORD ("complex");    }  if (flag_no_asm)    {      UNSET_RESERVED_WORD ("asm");      UNSET_RESERVED_WORD ("typeof");      UNSET_RESERVED_WORD ("inline");      UNSET_RESERVED_WORD ("iterator");      UNSET_RESERVED_WORD ("complex");    }}voidreinit_parse_for_function (){}/* Function used when yydebug is set, to print a token in more detail.  */voidyyprint (file, yychar, yylval)     FILE *file;     int yychar;     YYSTYPE yylval;{  tree t;  switch (yychar)    {    case IDENTIFIER:    case TYPENAME:    case OBJECTNAME:      t = yylval.ttype;      if (IDENTIFIER_POINTER (t))	fprintf (file, " `%s'", IDENTIFIER_POINTER (t));      break;    case CONSTANT:      t = yylval.ttype;      if (TREE_CODE (t) == INTEGER_CST)	fprintf (file,#if HOST_BITS_PER_WIDE_INT == 64#if HOST_BITS_PER_WIDE_INT != HOST_BITS_PER_INT		 " 0x%lx%016lx",#else		 " 0x%x%016x",#endif#else#if HOST_BITS_PER_WIDE_INT != HOST_BITS_PER_INT		 " 0x%lx%08lx",#else		 " 0x%x%08x",#endif#endif		 TREE_INT_CST_HIGH (t), TREE_INT_CST_LOW (t));      break;    }}/* If C is not whitespace, return C.   Otherwise skip whitespace and return first nonwhite char read.  */static intskip_white_space (c)     register int c;{  static int newline_warning = 0;  for (;;)    {      switch (c)	{	  /* We don't recognize comments here, because	     cpp output can include / and * consecutively as operators.	     Also, there's no need, since cpp removes all comments.  */	case '\n':	  c = check_newline ();	  break;	case ' ':	case '\t':	case '\f':	case '\v':	case '\b':	  c = GETC();	  break;	case '\r':	  /* ANSI C says the effects of a carriage return in a source file	     are undefined.  
*/	  if (pedantic && !newline_warning)	    {	      warning ("carriage return in source file");	      warning ("(we only warn about the first carriage return)");	      newline_warning = 1;	    }	  c = GETC();	  break;	case '\\':	  c = GETC();	  if (c == '\n')	    lineno++;	  else	    error ("stray '\\' in program");	  c = GETC();	  break;	default:	  return (c);	}    }}/* Skips all of the white space at the current location in the input file.   Must use and reset nextchar if it has the next character.  */voidposition_after_white_space (){  register int c;#if !USE_CPPLIB  if (nextchar != -1)    c = nextchar, nextchar = -1;  else#endif    c = GETC();  UNGETC (skip_white_space (c));}/* Make the token buffer longer, preserving the data in it.   P should point to just beyond the last valid character in the old buffer.   The value we return is a pointer to the new buffer   at a place corresponding to P.  */static char *extend_token_buffer (p)     char *p;{  int offset = p - token_buffer;  maxtoken = maxtoken * 2 + 10;  token_buffer = (char *) xrealloc (token_buffer, maxtoken + 2);  return token_buffer + offset;}#if !USE_CPPLIB#define GET_DIRECTIVE_LINE() get_directive_line (finput)#else /* USE_CPPLIB *//* Read the rest of a #-directive from input stream FINPUT.   In normal use, the directive name and the white space after it   have already been read, so they won't be included in the result.   We allow for the fact that the directive line may contain   a newline embedded within a character or string literal which forms   a part of the directive.   The value is a string in a reusable buffer.  It remains valid   only until the next time this function is called.  
*/static char *GET_DIRECTIVE_LINE (){  static char *directive_buffer = NULL;  static unsigned buffer_length = 0;  register char *p;  register char *buffer_limit;  register int looking_for = 0;  register int char_escaped = 0;  if (buffer_length == 0)    {      directive_buffer = (char *)xmalloc (128);      buffer_length = 128;    }  buffer_limit = &directive_buffer[buffer_length];  for (p = directive_buffer; ; )    {      int c;      /* Make buffer bigger if it is full.  */      if (p >= buffer_limit)        {	  register unsigned bytes_used = (p - directive_buffer);	  buffer_length *= 2;	  directive_buffer	    = (char *)xrealloc (directive_buffer, buffer_length);	  p = &directive_buffer[bytes_used];	  buffer_limit = &directive_buffer[buffer_length];        }      c = GETC ();      /* Discard initial whitespace.  */      if ((c == ' ' || c == '\t') && p == directive_buffer)	continue;      /* Detect the end of the directive.  */      if (c == '\n' && looking_for == 0)	{          UNGETC (c);	  c = '\0';	}      *p++ = c;      if (c == 0)	return directive_buffer;      /* Handle string and character constant syntax.  */      if (looking_for)	{	  if (looking_for == c && !char_escaped)	    looking_for = 0;	/* Found terminator... stop looking.  */	}      else        if (c == '\'' || c == '"')	  looking_for = c;	/* Don't stop buffering until we see another				   another one of these (or an EOF).  */      /* Handle backslash.  */      char_escaped = (c == '\\' && ! char_escaped);    }}#endif /* USE_CPPLIB *//* At the beginning of a line, increment the line number   and process any #-directive on this line.   If the line is a #-directive, read the entire line and return a newline.   Otherwise, return the line's first non-whitespace character.  */intcheck_newline (){  register int c;  register int token;  lineno++;  /* Read first nonwhite char on the line.  
*/  c = GETC();  while (c == ' ' || c == '\t')    c = GETC();  if (c != '#')    {      /* If not #, return it so caller will use it.  */      return c;    }  /* Read first nonwhite char after the `#'.  */  c = GETC();  while (c == ' ' || c == '\t')    c = GETC();  /* If a letter follows, then if the word here is `line', skip     it and ignore it; otherwise, ignore the line, with an error     if the word isn't `pragma', `ident', `define', or `undef'.  */  if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))    {      if (c == 'p')	{	  if (GETC() == 'r'
c-lex.c - 源码说明

本页面展示了「GCC编译器源代码」中的 c-lex.c 源码文件，采用 C语言编程语言编写，共 2,195 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与GCC相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?