⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 c-lex.c

📁 GCC编译器源代码
💻 C
📖 第 1 页 / 共 4 页
字号:
/* Lexical analyzer for C and Objective C.
   Copyright (C) 1987, 88, 89, 92, 94-96, 1997 Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"

#include <stdio.h>
#include <errno.h>
#include <setjmp.h>

#include "rtl.h"
#include "tree.h"
#include "input.h"
#include "c-lex.h"
#include "c-tree.h"
#include "flags.h"
#include "c-parse.h"
#include "c-pragma.h"

#include <ctype.h>

/* MULTIBYTE_CHARS support only works for native compilers.
   ??? Ideally what we want is to model widechar support after
   the current floating point support.  */
#ifdef CROSS_COMPILE
#undef MULTIBYTE_CHARS
#endif

#ifdef MULTIBYTE_CHARS
#include <stdlib.h>
#include <locale.h>
#endif

/* Some hosts define errno as a macro in <errno.h>; declare it ourselves
   only when they do not.  */
#ifndef errno
extern int errno;
#endif

#if USE_CPPLIB
#include "cpplib.h"
/* State shared with cpplib when the preprocessor is linked in.  */
cpp_reader parse_in;
cpp_options parse_options;
/* Kind of the token most recently returned by cpp_get_token.  */
static enum cpp_token cpp_token;
#endif

/* The elements of `ridpointers' are identifier nodes
   for the reserved type names and storage classes.
   It is indexed by a RID_... value.  */
tree ridpointers[(int) RID_MAX];

/* Cause the `yydebug' variable to be defined.  
*/#define YYDEBUG 1#if USE_CPPLIBstatic unsigned char *yy_cur, *yy_lim;intyy_get_token (){  for (;;)    {      parse_in.limit = parse_in.token_buffer;      cpp_token = cpp_get_token (&parse_in);      if (cpp_token == CPP_EOF)	return -1;      yy_lim = CPP_PWRITTEN (&parse_in);      yy_cur = parse_in.token_buffer;      if (yy_cur < yy_lim)	return *yy_cur++;    }}#define GETC() (yy_cur < yy_lim ? *yy_cur++ : yy_get_token ())#define UNGETC(c) ((c), yy_cur--)#else#define GETC() getc (finput)#define UNGETC(c) ungetc (c, finput)#endif/* the declaration found for the last IDENTIFIER token read in.   yylex must look this up to detect typedefs, which get token type TYPENAME,   so it is left around in case the identifier is not a typedef but is   used in a context which makes it a reference to a variable.  */tree lastiddecl;/* Nonzero enables objc features.  */int doing_objc_thang;extern tree is_class_name ();extern int yydebug;/* File used for outputting assembler code.  */extern FILE *asm_out_file;#ifndef WCHAR_TYPE_SIZE#ifdef INT_TYPE_SIZE#define WCHAR_TYPE_SIZE INT_TYPE_SIZE#else#define WCHAR_TYPE_SIZE	BITS_PER_WORD#endif#endif/* Number of bytes in a wide character.  */#define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT)static int maxtoken;		/* Current nominal length of token buffer.  */char *token_buffer;	/* Pointer to token buffer.			   Actual allocated length is maxtoken + 2.			   This is not static because objc-parse.y uses it.  */static int indent_level = 0;        /* Number of { minus number of }. *//* Nonzero if end-of-file has been seen on input.  */static int end_of_file;#if !USE_CPPLIB/* Buffered-back input character; faster than using ungetc.  */static int nextchar = -1;#endifstatic int skip_which_space		PROTO((int));static char *extend_token_buffer	PROTO((char *));static int readescape			PROTO((int *));int check_newline ();/* Do not insert generated code into the source, instead, include it.   
This allows us to build gcc automatically even for targets that   need to add or modify the reserved keyword lists.  */#include "c-gperf.h"/* Return something to represent absolute declarators containing a *.   TARGET is the absolute declarator that the * contains.   TYPE_QUALS is a list of modifiers such as const or volatile   to apply to the pointer type, represented as identifiers.   We return an INDIRECT_REF whose "contents" are TARGET   and whose type is the modifier list.  */treemake_pointer_declarator (type_quals, target)     tree type_quals, target;{  return build1 (INDIRECT_REF, type_quals, target);}voidforget_protocol_qualifiers (){  int i, n = sizeof wordlist / sizeof (struct resword);  for (i = 0; i < n; i++)    if ((int) wordlist[i].rid >= (int) RID_IN        && (int) wordlist[i].rid <= (int) RID_ONEWAY)      wordlist[i].name = "";}voidremember_protocol_qualifiers (){  int i, n = sizeof wordlist / sizeof (struct resword);  for (i = 0; i < n; i++)    if (wordlist[i].rid == RID_IN)      wordlist[i].name = "in";    else if (wordlist[i].rid == RID_OUT)      wordlist[i].name = "out";    else if (wordlist[i].rid == RID_INOUT)      wordlist[i].name = "inout";    else if (wordlist[i].rid == RID_BYCOPY)      wordlist[i].name = "bycopy";    else if (wordlist[i].rid == RID_ONEWAY)      wordlist[i].name = "oneway";   }#if USE_CPPLIBvoidinit_parse (filename)     char *filename;{  init_lex ();  yy_cur = "\n";  yy_lim = yy_cur+1;  cpp_reader_init (&parse_in);  parse_in.data = &parse_options;  cpp_options_init (&parse_options);  cpp_handle_options (&parse_in, 0, NULL); /* FIXME */  parse_in.show_column = 1;  if (! cpp_start_read (&parse_in, filename))    abort ();}voidfinish_parse (){  cpp_finish (&parse_in);}#endifvoidinit_lex (){  /* Make identifier nodes long enough for the language-specific slots.  */  set_identifier_size (sizeof (struct lang_identifier));  /* Start it at 0, because check_newline is called at the very beginning     and will increment it to 1.  
*/  lineno = 0;#ifdef MULTIBYTE_CHARS  /* Change to the native locale for multibyte conversions.  */  setlocale (LC_CTYPE, "");#endif  maxtoken = 40;  token_buffer = (char *) xmalloc (maxtoken + 2);  ridpointers[(int) RID_INT] = get_identifier ("int");  ridpointers[(int) RID_CHAR] = get_identifier ("char");  ridpointers[(int) RID_VOID] = get_identifier ("void");  ridpointers[(int) RID_FLOAT] = get_identifier ("float");  ridpointers[(int) RID_DOUBLE] = get_identifier ("double");  ridpointers[(int) RID_SHORT] = get_identifier ("short");  ridpointers[(int) RID_LONG] = get_identifier ("long");  ridpointers[(int) RID_UNSIGNED] = get_identifier ("unsigned");  ridpointers[(int) RID_SIGNED] = get_identifier ("signed");  ridpointers[(int) RID_INLINE] = get_identifier ("inline");  ridpointers[(int) RID_CONST] = get_identifier ("const");  ridpointers[(int) RID_VOLATILE] = get_identifier ("volatile");  ridpointers[(int) RID_AUTO] = get_identifier ("auto");  ridpointers[(int) RID_STATIC] = get_identifier ("static");  ridpointers[(int) RID_EXTERN] = get_identifier ("extern");  ridpointers[(int) RID_TYPEDEF] = get_identifier ("typedef");  ridpointers[(int) RID_REGISTER] = get_identifier ("register");  ridpointers[(int) RID_ITERATOR] = get_identifier ("iterator");  ridpointers[(int) RID_COMPLEX] = get_identifier ("complex");  ridpointers[(int) RID_ID] = get_identifier ("id");  ridpointers[(int) RID_IN] = get_identifier ("in");  ridpointers[(int) RID_OUT] = get_identifier ("out");  ridpointers[(int) RID_INOUT] = get_identifier ("inout");  ridpointers[(int) RID_BYCOPY] = get_identifier ("bycopy");  ridpointers[(int) RID_ONEWAY] = get_identifier ("oneway");  forget_protocol_qualifiers();  /* Some options inhibit certain reserved words.     Clear those words out of the hash table so they won't be recognized.  */#define UNSET_RESERVED_WORD(STRING) \  do { struct resword *s = is_reserved_word (STRING, sizeof (STRING) - 1); \       if (s) s->name = ""; } while (0)  if (! 
doing_objc_thang)    UNSET_RESERVED_WORD ("id");  if (flag_traditional)    {      UNSET_RESERVED_WORD ("const");      UNSET_RESERVED_WORD ("volatile");      UNSET_RESERVED_WORD ("typeof");      UNSET_RESERVED_WORD ("signed");      UNSET_RESERVED_WORD ("inline");      UNSET_RESERVED_WORD ("iterator");      UNSET_RESERVED_WORD ("complex");    }  if (flag_no_asm)    {      UNSET_RESERVED_WORD ("asm");      UNSET_RESERVED_WORD ("typeof");      UNSET_RESERVED_WORD ("inline");      UNSET_RESERVED_WORD ("iterator");      UNSET_RESERVED_WORD ("complex");    }}voidreinit_parse_for_function (){}/* Function used when yydebug is set, to print a token in more detail.  */voidyyprint (file, yychar, yylval)     FILE *file;     int yychar;     YYSTYPE yylval;{  tree t;  switch (yychar)    {    case IDENTIFIER:    case TYPENAME:    case OBJECTNAME:      t = yylval.ttype;      if (IDENTIFIER_POINTER (t))	fprintf (file, " `%s'", IDENTIFIER_POINTER (t));      break;    case CONSTANT:      t = yylval.ttype;      if (TREE_CODE (t) == INTEGER_CST)	fprintf (file,#if HOST_BITS_PER_WIDE_INT == 64#if HOST_BITS_PER_WIDE_INT != HOST_BITS_PER_INT		 " 0x%lx%016lx",#else		 " 0x%x%016x",#endif#else#if HOST_BITS_PER_WIDE_INT != HOST_BITS_PER_INT		 " 0x%lx%08lx",#else		 " 0x%x%08x",#endif#endif		 TREE_INT_CST_HIGH (t), TREE_INT_CST_LOW (t));      break;    }}/* If C is not whitespace, return C.   Otherwise skip whitespace and return first nonwhite char read.  */static intskip_white_space (c)     register int c;{  static int newline_warning = 0;  for (;;)    {      switch (c)	{	  /* We don't recognize comments here, because	     cpp output can include / and * consecutively as operators.	     Also, there's no need, since cpp removes all comments.  */	case '\n':	  c = check_newline ();	  break;	case ' ':	case '\t':	case '\f':	case '\v':	case '\b':	  c = GETC();	  break;	case '\r':	  /* ANSI C says the effects of a carriage return in a source file	     are undefined.  
*/	  if (pedantic && !newline_warning)	    {	      warning ("carriage return in source file");	      warning ("(we only warn about the first carriage return)");	      newline_warning = 1;	    }	  c = GETC();	  break;	case '\\':	  c = GETC();	  if (c == '\n')	    lineno++;	  else	    error ("stray '\\' in program");	  c = GETC();	  break;	default:	  return (c);	}    }}/* Skips all of the white space at the current location in the input file.   Must use and reset nextchar if it has the next character.  */voidposition_after_white_space (){  register int c;#if !USE_CPPLIB  if (nextchar != -1)    c = nextchar, nextchar = -1;  else#endif    c = GETC();  UNGETC (skip_white_space (c));}/* Make the token buffer longer, preserving the data in it.   P should point to just beyond the last valid character in the old buffer.   The value we return is a pointer to the new buffer   at a place corresponding to P.  */static char *extend_token_buffer (p)     char *p;{  int offset = p - token_buffer;  maxtoken = maxtoken * 2 + 10;  token_buffer = (char *) xrealloc (token_buffer, maxtoken + 2);  return token_buffer + offset;}#if !USE_CPPLIB#define GET_DIRECTIVE_LINE() get_directive_line (finput)#else /* USE_CPPLIB *//* Read the rest of a #-directive from input stream FINPUT.   In normal use, the directive name and the white space after it   have already been read, so they won't be included in the result.   We allow for the fact that the directive line may contain   a newline embedded within a character or string literal which forms   a part of the directive.   The value is a string in a reusable buffer.  It remains valid   only until the next time this function is called.  
*/static char *GET_DIRECTIVE_LINE (){  static char *directive_buffer = NULL;  static unsigned buffer_length = 0;  register char *p;  register char *buffer_limit;  register int looking_for = 0;  register int char_escaped = 0;  if (buffer_length == 0)    {      directive_buffer = (char *)xmalloc (128);      buffer_length = 128;    }  buffer_limit = &directive_buffer[buffer_length];  for (p = directive_buffer; ; )    {      int c;      /* Make buffer bigger if it is full.  */      if (p >= buffer_limit)        {	  register unsigned bytes_used = (p - directive_buffer);	  buffer_length *= 2;	  directive_buffer	    = (char *)xrealloc (directive_buffer, buffer_length);	  p = &directive_buffer[bytes_used];	  buffer_limit = &directive_buffer[buffer_length];        }      c = GETC ();      /* Discard initial whitespace.  */      if ((c == ' ' || c == '\t') && p == directive_buffer)	continue;      /* Detect the end of the directive.  */      if (c == '\n' && looking_for == 0)	{          UNGETC (c);	  c = '\0';	}      *p++ = c;      if (c == 0)	return directive_buffer;      /* Handle string and character constant syntax.  */      if (looking_for)	{	  if (looking_for == c && !char_escaped)	    looking_for = 0;	/* Found terminator... stop looking.  */	}      else        if (c == '\'' || c == '"')	  looking_for = c;	/* Don't stop buffering until we see another				   another one of these (or an EOF).  */      /* Handle backslash.  */      char_escaped = (c == '\\' && ! char_escaped);    }}#endif /* USE_CPPLIB *//* At the beginning of a line, increment the line number   and process any #-directive on this line.   If the line is a #-directive, read the entire line and return a newline.   Otherwise, return the line's first non-whitespace character.  */intcheck_newline (){  register int c;  register int token;  lineno++;  /* Read first nonwhite char on the line.  
*/  c = GETC();  while (c == ' ' || c == '\t')    c = GETC();  if (c != '#')    {      /* If not #, return it so caller will use it.  */      return c;    }  /* Read first nonwhite char after the `#'.  */  c = GETC();  while (c == ' ' || c == '\t')    c = GETC();  /* If a letter follows, then if the word here is `line', skip     it and ignore it; otherwise, ignore the line, with an error     if the word isn't `pragma', `ident', `define', or `undef'.  */  if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))    {      if (c == 'p')	{	  if (GETC() == 'r'

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -