📄 parser.c

📁 SRI international 发布的OAA框架软件
💻 C
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
/*
 * A n t l r  T r a n s l a t i o n  H e a d e r
 *
 * Terence Parr, Will Cohen, and Hank Dietz: 1989-2001
 * Purdue University Electrical Engineering
 * With AHPCRC, University of Minnesota
 * ANTLR Version 1.33MR33
 *
 *   ./pccts/bin/antlr -o ../ -gh parser.h -gt -mrhoist off -k 2 parser.g
 *
 */

#define ANTLR_VERSION	13333
#include "pcctscfg.h"
#include "pccts_stdio.h"

#include "charbuf.h"
#include "stringbuffer.h"
#include "libicl.h"
#include "libicl_private.h"
#include "glib.h"
#include <stdio.h>
#ifdef _WINDOWS
#include "oaa-windows.h"
#endif

/*
 * Extra fields added to every PCCTS AST node:
 *   isDefined - set to 1 by zzcr_ast once the node has been filled in
 *   type      - token type the node was created from
 *   tokenData - heap copy of the token text (or raw bytes for quoted data)
 *   tokenLen  - number of bytes in tokenData
 */
#define AST_FIELDS int isDefined; int type; char* tokenData; size_t tokenLen;
/* Ask the scanner to remember column numbers */
#define ZZCOL
/* We supply our own syntax error function (zzsyn, defined later in this file) */
#define USER_ZZSYN
/* Token buffer size in bytes: 10485760 = 10 * 1024 * 1024 */
/* #define ZZLEXBUFSIZE 10485760*/
#define ZZLEXBUFSIZE 10485760

/* Byte buffer that handleDblQuotedData() fills and zzcr_ast() consumes */
extern GByteArray* parser_dblQuoteBuf;
/* NOTE(review): not referenced in the visible part of this file - confirm where it is used */
extern char* parser_tmpStrIn;
#ifdef NORMAL_GC
/* Garbage-collected allocation helpers, only used when NORMAL_GC is defined */
extern void* GC_debug_malloc(size_t, const char*, int);
extern char* gc_strdup(char*);
#endif

#ifdef NORMAL_GC
/*
 * zzcr_ast (NORMAL_GC build): fill in a freshly created AST node.
 *
 *   ast   - node to initialise
 *   attr  - not used by this macro
 *   ttype - token type; stored in ast->type
 *   text  - token text as seen by the scanner
 *
 * For ICLDATAQ and DBLQUOTED tokens the node takes the byte segment collected
 * in parser_dblQuoteBuf (g_byte_array_free is called with FALSE, so only the
 * GByteArray wrapper is released) and records parser_dblQuoteLen as its
 * length; the macro does not append a NUL terminator to that data.
 * For every other token the text is duplicated with gc_strdup().  If the
 * scanner flagged a buffer overflow (zzbufovf), ZZLEXBUFSIZE bytes are copied
 * into a ZZLEXBUFSIZE + 1 byte allocation and a warning is printed.
 * NOTE(review): strncpy() does not terminate the copy when text is at least
 * ZZLEXBUFSIZE bytes long, and the extra byte is never written - confirm
 * that the scanner always terminates text before that point.
 * The expansion ends in "};", i.e. a block followed by an empty statement.
 */
#define zzcr_ast(ast, attr, ttype, text) { \
  if(((ttype) == ICLDATAQ) || \
  ((ttype) == DBLQUOTED)) { \
    char* buf = parser_dblQuoteBuf->data; \
    g_byte_array_free(parser_dblQuoteBuf, FALSE); \
    /* \
    char* buf = (char*)malloc(parser_dblQuoteLen + 1); \
    memcpy(buf, text, parser_dblQuoteLen); \
    buf[parser_dblQuoteLen] = '\0'; \
    */ \
    ast->tokenData = buf; \
    ast->tokenLen = parser_dblQuoteLen; \
  } \
  else { \
    if(zzbufovf) { \
      ast->tokenLen = ZZLEXBUFSIZE; \
      ast->tokenData = (char*)GC_debug_malloc(ZZLEXBUFSIZE + 1, __FILE__, __LINE__); \
      strncpy(ast->tokenData, text, ZZLEXBUFSIZE); \
      fprintf(stderr, "ZZLEXBUFSIZE overflowed (%i)\n", ZZLEXBUFSIZE); \
    } \
    else { \
      ast->tokenData = gc_strdup(text); \
      ast->tokenLen = strlen(text); \
    } \
  } \
  ast->type = (ttype); \
  ast->isDefined = 1; \
  ast->right = NULL; \
  ast->down = NULL; \
  /* printf("zzcr_ast at pointer %p\n", ast); */ \
};

/*
#define zzd_ast(t) { \
  GC_debug_free(t->tokenData); \
  fprintf(stderr, "freed token data at pointer %p\n", t); \
  t->isDefined = 0; \
} \
*/
/* The per-node destructor above is disabled; make sure no zzd_ast macro is
 * defined.  Token data is released by parser_freeASTData() instead. */
#undef zzd_ast

#else
/*
 * zzcr_ast (plain malloc build): fill in a freshly created AST node.
 *
 *   ast   - node to initialise
 *   attr  - not used by this macro
 *   ttype - token type; stored in ast->type
 *   text  - token text as seen by the scanner
 *
 * For ICLDATAQ and DBLQUOTED tokens the node takes the byte segment collected
 * in parser_dblQuoteBuf (g_byte_array_free is called with FALSE, so only the
 * GByteArray wrapper is released) and records parser_dblQuoteLen as its
 * length; the macro does not append a NUL terminator to that data.
 * For every other token the text is duplicated with strdup().  If the scanner
 * flagged a buffer overflow (zzbufovf), ZZLEXBUFSIZE bytes are copied into a
 * ZZLEXBUFSIZE + 1 byte allocation and a warning is printed.
 * NOTE(review): the malloc()/strdup() results are not checked, and strncpy()
 * does not terminate the copy when text is at least ZZLEXBUFSIZE bytes long -
 * confirm that the scanner always terminates text before that point.
 * The expansion ends in "};", i.e. a block followed by an empty statement.
 */
#define zzcr_ast(ast, attr, ttype, text) { \
  if(((ttype) == ICLDATAQ) || \
  ((ttype) == DBLQUOTED)) { \
    char* buf = parser_dblQuoteBuf->data; \
    g_byte_array_free(parser_dblQuoteBuf, FALSE); \
    /* \
    char* buf = (char*)malloc(parser_dblQuoteLen + 1); \
    memcpy(buf, text, parser_dblQuoteLen); \
    buf[parser_dblQuoteLen] = '\0'; \
    */ \
    ast->tokenData = buf; \
    ast->tokenLen = parser_dblQuoteLen; \
  } \
  else { \
    if(zzbufovf) { \
      ast->tokenLen = ZZLEXBUFSIZE; \
      ast->tokenData = (char*)malloc(ZZLEXBUFSIZE + 1); \
      strncpy(ast->tokenData, text, ZZLEXBUFSIZE); \
      fprintf(stderr, "ZZLEXBUFSIZE overflowed (%i)\n", ZZLEXBUFSIZE); \
    } \
    else { \
      ast->tokenData = strdup(text); \
      ast->tokenLen = strlen(text); \
    } \
  } \
  ast->type = (ttype); \
  ast->isDefined = 1; \
  ast->right = NULL; \
  ast->down = NULL; \
  /* printf("zzcr_ast at pointer %p\n", ast); */ \
};

/*
#define zzd_ast(t) { \
  free(t->tokenData); \
  t->isDefined = 0; \
} \
*/
/* The per-node destructor above is disabled; make sure no zzd_ast macro is
 * defined.  Token data is released by parser_freeASTData() instead. */
#undef zzd_ast

#endif

/* Number of bytes collected by the last completed handleDblQuotedData() scan */
extern int parser_dblQuoteLen;
/* Public parsing entry points; their definitions are outside this part of the file */
extern ICLTerm* parser_getTermFromString(char* str, size_t len);
extern ICLTerm* parser_getTermFromStringDebug(char* str, size_t len);
extern int parser_getNetTermFromBuf(ICLTerm** result, stringbuffer_t* buf);
/*extern void zzsyn(char *text, int tok, char *egroup, SetWordType *eset, int etok, int k, char *bad_text);*/
/* TRUE after a syntax error or a failed AST-to-term conversion */
extern int parser_error;
/* Sets the debug flag and returns its previous value */
extern int parser_setDebug(int b);
/* Hand-written scanner for the body of a double-quoted token */
extern void handleDblQuotedData();
/* Lookahead depth; matches the "-k 2" option in the antlr command line above */
#define LL_K 2
/* NOTE(review): presumably tells the PCCTS headers to generate AST support - confirm against antlr.h */
#define GENAST

#include "ast.h"

#define zzSET_SIZE 8
#include "antlr.h"
#include "tokens.h"
#include "dlgdef.h"
#include "mode.h"

/* MR23 In order to remove calls to PURIFY use the antlr -nopurify option */

#ifndef PCCTS_PURIFY
#define PCCTS_PURIFY(r,s) memset((char *) &(r),'\0',(s));
#endif

#include "ast.c"
zzASTgvars

ANTLR_INFO


/* Incremented by zzsynstd() for every reported syntax error */
extern int zzSyntaxErrCount; /* defined in err.h */
/* Scanner error hook; parser_errorWriter() has a matching signature */
extern void (*zzerr)(const char*); /* defined in dlgauto.h */
/* Set to TRUE by zzsyn() and by astToTerm() on failure; astToTerm() clears it on success */
int parser_error = FALSE;
/* Debug switch; read throughout this file, changed through parser_setDebug() */
int parser_DEBUG = FALSE;
/* Length of the data in parser_dblQuoteBuf after a completed quoted-data scan (0 if input ran out) */
int parser_dblQuoteLen;
/* Accumulates the bytes of the double-quoted token currently being scanned */
GByteArray* parser_dblQuoteBuf = NULL;
/* NOTE(review): not referenced in the visible part of this file - confirm where it is used */
char* parser_tmpStrIn = NULL;
ICLTerm* parser_getTermFromStringReal(char* str, size_t len, int debug);
/* Input buffer the scanner reads from (see currentBuf_read and handleDblQuotedData) */
static stringbuffer_t* currentBuf = NULL;
/* States of the quoted-data scanner: CONTINUE = 0, FOUNDQUOTE = 1 */
enum parser_state_vals {parser_dblquote_CONTINUE, parser_dblquote_FOUNDQUOTE};
/* Current scanner state; static so that it survives a scan interrupted by EOF */
static int parser_dblquote_state = 0;
/* Position in currentBuf at which an interrupted quoted-data scan stopped */
static size_t parser_savedIndex = 0;
/* TRUE while parser_savedIndex holds a position to resume from */
static int parser_savedIndexSet = FALSE;

/**
* Binary data isn't handled well by pccts, so we do it ourselves.  This
* function relies on the fact that we use our own stringbuffer type to
* buffer intermediate data.
*/
void handleDblQuotedData() {
struct zzdlg_state dlgState;
int done = FALSE;
int nextChar = 0;
char toAppend[1];

    /* we save the dlg automaton state each time--could do it just once, but it's */
/* not that expensive */
zzsave_dlg_state(&dlgState);

    /* if we've been here before without finishing the data, continue where we left off */
if(parser_savedIndexSet) {
stringbuffer_setIndex(currentBuf, parser_savedIndex);
}
else {
parser_dblQuoteBuf = g_byte_array_new();
}

    /* A tiny state machine of two states--CONTINUE or FOUNDQUOTE; we enter FOUNDQUOTE */
/* when we find a double quote, so that we can check if it is the ending double quote */
/* or just a quadruple quote (a quoted double quote: "").  If we ever hit EOF, we */
/* save the index in the currentBuf object and return so that the pccts automaton */
/* can fail to find the token correctly */
while(!done) {
nextChar = (*(dlgState.func_ptr))();
if(nextChar == EOF) {
done = TRUE;
//g_byte_array_free(parser_dblQuoteBuf, TRUE);
parser_dblQuoteLen = 0;
zzrestore_dlg_state(&dlgState);
parser_savedIndex = stringbuffer_getIndex(currentBuf);
parser_savedIndexSet = TRUE;
return;
}
switch(parser_dblquote_state) {
case 0:
if(nextChar == '"') {
parser_dblquote_state = parser_dblquote_FOUNDQUOTE;
break;
}
else {
toAppend[0] = nextChar;
/* printf("Adding char %o\n", nextChar); */
g_byte_array_append(parser_dblQuoteBuf, toAppend, 1);
}
break;
case 1:
parser_dblquote_state = parser_dblquote_CONTINUE;
if(nextChar == '"') {
/* printf("Adding char %o\n", '"'); */
g_byte_array_append(parser_dblQuoteBuf, "\"", 1);
}
else {
/* done */
stringbuffer_rewind(currentBuf, 2);
done = TRUE;
parser_dblQuoteLen = parser_dblQuoteBuf->len;
zzrestore_dlg_state(&dlgState);
parser_savedIndex = 0;
parser_savedIndexSet = FALSE;
return;
}
break;
}
}
}

/**
 * Error reporter with the same signature as the zzerr hook.
 *
 * When parser debugging is enabled, the text is dumped to stderr as a
 * bracketed list of octal byte values and then passed on to zzerrstd().
 * When debugging is disabled nothing is printed and zzerrstd() is not called.
 *
 * @param s NUL-terminated text to report; must not be NULL
 */
void parser_errorWriter(const char* s) {
    size_t i;

    if(!parser_DEBUG) {
        return;
    }

    fprintf(stderr, "Error with text: [");
    for(i = 0; s[i] != '\0'; ++i) {
        /* %o expects an unsigned int.  Going through unsigned char keeps */
        /* bytes >= 0x80 in the range 200..377 instead of sign-extending them. */
        fprintf(stderr, " %o ", (unsigned int)(unsigned char)s[i]);
    }
    /* Close the bracket opened above before ending the line. */
    fprintf(stderr, "]\n");
    zzerrstd(s);
}

/**
 * Switch parser debug output on or off.
 *
 * @param b new value for the debug flag
 * @return the value the flag had before the call
 */
int parser_setDebug(int b) {
    const int previous = parser_DEBUG;

    parser_DEBUG = b;
    return previous;
}

/**
 * Standard-format syntax error report, written to stderr.
 *
 * Counts the error in zzSyntaxErrCount and prints the line number and the
 * offending text ("EOF" when tok is the end-of-file token).  When an expected
 * token (etok) or expected set (eset) is known, the message goes on to say
 * what was missing or not accepted and, if egroup is non-empty, in which
 * group.
 *
 * @param text     unused
 * @param tok      token type that caused the error
 * @param egroup   name of the enclosing group; may be empty but not NULL
 * @param eset     set of expected tokens, or NULL
 * @param etok     single expected token, or 0
 * @param k        lookahead position at which the error was detected
 * @param bad_text text of the offending token
 */
void zzsynstd(char *text, int tok, char *egroup, SetWordType *eset, int etok, int k, char *bad_text) {
    const char *where = (tok == zzEOF_TOKEN) ? "EOF" : bad_text;

    (void)text;
    ++zzSyntaxErrCount;                             /* MR11 */
    fprintf(stderr, "line %d: syntax error at \"%s\"", zzline, where);

    /* Nothing is known about what was expected: finish the line and stop. */
    if(etok == 0 && eset == NULL) {
        fprintf(stderr, "\n");
        return;
    }

    if(k != 1) {
        fprintf(stderr, "; \"%s\" not", bad_text);
        if(zzset_deg(eset) > 1) {
            fprintf(stderr, " in");
        }
    }
    else {
        fprintf(stderr, " missing");
    }

    /* A non-empty set is printed through zzedecode; otherwise name the single token. */
    if(zzset_deg(eset) > 0) {
        zzedecode(eset);
    }
    else {
        fprintf(stderr, " %s", zztokens[etok]);
    }

    if(egroup[0] != '\0') {
        fprintf(stderr, " in %s", egroup);
    }
    fprintf(stderr, "\n");
}

/**
 * Syntax error handler used by the generated parser (USER_ZZSYN).
 *
 * Always records the failure in parser_error.  The standard report is only
 * printed, framed by two marker lines, when parser debugging is enabled.
 * The parameters are passed unchanged to zzsynstd().
 */
void zzsyn(char *text, int tok, char *egroup, SetWordType *eset, int etok, int k, char *bad_text) {
    parser_error = TRUE;

    if(!parser_DEBUG) {
        return;
    }

    fprintf(stderr, "standard error text:\n");
    zzsynstd(text, tok, egroup, eset, etok, k, bad_text);
    fprintf(stderr, "end standard error text\n");
}

/**
 * Print the type and text of one AST node to stdout.
 * NULL nodes and nodes whose isDefined field is not 1 print nothing.
 *
 * @param a node to print; may be NULL
 */
void printAst(AST* a) {
    if(a != NULL && a->isDefined == 1) {
        printf(" [type %i][text %s] ", a->type, a->tokenData);
    }
}

/**
 * Print the opening marker " (" to stdout; meant to be paired with after().
 *
 * @param a unused
 */
void before(AST* a) {
    (void)a;
    fputs(" (", stdout);
}

/**
 * Print the closing marker ")" to stdout; meant to be paired with before().
 *
 * @param a unused
 */
void after(AST* a) {
    (void)a;
    fputs(")", stdout);
}

/**
 * Read the next item from the current input stringbuffer.
 * The parameterless signature suggests it is installed as the scanner's
 * character source - confirm where it is registered.
 *
 * @return whatever stringbuffer_read() returns for currentBuf
 */
int currentBuf_read() {
    int nextItem = stringbuffer_read(currentBuf);

    return nextItem;
}

/**
 * Walk an AST depth first, visiting tree and all of its right siblings.
 *
 * For every node, in this order:
 *   1. pre is called if the node has children,
 *   2. func is called,
 *   3. the children (node->down) are traversed recursively,
 *   4. post is called if the node has children.
 * Any of the three callbacks may be NULL, in which case that step is skipped.
 * node->down is re-read after func has run, so a callback that changes it
 * affects the remaining steps.
 *
 * @param tree first node of the sibling chain to walk; may be NULL
 * @param func called for every node
 * @param pre  called before func for nodes that have children
 * @param post called after the children of a node have been walked
 * @param data opaque pointer passed to every callback
 */
void traverse(AST* tree, 
              void (*func)(AST*, void*),
              void (*pre)(AST*, void*), 
              void (*post)(AST*, void*),
              void* data) {
    AST* node;

    for(node = tree; node != NULL; node = node->right) {
        if(node->down != NULL && pre != NULL) {
            pre(node, data);
        }
        if(func != NULL) {
            func(node, data);
        }
        traverse(node->down, func, pre, post, data);
        if(node->down != NULL && post != NULL) {
            post(node, data);
        }
    }
}

/**
 * traverse() callback that releases the token data of one AST node.
 *
 * Only nodes whose isDefined field is 1 are touched.  After the data has
 * been released the node's tokenData is reset to NULL and tokenLen to 0, so
 * the node no longer holds a dangling pointer and running the callback over
 * the same tree a second time does not free the same block twice.
 *
 * @param a     node whose token data is released; NULL is ignored
 * @param dummy unused (required by the traverse() callback signature)
 */
void parser_freeASTData(AST* a, void* dummy) {
    (void)dummy;

    if(a == NULL || a->isDefined != 1) {
        return;
    }

#ifdef NORMAL_GC
    GC_debug_free(a->tokenData);
#else
    free(a->tokenData);
#endif
    a->tokenData = NULL;
    a->tokenLen = 0;
}

/**
 * Release the token data of every node reachable from root.
 * Only the token data is released (through parser_freeASTData); the AST
 * nodes themselves are not freed here.
 *
 * @param root first node of the tree; may be NULL
 */
void parser_freeTree(AST* root) {
    /* No pre/post hooks and no user data are needed for freeing. */
    traverse(root, parser_freeASTData, NULL, NULL, NULL);
}

/**
 * Convert a PCCTS abstract syntax tree into an ICLTerm tree.
 *
 * The AST is walked iteratively, depth first, with two parallel stacks:
 * parentsList holds the AST nodes on the path from the root to the node being
 * converted, termList holds the ICLTerm built for each of those nodes.
 * parentIndex is the depth of the current node (0 for the root).  Every term
 * except the root's is attached to the term one level above it with
 * icl_addChild().  Siblings of the root itself are ignored.
 *
 * Token data: STR, VAR, STRUCT and ICLDATAQ terms point at the node's
 * tokenData (the AST node keeps the same pointer); INT, FLOAT, LIST and GROUP
 * nodes have their token text freed here and their tokenData reset to NULL.
 *
 * NOTE(review): on failure the terms built so far (and a just-allocated
 * int/double value) are not released; only the two stacks are freed.
 *
 * @param root   root of the AST to convert
 * @param result receives the root ICLTerm on success; left untouched on failure
 * @param debug  non-zero to print a trace to stdout (parser_DEBUG enables it too)
 * @return TRUE on success (parser_error is cleared), FALSE on failure
 *         (parser_error is set)
 */
int astToTerm(AST* root, ICLTerm** result, int debug) {
    GPtrArray* parentsList;
    GPtrArray* termList;
    AST* currentAST = NULL;
    ICLTerm* currentTerm = NULL;
    gint64* intp;
    double* dblp;
    char* endp;
    ICLStructType* st;
    ICLGroupType* gt;
    int parentIndex = 0;

    if(debug || parser_DEBUG) {
        debug = TRUE;
    }

    /* Nothing to convert; fail before anything is allocated. */
    if(root == NULL) {
        parser_error = TRUE;
        return FALSE;
    }

    parentsList = g_ptr_array_new();
    termList = g_ptr_array_new();

    g_ptr_array_add(parentsList, root);
    while(parentsList->len != 0) {
        currentAST = (AST*)g_ptr_array_index(parentsList, parentsList->len - 1);
        if(currentAST->isDefined != 1) {
            /* A node that was never filled in cannot be converted.  Dropping */
            /* it from the stack would leave parentIndex out of step with the */
            /* stacks, so the whole conversion is treated as failed. */
            goto fail;
        }
#ifdef NORMAL_GC
        currentTerm = (ICLTerm*)GC_debug_malloc(sizeof(ICLTerm), __FILE__, __LINE__);
        printf("Allocated new term at %p\n", currentTerm);
#else
        currentTerm = (ICLTerm*)malloc(sizeof(ICLTerm));
#endif
        currentTerm->magic_cookie = ICL_MAGIC_COOKIE;
        currentTerm->refCount = 1;
        currentTerm->glibAlloc = 0;
        currentTerm->hadQuotes = 0;
        g_ptr_array_add(termList, currentTerm);
        switch(currentAST->type) {
        case STR:
            currentTerm->iclType = icl_str_type;
            if(currentAST->tokenData[0] == '\'') {
                currentTerm->hadQuotes = 1;
            }
            /* Both helpers edit the token text in place. */
            icl_stRemoveQuotes(currentAST->tokenData);
            icl_stUndoubleQuotes(currentAST->tokenData);
            currentTerm->p = currentAST->tokenData;
            currentTerm->len = strlen(currentAST->tokenData);
            if(debug) {
                printf("Found str: %s\n", currentTerm->p);
            }
            break;
        case VAR:
            currentTerm->iclType = icl_var_type;
            currentTerm->p = currentAST->tokenData;
            currentTerm->len = currentAST->tokenLen;
            if(debug) {
                printf("Found var: %s\n", currentTerm->p);
            }
            break;
        case INT:
            currentTerm->iclType = icl_int_type;
#ifdef NORMAL_GC
            intp = (gint64*)GC_debug_malloc(sizeof(gint64), __FILE__, __LINE__);
#else
            intp = (gint64*)malloc(sizeof(gint64));
#endif
            *intp = strtoll(currentAST->tokenData, &endp, 10);
            if(*endp != '\0') {
                if(debug) {
                    printf("Bad int\n");
                }
                goto fail;
            }
            free(currentAST->tokenData);
            currentAST->tokenData = NULL;
            currentAST->tokenLen = 0;
            currentTerm->p = intp;
            currentTerm->len = sizeof(gint64);
            if(debug) {
                /* gint64 is not necessarily long long; cast to match %lld. */
                printf("Found int: %lld\n", (long long)*((gint64*)currentTerm->p));
            }
            break;
        case FLOAT:
            currentTerm->iclType = icl_float_type;
#ifdef NORMAL_GC
            dblp = (double*)GC_debug_malloc(sizeof(double), __FILE__, __LINE__);
#else
            dblp = (double*)malloc(sizeof(double));
#endif
            *dblp = strtod(currentAST->tokenData, &endp);
            if(*endp != '\0') {
                if(debug) {
                    printf("Bad float\n");
                }
                goto fail;
            }
            free(currentAST->tokenData);
            currentAST->tokenData = NULL;
            currentAST->tokenLen = 0;
            currentTerm->p = dblp;
            currentTerm->len = sizeof(double);
            if(debug) {
                printf("Found float: %lf\n", *((double*)currentTerm->p));
            }
            break;
        case STRUCT:
            currentTerm->iclType = icl_struct_type;
#ifdef NORMAL_GC
            st = (ICLStructType*)GC_debug_malloc(sizeof(ICLStructType), __FILE__, __LINE__);
#else
            st = (ICLStructType*)malloc(sizeof(ICLStructType));
#endif
            st->name = currentAST->tokenData;
            st->args = NULL;
            st->numArgs = 0;
            currentTerm->p = st;
            /* NOTE(review): this is the size of the pointer, not of */
            /* ICLStructType; kept as in the original. */
            currentTerm->len = sizeof(st);
            if(debug) {
                printf("Found struct: name at %p: %s\n", st->name, st->name);
            }
            break;
        case LIST:
            /* The token text is not needed for a list; release it and clear */
            /* the node's pointer as the INT and FLOAT cases do. */
            free(currentAST->tokenData);
            currentAST->tokenData = NULL;
            currentAST->tokenLen = 0;
            currentTerm->iclType = icl_list_type;
            currentTerm->p = NULL;
            currentTerm->len = sizeof(ICLListType*);
            if(debug) {
                printf("Found list\n");
            }
            break;
        case GROUP:
            currentTerm->iclType = icl_group_type;
#ifdef NORMAL_GC
            gt = (ICLGroupType*)GC_debug_malloc(sizeof(ICLGroupType), __FILE__, __LINE__);
#else
            gt = (ICLGroupType*)malloc(sizeof(ICLGroupType));
#endif
            gt->list = NULL;
            /* Only the first character of the token is kept. */
            gt->startChar = currentAST->tokenData[0];
            free(currentAST->tokenData);
            currentAST->tokenData = NULL;
            currentAST->tokenLen = 0;
            gt->separator = strdup(",");
            currentTerm->p = gt;
            /* NOTE(review): size of the pointer, as in the STRUCT case. */
            currentTerm->len = sizeof(gt);
            if(debug) {
                printf("Found group\n");
            }
            break;
        case ICLDATAQ:
            currentTerm->iclType = icl_dataq_type;
            currentTerm->p = currentAST->tokenData;
            currentTerm->len = currentAST->tokenLen;
            /* The bytes came out of a GByteArray (see zzcr_ast). */
            currentTerm->glibAlloc = 1;
            if(debug) {
                printf("Found icldataq of len %i\n", (int)currentTerm->len);
            }
            break;
        default:
            fprintf(stderr, "Parser attempted to make ICLTerm from type %i\n", currentAST->type);
            goto fail;
        }
        if(debug) {
            printf("Created term at %p with type %i\n", currentTerm, currentTerm->iclType);
        }
        if(currentAST != root) {
            if(debug) {
                printf("Attempting to add term at %p to parent %p\n", (ICLTerm*)g_ptr_array_index(termList, parentIndex), (ICLTerm*)g_ptr_array_index(termList, parentIndex - 1));
            }
            icl_addChild((ICLTerm*)g_ptr_array_index(termList, parentIndex - 1), (ICLTerm*)g_ptr_array_index(termList, parentIndex));
        }
        /* Check if we go down or right or up */
        if(currentAST->down != NULL) {
            g_ptr_array_add(parentsList, currentAST->down);
            ++parentIndex;
            continue;
        }
        else if(currentAST->right != NULL && parentIndex > 0) {
            /* Replace the current node by its right sibling.  This is skipped */
            /* for the root (parentIndex == 0): a sibling of the root has no */
            /* parent term to attach to, and the upward walk below ignores */
            /* root siblings as well. */
            (void)g_ptr_array_remove_index(parentsList, parentIndex);
            (void)g_ptr_array_remove_index(termList, parentIndex);
            g_ptr_array_add(parentsList, currentAST->right);
            /* parentIndex stays the same, since we removed and added one */
        }
        else {
            /* At a leaf with no usable sibling: keep going up the tree until */
            /* we get to a node with a non-NULL right, or we hit the root. */
            int foundSibling = FALSE;
            while(parentIndex > 0) {
                (void)g_ptr_array_remove_index(parentsList, parentIndex);
                (void)g_ptr_array_remove_index(termList, parentIndex);
                --parentIndex;
                currentAST = g_ptr_array_index(parentsList, parentIndex);
                if(currentAST->right != NULL) {
                    foundSibling = TRUE;
                    break;
                }
            }
            if(parentIndex == 0) {
                /* Back at the root: the term at the bottom of the stack is the result. */
                *result = (ICLTerm*)g_ptr_array_remove_index(termList, 0);
                g_ptr_array_free(termList, TRUE);
                g_ptr_array_free(parentsList, TRUE);
                if(debug) {
                    printf("found ICLTerm\n");
                }
                parser_error = FALSE;
                return TRUE;
            }
            else if(foundSibling == TRUE) {
                /* keep making ICLTerm tree */
                /* remove the current sibling and replace with the new one */
                (void)g_ptr_array_remove_index(parentsList, parentIndex);
                (void)g_ptr_array_remove_index(termList, parentIndex);
                g_ptr_array_add(parentsList, currentAST->right);
                continue;
            }
            else {
                fprintf(stderr, "Bad parse: at leaf, could not find suitable parent to continue or return");
                goto fail;
            }
        }
    }

    /* The stack ran empty without producing a term: report failure rather */
    /* than falling off the end of a non-void function. */
fail:
    parser_error = TRUE;
    g_ptr_array_free(termList, TRUE);
    g_ptr_array_free(parentsList, TRUE);
    return FALSE;
}

AST* getASTFromString(char* str, size_t len, int debug) {
int oldDebug = parser_DEBUG;
#ifdef NORMAL_GC
AST* root = (AST*)GC_debug_malloc(sizeof(AST), __FILE__, __LINE__);
#else
12 3 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -