/* $Header: /cvsroot/dhcp-agent/dhcp-agent/src/dhcp-tokenizer.c,v 1.4 2003/03/25 03:05:06 actmodern Exp $
 *
 * Copyright 2002 Thamer Alharbash
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. The names of the authors may not be used to endorse or promote
 *    products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 */

/* TODO:
 * It may be profitable to mmap() in order to implement peeking more cleanly.
 * Config files won't be so big anyway.
 */

#define MODULE_NAME "dhcp-tokenizer"

#include "dhcp-local.h"
#include "dhcp-libutil.h"
#include "dhcp-tokenizer.h"

/* these characters are special. we only allow them in a quoted string. */
static char special_characters[] = ",;{}=";

/* forward declarations of tokenizers. */
static token_t tokenize_comment(tokenizer_t *tokenizer);
static token_t tokenize_gobble_line(tokenizer_t *tokenizer);
static token_t tokenize_quoted_string(tokenizer_t *tokenizer);
static token_t tokenize_string(tokenizer_t *tokenizer);
static token_t tokenize_newline(tokenizer_t *tokenizer);
static token_t tokenize_assignment(tokenizer_t *tokenizer);
static token_t tokenize_block_open(tokenizer_t *tokenizer);
static token_t tokenize_block_close(tokenizer_t *tokenizer);
static token_t tokenize_comma(tokenizer_t *tokenizer);
static token_t tokenize_semicolon(tokenizer_t *tokenizer);

/* tokenizer dispatch table. */
tokenizers_t parsers[] = {
    /* char val */  /* routine. */
    { ';',  tokenize_semicolon },
    { '\\', tokenize_gobble_line },
    { '#',  tokenize_comment },
    { '\"', tokenize_quoted_string },
    { '\n', tokenize_newline },
    { '=',  tokenize_assignment },
    { '{',  tokenize_block_open },
    { '}',  tokenize_block_close },
    { ',',  tokenize_comma },
};

/* * * * * * * * * * *
 * utility functions *
 * * * * * * * * * * */

static int is_special_char(int c)
{
    int i;

    /* - 1: don't compare against the array's terminating NUL. */
    for(i = 0; i < NELMS(special_characters) - 1; i++) {
        if(c == special_characters[i])
            return 1;
    }

    return 0;
}

/* is a valid string character. */
static int is_valid_string_char(int c)
{
    if(c == '\\')
        return 1;

    if(isalnum(c) || ispunct(c))
        return 1;
    else
        return 0;
}

/* the stream is at EOF or an error occurred. */
static token_t check_eof(tokenizer_t *tokenizer)
{
    if(feof(tokenizer->fp))
        return TOKEN_EOF;
    else
        return TOKEN_ERROR;
}

/* just gobble up till the end of the line. */
static token_t tokenize_gobble_line(tokenizer_t *tokenizer)
{
    int c;

    while(1) {
        c = fgetc(tokenizer->fp);

        switch (c) {
        case EOF:
            return check_eof(tokenizer);
        case '\n':
            tokenizer->line_no++;
            return TOKEN_NEWLINE;
        default:
            break;
        }
    }
}
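/*
 * For orientation, a hypothetical config fragment in the surface syntax
 * these routines tokenize (the actual dhcp-agent grammar lives in the
 * parser, not here): '#' comments, quoted strings that may contain
 * spaces and the special characters, '{' '}' blocks, '=' assignment,
 * ',' lists, ';' terminators, and '\' line continuation.
 *
 *   # a comment, gobbled to end of line
 *   interface eth0 {
 *       hostname = "my host";        # quoted string with a space
 *       request = subnet-mask, \
 *                 routers;           # backslash continues the line
 *   };
 */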
/* * * * * * * * * * * * * *
 * the tokenizer routines  *
 * * * * * * * * * * * * * */

/* read a quoted string. */
static token_t tokenize_quoted_string(tokenizer_t *tokenizer)
{
    int c, c2;

    while(1) {
        c = fgetc(tokenizer->fp);

        switch (c) {
        case EOF:
            return check_eof(tokenizer);
        case '\"':
            return TOKEN_STRING;
        case '\n':
            return TOKEN_ERROR; /* error to have newline in string constant. */
        case '\\':
            c2 = getc(tokenizer->fp);
            if(c2 == '\n' || c2 == '\r' || c2 == ' ' || c2 == '\t') {
                /* push c2 back so the gobble stops at this newline
                 * instead of eating the next line as well. */
                ungetc(c2, tokenizer->fp);
                if(tokenize_gobble_line(tokenizer) != TOKEN_NEWLINE)
                    return TOKEN_ERROR;
                else
                    break;
            }
            /* anything else means insert c2 without handling it specially. */
            stringbuffer_append_c(tokenizer->data_buff, (char)c2);
            break;
        default:
            if(is_valid_string_char(c) || c == ' ' || c == '\t')
                stringbuffer_append_c(tokenizer->data_buff, (char)c);
            else {
                ungetc(c, tokenizer->fp);
                return TOKEN_ERROR;
            }
        }
    }
}

/* read a string. */
static token_t tokenize_string(tokenizer_t *tokenizer)
{
    int c, c2;

    while(1) {
        c = fgetc(tokenizer->fp);

        switch (c) {
        case EOF:
            return check_eof(tokenizer);
        case '\n':
            ungetc(c, tokenizer->fp);
            return TOKEN_STRING; /* newline terminates string. */
        case '\t':
        case ' ':
            ungetc(c, tokenizer->fp);
            return TOKEN_STRING; /* spaces or tabs terminate string. */
        case '\\':
            c2 = getc(tokenizer->fp);
            if(c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r') {
                ungetc(c2, tokenizer->fp);
                return TOKEN_STRING;
            } else {
                stringbuffer_append_c(tokenizer->data_buff, (char)c2);
                break;
            }
        default:
            if(is_special_char(c)) {
                ungetc(c, tokenizer->fp);
                return TOKEN_STRING;
            }
            if(is_valid_string_char(c)) {
                stringbuffer_append_c(tokenizer->data_buff, (char)c);
            } else {
                ungetc(c, tokenizer->fp);
                return TOKEN_ERROR; /* anything else and we've been terminated. */
            }
        }
    }
}

static token_t tokenize_comment(tokenizer_t *tokenizer)
{
    return tokenize_gobble_line(tokenizer);
}

static token_t tokenize_newline(tokenizer_t *tokenizer)
{
    tokenizer->line_no++;
    return TOKEN_NEWLINE;
}

static token_t tokenize_assignment(tokenizer_t *tokenizer)
{
    return TOKEN_ASSIGNMENT;
}

static token_t tokenize_block_open(tokenizer_t *tokenizer)
{
    return TOKEN_BLOCK_OPEN;
}

static token_t tokenize_block_close(tokenizer_t *tokenizer)
{
    return TOKEN_BLOCK_CLOSE;
}

static token_t tokenize_comma(tokenizer_t *tokenizer)
{
    return TOKEN_COMMA;
}

static token_t tokenize_semicolon(tokenizer_t *tokenizer)
{
    return TOKEN_SEMICOLON;
}
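/*
 * Dispatch: the one-shot routines above map single characters to
 * tokens; everything else falls back to tokenize_string(). The proc
 * below also implements one-token lookahead: a peeked token is cached
 * in tokenizer->peek_value and handed back by the next get or peek.
 */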
static token_t tokenizer_get_next_token_proc(tokenizer_t *tokenizer, uint8_t peeking)
{
    int c, c2;
    int i;

    /* have we peeked previously? */
    if(tokenizer->peeked) {
        /* then we have enough to return with now.
         * if we're not peeking now, clear the flag before returning. */
        if(!peeking) {
            /* remove peek flag. */
            tokenizer->peeked = 0;
        }

        /* the data is already read, go ahead and return. */
        return tokenizer->peek_value;
    }

    /* if we're peeking then set up our flag now, since we'll be returning
     * a peeked value and not losing it. */
    tokenizer->peeked = peeking;

    /* now read as usual. */
    stringbuffer_clear(tokenizer->data_buff);

    while(1) {
        c = fgetc(tokenizer->fp);

        switch (c) {
        case EOF:
            tokenizer->peek_value = TOKEN_EOF;
            return tokenizer->peek_value;
        case ' ':
        case '\t':
            continue;
        case '\\':
            c2 = getc(tokenizer->fp);

            /* any backslash whose first trailing character is a
             * space-like or newline-like character means we gobble up,
             * but don't report end of line; instead we continue parsing. */
            if(c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r') {
                ungetc(c2, tokenizer->fp);
                if(tokenize_gobble_line(tokenizer) != TOKEN_NEWLINE) {
                    tokenizer->peek_value = TOKEN_ERROR;
                    return tokenizer->peek_value;
                } else {
                    break;
                }
            } else {
                /* the next character may not be passed through the
                 * special parsers. we assume it's a string and do our
                 * best to read it. (note: c is pushed back again below,
                 * so this leans on two characters of pushback; ISO C
                 * guarantees only one, though glibc allows more.) */
                ungetc(c2, tokenizer->fp);
                goto read_in_string;
            }
        default:
            for(i = 0; i < NELMS(parsers); i++) {
                if(parsers[i].character == c) {
                    tokenizer->peek_value = parsers[i].do_parse(tokenizer);
                    return tokenizer->peek_value;
                }
            }

            /* otherwise we default to trying a string. */
        read_in_string:
            /* if we have an alphanumeric it's a string. read it in. */
            if(is_valid_string_char(c) || c == '\\') {
                ungetc(c, tokenizer->fp);
                tokenizer->peek_value = tokenize_string(tokenizer);
                return tokenizer->peek_value;
            }

            /* anything else is an error. */
            tokenizer->peek_value = TOKEN_ERROR;
            return tokenizer->peek_value;
        }
    }
}

/* * * * * * * *
 * interface   *
 * * * * * * * */

/* get the next token. */
token_t tokenizer_get_next_token(tokenizer_t *tokenizer)
{
    return(tokenizer_get_next_token_proc(tokenizer, 0));
}

/* peek at the next token. */
token_t tokenizer_peek_next_token(tokenizer_t *tokenizer)
{
    return(tokenizer_get_next_token_proc(tokenizer, 1));
}

/* get the next token, ignoring newlines. */
token_t tokenizer_get_next_token_ignore_newlines(tokenizer_t *tokenizer)
{
    token_t token;

    while((token = tokenizer_get_next_token(tokenizer)) == TOKEN_NEWLINE)
        ;

    return token;
}

/* peek at the next token, ignoring newlines. */
token_t tokenizer_peek_next_token_ignore_newlines(tokenizer_t *tokenizer)
{
    token_t token;

    while(1) {
        token = tokenizer_peek_next_token(tokenizer);
        if(token == TOKEN_NEWLINE)
            tokenizer_get_next_token(tokenizer);
        else
            return token;
    }
}

/* create a tokenizer instance. */
tokenizer_t *tokenizer_create(const char *filename)
{
    FILE *fp;
    tokenizer_t *tokenizer;

    fp = file_open_or_create_safe(filename, "r");
    if(fp == NULL)
        return NULL;

    tokenizer = xmalloc(sizeof(tokenizer_t));
    tokenizer->fp = fp;
    tokenizer->line_no = 1;
    tokenizer->data_buff = stringbuffer_create();
    tokenizer->peeked = 0;

    return tokenizer;
}

/* destroy a tokenizer instance. */
void tokenizer_destroy(tokenizer_t *tokenizer)
{
    fclose(tokenizer->fp);
    stringbuffer_destroy(tokenizer->data_buff);
    xfree(tokenizer);
    return;
}

/* get current line number from the tokenizer. */
int tokenizer_get_line_no(tokenizer_t *tokenizer)
{
    return tokenizer->line_no;
}

/* get the data string from the tokenizer. */
const char *tokenizer_get_data(tokenizer_t *tokenizer)
{
    return stringbuffer_getstring(tokenizer->data_buff);
}
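/*
 * A minimal usage sketch showing the intended call pattern, compiled
 * out by default. The TOKENIZER_USAGE_EXAMPLE guard is hypothetical
 * (not part of the dhcp-agent build); everything it calls is the
 * interface defined above.
 */
#ifdef TOKENIZER_USAGE_EXAMPLE

#include <stdio.h>

int main(int argc, char **argv)
{
    tokenizer_t *tokenizer;
    token_t token;

    if(argc != 2) {
        fprintf(stderr, "usage: %s <config file>\n", argv[0]);
        return 1;
    }

    tokenizer = tokenizer_create(argv[1]);
    if(tokenizer == NULL) {
        fprintf(stderr, "could not open %s\n", argv[1]);
        return 1;
    }

    /* walk the token stream; the data buffer holds the text of the
     * most recent TOKEN_STRING. */
    while((token = tokenizer_get_next_token(tokenizer)) != TOKEN_EOF) {
        if(token == TOKEN_ERROR) {
            fprintf(stderr, "tokenizer error at line %d\n",
                    tokenizer_get_line_no(tokenizer));
            break;
        }
        if(token == TOKEN_STRING)
            printf("string at line %d: %s\n",
                   tokenizer_get_line_no(tokenizer),
                   tokenizer_get_data(tokenizer));
    }

    tokenizer_destroy(tokenizer);
    return 0;
}

#endif /* TOKENIZER_USAGE_EXAMPLE */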