📄 sexp.h
字号:
* continuations. Manipulating continuations by hand is required if the
* continuation-based parser is called directly. This is <b>not
* recommended</b> unless you are willing to deal with potential errors and
* are willing to learn exactly how the continuation relates to the
* internals of the parser. A simpler approach is to use either the
* <i>parse_sexp</i> function that simply returns an s-expression without
* exposing the continuations, or the <i>iparse_sexp</i> function that
* allows iteratively popping one s-expression at a time from a string
* containing one or more s-expressions. Refer to the documentation for
* each parsing function for further details on behavior and usage.
*/
typedef struct pcont {
/**
 * The parser stack used for iterative parsing.
 */
faststack_t *stack;
/**
 * The last full s-expression encountered by the parser.
 *
 * - NULL: the parser has not yet seen a complete s-expression, and more
 *   data is required for the s-expression currently being parsed.
 * - non-NULL: the parser has seen one complete s-expression and may
 *   already be partially through parsing the next one.
 */
sexp_t *last_sexp;
/**
 * Pointer to a temporary buffer used to store atom values during parsing.
 */
char *val;
/**
 * Current number of bytes allocated for val.
 */
int val_allocated;
/**
 * Current number of bytes of val that are in use.
 */
int val_used;
/**
 * Pointer to the character following the last character in the current
 * atom value being parsed (presumably a cursor into val -- TODO confirm
 * against the parser implementation).
 */
char *vcur;
/**
 * Resume position in the string being parsed. When the parser is called
 * with this continuation, this is the first character that will be
 * processed. If this is NULL, the parser will start parsing at the
 * beginning of the string passed into the parser.
 *
 * NOTE(review): the original wording called this "the last character to
 * examine", which conflicts with "the first character that will be
 * processed"; confirm the exact meaning against the parser.
 */
char *lastPos;
/**
 * Pointer to the beginning of the string currently being processed.
 * lastPos points to some location inside the string that this points to.
 */
char *sbuffer;
/**
 * Current parenthesis depth: the number of left parens encountered that
 * the parser is currently working within.
 */
unsigned int depth;
/**
 * Parenthesis depth encountered after a single quote (tick), used when
 * the character immediately following the tick was a left paren.
 */
unsigned int qdepth;
/**
 * State ID of the parser's current state in the DFA representing the
 * parser. The parser is DFA based in order to simplify restoring the
 * proper state from a continuation.
 */
unsigned int state;
/**
 * Flag indicating whether the next character to be processed should be
 * treated as if it had been prefaced with a '\' escape character.
 */
unsigned int esc;
/**
 * Flag indicating whether the atom being processed was preceded by a
 * single quote.
 */
unsigned int squoted;
/**
 * Error code. Indicates that the continuation being returned does not
 * represent a successful parse, so its contents are of little value.
 * 0 means no error occurred; otherwise the value is 1.
 */
unsigned int error;
/**
 * Parser mode. Specialized parser behaviours are activated through this
 * setting. Two modes are currently available: normal and inline_binary.
 * Inline_binary treats atoms that start with #b# specially, assuming
 * they have the structure:
 *
 *    #b#s#data
 *
 * where s is a positive (greater than 0) integer giving the length of
 * the data, and data is s bytes of binary data following the final '#'.
 * After those s bytes, normal s-expression data is assumed to continue.
 */
parsermode_t mode;
/* -----------------------------------------------------------------
 * The fields below are used only when dealing with INLINE_BINARY mode
 * ----------------------------------------------------------------- */
/**
 * Expected length of the binary data currently being read in. This also
 * corresponds to the size of the memory allocated to hold that data.
 */
unsigned int binexpected;
/**
 * Number of bytes of the binary blob that have already been read in.
 */
unsigned int binread;
/**
 * Pointer to the memory containing the binary data being read in.
 */
char *bindata;
} pcont_t;
/**
 * Wrapper around a standard I/O file descriptor together with the parsing
 * infrastructure (a continuation and a read buffer) required to parse
 * s-expressions off of it. It exists so that routines can hide the loops
 * and bookkeeping needed to accumulate data read from the file descriptor
 * and parse expressions out of it one at a time.
 */
typedef struct sexp_iowrap {
/**
 * Continuation used to parse data read from the file descriptor.
 */
pcont_t *cc;
/**
 * The file descriptor. Currently this CANNOT be a socket, because the
 * implementation uses read(), not recv().
 */
int fd;
/**
 * Buffer that data is read into before parsing. Its capacity is BUFSIZ
 * bytes.
 */
char buf[BUFSIZ];
/**
 * Byte count for the last read. -1 indicates an error; otherwise the
 * value is between 0 and BUFSIZ.
 */
int cnt;
} sexp_iowrap_t;
/*===========*/
/* FUNCTIONS */
/*===========*/
/* this is for C++ users */
#ifdef __cplusplus
extern "C" {
#endif
/**
 * Set the parameters controlling atom value buffer allocation and growth.
 * This is an important knob for performance tuning; several properties of
 * the expected expression structure should be taken into account:
 *
 *   - Average size of atom values
 *   - Variance in sizes of atom values
 *   - Amount of memory that may tolerably be "wasted" (allocated but not
 *     used)
 *
 * \param ss  Initial size of all atom buffers. Ideally this is large
 *            enough to hold MOST atom values, or at least close enough
 *            that only one growth step is required.
 * \param gs  Number of bytes by which to increase the buffer size when
 *            space is exhausted.
 *
 * A reasonable starting point is the average atom size for \a ss and one
 * standard deviation for \a gs. As a rule of thumb (assuming atom sizes
 * are distributed roughly symmetrically about the mean), about half of
 * all atom values then fit in the initial buffer and roughly 80-90% fit
 * after one growth. If memory is not a concern, choosing \a ss to be the
 * mean plus one standard deviation captures roughly 80-90% of values in
 * the initial buffer, and a \a gs of one standard deviation captures
 * nearly all of them.
 *
 * Note: these parameters can be tuned at runtime as needs change. They
 * apply to all expressions and expression elements parsed after they are
 * modified; they are not applied retroactively to expressions that have
 * already been parsed.
 */
void set_parser_buffer_params(int ss, int gs);
/**
 * Return an allocated sexp_t. The structure may be a previously allocated
 * one taken from the internal stack of recycled elements, or a freshly
 * allocated one if none are available. Use this instead of calling malloc
 * directly if you want to avoid excessive mallocs. <I>Note: mallocing
 * your own expressions is fine - you can even use sexp_t_deallocate to
 * deallocate them and put them in the pool.</I>
 *
 * If the stack has not been initialized yet, this call initializes it.
 *
 * The parameter list is spelled (void) so that this is a true prototype:
 * in C (before C23) an empty parameter list leaves the arguments
 * unspecified and disables compile-time argument checking.
 *
 * \return pointer to an allocated sexp_t element.
 */
sexp_t *sexp_t_allocate(void);
/**
 * Given a malloc'd sexp_t element, put it back onto the stack of
 * already-allocated elements so that it can be reused. This routine
 * allocates that stack if one has not been allocated already.
 *
 * \param s  the sexp_t element to return to the stack.
 */
void sexp_t_deallocate(sexp_t *s);
/**
 * Release ALL of the memory that the library keeps cached between calls.
 * If this is never called, the caches persist for the lifetime of the
 * library user.
 *
 * The parameter list is spelled (void) so that this is a true prototype:
 * in C (before C23) an empty parameter list leaves the arguments
 * unspecified and disables compile-time argument checking.
 */
void sexp_cleanup(void);
/**
 * Print a sexp_t structure as a string in the LISP style into a
 * caller-supplied, fixed-size buffer.
 *
 * \param loc   destination buffer.
 * \param size  capacity of \a loc (presumably in bytes -- TODO confirm).
 * \param e     the s-expression to print.
 *
 * \return If the buffer is large enough and the conversion succeeds, the
 *         length of the string stored in the buffer. If the buffer was
 *         too small, or some other error occurred, -1; in that case the
 *         contents of the buffer should not be assumed to contain any
 *         useful information.
 */
int print_sexp(char *loc, int size, sexp_t *e);
/**
 * Print a sexp_t structure to a buffer, growing the buffer as necessary
 * instead of relying on a fixed-size buffer as print_sexp does.
 *
 * \param s   address of the CSTRING pointer that receives the output.
 * \param e   the s-expression to print.
 * \param ss  buffer start size; important to tune for performance.
 * \param gs  buffer growth size; important to tune for performance.
 *
 * NOTE(review): the meaning of the return value is not documented here;
 * confirm against the implementation before relying on it.
 */
int print_sexp_cstr(CSTRING **s, sexp_t *e, int ss, int gs);
/**
 * Allocate a new sexp_t element representing a list.
 *
 * \param l  presumably the first element of the list's contents -- TODO
 *           confirm against the implementation.
 *
 * \return the newly allocated list element.
 */
sexp_t *new_sexp_list(sexp_t *l);
/**
 * Allocate a new sexp_t element representing a value (an atom).
 *
 * \param buf  presumably the data for the atom's value -- TODO confirm.
 * \param bs   presumably the size of \a buf -- TODO confirm whether this
 *             counts a terminating NUL.
 *
 * \return the newly allocated atom element.
 */
sexp_t *new_sexp_atom(char *buf, int bs);
/**
 * Create an initial continuation for parsing the given string.
 *
 * \param str  the string that is to be parsed.
 *
 * \return the newly created continuation; release it with
 *         destroy_continuation.
 */
pcont_t *init_continuation(char *str);
/**
 * Destroy a continuation. This cleans up what the continuation contains
 * and then cleans up the continuation itself.
 *
 * \param pc  the continuation to destroy.
 */
void destroy_continuation (pcont_t * pc);
/**
 * Create an IO wrapper structure around a file descriptor.
 *
 * \param fd  the file descriptor to wrap. Per the sexp_iowrap_t
 *            documentation this currently cannot be a socket, since the
 *            implementation uses read() rather than recv().
 *
 * \return the newly created wrapper; release it with destroy_iowrap.
 */
sexp_iowrap_t *init_iowrap(int fd);
/**
 * Destroy an IO wrapper structure.
 *
 * \param iow  the wrapper to destroy.
 *
 * NOTE(review): whether this also closes the wrapped file descriptor is
 * not stated here -- confirm against the implementation.
 */
void destroy_iowrap(sexp_iowrap_t *iow);
/**
 * Given an IO wrapper handle, read one s-expression off of it. The
 * expression may already be held in the wrapper's continuation, so there
 * is no guarantee that an IO read actually occurs under the covers.
 *
 * \param iow  the IO wrapper to read from.
 *
 * \return the s-expression that was read, or NULL if no s-expression
 *         could be read.
 */
sexp_t *read_one_sexp(sexp_iowrap_t *iow);
/**
 * Wrapper around the parser, kept for compatibility. It simply returns an
 * s-expression without exposing the parser's continuations to the caller.
 *
 * \param s    the string to parse.
 * \param len  presumably the length of \a s -- TODO confirm.
 *
 * \return the parsed s-expression.
 */
sexp_t *parse_sexp(char *s, int len);
/**
 * Wrapper around the parser for friendlier continuation use. It allows
 * iteratively popping one s-expression at a time from a string containing
 * one or more s-expressions.
 *
 * Pre-condition: the continuation (\a cc) must be NON-NULL!
 *
 * \param s    the string to parse.
 * \param len  presumably the length of \a s -- TODO confirm.
 * \param cc   continuation carrying the parser state between calls.
 *
 * \return the next s-expression parsed from the string.
 */
sexp_t *iparse_sexp(char *s, int len, pcont_t *cc);
/**
 * Given a LISP style s-expression string, parse it into a set of
 * connected sexp_t structures. The parser state is handed back as a
 * continuation rather than as a bare expression.
 *
 * \param s    the string to parse.
 * \param len  presumably the length of \a s -- TODO confirm.
 * \param pc   continuation to parse with (whether NULL is accepted here
 *             is not stated -- confirm against the implementation).
 *
 * \return a continuation describing the parser state. Per the pcont_t
 *         documentation, its last_sexp field holds the last complete
 *         s-expression (or NULL if more data is required), and a
 *         non-zero error field marks an unsuccessful parse.
 */
pcont_t *cparse_sexp(char *s, int len, pcont_t *pc);
/**
 * Given a sexp_t structure, free the memory it uses, and recursively free
 * the memory used by all sexp_t structures that it references.
 *
 * Note that this calls the deallocation routine for sexp_t elements, so
 * the element memory is not actually freed but stored away in a cache of
 * pre-allocated elements. This is an optimization that speeds up the
 * parser by eliminating wasteful free and re-malloc calls.
 *
 * \param s  the root of the s-expression to destroy.
 */
void destroy_sexp(sexp_t *s);
/* this is for C++ users */
#ifdef __cplusplus
}
#endif
#include "sexp_ops.h"
#endif /* __SEXP_H__ */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -