📄 parse_object.h
字号:
/* -*- Mode: C; -*- *//******************************************************************************* ** Copyright 2005 University of Cambridge Computer Laboratory. ** ** This file is part of Nprobe. ** ** Nprobe is free software; you can redistribute it and/or modify ** it under the terms of the GNU General Public License as published by ** the Free Software Foundation; either version 2 of the License, or ** (at your option) any later version. ** ** Nprobe is distributed in the hope that it will be useful, ** but WITHOUT ANY WARRANTY; without even the implied warranty of ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ** GNU General Public License for more details. ** ** You should have received a copy of the GNU General Public License ** along with Nprobe; if not, write to the Free Software ** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ** *******************************************************************************/#ifndef _PARSE_OBJECT_H__#define _PARSE_OBJECT_H__#include <setjmp.h>/*****************************************************************************//* * Enable building of chain of char buffers to hold references * - additional buffers appended as necessary * - dumped as continuous set of records - type indicated by initial byte * - records may be either: * - NULL terminated character sequences (ie strings) representing URLs * which may be normal URLs or base URLs for relative resolution * - end of relative scope markers * - or four bytes representing a 32-bit signed integer time stamp giving * offset from first response packet arrival time in us. This timestamp * applies to all following until another time stamp is encountered. */#define LINKS_BUFSZ (128*16)/* * Don't want to have to check for room in buffer for every write of known-size * records - so check variable length records (ie urls) to always leave room * for the max known size record. * * -12 allows for two record terminators plus (optional) time stamp and * preamble byte, and possibly refresh preamble */#define LINKS_BUFLEN (LINKS_BUFSZ - 12)#define LINKS_MAX_BUFS 32/* * This is an arbitrary limit, but designed to catch obvious runaway urls * resulting from malformed HTML or parse failures */#define MAX_URL_LEN 4096struct links_buf{ struct links_buf *next; /* chained in use or in pool */ unsigned short nchars; /* use - including delimiters */ char buf[LINKS_BUFSZ];};typedef struct links_buf links_buf_t; struct links_chars { struct links_buf *chain; /* first buffer in chain */ struct links_buf *current; /* current buffer */ char *buf; /* where currently writing */ unsigned char nbufs; unsigned short nchars; /* usage of current buffer */ unsigned int totchars; /* total o/a buffers */};/*****************************************************************************//* * Defs for link record_types * - construct bitwise to allow varying discriminations */#define DUMMY_VALUE 0x0#define LR_TS 0x0/* included by scripts, or archive (script) objects - default HTML included */#define LR_SCRIPTED 0x1#define LR_ARCHIVE 0x2/* in-line (fetched as part of load), link (fetched later), or unknown */#define LR_INLINE 0x4#define LR_LINK 0x8#define LR_UNKNOWN 0x10/* base-url for relative resolution (may be nested) */#define LR_REL_BASE 0x20#define LR_END_BASE 0x40 /* end of scope *//* link is a redirection */#define LR_REDIRECT 0x80#define LR_SCRIPT_ARCHIVE (LR_SCRIPTED | LR_ARCHIVE | LR_INLINE)#define LR_SCRIPTED_LINK (LR_SCRIPTED | LR_LINK)#define LR_SCRIPTED_INLINE (LR_SCRIPTED | LR_INLINE)#define LR_SCRIPTED_UNKNOWN (LR_SCRIPTED | LR_UNKNOWN)#define LR_REL_SCRIPT_BASE (LR_SCRIPTED | LR_REL_BASE)#define LR_END_SCRIPT_BASE (LR_SCRIPTED | LR_END_BASE)#define LR_REDIRECT_INLINE (LR_REDIRECT | LR_INLINE)#define LR_REDIRECT_LINK (LR_REDIRECT | LR_LINK)/* Allow for extension link types */#define LR_HIGH 0xff#define LR_REFRESH_SELF 0x100#define LR_REFRESH_URL 0x200#define LR_REFRESH (LR_REFRESH_SELF | LR_REFRESH_URL)#define LR_HAS_URL (LR_INLINE | LR_LINK | LR_UNKNOWN | LR_REL_BASE \ | LR_REFRESH_URL)#ifndef SWIGstruct tcp_conn; /* forward *//* * A template controllong parse of specific element tags where only one * attribute may give URL reference */struct simple_parse_template { char *url_attr; unsigned char record_type; /* inline/link/scripted/ etc*/};typedef struct simple_parse_template simple_parse_template_t;/* * A template controllong parse of specific element tags where two * attributes may give URL references */struct multi_parse_template { simple_parse_template_t temps[10];};typedef struct multi_parse_template multi_parse_template_t;/* State allows HTML body searches to span packets */#define TAGBUF_SZ 512#define LARGE_TAGBUF_SZ 5096typedef union tag_buf{ union tag_buf *q; char buf[LARGE_TAGBUF_SZ];} tag_buf_t;typedef struct ob_parse_state { unsigned int parse_state; unsigned int where_state; char tagbuf_buf[TAGBUF_SZ]; /* normal buffer for tag acquisition */ char *tagbuf; /* a larger one if required */ int tagbuf_sz; int tagbuf_indx;} ob_parse_state_t;/* defs for where state */#define P_NOT_STARTED 0#define P_FOUND_TAG 1#define P_FINDING_TAG_END 2#define P_FOLLOW_ERROR 0x3/* defs for parse state */#define P_TAG_SAVED 0x1#define P_LARGE_BUF 0x2#define P_NEED_LINK_TIMESTAMP 0x80000000#define OB_TAGBUF tconnp->su.http.reptrans->inner.ob_p_state.tagbuf#define OB_TAGBUF_BUF tconnp->su.http.reptrans->inner.ob_p_state.tagbuf_buf#define OB_TAGBUF_INDX tconnp->su.http.reptrans->inner.ob_p_state.tagbuf_indx#define OB_TAGBUF_SZ tconnp->su.http.reptrans->inner.ob_p_state.tagbuf_sz#define OB_PARSE_STATE tconnp->su.http.reptrans->inner.ob_p_state.parse_state#define OB_WHERE_STATE tconnp->su.http.reptrans->inner.ob_p_state.where_state#define OB_ATTR_NEEDED tconnp->su.http.reptrans->inner.ob_p_state.needed#define OB_PARSE_STATE_MATCH_INDX tconnp->su.http.reptrans->inner.ob_p_state.match_indx#define OB_PARSE_STATE_TMPP tconnp->su.http.reptrans->inner.ob_p_state.tmpp#define OB_PTEMP_SLOT tconnp->su.http.reptrans->inner.ob_p_state.temp_slot /* Apart from top nibble (time stamp state) states are exclusive */#define SET_OB_PARSE_STATE(state) \MACRO_BEGIN \ OB_PARSE_STATE |= (state); \MACRO_END#define CLEAR_OB_PARSE_STATE(state) \MACRO_BEGIN \ OB_PARSE_STATE &= ~(state); \MACRO_END#define SET_OB_WHERE_STATE(state) \MACRO_BEGIN \OB_WHERE_STATE = (state); \MACRO_END#if 0#define PULL(buf, adj) \MACRO_BEGIN \assert(adj <= buf.len); \(buf).len-=(adj); \(buf).buf+=(adj); \MACRO_END#endif#define PULL(buf, adj) \MACRO_BEGIN \if (adj > (buf).len) \ goto error; \(buf).len-=(adj); \(buf).buf+=(adj); \MACRO_END#define JUMP_SPACE(buf, where) \MACRO_BEGIN \ while (*(buf).buf == ' ') \ PULL(buf, 1); \ if ((buf).len == 0) \ { \ SET_OB_WHERE_STATE(where); \ goto out; \ } \ MACRO_END#if 0#define JUMP_SPACE(buf, where) \MACRO_BEGIN \ while (*(buf).buf == ' ') \ PULL(buf, 1); \ if (LEN == 0) \ { \ SET_OB_WHERE_STATE(where); \ goto out; \ } \ MACRO_END#endif#if 0#define JUMP_SPACE(buf, where) \MACRO_BEGIN \while (*(buf).buf == ' ') \ if (LEN == 0) \ { \ SET_OB_WHERE_STATE(where); \ goto out; \ } \ PULL(buf, 1); \MACRO_END#endif/*****************************************************************************/struct tcp_conn; /* forward *//* * Parse_object.c */inline void chain_new_links_buf(struct links_chars *lc, jmp_buf bail); inline void chain_first_links_buf(struct links_chars *lc);inline int set_large_pbuf(struct tcp_conn *tconnp);inline char *find_url_end(char *s, int len);inline char *find_linkref_end(char *s, int len);inline int clear_chars(char *start, char *end, char *chars);inline void write_base_scope_end(struct tcp_conn *tconnp, unsigned char type);inline void write_tstamp(struct links_chars *chars, int offset_us);inline void write_url(struct links_chars *chars, char *start, int len, jmp_buf bail);char * get_url(struct tcp_conn *tconnp, char *start, char *end, unsigned char url_type);int parse_rep_body(prec_t *pp, struct tcp_conn *tconnp, int len, short code);int parse_simple(struct tcp_conn *tconnp, simple_parse_template_t *temp, char *start, char *end);int parse_multi(struct tcp_conn *tconnp, multi_parse_template_t *temp, char *start, char *end);#endif /* ifndef SWIG */#endif /* _PARSE_OBJECT_H__ *//* * end parse_object.h */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -