📄 hi_norm.c
字号:
/**
**  @file       hi_norm.c
**
**  @author     Daniel Roelker <droelker@sourcefire.com>
**
**  @brief      Contains normalization skeleton for server and client
**              normalization routines.
**
**  This file contains the core routines to normalize the different fields
**  within the HTTP protocol.  We currently only support client URI
**  normalization, but the hooks are here to easily add other routines.
**
**  NOTES:
**      - Initial development.  DJR
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <sys/types.h>
#include "hi_client_norm.h"
#include "hi_eo.h"
#include "hi_eo_events.h"
#include "hi_eo_log.h"
#include "hi_ui_iis_unicode_map.h"
#include "hi_return_codes.h"
#include "hi_si.h"
#include "hi_util.h"
#include "hi_util_xmalloc.h"

/* Number of slots in URI_NORM_STATE.dir_track. */
#define MAX_DIRS 2048

/* hex_lookup[] entry value that the decoders treat as "not a hex digit". */
#define NO_HEX_VAL -1
/* valid_lookup[] entry value that the decoders treat as "base36 digit". */
#define BASE36_VAL -2
/*
**  NOTE(review): presumably the valid_lookup[] value for a plain hex digit;
**  the table initialisation is not visible here -- confirm.
*/
#define HEX_VAL 1

/**
**  This define checks for negative return codes, since we have multiple
**  reasons to error.  This just cuts the return code checks, especially
**  as we add more errors.
**
**  It is used as a mask (value & GET_ERR), which selects the sign bit
**  assuming int is 32 bits wide.
*/
#define GET_ERR 0x80000000

/* Returned by the Get routines when the pointer has left the buffer. */
#define END_OF_BUFFER -1
/* Returned by GetPtr() when it lands on a '%' and double decoding is on. */
#define DOUBLE_ENCODING -2
/*
**  NOTE(review): shares the value -2 with DOUBLE_ENCODING; its users are
**  not visible in this part of the file.
*/
#define DIR_TRAV -2
/* Placeholder returned in place of a non-ASCII or undecodable char. */
#define NON_ASCII_CHAR 0xff

/*
**  Signature of the byte-fetching routine handed to UDecode():
**  (session, start of URI, end of URI, in/out current pointer) -> byte
**  value or a negative error code.  GetPtr() has this signature.
*/
typedef int (*DECODE_FUNC)(HI_SESSION *, u_char *, u_char *, u_char **);

/*
**  State carried while normalizing a URI.
**  NOTE(review): the code that fills and reads these fields is not visible
**  here; field meanings below are taken from their names only.
*/
typedef struct s_URI_NORM_STATE
{
    u_char *abs_uri;    /* presumably marks an absolute-URI position */
    u_char *param;      /* presumably marks the start of the parameters */

    /*
    **  Directory tracking
    */
    u_char *dir_track[MAX_DIRS];
    u_int   dir_count;

} URI_NORM_STATE;

/*
**  Per-byte lookup tables, indexed by an unsigned char value.
**  hex_lookup[] yields the numeric value of a digit (NO_HEX_VAL when there
**  is none); valid_lookup[] is negative for bytes that are not plain hex
**  digits (BASE36_VAL for base36 digits).
*/
static int hex_lookup[256];
static int valid_lookup[256];

/*
**  NAME
**    GetPtr::
*/
/**
**  This routine is for getting bytes in the U decode.
**
**  This checks the current bounds and checks for the double decoding.
**  This routine differs from the other Get routines because it returns
**  other values than just END_OF_BUFFER and the char.
**
**  We also return DOUBLE_ENCODING if there is a % and double decoding
**  is turned on.
**
**  When using this function it is important to note that it increments
**  the buffer before checking the bounds.
So, if you call this function** in a loop and don't check for END_OF_BUFFER being returned, then ** you are going to overwrite the buffer. If I put the check in, you** would just be in an never-ending loop. So just use this correctly.** ** @param ServerConf the server configuration** @param start the start of the URI** @param end the end of the URI** @param ptr the current pointer into the URI** ** @return integer** ** @retval END_OF_BUFFER the end of the buffer has been reached.** @retval DOUBLE_ENCODING a percent was found and double decoding is on** @retval <= 0xff an ASCII char */static int GetPtr(HI_SESSION *Session, u_char *start, u_char *end, u_char **ptr){ HTTPINSPECT_CONF *ServerConf = Session->server_conf; (*ptr)++; if(!hi_util_in_bounds(start, end, *ptr)) return END_OF_BUFFER; if(ServerConf->double_decoding.on && **ptr == '%') return DOUBLE_ENCODING; return (int)**ptr;}/*** NAME** UDecode::*//**** Handles the single decode for %U encoding.** ** This routine receives the ptr pointing to the u. We check the bounds** and continue with processing. %u encoding works by specifying the** exact codepoint to be used. For example, %u002f would be /. So this** all seems fine. BUT, the problem is that IIS maps multiple codepoints** to ASCII characters. So, %u2044 also maps to /. So this is what we** need to handle here.** ** This routine only handles the single encoding. For double decoding,** %u is handled in DoubleDecode(). 
It's the same routine, with just** the GetByte function different.** ** We use a get_byte function to get the bytes, so we can use this** routine for PercentDecode and for DoubleDecode.**** @param ServerConf the server configuration** @param start the start of the URI** @param end the end of the URI** @param ptr the current pointer into the URI** @param get_byte the function pointer to get bytes.** ** @return integer** ** @retval END_OF_BUFFER we are at the end of the buffer** @retval DOUBLE_ENCODING this U encoding is possible double encoded** @retval NON_ASCII_CHAR return this char for non-ascii or bad decodes** @retval iChar this is the char that we decoded.*/static int UDecode(HI_SESSION *Session, u_char *start, u_char *end, u_char **ptr, DECODE_FUNC get_byte){ HTTPINSPECT_CONF *ServerConf = Session->server_conf; int iByte; int iNorm; int iCtr; iNorm = 0; for(iCtr = 0; iCtr < 4; iCtr++) { iByte = get_byte(Session, start, end, ptr); if(iByte & GET_ERR) return iByte; if(hex_lookup[(u_char)iByte] == NO_HEX_VAL) return NON_ASCII_CHAR; iNorm <<= 4; iNorm = (iNorm | (hex_lookup[(u_char)iByte])); } /* ** If the decoded codepoint is greater than a single byte value, ** then we return a NON_ASCII_CHAR. */ if(iNorm > 0xff) { /* ** We check here for IIS codepoints that map to ASCII chars. */ if(ServerConf->iis_unicode.on) { iNorm = ServerConf->iis_unicode_map[iNorm]; if(iNorm == HI_UI_NON_ASCII_CODEPOINT) { iNorm = NON_ASCII_CHAR; } if(hi_eo_generate_event(Session, ServerConf->iis_unicode.alert)) { hi_eo_client_event_log(Session, HI_EO_CLIENT_IIS_UNICODE, NULL, NULL); } } else { iNorm = NON_ASCII_CHAR; } } /* ** Check if we alert on this encoding */ if(hi_eo_generate_event(Session, ServerConf->u_encoding.alert)) { hi_eo_client_event_log(Session, HI_EO_CLIENT_U_ENCODE, NULL, NULL); } return iNorm;}/*** NAME** PercentDecode::*//**** This is the first level of decoding, and deals with ASCII, U, and** double decoding.**** This function is the main decoding function. 
It handles all the ASCII** encoding and the U encoding, and tells us when there is a double** encoding.** ** We use the GetPtr() routine to get the bytes for us. This routine** checks for DOUBLE_ENCODING and tells us about it if it finds something,** so we can reset the ptrs and run it through the double decoding** routine.** ** The philosophy behind this routine is that if we run out of buffer** we return such, the only other thing we return besides the decodes** char is a NON_ASCII_CHAR in the case that we try and decode something** like %tt. This is no good, so we return a place holder.** ** @param ServerConf the server configuration** @param start the start of the URI** @param end the end of the URI** @param ptr the current pointer into the URI** ** @return integer** ** @retval END_OF_BUFFER We've hit the end of buffer while decoding.** @retval NON_ASCII_CHAR Invalid hex encoding, so we return a placeholder.** @retval char return the valid char** ** @see GetPtr()*/static int PercentDecode(HI_SESSION *Session, u_char *start, u_char *end, u_char **ptr){ HTTPINSPECT_CONF *ServerConf = Session->server_conf; int iByte; u_char *orig_ptr; int iNorm; orig_ptr = *ptr; iByte = GetPtr(Session, start, end, ptr); if(iByte & GET_ERR) { if(iByte == END_OF_BUFFER) return END_OF_BUFFER; if(iByte == DOUBLE_ENCODING) { *ptr = orig_ptr; return (int)**ptr; } } /* ** Initialize the normalization byte */ iNorm = 0; /* ** hex values */ if(valid_lookup[(u_char)iByte] < 0) { /* ** Check for %u encoding. ** ** The u-encoding loop always returns something. */ if(ServerConf->u_encoding.on && (toupper(iByte) == 'U')) { iNorm = UDecode(Session, start, end, ptr, GetPtr); /* ** We have to handle the double meaning of END_OF_BUFFER ** when using the GetPtr() function. */ if(iNorm & GET_ERR) { if(iNorm == END_OF_BUFFER) { /* ** We have reached the end of the buffer while ** processing a U encoding. 
*/ return END_OF_BUFFER; } if(iNorm == DOUBLE_ENCODING) { *ptr = orig_ptr; return (int)**ptr; } } return iNorm; } else if(!ServerConf->base36.on || valid_lookup[(u_char)iByte] != BASE36_VAL) { return NON_ASCII_CHAR; } /* ** The logic above dictates that if we get to this point, we ** have a valid base36 encoding, so let's log the event. */ if(hi_eo_generate_event(Session, ServerConf->base36.alert)) { hi_eo_client_event_log(Session, HI_EO_CLIENT_BASE36, NULL, NULL); } } iNorm = (hex_lookup[(u_char)iByte]<<4); iByte = GetPtr(Session, start, end, ptr); if(iByte & GET_ERR) { if(iByte == END_OF_BUFFER) return END_OF_BUFFER; if(iByte == DOUBLE_ENCODING) { *ptr = orig_ptr; return (int)**ptr; } } if(valid_lookup[(u_char)iByte] < 0) { if(!ServerConf->base36.on || valid_lookup[(u_char)iByte] != BASE36_VAL) { return NON_ASCII_CHAR; } /* ** Once again, we know we have a valid base36 encoding, let's alert ** if possible. */ if(hi_eo_generate_event(Session, ServerConf->base36.alert)) { hi_eo_client_event_log(Session, HI_EO_CLIENT_BASE36, NULL, NULL); } } iNorm = (iNorm | (hex_lookup[(u_char)iByte])) & 0xff; if(hi_eo_generate_event(Session,ServerConf->ascii.alert)) { hi_eo_client_event_log(Session, HI_EO_CLIENT_ASCII, NULL, NULL); } return iNorm;}/*** NAME** GetChar::*//**** Wrapper for PercentDecode() and handles the return values from** PercentDecode().** ** This really decodes the chars for UnicodeDecode(). 
If the char is** a percent then we process stuff, otherwise we just increment the** pointer and return.** ** @param ServerConf the server configuration** @param start the start of the URI** @param end the end of the URI** @param ptr the current pointer into the URI** @param bare_byte value for a non-ASCII char or a decoded non-ASCII char** ** @return integer** ** @retval END_OF_BUFFER End of the buffer has been reached before decode.** @retval NON_ASCII_CHAR End of buffer during decoding, return decoded char.** @retval char return the valid decoded/undecoded char** ** @see PercentDecode()** @see GetByte()*/static int GetChar(HI_SESSION *Session, u_char *start, u_char *end, u_char **ptr, int *bare_byte){ HTTPINSPECT_CONF *ServerConf = Session->server_conf; int iNorm = (int)(**ptr); if(!hi_util_in_bounds(start, end, *ptr)) return END_OF_BUFFER; if(**ptr == '%' && ServerConf->ascii.on) { /* ** We go into percent encoding. */ iNorm = PercentDecode(Session, start, end, ptr); /* ** If during the course of PercentDecode() we run into the end ** of the buffer, then we return early (WITHOUT INCREMENTING ptr) ** with a NON_ASCII_CHAR. */ if(iNorm == END_OF_BUFFER) return NON_ASCII_CHAR; *bare_byte = 0; } else { if(ServerConf->bare_byte.on && (u_char)iNorm > 0x7f) { if(hi_eo_generate_event(Session, ServerConf->bare_byte.alert)) { hi_eo_client_event_log(Session, HI_EO_CLIENT_BARE_BYTE, NULL, NULL); } /* ** Set the bare_byte flag */ *bare_byte = 0; } else { /* ** Set the bare_byte flag negative. */ *bare_byte = 1; } } /* ** Increment the buffer. */ (*ptr)++; return iNorm;}/*** NAME** UTF8Decode::*//**** Decode the UTF-8 sequences and check for valid codepoints via the** Unicode standard and the IIS standard.** ** We decode up to 3 bytes of UTF-8 because that's all I've been able to** get to work on various servers, so let's reduce some false positives.** So we decode valid UTF-8 sequences and then check the value. If the** value is ASCII, then it's decoded to that. 
Otherwise, if iis_unicode** is turned on, we will check the unicode codemap for valid IIS mappings.** If a mapping turns up, then we return the mapped ASCII.** ** @param ServerConf the server configuration** @param start the start of the URI** @param end the end of the URI** @param ptr the current pointer into the URI** ** @return integer** ** @retval NON_ASCII_CHAR Reached end of buffer while decoding** @retval char return the decoded or badly decoded char** ** @see GetByte()** @see UnicodeDecode()*/static int UTF8Decode(HI_SESSION *Session, u_char *start, u_char *end, u_char **ptr, int iFirst){ HTTPINSPECT_CONF *ServerConf = Session->server_conf; int iBareByte; int iNorm; int iNumBytes; int iCtr; int iByte; /* ** Right now we support up to 3 byte unicode sequences. We can add ** more if any of the HTTP servers support more. */ if((iFirst & 0xe0) == 0xc0) { iNumBytes = 1; iNorm = iFirst & 0x1f; } else if((iFirst & 0xf0) == 0xe0) { iNumBytes = 2; iNorm = iFirst & 0x0f; } else { /*
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -