findhash.c
来自「开放源码的编译器open watcom 1.6.0版的源代码」· C语言 代码 · 共 1,153 行 · 第 1/3 页
C
1,153 行
/****************************************************************************
*
* Open Watcom Project
*
* Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved.
*
* ========================================================================
*
* This file contains Original Code and/or Modifications of Original
* Code as defined in and that are subject to the Sybase Open Watcom
* Public License version 1.0 (the 'License'). You may not use this file
* except in compliance with the License. BY USING THIS FILE YOU AGREE TO
* ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is
* provided with the Original Code and Modifications, and is also
* available at www.sybase.com/developer/opensource.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM
* ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR
* NON-INFRINGEMENT. Please see the License for the specific language
* governing rights and limitations under the License.
*
* ========================================================================
*
* Description: WHEN YOU FIGURE OUT WHAT THIS FILE DOES, PLEASE
* DESCRIBE IT HERE!
*
****************************************************************************/
#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdarg.h>
typedef enum { FALSE = 0, TRUE = 1 } boolean;
typedef enum {
LETTER_0 = 1,
LETTER_1,
LETTER_2,
LETTER_3,
LETTER_4,
LETTER_5,
LETTER_6,
LETTER_7,
LETTER_8,
LETTER_9,
LETTER_a,
LETTER_b,
LETTER_c,
LETTER_d,
LETTER_e,
LETTER_f,
LETTER_g,
LETTER_h,
LETTER_i,
LETTER_j,
LETTER_k,
LETTER_l,
LETTER_m,
LETTER_n,
LETTER_o,
LETTER_p,
LETTER_q,
LETTER_r,
LETTER_s,
LETTER_t,
LETTER_u,
LETTER_v,
LETTER_w,
LETTER_x,
LETTER_y,
LETTER_z,
LETTER_A,
LETTER_B,
LETTER_C,
LETTER_D,
LETTER_E,
LETTER_F,
LETTER_G,
LETTER_H,
LETTER_I,
LETTER_J,
LETTER_K,
LETTER_L,
LETTER_M,
LETTER_N,
LETTER_O,
LETTER_P,
LETTER_Q,
LETTER_R,
LETTER_S,
LETTER_T,
LETTER_U,
LETTER_V,
LETTER_W,
LETTER_X,
LETTER_Y,
LETTER_Z,
LETTER_NULLCHAR,
LETTER_DOLLAR,
LETTER_BACK_QUOTE,
LETTER__,
LETTER_UNKNOWN,
LETTER_MIN = LETTER_0,
LETTER_MAX = LETTER_UNKNOWN,
} letter_t;
typedef unsigned keyword_t;
#define CHAR_UNKNOWN '#'
#define NULL_KEYWORD 0
#define MAX_KEYWORDS 200
#define MAX_HASHSIZE (2*MAX_KEYWORDS)
#define MAX_WEIGHTS 256
#define BAD_THRESHOLD 5
char *tokens[ MAX_KEYWORDS+1 ];
char *token_class[ MAX_KEYWORDS+1 ];
unsigned position[ MAX_KEYWORDS+1 ];
letter_t first[ MAX_KEYWORDS+1 ];
letter_t last[ MAX_KEYWORDS+1 ];
size_t len[ MAX_KEYWORDS+1 ];
size_t init_hash[ MAX_KEYWORDS+1 ];
unsigned done[ MAX_KEYWORDS+1 ];
unsigned hash[ MAX_KEYWORDS+1 ];
unsigned ordered[ MAX_KEYWORDS+1 ];
unsigned long collisions[ MAX_KEYWORDS+1 ];
keyword_t used[ MAX_HASHSIZE ];
keyword_t partial[ MAX_HASHSIZE ];
unsigned freq[ LETTER_MAX+1 ];
unsigned weights[ LETTER_MAX+1 ];
letter_t next[ LETTER_MAX+1 ];
letter_t prev[ LETTER_MAX+1 ];
boolean available[ MAX_WEIGHTS+1 ];
letter_t most_used_character;
unsigned num_keywords;
unsigned hashsize;
unsigned hashmask;
unsigned first_scale;
unsigned last_scale;
size_t min_len;
size_t max_len;
unsigned extra;
unsigned long len_mask;
struct {
unsigned quiet : 1; /* no console output req'd */
unsigned imperfect : 1; /* non-minimal perfect hash function allowed */
unsigned mask_hash : 1; /* use mod 2^n */
unsigned tiny_output : 1;/* output .gh file for small lists */
unsigned align : 1; /* output strings length mod 4 */
} flags;
FILE *outfile;
void error( char *msg )
/*********************/
{
fputs( msg, stderr );
fputc( '\n', stderr );
}
void fatal( char *msg )
/*********************/
{
error( msg );
exit( EXIT_FAILURE );
}
void output( char *msg, ... )
/***************************/
{
va_list args;
if( flags.quiet ) {
return;
}
va_start( args, msg );
vprintf( msg, args );
va_end( args );
}
letter_t make_letter( char c )
/****************************/
{
if( c == '\0' ) {
return( LETTER_NULLCHAR );
}
if( c == '$' ) {
return( LETTER_DOLLAR );
}
if( c == '`' ) {
return( LETTER_BACK_QUOTE );
}
if( c == '_' ) {
return( LETTER__ );
}
if( isdigit( c ) ) {
return( ( c - '0' ) + LETTER_0 );
}
if( c >= 'a' && c <= 'i' ) {
return( ( c - 'a' ) + LETTER_a );
}
if( c >= 'j' && c <= 'r' ) {
return( ( c - 'j' ) + LETTER_j );
}
if( c >= 's' && c <= 'z' ) {
return( ( c - 's' ) + LETTER_s );
}
if( c >= 'A' && c <= 'I' ) {
return( ( c - 'A' ) + LETTER_A );
}
if( c >= 'J' && c <= 'R' ) {
return( ( c - 'J' ) + LETTER_J );
}
if( c >= 'S' && c <= 'Z' ) {
return( ( c - 'S' ) + LETTER_S );
}
return( LETTER_UNKNOWN );
}
char make_char( letter_t i )
/**************************/
{
if( i == LETTER_NULLCHAR ) {
return( '\0' );
}
if( i == LETTER__ ) {
return( '_' );
}
if( i == LETTER_DOLLAR ) {
return( '$' );
}
if( i == LETTER_BACK_QUOTE ) {
return( '`' );
}
if( i >= LETTER_0 && i <= LETTER_9 ) {
return( ( i - LETTER_0 ) + '0' );
}
if( i >= LETTER_a && i <= LETTER_i ) {
return( ( i - LETTER_a ) + 'a' );
}
if( i >= LETTER_j && i <= LETTER_r ) {
return( ( i - LETTER_j ) + 'j' );
}
if( i >= LETTER_s && i <= LETTER_z ) {
return( ( i - LETTER_s ) + 's' );
}
if( i >= LETTER_A && i <= LETTER_I ) {
return( ( i - LETTER_A ) + 'A' );
}
if( i >= LETTER_J && i <= LETTER_R ) {
return( ( i - LETTER_J ) + 'J' );
}
if( i >= LETTER_S && i <= LETTER_Z ) {
return( ( i - LETTER_S ) + 'S' );
}
return( CHAR_UNKNOWN );
}
char *make_define( letter_t i )
/**************************/
{
static char define_string[2];
if( i == LETTER_NULLCHAR ) {
return( "00h" );
}
if( i == LETTER_DOLLAR ) {
return( "DOLLAR" );
}
if( i == LETTER_BACK_QUOTE ) {
return( "BACK_QUOTE" );
}
if( i == LETTER__ ) {
define_string[0] = '_';
} else if( i >= LETTER_0 && i <= LETTER_9 ) {
define_string[0] = ( ( i - LETTER_0 ) + '0' );
} else if( i >= LETTER_a && i <= LETTER_i ) {
define_string[0] = ( ( i - LETTER_a ) + 'a' );
} else if( i >= LETTER_j && i <= LETTER_r ) {
define_string[0] = ( ( i - LETTER_j ) + 'j' );
} else if( i >= LETTER_s && i <= LETTER_z ) {
define_string[0] = ( ( i - LETTER_s ) + 's' );
} else if( i >= LETTER_A && i <= LETTER_I ) {
define_string[0] = ( ( i - LETTER_A ) + 'A' );
} else if( i >= LETTER_J && i <= LETTER_R ) {
define_string[0] = ( ( i - LETTER_J ) + 'J' );
} else if( i >= LETTER_S && i <= LETTER_Z ) {
define_string[0] = ( ( i - LETTER_S ) + 'S' );
} else {
return( "UNKNOWN" );
}
define_string[1] = '\0';
return define_string;
}
int cmptok( const void *_v1, const void *_v2 )
/****************************************************/
{
const keyword_t *v1 = _v1;
const keyword_t *v2 = _v2;
return( strcmp( tokens[*v1], tokens[*v2] ) );
}
void swap( char **v1, char **v2 )
/*******************************/
{
char *tmp;
tmp = *v1;
*v1 = *v2;
*v2 = tmp;
}
void init_tokens( char **input_file )
/***********************************/
{
int c;
char *check;
keyword_t i,j,k;
keyword_t size;
unsigned col;
size_t key_len;
size_t tok_len;
FILE *fp;
auto char keyword[80];
auto char class[80];
tokens[0] = "";
min_len = 80;
max_len = 0;
size = 1;
for( ; *input_file != NULL; ++input_file ) {
check = *input_file;
if( check[0] == '/' || check[0] == '-' ) {
if( tolower( check[1] ) == 'q' && check[2] == '\0' ) {
flags.quiet = TRUE;
continue;
}
if( tolower( check[1] ) == 'i' && check[2] == '\0' ) {
flags.imperfect = TRUE;
continue;
}
if( tolower( check[1] ) == 'm' && check[2] == '\0' ) {
flags.mask_hash = TRUE;
continue;
}
if( tolower( check[1] ) == 't' && check[2] == '\0' ) {
flags.tiny_output = TRUE;
continue;
}
if( tolower( check[1] ) == 'a' && check[2] == '\0' ) {
flags.align = TRUE;
continue;
}
}
fp = fopen( *input_file, "r" );
if( fp == NULL ) {
fatal( "cannot open keyword file" );
}
for(;;) {
c = fgetc( fp );
if( c == '#' ) {
for(;;) {
c = fgetc( fp );
if( ( c == EOF ) || ( c == '\n' ) ) {
break;
}
}
continue;
}
ungetc( c, fp );
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?