findhash.c

来自「开放源码的编译器open watcom 1.6.0版的源代码」· C语言 代码 · 共 1,153 行 · 第 1/3 页

C
1,153
字号
/****************************************************************************
*
*                            Open Watcom Project
*
*    Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved.
*
*  ========================================================================
*
*    This file contains Original Code and/or Modifications of Original
*    Code as defined in and that are subject to the Sybase Open Watcom
*    Public License version 1.0 (the 'License'). You may not use this file
*    except in compliance with the License. BY USING THIS FILE YOU AGREE TO
*    ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is
*    provided with the Original Code and Modifications, and is also
*    available at www.sybase.com/developer/opensource.
*
*    The Original Code and all software distributed under the License are
*    distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
*    EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM
*    ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
*    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR
*    NON-INFRINGEMENT. Please see the License for the specific language
*    governing rights and limitations under the License.
*
*  ========================================================================
*
* Description:  WHEN YOU FIGURE OUT WHAT THIS FILE DOES, PLEASE
*               DESCRIBE IT HERE!
*
****************************************************************************/


#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdarg.h>

typedef enum { FALSE = 0, TRUE = 1 } boolean;
typedef enum {
    LETTER_0 = 1,
    LETTER_1,
    LETTER_2,
    LETTER_3,
    LETTER_4,
    LETTER_5,
    LETTER_6,
    LETTER_7,
    LETTER_8,
    LETTER_9,
    LETTER_a,
    LETTER_b,
    LETTER_c,
    LETTER_d,
    LETTER_e,
    LETTER_f,
    LETTER_g,
    LETTER_h,
    LETTER_i,
    LETTER_j,
    LETTER_k,
    LETTER_l,
    LETTER_m,
    LETTER_n,
    LETTER_o,
    LETTER_p,
    LETTER_q,
    LETTER_r,
    LETTER_s,
    LETTER_t,
    LETTER_u,
    LETTER_v,
    LETTER_w,
    LETTER_x,
    LETTER_y,
    LETTER_z,
    LETTER_A,
    LETTER_B,
    LETTER_C,
    LETTER_D,
    LETTER_E,
    LETTER_F,
    LETTER_G,
    LETTER_H,
    LETTER_I,
    LETTER_J,
    LETTER_K,
    LETTER_L,
    LETTER_M,
    LETTER_N,
    LETTER_O,
    LETTER_P,
    LETTER_Q,
    LETTER_R,
    LETTER_S,
    LETTER_T,
    LETTER_U,
    LETTER_V,
    LETTER_W,
    LETTER_X,
    LETTER_Y,
    LETTER_Z,
    LETTER_NULLCHAR,
    LETTER_DOLLAR,
    LETTER_BACK_QUOTE,
    LETTER__,
    LETTER_UNKNOWN,

    LETTER_MIN = LETTER_0,
    LETTER_MAX = LETTER_UNKNOWN,
} letter_t;
typedef unsigned keyword_t;

#define CHAR_UNKNOWN    '#'
#define NULL_KEYWORD    0

#define MAX_KEYWORDS    200
#define MAX_HASHSIZE    (2*MAX_KEYWORDS)
#define MAX_WEIGHTS     256
#define BAD_THRESHOLD   5

char *tokens[ MAX_KEYWORDS+1 ];
char *token_class[ MAX_KEYWORDS+1 ];
unsigned position[ MAX_KEYWORDS+1 ];
letter_t first[ MAX_KEYWORDS+1 ];
letter_t last[ MAX_KEYWORDS+1 ];
size_t len[ MAX_KEYWORDS+1 ];
size_t init_hash[ MAX_KEYWORDS+1 ];
unsigned done[ MAX_KEYWORDS+1 ];
unsigned hash[ MAX_KEYWORDS+1 ];
unsigned ordered[ MAX_KEYWORDS+1 ];
unsigned long collisions[ MAX_KEYWORDS+1 ];

keyword_t used[ MAX_HASHSIZE ];
keyword_t partial[ MAX_HASHSIZE ];

unsigned freq[ LETTER_MAX+1 ];
unsigned weights[ LETTER_MAX+1 ];
letter_t next[ LETTER_MAX+1 ];
letter_t prev[ LETTER_MAX+1 ];

boolean available[ MAX_WEIGHTS+1 ];

letter_t most_used_character;
unsigned num_keywords;
unsigned hashsize;
unsigned hashmask;
unsigned first_scale;
unsigned last_scale;
size_t min_len;
size_t max_len;
unsigned extra;
unsigned long len_mask;

struct {
    unsigned    quiet : 1;      /* no console output req'd */
    unsigned    imperfect : 1;  /* non-minimal perfect hash function allowed */
    unsigned    mask_hash : 1;  /* use mod 2^n */
    unsigned    tiny_output : 1;/* output .gh file for small lists */
    unsigned    align : 1;      /* output strings length mod 4 */
} flags;

FILE *outfile;

void error( char *msg )
/*********************/
{
    fputs( msg, stderr );
    fputc( '\n', stderr );
}

void fatal( char *msg )
/*********************/
{
    error( msg );
    exit( EXIT_FAILURE );
}

void output( char *msg, ... )
/***************************/
{
    va_list args;

    if( flags.quiet ) {
        return;
    }
    va_start( args, msg );
    vprintf( msg, args );
    va_end( args );
}

letter_t make_letter( char c )
/****************************/
{
    if( c == '\0' ) {
        return( LETTER_NULLCHAR );
    }
    if( c == '$' ) {
        return( LETTER_DOLLAR );
    }
    if( c == '`' ) {
        return( LETTER_BACK_QUOTE );
    }
    if( c == '_' ) {
        return( LETTER__ );
    }
    if( isdigit( c ) ) {
        return( ( c - '0' ) + LETTER_0 );
    }
    if( c >= 'a' && c <= 'i' ) {
        return( ( c - 'a' ) + LETTER_a );
    }
    if( c >= 'j' && c <= 'r' ) {
        return( ( c - 'j' ) + LETTER_j );
    }
    if( c >= 's' && c <= 'z' ) {
        return( ( c - 's' ) + LETTER_s );
    }
    if( c >= 'A' && c <= 'I' ) {
        return( ( c - 'A' ) + LETTER_A );
    }
    if( c >= 'J' && c <= 'R' ) {
        return( ( c - 'J' ) + LETTER_J );
    }
    if( c >= 'S' && c <= 'Z' ) {
        return( ( c - 'S' ) + LETTER_S );
    }
    return( LETTER_UNKNOWN );
}

char make_char( letter_t i )
/**************************/
{
    if( i == LETTER_NULLCHAR ) {
        return( '\0' );
    }
    if( i == LETTER__ ) {
        return( '_' );
    }
    if( i == LETTER_DOLLAR ) {
        return( '$' );
    }
    if( i == LETTER_BACK_QUOTE ) {
        return( '`' );
    }
    if( i >= LETTER_0 && i <= LETTER_9 ) {
        return( ( i - LETTER_0 ) + '0' );
    }
    if( i >= LETTER_a && i <= LETTER_i ) {
        return( ( i - LETTER_a ) + 'a' );
    }
    if( i >= LETTER_j && i <= LETTER_r ) {
        return( ( i - LETTER_j ) + 'j' );
    }
    if( i >= LETTER_s && i <= LETTER_z ) {
        return( ( i - LETTER_s ) + 's' );
    }
    if( i >= LETTER_A && i <= LETTER_I ) {
        return( ( i - LETTER_A ) + 'A' );
    }
    if( i >= LETTER_J && i <= LETTER_R ) {
        return( ( i - LETTER_J ) + 'J' );
    }
    if( i >= LETTER_S && i <= LETTER_Z ) {
        return( ( i - LETTER_S ) + 'S' );
    }
    return( CHAR_UNKNOWN );
}

char *make_define( letter_t i )
/**************************/
{
    static char define_string[2];

    if( i == LETTER_NULLCHAR ) {
        return( "00h" );
    }
    if( i == LETTER_DOLLAR ) {
        return( "DOLLAR" );
    }
    if( i == LETTER_BACK_QUOTE ) {
        return( "BACK_QUOTE" );
    }
    if( i == LETTER__ ) {
        define_string[0] = '_';
    } else if( i >= LETTER_0 && i <= LETTER_9 ) {
        define_string[0] = ( ( i - LETTER_0 ) + '0' );
    } else if( i >= LETTER_a && i <= LETTER_i ) {
        define_string[0] = ( ( i - LETTER_a ) + 'a' );
    } else if( i >= LETTER_j && i <= LETTER_r ) {
        define_string[0] = ( ( i - LETTER_j ) + 'j' );
    } else if( i >= LETTER_s && i <= LETTER_z ) {
        define_string[0] = ( ( i - LETTER_s ) + 's' );
    } else if( i >= LETTER_A && i <= LETTER_I ) {
        define_string[0] = ( ( i - LETTER_A ) + 'A' );
    } else if( i >= LETTER_J && i <= LETTER_R ) {
        define_string[0] = ( ( i - LETTER_J ) + 'J' );
    } else if( i >= LETTER_S && i <= LETTER_Z ) {
        define_string[0] = ( ( i - LETTER_S ) + 'S' );
    } else {
        return( "UNKNOWN" );
    }
    define_string[1] = '\0';
    return define_string;
}

int cmptok( const void *_v1, const void *_v2 )
/****************************************************/
{
    const keyword_t *v1 = _v1;
    const keyword_t *v2 = _v2;

    return( strcmp( tokens[*v1], tokens[*v2] ) );
}

void swap( char **v1, char **v2 )
/*******************************/
{
    char *tmp;

    tmp = *v1;
    *v1 = *v2;
    *v2 = tmp;
}

void init_tokens( char **input_file )
/***********************************/
{
    int c;
    char *check;
    keyword_t i,j,k;
    keyword_t size;
    unsigned col;
    size_t key_len;
    size_t tok_len;
    FILE *fp;
    auto char keyword[80];
    auto char class[80];

    tokens[0] = "";
    min_len = 80;
    max_len = 0;
    size = 1;
    for( ; *input_file != NULL; ++input_file ) {
        check = *input_file;
        if( check[0] == '/' || check[0] == '-' ) {
            if( tolower( check[1] ) == 'q' && check[2] == '\0' ) {
                flags.quiet = TRUE;
                continue;
            }
            if( tolower( check[1] ) == 'i' && check[2] == '\0' ) {
                flags.imperfect = TRUE;
                continue;
            }
            if( tolower( check[1] ) == 'm' && check[2] == '\0' ) {
                flags.mask_hash = TRUE;
                continue;
            }
            if( tolower( check[1] ) == 't' && check[2] == '\0' ) {
                flags.tiny_output = TRUE;
                continue;
            }
            if( tolower( check[1] ) == 'a' && check[2] == '\0' ) {
                flags.align = TRUE;
                continue;
            }
        }
        fp = fopen( *input_file, "r" );
        if( fp == NULL ) {
            fatal( "cannot open keyword file" );
        }
        for(;;) {
            c = fgetc( fp );
            if( c == '#' ) {
                for(;;) {
                    c = fgetc( fp );
                    if( ( c == EOF ) || ( c == '\n' ) ) {
                        break;
                    }
                }
                continue;
            }
            ungetc( c, fp );

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?