⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 compact.cc

📁 早期freebsd实现
💻 CC
字号:
/* Compact a sparse 2-D matrix.  Uses the Tarjan and Yao algorithm   taken from the article ``Storing a Sparse Table'' in CACM, 1979.   Copyright (C) 1989 Free Software Foundation, Inc.   written by Douglas C. Schmidt (schmidt@ics.uci.edu)      This file is part of GNU TRIE-GEN.      GNU TRIE-GEN is free software; you can redistribute it and/or modify   it under the terms of the GNU General Public License as published by   the Free Software Foundation; either version 1, or (at your option)   any later version.      GNU TRIE-GEN is distributed in the hope that it will be useful,   but WITHOUT ANY WARRANTY; without even the implied warranty of   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the   GNU General Public License for more details.      You should have received a copy of the GNU General Public License   along with GNU trie-gen; see the file COPYING.  If not, write to   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */#include <stdio.h>#include <builtin.h>#include <new.h>#include <std.h>#include <assert.h>#include "compact.h"/* Essentially provides the functionality of `calloc.' */static inline void *operator new (size_t elem_size, int size){  void *temp = new char[elem_size * size];  memset (temp, 0, elem_size * size);  return temp;}/* Essentially combines the functionality of `realloc' and `calloc'. */static inline void *operator new (size_t elem_size, void *old_ptr, int old_size, int new_size){ #ifdef _G_OLD_PLACEMENT  old_ptr = new {old_ptr, new_size * elem_size} char;#else  old_ptr = new (old_ptr, new_size * elem_size) char;#endif  memset (old_ptr + old_size * elem_size, 0, (new_size - old_size) * elem_size);  return old_ptr;}/* Initializes the internal form in the case that the user passes   in a pointer to an already existing 2-dimensional matrix.  Note   that by declaring the matrix to be a 1-dimensional array we   can perform the col and row offset calculations ourselves and   handle matrices with fixed, but arbitrary-sized columns and rows. */Compact_Matrix::Compact_Matrix (ITEM_TYPE *mat, int rows, int cols):     matrix (mat), max_rows (rows), total_cols (cols), current_rows (rows){  init (rows);  for (int i = 0; i < max_rows; i++)    for (int j = 0; j < total_cols; j++)      if (matrix[i * total_cols + j] != 0)        {          ITEM_TYPE value = matrix[i * total_cols + j];          total_entries++;          row_vec[i].count++;          row_vec[i].col_list = new Column_Node (row_vec[i].col_list, j, value);        }}/* Initializer for the case where we don't have a previously created   matrix to play with.  DEFAULT_ROWS represents the best first   approximation as to the number of rows in the matrix.  However, this   buffer is resized as needed, so the algorithm isn't overly penalized   for a bad first guess. */Compact_Matrix::Compact_Matrix (int default_rows): max_rows (default_rows){  current_rows = 0;  total_cols = 0;  matrix = 0;  init (default_rows);}void Compact_Matrix::init(int rows){  max_col_count = 0;  total_entries = 0;  compressed_len = -1;  row_offsets = 0;  checks = 0;  bucket_vec = 0;  values = 0;  row_vec = rows ? new Row_Node[rows] : 0;}/* Returns the `matrix[i][j]' item in the sparse 2-dimensional matrix.   Note that if the matrix is very sparse the number of items in each   COL_LIST will be very short, hence linear search is not too inefficent. */ITEM_TYPE Compact_Matrix::operator () (int i, int j){  assert (i >= 0 && i < current_rows && j >= 0);    for (Column_Node *col_list = row_vec[i].col_list; col_list; col_list = col_list->next)    if (col_list->index == j)      return col_list->value;  return 0;}/* Sets `matrix[i][j]' to VALUE.  ROW_VEC is dynamically resized, if   necessary.  At the moment the new entry is simply pushed onto the   linked list of COL_LIST entries.  If there aren't many elements then   this will not be too inefficient for later retrieval. */voidCompact_Matrix::operator () (int i, int j, ITEM_TYPE value){  if (i >= max_rows)    resize ((max_rows * 2) >? (i+1));  row_vec[i].col_list = new Column_Node (row_vec[i].col_list, j, value);  total_entries++;  row_vec[i].count++;  current_rows >?= i + 1;}/* Enlarges the ROW_VEC from CURR_ROWS to NEW_SIZE. */void Compact_Matrix::resize (int new_size){  Row_Node *temp = new Row_Node[max_rows = new_size];    memcpy ((void *) temp, (void *) row_vec, current_rows * sizeof *row_vec);  delete row_vec;  row_vec = temp;}/* Calls the functions that compact the table and generate the resulting   lookup scheme. */voidCompact_Matrix::output (void){  bucket_sort ();  first_fit_decreasing ();  output_arrays ();  output_lookup ();}/* Performs a bucket sort so that all rows with the same number of non-null   entries are treated as part of the same equivalence class.  This   operation is very fast! */void Compact_Matrix::bucket_sort (void){  for (int i = 0; i < current_rows; i++)    max_col_count >?= row_vec[i].count;  #ifdef _G_OLD_PLACEMENT  bucket_vec = new {max_col_count + 1} Column_Node *;#else  bucket_vec = new (max_col_count + 1) Column_Node *;#endif  for (i = 0; i < current_rows; i++)    bucket_vec[row_vec[i].count] = new Column_Node (bucket_vec[row_vec[i].count], i, 0);}/* Useful macros to clarify subsequent code.  They should probably be made   into member functions... */#define STARTING_ROW_OFFSET(X) (row_offsets[(X)])#define LARGEST_COL_VALUE(X) (row_vec[(X)].col_list->index)#define COL_LIST(X) (row_vec[(X)].col_list)#define COL_COUNT(X) (row_vec[(X)].count)#define COL_INDEX(X) ((X)->index)#define ROW_INDEX(X) ((X)->index)/* Performs sparse 2-dimensional array compaction suitable for use with the   `double-offset indexing' (used by Bison and FLEX to compact the size of   the sparse LR parsing tables and DFA's).  This function implements the   `first fit decreasing' heuristic described in Tarjan and Yao's paper    ``Storing a Sparse Table'' in CACM, 1979. */void Compact_Matrix::first_fit_decreasing (void){  /* Bit-vector and counter that records if a row/col location is already set. */  int    current_max = current_rows + (total_cols >? MAX_ASCII_RANGE);  char *already_assigned = (char *) malloc (current_max);    memset (already_assigned, 0, current_max);#ifdef _G_OLD_PLACEMENT  row_offsets = new {current_rows} int;  values      = new {current_max} ITEM_TYPE;  checks      = new {current_max} int;#else  row_offsets = new (current_rows) int;  values      = new (current_max) ITEM_TYPE;  checks      = new (current_max) int;#endif  for (int row_index = max_col_count; row_index >= 0; row_index--)    if (bucket_vec[row_index])      /* Process every row in the `equivalence class' of rows containing the         same number of non-null column entries. */      for (Column_Node *rows_list = bucket_vec[row_index]; rows_list; rows_list = rows_list->next)        {          int row        = ROW_INDEX (rows_list);          int row_offset = STARTING_ROW_OFFSET (row);                    /* Process every column index in the current row. */          for (Column_Node *col_list = COL_LIST (row); ;)            {              int col = COL_INDEX (col_list);              /* If we exceed the boundaries it's time to resize various buffers. */              if (row_offset + col >= current_max)                {                  int   new_size = (current_max >? row_offset + col) * 2;                  already_assigned =		      (char *) realloc (already_assigned, new_size);		  memset (already_assigned + current_max, 0,			  new_size - current_max);#ifdef _G_OLD_PLACEMENT                  values = new {values, current_max, new_size} int;                  checks = new {checks, current_max, new_size} int;#else                  values = new (values, current_max, new_size) int;                  checks = new (checks, current_max, new_size) int;#endif                  current_max *= 2;                }              if (already_assigned[row_offset + col])                {                  /* Efficiency hack to skip over obvious collisions. */                  while (++row_offset + col < current_max && already_assigned[row_offset + col])                     ;                  /* Reset col_list and begin again (with new row offset). */                  col_list = COL_LIST (row);                }              else if ((col_list = col_list->next) == 0)                break;            }          /* No more collisions exist.  Record the positions for the next round. */          for (col_list = COL_LIST (row); col_list; col_list = col_list->next)            {              int offset = row_offset + COL_INDEX (col_list);              already_assigned[offset] = 1;              compressed_len >?= offset;              values[offset] = col_list->value;              checks[offset] = row;            }          /* Need to reset this once all is said and done! */          STARTING_ROW_OFFSET (row) = row_offset;        }  free(already_assigned);}/* Generates the three arrays that comprise the `double-offset index'   scheme.  This is a bit messy, since I'm trying to neatly format   the generated tables and also determine the smallest (in bytes)   type declarations necessary to represent the elements of the tables. */void Compact_Matrix::output_arrays (){  const int COL_WIDTH = 12;        int max_number = 0;        int count;        printf ("#ifndef __STDC__\n#define const\n#endif\n\n");  printf ("#define YY_LAST %d\n", compressed_len);        for (int i = 0; i < current_rows; i++)    max_number >?= row_offsets[i];  count = max_number;  for (int field_width = 1; (count /= 10) > 0; field_width++)    ;  printf ("\nstatic const unsigned %s yy_rows[%d] = \n{\n  ",          max_number < MAX_UNSIGNED_CHAR ? "char" : max_number < MAX_UNSIGNED_SHORT ? "short" : "int",          current_rows);        for (i = 0; i < current_rows; )    printf ("%*d,%s", field_width, row_offsets[i], ++i % COL_WIDTH ? " " : "\n  ");        max_number = 0;  for (i = 0; i < compressed_len + 1; i++)    max_number >?= checks[i];  count = max_number;  for (field_width = 1; (count /= 10) > 0; field_width++)    ;  printf ("\n};\n\nstatic const unsigned %s yy_check[%d] = \n{\n  ",          max_number < MAX_UNSIGNED_CHAR ? "char" : max_number < MAX_UNSIGNED_SHORT ? "short" : "int",          compressed_len + 1);        for (i = 0; i < compressed_len + 1; )    printf ("%*d,%s", field_width, checks[i], ++i % COL_WIDTH ? " " : "\n  ");        max_number = 0;  for (i = 0; i < compressed_len + 1; i++)    max_number >?= abs (values[i]);  count = max_number;  for (field_width = 2; (count /= 10) > 0; field_width++)    ;  printf ("\n};\n\nstatic const %s yy_next[%d] = \n{\n  ",          max_number < MAX_SIGNED_CHAR ? "char" : max_number < MAX_SIGNED_SHORT ? "short" : "int",          compressed_len + 1);        for (i = 0; i <= compressed_len; )    printf ("%*d,%s", field_width, values[i], ++i % COL_WIDTH ? " " : "\n  ");        printf ("\n};\n\n");}/* Generates the `double-offset index' function, that provides the   value stored in the location referenced by parameters ROW and COL. */void Compact_Matrix::output_lookup (void){  printf ("static inline int\nnext_state "          "(int row, int col)\n{\n  int state_index = yy_rows[row] + col;\n\n"          "  if (state_index > YY_LAST || yy_check[state_index] != row)\n    "          "return 0;\n  else\n    return yy_next[state_index];\n}\n");}/* Useful debugging routine. */voidCompact_Matrix::dump_entries (void){  for (int i = 0; i < current_rows; i++)    if (row_vec[i].col_list)      {        Column_Node *temp = row_vec[i].col_list;              fprintf (stderr, "row %d's count = %d, cols = ", i, row_vec[i].count);        do { fprintf (stderr, "%d ", temp->index); } while (temp = temp->next);        putc ('\n', stderr);      }}/* Useful debugging routine. */void Compact_Matrix::dump_bucket (void){  for (int i = 0; i < total_entries; i++)    if (bucket_vec[i])      {        Column_Node *temp = bucket_vec[i];                fprintf (stderr, "bucket %d = ", i);        do { fprintf (stderr, "%d ", temp->index); } while (temp = temp->next);        putc ('\n', stderr);      }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -