⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gen_perf.cpp

📁 最新的版本ACE-5.6.8,刚从外文网上搬下,与大家分享.
💻 CPP
字号:
// -*- C++ -*-

// $Id: Gen_Perf.cpp 82513 2008-08-05 18:52:53Z parsons $

// Copyright (C) 1989 Free Software Foundation, Inc.
// written by Douglas C. Schmidt (schmidt@cs.wustl.edu)

// This file is part of GNU GPERF.

// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

#include "Gen_Perf.h"

ACE_RCSID(src, Gen_Perf, "$Id: Gen_Perf.cpp 82513 2008-08-05 18:52:53Z parsons $")

#if defined (ACE_HAS_GPERF)

#include "Vectors.h"
#include "ace/OS_NS_stdlib.h"
#include "ace/OS_NS_time.h"
#include "ace/OS_NS_stdio.h"
#include "ace/OS_Memory.h"

// Current release version.
extern const char *version_string;

// Reads input keys, possibly applies the reordering heuristic, sets
// the maximum associated value size (rounded up to the nearest power
// of 2), may initialize the associated values array, and determines
// the maximum hash table size.  Note: using the random numbers is
// often helpful, though not as deterministic, of course!

Gen_Perf::Gen_Perf (void)
  : fewest_collisions (0),
    num_done (1)
{
}

// Merge two disjoint hash key multisets to form the ordered disjoint
// union of the sets.  (In a multiset, an element can occur multiple
// times).  Precondition: both set1 and set2 must be
// ordered. Returns the length of the combined set.

int
Gen_Perf::compute_disjoint_union (char *set1, char *set2, char *set3)
{
  char *base = set3;

  while (*set1 && *set2)
    if (*set1 == *set2)
      set1++, set2++;
    else
      {
        *set3 = *set1 < *set2 ? *set1++ : *set2++;
        if (set3 == base || *set3 != *(set3 - 1))
          set3++;
      }

  while (*set1)
    {
      *set3 = *set1++;
      if (set3 == base || *set3 != *(set3 - 1))
        set3++;
    }

  while (*set2)
    {
      *set3 = *set2++;
      if (set3 == base || *set3 != *(set3 - 1))
        set3++;
    }
  *set3 = '\0';
  return set3 - base;
}

// Sort the UNION_SET in increasing frequency of occurrence.  This
// speeds up later processing since we may assume the resulting set
// (Set_3, in this case), is ordered. Uses insertion sort, since the
// UNION_SET is typically short.

void
Gen_Perf::sort_set (char *union_set, int len)
{
  for (int i = 0, j = len - 1; i < j; i++)
    {
      int curr, tmp;

      for (curr = i + 1, tmp = (int) union_set[curr];
           curr > 0
           && Vectors::occurrences[tmp] < Vectors::occurrences[(int) union_set[curr - 1]];
           curr--)
        union_set[curr] = union_set[curr - 1];

      union_set[curr] = static_cast<char> (tmp);
    }
}

// Generate a keysig's hash value.

int
Gen_Perf::hash (List_Node *key_node)
{
  int sum = option[NOLENGTH] ? 0 : key_node->length;

  for (char *ptr = key_node->keysig; *ptr; ptr++)
      sum += Vectors::asso_values[(int) *ptr];

  key_node->hash_value = sum;
  return sum;
}

// Find out how character value change affects successfully hash
// items.  Returns FALSE if no other hash values are affected, else
// returns TRUE.  Note that because Option.Get_Asso_Max is a power of
// two we can guarantee that all legal Vectors::Asso_Values are
// visited without repetition since Option.Get_Jump was forced to be
// an odd value!

inline int
Gen_Perf::affects_prev (char c, List_Node *curr)
{
  int original_char = Vectors::asso_values[(int) c];
  int total_iterations;

  if (!option[FAST])
    total_iterations = option.asso_max ();
  else
    {
      total_iterations = option.iterations ();

      if (total_iterations == 0)
        total_iterations = this->key_list.keyword_list_length ();
    }

  // Try all legal associated values.

  for (int i = total_iterations - 1; i >= 0; --i)
    {
      int collisions = 0;

      Vectors::asso_values[(int) c] =
        (Vectors::asso_values[(int) c]
         + (option.jump () ? option.jump () : ACE_OS::rand ()))
        & (option.asso_max () - 1);

      // Iteration Number array is a win, O(1) intialization time!
      this->char_search.reset ();

      // See how this asso_value change affects previous keywords.  If
      // it does better than before we'll take it!

      for (List_Node *ptr = this->key_list.head;
           this->char_search.find (this->hash (ptr)) == 0
           || ++collisions < fewest_collisions;
           ptr = ptr->next)
        {
          if (ptr == curr)
            {
              fewest_collisions = collisions;
              
              if (option[DEBUGGING])
                {
                  ACE_DEBUG ((LM_DEBUG,
                              "- resolved after %d iterations",
                              total_iterations - i));
                }
                
              return 0;
            }
        }
    }

  // Restore original values, no more tries.
  Vectors::asso_values[(int) c] = original_char;
  // If we're this far it's time to try the next character....
  return 1;
}

// Change a character value, try least-used characters first.

int
Gen_Perf::change (List_Node *prior, List_Node *curr)
{
  if (option[DEBUGGING])
    ACE_DEBUG ((LM_DEBUG,
                "collision on keyword #%d, prior = \"%s\", curr = \"%s\" hash = %d\n",
                num_done,
                prior->key,
                curr->key,
                curr->hash_value));
  Gen_Perf::sort_set (this->union_set,
                      compute_disjoint_union (prior->keysig,
                                              curr->keysig,
                                              this->union_set));

  // Try changing some values, if change doesn't alter other values
  // continue normal action.
  fewest_collisions++;

  for (char *temp = union_set; *temp != '\0'; temp++)
    if (affects_prev (*temp, curr) == 0)
      {
        if (option[DEBUGGING])
          ACE_DEBUG ((LM_DEBUG,
                      " by changing asso_value['%c'] (char #%d) to %d\n",
                      *temp,
                      temp - union_set + 1,
                      Vectors::asso_values[(int) *temp]));
        // Good, doesn't affect previous hash values, we'll take it.
        return 0;
      }

  for (List_Node *ptr = this->key_list.head;
       ptr != curr;
       ptr = ptr->next)
    this->hash (ptr);

  this->hash (curr);

  if (option[DEBUGGING])
    ACE_DEBUG ((LM_DEBUG,
                "** collision not resolved after %d iterations, %d duplicates remain, continuing...\n",
               !option[FAST] ? option.asso_max () : option.iterations () ? option.iterations () : this->key_list.keyword_list_length (),
                fewest_collisions + this->key_list.total_duplicates));
  return 0;
}

int
Gen_Perf::open (void)
{
  if (this->key_list.read_keys () == -1)
    return -1;

  if (option[ORDER])
    this->key_list.reorder ();

  int asso_value_max = option.asso_max ();
  int non_linked_length = this->key_list.keyword_list_length ();

  if (asso_value_max == 0)
    asso_value_max = non_linked_length;
  else if (asso_value_max > 0)
    asso_value_max *= non_linked_length;
  else // if (asso_value_max < 0)
    asso_value_max = non_linked_length / -asso_value_max;

  option.asso_max (ACE_POW (asso_value_max));

  if (option[RANDOM])
    {
      ACE_OS::srand ((u_int) ACE_OS::time (0));

      for (int i = 0; i < ACE_STANDARD_CHARACTER_SET_SIZE; ++i)
        {
          Vectors::asso_values[i] = (ACE_OS::rand () & (asso_value_max - 1));
        }
    }
  else
    {
      int asso_value = option.initial_value ();

      // Initialize array if user requests non-zero default.
      if (asso_value)
        {
          for (int i = ACE_STANDARD_CHARACTER_SET_SIZE - 1; i >= 0; --i)
            {
              Vectors::asso_values[i] = asso_value & (option.asso_max () - 1);
            }
        }
    }

  this->max_hash_value =
    this->key_list.max_key_length ()
    + option.asso_max ()
    * option.max_keysig_size ();

  ACE_NEW_RETURN (this->union_set,
                  char[2 * option.max_keysig_size () + 1],
                  -1);
  ACE_OS::printf ("/* ");

  if (option[C])
    ACE_OS::printf ("C");

  else if (option[CPLUSPLUS])
    ACE_OS::printf ("C++");

  ACE_OS::printf (" code produced by gperf version %s */\n",
                  version_string);
  Options::print_options ();

  if (option[DEBUGGING])
    ACE_DEBUG ((LM_DEBUG,
                "total non-linked keys = %d\n"
                "total duplicates = %d\n"
                "maximum associated value is %d\n"
                "maximum size of generated hash table is %d\n",
                non_linked_length,
                this->key_list.total_duplicates,
                asso_value_max,
                max_hash_value));
  if (this->char_search.open (max_hash_value + 1) == -1)
    return -1;
  return 0;
}

// For binary search, do normal string sort on the keys, and then
// assign hash values from 0 to N-1. Then go ahead with the normal
// logic that is there for perfect hashing.
int
Gen_Perf::compute_binary_search (void)
{
  // Do a string sort.
  this->key_list.string_sort ();

  // Assign hash values.
  List_Node *curr;
  int hash_value;
  for (hash_value = 0, curr = this->key_list.head;
       curr != 0;
       curr = curr->next, hash_value++)
    {
      curr->hash_value = hash_value;
    }

  return 0;
}

int
Gen_Perf::compute_linear_search (void)
{
  // Convert the list of keys to a linear list without
  // equivalence classes.
  this->key_list.string_sort ();

  // Assign hash values.
  List_Node *curr;
  int hash_value;
  for (hash_value = 0, curr = this->key_list.head;
       curr != 0;
       curr = curr->next, hash_value++)
    {
      curr->hash_value = hash_value;
    }
  return 0;
}

int
Gen_Perf::compute_perfect_hash (void)
{
  List_Node *curr;

  for (curr = this->key_list.head;
       curr != 0;
       curr = curr->next)
    {
      this->hash (curr);

      for (List_Node *ptr = this->key_list.head;
           ptr != curr;
           ptr = ptr->next)
        if (ptr->hash_value == curr->hash_value)
          {
            if (this->change (ptr, curr) == -1)
              return -1;
            break;
          }
      num_done++;
    }

  // Make one final check, just to make sure nothing weird happened...

  this->char_search.reset ();

  for (curr = this->key_list.head;
       curr;
       curr = curr->next)
    {
      if (this->char_search.find (this->hash (curr)) != 0)
        {
          if (option[DUP])
            {
              // Keep track of the number of "dynamic" links (i.e., keys
              // that hash to the same value) so that we can use it later
              // when generating the output.
              this->key_list.total_duplicates++;
            }
          else
            {
              // Yow, big problems.  we're outta here!
              ACE_ERROR ((LM_ERROR,
                          "\nInternal error, duplicate value %d:\n"
                          "try options -D or -r, or use new key positions.\n\n",
                          this->hash (curr)));
              return -1;
            }
        }
    }

  return 0;
}

// Does the hard stuff....  Initializes the Bool Array, and attempts
// to find a perfect function that will hash all the key words without
// getting any duplications.  This is made much easier since we aren't
// attempting to generate *minimum* functions, only perfect ones.  If
// we can't generate a perfect function in one pass *and* the user
// hasn't enabled the DUP option, we'll inform the user to try the
// randomization option, use -D, or choose alternative key positions.
// The alternatives (e.g., back-tracking) are too time-consuming, i.e,
// exponential in the number of keys.

int
Gen_Perf::run (void)
{
  if (this->open () == -1)
    return 1;

  if (option[BINARYSEARCH])
    {
      if (this->compute_binary_search () == -1)
        return 1;
    }
  else if (option[LINEARSEARCH])
    {
      if (this->compute_linear_search () == -1)
        return 1;
    }
  else
    {
      if (this->compute_perfect_hash () == -1)
        return 1;

      // Sorts the key word list by hash value, and then outputs the
      // list.  The generated hash table code is only output if the
      // early stage of processing turned out O.K.
      this->key_list.sort ();
    }

  this->key_list.output ();
  return 0;
}

// Prints out some diagnostics upon completion.

Gen_Perf::~Gen_Perf (void)
{
  if (option[DEBUGGING])
    {
      ACE_DEBUG ((LM_DEBUG,
                  "\ndumping occurrence and associated values tables\n"));
      for (int i = 0; i < ACE_STANDARD_CHARACTER_SET_SIZE; i++)
        if (Vectors::occurrences[i])
          ACE_DEBUG ((LM_DEBUG,
                      "Vectors::asso_values[%c] = %6d, Vectors::occurrences[%c] = %6d\n",
                      i,
                      Vectors::asso_values[i],
                      i,
                      Vectors::occurrences[i]));
      ACE_DEBUG ((LM_DEBUG,
                  "end table dumping\n"));
    }

  delete [] this->union_set;
}

#endif /* ACE_HAS_GPERF */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -