📄 dbz.java

📁 java高级使用教程全书一共分六章
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
12 下一页
// Dbz - access a Unix DBZ file
//
// Copyright (C) 1996 by Jef Poskanzer <jef@acme.com>.  All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
//
// Visit the ACME Labs Java page for up-to-date versions of this and other
// fine Java utilities: http://www.acme.com/java/

package Acme;

import java.util.*;
import java.io.*;

/// Access a Unix DBZ file.
// <P>
// DBZ files are similar to DBM files.  From the Java point of view,
// they are just on-disk hash tables.  Comments from the C version:
// <P>
// The dbz database exploits the fact that when news stores a <key,value>
// tuple, the `value' part is a seek offset into a text file, pointing to
// a copy of the `key' part.  This avoids the need to store a copy of
// the key in the dbz files.  However, the text file *must* exist and be
// consistent with the dbz files, or things will fail.
// <P>
// The basic format of the database is a simple hash table containing the
// values.  A value is stored by indexing into the table using a hash value
// computed from the key; collisions are resolved by linear probing (just
// search forward for an empty slot, wrapping around to the beginning of
// the table if necessary).  Linear probing is a performance disaster when
// the table starts to get full, so a complication is introduced.  The
// database is actually one *or more* tables, stored sequentially in the
// .pag file, and the length of linear-probe sequences is limited.  The
// search (for an existing item or an empty slot) always starts in the
// first table, and whenever MAXRUN probes have been done in table N,
// probing continues in table N+1.  This behaves reasonably well even in
// cases of massive overflow.  There are some other small complications
// added, see comments below.
// <P>
// The table size is fixed for any particular database, but is determined
// dynamically when a database is rebuilt.  The strategy is to try to pick
// the size so the first table will be no more than 2/3 full, that being
// slightly before the point where performance starts to degrade.  (It is
// desirable to be a bit conservative because the overflow strategy tends
// to produce files with holes in them, which is a nuisance.)
// <P>
// <A HREF="/resources/classes/Acme/Dbz.java">Fetch the software.</A><BR>
// <A HREF="/resources/classes/Acme.tar.gz">Fetch the entire Acme package.</A>

public class Dbz extends Dictionary
    {

    /// For validating .dir format.
    private static final int dbzVersion = 3;

    /// Size of an offset - in Java that's an int.
    private static final int SOF = 4;

    // We assume that unused areas of a binary file are zeros, and that the
    // bit pattern of `(of_t)0' is all zeros.  The alternative is rather
    // painful file initialization.  Note that okayvalue(), if OVERFLOW is
    // defined, knows what value of an offset would cause overflow.
    private static final int VACANT = 0;
    private static int BIAS( int o ) { return o + 1; }
    private static int UNBIAS( int o ) { return o - 1; }

    // In a Unix implementation, or indeed any in which an of_t is a byte
    // count, there are a bunch of high bits free in an of_t.  There is a
    // use for them.  Checking a possible hit by looking it up in the base
    // file is relatively expensive, and the cost can be dramatically reduced
    // by using some of those high bits to tag the value with a few more bits
    // of the key's hash.  This detects most false hits without the overhead of
    // seek+read+strcmp.  We use the top bit to indicate whether the value is
    // tagged or not, and don't tag a value which is using the tag bits itself.
    // We're in trouble if the of_t representation wants to use the top bit.
    // The actual bitmasks and offset come from the configuration stuff,
    // which permits fiddling with them as necessary, and also suppressing
    // them completely (by defining the masks to 0).  We build pre-shifted
    // versions of the masks for efficiency.
    private int tagbits;
    private int taghere;
    private int tagboth;
    private boolean HASTAG( int o ) { return ( o & taghere ) != 0; }
    private int TAG( int o ) { return o & tagbits; }
    private int NOTAG( int o ) { return o & ( ~ tagboth ); }
    private boolean CANTAG( int o ) { return ( o & tagboth ) == 0; }
    private int MKTAG( int v ) { return ( v << tagshift ) & tagbits; }

    // A new, from-scratch database, not built as a rebuild of an old one,
    // needs to know table size, casemap algorithm, and tagging.  Normally
    // the user supplies this info, but there have to be defaults.
    private static final int DEFSIZE=120011;	// 300007 might be better
    private static final char DEFCASE='C';
    private static final int TAGENB=0x80;
    private static final int TAGMASK=0x7f;
    private static final int TAGSHIFT=24;

    // We read configuration info from the .dir file into these variables,
    // so we can avoid wired-in assumptions for an existing database.
    //
    // Among the info is a record of recent peak usages, so that a new table
    // size can be chosen intelligently when rebuilding.  10 is a good
    // number of usages to keep, since news displays marked fluctuations
    // in volume on a 7-day cycle.
    boolean olddbz;			// .dir file empty but .pag not?
    int tsize;				// table size
    int[] used = new int[11];		// entries used today, yesterday, ...
    int valuesize;			// size of table values, == SOF
    int[] bytemap = new int[SOF];	// byte-order map
    char casemap;			// case-mapping algorithm
    char fieldsep;			// field separator in base file, if any
    int tagenb;				// unshifted tag-enable bit
    int tagmask;			// unshifted tag mask
    int tagshift;			// shift count for tagmask and tagenb

    // For a program that makes many, many references to the database, it
    // is a large performance win to keep the table in core, if it will fit.
    // Note that this does hurt robustness in the event of crashes, and
    // dbmclose() *must* be called to flush the in-core database to disk.
    // The code is prepared to deal with the possibility that there isn't
    // enough memory.  There *is* an assumption that a size_t is big enough
    // to hold the size (in bytes) of one table, so dbminit() tries to figure
    // out whether this is possible first.
    //
    // The preferred way to ask for an in-core table is to do dbzincore(1)
    // before dbminit().  The default is not to do it, although -DINCORE
    // overrides this for backward compatibility with old dbz.
    //
    // We keep only the first table in core.  This greatly simplifies the
    // code, and bounds memory demand.  Furthermore, doing this is a large
    // performance win even in the event of massive overflow.
    private boolean incore = true;

    // Buffer for .pag reads. Buffering more than about 16 does not help
    // significantly at the densities we try to maintain
    private int[] pagbuf = new int[16];

    // Buffer for base-file reads.  Message-IDs (all news ever needs to
    // read) are essentially never longer than 64 bytes.
    private byte[] basebuf = new byte[64];

    private static final int NOTFOUND = -1;

    // Arguably the searcher struct for a given routine ought to be local to
    // it, but a fetch() is very often immediately followed by a store(), and
    // in some circumstances it is a useful performance win to remember where
    // the fetch() completed.  So we use a global struct and remember whether
    // it is current.
    DbzSearcher srch = new DbzSearcher();
    DbzSearcher prev;		// srch or null

    // Byte-ordering stuff.
    private int[] mybmap = new int[SOF];	// my byte order
    private boolean bytesame;
    private int MAPIN( int o ) { return bytesame ? o : bytemap( o, bytemap, mybmap ); }
    private int MAPOUT( int o ) { return bytesame ? o : bytemap( o, mybmap, bytemap ); }

    // The files.
    private RandomAccessFile basef;	// descriptor for base file
    private RandomAccessFile dirf;	// descriptor for .dir file
    private RandomAccessFile pagf;	// descriptor for .pag file
    private boolean readOnly;		// files open read-only
    private int pagpos;		      // posn in pagf; only search may set != -1
    private int[] corepag;		// incore version of .pag file, if any
    private boolean written;		// has a store() been done?


    /// Constructor.
    // @exception IOException if something goes wrong
    public Dbz( File file ) throws IOException
	{
	try
	    {
	    basef = new RandomAccessFile( file, "rw" );
	    dirf = new RandomAccessFile( file.getPath() + ".dir", "rw" );
	    pagf = new RandomAccessFile( file.getPath() + ".pag", "rw" );
	    readOnly = false;
	    }
	catch ( Exception e )
	    {
	    basef = new RandomAccessFile( file, "r" );
	    dirf = new RandomAccessFile( file.getPath() + ".dir", "r" );
	    pagf = new RandomAccessFile( file.getPath() + ".pag", "r" );
	    readOnly = true;
	    }
	pagpos = -1;
	getconf();
	tagbits = tagmask << tagshift;
	taghere = tagenb << tagshift;
	tagboth = tagbits | taghere;
	mybytemap( mybmap );
	bytesame = true;
	for ( int i = 0; i < SOF; ++i )
	    if ( mybmap[i] != bytemap[i] )
		bytesame = false;
	// Get first table into core, if it looks desirable and feasible.
	int s = tsize * SOF;
	if ( incore && s / SOF == tsize )
	    corepag = getcore();
	else
	    corepag = null;
	// Misc. setup.
	crcinit();
	written = false;
	prev = null;
	}

    /// Returns the number of elements contained within the dbz file.
    public int size()
	{
	// !!!
	return 0;
	}

    /// Returns true if the dbz file contains no elements.
    public boolean isEmpty()
	{
	// !!!
	return true;
	}

    /// Returns an enumeration of the dbz file's keys.
    public Enumeration keys()
	{
	// !!!
	return new DbzKeyEnumerator();
	}

    /// Returns an enumeration of the elements. Use the Enumeration methods
    // on the returned object to fetch the elements sequentially.
    public Enumeration elements()
	{
	return new DbzElementEnumerator( this );
	}

    /// Gets the object associated with the specified key in the dbz file.
    // @param key the key in the hash table
    // @returns the element for the key, or null if the key
    // 		is not defined in the hash table.
    public Object get( Object key )
	{
	return fetch( mapcase( (String) key ) );
	}

    /// Puts the specified element into the Dictionary, using the specified
    // key.  The element may be retrieved by doing a get() with the same
    // key.  The key and the element cannot be null.
    // @param key the specified hashtable key
    // @param value the specified element
    // @return the old value of the key, or null if it did not have one.
    // @exception NullPointerException If the value of the specified
    // element is null.
    public Object put( Object key, Object value )
	{
	return store( mapcase( (String) key ), value );
	}

    /// Removes the element corresponding to the key. Does nothing if the
    // key is not present.
    // @param key the key that needs to be removed
    // @return the value of key, or null if the key was not found.
    public Object remove( Object key )
	{
	// !!!
	return null;
	}


    /// Main program - for testing etc.
    public static void main( String[] args )
	{
	// !!!
	}
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -