📄 bzip2outputstream.cs
字号:
// BZip2OutputStream.cs
// Copyright (C) 2001 Mike Krueger
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
// Linking this library statically or dynamically with other modules is
// making a combined work based on this library. Thus, the terms and
// conditions of the GNU General Public License cover the whole
// combination.
//
// As a special exception, the copyright holders of this library give you
// permission to link this library with independent modules to produce an
// executable, regardless of the license terms of these independent
// modules, and to copy and distribute the resulting executable under
// terms of your choice, provided that you also meet, for each linked
// independent module, the terms and conditions of the license of that
// module. An independent module is a module which is not derived from
// or based on this library. If you modify this library, you may extend
// this exception to your version of the library, but you are not
// obligated to do so. If you do not wish to do so, delete this
// exception statement from your version.
using System;
using System.IO;
using ICSharpCode.SharpZipLib.Checksums;
namespace ICSharpCode.SharpZipLib.BZip2
{
// TODO: Update to BZip2 1.0.1, 1.0.2
/// <summary>
/// An output stream that compresses into the BZip2 format
/// including file header chars into another stream.
/// </summary>
public class BZip2OutputStream : Stream
{
/// <summary>
/// Gets a value indicating whether the current stream supports reading
/// </summary>
public override bool CanRead {
get {
return false;
}
}
/// <summary>
/// Gets a value indicating whether the current stream supports seeking
/// </summary>
public override bool CanSeek {
get {
return false;
}
}
/// <summary>
/// Gets a value indicating whether the current stream supports writing
/// </summary>
public override bool CanWrite {
get {
return baseStream.CanWrite;
}
}
/// <summary>
/// Gets the length in bytes of the stream
/// </summary>
public override long Length {
get {
return baseStream.Length;
}
}
/// <summary>
/// Gets or sets the current position of this stream.
/// </summary>
public override long Position {
get {
return baseStream.Position;
}
set {
throw new NotSupportedException("BZip2OutputStream position cannot be set");
}
}
/// <summary>
/// Sets the current position of this stream to the given value.
/// </summary>
public override long Seek(long offset, SeekOrigin origin)
{
throw new NotSupportedException("BZip2OutputStream Seek not supported");
}
/// <summary>
/// Sets the length of this stream to the given value.
/// </summary>
public override void SetLength(long val)
{
throw new NotSupportedException("BZip2OutputStream SetLength not supported");
}
/// <summary>
/// Read a byte from the stream advancing the position.
/// </summary>
public override int ReadByte()
{
throw new NotSupportedException("BZip2OutputStream ReadByte not supported");
}
/// <summary>
/// Read a block of bytes
/// </summary>
public override int Read(byte[] b, int off, int len)
{
throw new NotSupportedException("BZip2OutputStream Read not supported");
}
/// <summary>
/// Write a block of bytes to the stream
/// </summary>
public override void Write(byte[] buf, int off, int len)
{
for (int i = 0; i < len; ++i) {
WriteByte(buf[off + i]);
}
}
readonly static int SETMASK = (1 << 21);
readonly static int CLEARMASK = (~SETMASK);
readonly static int GREATER_ICOST = 15;
readonly static int LESSER_ICOST = 0;
readonly static int SMALL_THRESH = 20;
readonly static int DEPTH_THRESH = 10;
/*--
If you are ever unlucky/improbable enough
to get a stack overflow whilst sorting,
increase the following constant and try
again. In practice I have never seen the
stack go above 27 elems, so the following
limit seems very generous.
--*/
readonly static int QSORT_STACK_SIZE = 1000;
static void Panic()
{
throw new BZip2Exception("BZip2 output stream panic");
}
void MakeMaps()
{
int i;
nInUse = 0;
for (i = 0; i < 256; i++) {
if (inUse[i]) {
seqToUnseq[nInUse] = (char)i;
unseqToSeq[i] = (char)nInUse;
nInUse++;
}
}
}
static void HbMakeCodeLengths(char[] len, int[] freq, int alphaSize, int maxLen)
{
/*--
Nodes and heap entries run from 1. Entry 0
for both the heap and nodes is a sentinel.
--*/
int nNodes, nHeap, n1, n2, j, k;
bool tooLong;
int[] heap = new int[BZip2Constants.MAX_ALPHA_SIZE + 2];
int[] weight = new int[BZip2Constants.MAX_ALPHA_SIZE * 2];
int[] parent = new int[BZip2Constants.MAX_ALPHA_SIZE * 2];
for (int i = 0; i < alphaSize; ++i) {
weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
}
while (true) {
nNodes = alphaSize;
nHeap = 0;
heap[0] = 0;
weight[0] = 0;
parent[0] = -2;
for (int i = 1; i <= alphaSize; ++i) {
parent[i] = -1;
nHeap++;
heap[nHeap] = i;
int zz = nHeap;
int tmp = heap[zz];
while (weight[tmp] < weight[heap[zz >> 1]]) {
heap[zz] = heap[zz >> 1];
zz >>= 1;
}
heap[zz] = tmp;
}
if (!(nHeap < (BZip2Constants.MAX_ALPHA_SIZE+2))) {
Panic();
}
while (nHeap > 1) {
n1 = heap[1];
heap[1] = heap[nHeap];
nHeap--;
int zz = 1;
int yy = 0;
int tmp = heap[zz];
while (true) {
yy = zz << 1;
if (yy > nHeap) {
break;
}
if (yy < nHeap && weight[heap[yy+1]] < weight[heap[yy]]) {
yy++;
}
if (weight[tmp] < weight[heap[yy]]) {
break;
}
heap[zz] = heap[yy];
zz = yy;
}
heap[zz] = tmp;
n2 = heap[1];
heap[1] = heap[nHeap];
nHeap--;
zz = 1;
yy = 0;
tmp = heap[zz];
while (true) {
yy = zz << 1;
if (yy > nHeap) {
break;
}
if (yy < nHeap && weight[heap[yy+1]] < weight[heap[yy]]) {
yy++;
}
if (weight[tmp] < weight[heap[yy]]) {
break;
}
heap[zz] = heap[yy];
zz = yy;
}
heap[zz] = tmp;
nNodes++;
parent[n1] = parent[n2] = nNodes;
weight[nNodes] = (int)((weight[n1] & 0xffffff00) + (weight[n2] & 0xffffff00)) |
(int)(1 + (((weight[n1] & 0x000000ff) > (weight[n2] & 0x000000ff)) ? (weight[n1] & 0x000000ff) : (weight[n2] & 0x000000ff)));
parent[nNodes] = -1;
nHeap++;
heap[nHeap] = nNodes;
zz = nHeap;
tmp = heap[zz];
while (weight[tmp] < weight[heap[zz >> 1]]) {
heap[zz] = heap[zz >> 1];
zz >>= 1;
}
heap[zz] = tmp;
}
if (!(nNodes < (BZip2Constants.MAX_ALPHA_SIZE * 2))) {
Panic();
}
tooLong = false;
for (int i = 1; i <= alphaSize; ++i) {
j = 0;
k = i;
while (parent[k] >= 0) {
k = parent[k];
j++;
}
len[i - 1] = (char)j;
if (j > maxLen) {
tooLong = true;
}
}
if (!tooLong) {
break;
}
for (int i = 1; i < alphaSize; ++i) {
j = weight[i] >> 8;
j = 1 + (j / 2);
weight[i] = j << 8;
}
}
}
/*--
index of the last char in the block, so
the block size == last + 1.
--*/
int last;
/*--
index in zptr[] of original string after sorting.
--*/
int origPtr;
/*--
always: in the range 0 .. 9.
The current block size is 100000 * this number.
--*/
int blockSize100k;
bool blockRandomised;
int bytesOut;
int bsBuff;
int bsLive;
IChecksum mCrc = new StrangeCRC();
bool[] inUse = new bool[256];
int nInUse;
char[] seqToUnseq = new char[256];
char[] unseqToSeq = new char[256];
char[] selector = new char[BZip2Constants.MAX_SELECTORS];
char[] selectorMtf = new char[BZip2Constants.MAX_SELECTORS];
byte[] block;
int[] quadrant;
int[] zptr;
short[] szptr;
int[] ftab;
int nMTF;
int[] mtfFreq = new int[BZip2Constants.MAX_ALPHA_SIZE];
/*
* Used when sorting. If too many long comparisons
* happen, we stop sorting, randomise the block
* slightly, and try again.
*/
int workFactor;
int workDone;
int workLimit;
bool firstAttempt;
int nBlocksRandomised;
int currentChar = -1;
int runLength = 0;
/// <summary>
/// Construct a default output stream with maximum block size
/// </summary>
/// <param name="stream">The stream to write BZip data onto.</param>
public BZip2OutputStream(Stream stream) : this(stream, 9)
{
}
/// <summary>
/// Initialise a new instance of the <see cref="BZip2OutputStream"></see>
/// for the specified stream, using the given blocksize.
/// </summary>
/// <param name="stream">The stream to write compressed data to.</param>
/// <param name="blockSize">The block size to use.</param>
/// <remarks>
/// Valid block sizes are in the range 1..9, with 1 giving
/// the lowest compression and 9 the highest.
/// </remarks>
public BZip2OutputStream(Stream stream, int blockSize)
{
block = null;
quadrant = null;
zptr = null;
ftab = null;
BsSetStream(stream);
workFactor = 50;
if (blockSize > 9) {
blockSize = 9;
}
if (blockSize < 1) {
blockSize = 1;
}
blockSize100k = blockSize;
AllocateCompressStructures();
Initialize();
InitBlock();
}
/// <summary>
/// Write a byte to the stream.
/// </summary>
public override void WriteByte(byte bv)
{
int b = (256 + bv) % 256;
if (currentChar != -1) {
if (currentChar == b) {
runLength++;
if (runLength > 254) {
WriteRun();
currentChar = -1;
runLength = 0;
}
} else {
WriteRun();
runLength = 1;
currentChar = b;
}
} else {
currentChar = b;
runLength++;
}
}
void WriteRun()
{
if (last < allowableBlockSize) {
inUse[currentChar] = true;
for (int i = 0; i < runLength; i++) {
mCrc.Update(currentChar);
}
switch (runLength) {
case 1:
last++;
block[last + 1] = (byte)currentChar;
break;
case 2:
last++;
block[last + 1] = (byte)currentChar;
last++;
block[last + 1] = (byte)currentChar;
break;
case 3:
last++;
block[last + 1] = (byte)currentChar;
last++;
block[last + 1] = (byte)currentChar;
last++;
block[last + 1] = (byte)currentChar;
break;
default:
inUse[runLength - 4] = true;
last++;
block[last + 1] = (byte)currentChar;
last++;
block[last + 1] = (byte)currentChar;
last++;
block[last + 1] = (byte)currentChar;
last++;
block[last + 1] = (byte)currentChar;
last++;
block[last + 1] = (byte)(runLength - 4);
break;
}
} else {
EndBlock();
InitBlock();
WriteRun();
}
}
bool closed = false;
/// <summary>
/// Free any resources and other cleanup before garbage collection reclaims memory
/// </summary>
~BZip2OutputStream()
{
Close();
}
/// <summary>
/// End the current block and end compression.
/// Close the stream and free any resources
/// </summary>
public override void Close()
{
if (!closed) {
closed = true;
if (runLength > 0) {
WriteRun();
}
currentChar = -1;
EndBlock();
EndCompression();
Flush();
baseStream.Close();
}
}
/// <summary>
/// Flush output buffers
/// </summary>
public override void Flush()
{
baseStream.Flush();
}
uint blockCRC, combinedCRC;
void Initialize()
{
bytesOut = 0;
nBlocksRandomised = 0;
/*--- Write `magic' bytes h indicating file-format == huffmanised,
followed by a digit indicating blockSize100k.
---*/
// TODO adding header here should be optional?
BsPutUChar('B');
BsPutUChar('Z');
BsPutUChar('h');
BsPutUChar('0' + blockSize100k);
combinedCRC = 0;
}
int allowableBlockSize;
void InitBlock()
{
// blockNo++;
mCrc.Reset();
last = -1;
// ch = 0;
for (int i = 0; i < 256; i++) {
inUse[i] = false;
}
/*--- 20 is just a paranoia constant ---*/
allowableBlockSize = BZip2Constants.baseBlockSize * blockSize100k - 20;
}
void EndBlock()
{
if (last < 0) { // dont do anything for empty files, (makes empty files compatible with original Bzip)
return;
}
blockCRC = (uint)mCrc.Value;
combinedCRC = (combinedCRC << 1) | (combinedCRC >> 31);
combinedCRC ^= blockCRC;
/*-- sort the block and establish posn of original string --*/
DoReversibleTransformation();
/*--
A 6-byte block header, the value chosen arbitrarily
as 0x314159265359 :-). A 32 bit value does not really
give a strong enough guarantee that the value will not
appear by chance in the compressed datastream. Worst-case
probability of this event, for a 900k block, is about
2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48 bits.
For a compressed file of size 100Gb -- about 100000 blocks --
only a 48-bit marker will do. NB: normal compression/
decompression do *not* rely on these statistical properties.
They are only important when trying to recover blocks from
damaged files.
--*/
BsPutUChar(0x31);
BsPutUChar(0x41);
BsPutUChar(0x59);
BsPutUChar(0x26);
BsPutUChar(0x53);
BsPutUChar(0x59);
/*-- Now the block's CRC, so it is in a known place. --*/
BsPutint((int)blockCRC);
/*-- Now a single bit indicating randomisation. --*/
if (blockRandomised) {
BsW(1,1);
nBlocksRandomised++;
} else {
BsW(1,0);
}
/*-- Finally, block's contents proper. --*/
MoveToFrontCodeAndSend();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -