📄 dct.cxx
字号:
/*
* Copyright (c) 1994 Regents of the University of California.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the Network Research
* Group at Lawrence Berkeley Laboratory.
* 4. Neither the name of the University nor of the Laboratory may be used
* to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/************ Change log
*
* $Log: dct.cxx,v $
* Revision 1.14 2003/03/14 07:25:55 robertj
* Removed $header keyword so is not different on alternate repositories
*
* Revision 1.13 2002/10/24 21:05:26 dereks
* Fix compile time warning.
*
* Revision 1.12 2002/05/17 01:47:33 dereks
* backout the integer maths in the h261 codec.
*
* Revision 1.11 2002/02/15 03:54:31 yurik
* Warnings removed during compilation, patch courtesy of Jehan Bing, jehan@bravobrava.com
*
* Revision 1.10 2001/10/24 20:24:32 dereks
* Remove green stripes under windows for INT_64. Thanks to Robert Lupa.
*
* Revision 1.9 2001/10/17 03:52:39 robertj
* Fixed MSVC compatibility
*
* Revision 1.8 2001/10/17 01:54:36 yurik
* Fixed clash with CE includes for INT32 type
*
* Revision 1.7 2001/10/16 23:51:42 dereks
* Change vic's fdct() from floating-point to fix-point. Improves performance
* for h261 video significantly on some machines. Thanks to Cosmos Jiang
*
* Revision 1.6 2001/10/16 21:20:07 yurik
* Removed warnings on Windows CE. Submitted by Jehan Bing, jehan@bravobrava.com
*
* Revision 1.3 2000/12/19 22:22:34 dereks
* Remove connection to grabber-OS.cxx files. grabber-OS.cxx files no longer used.
* Video data is now read from a video channel, using the pwlib classes.
*
* Revision 1.2 2000/08/25 03:18:49 dereks
* Add change log facility (Thanks Robert for the info on implementation)
*
*
*
********/
#include <sys/types.h>
#include "bsd-endian.h"
#include "dct.h"
/*
 * Macros for fixed-point (integer) arithmetic.
 *
 * FP_NBITS  number of fractional bits, i.e. binary digits past the
 *           binary point.
 * FP_MUL    product of two fixed-point numbers.  Each operand is
 *           shifted right by 5 first (its 5 low-order bits are
 *           discarded) and the product is shifted right by a further
 *           FP_NBITS - 10, for a total scaling of 2^-FP_NBITS.
 *           A fixed-point number and a plain integer can be multiplied
 *           directly to give a fixed-point number.
 * FP_SCALE  converts a floating point number to fixed point by
 *           multiplying by 2^FP_NBITS, adding 0.5 and truncating to
 *           int (used only at startup, not by the dct engine).
 * FP_NORM   converts a fixed-point number to a scalar, rounding to the
 *           closest integer: adds one half (1 << (FP_NBITS-1)) and
 *           shifts right by FP_NBITS.
 * FP_JNORM  like FP_NORM, but folds the jpeg bias of 128 into the
 *           rounding addition: 257 << (FP_NBITS-1) is 128 plus one
 *           half in fixed point.
 */
#define FP_NBITS 15
#define FP_MUL(a, b) ((((a) >> 5) * ((b) >> 5)) >> (FP_NBITS - 10))
#define FP_SCALE(v) (int)((double)(v) * double(1 << FP_NBITS) + 0.5)
#define FP_NORM(v) (((v) + (1 << (FP_NBITS-1))) >> FP_NBITS)
#define FP_JNORM(v) (((v) + (257 << (FP_NBITS-1))) >> FP_NBITS)
/*
 * M(n) yields bit n of `m0`.  m0 is not a macro parameter, so a
 * variable of that name must be in scope wherever M is used.
 */
#define M(n) ((m0 >> (n)) & 1)
/*
 * Branch-free clamp to [0..255], borrowed from nv.
 *
 * LIMIT8(x, t) evaluates x once into the caller-supplied scratch
 * variable t and then works on t in two steps:
 *   t &= ~(t>>31)           zeroes t when it is negative;
 *   t | ~((t-256) >> 31)    leaves t alone when t < 256 and yields
 *                           all ones otherwise.
 * Only the low 8 bits of the LIMIT8 result are meaningful; LIMIT masks
 * them out, so the all-ones case becomes 255.
 *
 * The shifts by 31 rely on t being a 32-bit int and on >> replicating
 * the sign bit of a negative value.
 */
#define LIMIT8(x, t) ((t = (x)), (t &= ~(t>>31)), (t | ~((t-256) >> 31)))
#define LIMIT(x, t) (LIMIT8((x), t) & 0xff)
/*
 * Zigzag scan table, row order: entry k is the offset, within an 8x8
 * block stored row by row, of the k-th coefficient of the zigzag scan.
 * The first 64 entries are a permutation of 0..63; they are followed
 * by 16 zero entries.
 * NOTE(review): the zero tail is presumably padding so that a scan
 * index running a little past 63 still reads inside the table --
 * confirm against the code that indexes it.
 */
const u_char ROWZAG[] = {
0, 1, 8, 16, 9, 2, 3, 10,
17, 24, 32, 25, 18, 11, 4, 5,
12, 19, 26, 33, 40, 48, 41, 34,
27, 20, 13, 6, 7, 14, 21, 28,
35, 42, 49, 56, 57, 50, 43, 36,
29, 22, 15, 23, 30, 37, 44, 51,
58, 59, 52, 45, 38, 31, 39, 46,
53, 60, 61, 54, 47, 55, 62, 63,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0
};
/*
 * Zigzag scan table, column order: the same scan as the row-order
 * table, but with every offset transposed (row and column swapped),
 * i.e. for an 8x8 block stored column by column.
 * The first 64 entries are a permutation of 0..63; they are followed
 * by 16 zero entries.
 * NOTE(review): the zero tail is presumably padding so that a scan
 * index running a little past 63 still reads inside the table --
 * confirm against the code that indexes it.
 */
const u_char COLZAG[] = {
0, 8, 1, 2, 9, 16, 24, 17,
10, 3, 4, 11, 18, 25, 32, 40,
33, 26, 19, 12, 5, 6, 13, 20,
27, 34, 41, 48, 56, 49, 42, 35,
28, 21, 14, 7, 15, 22, 29, 36,
43, 50, 57, 58, 51, 44, 37, 30,
23, 31, 38, 45, 52, 59, 60, 53,
46, 39, 47, 54, 61, 62, 55, 63,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0
};
/*
 * Multiplier constants in two flavours: A1..A5 are fixed point (built
 * with FP_SCALE), FA1..FA5 are the same values as single-precision
 * floats.  Numerically:
 *   A1 = A3 = cos(pi/4)
 *   A2      = cos(pi/8) - cos(3*pi/8)
 *   A4      = cos(pi/8) + cos(3*pi/8)
 *   A5      = cos(3*pi/8)
 * NOTE(review): presumably the rotation constants of the AA&N DCT
 * passes defined later in this file -- confirm at the points of use.
 */
#define A1 FP_SCALE(0.7071068)
#define A2 FP_SCALE(0.5411961)
#define A3 A1
#define A4 FP_SCALE(1.3065630)
#define A5 FP_SCALE(0.3826834)
#define FA1 (0.707106781f)
#define FA2 (0.541196100f)
#define FA3 FA1
#define FA4 (1.306562965f)
#define FA5 (0.382683433f)
/*
 * B0 is defined below; drop any definition that is already visible.
 * NOTE(review): presumably this guards against the B0 baud-rate
 * constant of <termios.h> arriving through a system header -- confirm.
 */
#ifdef B0
#undef B0
#endif
/*
 * These magic numbers are scaling factors for each coef of the 1-d
 * AA&N DCT.  The scale factor for coef 0 is 1 and for coef 1<=n<=7 it
 * is cos(n*PI/16)*sqrt(2).  There is also a normalization of sqrt(8).
 * Formally you divide by the scale factor, but we multiply by the
 * inverse because it's faster.  So the numbers below are the inverse
 * of what was just described, e.g. B0 = 1/sqrt(8) and
 * B4 = 1/(cos(PI/4)*sqrt(2)*sqrt(8)) = 1/sqrt(8).
 */
#define B0 0.35355339059327376220
#define B1 0.25489778955207958447
#define B2 0.27059805007309849220
#define B3 0.30067244346752264027
#define B4 0.35355339059327376220
#define B5 0.44998811156820785231
#define B6 0.65328148243818826392
#define B7 1.28145772387075308943
/*
 * Per-coefficient output multipliers of the 1-d AA&N DCT, indexed by
 * coefficient number 0..7.  The same values serve as the first-stage
 * multipliers of the inverse DCT.
 */
static const double first_stage[8] = {
	B0, B1, B2, B3,
	B4, B5, B6, B7
};
/*
 * The first_stage multipliers crossed with themselves, in fixed point:
 *
 *	cross_stage[8 * r + c] == FP_SCALE(B<r> * B<c>)
 *
 * Scaling the columns by these products embeds the first-stage
 * multipliers of the row pass in the column results.
 *
 * FP_CROSS_ROW(b) expands to one row of the table (b times each of
 * B0..B7); it is local to this definition and undefined right after.
 */
#define FP_CROSS_ROW(b) \
	FP_SCALE((b) * B0), FP_SCALE((b) * B1), \
	FP_SCALE((b) * B2), FP_SCALE((b) * B3), \
	FP_SCALE((b) * B4), FP_SCALE((b) * B5), \
	FP_SCALE((b) * B6), FP_SCALE((b) * B7)
static const int cross_stage[64] = {
	FP_CROSS_ROW(B0),
	FP_CROSS_ROW(B1),
	FP_CROSS_ROW(B2),
	FP_CROSS_ROW(B3),
	FP_CROSS_ROW(B4),
	FP_CROSS_ROW(B5),
	FP_CROSS_ROW(B6),
	FP_CROSS_ROW(B7),
};
#undef FP_CROSS_ROW
static const float f_cross_stage[64] = {
(float)(B0 * B0),
(float)(B0 * B1),
(float)(B0 * B2),
(float)(B0 * B3),
(float)(B0 * B4),
(float)(B0 * B5),
(float)(B0 * B6),
(float)(B0 * B7),
(float)(B1 * B0),
(float)(B1 * B1),
(float)(B1 * B2),
(float)(B1 * B3),
(float)(B1 * B4),
(float)(B1 * B5),
(float)(B1 * B6),
(float)(B1 * B7),
(float)(B2 * B0),
(float)(B2 * B1),
(float)(B2 * B2),
(float)(B2 * B3),
(float)(B2 * B4),
(float)(B2 * B5),
(float)(B2 * B6),
(float)(B2 * B7),
(float)(B3 * B0),
(float)(B3 * B1),
(float)(B3 * B2),
(float)(B3 * B3),
(float)(B3 * B4),
(float)(B3 * B5),
(float)(B3 * B6),
(float)(B3 * B7),
(float)(B4 * B0),
(float)(B4 * B1),
(float)(B4 * B2),
(float)(B4 * B3),
(float)(B4 * B4),
(float)(B4 * B5),
(float)(B4 * B6),
(float)(B4 * B7),
(float)(B5 * B0),
(float)(B5 * B1),
(float)(B5 * B2),
(float)(B5 * B3),
(float)(B5 * B4),
(float)(B5 * B5),
(float)(B5 * B6),
(float)(B5 * B7),
(float)(B6 * B0),
(float)(B6 * B1),
(float)(B6 * B2),
(float)(B6 * B3),
(float)(B6 * B4),
(float)(B6 * B5),
(float)(B6 * B6),
(float)(B6 * B7),
(float)(B7 * B0),
(float)(B7 * B1),
(float)(B7 * B2),
(float)(B7 * B3),
(float)(B7 * B4),
(float)(B7 * B5),
(float)(B7 * B6),
(float)(B7 * B7),
};
/*
 * Convert a quantization table given in natural, row-by-row order into
 * the pre-scaled fixed-point table that rdct() expects as its qt input.
 *
 *   in   64 quantizer values, row by row
 *   out  receives 64 fixed-point values,
 *        out[8*r + c] = FP_SCALE(in[8*r + c] * first_stage[c] * first_stage[r])
 *
 * Multiplying both the column and the row multiplier into each entry
 * folds the column and row passes of the dct together: every column
 * DCT is scaled independently, which pre-biases the row DCTs so that
 * their first multiplier is already embedded in the temporary result.
 * (The original source credits Martin Vetterli for the technique.)
 */
void
rdct_fold_q(const int* in, int* out)
{
	for (int row = 0; row < 8; ++row) {
		const double row_scale = first_stage[row];
		for (int col = 0; col < 8; ++col) {
			const int idx = (row << 3) | col;
			double v = (double)in[idx];
			/* column multiplier first, then row multiplier */
			v *= first_stage[col];
			v *= row_scale;
			out[idx] = FP_SCALE(v);
		}
	}
}
/*
 * Counterpart of rdct_fold_q() that divides by the quantizer instead
 * of multiplying by it, and produces floats rather than fixed point.
 *
 *   in   64 quantizer values, row by row
 *   out  receives out[8*r + c] =
 *            first_stage[r] * first_stage[c] / in[8*r + c]
 *
 * Entries of `in` are not checked for zero before the division.
 */
void fdct_fold_q(const int* in, float* out)
{
	for (int row = 0; row < 8; ++row) {
		for (int col = 0; col < 8; ++col) {
			const int idx = row * 8 + col;
			double scale = first_stage[row];
			scale *= first_stage[col];
			const double quant = (double)in[idx];
			out[idx] = (float)(scale / quant);
		}
	}
}
/*
 * Add the constant dc to an 8x8 block of bytes, clamping every sum
 * with LIMIT, and store the result in a second 8x8 block.
 *
 *   dc      value added to each input byte
 *   in      first byte of the source block
 *   out     first byte of the destination block
 *   stride  distance in bytes from one row to the next, applied to
 *           both in and out
 *
 * Each row is read and written through INT_64 (one access) or u_int
 * (two accesses) pointers rather than byte by byte.
 * NOTE(review): that presumably requires in and out to be suitably
 * aligned for those types -- confirm against the callers.
 */
void dcsum(int dc, u_char* in, u_char* out, int stride)
{
	for (int row = 0; row < 8; ++row) {
		int t;	/* scratch variable for LIMIT */
#ifdef INT_64
		/*XXX assume little-endian */
		/*
		 * NOTE(review): every byte goes back to the bit position
		 * it was read from, so this path looks independent of
		 * byte order -- confirm before dropping the caveat above.
		 */
		INT_64 src = *(INT_64*)in;
		INT_64 dst = 0;
		for (int sh = 56; sh >= 0; sh -= 8)
			dst |= (INT_64)LIMIT(dc + (int)(src >> sh & 0xff), t) << sh;
		*(INT_64*)out = dst;
#else
		/*
		 * Two 4-byte halves per row; EXTRACT and SPLICE are not
		 * defined in this file (presumably bsd-endian.h).
		 */
		for (int off = 0; off < 8; off += 4) {
			u_int src = *(u_int*)(in + off);
			u_int dst = 0;
			SPLICE(dst, LIMIT(dc + EXTRACT(src, 24), t), 24);
			SPLICE(dst, LIMIT(dc + EXTRACT(src, 16), t), 16);
			SPLICE(dst, LIMIT(dc + EXTRACT(src, 8), t), 8);
			SPLICE(dst, LIMIT(dc + EXTRACT(src, 0), t), 0);
			*(u_int*)(out + off) = dst;
		}
#endif
		in += stride;
		out += stride;
	}
}
/*
 * Variant of dcsum() that reads the source block one byte at a time:
 * dc is added to each of the 8x8 input bytes, the sum is clamped with
 * LIMIT, and four results at a time are packed with SPLICE and stored
 * to the destination as one u_int.
 *
 *   dc      value added to each input byte
 *   in      first byte of the source block (only accessed bytewise)
 *   out     first byte of the destination block (written as u_int)
 *   stride  distance in bytes from one row to the next, applied to
 *           both in and out
 *
 * SPLICE is not defined in this file (presumably bsd-endian.h).
 */
void dcsum2(int dc, u_char* in, u_char* out, int stride)
{
	for (int row = 0; row < 8; ++row) {
		int t;	/* scratch variable for LIMIT */
		for (int off = 0; off < 8; off += 4) {
			const u_char* px = in + off;
			u_int packed = 0;
			SPLICE(packed, LIMIT(dc + px[0], t), 24);
			SPLICE(packed, LIMIT(dc + px[1], t), 16);
			SPLICE(packed, LIMIT(dc + px[2], t), 8);
			SPLICE(packed, LIMIT(dc + px[3], t), 0);
			*(u_int*)(out + off) = packed;
		}
		in += stride;
		out += stride;
	}
}
void dcfill(int DC, u_char* out, int stride)
{
int t;
u_int dc = DC;
dc = LIMIT(dc, t);
dc |= dc << 8;
dc |= dc << 16;
#ifdef INT_64
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -