📄 dct.cpp
字号:
/* * dct.cc -- * * DCT code, also contains MMX version * * Copyright (c) 1994-2002 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * A. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * B. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * C. Neither the names of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */#ifndef lintstatic const char rcsid[] = "@(#) $Header$";#endif#include <sys/types.h>#include "bsd-endian.h"#include "dct.h"#include <stdio.h>/* conditional declaration */#if MMX_DCT_ENABLEDvoid domidct8x8llmW(short *inptr, short *quantptr, int *wsptr, u_char *outptr, int stride);#endif/* * Macros for fix-point (integer) arithmetic. FP_NBITS gives the number * of binary digits past the decimal point. FP_MUL computes the product * of two fixed point numbers. A fixed point number and an integer * can be directly multiplied to give a fixed point number. FP_SCALE * converts a floating point number to fixed point (and is used only * at startup, not by the dct engine). FP_NORM converts a fixed * point number to scalar by rounding to the closest integer. * FP_JNORM is similar except it folds the jpeg bias of 128 into the * rounding addition. */#define FP_NBITS 15#define FP_MUL(a, b) ((((a) >> 5) * ((b) >> 5)) >> (FP_NBITS - 10))#define FP_SCALE(v) (int)((double)(v) * double(1 << FP_NBITS) + 0.5)#define FP_NORM(v) (((v) + (1 << (FP_NBITS-1))) >> FP_NBITS)#define FP_JNORM(v) (((v) + (257 << (FP_NBITS-1))) >> FP_NBITS)#define M(n) ((m0 >> (n)) & 1)/* * This macro stolen from nv. *//* Sick little macro which will limit x to [0..255] with logical ops */#define LIMIT8(x, t) ((t = (x)), (t &= ~(t>>31)), (t | ~((t-256) >> 31)))#define LIMIT(x, t) (LIMIT8((x), t) & 0xff)#if 0wmay - not needed/* row order */const u_char ROWZAG[] = { 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};#endif/* column order */const u_char DCT_COLZAG[] = { 0, 8, 1, 2, 9, 16, 24, 17, 10, 3, 4, 11, 18, 25, 32, 40, 33, 26, 19, 12, 5, 6, 13, 20, 27, 34, 41, 48, 56, 49, 42, 35, 28, 21, 14, 7, 15, 22, 29, 36, 43, 50, 57, 58, 51, 44, 37, 30, 23, 31, 38, 45, 52, 59, 60, 53, 46, 39, 47, 54, 61, 62, 55, 63, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};#define A1 FP_SCALE(0.7071068)#define A2 FP_SCALE(0.5411961)#define A3 A1#define A4 FP_SCALE(1.3065630)#define A5 FP_SCALE(0.3826834)#define FA1 (0.707106781f)#define FA2 (0.541196100f)#define FA3 FA1#define FA4 (1.306562965f)#define FA5 (0.382683433f)/* * these magic numbers are scaling factors for each coef of the 1-d * AA&N DCT. The scale factor for coef 0 is 1 and coef 1<=n<=7 is * cos(n*PI/16)*sqrt(2). There is also a normalization of sqrt(8). * Formally you divide by the scale factor but we multiply by the * inverse because it's faster. So the numbers below are the inverse * of what was just described. */#define B0 0.35355339059327376220#define B1 0.25489778955207958447#define B2 0.27059805007309849220#define B3 0.30067244346752264027#define B4 0.35355339059327376220#define B5 0.44998811156820785231#define B6 0.65328148243818826392#define B7 1.28145772387075308943/* * Output multipliers for AA&N DCT * (i.e., first stage multipliers for inverse DCT). */static const double first_stage[8] = { B0, B1, B2, B3, B4, B5, B6, B7, };#ifdef _MSC_VER// 'initializing' : truncation from 'const double to float#pragma warning(disable: 4305)#endif/* * The first_stage array crossed with itself. This allows us * to embed the first stage multipliers of the row pass by * computing scaled versions of the columns. */static const int cross_stage[64] = { FP_SCALE(B0 * B0), FP_SCALE(B0 * B1), FP_SCALE(B0 * B2), FP_SCALE(B0 * B3), FP_SCALE(B0 * B4), FP_SCALE(B0 * B5), FP_SCALE(B0 * B6), FP_SCALE(B0 * B7), FP_SCALE(B1 * B0), FP_SCALE(B1 * B1), FP_SCALE(B1 * B2), FP_SCALE(B1 * B3), FP_SCALE(B1 * B4), FP_SCALE(B1 * B5), FP_SCALE(B1 * B6), FP_SCALE(B1 * B7), FP_SCALE(B2 * B0), FP_SCALE(B2 * B1), FP_SCALE(B2 * B2), FP_SCALE(B2 * B3), FP_SCALE(B2 * B4), FP_SCALE(B2 * B5), FP_SCALE(B2 * B6), FP_SCALE(B2 * B7), FP_SCALE(B3 * B0), FP_SCALE(B3 * B1), FP_SCALE(B3 * B2), FP_SCALE(B3 * B3), FP_SCALE(B3 * B4), FP_SCALE(B3 * B5), FP_SCALE(B3 * B6), FP_SCALE(B3 * B7), FP_SCALE(B4 * B0), FP_SCALE(B4 * B1), FP_SCALE(B4 * B2), FP_SCALE(B4 * B3), FP_SCALE(B4 * B4), FP_SCALE(B4 * B5), FP_SCALE(B4 * B6), FP_SCALE(B4 * B7), FP_SCALE(B5 * B0), FP_SCALE(B5 * B1), FP_SCALE(B5 * B2), FP_SCALE(B5 * B3), FP_SCALE(B5 * B4), FP_SCALE(B5 * B5), FP_SCALE(B5 * B6), FP_SCALE(B5 * B7), FP_SCALE(B6 * B0), FP_SCALE(B6 * B1), FP_SCALE(B6 * B2), FP_SCALE(B6 * B3), FP_SCALE(B6 * B4), FP_SCALE(B6 * B5), FP_SCALE(B6 * B6), FP_SCALE(B6 * B7), FP_SCALE(B7 * B0), FP_SCALE(B7 * B1), FP_SCALE(B7 * B2), FP_SCALE(B7 * B3), FP_SCALE(B7 * B4), FP_SCALE(B7 * B5), FP_SCALE(B7 * B6), FP_SCALE(B7 * B7),};static const float f_cross_stage[64] = { B0 * B0, B0 * B1, B0 * B2, B0 * B3, B0 * B4, B0 * B5, B0 * B6, B0 * B7, B1 * B0, B1 * B1, B1 * B2, B1 * B3, B1 * B4, B1 * B5, B1 * B6, B1 * B7, B2 * B0, B2 * B1, B2 * B2, B2 * B3, B2 * B4, B2 * B5, B2 * B6, B2 * B7, B3 * B0, B3 * B1, B3 * B2, B3 * B3, B3 * B4, B3 * B5, B3 * B6, B3 * B7, B4 * B0, B4 * B1, B4 * B2, B4 * B3, B4 * B4, B4 * B5, B4 * B6, B4 * B7, B5 * B0, B5 * B1, B5 * B2, B5 * B3, B5 * B4, B5 * B5, B5 * B6, B5 * B7, B6 * B0, B6 * B1, B6 * B2, B6 * B3, B6 * B4, B6 * B5, B6 * B6, B6 * B7, B7 * B0, B7 * B1, B7 * B2, B7 * B3, B7 * B4, B7 * B5, B7 * B6, B7 * B7,};/* * Map a quantization table in natural, row-order, * into the qt input expected by rdct(). */#if 0wmay - not neededvoidrdct_fold_q(const int* in, int* out){#if !MMX_DCT_ENABLED for (int i = 0; i < 64; ++i) { /* * Fold column and row passes of the dct. * By scaling each column DCT independently, * we pre-bias all the row DCT's so the * first multiplier is already embedded * in the temporary result. Thanks to * Martin Vetterli for explaining how * to do this. */ double v = double(in[i]); v *= first_stage[i & 7]; v *= first_stage[i >> 3]; out[i] = FP_SCALE(v); }#else /* the MMX version of rdct() expects the quantization * table to be an array of short */ for (int i = 0; i < 64; i++) out[i] = in[i];#endif}#endif/* * Just like rdct_fold_q() but we divide by the quantizer. */voiddct_fdct_fold_q(const int* in, float* out){ for (int i = 0; i < 64; ++i) { double v = first_stage[i >> 3]; v *= first_stage[i & 7]; double q = double(in[i]); out[i] = (float) (v / q); }}#if 0wmay - not neededvoid dcsum(int dc, u_char* in, u_char* out, int stride){ for (int k = 8; --k >= 0; ) { int t;#ifdef INT_64 /*FIXME assume little-endian */ INT_64 i = *(INT_64*)in; INT_64 o = (INT_64)LIMIT(dc + (i >> 56), t) << 56; o |= (INT_64)LIMIT(dc + (i >> 48 & 0xff), t) << 48; o |= (INT_64)LIMIT(dc + (i >> 40 & 0xff), t) << 40; o |= (INT_64)LIMIT(dc + (i >> 32 & 0xff), t) << 32; o |= (INT_64)LIMIT(dc + (i >> 24 & 0xff), t) << 24; o |= (INT_64)LIMIT(dc + (i >> 16 & 0xff), t) << 16; o |= (INT_64)LIMIT(dc + (i >> 8 & 0xff), t) << 8; o |= (INT_64)LIMIT(dc + (i & 0xff), t); *(INT_64*)out = o;#else u_int o = 0; u_int i = *(u_int*)in; SPLICE(o, LIMIT(dc + EXTRACT(i, 24), t), 24); SPLICE(o, LIMIT(dc + EXTRACT(i, 16), t), 16); SPLICE(o, LIMIT(dc + EXTRACT(i, 8), t), 8); SPLICE(o, LIMIT(dc + EXTRACT(i, 0), t), 0); *(u_int*)out = o; o = 0; i = *(u_int*)(in + 4); SPLICE(o, LIMIT(dc + EXTRACT(i, 24), t), 24); SPLICE(o, LIMIT(dc + EXTRACT(i, 16), t), 16); SPLICE(o, LIMIT(dc + EXTRACT(i, 8), t), 8); SPLICE(o, LIMIT(dc + EXTRACT(i, 0), t), 0); *(u_int*)(out + 4) = o;#endif in += stride; out += stride; }}void dcsum2(int dc, u_char* in, u_char* out, int stride){ for (int k = 8; --k >= 0; ) { int t; u_int o = 0; SPLICE(o, LIMIT(dc + in[0], t), 24); SPLICE(o, LIMIT(dc + in[1], t), 16); SPLICE(o, LIMIT(dc + in[2], t), 8); SPLICE(o, LIMIT(dc + in[3], t), 0); *(u_int*)out = o; o = 0; SPLICE(o, LIMIT(dc + in[4], t), 24); SPLICE(o, LIMIT(dc + in[5], t), 16); SPLICE(o, LIMIT(dc + in[6], t), 8); SPLICE(o, LIMIT(dc + in[7], t), 0); *(u_int*)(out + 4) = o; in += stride;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -