📄 flot8.c
字号:
/*
* Routines for fast DCT and lapped transforms for images
*
* H. S. Malvar, R. L. de Queiroz, and E. M. Rubino, June '93
*
* Refs: - Malvar, Signal Processing with Lapped Transforms, book, 1992.
* - Malvar, LBT/HLBT paper, IEEE Trans. SP, Apr. 1998.
*
* History:
*
* 06/93 - HSM, RLQ, EMR, first version
* 06/97 - HSM, with LBT and HLBT
* 10/00 - HSM, public release
*
* Disclaimer: use these programs at your own risk; no guarantees!
* The code is reasonably fast but not optimized for memory allocation or speed.
*
* Have fun with lapped transforms!
*
* PS1: this software is (c) 1993-2000 H. S. Malvar, all rights reserved.
* You may use this code or derivatives at will, but please quote the source ...
*
* PS2: this program is "postcardware"; if you decide to use it I do charge a
* license fee: you should send me a postcard of the city in which you work :-)
* please send it to: H. Malvar, Microsoft Research, One Microsoft Way,
* Redmond, WA 98052, USA.
*/
#include <stdlib.h>
#include <stdio.h>
#include "flot8.h"
#define HALF 0.5f
#define TWO 2.0f
#define ZERO 0.0f
#define SQTWO 1.4142135623731
#define SQHALF 0.70710678118655
/*
* ISQ2 = 1/sqrt(2)
* Cipj and Sipj stand for cosine and sine of (i*PI/j)
* All DCT coef. should be multiplied by 1/2. In order to save time
* this scale factor was already multiplied by some weighting factors
* used through DCT forward and inverse.
*/
#define ISQ2 7.07106781186548e-01 * 0.5
#define C1p8 9.23879532511287e-01 * 0.5
#define S1p8 3.82683432365090e-01 * 0.5
#define C1p16 9.80785280403230e-01
#define C5p16 5.55570233019602e-01
#define S1p16 1.95090322016128e-01
#define S5p16 8.31469612302545e-01
#define CC1p8 9.23879532511287e-01 * 0.70710678118655
#define SS1p8 3.82683432365090e-01 * 0.70710678118655
/* --------- PART 1: SUPPORT ROUTINES --------- */
/* Conversion of float to int with rounding */
static int round(float x)
{
if (x > ZERO)
return((int) (x + HALF));
else
return((int) (x - HALF));
}
/* Memory allocation for integer vectors */
static int16 *allocint(int Nelms)
{
int16 *p;
p = (int16 *) calloc(Nelms, sizeof(int16));
if (p == NULL) {
printf("Out of memory for transforms\n");
exit(1);
}
return(p);
}
/* Memory allocation for float vectors */
static float *allocfloat(int Nelms)
{
float *p;
p = (float *) calloc(Nelms, sizeof(float));
if (p == NULL) {
printf("Out of memory for transforms\n");
exit(1);
}
return(p);
}
/* DCT-III */
static void dctIII4(float *y)
{
float x[4];
/* 1st butterfly stage */
x[0] = (y[0] + y[2]) * HALF;
x[1] = (y[0] - y[2]) * HALF;
x[2] = SS1p8 * y[1] - CC1p8 * y[3];
x[3] = SS1p8 * y[3] + CC1p8 * y[1];
/* 2nd butterfly stage */
y[0] = x[0] + x[3];
y[1] = x[1] + x[2];
y[2] = x[1] - x[2];
y[3] = x[0] - x[3];
}
static void dctIII8(float *y)
{
float x[8];
/* 1st butterfly stage */
x[0] = (y[0] + y[4]) * ISQ2;
x[1] = (y[0] - y[4]) * ISQ2;
x[2] = y[2] * S1p8 - y[6] * C1p8;
x[3] = y[6] * S1p8 + y[2] * C1p8;
x[4] = y[1] * HALF;
x[5] = y[7] * HALF;
x[6] = (y[3] + y[5]) * ISQ2;
x[7] = (y[3] - y[5]) * ISQ2;
/* 2nd butterfly stage */
y[0] = x[0] + x[3];
y[1] = x[1] + x[2];
y[2] = x[1] - x[2];
y[3] = x[0] - x[3];
y[4] = x[4] + x[6];
y[5] = x[5] + x[7];
y[6] = x[4] - x[6];
y[7] = x[5] - x[7];
/* 3rd butterfly stage */
x[7] = y[4] * C1p16 + y[5] * S1p16;
x[4] = y[4] * S1p16 - y[5] * C1p16;
x[5] = y[6] * C5p16 + y[7] * S5p16;
x[6] = y[6] * S5p16 - y[7] * C5p16;
/* 4th butterfly stage */
y[7] = y[0] - x[7];
y[0] = y[0] + x[7];
y[6] = y[1] - x[6];
y[1] = y[1] + x[6];
y[5] = y[2] - x[5];
y[2] = y[2] + x[5];
y[4] = y[3] - x[4];
y[3] = y[3] + x[4];
}
/* DCT-II */
static void dctII4(float *y)
{
float x[4];
/* 1st butterfly stage */
x[0] = y[0] + y[3];
x[1] = y[1] + y[2];
x[2] = y[1] - y[2];
x[3] = y[0] - y[3];
/* 2nd butterfly stage */
y[0] = (x[0] + x[1]) * HALF;
y[2] = (x[0] - x[1]) * HALF;
y[1] = SS1p8 * x[2] + CC1p8 * x[3];
y[3] = SS1p8 * x[3] - CC1p8 * x[2];
}
static void dctII8(float *y)
{
float x[8];
float f;
/* 1st butterfly stage */
x[0] = y[0] + y[7];
x[1] = y[1] + y[6];
x[2] = y[2] + y[5];
x[3] = y[3] + y[4];
x[4] = y[0] - y[7];
x[5] = y[3] - y[4];
x[6] = y[1] - y[6];
x[7] = y[2] - y[5];
/* 2nd butterfly stage */
f = (x[0] + x[3]) * ISQ2;
x[3] = x[0] - x[3];
x[0] = f;
f = (x[1] + x[2]) * ISQ2;
x[2] = x[1] - x[2];
x[1] = f;
y[4] = x[4] * HALF;
y[5] = x[5] * HALF;
y[6] = (x[6] + x[7]) * ISQ2;
y[7] = (x[6] - x[7]) * ISQ2;
/* 3rd butterfly stage */
x[4] = y[4] + y[6];
x[5] = y[5] + y[7];
x[6] = y[4] - y[6];
x[7] = y[5] - y[7];
/* 4th butterfly stage */
y[0] = x[0] + x[1];
y[4] = x[0] - x[1];
y[6] = x[3] * S1p8 - x[2] * C1p8;
y[2] = x[2] * S1p8 + x[3] * C1p8;
y[1] = x[4] * C1p16 + x[5] * S1p16;
y[7] = x[4] * S1p16 - x[5] * C1p16;
y[5] = x[6] * C5p16 + x[7] * S5p16;
y[3] = x[6] * S5p16 - x[7] * C5p16;
}
/* Direct DCT on a vector of blocks */
static void dctvec(
int16 *x, /* vector of ints containing the i/o data */
int xlen) /* length of vector (multiple of eight) */
{
int16 *endp;
float y[8];
/* Loop of nofblocks DCT's */
endp = x + xlen;
while (x < endp) {
y[0] = (float) x[0];
y[1] = (float) x[1];
y[2] = (float) x[2];
y[3] = (float) x[3];
y[4] = (float) x[4];
y[5] = (float) x[5];
y[6] = (float) x[6];
y[7] = (float) x[7];
dctII8(y);
x[0] = round(y[0]);
x[1] = round(y[1]);
x[2] = round(y[2]);
x[3] = round(y[3]);
x[4] = round(y[4]);
x[5] = round(y[5]);
x[6] = round(y[6]);
x[7] = round(y[7]);
x+=8;
}
}
/* Inverse DCT on a vector of blocks */
static void idctvec(
int16 *x, /* vector of ints containing the i/o data */
int xlen) /* length of vector (multiple of eight) */
{
int16 *endp;
float y[8];
/* Loop of nofblocks DCT's */
endp = x + xlen;
while (x < endp) {
y[0] = (float) x[0];
y[1] = (float) x[1];
y[2] = (float) x[2];
y[3] = (float) x[3];
y[4] = (float) x[4];
y[5] = (float) x[5];
y[6] = (float) x[6];
y[7] = (float) x[7];
dctIII8(y);
x[0] = round(y[0]);
x[1] = round(y[1]);
x[2] = round(y[2]);
x[3] = round(y[3]);
x[4] = round(y[4]);
x[5] = round(y[5]);
x[6] = round(y[6]);
x[7] = round(y[7]);
x+=8;
}
}
/* Direct LOT on a vector of blocks */
#define C0 0.89802757576062
#define C1 0.86074202700394
#define C2 0.87630668004386
#define S0 0.43993916985592
#define S1 0.50904141575037
#define S2 0.48175367410172
static void lotvec(
int16 *x, /* vector of ints containing the i/o data */
int xlen, /* length of vector (multiple of eight) */
float *y, /* working vector, length = xlen + 8 */
float *z, /* working vector, length = xlen + 8 */
int type) /* LOT or LBT */
{
float *p1, *p2, *p3, *p4;
int16 *q1, *q2;
float f;
int i;
/* Copy x onto y and fold borders */
q1 = x;
p1 = y + 4;
p3 = p1 + xlen;
while (p1 < p3) {
*p1++ = *q1++;
}
p1 = p2 = y + 4;
p3 = p4 = p1 + xlen;
for (i = 4; i ; i--) {
*(--p2) = *(p1++);
*(p4++) = *(--p3);
}
/* Stages of DCTs */
p1 = y;
p2 = y + xlen + 8;
while (p1 < p2) {
dctII8(p1);
/* for LBT, scale first odd coefficient */
if (type == LBT)
p1[1] *= SQTWO;
p1 += 8;
}
/* 1st stage of +1/-1 butterflies */
z[0] = y[0];
z[1] = y[2];
z[2] = y[4];
z[3] = y[6];
p1 = z + 4;
p2 = z + xlen - 4;
p3 = y + 8;
while(p1 < p2) {
p1[0] = p3[0] + p3[1];
p1[1] = p3[2] + p3[3];
p1[2] = p3[4] + p3[5];
p1[3] = p3[6] + p3[7];
p1[4] = p3[0] - p3[1];
p1[5] = p3[2] - p3[3];
p1[6] = p3[4] - p3[5];
p1[7] = p3[6] - p3[7];
p1 += 8;
p3 += 8;
}
p1[0] = p3[0];
p1[1] = p3[2];
p1[2] = p3[4];
p1[3] = p3[6];
/* 2nd stage of +1/-1 butterflies */
p1 = y;
p2 = y + xlen;
p3 = z;
while(p1 < p2) {
p1[0] = p3[0] + p3[4];
p1[1] = p3[1] + p3[5];
p1[2] = p3[2] + p3[6];
p1[3] = p3[3] + p3[7];
p1[4] = p3[0] - p3[4];
p1[5] = p3[1] - p3[5];
p1[6] = p3[2] - p3[6];
p1[7] = p3[3] - p3[7];
p1 += 8;
p3 += 8;
}
/* 3rd stage : plane rotations */
q1 = x;
q2 = x + xlen;
p3 = y;
while(q1 < q2) {
q1[0] = round(HALF * p3[0]);
q1[2] = round(HALF * p3[1]);
q1[4] = round(HALF * p3[2]);
q1[6] = round(HALF * p3[3]);
q1[1] = round((p3[4] * C0 - p3[5] * S0) * HALF);
f = p3[4] * S0 + p3[5] * C0;
q1[3] = round((f * C1 - p3[6] * S1) * HALF);
f = f * S1 + p3[6] * C1;
q1[5] = round((f * C2 - p3[7] * S2) * HALF);
q1[7] = round((f * S2 + p3[7] * C2) * HALF);
q1 += 8;
p3 += 8;
}
}
/* Inverse LOT on a vector of blocks */
static void ilotvec(
int16 *x, /* vector of ints containing the i/o data */
int xlen, /* length of vector (multiple of eight) */
float *y, /* working vector, length = xlen + 8 */
float *z, /* working vector, length = xlen + 8 */
int type) /* LOT or LBT */
{
float *p1, *p2, *p3;
int16 *q1, *q3;
float f;
/* Plane rotations */
p1 = y;
p2 = y + xlen;
q3 = x;
while(p1 < p2) {
p1[0] = HALF * q3[0];
p1[1] = HALF * q3[2];
p1[2] = HALF * q3[4];
p1[3] = HALF * q3[6];
p1[7] = (q3[7] * C2 - q3[5] * S2) * HALF;
f = q3[5] * C2 + q3[7] * S2;
p1[6] = (f * C1 - q3[3] * S1) * HALF;
f = q3[3] * C1 + f * S1;
p1[5] = (f * C0 - q3[1] * S0) * HALF;
p1[4] = (f * S0 + q3[1] * C0) * HALF;
p1 += 8;
q3 += 8;
}
/* 2nd stage of +1/-1 butterflies */
p1 = z;
p2 = z + xlen;
p3 = y;
while(p1 < p2) {
p1[0] = p3[0] + p3[4];
p1[1] = p3[1] + p3[5];
p1[2] = p3[2] + p3[6];
p1[3] = p3[3] + p3[7];
p1[4] = p3[0] - p3[4];
p1[5] = p3[1] - p3[5];
p1[6] = p3[2] - p3[6];
p1[7] = p3[3] - p3[7];
p1 += 8;
p3 += 8;
}
/* 1st stage of +1/-1 butterflies */
y[0] = z[0] * TWO;
y[2] = z[1] * TWO;
y[4] = z[2] * TWO;
y[6] = z[3] * TWO;
y[1] = y[3] = y[5] = y[7] = ZERO;
p1 = y + 8;
p2 = y + xlen;
p3 = z + 4;
while(p1 < p2) {
p1[0] = p3[0] + p3[4];
p1[2] = p3[1] + p3[5];
p1[4] = p3[2] + p3[6];
p1[6] = p3[3] + p3[7];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -