flot8.c

来自「lapped transforms for images. lapped tra」· C语言代码 · 共 917 行 · 第 1/2 页
917 行
/* 
 * Routines for fast DCT and lapped transforms for images
 *
 * H. S. Malvar, R. L. de Queiroz, and E. M. Rubino, June '93
 *
 * Refs: - Malvar, Signal Processing with Lapped Transforms, book, 1992.
 *       - Malvar, LBT/HLBT paper, IEEE Trans. SP, Apr. 1998.
 *
 * History:
 *
 * 06/93 - HSM, RLQ, EMR, first version
 * 06/97 - HSM, with LBT and HLBT
 * 10/00 - HSM, public release
 *
 * Disclaimer: use these programs at your own risk; no guarantees!
 * The code is reasonably fast but not optimized for memory allocation or speed.
 *
 * Have fun with lapped transforms!
 *
 * PS1: this software is (c) 1993-2000 H. S. Malvar, all rights reserved.
 * You may use this code or derivatives at will, but please quote the source ...
 *
 * PS2: this program is "postcardware"; if you decide to use it I do charge a
 * license fee: you should send me a postcard of the city in which you work :-)
 * please send it to: H. Malvar, Microsoft Research, One Microsoft Way,
 * Redmond, WA 98052, USA.
 */

#include <stdlib.h>
#include <stdio.h>
#include "flot8.h"

#define HALF     0.5f
#define TWO      2.0f
#define ZERO     0.0f
#define SQTWO    1.4142135623731
#define SQHALF   0.70710678118655

/*
 *  ISQ2 = 1/sqrt(2)
 *  Cipj and Sipj stand for cosine and sine of (i*PI/j)
 *  All DCT coef. should be multiplied by 1/2. In order to save time
 *  this scale factor was already multiplied by some weighting factors
 *  used through DCT forward and inverse.
 */

#define ISQ2     7.07106781186548e-01 * 0.5
#define C1p8     9.23879532511287e-01 * 0.5
#define S1p8     3.82683432365090e-01 * 0.5
#define C1p16    9.80785280403230e-01
#define C5p16    5.55570233019602e-01
#define S1p16    1.95090322016128e-01
#define S5p16    8.31469612302545e-01

#define CC1p8    9.23879532511287e-01 * 0.70710678118655
#define SS1p8    3.82683432365090e-01 * 0.70710678118655

/* --------- PART 1:  SUPPORT ROUTINES --------- */

/* Conversion of float to int with rounding */

static int round(float x)
{
    if (x > ZERO)
        return((int) (x + HALF));
	else
        return((int) (x - HALF));
}

/* Memory allocation for integer vectors */

static int16 *allocint(int Nelms)
{
    int16   *p;

    p = (int16 *) calloc(Nelms, sizeof(int16));
    if (p == NULL) {
        printf("Out of memory for transforms\n");
        exit(1);
    }
    return(p);
}

/* Memory allocation for float vectors */

static float *allocfloat(int Nelms)
{
    float   *p;

    p = (float *) calloc(Nelms, sizeof(float));
    if (p == NULL) {
        printf("Out of memory for transforms\n");
        exit(1);
    }
    return(p);
}


/* DCT-III */

static void dctIII4(float *y)
{
    float x[4];

	/* 1st butterfly stage */
    x[0] = (y[0] + y[2]) * HALF;
    x[1] = (y[0] - y[2]) * HALF;
    x[2] = SS1p8 * y[1] - CC1p8 * y[3];
    x[3] = SS1p8 * y[3] + CC1p8 * y[1];

	/* 2nd butterfly stage */
    y[0] = x[0] + x[3];
    y[1] = x[1] + x[2];
    y[2] = x[1] - x[2];
    y[3] = x[0] - x[3];
}

static void dctIII8(float *y)
{
	float x[8];

	/* 1st butterfly stage */
	x[0] = (y[0] + y[4]) * ISQ2;
	x[1] = (y[0] - y[4]) * ISQ2;
	x[2] = y[2] * S1p8 - y[6] * C1p8;
	x[3] = y[6] * S1p8 + y[2] * C1p8;
	x[4] = y[1] * HALF;
	x[5] = y[7] * HALF;
	x[6] = (y[3] + y[5]) * ISQ2;
	x[7] = (y[3] - y[5]) * ISQ2;

	/* 2nd butterfly stage */
	y[0] = x[0] + x[3];
	y[1] = x[1] + x[2];
	y[2] = x[1] - x[2];
	y[3] = x[0] - x[3];
	y[4] = x[4] + x[6];
	y[5] = x[5] + x[7];
	y[6] = x[4] - x[6];
	y[7] = x[5] - x[7];

	/* 3rd butterfly stage */
    x[7] = y[4] * C1p16 + y[5] * S1p16;
	x[4] = y[4] * S1p16 - y[5] * C1p16;
	x[5] = y[6] * C5p16 + y[7] * S5p16;
	x[6] = y[6] * S5p16 - y[7] * C5p16;

	/* 4th butterfly stage */
	y[7] = y[0] - x[7];
	y[0] = y[0] + x[7];
	y[6] = y[1] - x[6];
	y[1] = y[1] + x[6];
	y[5] = y[2] - x[5];
	y[2] = y[2] + x[5];
	y[4] = y[3] - x[4];
	y[3] = y[3] + x[4];
}

/* DCT-II */

static void dctII4(float *y)
{
    float x[4];

	/* 1st butterfly stage */
    x[0] = y[0] + y[3];
    x[1] = y[1] + y[2];
    x[2] = y[1] - y[2];
    x[3] = y[0] - y[3];

	/* 2nd butterfly stage */
    y[0] = (x[0] + x[1]) * HALF;
    y[2] = (x[0] - x[1]) * HALF;
    y[1] = SS1p8 * x[2] + CC1p8 * x[3];
    y[3] = SS1p8 * x[3] - CC1p8 * x[2];
}

static void dctII8(float *y)
{
	float x[8];
	float f;

	/* 1st butterfly stage */
	x[0] = y[0] + y[7];
	x[1] = y[1] + y[6];
	x[2] = y[2] + y[5];
	x[3] = y[3] + y[4];
	x[4] = y[0] - y[7];
	x[5] = y[3] - y[4];
	x[6] = y[1] - y[6];
	x[7] = y[2] - y[5];

	/* 2nd butterfly stage */
	f = (x[0] + x[3]) * ISQ2;
	x[3] = x[0] - x[3];
	x[0] = f;
	f = (x[1] + x[2]) * ISQ2;
	x[2] = x[1] - x[2];
	x[1] = f;
    y[4] = x[4] * HALF;
    y[5] = x[5] * HALF;
	y[6] = (x[6] + x[7]) * ISQ2;
	y[7] = (x[6] - x[7]) * ISQ2;

	/* 3rd butterfly stage */
	x[4] = y[4] + y[6];
	x[5] = y[5] + y[7];
	x[6] = y[4] - y[6];
	x[7] = y[5] - y[7];

	/* 4th butterfly stage */
	y[0] = x[0] + x[1];
	y[4] = x[0] - x[1];
	y[6] = x[3] * S1p8 - x[2] * C1p8;
	y[2] = x[2] * S1p8 + x[3] * C1p8;
	y[1] = x[4] * C1p16 + x[5] * S1p16;
	y[7] = x[4] * S1p16 - x[5] * C1p16;
	y[5] = x[6] * C5p16 + x[7] * S5p16;
	y[3] = x[6] * S5p16 - x[7] * C5p16;
}


/* Direct DCT on a vector of blocks */

static void dctvec(
        int16   *x,     /* vector of ints containing the i/o data */
        int     xlen)   /* length of vector (multiple of eight) */
{
	int16 *endp;
	float y[8];

	/* Loop of nofblocks DCT's */
	endp = x + xlen;
    while (x < endp) {
		y[0] = (float) x[0];
		y[1] = (float) x[1];
		y[2] = (float) x[2];
		y[3] = (float) x[3];
		y[4] = (float) x[4];
		y[5] = (float) x[5];
		y[6] = (float) x[6];
		y[7] = (float) x[7];
		dctII8(y);
		x[0] = round(y[0]);
		x[1] = round(y[1]);
		x[2] = round(y[2]);
		x[3] = round(y[3]);
		x[4] = round(y[4]);
		x[5] = round(y[5]);
		x[6] = round(y[6]);
		x[7] = round(y[7]);
		x+=8;
	}
}

/* Inverse DCT on a vector of blocks */

static void idctvec(
        int16   *x,     /* vector of ints containing the i/o data */
        int     xlen)   /* length of vector (multiple of eight) */
{
	int16 *endp;
	float y[8];

	/* Loop of nofblocks DCT's */
	endp = x + xlen;
    while (x < endp) {
		y[0] = (float) x[0];
		y[1] = (float) x[1];
		y[2] = (float) x[2];
		y[3] = (float) x[3];
		y[4] = (float) x[4];
		y[5] = (float) x[5];
		y[6] = (float) x[6];
		y[7] = (float) x[7];
		dctIII8(y);
		x[0] = round(y[0]);
		x[1] = round(y[1]);
		x[2] = round(y[2]);
		x[3] = round(y[3]);
		x[4] = round(y[4]);
		x[5] = round(y[5]);
		x[6] = round(y[6]);
		x[7] = round(y[7]);
		x+=8;
	}
}

/* Direct LOT on a vector of blocks */

#define C0      0.89802757576062
#define C1      0.86074202700394
#define C2      0.87630668004386
#define S0      0.43993916985592
#define S1      0.50904141575037
#define S2      0.48175367410172

static void lotvec(
        int16   *x,     /* vector of ints containing the i/o data */
        int     xlen,   /* length of vector (multiple of eight) */
        float   *y,     /* working vector, length = xlen + 8 */
        float   *z,     /* working vector, length = xlen + 8 */
        int     type)   /* LOT or LBT */
{
	float   *p1, *p2, *p3, *p4;
    int16   *q1, *q2;
	float   f;
	int     i;

	/* Copy x onto y and fold borders */
    q1 = x;
    p1 = y + 4;
    p3 = p1 + xlen;
    while (p1 < p3) {
        *p1++ = *q1++;
    }
	p1 = p2 = y + 4;
	p3 = p4 = p1 + xlen;
    for (i = 4; i ; i--) {
		*(--p2) = *(p1++);
		*(p4++) = *(--p3);
	}

	/* Stages of DCTs */
	p1 = y;
	p2 = y + xlen + 8;
    while (p1 < p2) {
        dctII8(p1);
        /* for LBT, scale first odd coefficient */
        if (type == LBT)
            p1[1] *= SQTWO;
		p1 += 8;
	}

	/* 1st stage of +1/-1 butterflies */
    z[0] = y[0];
    z[1] = y[2];
    z[2] = y[4];
    z[3] = y[6];
    p1 = z + 4;
    p2 = z + xlen - 4;
	p3 = y + 8;
    while(p1 < p2) {
		p1[0] = p3[0] + p3[1];
		p1[1] = p3[2] + p3[3];
		p1[2] = p3[4] + p3[5];
		p1[3] = p3[6] + p3[7];
		p1[4] = p3[0] - p3[1];
		p1[5] = p3[2] - p3[3];
		p1[6] = p3[4] - p3[5];
		p1[7] = p3[6] - p3[7];
		p1 += 8;
		p3 += 8;
	}
	p1[0] = p3[0];
	p1[1] = p3[2];
	p1[2] = p3[4];
	p1[3] = p3[6];

	/* 2nd stage of +1/-1 butterflies */
	p1 = y;
	p2 = y + xlen;
    p3 = z;
    while(p1 < p2) {
		p1[0] = p3[0] + p3[4];
		p1[1] = p3[1] + p3[5];
		p1[2] = p3[2] + p3[6];
		p1[3] = p3[3] + p3[7];
		p1[4] = p3[0] - p3[4];
		p1[5] = p3[1] - p3[5];
		p1[6] = p3[2] - p3[6];
		p1[7] = p3[3] - p3[7];
		p1 += 8;
		p3 += 8;
	}

	/* 3rd stage : plane rotations */
    q1 = x;
    q2 = x + xlen;
	p3 = y;
    while(q1 < q2) {
        q1[0] = round(HALF * p3[0]);
        q1[2] = round(HALF * p3[1]);
        q1[4] = round(HALF * p3[2]);
        q1[6] = round(HALF * p3[3]);
        q1[1] = round((p3[4] * C0 - p3[5] * S0) * HALF);
        f     = p3[4] * S0 + p3[5] * C0;
        q1[3] = round((f * C1 - p3[6] * S1) * HALF);
        f     = f * S1 + p3[6] * C1;
        q1[5] = round((f * C2 - p3[7] * S2) * HALF);
        q1[7] = round((f * S2 + p3[7] * C2) * HALF);
        q1 += 8;
        p3 += 8;
	}
}

/* Inverse LOT on a vector of blocks */

static void ilotvec(
        int16   *x,     /* vector of ints containing the i/o data */
        int     xlen,   /* length of vector (multiple of eight) */
        float   *y,     /* working vector, length = xlen + 8 */
        float   *z,     /* working vector, length = xlen + 8 */
        int     type)   /* LOT or LBT */
{
	float   *p1, *p2, *p3;
    int16   *q1, *q3;
	float   f;

	/* Plane rotations */
	p1 = y;
	p2 = y + xlen;
    q3 = x;
    while(p1 < p2) {
        p1[0] = HALF * q3[0];
        p1[1] = HALF * q3[2];
        p1[2] = HALF * q3[4];
        p1[3] = HALF * q3[6];
        p1[7] = (q3[7] * C2 - q3[5] * S2) * HALF;
        f = q3[5] * C2 + q3[7] * S2;
        p1[6] = (f * C1 - q3[3] * S1) * HALF;
        f = q3[3] * C1 + f * S1;
        p1[5] = (f * C0 - q3[1] * S0) * HALF;
        p1[4] = (f * S0 + q3[1] * C0) * HALF;
		p1 += 8;
        q3 += 8;
	}

	/* 2nd stage of +1/-1 butterflies */
    p1 = z;
    p2 = z + xlen;
	p3 = y;
    while(p1 < p2) {
		p1[0] = p3[0] + p3[4];
		p1[1] = p3[1] + p3[5];
		p1[2] = p3[2] + p3[6];
		p1[3] = p3[3] + p3[7];
		p1[4] = p3[0] - p3[4];
		p1[5] = p3[1] - p3[5];
		p1[6] = p3[2] - p3[6];
		p1[7] = p3[3] - p3[7];
		p1 += 8;
		p3 += 8;
	}

	/* 1st stage of +1/-1 butterflies */
    y[0] = z[0] * TWO;
    y[2] = z[1] * TWO;
    y[4] = z[2] * TWO;
    y[6] = z[3] * TWO;
    y[1] = y[3] = y[5] = y[7] = ZERO;
	p1 = y + 8;
	p2 = y + xlen;
    p3 = z + 4;
    while(p1 < p2) {
		p1[0] = p3[0] + p3[4];
		p1[2] = p3[1] + p3[5];
		p1[4] = p3[2] + p3[6];
		p1[6] = p3[3] + p3[7];
flot8.c - 源码说明

本页面展示了「lapped transforms for images. lapped transform is a very fast algorithm to replace dct.」中的 flot8.c 源码文件，采用 C语言编程语言编写，共 917 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与lapped相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?