/* File: img_mad_8x8.h */
/* The arrays for the source image data and the result must be */
/* word aligned. */
/* */
/* MEMORY NOTE */
/* No bank conflicts occur for this kernel. */
/* */
/* No particular memory alignment is required for the reference */
/* image, although the source image data and result */
/* must be word aligned. */
/* */
/* NOTES */
/* The search window spans from (0,0) to (sx-1,sy-1), inclusive. */
/* */
/* The inner loop comprises 64 instructions that are executed in 8 */
/* cycles, with 64 absolute-differences accumulated in a single */
/* iteration. The source pixels are pre-read into registers and */
/* the inner loop is unrolled completely. This code thus executes */
/* 8 instructions per cycle, and computes 8 absolute differences */
/* per cycle. */
/* */
/* This benchmark is Little Endian. */
/* */
/* This code suppresses interrupts for the entire duration of the */
/* code, hence it is interrupt tolerant but not interruptible. */
/* */
/* C CODE */
/* The following is the equivalent C code. The assembly code */
/* has restrictions as noted under "ASSUMPTIONS". */
/* */
/* void IMG_mad_8x8 */
/* ( */
/* const unsigned char *restrict refImg, */
/* const unsigned char *restrict srcImg, */
/* int pitch, int sx, int sy, */
/* unsigned int *restrict match */
/* ) */
/* { */
/* int i, j, x, y, matx, maty; */
/* unsigned matpos, matval; */
/* */
/* matval = ~0U; */
/* matx = maty = 0; */
/* */
/* for (x = 0; x < sx; x++) */
/* for (y = 0; y < sy; y++) */
/* { */
/* unsigned acc = 0; */
/* */
/* for (i = 0; i < 8; i++) */
/* for (j = 0; j < 8; j++) */
/* acc += abs(srcImg[i*8 + j] - */
/* refImg[(i+y)*pitch + x + j]); */
/* */
/* if (acc < matval) */
/* { */
/* matval = acc; */
/* matx = x; */
/* maty = y; */
/* } */
/* } */
/* */
/* matpos = (0xffff0000 & (matx << 16)) | */
/* (0x0000ffff & maty); */
/* match[0] = matpos; */
/* match[1] = matval; */
/* } */
/* */
/* CYCLES */
/* sx = # of columns of the search space */
/* sy = # of rows of the search space */
/* cycles = 8 * sx * sy + 66. */
/* */
/* For sx= 4 and sy= 4, cycles = 194. */
/* For sx= 64 and sy= 32, cycles = 16450. */
/* */
/* Overall throughput is 7.97 pixels/cycle (0.126 cycles/pixel) */
/* over the whole function for the case sx=64 and sy=32. */
/* */
/* CODESIZE */
/* 788 bytes */
/* ------------------------------------------------------------------------ */
/* Copyright (c) 2003 Texas Instruments, Incorporated. */
/* All Rights Reserved. */
/* ======================================================================== */
#ifndef IMG_MAD_8X8_H_
#define IMG_MAD_8X8_H_ 1

/* The routine is implemented outside of C++, so give it C linkage when  */
/* this header is included from a C++ translation unit; otherwise the    */
/* caller would reference a mangled symbol name and fail to link.        */
#ifdef __cplusplus
extern "C" {
#endif

/* ------------------------------------------------------------------------ */
/* IMG_mad_8x8 -- exhaustive 8x8 minimum-absolute-difference search.        */
/*                                                                          */
/* For every candidate offset (x, y) with 0 <= x < sx and 0 <= y < sy,      */
/* the sum of the 64 absolute differences between the 8x8 source block      */
/* and the 8x8 region of the reference image whose top-left pixel is at     */
/* row y, column x is computed.  The offset with the smallest sum wins.     */
/* Per the reference C code, candidates are scanned with x in the outer     */
/* loop and y in the inner loop and a strict "less than" comparison is      */
/* used, so on a tie the first candidate in that scan order is kept.        */
/*                                                                          */
/*   ref_data  Reference image to search.  Rows are 'pitch' bytes apart.    */
/*             Per the reference C code, rows 0 .. sy+6 and columns         */
/*             0 .. sx+6 are read.  No particular alignment is required.    */
/*   src_data  Source 8x8 block: 64 bytes, row-major (row i, column j is    */
/*             at index i*8 + j).  Must be word aligned.                    */
/*   pitch     Width of the reference image (distance between rows).        */
/*   sx, sy    Search window size: number of columns / rows of candidate    */
/*             offsets.                                                     */
/*   match     Result, two words, word aligned:                             */
/*               match[0] = (best_x << 16) | (best_y & 0xffff)              */
/*               match[1] = sum of absolute differences at that offset      */
/*                                                                          */
/* The optimized implementation may impose further restrictions; see the    */
/* notes in the file header.                                                */
/* ------------------------------------------------------------------------ */
void IMG_mad_8x8
(
    const unsigned char *ref_data,  /* Ref. image to search */
    const unsigned char *src_data,  /* Source 8x8 block     */
    int pitch,                      /* Width of ref image   */
    int sx, int sy,                 /* Search window size   */
    unsigned int *match             /* Result               */
);

#ifdef __cplusplus
}
#endif

#endif
/* ======================================================================== */
/* End of file: img_mad_8x8.h */
/* ------------------------------------------------------------------------ */
/* Copyright (c) 2003 Texas Instruments, Incorporated. */
/* All Rights Reserved. */
/* ======================================================================== */
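/* NOTE: the text below is residue from the web code viewer this file was */
/* copied from; it is not part of img_mad_8x8.h. */
/* */
/* Keyboard shortcuts: */
/* Copy code: Ctrl + C */
/* Search code: Ctrl + F */
/* Fullscreen mode: F11 */
/* Toggle theme: Ctrl + Shift + D */
/* Show shortcuts: ? */
/* Increase font size: Ctrl + = */
/* Decrease font size: Ctrl + - */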