📄 trydct.cpp
字号:
// TryDCT.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include "Common.h"
#define LOOP (1048576)
// Reads the processor time-stamp counter with the RDTSC instruction and
// stores the 64-bit result in `c`.
//
// Despite the name, this does not call the Win32 QueryPerformanceCounter API:
// the value written is whatever RDTSC returns (EDX:EAX), so differences
// between two readings are in time-stamp-counter ticks.
//
// c: receives the counter; the low 32 bits are written at offset 0 and the
//    high 32 bits at offset 4.
//
// NOTE(review): written with MSVC-style __asm and 32-bit registers, so this
// presumably only builds for 32-bit x86 targets - confirm before porting.
// NOTE(review): no serializing instruction is issued around RDTSC here.
inline void query_performance_count(__int64& c)
{
__asm
{
// ECX = address of the caller's variable (used as the store address below).
mov ecx, c
// RDTSC leaves the counter in EDX:EAX.
rdtsc
// Low dword of the counter.
mov [ecx], eax
// High dword of the counter.
mov [ecx + 4], edx
}
}
int main(int argc, char* argv[])
{
#if 1
__declspec(align(16)) unsigned char buf[128], buf2[128];
__declspec(align(16)) short tmpbuf[64], preserve_dct[64];
// __declspec(align(16)) double co[64];
#else
__declspec(align(16)) unsigned char dummy;
unsigned char buf[128], buf2[128];
short tmpbuf[64], preserve_dct[64];
// double co[64];
#endif
// double tmpl[64];
int i, j;
__int64 n64Start, n64End;
__int64 n64BBmpegAAN = 0, n64BBmpegLLM = 0, n64FdctAM32 = 0, n64FdctMM32 = 0, n64FdctXvidMMX = 0, n64FdctXvidSSE2 = 0;
__int64 n64IdctAAN = 0, n64IdctChen = 0, n64IdctInt32 = 0, n64Idct8x8SSE = 0, n64IdctAP922FloatSSE = 0, n64IdctAP922FloatX87 = 0, n64IdctAP922HybridX87 = 0, n64IdctXvidMMX = 0, n64IdctXvidXMM = 0, n64IdctXvidSSE2 = 0;
__int64 n64SubPredMMX = 0, n64AddPredMMX = 0, n64SubPredSSE2 = 0;
Init();
srand((unsigned)time(NULL));
for ( j = 0; j < LOOP; j++ )
{
for ( i = 0; i < 64; i++ )
{
buf[i] = rand() * 255 / RAND_MAX;
buf2[i] = rand() * 255 / RAND_MAX;
}
// Standard_fDCT(buf, tmpl);
/*
query_performance_count(n64Start);
BBmpeg_AAN_fDct(buf, co);
query_performance_count(n64End);
n64BBmpegAAN += n64End - n64Start;
// printf("The max absolute error (BBmpeg_AAN_fDct): %E\n", CompareDCT(co, tmpl));
query_performance_count(n64Start);
BBmpeg_LLM_fDct(buf, co);
query_performance_count(n64End);
n64BBmpegLLM += n64End - n64Start;
// printf("The max absolute error (BBmpeg_LLM_fDct): %E\n", CompareDCT(co, tmpl));
for ( i = 0; i < 64; i++ )
tmpbuf[i] = (short)buf[i];
query_performance_count(n64Start);
fdct_am32(tmpbuf);
query_performance_count(n64End);
n64FdctAM32 += n64End - n64Start;
// for ( i = 0; i < 64; i++ )
// co[i] = (double)tmpbuf[i];
// printf("The max absolute error (fdct_am32): %E\n", CompareDCT(co, tmpl));
*/
for ( i = 0; i < 64; i++ )
tmpbuf[i] = (short)buf[i];
query_performance_count(n64Start);
fdct_mm32(tmpbuf);
query_performance_count(n64End);
n64FdctMM32 += n64End - n64Start;
// for ( i = 0; i < 64; i++ )
// co[i] = (double)tmpbuf[i];
// printf("The max absolute error (fdct_mm32): %E\n", CompareDCT(co, tmpl));
for ( i = 0; i < 64; i++ )
tmpbuf[i] = (short)buf[i];
query_performance_count(n64Start);
fdct_mmx(tmpbuf);
query_performance_count(n64End);
n64FdctXvidMMX += n64End - n64Start;
for ( i = 0; i < 64; i++ )
tmpbuf[i] = (short)buf[i];
query_performance_count(n64Start);
// fdct_sse2(tmpbuf);
query_performance_count(n64End);
n64FdctXvidSSE2 += n64End - n64Start;
memcpy(preserve_dct, tmpbuf, sizeof(tmpbuf));
query_performance_count(n64Start);
idct_aan(tmpbuf);
query_performance_count(n64End);
n64IdctAAN += n64End - n64Start;
memcpy(tmpbuf, preserve_dct, sizeof(tmpbuf));
query_performance_count(n64Start);
idct_chen(tmpbuf);
query_performance_count(n64End);
n64IdctChen += n64End - n64Start;
memcpy(tmpbuf, preserve_dct, sizeof(tmpbuf));
query_performance_count(n64Start);
idct_int32(tmpbuf);
query_performance_count(n64End);
n64IdctInt32 += n64End - n64Start;
memcpy(tmpbuf, preserve_dct, sizeof(tmpbuf));
query_performance_count(n64Start);
idct8x8_sse(tmpbuf);
query_performance_count(n64End);
n64Idct8x8SSE += n64End - n64Start;
/*
memcpy(tmpbuf, preserve_dct, sizeof(tmpbuf));
query_performance_count(n64Start);
idct_ap922float_x87(tmpbuf);
query_performance_count(n64End);
n64IdctAP922FloatX87 += n64End - n64Start;
memcpy(tmpbuf, preserve_dct, sizeof(tmpbuf));
query_performance_count(n64Start);
idct_ap922hybr_x87(tmpbuf);
query_performance_count(n64End);
n64IdctAP922HybridX87 += n64End - n64Start;
memcpy(tmpbuf, preserve_dct, sizeof(tmpbuf));
query_performance_count(n64Start);
idct_ap922float_sse(tmpbuf);
query_performance_count(n64End);
n64IdctAP922FloatSSE += n64End - n64Start;
*/
memcpy(tmpbuf, preserve_dct, sizeof(tmpbuf));
query_performance_count(n64Start);
idct_mmx(tmpbuf);
query_performance_count(n64End);
n64IdctXvidMMX += n64End - n64Start;
memcpy(tmpbuf, preserve_dct, sizeof(tmpbuf));
query_performance_count(n64Start);
idct_xmm(tmpbuf);
query_performance_count(n64End);
n64IdctXvidXMM += n64End - n64Start;
memcpy(tmpbuf, preserve_dct, sizeof(tmpbuf));
query_performance_count(n64Start);
// idct_sse2(tmpbuf);
query_performance_count(n64End);
n64IdctXvidSSE2 += n64End - n64Start;
query_performance_count(n64Start);
sub_pred_mmx(buf, buf2, 16, tmpbuf);
query_performance_count(n64End);
n64SubPredMMX += n64End - n64Start;
query_performance_count(n64Start);
add_pred_mmx(buf, buf2, 16, tmpbuf);
query_performance_count(n64End);
n64AddPredMMX += n64End - n64Start;
query_performance_count(n64Start);
sub_pred_sse2(buf, buf2, 16, tmpbuf);
query_performance_count(n64End);
n64SubPredSSE2 += n64End - n64Start;
}
// printf("n64BBmpegAAN = %I64d,\tn64BBmpegLLM = %I64d\n", n64BBmpegAAN / LOOP, n64BBmpegLLM / LOOP);
// printf("n64FdctAM32 = %I64d,\tn64FdctMM32 = %I64d\n", n64FdctAM32 / LOOP, n64FdctMM32 / LOOP);
printf("n64FdctMM32 = %I64d,\tn64FdctXvidMMX = %I64d\n", n64FdctMM32 / LOOP, n64FdctXvidMMX / LOOP);
printf("n64FdctXvidSSE2 = %I64d\n", n64FdctXvidSSE2 / LOOP);
printf("\n");
printf("n64IdctAAN = %I64d,\tn64IdctChen = %I64d\n", n64IdctAAN / LOOP, n64IdctChen / LOOP);
printf("n64IdctInt32 = %I64d,\tn64Idct8x8SSE = %I64d\n", n64IdctInt32 / LOOP, n64Idct8x8SSE / LOOP);
// printf("n64IdctAP922HybridX87 = %I64d,\tn64IdctAP922FloatX87 = %I64d\n", n64IdctAP922HybridX87 / LOOP, n64IdctAP922FloatX87 / LOOP);
// printf("n64IdctAP922FloatSSE = %I64d\n", n64IdctAP922FloatSSE / LOOP);
printf("n64IdctXvidMMX = %I64d,\tn64IdctXvidXMM = %I64d\n", n64IdctXvidMMX / LOOP, n64IdctXvidXMM / LOOP);
printf("n64IdctXvidSSE2 = %I64d\n", n64IdctXvidSSE2 / LOOP);
printf("\n");
printf("n64SubPredMMX = %I64d,\tn64AddPredMMX = %I64d\n", n64SubPredMMX / LOOP, n64AddPredMMX / LOOP);
printf("n64SubPredSSE2 = %I64d\n", n64SubPredSSE2 / LOOP);
return 0;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -