📄 c_vecsum.c
字号:
/****************************************************************************/
/* Copyright (C) 1996-2000 Texas Instruments Incorporated                   */
/* All Rights Reserved                                                      */
/*                                                                          */
/* C_VECSUM.C - Example code from Programmer's Guide on optimizing C code.  */
/*                                                                          */
/* Six variants of a 16-bit vector sum are timed with clock() and each      */
/* optimized variant is checked against the output of the basic version.    */
/* The optimized variants rely on TI compiler extensions: the _add2 and     */
/* _nassert intrinsics and the MUST_ITERATE pragma.                         */
/****************************************************************************/
#include <stdio.h>
#include <string.h>   /* memcmp() */
#include <time.h>

void vecsum1(short *sum, short *in1, short *in2, unsigned int N);
void vecsum2(short * restrict sum, short * restrict in1,
             short * restrict in2, unsigned int N);
void vecsum3(short * restrict sum, short * restrict in1,
             short * restrict in2, unsigned int N);
void vecsum4(short * restrict sum, short * restrict in1,
             short * restrict in2, unsigned int N);
void vecsum5(short * restrict sum, short * restrict in1,
             short * restrict in2, unsigned int N);
void vecsum6(int * restrict sum, int * restrict in1,
             int * restrict in2, unsigned int N);

/* Number of 16-bit elements in each test vector. */
unsigned int n = 44;

/* First input vector. */
short a[] = {0x08D6, 0xF380, 0xF380, 0x0000, 0xF380, 0x0C80,
             0x08D6, 0xF380, 0x0C80, 0x0C80, 0xF380, 0xF380,
             0xF729, 0x0C80, 0x0C80, 0x0000, 0xF380, 0xF380,
             0xF729, 0x0C80, 0xF380, 0xF380, 0x0C80, 0x0C80,
             0x08D6, 0xF380, 0xF380, 0x0000, 0xF380, 0x0C80,
             0x08D6, 0xF380, 0x0C80, 0x0C80, 0xF380, 0xF380,
             0xF729, 0x0C80, 0x0C80, 0x0000, 0xF380, 0xF380,
             0xF729, 0x0C80 };

/* Second input vector. */
short b[] = {0x08D6, 0xF380, 0xF380, 0x0000, 0xF380, 0x0C80,
             0x08D6, 0xF380, 0x0C80, 0x0C80, 0xF380, 0xF380,
             0xF729, 0x0C80, 0x0C80, 0x0000, 0xF380, 0xF380,
             0xF729, 0x0C80, 0xF380, 0xF380, 0x0C80, 0x0C80,
             0x08D6, 0xF380, 0xF380, 0x0000, 0x0C80, 0x0C80,
             0x08D6, 0xF380, 0x0C80, 0x0C80, 0xF380, 0xF380,
             0xF729, 0x0C80, 0x0C80, 0x0000, 0xF380, 0xF380,
             0xF729, 0x0C80 };

short c1[44];   /* Reference result, written by vecsum1().               */
short c[44];    /* Result of the variant under test, compared with c1.   */

/****************************************************************************/
/* PRINT THE CYCLE COUNT MEASURED FOR ONE VARIANT.                          */
/* clock_t is converted to long explicitly so that the argument type always */
/* matches the %ld conversion, whatever clock_t is on the target.           */
/****************************************************************************/
static void report_cycles(const char *label, clock_t cycles)
{
    printf("%s: %ld cycles\n", label, (long)cycles);
}

/****************************************************************************/
/* COMPARE THE CURRENT RESULT BUFFER (c) WITH THE REFERENCE (c1) AND REPORT.*/
/****************************************************************************/
static void check_result(const char *func)
{
    if (memcmp(c1, c, sizeof(c)) != 0)
        printf("Result failure %s()\n", func);
    else
        printf("Correct result %s()\n", func);
}

/****************************************************************************/
/* TOP LEVEL DRIVER FOR THE TEST.                                           */
/****************************************************************************/
int main(void)
{
    clock_t t_overhead, t_start, t_stop;

    /************************************************************************/
    /* COMPUTE THE OVERHEAD OF CALLING CLOCK TWICE TO GET TIMING INFO.      */
    /************************************************************************/
    t_start    = clock();
    t_stop     = clock();
    t_overhead = t_stop - t_start;

    /************************************************************************/
    /* TIME VECSUM1.  ITS OUTPUT (c1) IS THE REFERENCE FOR ALL LATER CHECKS.*/
    /************************************************************************/
    t_start = clock();
    vecsum1(c1, a, b, n);
    t_stop  = clock();
    report_cycles("VECSUM1", t_stop - t_start - t_overhead);

    /************************************************************************/
    /* TIME VECSUM2.                                                        */
    /************************************************************************/
    t_start = clock();
    vecsum2(c, a, b, n);
    t_stop  = clock();
    report_cycles("VECSUM2", t_stop - t_start - t_overhead);
    check_result("vecsum2");

    /************************************************************************/
    /* TIME VECSUM3.                                                        */
    /************************************************************************/
    t_start = clock();
    vecsum3(c, a, b, n);
    t_stop  = clock();
    report_cycles("VECSUM3", t_stop - t_start - t_overhead);
    check_result("vecsum3");

    /************************************************************************/
    /* TIME VECSUM4.                                                        */
    /************************************************************************/
    t_start = clock();
    vecsum4(c, a, b, n);
    t_stop  = clock();
    report_cycles("VECSUM4", t_stop - t_start - t_overhead);
    check_result("vecsum4");

    /************************************************************************/
    /* TIME VECSUM5.                                                        */
    /************************************************************************/
    t_start = clock();
    vecsum5(c, a, b, n);
    t_stop  = clock();
    report_cycles("VECSUM5", t_stop - t_start - t_overhead);
    check_result("vecsum5");

    /************************************************************************/
    /* TIME VECSUM6.  THE SHORT ARRAYS ARE PASSED AS WORD POINTERS.         */
    /************************************************************************/
    t_start = clock();
    vecsum6((int*)c, (int*)a, (int*)b, n);
    t_stop  = clock();
    report_cycles("VECSUM6", t_stop - t_start - t_overhead);
    check_result("vecsum6");

    return 0;
}

/****************************************************************************/
/* BASIC VECSUM                                                             */
/* sum[i] = in1[i] + in2[i] for i in [0, N).                                */
/****************************************************************************/
void vecsum1(short *sum, short *in1, short *in2, unsigned int N)
{
    int i;

    for (i = 0; i < N; i++)
        sum[i] = in1[i] + in2[i];
}

/****************************************************************************/
/* VECSUM WITH RESTRICT KEYWORDS                                            */
/* Same computation as vecsum1; restrict promises that the three buffers    */
/* do not overlap.                                                          */
/****************************************************************************/
void vecsum2(short * restrict sum, short * restrict in1,
             short * restrict in2, unsigned int N)
{
    int i;

    for (i = 0; i < N; i++)
        sum[i] = in1[i] + in2[i];
}

/****************************************************************************/
/* VECSUM WITH RESTRICT KEYWORDS, MUST_ITERATE, WORD READS                  */
/* Processes two 16-bit elements per iteration through int pointers, using  */
/* the _add2 intrinsic to add both halves of each word.                     */
/* Only N/2 words are processed, so an odd trailing element is NOT summed.  */
/* NOTE(review): assumes all three buffers are word aligned and N >= 20     */
/* (MUST_ITERATE(10) on N/2 iterations) - confirm for every caller.         */
/****************************************************************************/
void vecsum3(short * restrict sum, short * restrict in1,
             short * restrict in2, unsigned int N)
{
    int i;
    const int *i_in1 = (const int *)in1;
    const int *i_in2 = (const int *)in2;
    int       *i_sum = (int *)sum;

    #pragma MUST_ITERATE(10)
    for (i = 0; i < (N/2); i++)
        i_sum[i] = _add2(i_in1[i], i_in2[i]);
}

/****************************************************************************/
/* VECSUM WITH RESTRICT KEYWORDS , MUST_ITERATE, WORD READS, GENERIC VERSION*/
/* Falls back to element-by-element addition when any buffer is not word    */
/* aligned; otherwise adds two elements per iteration and then handles the  */
/* final element of an odd-length vector separately.                        */
/****************************************************************************/
void vecsum4(short * restrict sum, short * restrict in1,
             short * restrict in2, unsigned int N)
{
    int i;

    /* test to see if sum, in2, and in1 are aligned to a word boundary */
    if (((int)sum | (int)in2 | (int)in1) & 0x2)
    {
        #pragma MUST_ITERATE(20)
        for (i = 0; i < N; i++)
            sum[i] = in1[i] + in2[i];
    }
    else
    {
        const int *i_in1 = (const int *)in1;
        const int *i_in2 = (const int *)in2;
        int       *i_sum = (int *)sum;

        #pragma MUST_ITERATE(10)
        for (i = 0; i < (N >> 1); i++)
            i_sum[i] = _add2(i_in1[i], i_in2[i]);

        /* The word loop covered elements 0 .. 2*(N>>1)-1.  For odd N the   */
        /* one element left over is N-1; the word-loop counter (N>>1) must  */
        /* not be reused as a short index here.                             */
        if (N & 0x1)
            sum[N - 1] = in1[N - 1] + in2[N - 1];
    }
}

/****************************************************************************/
/* VECSUM WITH RESTRICT KEYWORDS , MUST_ITERATE, AND WORD READS             */
/* Plain element loop; the _nassert calls tell the compiler that all three  */
/* buffers are word aligned, and MUST_ITERATE(40, , 2) that the trip count  */
/* is at least 40 and a multiple of 2, leaving the word-read transformation */
/* to the compiler.                                                         */
/****************************************************************************/
void vecsum5(short * restrict sum, short * restrict in1,
             short * restrict in2, unsigned int N)
{
    int i;

    _nassert(((int)sum & 0x3) == 0);
    _nassert(((int)in1 & 0x3) == 0);
    _nassert(((int)in2 & 0x3) == 0);

    #pragma MUST_ITERATE(40, , 2)
    for (i = 0; i < N; i++)
        sum[i] = in1[i] + in2[i];
}

/****************************************************************************/
/* VECSUM WITH RESTRICT KEYWORDS , MUST_ITERATE, WORD READS, AND UNROLLED   */
/* N is the number of 16-bit elements.  The vector is treated as N/2 words  */
/* split into two halves of sz = N/4 words; each iteration adds one word    */
/* from each half.  Elements beyond 4*sz (when N is not a multiple of 4)    */
/* are NOT summed.                                                          */
/****************************************************************************/
void vecsum6(int * restrict sum, int * restrict in1,
             int * restrict in2, unsigned int N)
{
    int i;
    int sz = N >> 2;

    #pragma MUST_ITERATE(10)
    for (i = 0; i < sz; i++)
    {
        sum[i]      = _add2(in1[i]     , in2[i]);
        sum[i + sz] = _add2(in1[i + sz], in2[i + sz]);
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -