⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 c_dotp_flt.c

📁 dsp6713开发板的许多例程.对入门特别有用
💻 C
字号:
/****************************************************************************/
/*         Copyright (C) 1996-2000 Texas Instruments Incorporated           */
/*                      All Rights Reserved                                 */
/*                                                                          */
/* C_DOTP_FLT.C - Floating point dot product example.                       */
/*                Example code from Programmer's Guide on optimizing C code.*/
/*                                                                          */
/****************************************************************************/
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

/* FHI/FLO apply _hi()/_lo() to their argument and pass the result through  */
/* _itof().  _hi, _lo and _itof are not defined in this file; presumably    */
/* they are the TI C6000 compiler intrinsics that extract the upper/lower   */
/* 32-bit word of a double and reinterpret it as a float - TODO confirm     */
/* against the toolchain documentation.                                     */
#define FHI(a)    _itof(_hi(a))
#define FLO(a)    _itof(_lo(a))
/* Largest absolute difference main() accepts between two dot products.     */
#define THRESHOLD 0.001

/* The three dot-product variants defined later in this file.  The int      */
/* argument is the element count (main() passes 512 for 512-float arrays).  */
float dotp1(float * restrict,  float * restrict,  int);
float dotp2(double * restrict, double * restrict, int);
float dotp3(double * restrict, double * restrict, int);

/* a2 and b2 are handed to dotp2() through double pointers in main(); these */
/* pragmas request 8-byte alignment for them.  NOTE(review): presumably so  */
/* that the double-wide accesses are aligned - confirm.                     */
#pragma DATA_ALIGN(a2, 8)
#pragma DATA_ALIGN(b2, 8)

/* Global input arrays: a1/b1 feed dotp1(), a2/b2 feed dotp2().             */
float a1[512], b1[512], a2[512], b2[512];
/* Results of dotp1(), dotp2() and dotp3(), written by main().              */
float ret1, ret2, ret3;

/****************************************************************************/
/* TOP LEVEL DRIVER FOR THE TEST.                                           */
/*                                                                          */
/* Fills three pairs of 512-element float arrays with the same pseudo-random*/
/* data, times dotp1/dotp2/dotp3 with clock(), prints the cycle counts with */
/* the clock() call overhead subtracted, and checks that successive results */
/* agree to within THRESHOLD.  Returns 0.                                   */
/****************************************************************************/
int main()
{
    /* NOTE(review): presumably these pragmas place a3/b3 on a memory-bank  */
    /* boundary so they can be read through double pointers by dotp3() -    */
    /* confirm against the TI compiler documentation.                       */
    #pragma DATA_MEM_BANK(a3, 0);
    #pragma DATA_MEM_BANK(b3, 0);
    float a3[512], b3[512];
    clock_t t_overhead, t_start, t_stop;
    int i;

    /************************************************************************/
    /* INITIALIZE INPUT ARRAYS                                              */
    /* All three a-arrays (and all three b-arrays) receive identical data   */
    /* so the three implementations can be compared against each other.     */
    /************************************************************************/
    for (i = 0; i < 512; i++)
    {
        a1[i] = a2[i] = a3[i] = rand() * 0.000123;
        b1[i] = b2[i] = b3[i] = rand() * 0.000123;
    }

    /************************************************************************/
    /* COMPUTE THE OVERHEAD OF CALLING CLOCK TWICE TO GET TIMING INFO.      */
    /************************************************************************/
    t_start    = clock();
    t_stop     = clock();
    t_overhead = t_stop - t_start;

    /************************************************************************/
    /* TIME DOTP1                                                           */
    /* clock_t is not guaranteed to be int, so the difference is converted  */
    /* to long and printed with %ld to keep format and argument in step.    */
    /************************************************************************/
    t_start = clock();
    ret1 = dotp1(a1, b1, 512);
    t_stop = clock();
    printf("DOTP1: %ld cycles\n", (long)(t_stop - t_start - t_overhead));

    /************************************************************************/
    /* TIME DOTP2                                                           */
    /* fabs() is declared in <math.h>; without that declaration it would be */
    /* treated as returning int and the comparison would be meaningless.    */
    /************************************************************************/
    t_start = clock();
    ret2 = dotp2((double *)a2, (double *)b2, 512);
    t_stop = clock();
    printf("DOTP2: %ld cycles\n", (long)(t_stop - t_start - t_overhead));
    if (fabs(ret1 - ret2) > THRESHOLD) printf("Result failure dotp2()\n");
    else                               printf("Correct result dotp2()\n");

    /************************************************************************/
    /* TIME DOTP3                                                           */
    /* The result is checked against dotp2's result, not dotp1's.           */
    /************************************************************************/
    t_start = clock();
    ret3 = dotp3((double *)a3, (double *)b3, 512);
    t_stop = clock();
    printf("DOTP3: %ld cycles\n", (long)(t_stop - t_start - t_overhead));
    if (fabs(ret2 - ret3) > THRESHOLD) printf("Result failure dotp3()\n");
    else                               printf("Correct result dotp3()\n");

    /* Explicit status: pre-C99 compilers do not supply one implicitly.     */
    return 0;
}

/****************************************************************************/
/* DOTP1 - BASIC FORM.                                                      */
/*                                                                          */
/* Returns the single-precision dot product of the first N elements of a    */
/* and b, accumulated in ascending index order.  Returns 0 when N <= 0.     */
/****************************************************************************/
float dotp1(float a[restrict], float b[restrict], int N)
{
    const float *pa = a;
    const float *pb = b;
    float acc = 0;
    int remaining;

    /* Walk both arrays front to back, one product per iteration. */
    for (remaining = N; remaining > 0; remaining--)
    {
        acc += *pa * *pb;
        pa++;
        pb++;
    }

    return acc;
}

/****************************************************************************/
/* DOTP2 - USING INTRINSICS                                                 */
/*                                                                          */
/* Dot product of two arrays of N floats that the caller passes as double   */
/* pointers, so each a[i] / b[i] access covers two adjacent floats.  The    */
/* two halves are split with _hi()/_lo() and converted with _itof(); these  */
/* are not defined in this file (presumably TI C6000 compiler intrinsics -  */
/* TODO confirm against the toolchain documentation).                       */
/*                                                                          */
/* a, b : storage holding N floats each, viewed as doubles.                 */
/* N    : number of float elements (not the number of doubles).             */
/* Returns the sum of the element-wise products; 0 when N <= 0.             */
/****************************************************************************/
float dotp2(double a[restrict], double b[restrict], int N)
{
    int i;
    float sum0 = 0;   /* products of the _hi halves */
    float sum1 = 0;   /* products of the _lo halves */

    for (i = 0; i < N/2; i++)
    {
        sum0 += _itof(_hi(a[i])) * _itof(_hi(b[i]));
        sum1 += _itof(_lo(a[i])) * _itof(_lo(b[i]));
    }

    /* N/2 truncates, so an odd N leaves one float that the paired loop     */
    /* never visits.  Read it as a single float rather than as a double,    */
    /* which would reach past the end of the arrays.                        */
    if (N > 0 && (N % 2) != 0)
        sum0 += ((float *)a)[N - 1] * ((float *)b)[N - 1];

    return sum0 + sum1;
}

/****************************************************************************/
/* DOTP3 - PEAK PERFORMANCE                                                 */
/*                                                                          */
/* Dot product of two arrays of N floats passed as double pointers, with    */
/* the main loop unrolled four doubles (eight floats) per iteration into    */
/* eight independent accumulators.  FHI()/FLO() split each double-wide      */
/* access into its two floats.                                              */
/*                                                                          */
/* a, b : storage holding N floats each, viewed as doubles.                 */
/* N    : number of float elements (not the number of doubles).             */
/* Returns the sum of the element-wise products; 0 when N <= 0.             */
/*                                                                          */
/* When N is a multiple of 8 only the unrolled loop runs.  Other values of  */
/* N are finished by the two remainder steps below so that no access goes   */
/* beyond the N floats supplied and no element is skipped.                  */
/****************************************************************************/
float dotp3(double a[restrict], double b[restrict], int N)
{
    int i;
    int pairs    = N / 2;               /* whole float pairs available      */
    int unrolled = pairs - (pairs % 4); /* pairs handled four at a time     */
    float sum0 = 0;
    float sum1 = 0;
    float sum2 = 0;
    float sum3 = 0;
    float sum4 = 0;
    float sum5 = 0;
    float sum6 = 0;
    float sum7 = 0;

    /* Bounded by 'unrolled' so a[i+3] / b[i+3] always lie inside the data. */
    for (i = 0; i < unrolled; i += 4)
    {
        sum0 += FHI(a[i])   * FHI(b[i]);
        sum1 += FLO(a[i])   * FLO(b[i]);
        sum2 += FHI(a[i+1]) * FHI(b[i+1]);
        sum3 += FLO(a[i+1]) * FLO(b[i+1]);
        sum4 += FHI(a[i+2]) * FHI(b[i+2]);
        sum5 += FLO(a[i+2]) * FLO(b[i+2]);
        sum6 += FHI(a[i+3]) * FHI(b[i+3]);
        sum7 += FLO(a[i+3]) * FLO(b[i+3]);
    }

    /* Up to three float pairs left over from the unrolled loop. */
    for (; i < pairs; i++)
    {
        sum0 += FHI(a[i]) * FHI(b[i]);
        sum1 += FLO(a[i]) * FLO(b[i]);
    }

    /* Odd N: one trailing float that does not form a pair; read it as a    */
    /* single float instead of through a double-wide access.                */
    if (N > 0 && (N % 2) != 0)
        sum0 += ((float *)a)[N - 1] * ((float *)b)[N - 1];

    /* Pairwise reduction of the eight partial sums. */
    sum0 += sum1;
    sum2 += sum3;
    sum4 += sum5;
    sum6 += sum7;
    sum0 += sum2;
    sum4 += sum6;

    return sum0 + sum4;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -