/*
 * dsp_mat_mul.h
 *
 * From the "dm642 function library" -- C header file, 191 lines.
 */
/* ======================================================================== */
/*  TEXAS INSTRUMENTS, INC.                                                 */
/*                                                                          */
/*  DSPLIB  DSP Signal Processing Library                                   */
/*                                                                          */
/*      Release:        Revision 1.04b                                      */
/*      CVS Revision:   1.4     Sun Sep 29 03:32:25 2002 (UTC)              */
/*      Snapshot date:  23-Oct-2003                                         */
/*                                                                          */
/*  This library contains proprietary intellectual property of Texas        */
/*  Instruments, Inc.  The library and its source code are protected by     */
/*  various copyrights, and portions may also be protected by patents or    */
/*  other legal protections.                                                */
/*                                                                          */
/*  This software is licensed for use with Texas Instruments TMS320         */
/*  family DSPs.  This license was provided to you prior to installing      */
/*  the software.  You may review this license by consulting the file       */
/*  TI_license.PDF which accompanies the files in this library.             */
/* ------------------------------------------------------------------------ */
/*          Copyright (C) 2003 Texas Instruments, Incorporated.             */
/*                          All Rights Reserved.                            */
/* ======================================================================== */
/* ======================================================================== */
/*  Assembler compatibility shim for assembling 4.30 and later code on      */
/*  tools prior to 4.30.                                                    */
/* ======================================================================== */
/* ======================================================================== */
/*  End of assembler compatibility shim.                                    */
/* ======================================================================== */
/* ======================================================================== */
/*  TEXAS INSTRUMENTS, INC.                                                 */
/*                                                                          */
/*  NAME                                                                    */
/*      DSP_mat_mul -- Matrix Multiply, Little Endian                       */
/*                                                                          */
/*   REVISION DATE                                                          */
/*       10-Feb-2002                                                        */
/*                                                                          */
/*  USAGE                                                                   */
/*      This routine is C-callable and can be called as:                    */
/*                                                                          */
/*          void DSP_mat_mul                                                */
/*          (                                                               */
/*              const short *restrict x, int r1, int c1,                    */
/*              const short *restrict y,         int c2,                    */
/*              short       *restrict r,                                    */
/*              int                   qs                                    */
/*          );                                                              */
/*                                                                          */
/*      x  == Pointer to r1 by c1 input matrix.                             */
/*      y  == Pointer to c1 by c2 input matrix.                             */
/*      r  == Pointer to r1 by c2 output matrix.                            */
/*                                                                          */
/*      r1 == Number of rows in x.                                          */
/*      c1 == Number of columns in x.  Also number of rows in y.            */
/*      c2 == Number of columns in y.                                       */
/*                                                                          */
/*      qs == Final right-shift to apply to the result.                     */
/*                                                                          */
/*  DESCRIPTION                                                             */
/*      This function computes the expression "r = x * y" for the matrices  */
/*      x and y.  The columnar dimension of x must match the row dimension  */
/*      of y.  The resulting matrix has the same number of rows as x and    */
/*      the same number of columns as y.                                    */
/*                                                                          */
/*      The values stored in the matrices are assumed to be fixed-point     */
/*      or integer values.  All intermediate sums are retained to 32-bit    */
/*      precision.  No rounding or overflow checking is performed.  The     */
/*      results are right-shifted by the user-specified amount, and then    */
/*      truncated to 16 bits.                                               */
/*                                                                          */
/*      This code is suitable for dense matrices.  No optimizations are     */
/*      made for sparse matrices.                                           */
/*                                                                          */
/*      The following is a C description of the algorithm.  The assembly    */
/*      code may place restrictions on the inputs that the C code version   */
/*      does not.  These restrictions are noted under ASSUMPTIONS below.    */
/*                                                                          */
/*      void DSP_mat_mul                                                    */
/*      (                                                                   */
/*          const short *restrict x, int r1, int c1,                        */
/*          const short *restrict y,         int c2,                        */
/*          short       *restrict r,                                        */
/*          int                   qs                                        */
/*      )                                                                   */
/*      {                                                                   */
/*          int i, j, k;                                                    */
/*          int sum;                                                        */
/*                                                                          */
/*          // ---------------------------------------------------- //      */
/*          //  Multiply each row in x by each column in y.  The    //      */
/*          //  product of row m in x and column n in y is placed   //      */
/*          //  in position (m,n) in the result.                    //      */
/*          // ---------------------------------------------------- //      */
/*          for (i = 0; i < r1; i++)                                        */
/*              for (j = 0; j < c2; j++)                                    */
/*              {                                                           */
/*                  sum = 0;                                                */
/*                                                                          */
/*                  for (k = 0; k < c1; k++)                                */
/*                      sum += x[k + i*c1] * y[j + k*c2];                   */
/*                                                                          */
/*                  r[j + i*c2] = sum >> qs;                                */
/*              }                                                           */
/*      }                                                                   */
/*                                                                          */
/*  ASSUMPTIONS                                                             */
/*      The arrays 'x', 'y', and 'r' are stored in distinct arrays.  That   */
/*      is, in-place processing is not allowed.                             */
/*                                                                          */
/*      The input matrices have minimum dimensions of at least 1 row and    */
/*      1 column, and maximum dimensions of 32767 rows and 32767 columns.   */
/*                                                                          */
/*  TECHNIQUES                                                              */
/*      The 'i' loop and 'k' loops are unrolled 2x.  The 'j' loop is        */
/*      unrolled 4x.  For dimensions that are not multiples of the          */
/*      various loops' unroll factors, this code calculates extra results   */
/*      beyond the edges of the matrix.  These extra results are            */
/*      ultimately discarded.  This allows the loops to be unrolled for     */
/*      efficient operation on large matrices while not losing              */
/*      flexibility.                                                        */
/*                                                                          */
/*      The outer two levels of loop nest are collapsed, further reducing   */
/*      the overhead of the looping structure.                              */
/*                                                                          */
/*  NOTES                                                                   */
/*      This code blocks interrupts during its innermost loop.  Interrupts  */
/*      are not blocked otherwise.  As a result, interrupts can be blocked  */
/*      for up to 0.25*c1' + 16 cycles at a time.                           */
/*                                                                          */
/*      When calculating the loop trip counts, the values of r1 and c1      */
/*      are rounded up to the next even value.  The value of c2 is          */
/*      rounded up to the next multiple of 4.  This does not affect         */
/*      the memory layout of the input or output matrices.                  */
/*                                                                          */
/*  MEMORY NOTE                                                             */
/*      The load instructions in the inner loop are predicated to avoid     */
/*      significant over-fetching on the matrices.  However, since the      */
/*      outer loops are unrolled, this code may fetch approximately one     */
/*      full row beyond the end of the 'x' matrix and approximately one     */
/*      double-word beyond the end of the 'y' matrix.  The values read      */
/*      are discarded and do not affect the results of the computation.     */
/*                                                                          */
/*      This code has no memory alignment requirements, as non-aligned      */
/*      loads are used for accessing the inputs, and individual STHs are    */
/*      used for writing the results.                                       */
/*                                                                          */
/*      This is a LITTLE ENDIAN implementation.                             */
/*                                                                          */
/*  CYCLES                                                                  */
/*      cycles = 0.25 * (r1'*c2'*c1') + 2.25 * (r1'*c2') + 11, where:       */
/*                                                                          */
/*          r1' = 2 * ceil(r1/2.0)   // r1 rounded up to next even          */
/*          c1' = 2 * ceil(c1/2.0)   // c1 rounded up to next even          */
/*          c2' = 4 * ceil(c2/4.0)   // c2 rounded up to next mult of 4     */
/*                                                                          */
/*      For r1= 1, c1= 1, c2= 1,  cycles =    33.                           */
/*      For r1= 8, c1=20, c2= 8,  cycles =   475.                           */
/*      For r1=12, c1=14, c2=18,  cycles =  1391.                           */
/*      For r1=32, c1=32, c2=32,  cycles = 10507.                           */
/*                                                                          */
/*      The cycle count includes 6 cycles of function call overhead.  The   */
/*      exact overhead seen by a given application will depend on the       */
/*      compiler options used.                                              */
/*                                                                          */
/*  CODESIZE                                                                */
/*      416 bytes.                                                          */
/*                                                                          */
/* ------------------------------------------------------------------------ */
/*            Copyright (c) 2003 Texas Instruments, Incorporated.           */
/*                           All Rights Reserved.                           */
/* ======================================================================== */
#ifndef DSP_MAT_MUL_H_
#define DSP_MAT_MUL_H_ 1

/*
 * DSP_mat_mul -- dense 16-bit matrix multiply, r = x * y.
 *
 *   x   Pointer to the r1-by-c1 input matrix.
 *   r1  Number of rows in x.
 *   c1  Number of columns in x; also the number of rows in y.
 *   y   Pointer to the c1-by-c2 input matrix.
 *   c2  Number of columns in y.
 *   r   Pointer to the r1-by-c2 output matrix.
 *   qs  Final right-shift applied to each 32-bit sum before it is
 *       truncated to 16 bits and stored in r.
 *
 * x, y and r must be distinct arrays (in-place operation is not allowed).
 * No rounding or overflow checking is performed.
 */
void DSP_mat_mul(const short *restrict x,
                 int r1,
                 int c1,
                 const short *restrict y,
                 int c2,
                 short *restrict r,
                 int qs);

#endif
/* ======================================================================== */
/*  End of file:  dsp_mat_mul.h                                             */
/* ------------------------------------------------------------------------ */
/*            Copyright (c) 2003 Texas Instruments, Incorporated.           */
/*                           All Rights Reserved.                           */
/* ======================================================================== */

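/*
 * Keyboard shortcuts (from the hosting web page):
 *
 *   Copy code            Ctrl + C
 *   Search code          Ctrl + F
 *   Full-screen mode     F11
 *   Increase font size   Ctrl + =
 *   Decrease font size   Ctrl + -
 *   Show shortcuts       ?
 */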