⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cxmatmul.cpp

📁 opencv库在TI DM6437上的移植,目前包括两个库cv.lib和cxcore.lib的工程
💻 CPP
📖 第 1 页 / 共 5 页
字号:
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                        Intel License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of Intel Corporation may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "_cxcore.h"

#include <string.h>

/****************************************************************************************\
*                                         cvGEMM                                         *
\****************************************************************************************/

/* Hooks for externally supplied GEMM routines, one per element flavor
   (32f, 64f, 32fc, 64fc, going by the names). Each is defined here and
   initialized to 0.
   NOTE(review): presumably these are function pointers that get assigned
   elsewhere when an optimized BLAS is available -- nothing in this part of
   the file sets or calls them; confirm against the typedefs and users. */
icvBLAS_GEMM_32f_t icvBLAS_GEMM_32f_p = 0;
icvBLAS_GEMM_64f_t icvBLAS_GEMM_64f_p = 0;
icvBLAS_GEMM_32fc_t icvBLAS_GEMM_32fc_p = 0;
icvBLAS_GEMM_64fc_t icvBLAS_GEMM_64fc_p = 0;

/*
 * Copies a rectangular block of elements, one row at a time.
 *
 *   src, src_step - first source row and the byte distance between
 *                   consecutive source rows
 *   dst, dst_step - first destination row and the byte distance between
 *                   consecutive destination rows
 *   size          - block dimensions: size.width elements per row,
 *                   size.height rows
 *   pix_size      - size of one element in bytes
 *
 * Every row is moved with memcpy, so the copy makes no assumption about
 * int alignment of the rows and copies exactly width*pix_size bytes even
 * when pix_size is not a multiple of sizeof(int). Source and destination
 * rows must not overlap. A block with a non-positive width, height or
 * element size is treated as empty and nothing is copied.
 */
static void
icvGEMM_CopyBlock( const uchar* src, int src_step,
                   uchar* dst, int dst_step,
                   CvSize size, int pix_size )
{
    size_t row_bytes;
    int row;

    /* Reject empty/invalid blocks up front so the loop below can never
       run away on a negative row count. */
    if( size.width <= 0 || size.height <= 0 || pix_size <= 0 )
        return;

    /* Widen before multiplying so the byte count cannot overflow int. */
    row_bytes = (size_t)size.width * (size_t)pix_size;

    for( row = 0; row < size.height; row++, src += src_step, dst += dst_step )
        memcpy( dst, src, row_bytes );
}


static void
icvGEMM_TransposeBlock( const uchar* src, int src_step,
                        uchar* dst, int dst_step,
                        CvSize size, int pix_size )
{
    int i, j;
    for( i = 0; i < size.width; i++, dst += dst_step, src += pix_size )
    {
        const uchar* _src = src;
        switch( pix_size )
        {
        case sizeof(int):
            for( j = 0; j < size.height; j++, _src += src_step )
                ((int*)dst)[j] = ((int*)_src)[0];
            break;
        case sizeof(int)*2:
            for( j = 0; j < size.height*2; j += 2, _src += src_step )
            {
                int t0 = ((int*)_src)[0];
                int t1 = ((int*)_src)[1];
                ((int*)dst)[j] = t0;
                ((int*)dst)[j+1] = t1;
            }
            break;
        case sizeof(int)*4:
            for( j = 0; j < size.height*4; j += 4, _src += src_step )
            {
                int t0 = ((int*)_src)[0];
                int t1 = ((int*)_src)[1];
                ((int*)dst)[j] = t0;
                ((int*)dst)[j+1] = t1;
                t0 = ((int*)_src)[2];
                t1 = ((int*)_src)[3];
                ((int*)dst)[j+2] = t0;
                ((int*)dst)[j+3] = t1;
            }
            break;
        default:
            assert(0);
            return;
        }
    }
}

#define ICV_DEF_GEMM_SINGLE_MUL( flavor, arrtype, worktype )                \
static CvStatus CV_STDCALL                                                  \
icvGEMMSingleMul_##flavor( const arrtype* a_data, size_t a_step,            \
                         const arrtype* b_data, size_t b_step,              \
                         const arrtype* c_data, size_t c_step,              \
                         arrtype* d_data, size_t d_step,                    \
                         CvSize a_size, CvSize d_size,                      \
                         double alpha, double beta, int flags )             \
{                                                                           \
    int i, j, k, n = a_size.width, m = d_size.width, drows = d_size.height; \
    const arrtype *_a_data = a_data, *_b_data = b_data, *_c_data = c_data;  \
    arrtype* a_buf = 0;                                                     \
    size_t a_step0, a_step1, c_step0, c_step1, t_step;                      \
                                                                            \
    a_step /= sizeof(a_data[0]);                                            \
    b_step /= sizeof(b_data[0]);                                            \
    c_step /= sizeof(c_data[0]);                                            \
    d_step /= sizeof(d_data[0]);                                            \
    a_step0 = a_step;                                                       \
    a_step1 = 1;                                                            \
                                                                            \
    if( !c_data )                                                           \
        c_step0 = c_step1 = 0;                                              \
    else if( !(flags & CV_GEMM_C_T) )                                       \
        c_step0 = c_step, c_step1 = 1;                                      \
    else                                                                    \
        c_step0 = 1, c_step1 = c_step;                                      \
                                                                            \
    if( flags & CV_GEMM_A_T )                                               \
    {                                                                       \
        CV_SWAP( a_step0, a_step1, t_step );                                \
        n = a_size.height;                                                  \
        if( a_step > 1 && n > 1 )                                           \
            a_buf = (arrtype*)cvStackAlloc(n*sizeof(a_data[0]));            \
    }                                                                       \
                                                                            \
    if( n == 1 ) /* external product */                                     \
    {                                                                       \
        arrtype* b_buf = 0;                                                 \
                                                                            \
        if( a_step > 1 )                                                    \
        {                                                                   \
            a_buf = (arrtype*)cvStackAlloc(drows*sizeof(a_data[0]));        \
            for( k = 0; k < drows; k++ )                                    \
                a_buf[k] = a_data[a_step*k];                                \
            a_data = a_buf;                                                 \
        }                                                                   \
                                                                            \
        if( b_step > 1 )                                                    \
        {                                                                   \
            b_buf = (arrtype*)cvStackAlloc(d_size.width*sizeof(b_buf[0]) ); \
            for( j = 0; j < d_size.width; j++ )                             \
                b_buf[j] = b_data[j*b_step];                                \
            b_data = b_buf;                                                 \
        }                                                                   \
                                                                            \
        for( i = 0; i < drows; i++, _c_data += c_step0,                     \
                                    d_data += d_step )                      \
        {                                                                   \
            worktype al = worktype(a_data[i])*alpha;                        \
            c_data = _c_data;                                               \
            for( j = 0; j <= d_size.width - 2; j += 2, c_data += 2*c_step1 )\
            {                                                               \
                worktype s0 = al*b_data[j];                                 \
                worktype s1 = al*b_data[j+1];                               \
                if( !c_data )                                               \
                {                                                           \
                    d_data[j] = arrtype(s0);                                \
                    d_data[j+1] = arrtype(s1);                              \
                }                                                           \
                else                                                        \
                {                                                           \
                    d_data[j] = arrtype(s0 + c_data[0]*beta);               \
                    d_data[j+1] = arrtype(s1 + c_data[c_step1]*beta);       \
                }                                                           \
            }                                                               \
                                                                            \
            for( ; j < d_size.width; j++, c_data += c_step1 )               \
            {                                                               \
                worktype s0 = al*b_data[j];                                 \
                if( !c_data )                                               \
                    d_data[j] = arrtype(s0);                                \
                else                                                        \
                    d_data[j] = arrtype(s0 + c_data[0]*beta);               \
            }                                                               \
        }                                                                   \
    }                                                                       \
    else if( flags & CV_GEMM_B_T ) /* A * Bt */                             \
    {                                                                       \
        for( i = 0; i < drows; i++, _a_data += a_step0,                     \
                                    _c_data += c_step0,                     \
                                    d_data += d_step )                      \
        {                                                                   \
            a_data = _a_data;                                               \
            b_data = _b_data;                                               \
            c_data = _c_data;                                               \
                                                                            \
            if( a_buf )                                                     \
            {                                                               \
                for( k = 0; k < n; k++ )                                    \
                    a_buf[k] = a_data[a_step1*k];                           \
                a_data = a_buf;                                             \
            }                                                               \
                                                                            \
            for( j = 0; j < d_size.width; j++, b_data += b_step,            \
                                               c_data += c_step1 )          \
            {                                                               \
                worktype s0(0), s1(0), s2(0), s3(0);                        \
                                                                            \
                for( k = 0; k <= n - 4; k += 4 )                            \
                {                                                           \
                    s0 += worktype(a_data[k])*b_data[k];                    \
                    s1 += worktype(a_data[k+1])*b_data[k+1];                \
                    s2 += worktype(a_data[k+2])*b_data[k+2];                \
                    s3 += worktype(a_data[k+3])*b_data[k+3];                \
                }                                                           \
                                                                            \
                for( ; k < n; k++ )                                         \
                    s0 += worktype(a_data[k])*b_data[k];                    \
                s0 = (s0+s1+s2+s3)*alpha;                                   \
                                                                            \
                if( !c_data )                                               \
                    d_data[j] = arrtype(s0);                                \
                else                                                        \
                    d_data[j] = arrtype(s0 + c_data[0]*beta);               \
            }                                                               \
        }                                                                   \
    }                                                                       \
    else if( d_size.width*sizeof(d_data[0]) <= 1600 )                       \
    {                                                                       \
        for( i = 0; i < drows; i++, _a_data += a_step0,                     \

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -