dmat_dmat_mult.hpp

来自「矩阵运算源码最新版本」· HPP 代码 · 共 1,095 行 · 第 1/3 页

HPP
1,095
字号
// Software License for MTL// // Copyright (c) 2007 The Trustees of Indiana University. All rights reserved.// Authors: Peter Gottschling and Andrew Lumsdaine// // This file is part of the Matrix Template Library// // See also license.mtl.txt in the distribution.#ifndef MTL_DMAT_DMAT_MULT_INCLUDE#define MTL_DMAT_DMAT_MULT_INCLUDE#include <boost/static_assert.hpp>#include <boost/type_traits.hpp>#include <boost/numeric/mtl/operation/set_to_zero.hpp>#include <boost/numeric/mtl/utility/range_generator.hpp>#include <boost/numeric/mtl/operation/cursor_pseudo_dot.hpp>#include <boost/numeric/mtl/operation/multi_action_block.hpp>#include <boost/numeric/mtl/operation/assign_mode.hpp>#include <boost/numeric/mtl/utility/tag.hpp>#include <boost/numeric/mtl/utility/glas_tag.hpp>#include <boost/numeric/mtl/utility/is_row_major.hpp>#include <boost/numeric/meta_math/loop.hpp>#include <boost/numeric/mtl/recursion/base_case_test.hpp>#include <boost/numeric/mtl/recursion/base_case_matrix.hpp>#include <boost/numeric/mtl/recursion/matrix_recursator.hpp>#include <boost/numeric/mtl/recursion/base_case_cast.hpp>#include <boost/numeric/mtl/interface/blas.hpp>#include <boost/numeric/mtl/matrix/dense2D.hpp>#include <boost/numeric/mtl/operation/print_matrix.hpp>#include <boost/numeric/mtl/operation/no_op.hpp>#include <iostream>#include <complex>namespace mtl {// =====================================// Generic matrix product with iterators// =====================================template <typename MatrixA, typename MatrixB, typename MatrixC, typename Assign= assign::assign_sum,	  typename Backup= no_op>     // To allow 5th parameter, is ignoredstruct gen_dmat_dmat_mult_ft{    void operator()(MatrixA const& a, MatrixB const& b, MatrixC& c)    {	using namespace tag;	using traits::range_generator;          typedef typename range_generator<row, MatrixA>::type       a_cur_type;                     typedef typename range_generator<row, MatrixC>::type       c_cur_type;             	typedef typename range_generator<col, MatrixB>::type       b_cur_type;                     typedef typename range_generator<iter::all, c_cur_type>::type   c_icur_type;                    typedef typename range_generator<const_iter::all, a_cur_type>::type  a_icur_type;                    typedef typename range_generator<const_iter::all, b_cur_type>::type  b_icur_type;          	if (Assign::init_to_zero) set_to_zero(c);	a_cur_type ac= begin<row>(a), aend= end<row>(a);	for (c_cur_type cc= begin<row>(c); ac != aend; ++ac, ++cc) {	    b_cur_type bc= begin<col>(b), bend= end<col>(b);	    for (c_icur_type cic= begin<iter::all>(cc); bc != bend; ++bc, ++cic) { 		    		typename MatrixC::value_type c_tmp(*cic);		a_icur_type aic= begin<const_iter::all>(ac), aiend= end<const_iter::all>(ac); 		for (b_icur_type bic= begin<const_iter::all>(bc); aic != aiend; ++aic, ++bic) {		    //std::cout << "aic " << *aic << ", bic " << *bic << '\n'; std::cout.flush();		    Assign::update(c_tmp, *aic * *bic);		}		*cic= c_tmp;	    }	}    }    };template <typename Assign= assign::assign_sum,	  typename Backup= no_op>     // To allow 2nd parameter, is ignoredstruct gen_dmat_dmat_mult_t{    template <typename MatrixA, typename MatrixB, typename MatrixC>    void operator()(MatrixA const& a, MatrixB const& b, MatrixC& c)    {	gen_dmat_dmat_mult_ft<MatrixA, MatrixB, MatrixC, Assign, Backup>()(a, b, c);    }};// =====================================================// Generic matrix product with cursors and property maps// =====================================================template <typename MatrixA, typename MatrixB, typename MatrixC, typename Assign= assign::assign_sum,	  typename Backup= no_op>     // To allow 5th parameter, is ignoredstruct gen_cursor_dmat_dmat_mult_ft{    void operator()(MatrixA const& a, MatrixB const& b, MatrixC& c)    {	typedef glas::tag::row                                          row;	typedef glas::tag::col                                          col;	typedef glas::tag::all                                          all;        typedef typename traits::const_value<MatrixA>::type                a_value_type;        typedef typename traits::const_value<MatrixB>::type                b_value_type;        typedef typename traits::value<MatrixC>::type                      c_value_type;        typedef typename traits::range_generator<row, MatrixA>::type     a_cur_type;        typedef typename traits::range_generator<row, MatrixC>::type     c_cur_type;                typedef typename traits::range_generator<col, MatrixB>::type     b_cur_type;        typedef typename traits::range_generator<all, c_cur_type>::type  c_icur_type;        typedef typename traits::range_generator<all, a_cur_type>::type  a_icur_type;        typedef typename traits::range_generator<all, b_cur_type>::type  b_icur_type;	if (Assign::init_to_zero) set_to_zero(c);	a_value_type   a_value(a);	b_value_type   b_value(b);	c_value_type   c_value(c);    			a_cur_type ac= begin<row>(a), aend= end<row>(a);	for (c_cur_type cc= begin<row>(c); ac != aend; ++ac, ++cc) {	    	    b_cur_type bc= begin<col>(b), bend= end<col>(b);	    for (c_icur_type cic= begin<all>(cc); bc != bend; ++bc, ++cic) { 				typename MatrixC::value_type c_tmp(c_value(*cic));		a_icur_type aic= begin<all>(ac), aiend= end<all>(ac); 		for (b_icur_type bic= begin<all>(bc); aic != aiend; ++aic, ++bic)		    Assign::update(c_tmp, a_value(*aic) * b_value(*bic));		c_value(*cic, c_tmp);	    }	}     }};template <typename Assign= assign::assign_sum,	  typename Backup= no_op>     // To allow 2nd parameter, is ignoredstruct gen_cursor_dmat_dmat_mult_t{    template <typename MatrixA, typename MatrixB, typename MatrixC>    void operator()(MatrixA const& a, MatrixB const& b, MatrixC& c)    {	gen_cursor_dmat_dmat_mult_ft<MatrixA, MatrixB, MatrixC, Assign, Backup>()(a, b, c);    }};/*Unrolling matrix product with dimensions that are not multiples of blocks1. Do with optimization:   C_nw += A_nw * B_nw   - wherby the matrix dimensions of sub-matrices are the largest multiples of block sizes      smaller or equal to the matrix dimensions of the original matrix2. Do without optimization   C_nw += A_ne * B_sw   C_ne += A_n * B_e   C_s += A_s * BThe inner loop can be unrolled arbitrarily. So, we can simplify1. Do with optimization:   C_nw += A_n * B_w   - wherby the matrix dimensions of sub-matrices are the largest multiples of block sizes      smaller or equal to the matrix dimensions of the original matrix2. Do with optimization only in inner loop   C_ne += A_n * B_e   C_s += A_s * B  */// =======================// Unrolled with iterators// required has_2D_layout// =======================// Define defaults if not yet given as Compiler flag#ifndef MTL_DMAT_DMAT_MULT_TILING1#  define MTL_DMAT_DMAT_MULT_TILING1 2#endif#ifndef MTL_DMAT_DMAT_MULT_TILING2#  define MTL_DMAT_DMAT_MULT_TILING2 4#endif#ifndef MTL_DMAT_DMAT_MULT_INNER_UNROLL#  define MTL_DMAT_DMAT_MULT_INNER_UNROLL 8#endiftemplate <unsigned long Index0, unsigned long Max0, unsigned long Index1, unsigned long Max1, typename Assign>struct gen_tiling_dmat_dmat_mult_block    : public meta_math::loop2<Index0, Max0, Index1, Max1>{    typedef meta_math::loop2<Index0, Max0, Index1, Max1>                              base;    typedef gen_tiling_dmat_dmat_mult_block<base::next_index0, Max0, base::next_index1, Max1, Assign>  next_t;    template <typename Value, typename ValueA, typename SizeA, typename ValueB, typename SizeB>    static inline void apply(Value& tmp00, Value& tmp01, Value& tmp02, Value& tmp03, Value& tmp04, 			     Value& tmp05, Value& tmp06, Value& tmp07, Value& tmp08, Value& tmp09, 			     Value& tmp10, Value& tmp11, Value& tmp12, Value& tmp13, Value& tmp14, Value& tmp15, 			     ValueA *begin_a, SizeA& ari, ValueB *begin_b, SizeB& bci)    {	tmp00+= begin_a[ base::index0 * ari ] * begin_b[ base::index1 * bci ];	next_t::apply(tmp01, tmp02, tmp03, tmp04, tmp05, tmp06, tmp07, tmp08, tmp09, 		      tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp00, 		      begin_a, ari, begin_b, bci);     }    template <typename Value, typename MatrixC, typename SizeC>    static inline void update(Value& tmp00, Value& tmp01, Value& tmp02, Value& tmp03, Value& tmp04, 			      Value& tmp05, Value& tmp06, Value& tmp07, Value& tmp08, Value& tmp09, 			      Value& tmp10, Value& tmp11, Value& tmp12, Value& tmp13, Value& tmp14, Value& tmp15,			      MatrixC& c, SizeC i, SizeC k)    {	Assign::update(c(i + base::index0, k + base::index1), tmp00);	next_t::update(tmp01, tmp02, tmp03, tmp04, tmp05, tmp06, tmp07, tmp08, tmp09, 		       tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp00, 		       c, i, k);    }};template <unsigned long Max0, unsigned long Max1, typename Assign>struct gen_tiling_dmat_dmat_mult_block<Max0, Max0, Max1, Max1, Assign>    : public meta_math::loop2<Max0, Max0, Max1, Max1>{    typedef meta_math::loop2<Max0, Max0, Max1, Max1>  base;    template <typename Value, typename ValueA, typename SizeA, typename ValueB, typename SizeB>    static inline void apply(Value& tmp00, Value& tmp01, Value& tmp02, Value& tmp03, Value& tmp04, 			     Value& tmp05, Value& tmp06, Value& tmp07, Value& tmp08, Value& tmp09, 			     Value& tmp10, Value& tmp11, Value& tmp12, Value& tmp13, Value& tmp14, Value& tmp15, 			     ValueA *begin_a, SizeA& ari, ValueB *begin_b, SizeB& bci)    {	tmp00+= begin_a[ base::index0 * ari ] * begin_b[ base::index1 * bci ];    }    template <typename Value, typename MatrixC, typename SizeC>    static inline void update(Value& tmp00, Value& tmp01, Value& tmp02, Value& tmp03, Value& tmp04, 			      Value& tmp05, Value& tmp06, Value& tmp07, Value& tmp08, Value& tmp09, 			      Value& tmp10, Value& tmp11, Value& tmp12, Value& tmp13, Value& tmp14, Value& tmp15,			      MatrixC& c, SizeC i, SizeC k)    {	Assign::update(c(i + base::index0, k + base::index1), tmp00);    }};template <typename MatrixA, typename MatrixB, typename MatrixC,	  unsigned long Tiling1= MTL_DMAT_DMAT_MULT_TILING1,	  unsigned long Tiling2= MTL_DMAT_DMAT_MULT_TILING2,	  typename Assign= assign::assign_sum, 	  typename Backup= gen_dmat_dmat_mult_t<Assign> >struct gen_tiling_dmat_dmat_mult_ft{    BOOST_STATIC_ASSERT((Tiling1 * Tiling2 <= 16));      void operator()(MatrixA const& a, MatrixB const& b, MatrixC& c)    {	apply(a, b, c, typename traits::category<MatrixA>::type(),	      typename traits::category<MatrixB>::type());    }    private:    void apply(MatrixA const& a, MatrixB const& b, MatrixC& c, tag::universe, tag::universe)    {	Backup()(a, b, c);    }#if MTL_OUTLINE_TILING_DMAT_DMAT_MULT_APPLY    void apply(MatrixA const& a, MatrixB const& b, MatrixC& c, tag::has_2D_layout, tag::has_2D_layout);#else    void apply(MatrixA const& a, MatrixB const& b, MatrixC& c, tag::has_2D_layout, tag::has_2D_layout)    {	// std::cout << "do unrolling\n";	if (Assign::init_to_zero) set_to_zero(c);	typedef gen_tiling_dmat_dmat_mult_block<1, Tiling1, 1, Tiling2, Assign>  block;	typedef typename MatrixC::size_type                                          size_type;	typedef typename MatrixC::value_type                                         value_type;	const value_type z= math::zero(c[0][0]);    // if this are matrices we need their size	size_type i_max= c.num_rows(), i_block= Tiling1 * (i_max / Tiling1),	          k_max= c.num_cols(), k_block= Tiling2 * (k_max / Tiling2);	size_t ari= &a(1, 0) - &a(0, 0), // how much is the offset of A's entry increased by incrementing row	       aci= &a(0, 1) - &a(0, 0), bri= &b(1, 0) - &b(0, 0), bci= &b(0, 1) - &b(0, 0);	    	// C_nw += A_nw * B_nw	for (size_type i= 0; i < i_block; i+= Tiling1)	    for (size_type k= 0; k < k_block; k+= Tiling2) {		value_type tmp00= z, tmp01= z, tmp02= z, tmp03= z, tmp04= z,                           tmp05= z, tmp06= z, tmp07= z, tmp08= z, tmp09= z, 		           tmp10= z, tmp11= z, tmp12= z, tmp13= z, tmp14= z, tmp15= z;		const typename MatrixA::value_type *begin_a= &a(i, 0), *end_a= &a(i, a.num_cols());		const typename MatrixB::value_type *begin_b= &b(0, k);		for (; begin_a != end_a; begin_a+= aci, begin_b+= bri)		    block::apply(tmp00, tmp01, tmp02, tmp03, tmp04, tmp05, tmp06, tmp07, tmp08, tmp09, 				 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, 				 begin_a, ari, begin_b, bci); 		block::update(tmp00, tmp01, tmp02, tmp03, tmp04, tmp05, tmp06, tmp07, tmp08, tmp09, 			      tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, 			      c, i, k);	    }	// C_ne += A_n * B_e	for (size_type i= 0; i < i_block; i++)	    for (int k = k_block; k < k_max; k++) {		value_type tmp00= z;		const typename MatrixA::value_type *begin_a= &a(i, 0), *end_a= &a(i, a.num_cols());		const typename MatrixB::value_type *begin_b= &b(0, k);		for (; begin_a != end_a; begin_a+= aci, begin_b+= bri)		    tmp00 += *begin_a * *begin_b;		Assign::update(c(i, k), tmp00);	    }	// C_s += A_s * B	for (size_type i= i_block; i < i_max; i++)	    for (int k = 0; k < k_max; k++) {		value_type tmp00= z;		const typename MatrixA::value_type *begin_a= &a(i, 0), *end_a= &a(i, a.num_cols());		const typename MatrixB::value_type *begin_b= &b(0, k);		for (; begin_a != end_a; begin_a+= aci, begin_b+= bri)		    tmp00 += *begin_a * *begin_b;		Assign::update(c(i, k), tmp00);	    }    }#endif};template <unsigned long Tiling1= MTL_DMAT_DMAT_MULT_TILING1,	  unsigned long Tiling2= MTL_DMAT_DMAT_MULT_TILING2,	  typename Assign= assign::assign_sum, 	  typename Backup= gen_dmat_dmat_mult_t<Assign> >struct gen_tiling_dmat_dmat_mult_t{    template <typename MatrixA, typename MatrixB, typename MatrixC>    void operator()(MatrixA const& a, MatrixB const& b, MatrixC& c)    {	gen_tiling_dmat_dmat_mult_ft<	     MatrixA, MatrixB, MatrixC, Tiling1, Tiling2, Assign, Backup	>()(a, b, c);    }};#if MTL_OUTLINE_TILING_DMAT_DMAT_MULT_APPLYtemplate <typename MatrixA, typename MatrixB, typename MatrixC, 

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?