smat_dmat_mult.hpp

来自「矩阵运算源码最新版本」· HPP 代码 · 共 267 行

HPP
267
字号
// Software License for MTL// // Copyright (c) 2007 The Trustees of Indiana University. All rights reserved.// Authors: Peter Gottschling and Andrew Lumsdaine// // This file is part of the Matrix Template Library// // See also license.mtl.txt in the distribution.#ifndef MTL_SMAT_DMAT_MULT_INCLUDE#define MTL_SMAT_DMAT_MULT_INCLUDE#include <boost/numeric/mtl/operation/set_to_zero.hpp>#include <boost/numeric/mtl/utility/range_generator.hpp>#include <boost/numeric/mtl/concept/collection.hpp>#include <boost/numeric/mtl/utility/tag.hpp>#include <boost/numeric/mtl/utility/category.hpp>#include <boost/numeric/meta_math/loop1.hpp>namespace mtl { namespace functor {template <typename Assign= assign::assign_sum,	  typename Backup= no_op>     // To allow 2nd parameter, is ignoredstruct gen_smat_dmat_mult{    template <typename MatrixA, typename MatrixB, typename MatrixC>    void operator()(MatrixA const& a, MatrixB const& b, MatrixC& c)    {	apply(a, b, c, typename OrientedCollection<MatrixA>::orientation());    }private:    template <typename MatrixA, typename MatrixB, typename MatrixC>    void apply(MatrixA const& a, MatrixB const& b, MatrixC& c, tag::row_major)    {	using namespace tag;	using traits::range_generator;          typedef typename range_generator<row, MatrixA>::type       a_cur_type;                     typedef typename range_generator<row, MatrixC>::type       c_cur_type;             	typedef typename range_generator<col, MatrixB>::type       b_cur_type;                     typedef typename range_generator<nz, a_cur_type>::type     a_icur_type;                    typedef typename range_generator<all, b_cur_type>::type    b_icur_type;                  typedef typename range_generator<iter::all, c_cur_type>::type    c_icur_type;            	typename traits::col<MatrixA>::type             col_a(a); 	typename traits::const_value<MatrixA>::type     value_a(a); 	typename traits::const_value<MatrixB>::type     value_b(b); 	if (Assign::init_to_zero) set_to_zero(c);	a_cur_type ac= begin<row>(a), aend= end<row>(a);	for (c_cur_type cc= begin<row>(c); ac != aend; ++ac, ++cc) {	    b_cur_type bc= begin<col>(b), bend= end<col>(b);	    for (c_icur_type cic= begin<iter::all>(cc); bc != bend; ++bc, ++cic) { 		    		typename MatrixC::value_type c_tmp(*cic);		for (a_icur_type aic= begin<nz>(ac), aiend= end<nz>(ac); aic != aiend; ++aic) {		    typename Collection<MatrixA>::size_type     ca= col_a(*aic);   // column of non-zero		    typename Collection<MatrixA>::value_type    va= value_a(*aic); // value of non-zero		    b_icur_type bic= begin<all>(bc);		    bic+= ca;		    typename Collection<MatrixB>::value_type    vb= value_b(*bic); // value in dense matrix		    Assign::update(c_tmp, value_a(*aic) * value_b(*bic));		}		*cic= c_tmp;	    }	}    }    template <typename MatrixA, typename MatrixB, typename MatrixC>    void apply(MatrixA const& a, MatrixB const& b, MatrixC& c, tag::col_major)    {	using namespace tag;	using traits::range_generator;          typedef typename range_generator<col, MatrixA>::type       a_cur_type;                     typedef typename range_generator<nz, a_cur_type>::type     a_icur_type;            	typename traits::row<MatrixA>::type             row_a(a); 	typename traits::const_value<MatrixA>::type     value_a(a); 	if (Assign::init_to_zero) set_to_zero(c);	unsigned rb= 0; // traverse all rows of b	for (a_cur_type ac= begin<col>(a), aend= end<col>(a); ac != aend; ++ac, ++rb)	    for (a_icur_type aic= begin<nz>(ac), aiend= end<nz>(ac); aic != aiend; ++aic) {		typename Collection<MatrixA>::size_type     ra= row_a(*aic);   // row in A and C		typename Collection<MatrixA>::value_type    va= value_a(*aic); // value of non-zero		for (unsigned cb= 0; cb < num_cols(b); ++cb) // column in B and C		    Assign::update(c(ra, cb), va * b(rb, cb));	    }    }};// =======================// Unrolled // required has_2D_layout// =======================// Define defaults if not yet given as Compiler flag#ifndef MTL_SMAT_DMAT_MULT_TILING1#  define MTL_SMAT_DMAT_MULT_TILING1 8#endiftemplate <unsigned long Index0, unsigned long Max0, typename Assign>struct gen_tiling_smat_dmat_mult_block    : public meta_math::loop1<Index0, Max0>{    typedef meta_math::loop1<Index0, Max0>                                    base;    typedef gen_tiling_smat_dmat_mult_block<base::next_index0, Max0, Assign>  next_t;    template <typename Value, typename ValueA, typename ValueB, typename Size>    static inline void apply(Value& tmp00, Value& tmp01, Value& tmp02, Value& tmp03, Value& tmp04, 			     Value& tmp05, Value& tmp06, Value& tmp07, Value& tmp08, Value& tmp09, 			     Value& tmp10, Value& tmp11, Value& tmp12, Value& tmp13, Value& tmp14, Value& tmp15, 			     const ValueA& va, ValueB *begin_b, const Size& bci)    {	tmp00+= va * *(begin_b + base::index0 * bci);	next_t::apply(tmp01, tmp02, tmp03, tmp04, tmp05, tmp06, tmp07, tmp08, tmp09, 		      tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp00, 		      va, begin_b, bci);     }    template <typename Value, typename MatrixC, typename SizeC>    static inline void update(Value& tmp00, Value& tmp01, Value& tmp02, Value& tmp03, Value& tmp04, 			      Value& tmp05, Value& tmp06, Value& tmp07, Value& tmp08, Value& tmp09, 			      Value& tmp10, Value& tmp11, Value& tmp12, Value& tmp13, Value& tmp14, Value& tmp15,			      MatrixC& c, SizeC i, SizeC k)    {	Assign::update(c(i, k + base::index0), tmp00);	next_t::update(tmp01, tmp02, tmp03, tmp04, tmp05, tmp06, tmp07, tmp08, tmp09, 		       tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp00, 		       c, i, k);    }};	template <unsigned long Max0, typename Assign>struct gen_tiling_smat_dmat_mult_block<Max0, Max0, Assign>    : public meta_math::loop1<Max0, Max0>{    typedef meta_math::loop1<Max0, Max0>                                    base;    template <typename Value, typename ValueA, typename ValueB, typename Size>    static inline void apply(Value& tmp00, Value& tmp01, Value& tmp02, Value& tmp03, Value& tmp04, 			     Value& tmp05, Value& tmp06, Value& tmp07, Value& tmp08, Value& tmp09, 			     Value& tmp10, Value& tmp11, Value& tmp12, Value& tmp13, Value& tmp14, Value& tmp15, 			     const ValueA& va, ValueB *begin_b, const Size& bci)    {	tmp00+= va * *(begin_b + base::index0 * bci);    }    template <typename Value, typename MatrixC, typename SizeC>    static inline void update(Value& tmp00, Value& tmp01, Value& tmp02, Value& tmp03, Value& tmp04, 			      Value& tmp05, Value& tmp06, Value& tmp07, Value& tmp08, Value& tmp09, 			      Value& tmp10, Value& tmp11, Value& tmp12, Value& tmp13, Value& tmp14, Value& tmp15,			      MatrixC& c, SizeC i, SizeC k)    {	Assign::update(c(i, k + base::index0), tmp00);    }};template <unsigned long Tiling1= MTL_SMAT_DMAT_MULT_TILING1,	  typename Assign= assign::assign_sum,	  typename Backup= gen_smat_dmat_mult<Assign> >struct gen_tiling_smat_dmat_mult{    template <typename MatrixA, typename MatrixB, typename MatrixC>    void operator()(MatrixA const& a, MatrixB const& b, MatrixC& c)    {	apply(a, b, c, typename traits::category<MatrixC>::type());    }private:    template <typename MatrixA, typename MatrixB, typename MatrixC>    void apply(MatrixA const& a, MatrixB const& b, MatrixC& c, tag::universe)    {	Backup()(a, b, c);    }    template <typename MatrixA, typename MatrixB, typename MatrixC>    void apply(MatrixA const& a, MatrixB const& b, MatrixC& c, tag::has_2D_layout)    {	apply2(a, b, c, typename OrientedCollection<MatrixA>::orientation());    }    template <typename MatrixA, typename MatrixB, typename MatrixC>    void apply2(MatrixA const& a, MatrixB const& b, MatrixC& c, tag::col_major)    {	// may be I'll write an optimized version later	Backup()(a, b, c);    }    template <typename MatrixA, typename MatrixB, typename MatrixC>    void apply2(MatrixA const& a, MatrixB const& b, MatrixC& c, tag::row_major)    {	using namespace tag;	using traits::range_generator;  	typedef gen_tiling_smat_dmat_mult_block<1, Tiling1, Assign>  block;	typedef typename Collection<MatrixA>::size_type              size_type;	typedef typename Collection<MatrixC>::value_type             value_type;	const value_type z= math::zero(c[0][0]);    // if this are matrices we need their size        typedef typename range_generator<row, MatrixA>::type         a_cur_type;                     typedef typename range_generator<nz, a_cur_type>::type       a_icur_type;            	typename traits::col<MatrixA>::type             col_a(a); 	typename traits::const_value<MatrixA>::type     value_a(a); 	if (Assign::init_to_zero) set_to_zero(c);	size_type i_max= num_cols(b), i_block= Tiling1 * (i_max / Tiling1);	size_t bci= &b(0, 1) - &b(0, 0); // how much is the offset of B's entry increased by incrementing column	a_cur_type ac= begin<row>(a), aend= end<row>(a);	size_type rc= 0; // start in row 0	for (a_cur_type ac= begin<row>(a), aend= end<row>(a); ac != aend; ++ac, ++rc) {	    for (size_type i= 0; i < i_block; i+= Tiling1) {	    		value_type tmp00= z, tmp01= z, tmp02= z, tmp03= z, tmp04= z,                           tmp05= z, tmp06= z, tmp07= z, tmp08= z, tmp09= z, 		           tmp10= z, tmp11= z, tmp12= z, tmp13= z, tmp14= z, tmp15= z;		for (a_icur_type aic= begin<nz>(ac), aiend= end<nz>(ac); aic != aiend; ++aic) {		    typename Collection<MatrixA>::size_type     ca= col_a(*aic);   // column of non-zero		    typename Collection<MatrixA>::value_type    va= value_a(*aic); // value of non-zero		    // Element in first vector in block to be multiplied with va; rb==ca		    const typename MatrixB::value_type *begin_b= &b(ca, i); 		    block::apply(tmp00, tmp01, tmp02, tmp03, tmp04, tmp05, tmp06, tmp07, tmp08, tmp09, 				 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, 				 va, begin_b, bci); 		}		block::update(tmp00, tmp01, tmp02, tmp03, tmp04, tmp05, tmp06, tmp07, tmp08, tmp09, 			      tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, 			      c, rc, i);	    }		    for (size_type i= i_block; i < i_max; i++) {		value_type tmp00= z;		for (a_icur_type aic= begin<nz>(ac), aiend= end<nz>(ac); aic != aiend; ++aic) {		    typename Collection<MatrixA>::size_type     ca= col_a(aic);   // column of non-zero		    tmp00+= value_a(*aic) * b(ca, i);		}		Assign::update(c(rc, i), tmp00);	    }	}    }};}} // namespace mtl::functor#endif // MTL_SMAT_DMAT_MULT_INCLUDE

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?