dmat_dmat_mult.hpp
来自「矩阵运算源码最新版本」· HPP 代码 · 共 1,095 行 · 第 1/3 页
HPP
1,095 行
// Software License for MTL// // Copyright (c) 2007 The Trustees of Indiana University. All rights reserved.// Authors: Peter Gottschling and Andrew Lumsdaine// // This file is part of the Matrix Template Library// // See also license.mtl.txt in the distribution.#ifndef MTL_DMAT_DMAT_MULT_INCLUDE#define MTL_DMAT_DMAT_MULT_INCLUDE#include <boost/static_assert.hpp>#include <boost/type_traits.hpp>#include <boost/numeric/mtl/operation/set_to_zero.hpp>#include <boost/numeric/mtl/utility/range_generator.hpp>#include <boost/numeric/mtl/operation/cursor_pseudo_dot.hpp>#include <boost/numeric/mtl/operation/multi_action_block.hpp>#include <boost/numeric/mtl/operation/assign_mode.hpp>#include <boost/numeric/mtl/utility/tag.hpp>#include <boost/numeric/mtl/utility/glas_tag.hpp>#include <boost/numeric/mtl/utility/is_row_major.hpp>#include <boost/numeric/meta_math/loop.hpp>#include <boost/numeric/mtl/recursion/base_case_test.hpp>#include <boost/numeric/mtl/recursion/base_case_matrix.hpp>#include <boost/numeric/mtl/recursion/matrix_recursator.hpp>#include <boost/numeric/mtl/recursion/base_case_cast.hpp>#include <boost/numeric/mtl/interface/blas.hpp>#include <boost/numeric/mtl/matrix/dense2D.hpp>#include <boost/numeric/mtl/operation/print_matrix.hpp>#include <boost/numeric/mtl/operation/no_op.hpp>#include <iostream>#include <complex>namespace mtl {// =====================================// Generic matrix product with iterators// =====================================template <typename MatrixA, typename MatrixB, typename MatrixC, typename Assign= assign::assign_sum, typename Backup= no_op> // To allow 5th parameter, is ignoredstruct gen_dmat_dmat_mult_ft{ void operator()(MatrixA const& a, MatrixB const& b, MatrixC& c) { using namespace tag; using traits::range_generator; typedef typename range_generator<row, MatrixA>::type a_cur_type; typedef typename range_generator<row, MatrixC>::type c_cur_type; typedef typename range_generator<col, MatrixB>::type b_cur_type; typedef typename range_generator<iter::all, c_cur_type>::type c_icur_type; typedef typename range_generator<const_iter::all, a_cur_type>::type a_icur_type; typedef typename range_generator<const_iter::all, b_cur_type>::type b_icur_type; if (Assign::init_to_zero) set_to_zero(c); a_cur_type ac= begin<row>(a), aend= end<row>(a); for (c_cur_type cc= begin<row>(c); ac != aend; ++ac, ++cc) { b_cur_type bc= begin<col>(b), bend= end<col>(b); for (c_icur_type cic= begin<iter::all>(cc); bc != bend; ++bc, ++cic) { typename MatrixC::value_type c_tmp(*cic); a_icur_type aic= begin<const_iter::all>(ac), aiend= end<const_iter::all>(ac); for (b_icur_type bic= begin<const_iter::all>(bc); aic != aiend; ++aic, ++bic) { //std::cout << "aic " << *aic << ", bic " << *bic << '\n'; std::cout.flush(); Assign::update(c_tmp, *aic * *bic); } *cic= c_tmp; } } } };template <typename Assign= assign::assign_sum, typename Backup= no_op> // To allow 2nd parameter, is ignoredstruct gen_dmat_dmat_mult_t{ template <typename MatrixA, typename MatrixB, typename MatrixC> void operator()(MatrixA const& a, MatrixB const& b, MatrixC& c) { gen_dmat_dmat_mult_ft<MatrixA, MatrixB, MatrixC, Assign, Backup>()(a, b, c); }};// =====================================================// Generic matrix product with cursors and property maps// =====================================================template <typename MatrixA, typename MatrixB, typename MatrixC, typename Assign= assign::assign_sum, typename Backup= no_op> // To allow 5th parameter, is ignoredstruct gen_cursor_dmat_dmat_mult_ft{ void operator()(MatrixA const& a, MatrixB const& b, MatrixC& c) { typedef glas::tag::row row; typedef glas::tag::col col; typedef glas::tag::all all; typedef typename traits::const_value<MatrixA>::type a_value_type; typedef typename traits::const_value<MatrixB>::type b_value_type; typedef typename traits::value<MatrixC>::type c_value_type; typedef typename traits::range_generator<row, MatrixA>::type a_cur_type; typedef typename traits::range_generator<row, MatrixC>::type c_cur_type; typedef typename traits::range_generator<col, MatrixB>::type b_cur_type; typedef typename traits::range_generator<all, c_cur_type>::type c_icur_type; typedef typename traits::range_generator<all, a_cur_type>::type a_icur_type; typedef typename traits::range_generator<all, b_cur_type>::type b_icur_type; if (Assign::init_to_zero) set_to_zero(c); a_value_type a_value(a); b_value_type b_value(b); c_value_type c_value(c); a_cur_type ac= begin<row>(a), aend= end<row>(a); for (c_cur_type cc= begin<row>(c); ac != aend; ++ac, ++cc) { b_cur_type bc= begin<col>(b), bend= end<col>(b); for (c_icur_type cic= begin<all>(cc); bc != bend; ++bc, ++cic) { typename MatrixC::value_type c_tmp(c_value(*cic)); a_icur_type aic= begin<all>(ac), aiend= end<all>(ac); for (b_icur_type bic= begin<all>(bc); aic != aiend; ++aic, ++bic) Assign::update(c_tmp, a_value(*aic) * b_value(*bic)); c_value(*cic, c_tmp); } } }};template <typename Assign= assign::assign_sum, typename Backup= no_op> // To allow 2nd parameter, is ignoredstruct gen_cursor_dmat_dmat_mult_t{ template <typename MatrixA, typename MatrixB, typename MatrixC> void operator()(MatrixA const& a, MatrixB const& b, MatrixC& c) { gen_cursor_dmat_dmat_mult_ft<MatrixA, MatrixB, MatrixC, Assign, Backup>()(a, b, c); }};/*Unrolling matrix product with dimensions that are not multiples of blocks1. Do with optimization: C_nw += A_nw * B_nw - wherby the matrix dimensions of sub-matrices are the largest multiples of block sizes smaller or equal to the matrix dimensions of the original matrix2. Do without optimization C_nw += A_ne * B_sw C_ne += A_n * B_e C_s += A_s * BThe inner loop can be unrolled arbitrarily. So, we can simplify1. Do with optimization: C_nw += A_n * B_w - wherby the matrix dimensions of sub-matrices are the largest multiples of block sizes smaller or equal to the matrix dimensions of the original matrix2. Do with optimization only in inner loop C_ne += A_n * B_e C_s += A_s * B */// =======================// Unrolled with iterators// required has_2D_layout// =======================// Define defaults if not yet given as Compiler flag#ifndef MTL_DMAT_DMAT_MULT_TILING1# define MTL_DMAT_DMAT_MULT_TILING1 2#endif#ifndef MTL_DMAT_DMAT_MULT_TILING2# define MTL_DMAT_DMAT_MULT_TILING2 4#endif#ifndef MTL_DMAT_DMAT_MULT_INNER_UNROLL# define MTL_DMAT_DMAT_MULT_INNER_UNROLL 8#endiftemplate <unsigned long Index0, unsigned long Max0, unsigned long Index1, unsigned long Max1, typename Assign>struct gen_tiling_dmat_dmat_mult_block : public meta_math::loop2<Index0, Max0, Index1, Max1>{ typedef meta_math::loop2<Index0, Max0, Index1, Max1> base; typedef gen_tiling_dmat_dmat_mult_block<base::next_index0, Max0, base::next_index1, Max1, Assign> next_t; template <typename Value, typename ValueA, typename SizeA, typename ValueB, typename SizeB> static inline void apply(Value& tmp00, Value& tmp01, Value& tmp02, Value& tmp03, Value& tmp04, Value& tmp05, Value& tmp06, Value& tmp07, Value& tmp08, Value& tmp09, Value& tmp10, Value& tmp11, Value& tmp12, Value& tmp13, Value& tmp14, Value& tmp15, ValueA *begin_a, SizeA& ari, ValueB *begin_b, SizeB& bci) { tmp00+= begin_a[ base::index0 * ari ] * begin_b[ base::index1 * bci ]; next_t::apply(tmp01, tmp02, tmp03, tmp04, tmp05, tmp06, tmp07, tmp08, tmp09, tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp00, begin_a, ari, begin_b, bci); } template <typename Value, typename MatrixC, typename SizeC> static inline void update(Value& tmp00, Value& tmp01, Value& tmp02, Value& tmp03, Value& tmp04, Value& tmp05, Value& tmp06, Value& tmp07, Value& tmp08, Value& tmp09, Value& tmp10, Value& tmp11, Value& tmp12, Value& tmp13, Value& tmp14, Value& tmp15, MatrixC& c, SizeC i, SizeC k) { Assign::update(c(i + base::index0, k + base::index1), tmp00); next_t::update(tmp01, tmp02, tmp03, tmp04, tmp05, tmp06, tmp07, tmp08, tmp09, tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp00, c, i, k); }};template <unsigned long Max0, unsigned long Max1, typename Assign>struct gen_tiling_dmat_dmat_mult_block<Max0, Max0, Max1, Max1, Assign> : public meta_math::loop2<Max0, Max0, Max1, Max1>{ typedef meta_math::loop2<Max0, Max0, Max1, Max1> base; template <typename Value, typename ValueA, typename SizeA, typename ValueB, typename SizeB> static inline void apply(Value& tmp00, Value& tmp01, Value& tmp02, Value& tmp03, Value& tmp04, Value& tmp05, Value& tmp06, Value& tmp07, Value& tmp08, Value& tmp09, Value& tmp10, Value& tmp11, Value& tmp12, Value& tmp13, Value& tmp14, Value& tmp15, ValueA *begin_a, SizeA& ari, ValueB *begin_b, SizeB& bci) { tmp00+= begin_a[ base::index0 * ari ] * begin_b[ base::index1 * bci ]; } template <typename Value, typename MatrixC, typename SizeC> static inline void update(Value& tmp00, Value& tmp01, Value& tmp02, Value& tmp03, Value& tmp04, Value& tmp05, Value& tmp06, Value& tmp07, Value& tmp08, Value& tmp09, Value& tmp10, Value& tmp11, Value& tmp12, Value& tmp13, Value& tmp14, Value& tmp15, MatrixC& c, SizeC i, SizeC k) { Assign::update(c(i + base::index0, k + base::index1), tmp00); }};template <typename MatrixA, typename MatrixB, typename MatrixC, unsigned long Tiling1= MTL_DMAT_DMAT_MULT_TILING1, unsigned long Tiling2= MTL_DMAT_DMAT_MULT_TILING2, typename Assign= assign::assign_sum, typename Backup= gen_dmat_dmat_mult_t<Assign> >struct gen_tiling_dmat_dmat_mult_ft{ BOOST_STATIC_ASSERT((Tiling1 * Tiling2 <= 16)); void operator()(MatrixA const& a, MatrixB const& b, MatrixC& c) { apply(a, b, c, typename traits::category<MatrixA>::type(), typename traits::category<MatrixB>::type()); } private: void apply(MatrixA const& a, MatrixB const& b, MatrixC& c, tag::universe, tag::universe) { Backup()(a, b, c); }#if MTL_OUTLINE_TILING_DMAT_DMAT_MULT_APPLY void apply(MatrixA const& a, MatrixB const& b, MatrixC& c, tag::has_2D_layout, tag::has_2D_layout);#else void apply(MatrixA const& a, MatrixB const& b, MatrixC& c, tag::has_2D_layout, tag::has_2D_layout) { // std::cout << "do unrolling\n"; if (Assign::init_to_zero) set_to_zero(c); typedef gen_tiling_dmat_dmat_mult_block<1, Tiling1, 1, Tiling2, Assign> block; typedef typename MatrixC::size_type size_type; typedef typename MatrixC::value_type value_type; const value_type z= math::zero(c[0][0]); // if this are matrices we need their size size_type i_max= c.num_rows(), i_block= Tiling1 * (i_max / Tiling1), k_max= c.num_cols(), k_block= Tiling2 * (k_max / Tiling2); size_t ari= &a(1, 0) - &a(0, 0), // how much is the offset of A's entry increased by incrementing row aci= &a(0, 1) - &a(0, 0), bri= &b(1, 0) - &b(0, 0), bci= &b(0, 1) - &b(0, 0); // C_nw += A_nw * B_nw for (size_type i= 0; i < i_block; i+= Tiling1) for (size_type k= 0; k < k_block; k+= Tiling2) { value_type tmp00= z, tmp01= z, tmp02= z, tmp03= z, tmp04= z, tmp05= z, tmp06= z, tmp07= z, tmp08= z, tmp09= z, tmp10= z, tmp11= z, tmp12= z, tmp13= z, tmp14= z, tmp15= z; const typename MatrixA::value_type *begin_a= &a(i, 0), *end_a= &a(i, a.num_cols()); const typename MatrixB::value_type *begin_b= &b(0, k); for (; begin_a != end_a; begin_a+= aci, begin_b+= bri) block::apply(tmp00, tmp01, tmp02, tmp03, tmp04, tmp05, tmp06, tmp07, tmp08, tmp09, tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, begin_a, ari, begin_b, bci); block::update(tmp00, tmp01, tmp02, tmp03, tmp04, tmp05, tmp06, tmp07, tmp08, tmp09, tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, c, i, k); } // C_ne += A_n * B_e for (size_type i= 0; i < i_block; i++) for (int k = k_block; k < k_max; k++) { value_type tmp00= z; const typename MatrixA::value_type *begin_a= &a(i, 0), *end_a= &a(i, a.num_cols()); const typename MatrixB::value_type *begin_b= &b(0, k); for (; begin_a != end_a; begin_a+= aci, begin_b+= bri) tmp00 += *begin_a * *begin_b; Assign::update(c(i, k), tmp00); } // C_s += A_s * B for (size_type i= i_block; i < i_max; i++) for (int k = 0; k < k_max; k++) { value_type tmp00= z; const typename MatrixA::value_type *begin_a= &a(i, 0), *end_a= &a(i, a.num_cols()); const typename MatrixB::value_type *begin_b= &b(0, k); for (; begin_a != end_a; begin_a+= aci, begin_b+= bri) tmp00 += *begin_a * *begin_b; Assign::update(c(i, k), tmp00); } }#endif};template <unsigned long Tiling1= MTL_DMAT_DMAT_MULT_TILING1, unsigned long Tiling2= MTL_DMAT_DMAT_MULT_TILING2, typename Assign= assign::assign_sum, typename Backup= gen_dmat_dmat_mult_t<Assign> >struct gen_tiling_dmat_dmat_mult_t{ template <typename MatrixA, typename MatrixB, typename MatrixC> void operator()(MatrixA const& a, MatrixB const& b, MatrixC& c) { gen_tiling_dmat_dmat_mult_ft< MatrixA, MatrixB, MatrixC, Tiling1, Tiling2, Assign, Backup >()(a, b, c); }};#if MTL_OUTLINE_TILING_DMAT_DMAT_MULT_APPLYtemplate <typename MatrixA, typename MatrixB, typename MatrixC,
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?