📄 eval.cc
字号:
int offset = order[i][j-1]; iter.advance(offset); expr.advance(offset); } iter.loadStride(maxRank); expr.loadStride(maxRank); // Evaluate the expression along the column if ((useUnitStride) || (useCommonStride)) {#ifdef BZ_USE_FAST_READ_ARRAY_EXPR int ubound = lastLength * commonStride; T_numtype* restrict data = const_cast<T_numtype*>(iter.data()); if (commonStride == 1) { #ifndef BZ_ARRAY_FAST_TRAVERSAL_UNROLL for (int i=0; i < ubound; ++i) T_update::update(*data++, expr.fastRead(i)); #else int n1 = ubound & 3; int i=0; for (; i < n1; ++i) T_update::update(*data++, expr.fastRead(i)); for (; i < ubound; i += 4) { T_update::update(*data++, expr.fastRead(i)); T_update::update(*data++, expr.fastRead(i+1)); T_update::update(*data++, expr.fastRead(i+2)); T_update::update(*data++, expr.fastRead(i+3)); } #endif // BZ_ARRAY_FAST_TRAVERSAL_UNROLL } #ifdef BZ_ARRAY_EXPR_USE_COMMON_STRIDE else { for (int i=0; i < ubound; i += commonStride) T_update::update(data[i], expr.fastRead(i)); } #endif // BZ_ARRAY_EXPR_USE_COMMON_STRIDE iter.advance(lastLength * commonStride); expr.advance(lastLength * commonStride);#else // ! BZ_USE_FAST_READ_ARRAY_EXPR T_numtype* restrict last = const_cast<T_numtype*>(iter.data()) + lastLength * commonStride; while (iter.data() != last) { T_update::update(*const_cast<T_numtype*>(iter.data()), *expr); iter.advance(commonStride); expr.advance(commonStride); }#endif // BZ_USE_FAST_READ_ARRAY_EXPR } else { // No common stride T_numtype* restrict last = const_cast<T_numtype*>(iter.data()) + lastLength * stride(maxRank); while (iter.data() != last) { T_update::update(*const_cast<T_numtype*>(iter.data()), *expr); iter.advance(); expr.advance(); } } } return *this;}#endif // BZ_ARRAY_SPACE_FILLING_TRAVERSAL#endif // BZ_HAVE_STD#ifdef BZ_ARRAY_2D_NEW_STENCIL_TILING#ifdef BZ_ARRAY_2D_STENCIL_TILINGtemplate<typename T_numtype, int N_rank> template<typename T_expr, typename T_update>inline Array<T_numtype, N_rank>& Array<T_numtype, N_rank>::evaluateWithTiled2DTraversal( T_expr expr, T_update){ const int minorRank = ordering(0); const int majorRank = ordering(1); FastArrayIterator<T_numtype, N_rank> iter(*this); iter.push(0); expr.push(0);#ifdef BZ_2D_STENCIL_DEBUG int count = 0;#endif bool useUnitStride = iter.isUnitStride(minorRank) && expr.isUnitStride(minorRank);#ifdef BZ_ARRAY_EXPR_USE_COMMON_STRIDE int commonStride = expr.suggestStride(minorRank); if (iter.suggestStride(minorRank) > commonStride) commonStride = iter.suggestStride(minorRank); bool useCommonStride = iter.isStride(minorRank,commonStride) && expr.isStride(minorRank,commonStride);#else int commonStride = 1; bool useCommonStride = false;#endif // Determine if a common major stride exists int commonMajorStride = expr.suggestStride(majorRank); if (iter.suggestStride(majorRank) > commonMajorStride) commonMajorStride = iter.suggestStride(majorRank); bool haveCommonMajorStride = iter.isStride(majorRank,commonMajorStride) && expr.isStride(majorRank,commonMajorStride); int maxi = length(majorRank); int maxj = length(minorRank); const int tileHeight = 16, tileWidth = 3; int bi, bj; for (bi=0; bi < maxi; bi += tileHeight) { int ni = bi + tileHeight; if (ni > maxi) ni = maxi; // Move back to the beginning of the array iter.pop(0); expr.pop(0); // Move to the start of this tile row iter.loadStride(majorRank); iter.advance(bi); expr.loadStride(majorRank); expr.advance(bi); // Save this position iter.push(1); expr.push(1); for (bj=0; bj < maxj; bj += tileWidth) { // Move to the beginning of the tile row iter.pop(1); expr.pop(1); // Move to the top of the current tile (bi,bj) iter.loadStride(minorRank); iter.advance(bj); expr.loadStride(minorRank); expr.advance(bj); if (bj + tileWidth <= maxj) { // Strip mining if ((useUnitStride) && (haveCommonMajorStride)) { int offset = 0; T_numtype* restrict data = const_cast<T_numtype*> (iter.data()); for (int i=bi; i < ni; ++i) { _bz_typename T_expr::T_numtype tmp1, tmp2, tmp3; // Common subexpression elimination -- compilers // won't necessarily do this on their own. int t1 = offset+1; int t2 = offset+2; tmp1 = expr.fastRead(offset); tmp2 = expr.fastRead(t1); tmp3 = expr.fastRead(t2); T_update::update(data[0], tmp1); T_update::update(data[1], tmp2); T_update::update(data[2], tmp3); offset += commonMajorStride; data += commonMajorStride;#ifdef BZ_2D_STENCIL_DEBUG count += 3;#endif } } else { for (int i=bi; i < ni; ++i) { iter.loadStride(minorRank); expr.loadStride(minorRank); // Loop through current row elements T_update::update(*const_cast<T_numtype*>(iter.data()), *expr); iter.advance(); expr.advance(); T_update::update(*const_cast<T_numtype*>(iter.data()), *expr); iter.advance(); expr.advance(); T_update::update(*const_cast<T_numtype*>(iter.data()), *expr); iter.advance(-2); expr.advance(-2); iter.loadStride(majorRank); expr.loadStride(majorRank); iter.advance(); expr.advance();#ifdef BZ_2D_STENCIL_DEBUG count += 3;#endif } } } else { // This code handles partial tiles at the bottom of the // array. for (int j=bj; j < maxj; ++j) { iter.loadStride(majorRank); expr.loadStride(majorRank); for (int i=bi; i < ni; ++i) { T_update::update(*const_cast<T_numtype*>(iter.data()), *expr); iter.advance(); expr.advance();#ifdef BZ_2D_STENCIL_DEBUG ++count;#endif } // Move back to the top of this column iter.advance(bi-ni); expr.advance(bi-ni); // Move over to the next column iter.loadStride(minorRank); expr.loadStride(minorRank); iter.advance(); expr.advance(); } } } }#ifdef BZ_2D_STENCIL_DEBUG cout << "BZ_2D_STENCIL_DEBUG: count = " << count << endl;#endif return *this;}#endif // BZ_ARRAY_2D_STENCIL_TILING#endif // BZ_ARRAY_2D_NEW_STENCIL_TILING#ifndef BZ_ARRAY_2D_NEW_STENCIL_TILING#ifdef BZ_ARRAY_2D_STENCIL_TILINGtemplate<typename T_numtype, int N_rank> template<typename T_expr, typename T_update>inline Array<T_numtype, N_rank>& Array<T_numtype, N_rank>::evaluateWithTiled2DTraversal( T_expr expr, T_update){ const int minorRank = ordering(0); const int majorRank = ordering(1); const int blockSize = 16; FastArrayIterator<T_numtype, N_rank> iter(*this); iter.push(0); expr.push(0); bool useUnitStride = iter.isUnitStride(minorRank) && expr.isUnitStride(minorRank);#ifdef BZ_ARRAY_EXPR_USE_COMMON_STRIDE int commonStride = expr.suggestStride(minorRank); if (iter.suggestStride(minorRank) > commonStride) commonStride = iter.suggestStride(minorRank); bool useCommonStride = iter.isStride(minorRank,commonStride) && expr.isStride(minorRank,commonStride);#else int commonStride = 1; bool useCommonStride = false;#endif int maxi = length(majorRank); int maxj = length(minorRank); int bi, bj; for (bi=0; bi < maxi; bi += blockSize) { int ni = bi + blockSize; if (ni > maxi) ni = maxi; for (bj=0; bj < maxj; bj += blockSize) { int nj = bj + blockSize; if (nj > maxj) nj = maxj; // Move to the beginning of the array iter.pop(0); expr.pop(0); // Move to the beginning of the tile (bi,bj) iter.loadStride(majorRank); iter.advance(bi); iter.loadStride(minorRank); iter.advance(bj); expr.loadStride(majorRank); expr.advance(bi); expr.loadStride(minorRank); expr.advance(bj); // Loop through tile rows for (int i=bi; i < ni; ++i) { // Save the beginning of this tile row iter.push(1); expr.push(1); // Load the minor stride iter.loadStride(minorRank); expr.loadStride(minorRank); if (useUnitStride) { T_numtype* restrict data = const_cast<T_numtype*> (iter.data()); int ubound = (nj-bj); for (int j=0; j < ubound; ++j) T_update::update(*data++, expr.fastRead(j)); }#ifdef BZ_ARRAY_EXPR_USE_COMMON_STRIDE else if (useCommonStride) { int ubound = (nj-bj) * commonStride; T_numtype* restrict data = const_cast<T_numtype*> (iter.data()); for (int j=0; j < ubound; j += commonStride) T_update::update(data[j], expr.fastRead(j)); }#endif else { for (int j=bj; j < nj; ++j) { // Loop through current row elements T_update::update(*const_cast<T_numtype*>(iter.data()), *expr); iter.advance(); expr.advance(); } } // Move back to the beginning of the tile row, then // move to the next row iter.pop(1); iter.loadStride(majorRank); iter.advance(1); expr.pop(1); expr.loadStride(majorRank); expr.advance(1); } } } return *this;}#endif // BZ_ARRAY_2D_STENCIL_TILING#endif // BZ_ARRAY_2D_NEW_STENCIL_TILINGBZ_NAMESPACE_END#endif // BZ_ARRAYEVAL_CC
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -