📄 eval.cc
字号:
{
iter.loadStride(ordering(j));
expr.loadStride(ordering(j));
int offset = order[i][j-1];
iter.advance(offset);
expr.advance(offset);
}
iter.loadStride(maxRank);
expr.loadStride(maxRank);
// Evaluate the expression along the column
if ((useUnitStride) || (useCommonStride))
{
T_numtype* _bz_restrict last = const_cast<T_numtype*>(iter.data())
+ lastLength * commonStride;
#ifdef BZ_USE_FAST_READ_ARRAY_EXPR
int ubound = lastLength * commonStride;
T_numtype* _bz_restrict data = const_cast<T_numtype*>(iter.data());
if (commonStride == 1)
{
#ifndef BZ_ARRAY_FAST_TRAVERSAL_UNROLL
for (int i=0; i < ubound; ++i)
T_update::update(data[i], expr.fastRead(i));
#else
int n1 = ubound & 3;
int i=0;
for (; i < n1; ++i)
T_update::update(data[i], expr.fastRead(i));
for (; i < ubound; i += 4)
{
T_update::update(data[i], expr.fastRead(i));
T_update::update(data[i+1], expr.fastRead(i+1));
T_update::update(data[i+2], expr.fastRead(i+2));
T_update::update(data[i+3], expr.fastRead(i+3));
}
#endif // BZ_ARRAY_FAST_TRAVERSAL_UNROLL
}
#ifdef BZ_ARRAY_EXPR_USE_COMMON_STRIDE
else {
for (int i=0; i < ubound; i += commonStride)
T_update::update(data[i], expr.fastRead(i));
}
#endif // BZ_ARRAY_EXPR_USE_COMMON_STRIDE
iter.advance(lastLength * commonStride);
expr.advance(lastLength * commonStride);
#else // ! BZ_USE_FAST_READ_ARRAY_EXPR
while (iter.data() != last)
{
T_update::update(*const_cast<T_numtype*>(iter.data()), *expr);
iter.advance(commonStride);
expr.advance(commonStride);
}
#endif // BZ_USE_FAST_READ_ARRAY_EXPR
}
else {
// No common stride
T_numtype* _bz_restrict last = const_cast<T_numtype*>(iter.data())
+ lastLength * stride(maxRank);
while (iter.data() != last)
{
T_update::update(*const_cast<T_numtype*>(iter.data()), *expr);
iter.advance();
expr.advance();
}
}
}
return *this;
}
#endif // BZ_HAVE_STD
#ifdef BZ_ARRAY_2D_NEW_STENCIL_TILING
#ifdef BZ_ARRAY_2D_STENCIL_TILING
// Evaluates `expr` into this array one tile at a time ("new" 2D stencil
// tiling variant).
//
// The index space spanned by ordering(1) (major rank, "rows") and
// ordering(0) (minor rank, "columns") is cut into tiles that are
// tileHeight (16) rows tall and tileWidth (3) columns wide.  Every element
// visited is combined with the matching value of the expression through
// T_update::update(destination, value).
//
// Parameters:
//   expr      expression iterator, taken by value; it is repositioned with
//             push/pop/loadStride/advance in lock step with the iterator
//             over this array.
//   T_update  unnamed tag argument; only its static update() is used.
//
// Returns a reference to this array.
//
// NOTE(review): only ordering(0) and ordering(1) are consulted, so this
// presumably expects a two-dimensional array -- confirm at the call sites.
template<class T_numtype, int N_rank> template<class T_expr, class T_update>
inline Array<T_numtype, N_rank>&
Array<T_numtype, N_rank>::evaluateWithTiled2DTraversal(
    T_expr expr, T_update)
{
    const int minorRank = ordering(0);
    const int majorRank = ordering(1);

    FastArrayIterator<T_numtype, N_rank> dst(*this);

    // Remember the starting position of both traversals under index 0.
    dst.push(0);
    expr.push(0);

#ifdef BZ_2D_STENCIL_DEBUG
    int count = 0;
#endif

    _bz_bool unitMinorStride = dst.isUnitStride(minorRank)
        && expr.isUnitStride(minorRank);

#ifdef BZ_ARRAY_EXPR_USE_COMMON_STRIDE
    // Larger of the two suggested minor strides.  Neither this value nor
    // the flag derived from it is referenced again in this routine.
    const int exprMinorStride = expr.suggestStride(minorRank);
    int commonStride = (dst.suggestStride(minorRank) > exprMinorStride)
        ? dst.suggestStride(minorRank)
        : exprMinorStride;
    bool useCommonStride = dst.isStride(minorRank,commonStride)
        && expr.isStride(minorRank,commonStride);
#else
    int commonStride = 1;
    bool useCommonStride = _bz_false;
#endif

    // Work out whether destination and expression share one major stride.
    const int exprMajorStride = expr.suggestStride(majorRank);
    int commonMajorStride = (dst.suggestStride(majorRank) > exprMajorStride)
        ? dst.suggestStride(majorRank)
        : exprMajorStride;
    bool haveCommonMajorStride = dst.isStride(majorRank,commonMajorStride)
        && expr.isStride(majorRank,commonMajorStride);

    const int numRows = length(majorRank);
    const int numCols = length(minorRank);
    const int tileHeight = 16, tileWidth = 3;

    for (int rowBegin = 0; rowBegin < numRows; rowBegin += tileHeight)
    {
        // The last band of tiles is clipped to the extent of the major rank.
        const int rowEnd = (rowBegin + tileHeight > numRows)
            ? numRows
            : rowBegin + tileHeight;
        const int tileRows = rowEnd - rowBegin;

        // Return to the start of the array ...
        dst.pop(0);
        expr.pop(0);

        // ... and step along the major rank to the first row of this band.
        dst.loadStride(majorRank);
        dst.advance(rowBegin);
        expr.loadStride(majorRank);
        expr.advance(rowBegin);

        // Remember the start of the band under index 1.
        dst.push(1);
        expr.push(1);

        for (int colBegin = 0; colBegin < numCols; colBegin += tileWidth)
        {
            // Back to the start of the band, then across to tile
            // (rowBegin, colBegin).
            dst.pop(1);
            expr.pop(1);
            dst.loadStride(minorRank);
            dst.advance(colBegin);
            expr.loadStride(minorRank);
            expr.advance(colBegin);

            if (colBegin + tileWidth > numCols)
            {
                // Fewer than tileWidth columns are left along the minor
                // rank: walk each leftover column from top to bottom.
                for (int col = colBegin; col < numCols; ++col)
                {
                    dst.loadStride(majorRank);
                    expr.loadStride(majorRank);

                    for (int r = 0; r < tileRows; ++r)
                    {
                        T_update::update(
                            *const_cast<T_numtype*>(dst.data()), *expr);
                        dst.advance();
                        expr.advance();
#ifdef BZ_2D_STENCIL_DEBUG
                        ++count;
#endif
                    }

                    // Climb back to the top of this column ...
                    dst.advance(-tileRows);
                    expr.advance(-tileRows);

                    // ... and shift one column over.
                    dst.loadStride(minorRank);
                    expr.loadStride(minorRank);
                    dst.advance();
                    expr.advance();
                }
            }
            else if ((unitMinorStride) && (haveCommonMajorStride))
            {
                // Full tile, strip mined: address the three columns of
                // each row directly, moving from row to row by the shared
                // major stride.
                T_numtype* _bz_restrict out = const_cast<T_numtype*>
                    (dst.data());
                int base = 0;

                for (int r = 0; r < tileRows; ++r)
                {
                    _bz_typename T_expr::T_numtype v0, v1, v2;

                    // All three values are read before any element of the
                    // row is updated.
                    v0 = expr.fastRead(base);
                    v1 = expr.fastRead(base + 1);
                    v2 = expr.fastRead(base + 2);

                    T_update::update(out[base], v0);
                    T_update::update(out[base + 1], v1);
                    T_update::update(out[base + 2], v2);

                    base += commonMajorStride;
#ifdef BZ_2D_STENCIL_DEBUG
                    count += 3;
#endif
                }
            }
            else
            {
                // Full tile, general strides: step both iterators through
                // the tile element by element.
                for (int r = 0; r < tileRows; ++r)
                {
                    dst.loadStride(minorRank);
                    expr.loadStride(minorRank);

                    // Visit the tileWidth elements of this row.
                    for (int k = 0; k < tileWidth; ++k)
                    {
                        if (k > 0)
                        {
                            dst.advance();
                            expr.advance();
                        }
                        T_update::update(
                            *const_cast<T_numtype*>(dst.data()), *expr);
                    }

                    // Rewind to the first column of the tile ...
                    dst.advance(1 - tileWidth);
                    expr.advance(1 - tileWidth);

                    // ... and drop down one row.
                    dst.loadStride(majorRank);
                    expr.loadStride(majorRank);
                    dst.advance();
                    expr.advance();
#ifdef BZ_2D_STENCIL_DEBUG
                    count += tileWidth;
#endif
                }
            }
        }
    }

#ifdef BZ_2D_STENCIL_DEBUG
    cout << "BZ_2D_STENCIL_DEBUG: count = " << count << endl;
#endif

    return *this;
}
#endif // BZ_ARRAY_2D_STENCIL_TILING
#endif // BZ_ARRAY_2D_NEW_STENCIL_TILING
#ifndef BZ_ARRAY_2D_NEW_STENCIL_TILING
#ifdef BZ_ARRAY_2D_STENCIL_TILING
// Evaluates `expr` into this array block by block (original 2D tiling
// variant, compiled when BZ_ARRAY_2D_NEW_STENCIL_TILING is not defined).
//
// The index space spanned by ordering(1) (major rank) and ordering(0)
// (minor rank) is cut into blockSize x blockSize (16 x 16) tiles.  Inside a
// tile the rows are processed one after another, and each row uses one of
// three strategies:
//   * both sides report unit stride along the minor rank: the row is
//     indexed directly with consecutive offsets via fastRead();
//   * (only with BZ_ARRAY_EXPR_USE_COMMON_STRIDE) both sides share a common
//     minor stride: the row is indexed directly in steps of that stride;
//   * otherwise both iterators are advanced one element at a time.
// Every element is combined with the expression value through
// T_update::update(destination, value).
//
// Parameters:
//   expr      expression iterator, taken by value and repositioned in lock
//             step with the iterator over this array.
//   T_update  unnamed tag argument; only its static update() is used.
//
// Returns a reference to this array.
//
// NOTE(review): only ordering(0) and ordering(1) are consulted, so this
// presumably expects a two-dimensional array -- confirm at the call sites.
template<class T_numtype, int N_rank> template<class T_expr, class T_update>
inline Array<T_numtype, N_rank>&
Array<T_numtype, N_rank>::evaluateWithTiled2DTraversal(
    T_expr expr, T_update)
{
    const int minorRank = ordering(0);
    const int majorRank = ordering(1);
    const int blockSize = 16;

    FastArrayIterator<T_numtype, N_rank> dst(*this);

    // Remember the starting position of both traversals under index 0.
    dst.push(0);
    expr.push(0);

    _bz_bool unitMinorStride = dst.isUnitStride(minorRank)
        && expr.isUnitStride(minorRank);

#ifdef BZ_ARRAY_EXPR_USE_COMMON_STRIDE
    // Larger of the two suggested minor strides, and whether both sides
    // can actually be walked with it.
    const int exprMinorStride = expr.suggestStride(minorRank);
    int commonStride = (dst.suggestStride(minorRank) > exprMinorStride)
        ? dst.suggestStride(minorRank)
        : exprMinorStride;
    bool useCommonStride = dst.isStride(minorRank,commonStride)
        && expr.isStride(minorRank,commonStride);
#else
    int commonStride = 1;
    bool useCommonStride = _bz_false;
#endif

    const int numRows = length(majorRank);
    const int numCols = length(minorRank);

    for (int rowBegin = 0; rowBegin < numRows; rowBegin += blockSize)
    {
        // Tiles in the last band are clipped to the major extent.
        const int rowEnd = (rowBegin + blockSize > numRows)
            ? numRows
            : rowBegin + blockSize;

        for (int colBegin = 0; colBegin < numCols; colBegin += blockSize)
        {
            // Likewise, the last tile of a band is clipped to the minor
            // extent.
            const int colEnd = (colBegin + blockSize > numCols)
                ? numCols
                : colBegin + blockSize;
            const int tileCols = colEnd - colBegin;

            // Return to the start of the array ...
            dst.pop(0);
            expr.pop(0);

            // ... and move to the first element of tile
            // (rowBegin, colBegin).
            dst.loadStride(majorRank);
            dst.advance(rowBegin);
            dst.loadStride(minorRank);
            dst.advance(colBegin);
            expr.loadStride(majorRank);
            expr.advance(rowBegin);
            expr.loadStride(minorRank);
            expr.advance(colBegin);

            // One pass per row of the tile.
            for (int rowsLeft = rowEnd - rowBegin; rowsLeft > 0; --rowsLeft)
            {
                // Remember where this tile row starts (index 1).
                dst.push(1);
                expr.push(1);

                // Subsequent steps run along the minor rank.
                dst.loadStride(minorRank);
                expr.loadStride(minorRank);

                if (unitMinorStride)
                {
                    T_numtype* _bz_restrict out = const_cast<T_numtype*>
                        (dst.data());
                    for (int k = 0; k < tileCols; ++k)
                        T_update::update(out[k], expr.fastRead(k));
                }
#ifdef BZ_ARRAY_EXPR_USE_COMMON_STRIDE
                else if (useCommonStride)
                {
                    const int limit = tileCols * commonStride;
                    T_numtype* _bz_restrict out = const_cast<T_numtype*>
                        (dst.data());
                    for (int k = 0; k < limit; k += commonStride)
                        T_update::update(out[k], expr.fastRead(k));
                }
#endif
                else
                {
                    // Element-by-element walk along the row.
                    for (int k = 0; k < tileCols; ++k)
                    {
                        T_update::update(
                            *const_cast<T_numtype*>(dst.data()), *expr);
                        dst.advance();
                        expr.advance();
                    }
                }

                // Jump back to the start of this tile row and step to the
                // next row along the major rank.
                dst.pop(1);
                dst.loadStride(majorRank);
                dst.advance(1);
                expr.pop(1);
                expr.loadStride(majorRank);
                expr.advance(1);
            }
        }
    }

    return *this;
}
#endif // BZ_ARRAY_2D_STENCIL_TILING
#endif // BZ_ARRAY_2D_NEW_STENCIL_TILING
BZ_NAMESPACE_END
#endif // BZ_ARRAYEVAL_CC
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -