⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 eval.cc

📁 著名的数学计算类库
💻 CC
📖 第 1 页 / 共 3 页
字号:
            int offset = order[i][j-1];
            iter.advance(offset);
            expr.advance(offset);
        }

        iter.loadStride(maxRank);
        expr.loadStride(maxRank);

        // Evaluate the expression along the column

        if ((useUnitStride) || (useCommonStride))
        {
#ifdef BZ_USE_FAST_READ_ARRAY_EXPR
            // Fast path: write through a raw pointer into the destination
            // and read the expression by index with fastRead().
            int ubound = lastLength * commonStride;
            T_numtype* restrict data = const_cast<T_numtype*>(iter.data());

            if (commonStride == 1)
            {
 #ifndef BZ_ARRAY_FAST_TRAVERSAL_UNROLL
                for (int i=0; i < ubound; ++i)
                    T_update::update(*data++, expr.fastRead(i));
 #else
                // Unrolled by four: first handle the (ubound & 3) leading
                // elements one at a time so that the number of remaining
                // elements is a multiple of 4, then do four per iteration.
                int n1 = ubound & 3;
                int i=0;
                for (; i < n1; ++i)
                    T_update::update(*data++, expr.fastRead(i));

                for (; i < ubound; i += 4)
                {
                    T_update::update(*data++, expr.fastRead(i));
                    T_update::update(*data++, expr.fastRead(i+1));
                    T_update::update(*data++, expr.fastRead(i+2));
                    T_update::update(*data++, expr.fastRead(i+3));
                }
 #endif  // BZ_ARRAY_FAST_TRAVERSAL_UNROLL
            }
 #ifdef BZ_ARRAY_EXPR_USE_COMMON_STRIDE
            else {
                // Non-unit common stride: the same index i is used for both
                // the destination (data[i]) and the expression (fastRead(i)).
                for (int i=0; i < ubound; i += commonStride)
                    T_update::update(data[i], expr.fastRead(i));
            }
 #endif // BZ_ARRAY_EXPR_USE_COMMON_STRIDE

            // The loops above did not move iter/expr themselves, so step
            // both past the elements that were just processed.
            iter.advance(lastLength * commonStride);
            expr.advance(lastLength * commonStride);
#else   // ! BZ_USE_FAST_READ_ARRAY_EXPR
            // Without fastRead(): step both iterators by commonStride until
            // iter.data() reaches the end pointer computed here.
            T_numtype* restrict last = const_cast<T_numtype*>(iter.data())
                + lastLength * commonStride;

            while (iter.data() != last)
            {
                T_update::update(*const_cast<T_numtype*>(iter.data()), *expr);
                iter.advance(commonStride);
                expr.advance(commonStride);
            }
#endif  // BZ_USE_FAST_READ_ARRAY_EXPR

        }
        else {
            // No common stride

            // Each iterator advances with its own currently loaded stride
            // (maxRank was loaded above); the loop ends when the destination
            // pointer reaches the end pointer computed from stride(maxRank).
            T_numtype* restrict last = const_cast<T_numtype*>(iter.data())
                + lastLength * stride(maxRank);

            while (iter.data() != last)
            {
                T_update::update(*const_cast<T_numtype*>(iter.data()), *expr);
                iter.advance();
                expr.advance();
            }
        }
    }

    return *this;
}
#endif // BZ_ARRAY_SPACE_FILLING_TRAVERSAL

#endif // BZ_HAVE_STD

#ifdef BZ_ARRAY_2D_NEW_STENCIL_TILING

#ifdef BZ_ARRAY_2D_STENCIL_TILING

// Tiled 2D evaluation. This variant is compiled only when both
// BZ_ARRAY_2D_NEW_STENCIL_TILING and BZ_ARRAY_2D_STENCIL_TILING are defined.
//
// Calls T_update::update(dest, value) once for each of the
// length(ordering(1)) x length(ordering(0)) positions, where dest is an
// element of this array and value is read from expr at the same position.
// Positions are visited in tiles that are tileHeight (16) long along
// ordering(1) ("major") and tileWidth (3) long along ordering(0) ("minor").
//
// Parameters:
//   expr      - expression iterator, taken by value; it is repositioned with
//               push/pop/loadStride/advance in lockstep with the iterator
//               over this array.
//   T_update  - unnamed; only the type is used, through its static update().
//
// Returns *this.
//
// NOTE(review): only ordering(0) and ordering(1) are consulted, so this
// presumably expects N_rank == 2; nothing here checks that -- confirm at the
// call site.
template<typename T_numtype, int N_rank> template<typename T_expr, typename T_update>
inline Array<T_numtype, N_rank>&
Array<T_numtype, N_rank>::evaluateWithTiled2DTraversal(
    T_expr expr, T_update)
{
    const int minorRank = ordering(0);
    const int majorRank = ordering(1);

    // Slot 0 of each iterator's position stack remembers the starting
    // position; it is restored with pop(0) at the top of every tile row.
    FastArrayIterator<T_numtype, N_rank> iter(*this);
    iter.push(0);
    expr.push(0);

#ifdef BZ_2D_STENCIL_DEBUG
    // Debug-only tally of update() calls, printed before returning.
    int count = 0;
#endif

    bool useUnitStride = iter.isUnitStride(minorRank)
                          && expr.isUnitStride(minorRank);

    // NOTE(review): commonStride and useCommonStride are computed here but
    // useCommonStride is never read later in this function; the strip-mining
    // test below uses only useUnitStride and haveCommonMajorStride.
#ifdef BZ_ARRAY_EXPR_USE_COMMON_STRIDE
    int commonStride = expr.suggestStride(minorRank);
    if (iter.suggestStride(minorRank) > commonStride)
        commonStride = iter.suggestStride(minorRank);
    bool useCommonStride = iter.isStride(minorRank,commonStride)
        && expr.isStride(minorRank,commonStride);
#else
    int commonStride = 1;
    bool useCommonStride = false;
#endif

    // Determine if a common major stride exists
    // (candidate = larger of the two suggested strides; accepted only if
    // both the array iterator and the expression report it via isStride).
    int commonMajorStride = expr.suggestStride(majorRank);
    if (iter.suggestStride(majorRank) > commonMajorStride)
        commonMajorStride = iter.suggestStride(majorRank);
    bool haveCommonMajorStride = iter.isStride(majorRank,commonMajorStride)
        && expr.isStride(majorRank,commonMajorStride);


    int maxi = length(majorRank);
    int maxj = length(minorRank);

    const int tileHeight = 16, tileWidth = 3;

    // bi/bj: starting index of the current tile along the major/minor rank.
    int bi, bj;
    for (bi=0; bi < maxi; bi += tileHeight)
    {
        // ni: one past the last major index of this tile row, clipped to maxi.
        int ni = bi + tileHeight;
        if (ni > maxi)
            ni = maxi;

        // Move back to the beginning of the array
        iter.pop(0);
        expr.pop(0);

        // Move to the start of this tile row
        iter.loadStride(majorRank);
        iter.advance(bi);
        expr.loadStride(majorRank);
        expr.advance(bi);

        // Save this position
        iter.push(1);
        expr.push(1);

        for (bj=0; bj < maxj; bj += tileWidth)
        {
            // Move to the beginning of the tile row
            iter.pop(1);
            expr.pop(1);

            // Move to the top of the current tile (bi,bj)
            iter.loadStride(minorRank);
            iter.advance(bj);
            expr.loadStride(minorRank);
            expr.advance(bj);

            if (bj + tileWidth <= maxj)
            {
                // Strip mining
                // (full-width tile: exactly tileWidth == 3 elements per row,
                // written out explicitly in both branches below)

                if ((useUnitStride) && (haveCommonMajorStride))
                {
                    // Fast path: iter/expr are not moved inside this branch.
                    // The destination is addressed through a raw pointer and
                    // the expression through fastRead(offset); both step by
                    // commonMajorStride from one tile row to the next.
                    int offset = 0;
                    T_numtype* restrict data = const_cast<T_numtype*>
                        (iter.data());

                    for (int i=bi; i < ni; ++i)
                    {
                        _bz_typename T_expr::T_numtype tmp1, tmp2, tmp3;

                        // Common subexpression elimination -- compilers
                        // won't necessarily do this on their own.
                        int t1 = offset+1;
                        int t2 = offset+2;

                        // All three reads are done before any write.
                        tmp1 = expr.fastRead(offset);
                        tmp2 = expr.fastRead(t1);
                        tmp3 = expr.fastRead(t2);

                        T_update::update(data[0], tmp1);
                        T_update::update(data[1], tmp2);
                        T_update::update(data[2], tmp3);

                        offset += commonMajorStride;
                        data += commonMajorStride;

#ifdef BZ_2D_STENCIL_DEBUG
    count += 3;
#endif
                    }
                }
                else {

                    // General path: walk the three elements of each row with
                    // the iterators, step back two, then move one position
                    // along the major rank to the next row.
                    for (int i=bi; i < ni; ++i)
                    {
                        iter.loadStride(minorRank);
                        expr.loadStride(minorRank);

                        // Loop through current row elements
                        T_update::update(*const_cast<T_numtype*>(iter.data()),
                            *expr);
                        iter.advance();
                        expr.advance();

                        T_update::update(*const_cast<T_numtype*>(iter.data()),
                            *expr);
                        iter.advance();
                        expr.advance();

                        T_update::update(*const_cast<T_numtype*>(iter.data()),
                            *expr);
                        // Two advances were made above; undo them to return
                        // to the first element of this row.
                        iter.advance(-2);
                        expr.advance(-2);

                        iter.loadStride(majorRank);
                        expr.loadStride(majorRank);
                        iter.advance();
                        expr.advance();

#ifdef BZ_2D_STENCIL_DEBUG
    count += 3;
#endif

                    }
                }
            }
            else {

                // This code handles partial tiles: it is reached when
                // bj + tileWidth > maxj, i.e. fewer than tileWidth positions
                // remain along minorRank. The remaining positions are
                // processed one minor index (j) at a time, each traversed
                // along majorRank from bi to ni.

                for (int j=bj; j < maxj; ++j)
                {
                    iter.loadStride(majorRank);
                    expr.loadStride(majorRank);

                    for (int i=bi; i < ni; ++i)
                    {
                        T_update::update(*const_cast<T_numtype*>(iter.data()),
                            *expr);
                        iter.advance();
                        expr.advance();
#ifdef BZ_2D_STENCIL_DEBUG
    ++count;
#endif

                    }

                    // Move back to the top of this column
                    // (the major stride is still loaded; bi-ni is the
                    // negated number of steps just taken)
                    iter.advance(bi-ni);
                    expr.advance(bi-ni);

                    // Move over to the next column
                    iter.loadStride(minorRank);
                    expr.loadStride(minorRank);

                    iter.advance();
                    expr.advance();
                }
            }
        }
    }

#ifdef BZ_2D_STENCIL_DEBUG
    cout << "BZ_2D_STENCIL_DEBUG: count = " << count << endl;
#endif

    return *this;
}

#endif // BZ_ARRAY_2D_STENCIL_TILING
#endif // BZ_ARRAY_2D_NEW_STENCIL_TILING



#ifndef BZ_ARRAY_2D_NEW_STENCIL_TILING

#ifdef BZ_ARRAY_2D_STENCIL_TILING

// Tiled 2D evaluation. This variant is compiled only when
// BZ_ARRAY_2D_STENCIL_TILING is defined and BZ_ARRAY_2D_NEW_STENCIL_TILING
// is not.
//
// Calls T_update::update(dest, value) once for each of the
// length(ordering(1)) x length(ordering(0)) positions, where dest is an
// element of this array and value is read from expr at the same position.
// Positions are visited in tiles of at most blockSize (16) along each of the
// two ranks; tiles at the upper edges are clipped to the array lengths.
// Within a tile, rows are processed one after another along ordering(0).
//
// Parameters:
//   expr      - expression iterator, taken by value; it is repositioned with
//               push/pop/loadStride/advance in lockstep with the iterator
//               over this array.
//   T_update  - unnamed; only the type is used, through its static update().
//
// Returns *this.
//
// NOTE(review): only ordering(0) and ordering(1) are consulted, so this
// presumably expects N_rank == 2; nothing here checks that -- confirm at the
// call site.
template<typename T_numtype, int N_rank> template<typename T_expr, typename T_update>
inline Array<T_numtype, N_rank>&
Array<T_numtype, N_rank>::evaluateWithTiled2DTraversal(
    T_expr expr, T_update)
{
    const int minorRank = ordering(0);
    const int majorRank = ordering(1);

    const int blockSize = 16;

    // Slot 0 of each iterator's position stack remembers the starting
    // position; it is restored with pop(0) before positioning on each tile.
    FastArrayIterator<T_numtype, N_rank> iter(*this);
    iter.push(0);
    expr.push(0);

    bool useUnitStride = iter.isUnitStride(minorRank)
                          && expr.isUnitStride(minorRank);

    // Candidate common minor stride = larger of the two suggested strides;
    // accepted only if both iterators report it via isStride. When
    // BZ_ARRAY_EXPR_USE_COMMON_STRIDE is not defined, the common-stride
    // branch further down is compiled out as well.
#ifdef BZ_ARRAY_EXPR_USE_COMMON_STRIDE
    int commonStride = expr.suggestStride(minorRank);
    if (iter.suggestStride(minorRank) > commonStride)
        commonStride = iter.suggestStride(minorRank);
    bool useCommonStride = iter.isStride(minorRank,commonStride)
        && expr.isStride(minorRank,commonStride);
#else
    int commonStride = 1;
    bool useCommonStride = false;
#endif

    int maxi = length(majorRank);
    int maxj = length(minorRank);

    // bi/bj: starting index of the current tile along the major/minor rank;
    // ni/nj: one past the tile's last index, clipped to maxi/maxj.
    int bi, bj;
    for (bi=0; bi < maxi; bi += blockSize)
    {
        int ni = bi + blockSize;
        if (ni > maxi)
            ni = maxi;

        for (bj=0; bj < maxj; bj += blockSize)
        {
            int nj = bj + blockSize;
            if (nj > maxj)
                nj = maxj;

            // Move to the beginning of the array
            iter.pop(0);
            expr.pop(0);

            // Move to the beginning of the tile (bi,bj)
            iter.loadStride(majorRank);
            iter.advance(bi);
            iter.loadStride(minorRank);
            iter.advance(bj);

            expr.loadStride(majorRank);
            expr.advance(bi);
            expr.loadStride(minorRank);
            expr.advance(bj);

            // Loop through tile rows
            for (int i=bi; i < ni; ++i)
            {
                // Save the beginning of this tile row
                iter.push(1);
                expr.push(1);

                // Load the minor stride
                iter.loadStride(minorRank);
                expr.loadStride(minorRank);

                if (useUnitStride)
                {
                    // Unit stride: write through a raw pointer and read the
                    // expression by index; iter/expr themselves do not move.
                    T_numtype* restrict data = const_cast<T_numtype*>
                        (iter.data());

                    int ubound = (nj-bj);
                    for (int j=0; j < ubound; ++j)
                        T_update::update(*data++, expr.fastRead(j));
                }
#ifdef BZ_ARRAY_EXPR_USE_COMMON_STRIDE
                else if (useCommonStride)
                {
                    // Common non-unit stride: the same index j addresses
                    // both the destination and the expression.
                    int ubound = (nj-bj) * commonStride;
                    T_numtype* restrict data = const_cast<T_numtype*>
                        (iter.data());

                    for (int j=0; j < ubound; j += commonStride)
                        T_update::update(data[j], expr.fastRead(j));
                }
#endif
                else {
                    // General path: step both iterators element by element.
                    for (int j=bj; j < nj; ++j)
                    {
                        // Loop through current row elements
                        T_update::update(*const_cast<T_numtype*>(iter.data()),
                            *expr);
                        iter.advance();
                        expr.advance();
                    }
                }

                // Move back to the beginning of the tile row, then
                // move to the next row
                iter.pop(1);
                iter.loadStride(majorRank);
                iter.advance(1);

                expr.pop(1);
                expr.loadStride(majorRank);
                expr.advance(1);
            }
        }
    }

    return *this;
}
#endif // BZ_ARRAY_2D_STENCIL_TILING
#endif // BZ_ARRAY_2D_NEW_STENCIL_TILING

BZ_NAMESPACE_END

#endif // BZ_ARRAYEVAL_CC

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -