⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 eval.cc

📁 数值计算工具库,C语言编写的,可以直接调用.
💻 CC
📖 第 1 页 / 共 3 页
字号:
        {
            iter.loadStride(ordering(j));
            expr.loadStride(ordering(j));

            int offset = order[i][j-1];
            iter.advance(offset);
            expr.advance(offset);
        }

        iter.loadStride(maxRank);
        expr.loadStride(maxRank);

        // Evaluate the expression along the column

        if ((useUnitStride) || (useCommonStride))
        {
            T_numtype* _bz_restrict last = const_cast<T_numtype*>(iter.data()) 
                + lastLength * commonStride;

#ifdef BZ_USE_FAST_READ_ARRAY_EXPR
            int ubound = lastLength * commonStride;
            T_numtype* _bz_restrict data = const_cast<T_numtype*>(iter.data());

            if (commonStride == 1)
            {            
 #ifndef BZ_ARRAY_FAST_TRAVERSAL_UNROLL
                for (int i=0; i < ubound; ++i)
                    T_update::update(data[i], expr.fastRead(i));
 #else
                int n1 = ubound & 3;
                int i=0;
                for (; i < n1; ++i)
                    T_update::update(data[i], expr.fastRead(i));

                for (; i < ubound; i += 4)
                {
                    T_update::update(data[i], expr.fastRead(i));
                    T_update::update(data[i+1], expr.fastRead(i+1));
                    T_update::update(data[i+2], expr.fastRead(i+2));
                    T_update::update(data[i+3], expr.fastRead(i+3));
                }
 #endif  // BZ_ARRAY_FAST_TRAVERSAL_UNROLL
            }
 #ifdef BZ_ARRAY_EXPR_USE_COMMON_STRIDE
            else {
                for (int i=0; i < ubound; i += commonStride)
                    T_update::update(data[i], expr.fastRead(i));
            }
 #endif // BZ_ARRAY_EXPR_USE_COMMON_STRIDE

            iter.advance(lastLength * commonStride);
            expr.advance(lastLength * commonStride);
#else   // ! BZ_USE_FAST_READ_ARRAY_EXPR
            while (iter.data() != last)
            {
                T_update::update(*const_cast<T_numtype*>(iter.data()), *expr);
                iter.advance(commonStride);
                expr.advance(commonStride);
            }
#endif  // BZ_USE_FAST_READ_ARRAY_EXPR

        }
        else {
            // No common stride

            T_numtype* _bz_restrict last = const_cast<T_numtype*>(iter.data()) 
                + lastLength * stride(maxRank);

            while (iter.data() != last)
            {
                T_update::update(*const_cast<T_numtype*>(iter.data()), *expr);
                iter.advance();
                expr.advance();
            }
        }
    }

    return *this;
}
#endif // BZ_HAVE_STD

#ifdef BZ_ARRAY_2D_NEW_STENCIL_TILING

#ifdef BZ_ARRAY_2D_STENCIL_TILING

template<class T_numtype, int N_rank> template<class T_expr, class T_update>
inline Array<T_numtype, N_rank>& 
Array<T_numtype, N_rank>::evaluateWithTiled2DTraversal(
    T_expr expr, T_update)
{
    const int minorRank = ordering(0);
    const int majorRank = ordering(1);

    FastArrayIterator<T_numtype, N_rank> iter(*this);
    iter.push(0);
    expr.push(0);

#ifdef BZ_2D_STENCIL_DEBUG
    int count = 0;
#endif

    _bz_bool useUnitStride = iter.isUnitStride(minorRank)
                          && expr.isUnitStride(minorRank);

#ifdef BZ_ARRAY_EXPR_USE_COMMON_STRIDE
    int commonStride = expr.suggestStride(minorRank);
    if (iter.suggestStride(minorRank) > commonStride)
        commonStride = iter.suggestStride(minorRank);
    bool useCommonStride = iter.isStride(minorRank,commonStride)
        && expr.isStride(minorRank,commonStride);
#else
    int commonStride = 1;
    bool useCommonStride = _bz_false;
#endif

    // Determine if a common major stride exists
    int commonMajorStride = expr.suggestStride(majorRank);
    if (iter.suggestStride(majorRank) > commonMajorStride)
        commonMajorStride = iter.suggestStride(majorRank);
    bool haveCommonMajorStride = iter.isStride(majorRank,commonMajorStride)
        && expr.isStride(majorRank,commonMajorStride);


    int maxi = length(majorRank);
    int maxj = length(minorRank);

    const int tileHeight = 16, tileWidth = 3;

    int bi, bj;
    for (bi=0; bi < maxi; bi += tileHeight)
    {
        int ni = bi + tileHeight;
        if (ni > maxi)
            ni = maxi;

        // Move back to the beginning of the array
        iter.pop(0);
        expr.pop(0);

        // Move to the start of this tile row
        iter.loadStride(majorRank);
        iter.advance(bi);
        expr.loadStride(majorRank);
        expr.advance(bi);

        // Save this position
        iter.push(1);
        expr.push(1);

        for (bj=0; bj < maxj; bj += tileWidth)
        {
            // Move to the beginning of the tile row
            iter.pop(1);
            expr.pop(1);

            // Move to the top of the current tile (bi,bj)
            iter.loadStride(minorRank);
            iter.advance(bj);
            expr.loadStride(minorRank);
            expr.advance(bj);

            if (bj + tileWidth <= maxj)
            {
                // Strip mining

                if ((useUnitStride) && (haveCommonMajorStride))
                {
                    int offset = 0;
                    T_numtype* _bz_restrict data = const_cast<T_numtype*>
                        (iter.data());

                    for (int i=bi; i < ni; ++i)
                    {
                        _bz_typename T_expr::T_numtype tmp1, tmp2, tmp3;

                        // Common subexpression elimination -- compilers
                        // won't necessarily do this on their own.
                        int t1 = offset+1;
                        int t2 = offset+2;

                        tmp1 = expr.fastRead(offset);
                        tmp2 = expr.fastRead(t1);
                        tmp3 = expr.fastRead(t2);

                        T_update::update(data[0], tmp1);
                        T_update::update(data[1], tmp2);
                        T_update::update(data[2], tmp3);

                        offset += commonMajorStride;
                        data += commonMajorStride;

#ifdef BZ_2D_STENCIL_DEBUG
    count += 3;
#endif
                    }
                }
                else {

                    for (int i=bi; i < ni; ++i)
                    {
                        iter.loadStride(minorRank);
                        expr.loadStride(minorRank);

                        // Loop through current row elements
                        T_update::update(*const_cast<T_numtype*>(iter.data()),
                            *expr);
                        iter.advance();
                        expr.advance();

                        T_update::update(*const_cast<T_numtype*>(iter.data()),
                            *expr);
                        iter.advance();
                        expr.advance();

                        T_update::update(*const_cast<T_numtype*>(iter.data()),
                            *expr);
                        iter.advance(-2);
                        expr.advance(-2);

                        iter.loadStride(majorRank);
                        expr.loadStride(majorRank);
                        iter.advance();
                        expr.advance();

#ifdef BZ_2D_STENCIL_DEBUG
    count += 3;
#endif

                    }
                }
            }
            else {

                // This code handles partial tiles at the bottom of the
                // array.

                for (int j=bj; j < maxj; ++j)
                {
                    iter.loadStride(majorRank);
                    expr.loadStride(majorRank);

                    for (int i=bi; i < ni; ++i)
                    {
                        T_update::update(*const_cast<T_numtype*>(iter.data()),
                            *expr);
                        iter.advance();
                        expr.advance();
#ifdef BZ_2D_STENCIL_DEBUG
    ++count;
#endif

                    }

                    // Move back to the top of this column
                    iter.advance(bi-ni);
                    expr.advance(bi-ni);

                    // Move over to the next column
                    iter.loadStride(minorRank);
                    expr.loadStride(minorRank);

                    iter.advance();
                    expr.advance();
                }
            }
        }
    }

#ifdef BZ_2D_STENCIL_DEBUG
    cout << "BZ_2D_STENCIL_DEBUG: count = " << count << endl;
#endif

    return *this;
}

#endif // BZ_ARRAY_2D_STENCIL_TILING
#endif // BZ_ARRAY_2D_NEW_STENCIL_TILING



#ifndef BZ_ARRAY_2D_NEW_STENCIL_TILING

#ifdef BZ_ARRAY_2D_STENCIL_TILING

template<class T_numtype, int N_rank> template<class T_expr, class T_update>
inline Array<T_numtype, N_rank>& 
Array<T_numtype, N_rank>::evaluateWithTiled2DTraversal(
    T_expr expr, T_update)
{
    const int minorRank = ordering(0);
    const int majorRank = ordering(1);

    const int blockSize = 16;
    
    FastArrayIterator<T_numtype, N_rank> iter(*this);
    iter.push(0);
    expr.push(0);

    _bz_bool useUnitStride = iter.isUnitStride(minorRank)
                          && expr.isUnitStride(minorRank);

#ifdef BZ_ARRAY_EXPR_USE_COMMON_STRIDE
    int commonStride = expr.suggestStride(minorRank);
    if (iter.suggestStride(minorRank) > commonStride)
        commonStride = iter.suggestStride(minorRank);
    bool useCommonStride = iter.isStride(minorRank,commonStride)
        && expr.isStride(minorRank,commonStride);
#else
    int commonStride = 1;
    bool useCommonStride = _bz_false;
#endif

    int maxi = length(majorRank);
    int maxj = length(minorRank);

    int bi, bj;
    for (bi=0; bi < maxi; bi += blockSize)
    {
        int ni = bi + blockSize;
        if (ni > maxi)
            ni = maxi;

        for (bj=0; bj < maxj; bj += blockSize)
        {
            int nj = bj + blockSize;
            if (nj > maxj)
                nj = maxj;

            // Move to the beginning of the array
            iter.pop(0);
            expr.pop(0);

            // Move to the beginning of the tile (bi,bj)
            iter.loadStride(majorRank);
            iter.advance(bi);
            iter.loadStride(minorRank);
            iter.advance(bj);

            expr.loadStride(majorRank);
            expr.advance(bi);
            expr.loadStride(minorRank);
            expr.advance(bj);

            // Loop through tile rows
            for (int i=bi; i < ni; ++i)
            {
                // Save the beginning of this tile row
                iter.push(1);
                expr.push(1);

                // Load the minor stride
                iter.loadStride(minorRank);
                expr.loadStride(minorRank);

                if (useUnitStride)
                {
                    T_numtype* _bz_restrict data = const_cast<T_numtype*>
                        (iter.data());

                    int ubound = (nj-bj);
                    for (int j=0; j < ubound; ++j)
                        T_update::update(data[j], expr.fastRead(j));
                }
#ifdef BZ_ARRAY_EXPR_USE_COMMON_STRIDE
                else if (useCommonStride)
                {
                    int ubound = (nj-bj) * commonStride;
                    T_numtype* _bz_restrict data = const_cast<T_numtype*>
                        (iter.data());

                    for (int j=0; j < ubound; j += commonStride)
                        T_update::update(data[j], expr.fastRead(j));
                }
#endif
                else {
                    for (int j=bj; j < nj; ++j)
                    {
                        // Loop through current row elements
                        T_update::update(*const_cast<T_numtype*>(iter.data()), 
                            *expr);
                        iter.advance();
                        expr.advance();
                    }
                }

                // Move back to the beginning of the tile row, then
                // move to the next row
                iter.pop(1);
                iter.loadStride(majorRank);
                iter.advance(1);

                expr.pop(1);
                expr.loadStride(majorRank);
                expr.advance(1);
            }
        }
    }

    return *this;
}
#endif // BZ_ARRAY_2D_STENCIL_TILING
#endif // BZ_ARRAY_2D_NEW_STENCIL_TILING

BZ_NAMESPACE_END

#endif // BZ_ARRAYEVAL_CC

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -