⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 atl_caxpy_x1_y1.c

📁 基于 BLAS 和 CLAPACK 的代码。用过的人知道是干啥的
💻 C
📖 第 1 页 / 共 2 页
字号:
/* *             Automatically Tuned Linear Algebra Software v3.8.0 *                    (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: *   1. Redistributions of source code must retain the above copyright *      notice, this list of conditions and the following disclaimer. *   2. Redistributions in binary form must reproduce the above copyright *      notice, this list of conditions, and the following disclaimer in the *      documentation and/or other materials provided with the distribution. *   3. The name of the ATLAS group or the names of its contributers may *      not be used to endorse or promote products derived from this *      software without specific written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* */#include "atlas_misc.h"static void axpy_lt8(const int N, const SCALAR alpha, const TYPE *x, TYPE *y)/* * For cleanup, see if we can get compiler to do the work, use constant loops */{   int i;   const register TYPE ralpha = *alpha, ialpha = alpha[1];   register TYPE xr, xi;   switch(N)   {   case 1:      xr = *x; xi = x[1];      #ifndef Conj_         *y   += ralpha * xr - ialpha * xi;         y[1] += ialpha * xr + ralpha * xi;      #else         *y   += ralpha * xr + ialpha * xi;         y[1] += ialpha * xr - ralpha * xi;      #endif      break;   case 2:      for (i=0; i != 2; i++, x += 2, y += 2)      {         xr = *x; xi = x[1];         #ifndef Conj_            *y   += ralpha * xr - ialpha * xi;            y[1] += ialpha * xr + ralpha * xi;         #else            *y   += ralpha * xr + ialpha * xi;            y[1] += ialpha * xr - ralpha * xi;         #endif      }      break;   case 3:      for (i=0; i != 3; i++, x += 2, y += 2)      {         xr = *x; xi = x[1];         #ifndef Conj_            *y   += ralpha * xr - ialpha * xi;            y[1] += ialpha * xr + ralpha * xi;         #else            *y   += ralpha * xr + ialpha * xi;            y[1] += ialpha * xr - ralpha * xi;         #endif      }      break;   case 4:      for (i=0; i != 4; i++, x += 2, y += 2)      {         xr = *x; xi = x[1];         #ifndef Conj_            *y   += ralpha * xr - ialpha * xi;            y[1] += ialpha * xr + ralpha * xi;         #else            *y   += ralpha * xr + ialpha * xi;            y[1] += ialpha * xr - ralpha * xi;         #endif      }      break;   case 5:      for (i=0; i != 5; i++, x += 2, y += 2)      {         xr = *x; xi = x[1];         #ifndef Conj_            *y   += ralpha * xr - ialpha * xi;            y[1] += ialpha * xr + ralpha * xi;         #else            *y   += ralpha * xr + ialpha * xi;            y[1] += ialpha * xr - ralpha * xi;         #endif      }      break;   case 6:      for (i=0; i != 6; i++, x += 2, y += 2)      {         xr 
= *x; xi = x[1];         #ifndef Conj_            *y   += ralpha * xr - ialpha * xi;            y[1] += ialpha * xr + ralpha * xi;         #else            *y   += ralpha * xr + ialpha * xi;            y[1] += ialpha * xr - ralpha * xi;         #endif      }      break;   case 7:      for (i=0; i != 7; i++, x += 2, y += 2)      {         xr = *x; xi = x[1];         #ifndef Conj_            *y   += ralpha * xr - ialpha * xi;            y[1] += ialpha * xr + ralpha * xi;         #else            *y   += ralpha * xr + ialpha * xi;            y[1] += ialpha * xr - ralpha * xi;         #endif      }      break;   default:;   }}#if defined (ATL_MULADD) && ATL_mmnreg >= 26#ifdef Conj_   #define PEQ -=   #define MEQ +=#else   #define PEQ +=   #define MEQ -=#endifstatic void axpy_8(const int N, const SCALAR alpha, const TYPE *x, TYPE *y){   const int n4 = N >> 2, N2 = (n4>>1)<<1, nr = n4 - N2, nn4 = n4<<1;   TYPE *u = y+nn4, *v = u+nn4, *z = v+nn4;   const TYPE *X1 = x + nn4, *X2 = X1 + nn4, *X3 = X2 + nn4;   const TYPE *stX = x + ((N2-2)<<1);   const register TYPE ralpha = *alpha, ialpha = alpha[1];   register TYPE yr0, yi0, yr1, yi1;   register TYPE ur0, ui0, ur1, ui1;   register TYPE vr0, vi0, vr1, vi1;   register TYPE zr0, zi0, zr1, zi1;   register TYPE xr0, xi0, xr1, xi1;   register TYPE xr2, xi2, xr3, xi3;   if (N2)   {      yr0 = *y; ur0 = *u; vr0 = *v; zr0 = *z;      yi0 = y[1]; ui0 = u[1]; vi0 = v[1]; zi0 = z[1];      xr0 = *x; xr1 = *X1; xr2 = *X2; xr3 = *X3;      yr0 += xr0 * ralpha; xi0 = x[1];      ur0 += xr1 * ralpha; xi1 = X1[1];      vr0 += xr2 * ralpha; xi2 = X2[1];      zr0 += xr3 * ralpha; xi3 = X3[1];      yi0 += xr0 * ialpha; yr1 = y[2];      ui0 += xr1 * ialpha; ur1 = u[2];      vi0 += xr2 * ialpha; vr1 = v[2];      zi0 += xr3 * ialpha; zr1 = z[2];      yr0 MEQ xi0 * ialpha; yi1 = y[3];      ur0 MEQ xi1 * ialpha; ui1 = u[3];      vr0 MEQ xi2 * ialpha; vi1 = v[3];      zr0 MEQ xi3 * ialpha; zi1 = z[3];      yi0 PEQ xi0 * ralpha; xr0 = x[2];      ui0 PEQ 
xi1 * ralpha; xr1 = X1[2];      vi0 PEQ xi2 * ralpha; xr2 = X2[2];      zi0 PEQ xi3 * ralpha; xr3 = X3[2];      if (N2 != 2)      {         do         {            *y = yr0; yr1 += xr0 * ralpha; xi0 = x[3]; x += 4;            *u = ur0; ur1 += xr1 * ralpha; xi1 = X1[3]; X1 += 4;            *v = vr0; vr1 += xr2 * ralpha; xi2 = X2[3]; X2 += 4;            *z = zr0; zr1 += xr3 * ralpha; xi3 = X3[3]; X3 += 4;            y[1] = yi0; yi1 += xr0 * ialpha; yr0 = y[4];            u[1] = ui0; ui1 += xr1 * ialpha; ur0 = u[4];            v[1] = vi0; vi1 += xr2 * ialpha; vr0 = v[4];            z[1] = zi0; zi1 += xr3 * ialpha; zr0 = z[4];            yr1 MEQ xi0 * ialpha; yi0 = y[5];            ur1 MEQ xi1 * ialpha; ui0 = u[5];            vr1 MEQ xi2 * ialpha; vi0 = v[5];            zr1 MEQ xi3 * ialpha; zi0 = z[5];            yi1 PEQ xi0 * ralpha; xr0 = *x;            ui1 PEQ xi1 * ralpha; xr1 = *X1;            vi1 PEQ xi2 * ralpha; xr2 = *X2;            zi1 PEQ xi3 * ralpha; xr3 = *X3;            y[2] = yr1; yr0 += xr0 * ralpha; xi0 = x[1];            u[2] = ur1; ur0 += xr1 * ralpha; xi1 = X1[1];            v[2] = vr1; vr0 += xr2 * ralpha; xi2 = X2[1];            z[2] = zr1; zr0 += xr3 * ralpha; xi3 = X3[1];            y[3] = yi1; yi0 += xr0 * ialpha; yr1 = y[6];            u[3] = ui1; ui0 += xr1 * ialpha; ur1 = u[6];            v[3] = vi1; vi0 += xr2 * ialpha; vr1 = v[6];            z[3] = zi1; zi0 += xr3 * ialpha; zr1 = z[6];            yr0 MEQ xi0 * ialpha; yi1 = y[7];            ur0 MEQ xi1 * ialpha; ui1 = u[7]; y += 4;            vr0 MEQ xi2 * ialpha; vi1 = v[7];            zr0 MEQ xi3 * ialpha; zi1 = z[7]; u += 4;            yi0 PEQ xi0 * ralpha; xr0 = x[2]; v += 4;            ui0 PEQ xi1 * ralpha; xr1 = X1[2];            vi0 PEQ xi2 * ralpha; xr2 = X2[2]; z += 4;            zi0 PEQ xi3 * ralpha; xr3 = X3[2];         }         while (x != stX);      }      if (!nr) /* finish off this iteratation only */      {            *y = yr0; yr1 += xr0 * ralpha; xi0 = x[3];            
*u = ur0; ur1 += xr1 * ralpha; xi1 = X1[3];            *v = vr0; vr1 += xr2 * ralpha; xi2 = X2[3];            *z = zr0; zr1 += xr3 * ralpha; xi3 = X3[3]; X3 += 4;            y[1] = yi0; yi1 += xr0 * ialpha;            u[1] = ui0; ui1 += xr1 * ialpha;            v[1] = vi0; vi1 += xr2 * ialpha;            z[1] = zi0; zi1 += xr3 * ialpha;            yr1 MEQ xi0 * ialpha;            ur1 MEQ xi1 * ialpha;            vr1 MEQ xi2 * ialpha;            zr1 MEQ xi3 * ialpha;            yi1 PEQ xi0 * ralpha;            ui1 PEQ xi1 * ralpha;            vi1 PEQ xi2 * ralpha;            zi1 PEQ xi3 * ralpha;            y[2] = yr1;            u[2] = ur1;            v[2] = vr1;            z[2] = zr1;            y[3] = yi1;            u[3] = ui1;            v[3] = vi1;            z[3] = zi1; z += 4;      }      else     /* one iteration to do besides finishing off one from loop */      {            *y = yr0; yr1 += xr0 * ralpha; xi0 = x[3]; x += 4;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -