⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 camm_dpa.h

📁 基于 BLAS 和 CLAPACK 的库，用过的人知道是干啥的
💻 H
📖 第 1 页 / 共 3 页
字号:
#include <stdlib.h>#include <sys/time.h>#include <stdio.h>#include "camm_util.h"#if defined(ALIGN) #if( defined(SCPLX) || defined(DCPLX))#error Cannot align complex routines#endif#if defined(SREAL) && ( NDPM != 1 ) && ( STRIDE % 4 != 0)#error Can only align SREAL with NDPM 1 or STRIDE % 4 = 0#endif#if defined(DREAL) && ( NDPM != 1 ) && ( STRIDE % 2 != 0)#error Can only align DREAL with NDPM 1 or STRIDE % 2 = 0#endif#endif/****************************************************************************** *  Single Precision Complex Macros ******************************************************************************/  #ifdef SCPLX#ifdef NO_TRANSPOSE#if NDPM > 3 #error Max NDPM is 3 for SCPLX NO_TRANSPOSE#endif#undef plax#define plax#undef R1#define R1 2#undef R2#define R2 4#undef R3#define R3 6#undef R4#define R4 6#undef TREG#define TREG 1#undef SREG#define SREG 0#undef CREG#define CREG 0#ifdef GER#undef AREG#define AREG 0#undef targ#define targ(a_)        AREG#undef wb#define wb(a_,b_)       pu(AREG,a_,b_)#undef wbd#define wbd(a_,b_)      pud(AREG,a_,b_)#undef w#define w(a_)#undef w1_2#define w1_2(a_)#else#undef AREG#define AREG TREG#undef targ#define targ(a_)        CREG#undef wb#define wb(a_,b_)#undef wbd#define wbd(a_,b_)#undef w#define w(a_)           pu(CREG,a_ ## 0,si)#undef w1_2#define w1_2(a_)        pud(CREG,a_ ## 0,si)#endif#undef src#define src(a_)         a_#undef mpx#define mpx(a_)         pls(0,si,a_) ps(0,a_,a_) pls(4,si,P(a_,1)) \                        ps(0,P(a_,1),P(a_,1)) sign(a_)#undef madd#define madd(a_,b_,c_)  pas(a_,b_,c_)#undef ulfa#define ulfa(a_)#else#undef R1#define R1 4#undef R2#define R2 5#undef R3#define R3 6#undef R4#define R4 7#undef TREG#define TREG 3#undef SREG#define SREG 2#undef CREG#define CREG 0#undef targ#define targ(a_)        a_#undef src#define src(a_)         0#undef w#define w(a_)#undef w1_2#define w1_2(a_)#undef mpx#define mpx(a_)        px(a_)#ifdef BETA0#undef ulfa#define ulfa(a_)       phl(a_,0) pa(0,a_) 
pud(a_,0,si)#else#undef ulfa#define ulfa(a_)       pld(0,si,TREG) phl(a_,0) pa(0,a_) pa(TREG,a_) pud(a_,0,si)#endif#undef AREG#define AREG TREG#undef wb#define wb(a_,b_)#undef wbd#define wbd(a_,b_)#undef wbs#define wbs(a_,b_)#undef plax#define plax       pc(CREG,1) ps(160,CREG,CREG) ps(245,1,1) sign(CREG)#endif#if defined(Conj_) && ! defined(GER) #undef sign#define sign(a_)       pm(SREG,a_)#else		   #undef sign#define sign(a_)       pm(SREG,P(a_,1))#endif#undef plb#define plb(a_,b_)           pl(a_,b_,AREG)#undef plbd#define plbd(a_,b_)          px(AREG) pld(a_,b_,AREG)#undef dpr#define dpr(a_)              pm(src(a_),TREG) pa(TREG,targ(a_))#undef dprp#define dprp(a_,b_,c_)       pf(b_,c_) pm(src(a_),TREG) pa(TREG,targ(a_))#undef dpi#define dpi(a_)              pm(P(src(a_),1),TREG) ps(177,TREG,TREG) pa(TREG,targ(a_))#ifndef GER#undef plaa#define plaa(a_)                pl(a_ ## 0,si,CREG) plax#undef wa#define wa(a_)                  w(a_)#undef dp#define dp(a_,b_,c_)            plb(a_ ## 0,b_) dpr(c_) plb(a_ ## 0,b_) dpi(c_)#undef dpp#define dpp(a_,b_,c_,d_,e_)     plb(a_ ## 0,b_) dprp(c_,d_,e_) plb(a_ ## 0,b_) dpi(c_)#undef ddp#define ddp(a_,b_,c_)           dp(a_,b_,c_)       #undef ddpp#define ddpp(a_,b_,c_,d_,e_)    dpp(a_,b_,c_,d_,e_)#undef plaa1_2#define plaa1_2(a_)             px(CREG) pld(a_ ## 0,si,CREG) plax#undef wa1_2#define wa1_2(a_)               w1_2(a_)#undef dp1_2#define dp1_2(a_,b_,c_)         plbd(a_ ## 0,b_) dpr(c_) plbd(a_ ## 0,b_) dpi(c_)#undef dpp1_2#define dpp1_2(a_,b_,c_,d_,e_)  plbd(a_ ## 0,b_) dprp(c_,d_,e_) plbd(a_ ## 0,b_) dpi(c_)#undef ddp1_2#define ddp1_2(a_,b_,c_)        dp1_2(a_,b_,c_)       #undef ddpp1_2#define ddpp1_2(a_,b_,c_,d_,e_) dpp1_2(a_,b_,c_,d_,e_)#else#undef lqc#define lqc(a_)              pl(a_ ## 0,si,TREG)#undef lqc1#define lqc1_2(a_)           px(TREG) pld(a_ ## 0,si,TREG)#undef plaa#define plaa(a_) #undef wa#define wa(a_)#undef dp#define dp(a_,b_,c_)         lqc(a_) plb(a_ ## 0,b_) dpr(c_) \                        
     lqc(a_) dpi(c_) wb(a_ ## 0,b_)#undef dpp#define dpp(a_,b_,c_,d_,e_)  lqc(a_) plb(a_ ## 0,b_) dpr(c_) pf(d_,e_) \                             lqc(a_) dpi(c_) wb(a_ ## 0,b_)#undef ddp#define ddp(a_,b_,c_)        dp(a_,b_,c_)       #undef ddpp#define ddpp(a_,b_,c_,d_,e_) dpp(a_,b_,c_,d_,e_)#undef plaa1_2#define plaa1_2(a_)#undef wa1_2#define wa1_2(a_)#undef dp1_2#define dp1_2(a_,b_,c_)         lqc1_2(a_) plbd(a_ ## 0,b_) dpr(c_) \                                lqc1_2(a_) dpi(c_) wbd(a_ ## 0,b_)#undef dpp1_2#define dpp1_2(a_,b_,c_,d_,e_)  lqc1_2(a_) plbd(a_ ## 0,b_) dpr(c_) pf(d_,e_) \                                lqc1_2(a_) dpi(c_) wbd(a_ ## 0,b_)#undef ddp1_2#define ddp1_2(a_,b_,c_)        dp1_2(a_,b_,c_)       #undef ddpp1_2#define ddpp1_2(a_,b_,c_,d_,e_) dpp1_2(a_,b_,c_,d_,e_)#endif#endif/****************************************************************************** *  Single Precision Real Macros ******************************************************************************/  #ifdef SREAL#ifdef NO_TRANSPOSE#undef mpx#define mpx(a_)        pls(0,si,a_) ps(0,a_,a_)#undef madd#define madd(a_,b_,c_) pas(a_,b_,c_)#undef TREG#define TREG 1#undef targ#define targ(a_)        0#undef src#define src(a_)         a_#undef ulfa#define ulfa(a_)#ifdef GER#undef w#define w(a_)#undef w1_2#define w1_2(a_)#undef w1_4#define w1_4(a_)#undef CREG#define CREG 2#undef AREG#define AREG 0#undef cp#define cp pc(CREG,TREG)#undef wb#define wb(a_,b_) pu(AREG,a_,b_)#undef wbd#define wbd(a_,b_) pud(AREG,a_,b_)#undef wbs#define wbs(a_,b_) pus(AREG,a_,b_)#else#undef CREG#define CREG 0#undef AREG#define AREG TREG#undef cp#define cp#undef wb#define wb(a_,b_)#undef wbd#define wbd(a_,b_)#undef wbs#define wbs(a_,b_)#undef w#define w(a_)           pu(CREG,a_ ## 0,si)#undef w1_2#define w1_2(a_)        pud(CREG,a_ ## 0,si)#undef w1_4#define w1_4(a_)        pus(CREG,a_ ## 0,si)#endif#else#undef mpx#define mpx(a_)        px(a_)#ifdef BETA0#undef madd#define madd(a_,b_,c_)#else#undef madd#define 
madd(a_,b_,c_) pas(a_,b_,c_)#endif#undef TREG#define TREG 3#undef targ#define targ(a_)        a_#undef src#define src(a_)         0#undef w#define w(a_)#undef w1_2#define w1_2(a_)#undef w1_4#define w1_4(a_)#undef ulfa#undef ulfa#define ulfa(a_)       phl(a_,0) pa(0,a_) pc(a_,0) ps(1,0,0) pa(0,a_) \                       madd(0,si,a_) pus(a_,0,si) #undef CREG#define CREG 0#undef AREG#define AREG TREG#undef cp#define cp#undef wb#define wb(a_,b_)#undef wbd#define wbd(a_,b_)#undef wbs#define wbs(a_,b_)#endif#if defined(ALIGN)#undef plb#define plb(a_,b_)           pla(a_,b_,AREG)#else#undef plb#define plb(a_,b_)           pl(a_,b_,AREG)#endif#undef plbd#define plbd(a_,b_)          px(AREG) pld(a_,b_,AREG)#undef plbs#define plbs(a_,b_)          pls(a_,b_,AREG)#undef dpr#define dpr(a_)              pm(src(a_),TREG) pa(TREG,targ(a_))#undef dprp#define dprp(a_,b_,c_)       pf(b_,c_) pm(src(a_),TREG) pa(TREG,targ(a_))#undef dprs#define dprs(a_)             pmsr(src(a_),TREG) pasr(TREG,targ(a_))#undef dprps#define dprps(a_,b_,c_)      pf(b_,c_) pmsr(src(a_),TREG) pasr(TREG,targ(a_))#undef plaa#define plaa(a_)             pl(a_ ## 0,si,CREG) #undef wa#define wa(a_)               w(a_)#undef dp#define dp(a_,b_,c_)         cp plb(a_ ## 0,b_) dpr(c_) wb(a_ ## 0,b_)#undef dpp#define dpp(a_,b_,c_,d_,e_)  cp plb(a_ ## 0,b_) dprp(c_,d_,e_) wb(a_ ## 0,b_)#undef ddp#define ddp(a_,b_,c_)        dp(a_,b_,c_)       #undef ddpp#define ddpp(a_,b_,c_,d_,e_) dpp(a_,b_,c_,d_,e_)#undef plaa1_2#define plaa1_2(a_)             px(CREG) pld(a_ ## 0,si,CREG) #undef wa1_2#define wa1_2(a_)               w1_2(a_)#undef dp1_2#define dp1_2(a_,b_,c_)         cp plbd(a_ ## 0,b_) dpr(c_) wbd(a_ ## 0,b_)#undef dpp1_2#define dpp1_2(a_,b_,c_,d_,e_)  cp plbd(a_ ## 0,b_) dprp(c_,d_,e_) wbd(a_ ## 0,b_)#undef ddp1_2#define ddp1_2(a_,b_,c_)        dp1_2(a_,b_,c_)       #undef ddpp1_2#define ddpp1_2(a_,b_,c_,d_,e_) dpp1_2(a_,b_,c_,d_,e_)#undef plaa1_4#define plaa1_4(a_)             pls(a_ ## 0,si,CREG) #undef 
wa1_4#define wa1_4(a_)               w1_4(a_)#undef dp1_4#define dp1_4(a_,b_,c_)         cp plbs(a_ ## 0,b_) dprs(c_) wbs(a_ ## 0,b_)#undef dpp1_4#define dpp1_4(a_,b_,c_,d_,e_)  cp plbs(a_ ## 0,b_) dprps(c_,d_,e_) wbs(a_ ## 0,b_)#undef ddp1_4#define ddp1_4(a_,b_,c_)        dp1_4(a_,b_,c_)       #undef ddpp1_4#define ddpp1_4(a_,b_,c_,d_,e_) dpp1_4(a_,b_,c_,d_,e_)#undef R1#define R1 4#undef R2#define R2 5#undef R3#define R3 6#undef R4#define R4 7#endif/****************************************************************************** *  Double Precision Real Macros ******************************************************************************/  #ifdef DREAL#ifdef ATL_SSE2#ifdef NO_TRANSPOSE#undef mpx#define mpx(a_)        pls(0,si,a_) ps(0,a_,a_)#undef madd#define madd(a_,b_,c_) pas(a_,b_,c_)#undef TREG#define TREG 1#undef targ#define targ(a_)        0#undef src#define src(a_)         a_#undef ulfa#define ulfa(a_)#ifdef GER#undef w#define w(a_)#undef w1_2#define w1_2(a_)#undef w1_4#define w1_4(a_)#undef CREG#define CREG 2#undef AREG#define AREG 0#undef cp#define cp pc(CREG,TREG)#undef wb#define wb(a_,b_) pu(AREG,a_,b_)#undef wbd#define wbd(a_,b_) pus(AREG,a_,b_)#undef wbs/* #define wbs(a_,b_) pus(AREG,a_,b_) */#else#undef CREG#define CREG 0#undef AREG#define AREG TREG#undef cp#define cp#undef wb#define wb(a_,b_)#undef wbd#define wbd(a_,b_)#undef wbs/* #define wbs(a_,b_) */#undef w#define w(a_)           pu(CREG,a_ ## 0,si)#undef w1_2#define w1_2(a_)        pus(CREG,a_ ## 0,si)#undef w1_4/* #define w1_4(a_)        pus(CREG,a_ ## 0,si) */#endif#else#undef mpx#define mpx(a_)        px(a_)#ifdef BETA0#undef madd#define madd(a_,b_,c_)#else#undef madd#define madd(a_,b_,c_) pas(a_,b_,c_)#endif#undef TREG#define TREG 3#undef targ#define targ(a_)        a_#undef src#define src(a_)         0#undef w#define w(a_)#undef w1_2#define w1_2(a_)#undef w1_4#define w1_4(a_)#undef ulfa#undef ulfa#define ulfa(a_)       /* phl(a_,0) pa(0,a_) */ pc(a_,0)  ps(1,0,0) pa(0,a_) \                   
    madd(0,si,a_) pus(a_,0,si) #undef CREG#define CREG 0#undef AREG#define AREG TREG#undef cp#define cp#undef wb#define wb(a_,b_)#undef wbd#define wbd(a_,b_)#undef wbs#define wbs(a_,b_)#endif#if defined(ALIGN)#undef plb#define plb(a_,b_)           pla(a_,b_,AREG)#else#undef plb#define plb(a_,b_)           pl(a_,b_,AREG)#endif#undef plbd#define plbd(a_,b_)          /* px(AREG)  */pls(a_,b_,AREG)#undef plbs/* #define plbs(a_,b_)          pls(a_,b_,AREG) */#undef dpr#define dpr(a_)              pm(src(a_),TREG) pa(TREG,targ(a_))#undef dprp#define dprp(a_,b_,c_)       pf(b_,c_) pm(src(a_),TREG) pa(TREG,targ(a_))#undef dprs#define dprs(a_)             pmsr(src(a_),TREG) pasr(TREG,targ(a_))#undef dprps#define dprps(a_,b_,c_)      pf(b_,c_) pmsr(src(a_),TREG) pasr(TREG,targ(a_))#undef plaa#define plaa(a_)             pl(a_ ## 0,si,CREG) #undef wa#define wa(a_)               w(a_)#undef dp#define dp(a_,b_,c_)         cp plb(a_ ## 0,b_) dpr(c_) wb(a_ ## 0,b_)#undef dpp#define dpp(a_,b_,c_,d_,e_)  cp plb(a_ ## 0,b_) dprp(c_,d_,e_) wb(a_ ## 0,b_)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -