📄 q1fv_8.c
字号:
T2i = VMUL(LDK(KP707106781), VADD(T2e, T2h)); T2D = VBYI(VSUB(T2z, T2y)); T2k = VMUL(LDK(KP707106781), VSUB(T2h, T2e)); T2A = VADD(T2y, T2z); } { V T3P, T49, T3S, T4a; { V T3N, T3O, T3Q, T3R; T3N = LD(&(x[WS(vs, 7) + WS(rs, 1)]), ms, &(x[WS(vs, 7) + WS(rs, 1)])); T3O = LD(&(x[WS(vs, 7) + WS(rs, 5)]), ms, &(x[WS(vs, 7) + WS(rs, 1)])); T3P = VSUB(T3N, T3O); T49 = VADD(T3N, T3O); T3Q = LD(&(x[WS(vs, 7) + WS(rs, 7)]), ms, &(x[WS(vs, 7) + WS(rs, 1)])); T3R = LD(&(x[WS(vs, 7) + WS(rs, 3)]), ms, &(x[WS(vs, 7) + WS(rs, 1)])); T3S = VSUB(T3Q, T3R); T4a = VADD(T3Q, T3R); } T3T = VMUL(LDK(KP707106781), VADD(T3P, T3S)); T4e = VBYI(VSUB(T4a, T49)); T3V = VMUL(LDK(KP707106781), VSUB(T3S, T3P)); T4b = VADD(T49, T4a); } { V TD, TX, TG, TY; { V TB, TC, TE, TF; TB = LD(&(x[WS(vs, 1) + WS(rs, 1)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); TC = LD(&(x[WS(vs, 1) + WS(rs, 5)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); TD = VSUB(TB, TC); TX = VADD(TB, TC); TE = LD(&(x[WS(vs, 1) + WS(rs, 7)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); TF = LD(&(x[WS(vs, 1) + WS(rs, 3)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); TG = VSUB(TE, TF); TY = VADD(TE, TF); } TH = VMUL(LDK(KP707106781), VADD(TD, TG)); T12 = VBYI(VSUB(TY, TX)); TJ = VMUL(LDK(KP707106781), VSUB(TG, TD)); TZ = VADD(TX, TY); } { V T1a, T1u, T1d, T1v; { V T18, T19, T1b, T1c; T18 = LD(&(x[WS(vs, 2) + WS(rs, 1)]), ms, &(x[WS(vs, 2) + WS(rs, 1)])); T19 = LD(&(x[WS(vs, 2) + WS(rs, 5)]), ms, &(x[WS(vs, 2) + WS(rs, 1)])); T1a = VSUB(T18, T19); T1u = VADD(T18, T19); T1b = LD(&(x[WS(vs, 2) + WS(rs, 7)]), ms, &(x[WS(vs, 2) + WS(rs, 1)])); T1c = LD(&(x[WS(vs, 2) + WS(rs, 3)]), ms, &(x[WS(vs, 2) + WS(rs, 1)])); T1d = VSUB(T1b, T1c); T1v = VADD(T1b, T1c); } T1e = VMUL(LDK(KP707106781), VADD(T1a, T1d)); T1z = VBYI(VSUB(T1v, T1u)); T1g = VMUL(LDK(KP707106781), VSUB(T1d, T1a)); T1w = VADD(T1u, T1v); } { V T2L, T35, T2O, T36; { V T2J, T2K, T2M, T2N; T2J = LD(&(x[WS(vs, 5) + WS(rs, 1)]), ms, &(x[WS(vs, 5) + WS(rs, 1)])); T2K = LD(&(x[WS(vs, 5) + WS(rs, 5)]), ms, &(x[WS(vs, 5) + 
WS(rs, 1)])); T2L = VSUB(T2J, T2K); T35 = VADD(T2J, T2K); T2M = LD(&(x[WS(vs, 5) + WS(rs, 7)]), ms, &(x[WS(vs, 5) + WS(rs, 1)])); T2N = LD(&(x[WS(vs, 5) + WS(rs, 3)]), ms, &(x[WS(vs, 5) + WS(rs, 1)])); T2O = VSUB(T2M, T2N); T36 = VADD(T2M, T2N); } T2P = VMUL(LDK(KP707106781), VADD(T2L, T2O)); T3a = VBYI(VSUB(T36, T35)); T2R = VMUL(LDK(KP707106781), VSUB(T2O, T2L)); T37 = VADD(T35, T36); } { V T3i, T3C, T3l, T3D; { V T3g, T3h, T3j, T3k; T3g = LD(&(x[WS(vs, 6) + WS(rs, 1)]), ms, &(x[WS(vs, 6) + WS(rs, 1)])); T3h = LD(&(x[WS(vs, 6) + WS(rs, 5)]), ms, &(x[WS(vs, 6) + WS(rs, 1)])); T3i = VSUB(T3g, T3h); T3C = VADD(T3g, T3h); T3j = LD(&(x[WS(vs, 6) + WS(rs, 7)]), ms, &(x[WS(vs, 6) + WS(rs, 1)])); T3k = LD(&(x[WS(vs, 6) + WS(rs, 3)]), ms, &(x[WS(vs, 6) + WS(rs, 1)])); T3l = VSUB(T3j, T3k); T3D = VADD(T3j, T3k); } T3m = VMUL(LDK(KP707106781), VADD(T3i, T3l)); T3H = VBYI(VSUB(T3D, T3C)); T3o = VMUL(LDK(KP707106781), VSUB(T3l, T3i)); T3E = VADD(T3C, T3D); } ST(&(x[0]), VADD(Tp, Ts), ms, &(x[0])); ST(&(x[WS(rs, 2)]), VADD(T1t, T1w), ms, &(x[0])); ST(&(x[WS(rs, 5)]), VADD(T34, T37), ms, &(x[WS(rs, 1)])); ST(&(x[WS(rs, 7)]), VADD(T48, T4b), ms, &(x[WS(rs, 1)])); ST(&(x[WS(rs, 6)]), VADD(T3B, T3E), ms, &(x[0])); ST(&(x[WS(rs, 4)]), VADD(T2x, T2A), ms, &(x[0])); { V Tt, T4c, T2B, T24; ST(&(x[WS(rs, 3)]), VADD(T20, T23), ms, &(x[WS(rs, 1)])); ST(&(x[WS(rs, 1)]), VADD(TW, TZ), ms, &(x[WS(rs, 1)])); Tt = BYTWJ(&(W[TWVL * 6]), VSUB(Tp, Ts)); ST(&(x[WS(vs, 4)]), Tt, ms, &(x[WS(vs, 4)])); T4c = BYTWJ(&(W[TWVL * 6]), VSUB(T48, T4b)); ST(&(x[WS(vs, 4) + WS(rs, 7)]), T4c, ms, &(x[WS(vs, 4) + WS(rs, 1)])); T2B = BYTWJ(&(W[TWVL * 6]), VSUB(T2x, T2A)); ST(&(x[WS(vs, 4) + WS(rs, 4)]), T2B, ms, &(x[WS(vs, 4)])); T24 = BYTWJ(&(W[TWVL * 6]), VSUB(T20, T23)); ST(&(x[WS(vs, 4) + WS(rs, 3)]), T24, ms, &(x[WS(vs, 4) + WS(rs, 1)])); } { V T10, T1x, T3F, T38, T1A, Tw; T10 = BYTWJ(&(W[TWVL * 6]), VSUB(TW, TZ)); ST(&(x[WS(vs, 4) + WS(rs, 1)]), T10, ms, &(x[WS(vs, 4) + WS(rs, 1)])); T1x = BYTWJ(&(W[TWVL 
* 6]), VSUB(T1t, T1w)); ST(&(x[WS(vs, 4) + WS(rs, 2)]), T1x, ms, &(x[WS(vs, 4)])); T3F = BYTWJ(&(W[TWVL * 6]), VSUB(T3B, T3E)); ST(&(x[WS(vs, 4) + WS(rs, 6)]), T3F, ms, &(x[WS(vs, 4)])); T38 = BYTWJ(&(W[TWVL * 6]), VSUB(T34, T37)); ST(&(x[WS(vs, 4) + WS(rs, 5)]), T38, ms, &(x[WS(vs, 4) + WS(rs, 1)])); T1A = BYTWJ(&(W[TWVL * 10]), VSUB(T1y, T1z)); ST(&(x[WS(vs, 6) + WS(rs, 2)]), T1A, ms, &(x[WS(vs, 6)])); Tw = BYTWJ(&(W[TWVL * 10]), VSUB(Tu, Tv)); ST(&(x[WS(vs, 6)]), Tw, ms, &(x[WS(vs, 6)])); } { V T2E, T3I, T13, T27, T3b, T4f; T2E = BYTWJ(&(W[TWVL * 10]), VSUB(T2C, T2D)); ST(&(x[WS(vs, 6) + WS(rs, 4)]), T2E, ms, &(x[WS(vs, 6)])); T3I = BYTWJ(&(W[TWVL * 10]), VSUB(T3G, T3H)); ST(&(x[WS(vs, 6) + WS(rs, 6)]), T3I, ms, &(x[WS(vs, 6)])); T13 = BYTWJ(&(W[TWVL * 10]), VSUB(T11, T12)); ST(&(x[WS(vs, 6) + WS(rs, 1)]), T13, ms, &(x[WS(vs, 6) + WS(rs, 1)])); T27 = BYTWJ(&(W[TWVL * 10]), VSUB(T25, T26)); ST(&(x[WS(vs, 6) + WS(rs, 3)]), T27, ms, &(x[WS(vs, 6) + WS(rs, 1)])); T3b = BYTWJ(&(W[TWVL * 10]), VSUB(T39, T3a)); ST(&(x[WS(vs, 6) + WS(rs, 5)]), T3b, ms, &(x[WS(vs, 6) + WS(rs, 1)])); T4f = BYTWJ(&(W[TWVL * 10]), VSUB(T4d, T4e)); ST(&(x[WS(vs, 6) + WS(rs, 7)]), T4f, ms, &(x[WS(vs, 6) + WS(rs, 1)])); } { V Tx, T1B, T3c, T4g, T3J, T2F; Tx = BYTWJ(&(W[TWVL * 2]), VADD(Tu, Tv)); ST(&(x[WS(vs, 2)]), Tx, ms, &(x[WS(vs, 2)])); T1B = BYTWJ(&(W[TWVL * 2]), VADD(T1y, T1z)); ST(&(x[WS(vs, 2) + WS(rs, 2)]), T1B, ms, &(x[WS(vs, 2)])); T3c = BYTWJ(&(W[TWVL * 2]), VADD(T39, T3a)); ST(&(x[WS(vs, 2) + WS(rs, 5)]), T3c, ms, &(x[WS(vs, 2) + WS(rs, 1)])); T4g = BYTWJ(&(W[TWVL * 2]), VADD(T4d, T4e)); ST(&(x[WS(vs, 2) + WS(rs, 7)]), T4g, ms, &(x[WS(vs, 2) + WS(rs, 1)])); T3J = BYTWJ(&(W[TWVL * 2]), VADD(T3G, T3H)); ST(&(x[WS(vs, 2) + WS(rs, 6)]), T3J, ms, &(x[WS(vs, 2)])); T2F = BYTWJ(&(W[TWVL * 2]), VADD(T2C, T2D)); ST(&(x[WS(vs, 2) + WS(rs, 4)]), T2F, ms, &(x[WS(vs, 2)])); } T28 = BYTWJ(&(W[TWVL * 2]), VADD(T25, T26)); ST(&(x[WS(vs, 2) + WS(rs, 3)]), T28, ms, &(x[WS(vs, 2) + WS(rs, 1)])); T14 
= BYTWJ(&(W[TWVL * 2]), VADD(T11, T12)); ST(&(x[WS(vs, 2) + WS(rs, 1)]), T14, ms, &(x[WS(vs, 2) + WS(rs, 1)])); { V Th, Ti, Tb, Tg; Tb = VADD(T3, Ta); Tg = VBYI(VSUB(Tc, Tf)); Th = BYTWJ(&(W[TWVL * 12]), VSUB(Tb, Tg)); Ti = BYTWJ(&(W[0]), VADD(Tb, Tg)); ST(&(x[WS(vs, 7)]), Th, ms, &(x[WS(vs, 7)])); ST(&(x[WS(vs, 1)]), Ti, ms, &(x[WS(vs, 1)])); } { V T40, T41, T3U, T3Z; T3U = VADD(T3M, T3T); T3Z = VBYI(VSUB(T3V, T3Y)); T40 = BYTWJ(&(W[TWVL * 12]), VSUB(T3U, T3Z)); T41 = BYTWJ(&(W[0]), VADD(T3U, T3Z)); ST(&(x[WS(vs, 7) + WS(rs, 7)]), T40, ms, &(x[WS(vs, 7) + WS(rs, 1)])); ST(&(x[WS(vs, 1) + WS(rs, 7)]), T41, ms, &(x[WS(vs, 1) + WS(rs, 1)])); } { V T2p, T2q, T2j, T2o; T2j = VADD(T2b, T2i); T2o = VBYI(VSUB(T2k, T2n)); T2p = BYTWJ(&(W[TWVL * 12]), VSUB(T2j, T2o)); T2q = BYTWJ(&(W[0]), VADD(T2j, T2o)); ST(&(x[WS(vs, 7) + WS(rs, 4)]), T2p, ms, &(x[WS(vs, 7)])); ST(&(x[WS(vs, 1) + WS(rs, 4)]), T2q, ms, &(x[WS(vs, 1)])); } { V T1S, T1T, T1M, T1R; T1M = VADD(T1E, T1L); T1R = VBYI(VSUB(T1N, T1Q)); T1S = BYTWJ(&(W[TWVL * 12]), VSUB(T1M, T1R)); T1T = BYTWJ(&(W[0]), VADD(T1M, T1R)); ST(&(x[WS(vs, 7) + WS(rs, 3)]), T1S, ms, &(x[WS(vs, 7) + WS(rs, 1)])); ST(&(x[WS(vs, 1) + WS(rs, 3)]), T1T, ms, &(x[WS(vs, 1) + WS(rs, 1)])); } { V TO, TP, TI, TN; TI = VADD(TA, TH); TN = VBYI(VSUB(TJ, TM)); TO = BYTWJ(&(W[TWVL * 12]), VSUB(TI, TN)); TP = BYTWJ(&(W[0]), VADD(TI, TN)); ST(&(x[WS(vs, 7) + WS(rs, 1)]), TO, ms, &(x[WS(vs, 7) + WS(rs, 1)])); ST(&(x[WS(vs, 1) + WS(rs, 1)]), TP, ms, &(x[WS(vs, 1) + WS(rs, 1)])); } { V T1l, T1m, T1f, T1k; T1f = VADD(T17, T1e); T1k = VBYI(VSUB(T1g, T1j)); T1l = BYTWJ(&(W[TWVL * 12]), VSUB(T1f, T1k)); T1m = BYTWJ(&(W[0]), VADD(T1f, T1k)); ST(&(x[WS(vs, 7) + WS(rs, 2)]), T1l, ms, &(x[WS(vs, 7)])); ST(&(x[WS(vs, 1) + WS(rs, 2)]), T1m, ms, &(x[WS(vs, 1)])); } { V T3t, T3u, T3n, T3s; T3n = VADD(T3f, T3m); T3s = VBYI(VSUB(T3o, T3r)); T3t = BYTWJ(&(W[TWVL * 12]), VSUB(T3n, T3s)); T3u = BYTWJ(&(W[0]), VADD(T3n, T3s)); ST(&(x[WS(vs, 7) + WS(rs, 6)]), T3t, ms, 
&(x[WS(vs, 7)])); ST(&(x[WS(vs, 1) + WS(rs, 6)]), T3u, ms, &(x[WS(vs, 1)])); } { V T2W, T2X, T2Q, T2V; T2Q = VADD(T2I, T2P); T2V = VBYI(VSUB(T2R, T2U)); T2W = BYTWJ(&(W[TWVL * 12]), VSUB(T2Q, T2V)); T2X = BYTWJ(&(W[0]), VADD(T2Q, T2V)); ST(&(x[WS(vs, 7) + WS(rs, 5)]), T2W, ms, &(x[WS(vs, 7) + WS(rs, 1)])); ST(&(x[WS(vs, 1) + WS(rs, 5)]), T2X, ms, &(x[WS(vs, 1) + WS(rs, 1)])); } { V T1p, T1q, T1n, T1o; T1n = VSUB(T17, T1e); T1o = VBYI(VADD(T1j, T1g)); T1p = BYTWJ(&(W[TWVL * 8]), VSUB(T1n, T1o)); T1q = BYTWJ(&(W[TWVL * 4]), VADD(T1n, T1o)); ST(&(x[WS(vs, 5) + WS(rs, 2)]), T1p, ms, &(x[WS(vs, 5)])); ST(&(x[WS(vs, 3) + WS(rs, 2)]), T1q, ms, &(x[WS(vs, 3)])); } { V Tl, Tm, Tj, Tk; Tj = VSUB(T3, Ta); Tk = VBYI(VADD(Tf, Tc)); Tl = BYTWJ(&(W[TWVL * 8]), VSUB(Tj, Tk)); Tm = BYTWJ(&(W[TWVL * 4]), VADD(Tj, Tk)); ST(&(x[WS(vs, 5)]), Tl, ms, &(x[WS(vs, 5)])); ST(&(x[WS(vs, 3)]), Tm, ms, &(x[WS(vs, 3)])); } { V T2t, T2u, T2r, T2s; T2r = VSUB(T2b, T2i); T2s = VBYI(VADD(T2n, T2k)); T2t = BYTWJ(&(W[TWVL * 8]), VSUB(T2r, T2s)); T2u = BYTWJ(&(W[TWVL * 4]), VADD(T2r, T2s)); ST(&(x[WS(vs, 5) + WS(rs, 4)]), T2t, ms, &(x[WS(vs, 5)])); ST(&(x[WS(vs, 3) + WS(rs, 4)]), T2u, ms, &(x[WS(vs, 3)])); } { V T3x, T3y, T3v, T3w; T3v = VSUB(T3f, T3m); T3w = VBYI(VADD(T3r, T3o)); T3x = BYTWJ(&(W[TWVL * 8]), VSUB(T3v, T3w)); T3y = BYTWJ(&(W[TWVL * 4]), VADD(T3v, T3w)); ST(&(x[WS(vs, 5) + WS(rs, 6)]), T3x, ms, &(x[WS(vs, 5)])); ST(&(x[WS(vs, 3) + WS(rs, 6)]), T3y, ms, &(x[WS(vs, 3)])); } { V TS, TT, TQ, TR; TQ = VSUB(TA, TH); TR = VBYI(VADD(TM, TJ)); TS = BYTWJ(&(W[TWVL * 8]), VSUB(TQ, TR)); TT = BYTWJ(&(W[TWVL * 4]), VADD(TQ, TR)); ST(&(x[WS(vs, 5) + WS(rs, 1)]), TS, ms, &(x[WS(vs, 5) + WS(rs, 1)])); ST(&(x[WS(vs, 3) + WS(rs, 1)]), TT, ms, &(x[WS(vs, 3) + WS(rs, 1)])); } { V T1W, T1X, T1U, T1V; T1U = VSUB(T1E, T1L); T1V = VBYI(VADD(T1Q, T1N)); T1W = BYTWJ(&(W[TWVL * 8]), VSUB(T1U, T1V)); T1X = BYTWJ(&(W[TWVL * 4]), VADD(T1U, T1V)); ST(&(x[WS(vs, 5) + WS(rs, 3)]), T1W, ms, &(x[WS(vs, 5) + WS(rs, 1)])); 
ST(&(x[WS(vs, 3) + WS(rs, 3)]), T1X, ms, &(x[WS(vs, 3) + WS(rs, 1)])); } { V T30, T31, T2Y, T2Z; T2Y = VSUB(T2I, T2P); T2Z = VBYI(VADD(T2U, T2R)); T30 = BYTWJ(&(W[TWVL * 8]), VSUB(T2Y, T2Z)); T31 = BYTWJ(&(W[TWVL * 4]), VADD(T2Y, T2Z)); ST(&(x[WS(vs, 5) + WS(rs, 5)]), T30, ms, &(x[WS(vs, 5) + WS(rs, 1)])); ST(&(x[WS(vs, 3) + WS(rs, 5)]), T31, ms, &(x[WS(vs, 3) + WS(rs, 1)])); } { V T44, T45, T42, T43; T42 = VSUB(T3M, T3T); T43 = VBYI(VADD(T3Y, T3V)); T44 = BYTWJ(&(W[TWVL * 8]), VSUB(T42, T43)); T45 = BYTWJ(&(W[TWVL * 4]), VADD(T42, T43)); ST(&(x[WS(vs, 5) + WS(rs, 7)]), T44, ms, &(x[WS(vs, 5) + WS(rs, 1)])); ST(&(x[WS(vs, 3) + WS(rs, 7)]), T45, ms, &(x[WS(vs, 3) + WS(rs, 1)])); } }}

/*
 * Table of tw_instr entries referenced by `desc` below: seven VTW entries
 * whose second argument runs from 1 to 7, followed by a closing
 * {TW_NEXT, VL, 0} entry.
 * NOTE(review): presumably this describes the twiddle factors the codelet
 * reads through W -- confirm against the tw_instr / VTW definitions.
 */
static const tw_instr twinstr[] = { VTW(0, 1), VTW(0, 2), VTW(0, 3), VTW(0, 4), VTW(0, 5), VTW(0, 6), VTW(0, 7), {TW_NEXT, VL, 0}};

/*
 * Descriptor handed to the registration call. Its initializers are, in
 * order: 8, the name string "q1fv_8", the twinstr table, &GENUS, the
 * aggregate {264, 128, 0, 0}, and three zeros.
 * NOTE(review): 8 is presumably the transform radix and {264, 128, 0, 0}
 * the operation counts -- confirm against the ct_desc declaration.
 */
static const ct_desc desc = { 8, "q1fv_8", twinstr, &GENUS, {264, 128, 0, 0}, 0, 0, 0 };

/*
 * Registration entry point: passes the q1fv_8 codelet together with its
 * descriptor to X(kdft_difsq_register) for planner `p`.
 */
void X(codelet_q1fv_8) (planner *p) { X(kdft_difsq_register) (p, q1fv_8, &desc);}
/* Closes a conditional whose opening directive is outside this excerpt. */
#endif /* HAVE_FMA */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -