* File: gt.asm
* (The "font size" label that followed the file name was web code-viewer residue, not source.)
*-----------------------------------------------*
* www pass: visible tail of the setup, then the loop kernel.
*
* For every input vertex (x,y,z,w) the loop forms the sum
*     Yy + Xx + Ww + Zz
* (called "W" in the comments below) from the coefficients held in
* A13 (Y), A12 (X), B13 (W), B12 (Z), computes a refined reciprocal
* of that sum, and stores both values three times per vertex.
*
* Registers A12-A15, B12-B13, A0, B2 and B3 are initialised before
* this excerpt starts. NOTE(review): the arithmetic below suggests
* A0 = 1.0 and B3 = 2.0 - confirm against the missing setup code.
*
* Input layout seen here: each vertex occupies two doublewords;
* A14 walks the (y,x) doublewords and B14 = A14 + 8 walks the (w,z)
* doublewords, both advancing by 2 doublewords (16 bytes) per vertex.
* Output layout seen here: each STW post-increments its pointer by
* 6 words and there are three stores per iteration on each side, so
* the output pointers advance 18 words (72 bytes) per vertex.
* B15 = A15 + 4, so the reciprocal lands in the word after the sum.
*-----------------------------------------------*
ADD .S2X 8,A14,B14 ; B14 = A14 + 8: INP ptr for (z,w) half
ADD .S2X 4,A15,B15 ; B15 = A15 + 4: OUT ptr for 1/W
|| ZERO .L2 B0 ; init store count-up (iteration counter)
|| ZERO .D1 A1 ; init store cond1 (W): stores disabled
|| B .S1 www ; prime the first branch back to www
ZERO .D2 B1 ; init store cond2 (1/W): stores disabled
*-----------------------------------------------*
* Loop kernel: 4 execute packets per iteration, each padded to
* 8 instructions with NOPs. The operations in one pass through
* the kernel belong to different vertices (software pipeline), so
* the stores are predicated off until valid results exist:
*   A1 = (6 < B0)  enables the W stores,
*   B1 = (10 < B0) enables the 1/W stores.
* B2 is the remaining-iteration counter; it is decremented while
* non-zero and the loop branch is taken while it is non-zero.
*-----------------------------------------------*
* Packet 1: Y/W products, final sum, first store pair, count++.
www: MPYSP .M1 A13,A11,A9 ; Y * y = Yy
|| MPYSP .M2 B13,B11,B9 ; W * w = Ww
|| ADDSP .L1X A7,B7,A6 ; yx+wz = W
||[A1] STW .D1 A5,*A15++[6] ; store W (copy 1 of 3)
||[B1] STW .D2 B4,*B15++[6] ; store 1/W (copy 1 of 3)
|| ADDK .S2 1,B0 ; incr store count
|| NOP
|| NOP
*
* Packet 2: X/Z products, partial sums, second store pair.
MPYSP .M1 A12,A10,A8 ; X * x = Xx
|| MPYSP .M2 B12,B10,B8 ; Z * z = Zz
|| ADDSP .L1 A9,A8,A7 ; Yy+Xx = yx
|| ADDSP .L2 B9,B8,B7 ; Ww+Zz = wz
||[A1] STW .D1 A5,*A15++[6] ; store W (copy 2 of 3)
||[B1] STW .D2 B4,*B15++[6] ; store 1/W (copy 2 of 3)
||[B2] ADDK .S2 -1,B2 ; decr branch count (stops at 0)
|| NOP
*
* Packet 3: reciprocal refinement, third store pair, loop branch.
* m1 = W * seed and d = B3 - m1 form the correction factor that
* packet 4 multiplies back into the seed.
MV .S1 A6,A5 ; W -> A5 (value stored and refined)
||[A1] STW .D1 A5,*A15++[6] ; store W (copy 3 of 3)
|| MPYSP .M1 A5,A4,A2 ; W * 1/w = m1
|| SUBSP .L2X B3,A2,B6 ; 2.0 - m1 = d (B3 presumably 2.0)
||[B1] STW .D2 B4,*B15++[6] ; store 1/W (copy 3 of 3)
|| CMPLT .L1X 6,B0,A1 ; store cond1 (W): A1 = (6 < B0)
||[B2] B .S2 www ; process next vertex while B2 != 0
|| NOP
*
* Packet 4: fetch next vertex, reciprocal seed, finish refinement.
LDDW .D1 *A14++[2],A11:A10 ; load INP (y,x), advance 16 bytes
|| LDDW .D2 *B14++[2],B11:B10 ; load INP (w,z), advance 16 bytes
|| RCPSP .S1 A6,A4 ; 1/w (seed) from the sum in A6
|| MPYSP .M1 A0,A4,A3 ; A0 * seed -> A3 (A0 presumably 1.0)
|| MV .S2X A3,B5 ; move seed copy to the B side
|| MPYSP .M2 B5,B6,B4 ; 1/w * d = 1/W (refined reciprocal)
|| CMPLT .L2 10,B0,B1 ; store cond2 (1/W): B1 = (10 < B0)
|| NOP
*-----------------------------------------------*
*---------xxx: loop (4 cycles per vertex)------*
*
* xxx pass: setup followed by the loop kernel.
*
* Setup reads four words from the memory block labelled "stack":
*   word 16 -> A2  (SIZ; used to derive the branch count)
*   word 13 -> A0  (TMT pointer; base of the coefficient loads)
*   word 14 -> A14 (INP pointer)
*   word 15 -> A15 (OUT pointer)
* and loads six coefficients from doublewords 0..2 at A0.
* Lines tagged ";xyz;" appear to be the ones that differ between
* the xxx, yyy and zzz passes - presumably a marker left by the
* author; TODO confirm.
*
* Per vertex the kernel computes, in the original comments' names:
*   h  = Yy + Xx + Ww + Zz   (coefficients A13,A12,B13,B12)
*   c0 = W + h,  c1 = W - h  (W read back from the OUT record)
*   v  = (h * 1/W) * S + T   (S = B0, T = B1)
* The OUT pointers advance 72 bytes per vertex.
*
MVK .S1 stack,A1 ; A1 = low half of address of stack
MVKH .S1 stack,A1 ; A1 = full address of stack (high half)
LDW .D1 *+A1[16],A2 ; load SIZ
LDW .D1 *+A1[13],A0 ; load TMT pntr
LDW .D1 *+A1[14],A14 ; load INP pntr
LDW .D1 *+A1[15],A15 ; load OUT pntr
ZERO .L1 A1 ; init store condition (h,c0,c1 stores off)
* NOTE(review): the source A2 is in the A register file but the
* unit is written .S2 without the X (cross-path) suffix, unlike
* "ADD .S2X 8,A14,B14" in the kernel below - confirm the assembler
* accepts this form.
ADD .S2 9,A2,B2 ; init branch count/cond: B2 = SIZ + 9
* NOTE(review): the next two loads with B-file destinations are
* issued on .D1 with no data-path (T2) suffix - confirm the
* assembler accepts this form.
LDDW .D1 *+A0[0],A13:A12 ;xyz; load TMTw (Y,X)
LDDW .D1 *+A0[1],B13:B12 ;xyz; load TMTw (W,Z)
LDDW .D1 *+A0[2],B1:B0 ;xyz; load TMTw (T,S)
|| MV .S2X A15,B15 ; init OUT ptr (c1,v): B15 = OUT
* Both OUT pointers start behind OUT: 576 = 8 * 72 and
* 432 = 6 * 72 bytes, i.e. 8 and 6 vertex records back.
* Presumably this offsets the pipeline latency of the kernel -
* TODO confirm.
ADDK .S2 -576,B15 ; init OUT ptr (c1,v)
|| ADDK .S1 -432,A15 ; init OUT ptr (w,1/w,h,c0)
B .S1 xxx ;xyz; prime the first branch
MVK .S1 10,A2 ; init store count-down (A2 reused)
*-----------------------------------------------*
* Loop kernel: 4 execute packets of 8 instructions each.
* Store predicates:
*   v is stored only once the count-down A2 has reached 0;
*   h, c0, c1 are stored only while A1 = (4 > A2) is true.
* B2 is decremented while non-zero and gates the loop branch.
*-----------------------------------------------*
* Packet 1: Y/W products, final sum h, read back (1/W,W), store v.
xxx: MPYSP .M1 A13,A11,A9 ;xyz; Y * y = Yy
|| MPYSP .M2 B13,B11,B9 ; W * w = Ww
|| ADDSP .L2X A7,B7,B6 ; yx+wz = h
|| LDDW .D1 *+A15[9],A5:A4 ;xyz; load (1/W,W) at A15 + 72 bytes
||[!A2] STW .D2 B3,*-B15[31] ;xyz; store (v) when A2 == 0
|| [A2] ADDK .S1 -1,A2 ; store cond (v) and
|| NOP ; decr store count
|| NOP
*
* Packet 2: X/Z products, partial sums, store h, rebuild B14.
MPYSP .M1 A12,A10,A8 ; X * x = Xx
|| MPYSP .M2 B12,B10,B8 ; Z * z = Zz
|| ADDSP .L1 A9,A8,A7 ; Yy+Xx = yx
|| ADDSP .L2 B9,B8,B7 ; Ww+Zz = wz
|| MV .S1X B6,A6 ; h -> h (copy to A side for the store)
|| [A1] STW .D1 A6,*-A15[16] ;xyz; store (h)
|| ADD .S2X 8,A14,B14 ;^^^; B14 = A14 + 8: INP ptr (z,w)
|| [B2] ADD .D2 -1,B2,B2 ; decr branch count (stops at 0)
*
* Packet 3: c0/c1, their stores, advance B15, loop branch.
* The c1 store and the ADDK on B15 share a packet, so the store
* uses the value of B15 from before the increment.
ADDSP .L1X A4,B6,A3 ; w + h = c0
|| SUBSP .L2X A4,B6,B5 ; w - h = c1
|| [A1] STW .D1 A3,*-A15[15] ;xyz; store (c0)
|| [A1] STW .D2 B5,*+B15[22] ;xyz; store (c1)
|| ADDK .S2 72,B15 ; point to next OUT vertex
|| [B2] B .S1 xxx ;xyz; process next vertex while B2 != 0
|| NOP
|| NOP
*
* Packet 4: fetch next vertex, v chain, update A1, advance A15.
* A0 is overwritten with v0 here; it no longer holds the TMT
* pointer once the loop is running.
LDDW .D1 *A14++[2],A11:A10 ; load INP (y,x), advance 16 bytes
|| LDDW .D2 *B14++[2],B11:B10 ; load INP (w,z), advance 16 bytes
|| MPYSP .M1X B6,A5,A0 ; h * 1/w = v0
|| MPYSP .M2X A0,B0,B4 ; v0 * S = v1
|| ADDSP .L2 B4,B1,B3 ; v1 + T = v2 (stored as v)
|| CMPGT .L1 4,A2,A1 ; store cond (h,c0,c1): A1 = (4 > A2)
|| ADDK .S1 72,A15 ; point to next OUT vertex
|| NOP
*-----------------------------------------------*
*---------yyy: loop (4 cycles per vertex)------*
*
* yyy pass: setup followed by the loop kernel.
*
* Setup reads four words from the memory block labelled "stack":
*   word 16 -> A2  (SIZ; used to derive the branch count)
*   word 13 -> A0  (TMT pointer; base of the coefficient loads)
*   word 14 -> A14 (INP pointer)
*   word 15 -> A15 (OUT pointer)
* and loads six coefficients from doublewords 3..5 at A0.
* Lines tagged ";xyz;" appear to be the ones that differ between
* the xxx, yyy and zzz passes - presumably a marker left by the
* author; TODO confirm.
*
* Per vertex the kernel computes, in the original comments' names:
*   h  = Yy + Xx + Ww + Zz   (coefficients A13,A12,B13,B12)
*   c0 = W + h,  c1 = W - h  (W read back from the OUT record)
*   v  = (h * 1/W) * S + T   (S = B0, T = B1)
* The OUT pointers advance 72 bytes per vertex. Compared with the
* xxx pass, every OUT offset in this kernel is 6 words (24 bytes)
* further along the vertex record.
*
MVK .S1 stack,A1 ; A1 = low half of address of stack
MVKH .S1 stack,A1 ; A1 = full address of stack (high half)
LDW .D1 *+A1[16],A2 ; load SIZ
LDW .D1 *+A1[13],A0 ; load TMT pntr
LDW .D1 *+A1[14],A14 ; load INP pntr
LDW .D1 *+A1[15],A15 ; load OUT pntr
ZERO .L1 A1 ; init store condition (h,c0,c1 stores off)
* NOTE(review): the source A2 is in the A register file but the
* unit is written .S2 without the X (cross-path) suffix, unlike
* "ADD .S2X 8,A14,B14" in the kernel below - confirm the assembler
* accepts this form.
ADD .S2 9,A2,B2 ; init branch count/cond: B2 = SIZ + 9
* NOTE(review): the next two loads with B-file destinations are
* issued on .D1 with no data-path (T2) suffix - confirm the
* assembler accepts this form.
LDDW .D1 *+A0[3],A13:A12 ;xyz; load TMTw (Y,X)
LDDW .D1 *+A0[4],B13:B12 ;xyz; load TMTw (W,Z)
LDDW .D1 *+A0[5],B1:B0 ;xyz; load TMTw (T,S)
|| MV .S2X A15,B15 ; init OUT ptr (c1,v): B15 = OUT
* Both OUT pointers start behind OUT: 576 = 8 * 72 and
* 432 = 6 * 72 bytes, i.e. 8 and 6 vertex records back.
* Presumably this offsets the pipeline latency of the kernel -
* TODO confirm.
ADDK .S2 -576,B15 ; init OUT ptr (c1,v)
|| ADDK .S1 -432,A15 ; init OUT ptr (w,1/w,h,c0)
B .S1 yyy ;xyz; prime the first branch
MVK .S1 10,A2 ; init store count-down (A2 reused)
*-----------------------------------------------*
* Loop kernel: 4 execute packets of 8 instructions each.
* Store predicates:
*   v is stored only once the count-down A2 has reached 0;
*   h, c0, c1 are stored only while A1 = (4 > A2) is true.
* B2 is decremented while non-zero and gates the loop branch.
*-----------------------------------------------*
* Packet 1: Y/W products, final sum h, read back (1/W,W), store v.
yyy: MPYSP .M1 A13,A11,A9 ;xyz; Y * y = Yy
|| MPYSP .M2 B13,B11,B9 ; W * w = Ww
|| ADDSP .L2X A7,B7,B6 ; yx+wz = h
|| LDDW .D1 *+A15[12],A5:A4 ;xyz; load (1/W,W) at A15 + 96 bytes
||[!A2] STW .D2 B3,*-B15[25] ;xyz; store (v) when A2 == 0
|| [A2] ADDK .S1 -1,A2 ; store cond (v) and
|| NOP ; decr store count
|| NOP
*
* Packet 2: X/Z products, partial sums, store h, rebuild B14.
MPYSP .M1 A12,A10,A8 ; X * x = Xx
|| MPYSP .M2 B12,B10,B8 ; Z * z = Zz
|| ADDSP .L1 A9,A8,A7 ; Yy+Xx = yx
|| ADDSP .L2 B9,B8,B7 ; Ww+Zz = wz
|| MV .S1X B6,A6 ; h -> h (copy to A side for the store)
|| [A1] STW .D1 A6,*-A15[10] ;xyz; store (h)
|| ADD .S2X 8,A14,B14 ;^^^; B14 = A14 + 8: INP ptr (z,w)
|| [B2] ADD .D2 -1,B2,B2 ; decr branch count (stops at 0)
*
* Packet 3: c0/c1, their stores, advance B15, loop branch.
* The c1 store and the ADDK on B15 share a packet, so the store
* uses the value of B15 from before the increment.
ADDSP .L1X A4,B6,A3 ; w + h = c0
|| SUBSP .L2X A4,B6,B5 ; w - h = c1
|| [A1] STW .D1 A3,*-A15[9] ;xyz; store (c0)
|| [A1] STW .D2 B5,*+B15[28] ;xyz; store (c1)
|| ADDK .S2 72,B15 ; point to next OUT vertex
|| [B2] B .S1 yyy ;xyz; process next vertex while B2 != 0
|| NOP
|| NOP
*
* Packet 4: fetch next vertex, v chain, update A1, advance A15.
* A0 is overwritten with v0 here; it no longer holds the TMT
* pointer once the loop is running.
LDDW .D1 *A14++[2],A11:A10 ; load INP (y,x), advance 16 bytes
|| LDDW .D2 *B14++[2],B11:B10 ; load INP (w,z), advance 16 bytes
|| MPYSP .M1X B6,A5,A0 ; h * 1/w = v0
|| MPYSP .M2X A0,B0,B4 ; v0 * S = v1
|| ADDSP .L2 B4,B1,B3 ; v1 + T = v2 (stored as v)
|| CMPGT .L1 4,A2,A1 ; store cond (h,c0,c1): A1 = (4 > A2)
|| ADDK .S1 72,A15 ; point to next OUT vertex
|| NOP
*-----------------------------------------------*
*----------zzz: loop (4 cycles per vertex)------*
*
* zzz pass: setup followed by the loop kernel.
*
* Setup reads four words from the memory block labelled "stack":
*   word 16 -> A2  (SIZ; used to derive the branch count)
*   word 13 -> A0  (TMT pointer; base of the coefficient loads)
*   word 14 -> A14 (INP pointer)
*   word 15 -> A15 (OUT pointer)
* and loads six coefficients from doublewords 6..8 at A0.
* Lines tagged ";xyz;" appear to be the ones that differ between
* the xxx, yyy and zzz passes - presumably a marker left by the
* author; TODO confirm.
*
* Per vertex the kernel computes, in the original comments' names:
*   h  = Yy + Xx + Ww + Zz   (coefficients A13,A12,B13,B12)
*   c0 = W + h,  c1 = W - h  (W read back from the OUT record)
*   v  = (h * 1/W) * S + T   (S = B0, T = B1)
* The OUT pointers advance 72 bytes per vertex. Compared with the
* yyy pass, every OUT offset in this kernel is 6 words (24 bytes)
* further along the vertex record.
*
MVK .S1 stack,A1 ; A1 = low half of address of stack
MVKH .S1 stack,A1 ; A1 = full address of stack (high half)
LDW .D1 *+A1[16],A2 ; load SIZ
LDW .D1 *+A1[13],A0 ; load TMT pntr
LDW .D1 *+A1[14],A14 ; load INP pntr
LDW .D1 *+A1[15],A15 ; load OUT pntr
ZERO .L1 A1 ; init store condition (h,c0,c1 stores off)
* NOTE(review): the source A2 is in the A register file but the
* unit is written .S2 without the X (cross-path) suffix, unlike
* "ADD .S2X 8,A14,B14" in the kernel below - confirm the assembler
* accepts this form.
ADD .S2 9,A2,B2 ; init branch count/cond: B2 = SIZ + 9
* NOTE(review): the next two loads with B-file destinations are
* issued on .D1 with no data-path (T2) suffix - confirm the
* assembler accepts this form.
LDDW .D1 *+A0[6],A13:A12 ;xyz; load TMTw (Y,X)
LDDW .D1 *+A0[7],B13:B12 ;xyz; load TMTw (W,Z)
LDDW .D1 *+A0[8],B1:B0 ;xyz; load TMTw (T,S)
|| MV .S2X A15,B15 ; init OUT ptr (c1,v): B15 = OUT
* Both OUT pointers start behind OUT: 576 = 8 * 72 and
* 432 = 6 * 72 bytes, i.e. 8 and 6 vertex records back.
* Presumably this offsets the pipeline latency of the kernel -
* TODO confirm.
ADDK .S2 -576,B15 ; init OUT ptr (c1,v)
|| ADDK .S1 -432,A15 ; init OUT ptr (w,1/w,h,c0)
B .S1 zzz ;xyz; prime the first branch
MVK .S1 10,A2 ; init store count-down (A2 reused)
*-----------------------------------------------*
* Loop kernel: 4 execute packets of 8 instructions each.
* Store predicates:
*   v is stored only once the count-down A2 has reached 0;
*   h, c0, c1 are stored only while A1 = (4 > A2) is true.
* B2 is decremented while non-zero and gates the loop branch.
*-----------------------------------------------*
* Packet 1: Y/W products, final sum h, read back (1/W,W), store v.
zzz: MPYSP .M1 A13,A11,A9 ;xyz; Y * y = Yy
|| MPYSP .M2 B13,B11,B9 ; W * w = Ww
|| ADDSP .L2X A7,B7,B6 ; yx+wz = h
|| LDDW .D1 *+A15[15],A5:A4 ;xyz; load (1/W,W) at A15 + 120 bytes
||[!A2] STW .D2 B3,*-B15[19] ;xyz; store (v) when A2 == 0
|| [A2] ADDK .S1 -1,A2 ; store cond (v) and
|| NOP ; decr store count
|| NOP
*
* Packet 2: X/Z products, partial sums, store h, rebuild B14.
MPYSP .M1 A12,A10,A8 ; X * x = Xx
|| MPYSP .M2 B12,B10,B8 ; Z * z = Zz
|| ADDSP .L1 A9,A8,A7 ; Yy+Xx = yx
|| ADDSP .L2 B9,B8,B7 ; Ww+Zz = wz
|| MV .S1X B6,A6 ; h -> h (copy to A side for the store)
|| [A1] STW .D1 A6,*-A15[4] ;xyz; store (h)
|| ADD .S2X 8,A14,B14 ;^^^; B14 = A14 + 8: INP ptr (z,w)
|| [B2] ADD .D2 -1,B2,B2 ; decr branch count (stops at 0)
*
* Packet 3: c0/c1, their stores, advance B15, loop branch.
* The c1 store and the ADDK on B15 share a packet, so the store
* uses the value of B15 from before the increment.
* NOTE(review): the c1 offset 34 does not fit in a 5-bit constant;
* it presumably relies on the long-offset STW form that is only
* available with B14/B15 as base - confirm.
ADDSP .L1X A4,B6,A3 ; w + h = c0
|| SUBSP .L2X A4,B6,B5 ; w - h = c1
|| [A1] STW .D1 A3,*-A15[3] ;xyz; store (c0)
|| [A1] STW .D2 B5,*+B15[34] ;xyz; store (c1)
|| ADDK .S2 72,B15 ; point to next OUT vertex
|| [B2] B .S1 zzz ;xyz; process next vertex while B2 != 0
|| NOP
|| NOP
*
* Packet 4: fetch next vertex, v chain, update A1, advance A15.
* A0 is overwritten with v0 here; it no longer holds the TMT
* pointer once the loop is running.
LDDW .D1 *A14++[2],A11:A10 ; load INP (y,x), advance 16 bytes
|| LDDW .D2 *B14++[2],B11:B10 ; load INP (w,z), advance 16 bytes
|| MPYSP .M1X B6,A5,A0 ; h * 1/w = v0
|| MPYSP .M2X A0,B0,B4 ; v0 * S = v1
|| ADDSP .L2 B4,B1,B3 ; v1 + T = v2 (stored as v)
|| CMPGT .L1 4,A2,A1 ; store cond (h,c0,c1): A1 = (4 > A2)
|| ADDK .S1 72,A15 ; point to next OUT vertex
|| NOP
*-----------------------------------------------*
* Epilogue: restore the saved registers and return.
* Both A0 and B0 are set to the address of the memory block
* labelled "stack" so that the A-side and B-side loads can issue
* in parallel. Words 0..12 of that block are read at fixed
* offsets (no pointer is adjusted):
*   word 0 -> B3 (return address), then A10,B10,A11,B11,A12,B12,
*   A13,B13,A14,B14,A15,B15 from words 1..12 in that order.
* The matching save code is not part of this excerpt.
MVK .S1 stack,A0 ; A0 = low half of address of stack
MVKH .S1 stack,A0 ; A0 = full address of stack (high half)
MVK .S2 stack,B0 ; B0 = low half of address of stack
MVKH .S2 stack,B0 ; B0 = full address of stack (high half)
LDW .D2 *B0,B3 ; pop return addr off stack (word 0)
LDW .D1 *+A0[1],A10 ; pop A10 off stack
|| LDW .D2 *+B0[2],B10 ; pop B10 off stack
LDW .D1 *+A0[3],A11 ; pop A11 off stack
|| LDW .D2 *+B0[4],B11 ; pop B11 off stack
LDW .D1 *+A0[5],A12 ; pop A12 off stack
|| LDW .D2 *+B0[6],B12 ; pop B12 off stack
LDW .D1 *+A0[7],A13 ; pop A13 off stack
|| LDW .D2 *+B0[8],B13 ; pop B13 off stack
LDW .D1 *+A0[9],A14 ; pop A14 off stack
|| LDW .D2 *+B0[10],B14 ; pop B14 off stack
LDW .D1 *+A0[11],A15 ; pop A15 off stack
|| LDW .D2 *+B0[12],B15 ; pop B15 off stack
* The branch is issued seven cycles after the load of B3, so the
* return address has arrived before B3 is read as the target.
B .S2 B3 ; return to calling C program
NOP 6 ; wait 6 cycles for the last
; pop to occur before returning
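* --- Web code-viewer residue (not part of the program) ---
* Keyboard shortcuts:
*   Copy code          Ctrl + C
*   Search code        Ctrl + F
*   Full-screen mode   F11
*   Switch theme       Ctrl + Shift + D
*   Show shortcuts     ?
*   Increase font size Ctrl + =
*   Decrease font size Ctrl + -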