📄 nnm.nc
字号:
"adc r10, r21 \n\t" "adc r11, r21 \n\t" "adc r12, r21 \n\t" "adc r25, r21 \n\t" "T32: mul r15, r18 \n\t" //t=2 "add r8, r0 \n\t" "adc r9, r1 \n\t" "brcc T33 \n\t" "adc r10, r21 \n\t" "brcc T33 \n\t" "adc r11, r21 \n\t" "adc r12, r21 \n\t" "adc r25, r21 \n\t" "T33: mul r16, r18 \n\t" //t=3 "add r9, r0 \n\t" "adc r10, r1 \n\t" "brcc T34 \n\t" "adc r11, r21 \n\t" "brcc T34 \n\t" "adc r12, r21 \n\t" "adc r25, r21 \n\t" "T34: mul r17, r18 \n\t" //t=4 "add r10, r0 \n\t" "adc r11, r1 \n\t" "brcc T35 \n\t" "adc r12, r21 \n\t" "adc r25, r21 \n\t" "T35: ld r18, %a2+ \n\t" //load c "mul r13, r18 \n\t" //t=0, b0*c "add r6, r0 \n\t" "adc r8, r1 \n\t" "brcc T41P \n\t" "adc r9, r21 \n\t" "brcc T41P \n\t" "adc r10, r21 \n\t" "adc r11, r21 \n\t" "adc r12, r21 \n\t" "adc r25, r21 \n\t" "T41P: mul r14, r18 \n\t" //t=1 "add r8, r0 \n\t" "adc r9, r1 \n\t" "brcc T42P \n\t" "adc r10, r21 \n\t" "brcc T42P \n\t" "adc r11, r21 \n\t" "adc r12, r21 \n\t" "adc r25, r21 \n\t" "T42P: mul r15, r18 \n\t" //t=2 "add r9, r0 \n\t" "adc r10, r1 \n\t" "brcc T43P \n\t" "adc r11, r21 \n\t" "brcc T43P \n\t" "adc r12, r21 \n\t" "adc r25, r21 \n\t" "T43P: mul r16, r18 \n\t" //t=3 "add r10, r0 \n\t" "adc r11, r1 \n\t" "brcc T44P \n\t" "adc r12, r21 \n\t" "adc r25, r21 \n\t" "T44P: mul r17, r18 \n\t" //t=4 "add r11, r0 \n\t" "adc r12, r1 \n\t" "adc r25, r21 \n\t" "cp r20, r19 \n\t" //i == j? "breq LOOP2_EXIT \n\t" "inc r20 \n\t" "jmp LOOP2 \n\t" "LOOP2_EXIT: st %a0+, r2 \n\t" //a[i*d] = r2 "st %a0+, r3 \n\t" "st %a0+, r4 \n\t" "st %a0+, r5 \n\t" "st %a0+, r6 \n\t" "movw r2, r8 \n\t" "movw r4, r10 \n\t" "mov r6, r12 \n\t" "mov r8, r25 \n\t" "clr r9 \n\t" "clr r10 \n\t" "clr r11 \n\t" "clr r12 \n\t" "clr r25 \n\t" "mul r19, r22 \n\t" //restore c base address "add r0, r22 \n\t" "sub %A2, r0 \n\t" "sbc %B2, r1 \n\t" "cp r19, %3 \n\t" //i == ceiling(n/d)-1? "breq LOOP1_EXIT \n\t" "inc r19 \n\t" "jmp LOOP1 \n\t" "LOOP1_EXIT: inc r19 \n\t" //i = 5 "add %A2, r22 \n\t" "adc %B2, r21 \n\t" //init base address c "mul %3, r22 \n\t" "add r0, r22 \n\t" "add %A1, r0 \n\t" "adc %B1, r1 \n\t" //load b "LOOP3: mov r20, r19 \n\t" //j = i-4 "sub r20, %3 \n\t" "LOOP4: ld r17, -%a1 \n\t" //load b0~b(d-1) "ld r16, -%a1 \n\t" "ld r15, -%a1 \n\t" "ld r14, -%a1 \n\t" "ld r13, -%a1 \n\t" "ld r18, %a2+ \n\t" "mul r13, r18 \n\t" //t=0 "add r2, r0 \n\t" "adc r3, r1 \n\t" "brcc T41 \n\t" "adc r4, r21 \n\t" "brcc T41 \n\t" "adc r5, r21 \n\t" "adc r6, r21 \n\t" "adc r8, r21 \n\t" "adc r9, r21 \n\t" "adc r10, r21 \n\t" "adc r11, r21 \n\t" "adc r12, r21 \n\t" "adc r25, r21 \n\t" "T41: mul r14, r18 \n\t" //t=1 "add r3, r0 \n\t" "adc r4, r1 \n\t" "brcc T42 \n\t" "adc r5, r21 \n\t" "brcc T42 \n\t" "adc r6, r21 \n\t" "adc r8, r21 \n\t" "adc r9, r21 \n\t" "adc r10, r21 \n\t" "adc r11, r21 \n\t" "adc r12, r21 \n\t" "adc r25, r21 \n\t" "T42: mul r15, r18 \n\t" //t=2 "add r4, r0 \n\t" "adc r5, r1 \n\t" "brcc T43 \n\t" "adc r6, r21 \n\t" "brcc T43 \n\t" "adc r8, r21 \n\t" "adc r9, r21 \n\t" "adc r10, r21 \n\t" "adc r11, r21 \n\t" "adc r12, r21 \n\t" "adc r25, r21 \n\t" "T43: mul r16, r18 \n\t" //t=3 "add r5, r0 \n\t" "adc r6, r1 \n\t" "brcc T44 \n\t" "adc r8, r21 \n\t" "brcc T44 \n\t" "adc r9, r21 \n\t" "adc r10, r21 \n\t" "adc r11, r21 \n\t" "adc r12, r21 \n\t" "adc r25, r21 \n\t" "T44: mul r17, r18 \n\t" //t=4 "add r6, r0 \n\t" "adc r8, r1 \n\t" "brcc T45 \n\t" "adc r9, r21 \n\t" "brcc T45 \n\t" "adc r10, r21 \n\t" "adc r11, r21 \n\t" "adc r12, r21 \n\t" "adc r25, r21 \n\t" "T45: ld r18, %a2+ \n\t" //load c "mul r13, r18 \n\t" //t=0, b0*c "add r3, r0 \n\t" "adc r4, r1 \n\t" "brcc T51 \n\t" "adc r5, r21 \n\t" "brcc T51 \n\t" "adc r6, r21 \n\t" "adc r8, r21 \n\t" "adc r9, r21 \n\t" "adc r10, r21 \n\t" "adc r11, r21 \n\t" "adc r12, r21 \n\t" "adc r25, r21 \n\t" "T51: mul r14, r18 \n\t" //t=1 "add r4, r0 \n\t" "adc r5, r1 \n\t" "brcc T52 \n\t" "adc r6, r21 \n\t" "brcc T52 \n\t" "adc r8, r21 \n\t" "adc r9, r21 \n\t" "adc r10, r21 \n\t" "adc r11, r21 \n\t" "adc r12, r21 \n\t" "adc r25, r21 \n\t" "T52: mul r15, r18 \n\t" //t=2 "add r5, r0 \n\t" "adc r6, r1 \n\t" "brcc T53 \n\t" "adc r8, r21 \n\t" "brcc T53 \n\t" "adc r9, r21 \n\t" "adc r10, r21 \n\t" "adc r11, r21 \n\t" "adc r12, r21 \n\t" "adc r25, r21 \n\t" "T53: mul r16, r18 \n\t" //t=3 "add r6, r0 \n\t" "adc r8, r1 \n\t" "brcc T54 \n\t" "adc r9, r21 \n\t" "brcc T54 \n\t" "adc r10, r21 \n\t" "adc r11, r21 \n\t" "adc r12, r21 \n\t" "adc r25, r21 \n\t" "T54: mul r17, r18 \n\t" "add r8, r0 \n\t" "adc r9, r1 \n\t" "brcc T55 \n\t" "adc r10, r21 \n\t" "brcc T55 \n\t" "adc r11, r21 \n\t" "adc r12, r21 \n\t" "adc r25, r21 \n\t" "T55: ld r18, %a2+ \n\t" //load c "mul r13, r18 \n\t" //t=0, b0*c "add r4, r0 \n\t" "adc r5, r1 \n\t" "brcc T61 \n\t" "adc r6, r21 \n\t" "brcc T61 \n\t" "adc r8, r21 \n\t" "adc r9, r21 \n\t" "adc r10, r21 \n\t" "adc r11, r21 \n\t" "adc r12, r21 \n\t" "adc r25, r21 \n\t" "T61: mul r14, r18 \n\t" //t=1 "add r5, r0 \n\t" "adc r6, r1 \n\t" "brcc T62 \n\t" "adc r8, r21 \n\t" "brcc T62 \n\t" "adc r9, r21 \n\t" "adc r10, r21 \n\t" "adc r11, r21 \n\t" "adc r12, r21 \n\t" "adc r25, r21 \n\t" "T62: mul r15, r18 \n\t" //t=2 "add r6, r0 \n\t" "adc r8, r1 \n\t" "brcc T63 \n\t" "adc r9, r21 \n\t" "brcc T63 \n\t" "adc r10, r21 \n\t" "adc r11, r21 \n\t" "adc r12, r21 \n\t" "adc r25, r21 \n\t" "T63: mul r16, r18 \n\t" //t=3 "add r8, r0 \n\t" "adc r9, r1 \n\t" "brcc T64 \n\t" "adc r10, r21 \n\t" "brcc T64 \n\t" "adc r11, r21 \n\t" "adc r12, r21 \n\t" "adc r25, r21 \n\t" "T64: mul r17, r18 \n\t" "add r9, r0 \n\t" "adc r10, r1 \n\t" "brcc T65 \n\t" "adc r11, r21 \n\t" "adc r12, r21 \n\t" "adc r25, r21 \n\t" "T65: ld r18, %a2+ \n\t" //load c "mul r13, r18 \n\t" //t=0, b0*c "add r5, r0 \n\t" "adc r6, r1 \n\t" "brcc T71 \n\t" "adc r8, r21 \n\t" "brcc T71 \n\t" "adc r9, r21 \n\t" "adc r10, r21 \n\t" "adc r11, r21 \n\t" "adc r12, r21 \n\t" "adc r25, r21 \n\t" "T71: mul r14, r18 \n\t" //t=1 "add r6, r0 \n\t" "adc r8, r1 \n\t" "brcc T72 \n\t" "adc r9, r21 \n\t" "brcc T72 \n\t" "adc r10, r21 \n\t" "adc r11, r21 \n\t" "adc r12, r21 \n\t" "adc r25, r21 \n\t" "T72: mul r15, r18 \n\t" //t=2 "add r8, r0 \n\t" "adc r9, r1 \n\t" "brcc T73 \n\t" "adc r10, r21 \n\t" "brcc T73 \n\t" "adc r11, r21 \n\t" "adc r12, r21 \n\t" "adc r25, r21 \n\t" "T73: mul r16, r18 \n\t" //t=3 "add r9, r0 \n\t" "adc r10, r1 \n\t" "brcc T74 \n\t" "adc r11, r21 \n\t" "brcc T74 \n\t" "adc r12, r21 \n\t" "adc r25, r21 \n\t" "T74: mul r17, r18 \n\t" //t=4 "add r10, r0 \n\t" "adc r11, r1 \n\t" "brcc T75 \n\t" "adc r12, r21 \n\t" "adc r25, r21 \n\t" "T75: ld r18, %a2+ \n\t" //load c "mul r13, r18 \n\t" //t=0, b0*c "add r6, r0 \n\t" "adc r8, r1 \n\t" "brcc T71P \n\t" "adc r9, r21 \n\t" "brcc T71P \n\t" "adc r10, r21 \n\t" "adc r11, r21 \n\t" "adc r12, r21 \n\t" "adc r25, r21 \n\t" "T71P: mul r14, r18 \n\t" //t=1 "add r8, r0 \n\t" "adc r9, r1 \n\t" "brcc T72P \n\t" "adc r10, r21 \n\t" "brcc T72P \n\t" "adc r11, r21 \n\t" "adc r12, r21 \n\t" "adc r25, r21 \n\t" "T72P: mul r15, r18 \n\t" //t=2 "add r9, r0 \n\t" "adc r10, r1 \n\t" "brcc T73P \n\t" "adc r11, r21 \n\t" "brcc T73P \n\t" "adc r12, r21 \n\t" "adc r25, r21 \n\t" "T73P: mul r16, r18 \n\t" //t=3 "add r10, r0 \n\t" "adc r11, r1 \n\t" "brcc T74P \n\t" "adc r12, r21 \n\t" "adc r25, r21 \n\t" "T74P: mul r17, r18 \n\t" //t=4 "add r11, r0 \n\t" "adc r12, r1 \n\t" "adc r25, r21 \n\t" "cp r20, %3 \n\t" //j=ceiling(n/d)-1? "breq LOOP4_EXIT \n\t" "inc r20 \n\t" "jmp LOOP4 \n\t" "LOOP4_EXIT: st %a0+, r2 \n\t" //a[i*d] = r2 "st %a0+, r3 \n\t" "st %a0+, r4 \n\t" "st %a0+, r5 \n\t" "st %a0+, r6 \n\t" "movw r2, r8 \n\t" "movw r4, r10 \n\t" "mov r6, r12 \n\t" "mov r8, r25 \n\t" "clr r9 \n\t" "clr r10 \n\t" "clr r11 \n\t" "clr r12 \n\t" "clr r25 \n\t" "mov r0, r24 \n\t" "sub r0, r19 \n\t" "mul r0, r22 \n\t" "sub %A2, r0 \n\t" "sbc %B2, r1 \n\t" //restore c "add r0, r22 \n\t" "add %A1, r0 \n\t" "adc %B1, r1 \n\t" //restore b "cp r19, r24 \n\t" "breq LOOP3_EXIT \n\t" "inc r19 \n\t" "jmp LOOP3 \n\t" "LOOP3_EXIT: st %a0+, r2 \n\t" "st %a0+, r3 \n\t" "st %a0+, r4 \n\t" "st %a0+, r5 \n\t" "st %a0+, r6 \n\t" "pop r29 \n\t" "pop r28 \n\t" "pop r1 \n\t" "pop r0 \n\t" : :"e"(a),"e"(b),"e"(c),"a"(n_d) :"r0","r1","r2","r3","r4","r5","r6","r8","r9","r10","r11","r12","r13","r14","r15","r16","r17","r18","r19","r20","r21","r22","r24","r25" );#endif #endif#ifdef TELOSB //should implement in assembly NN_DIGIT t[2*MAX_NN_DIGITS]; NN_UINT bDigits, cDigits, i; NN_AssignZero (t, 2 * digits); bDigits = NN_Digits (b, digits); cDigits = NN_Digits (c, digits); for (i = 0; i < bDigits; i++) t[i+cDigits] += NN_AddDigitMult (&t[i], &t[i], b[i], c, cDigits); NN_Assign (a, t, 2 * digits);#endif#else NN_DIGIT t[2*MAX_NN_DIGITS]; NN_UINT bDigits, cDigits, i; NN_AssignZero (t, 2 * digits); bDigits = NN_Digits (b, digits); cDigits = NN_Digits (c, digits); for (i = 0; i < bDigits; i++) t[i+cDigits] += NN_AddDigitMult (&t[i], &t[i], b[i], c, cDigits); NN_Assign (a, t, 2 * digits);#endif } void NN_Sqr(NN_DIGIT *a, NN_DIGIT *b, NN_UINT digits) __attribute__ ((noinline)) {#ifdef INLINE_ASM#ifdef MICA uint8_t n_d; n_d = digits/4; //r2~r10 //r11~r14 //r15 //r16 i //r17 j //r19 0 //r21:r20 b //r25 d asm volatile (//"push r0 \n\t" "push r1 \n\t" "push r28 \n\t" "push r29 \n\t" "clr r2 \n\t" //init 9 registers for accumulator "clr r3 \n\t" "clr r4 \n\t" "clr r5 \n\t" "clr r6 \n\t" "clr r7 \n\t" "clr r8 \n\t" "clr r9 \n\t" "clr r10 \n\t" //end of init "clr r19 \n\t" //zero "ldi r25, 4 \n\t" //d=4 "dec %2 \n\t" "ldi r16, 0 \n\t" //i "SQR_LOOP1: ldi r17, 0 \n\t" //j=0 "mul r16, r25 \n\t" "add r0, r25 \n\t" "movw r26, %A1 \n\t" "add r26, r0 \n\t" "adc r27, r1 \n\t" //load b, (i-j+1)*d-1 "movw r28, %A1 \n\t" //load c "SQR_LOOP2: mov r0, r16 \n\t" "sub r0, r17 \n\t" "cp r0, r17 \n\t" "breq JMP_EQ_1 \n\t" "brlo JMP_SQR_LOOP2_EXIT \n\t" "jmp SQR_LOOP2_1 \n\t" "JMP_EQ_1: jmp EQ_1 \n\t" "JMP_SQR_LOOP2_EXIT: jmp SQR_LOOP2_EXIT \n\t" "SQR_LOOP2_1: ld r14, -X \n\t" //load b0~b(d-1) "ld r13, -X \n\t" "ld r12, -X \n\t" "ld r11, -X \n\t" "ld r15, Y+ \n\t" //load c[j*d+0] "mul r11, r15 \n\t" //t=0 "clr r24 \n\t" "lsl r0 \n\t" "rol r1 \n\t" "rol r24 \n\t" "add r2, r0 \n\t" "adc r3, r1 \n\t" "adc r4, r24 \n\t" "brcc SQR_T01 \n\t" "adc r5, r19 \n\t" "adc r6, r19 \n\t" "adc r7, r19 \n\t" "adc r8, r19 \n\t" "adc r9, r19 \n\t" "adc r10, r19 \n\t" "SQR_T01: mul r12, r15 \n\t" //t=1 "clr r24 \n\t" "lsl r0 \n\t" "rol r1 \n\t" "rol r24 \n\t" "add r3, r0 \n\t" "adc r4, r1 \n\t" "adc r5, r24 \n\t" "brcc SQR_T02 \n\t" "adc r6, r19 \n\t" "adc r7, r19 \n\t" "adc r8, r19 \n\t" "adc r9, r19 \n\t" "adc r10, r19 \n\t" "SQR_T02: mul r13, r15 \n\t" //t=2 "clr r24 \n\t" "lsl r0 \n\t" "rol r1 \n\t" "rol r24 \n\t" "add r4, r0 \n\t" "adc r5, r1 \n\t" "adc r6, r24 \n\t" "brcc SQR_T03 \n\t"
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -