📄 choose_table.nas
字号:
;-----------------------------------------------------------------------
; New bit-count routine (MMX).
;   Part of this code is derived from
;   new GOGO-no-coda (1999, 2000)
;   Copyright (C) 1999 shigeo
;   modified by Keiichi SAKAI
;
; Syntax : NASM, Intel operand order (op dst, src).
; Target : 32-bit x86 with MMX; arguments are read from the stack
;          ([esp+4], [esp+8], [esp+12]).
; Macros : globaldef / externdef / segment_data / segment_code / pmov /
;          end are expected to come from "nasm.h" (not visible here).
;-----------------------------------------------------------------------

%include "nasm.h"

        globaldef       choose_table_MMX
        ; NOTE(review): MMX_masking is exported here but no definition is
        ; visible in this file -- confirm it is still needed.
        globaldef       MMX_masking

        externdef       largetbl
        externdef       t1l
        externdef       table23
        externdef       table56

        segment_data
        align   16

; Per-word constants used by the "max > 15" path of choose_table_MMX.
D14_14_14_14    dd      0x000E000E, 0x000E000E  ; compare threshold: word > 14
D15_15_15_15    dd      0xfff0fff0, 0xfff0fff0  ; paddusw bias: min(ix,15)+0xfff0

; pmaddwd multipliers turning a packed (x, y) word pair into one index.
mul_add         dd      0x00010010, 0x00010010  ; x*16 + y
mul_add23       dd      0x00010003, 0x00010003  ; x*3  + y
mul_add56       dd      0x00010004, 0x00010004  ; x*4  + y

; tableDEF: 256 entries of 8 bytes, read by from3 as [tableDEF + (x*16+y)*8].
; The first dword of an entry carries two 16-bit sums (high/low half), the
; second dword carries a third sum.
tableDEF:
        dd      0x00010003,0x01,0x00050005,0x05,0x00070006,0x07,0x00090008,0x08,0x000a0008, 0x09
        dd      0x000a0009,0x0a,0x000b000a,0x0a,0x000b000a,0x0b,0x000c000a,0x0a,0x000c000b, 0x0b
        dd      0x000c000b,0x0c,0x000d000c,0x0c,0x000d000c,0x0d,0x000d000c,0x0d,0x000e000d, 0x0e
        dd      0x000b000e,0x0e,0x00040005,0x04,0x00060005,0x06,0x00080007,0x08,0x00090008, 0x09
        dd      0x000a0009,0x0a,0x000b0009,0x0a,0x000b000a,0x0b,0x000b000a,0x0b,0x000c000a, 0x0b
        dd      0x000c000b,0x0b,0x000c000b,0x0c,0x000d000c,0x0c,0x000e000c,0x0d,0x000d000c, 0x0e
        dd      0x000e000d,0x0e,0x000b000d,0x0e,0x00070006,0x07,0x00080007,0x08,0x00090007, 0x09
        dd      0x000a0008,0x0a,0x000b0009,0x0b,0x000b0009,0x0b,0x000c000a,0x0c,0x000c000a, 0x0c
        dd      0x000d000a,0x0b,0x000c000b,0x0c,0x000d000b,0x0c,0x000d000c,0x0d,0x000d000c, 0x0d
        dd      0x000e000d,0x0e,0x000e000d,0x0f,0x000c000d,0x0f,0x00090007,0x08,0x00090008, 0x09
        dd      0x000a0008,0x0a,0x000b0009,0x0b,0x000b0009,0x0b,0x000c000a,0x0c,0x000c000a, 0x0c
        dd      0x000c000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0d,0x000e000c,0x0d,0x000e000c, 0x0d
        dd      0x000e000c,0x0d,0x000f000d,0x0e,0x000f000d,0x0f,0x000d000d,0x0f,0x000a0008, 0x09
        dd      0x000a0008,0x09,0x000b0009,0x0b,0x000b0009,0x0b,0x000c000a,0x0c,0x000c000a, 0x0c
        dd      0x000d000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0c,0x000e000b,0x0d,0x000e000c, 0x0d
        dd      0x000e000c,0x0e,0x000f000c,0x0e,0x000f000d,0x0f,0x000f000d,0x0f,0x000c000d, 0x10
        dd      0x000a0009,0x0a,0x000a0009,0x0a,0x000b0009,0x0b,0x000b000a,0x0c,0x000c000a, 0x0c
        dd      0x000d000a,0x0c,0x000d000b,0x0d,0x000e000b,0x0d,0x000d000b,0x0d,0x000e000b, 0x0d
        dd      0x000e000c,0x0e,0x000f000c,0x0d,0x000f000d,0x0f,0x000f000d,0x0f,0x0010000d, 0x10
        dd      0x000d000e,0x10,0x000b000a,0x0a,0x000b0009,0x0b,0x000b000a,0x0c,0x000c000a, 0x0c
        dd      0x000d000a,0x0d,0x000d000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0d,0x000e000b, 0x0d
        dd      0x000e000c,0x0e,0x000e000c,0x0e,0x000e000c,0x0e,0x000f000d,0x0f,0x000f000d, 0x0f
        dd      0x0010000e,0x10,0x000d000e,0x10,0x000b000a,0x0b,0x000b000a,0x0b,0x000c000a, 0x0c
        dd      0x000c000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0e,0x000e000c, 0x0e
        dd      0x000e000c,0x0e,0x000f000c,0x0e,0x000f000c,0x0f,0x000f000c,0x0f,0x000f000d, 0x0f
        dd      0x0011000d,0x10,0x0011000d,0x12,0x000d000e,0x12,0x000b000a,0x0a,0x000c000a, 0x0a
        dd      0x000c000a,0x0b,0x000d000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0d,0x000e000b, 0x0d
        dd      0x000e000c,0x0e,0x000f000c,0x0e,0x000f000c,0x0e,0x000f000c,0x0e,0x000f000d, 0x0f
        dd      0x0010000d,0x0f,0x0010000e,0x10,0x0010000e,0x11,0x000d000e,0x11,0x000c000a, 0x0b
        dd      0x000c000a,0x0b,0x000c000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0d,0x000e000b, 0x0d
        dd      0x000e000c,0x0d,0x000f000c,0x0f,0x000f000c,0x0e,0x000f000d,0x0f,0x000f000d, 0x0f
        dd      0x0010000d,0x10,0x000f000d,0x10,0x0010000e,0x10,0x000f000e,0x12,0x000e000e, 0x11
        dd      0x000c000b,0x0b,0x000d000b,0x0c,0x000c000b,0x0c,0x000d000b,0x0d,0x000e000c, 0x0d
        dd      0x000e000c,0x0e,0x000e000c,0x0e,0x000e000c,0x0f,0x000f000c,0x0e,0x0010000d, 0x0f
        dd      0x0010000d,0x10,0x0010000d,0x0f,0x0011000d,0x10,0x0011000e,0x11,0x0010000f, 0x12
        dd      0x000d000e,0x13,0x000d000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0c,0x000d000b, 0x0d
        dd      0x000e000c,0x0e,0x000e000c,0x0e,0x000f000c,0x0e,0x0010000c,0x0e,0x0010000d, 0x0f
        dd      0x0010000d,0x0f,0x0010000d,0x0f,0x0010000d,0x10,0x0010000e,0x11,0x000f000e, 0x11
        dd      0x0010000e,0x11,0x000e000f,0x12,0x000d000c,0x0c,0x000e000c,0x0d,0x000e000b, 0x0d
        dd      0x000e000c,0x0e,0x000e000c,0x0e,0x000f000c,0x0f,0x000f000d,0x0e,0x000f000d, 0x0f
        dd      0x000f000d,0x10,0x0011000d,0x10,0x0010000d,0x11,0x0010000d,0x11,0x0010000e, 0x11
        dd      0x0010000e,0x12,0x0012000f,0x12,0x000e000f,0x12,0x000f000c,0x0d,0x000e000c, 0x0d
        dd      0x000e000c,0x0e,0x000e000c,0x0f,0x000f000c,0x0f,0x000f000d,0x0f,0x0010000d, 0x10
        dd      0x0010000d,0x10,0x0010000d,0x10,0x0012000e,0x10,0x0011000e,0x10,0x0011000e, 0x11
        dd      0x0011000e,0x12,0x0013000e,0x11,0x0011000f,0x12,0x000e000f,0x12,0x000e000d, 0x0e
        dd      0x000f000d,0x0e,0x000d000d,0x0e,0x000e000d,0x0f,0x0010000d,0x0f,0x0010000d, 0x0f
        dd      0x000f000d,0x11,0x0010000d,0x10,0x0010000e,0x10,0x0011000e,0x13,0x0012000e, 0x11
        dd      0x0011000e,0x11,0x0013000f,0x11,0x0011000f,0x13,0x0010000e,0x12,0x000e000f, 0x12
        dd      0x000b000d,0x0d,0x000b000d,0x0e,0x000b000d,0x0f,0x000c000d,0x10,0x000c000d, 0x10
        dd      0x000d000d,0x10,0x000d000d,0x11,0x000d000e,0x10,0x000e000e,0x11,0x000e000e, 0x11
        dd      0x000e000e,0x12,0x000e000e,0x12,0x000e000f,0x15,0x000e000f,0x14,0x000e000f, 0x15
        dd      0x000c000f,0x12

; tableABC: 128 entries of 8 bytes in the same entry layout as tableDEF.
; from3 reads it either from its start or from tableABC+9*8, always with
; the (x*16+y)*8 stride.
tableABC:
        dd      0x00020004,0x1,0x00040004,0x4,0x00060006,0x7,0x00080008,0x9,0x00090009,0xa,0x000a000a,0xa
        dd      0x0009000a,0xa,0x000a000a,0xb,0x00000000,0x0,0x00020003,0x1,0x00040004,0x4,0x00070006,0x7
        dd      0x00090007,0x9,0x00090009,0x9,0x000a000a,0xa,0x00000000,0x0,0x00040004,0x4,0x00050005,0x6
        dd      0x00060006,0x8,0x00080007,0x9,0x000a0009,0xa,0x000a0009,0xb,0x0009000a,0xa,0x000a000a,0xa
        dd      0x00000000,0x0,0x00040004,0x4,0x00040005,0x6,0x00060006,0x8,0x000a0007,0x9,0x000a0008,0x9
        dd      0x000a000a,0xa,0x00000000,0x0,0x00060006,0x7,0x00070006,0x8,0x00080007,0x9,0x00090008,0xa
        dd      0x000a0009,0xb,0x000b000a,0xc,0x000a0009,0xb,0x000a000a,0xb,0x00000000,0x0,0x00070005,0x7
        dd      0x00060006,0x7,0x00080007,0x9,0x000a0008,0xa,0x000a0009,0xa,0x000b000a,0xb,0x00000000,0x0
        dd      0x00080007,0x8,0x00080007,0x9,0x00090008,0xa,0x000b0008,0xb,0x000a0009,0xc,0x000c000a,0xc
        dd      0x000a000a,0xb,0x000b000a,0xc,0x00000000,0x0,0x00090007,0x8,0x000a0007,0x9,0x000a0008,0xa
        dd      0x000b0009,0xb,0x000b0009,0xb,0x000c000a,0xb,0x00000000,0x0,0x00090008,0x9,0x000a0008,0xa
        dd      0x000a0009,0xb,0x000b0009,0xc,0x000b000a,0xc,0x000c000a,0xc,0x000b000a,0xc,0x000c000b,0xc
        dd      0x00000000,0x0,0x00090008,0x8,0x00090008,0x9,0x000a0009,0xa,0x000b0009,0xb,0x000c000a,0xb
        dd      0x000c000b,0xc,0x00000000,0x0,0x00090009,0xa,0x000a0009,0xb,0x000b000a,0xc,0x000c000a,0xc
        dd      0x000c000a,0xd,0x000d000b,0xd,0x000c000a,0xc,0x000d000b,0xd,0x00000000,0x0,0x000a0009,0x9
        dd      0x000a0009,0xa,0x000b000a,0xb,0x000b000a,0xc,0x000d000b,0xc,0x000d000b,0xc,0x00000000,0x0
        dd      0x00090009,0x9,0x00090009,0xa,0x00090009,0xb,0x000a000a,0xc,0x000b000a,0xc,0x000c000b,0xc
        dd      0x000c000b,0xd,0x000c000c,0xd,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0
        dd      0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x0009000a,0xa,0x0009000a,0xa
        dd      0x000a000a,0xb,0x000b000b,0xc,0x000c000b,0xc,0x000c000b,0xd,0x000c000b,0xd,0x000c000c,0xd
        dd      0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0
        dd      0x0,0x00000000, 0x0,0x00000000

; linbits32: 13 entries of 8 bytes, indexed by bsr(max-15).  Each dword is a
; pair of 16-bit multipliers applied (pmaddwd) to the count of values > 14.
linbits32:
        dd      0x00040004,0x10001,0x00040004,0x20002,0x00040004,0x30003,0x00040004,0x40004
        dd      0x00050005,0x60006,0x00060006,0x60006,0x00070007,0x80008,0x00080008,0x80008
        dd      0x00090009,0xa000a,0x000b000b,0xa000a,0x000b000b,0xd000d,0x000d000d,0xd000d
        dd      0x000d000d,0xd000d

; choose_table_H: 13 words, indexed by bsr(max-15).  The low byte and the
; high byte are the two candidate return values of the "max > 15" path.
choose_table_H:
        dw      0x1810, 0x1811, 0x1812, 0x1813, 0x1914, 0x1a14, 0x1b15, 0x1c15
        dw      0x1d16, 0x1e16, 0x1e17, 0x1f17, 0x1f17

; Dispatch on max(ix) for 0..15.  choose_table_H is 26 bytes long, so the
; dword table is realigned explicitly.
        align   4
choose_jump_table_L:
        dd      table_MMX.L_case_0
        dd      table_MMX.L_case_1
        dd      table_MMX.L_case_2
        dd      table_MMX.L_case_3
        dd      table_MMX.L_case_45
        dd      table_MMX.L_case_45
        dd      table_MMX.L_case_67
        dd      table_MMX.L_case_67
        dd      table_MMX.L_case_8_15
        dd      table_MMX.L_case_8_15
        dd      table_MMX.L_case_8_15
        dd      table_MMX.L_case_8_15
        dd      table_MMX.L_case_8_15
        dd      table_MMX.L_case_8_15
        dd      table_MMX.L_case_8_15
        dd      table_MMX.L_case_8_15

        segment_code

;-----------------------------------------------------------------------
; int choose_table(int *ix, int *end, int *s)
;
; In:    [esp+4]  = ix   first dword of the range
;        [esp+8]  = end  one past the last dword; the code walks the range
;                        in 8-byte (two-dword) steps
;        [esp+12] = s    accumulator for the bit sum
; Out:   eax = table number chosen for the range
;              0  if every value is 0 (or the range is empty); *s untouched
;              -1 if the maximum exceeds 8191+15; *s is overwritten with
;                 that maximum
;        otherwise *s += smallest sum among the candidate tables
; Saved: ebx, esi, edi.  Clobbers ecx, edx, mm0-mm7, flags.
;        emms is executed on every path that used MMX.
;
; edx is loaded with `end` once here and is relied upon, unmodified, by
; every table_MMX.L_case_* / from2 / from3 continuation below.
;-----------------------------------------------------------------------
        align   16
choose_table_MMX:
        mov     ecx, [esp+4]            ; ecx = begin
        mov     edx, [esp+8]            ; edx = end
        sub     ecx, edx                ; ecx = begin-end (negative byte count)
        jz      .empty                  ; empty range: the scan loop below would
                                        ; step its counter past zero and never stop
        test    ecx, 8
        pxor    mm0, mm0                ; mm0 = [0:0]
        movq    mm1, [edx+ecx]          ; (MMX ops leave ZF from `test` intact)
        jz      .lp                     ; even number of pairs

        add     ecx, 8                  ; odd: first pair already sits in mm1
        jz      .exit

        ; Running per-word maximum: psubusw a,b ; paddw b,a  =>  b = max(a,b).
        align   4
.lp:
        movq    mm4, [edx+ecx]
        movq    mm5, [edx+ecx+8]
        add     ecx, 16
        psubusw mm4, mm0                ; strictly this ought to be a dword op,
        psubusw mm5, mm1                ; but no such instruction exists :-p
        paddw   mm0, mm4                ; values handled here are <= 8191+15,
        paddw   mm1, mm5                ; so word arithmetic is sufficient
        jnz     .lp                     ; flags still from `add ecx,16`
.exit:
        psubusw mm1, mm0                ; this too ought to be a dword op
        paddw   mm0, mm1                ; mm0 = max(mm0, mm1)

        movq    mm4, mm0
        punpckhdq mm4, mm4
        psubusw mm4, mm0                ; this too ought to be a dword op
        paddw   mm0, mm4                ; low dword = max(low dword, high dword)
        movd    eax, mm0                ; eax = maximum of the range

        cmp     eax, 15
        ja      .with_ESC
        jmp     [choose_jump_table_L+eax*4]

.empty:
        xor     eax, eax                ; same result as the max == 0 case
        ret

.with_ESC1:                             ; maximum is out of range
        emms
        mov     ecx, [esp+12]           ; s
        mov     [ecx], eax              ; *s = maximum
        or      eax, -1                 ; return -1
        ret

.with_ESC:
        cmp     eax, 8191+15
        ja      .with_ESC1

        sub     eax, 15
        push    ebx
        push    esi
        bsr     eax, eax                ; eax = 0..12, index into linbits32 /
                                        ; choose_table_H
%assign _P 4*2                          ; bytes pushed above the return address
        movq    mm5, [D15_15_15_15]
        movq    mm6, [D14_14_14_14]
        movq    mm3, [mul_add]

        mov     ecx, [esp+_P+4]         ; = ix
        ; edx still holds `end`; the reload [esp+_P+8] is not needed
        sub     ecx, edx
        xor     esi, esi                ; sum = 0
        test    ecx, 8
        pxor    mm7, mm7                ; linbits_sum: count of values above 14
        jz      .H_dual_lp1

        ; odd number of pairs: handle the first pair on its own
        movq    mm0, [edx+ecx]
        add     ecx, 8
        packssdw mm0, mm7
        movq    mm2, mm0
        paddusw mm0, mm5                ; mm0 = min(ix, 15)+0xfff0
        pcmpgtw mm2, mm6                ; greater than 14?
        psubw   mm7, mm2                ; if greater than 14: linbits_sum++
        pmaddwd mm0, mm3                ; {0, 0, y, x}*{1, 16, 1, 16}
        movd    ebx, mm0
        mov     esi, [largetbl+ebx*4+(16*16+16)*4]
        jz      .H_dual_exit            ; flags still from `add ecx,8`

        align   4
.H_dual_lp1:
        movq    mm0, [edx+ecx]
        movq    mm1, [edx+ecx+8]
        packssdw mm0, mm1
        movq    mm2, mm0
        paddusw mm0, mm5                ; mm0 = min(ix, 15)+0xfff0
        pcmpgtw mm2, mm6                ; greater than 14?
        pmaddwd mm0, mm3                ; {y, x, y, x}*{1, 16, 1, 16}
        movd    ebx, mm0
        punpckhdq mm0, mm0
        add     esi, [largetbl+ebx*4+(16*16+16)*4]
        movd    ebx, mm0
        add     esi, [largetbl+ebx*4+(16*16+16)*4]
        add     ecx, 16
        psubw   mm7, mm2                ; if greater than 14: linbits_sum++
        jnz     .H_dual_lp1             ; flags still from `add ecx,16`

.H_dual_exit:
        pmov    mm1, mm7
        punpckhdq mm7, mm7
        paddd   mm7, mm1                ; fold high dword of counts into low
        punpckldq mm7, mm7              ; duplicate it into both dwords
        pmaddwd mm7, [linbits32+eax*8]  ; linbits
        movzx   eax, word [choose_table_H+eax*2] ; eax <= 12 before, so this
                                        ; equals the former `mov ax,[...]`
        movd    ecx, mm7
        punpckhdq mm7, mm7
        movd    edx, mm7
        emms
        shl     edx, 16
        add     ecx, edx
        add     ecx, esi                ; packed totals: high 16 | low 16
        pop     esi
        pop     ebx
        mov     edx, ecx
        and     ecx, 0xffff             ; ecx = sum2
        shr     edx, 16                 ; edx = sum
        cmp     edx, ecx
        jle     .chooseE_s1
        mov     edx, ecx                ; sum2 is smaller: take it and the
        shr     eax, 8                  ; table number from the high byte
.chooseE_s1:
        mov     ecx, [esp+12]           ; s
        and     eax, 0xff
        add     [ecx], edx              ; *s += chosen sum
        ret

;-----------------------------------------------------------------------
; Continuations reached through choose_jump_table_L.  On entry the stack is
; still that of choose_table_MMX and edx = end.
;-----------------------------------------------------------------------

; max == 0: eax is already 0; *s is left untouched.
table_MMX.L_case_0:
        emms
        ret

; max == 1: one byte lookup in t1l per pair, index = 2*ix[0] + ix[1].
table_MMX.L_case_1:
        emms
        mov     eax, [esp+12]           ; s
        mov     ecx, [esp+4]            ; ix
        sub     ecx, edx                ; negative byte count
        push    ebx
.lp:
        mov     ebx, [edx+ecx]
        add     ebx, ebx
        add     ebx, [edx+ecx+4]
        movzx   ebx, byte [ebx+t1l]
        add     [eax], ebx              ; *s += t1l[index]
        add     ecx, 8
        jnz     .lp
        pop     ebx
        mov     eax, 1
        ret

; max 4..15: three candidate tables.  The pushed dword is the number returned
; for the first candidate; from3 adds 0, 1 or 2 to it.
table_MMX.L_case_45:
        push    dword 7
        mov     ecx, tableABC+9*8
        jmp     from3

table_MMX.L_case_67:
        push    dword 10
        mov     ecx, tableABC
        jmp     from3

table_MMX.L_case_8_15:
        push    dword 13
        mov     ecx, tableDEF
        ; falls through into from3

;-----------------------------------------------------------------------
; from3 -- pick the cheapest of three tables.
; In:    ecx = table base (8-byte entries), edx = end,
;        [esp] = base table number, [esp+8] = ix, [esp+16] = s
; Out:   eax = base table number + 0/1/2, *s += smallest of the three sums
;-----------------------------------------------------------------------
from3:
        mov     eax, [esp+8]            ; eax = begin
        ; edx still holds `end`; the reload [esp+12] is not needed
        push    ebx
        sub     eax, edx
        movq    mm5, [mul_add]
        pxor    mm2, mm2                ; mm2 = sum
        test    eax, 8
        jz      .choose3_lp1

        ; odd length
        movq    mm0, [edx+eax]          ; mm0 = ix[0] | ix[1]
        add     eax, 8
        packssdw mm0, mm2
        pmaddwd mm0, mm5
        movd    ebx, mm0
        movq    mm2, [ecx+ebx*8]
        jz      .choose3_exit           ; flags still from `add eax,8`

        align   4
.choose3_lp1:
        movq    mm0, [edx+eax]
        movq    mm1, [edx+eax+8]
        add     eax, 16
        packssdw mm0, mm1               ; mm0 = ix[0]|ix[1]|ix[2]|ix[3]
        pmaddwd mm0, mm5
        movd    ebx, mm0
        punpckhdq mm0, mm0
        paddd   mm2, [ecx+ebx*8]
        movd    ebx, mm0
        paddd   mm2, [ecx+ebx*8]
        jnz     .choose3_lp1            ; flags still from `add eax,16`
.choose3_exit:
        ; eax == 0 here (the counter ran to zero), so no `xor eax,eax`
        movd    ebx, mm2
        punpckhdq mm2, mm2
        mov     ecx, ebx
        and     ecx, 0xffff             ; ecx = sum2
        shr     ebx, 16                 ; ebx = sum1
        movd    edx, mm2                ; edx = sum
        cmp     edx, ebx
        jle     .choose3_s1
        mov     edx, ebx
        inc     eax                     ; second candidate wins so far
.choose3_s1:
        emms
        pop     ebx
        cmp     edx, ecx
        jle     .choose3_s2
        mov     edx, ecx
        mov     eax, 2                  ; third candidate wins
.choose3_s2:
        pop     ecx                     ; base table number pushed by the case
        add     eax, ecx
        mov     ecx, [esp+12]           ; s
        add     [ecx], edx
        ret

; max 2..3: two candidate tables packed into one dword per entry.
table_MMX.L_case_2:
        push    dword 2
        mov     ecx, table23
        pmov    mm5, [mul_add23]
        jmp     from2

table_MMX.L_case_3:
        push    dword 5
        mov     ecx, table56
        pmov    mm5, [mul_add56]
        ; falls through into from2

;-----------------------------------------------------------------------
; from2 -- pick the cheaper of two tables.
; In:    ecx = table base (4-byte entries: high 16 = sum1, low 16 = sum2),
;        mm5 = pmaddwd index multipliers, edx = end,
;        [esp] = base table number, [esp+8] = ix, [esp+16] = s
; Out:   eax = base table number + 0/1, *s += smaller of the two sums
;-----------------------------------------------------------------------
from2:
        mov     eax, [esp+8]            ; eax = begin
        ; edx still holds `end`; the reload [esp+12] is not needed
        push    ebx
        push    edi
        sub     eax, edx
        xor     edi, edi                ; edi = packed running sums
        test    eax, 8
        jz      .choose2_lp1

        ; odd length
        movq    mm0, [edx+eax]          ; mm0 = ix[0] | ix[1]
        pxor    mm2, mm2                ; zero, used as the packssdw upper half
        packssdw mm0, mm2
        pmaddwd mm0, mm5
        movd    ebx, mm0
        mov     edi, [ecx+ebx*4]
        add     eax, 8
        jz      .choose2_exit

        align   4
.choose2_lp1:
        movq    mm0, [edx+eax]
        movq    mm1, [edx+eax+8]
        packssdw mm0, mm1               ; mm0 = ix[0]|ix[1]|ix[2]|ix[3]
        pmaddwd mm0, mm5
        movd    ebx, mm0
        punpckhdq mm0, mm0
        add     edi, [ecx+ebx*4]
        movd    ebx, mm0
        add     edi, [ecx+ebx*4]
        add     eax, 16
        jnc     .choose2_lp1            ; carry is set only when eax wraps to 0
.choose2_exit:
        mov     ecx, edi
        pop     edi
        pop     ebx
        pop     eax                     ; table num.
        emms
        mov     edx, ecx
        and     ecx, 0xffff             ; ecx = sum2
        shr     edx, 16                 ; edx = sum1
        cmp     edx, ecx
        jle     .choose2_s1
        mov     edx, ecx
        inc     eax                     ; second candidate wins
.choose2_s1:
        mov     ecx, [esp+12]           ; s
        add     [ecx], edx
        ret

        end
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -