📄 dsputil_iwmmxt_rnd.h
字号:
void DEF(put, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h){ int stride = line_size; __asm__ __volatile__ ( "and r12, %[pixels], #7 \n\t" "bic %[pixels], %[pixels], #7 \n\t" "tmcr wcgr1, r12 \n\t" "add r4, %[pixels], %[line_size] \n\t" "add r5, %[block], %[line_size] \n\t" "mov %[line_size], %[line_size], lsl #1 \n\t" "1: \n\t" "wldrd wr0, [%[pixels]] \n\t" "subs %[h], %[h], #2 \n\t" "wldrd wr1, [%[pixels], #8] \n\t" "add %[pixels], %[pixels], %[line_size] \n\t" "wldrd wr3, [r4] \n\t" "pld [%[pixels]] \n\t" "pld [%[pixels], #32] \n\t" "wldrd wr4, [r4, #8] \n\t" "add r4, r4, %[line_size] \n\t" "walignr1 wr8, wr0, wr1 \n\t" "pld [r4] \n\t" "pld [r4, #32] \n\t" "walignr1 wr10, wr3, wr4 \n\t" "wstrd wr8, [%[block]] \n\t" "add %[block], %[block], %[line_size] \n\t" "wstrd wr10, [r5] \n\t" "add r5, r5, %[line_size] \n\t" "bne 1b \n\t" : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h) : : "memory", "r4", "r5", "r12");}void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h){ int stride = line_size; __asm__ __volatile__ ( "and r12, %[pixels], #7 \n\t" "bic %[pixels], %[pixels], #7 \n\t" "tmcr wcgr1, r12 \n\t" "add r4, %[pixels], %[line_size] \n\t" "add r5, %[block], %[line_size] \n\t" "mov %[line_size], %[line_size], lsl #1 \n\t" "1: \n\t" "wldrd wr0, [%[pixels]] \n\t" "subs %[h], %[h], #2 \n\t" "wldrd wr1, [%[pixels], #8] \n\t" "add %[pixels], %[pixels], %[line_size] \n\t" "wldrd wr3, [r4] \n\t" "pld [%[pixels]] \n\t" "pld [%[pixels], #32] \n\t" "wldrd wr4, [r4, #8] \n\t" "add r4, r4, %[line_size] \n\t" "walignr1 wr8, wr0, wr1 \n\t" "wldrd wr0, [%[block]] \n\t" "wldrd wr2, [r5] \n\t" "pld [r4] \n\t" "pld [r4, #32] \n\t" "walignr1 wr10, wr3, wr4 \n\t" WAVG2B" wr8, wr8, wr0 \n\t" WAVG2B" wr10, wr10, wr2 \n\t" "wstrd wr8, [%[block]] \n\t" "add %[block], %[block], %[line_size] \n\t" "wstrd wr10, [r5] \n\t" "pld [%[block]] \n\t" "pld [%[block], #32] \n\t" "add r5, r5, %[line_size] \n\t" "pld [r5] \n\t" "pld [r5, #32] \n\t" "bne 1b \n\t" : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h) : : "memory", "r4", "r5", "r12");}void DEF(put, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h){ int stride = line_size; __asm__ __volatile__ ( "and r12, %[pixels], #7 \n\t" "bic %[pixels], %[pixels], #7 \n\t" "tmcr wcgr1, r12 \n\t" "add r4, %[pixels], %[line_size] \n\t" "add r5, %[block], %[line_size] \n\t" "mov %[line_size], %[line_size], lsl #1 \n\t" "1: \n\t" "wldrd wr0, [%[pixels]] \n\t" "wldrd wr1, [%[pixels], #8] \n\t" "subs %[h], %[h], #2 \n\t" "wldrd wr2, [%[pixels], #16] \n\t" "add %[pixels], %[pixels], %[line_size] \n\t" "wldrd wr3, [r4] \n\t" "pld [%[pixels]] \n\t" "pld [%[pixels], #32] \n\t" "walignr1 wr8, wr0, wr1 \n\t" "wldrd wr4, [r4, #8] \n\t" "walignr1 wr9, wr1, wr2 \n\t" "wldrd wr5, [r4, #16] \n\t" "add r4, r4, %[line_size] \n\t" "pld [r4] \n\t" "pld [r4, #32] \n\t" "walignr1 wr10, wr3, wr4 \n\t" "wstrd wr8, [%[block]] \n\t" "walignr1 wr11, wr4, wr5 \n\t" "wstrd wr9, [%[block], #8] \n\t" "add %[block], %[block], %[line_size] \n\t" "wstrd wr10, [r5] \n\t" "wstrd wr11, [r5, #8] \n\t" "add r5, r5, %[line_size] \n\t" "bne 1b \n\t" : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h) : : "memory", "r4", "r5", "r12");}void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h){ int stride = line_size; __asm__ __volatile__ ( "pld [%[pixels]] \n\t" "pld [%[pixels], #32] \n\t" "pld [%[block]] \n\t" "pld [%[block], #32] \n\t" "and r12, %[pixels], #7 \n\t" "bic %[pixels], %[pixels], #7 \n\t" "tmcr wcgr1, r12 \n\t" "add r4, %[pixels], %[line_size]\n\t" "add r5, %[block], %[line_size] \n\t" "mov %[line_size], %[line_size], lsl #1 \n\t" "1: \n\t" "wldrd wr0, [%[pixels]] \n\t" "wldrd wr1, [%[pixels], #8] \n\t" "subs %[h], %[h], #2 \n\t" "wldrd wr2, [%[pixels], #16] \n\t" "add %[pixels], %[pixels], %[line_size] \n\t" "wldrd wr3, [r4] \n\t" "pld [%[pixels]] \n\t" "pld [%[pixels], #32] \n\t" "walignr1 wr8, wr0, wr1 \n\t" "wldrd wr4, [r4, #8] \n\t" "walignr1 wr9, wr1, wr2 \n\t" "wldrd wr5, [r4, #16] \n\t" "add r4, r4, %[line_size] \n\t" "wldrd wr0, [%[block]] \n\t" "pld [r4] \n\t" "wldrd wr1, [%[block], #8] \n\t" "pld [r4, #32] \n\t" "wldrd wr2, [r5] \n\t" "walignr1 wr10, wr3, wr4 \n\t" "wldrd wr3, [r5, #8] \n\t" WAVG2B" wr8, wr8, wr0 \n\t" WAVG2B" wr9, wr9, wr1 \n\t" WAVG2B" wr10, wr10, wr2 \n\t" "wstrd wr8, [%[block]] \n\t" "walignr1 wr11, wr4, wr5 \n\t" WAVG2B" wr11, wr11, wr3 \n\t" "wstrd wr9, [%[block], #8] \n\t" "add %[block], %[block], %[line_size] \n\t" "wstrd wr10, [r5] \n\t" "pld [%[block]] \n\t" "pld [%[block], #32] \n\t" "wstrd wr11, [r5, #8] \n\t" "add r5, r5, %[line_size] \n\t" "pld [r5] \n\t" "pld [r5, #32] \n\t" "bne 1b \n\t" : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h) : : "memory", "r4", "r5", "r12");}void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h){ int stride = line_size; // [wr0 wr1 wr2 wr3] for previous line // [wr4 wr5 wr6 wr7] for current line SET_RND(wr15); // =2 for rnd and =1 for no_rnd version __asm__ __volatile__( "pld [%[pixels]] \n\t" "pld [%[pixels], #32] \n\t" "and r12, %[pixels], #7 \n\t" "bic %[pixels], %[pixels], #7 \n\t" "tmcr wcgr1, r12 \n\t" "add r12, r12, #1 \n\t" "add r4, %[pixels], %[line_size]\n\t" "tmcr wcgr2, r12 \n\t" "add r5, %[block], %[line_size] \n\t" "mov %[line_size], %[line_size], lsl #1 \n\t" "1: \n\t" "wldrd wr10, [%[pixels]] \n\t" "cmp r12, #8 \n\t" "wldrd wr11, [%[pixels], #8] \n\t" "add %[pixels], %[pixels], %[line_size] \n\t" "wldrd wr13, [r4] \n\t" "pld [%[pixels]] \n\t" "wldrd wr14, [r4, #8] \n\t" "pld [%[pixels], #32] \n\t" "add r4, r4, %[line_size] \n\t" "walignr1 wr0, wr10, wr11 \n\t" "pld [r4] \n\t" "pld [r4, #32] \n\t" "walignr1 wr2, wr13, wr14 \n\t" "wmoveq wr4, wr11 \n\t" "wmoveq wr6, wr14 \n\t" "walignr2ne wr4, wr10, wr11 \n\t" "walignr2ne wr6, wr13, wr14 \n\t" WAVG2B" wr0, wr0, wr4 \n\t" WAVG2B" wr2, wr2, wr6 \n\t" "wstrd wr0, [%[block]] \n\t" "subs %[h], %[h], #2 \n\t" "wstrd wr2, [r5] \n\t" "add %[block], %[block], %[line_size] \n\t" "add r5, r5, %[line_size] \n\t" "bne 1b \n\t" : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) : : "r4", "r5", "r12", "memory");}void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h){ int stride = line_size; // [wr0 wr1 wr2 wr3] for previous line // [wr4 wr5 wr6 wr7] for current line SET_RND(wr15); // =2 for rnd and =1 for no_rnd version __asm__ __volatile__( "pld [%[pixels]] \n\t" "pld [%[pixels], #32] \n\t" "and r12, %[pixels], #7 \n\t" "bic %[pixels], %[pixels], #7 \n\t" "tmcr wcgr1, r12 \n\t" "add r12, r12, #1 \n\t" "add r4, %[pixels], %[line_size]\n\t" "tmcr wcgr2, r12 \n\t" "add r5, %[block], %[line_size] \n\t" "mov %[line_size], %[line_size], lsl #1 \n\t" "1: \n\t" "wldrd wr10, [%[pixels]] \n\t" "cmp r12, #8 \n\t" "wldrd wr11, [%[pixels], #8] \n\t" "wldrd wr12, [%[pixels], #16] \n\t" "add %[pixels], %[pixels], %[line_size] \n\t" "wldrd wr13, [r4] \n\t" "pld [%[pixels]] \n\t" "wldrd wr14, [r4, #8] \n\t" "pld [%[pixels], #32] \n\t" "wldrd wr15, [r4, #16] \n\t" "add r4, r4, %[line_size] \n\t" "walignr1 wr0, wr10, wr11 \n\t" "pld [r4] \n\t" "pld [r4, #32] \n\t" "walignr1 wr1, wr11, wr12 \n\t" "walignr1 wr2, wr13, wr14 \n\t" "walignr1 wr3, wr14, wr15 \n\t" "wmoveq wr4, wr11 \n\t" "wmoveq wr5, wr12 \n\t" "wmoveq wr6, wr14 \n\t" "wmoveq wr7, wr15 \n\t" "walignr2ne wr4, wr10, wr11 \n\t" "walignr2ne wr5, wr11, wr12 \n\t" "walignr2ne wr6, wr13, wr14 \n\t" "walignr2ne wr7, wr14, wr15 \n\t" WAVG2B" wr0, wr0, wr4 \n\t" WAVG2B" wr1, wr1, wr5 \n\t" "wstrd wr0, [%[block]] \n\t" WAVG2B" wr2, wr2, wr6 \n\t" "wstrd wr1, [%[block], #8] \n\t" WAVG2B" wr3, wr3, wr7 \n\t" "add %[block], %[block], %[line_size] \n\t" "wstrd wr2, [r5] \n\t" "subs %[h], %[h], #2 \n\t"
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -