dsputil_iwmmxt_rnd.h
来自「wince下著名的视频播放器源码」· C头文件 代码 · 共 1,094 行 · 第 1/4 页
H
1,094 行
"wstrd wr3, [r5, #8] \n\t" "add r5, r5, %[line_size] \n\t" "bne 1b \n\t" : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) : : "r4", "r5", "r12", "memory");}void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h){ int stride = line_size; // [wr0 wr1 wr2 wr3] for previous line // [wr4 wr5 wr6 wr7] for current line SET_RND(wr15); // =2 for rnd and =1 for no_rnd version __asm__ __volatile__( "pld [%[pixels]] \n\t" "pld [%[pixels], #32] \n\t" "pld [%[block]] \n\t" "pld [%[block], #32] \n\t" "and r12, %[pixels], #7 \n\t" "bic %[pixels], %[pixels], #7 \n\t" "tmcr wcgr1, r12 \n\t" "add r12, r12, #1 \n\t" "add r4, %[pixels], %[line_size]\n\t" "tmcr wcgr2, r12 \n\t" "add r5, %[block], %[line_size] \n\t" "mov %[line_size], %[line_size], lsl #1 \n\t" "pld [r5] \n\t" "pld [r5, #32] \n\t" "1: \n\t" "wldrd wr10, [%[pixels]] \n\t" "cmp r12, #8 \n\t" "wldrd wr11, [%[pixels], #8] \n\t" "add %[pixels], %[pixels], %[line_size] \n\t" "wldrd wr13, [r4] \n\t" "pld [%[pixels]] \n\t" "wldrd wr14, [r4, #8] \n\t" "pld [%[pixels], #32] \n\t" "add r4, r4, %[line_size] \n\t" "walignr1 wr0, wr10, wr11 \n\t" "pld [r4] \n\t" "pld [r4, #32] \n\t" "walignr1 wr2, wr13, wr14 \n\t" "wmoveq wr4, wr11 \n\t" "wmoveq wr6, wr14 \n\t" "walignr2ne wr4, wr10, wr11 \n\t" "wldrd wr10, [%[block]] \n\t" "walignr2ne wr6, wr13, wr14 \n\t" "wldrd wr12, [r5] \n\t" WAVG2B" wr0, wr0, wr4 \n\t" WAVG2B" wr2, wr2, wr6 \n\t" WAVG2B" wr0, wr0, wr10 \n\t" WAVG2B" wr2, wr2, wr12 \n\t" "wstrd wr0, [%[block]] \n\t" "subs %[h], %[h], #2 \n\t" "wstrd wr2, [r5] \n\t" "add %[block], %[block], %[line_size] \n\t" "add r5, r5, %[line_size] \n\t" "pld [%[block]] \n\t" "pld [%[block], #32] \n\t" "pld [r5] \n\t" "pld [r5, #32] \n\t" "bne 1b \n\t" : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) : : "r4", "r5", "r12", "memory");}void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h){ int stride = line_size; // [wr0 wr1 wr2 wr3] for previous line // [wr4 wr5 wr6 wr7] for current line SET_RND(wr15); // =2 for rnd and =1 for no_rnd version __asm__ __volatile__( "pld [%[pixels]] \n\t" "pld [%[pixels], #32] \n\t" "pld [%[block]] \n\t" "pld [%[block], #32] \n\t" "and r12, %[pixels], #7 \n\t" "bic %[pixels], %[pixels], #7 \n\t" "tmcr wcgr1, r12 \n\t" "add r12, r12, #1 \n\t" "add r4, %[pixels], %[line_size]\n\t" "tmcr wcgr2, r12 \n\t" "add r5, %[block], %[line_size] \n\t" "mov %[line_size], %[line_size], lsl #1 \n\t" "pld [r5] \n\t" "pld [r5, #32] \n\t" "1: \n\t" "wldrd wr10, [%[pixels]] \n\t" "cmp r12, #8 \n\t" "wldrd wr11, [%[pixels], #8] \n\t" "wldrd wr12, [%[pixels], #16] \n\t" "add %[pixels], %[pixels], %[line_size] \n\t" "wldrd wr13, [r4] \n\t" "pld [%[pixels]] \n\t" "wldrd wr14, [r4, #8] \n\t" "pld [%[pixels], #32] \n\t" "wldrd wr15, [r4, #16] \n\t" "add r4, r4, %[line_size] \n\t" "walignr1 wr0, wr10, wr11 \n\t" "pld [r4] \n\t" "pld [r4, #32] \n\t" "walignr1 wr1, wr11, wr12 \n\t" "walignr1 wr2, wr13, wr14 \n\t" "walignr1 wr3, wr14, wr15 \n\t" "wmoveq wr4, wr11 \n\t" "wmoveq wr5, wr12 \n\t" "wmoveq wr6, wr14 \n\t" "wmoveq wr7, wr15 \n\t" "walignr2ne wr4, wr10, wr11 \n\t" "walignr2ne wr5, wr11, wr12 \n\t" "walignr2ne wr6, wr13, wr14 \n\t" "walignr2ne wr7, wr14, wr15 \n\t" "wldrd wr10, [%[block]] \n\t" WAVG2B" wr0, wr0, wr4 \n\t" "wldrd wr11, [%[block], #8] \n\t" WAVG2B" wr1, wr1, wr5 \n\t" "wldrd wr12, [r5] \n\t" WAVG2B" wr2, wr2, wr6 \n\t" "wldrd wr13, [r5, #8] \n\t" WAVG2B" wr3, wr3, wr7 \n\t" WAVG2B" wr0, wr0, wr10 \n\t" WAVG2B" wr1, wr1, wr11 \n\t" WAVG2B" wr2, wr2, wr12 \n\t" WAVG2B" wr3, wr3, wr13 \n\t" "wstrd wr0, [%[block]] \n\t" "subs %[h], %[h], #2 \n\t" "wstrd wr1, [%[block], #8] \n\t" "add %[block], %[block], %[line_size] \n\t" "wstrd wr2, [r5] \n\t" "pld [%[block]] \n\t" "wstrd wr3, [r5, #8] \n\t" "add r5, r5, %[line_size] \n\t" "pld [%[block], #32] \n\t" "pld [r5] \n\t" "pld [r5, #32] \n\t" "bne 1b \n\t" : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) : :"r4", "r5", "r12", "memory");}void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h){ int stride = line_size; // [wr0 wr1 wr2 wr3] for previous line // [wr4 wr5 wr6 wr7] for current line __asm__ __volatile__( "pld [%[pixels]] \n\t" "pld [%[pixels], #32] \n\t" "and r12, %[pixels], #7 \n\t" "tmcr wcgr1, r12 \n\t" "bic %[pixels], %[pixels], #7 \n\t" "wldrd wr10, [%[pixels]] \n\t" "wldrd wr11, [%[pixels], #8] \n\t" "pld [%[block]] \n\t" "add %[pixels], %[pixels], %[line_size] \n\t" "walignr1 wr0, wr10, wr11 \n\t" "pld [%[pixels]] \n\t" "pld [%[pixels], #32] \n\t" "1: \n\t" "wldrd wr10, [%[pixels]] \n\t" "wldrd wr11, [%[pixels], #8] \n\t" "add %[pixels], %[pixels], %[line_size] \n\t" "pld [%[pixels]] \n\t" "pld [%[pixels], #32] \n\t" "walignr1 wr4, wr10, wr11 \n\t" "wldrd wr10, [%[block]] \n\t" WAVG2B" wr8, wr0, wr4 \n\t" WAVG2B" wr8, wr8, wr10 \n\t" "wstrd wr8, [%[block]] \n\t" "add %[block], %[block], %[line_size] \n\t" "wldrd wr10, [%[pixels]] \n\t" "wldrd wr11, [%[pixels], #8] \n\t" "pld [%[block]] \n\t" "add %[pixels], %[pixels], %[line_size] \n\t" "pld [%[pixels]] \n\t" "pld [%[pixels], #32] \n\t" "walignr1 wr0, wr10, wr11 \n\t" "wldrd wr10, [%[block]] \n\t" WAVG2B" wr8, wr0, wr4 \n\t" WAVG2B" wr8, wr8, wr10 \n\t" "wstrd wr8, [%[block]] \n\t" "add %[block], %[block], %[line_size] \n\t" "subs %[h], %[h], #2 \n\t" "pld [%[block]] \n\t" "bne 1b \n\t" : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) : : "cc", "memory", "r12");}void DEF(put, pixels16_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h){ int stride = line_size; // [wr0 wr1 wr2 wr3] for previous line // [wr4 wr5 wr6 wr7] for current line __asm__ __volatile__( "pld [%[pixels]] \n\t" "pld [%[pixels], #32] \n\t" "and r12, %[pixels], #7 \n\t" "tmcr wcgr1, r12 \n\t" "bic %[pixels], %[pixels], #7 \n\t" "wldrd wr10, [%[pixels]] \n\t" "wldrd wr11, [%[pixels], #8] \n\t" "wldrd wr12, [%[pixels], #16] \n\t" "add %[pixels], %[pixels], %[line_size] \n\t" "pld [%[pixels]] \n\t" "pld [%[pixels], #32] \n\t" "walignr1 wr0, wr10, wr11 \n\t" "walignr1 wr1, wr11, wr12 \n\t" "1: \n\t" "wldrd wr10, [%[pixels]] \n\t" "wldrd wr11, [%[pixels], #8] \n\t" "wldrd wr12, [%[pixels], #16] \n\t" "add %[pixels], %[pixels], %[line_size] \n\t" "pld [%[pixels]] \n\t" "pld [%[pixels], #32] \n\t" "walignr1 wr4, wr10, wr11 \n\t" "walignr1 wr5, wr11, wr12 \n\t" WAVG2B" wr8, wr0, wr4 \n\t" WAVG2B" wr9, wr1, wr5 \n\t" "wstrd wr8, [%[block]] \n\t" "wstrd wr9, [%[block], #8] \n\t" "add %[block], %[block], %[line_size] \n\t" "wldrd wr10, [%[pixels]] \n\t" "wldrd wr11, [%[pixels], #8] \n\t" "wldrd wr12, [%[pixels], #16] \n\t" "add %[pixels], %[pixels], %[line_size] \n\t" "pld [%[pixels]] \n\t" "pld [%[pixels], #32] \n\t" "walignr1 wr0, wr10, wr11 \n\t" "walignr1 wr1, wr11, wr12 \n\t" WAVG2B" wr8, wr0, wr4 \n\t" WAVG2B" wr9, wr1, wr5 \n\t" "wstrd wr8, [%[block]] \n\t" "wstrd wr9, [%[block], #8] \n\t" "add %[block], %[block], %[line_size] \n\t" "subs %[h], %[h], #2 \n\t" "bne 1b \n\t" : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) : : "r4", "r5", "r12", "memory");}void DEF(avg, pixels16_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h){ int stride = line_size; // [wr0 wr1 wr2 wr3] for previous line // [wr4 wr5 wr6 wr7] for current line __asm__ __volatile__( "pld [%[pixels]] \n\t" "pld [%[pixels], #32] \n\t" "and r12, %[pixels], #7 \n\t" "tmcr wcgr1, r12 \n\t" "bic %[pixels], %[pixels], #7 \n\t" "wldrd wr10, [%[pixels]] \n\t" "wldrd wr11, [%[pixels], #8] \n\t" "pld [%[block]] \n\t" "wldrd wr12, [%[pixels], #16] \n\t" "add %[pixels], %[pixels], %[line_size] \n\t"
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?