📄 pred_comp.c
字号:
/* (s[i]+s[i+lx]+1) >> 1 */ s0H = vec_sra(s0H, one); /* d[i] + ((s[i]+s[i+lx]+1)>>1) */ dL = vec_add(dL, s0H); /* (d[i]+((s[i]+s[i+lx]+1)>>1)) + 1 */ dL = vec_add(dL, one); /* ((d[i]+((s[i]+s[i+lx]+1)>>1))+1) >> 1 */ dL = vec_sra(dL, one); /* (unsigned char[]) d[0-7], d[8-15] */ vu8(dH) = vec_packsu(dH, dL); vec_st(vu8(dH), 0, (unsigned char*)d); lS0 = lS2; s += lx; d += lx; } /* }}} */ } /* }}} */ } else /* w == 16 */ { /* {{{ */ perm = vec_lvsl(0, s); lS = vec_ld(0, (unsigned char*)s); lS1 = vec_ld(16, (unsigned char*)s); lS0 = vec_perm(lS, lS1, perm); s += lx; for (i = 0; i < h; i++) { lS = vec_ld(0, (unsigned char*)s); lS3 = vec_ld(16, (unsigned char*)s); lS2 = vec_perm(lS, lS3, perm); lD = vec_ld(0, (unsigned char*)d); /* (unsigned short[]) s[0-7] */ vu8(s0H) = vec_mergeh(zero, lS0); /* (unsigned short[]) s[8-15] */ vu8(s0L) = vec_mergel(zero, lS0); /* (unsigned short[]) s+lx[0-7] */ vu8(s2H) = vec_mergeh(zero, lS2); /* (unsigned short[]) s+lx[8-15] */ vu8(s2L) = vec_mergel(zero, lS2); /* (unsigned short[]) d[0-7] */ vu8(dH) = vec_mergeh(zero, lD); /* (unsigned short[]) d[8-15] */ vu8(dL) = vec_mergel(zero, lD); /* s[i] + s[i+lx] */ s0H = vec_add(s0H, s2H); s0L = vec_add(s0L, s2L); /* (s[i]+s[i+lx]) + 1 */ s0H = vec_add(s0H, one); s0L = vec_add(s0L, one); /* (s[i]+s[i+lx]+1) >> 1 */ s0H = vec_sra(s0H, one); s0L = vec_sra(s0L, one); /* d[i] + ((s[i]+s[i+lx]+1)>>1) */ dH = vec_add(dH, s0H); dL = vec_add(dL, s0L); /* (d[i]+((s[i]+s[i+lx]+1)>>1)) + 1 */ dH = vec_add(dH, one); dL = vec_add(dL, one); /* ((d[i]+((s[i]+s[i+lx]+1)>>1))+1) >> 1 */ dH = vec_sra(dH, one); dL = vec_sra(dL, one); /* (unsigned char[]) d[0-7], d[8-15] */ vu8(dH) = vec_packsu(dH, dL); vec_st(vu8(dH), 0, (unsigned char*)d); lS0 = lS2; s += lx; d += lx; } /* }}} */ }#if 0 for (j=0; j<h; j++) { for (i=0; i<w; i++) d[i] = (d[i] + ((unsigned int)(s[i]+s[i+lx]+1)>>1)+1)>>1; s+= lx; d+= lx; }#endif /* }}} */ } else /* !addflag */ { /* {{{ */ zero = vec_splat_u8(0); one = vec_splat_u16(1); if (w == 8) { /* {{{ */ perm = vec_lvsl(0, (unsigned char*)s); if (((unsigned long)d & 0xf) == 0) { /* {{{ */ lS0 = vec_ld(0, (unsigned char*)s); lS1 = vec_ld(16, (unsigned char*)s); lS0 = vec_perm(lS0, lS1, perm); s += lx; for (i = 0; i < h; i++) { lS2 = vec_ld(0, (unsigned char*)s); lS3 = vec_ld(16, (unsigned char*)s); lS2 = vec_perm(lS2, lS3, perm); lD = vec_ld(0, (unsigned char*)d); /* (unsigned short[]) s[0-7] */ vu8(s0H) = vec_mergeh(zero, lS0); /* (unsigned short[]) s+lx[0-7] */ vu8(s2H) = vec_mergeh(zero, lS2); /* (unsigned short[]) d[8-15] */ vu8(dL) = vec_mergel(zero, lD); /* s[i] + s[i+lx] */ s0H = vec_add(s0H, s2H); /* (s[i]+s[i+lx]) + 1 */ s0H = vec_add(s0H, one); /* (s[i]+s[i+lx]+1) >> 1 */ s0H = vec_sra(s0H, one); /* (unsigned char[]) d[0-7], d[8-15] */ vu8(dH) = vec_packsu(s0H, dL); vec_st(vu8(dH), 0, (unsigned char*)d); lS0 = lS2; s += lx; d += lx; } /* }}} */ } else { /* {{{ */ /* d is offset 8 bytes, work on dL (low bytes). */ lS0 = vec_ld(0, (unsigned char*)s); lS1 = vec_ld(16, (unsigned char*)s); lS0 = vec_perm(lS0, lS1, perm); s += lx; for (i = 0; i < h; i++) { lS2 = vec_ld(0, (unsigned char*)s); lS3 = vec_ld(16, (unsigned char*)s); lS2 = vec_perm(lS2, lS3, perm); lD = vec_ld(0, (unsigned char*)d); /* (unsigned short[]) s[0-7] */ vu8(s0H) = vec_mergeh(zero, lS0); /* (unsigned short[]) s+lx[0-7] */ vu8(s2H) = vec_mergeh(zero, lS2); /* (unsigned short[]) d[0-7] */ vu8(dH) = vec_mergeh(zero, lD); /* s[i] + s[i+lx] */ s0H = vec_add(s0H, s2H); /* (s[i]+s[i+lx]) + 1 */ s0H = vec_add(s0H, one); /* (s[i]+s[i+lx]+1) >> 1 */ s0H = vec_sra(s0H, one); /* (unsigned char[]) d[0-7], d[8-15] */ vu8(dH) = vec_packsu(dH, s0H); vec_st(vu8(dH), 0, (unsigned char*)d); lS0 = lS2; s += lx; d += lx; } /* }}} */ } /* }}} */ } else /* w == 16 */ { /* {{{ */ perm = vec_lvsl(0, s); lS = vec_ld(0, (unsigned char*)s); lS1 = vec_ld(16, (unsigned char*)s); lS0 = vec_perm(lS, lS1, perm); s += lx; for (i = 0; i < h; i++) { lS = vec_ld(0, (unsigned char*)s); lS3 = vec_ld(16, (unsigned char*)s); lS2 = vec_perm(lS, lS3, perm); /* (unsigned short[]) s[0-7] */ vu8(s0H) = vec_mergeh(zero, lS0); /* (unsigned short[]) s[8-15] */ vu8(s0L) = vec_mergel(zero, lS0); /* (unsigned short[]) s+lx[0-7] */ vu8(s2H) = vec_mergeh(zero, lS2); /* (unsigned short[]) s+lx[8-15] */ vu8(s2L) = vec_mergel(zero, lS2); /* s[i] + s[i+lx] */ s0H = vec_add(s0H, s2H); s0L = vec_add(s0L, s2L); /* (s[i]+s[i+lx]) + 1 */ s0H = vec_add(s0H, one); s0L = vec_add(s0L, one); /* (s[i]+s[i+lx]+1) >> 1 */ s0H = vec_sra(s0H, one); s0L = vec_sra(s0L, one); /* (unsigned char[]) d[0-7], d[8-15] */ vu8(dH) = vec_packsu(s0H, s0L); vec_st(vu8(dH), 0, (unsigned char*)d); lS0 = lS2; s += lx; d += lx; } /* }}} */ }#if 0 for (j=0; j<h; j++) { for (i=0; i<w; i++) d[i] = (unsigned int)(s[i]+s[i+lx]+1)>>1; s+= lx; d+= lx; }#endif /* }}} */ } /* }}} */ } else /* !yh */ { /* (!xh && !yh) {{{ */ if (addflag) { /* {{{ */ zero = vec_splat_u8(0); one = vec_splat_u16(1); if (w == 8) { /* {{{ */ perm = vec_lvsl(0, (unsigned char*)s); if (((unsigned long)d & 0xf) == 0) { for (i = 0; i < h; i++) { lS0 = vec_ld(0, (unsigned char*)s); lS1 = vec_ld(16, (unsigned char*)s); lS0 = vec_perm(lS0, lS1, perm); lD = vec_ld(0, (unsigned char*)d); /* (unsigned short[]) s[0-7] */ vu8(s0H) = vec_mergeh(zero, lS0); /* (unsigned short[]) d[0-7] */ vu8(dH) = vec_mergeh(zero, lD); /* (unsigned short[]) d[8-15] */ vu8(dL) = vec_mergel(zero, lD); /* d[i] + s[i] */ dH = vec_add(dH, s0H); /* (d[i]+s[i]) + 1 */ dH = vec_add(dH, one); /* (d[i]+s[i]+1) >> 1 */ dH = vec_sra(dH, one); /* (unsigned char[]) d[0-7], d[8-15] */ vu8(dH) = vec_packsu(dH, dL); vec_st(vu8(dH), 0, (unsigned char*)d); s += lx; d += lx; } /* }}} */ } else { /* {{{ */ /* d is offset 8 bytes, work on dL (low bytes). */ for (i = 0; i < h; i++) { lS0 = vec_ld(0, (unsigned char*)s); lS1 = vec_ld(16, (unsigned char*)s); lS0 = vec_perm(lS0, lS1, perm); lD = vec_ld(0, (unsigned char*)d); /* (unsigned short[]) s[0-7] */ vu8(s0H) = vec_mergeh(zero, lS0); /* (unsigned short[]) d[0-7] */ vu8(dH) = vec_mergeh(zero, lD); /* (unsigned short[]) d[8-15] */ vu8(dL) = vec_mergel(zero, lD); /* d[i] + s[i] */ dL = vec_add(dL, s0H); /* (d[i]+s[i]) + 1 */ dL = vec_add(dL, one); /* (d[i]+s[i]+1) >> 1 */ dL = vec_sra(dL, one); /* (unsigned char[]) d[0-7], d[8-15] */ vu8(dH) = vec_packsu(dH, dL); vec_st(vu8(dH), 0, (unsigned char*)d); s += lx; d += lx; } /* }}} */ } } else /* w == 16 */ { /* {{{ */ perm = vec_lvsl(0, s); for (i = 0; i < h; i++) { lS0 = vec_ld(0, (unsigned char*)s); lS1 = vec_ld(16, (unsigned char*)s); lS0 = vec_perm(lS0, lS1, perm); lD = vec_ld(0, (unsigned char*)d); /* (unsigned short[]) s[0-7] */ vu8(s0H) = vec_mergeh(zero, lS0); /* (unsigned short[]) s[8-15] */ vu8(s0L) = vec_mergel(zero, lS0); /* (unsigned short[]) d[0-7] */ vu8(dH) = vec_mergeh(zero, lD); /* (unsigned short[]) d[8-15] */ vu8(dL) = vec_mergel(zero, lD); /* d[i] + s[i] */ dH = vec_add(dH, s0H); dL = vec_add(dL, s0L); /* (d[i]+s[i]) + 1 */ dH = vec_add(dH, one); dL = vec_add(dL, one); /* (d[i]+s[i]+1) >> 1 */ dH = vec_sra(dH, one); dL = vec_sra(dL, one); /* (unsigned char[]) d[0-7], d[8-15] */ vu8(dH) = vec_packsu(dH, dL); vec_st(vu8(dH), 0, (unsigned char*)d); s += lx; d += lx; } /* }}} */ }#if 0 for (j=0; j<h; j++) { for (i=0; i<w; i++) d[i] = (unsigned int)(d[i]+s[i]+1)>>1; s+= lx; d+= lx; }#endif /* }}} */ } else /* !addflag */ { /* {{{ */ if (w == 8) { /* {{{ */ /* w / 4, int size copying instead of byte size */ for (j = 0; j < h; j++) { *((unsigned int*)d) = *((unsigned int*)s); *((unsigned int*)d+1) = *((unsigned int*)s+1); s += lx; d += lx; } /* }}} */ } else /* w == 16 */ { /* {{{ */ perm = vec_lvsl(0, s); for (i = 0; i < h; i++) { lS0 = vec_ld(0, (unsigned char*)s); lS1 = vec_ld(16, (unsigned char*)s); lS0 = vec_perm(lS0, lS1, perm); vec_st(vu8(lS0), 0, (unsigned char*)d); s += lx; d += lx; } /* }}} */ } /* }}} */ } /* }}} */ } } AMBER_STOP;}#if ALTIVEC_TEST_FUNCTION(pred_comp) /* {{{ */# ifdef ALTIVEC_VERIFYvoid pred_comp_altivec_verify(PRED_COMP_PDECL){ int i, j; uint8_t *s, *d; unsigned long checksum1, checksum2; /* dst is used as input and output, it must be saved/restored */ uint8_t dstcpy[16][16]; uint8_t dc; /* save dst */ d = dst + lx*y + x; for (j = 0; j < h; j++) { for (i = 0; i < w; i++) { dstcpy[j][i] = d[i]; } d += lx; } pred_comp_altivec(PRED_COMP_ARGS); d = dst + lx*y + x; for (checksum1 = j = 0; j < h; j++) { for (i = 0; i < w; i++) { checksum1 += d[i]; } d += lx; } /* restore/swap dst & dstcpy */ d = dst + lx*y + x; for (j = 0; j < h; j++) { for (i = 0; i < w; i++) { dc = d[i]; d[i] = dstcpy[j][i]; dstcpy[j][i] = dc; } d += lx; } ALTIVEC_TEST_WITH(pred_comp)(PRED_COMP_ARGS); d = dst + lx*y + x; for (checksum2 = j = 0; j < h; j++) { for (i = 0; i < w; i++) { checksum2 += d[i]; } d += lx; } if (checksum1 != checksum2) { s = src + lx*(y+(dy>>1)) + (x+(dx>>1)); d = dst + lx*y + x; mjpeg_debug("pred_comp(" PRED_COMP_PFMT ") s=0x%X, d=0x%x", PRED_COMP_ARGS, s, d); mjpeg_debug("pred_comp: checksums differ %d != %d", checksum1, checksum2); /* d = dst + lx*y + x; */ for (j = 0; j < h; j++) { for (i = 0; i < w; i++) { if (dstcpy[j][i] != d[i]) { mjpeg_debug("pred_comp: dst[%d][%d] %d != %d", j, i, dstcpy[j][i], d[i]); } } d += lx; } }}# elseALTIVEC_TEST(pred_comp, void, (PRED_COMP_PDECL), PRED_COMP_PFMT, PRED_COMP_ARGS);# endif#endif /* }}} *//* vim:set foldmethod=marker foldlevel=0: */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -