📄 pred_comp.c
字号:
} else /* w == 16 */ { /* {{{ */ perm = vec_lvsl(0, s); perm1 = vec_splat_u8(1); perm1 = vec_add(perm, perm1); lS = vec_ld(0, (unsigned char*)s); lS1 = vec_ld(16, (unsigned char*)s); lS0 = vec_perm(lS, lS1, perm); lS1 = vec_perm(lS, lS1, perm1); s += lx; for (i = 0; i < h; i++) { lS = vec_ld(0, (unsigned char*)s); lS3 = vec_ld(16, (unsigned char*)s); lS2 = vec_perm(lS, lS3, perm); lS3 = vec_perm(lS, lS3, perm1); /* (unsigned short[]) s[0-7] */ vu8(s0H) = vec_mergeh(zero, lS0); /* (unsigned short[]) s[8-15] */ vu8(s0L) = vec_mergel(zero, lS0); /* (unsigned short[]) s[1-8] */ vu8(s1H) = vec_mergeh(zero, lS1); /* (unsigned short[]) s[9-16] */ vu8(s1L) = vec_mergel(zero, lS1); /* (unsigned short[]) s+lx[0-7] */ vu8(s2H) = vec_mergeh(zero, lS2); /* (unsigned short[]) s+lx[8-15] */ vu8(s2L) = vec_mergel(zero, lS2); /* (unsigned short[]) s+lx[1-8] */ vu8(s3H) = vec_mergeh(zero, lS3); /* (unsigned short[]) s+lx[9-16] */ vu8(s3L) = vec_mergel(zero, lS3); /* s[i] + s[i+1] */ s0H = vec_add(s0H, s1H); s0L = vec_add(s0L, s1L); /* s[i+lx] + s[i+lx+1] */ s2H = vec_add(s2H, s3H); s2L = vec_add(s2L, s3L); /* (s[i]+s[i+1]) + (s[i+lx]+s[i+lx+1]) */ s0H = vec_add(s0H, s2H); s0L = vec_add(s0L, s2L); /* (s[i]+s[i+1]+s[i+lx]+s[i+lx+1]) + 2 */ s0H = vec_add(s0H, two); s0L = vec_add(s0L, two); /* (s[i]+s[i+1]+s[i+lx]+s[i+lx+1]+2) >> 2 */ s0H = vec_sra(s0H, two); s0L = vec_sra(s0L, two); /* (unsigned char[]) d[0-7], d[8-15] */ vu8(dH) = vec_packsu(s0H, s0L); vec_st(vu8(dH), 0, (unsigned char*)d); lS0 = lS2; lS1 = lS3; s += lx; d += lx; } /* }}} */ }#if 0 for (j=0; j<h; j++) { for (i=0; i<w; i++) d[i] = (unsigned int)(s[i]+s[i+1]+s[i+lx]+s[i+lx+1]+2)>>2; s+= lx; d+= lx; }#endif /* }}} */ } /* }}} */ } else /* !yh */ { /* (xh && !yh) {{{ */ if (addflag) { /* {{{ */ zero = vec_splat_u8(0); one = vec_splat_u16(1); if (w == 8) { /* {{{ */ eight = vec_splat_u8(8); perm = vec_lvsl(0, (unsigned char*)s); if (((unsigned long)d & 0xf) == 0) { for (i = 0; i < h; i++) { lS0 = vec_ld(0, (unsigned char*)s); lS1 = vec_ld(16, (unsigned char*)s); lS0 = vec_perm(lS0, lS1, perm); lS1 = vec_slo(lS0, eight); lD = vec_ld(0, (unsigned char*)d); /* (unsigned short[]) s[0-7] */ vu8(s0H) = vec_mergeh(zero, lS0); /* (unsigned short[]) s[1-8] */ vu8(s1H) = vec_mergeh(zero, lS1); /* (unsigned short[]) d[0-7] */ vu8(dH) = vec_mergeh(zero, lD); /* (unsigned short[]) d[8-15] */ vu8(dL) = vec_mergel(zero, lD); /* s[i] + s[i+1] */ s0H = vec_add(s0H, s1H); /* (s[i]+s[i+1]) + 1 */ s0H = vec_add(s0H, one); /* (s[i]+s[i+1]+1) >> 1 */ s0H = vec_sra(s0H, one); /* d[i] + ((s[i]+s[i+1]+1)>>1) */ dH = vec_add(dH, s0H); /* (d[i]+((s[i]+s[i+1]+1)>>1)) + 1 */ dH = vec_add(dH, one); /* ((d[i]+((s[i]+s[i+1]+1)>>1))+1) >> 1 */ dH = vec_sra(dH, one); /* (unsigned char[]) d[0-7], d[8-15] */ vu8(dH) = vec_packsu(dH, dL); vec_st(vu8(dH), 0, (unsigned char*)d); s += lx; d += lx; } /* }}} */ } else { /* {{{ */ /* d is offset 8 bytes, work on dL (low bytes). */ for (i = 0; i < h; i++) { lS0 = vec_ld(0, (unsigned char*)s); lS1 = vec_ld(16, (unsigned char*)s); lS0 = vec_perm(lS0, lS1, perm); lS1 = vec_slo(lS0, eight); lD = vec_ld(0, (unsigned char*)d); /* (unsigned short[]) s[0-7] */ vu8(s0H) = vec_mergeh(zero, lS0); /* (unsigned short[]) s[1-8] */ vu8(s1H) = vec_mergeh(zero, lS1); /* (unsigned short[]) d[8-15] */ vu8(dL) = vec_mergel(zero, lD); /* (unsigned short[]) d[0-7] */ vu8(dH) = vec_mergeh(zero, lD); /* s[i] + s[i+1] */ s0H = vec_add(s0H, s1H); /* (s[i]+s[i+1]) + 1 */ s0H = vec_add(s0H, one); /* (s[i]+s[i+1]+1) >> 1 */ s0H = vec_sra(s0H, one); /* d[i] + ((s[i]+s[i+1]+1)>>1) */ dL = vec_add(dL, s0H); /* (d[i]+((s[i]+s[i+1]+1)>>1)) + 1 */ dL = vec_add(dL, one); /* ((d[i]+((s[i]+s[i+1]+1)>>1))+1) >> 1 */ dL = vec_sra(dL, one); /* (unsigned char[]) d[0-7], d[8-15] */ vu8(dH) = vec_packsu(dH, dL); vec_st(vu8(dH), 0, (unsigned char*)d); s += lx; d += lx; } /* }}} */ } } else /* w == 16 */ { /* {{{ */ perm = vec_lvsl(0, s); perm1 = vec_splat_u8(1); perm1 = vec_add(perm, perm1); for (i = 0; i < h; i++) { lS = vec_ld(0, (unsigned char*)s); lS1 = vec_ld(16, (unsigned char*)s); lS0 = vec_perm(lS, lS1, perm); lS1 = vec_perm(lS, lS1, perm1); lD = vec_ld(0, (unsigned char*)d); /* (unsigned short[]) s[0-7] */ vu8(s0H) = vec_mergeh(zero, lS0); /* (unsigned short[]) s[8-15] */ vu8(s0L) = vec_mergel(zero, lS0); /* (unsigned short[]) s[1-8] */ vu8(s1H) = vec_mergeh(zero, lS1); /* (unsigned short[]) s[9-16] */ vu8(s1L) = vec_mergel(zero, lS1); /* (unsigned short[]) d[0-7] */ vu8(dH) = vec_mergeh(zero, lD); /* (unsigned short[]) d[8-15] */ vu8(dL) = vec_mergel(zero, lD); /* s[i] + s[i+1] */ s0H = vec_add(s0H, s1H); s0L = vec_add(s0L, s1L); /* (s[i]+s[i+1]) + 1 */ s0H = vec_add(s0H, one); s0L = vec_add(s0L, one); /* (s[i]+s[i+1]+1) >> 1 */ s0H = vec_sra(s0H, one); s0L = vec_sra(s0L, one); /* d[i] + ((s[i]+s[i+1]+1)>>1) */ dH = vec_add(dH, s0H); dL = vec_add(dL, s0L); /* (d[i]+((s[i]+s[i+1]+1)>>1)) + 1 */ dH = vec_add(dH, one); dL = vec_add(dL, one); /* ((d[i]+((s[i]+s[i+1]+1)>>1))+1) >> 1 */ dH = vec_sra(dH, one); dL = vec_sra(dL, one); /* (unsigned char[]) d[0-7], d[8-15] */ vu8(dH) = vec_packsu(dH, dL); vec_st(vu8(dH), 0, (unsigned char*)d); s += lx; d += lx; }#if 0 for (j=0; j<h; j++) { for (i=0; i<w; i++) d[i] = (d[i] + ((unsigned int)(s[i]+s[i+1]+1)>>1)+1)>>1; s += lx; d += lx; }#endif /* }}} */ } /* }}} */ } else /* !addflag */ { /* {{{ */ zero = vec_splat_u8(0); one = vec_splat_u16(1); if (w == 8) { /* {{{ */ eight = vec_splat_u8(8); perm = vec_lvsl(0, (unsigned char*)s); if (((unsigned long)d & 0xf) == 0) { /* {{{ */ for (i = 0; i < h; i++) { lS0 = vec_ld(0, (unsigned char*)s); lS1 = vec_ld(16, (unsigned char*)s); lS0 = vec_perm(lS0, lS1, perm); lS1 = vec_slo(lS0, eight); lD = vec_ld(0, (unsigned char*)d); /* (unsigned short[]) s[0-7] */ vu8(s0H) = vec_mergeh(zero, lS0); /* (unsigned short[]) s[1-8] */ vu8(s1H) = vec_mergeh(zero, lS1); /* (unsigned short[]) d[8-15] */ vu8(dL) = vec_mergel(zero, lD); /* s[i] + s[i+1] */ s0H = vec_add(s0H, s1H); /* (s[i]+s[i+1]) + 1 */ s0H = vec_add(s0H, one); /* (s[i]+s[i+1]+1) >> 1 */ s0H = vec_sra(s0H, one); /* (unsigned char[]) d[0-7], d[8-15] */ vu8(dH) = vec_packsu(s0H, dL); vec_st(vu8(dH), 0, (unsigned char*)d); s += lx; d += lx; } /* }}} */ } else { /* {{{ */ /* d is offset 8 bytes, work on dL (low bytes). */ for (i = 0; i < h; i++) { lS0 = vec_ld(0, (unsigned char*)s); lS1 = vec_ld(16, (unsigned char*)s); lS0 = vec_perm(lS0, lS1, perm); lS1 = vec_slo(lS0, eight); lD = vec_ld(0, (unsigned char*)d); /* (unsigned short[]) s[0-7] */ vu8(s0H) = vec_mergeh(zero, lS0); /* (unsigned short[]) s[1-8] */ vu8(s1H) = vec_mergeh(zero, lS1); /* (unsigned short[]) d[0-7] */ vu8(dH) = vec_mergeh(zero, lD); /* s[i] + s[i+1] */ s0H = vec_add(s0H, s1H); /* (s[i]+s[i+1]) + 1 */ s0H = vec_add(s0H, one); /* (s[i]+s[i+1]+1) >> 1 */ s0H = vec_sra(s0H, one); /* (unsigned char[]) d[0-7], d[8-15] */ vu8(dH) = vec_packsu(dH, s0H); vec_st(vu8(dH), 0, (unsigned char*)d); s += lx; d += lx; } /* }}} */ } /* }}} */ } else /* w == 16 */ { /* {{{ */ perm = vec_lvsl(0, s); perm1 = vec_splat_u8(1); perm1 = vec_add(perm, perm1); for (i = 0; i < h; i++) { lS = vec_ld(0, (unsigned char*)s); lS1 = vec_ld(16, (unsigned char*)s); lS0 = vec_perm(lS, lS1, perm); lS1 = vec_perm(lS, lS1, perm1); /* (unsigned short[]) s[0-7] */ vu8(s0H) = vec_mergeh(zero, lS0); /* (unsigned short[]) s[8-15] */ vu8(s0L) = vec_mergel(zero, lS0); /* (unsigned short[]) s[1-8] */ vu8(s1H) = vec_mergeh(zero, lS1); /* (unsigned short[]) s[9-16] */ vu8(s1L) = vec_mergel(zero, lS1); /* s[i] + s[i+1] */ s0H = vec_add(s0H, s1H); s0L = vec_add(s0L, s1L); /* (s[i]+s[i+1]) + 1 */ s0H = vec_add(s0H, one); s0L = vec_add(s0L, one); /* (s[i]+s[i+1]+1) >> 1 */ s0H = vec_sra(s0H, one); s0L = vec_sra(s0L, one); /* (unsigned char[]) d[0-7], d[8-15] */ vu8(dH) = vec_packsu(s0H, s0L); vec_st(vu8(dH), 0, (unsigned char*)d); s += lx; d += lx; } /* }}} */ }#if 0 for (j=0; j<h; j++) { for (i=0; i<w; i++) d[i] = (unsigned int)(s[i]+s[i+1]+1)>>1; s+= lx; d+= lx; }#endif /* }}} */ } /* }}} */ } } else /* !xh */ { if (yh) { /* (!xh && yh) {{{ */ if (addflag) { /* {{{ */ zero = vec_splat_u8(0); one = vec_splat_u16(1); if (w == 8) { /* {{{ */ perm = vec_lvsl(0, (unsigned char*)s); if (((unsigned long)d & 0xf) == 0) { /* {{{ */ lS0 = vec_ld(0, (unsigned char*)s); lS1 = vec_ld(16, (unsigned char*)s); lS0 = vec_perm(lS0, lS1, perm); s += lx; for (i = 0; i < h; i++) { lS2 = vec_ld(0, (unsigned char*)s); lS3 = vec_ld(16, (unsigned char*)s); lS2 = vec_perm(lS2, lS3, perm); lD = vec_ld(0, (unsigned char*)d); /* (unsigned short[]) s[0-7] */ vu8(s0H) = vec_mergeh(zero, lS0); /* (unsigned short[]) s+lx[0-7] */ vu8(s2H) = vec_mergeh(zero, lS2); /* (unsigned short[]) d[0-7] */ vu8(dH) = vec_mergeh(zero, lD); /* (unsigned short[]) d[8-15] */ vu8(dL) = vec_mergel(zero, lD); /* s[i] + s[i+lx] */ s0H = vec_add(s0H, s2H); /* (s[i]+s[i+lx]) + 1 */ s0H = vec_add(s0H, one); /* (s[i]+s[i+lx]+1) >> 1 */ s0H = vec_sra(s0H, one); /* d[i] + ((s[i]+s[i+lx]+1)>>1) */ dH = vec_add(dH, s0H); /* (d[i]+((s[i]+s[i+lx]+1)>>1)) + 1 */ dH = vec_add(dH, one); /* ((d[i]+((s[i]+s[i+lx]+1)>>1))+1) >> 1 */ dH = vec_sra(dH, one); /* (unsigned char[]) d[0-7], d[8-15] */ vu8(dH) = vec_packsu(dH, dL); vec_st(vu8(dH), 0, (unsigned char*)d); lS0 = lS2; s += lx; d += lx; } /* }}} */ } else { /* {{{ */ /* d is offset 8 bytes, work on dL (low bytes). */ lS0 = vec_ld(0, (unsigned char*)s); lS1 = vec_ld(16, (unsigned char*)s); lS0 = vec_perm(lS0, lS1, perm); s += lx; for (i = 0; i < h; i++) { lS2 = vec_ld(0, (unsigned char*)s); lS3 = vec_ld(16, (unsigned char*)s); lS2 = vec_perm(lS2, lS3, perm); lD = vec_ld(0, (unsigned char*)d); /* (unsigned short[]) s[0-7] */ vu8(s0H) = vec_mergeh(zero, lS0); /* (unsigned short[]) s+lx[0-7] */ vu8(s2H) = vec_mergeh(zero, lS2); /* (unsigned short[]) d[0-7] */ vu8(dH) = vec_mergeh(zero, lD); /* (unsigned short[]) d[8-15] */ vu8(dL) = vec_mergel(zero, lD); /* s[i] + s[i+lx] */ s0H = vec_add(s0H, s2H); /* (s[i]+s[i+lx]) + 1 */ s0H = vec_add(s0H, one);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -