📄 build_sub44_mests.c
字号:
while (i--) { int weight = *psad; psad++; if (SKIM(weight, threshold)) { UPDATE_THRESHOLD(weight,threshold); mres.weight = (uint16_t)(weight + DISTANCE_PENALTY(x,y)); mres.x = (int8_t)x; *cres = mres; cres++; } x += 4; } currowblk += rowstride; vx0y0 = vec_sld(vx0y1, vx0y1, 0); /* vx0y0 = vx0y1 (VPU) */ vx16y0 = vec_or(vx16y1, vx16y1); /* vx16y0 = vx16y1 (VALU) */ y += 4; } while (--j); /* }}} */ } else /* h == 4 */ { /* {{{ */ vector unsigned char vr2, vr3; vector unsigned char vx0y2, vx16y2, vx0y3, vx16y3; int rowstride2, rowstride3; rowstride2 = rowstride + rowstride; vr2 = vec_ld(rowstride2, (unsigned char*)s44blk); rowstride3 = rowstride2 + rowstride; vr3 = vec_ld(rowstride3, (unsigned char*)s44blk); vr2 = vec_perm(vr2, vr2, t1); vr3 = vec_perm(vr3, vr3, t1); curblk = currowblk + rowstride; vx0y1 = vec_ld(0, (unsigned char*)curblk); vx16y1 = vec_ld(x16, (unsigned char*)curblk); curblk += rowstride; vx0y2 = vec_ld(0, (unsigned char*)curblk); vx16y2 = vec_ld(x16, (unsigned char*)curblk); nextrowblk = curblk + rowstride; do { vx0y3 = vec_ld(0, (unsigned char*)nextrowblk); vx16y3 = vec_ld(x16, (unsigned char*)nextrowblk); nextrowblk += rowstride;#ifdef ALTIVEC_DST vec_dst(nextrowblk, dsc.control, 0);#endif shifter = vec_add(shift, perm); psad = psads = (unsigned int*)(((unsigned long)cres + 15) & (~0xf)); /* calculating sads in the X direction 4 at a time. */ i = xl1; do { sads = vec_splat_s32(0); t1 = vec_perm(vx0y0, vx16y0, shifter); t2 = vec_max(t1, vr0); /* find largest of two */ t3 = vec_min(t1, vr0); /* find smaller of two */ t3 = vec_sub(t2, t3); /* find absolute difference */ vu32(sads) = vec_sum4s(t3, vu32(sads)); t1 = vec_perm(vx0y1, vx16y1, shifter); t2 = vec_max(t1, vr1); t3 = vec_min(t1, vr1); t3 = vec_sub(t2, t3); vu32(sads) = vec_sum4s(t3, vu32(sads)); t1 = vec_perm(vx0y2, vx16y2, shifter); t2 = vec_max(t1, vr2); t3 = vec_min(t1, vr2); t3 = vec_sub(t2, t3); vu32(sads) = vec_sum4s(t3, vu32(sads)); t1 = vec_perm(vx0y3, vx16y3, shifter); t2 = vec_max(t1, vr3); t3 = vec_min(t1, vr3); t3 = vec_sub(t2, t3); vu32(sads) = vec_sum4s(t3, vu32(sads)); vec_st(vu32(sads), 0, psad); psad += 4; /* increment permute for next iteration */ shifter = vec_add(shifter, increment); } while (--i); if (xl2) { vector unsigned char vn0y0, vn16y0, vn0y1, vn16y1; vector unsigned char vn0y2, vn16y2, vn0y3, vn16y3; int i2, i3; curblk = currowblk + 16; /* update to current pointer */ vn16y0 = vec_sld(vx16y0, vx16y0, 0); vn16y1 = vec_or(vx16y1, vx16y1); vn16y2 = vec_sld(vx16y2, vx16y2, 0); vn16y3 = vec_or(vx16y3, vx16y3); i = xl2; i2 = xl3; do { curblk += 16; /* update to next pointer */ vn0y0 = vec_sld(vn16y0, vn16y0, 0); vn16y0 = vec_ld(0, (unsigned char*)curblk); vn0y1 = vec_or(vn16y1, vn16y1); vn16y1 = vec_ld(rowstride, (unsigned char*)curblk); vn0y2 = vec_sld(vn16y2, vn16y2, 0); vn16y2 = vec_ld(rowstride2, (unsigned char*)curblk); vn0y3 = vec_or(vn16y3, vn16y3); vn16y3 = vec_ld(rowstride3, (unsigned char*)curblk); shifter = vec_add(shift, perm); i3 = i2 & 0x7; i2 >>= 3; do { sads = vec_splat_s32(0); t1 = vec_perm(vn0y0, vn16y0, shifter); t2 = vec_max(t1, vr0); /* find largest of two */ t3 = vec_min(t1, vr0); /* find smaller of two */ t3 = vec_sub(t2, t3); /* find absolute difference */ vu32(sads) = vec_sum4s(t3, vu32(sads)); t1 = vec_perm(vn0y1, vn16y1, shifter); t2 = vec_max(t1, vr1); t3 = vec_min(t1, vr1); t3 = vec_sub(t2, t3); vu32(sads) = vec_sum4s(t3, vu32(sads)); t1 = vec_perm(vn0y2, vn16y2, shifter); t2 = vec_max(t1, vr2); t3 = vec_min(t1, vr2); t3 = vec_sub(t2, t3); vu32(sads) = vec_sum4s(t3, vu32(sads)); t1 = vec_perm(vn0y3, vn16y3, shifter); t2 = vec_max(t1, vr3); t3 = vec_min(t1, vr3); t3 = vec_sub(t2, t3); vu32(sads) = vec_sum4s(t3, vu32(sads)); vec_st(vu32(sads), 0, psad); psad += 4; /* increment permute for next iteration */ shifter = vec_add(shifter, increment); } while (--i3); } while (--i); }#ifdef ALTIVEC_VERIFY VERIFY_SADS(currowblk, s44blk, rowstride, h, psads, xl);#endif psad = psads; mres.y = (int8_t)y; x = xlow; i = xl >> 2; while (i--) { int w0, w1, w2, w3, tx; w0 = *psad; psad++; w1 = *psad; psad++; w2 = *psad; psad++; w3 = *psad; psad++; if (SKIM(w0, threshold)) { UPDATE_THRESHOLD(w0,threshold); mres.weight = (uint16_t)(w0 + DISTANCE_PENALTY(x,y)); mres.x = (int8_t)x; *cres = mres; cres++; } if (SKIM(w1, threshold)) { UPDATE_THRESHOLD(w1,threshold); tx = x + 4; mres.weight = (uint16_t)(w1 + DISTANCE_PENALTY(tx,y)); mres.x = (int8_t)tx; *cres = mres; cres++; } if (SKIM(w2, threshold)) { UPDATE_THRESHOLD(w2,threshold); tx = x + 8; mres.weight = (uint16_t)(w2 + DISTANCE_PENALTY(tx,y)); mres.x = (int8_t)tx; *cres = mres; cres++; } if (SKIM(w3, threshold)) { UPDATE_THRESHOLD(w3,threshold); tx = x + 12; mres.weight = (uint16_t)(w3 + DISTANCE_PENALTY(tx,y)); mres.x = (int8_t)tx; *cres = mres; cres++; } x += 16; } i = xl & 0x3; while (i--) { int weight = *psad; psad++; if (SKIM(weight, threshold)) { UPDATE_THRESHOLD(weight,threshold); mres.weight = (uint16_t)(weight + DISTANCE_PENALTY(x,y)); mres.x = (int8_t)x; *cres = mres; cres++; } x += 4; } currowblk += rowstride; vx0y0 = vec_sld(vx0y1, vx0y1, 0); /* vx0y0 = vx0y1 (VPU) */ vx16y0 = vec_or(vx16y1, vx16y1); /* vx16y0 = vx16y1 (VALU) */ vx0y1 = vec_sld(vx0y2, vx0y2, 0); /* vx0y1 = vx0y2 (VPU) */ vx16y1 = vec_or(vx16y2, vx16y2); /* vx16y1 = vx16y2 (VALU) */ vx0y2 = vec_sld(vx0y3, vx0y3, 0); /* vx0y2 = vx0y3 (VPU) */ vx16y2 = vec_or(vx16y3, vx16y3); /* vx16y2 = vx16y3 (VALU) */ y += 4; } while (--j); /* }}} */ }#ifdef ALTIVEC_DST vec_dss(0);#endif /* sub44set->len = cres - sub44set->mests; */ xl = cres - sub44set->mests; sub44set->len = xl;#ifdef AMBER_ENABLE if (stop_amber) { AMBER_STOP; }#endif#ifdef USE_SMR_PPC if (xl > 1) xl = sub_mean_reduction_ppc(xl, sub44set, 1+(reduction > 1)); return xl;#else# if ALTIVEC_TEST_FUNCTION(sub_mean_reduction) ALTIVEC_TEST_SUFFIX(sub_mean_reduction)(sub44set, 1+(reduction>1), &mean_weight);# else ALTIVEC_SUFFIX(sub_mean_reduction)(sub44set, 1+(reduction>1), &mean_weight);# endif return sub44set->len;#endif}#if 0 /* build_sub44_mests_altivec_test {{{ */int build_sub44_mests_altivec_test(me_result_set *sub44set, int ilow, int jlow, int ihigh, int jhigh, int i0, int j0, int null_ctl_sad, uint8_t *s44org, uint8_t *s44blk, int qrowstride, int qh, int reduction){ uint8_t *s44orgblk; me_result_s *sub44_mests = sub44set->mests; int istrt = ilow-i0; int jstrt = jlow-j0; int iend = ihigh-i0; int jend = jhigh-j0; int mean_weight; int threshold; int i,j; int s1; uint8_t *old_s44orgblk; int sub44_num_mests; threshold = 6*null_ctl_sad / (4*4*reduction); s44orgblk = s44org+(ilow>>2)+qrowstride*(jlow>>2); sub44_num_mests = 0; s44orgblk = s44org+(ilow>>2)+qrowstride*(jlow>>2); for (j = jstrt; j <= jend; j += 4) { old_s44orgblk = s44orgblk; for (i = istrt; i <= iend; i += 4) { s1 = ((*psad_sub44)( s44orgblk,s44blk,qrowstride,qh) & 0xffff);#ifdef THRESHOLD if (s1 < threshold) { threshold = intmin(s1<<2,threshold);#endif sub44_mests[sub44_num_mests].x = i; sub44_mests[sub44_num_mests].y = j; sub44_mests[sub44_num_mests].weight = s1 + DISTANCE_PENALTY(i,j); ++sub44_num_mests;#ifdef THRESHOLD }#endif s44orgblk += 1; } s44orgblk = old_s44orgblk + qrowstride; } sub44set->len = sub44_num_mests;#if 0 sub_mean_reduction(sub44set, 1+(reduction>1), &mean_weight);#endif return sub44set->len;}#endif /* }}} */#if ALTIVEC_TEST_FUNCTION(build_sub44_mests) /* {{{ */#define BUILD_SUB44_MESTS_PFMT \ "sub44set=0x%X, ilow=%d, jlow=%d, ihigh=%d, jhigh=%d, i0=%d, j0=%d, " \ "null_ctl_sad=%d, s44org=0x%X, s44blk=0x%X, qrowstride=%d, qh=%d, " \ "reduction=%d" # ifdef ALTIVEC_VERIFYint build_sub44_mests_altivec_verify(BUILD_SUB44_MESTS_PDECL){ int i, len1, len2; unsigned long checksum1, checksum2; len1 = _build_sub44_mests_altivec(BUILD_SUB44_MESTS_ARGS, 1 /*verify*/); for (checksum1 = i = 0; i < len1; i++) { checksum1 += sub44set->mests[i].weight; checksum1 += abs(sub44set->mests[i].x); checksum1 += abs(sub44set->mests[i].y); } len2 = ALTIVEC_TEST_WITH(build_sub44_mests)(BUILD_SUB44_MESTS_ARGS); for (checksum2 = i = 0; i < len2; i++) { checksum2 += sub44set->mests[i].weight; checksum2 += abs(sub44set->mests[i].x); checksum2 += abs(sub44set->mests[i].y); } if (len1 != len2 || checksum1 != checksum2) { mjpeg_debug("build_sub44_mests(" BUILD_SUB44_MESTS_PFMT ")", BUILD_SUB44_MESTS_ARGS); mjpeg_debug("build_sub44_mests: checksums differ %d[%d] != %d[%d]", checksum1, len1, checksum2, len2);#if 1 len1 = _build_sub44_mests_altivec(BUILD_SUB44_MESTS_ARGS, 0 /*verify*/); for (i = 0; i < len1; i++) { mjpeg_debug("A: %3d, %3d, %5d", sub44set->mests[i].x, sub44set->mests[i].y, sub44set->mests[i].weight); } len2 = ALTIVEC_TEST_WITH(build_sub44_mests)(BUILD_SUB44_MESTS_ARGS); for (i = 0; i < len2; i++) { mjpeg_debug("C: %3d, %3d, %5d", sub44set->mests[i].x, sub44set->mests[i].y, sub44set->mests[i].weight); }#endif } return len2;}static void verify_sads(unsigned char *orgblk, unsigned char* s44blk, int rowstride, int h, unsigned int *sads, int count){ unsigned int i, weight, cweight; unsigned char *pblk; pblk = orgblk; for (i = 0; i < count; i++) { weight = sads[i]; /* pblk = orgblk + (rowstride * i); */#if ALTIVEC_TEST_FUNCTION(sad_sub44) cweight = ALTIVEC_TEST_WITH(sad_sub44)(pblk,s44blk,rowstride,h) & 0xffff;#else cweight = sad_sub44(pblk,s44blk,rowstride,h) & 0xffff;#endif if (weight != cweight) mjpeg_debug("build_sub44_mests: %d != %d=" "sad_sub44(blk1=0x%X, blk2=0x%X, rowstride=%d, h=%d)", weight, cweight, pblk, s44blk, rowstride, h); pblk++; }}# else#undef BENCHMARK_FREQUENCY#define BENCHMARK_FREQUENCY 543 /* benchmark every (n) calls */#undef BENCHMARK_EPILOG#define BENCHMARK_EPILOG \ mjpeg_info("build_sub44_mests: (ihigh-ilow)/4+1=%d, (jhigh-jlow)/4+1=%d", \ (ihigh-ilow)/4+1, (jhigh-jlow)/4+1);ALTIVEC_TEST(build_sub44_mests, int, (BUILD_SUB44_MESTS_PDECL), BUILD_SUB44_MESTS_PFMT, BUILD_SUB44_MESTS_ARGS);# endif#endif /* }}} *//* vim:set foldmethod=marker foldlevel=0: */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -