📄 mpegvideo_altivec.c
字号:
SWAP(row5, alt5); SWAP(row6, alt6); SWAP(row7, alt7); } if (whichPass == 1) { // transpose the data for the second pass // First, block transpose the upper right with lower left. SWAP(row4, alt0); SWAP(row5, alt1); SWAP(row6, alt2); SWAP(row7, alt3); // Now, transpose each block of four TRANSPOSE4(row0, row1, row2, row3); TRANSPOSE4(row4, row5, row6, row7); TRANSPOSE4(alt0, alt1, alt2, alt3); TRANSPOSE4(alt4, alt5, alt6, alt7); } } } // used after quantise step int oldBaseValue = 0; // perform the quantise step, using the floating point data // still in the row/alt registers { const int* biasAddr; const vector signed int* qmat; vector float bias, negBias; if (s->mb_intra) { vector signed int baseVector; // We must cache element 0 in the intra case // (it needs special handling). baseVector = vec_cts(vec_splat(row0, 0), 0); vec_ste(baseVector, 0, &oldBaseValue); qmat = (vector signed int*)s->q_intra_matrix[qscale]; biasAddr = &(s->intra_quant_bias); } else { qmat = (vector signed int*)s->q_inter_matrix[qscale]; biasAddr = &(s->inter_quant_bias); } // Load the bias vector (We add 0.5 to the bias so that we're // rounding when we convert to int, instead of flooring.) { vector signed int biasInt; const vector float negOneFloat = (vector float)(FOUR_INSTANCES(-1.0f)); LOAD4(biasInt, biasAddr); bias = vec_ctf(biasInt, QUANT_BIAS_SHIFT); negBias = vec_madd(bias, negOneFloat, zero); } { vector float q0, q1, q2, q3, q4, q5, q6, q7; q0 = vec_ctf(qmat[0], QMAT_SHIFT); q1 = vec_ctf(qmat[2], QMAT_SHIFT); q2 = vec_ctf(qmat[4], QMAT_SHIFT); q3 = vec_ctf(qmat[6], QMAT_SHIFT); q4 = vec_ctf(qmat[8], QMAT_SHIFT); q5 = vec_ctf(qmat[10], QMAT_SHIFT); q6 = vec_ctf(qmat[12], QMAT_SHIFT); q7 = vec_ctf(qmat[14], QMAT_SHIFT); row0 = vec_sel(vec_madd(row0, q0, negBias), vec_madd(row0, q0, bias), vec_cmpgt(row0, zero)); row1 = vec_sel(vec_madd(row1, q1, negBias), vec_madd(row1, q1, bias), vec_cmpgt(row1, zero)); row2 = vec_sel(vec_madd(row2, q2, negBias), vec_madd(row2, q2, bias), vec_cmpgt(row2, zero)); row3 = vec_sel(vec_madd(row3, q3, negBias), vec_madd(row3, q3, bias), vec_cmpgt(row3, zero)); row4 = vec_sel(vec_madd(row4, q4, negBias), vec_madd(row4, q4, bias), vec_cmpgt(row4, zero)); row5 = vec_sel(vec_madd(row5, q5, negBias), vec_madd(row5, q5, bias), vec_cmpgt(row5, zero)); row6 = vec_sel(vec_madd(row6, q6, negBias), vec_madd(row6, q6, bias), vec_cmpgt(row6, zero)); row7 = vec_sel(vec_madd(row7, q7, negBias), vec_madd(row7, q7, bias), vec_cmpgt(row7, zero)); q0 = vec_ctf(qmat[1], QMAT_SHIFT); q1 = vec_ctf(qmat[3], QMAT_SHIFT); q2 = vec_ctf(qmat[5], QMAT_SHIFT); q3 = vec_ctf(qmat[7], QMAT_SHIFT); q4 = vec_ctf(qmat[9], QMAT_SHIFT); q5 = vec_ctf(qmat[11], QMAT_SHIFT); q6 = vec_ctf(qmat[13], QMAT_SHIFT); q7 = vec_ctf(qmat[15], QMAT_SHIFT); alt0 = vec_sel(vec_madd(alt0, q0, negBias), vec_madd(alt0, q0, bias), vec_cmpgt(alt0, zero)); alt1 = vec_sel(vec_madd(alt1, q1, negBias), vec_madd(alt1, q1, bias), vec_cmpgt(alt1, zero)); alt2 = vec_sel(vec_madd(alt2, q2, negBias), vec_madd(alt2, q2, bias), vec_cmpgt(alt2, zero)); alt3 = vec_sel(vec_madd(alt3, q3, negBias), vec_madd(alt3, q3, bias), vec_cmpgt(alt3, zero)); alt4 = vec_sel(vec_madd(alt4, q4, negBias), vec_madd(alt4, q4, bias), vec_cmpgt(alt4, zero)); alt5 = vec_sel(vec_madd(alt5, q5, negBias), vec_madd(alt5, q5, bias), vec_cmpgt(alt5, zero)); alt6 = vec_sel(vec_madd(alt6, q6, negBias), vec_madd(alt6, q6, bias), vec_cmpgt(alt6, zero)); alt7 = vec_sel(vec_madd(alt7, q7, negBias), vec_madd(alt7, q7, bias), vec_cmpgt(alt7, zero)); } } // Store the data back into the original block { vector signed short data0, data1, data2, data3, data4, data5, data6, data7; data0 = vec_pack(vec_cts(row0, 0), vec_cts(alt0, 0)); data1 = vec_pack(vec_cts(row1, 0), vec_cts(alt1, 0)); data2 = vec_pack(vec_cts(row2, 0), vec_cts(alt2, 0)); data3 = vec_pack(vec_cts(row3, 0), vec_cts(alt3, 0)); data4 = vec_pack(vec_cts(row4, 0), vec_cts(alt4, 0)); data5 = vec_pack(vec_cts(row5, 0), vec_cts(alt5, 0)); data6 = vec_pack(vec_cts(row6, 0), vec_cts(alt6, 0)); data7 = vec_pack(vec_cts(row7, 0), vec_cts(alt7, 0)); { // Clamp for overflow vector signed int max_q_int, min_q_int; vector signed short max_q, min_q; LOAD4(max_q_int, &(s->max_qcoeff)); LOAD4(min_q_int, &(s->min_qcoeff)); max_q = vec_pack(max_q_int, max_q_int); min_q = vec_pack(min_q_int, min_q_int); data0 = vec_max(vec_min(data0, max_q), min_q); data1 = vec_max(vec_min(data1, max_q), min_q); data2 = vec_max(vec_min(data2, max_q), min_q); data4 = vec_max(vec_min(data4, max_q), min_q); data5 = vec_max(vec_min(data5, max_q), min_q); data6 = vec_max(vec_min(data6, max_q), min_q); data7 = vec_max(vec_min(data7, max_q), min_q); } vector bool char zero_01, zero_23, zero_45, zero_67; vector signed char scanIndices_01, scanIndices_23, scanIndices_45, scanIndices_67; vector signed char negOne = vec_splat_s8(-1); vector signed char* scanPtr = (vector signed char*)(s->intra_scantable.inverse); // Determine the largest non-zero index. zero_01 = vec_pack(vec_cmpeq(data0, (vector short)zero), vec_cmpeq(data1, (vector short)zero)); zero_23 = vec_pack(vec_cmpeq(data2, (vector short)zero), vec_cmpeq(data3, (vector short)zero)); zero_45 = vec_pack(vec_cmpeq(data4, (vector short)zero), vec_cmpeq(data5, (vector short)zero)); zero_67 = vec_pack(vec_cmpeq(data6, (vector short)zero), vec_cmpeq(data7, (vector short)zero)); // 64 biggest values scanIndices_01 = vec_sel(scanPtr[0], negOne, zero_01); scanIndices_23 = vec_sel(scanPtr[1], negOne, zero_23); scanIndices_45 = vec_sel(scanPtr[2], negOne, zero_45); scanIndices_67 = vec_sel(scanPtr[3], negOne, zero_67); // 32 largest values scanIndices_01 = vec_max(scanIndices_01, scanIndices_23); scanIndices_45 = vec_max(scanIndices_45, scanIndices_67); // 16 largest values scanIndices_01 = vec_max(scanIndices_01, scanIndices_45); // 8 largest values scanIndices_01 = vec_max(vec_mergeh(scanIndices_01, negOne), vec_mergel(scanIndices_01, negOne)); // 4 largest values scanIndices_01 = vec_max(vec_mergeh(scanIndices_01, negOne), vec_mergel(scanIndices_01, negOne)); // 2 largest values scanIndices_01 = vec_max(vec_mergeh(scanIndices_01, negOne), vec_mergel(scanIndices_01, negOne)); // largest value scanIndices_01 = vec_max(vec_mergeh(scanIndices_01, negOne), vec_mergel(scanIndices_01, negOne)); scanIndices_01 = vec_splat(scanIndices_01, 0); signed char lastNonZeroChar; vec_ste(scanIndices_01, 0, &lastNonZeroChar); lastNonZero = lastNonZeroChar; // While the data is still in vectors we check for the transpose IDCT permute // and handle it using the vector unit if we can. This is the permute used // by the altivec idct, so it is common when using the altivec dct. if ((lastNonZero > 0) && (s->idct_permutation_type == FF_TRANSPOSE_IDCT_PERM)) { TRANSPOSE8(data0, data1, data2, data3, data4, data5, data6, data7); } vec_st(data0, 0, data); vec_st(data1, 16, data); vec_st(data2, 32, data); vec_st(data3, 48, data); vec_st(data4, 64, data); vec_st(data5, 80, data); vec_st(data6, 96, data); vec_st(data7, 112, data); } // special handling of block[0] if (s->mb_intra) { if (!s->h263_aic) { if (n < 4) oldBaseValue /= s->y_dc_scale; else oldBaseValue /= s->c_dc_scale; } // Divide by 8, rounding the result data[0] = (oldBaseValue + 4) >> 3; } // We handled the tranpose permutation above and we don't // need to permute the "no" permutation case. if ((lastNonZero > 0) && (s->idct_permutation_type != FF_TRANSPOSE_IDCT_PERM) && (s->idct_permutation_type != FF_NO_IDCT_PERM)) { ff_block_permute(data, s->idct_permutation, s->intra_scantable.scantable, lastNonZero); } return lastNonZero;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -