📄 rle_kc.i
字号:
zigzag[16+store_idx[6]] = int(commucperm(perm_6, CrCb[send_idx[6]]));
zigzag[0+store_idx[7]] = int(commucperm(perm_7, Y2Y1[send_idx[7]]));
zigzag[8+store_idx[7]] = int(commucperm(perm_7, Y4Y3[send_idx[7]]));
zigzag[16+store_idx[7]] = int(commucperm(perm_7, CrCb[send_idx[7]]));
// Stage: run-length encode this cluster's zigzag[] words into rle_out[].
// Every zigzag word is read as a half2 and its low and high 16-bit halves are
// encoded as two independent sequences: the low half goes to
// rle_out[data_idx ...] and the high half to rle_out[data_idx + 8 ...].
// NOTE(review): zigzag and Iframe are defined outside this excerpt.
// NOTE(review): k is declared here but is not used in the visible code.
uc<int> j, k;
cc ac_zero;
cc all_non_zero;
int src_idx = 0;
int data_idx = 0;
int lo_idx;
int hi_idx;
int hi_mask, lo_mask;
int run_level;
half2 run;
half2 ac_val;
int lo_ac_val;
int hi_ac_val;
int run_idx = 0;
int tmp_inc;
int hi_inc;
array<int> rle_out(96);
array<int> runs(12);
// Pre-fill rle_out[0..47] with -1: 12 iterations of 4 stores each
// (assuming loop_count(i) runs its body i times -- TODO confirm).
uc<int> i = 12;
loop_count(i) {
rle_out[0+data_idx] = -1;
rle_out[1+data_idx] = -1;
rle_out[2+data_idx] = -1;
rle_out[3+data_idx] = -1;
data_idx = data_idx + 4;
}
// h2_one carries the value 1 in both 16-bit halves.
half2 h2_one = 1 | half2(shift(1, 16));
// Presumably 0x0000FFFF and 0xFFFF0000, i.e. shift() with a negative count is
// a logical right shift -- TODO confirm against the KernelC reference.
lo_mask = shift(-1, -16);
hi_mask = shift(-1, 16);
data_idx = 0;
// Three passes. Each pass consumes 8 zigzag words (one leading word plus 7 in
// the inner loop) and owns a 16-entry window of rle_out starting at data_idx.
i = 3;
loop_count(i) {
lo_idx = data_idx;
hi_idx = data_idx + 8;
// Leading word of the pass: both halves are stored as plain values with no
// run count in the upper 16 bits.
ac_val = half2(zigzag[src_idx]);
ac_zero = itocc(int(ac_val == 0));
lo_ac_val = int(ac_val) & lo_mask;
run_level = lo_ac_val;
rle_out[lo_idx] = run_level;
hi_ac_val = shift(int(ac_val), -16);
run_level = hi_ac_val;
rle_out[hi_idx] = run_level;
src_idx = src_idx + 1;
// The leading entry is always kept when Iframe is set; otherwise the write
// index only advances past it when the value was non-zero.
lo_idx = lo_idx + select(Iframe, 1, select(ac_zero, 0, 1));
// Same decision for the high half: build a per-half 0/1 increment and move
// the high half's increment down into the low 16 bits.
tmp_inc = int(select(ac_zero, half2(0), h2_one));
hi_inc = shift(tmp_inc, -16);
hi_idx = hi_idx + select(Iframe, 1, hi_inc);
// Initial run counters: 0 when Iframe is set, otherwise 1 in each half whose
// leading value was zero.
run = half2(select(Iframe, 0, int(select(ac_zero, h2_one, 0))));
j = 7;
loop_count(j) {
ac_val = half2(zigzag[src_idx]);
ac_zero = itocc(int(ac_val == 0));
// Low half: value in bits 0..15, current low-half run count in bits 16..31.
run_level = (int(ac_val) & lo_mask) | shift(int(run), 16);
rle_out[lo_idx] = run_level;
// High half: value shifted down to bits 0..15, high-half run count kept in
// bits 16..31.
run_level = shift(int(ac_val), -16) | (int(run) & hi_mask);
rle_out[hi_idx] = run_level;
// Zero value: bump the run counter. Non-zero value: reset it to 0.
run = select(ac_zero, (run + h2_one), 0);
src_idx = src_idx + 1;
// The entry just written is kept only when the value was non-zero; otherwise
// the index stays put and the next store overwrites it.
lo_idx = select(ac_zero, lo_idx, lo_idx + 1);
// Shift the comparison result down by 16 so the high half's zero test drives
// the hi_idx update (presumably select() on an int looks at the low part of
// the condition code -- TODO confirm).
ac_zero = itocc(shift(cctoi(ac_zero), -16));
hi_idx = select(ac_zero, hi_idx, hi_idx + 1);
}
// Save the run counters left over at the end of the pass (low half, then high
// half) for the later stages.
runs[0+run_idx] = int(run) & lo_mask;
runs[1+run_idx] = shift(int(run), -16);
run_idx = run_idx + 2;
// If the low-half sequence did not fill all 8 slots, write a 0 into the next
// free slot; when it is full the existing value at that index is rewritten
// unchanged.
all_non_zero = itocc(lo_idx == (data_idx + 8));
rle_out[lo_idx] = select( all_non_zero, int(rle_out[lo_idx]), 0);
// data_idx is advanced first, so the high-half "full" test compares hi_idx
// against the end of this pass's 16-entry window.
data_idx = data_idx + 16;
all_non_zero = itocc(hi_idx == data_idx);
rle_out[hi_idx] = select(all_non_zero, int(rle_out[hi_idx]), 0);
}
// Stage: patch the run field of the first entry of each 8-entry row of
// rle_out using run counts handed from cluster to cluster with commucperm.
// NOTE(review): clust_zero and Iframe are defined outside this excerpt; the
// exact hand-off protocol depends on commucperm semantics that are not visible
// here, so the comments below only restate what each statement computes.
cc cluster_one;
cc invalid;
cc send_run;
cc all_zeros;
int true_cc;
int done;
int blocks_left;
int run_token;
int orig_run_level;
int new_run_level;
int block_idx;
// Presumably one source-cluster nibble per destination cluster (cluster n
// reads from cluster n-1, cluster 0 from itself) -- TODO confirm.
uc<int> perm_token = 0x65432100;
cluster_one = itocc(cid() == 1);
true_cc = -1;
blocks_left = 6;
// Initially only the cluster with cid() == 1 is not "invalid"; only
// clust_zero starts with send_run set.
invalid = itocc(select(cluster_one, 0, true_cc));
send_run = clust_zero;
run_idx = 0;
block_idx = 0;
run_token = runs[0];
i = 12;
loop_count(i) {
orig_run_level = rle_out[block_idx];
// When Iframe is set the comparison is 1 == 0, so all_zeros is never set;
// otherwise it is set when the row's first entry is 0.
all_zeros = itocc(select(Iframe, 1, orig_run_level) == 0);
// Exchange run tokens between clusters according to perm_token.
run_token = commucperm(perm_token, run_token);
// Add the received token to the upper 16 bits of the entry.
new_run_level = orig_run_level + shift(run_token, 16);
// invalid: keep the entry as is; all_zeros: store 0; otherwise store the
// entry with the adjusted upper half.
rle_out[block_idx] = select(invalid, orig_run_level,
select(all_zeros, 0, new_run_level));
blocks_left = blocks_left - 1;
done = blocks_left == 0;
run_idx = select(send_run, run_idx + 1, run_idx);
// Move to the next row (8 entries further) when this cluster is not invalid,
// and additionally whenever clust_zero is set.
block_idx = select(invalid, block_idx, block_idx + 8);
block_idx = select(clust_zero, block_idx + 8, block_idx);
int next_run = runs[run_idx];
// Token for the next step: runs[run_idx], except that when all_zeros is set
// on a cluster without clust_zero the received token plus 8 is passed on.
run_token = select(all_zeros, select(clust_zero, next_run, run_token + 8), next_run);
// The invalid / send_run flags travel along the same permutation; the
// cluster_one / clust_zero ends are driven from the local done flag instead.
invalid = itocc(select(cluster_one, done, commucperm(perm_token, cctoi(invalid))));
send_run = itocc(select(clust_zero, ~done, commucperm(perm_token, cctoi(send_run))));
}
// Stage: build cbp, one bit per 8-entry row of rle_out (rows start at indices
// 0, 8, 16, 24, 32 and 40 and map to bit values 32, 16, 8, 4, 2 and 1).
// For each row, zerorow is this cluster's "first entry == 0" flag; three
// commucperm exchanges AND the flags together, and the row's bit is set in cbp
// unless the combined flag is true.
// Presumably the three permutations form a reduction over all 8 clusters, and
// cbp is a coded-block-pattern mask -- TODO confirm.
int cbp, zerorow, allzero;
cc az;
uc<int> perm_combine_1 = 0x77553311;
uc<int> perm_combine_2 = 0x66662222;
uc<int> perm_combine_3 = 0x44444444;
// Row at rle_out[0] -> bit value 32.
zerorow = (rle_out[0] == 0);
allzero = zerorow & commucperm(perm_combine_1, zerorow);
allzero = allzero & commucperm(perm_combine_2, allzero);
allzero = allzero & commucperm(perm_combine_3, allzero);
az = itocc(allzero);
cbp = select(az, 0, 32);
// Row at rle_out[8] -> bit value 16.
zerorow = (rle_out[8] == 0);
allzero = zerorow & commucperm(perm_combine_1, zerorow);
allzero = allzero & commucperm(perm_combine_2, allzero);
allzero = allzero & commucperm(perm_combine_3, allzero);
az = itocc(allzero);
cbp = cbp | select(az, 0, 16);
// Row at rle_out[16] -> bit value 8.
zerorow = (rle_out[16] == 0);
allzero = zerorow & commucperm(perm_combine_1, zerorow);
allzero = allzero & commucperm(perm_combine_2, allzero);
allzero = allzero & commucperm(perm_combine_3, allzero);
az = itocc(allzero);
cbp = cbp | select(az, 0, 8);
// Row at rle_out[24] -> bit value 4.
zerorow = (rle_out[24] == 0);
allzero = zerorow & commucperm(perm_combine_1, zerorow);
allzero = allzero & commucperm(perm_combine_2, allzero);
allzero = allzero & commucperm(perm_combine_3, allzero);
az = itocc(allzero);
cbp = cbp | select(az, 0, 4);
// Row at rle_out[32] -> bit value 2.
zerorow = (rle_out[32] == 0);
allzero = zerorow & commucperm(perm_combine_1, zerorow);
allzero = allzero & commucperm(perm_combine_2, allzero);
allzero = allzero & commucperm(perm_combine_3, allzero);
az = itocc(allzero);
cbp = cbp | select(az, 0, 2);
// Row at rle_out[40] -> bit value 1.
zerorow = (rle_out[40] == 0);
allzero = zerorow & commucperm(perm_combine_1, zerorow);
allzero = allzero & commucperm(perm_combine_2, allzero);
allzero = allzero & commucperm(perm_combine_3, allzero);
az = itocc(allzero);
cbp = cbp | select(az, 0, 1);
// Stage: redistribute the six 8-entry rows of rle_out into output_array.
// idx0 is this cluster's id and idx1..idx7 step backwards from it modulo 8.
// For every row, entry idx0 is copied locally; entry idxK (K = 1..7) is sent
// through commucperm and the received value is stored at index idx(8-K).
// NOTE(review): perm_a..perm_g are defined outside this excerpt; this looks
// like a cluster-wise transpose/rotation of each row -- TODO confirm.
int idx0 = cid();
int idx1 = (idx0 - 1) & 7;
int idx2 = (idx1 - 1) & 7;
int idx3 = (idx2 - 1) & 7;
int idx4 = (idx3 - 1) & 7;
int idx5 = (idx4 - 1) & 7;
int idx6 = (idx5 - 1) & 7;
int idx7 = (idx6 - 1) & 7;
array<int> output_array(48);
// Row 0 (entries 0..7).
output_array[0+idx0] = rle_out[0+idx0];
output_array[0+idx7] = commucperm(perm_a, rle_out[0+idx1]);
output_array[0+idx6] = commucperm(perm_b, rle_out[0+idx2]);
output_array[0+idx5] = commucperm(perm_c, rle_out[0+idx3]);
output_array[0+idx4] = commucperm(perm_d, rle_out[0+idx4]);
output_array[0+idx3] = commucperm(perm_e, rle_out[0+idx5]);
output_array[0+idx2] = commucperm(perm_f, rle_out[0+idx6]);
output_array[0+idx1] = commucperm(perm_g, rle_out[0+idx7]);
// Row 1 (entries 8..15).
output_array[8+idx0] = rle_out[8+idx0];
output_array[8+idx7] = commucperm(perm_a, rle_out[8+idx1]);
output_array[8+idx6] = commucperm(perm_b, rle_out[8+idx2]);
output_array[8+idx5] = commucperm(perm_c, rle_out[8+idx3]);
output_array[8+idx4] = commucperm(perm_d, rle_out[8+idx4]);
output_array[8+idx3] = commucperm(perm_e, rle_out[8+idx5]);
output_array[8+idx2] = commucperm(perm_f, rle_out[8+idx6]);
output_array[8+idx1] = commucperm(perm_g, rle_out[8+idx7]);
// Row 2 (entries 16..23).
output_array[16+idx0] = rle_out[16+idx0];
output_array[16+idx7] = commucperm(perm_a, rle_out[16+idx1]);
output_array[16+idx6] = commucperm(perm_b, rle_out[16+idx2]);
output_array[16+idx5] = commucperm(perm_c, rle_out[16+idx3]);
output_array[16+idx4] = commucperm(perm_d, rle_out[16+idx4]);
output_array[16+idx3] = commucperm(perm_e, rle_out[16+idx5]);
output_array[16+idx2] = commucperm(perm_f, rle_out[16+idx6]);
output_array[16+idx1] = commucperm(perm_g, rle_out[16+idx7]);
// Row 3 (entries 24..31).
output_array[24+idx0] = rle_out[24+idx0];
output_array[24+idx7] = commucperm(perm_a, rle_out[24+idx1]);
output_array[24+idx6] = commucperm(perm_b, rle_out[24+idx2]);
output_array[24+idx5] = commucperm(perm_c, rle_out[24+idx3]);
output_array[24+idx4] = commucperm(perm_d, rle_out[24+idx4]);
output_array[24+idx3] = commucperm(perm_e, rle_out[24+idx5]);
output_array[24+idx2] = commucperm(perm_f, rle_out[24+idx6]);
output_array[24+idx1] = commucperm(perm_g, rle_out[24+idx7]);
// Row 4 (entries 32..39).
output_array[32+idx0] = rle_out[32+idx0];
output_array[32+idx7] = commucperm(perm_a, rle_out[32+idx1]);
output_array[32+idx6] = commucperm(perm_b, rle_out[32+idx2]);
output_array[32+idx5] = commucperm(perm_c, rle_out[32+idx3]);
output_array[32+idx4] = commucperm(perm_d, rle_out[32+idx4]);
output_array[32+idx3] = commucperm(perm_e, rle_out[32+idx5]);
output_array[32+idx2] = commucperm(perm_f, rle_out[32+idx6]);
output_array[32+idx1] = commucperm(perm_g, rle_out[32+idx7]);
// Row 5 (entries 40..47).
output_array[40+idx0] = rle_out[40+idx0];
output_array[40+idx7] = commucperm(perm_a, rle_out[40+idx1]);
output_array[40+idx6] = commucperm(perm_b, rle_out[40+idx2]);
output_array[40+idx5] = commucperm(perm_c, rle_out[40+idx3]);
output_array[40+idx4] = commucperm(perm_d, rle_out[40+idx4]);
output_array[40+idx3] = commucperm(perm_e, rle_out[40+idx5]);
output_array[40+idx2] = commucperm(perm_f, rle_out[40+idx6]);
output_array[40+idx1] = commucperm(perm_g, rle_out[40+idx7]);
// Stage: write the results to the conditional output stream `out`.
// NOTE(review): quant_scale, clust_zero, Pframe, Iframe, ccend, motion and out
// are defined outside this excerpt.
// Header: qs (obtained from quant_scale via commclperm) and cbp are written,
// each as a half2, under the clust_zero condition.
uint qs = commclperm(ucid(), 0, quant_scale);
out(clust_zero) << half2(qs);
out(clust_zero) << half2(cbp);
// Read mv and bestval from the motion stream under Pframe and forward mv to
// the output. bestval is read but not used in the visible code.
int mv, bestval;
motion(Pframe, ccend) >> mv >> bestval;
out(Pframe) << mv;
// Six rows of 8 entries each. An entry is written only when it is > 0; the
// first entry of each row is additionally written whenever Iframe is set.
i = 6;
int idx = 0;
int tmpout;
cc valid;
loop_count(i) {
tmpout = output_array[idx];
valid = itocc(cctoi(Iframe) | (tmpout > 0));
out(valid) << tmpout;
tmpout = output_array[1+idx];
valid = itocc(tmpout > 0);
out(valid) << tmpout;
tmpout = output_array[2+idx];
valid = itocc(tmpout > 0);
out(valid) << tmpout;
tmpout = output_array[3+idx];
valid = itocc(tmpout > 0);
out(valid) << tmpout;
tmpout = output_array[4+idx];
valid = itocc(tmpout > 0);
out(valid) << tmpout;
tmpout = output_array[5+idx];
valid = itocc(tmpout > 0);
out(valid) << tmpout;
tmpout = output_array[6+idx];
valid = itocc(tmpout > 0);
out(valid) << tmpout;
tmpout = output_array[7+idx];
valid = itocc(tmpout > 0);
out(valid) << tmpout;
idx = idx + 8;
// After each row, -1 is written under clust_zero (presumably an end-of-row
// marker for the consumer of this stream -- TODO confirm).
out(clust_zero) << -1;
}
}
flush(out, -2);
int drain;
cc cctrue = itocc(1 == 1);
motion(cctrue, ccend) >> drain;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -