📄 deblock_luma_kc.sc
字号:
top1_32_1 = spi_vselect8((vec uint8x4)cid_b0, top1_10, top1_32); top1_10_1 = spi_vselect8((vec uint8x4)cid_b0, top1_32, top1_10); top0 = (vec uint8x4)spi_vshuffleu(0x02000604, (vec uint32x1)top0_32_1, (vec uint32x1)top0_10_1); top1 = (vec uint8x4)spi_vshuffleu(0x02000604, (vec uint32x1)top1_32_1, (vec uint32x1)top1_10_1); // Swap tops with spi_laneid() + 8 top0 = (vec uint8x4)spi_vperm32(perm_b3, (vec uint32x1)top0, 0); top1 = (vec uint8x4)spi_vperm32(perm_b3, (vec uint32x1)top1, 0); //Load MB from top strip spi_array_read(in_out_frame_top, top0_0, in_out_idx); spi_array_read(in_out_frame_top, top1_0, in_out_idx+1U); spi_array_read(in_out_frame_top, top2_0, in_out_idx+2U); spi_array_read(in_out_frame_top, top3_0, in_out_idx+3U); spi_array_read(in_out_frame_top, top0_1, in_out_idx+pitch); spi_array_read(in_out_frame_top, top1_1, in_out_idx+pitch+1U); spi_array_read(in_out_frame_top, top2_1, in_out_idx+pitch+2U); spi_array_read(in_out_frame_top, top3_1, in_out_idx+pitch+3U); //Load MB from current strip spi_array_read(in_frame, cur0_0, in_idx); spi_array_read(in_frame, cur0_1, in_idx + (vec uint32x1)1); spi_array_read(in_frame, cur1_0, in_idx + (vec uint32x1)2); spi_array_read(in_frame, cur1_1, in_idx + (vec uint32x1)3); spi_array_read(in_frame, cur2_0, in_idx + (vec uint32x1)4); spi_array_read(in_frame, cur2_1, in_idx + (vec uint32x1)5); spi_array_read(in_frame, cur3_0, in_idx + (vec uint32x1)6); spi_array_read(in_frame, cur3_1, in_idx + (vec uint32x1)7); // For clusters 0-7, current MB strip bottom pixels will // be upated and for clusters 8-15, top MB strip bottom // pixels will be updated. // Update the pixels pud0_0 = (vec uint8x4)spi_vperm32(perm_cid_a9, (vec uint32x1)top0, 0); pud0_1 = (vec uint8x4)spi_vperm32(perm_cid_a9, (vec uint32x1)top1, 0); pud1_0 = (vec uint8x4)spi_vperm32(perm_cid_aB, (vec uint32x1)top0, 0); pud1_1 = (vec uint8x4)spi_vperm32(perm_cid_aB, (vec uint32x1)top1, 0); pud2_0 = (vec uint8x4)spi_vperm32(perm_cid_aD, (vec uint32x1)top0, 0); pud2_1 = (vec uint8x4)spi_vperm32(perm_cid_aD, (vec uint32x1)top1, 0); pud3_0 = top0; pud3_1 = top1; cur0_0 = spi_vselect8((vec uint8x4)cid_6_7, pud0_0, cur0_0); cur0_1 = spi_vselect8((vec uint8x4)cid_6_7, pud0_1, cur0_1); cur1_0 = spi_vselect8((vec uint8x4)cid_6_7, pud1_0, cur1_0); cur1_1 = spi_vselect8((vec uint8x4)cid_6_7, pud1_1, cur1_1); cur2_0 = spi_vselect8((vec uint8x4)cid_6_7, pud2_0, cur2_0); cur2_1 = spi_vselect8((vec uint8x4)cid_6_7, pud2_1, cur2_1); cur3_0 = spi_vselect8((vec uint8x4)cid_6_7, pud3_0, cur3_0); cur3_1 = spi_vselect8((vec uint8x4)cid_6_7, pud3_1, cur3_1); top0_0 = spi_vselect8((vec uint8x4)cid_14_15, pud0_0, top0_0); top0_1 = spi_vselect8((vec uint8x4)cid_14_15, pud0_1, top0_1); top1_0 = spi_vselect8((vec uint8x4)cid_14_15, pud1_0, top1_0); top1_1 = spi_vselect8((vec uint8x4)cid_14_15, pud1_1, top1_1); top2_0 = spi_vselect8((vec uint8x4)cid_14_15, pud2_0, top2_0); top2_1 = spi_vselect8((vec uint8x4)cid_14_15, pud2_1, top2_1); top3_0 = spi_vselect8((vec uint8x4)cid_14_15, pud3_0, top3_0); top3_1 = spi_vselect8((vec uint8x4)cid_14_15, pud3_1, top3_1); // Padding and Colflating //spi_array_read(in_out_frame_top, left_pad_0, 4); //spi_array_read(in_out_frame_top, left_pad_1, 4U + pitch); left_pad_0 = top0_0; left_pad_1 = top0_1; left_pad_0 = (vec uint8x4)spi_vshuffledu_lo(0x40404040, left_pad_0, left_pad_1); left_pad_1 = (vec uint8x4)spi_vshuffledu_hi(0x40404040, left_pad_0, left_pad_1); spi_array_write(in_out_frame_top, left_pad_0, 0); spi_array_write(in_out_frame_top, left_pad_0, 1); spi_array_write(in_out_frame_top, left_pad_0, 2); spi_array_write(in_out_frame_top, left_pad_0, 3); spi_array_write(in_out_frame_top, left_pad_1, pitch); spi_array_write(in_out_frame_top, left_pad_1, pitch+1U); spi_array_write(in_out_frame_top, left_pad_1, pitch+2U); spi_array_write(in_out_frame_top, left_pad_1, pitch+3U); //spi_array_read(out_frame, left_pad_0, 4); //spi_array_read(out_frame, left_pad_1, 4U + pitch); left_pad_0 = cur0_0; left_pad_1 = cur0_1; left_pad_0 = (vec uint8x4)spi_vshuffledu_lo(0x40404040, left_pad_0, left_pad_1); left_pad_1 = (vec uint8x4)spi_vshuffledu_hi(0x40404040, left_pad_0, left_pad_1); spi_array_write(out_frame, left_pad_0, 0); spi_array_write(out_frame, left_pad_0, 1); spi_array_write(out_frame, left_pad_0, 2); spi_array_write(out_frame, left_pad_0, 3); spi_array_write(out_frame, left_pad_1, pitch); spi_array_write(out_frame, left_pad_1, pitch+1U); spi_array_write(out_frame, left_pad_1, pitch+2U); spi_array_write(out_frame, left_pad_1, pitch+3U); } while(no_of_iter){#pragma pipeline // Load top intermeidates spi_array_read(in_frame/*_top_inter*/, topa_0, in_fti_idx); spi_array_read(in_frame/*_top_inter*/, topa_1, in_fti_idx + (vec uint32x1)1); spi_array_read(in_frame/*_top_inter*/, topa_2, in_fti_idx + (vec uint32x1)2); spi_array_read(in_frame/*_top_inter*/, topa_3, in_fti_idx + (vec uint32x1)3); // Each cluster has 4 lines of top MB (2 pixels per row), re // arrange the pixels such that each cluster will have 2 full // rows (4pixels) to write out. Swap data with neighboring // clusters. top0_32 = (vec uint8x4)spi_vperm32(perm_b0, (vec uint32x1)spi_vselect8((vec uint8x4)cid_b0, topa_0, topa_2), 0); top1_32 = (vec uint8x4)spi_vperm32(perm_b0, (vec uint32x1)spi_vselect8((vec uint8x4)cid_b0, topa_1, topa_3), 0); top0_10 = spi_vselect8((vec uint8x4)cid_b0, topa_2, topa_0); top1_10 = spi_vselect8((vec uint8x4)cid_b0, topa_3, topa_1); top0_32_1 = spi_vselect8((vec uint8x4)cid_b0, top0_10, top0_32); top0_10_1 = spi_vselect8((vec uint8x4)cid_b0, top0_32, top0_10); top1_32_1 = spi_vselect8((vec uint8x4)cid_b0, top1_10, top1_32); top1_10_1 = spi_vselect8((vec uint8x4)cid_b0, top1_32, top1_10); top0 = (vec uint8x4)spi_vshuffleu(0x02000604, (vec uint32x1)top0_32_1, (vec uint32x1)top0_10_1); top1 = (vec uint8x4)spi_vshuffleu(0x02000604, (vec uint32x1)top1_32_1, (vec uint32x1)top1_10_1); // Swap tops with spi_laneid() + 8 top0 = (vec uint8x4)spi_vperm32(perm_b3, (vec uint32x1)top0, 0); top1 = (vec uint8x4)spi_vperm32(perm_b3, (vec uint32x1)top1, 0); //Load MB from top strip spi_array_read(in_out_frame_top, top0_0, in_out_idx); spi_array_read(in_out_frame_top, top1_0, in_out_idx+1U); spi_array_read(in_out_frame_top, top2_0, in_out_idx+2U); spi_array_read(in_out_frame_top, top3_0, in_out_idx+3U); spi_array_read(in_out_frame_top, top0_1, in_out_idx+pitch); spi_array_read(in_out_frame_top, top1_1, in_out_idx+pitch+1U); spi_array_read(in_out_frame_top, top2_1, in_out_idx+pitch+2U); spi_array_read(in_out_frame_top, top3_1, in_out_idx+pitch+3U); //Load MB from current strip spi_array_read(in_frame, cur0_0, in_idx); spi_array_read(in_frame, cur0_1, in_idx + (vec uint32x1)1); spi_array_read(in_frame, cur1_0, in_idx + (vec uint32x1)2); spi_array_read(in_frame, cur1_1, in_idx + (vec uint32x1)3); spi_array_read(in_frame, cur2_0, in_idx + (vec uint32x1)4); spi_array_read(in_frame, cur2_1, in_idx + (vec uint32x1)5); spi_array_read(in_frame, cur3_0, in_idx + (vec uint32x1)6); spi_array_read(in_frame, cur3_1, in_idx + (vec uint32x1)7); // For clusters 0-7, current MB strip bottom pixels will // be upated and for clusters 8-15, top MB strip bottom // pixels will be updated. // Update the pixels pud0_0 = (vec uint8x4)spi_vperm32(spi_laneid()&0x9, (vec uint32x1)top0, 0); pud0_1 = (vec uint8x4)spi_vperm32(spi_laneid()&0x9, (vec uint32x1)top1, 0); pud1_0 = (vec uint8x4)spi_vperm32(spi_laneid()&0xB, (vec uint32x1)top0, 0); pud1_1 = (vec uint8x4)spi_vperm32(spi_laneid()&0xB, (vec uint32x1)top1, 0); pud2_0 = (vec uint8x4)spi_vperm32(spi_laneid()&0xD, (vec uint32x1)top0, 0); pud2_1 = (vec uint8x4)spi_vperm32(spi_laneid()&0xD, (vec uint32x1)top1, 0); pud3_0 = top0; pud3_1 = top1; cur0_0 = spi_vselect8((vec uint8x4)cid_6_7, pud0_0, cur0_0); cur0_1 = spi_vselect8((vec uint8x4)cid_6_7, pud0_1, cur0_1); cur1_0 = spi_vselect8((vec uint8x4)cid_6_7, pud1_0, cur1_0); cur1_1 = spi_vselect8((vec uint8x4)cid_6_7, pud1_1, cur1_1); cur2_0 = spi_vselect8((vec uint8x4)cid_6_7, pud2_0, cur2_0); cur2_1 = spi_vselect8((vec uint8x4)cid_6_7, pud2_1, cur2_1); cur3_0 = spi_vselect8((vec uint8x4)cid_6_7, pud3_0, cur3_0); cur3_1 = spi_vselect8((vec uint8x4)cid_6_7, pud3_1, cur3_1); top0_0 = spi_vselect8((vec uint8x4)cid_14_15, pud0_0, top0_0); top0_1 = spi_vselect8((vec uint8x4)cid_14_15, pud0_1, top0_1); top1_0 = spi_vselect8((vec uint8x4)cid_14_15, pud1_0, top1_0); top1_1 = spi_vselect8((vec uint8x4)cid_14_15, pud1_1, top1_1); top2_0 = spi_vselect8((vec uint8x4)cid_14_15, pud2_0, top2_0); top2_1 = spi_vselect8((vec uint8x4)cid_14_15, pud2_1, top2_1); top3_0 = spi_vselect8((vec uint8x4)cid_14_15, pud3_0, top3_0); top3_1 = spi_vselect8((vec uint8x4)cid_14_15, pud3_1, top3_1); // Write out the updated pixels of top strip. spi_array_write(in_out_frame_top, top0_0, in_out_idx); spi_array_write(in_out_frame_top, top1_0, in_out_idx+1U); spi_array_write(in_out_frame_top, top2_0, in_out_idx+2U); spi_array_write(in_out_frame_top, top3_0, in_out_idx+3U); spi_array_write(in_out_frame_top, top0_1, in_out_idx+pitch); spi_array_write(in_out_frame_top, top1_1, in_out_idx+pitch+1U); spi_array_write(in_out_frame_top, top2_1, in_out_idx+pitch+2U); spi_array_write(in_out_frame_top, top3_1, in_out_idx+pitch+3U); //Save MB from current strip spi_array_write(out_frame, cur0_0, in_out_idx); spi_array_write(out_frame, cur1_0, in_out_idx+1U); spi_array_write(out_frame, cur2_0, in_out_idx+2U); spi_array_write(out_frame, cur3_0, in_out_idx+3U); spi_array_write(out_frame, cur0_1, in_out_idx+pitch); spi_array_write(out_frame, cur1_1, in_out_idx+pitch+1U); spi_array_write(out_frame, cur2_1, in_out_idx+pitch+2U); spi_array_write(out_frame, cur3_1, in_out_idx+pitch+3U); in_out_idx = in_out_idx + 4U; in_fti_idx = in_fti_idx + 4U; in_idx = in_idx + 8U; no_of_iter = (int32x1)no_of_iter - 1; } if (s_filter_last_mb_horz_edges != 0){ spi_array_read(in_out_frame_top, right_pad_0, pitch - 5U); spi_array_read(in_out_frame_top, right_pad_1, spi_vshift32(pitch, 1) - 5U); right_pad_0 = (vec uint8x4)spi_vshuffledu_lo(0x73737373, right_pad_0, right_pad_1); right_pad_1 = (vec uint8x4)spi_vshuffledu_hi(0x73737373, right_pad_0, right_pad_1); spi_array_write(in_out_frame_top, right_pad_0, pitch - 4U); spi_array_write(in_out_frame_top, right_pad_0, pitch - 3U); spi_array_write(in_out_frame_top, right_pad_0, pitch - 2U); spi_array_write(in_out_frame_top, right_pad_0, pitch - 1U); spi_array_write(in_out_frame_top, right_pad_1, spi_vshift32(pitch, 1) - 4U); spi_array_write(in_out_frame_top, right_pad_1, spi_vshift32(pitch, 1) - 3U); spi_array_write(in_out_frame_top, right_pad_1, spi_vshift32(pitch, 1) - 2U); spi_array_write(in_out_frame_top, right_pad_1, spi_vshift32(pitch, 1) - 1U); spi_array_read(out_frame, right_pad_0, pitch - 5U); spi_array_read(out_frame, right_pad_1, spi_vshift32(pitch, 1) - 5U); right_pad_0 = (vec uint8x4)spi_vshuffledu_lo(0x73737373, right_pad_0, right_pad_1); right_pad_1 = (vec uint8x4)spi_vshuffledu_hi(0x73737373, right_pad_0, right_pad_1); spi_array_write(out_frame, right_pad_0, pitch - 4U); spi_array_write(out_frame, right_pad_0, pitch - 3U); spi_array_write(out_frame, right_pad_0, pitch - 2U); spi_array_write(out_frame, right_pad_0, pitch - 1U); spi_array_write(out_frame, right_pad_1, spi_vshift32(pitch, 1) - 4U); spi_array_write(out_frame, right_pad_1, spi_vshift32(pitch, 1) - 3U); spi_array_write(out_frame, right_pad_1, spi_vshift32(pitch, 1) - 2U); spi_array_write(out_frame, right_pad_1, spi_vshift32(pitch, 1) - 1U); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -