📄 deblock_sc.sc
字号:
// We use (mb_numx-1) instead of mb_numx, because if the last strip // in each row would have had only one macroblock in it, then // instead we can just finish it's processing as part of the // current strip. I.e., there is no "stitching" to be done since // that would be the sole macroblock in it's strip. num_strips = ((mb_numx-1) / strip_size) + (((mb_numx-1) % strip_size) ? 1 : 0); if (num_strips == 0) { num_strips++; } // Load the tables in to all clusters. Tables are repeated //spi_load(QP2ChromaMappingStr, (void *)QP_TO_CHROMA_MAPPING, 0, // (NUM_QP)/4, 1, 1, 0); //spi_load(IndexATableStr, (void *)IndexATable, 0, // NUM_QP, 1, 1, 0); //spi_load(IndexBTableStr, (void *)IndexBTable, 0, // (NUM_QP)/4, 1, 1, 0); spi_load(deblock_tbls_str, p_deblk_tbls, 0, DEBLK_TBLS_SIZE, 1, 1, 0); // Load index streams... spi_load(y_idx_str, p_y_idx); spi_load(uv_idx_str, p_uv_idx); spi_load(mb_info_idx_str, p_mb_info_idx); //spi_load(y_hlfres_idx_str, p_hlfres_y_idx); // Bottom indices are needed only for frames with heights // not multiple of 32. if (mb_numy % 2){ spi_load(mb_info_idx_str_bot, p_mb_info_idx_bot); spi_load(y_idx_str_bot, p_y_idx + 32); spi_load(uv_idx_str_bot, p_uv_idx + 32); } // Loop over row segments (each row segment is strip_size // wide, though the last segment may be < strip_size wide) for (s = 0; s < num_strips; s++) { int last_strip; int curr_strip_size, curr_strip_size_p2; int filter_first_mb_vert_edges; int filter_last_mb_horz_edges; // First 16 entries are used for non-last row of the frame // and last 16 entries are used for last row. int top_strip_has_valid_data = 0; int use_top_strip = 0; int idx_strm_ofs = 0; int use_bot_idx_str = 0; // Calculate x macroblock coordinate for this strip x = s * (strip_size); // The last strip might be smaller than the rest last_strip = ((x + strip_size + 1) >= mb_numx); curr_strip_size = (!last_strip ? (strip_size+1) : (mb_numx-x)); curr_strip_size_p2 = curr_strip_size + 2; // These flags can also be re used to indicate whether // we need left/right padding for this strip. filter_first_mb_vert_edges = (x == 0); filter_last_mb_horz_edges = last_strip; // Launch mb_info load to pipeup the loop. // Assuming mb_numy is never 1 spi_load(mb_info_strip, p_blk_mb_info + x*BLOCKS_PER_MB, 0, 2, 4*MBINFO_SIZE_IN_WORDS, 4*MBINFO_SIZE_IN_WORDS, MBINFO_SIZE_IN_WORDS, mb_info_idx_str(0, 4*curr_strip_size)); // Loop over rows for (y = 0; y < mb_numy; y += 2){ int dont_filter_bot_row = ((y+2) > mb_numy) ? 1 : 0; int packed_a_b_disfil_dontfilt = packed_alpha_beta_disfil | (curr_strip_size << 1) | dont_filter_bot_row; if (dont_filter_bot_row){ if (mb_numy <= 4) { spi_internal_error("Deblocking is currently supported only for image " "heights larger than 64 lines!"); } assert(mb_numy > 4); use_bot_idx_str = 1; } if (use_bot_idx_str){ if (use_top_strip){ spi_load(frame_strip_top_y, py + (x-1)*16, 0, 2, curr_strip_size_p2 * 2, curr_strip_size_p2 * 2, curr_strip_size_p2 * 4, y_idx_str_bot); }else{ spi_load(frame_strip_y, py + (x-1)*16, 0, 2, curr_strip_size_p2 * 2, curr_strip_size_p2 * 2, curr_strip_size_p2 * 4, y_idx_str_bot); } }else{ if (use_top_strip){ spi_load(frame_strip_top_y, py + (y*rec_width*16) + ((x-1)*16), 0, 2, curr_strip_size_p2 * 2, curr_strip_size_p2 * 2, curr_strip_size_p2 * 4, y_idx_str ); }else{ spi_load(frame_strip_y, py + (y*rec_width*16) + ((x-1)*16), 0, 2, curr_strip_size_p2 * 2, curr_strip_size_p2 * 2, curr_strip_size_p2 * 4, y_idx_str ); } } // Call Kernel Code deblock_mb_calc_bs(mb_info_strip, short_mb_info_strip, packed_a_b_disfil_dontfilt, bs_a_b_tc_str, deblock_tbls_str); if (use_bot_idx_str){ if (use_top_strip){ spi_load(frame_strip_top_uv, u + (x-1)*8, 0, 2, curr_strip_size_p2, curr_strip_size_p2, curr_strip_size_p2 * 2, uv_idx_str_bot); }else{ spi_load(frame_strip_uv, u + (x-1)*8, 0, 2, curr_strip_size_p2, curr_strip_size_p2, curr_strip_size_p2 * 2, uv_idx_str_bot); } }else{ if (use_top_strip){ spi_load(frame_strip_top_uv, u + y*(rec_width/2)*8 + (x-1)*8, 0, 2, curr_strip_size_p2, curr_strip_size_p2, curr_strip_size_p2 * 2, uv_idx_str); }else{ spi_load(frame_strip_uv, u + y*(rec_width/2)*8 + (x-1)*8, 0, 2, curr_strip_size_p2, curr_strip_size_p2, curr_strip_size_p2 * 2, uv_idx_str); } } // Launch mb_info for next iteration... // Use bot index stream if next mb_row is the last if ((y + 2 + 2) == (mb_numy + 1)){ spi_load(mb_info_strip, p_blk_mb_info + (y + 2)*mb_numx*BLOCKS_PER_MB + x*BLOCKS_PER_MB, 0, 2, 4*MBINFO_SIZE_IN_WORDS, 4*MBINFO_SIZE_IN_WORDS, MBINFO_SIZE_IN_WORDS, mb_info_idx_str_bot(0, 4*curr_strip_size)); }else if ((y + 2 + 2) < (mb_numy + 1)){ spi_load(mb_info_strip, p_blk_mb_info + (y + 2)*mb_numx*BLOCKS_PER_MB + x*BLOCKS_PER_MB, 0, 2, 4*MBINFO_SIZE_IN_WORDS, 4*MBINFO_SIZE_IN_WORDS, MBINFO_SIZE_IN_WORDS, mb_info_idx_str(0, 4*curr_strip_size)); } if (use_top_strip){ deblock_mb_luma(bs_a_b_tc_str(0, ((curr_strip_size+3)*16*12)), (filter_first_mb_vert_edges << 24) | (filter_last_mb_horz_edges << 16) | curr_strip_size_p2, frame_strip_top_y, frame_strip_y_inter, frame_strip_bot_y_inter, frame_strip_y ); }else{ deblock_mb_luma(bs_a_b_tc_str(0, ((curr_strip_size+3)*16*12)), (filter_first_mb_vert_edges << 24) | (filter_last_mb_horz_edges << 16) | curr_strip_size_p2, frame_strip_y, frame_strip_y_inter, frame_strip_bot_y_inter, frame_strip_top_y ); } if (use_top_strip){ deblock_mb_chroma(bs_a_b_tc_str(0, ((curr_strip_size+3)*16*12)), (filter_first_mb_vert_edges << 24) | (filter_last_mb_horz_edges << 16) | curr_strip_size_p2, frame_strip_y_inter, frame_strip_bot_uv_inter, // Input to Pre data munging frame_strip_top_uv, // Output of Post data munging frame_strip_uv); }else{ deblock_mb_chroma(bs_a_b_tc_str(0, ((curr_strip_size+3)*16*12)), (filter_first_mb_vert_edges << 24) | (filter_last_mb_horz_edges << 16) | curr_strip_size_p2, frame_strip_y_inter, frame_strip_bot_uv_inter, // Input to Pre data munging frame_strip_uv, // Output of Post data munging frame_strip_top_uv); } // Store the deblocked data out if (top_strip_has_valid_data){ if (use_top_strip){ spi_store(frame_strip_y, // dec py + ((y-2)*rec_width*16) + ((x-1)*16), 0, 2, curr_strip_size_p2 * 2, curr_strip_size_p2 * 2, curr_strip_size_p2 * 4, y_idx_str ); }else{ spi_store(frame_strip_top_y, // dec py + ((y-2)*rec_width*16) + ((x-1)*16), 0, 2, curr_strip_size_p2 * 2, curr_strip_size_p2 * 2, curr_strip_size_p2 * 4, y_idx_str ); } } // Store the deblocked data out if (top_strip_has_valid_data){ if (use_top_strip){ spi_store(frame_strip_uv, u + (y-2)*(rec_width/2)*8 + (x-1)*8, 0, 2, curr_strip_size_p2, curr_strip_size_p2, curr_strip_size_p2 * 2, uv_idx_str); }else{ spi_store(frame_strip_top_uv, u + (y-2)*(rec_width/2)*8 + (x-1)*8, 0, 2, curr_strip_size_p2, curr_strip_size_p2, curr_strip_size_p2 * 2, uv_idx_str); } } top_strip_has_valid_data = 1; use_top_strip = use_top_strip ^ 0x1; } // Write out the data of last row of this series of vertical strips if(use_bot_idx_str){ if (use_top_strip){ spi_store(frame_strip_y, // dec py + (x-1)*16, 0, 2, curr_strip_size_p2 * 2, curr_strip_size_p2 * 2, curr_strip_size_p2 * 4, y_idx_str_bot); spi_store(frame_strip_uv, u + (x-1)*8, 0, 2, curr_strip_size_p2, curr_strip_size_p2, curr_strip_size_p2 * 2, uv_idx_str_bot); }else{ spi_store(frame_strip_top_y, // dec py + (x-1)*16, 0, 2, curr_strip_size_p2 * 2, curr_strip_size_p2 * 2, curr_strip_size_p2 * 4, y_idx_str_bot); spi_store(frame_strip_top_uv, u + (x-1)*8, 0, 2, curr_strip_size_p2, curr_strip_size_p2, curr_strip_size_p2 * 2, uv_idx_str_bot); } }else{ if (use_top_strip){ spi_store(frame_strip_y, // dec py + ((y-2)*rec_width*16) + ((x-1)*16), 0, 2, curr_strip_size_p2 * 2, curr_strip_size_p2 * 2, curr_strip_size_p2 * 4, y_idx_str ); spi_store(frame_strip_uv, u + (y-2)*(rec_width/2)*8 + (x-1)*8, 0, 2, curr_strip_size_p2, curr_strip_size_p2, curr_strip_size_p2 * 2, uv_idx_str); }else{ spi_store(frame_strip_top_y, // dec py + ((y-2)*rec_width*16) + ((x-1)*16), 0, 2, curr_strip_size_p2 * 2, curr_strip_size_p2 * 2, curr_strip_size_p2 * 4, y_idx_str ); spi_store(frame_strip_top_uv, u + (y-2)*(rec_width/2)*8 + (x-1)*8, 0, 2, curr_strip_size_p2, curr_strip_size_p2, curr_strip_size_p2 * 2, uv_idx_str); } } } // Reusing the same streams for bottom and top padding seem to perform // better. // Do vertical padding - Top Y spi_load(top_row_y, py - (rec_width - image_width)/2, 0, rec_width/4, 1, 1, 0); spi_store(top_row_y, py - offset_to00_y, 0, 16, 2*rec_width/8, 2*rec_width/8, rec_width/4); // Do vertical padding - Bottom Y spi_load(top_row_y, py - (rec_width - image_width)/2 + image_height * rec_width - rec_width, 0, rec_width/4, 1, 1, 0); spi_store(top_row_y, py - (rec_width - image_width)/2 + image_height * rec_width, 0, 16, 2*rec_width/8, 2*rec_width/8, rec_width/4); spi_barrier(); spi_release_all_streams();}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -