⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 deblock_sc.sc

📁 deblocking 在SPI DSP上的优化代码,超级强
💻 SC
📖 第 1 页 / 共 2 页
字号:
    // We use (mb_numx-1) instead of mb_numx, because if the last strip    // in each row would have had only one macroblock in it, then    // instead we can just finish it's processing as part of the    // current strip.  I.e., there is no "stitching" to be done since    // that would be the sole macroblock in it's strip.    num_strips = ((mb_numx-1) / strip_size) + (((mb_numx-1) % strip_size) ? 1 : 0);    if (num_strips == 0) { num_strips++; }            // Load the tables in to all clusters. Tables are repeated    //spi_load(QP2ChromaMappingStr, (void *)QP_TO_CHROMA_MAPPING, 0,    //         (NUM_QP)/4, 1, 1, 0);    //spi_load(IndexATableStr, (void *)IndexATable, 0,    //         NUM_QP, 1, 1, 0);    //spi_load(IndexBTableStr, (void *)IndexBTable, 0,    //         (NUM_QP)/4, 1, 1, 0);    spi_load(deblock_tbls_str, p_deblk_tbls, 0, DEBLK_TBLS_SIZE, 1, 1, 0);        // Load index streams...    spi_load(y_idx_str, p_y_idx);    spi_load(uv_idx_str, p_uv_idx);    spi_load(mb_info_idx_str, p_mb_info_idx);    //spi_load(y_hlfres_idx_str, p_hlfres_y_idx);    // Bottom indices are needed only for frames with heights    // not multiple of 32.    if (mb_numy % 2){        spi_load(mb_info_idx_str_bot, p_mb_info_idx_bot);        spi_load(y_idx_str_bot, p_y_idx + 32);        spi_load(uv_idx_str_bot, p_uv_idx + 32);    }        // Loop over row segments (each row segment is strip_size    // wide, though the last segment may be < strip_size wide)    for (s = 0; s < num_strips; s++) {        int last_strip;        int curr_strip_size, curr_strip_size_p2;        int filter_first_mb_vert_edges;        int filter_last_mb_horz_edges;        // First 16 entries are used for non-last row of the frame        // and last 16 entries are used for last row.        int top_strip_has_valid_data = 0;        int use_top_strip = 0;        int idx_strm_ofs = 0;        int use_bot_idx_str = 0;        // Calculate x macroblock coordinate for this strip        x = s * (strip_size);        // The last strip might be smaller than the rest        last_strip = ((x + strip_size + 1) >= mb_numx);        curr_strip_size = (!last_strip ? (strip_size+1) : (mb_numx-x));        curr_strip_size_p2 = curr_strip_size + 2;        // These flags can also be re used to indicate whether        // we need left/right padding for this strip.        filter_first_mb_vert_edges = (x == 0);        filter_last_mb_horz_edges  = last_strip;        // Launch mb_info load to pipeup the loop.        // Assuming mb_numy is never 1        spi_load(mb_info_strip,                 p_blk_mb_info + x*BLOCKS_PER_MB, 0,                 2, 4*MBINFO_SIZE_IN_WORDS, 4*MBINFO_SIZE_IN_WORDS, MBINFO_SIZE_IN_WORDS,                 mb_info_idx_str(0, 4*curr_strip_size));                // Loop over rows        for (y = 0; y < mb_numy; y += 2){            int dont_filter_bot_row = ((y+2) > mb_numy) ? 1 : 0;            int packed_a_b_disfil_dontfilt = packed_alpha_beta_disfil |                (curr_strip_size << 1) | dont_filter_bot_row;            if (dont_filter_bot_row){                if (mb_numy <= 4) {                    spi_internal_error("Deblocking is currently supported only for image "                                       "heights larger than 64 lines!");                }                assert(mb_numy > 4);                use_bot_idx_str = 1;            }                        if (use_bot_idx_str){                if (use_top_strip){                    spi_load(frame_strip_top_y,                             py + (x-1)*16, 0, 2,                              curr_strip_size_p2 * 2,                             curr_strip_size_p2 * 2, curr_strip_size_p2 * 4,                             y_idx_str_bot);                }else{                    spi_load(frame_strip_y,                             py + (x-1)*16, 0, 2,                              curr_strip_size_p2 * 2,                             curr_strip_size_p2 * 2, curr_strip_size_p2 * 4,                             y_idx_str_bot);                }            }else{                if (use_top_strip){                    spi_load(frame_strip_top_y,                             py + (y*rec_width*16) + ((x-1)*16), 0, 2,                              curr_strip_size_p2 * 2,                             curr_strip_size_p2 * 2, curr_strip_size_p2 * 4,                             y_idx_str                             );                }else{                    spi_load(frame_strip_y,                             py + (y*rec_width*16) + ((x-1)*16), 0, 2,                              curr_strip_size_p2 * 2,                             curr_strip_size_p2 * 2, curr_strip_size_p2 * 4,                             y_idx_str                             );                }            }            // Call Kernel Code            deblock_mb_calc_bs(mb_info_strip,                               short_mb_info_strip,                               packed_a_b_disfil_dontfilt,                               bs_a_b_tc_str,                               deblock_tbls_str);            if (use_bot_idx_str){                if (use_top_strip){                    spi_load(frame_strip_top_uv,                             u + (x-1)*8, 0, 2,                             curr_strip_size_p2,                             curr_strip_size_p2, curr_strip_size_p2 * 2,                             uv_idx_str_bot);                }else{                    spi_load(frame_strip_uv,                             u + (x-1)*8, 0, 2,                             curr_strip_size_p2,                             curr_strip_size_p2, curr_strip_size_p2 * 2,                             uv_idx_str_bot);                }            }else{                if (use_top_strip){                    spi_load(frame_strip_top_uv,                             u + y*(rec_width/2)*8 + (x-1)*8, 0, 2,                             curr_strip_size_p2,                             curr_strip_size_p2, curr_strip_size_p2 * 2,                             uv_idx_str);                }else{                    spi_load(frame_strip_uv,                             u + y*(rec_width/2)*8 + (x-1)*8, 0, 2,                             curr_strip_size_p2,                             curr_strip_size_p2, curr_strip_size_p2 * 2,                             uv_idx_str);                }            }                        // Launch mb_info for next iteration...            // Use bot index stream if next mb_row is the last            if ((y + 2 + 2) == (mb_numy + 1)){                spi_load(mb_info_strip,                         p_blk_mb_info + (y + 2)*mb_numx*BLOCKS_PER_MB + x*BLOCKS_PER_MB, 0,                         2, 4*MBINFO_SIZE_IN_WORDS, 4*MBINFO_SIZE_IN_WORDS, MBINFO_SIZE_IN_WORDS,                         mb_info_idx_str_bot(0, 4*curr_strip_size));            }else if ((y + 2 + 2) < (mb_numy + 1)){                spi_load(mb_info_strip,                         p_blk_mb_info + (y + 2)*mb_numx*BLOCKS_PER_MB + x*BLOCKS_PER_MB, 0,                         2, 4*MBINFO_SIZE_IN_WORDS, 4*MBINFO_SIZE_IN_WORDS, MBINFO_SIZE_IN_WORDS,                         mb_info_idx_str(0, 4*curr_strip_size));            }                                    if (use_top_strip){                deblock_mb_luma(bs_a_b_tc_str(0, ((curr_strip_size+3)*16*12)),                                (filter_first_mb_vert_edges << 24) |                                (filter_last_mb_horz_edges << 16) |                                curr_strip_size_p2,                                frame_strip_top_y,                                frame_strip_y_inter,                                frame_strip_bot_y_inter,                                frame_strip_y                                );            }else{                deblock_mb_luma(bs_a_b_tc_str(0, ((curr_strip_size+3)*16*12)),                                (filter_first_mb_vert_edges << 24) |                                (filter_last_mb_horz_edges << 16) |                                curr_strip_size_p2,                                frame_strip_y,                                frame_strip_y_inter,                                frame_strip_bot_y_inter,                                frame_strip_top_y                                );            }                        if (use_top_strip){                deblock_mb_chroma(bs_a_b_tc_str(0, ((curr_strip_size+3)*16*12)),                                  (filter_first_mb_vert_edges << 24) |                                  (filter_last_mb_horz_edges << 16) |                                  curr_strip_size_p2,                                  frame_strip_y_inter,                                  frame_strip_bot_uv_inter,                                  // Input to Pre data munging                                  frame_strip_top_uv,                                  // Output of Post data munging                                  frame_strip_uv);            }else{                deblock_mb_chroma(bs_a_b_tc_str(0, ((curr_strip_size+3)*16*12)),                                  (filter_first_mb_vert_edges << 24) |                                  (filter_last_mb_horz_edges << 16) |                                  curr_strip_size_p2,                                  frame_strip_y_inter,                                  frame_strip_bot_uv_inter,                                  // Input to Pre data munging                                  frame_strip_uv,                                  // Output of Post data munging                                  frame_strip_top_uv);            }            // Store the deblocked data out            if (top_strip_has_valid_data){                if (use_top_strip){                    spi_store(frame_strip_y, // dec                              py + ((y-2)*rec_width*16) + ((x-1)*16), 0, 2,                               curr_strip_size_p2 * 2,                              curr_strip_size_p2 * 2, curr_strip_size_p2 * 4,                              y_idx_str                              );                }else{                    spi_store(frame_strip_top_y, // dec                              py + ((y-2)*rec_width*16) + ((x-1)*16), 0, 2,                               curr_strip_size_p2 * 2,                              curr_strip_size_p2 * 2, curr_strip_size_p2 * 4,                              y_idx_str                              );                }            }                        // Store the deblocked data out            if (top_strip_has_valid_data){                if (use_top_strip){                    spi_store(frame_strip_uv,                              u + (y-2)*(rec_width/2)*8 + (x-1)*8, 0, 2,                              curr_strip_size_p2,                              curr_strip_size_p2, curr_strip_size_p2 * 2,                              uv_idx_str);                }else{                    spi_store(frame_strip_top_uv,                              u + (y-2)*(rec_width/2)*8 + (x-1)*8, 0, 2,                              curr_strip_size_p2,                              curr_strip_size_p2, curr_strip_size_p2 * 2,                              uv_idx_str);                }            }            top_strip_has_valid_data = 1;            use_top_strip = use_top_strip ^ 0x1;        }        // Write out the data of last row of this series of vertical strips        if(use_bot_idx_str){            if (use_top_strip){                spi_store(frame_strip_y, // dec                          py + (x-1)*16, 0, 2,                           curr_strip_size_p2 * 2,                          curr_strip_size_p2 * 2, curr_strip_size_p2 * 4,                          y_idx_str_bot);                spi_store(frame_strip_uv,                          u + (x-1)*8, 0, 2,                          curr_strip_size_p2,                          curr_strip_size_p2, curr_strip_size_p2 * 2,                          uv_idx_str_bot);            }else{                spi_store(frame_strip_top_y, // dec                          py + (x-1)*16, 0, 2,                           curr_strip_size_p2 * 2,                          curr_strip_size_p2 * 2, curr_strip_size_p2 * 4,                          y_idx_str_bot);                spi_store(frame_strip_top_uv,                          u + (x-1)*8, 0, 2,                          curr_strip_size_p2,                          curr_strip_size_p2, curr_strip_size_p2 * 2,                          uv_idx_str_bot);            }        }else{            if (use_top_strip){                spi_store(frame_strip_y, // dec                          py + ((y-2)*rec_width*16) + ((x-1)*16), 0, 2,                           curr_strip_size_p2 * 2,                          curr_strip_size_p2 * 2, curr_strip_size_p2 * 4,                          y_idx_str                          );                spi_store(frame_strip_uv,                          u + (y-2)*(rec_width/2)*8 + (x-1)*8, 0, 2,                          curr_strip_size_p2,                          curr_strip_size_p2, curr_strip_size_p2 * 2,                          uv_idx_str);            }else{                spi_store(frame_strip_top_y, // dec                          py + ((y-2)*rec_width*16) + ((x-1)*16), 0, 2,                           curr_strip_size_p2 * 2,                          curr_strip_size_p2 * 2, curr_strip_size_p2 * 4,                          y_idx_str                          );                spi_store(frame_strip_top_uv,                          u + (y-2)*(rec_width/2)*8 + (x-1)*8, 0, 2,                          curr_strip_size_p2,                          curr_strip_size_p2, curr_strip_size_p2 * 2,                          uv_idx_str);            }        }    }    // Reusing the same streams for bottom and top padding seem to perform    // better.     // Do vertical padding - Top Y    spi_load(top_row_y, py - (rec_width - image_width)/2, 0,             rec_width/4, 1, 1, 0);        spi_store(top_row_y, py - offset_to00_y, 0,              16, 2*rec_width/8, 2*rec_width/8, rec_width/4);        // Do vertical padding - Bottom Y    spi_load(top_row_y,             py - (rec_width - image_width)/2 + image_height * rec_width - rec_width,             0, rec_width/4, 1, 1, 0);    spi_store(top_row_y,              py - (rec_width - image_width)/2 + image_height * rec_width,              0, 16, 2*rec_width/8, 2*rec_width/8, rec_width/4);        spi_barrier();    spi_release_all_streams();}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -