⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 rle_kc.cpp

📁 H.264完整的C语言代码和DCT的代码
💻 CPP
📖 第 1 页 / 共 2 页
字号:

      // all values are now ac values, so don't worry about dc stuff
      j = 7;
      loop_count(j) {
        // get values and check if they == 0
        ac_val = half2(zigzag[src_idx]);
        ac_zero = itocc(int(ac_val == 0));

        // encode run-level pair for the two blocks and store them
        run_level = (int(ac_val) & lo_mask) | shift(int(run), 16);
        rle_out[lo_idx] = run_level;
        run_level = shift(int(ac_val), -16) | (int(run) & hi_mask);
        rle_out[hi_idx] = run_level;

        // update the run variable, and reset to 0 if level != 0
        run = select(ac_zero, (run + h2_one), 0);

        // update source data index
        src_idx = src_idx + 1;

        // update the result data indices if level != 0
        lo_idx = select(ac_zero, lo_idx, lo_idx + 1);
        ac_zero = itocc(shift(cctoi(ac_zero), -16));
        hi_idx = select(ac_zero, hi_idx, hi_idx + 1);
      }

      // save final value of the run variable for each block
      runs[0+run_idx] = int(run) & lo_mask;
      runs[1+run_idx] = shift(int(run), -16);
      run_idx = run_idx + 2;

      // if not all the values were non-zero, store a zero to indicate the end
      // of actual run-level pairs for the first block.
      all_non_zero = itocc(lo_idx == (data_idx + 8));
      rle_out[lo_idx] = select( all_non_zero, int(rle_out[lo_idx]), 0);

      // update the dest base data index
      data_idx = data_idx + 16;

      // if not all the values were non-zero, store a zero to indicate the end
      // of actual run-level pairs for the second block.
      all_non_zero = itocc(hi_idx == data_idx);
      rle_out[hi_idx] = select(all_non_zero, int(rle_out[hi_idx]), 0);
    }



    // Now, patch up the run-level encoding. This is done by patching up the
    // cluster 0/cluster 1 data first, and then the cluster 1/cluster 2
    // data, etc. However it is pipelined so that the patching for all 6
    // blocks are done with as much parallelism as possible. Thus, while
    // the cluster 1/cluster 2 data is being patched up for the first block,
    // the cluster 0/cluster 1 data will be patched up for the second block,
    // and so on. The invalid cc tells the cluster when it has a valid run
    // from the previous cluster. Thus cluster 7 will be invalid the first
    // 6 iterations, and cluster 1 will be invalid the last 6. Cluster 0 is
    // always invalid because it doesn't need to have any of its data patched
    // up (ie, there is no previous cluster from which to get a run). The
    // invalid cc is passed from cluster to cluster via communication. It
    // originates in cluster 1, and cluster 1 sets it to false until it has
    // finished all its blocks. It starts as true in every other cluster.
    // Eventually the invalid flag will be false in every cluster, and then
    // true again as the value cluster 1 sets it to percolates to the rest
    // of the clusters.

    cc cluster_one;     // true only in cluster 1 (cid() == 1)
    cc invalid;         // false when a cluster has a valid run to work with
    cc send_run;        // true if clust needs to send its run to next cluster
    cc all_zeros;       // true if the cluster had all zeros for this block
    int true_cc;           // = -1 (all bits set); used for setting cc's to TRUE
    int done;           // non-zero when cluster 1 has patched up all blocks
    int blocks_left;    // how many blocks cluster 1 has left to patch up
    int run_token;      // run of zeros, passed from cluster to cluster
    int orig_run_level; // the run-level calculated above
    int new_run_level;  // the run-level adjusted with the final run from the
                        // previous cluster
    int block_idx;      // index to the blocks, 0,8,16,24,32,40
    uc<int> perm_token = 0x65432100;   // used to comm. invalid and run_token
                                       // sends value to next cluster

    cluster_one = itocc(cid() == 1);
    true_cc = -1;
    blocks_left = 6;   // 8 - (1 + 1);
    invalid = itocc(select(cluster_one, 0, true_cc));
    send_run = clust_zero;
    run_idx = 0;         // index into final run values, 1 per block
    block_idx = 0;
    run_token = runs[0];

    i = 12;           // 7 patches per block; 6 blocks -> 7 + (6-1) = 12
    loop_count(i) {
      // get the run-level pair calculated above and see if it equals zero
      orig_run_level = rle_out[block_idx];
      all_zeros = itocc(select(Iframe, 1, orig_run_level) == 0);

      // get the final run of zeros of the previous cluster
      run_token = commucperm(perm_token, run_token);

      // adjust first run-level pair of this cluster
      new_run_level = orig_run_level + shift(run_token, 16);

      // and store it if the previous cluster's run was valid; if it wasn't,
      // store the original value back again
      rle_out[block_idx] = select(invalid, orig_run_level,
                                  select(all_zeros, 0, new_run_level));

      // any blocks left?
      blocks_left = blocks_left - 1;
      done = blocks_left == 0;

      // update the idx to the run values and to the block data
      run_idx = select(send_run, run_idx + 1, run_idx);
      block_idx = select(invalid, block_idx, block_idx + 8);
      block_idx = select(clust_zero, block_idx + 8, block_idx);

      // if this cluster was all zeros, update run and pass it along, else
      // pass the final run of zeros for this cluster to the next
      int next_run = runs[run_idx];
      run_token = select(all_zeros, select(clust_zero, next_run, run_token + 8), next_run);

      // pass the invalid "token" on to the next cluster, and if this is
      // cluster 1, mark it true if no more blocks left
      invalid = itocc(select(cluster_one, done, commucperm(perm_token, cctoi(invalid))));

      // pass the send_run token, except mark it false if cluster 0 and done
      send_run = itocc(select(clust_zero, ~done, commucperm(perm_token, cctoi(send_run))));
    }


    // calculate coded block pattern (cbp)
    int cbp, zerorow, allzero;
    cc az;
    uc<int> perm_combine_1 = 0x77553311;
    uc<int> perm_combine_2 = 0x66662222;
    uc<int> perm_combine_3 = 0x44444444;

    zerorow = (rle_out[0] == 0);
    allzero = zerorow & commucperm(perm_combine_1, zerorow);
    allzero = allzero & commucperm(perm_combine_2, allzero);
    allzero = allzero & commucperm(perm_combine_3, allzero);
    az = itocc(allzero);
    cbp = select(az, 0, 32);

    zerorow = (rle_out[8] == 0);
    allzero = zerorow & commucperm(perm_combine_1, zerorow);
    allzero = allzero & commucperm(perm_combine_2, allzero);
    allzero = allzero & commucperm(perm_combine_3, allzero);
    az = itocc(allzero);
    cbp = cbp | select(az, 0, 16);

    zerorow = (rle_out[16] == 0);
    allzero = zerorow & commucperm(perm_combine_1, zerorow);
    allzero = allzero & commucperm(perm_combine_2, allzero);
    allzero = allzero & commucperm(perm_combine_3, allzero);
    az = itocc(allzero);
    cbp = cbp | select(az, 0, 8);

    zerorow = (rle_out[24] == 0);
    allzero = zerorow & commucperm(perm_combine_1, zerorow);
    allzero = allzero & commucperm(perm_combine_2, allzero);
    allzero = allzero & commucperm(perm_combine_3, allzero);
    az = itocc(allzero);
    cbp = cbp | select(az, 0, 4);

    zerorow = (rle_out[32] == 0);
    allzero = zerorow & commucperm(perm_combine_1, zerorow);
    allzero = allzero & commucperm(perm_combine_2, allzero);
    allzero = allzero & commucperm(perm_combine_3, allzero);
    az = itocc(allzero);
    cbp = cbp | select(az, 0, 2);

    zerorow = (rle_out[40] == 0);
    allzero = zerorow & commucperm(perm_combine_1, zerorow);
    allzero = allzero & commucperm(perm_combine_2, allzero);
    allzero = allzero & commucperm(perm_combine_3, allzero);
    az = itocc(allzero);
    cbp = cbp | select(az, 0, 1);


    // Transpose blocks containing run-level pairs

    // calculate cluster dependent send and store indices
    int idx0 = cid();
    int idx1 = (idx0 - 1) & 7;
    int idx2 = (idx1 - 1) & 7;
    int idx3 = (idx2 - 1) & 7;
    int idx4 = (idx3 - 1) & 7;
    int idx5 = (idx4 - 1) & 7;
    int idx6 = (idx5 - 1) & 7;
    int idx7 = (idx6 - 1) & 7;

    array<int> output_array(48);
    output_array[0+idx0] = rle_out[0+idx0];
    output_array[0+idx7] = commucperm(perm_a, rle_out[0+idx1]);
    output_array[0+idx6] = commucperm(perm_b, rle_out[0+idx2]);
    output_array[0+idx5] = commucperm(perm_c, rle_out[0+idx3]);
    output_array[0+idx4] = commucperm(perm_d, rle_out[0+idx4]);
    output_array[0+idx3] = commucperm(perm_e, rle_out[0+idx5]);
    output_array[0+idx2] = commucperm(perm_f, rle_out[0+idx6]);
    output_array[0+idx1] = commucperm(perm_g, rle_out[0+idx7]);

    output_array[8+idx0] = rle_out[8+idx0];
    output_array[8+idx7] = commucperm(perm_a, rle_out[8+idx1]);
    output_array[8+idx6] = commucperm(perm_b, rle_out[8+idx2]);
    output_array[8+idx5] = commucperm(perm_c, rle_out[8+idx3]);
    output_array[8+idx4] = commucperm(perm_d, rle_out[8+idx4]);
    output_array[8+idx3] = commucperm(perm_e, rle_out[8+idx5]);
    output_array[8+idx2] = commucperm(perm_f, rle_out[8+idx6]);
    output_array[8+idx1] = commucperm(perm_g, rle_out[8+idx7]);

    output_array[16+idx0] = rle_out[16+idx0];
    output_array[16+idx7] = commucperm(perm_a, rle_out[16+idx1]);
    output_array[16+idx6] = commucperm(perm_b, rle_out[16+idx2]);
    output_array[16+idx5] = commucperm(perm_c, rle_out[16+idx3]);
    output_array[16+idx4] = commucperm(perm_d, rle_out[16+idx4]);
    output_array[16+idx3] = commucperm(perm_e, rle_out[16+idx5]);
    output_array[16+idx2] = commucperm(perm_f, rle_out[16+idx6]);
    output_array[16+idx1] = commucperm(perm_g, rle_out[16+idx7]);

    output_array[24+idx0] = rle_out[24+idx0];
    output_array[24+idx7] = commucperm(perm_a, rle_out[24+idx1]);
    output_array[24+idx6] = commucperm(perm_b, rle_out[24+idx2]);
    output_array[24+idx5] = commucperm(perm_c, rle_out[24+idx3]);
    output_array[24+idx4] = commucperm(perm_d, rle_out[24+idx4]);
    output_array[24+idx3] = commucperm(perm_e, rle_out[24+idx5]);
    output_array[24+idx2] = commucperm(perm_f, rle_out[24+idx6]);
    output_array[24+idx1] = commucperm(perm_g, rle_out[24+idx7]);

    output_array[32+idx0] = rle_out[32+idx0];
    output_array[32+idx7] = commucperm(perm_a, rle_out[32+idx1]);
    output_array[32+idx6] = commucperm(perm_b, rle_out[32+idx2]);
    output_array[32+idx5] = commucperm(perm_c, rle_out[32+idx3]);
    output_array[32+idx4] = commucperm(perm_d, rle_out[32+idx4]);
    output_array[32+idx3] = commucperm(perm_e, rle_out[32+idx5]);
    output_array[32+idx2] = commucperm(perm_f, rle_out[32+idx6]);
    output_array[32+idx1] = commucperm(perm_g, rle_out[32+idx7]);

    output_array[40+idx0] = rle_out[40+idx0];
    output_array[40+idx7] = commucperm(perm_a, rle_out[40+idx1]);
    output_array[40+idx6] = commucperm(perm_b, rle_out[40+idx2]);
    output_array[40+idx5] = commucperm(perm_c, rle_out[40+idx3]);
    output_array[40+idx4] = commucperm(perm_d, rle_out[40+idx4]);
    output_array[40+idx3] = commucperm(perm_e, rle_out[40+idx5]);
    output_array[40+idx2] = commucperm(perm_f, rle_out[40+idx6]);
    output_array[40+idx1] = commucperm(perm_g, rle_out[40+idx7]);


    // output quant_scale
    uint qs = commclperm(ucid(), 0, quant_scale);
    out(clust_zero) << half2(qs);

    // output cbp
    out(clust_zero) << half2(cbp);

    // output motion vector
    int mv, bestval;
    motion(Pframe, ccend) >> mv >> bestval;   // don't need 'bestval'
    out(Pframe) << mv;

    // output run-level pairs
    i = 6;
    int idx = 0;
    int tmpout;
    cc valid;
    loop_count(i) {
      tmpout = output_array[idx];
      valid = itocc(cctoi(Iframe) | (tmpout > 0));
      out(valid) << tmpout;

      tmpout = output_array[1+idx];
      valid = itocc(tmpout > 0);
      out(valid) << tmpout;

      tmpout = output_array[2+idx];
      valid = itocc(tmpout > 0);
      out(valid) << tmpout;

      tmpout = output_array[3+idx];
      valid = itocc(tmpout > 0);
      out(valid) << tmpout;

      tmpout = output_array[4+idx];
      valid = itocc(tmpout > 0);
      out(valid) << tmpout;

      tmpout = output_array[5+idx];
      valid = itocc(tmpout > 0);
      out(valid) << tmpout;

      tmpout = output_array[6+idx];
      valid = itocc(tmpout > 0);
      out(valid) << tmpout;

      tmpout = output_array[7+idx];
      valid = itocc(tmpout > 0);
      out(valid) << tmpout;

      idx = idx + 8;

      out(clust_zero) << -1;
    }
  }

  flush(out, -2);

  // read remaining values in motion stream, if any (should be NULL elements)
  int drain;
  cc cctrue = itocc(1 == 1);
  motion(cctrue, ccend) >> drain;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -