📄 h264.c
字号:
/*
* H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
* Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*/
/**
* @file h264.c
* H.264 / AVC / MPEG4 part10 codec.
* @author Michael Niedermayer <michaelni@gmx.at>
*/
#include "h264.h"
//static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
//static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr);
/**
 * Pack the low 16 bits of two integers into one 32-bit word.
 *
 * Without WORDS_BIGENDIAN, a occupies bits 0-15 and b bits 16-31; with
 * WORDS_BIGENDIAN the two halves are swapped (presumably so that a ends up
 * at the lower address when the word is stored -- confirm against callers).
 *
 * The shift is done on uint32_t: left-shifting a negative int is undefined
 * behaviour in C, and either argument may be negative.
 *
 * @param a first value, only its low 16 bits are kept in the result
 * @param b second value, only its low 16 bits are kept in the result
 * @return the packed 32-bit word
 */
static uint32_t pack16to32(int a, int b){
#ifdef WORDS_BIGENDIAN
    return (uint32_t)(b&0xFFFF) + ((uint32_t)a<<16);
#else
    return (uint32_t)(a&0xFFFF) + ((uint32_t)b<<16);
#endif
}
/**
 * Fill a rectangle of 1- or 4-byte elements with a constant value.
 *
 * Only the following shapes are accepted (row width in bytes, i.e. w*size,
 * by number of rows h): 2x2, 2x4, 4x1, 4x2, 4x4, 8x1, 8x2, 8x4, 16x2, 16x4.
 * Any other shape triggers assert(0).
 *
 * All stores go through memcpy(), so vp does not need any particular
 * alignment.
 *
 * @param vp     pointer to the top-left element of the rectangle
 * @param w      width of the rectangle in elements, should be a constant
 * @param h      height of the rectangle in rows, should be a constant
 * @param stride distance between the starts of two consecutive rows, in elements
 * @param val    value stored in every element; with size==1 it is replicated
 *               into every byte and is expected to fit in 8 bits
 * @param size   the size of val in bytes (1 or 4), should be a constant
 */
static void fill_rectangle_c(void *vp, int w, int h, int stride, uint32_t val, int size){ //FIXME ensure this IS inlined
    uint8_t *p= (uint8_t*)vp;
    int y;

    assert(size==1 || size==4);

    /* from here on w and stride are measured in bytes */
    w      *= size;
    stride *= size;

    if(w==2 && (h==2 || h==4)){
        /* with the asserted sizes only size==1 can produce a 2 byte row */
        const uint16_t v16= (uint16_t)(size==4 ? val : val*0x0101U);

        for(y=0; y<h; y++)
            memcpy(p + y*stride, &v16, sizeof(v16));
    }else if(w==4 && (h==1 || h==2 || h==4)){
        const uint32_t v32= size==4 ? val : val*0x01010101U;

        for(y=0; y<h; y++)
            memcpy(p + y*stride, &v32, sizeof(v32));
    }else if(w==8 && (h==1 || h==2)){
        /* 8 byte rows of height 1 and 2 are written as two 32 bit halves */
        const uint32_t v32= size==4 ? val : val*0x01010101U;

        for(y=0; y<h; y++){
            memcpy(p + 0 + y*stride, &v32, sizeof(v32));
            memcpy(p + 4 + y*stride, &v32, sizeof(v32));
        }
    }else if((w==8 && h==4) || (w==16 && (h==2 || h==4))){
        /* The pattern is built in unsigned 64 bit arithmetic: a signed
         * multiply would overflow for byte values >= 0x80 and for 32 bit
         * values with the top bit set. */
        const uint64_t v64= size==4 ? (uint64_t)val*0x0100000001ULL
                                    : (uint64_t)val*0x0101010101010101ULL;
        int x;

        for(y=0; y<h; y++)
            for(x=0; x<w; x+=8)
                memcpy(p + x + y*stride, &v64, sizeof(v64));
    }else
        assert(0);
}
static void fill_caches(H264Context *h, int mb_type){
MpegEncContext * const s = &h->s;
const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
int topleft_xy, top_xy, topright_xy, left_xy[2];
int topleft_type, top_type, topright_type, left_type[2];
int left_block[4];
int i;
//wow what a mess, why didnt they simplify the interlacing&intra stuff, i cant imagine that these complex rules are worth it
if(h->sps.mb_aff){
//FIXME
topleft_xy = 0; /* avoid warning */
top_xy = 0; /* avoid warning */
topright_xy = 0; /* avoid warning */
}else{
topleft_xy = mb_xy-1 - s->mb_stride;
top_xy = mb_xy - s->mb_stride;
topright_xy= mb_xy+1 - s->mb_stride;
left_xy[0] = mb_xy-1;
left_xy[1] = mb_xy-1;
left_block[0]= 0;
left_block[1]= 1;
left_block[2]= 2;
left_block[3]= 3;
}
topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
if(IS_INTRA(mb_type)){
h->topleft_samples_available=
h->top_samples_available=
h->left_samples_available= 0xFFFF;
h->topright_samples_available= 0xEEEA;
if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
h->topleft_samples_available= 0xB3FF;
h->top_samples_available= 0x33FF;
h->topright_samples_available= 0x26EA;
}
for(i=0; i<2; i++){
if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
h->topleft_samples_available&= 0xDF5F;
h->left_samples_available&= 0x5F5F;
}
}
if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
h->topleft_samples_available&= 0x7FFF;
if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
h->topright_samples_available&= 0xFBFF;
if(IS_INTRA4x4(mb_type)){
if(IS_INTRA4x4(top_type)){
h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
}else{
int pred;
if(IS_INTRA16x16(top_type) || (IS_INTER(top_type) && !h->pps.constrained_intra_pred))
pred= 2;
else{
pred= -1;
}
h->intra4x4_pred_mode_cache[4+8*0]=
h->intra4x4_pred_mode_cache[5+8*0]=
h->intra4x4_pred_mode_cache[6+8*0]=
h->intra4x4_pred_mode_cache[7+8*0]= pred;
}
for(i=0; i<2; i++){
if(IS_INTRA4x4(left_type[i])){
h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
}else{
int pred;
if(IS_INTRA16x16(left_type[i]) || (IS_INTER(left_type[i]) && !h->pps.constrained_intra_pred))
pred= 2;
else{
pred= -1;
}
h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
}
}
}
}
/*
0 . T T. T T T T
1 L . .L . . . .
2 L . .L . . . .
3 . T TL . . . .
4 L . .L . . . .
5 L . .. . . . .
*/
//FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
if(top_type){
h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][0];
h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][1];
h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][2];
h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][7];
h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][10];
h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
}else{
h->non_zero_count_cache[4+8*0]=
h->non_zero_count_cache[5+8*0]=
h->non_zero_count_cache[6+8*0]=
h->non_zero_count_cache[7+8*0]=
h->non_zero_count_cache[1+8*0]=
h->non_zero_count_cache[2+8*0]=
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -