📄 h264.c

📁 手机端的H264源码
💻 C
📖 第 1 页 / 共 5 页
字号:
12 3 4 5 下一页
/*
 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */
 
/**
 * @file h264.c
 * H.264 / AVC / MPEG4 part10 codec.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */

#include "h264.h"

//static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
//static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
/* Forward declaration of the file-local filter_mb(); its definition is not
 * part of this excerpt (presumably further down in this file -- confirm).
 * It takes the decoder context, a macroblock position (mb_x, mb_y) and three
 * byte pointers named after the Y, Cb and Cr planes. */
static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr);

/**
 * Packs the low 16 bits of two integers into one 32-bit word.
 *
 * Without WORDS_BIGENDIAN, a goes into bits 0..15 and b into bits 16..31;
 * with WORDS_BIGENDIAN the roles are swapped. In both cases, when the result
 * is stored to memory as a uint32_t, a occupies the first two bytes and b the
 * following two.
 *
 * The masking and shifting are done in uint32_t: left-shifting a negative (or
 * too large) signed int is undefined behaviour in C, and negative inputs are
 * accepted here.
 *
 * @param a value stored first in memory (only its low 16 bits are kept)
 * @param b value stored second in memory (only its low 16 bits are kept)
 * @return the packed 32-bit word
 */
static  uint32_t pack16to32(int a, int b){
#ifdef WORDS_BIGENDIAN
   return ((uint32_t)b&0xFFFF) + ((uint32_t)a<<16);
#else
   return ((uint32_t)a&0xFFFF) + ((uint32_t)b<<16);
#endif
}

/**
 * Fills a rectangle with a value.
 *
 * All stores go through memcpy(), so the destination does not need to be
 * aligned for 16/32/64-bit accesses.
 *
 * Only the following (w*size, h) combinations are supported; anything else
 * triggers assert(0):
 *   2x2, 2x4, 4x1, 4x2, 4x4, 8x1, 8x2, 8x4, 16x2, 16x4
 *
 * @param vp     pointer to the top-left element of the rectangle
 * @param w      width of the rectangle in elements, should be a constant
 * @param h      height of the rectangle in rows, should be a constant
 * @param stride distance between the starts of two rows, in elements
 * @param val    value to store; for size==1 it is replicated into every byte
 *               by multiplication (this assumes val <= 0xFF), for size==4 it
 *               is stored as a whole 32-bit element
 * @param size   the size of val in bytes (1 or 4), should be a constant
 */
static  void fill_rectangle_c(void *vp, int w, int h, int stride, uint32_t val, int size){ //FIXME ensure this IS inlined
    uint8_t *p= (uint8_t*)vp;
    int x, y;

    assert(size==1 || size==4);

    /* From here on, w and stride are measured in bytes. */
    w      *= size;
    stride *= size;

    if(w==2 && (h==2 || h==4)){
        /* 2-byte rows: one 16-bit store per row. */
        const uint16_t v16= size==4 ? val : val*0x0101;

        for(y=0; y<h; y++)
            memcpy(p + y*stride, &v16, sizeof(v16));
    }else if((w==4 && (h==1 || h==2 || h==4)) || (w==8 && (h==1 || h==2))){
        /* 4- and 8-byte rows: one or two 32-bit stores per row. */
        const uint32_t v32= size==4 ? val : val*0x01010101;

        for(y=0; y<h; y++)
            for(x=0; x<w; x+=4)
                memcpy(p + x + y*stride, &v32, sizeof(v32));
    }else if((w==8 && h==4) || (w==16 && (h==2 || h==4))){
        /* 8- and 16-byte rows: one or two 64-bit stores per row.
         * The pattern is built in unsigned 64-bit arithmetic; a signed
         * multiplication would overflow as soon as the top bit of val is set.
         * The constants carry no ULL suffix on purpose, to stay buildable
         * with toolchains that lack it. */
        const uint64_t mul= size==4 ? 0x0100000001 : 0x0101010101010101;
        const uint64_t v64= val*mul;

        for(y=0; y<h; y++)
            for(x=0; x<w; x+=8)
                memcpy(p + x + y*stride, &v64, sizeof(v64));
    }else
        assert(0);
}

static  void fill_caches(H264Context *h, int mb_type){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
    int topleft_xy, top_xy, topright_xy, left_xy[2];
    int topleft_type, top_type, topright_type, left_type[2];
    int left_block[4];
    int i;

    //wow what a mess, why didnt they simplify the interlacing&intra stuff, i cant imagine that these complex rules are worth it 
    
    if(h->sps.mb_aff){
    //FIXME
        topleft_xy = 0; /* avoid warning */
        top_xy = 0; /* avoid warning */
        topright_xy = 0; /* avoid warning */
    }else{
        topleft_xy = mb_xy-1 - s->mb_stride;
        top_xy     = mb_xy   - s->mb_stride;
        topright_xy= mb_xy+1 - s->mb_stride;
        left_xy[0]   = mb_xy-1;
        left_xy[1]   = mb_xy-1;
        left_block[0]= 0;
        left_block[1]= 1;
        left_block[2]= 2;
        left_block[3]= 3;
    }

    topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
    top_type     = h->slice_table[top_xy     ] == h->slice_num ? s->current_picture.mb_type[top_xy]     : 0;
    topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
    left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
    left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;

    if(IS_INTRA(mb_type)){
        h->topleft_samples_available= 
        h->top_samples_available= 
        h->left_samples_available= 0xFFFF;
        h->topright_samples_available= 0xEEEA;

        if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
            h->topleft_samples_available= 0xB3FF;
            h->top_samples_available= 0x33FF;
            h->topright_samples_available= 0x26EA;
        }
        for(i=0; i<2; i++){
            if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
                h->topleft_samples_available&= 0xDF5F;
                h->left_samples_available&= 0x5F5F;
            }
        }
        
        if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
            h->topleft_samples_available&= 0x7FFF;
        
        if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
            h->topright_samples_available&= 0xFBFF;
    
        if(IS_INTRA4x4(mb_type)){
            if(IS_INTRA4x4(top_type)){
                h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
                h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
                h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
                h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
            }else{
                int pred;
                if(IS_INTRA16x16(top_type) || (IS_INTER(top_type) && !h->pps.constrained_intra_pred))
                    pred= 2;
                else{
                    pred= -1;
                }
                h->intra4x4_pred_mode_cache[4+8*0]=
                h->intra4x4_pred_mode_cache[5+8*0]=
                h->intra4x4_pred_mode_cache[6+8*0]=
                h->intra4x4_pred_mode_cache[7+8*0]= pred;
            }
            for(i=0; i<2; i++){
                if(IS_INTRA4x4(left_type[i])){
                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
                }else{
                    int pred;
                    if(IS_INTRA16x16(left_type[i]) || (IS_INTER(left_type[i]) && !h->pps.constrained_intra_pred))
                        pred= 2;
                    else{
                        pred= -1;
                    }
                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
                }
            }
        }
    }
    
    
/*
0 . T T. T T T T 
1 L . .L . . . . 
2 L . .L . . . . 
3 . T TL . . . . 
4 L . .L . . . . 
5 L . .. . . . . 
*/
//FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
    if(top_type){
        h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][0];
        h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][1];
        h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][2];
        h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
    
        h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][7];
        h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
    
        h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][10];
        h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
    }else{
        h->non_zero_count_cache[4+8*0]=      
        h->non_zero_count_cache[5+8*0]=
        h->non_zero_count_cache[6+8*0]=
        h->non_zero_count_cache[7+8*0]=
    
        h->non_zero_count_cache[1+8*0]=
        h->non_zero_count_cache[2+8*0]=
12 3 4 5 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -