imgresample.c
/*
 * High quality image resampling with polyphase filters
 * Copyright (c) 2001 Fabrice Bellard.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/**
 * @file imgresample.c
 * High quality image resampling with polyphase filters.
 */

#include "avcodec.h"
#include "dsputil.h"

#ifdef USE_FASTMEMCPY
#include "fastmemcpy.h"
#endif

#define NB_COMPONENTS 3

#define PHASE_BITS 4
#define NB_PHASES  (1 << PHASE_BITS)
#define NB_TAPS    4
#define FCENTER    1  /* index of the center of the filter */
//#define TEST     1  /* Test it */

#define POS_FRAC_BITS 16
#define POS_FRAC      (1 << POS_FRAC_BITS)
/* 6 bits precision is needed for MMX */
#define FILTER_BITS   8

#define LINE_BUF_HEIGHT (NB_TAPS * 4)

struct ImgReSampleContext {
    int iwidth, iheight, owidth, oheight;
    int topBand, bottomBand, leftBand, rightBand;
    int padtop, padbottom, padleft, padright;
    int pad_owidth, pad_oheight;
    int h_incr, v_incr;
    int16_t h_filters[NB_PHASES][NB_TAPS] __align8; /* horizontal filters */
    int16_t v_filters[NB_PHASES][NB_TAPS] __align8; /* vertical filters */
    uint8_t *line_buf;
};

void av_build_filter(int16_t *filter, double factor, int tap_count,
                     int phase_count, int scale, int type);

static inline int get_phase(int pos)
{
    return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1);
}

/* This function must be optimized */
static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src,
                            int src_width, int src_start, int src_incr,
                            int16_t *filters)
{
    int src_pos, phase, sum, i;
    const uint8_t *s;
    int16_t *filter;

    src_pos = src_start;
    for(i=0;i<dst_width;i++) {
#ifdef TEST
        /* test */
        if ((src_pos >> POS_FRAC_BITS) < 0 ||
            (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
            av_abort();
#endif
        s = src + (src_pos >> POS_FRAC_BITS);
        phase = get_phase(src_pos);
        filter = filters + phase * NB_TAPS;
#if NB_TAPS == 4
        sum = s[0] * filter[0] +
            s[1] * filter[1] +
            s[2] * filter[2] +
            s[3] * filter[3];
#else
        {
            int j;
            sum = 0;
            for(j=0;j<NB_TAPS;j++)
                sum += s[j] * filter[j];
        }
#endif
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        src_pos += src_incr;
        dst++;
    }
}

/* This function must be optimized */
static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
                       int wrap, int16_t *filter)
{
    int sum, i;
    const uint8_t *s;

    s = src;
    for(i=0;i<dst_width;i++) {
#if NB_TAPS == 4
        sum = s[0 * wrap] * filter[0] +
            s[1 * wrap] * filter[1] +
            s[2 * wrap] * filter[2] +
            s[3 * wrap] * filter[3];
#else
        {
            int j;
            uint8_t *s1 = s;

            sum = 0;
            for(j=0;j<NB_TAPS;j++) {
                sum += s1[0] * filter[j];
                s1 += wrap;
            }
        }
#endif
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        dst++;
        s++;
    }
}

#ifdef HAVE_MMX

#include "i386/mmx.h"

#define FILTER4(reg) \
{\
        s = src + (src_pos >> POS_FRAC_BITS);\
        phase = get_phase(src_pos);\
        filter = filters + phase * NB_TAPS;\
        movq_m2r(*s, reg);\
        punpcklbw_r2r(mm7, reg);\
        movq_m2r(*filter, mm6);\
        pmaddwd_r2r(reg, mm6);\
        movq_r2r(mm6, reg);\
        psrlq_i2r(32, reg);\
        paddd_r2r(mm6, reg);\
        psrad_i2r(FILTER_BITS, reg);\
        src_pos += src_incr;\
}

#define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016Lx\n", tmp.uq);

/* XXX: do four pixels at a time */
static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
                                 const uint8_t *src, int src_width,
                                 int src_start, int src_incr, int16_t *filters)
{
    int src_pos, phase;
    const uint8_t *s;
    int16_t *filter;
    mmx_t tmp;

    src_pos = src_start;
    pxor_r2r(mm7, mm7);

    while (dst_width >= 4) {

        FILTER4(mm0);
        FILTER4(mm1);
        FILTER4(mm2);
        FILTER4(mm3);

        packuswb_r2r(mm7, mm0);
        packuswb_r2r(mm7, mm1);
        packuswb_r2r(mm7, mm3);
        packuswb_r2r(mm7, mm2);
        movq_r2m(mm0, tmp);
        dst[0] = tmp.ub[0];
        movq_r2m(mm1, tmp);
        dst[1] = tmp.ub[0];
        movq_r2m(mm2, tmp);
        dst[2] = tmp.ub[0];
        movq_r2m(mm3, tmp);
        dst[3] = tmp.ub[0];
        dst += 4;
        dst_width -= 4;
    }
    while (dst_width > 0) {
        FILTER4(mm0);
        packuswb_r2r(mm7, mm0);
        movq_r2m(mm0, tmp);
        dst[0] = tmp.ub[0];
        dst++;
        dst_width--;
    }
    emms();
}

static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
                            int wrap, int16_t *filter)
{
    int sum, i, v;
    const uint8_t *s;
    mmx_t tmp;
    mmx_t coefs[4];

    for(i=0;i<4;i++) {
        v = filter[i];
        coefs[i].uw[0] = v;
        coefs[i].uw[1] = v;
        coefs[i].uw[2] = v;
        coefs[i].uw[3] = v;
    }

    pxor_r2r(mm7, mm7);
    s = src;
    while (dst_width >= 4) {
        movq_m2r(s[0 * wrap], mm0);
        punpcklbw_r2r(mm7, mm0);
        movq_m2r(s[1 * wrap], mm1);
        punpcklbw_r2r(mm7, mm1);
        movq_m2r(s[2 * wrap], mm2);
        punpcklbw_r2r(mm7, mm2);
        movq_m2r(s[3 * wrap], mm3);
        punpcklbw_r2r(mm7, mm3);

        pmullw_m2r(coefs[0], mm0);
        pmullw_m2r(coefs[1], mm1);
        pmullw_m2r(coefs[2], mm2);
        pmullw_m2r(coefs[3], mm3);

        paddw_r2r(mm1, mm0);
        paddw_r2r(mm3, mm2);
        paddw_r2r(mm2, mm0);
        psraw_i2r(FILTER_BITS, mm0);

        packuswb_r2r(mm7, mm0);
        movq_r2m(mm0, tmp);

        *(uint32_t *)dst = tmp.ud[0];
        dst += 4;
        s += 4;
        dst_width -= 4;
    }
    while (dst_width > 0) {
        sum = s[0 * wrap] * filter[0] +
            s[1 * wrap] * filter[1] +
            s[2 * wrap] * filter[2] +
            s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
    }
    emms();
}
#endif

#ifdef HAVE_ALTIVEC
typedef union {
    vector unsigned char v;
    unsigned char c[16];
} vec_uc_t;

typedef union {
    vector signed short v;
    signed short s[8];
} vec_ss_t;

void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
                          int wrap, int16_t *filter)
{
    int sum, i;
    const uint8_t *s;
    vector unsigned char *tv, tmp, dstv, zero;
    vec_ss_t srchv[4], srclv[4], fv[4];
    vector signed short zeros, sumhv, sumlv;

    s = src;
    for(i=0;i<4;i++) {
        /*
           The vec_madds later on does an implicit >>15 on the result.
           Since FILTER_BITS is 8, and we have 15 bits of magnitude in
           a signed short, we have just enough bits to pre-shift our
           filter constants <<7 to compensate for vec_madds.
        */
        fv[i].s[0] = filter[i] << (15 - FILTER_BITS);
        fv[i].v = vec_splat(fv[i].v, 0);
    }

    zero = vec_splat_u8(0);
    zeros = vec_splat_s16(0);

    /*
       When we're resampling, we'd ideally like both our input buffers
       and output buffers to be 16-byte aligned, so we can do both aligned
       reads and writes. Sadly we can't always have this at the moment, so
       we opt for aligned writes, as unaligned writes have a huge overhead.
       To do this, do enough scalar resamples to get dst 16-byte aligned.
    */
    i = (-(int)dst) & 0xf;
    while (i > 0) {
        sum = s[0 * wrap] * filter[0] +
            s[1 * wrap] * filter[1] +
            s[2 * wrap] * filter[2] +
            s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
        i--;
    }

    /* Do our altivec resampling on 16 pixels at once. */
    while (dst_width >= 16) {
        /*
           Read 16 (potentially unaligned) bytes from each of
           4 lines into 4 vectors, and split them into shorts.
           Interleave the multiply/accumulate for the resample
           filter with the loads to hide the 3 cycle latency
           the vec_madds have.
        */
        tv = (vector unsigned char *) &s[0 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap]));
        srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[0].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[0].v, fv[0].v, zeros);
        sumlv = vec_madds(srclv[0].v, fv[0].v, zeros);

        tv = (vector unsigned char *) &s[1 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap]));
        srchv[1].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[1].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv);
        sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv);

        tv = (vector unsigned char *) &s[2 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap]));
        srchv[2].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[2].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv);
        sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv);

        tv = (vector unsigned char *) &s[3 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap]));
        srchv[3].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[3].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
        sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);

        /*
           Pack the results into our destination vector,
           and do an aligned write of that back to memory.
        */
        dstv = vec_packsu(sumhv, sumlv);
        vec_st(dstv, 0, (vector unsigned char *) dst);

        dst += 16;
        s += 16;
        dst_width -= 16;
    }

    /*
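(The listing is truncated here.)

For orientation, the scalar path above reduces to: walk the source position in 16.16 fixed point (POS_FRAC_BITS), use the top PHASE_BITS of the fractional part to pick one of NB_PHASES pre-built 4-tap filters, accumulate, shift right by FILTER_BITS and clamp to 0..255. The stand-alone sketch below mirrors that scheme under simplifying assumptions: the sketch_h_resample name, SK_* constants and the flat box-filter coefficients are illustrative only (the real coefficients come from av_build_filter()), and the FCENTER offset and edge handling of the full code are omitted.

#include <stdint.h>
#include <stdio.h>

#define SK_PHASE_BITS    4
#define SK_NB_PHASES     (1 << SK_PHASE_BITS)
#define SK_NB_TAPS       4
#define SK_POS_FRAC_BITS 16
#define SK_FILTER_BITS   8

/* Scalar polyphase horizontal pass: same scheme as h_resample_fast above. */
static void sketch_h_resample(uint8_t *dst, int dst_width, const uint8_t *src,
                              int src_start, int src_incr,
                              int16_t filters[SK_NB_PHASES][SK_NB_TAPS])
{
    int pos = src_start;                                /* 16.16 fixed point */
    for (int i = 0; i < dst_width; i++) {
        const uint8_t *s = src + (pos >> SK_POS_FRAC_BITS);
        /* top PHASE_BITS of the fractional part select the sub-pixel filter */
        int phase = (pos >> (SK_POS_FRAC_BITS - SK_PHASE_BITS)) &
                    (SK_NB_PHASES - 1);
        const int16_t *f = filters[phase];
        int sum = s[0] * f[0] + s[1] * f[1] + s[2] * f[2] + s[3] * f[3];
        sum >>= SK_FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[i] = sum;
        pos += src_incr;                                /* step to next output */
    }
}

int main(void)
{
    /* Illustrative box filter: every tap is 64, so 4 taps sum to 1 << SK_FILTER_BITS. */
    int16_t filters[SK_NB_PHASES][SK_NB_TAPS];
    for (int p = 0; p < SK_NB_PHASES; p++)
        for (int t = 0; t < SK_NB_TAPS; t++)
            filters[p][t] = 1 << (SK_FILTER_BITS - 2);

    uint8_t src[20], dst[8];
    for (int i = 0; i < 20; i++)
        src[i] = (uint8_t)(i * 12);

    /* 2:1 downscale: src_incr = 2.0 expressed in 16.16 fixed point */
    sketch_h_resample(dst, 8, src, 0, 2 << SK_POS_FRAC_BITS, filters);

    for (int i = 0; i < 8; i++)
        printf("%d ", dst[i]);
    printf("\n");
    return 0;
}

With a real resampling filter each phase holds different coefficients, which is what gives the polyphase approach sub-pixel accuracy without recomputing filter weights per output pixel; here every phase is identical only to keep the example short.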