📄 rgb2rgb_template.c

📁 ffmpeg的完整源代码和作者自己写的文档。不但有在Linux的工程哦
💻 C
📖 第 1 页 / 共 5 页
字号:
12 3 4 5 下一页
/*
 *  rgb2rgb.c, Software RGB to RGB converter
 *  pluralized by Software PAL8 to RGB converter
 *               Software YUV to YUV converter
 *               Software YUV to RGB converter
 *  Written by Nick Kurshev.
 *  palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
 *  lot of big-endian byteorder fixes by Alex Beregszaszi
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * the C code (not assembly, mmx, ...) of this file can be used
 * under the LGPL license too
 */

#include <stddef.h>
#include <inttypes.h> /* for __WORDSIZE */

/* Fallback when the system headers do not provide __WORDSIZE: use
 * MP_WORDSIZE, which must be defined elsewhere (not visible in this file). */
#ifndef __WORDSIZE
// #warning You have misconfigured system and probably will lose performance!
#define __WORDSIZE MP_WORDSIZE
#endif

/* Drop any earlier definitions of the instruction-name macros before they are
 * redefined below (presumably so this template can be included more than once
 * with different HAVE_* settings -- confirm against the including file). */
#undef PREFETCH
#undef MOVNTQ
#undef EMMS
#undef SFENCE
#undef MMREG_SIZE
#undef PREFETCHW
#undef PAVGB

/* MMREG_SIZE: 16 when HAVE_SSE2 is defined, 8 otherwise. */
#ifdef HAVE_SSE2
#define MMREG_SIZE 16
#else
#define MMREG_SIZE 8
#endif

/* Prefetch and byte-average mnemonics, pasted into inline-asm strings.
 * HAVE_3DNOW takes precedence over HAVE_MMX2. When neither is defined the
 * prefetch macros expand to "#" / " # nop" (presumably read by the assembler
 * as a comment, making the line a no-op) and PAVGB stays undefined. */
#ifdef HAVE_3DNOW
#define PREFETCH  "prefetch"
#define PREFETCHW "prefetchw"
#define PAVGB     "pavgusb"
#elif defined ( HAVE_MMX2 )
#define PREFETCH "prefetchnta"
#define PREFETCHW "prefetcht0"
#define PAVGB     "pavgb"
#else
#ifdef __APPLE__
#define PREFETCH "#"
#define PREFETCHW "#"
#else
#define PREFETCH  " # nop"
#define PREFETCHW " # nop"
#endif
#endif

/* EMMS: instruction issued after each MMX section in the functions below. */
#ifdef HAVE_3DNOW
/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
#define EMMS     "femms"
#else
#define EMMS     "emms"
#endif

/* MOVNTQ / SFENCE: with HAVE_MMX2 stores use "movntq" and are followed by
 * "sfence"; otherwise stores fall back to plain "movq" and SFENCE expands to
 * " # nop". */
#ifdef HAVE_MMX2
#define MOVNTQ "movntq"
#define SFENCE "sfence"
#else
#define MOVNTQ "movq"
#define SFENCE " # nop"
#endif

/**
 * Expand packed 3-byte pixels to 4-byte pixels.
 *
 * Every source pixel gains one zero byte. Without WORDS_BIGENDIAN the three
 * source bytes are copied in order and followed by 0; with WORDS_BIGENDIAN a
 * 0 is written first, followed by the three source bytes in reverse order.
 *
 * @param src      source buffer of packed 3-byte pixels
 * @param dst      destination buffer; receives 4 bytes per source pixel
 * @param src_size size of src in bytes; the scalar loop always consumes whole
 *                 3-byte pixels, so this is expected to be a multiple of 3
 */
static inline void RENAME(rgb24to32)(const uint8_t *src,uint8_t *dst,long src_size)
{
    uint8_t *dest = dst;
    const uint8_t *s = src;
    const uint8_t *end;
#ifdef HAVE_MMX
    const uint8_t *mm_end;
#endif
    end = s + src_size;
#ifdef HAVE_MMX
    __asm __volatile(PREFETCH"    %0"::"m"(*s):"memory");
    /* One MMX iteration converts 8 pixels (24 -> 32 bytes), but its last load
     * ("punpckldq 21%1") fetches 4 bytes starting at offset 21 and therefore
     * touches byte 24. Only run it while at least 25 source bytes remain so
     * that it never reads past the end of src; the scalar loop below converts
     * whatever is left. */
    mm_end = end - 24;
    /* mm7 = mask32 (defined outside this block; presumably clears the top
     * byte of each dword -- confirm against its definition). */
    __asm __volatile("movq        %0, %%mm7"::"m"(mask32):"memory");
    while (s < mm_end)
    {
        __asm __volatile(
        PREFETCH"    32%1           \n\t"
        /* Load 8 pixels as overlapping dwords at byte offsets 0,3,...,21. */
        "movd          %1, %%mm0    \n\t"
        "punpckldq    3%1, %%mm0    \n\t"
        "movd         6%1, %%mm1    \n\t"
        "punpckldq    9%1, %%mm1    \n\t"
        "movd        12%1, %%mm2    \n\t"
        "punpckldq   15%1, %%mm2    \n\t"
        "movd        18%1, %%mm3    \n\t"
        "punpckldq   21%1, %%mm3    \n\t"
        /* Mask every dword with mm7. */
        "pand       %%mm7, %%mm0    \n\t"
        "pand       %%mm7, %%mm1    \n\t"
        "pand       %%mm7, %%mm2    \n\t"
        "pand       %%mm7, %%mm3    \n\t"
        /* Store 32 output bytes. */
        MOVNTQ"     %%mm0,   %0     \n\t"
        MOVNTQ"     %%mm1,  8%0     \n\t"
        MOVNTQ"     %%mm2, 16%0     \n\t"
        MOVNTQ"     %%mm3, 24%0"
        :"=m"(*dest)
        :"m"(*s)
        :"memory");
        dest += 32;
        s += 24;
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Scalar path: the whole buffer without MMX, the remainder with it. */
    while (s < end)
    {
#ifdef WORDS_BIGENDIAN
        /* RGB24 (= R,G,B) -> RGB32 (= A,B,G,R) */
        *dest++ = 0;
        *dest++ = s[2];
        *dest++ = s[1];
        *dest++ = s[0];
        s+=3;
#else
        *dest++ = *s++;
        *dest++ = *s++;
        *dest++ = *s++;
        *dest++ = 0;
#endif
    }
}

static inline void RENAME(rgb32to24)(const uint8_t *src,uint8_t *dst,long src_size)
{
    uint8_t *dest = dst;
    const uint8_t *s = src;
    const uint8_t *end;
#ifdef HAVE_MMX
    const uint8_t *mm_end;
#endif
    end = s + src_size;
#ifdef HAVE_MMX
    __asm __volatile(PREFETCH"    %0"::"m"(*s):"memory");
    mm_end = end - 31;
    while (s < mm_end)
    {
        __asm __volatile(
        PREFETCH"    32%1           \n\t"
        "movq          %1, %%mm0    \n\t"
        "movq         8%1, %%mm1    \n\t"
        "movq        16%1, %%mm4    \n\t"
        "movq        24%1, %%mm5    \n\t"
        "movq       %%mm0, %%mm2    \n\t"
        "movq       %%mm1, %%mm3    \n\t"
        "movq       %%mm4, %%mm6    \n\t"
        "movq       %%mm5, %%mm7    \n\t"
        "psrlq         $8, %%mm2    \n\t"
        "psrlq         $8, %%mm3    \n\t"
        "psrlq         $8, %%mm6    \n\t"
        "psrlq         $8, %%mm7    \n\t"
        "pand          %2, %%mm0    \n\t"
        "pand          %2, %%mm1    \n\t"
        "pand          %2, %%mm4    \n\t"
        "pand          %2, %%mm5    \n\t"
        "pand          %3, %%mm2    \n\t"
        "pand          %3, %%mm3    \n\t"
        "pand          %3, %%mm6    \n\t"
        "pand          %3, %%mm7    \n\t"
        "por        %%mm2, %%mm0    \n\t"
        "por        %%mm3, %%mm1    \n\t"
        "por        %%mm6, %%mm4    \n\t"
        "por        %%mm7, %%mm5    \n\t"

        "movq       %%mm1, %%mm2    \n\t"
        "movq       %%mm4, %%mm3    \n\t"
        "psllq        $48, %%mm2    \n\t"
        "psllq        $32, %%mm3    \n\t"
        "pand          %4, %%mm2    \n\t"
        "pand          %5, %%mm3    \n\t"
        "por        %%mm2, %%mm0    \n\t"
        "psrlq        $16, %%mm1    \n\t"
        "psrlq        $32, %%mm4    \n\t"
        "psllq        $16, %%mm5    \n\t"
        "por        %%mm3, %%mm1    \n\t"
        "pand          %6, %%mm5    \n\t"
        "por        %%mm5, %%mm4    \n\t"

        MOVNTQ"     %%mm0,   %0     \n\t"
        MOVNTQ"     %%mm1,  8%0     \n\t"
        MOVNTQ"     %%mm4, 16%0"
        :"=m"(*dest)
        :"m"(*s),"m"(mask24l),
         "m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
        :"memory");
        dest += 24;
        s += 32;
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    while (s < end)
    {
#ifdef WORDS_BIGENDIAN
        /* RGB32 (= A,B,G,R) -> RGB24 (= R,G,B) */
        s++;
        dest[2] = *s++;
        dest[1] = *s++;
        dest[0] = *s++;
        dest += 3;
#else
        *dest++ = *s++;
        *dest++ = *s++;
        *dest++ = *s++;
        s++;
#endif
    }
}

/*
 Original by Strepto/Astral
 ported to gcc & bugfixed : A'rpi
 MMX2, 3DNOW optimization by Nick Kurshev
 32-bit C version and the and&add trick by Michael Niedermayer
*/
/**
 * Convert packed 15-bit pixels to 16-bit pixels.
 *
 * For every 16-bit word x the result is (x & 0x7FFF) + (x & 0x7FE0): the low
 * 5 bits stay in place and bits 5..14 are doubled, i.e. moved up by one bit
 * position, which leaves bit 5 of the result cleared.
 *
 * @param src      source buffer of 16-bit words
 * @param dst      destination buffer, same size as src
 * @param src_size size of src in bytes
 *
 * NOTE(review): the scalar loops access src/dst through uint32_t/uint16_t
 * pointers, so both buffers are assumed to be suitably aligned -- confirm
 * for targets that do not tolerate unaligned access.
 */
static inline void RENAME(rgb15to16)(const uint8_t *src,uint8_t *dst,long src_size)
{
    const uint8_t *s = src;
    uint8_t *d = dst;
    const uint8_t *end = s + src_size;
    const uint8_t *mm_end;
#ifdef HAVE_MMX
    __asm __volatile(PREFETCH"    %0"::"m"(*s));
    /* mm4 = mask15s (defined outside this block). */
    __asm __volatile("movq        %0, %%mm4"::"m"(mask15s));
    mm_end = end - 15; /* MMX loop needs a full 16-byte group */
    while (s < mm_end)
    {
        /* Per word: (x & mm4) + x, eight pixels per iteration.
         * The asm reads 16 bytes at s and writes 16 bytes at d while the
         * operands only name *s and *d, so the "memory" clobber is required
         * to tell the compiler about the remaining accesses. */
        __asm __volatile(
        PREFETCH"  32%1         \n\t"
        "movq        %1, %%mm0  \n\t"
        "movq       8%1, %%mm2  \n\t"
        "movq     %%mm0, %%mm1  \n\t"
        "movq     %%mm2, %%mm3  \n\t"
        "pand     %%mm4, %%mm0  \n\t"
        "pand     %%mm4, %%mm2  \n\t"
        "paddw    %%mm1, %%mm0  \n\t"
        "paddw    %%mm3, %%mm2  \n\t"
        MOVNTQ"   %%mm0,  %0    \n\t"
        MOVNTQ"   %%mm2, 8%0"
        :"=m"(*d)
        :"m"(*s)
        :"memory");
        d += 16;
        s += 16;
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Two pixels at a time. Masking with 0x7FFF7FFF first keeps the low
     * word's sum below 0x10000, so no carry leaks into the high pixel. */
    mm_end = end - 3;
    while (s < mm_end)
    {
        uint32_t x = *((const uint32_t *)s);
        *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0);
        d += 4;
        s += 4;
    }
    /* At most one pixel is left. */
    if (s < end)
    {
        uint16_t x = *((const uint16_t *)s);
        *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0);
    }
}

/**
 * Convert packed 16-bit pixels to 15-bit pixels.
 *
 * For every 16-bit word x the result is ((x >> 1) & 0x7FE0) | (x & 0x001F):
 * the low 5 bits stay in place, bits 6..15 move down by one position and
 * bit 5 of the input is discarded.
 *
 * @param src      source buffer of 16-bit words
 * @param dst      destination buffer, same size as src
 * @param src_size size of src in bytes
 *
 * NOTE(review): the scalar loops access src/dst through uint32_t/uint16_t
 * pointers, so both buffers are assumed to be suitably aligned -- confirm
 * for targets that do not tolerate unaligned access.
 */
static inline void RENAME(rgb16to15)(const uint8_t *src,uint8_t *dst,long src_size)
{
    const uint8_t *s = src;
    uint8_t *d = dst;
    const uint8_t *end = s + src_size;
    const uint8_t *mm_end;
#ifdef HAVE_MMX
    __asm __volatile(PREFETCH"    %0"::"m"(*s));
    /* mm7 = mask15rg, mm6 = mask15b (both defined outside this block). */
    __asm __volatile("movq        %0, %%mm7"::"m"(mask15rg));
    __asm __volatile("movq        %0, %%mm6"::"m"(mask15b));
    mm_end = end - 15; /* MMX loop needs a full 16-byte group */
    while (s < mm_end)
    {
        /* Per quadword: ((x >> 1) & mm7) | (x & mm6), eight pixels per
         * iteration. The asm reads 16 bytes at s and writes 16 bytes at d
         * while the operands only name *s and *d, so the "memory" clobber is
         * required to tell the compiler about the remaining accesses. */
        __asm __volatile(
        PREFETCH"  32%1         \n\t"
        "movq        %1, %%mm0  \n\t"
        "movq       8%1, %%mm2  \n\t"
        "movq     %%mm0, %%mm1  \n\t"
        "movq     %%mm2, %%mm3  \n\t"
        "psrlq       $1, %%mm0  \n\t"
        "psrlq       $1, %%mm2  \n\t"
        "pand     %%mm7, %%mm0  \n\t"
        "pand     %%mm7, %%mm2  \n\t"
        "pand     %%mm6, %%mm1  \n\t"
        "pand     %%mm6, %%mm3  \n\t"
        "por      %%mm1, %%mm0  \n\t"
        "por      %%mm3, %%mm2  \n\t"
        MOVNTQ"   %%mm0,  %0    \n\t"
        MOVNTQ"   %%mm2, 8%0"
        :"=m"(*d)
        :"m"(*s)
        :"memory");
        d += 16;
        s += 16;
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Two pixels at a time. The shift moves bit 16 (lowest bit of the high
     * pixel) into bit 15, which the 0x7FE07FE0 mask clears again, so the two
     * pixels do not contaminate each other. */
    mm_end = end - 3;
    while (s < mm_end)
    {
        uint32_t x = *((const uint32_t *)s);
        *((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F);
        s += 4;
        d += 4;
    }
    /* At most one pixel is left. */
    if (s < end)
    {
        uint16_t x = *((const uint16_t *)s);
        *((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F);
    }
}

static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *s = src;
    const uint8_t *end;
#ifdef HAVE_MMX
    const uint8_t *mm_end;
#endif
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
#ifdef HAVE_MMX
    mm_end = end - 15;
#if 1 //is faster only if multiplies are reasonable fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
    asm volatile(
    "movq           %3, %%mm5   \n\t"
    "movq           %4, %%mm6   \n\t"
    "movq           %5, %%mm7   \n\t"
    "jmp 2f                     \n\t"
    ASMALIGN(4)
    "1:                         \n\t"
    PREFETCH"   32(%1)          \n\t"
    "movd         (%1), %%mm0   \n\t"
    "movd        4(%1), %%mm3   \n\t"
    "punpckldq   8(%1), %%mm0   \n\t"
    "punpckldq  12(%1), %%mm3   \n\t"
    "movq        %%mm0, %%mm1   \n\t"
    "movq        %%mm3, %%mm4   \n\t"
    "pand        %%mm6, %%mm0   \n\t"
12 3 4 5 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -