📄 rleaccel.c
字号:
/*
** $Id: RLEaccel.c,v 1.4 2003/11/22 04:44:14 weiym Exp $
**
** Port to MiniGUI by Wei Yongming (2001/10/03).
** Copyright (C) 2001 ~ 2002 Wei Yongming.
** Copyright (C) 2003 Feynman Software.
**
** SDL - Simple DirectMedia Layer
** Copyright (C) 1997, 1998, 1999, 2000, 2001 Sam Lantinga
*/

/*
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 2 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/

/*
 * RLE encoding for software colorkey and alpha-channel acceleration
 *
 * Original version by Sam Lantinga
 *
 * Mattias Engdegård (Yorick): Rewrite. New encoding format, encoder and
 * decoder. Added per-surface alpha blitter. Added per-pixel alpha
 * format, encoder and blitter.
 *
 * Many thanks to Xark and johns for hints, benchmarks and useful comments
 * leading to this code.
 *
 * Welcome to Macro Mayhem.
 */

/*
 * The encoding translates the image data to a stream of segments of the form
 *
 * <skip> <run> <data>
 *
 * where <skip> is the number of transparent pixels to skip,
 * <run> is the number of opaque pixels to blit,
 * and <data> are the pixels themselves.
 *
 * This basic structure is used both for colorkeyed surfaces, used for simple
 * binary transparency and for per-surface alpha blending, and for surfaces
 * with per-pixel alpha. The details differ, however:
 *
 * Encoding of colorkeyed surfaces:
 *
 * Encoded pixels always have the same format as the target surface.
 * <skip> and <run> are unsigned 8 bit integers, except for 32 bit depth
 * where they are 16 bit. This makes the pixel data aligned at all times.
 * Segments never wrap around from one scan line to the next.
 *
 * The end of the sequence is marked by a zero <skip>,<run> pair at the
 * beginning of a line.
 *
 * Encoding of surfaces with per-pixel alpha:
 *
 * The sequence begins with a struct RLEDestFormat describing the target
 * pixel format, to provide reliable un-encoding.
 *
 * Each scan line is encoded twice: First all completely opaque pixels,
 * encoded in the target format as described above, and then all
 * partially transparent (translucent) pixels (where 1 <= alpha <= 254),
 * in the following 32-bit format:
 *
 * For 32-bit targets, each pixel has the target RGB format but with
 * the alpha value occupying the highest 8 bits. The <skip> and <run>
 * counts are 16 bit.
 *
 * For 16-bit targets, each pixel has the target RGB format, but with
 * the middle component (usually green) shifted 16 steps to the left,
 * and the hole filled with the 5 most significant bits of the alpha value.
 * i.e. if the target has the format rrrrrggggggbbbbb,
 * the encoded pixel will be 00000gggggg00000rrrrr0aaaaabbbbb.
 * The <skip> and <run> counts are 8 bit for the opaque lines, 16 bit
 * for the translucent lines. Two padding bytes may be inserted
 * before each translucent line to keep them 32-bit aligned.
 *
 * The end of the sequence is marked by a zero <skip>,<run> pair at the
 * beginning of an opaque line.
*/#include <stdio.h>#include <stdlib.h>#include <string.h>#include "common.h"#include "newgal.h"#include "sysvideo.h"#include "blit.h"#include "memops.h"#include "RLEaccel_c.h"#define PIXEL_COPY(to, from, len, bpp) \do { \ if(bpp == 4) { \ GAL_memcpy4(to, from, (unsigned)(len)); \ } else { \ GAL_memcpy(to, from, (unsigned)(len) * (bpp)); \ } \} while(0)/* * Various colorkey blit methods, for opaque and per-surface alpha */#define OPAQUE_BLIT(to, from, length, bpp, alpha) \ PIXEL_COPY(to, from, length, bpp)/* * For 32bpp pixels on the form 0x00rrggbb: * If we treat the middle component separately, we can process the two * remaining in parallel. This is safe to do because of the gap to the left * of each component, so the bits from the multiplication don't collide. * This can be used for any RGB permutation of course. */#define ALPHA_BLIT32_888(to, from, length, bpp, alpha) \ do { \ int i; \ Uint32 *src = (Uint32 *)(from); \ Uint32 *dst = (Uint32 *)(to); \ for(i = 0; i < (int)(length); i++) { \ Uint32 s = *src++; \ Uint32 d = *dst; \ Uint32 s1 = s & 0xff00ff; \ Uint32 d1 = d & 0xff00ff; \ d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff; \ s &= 0xff00; \ d &= 0xff00; \ d = (d + ((s - d) * alpha >> 8)) & 0xff00; \ *dst++ = d1 | d; \ } \ } while(0)/* * For 16bpp pixels we can go a step further: put the middle component * in the high 16 bits of a 32 bit word, and process all three RGB * components at the same time. Since the smallest gap is here just * 5 bits, we have to scale alpha down to 5 bits as well. 
*/#define ALPHA_BLIT16_565(to, from, length, bpp, alpha) \ do { \ int i; \ Uint16 *src = (Uint16 *)(from); \ Uint16 *dst = (Uint16 *)(to); \ for(i = 0; i < (int)(length); i++) { \ Uint32 s = *src++; \ Uint32 d = *dst; \ s = (s | s << 16) & 0x07e0f81f; \ d = (d | d << 16) & 0x07e0f81f; \ d += (s - d) * alpha >> 5; \ d &= 0x07e0f81f; \ *dst++ = d | d >> 16; \ } \ } while(0)#define ALPHA_BLIT16_555(to, from, length, bpp, alpha) \ do { \ int i; \ Uint16 *src = (Uint16 *)(from); \ Uint16 *dst = (Uint16 *)(to); \ for(i = 0; i < (int)(length); i++) { \ Uint32 s = *src++; \ Uint32 d = *dst; \ s = (s | s << 16) & 0x03e07c1f; \ d = (d | d << 16) & 0x03e07c1f; \ d += (s - d) * alpha >> 5; \ d &= 0x03e07c1f; \ *dst++ = d | d >> 16; \ } \ } while(0)/* * The general slow catch-all function, for remaining depths and formats */#define ALPHA_BLIT_ANY(to, from, length, bpp, alpha) \ do { \ int i; \ Uint8 *src = from; \ Uint8 *dst = to; \ for(i = 0; i < (int)(length); i++) { \ Uint32 s, d; \ unsigned rs, gs, bs, rd, gd, bd; \ switch(bpp) { \ case 2: \ s = *(Uint16 *)src; \ d = *(Uint16 *)dst; \ break; \ case 3: \ if(GAL_BYTEORDER == GAL_BIG_ENDIAN) { \ s = (src[0] << 16) | (src[1] << 8) | src[2]; \ d = (dst[0] << 16) | (dst[1] << 8) | dst[2]; \ } else { \ s = (src[2] << 16) | (src[1] << 8) | src[0]; \ d = (dst[2] << 16) | (dst[1] << 8) | dst[0]; \ } \ break; \ case 4: \ s = *(Uint32 *)src; \ d = *(Uint32 *)dst; \ break; \ } \ RGB_FROM_PIXEL(s, fmt, rs, gs, bs); \ RGB_FROM_PIXEL(d, fmt, rd, gd, bd); \ rd += (rs - rd) * alpha >> 8; \ gd += (gs - gd) * alpha >> 8; \ bd += (bs - bd) * alpha >> 8; \ PIXEL_FROM_RGB(d, fmt, rd, gd, bd); \ switch(bpp) { \ case 2: \ *(Uint16 *)dst = d; \ break; \ case 3: \ if(GAL_BYTEORDER == GAL_BIG_ENDIAN) { \ dst[0] = d >> 16; \ dst[1] = d >> 8; \ dst[2] = d; \ } else { \ dst[0] = d; \ dst[1] = d >> 8; \ dst[2] = d >> 16; \ } \ break; \ case 4: \ *(Uint32 *)dst = d; \ break; \ } \ src += bpp; \ dst += bpp; \ } \ } while(0)/* * Special case: 50% alpha 
(alpha=128) * This is treated specially because it can be optimized very well, and * since it is good for many cases of semi-translucency. * The theory is to do all three components at the same time: * First zero the lowest bit of each component, which gives us room to * add them. Then shift right and add the sum of the lowest bits. */#define ALPHA_BLIT32_888_50(to, from, length, bpp, alpha) \ do { \ int i; \ Uint32 *src = (Uint32 *)(from); \ Uint32 *dst = (Uint32 *)(to); \ for(i = 0; i < (int)(length); i++) { \ Uint32 s = *src++; \ Uint32 d = *dst; \ *dst++ = (((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) \ + (s & d & 0x00010101); \ } \ } while(0)/* * For 16bpp, we can actually blend two pixels in parallel, if we take * care to shift before we add, not after. *//* helper: blend a single 16 bit pixel at 50% */#define BLEND16_50(dst, src, mask) \ do { \ Uint32 s = *src++; \ Uint32 d = *dst; \ *dst++ = (((s & mask) + (d & mask)) >> 1) \ + (s & d & (~mask & 0xffff)); \ } while(0)/* basic 16bpp blender. mask is the pixels to keep when adding. */#define ALPHA_BLIT16_50(to, from, length, bpp, alpha, mask) \ do { \ unsigned n = (length); \ Uint16 *src = (Uint16 *)(from); \ Uint16 *dst = (Uint16 *)(to); \ if(((unsigned long)src ^ (unsigned long)dst) & 3) { \ /* source and destination not in phase, blit one by one */ \ while(n--) \ BLEND16_50(dst, src, mask); \ } else { \ if((unsigned long)src & 3) { \ /* first odd pixel */ \ BLEND16_50(dst, src, mask); \ n--; \ } \ for(; n > 1; n -= 2) { \ Uint32 s = *(Uint32 *)src; \ Uint32 d = *(Uint32 *)dst; \
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -