📄 clipconv_f32_s16.h

📁 Flush解码源程序
💻 H
字号:
/* clip and convert f32 to s16 functions
 * Copyright (C) 2001,2002  David A. Schleef <ds@schleef.org>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

/*
Kernel: clipconv_f32_s16
Description: round source value to nearest integer, clip to output range,
and convert to s16.

Half integers may be rounded to either nearby integer.
*/

#ifndef _clipconv_f32_s16_h_
#define _clipconv_f32_s16_h_

#include <math.h>

//#include <sl_types.h>
//#include <sl_altivec.h>

/* Local stand-ins for the sample types (the project type headers above
 * are not included). */
#define f32 float
#define s16 short

/* Enables the bit-twiddling implementation below, which needs the
 * glibc-style <ieee754.h>. Guarded so a definition supplied on the
 * command line is not redefined. */
#ifndef HAVE_IEEE754_H
#define HAVE_IEEE754_H
#endif

/* storage class */
#ifndef SL_clipconv_f32_s16_storage
 #ifdef SL_storage
  #define SL_clipconv_f32_s16_storage SL_storage
 #else
  #define SL_clipconv_f32_s16_storage static inline
 #endif
#endif

/*
 * clipconv_f32_s16:
 *
 * Clip source to dest range, round to nearest integer, and
 * convert to s16.
 *
 * Half integers may be rounded to either nearby integer.
 *
 * All implementations share the signature (dest, src, n):
 *   dest - output array, receives n converted samples
 *   src  - input array of n float samples
 *   n    - number of samples to convert
 */

/* IMPL clipconv_f32_s16_ref */
/*
 * Reference implementation: clamp each sample to [-32768, 32767], then
 * round with floor(x + 0.5).
 *
 * NaN input is not handled: a NaN passes both comparisons unchanged and
 * the final conversion of NaN to s16 is undefined in C.
 */
SL_clipconv_f32_s16_storage
void clipconv_f32_s16_ref(s16 *dest, f32 *src, int n)
{
	int i;
	f32 x;

	for (i = 0; i < n; i++) {
		x = src[i];
		if (x < -32768.0) {
			x = -32768.0;
		}
		if (x > 32767.0) {
			x = 32767.0;
		}
		dest[i] = floor(x + 0.5);
	}
}

/* 1 if x is negative when viewed as a 32-bit signed value, else 0.
 * Assumes a 32-bit int. */
#define signbit_s32(x) (((unsigned int)(x))>>31)

#ifdef HAVE_IEEE754_H
/* This function is inaccurate to _i22, but we're only claiming _i30 */
#include <ieee754.h>

/* IMPL clipconv_f32_s16_i30_bits defined(HAVE_IEEE754_H) */
/*
 * Branch-free implementation that reads the rounded integer straight out
 * of the float's bit pattern.
 *
 * Adding 98304.5 (= 65536 + 32768 + 0.5) maps every in-range sample to
 * [65536.5, 131071.5], i.e. inside [2^16, 2^17). There the biased
 * exponent is exactly 143 (127 + 16) and one mantissa unit is 2^-7, so
 * (mantissa >> 7) equals floor(src + 0.5) + 32768.
 *
 * Out-of-range samples are detected from the sum:
 *   - exponent < 143, or sign bit set  -> sample too small -> -32768
 *   - exponent > 143 (and non-negative) -> sample too large ->  32767
 *
 * The sign bit must be checked explicitly: for samples at or below about
 * -163840.5 the sum is negative with an exponent >= 143, which an
 * exponent-only test would misclassify as in range or as too large.
 */
SL_clipconv_f32_s16_storage
void clipconv_f32_s16_i30_bits(s16 *dest, f32 *src, int n)
{
	int i;
	union ieee754_float id;
	const float offset = 98304.5;

	for (i = 0; i < n; i++) {
		int exponent;
		int d;

		id.f = offset + src[i];
		exponent = (int)id.ieee.exponent;

		/* In-range value: integer part of (sum - 65536), recentred. */
		d = (int)(id.ieee.mantissa >> 7) - 32768;

		/* Each multiplier is 0 or 1, so each line either leaves d
		 * alone or overwrites it with the clip value. */
		d += (-32768 - d) * (int)signbit_s32(exponent - 143);
		d += (32767 - d) * (int)signbit_s32(143 - exponent);
		/* Negative sum: applied last so it wins over the ">143" case. */
		d += (-32768 - d) * (int)id.ieee.negative;

		dest[i] = (s16)d;
	}
}
#endif

#if defined(__powerpc__)
/* IMPL clipconv_f32_s16_ppcasm defined(__powerpc__) */
/*
 * PowerPC scalar implementation: clamps with fsub/fsel, converts with
 * fctiw, then bounces the result through memory to fetch the low 16 bits.
 *
 * The 8-byte scratch slot receives the full FPR via stfd; the halfword
 * at byte offset 6 holds the low 16 bits of the integer result.
 * NOTE(review): that offset assumes big-endian layout -- confirm before
 * using on little-endian PowerPC.
 */
SL_clipconv_f32_s16_storage
void clipconv_f32_s16_ppcasm(s16 *dest, f32 *src, int n)
{
	f32 min = -32768.0;
	f32 max = 32767.0;
	/* stfd stores 8 bytes, so the scratch slot must be a double. */
	double ftmp;

	/* The asm loop is a do-while; it must not run for an empty input. */
	if (n <= 0) {
		return;
	}

	/* Pre-decrement because the load/store-with-update forms
	 * (lfsu/sthu) advance the pointer before accessing memory. */
	dest--;
	src--;

	__asm__ __volatile__("\n"
		"1:	lfsu	0,4(%1)\n"
		"	fsub	1,0,%3\n"
		"	fsel	0,1,0,%3\n"
		"	fsub	1,0,%4\n"
		"	fsel	0,1,%4,0\n"
		"	fctiw	1,0\n"
		"	addic.	%2,%2,-1\n"
		"	stfdu	1,0(%5)\n"
		"	lhz	11,6(%5)\n"
		"	sthu	11,2(%0)\n"
		/* bgt (not bge): loop while the remaining count is > 0, so
		 * exactly n elements are processed. */
		"	bgt	1b\n"
	: "+b" (dest), "+b" (src), "+r" (n)
	: "f" (min), "f" (max), "b" (&ftmp)
	: "32", "33", "11", "cr0", "memory" );
}

/* IMPL clipconv_f32_s16_a16_altivec defined(__powerpc__) */
/*
 * AltiVec implementation: each iteration loads two vectors of four
 * floats, rounds them (vrfin), converts to saturated 32-bit integers
 * (vctsxs), and packs them to eight saturated s16 values (vpkswss).
 *
 * NOTE(review): the loop always runs at least once and handles 8 samples
 * per iteration, so n is effectively rounded up to a multiple of 8, and
 * lvx/stvx expect 16-byte aligned src/dest -- confirm callers guarantee
 * both.
 * NOTE(review): v0-v5 are used but not listed as clobbers -- confirm
 * this is safe with the compilers in use.
 */
SL_clipconv_f32_s16_storage
void clipconv_f32_s16_a16_altivec(s16 *dest, f32 *src, int n)
{
	__asm__ __volatile__("\n"
		"	addic	%2,%2,-8\n"
		"	li	%%r12, 0\n"
		"	li	%%r11, 0\n"
		"1:	lvx	%%v0, %1, %%r12\n"
		"	addi	%%r12, %%r12, 16\n"
		"	vrfin	%%v1, %%v0\n"
		"	lvx	%%v0, %1, %%r12\n"
		"	addi	%%r12, %%r12, 16\n"
		"	vrfin	%%v2, %%v0\n"
		"	vctsxs	%%v3, %%v1, 0\n"
		"	vctsxs	%%v4, %%v2, 0\n"
		"	vpkswss	%%v5, %%v3, %%v4\n"
		"	stvx	%%v5, %0, %%r11\n"
		"	addi	%%r11, %%r11, 16\n"
		"	addic.	%2,%2,-8\n"
		"	bge 1b\n"
	: "+b" (dest), "+b" (src), "+b" (n)
	:
	: "12", "11", "cr0", "memory"
	);
}
#endif

#endif

#ifdef TEST_clipconv_f32_s16
/*
 * Self-test: runs clipconv_f32_s16_FUNC against the reference
 * implementation on N random samples for N_PASS passes and returns the
 * number of mismatches.
 *
 * A mismatch is tolerated when the source value compares equal (per
 * sl_equal_f32 / sl_equal_f32_i30) to a half integer, since half
 * integers may round either way.
 *
 * N, N_PASS, clipconv_f32_s16_FUNC and the sl_* helpers are expected to
 * be provided by the including test harness.
 */
int TEST_clipconv_f32_s16(void)
{
	int i;
	int pass;
	int failures = 0;
	f32 *src;
	s16 *dest_test, *dest_ref;
	struct sl_profile_struct t;

#ifdef ALIGNED16
	src = sl_malloc_f32_a16(N);
	dest_ref = sl_malloc_s16_a16(N);
	dest_test = sl_malloc_s16_a16(N);
#else
	src = sl_malloc_f32(N);
	dest_ref = sl_malloc_s16(N);
	dest_test = sl_malloc_s16(N);
#endif

	sl_profile_init(t);
	/* Fixed seed so runs are reproducible. */
	srand(20020326);

	printf("I: " sl_stringify(clipconv_f32_s16_FUNC) "\n");

	for (pass = 0; pass < N_PASS; pass++) {
		/* Scale by 2 so some inputs fall outside the s16 range and
		 * exercise clipping. */
		for (i = 0; i < N; i++) {
			src[i] = 2.0*sl_rand_f32_s16();
		}

		clipconv_f32_s16_ref(dest_ref,src,N);
		sl_profile_start(t);
		clipconv_f32_s16_FUNC(dest_test,src,N);
		sl_profile_stop(t);

		for (i = 0; i < N; i++) {
			int ok;

			if (dest_test[i] == dest_ref[i]) continue;
#ifdef INACCURATE30
			ok = sl_equal_f32_i30(src[i],floor(src[i])+0.5);
#else
			ok = sl_equal_f32(src[i],floor(src[i])+0.5);
#endif
			if (!ok) {
				printf("%d %.10g %d %d\n",i,src[i],dest_ref[i],
					dest_test[i]);
				failures++;
			}
		}
	}

	sl_free(src);
	sl_free(dest_ref);
	sl_free(dest_test);

	if (failures) {
		printf("E: %d failures\n",failures);
	}

	sl_profile_print(t);

	return failures;
}
#endif
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -