⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 r300_fragprog.c

📁 mesa-6.5-minigui源码
💻 C
📖 第 1 页 / 共 3 页
字号:
		ERROR("unknown SrcReg->File %x\n", fpsrc.File);		return r;	}	/* no point swizzling ONE/ZERO/HALF constants... */	if (r.v_swz < SWIZZLE_111 && r.s_swz < SWIZZLE_ZERO)		r = do_swizzle(rp, r, fpsrc.Swizzle, fpsrc.NegateBase);#if 0	/* WRONG! Need to be able to do individual component negation,	 * should probably handle this in the swizzling code unless	 * all components are negated, then we can do this natively */	if ((fpsrc.NegateBase & 0xf) == 0xf)		r.negate = GL_TRUE;	r.negate_s = (fpsrc.NegateBase >> 3) & 1;	if ((fpsrc.NegateBase & 0x7) == 0x0) {		r.negate_v = 0;	} else if ((fpsrc.NegateBase & 0x7) == 0x7) {		r.negate_v = 1;	} else {		if (r.type != REG_TYPE_TEMP) {			n = get_temp_reg(rp);			emit_arith(rp, PFS_OP_MAD, n, 0x7 ^ fpsrc.NegateBase,				   keep(r), pfs_one, pfs_zero, 0);			r.negate_v = 1;			emit_arith(rp, PFS_OP_MAD, n,				   fpsrc.NegateBase & 0x7 | WRITEMASK_W,				   r, pfs_one, pfs_zero, 0);			r.negate_v = 0;			r = n;		} else {			r.negate_v = 1;			emit_arith(rp, PFS_OP_MAD, r,				   fpsrc.NegateBase & 0x7 | WRITEMASK_W,				   r, pfs_one, pfs_zero, 0);			r.negate_v = 0;		}	}#endif	return r;}static pfs_reg_t t_scalar_src(struct r300_fragment_program *rp,			      struct prog_src_register fpsrc){	struct prog_src_register src = fpsrc;	int sc = GET_SWZ(fpsrc.Swizzle, 0); /* X */		src.Swizzle = ((sc<<0)|(sc<<3)|(sc<<6)|(sc<<9));	return t_src(rp, src);}static pfs_reg_t t_dst(struct r300_fragment_program *rp,		       struct prog_dst_register dest) {	pfs_reg_t r = undef;		switch (dest.File) {	case PROGRAM_TEMPORARY:		r.index = dest.Index;		r.valid = GL_TRUE;		return r;	case PROGRAM_OUTPUT:		r.type = REG_TYPE_OUTPUT;		switch (dest.Index) {		case FRAG_RESULT_COLR:		case FRAG_RESULT_DEPR:			r.index = dest.Index;			r.valid = GL_TRUE;			return r;		default:			ERROR("Bad DstReg->Index 0x%x\n", dest.Index);			return r;		}	default:		ERROR("Bad DstReg->File 0x%x\n", dest.File);		return r;	}}static int t_hw_src(struct r300_fragment_program *rp, pfs_reg_t src,		    GLboolean tex){	COMPILE_STATE;	int idx;	switch (src.type) {	case REG_TYPE_TEMP:		/* NOTE: if reg==-1 here, a source is being read that		 * 	 hasn't been written to. Undefined results */		if (cs->temps[src.index].reg == -1)			cs->temps[src.index].reg = get_hw_temp(rp);		idx = cs->temps[src.index].reg;		if (!src.no_use && (--cs->temps[src.index].refcount == 0))			free_temp(rp, src);		break;	case REG_TYPE_INPUT:		idx = cs->inputs[src.index].reg;		if (!src.no_use && (--cs->inputs[src.index].refcount == 0))			free_hw_temp(rp, cs->inputs[src.index].reg);		break;	case REG_TYPE_CONST:		return (src.index | SRC_CONST);	default:		ERROR("Invalid type for source reg\n");		return (0 | SRC_CONST);	}	if (!tex) cs->used_in_node |= (1 << idx);	return idx;}static int t_hw_dst(struct r300_fragment_program *rp, pfs_reg_t dest,		    GLboolean tex){	COMPILE_STATE;	int idx;	assert(dest.valid);	switch (dest.type) {	case REG_TYPE_TEMP:		if (cs->temps[dest.index].reg == -1) {			if (!tex)				cs->temps[dest.index].reg = get_hw_temp(rp);			else				cs->temps[dest.index].reg = get_hw_temp_tex(rp);		}		idx = cs->temps[dest.index].reg;		if (!dest.no_use && (--cs->temps[dest.index].refcount == 0))			free_temp(rp, dest);		cs->dest_in_node |= (1 << idx);		cs->used_in_node |= (1 << idx);		break;	case REG_TYPE_OUTPUT:		switch (dest.index) {		case FRAG_RESULT_COLR:			rp->node[rp->cur_node].flags |= R300_PFS_NODE_OUTPUT_COLOR;			break;		case FRAG_RESULT_DEPR:			rp->node[rp->cur_node].flags |= R300_PFS_NODE_OUTPUT_DEPTH;			break;		}		return dest.index;		break;	default:		ERROR("invalid dest reg type %d\n", dest.type);		return 0;	}		return idx;}static void emit_nop(struct r300_fragment_program *rp, GLuint mask,		     GLboolean sync){	COMPILE_STATE;		if (sync)		cs->v_pos = cs->s_pos = MAX2(cs->v_pos, cs->s_pos);	if (mask & WRITEMASK_XYZ) {		rp->alu.inst[cs->v_pos].inst0 = NOP_INST0;		rp->alu.inst[cs->v_pos].inst1 = NOP_INST1;		cs->v_pos++;	}	if (mask & WRITEMASK_W) {		rp->alu.inst[cs->s_pos].inst2 = NOP_INST2;		rp->alu.inst[cs->s_pos].inst3 = NOP_INST3;		cs->s_pos++;	}}static void emit_tex(struct r300_fragment_program *rp,		     struct prog_instruction *fpi,		     int opcode){	COMPILE_STATE;	pfs_reg_t coord = t_src(rp, fpi->SrcReg[0]);	pfs_reg_t dest = undef, rdest = undef;	GLuint din = cs->dest_in_node, uin = cs->used_in_node;	int unit = fpi->TexSrcUnit;	int hwsrc, hwdest;		/* Resolve source/dest to hardware registers */	hwsrc = t_hw_src(rp, coord, GL_TRUE);	if (opcode != R300_FPITX_OP_KIL) {		dest = t_dst(rp, fpi->DstReg);		/* r300 doesn't seem to be able to do TEX->output reg */		if (dest.type == REG_TYPE_OUTPUT) {			rdest = dest;			dest = get_temp_reg_tex(rp);		}		hwdest = t_hw_dst(rp, dest, GL_TRUE);				/* Use a temp that hasn't been used in this node, rather		 * than causing an indirection		 */		if (uin & (1 << hwdest)) {			free_hw_temp(rp, hwdest);			hwdest = get_hw_temp_tex(rp);			cs->temps[dest.index].reg = hwdest;		}	} else {		hwdest = 0;		unit = 0;	}		/* Indirection if source has been written in this node, or if the	 * dest has been read/written in this node	 */	if ((coord.type != REG_TYPE_CONST && (din & (1<<hwsrc))) ||					(uin & (1<<hwdest))) {					/* Finish off current node */		cs->v_pos = cs->s_pos = MAX2(cs->v_pos, cs->s_pos);		if (rp->node[rp->cur_node].alu_offset == cs->v_pos) {			/* No alu instructions in the node? Emit a NOP. */			emit_nop(rp, WRITEMASK_XYZW, GL_TRUE);			cs->v_pos = cs->s_pos = MAX2(cs->v_pos, cs->s_pos);		}						rp->node[rp->cur_node].alu_end =				cs->v_pos - rp->node[rp->cur_node].alu_offset - 1;		assert(rp->node[rp->cur_node].alu_end >= 0);		if (++rp->cur_node >= PFS_MAX_TEX_INDIRECT) {			ERROR("too many levels of texture indirection\n");			return;		}		/* Start new node */		rp->node[rp->cur_node].tex_offset = rp->tex.length;		rp->node[rp->cur_node].alu_offset = cs->v_pos;		rp->node[rp->cur_node].tex_end = -1;		rp->node[rp->cur_node].alu_end = -1;			rp->node[rp->cur_node].flags = 0;		cs->used_in_node = 0;		cs->dest_in_node = 0;	}		if (rp->cur_node == 0) rp->first_node_has_tex = 1;    rp->tex.inst[rp->tex.length++] = 0        | (hwsrc << R300_FPITX_SRC_SHIFT)        | (hwdest << R300_FPITX_DST_SHIFT)        | (unit << R300_FPITX_IMAGE_SHIFT)        | (opcode << R300_FPITX_OPCODE_SHIFT); /* not entirely sure about this */	cs->dest_in_node |= (1 << hwdest); 	if (coord.type != REG_TYPE_CONST)		cs->used_in_node |= (1 << hwsrc);	rp->node[rp->cur_node].tex_end++;	/* Copy from temp to output if needed */	if (rdest.valid) {		emit_arith(rp, PFS_OP_MAD, rdest, WRITEMASK_XYZW, dest,			   pfs_one, pfs_zero, 0);		free_temp(rp, dest);	}}/* Add sources to FPI1/FPI3 lists.  If source is already on list, * reuse the index instead of wasting a source. */static int add_src(struct r300_fragment_program *rp, int reg, int pos,		   int srcmask){	COMPILE_STATE;	int csm, i;		/* Look for matches */	for (i=0,csm=srcmask; i<3; i++,csm=csm<<1) {			/* If sources have been allocated in this position(s)... */		if ((cs->slot[pos].umask & csm) == csm) {			/* ... and the register number(s) match, re-use the			   source */			if (srcmask == SLOT_VECTOR &&			    cs->slot[pos].vsrc[i] == reg)				return i;			if (srcmask == SLOT_SCALAR &&			    cs->slot[pos].ssrc[i] == reg)				return i;			if (srcmask == SLOT_BOTH &&			    cs->slot[pos].vsrc[i] == reg &&			    cs->slot[pos].ssrc[i] == reg)				return i;		}	}	/* Look for free spaces */	for (i=0,csm=srcmask; i<3; i++,csm=csm<<1) {		/* If the position(s) haven't been allocated */		if ((cs->slot[pos].umask & csm) == 0) {			cs->slot[pos].umask |= csm;			if (srcmask & SLOT_VECTOR)				cs->slot[pos].vsrc[i] = reg;			if (srcmask & SLOT_SCALAR)				cs->slot[pos].ssrc[i] = reg;			return i;		}		}		//ERROR("Failed to allocate sources in FPI1/FPI3!\n");	return 0;}/* Determine whether or not to position opcode in the same ALU slot for both * vector and scalar portions of an instruction. * * It's not necessary to force the first case, but it makes disassembled * shaders easier to read. */static GLboolean force_same_slot(int vop, int sop,				 GLboolean emit_vop, GLboolean emit_sop,				 int argc, pfs_reg_t *src){	int i;	if (emit_vop && emit_sop)		return GL_TRUE;	if (emit_vop && vop == R300_FPI0_OUTC_REPL_ALPHA)		return GL_TRUE;	if (emit_vop) {		for (i=0;i<argc;i++)			if (src[i].v_swz == SWIZZLE_WZY)				return GL_TRUE;	}	return GL_FALSE;}static void emit_arith(struct r300_fragment_program *rp, int op,		       pfs_reg_t dest, int mask,		       pfs_reg_t src0, pfs_reg_t src1, pfs_reg_t src2,		       int flags){	COMPILE_STATE;	pfs_reg_t src[3] = { src0, src1, src2 };	int hwsrc[3], sswz[3], vswz[3];	int hwdest;	GLboolean emit_vop = GL_FALSE, emit_sop = GL_FALSE;	int vop, sop, argc;	int vpos, spos;	int i;	vop = r300_fpop[op].v_op;	sop = r300_fpop[op].s_op;	argc = r300_fpop[op].argc;		if ((mask & WRITEMASK_XYZ) || vop == R300_FPI0_OUTC_DP3)		emit_vop = GL_TRUE;	if ((mask & WRITEMASK_W) || vop == R300_FPI0_OUTC_REPL_ALPHA)		emit_sop = GL_TRUE;	if (dest.type == REG_TYPE_OUTPUT && dest.index == FRAG_RESULT_DEPR)		emit_vop = GL_FALSE;						if (force_same_slot(vop, sop, emit_vop, emit_sop, argc, src)) {		vpos = spos = MAX2(cs->v_pos, cs->s_pos);	} else {		vpos = cs->v_pos;		spos = cs->s_pos;		/* Here is where we'd decide on where a safe place is to		 * combine this instruction with a previous one.		 *		 * This is extremely simple for now.. if a source depends		 * on the opposite stream, force the same instruction.		 */		for (i=0;i<3;i++) {			if (emit_vop &&			    (v_swiz[src[i].v_swz].flags & SLOT_SCALAR)) {				vpos = spos = MAX2(vpos, spos);				break;			}			if (emit_sop &&			    (s_swiz[src[i].s_swz].flags & SLOT_VECTOR)) {				vpos = spos = MAX2(vpos, spos);				break;			}		}	}		/* - Convert src->hwsrc, record for FPI1/FPI3	 * - Determine ARG parts of FPI0/FPI2, unused args are filled	 *   with ARG_ZERO.	 */		for (i=0;i<3;i++) {		int srcpos;				if (i >= argc) {			vswz[i] = R300_FPI0_ARGC_ZERO;			sswz[i] = R300_FPI2_ARGA_ZERO;			continue;		}				hwsrc[i] = t_hw_src(rp, src[i], GL_FALSE);			if (emit_vop && vop != R300_FPI0_OUTC_REPL_ALPHA) {			srcpos = add_src(rp, hwsrc[i], vpos,					 v_swiz[src[i].v_swz].flags);				vswz[i] = (v_swiz[src[i].v_swz].base +				   (srcpos * v_swiz[src[i].v_swz].stride)) |				(src[i].negate_v ? ARG_NEG : 0) |				(src[i].absolute ? ARG_ABS : 0);		} else vswz[i] = R300_FPI0_ARGC_ZERO;				if (emit_sop) {			srcpos = add_src(rp, hwsrc[i], spos,					 s_swiz[src[i].s_swz].flags);			sswz[i] = (s_swiz[src[i].s_swz].base +				   (srcpos * s_swiz[src[i].s_swz].stride)) |				(src[i].negate_s ? ARG_NEG : 0) |				(src[i].absolute ? ARG_ABS : 0);			} else sswz[i] = R300_FPI2_ARGA_ZERO;	}	hwdest = t_hw_dst(rp, dest, GL_FALSE);		if (flags & PFS_FLAG_SAT) {		vop |= R300_FPI0_OUTC_SAT;		sop |= R300_FPI2_OUTA_SAT;	}	/* Throw the pieces together and get FPI0/1 */	rp->alu.inst[vpos].inst1 =			((cs->slot[vpos].vsrc[0] << R300_FPI1_SRC0C_SHIFT) |			 (cs->slot[vpos].vsrc[1] << R300_FPI1_SRC1C_SHIFT) |			 (cs->slot[vpos].vsrc[2] << R300_FPI1_SRC2C_SHIFT));	if (emit_vop) {		rp->alu.inst[vpos].inst0 = vop |				(vswz[0] << R300_FPI0_ARG0C_SHIFT) |				(vswz[1] << R300_FPI0_ARG1C_SHIFT) |				(vswz[2] << R300_FPI0_ARG2C_SHIFT);		rp->alu.inst[vpos].inst1 |= hwdest << R300_FPI1_DSTC_SHIFT;		if (dest.type == REG_TYPE_OUTPUT) {			if (dest.index == FRAG_RESULT_COLR) {				rp->alu.inst[vpos].inst1 |=					(mask & WRITEMASK_XYZ) << R300_FPI1_DSTC_OUTPUT_MASK_SHIFT;			} else assert(0);		} else {			rp->alu.inst[vpos].inst1 |=					(mask & WRITEMASK_XYZ) << R300_FPI1_DSTC_REG_MASK_SHIFT;		}		cs->v_pos = vpos+1;	} else if (spos >= vpos)		rp->alu.inst[spos].inst0 = NOP_INST0;	/* And now FPI2/3 */	rp->alu.inst[spos].inst3 =			((cs->slot[spos].ssrc[0] << R300_FPI3_SRC0A_SHIFT) |			 (cs->slot[spos].ssrc[1] << R300_FPI3_SRC1A_SHIFT) |			 (cs->slot[spos].ssrc[2] << R300_FPI3_SRC2A_SHIFT));	if (emit_sop) {		rp->alu.inst[spos].inst2 = sop |				sswz[0] << R300_FPI2_ARG0A_SHIFT |				sswz[1] << R300_FPI2_ARG1A_SHIFT |				sswz[2] << R300_FPI2_ARG2A_SHIFT;		if (mask & WRITEMASK_W) {			if (dest.type == REG_TYPE_OUTPUT) {				if (dest.index == FRAG_RESULT_COLR) {					rp->alu.inst[spos].inst3 |= 							(hwdest << R300_FPI3_DSTA_SHIFT) | R300_FPI3_DSTA_OUTPUT;				} else if (dest.index == FRAG_RESULT_DEPR) {					rp->alu.inst[spos].inst3 |= R300_FPI3_DSTA_DEPTH;				} else assert(0);			} else {				rp->alu.inst[spos].inst3 |=						(hwdest << R300_FPI3_DSTA_SHIFT) | R300_FPI3_DSTA_REG;			}		}		cs->s_pos = spos+1;	} else if (vpos >= spos)		rp->alu.inst[vpos].inst2 = NOP_INST2;	return;};#if 0static pfs_reg_t get_attrib(struct r300_fragment_program *rp, GLuint attr){	struct fragment_program *mp = &rp->mesa_program;	pfs_reg_t r = undef;	if (!(mp->Base.InputsRead & (1<<attr))) {		ERROR("Attribute %d was not provided!\n", attr);		return undef;	}	r.type  = REG_TYPE_INPUT;	r.index = attr;	r.valid = GL_TRUE;	return r;}#endifstatic GLboolean parse_program(struct r300_fragment_program *rp){		struct fragment_program *mp = &rp->mesa_program;	const struct prog_instruction *inst = mp->Base.Instructions;	struct prog_instruction *fpi;	pfs_reg_t src[3], dest, temp;	int flags, mask;	if (!inst || inst[0].Opcode == OPCODE_END) {		ERROR("empty program?\n");		return GL_FALSE;	}	for (fpi=mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {		if (fpi->SaturateMode == SATURATE_ZERO_ONE)			flags = PFS_FLAG_SAT;		else

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -