⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 fourway_matmult_4x4.cpp

📁 用于GPU通用计算的编程语言BrookGPU 0.4
💻 CPP
📖 第 1 页 / 共 3 页
字号:
				"TEX r0, t1, texture[7], RECT;\n"
				"TEX r1, t0, texture[0], RECT;\n"
				"MOV r5.x, r4.x;\n"
				"MOV r5.y, r2.x;\n"
				"MOV r5.z, r3.x;\n"
				"MOV r5.w, r0.x;\n"
				"DP4 r0.x, r1, r5;\n"
				"MOV r5.x, r4.y;\n"
				"MOV r5.y, r2.y;\n"
				"MOV r5.z, r3.y;\n"
				"MOV r5.w, r0.y;\n"
				"DP4 r0.y, r1, r5;\n"
				"MOV r3.x, r4.z;\n"
				"MOV r2.x, r4.w;\n"
				"MOV r3.y, r2.z;\n"
				"MOV r2.y, r2.w;\n"
				"MOV r2.z, r3.w;\n"
				"MOV r3.w, r0.z;\n"
				"MOV r2.w, r0.w;\n"
				"DP4 r0.z, r1, r3;\n"
				"DP4 r0.w, r1, r2;\n"
				"MOV oC0, r0;\n"
				"END\n"
				" \n"
				"##!!BRCC\n"
				"##narg:3\n"
				"##s:0:a\n"
				"##s:0:b\n"
				"##o:0:result\n"
				"##workspace:1024\n"
				"##!!multipleOutputInfo:0:1:\n"
				"##!!fullAddressTrans:0:\n"
				"##!!reductionFactor:0:\n"
				"")
				.sampler(1, 0)
				.sampler(1, 1)
				.sampler(1, 2)
				.sampler(1, 3)
				.sampler(2, 0)
				.sampler(2, 1)
				.sampler(2, 2)
				.sampler(2, 3)
				.interpolant(1, kStreamInterpolant_Position)
				.interpolant(2, kStreamInterpolant_Position)
				.output(3, 0)
			)
			.pass( gpu_pass_desc(
				"!!ARBfp1.0\n"
				"OUTPUT oC0 = result.color;\n"
				"TEMP r0;\n"
				"TEMP r1;\n"
				"TEMP r2;\n"
				"TEMP r3;\n"
				"TEMP r4;\n"
				"TEMP r5;\n"
				"ATTRIB t0 = fragment.texcoord[0];\n"
				"ATTRIB t1 = fragment.texcoord[1];\n"
				"TEX r4, t1, texture[4], RECT;\n"
				"TEX r2, t1, texture[5], RECT;\n"
				"TEX r3, t1, texture[6], RECT;\n"
				"TEX r0, t1, texture[7], RECT;\n"
				"TEX r1, t0, texture[1], RECT;\n"
				"MOV r5.x, r4.x;\n"
				"MOV r5.y, r2.x;\n"
				"MOV r5.z, r3.x;\n"
				"MOV r5.w, r0.x;\n"
				"DP4 r0.x, r1, r5;\n"
				"MOV r5.x, r4.y;\n"
				"MOV r5.y, r2.y;\n"
				"MOV r5.z, r3.y;\n"
				"MOV r5.w, r0.y;\n"
				"DP4 r0.y, r1, r5;\n"
				"MOV r3.x, r4.z;\n"
				"MOV r2.x, r4.w;\n"
				"MOV r3.y, r2.z;\n"
				"MOV r2.y, r2.w;\n"
				"MOV r2.z, r3.w;\n"
				"MOV r3.w, r0.z;\n"
				"MOV r2.w, r0.w;\n"
				"DP4 r0.z, r1, r3;\n"
				"DP4 r0.w, r1, r2;\n"
				"MOV oC0, r0;\n"
				"END\n"
				" \n"
				"##!!BRCC\n"
				"##narg:3\n"
				"##s:0:a\n"
				"##s:0:b\n"
				"##o:0:result\n"
				"##workspace:1024\n"
				"##!!multipleOutputInfo:1:1:\n"
				"##!!fullAddressTrans:0:\n"
				"##!!reductionFactor:0:\n"
				"")
				.sampler(1, 0)
				.sampler(1, 1)
				.sampler(1, 2)
				.sampler(1, 3)
				.sampler(2, 0)
				.sampler(2, 1)
				.sampler(2, 2)
				.sampler(2, 3)
				.interpolant(1, kStreamInterpolant_Position)
				.interpolant(2, kStreamInterpolant_Position)
				.output(3, 1)
			)
			.pass( gpu_pass_desc(
				"!!ARBfp1.0\n"
				"OUTPUT oC0 = result.color;\n"
				"TEMP r0;\n"
				"TEMP r1;\n"
				"TEMP r2;\n"
				"TEMP r3;\n"
				"TEMP r4;\n"
				"TEMP r5;\n"
				"ATTRIB t0 = fragment.texcoord[0];\n"
				"ATTRIB t1 = fragment.texcoord[1];\n"
				"TEX r4, t1, texture[4], RECT;\n"
				"TEX r2, t1, texture[5], RECT;\n"
				"TEX r3, t1, texture[6], RECT;\n"
				"TEX r0, t1, texture[7], RECT;\n"
				"TEX r1, t0, texture[2], RECT;\n"
				"MOV r5.x, r4.x;\n"
				"MOV r5.y, r2.x;\n"
				"MOV r5.z, r3.x;\n"
				"MOV r5.w, r0.x;\n"
				"DP4 r0.x, r1, r5;\n"
				"MOV r5.x, r4.y;\n"
				"MOV r5.y, r2.y;\n"
				"MOV r5.z, r3.y;\n"
				"MOV r5.w, r0.y;\n"
				"DP4 r0.y, r1, r5;\n"
				"MOV r3.x, r4.z;\n"
				"MOV r2.x, r4.w;\n"
				"MOV r3.y, r2.z;\n"
				"MOV r2.y, r2.w;\n"
				"MOV r2.z, r3.w;\n"
				"MOV r3.w, r0.z;\n"
				"MOV r2.w, r0.w;\n"
				"DP4 r0.z, r1, r3;\n"
				"DP4 r0.w, r1, r2;\n"
				"MOV oC0, r0;\n"
				"END\n"
				" \n"
				"##!!BRCC\n"
				"##narg:3\n"
				"##s:0:a\n"
				"##s:0:b\n"
				"##o:0:result\n"
				"##workspace:1024\n"
				"##!!multipleOutputInfo:2:1:\n"
				"##!!fullAddressTrans:0:\n"
				"##!!reductionFactor:0:\n"
				"")
				.sampler(1, 0)
				.sampler(1, 1)
				.sampler(1, 2)
				.sampler(1, 3)
				.sampler(2, 0)
				.sampler(2, 1)
				.sampler(2, 2)
				.sampler(2, 3)
				.interpolant(1, kStreamInterpolant_Position)
				.interpolant(2, kStreamInterpolant_Position)
				.output(3, 2)
			)
			.pass( gpu_pass_desc(
				"!!ARBfp1.0\n"
				"OUTPUT oC0 = result.color;\n"
				"TEMP r0;\n"
				"TEMP r1;\n"
				"TEMP r2;\n"
				"TEMP r3;\n"
				"TEMP r4;\n"
				"TEMP r5;\n"
				"ATTRIB t0 = fragment.texcoord[0];\n"
				"ATTRIB t1 = fragment.texcoord[1];\n"
				"TEX r4, t1, texture[4], RECT;\n"
				"TEX r2, t1, texture[5], RECT;\n"
				"TEX r3, t1, texture[6], RECT;\n"
				"TEX r0, t1, texture[7], RECT;\n"
				"TEX r1, t0, texture[3], RECT;\n"
				"MOV r5.x, r4.x;\n"
				"MOV r5.y, r2.x;\n"
				"MOV r5.z, r3.x;\n"
				"MOV r5.w, r0.x;\n"
				"DP4 r0.x, r1, r5;\n"
				"MOV r5.x, r4.y;\n"
				"MOV r5.y, r2.y;\n"
				"MOV r5.z, r3.y;\n"
				"MOV r5.w, r0.y;\n"
				"DP4 r0.y, r1, r5;\n"
				"MOV r3.x, r4.z;\n"
				"MOV r2.x, r4.w;\n"
				"MOV r3.y, r2.z;\n"
				"MOV r2.y, r2.w;\n"
				"MOV r2.z, r3.w;\n"
				"MOV r3.w, r0.z;\n"
				"MOV r2.w, r0.w;\n"
				"DP4 r0.z, r1, r3;\n"
				"DP4 r0.w, r1, r2;\n"
				"MOV oC0, r0;\n"
				"END\n"
				" \n"
				"##!!BRCC\n"
				"##narg:3\n"
				"##s:0:a\n"
				"##s:0:b\n"
				"##o:0:result\n"
				"##workspace:1024\n"
				"##!!multipleOutputInfo:3:1:\n"
				"##!!fullAddressTrans:0:\n"
				"##!!reductionFactor:0:\n"
				"")
				.sampler(1, 0)
				.sampler(1, 1)
				.sampler(1, 2)
				.sampler(1, 3)
				.sampler(2, 0)
				.sampler(2, 1)
				.sampler(2, 2)
				.sampler(2, 3)
				.interpolant(1, kStreamInterpolant_Position)
				.interpolant(2, kStreamInterpolant_Position)
				.output(3, 3)
			)
		);
	static const void* __fourway_matmult_4x4_arb = &__fourway_matmult_4x4_arb_desc;
}

// Host-side entry point for the fourway_matmult_4x4 kernel.
//
// Pushes `a` and `b` as stream arguments and `result` as the output argument
// (in that order) onto a lazily constructed brook::kernel, then calls Map().
void fourway_matmult_4x4(::brook::stream a,
                         ::brook::stream b,
                         ::brook::stream result) {
  // Flat list of (tag string, descriptor pointer) pairs; a NULL/NULL pair
  // marks the end of the list.
  static const void *backend_programs[] = {
    "fp30", __fourway_matmult_4x4_fp30,
    "arb",  __fourway_matmult_4x4_arb,
    "ps20", __fourway_matmult_4x4_ps20,
    NULL,   NULL
  };

  // Function-local static: built from the table once, on the first call,
  // and reused by every later call.
  static brook::kernel kernel_obj(backend_programs);

  // Argument order matters: two inputs first, then the output.
  kernel_obj->PushStream(a);
  kernel_obj->PushStream(b);
  kernel_obj->PushOutput(result);
  kernel_obj->Map();
}



// File-local descriptor for the "ps20" variant of the
// fourway_matmult_4x4_pretransposed kernel: one technique holding a single
// pass whose program text is the ps_2_0 listing below. The listing is a
// runtime string; its bytes must not be altered.
namespace {
	using namespace ::brook::desc;
	static const gpu_kernel_desc __fourway_matmult_4x4_pretransposed_ps20_desc = gpu_kernel_desc()
		.technique( gpu_technique_desc()
			.pass( gpu_pass_desc(
				// Declarations: two texture-coordinate inputs (t0, t1) and
				// eight 2D samplers (s0-s7).
				"    ps_2_0\n"
				"    dcl t0.xy\n"
				"    dcl t1.xy\n"
				"    dcl_2d s0\n"
				"    dcl_2d s1\n"
				"    dcl_2d s2\n"
				"    dcl_2d s3\n"
				"    dcl_2d s4\n"
				"    dcl_2d s5\n"
				"    dcl_2d s6\n"
				"    dcl_2d s7\n"
				// Fetches: s0-s3 are sampled at t0 into r8, r7, r6, r1;
				// s4-s7 are sampled at t1 into r0, r4, r3, r2.
				"    texld r8, t0, s0\n"
				"    texld r0, t1, s4\n"
				"    texld r4, t1, s5\n"
				"    texld r3, t1, s6\n"
				"    texld r2, t1, s7\n"
				"    texld r7, t0, s1\n"
				"    texld r6, t0, s2\n"
				"    texld r1, t0, s3\n"
				// Output 0: r8 (from s0) dotted with each of r0, r4, r3, r2
				// fills x, y, z, w.
				"    dp4 r5.x, r8, r0\n"
				"    dp4 r5.y, r8, r4\n"
				"    dp4 r5.z, r8, r3\n"
				"    dp4 r5.w, r8, r2\n"
				"    mov oC0, r5\n"
				// Output 1: same pattern with r7 (from s1).
				"    dp4 r5.x, r7, r0\n"
				"    dp4 r5.y, r7, r4\n"
				"    dp4 r5.z, r7, r3\n"
				"    dp4 r5.w, r7, r2\n"
				"    mov oC1, r5\n"
				// Output 2: same pattern with r6 (from s2).
				"    dp4 r5.x, r6, r0\n"
				"    dp4 r5.y, r6, r4\n"
				"    dp4 r5.z, r6, r3\n"
				"    dp4 r5.w, r6, r2\n"
				"    mov oC2, r5\n"
				// Output 3: same pattern with r1 (from s3), but accumulated
				// directly into r0. r0 is read only by the first dp4 of this
				// group, so overwriting its components afterwards is safe.
				"    dp4 r0.x, r1, r0\n"
				"    dp4 r0.y, r1, r4\n"
				"    dp4 r0.z, r1, r3\n"
				"    dp4 r0.w, r1, r2\n"
				"    mov oC3, r0\n"
				"\n"
				" \n"
				// Trailer embedded in the program text as '//' lines. It names
				// three arguments (a, b, result) and a 0:4 multipleOutputInfo
				// entry. NOTE(review): presumably metadata emitted by the brcc
				// compiler for the Brook runtime; its consumer is not visible
				// in this file section.
				"//!!BRCC\n"
				"//narg:3\n"
				"//s:0:a\n"
				"//s:0:b\n"
				"//o:0:result\n"
				"//workspace:1024\n"
				"//!!multipleOutputInfo:0:4:\n"
				"//!!fullAddressTrans:0:\n"
				"//!!reductionFactor:0:\n"
				"")
				// Eight sampler bindings: indices 0-3 under first argument 1,
				// then 0-3 under first argument 2.
				// NOTE(review): presumably (kernel argument index, component
				// index), i.e. four samplers each for `a` and `b` — confirm
				// against the gpu_pass_desc declaration.
				.sampler(1, 0)
				.sampler(1, 1)
				.sampler(1, 2)
				.sampler(1, 3)
				.sampler(2, 0)
				.sampler(2, 1)
				.sampler(2, 2)
				.sampler(2, 3)
				// One position interpolant for each of the same two first
				// arguments (1 and 2).
				.interpolant(1, kStreamInterpolant_Position)
				.interpolant(2, kStreamInterpolant_Position)
				// Four outputs under first argument 3, matching the four
				// oC0-oC3 writes in the program above.
				.output(3, 0)
				.output(3, 1)
				.output(3, 2)
				.output(3, 3)
			)
		);
	// Untyped pointer to the descriptor above.
	// NOTE(review): presumably listed under a "ps20" tag in a kernel lookup
	// table further down the file — not visible in this section.
	static const void* __fourway_matmult_4x4_pretransposed_ps20 = &__fourway_matmult_4x4_pretransposed_ps20_desc;
}


namespace {
	using namespace ::brook::desc;
	static const gpu_kernel_desc __fourway_matmult_4x4_pretransposed_fp30_desc = gpu_kernel_desc()
		.technique( gpu_technique_desc()
			.pass( gpu_pass_desc(
				"!!FP1.0\n"
				"# NV_fragment_program generated by NVIDIA Cg compiler\n"
				"# cgc version 1.1.0003, build date Jul  7 2003  11:55:19\n"
				"# command line args: -quiet -profile fp30 -DUSERECT=1 -DCGC=1\n"
				"#vendor NVIDIA Corporation\n"
				"#version 1.0.02\n"
				"#profile fp30\n"
				"#program main\n"
				"#semantic main.__structsampler0_a : TEXUNIT0\n"
				"#semantic main.__structsampler1_a : TEXUNIT1\n"
				"#semantic main.__structsampler2_a : TEXUNIT2\n"
				"#semantic main.__structsampler3_a : TEXUNIT3\n"
				"#semantic main.__structsampler0_b : TEXUNIT4\n"
				"#semantic main.__structsampler1_b : TEXUNIT5\n"
				"#semantic main.__structsampler2_b : TEXUNIT6\n"
				"#semantic main.__structsampler3_b : TEXUNIT7\n"
				"#semantic main.__workspace : C0\n"
				"#var samplerRECT __structsampler0_a : TEXUNIT0 : texunit 0 : 0 : 1\n"
				"#var samplerRECT __structsampler1_a : TEXUNIT1 : texunit 1 : 1 : 1\n"
				"#var samplerRECT __structsampler2_a : TEXUNIT2 : texunit 2 : 2 : 1\n"
				"#var samplerRECT __structsampler3_a : TEXUNIT3 : texunit 3 : 3 : 1\n"
				"#var float2 _tex_a_pos : $vin.TEXCOORD0 : TEXCOORD0 : 4 : 1\n"
				"#var samplerRECT __structsampler0_b : TEXUNIT4 : texunit 4 : 5 : 1\n"
				"#var samplerRECT __structsampler1_b : TEXUNIT5 : texunit 5 : 6 : 1\n"
				"#var samplerRECT __structsampler2_b : TEXUNIT6 : texunit 6 : 7 : 1\n"
				"#var samplerRECT __structsampler3_b : TEXUNIT7 : texunit 7 : 8 : 1\n"
				"#var float2 _tex_b_pos : $vin.TEXCOORD1 : TEXCOORD1 : 9 : 1\n"
				"#var float4 __output_0 : $vout.COLOR0 : COLOR0 : 10 : 1\n"
				"#var float4 __workspace : C0 :  : 11 : 1\n"
				"DECLARE __workspace;\n"
				"TEX R0, f[TEX0].xyxx, TEX0, RECT;\n"
				"TEX R1, f[TEX1].xyxx, TEX7, RECT;\n"
				"DP4R R1.x, R0, R1;\n"
				"TEX R2, f[TEX1].xyxx, TEX6, RECT;\n"
				"TEX R3, f[TEX1].xyxx, TEX5, RECT;\n"
				"DP4R R1.y, R0, R2;\n"
				"DP4R R1.z, R0, R3;\n"
				"MOVR o[COLR].w, R1.x;\n"
				"MOVR o[COLR].z, R1.y;\n"
				"MOVR o[COLR].y, R1.z;\n"
				"TEX R1, f[TEX1].xyxx, TEX4, RECT;\n"
				"DP4R R0.x, R0, R1;\n"
				"MOVR o[COLR].x, R0.x;\n"
				"END \n"
				"##!!BRCC\n"
				"##narg:3\n"
				"##s:0:a\n"
				"##s:0:b\n"
				"##o:0:result\n"
				"##workspace:1024\n"
				"##!!multipleOutputInfo:0:1:\n"
				"##!!fullAddressTrans:0:\n"
				"##!!reductionFactor:0:\n"
				"")
				.sampler(1, 0)
				.sampler(1, 1)
				.sampler(1, 2)
				.sampler(1, 3)
				.sampler(2, 0)
				.sampler(2, 1)
				.sampler(2, 2)
				.sampler(2, 3)
				.interpolant(1, kStreamInterpolant_Position)
				.interpolant(2, kStreamInterpolant_Position)
				.output(3, 0)
			)
			.pass( gpu_pass_desc(
				"!!FP1.0\n"
				"# NV_fragment_program generated by NVIDIA Cg compiler\n"
				"# cgc version 1.1.0003, build date Jul  7 2003  11:55:19\n"
				"# command line args: -quiet -profile fp30 -DUSERECT=1 -DCGC=1\n"
				"#vendor NVIDIA Corporation\n"
				"#version 1.0.02\n"
				"#profile fp30\n"
				"#program main\n"
				"#semantic main.__structsampler0_a : TEXUNIT0\n"
				"#semantic main.__structsampler1_a : TEXUNIT1\n"
				"#semantic main.__structsampler2_a : TEXUNIT2\n"
				"#semantic main.__structsampler3_a : TEXUNIT3\n"
				"#semantic main.__structsampler0_b : TEXUNIT4\n"
				"#semantic main.__structsampler1_b : TEXUNIT5\n"
				"#semantic main.__structsampler2_b : TEXUNIT6\n"
				"#semantic main.__structsampler3_b : TEXUNIT7\n"
				"#semantic main.__workspace : C0\n"
				"#var samplerRECT __structsampler0_a : TEXUNIT0 : texunit 0 : 0 : 1\n"
				"#var samplerRECT __structsampler1_a : TEXUNIT1 : texunit 1 : 1 : 1\n"
				"#var samplerRECT __structsampler2_a : TEXUNIT2 : texunit 2 : 2 : 1\n"
				"#var samplerRECT __structsampler3_a : TEXUNIT3 : texunit 3 : 3 : 1\n"
				"#var float2 _tex_a_pos : $vin.TEXCOORD0 : TEXCOORD0 : 4 : 1\n"
				"#var samplerRECT __structsampler0_b : TEXUNIT4 : texunit 4 : 5 : 1\n"
				"#var samplerRECT __structsampler1_b : TEXUNIT5 : texunit 5 : 6 : 1\n"
				"#var samplerRECT __structsampler2_b : TEXUNIT6 : texunit 6 : 7 : 1\n"
				"#var samplerRECT __structsampler3_b : TEXUNIT7 : texunit 7 : 8 : 1\n"
				"#var float2 _tex_b_pos : $vin.TEXCOORD1 : TEXCOORD1 : 9 : 1\n"
				"#var float4 __output_1 : $vout.COLOR0 : COLOR0 : 10 : 1\n"
				"#var float4 __workspace : C0 :  : 11 : 1\n"
				"DECLARE __workspace;\n"
				"TEX R0, f[TEX0].xyxx, TEX1, RECT;\n"
				"TEX R1, f[TEX1].xyxx, TEX7, RECT;\n"
				"DP4R R1.x, R0, R1;\n"
				"TEX R2, f[TEX1].xyxx, TEX6, RECT;\n"
				"TEX R3, f[TEX1].xyxx, TEX5, RECT;\n"
				"DP4R R1.y, R0, R2;\n"
				"DP4R R1.z, R0, R3;\n"
				"MOVR o[COLR].w, R1.x;\n"
				"MOVR o[COLR].z, R1.y;\n"
				"MOVR o[COLR].y, R1.z;\n"
				"TEX R1, f[TEX1].xyxx, TEX4, RECT;\n"
				"DP4R R0.x, R0, R1;\n"
				"MOVR o[COLR].x, R0.x;\n"
				"END \n"
				"##!!BRCC\n"
				"##narg:3\n"
				"##s:0:a\n"
				"##s:0:b\n"
				"##o:0:result\n"
				"##workspace:1024\n"
				"##!!multipleOutputInfo:1:1:\n"
				"##!!fullAddressTrans:0:\n"
				"##!!reductionFactor:0:\n"
				"")
				.sampler(1, 0)
				.sampler(1, 1)
				.sampler(1, 2)
				.sampler(1, 3)
				.sampler(2, 0)
				.sampler(2, 1)
				.sampler(2, 2)
				.sampler(2, 3)
				.interpolant(1, kStreamInterpolant_Position)
				.interpolant(2, kStreamInterpolant_Position)
				.output(3, 1)
			)
			.pass( gpu_pass_desc(
				"!!FP1.0\n"
				"# NV_fragment_program generated by NVIDIA Cg compiler\n"
				"# cgc version 1.1.0003, build date Jul  7 2003  11:55:19\n"
				"# command line args: -quiet -profile fp30 -DUSERECT=1 -DCGC=1\n"
				"#vendor NVIDIA Corporation\n"
				"#version 1.0.02\n"
				"#profile fp30\n"
				"#program main\n"
				"#semantic main.__structsampler0_a : TEXUNIT0\n"
				"#semantic main.__structsampler1_a : TEXUNIT1\n"
				"#semantic main.__structsampler2_a : TEXUNIT2\n"
				"#semantic main.__structsampler3_a : TEXUNIT3\n"
				"#semantic main.__structsampler0_b : TEXUNIT4\n"
				"#semantic main.__structsampler1_b : TEXUNIT5\n"
				"#semantic main.__structsampler2_b : TEXUNIT6\n"
				"#semantic main.__structsampler3_b : TEXUNIT7\n"
				"#semantic main.__workspace : C0\n"
				"#var samplerRECT __structsampler0_a : TEXUNIT0 : texunit 0 : 0 : 1\n"

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -