📄 spu_raytracer.cpp
字号:
/* Copyright (c) 2007 Massachusetts Institute of Technology
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 * the Software, and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
 * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
/**
 * spu_raytracer.cpp - blue-steel SPU ray tracer program
 * @author Brian Sweatt
 *
 * This is the actual SPU program that receives messages from the PPU and performs the
 * requested operations. This program holds a Camera object and a RayTracer object whose
 * properties can be updated from the PPU given the aforementioned messages (methods for
 * sending these messages are provided in the bsapi.h file).
 *
 * This program DMAs in all objects, lights, and materials each frame, and renders one
 * horizontal strip of the screen at a time, with a strip of the screen being double-buffered
 * on the SPU that is working on it. In the presence of N SPUs, blue-steel will allocate
 * each SPU to render every Nth strip of the screen. (With 6 SPUs, for example, SPU 1 will
 * render horizontal strips 1, 7, 13,..., while SPU 2 will render 2, 8, 14,..., and so on).
 *
 * To maximize performance, this program utilizes extensive latency hiding that would
 * not be possible if using a blocking software managed cache. Because of this, all objects
 * being rendered by the SPU must be able to fit in local store. In practice, this number
 * is on the order of 800-1000 triangles / spheres.
 *
 * Since the format of the color values is different for the frame buffer than it is
 * for typical image data, the user will need to #define FRAME_BUFFER 1 if they plan to
 * write the resulting image data to the frame buffer each frame, and #define FRAME_BUFFER 0
 * otherwise. This #define is located in the src/common.h file. While this implies that the
 * user must recompile blue-steel when changing from images to the frame buffer, it is necessary
 * to know this information at compile time. Otherwise, there would be a branch in the inner loop
 * that writes and DMAs the color data. As the SPU has no real branch prediction, this could
 * result in up to a ~20 cycle delay for each packet of RGB values that are written to the
 * buffer. Because of this, recompilation is necessary for optimal performance.
*/extern "C" {#include <spu_intrinsics.h>#include <vec_literal.h>#include <libvector.h>#include <float.h>#include <math.h>#include <simdmath.h>#include <libmisc.h>#include <spu_mfcio.h>#include <pack_rgba8_v.h>#include <stdio.h>}#include "../src/triangle.h"#include "../src/sphere.h"#include "../src/camera.h"#include "../src/common.h"#include "../src/rgbpacket.h"#include "../src/objectset.h"#include "../src/raytracer.h"#include "../src/light.h"#include "../src/material.h"/*#define CACHE_NAME obj_cache#define CACHED_TYPE Sphere#define CACHELINE_LOG2SIZE 7#define CACHE_LOG2NWAY 2#define CACHE_LOG2NSETS 5#define CACHE_TYPE 1#include <cache-api.h>*/#define MAX_REGION_SIZE 512const int STRIP_SIZE = STRIP_WIDTH * STRIP_HEIGHT;typedef struct { Camera *camera; uint32_t startx; uint32_t starty; uint32_t total_width; uint32_t total_height; uint32_t region_width; uint32_t region_height; uint32_t stride; uint32_t addr;} RenderContext;int spe_num;void (*opcode_handler[NUM_OPCODES])(ObjectSet *, RenderContext&, RayTracer*, vector unsigned int image_buf[2][STRIP_SIZE], ObjectSet *obj_ea);void renderRegion(Camera *camera, const RayTracer *rt, const RenderContext &context, vector unsigned int image_buf[2][STRIP_SIZE], ObjectSet *objects, ObjectSet *obj_ea) { // Before doing ANYTHING, start the DMA request on the objects, so we can grind away on other stuff // while they're transferring mfc_get((void *)objects->triangles, (uint32_t)obj_ea->triangles, sizeof(Triangle)*objects->num_triangles, DMA_TAG_TRIANGLES, 0, 0); mfc_get((void *)objects->spheres, (uint32_t)obj_ea->spheres, sizeof(Sphere)*objects->num_spheres, DMA_TAG_SPHERES, 0, 0); mfc_get((void *)objects->lights, (uint32_t)obj_ea->lights, sizeof(light)*objects->num_lights, DMA_TAG_LIGHTS, 0, 0); mfc_get((void *)objects->materials, (uint32_t)obj_ea->materials, sizeof(material)*objects->num_materials, DMA_TAG_MATERIALS, 0, 0); vector float xFactor = spu_re(spu_splats((float)context.total_width)); vector float yFactor = 
spu_re(spu_splats((float)context.total_height)); vector float zero_v = spu_splats(0.0f); vector float max_v = spu_splats(MAX_FLOAT); int index, start_index = 0; uint32_t buffer = 0; vector float offset = (vector float) {0.0f, 1.0f, 2.0f, 3.0f}; vector float offset2 = (vector float) {4.0f, 5.0f, 6.0f, 7.0f}; vector float startx = spu_splats((float)context.startx); uint32_t saddr = context.addr; uint32_t scene_size = STRIP_SIZE * sizeof(vector unsigned int); // Render the scene in horizontal strips, assuming that each pass renders the entire width of the screen // Upon rendering each horizontal strip, start the dma to main memory, while rendering the next strip as // defined by the stride member of the rendering context. (ie; If 6 spes are rendering, they will each // render every sixth line, starting at different offsets for (int s = context.starty; s < context.total_height; s += context.stride) { index = 0; vector float y = spu_splats((float)s ); y = spu_mul(y, yFactor); for (int j = 0; j < IMG_WIDTH; j+=4) { vector float j_v = spu_splats((float) j); vector float x1 = spu_add(j_v, offset); HitPacket h; h.t = max_v; h.nx = h.ny = h.nz = zero_v; h.materialID = spu_splats((uint32_t)0); x1 = spu_mul(x1, xFactor); RayPacket r = camera->generateRayPacket(x1, y); RGBPacket color = rt->tracePacket(r, h, 0.0f, MAX_BOUNCES, spu_splats(1.0f)); *(image_buf[buffer] + index) = _pack_rgba8_v(color.r, color.g, color.b, zero_v);#if(FRAME_BUFFER) // The following is used to make the color structure compliant with the frame buffer *(image_buf[buffer] + index) = spu_rl(image_buf[buffer][index], 24);#endif index++; } // Start the DMA for the buffer just rendered mfc_put((void *) image_buf[buffer], saddr, scene_size, buffer, 0, 0); // Compute the address for the next strip that is to be rendered... by skipping stride*width pixels saddr += (STRIP_SIZE) * context.stride * sizeof(vector unsigned int); // Switch to the other buffer... buffer = 1 - buffer; // ... 
and wait for the DMA operation on it to finish (if it hasn't yet) mfc_write_tag_mask(1 << buffer); mfc_read_tag_status_all(); } mfc_write_tag_mask(1 << (1-buffer)); mfc_read_tag_status_all();}void handle_spe_nop(ObjectSet *objects, RenderContext &render_context, RayTracer *rt, vector unsigned int image_buf[2][STRIP_SIZE], ObjectSet *obj_ea) { return;}void handle_spe_read_scene(ObjectSet *objects, RenderContext &render_context, RayTracer *rt, vector unsigned int image_buf[2][STRIP_SIZE], ObjectSet *obj_ea) { dprintf("(%i)Reading Scene...\n", spe_num); uint32_t saddr = spu_read_in_mbox(); uint32_t num = spu_read_in_mbox(); dprintf("(%i)addr: %i objects: %i\n", spe_num, saddr, num);}void handle_spe_render_region(ObjectSet *objects, RenderContext &render_context, RayTracer *rt, vector unsigned int image_buf[2][STRIP_SIZE], ObjectSet *obj_ea) { render_context.addr = spu_read_in_mbox(); render_context.startx = spu_read_in_mbox(); render_context.starty = spu_read_in_mbox(); renderRegion(render_context.camera, rt, render_context, image_buf, objects, obj_ea); spu_write_out_mbox(1); //cache_pr_stats(obj_cache);}void handle_spe_update_camera_pos(ObjectSet *objects, RenderContext &render_context, RayTracer *rt, vector unsigned int image_buf[2][STRIP_SIZE], ObjectSet *obj_ea) { float x = uint32_as_float(spu_read_in_mbox()); float y = uint32_as_float(spu_read_in_mbox()); float z = uint32_as_float(spu_read_in_mbox()); render_context.camera->setCenter((vector float) {x,y,z,0});}void handle_spe_update_camera_dir(ObjectSet *objects, RenderContext &render_context, RayTracer *rt, vector unsigned int image_buf[2][STRIP_SIZE], ObjectSet *obj_ea) { float x = uint32_as_float(spu_read_in_mbox()); float y = uint32_as_float(spu_read_in_mbox()); float z = uint32_as_float(spu_read_in_mbox()); float ux = uint32_as_float(spu_read_in_mbox());
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -