#include "stdafx.h" // gdraw_ps3gcm.cpp - author: Fabian Giesen - copyright 2010-2011 RAD Game Tools // // This implements the Iggy graphics driver layer for GCM. // GDraw consists of several components that interact fairly loosely with each other; // e.g. the resource management, drawing and filtering parts are all fairly independent // of each other. If you want to modify some aspect of GDraw - say the texture allocation // logic - your best bet is usually to just look for one of the related entry points, // e.g. MakeTextureBegin, and take it from there. There's a bunch of code in this file, // but most of it isn't really complicated. // // One bit you might be tempted to touch is GDraws state management, to integrate it with // an existing state caching system. This is *not recommended*; Iggy tends to generate a fairly // large number of batches, and the existing implementation was designed to keep PPU overhead to a // minimum while also avoiding redundant state changes and keeping the command buffer small. // Introducing any extra indirections is likely to cause notably degraded performance. Nonetheless, // if you want to continue, here's the list of functions that modify RSX state in some way: // - The video memory defragmentation logic // - All of the rendering helpers // - RenderTile*/TextureDrawBuffer* may change the active rendertarget and depth/stencil surface, // as do GCM_NoMoreGDrawThisFrame and the rendertarget management code (see comments there) // - set_rsx_texture / set_texture // - set_renderstate and set_renderstate_full. These are the main places where render state changes occur; // you should probably start here. // - DrawIndexedTriangles calls set_vertex_decl which sets the active vertex/index buffers and vertex declaration // - Most of the functions in the "filter effects" section modify RSX state, mostly pixel shader constants and textures // // On fences: GDraw makes use of fences to synchronize texture/vertex buffer updates // that happen during the middle of a frame (which can happen when GDraw uses a texture // or vertex buffer that hasn't been used before, or one that has since been freed to make // space for other resources). This uses its own RSX label so you shouldn't generally notice // or need to worry about it. It is something to keep in mind when modifying any of the // resource-management code, though; you need to be careful not to mess up the synchronization // that's there. The current fence index (stored as gdraw->next_fence_index) is just a 64-bit counter, // incremented whenever a fence (usually a texture label) is inserted into the command buffer. // wait_on_fence is then used to make sure that the RSX has passed such a fence whenever GDraw // is about to start an operation that needs synchronization, e.g. modify or free a texture. // (Technically, memory only needs to be synchronized when it's being re-allocated, not when // it's freed, but the only case where GDraw frees textures in usual operation is to make // space for new allocations, so we don't finesse this). This logic was carefully written (and // tested) to make sure it works properly even in the presence of counter wraparound. // // One final thing to be aware about is RSX pipeline offsets between different type of fences. // As mentioned above, most fences we insert into the command stream are implemented using // (fairly light-weight) texture labels. But we do use backend labels during video memory // defragmentation and to synchronize render-to-texture operations. Backend labels are written // later in the pipeline than texture labels, causing a race condition in sequences like // // cellGcmSetWriteTextureLabel(label, 0) // ... // cellGcmSetWriteBackEndLabel(label, 1) // ... // cellGcmSetWriteTextureLabel(label, 2) // // where label might theoretically become first 2 then 1. Needless to say, this would // be very bad. To ensure this can't happen, we make sure to wait on the last issued // fence before we insert a backend fence. For defragmentation, this wait is on the PPU // (we need to wait for rendering to finish before we can shuffle textures or vertex // buffer around). For render-to-texture operations, we insert a WaitLabel command // first. This may stall the RSX for a short amount of time, but it's no big deal, // because if we didn't stall there, we'd still have to wait immediately afterwards // since the render target change that follows it also causes a pipeline flush. This // was benchmarked not to cause any notable performance degradation - our first // implementation used two RSX labels, one only updated with texture labels and the // other with backend labels, but this variant is somewhat simpler and uses just one // RSX label, so we switched to it. #define GDRAW_ASSERTS #include #include #include #include #include #include #include "gdraw.h" #include "iggy.h" struct GcmTexture; #include "gdraw_ps3gcm.h" typedef union { struct { GcmTexture *gcm; void *gcm_ptr; } tex; struct { void *verts; void *inds; } vbuf; } GDrawNativeHandle; #define GDRAW_MANAGE_MEM #define GDRAW_DEFRAGMENT #define GDRAW_BUFFER_RING #define GDRAW_MIN_FREE_AMOUNT (64*1024) // always try to free at least this many bytes when throwing out old textures #include "gdraw_shared.inl" // max rendertarget stack depth. this depends on the extent to which you // use filters and non-standard blend modes, and how nested they are. #define MAX_RENDER_STACK_DEPTH 8 // Iggy is hardcoded to a limit of 16... probably 1-3 is realistic #define MAX_SAMPLERS 3 // max number of texture samplers used #define AATEX_SAMPLER 7 // sampler that aa_tex gets set in #define FENCE_BATCH_INTERVAL 64 // put a fence after every N'th batch static GDrawFunctions gdraw_funcs; // render target state typedef struct { GDrawHandle *color_buffer; S32 base_x, base_y, width, height; rrbool cached; } GDrawFramebufferState; struct ProgramWithCachedVariableLocations { union { unsigned char *prog_data; CGprogram program; }; union { void *ucode; // used for vertex progs CellCgbFragmentProgramConfiguration cfg; // used for fragment progs }; int vars[MAX_VARS]; // it's unsigned in d3d, but we want an 'undefined' value }; struct GcmTexture { // in hardware register format! (for quick texture switching) U32 offset; U32 format; U32 remap; U32 imagerect; U32 control3; // used internally U32 width, height; U32 pitch; U32 swizzled; }; #define CHAN_A 0 #define CHAN_R 1 #define CHAN_G 2 #define CHAN_B 3 #define TEXREMAP(chan,out,in) ((CELL_GCM_TEXTURE_REMAP_ ## out << (chan*2 + 8)) | ((CELL_GCM_TEXTURE_REMAP_ ## in << (chan*2)))) /////////////////////////////////////////////////////////////////////////////// // // GDraw data structure // // // This is the primary rendering abstraction, which hides all // the platform-specific rendering behavior from Iggy. It is // full of platform-specific graphics state, and also general // graphics state so that it doesn't have to callback into Iggy // to get at that graphics state. typedef struct { CellGcmContextData *gcm; // scale factor converting worldspace to viewspace <0,0>.. F32 world_to_pixel[2]; F32 projection[4]; // cached state int vert_format; // active vertex format (-1 if unknown) U32 scissor_state; // ~0 if unknown, otherwise 0 or 1 int blend_mode; // active blend mode (-1 if unknown) U32 stencil_key; // field built from stencil test flags. 0=no stencil, ~0 is used for "unknown state" U32 z_key; // same for z write/z test GDrawTexture *active_tex[MAX_SAMPLERS]; ProgramWithCachedVariableLocations *cur_fprog; // fragment shader base pointers ProgramWithCachedVariableLocations *basic_fprog[GDRAW_TEXTURE__count]; // render targets CellGcmSurface main_surface; GDrawHandleCache rendertargets; GDrawHandle rendertarget_handles[MAX_RENDER_STACK_DEPTH]; // not -1, because we use +1 to initialize GcmTexture rendertarget_textures[MAX_RENDER_STACK_DEPTH+1]; gswf_recti rt_valid[MAX_RENDER_STACK_DEPTH+1]; // valid rect for texture clamping // size of our render targets S32 frametex_width, frametex_height; // viewport setting (in pixels) for current frame S32 vx,vy; S32 fw,fh; // full width/height of virtual display S32 tw,th; // actual width/height of current tile S32 tpw,tph; // width/height of padded version of tile S32 tx0,ty0; S32 tx0p,ty0p; rrbool in_blur; struct { S32 x,y,w,h; } cview; // current viewport GcmTexture aa_tex; // rsx local mem state GDrawArena local_arena; // this is where shaders etc. land U32 transfer_mode; // used for defragmentation // render-state stack described above for 'temporary' rendering GDrawFramebufferState frame[MAX_RENDER_STACK_DEPTH]; GDrawFramebufferState *cur; // ppu/rsx sync U32 fence_label_index; volatile U32 *fence_label; U64 next_fence_index; // next fence index we're going to write U32 fence_batch_counter; // used to write fences every N batches // texture and vertex buffer pools GDrawHandleCache *texturecache; int tex_loc; // CELL_GCM_LOCATION of textures GDrawHandleCache *vbufcache; U8 *vbuf_base; // used for fast addr2offs int vbuf_loc; // CELL_GCM_LOCATION of vertex buffers // rendertarget stuff GDrawArena rt_arena; S32 rt_pitch; // render target pitch int rt_loc; // CELL_GCM_LOCATION of rendertargets // dyn vertex buffer gdraw_bufring dyn_vb; U8 *dynvb_base; // used for fast addr2offs int dynvb_loc; // CELL_GCM_LOCATION of dyn vertex buffer // fragment shaders ProgramWithCachedVariableLocations fprog[GDRAW_TEXTURE__count][3]; ProgramWithCachedVariableLocations exceptional_blend[GDRAW_BLENDSPECIAL__count]; ProgramWithCachedVariableLocations filter_prog[2][16]; ProgramWithCachedVariableLocations blur_prog[MAX_TAPS+1]; ProgramWithCachedVariableLocations colormatrix; // vertex shaders ProgramWithCachedVariableLocations vprog[GDRAW_vformat__basic_count]; U32 vslot[GDRAW_vformat__basic_count]; // mipmapping GDrawMipmapContext mipmap; // for bookkeeping GDrawFence tile_end_fence; } GDraw; #define COLOR_MASK_ALL (CELL_GCM_COLOR_MASK_R | CELL_GCM_COLOR_MASK_G | CELL_GCM_COLOR_MASK_B | CELL_GCM_COLOR_MASK_A) static GDraw *gdraw; //////////////////////////////////////////////////////////////////////// // // Vertex program registers // // These are direct constant register indices // NOTE: RSX can't set more than 8 vertex constants at once! #define VVAR_world 0 #define VVAR_count_worldonly 2 // number of constants to update when we only change world matrix #define VVAR_x_off 2 #define VVAR_color_mul 3 #define VVAR_count_world_and_color 4 #define VVAR_tex_s 4 #define VVAR_tex_t 5 #define VVAR_count 6 // number of vertex program registers to update when we change everything #define VVAR_viewproj 6 // projection is only updated when it actually changes struct Vec4 { F32 x,y,z,w; }; struct VertexVars { Vec4 world[2]; Vec4 x_off; Vec4 color_mul; Vec4 s0_texgen; Vec4 t0_texgen; Vec4 proj; }; //////////////////////////////////////////////////////////////////////// // // Fence and command buffer helpers // // used to write to the command buffer: gcc won't generate clrldi // instructions for struct member access, but it will for pointer // arithmetic. struct CommandData { U32 w0,w1,w2,w3,w4,w5,w6,w7; U32 w8,w9,wa,wb,wc,wd,we,wf; }; static RADINLINE CommandData *reserve_command(CellGcmContextData * RADRESTRICT gcm, U32 size) { cellGcmReserveMethodSizeInline(gcm, size); return (CommandData *) gcm->current; } static RADINLINE CommandData *put_command(CellGcmContextData * RADRESTRICT gcm, U32 size) { cellGcmReserveMethodSizeInline(gcm, size); CommandData *cmd = (CommandData *) gcm->current; gcm->current += size; return cmd; } static RADINLINE GDrawFence get_next_fence() { GDrawFence fence; fence.value = gdraw->next_fence_index; return fence; } static RADINLINE void flush_delayed_fence_updates() { } static RADINLINE rrbool is_fence_pending(GDrawFence fence) { // if it's older than one full wrap of the fence counter, // we know it's retired. (we can't have more than 4 billion // fences pending in command buffers with only 256MB of // main memory!) if (gdraw->next_fence_index - fence.value > 0xffffffffu) return false; // this is how far the GPU is. U32 retired = *gdraw->fence_label; // everything between "retired" (exclusive) and "next_fence_index" // (inclusive) is pending. everything else is definitely done. // // we need to be careful about this test since the "fence" value // coming in is, for all practical purposes, an arbitrary U32. our // fence counter might have wrapped around multiple times since we last // used a resource that gets freed, for instance! so if we report a // fence ID as pending that's not actually in flight, we might end up // with an infinite wait. // // this is a bit subtle since it depends on unsigned wraparound for us // to do the right thing. // number of pending fences (next_fence_index has, by definition, not been written yet) U32 num_pending = U32(gdraw->next_fence_index) - retired; // position of the current fence in the "list of pending fences", counting // from end (i.e. the "youngest" fence that we haven't even issued yet) U32 pos_in_pending_list = U32(gdraw->next_fence_index - fence.value); return pos_in_pending_list < num_pending; } static GDrawFence put_fence() { GDrawFence fence; gdraw->fence_batch_counter = FENCE_BATCH_INTERVAL; fence.value = gdraw->next_fence_index++; cellGcmSetWriteTextureLabelInline(gdraw->gcm, gdraw->fence_label_index, (U32) fence.value); return fence; } static GDrawFence put_backend_fence() { // careful with backend labels, they have no defined ordering wrt // texture labels in the same command stream! if you use them, make // sure there are no races. GDrawFence fence; gdraw->fence_batch_counter = FENCE_BATCH_INTERVAL; fence.value = gdraw->next_fence_index++; cellGcmSetWriteBackEndLabel(gdraw->gcm, gdraw->fence_label_index, (U32) fence.value); return fence; } static void wait_on_fence(GDrawFence fence) { if (is_fence_pending(fence)) { GDraw * RADRESTRICT gd = gdraw; if (fence.value == gd->next_fence_index) // haven't even written this one yet! put_fence(); cellGcmFlush(gd->gcm); IggyWaitOnFence((void *) gd->fence_label, (U32) fence.value); } } static U32 addr2offs(void *addr) { U32 offs; int32_t res = cellGcmAddressToOffset(addr, &offs); res = res; // avoid warning in release builds assert(res == CELL_OK); return offs; } static RADINLINE U32 vbufaddr2offs(GDraw * RADRESTRICT gd, void *addr) { return (U8 *) addr - gd->vbuf_base; } static RADINLINE U32 dynvbaddr2offs(GDraw * RADRESTRICT gd, void *addr) { return (U8 *) addr - gd->dynvb_base; } //////////////////////////////////////////////////////////////////////// // // Texture/video memory defragmentation support code // static void gdraw_gpu_memcpy(GDrawHandleCache *c, void *dst, void *src, U32 num_bytes) { RR_UNUSED_VARIABLE(c); U32 dstoffs = addr2offs(dst); U32 srcoffs = addr2offs(src); U32 pos = 0; U32 edgelen = 4096; U32 blocksize = edgelen*edgelen*4; assert((num_bytes & 3) == 0); while (pos < num_bytes) { // peel off square image transfers of edgelen x edgelen as long as we can while (pos + blocksize <= num_bytes) { assert(((dstoffs + pos) & (CELL_GCM_SURFACE_LINEAR_ALIGN_OFFSET - 1)) == 0); assert(((srcoffs + pos) & (CELL_GCM_SURFACE_LINEAR_ALIGN_OFFSET - 1)) == 0); cellGcmSetTransferImage(gdraw->gcm, gdraw->transfer_mode, dstoffs + pos, edgelen * 4, 0, 0, srcoffs + pos, edgelen * 4, 0, 0, edgelen, edgelen, 4); pos += blocksize; } edgelen >>= 1; blocksize >>= 2; if (edgelen == 32) break; // handle the rest (<4k "pixels") using a 1-line transfer } if (pos < num_bytes) { U32 amount = num_bytes - pos; assert(((dstoffs + pos) & (CELL_GCM_SURFACE_LINEAR_ALIGN_OFFSET - 1)) == 0); assert(((srcoffs + pos) & (CELL_GCM_SURFACE_LINEAR_ALIGN_OFFSET - 1)) == 0); cellGcmSetTransferImage(gdraw->gcm, gdraw->transfer_mode, dstoffs + pos, amount, 0, 0, srcoffs + pos, amount, 0, 0, amount >> 2, 1, 4); } } static void gdraw_defragment_cache(GDrawHandleCache *c, GDrawStats *stats) { GDrawFence fence; S32 i; int loc; if (!gdraw_CanDefragment(c)) return; // gpu needs to finish pending batches before it can start defragmenting fence = put_fence(); cellGcmSetWaitLabel(gdraw->gcm, gdraw->fence_label_index, (U32) fence.value); // actual defragmentation... loc = c->is_vertex ? gdraw->vbuf_loc : gdraw->tex_loc; gdraw->transfer_mode = (loc == CELL_GCM_LOCATION_LOCAL) ? CELL_GCM_TRANSFER_LOCAL_TO_LOCAL : CELL_GCM_TRANSFER_MAIN_TO_MAIN; gdraw_DefragmentMain(c, 0, stats); // go over all handles and adjustment pointers. // pointer adjustment is different for textures than it is for vertex buffers if (!c->is_vertex) { for (i=0; i < c->max_handles; i++) { GDrawHandle *h = &c->handle[i]; if (gdraw_res_is_managed(h)) { h->handle.tex.gcm_ptr = h->raw_ptr; h->handle.tex.gcm->offset = CELL_GCM_METHOD_DATA_TEXTURE_OFFSET(addr2offs(h->handle.tex.gcm_ptr)); } } } else { for (i=0; i < c->max_handles; i++) { GDrawHandle *h = &c->handle[i]; if (gdraw_res_is_managed(h)) { SINTa ind_offs = ((U8 *) h->handle.vbuf.inds - (U8 *) h->handle.vbuf.verts); h->handle.vbuf.verts = h->raw_ptr; h->handle.vbuf.inds = (U8 *) h->raw_ptr + ind_offs; } } } // texture pointers have changed, so textures need to be reset memset(&gdraw->active_tex, 0, sizeof(gdraw->active_tex)); // wait for RSX to finish, since we can't safely allocate memory in this cache // while data is being moved around. // could delay this until the next alloc, but the only place we ever call // defragment from is just before an alloc, so there's no point. // no backend fence race: we had RSX wait on completion of the last pending // texture label before we started this. cellGcmSetInvalidateTextureCache(gdraw->gcm, CELL_GCM_INVALIDATE_TEXTURE); cellGcmSetInvalidateVertexCache(gdraw->gcm); wait_on_fence(put_backend_fence()); } //////////////////////////////////////////////////////////////////////// // // RSX texture swizzling code. // // cellGcmConvertSwizzleFormat is too slow (one callback per pixel!) // and RSX swizzle transfers require local mem for the unswizzled and // swizzled versions at the same time during the conversion, which // is unacceptable for large textures. // // NOTE: // // RSX texture swizzling uses Morton order inside squares of size // minor x minor, where minor=min(w,h). If the texture is non-square, // we might have multiple such squares, which are arranged linearly // in memory. // // There's a nice way to step pixel coordinates given in Morton order // incrementally (cf. "Morton-order Matrices Deserve Compilers' Support", // D. S. Wise and J. D. Frens, https://www.cs.indiana.edu/cgi-bin/techreports/TRNNN.cgi?trnum=TR533). // // "insert" a 0 bit after each of the 16 low bits of x. used for morton encoding. static U32 part1by1(U32 x) { x &= 0x0000ffff; // x = ---- ---- ---- ---- fedc ba98 7654 3210 x = (x ^ (x << 8)) & 0x00ff00ff; // x = ---- ---- fedc ba98 ---- ---- 7654 3210 x = (x ^ (x << 4)) & 0x0f0f0f0f; // x = ---- fedc ---- ba98 ---- 7654 ---- 3210 x = (x ^ (x << 2)) & 0x33333333; // x = --fe --dc --ba --98 --76 --54 --32 --10 x = (x ^ (x << 1)) & 0x55555555; // x = -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0 return x; } // same for the RSX modified morton encoding. here we need to know which axis we're dealing with. static U32 rsx_morton_encode(S32 x, U32 axis, U32 minor) { // low bits are morton, high bits are linear return (part1by1((U32) x & (minor - 1)) << axis) + ((U32) x & ~(minor - 1)) * minor; } #ifdef __SNC__ #define GCCSchedBarrier() #else #define GCCSchedBarrier() __asm__ volatile("") #endif // update a subrect of a 8bpp texture. w x h pixels are copied from src to // dest at pixel position (dx,dy). the full destination texture is dw x dh pixels. static void swizzle_subrect_8bpp(U8 * RADRESTRICT dest, U32 dx, U32 dy, U32 dw, U32 dh, U8 * RADRESTRICT src, U32 srcpitch, U32 w, U32 h) { // determine morton-order stepping constants. U32 minor = RR_MIN(dw, dh); S64 xinc = (S32) rsx_morton_encode(-1, 0, minor); S64 yinc = (S32) rsx_morton_encode(-1, 1, minor); // determine start offsets along x/y axis U64 outx0 = rsx_morton_encode(dx, 0, minor); U64 outy = rsx_morton_encode(dy, 1, minor); while (h--) { U8 *in = src - 1; U8 *out = dest + outy; U64 i,outx; // copy and swizzle one line outx = outx0; #ifdef __SNC__ for (i=0; i < w; i++) { out[outx] = *++in; outx = (outx - xinc) & xinc; // outx++ with bit-interleaving } #else // this loop does the same as the above but generates *way* better code on GCC. for (i=0; i < w; i++) { U64 v,ox; v = __lbzu(1, in); GCCSchedBarrier(); ox = outx; outx = __subf(xinc, outx); GCCSchedBarrier(); __stbx(v, out, ox); outx = __and(outx, xinc); } #endif src += srcpitch; outy = (outy - yinc) & yinc; // outy++ with bit-interleaving } } // update a subrect of a 32bpp texture (version for small rects) static void swizzle_subrect_32bpp_small(U8 * RADRESTRICT dest, U32 dx, U32 dy, U32 dw, U32 dh, U8 * RADRESTRICT src, U32 srcpitch, U32 w, U32 h) { // determine morton-order stepping constants U32 minor = RR_MIN(dw, dh); S64 xinc = (S32) rsx_morton_encode(-1, 0, minor) * 4; // *4 since we work with byte offsets S64 yinc = (S32) rsx_morton_encode(-1, 1, minor) * 4; // determine start offsets along x/y axis U64 outx0 = rsx_morton_encode(dx, 0, minor) * 4; U64 outy = rsx_morton_encode(dy, 1, minor) * 4; while (h--) { U32 *in = (U32 *) src - 1; U8 *out = dest + outy; U64 i,outx; // copy and swizzle one line outx = outx0; #ifdef __SNC__ for (i=0; i < w; i++) { *((U32 *) (out + outx)) = *++in; outx = (outx - xinc) & xinc; // outx++ with bit-interleaving } #else // this loop does the same as the above but generates *way* better code on GCC. for (i=0; i < w; i++) { U64 v,ox; v = __lwzu(4, in); GCCSchedBarrier(); ox = outx; outx = __subf(xinc, outx); GCCSchedBarrier(); __stwx(v, out, ox); outx = __and(outx, xinc); } #endif src += srcpitch; outy = (outy - yinc) & yinc; // outy++ with bit-interleaving } } // update a subrect of a 32 bpp texture (main entry point) static void swizzle_subrect_32bpp(U8 * RADRESTRICT dest, U32 dx, U32 dy, U32 dw, U32 dh, U8 * RADRESTRICT src, U32 srcpitch, U32 w, U32 h) { U32 minor; U32 dx0,dy0,dx1,dy1; U32 wa,ha; U32 x,y; S64 incx,incy; // output increment (per block) U64 outx0,outx,outy; // output *offset* in x/y U64 a,b,c,d; // we have a fast path that updates aligned groups of 8x4 pixels at a time. // use that for the bulk of the data, then puzzle the rest together from smaller rects. if (w < 8 || h < 4) { // small block, don't bother swizzle_subrect_32bpp_small(dest, dx, dy, dw, dh, src, srcpitch, w, h); return; } // calculate the aligned part of the subrect that we process with the fast loop // we cut the image up like this: // +---------------------+ dy // | | // +---+---------------+-+ dy0 // | | | | // | | | | // +---+---------------+-+ dy1 // +---------------------+ dy+h // dx dx0 dx1 dx+w dx0 = (dx + 7) & ~7; dy0 = (dy + 3) & ~3; dx1 = (dx + w) & ~7; dy1 = (dy + h) & ~3; wa = dx1 - dx0; ha = dy1 - dy0; // if dy wasn't aligned, peel off the first couple of lines if (dy < dy0) { swizzle_subrect_32bpp_small(dest, dx, dy, dw, dh, src, srcpitch, w, dy0 - dy); src += srcpitch * (dy0 - dy); } // take care of the left/right parts that aren't aligned if (dx < dx0) swizzle_subrect_32bpp_small(dest, dx, dy0, dw, dh, src, srcpitch, dx0 - dx, ha); if (dx1 < dx+w) swizzle_subrect_32bpp_small(dest, dx1, dy0, dw, dh, src + (dx1-dx)*4, srcpitch, (dx+w) - dx1, ha); // main part: go through image in blocks of 8x4 pixels. (8x4 since that's one full cache line, // so we write pixels one cache line at a time) minor = RR_MIN(dw, dh); incx = (S32) rsx_morton_encode(-8, 0, minor) * 4; // *4 since it's all byte offsets incy = (S32) rsx_morton_encode(-4, 1, minor) * 4; outx0 = rsx_morton_encode(dx0, 0, minor) * 4; outy = rsx_morton_encode(dy0, 1, minor) * 4; for (y=0; y < ha/4; ++y) { // set up source line pointers for four lines void *src0 = (src + (dx0 - dx) * 4 - 8); void *src1 = ((U8 *) src0 + srcpitch); void *src2 = ((U8 *) src1 + srcpitch); void *src3 = ((U8 *) src2 + srcpitch); outx = outx0; // @TODO prefetches would probably be a good idea here, but need proper test data. for (x=0; x < wa/8; ++x) { void *out = (void *) (dest + outx + outy); outx = (outx - incx) & incx; // advance pointer for next pixel // just read 8x4 pixels and write them out in the z-order pattern // we can use doubleword loads since even in Z-order, groups of two horizontal // pixels don't get reordered. the rest is just the swizzle pattern expanded into offsets. a = __ld(0x08, src0); b = __ld(0x08, src1); c = __ld(0x10, src0); d = __ld(0x10, src1); __std(a, 0x00, out); __std(b, 0x08, out); __std(c, 0x10, out); __std(d, 0x18, out); a = __ld(0x08, src2); b = __ld(0x08, src3); c = __ld(0x10, src2); d = __ld(0x10, src3); __std(a, 0x20, out); __std(b, 0x28, out); __std(c, 0x30, out); __std(d, 0x38, out); a = __ld(0x18, src0); b = __ld(0x18, src1); c = __ldu(0x20, src0); d = __ldu(0x20, src1); __std(a, 0x40, out); __std(b, 0x48, out); __std(c, 0x50, out); __std(d, 0x58, out); a = __ld(0x18, src2); b = __ld(0x18, src3); c = __ldu(0x20, src2); d = __ldu(0x20, src3); __std(a, 0x60, out); __std(b, 0x68, out); __std(c, 0x70, out); __std(d, 0x78, out); } src += 4*srcpitch; outy = (outy - incy) & incy; } // and finally, the last few lines if (dy1 < dy+h) swizzle_subrect_32bpp_small(dest, dx, dy1, dw, dh, src, srcpitch, w, (dy+h) - dy1); } static void api_free_resource(GDrawHandle *r) { // we just need to clean up our state cache S32 i; for (i=0; i < MAX_SAMPLERS; i++) if (gdraw->active_tex[i] == (GDrawTexture *) r) gdraw->active_tex[i] = NULL; } static void RADLINK gdraw_UnlockHandles(GDrawStats *stats) { RR_UNUSED_VARIABLE(stats); gdraw_HandleCacheUnlockAll(gdraw->texturecache); gdraw_HandleCacheUnlockAll(gdraw->vbufcache); } //////////////////////////////////////////////////////////////////////// // // Texture creation/updating/deletion // extern GDrawTexture *gdraw_GCM_WrappedTextureCreate(CellGcmTexture *gcm_tex) { GDrawStats stats; memset(&stats, 0, sizeof(stats)); GDrawHandle *p = gdraw_res_alloc_begin(gdraw->texturecache, 0, &stats); // it may need to free one item to give us a handle p->handle.tex.gcm_ptr = 0; gdraw_HandleCacheAllocateEnd(p, 0, NULL, GDRAW_HANDLE_STATE_user_owned); gdraw_GCM_WrappedTextureChange((GDrawTexture *) p, gcm_tex); return (GDrawTexture *) p; } extern void gdraw_GCM_WrappedTextureChange(GDrawTexture *tex, CellGcmTexture *gcm_tex) { GDrawHandle *p = (GDrawHandle *) tex; GcmTexture *gcm = p->handle.tex.gcm; gcm->offset = CELL_GCM_METHOD_DATA_TEXTURE_OFFSET(gcm_tex->offset); gcm->format = CELL_GCM_METHOD_DATA_TEXTURE_FORMAT(gcm_tex->location, gcm_tex->cubemap, gcm_tex->dimension, gcm_tex->format, gcm_tex->mipmap); gcm->remap = gcm_tex->remap; gcm->imagerect = CELL_GCM_METHOD_DATA_TEXTURE_IMAGE_RECT(gcm_tex->height, gcm_tex->width); gcm->control3 = CELL_GCM_METHOD_DATA_TEXTURE_CONTROL3(gcm_tex->pitch, gcm_tex->depth); gcm->width = gcm_tex->width; gcm->height = gcm_tex->height; gcm->pitch = gcm_tex->pitch; gcm->swizzled = 0; // unused since we never upload to this } extern void gdraw_GCM_WrappedTextureDestroy(GDrawTexture *tex) { GDrawStats stats; gdraw_res_free((GDrawHandle *) tex, &stats); } static void RADLINK gdraw_SetTextureUniqueID(GDrawTexture *tex, void *old_id, void *new_id) { GDrawHandle *p = (GDrawHandle *) tex; // if this is still the handle it's thought to be, change the owner; // if the owner *doesn't* match, then they're changing a stale handle, so ignore if (p->owner == old_id) p->owner = new_id; } static bool is_texture_swizzled(S32 w, S32 h) { // we swizzle a texture if it's pow2 and not a line texture return h > 1 && (w & (w-1)) == 0 && (h & (h-1)) == 0; } static rrbool RADLINK gdraw_MakeTextureBegin(void *owner, S32 width, S32 height, gdraw_texture_format gformat, U32 flags, GDraw_MakeTexture_ProcessingInfo *p, GDrawStats *stats) { S32 bytes_pixel = 4; GDrawHandle *t = NULL; bool swizzled = false; if (width > 4096 || height > 4096) { IggyGDrawSendWarning(NULL, "GDraw %d x %d texture not supported by hardware (dimension size limit 4096)", width, height); return false; } if (gformat == GDRAW_TEXTURE_FORMAT_font) bytes_pixel = 1; swizzled = is_texture_swizzled(width, height); // determine the number of mipmaps to use and the size of the corresponding texture allocation // this assumes linear texture memory layout S32 pitch = width * bytes_pixel; U32 mipmaps = 0; S32 size = 0; if (!swizzled) // RSX HW bug: linear texture pitch size should be at least 16 bytes pitch = RR_MAX(pitch, 16); do { if (!swizzled) size += pitch * (RR_MAX(height >> mipmaps, 1)); else size += RR_MAX(pitch >> mipmaps, bytes_pixel) * RR_MAX(height >> mipmaps, 1); mipmaps++; } while ((flags & GDRAW_MAKETEXTURE_FLAGS_mipmap) && ((width >> mipmaps) || (height >> mipmaps))); // allocate a handle and make room in the cache for this much data assert(size != 0); t = gdraw_res_alloc_begin(gdraw->texturecache, size, stats); if (!t) return false; t->handle.tex.gcm_ptr = t->raw_ptr; GcmTexture *tex = t->handle.tex.gcm; U8 format; U32 remap; if (gformat == GDRAW_TEXTURE_FORMAT_font) { format = CELL_GCM_TEXTURE_B8; remap = TEXREMAP(CHAN_A, REMAP, FROM_B) | TEXREMAP(CHAN_R, ZERO, FROM_R) | TEXREMAP(CHAN_G, ZERO, FROM_G) | TEXREMAP(CHAN_B, ZERO, FROM_B); } else { format = CELL_GCM_TEXTURE_A8R8G8B8; // remap rgba -> argb remap = TEXREMAP(CHAN_A, REMAP, FROM_B) | TEXREMAP(CHAN_R, REMAP, FROM_A) | TEXREMAP(CHAN_G, REMAP, FROM_R) | TEXREMAP(CHAN_B, REMAP, FROM_G); } format |= (swizzled ? CELL_GCM_TEXTURE_SZ : CELL_GCM_TEXTURE_LN) | CELL_GCM_TEXTURE_NR; U8 dimension = (height != 1) ? CELL_GCM_TEXTURE_DIMENSION_2 : CELL_GCM_TEXTURE_DIMENSION_1; U8 cubemap = 0; U8 depth = 1; U8 location = gdraw->tex_loc; tex->offset = CELL_GCM_METHOD_DATA_TEXTURE_OFFSET(addr2offs(t->handle.tex.gcm_ptr)); tex->format = CELL_GCM_METHOD_DATA_TEXTURE_FORMAT(location, cubemap, dimension, format, mipmaps); tex->remap = remap; tex->imagerect = CELL_GCM_METHOD_DATA_TEXTURE_IMAGE_RECT(height, width); tex->control3 = CELL_GCM_METHOD_DATA_TEXTURE_CONTROL3(pitch, depth); tex->width = width; tex->height = height; tex->pitch = pitch; tex->swizzled = swizzled; gdraw_HandleCacheAllocateEnd(t, size, owner, (flags & GDRAW_MAKETEXTURE_FLAGS_never_flush) ? GDRAW_HANDLE_STATE_pinned : GDRAW_HANDLE_STATE_locked); stats->nonzero_flags |= GDRAW_STATS_alloc_tex; stats->alloc_tex += 1; stats->alloc_tex_bytes += size; p->texture_type = GDRAW_TEXTURE_TYPE_rgba; p->p0 = t; if (swizzled || (flags & GDRAW_MAKETEXTURE_FLAGS_mipmap)) { rrbool ok; assert(p->temp_buffer != NULL); ok = gdraw_MipmapBegin(&gdraw->mipmap, width, height, mipmaps, bytes_pixel, p->temp_buffer, p->temp_buffer_bytes); assert(ok); // this should never hit unless the temp_buffer is way too small p->p1 = &gdraw->mipmap; p->texture_data = gdraw->mipmap.pixels[0]; p->num_rows = gdraw->mipmap.bheight; p->stride_in_bytes = gdraw->mipmap.pitch[0]; p->i0 = 0; // current output y } else { p->p1 = 0; p->texture_data = (U8 *) t->handle.tex.gcm_ptr; p->num_rows = height; p->stride_in_bytes = t->handle.tex.gcm->pitch; } return true; } static rrbool RADLINK gdraw_MakeTextureMore(GDraw_MakeTexture_ProcessingInfo *p) { GDrawHandle *t = (GDrawHandle *) p->p0; if (p->p1) { GcmTexture *tex = t->handle.tex.gcm; GDrawMipmapContext *c = (GDrawMipmapContext *) p->p1; U32 pitch = tex->pitch; U32 width = tex->width; U32 height = tex->height; U32 bheight = c->bheight; U32 level = 0; U32 outy = p->i0; U8 *mipstart = (U8 *) t->handle.tex.gcm_ptr; if (outy >= tex->height) // wait, we've already processed the whole texture! return false; do { // copy image data to destination if (!tex->swizzled) { U8 *dest = mipstart + (outy >> level) * pitch; U8 *src = c->pixels[level]; U32 y; for (y=0; y < bheight; y++) { memcpy(dest, src, width * c->bpp); dest += pitch; src += c->pitch[level]; } mipstart += pitch * height; } else { if (c->bpp == 4) swizzle_subrect_32bpp(mipstart, 0, outy >> level, width, height, c->pixels[level], c->pitch[level], width, bheight); else if (c->bpp == 1) swizzle_subrect_8bpp(mipstart, 0, outy >> level, width, height, c->pixels[level], c->pitch[level], width, bheight); else { assert(c->bpp == 1 || c->bpp == 4); } mipstart += width * height * c->bpp; } width = RR_MAX(width >> 1, 1); height = RR_MAX(height >> 1, 1); bheight = RR_MAX(bheight >> 1, 1); } while (gdraw_MipmapAddLines(c, ++level)); // next chunk please! p->i0 += p->num_rows; // increment y p->texture_data = c->pixels[0]; p->num_rows = c->bheight = RR_MIN(c->bheight, tex->height - p->i0); return true; } else return false; // what do you mean, "more"? you got the whole image already! } static GDrawTexture * RADLINK gdraw_MakeTextureEnd(GDraw_MakeTexture_ProcessingInfo *p, GDrawStats *stats) { RR_UNUSED_VARIABLE(stats); if (p->p1) gdraw_MakeTextureMore(p); // submit last piece of data using more return (GDrawTexture *) p->p0; } static rrbool RADLINK gdraw_UpdateTextureBegin(GDrawTexture *t, void *unique_id, GDrawStats *stats) { RR_UNUSED_VARIABLE(stats); return gdraw_HandleCacheLock((GDrawHandle *) t, unique_id); } static void RADLINK gdraw_UpdateTextureRect(GDrawTexture *t, void *unique_id, S32 x, S32 y, S32 stride, S32 w, S32 h, U8 *samples, gdraw_texture_format format) { RR_UNUSED_VARIABLE(unique_id); GDrawHandle *s = (GDrawHandle *) t; S32 bpp = (format == GDRAW_TEXTURE_FORMAT_font) ? 1 : 4, bpl = bpp * w; GcmTexture *tex = s->handle.tex.gcm; U8 *texptr = (U8 *) s->handle.tex.gcm_ptr; wait_on_fence(s->fence); // make sure it's not active if (!tex->swizzled) { S32 dpitch = tex->pitch; U8 *src = samples; U8 *dst = texptr + y * dpitch + x * bpp; while (h--) { memcpy(dst, src, bpl); dst += dpitch; src += stride; } } else { if (format == GDRAW_TEXTURE_FORMAT_font) swizzle_subrect_8bpp(texptr, x, y, tex->width, tex->height, samples, stride, w, h); else swizzle_subrect_32bpp(texptr, x, y, tex->width, tex->height, samples, stride, w, h); } } static void RADLINK gdraw_UpdateTextureEnd(GDrawTexture *t, void *unique_id, GDrawStats *stats) { RR_UNUSED_VARIABLE(unique_id); RR_UNUSED_VARIABLE(stats); gdraw_HandleCacheUnlock((GDrawHandle *) t); } static void RADLINK gdraw_FreeTexture(GDrawTexture *tt, void *unique_id, GDrawStats *stats) { GDrawHandle *t = (GDrawHandle *) tt; assert(t != NULL); // @GDRAW_ASSERT if (t->owner == unique_id || unique_id == NULL) { if (t->cache == &gdraw->rendertargets) { gdraw_HandleCacheUnlock(t); // cache it by simply not freeing it return; } gdraw_res_kill(t, stats); } } static rrbool RADLINK gdraw_TryToLockTexture(GDrawTexture *t, void *unique_id, GDrawStats *stats) { RR_UNUSED_VARIABLE(stats); return gdraw_HandleCacheLock((GDrawHandle *) t, unique_id); } static void RADLINK gdraw_DescribeTexture(GDrawTexture *tex, GDraw_Texture_Description *desc) { GDrawHandle *p = (GDrawHandle *) tex; desc->width = p->handle.tex.gcm->width; desc->height = p->handle.tex.gcm->height; desc->size_in_bytes = p->bytes; } static void RADLINK gdraw_SetAntialiasTexture(S32 width, U8 *rgba) { if (gdraw->aa_tex.offset) return; S32 pitch = RR_MAX(width * 4, 16); // RSX HW bug: linear textures with pitch<16 cause trouble (we're not swizzled, but play it safe anyway). U8 *data = (U8 *)gdraw_arena_alloc(&gdraw->local_arena, pitch, CELL_GCM_TEXTURE_SWIZZLE_ALIGN_OFFSET); if (!data) return; // since it's a line texture, we can safely mark it as swizzled. gdraw->aa_tex.offset = CELL_GCM_METHOD_DATA_TEXTURE_OFFSET(addr2offs(data)); gdraw->aa_tex.format = CELL_GCM_METHOD_DATA_TEXTURE_FORMAT(CELL_GCM_LOCATION_LOCAL, 0, CELL_GCM_TEXTURE_DIMENSION_1, CELL_GCM_TEXTURE_A8R8G8B8 | CELL_GCM_TEXTURE_SZ | CELL_GCM_TEXTURE_NR, 1); gdraw->aa_tex.remap = TEXREMAP(CHAN_A, REMAP, FROM_B) | TEXREMAP(CHAN_R, REMAP, FROM_A) | TEXREMAP(CHAN_G, REMAP, FROM_R) | TEXREMAP(CHAN_B, REMAP, FROM_G); gdraw->aa_tex.imagerect = CELL_GCM_METHOD_DATA_TEXTURE_IMAGE_RECT(1, width); gdraw->aa_tex.control3 = CELL_GCM_METHOD_DATA_TEXTURE_CONTROL3(pitch, 1); gdraw->aa_tex.width = width; gdraw->aa_tex.height = 1; gdraw->aa_tex.pitch = pitch; memcpy(data, rgba, width * 4); } //////////////////////////////////////////////////////////////////////// // // Vertex buffer creation/deletion // static rrbool RADLINK gdraw_MakeVertexBufferBegin(void *unique_id, gdraw_vformat vformat, S32 vbuf_size, S32 ibuf_size, GDraw_MakeVertexBuffer_ProcessingInfo *p, GDrawStats *stats) { RR_UNUSED_VARIABLE(vformat); GDrawHandle *vb; vb = gdraw_res_alloc_begin(gdraw->vbufcache, vbuf_size + ibuf_size, stats); if (!vb) return false; vb->handle.vbuf.verts = vb->raw_ptr; vb->handle.vbuf.inds = (U8 *) vb->raw_ptr + vbuf_size; p->p0 = vb; p->vertex_data = (U8 *) vb->handle.vbuf.verts; p->index_data = (U8 *) vb->handle.vbuf.inds; p->vertex_data_length = vbuf_size; p->index_data_length = ibuf_size; gdraw_HandleCacheAllocateEnd(vb, vbuf_size + ibuf_size, unique_id, GDRAW_HANDLE_STATE_locked); return true; } static rrbool RADLINK gdraw_MakeVertexBufferMore(GDraw_MakeVertexBuffer_ProcessingInfo *p) { RR_UNUSED_VARIABLE(p); assert(0); return false; } static GDrawVertexBuffer * RADLINK gdraw_MakeVertexBufferEnd(GDraw_MakeVertexBuffer_ProcessingInfo *p, GDrawStats *stats) { RR_UNUSED_VARIABLE(stats); return (GDrawVertexBuffer *) p->p0; } static rrbool RADLINK gdraw_TryLockVertexBuffer(GDrawVertexBuffer *vb, void *unique_id, GDrawStats *stats) { RR_UNUSED_VARIABLE(stats); return gdraw_HandleCacheLock((GDrawHandle *) vb, unique_id); } static void RADLINK gdraw_FreeVertexBuffer(GDrawVertexBuffer *vb, void *unique_id, GDrawStats *stats) { GDrawHandle *h = (GDrawHandle *) vb; assert(h != NULL); // @GDRAW_ASSERT if (h->owner == unique_id) gdraw_res_kill(h, stats); } static void RADLINK gdraw_DescribeVertexBuffer(GDrawVertexBuffer *vbuf, GDraw_VertexBuffer_Description *desc) { GDrawHandle *p = (GDrawHandle *) vbuf; desc->size_in_bytes = p->bytes; } //////////////////////////////////////////////////////////////////////// // // Create/free (or cache) framebuffer-sized textures // static GDrawHandle *get_color_rendertarget(GDrawStats *stats) { RR_UNUSED_VARIABLE(stats); GDrawHandle *t; S32 pitch = gdraw->rt_pitch; S32 size = pitch * gdraw->frametex_height; t = gdraw_HandleCacheGetLRU(&gdraw->rendertargets); if (t) { gdraw_HandleCacheLock(t, (void *) 1); return t; } t = gdraw_HandleCacheAllocateBegin(&gdraw->rendertargets); if (!t) { IggyGDrawSendWarning(NULL, "GDraw rendertarget allocation failed: hit handle limit"); return t; } void *ptr = gdraw_arena_alloc(&gdraw->rt_arena, size, 1); if (!ptr) { IggyGDrawSendWarning(NULL, "GDraw rendertarget allocation failed: out of rendertarget texture memory"); gdraw_HandleCacheAllocateFail(t); return NULL; } t->fence = get_next_fence(); // we're about to start using it immediately, so... t->raw_ptr = NULL; t->handle.tex.gcm_ptr = ptr; GcmTexture *tex = t->handle.tex.gcm; tex->offset = CELL_GCM_METHOD_DATA_TEXTURE_OFFSET(addr2offs(t->handle.tex.gcm_ptr)); tex->format = CELL_GCM_METHOD_DATA_TEXTURE_FORMAT(gdraw->rt_loc, 0, CELL_GCM_TEXTURE_DIMENSION_2, CELL_GCM_TEXTURE_A8R8G8B8 | CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_NR, 1); tex->remap = TEXREMAP(CHAN_A, REMAP, FROM_A) | TEXREMAP(CHAN_R, REMAP, FROM_R) | TEXREMAP(CHAN_G, REMAP, FROM_G) | TEXREMAP(CHAN_B, REMAP, FROM_B); tex->imagerect = CELL_GCM_METHOD_DATA_TEXTURE_IMAGE_RECT(gdraw->frametex_height, gdraw->frametex_width); tex->control3 = CELL_GCM_METHOD_DATA_TEXTURE_CONTROL3(pitch, 1); tex->width = gdraw->frametex_width; tex->height = gdraw->frametex_height; tex->pitch = pitch; gdraw_HandleCacheAllocateEnd(t, size, (void *) 1, GDRAW_HANDLE_STATE_locked); return t; } //////////////////////////////////////////////////////////////////////// // // Rendering helpers // static void set_rsx_texture(CellGcmContextData * RADRESTRICT gcm, U32 sampler, GcmTexture * RADRESTRICT tex, U32 wrap, U32 nearest) { static const U32 addrmodes[] = { // GDRAW_WRAP_clamp CELL_GCM_METHOD_DATA_TEXTURE_ADDRESS(CELL_GCM_TEXTURE_CLAMP_TO_EDGE, CELL_GCM_TEXTURE_CLAMP_TO_EDGE, CELL_GCM_TEXTURE_CLAMP_TO_EDGE, CELL_GCM_TEXTURE_UNSIGNED_REMAP_NORMAL, CELL_GCM_TEXTURE_ZFUNC_NEVER, 0, 0), // GDRAW_WRAP_repeat CELL_GCM_METHOD_DATA_TEXTURE_ADDRESS(CELL_GCM_TEXTURE_WRAP, CELL_GCM_TEXTURE_WRAP, CELL_GCM_TEXTURE_CLAMP_TO_EDGE, CELL_GCM_TEXTURE_UNSIGNED_REMAP_NORMAL, CELL_GCM_TEXTURE_ZFUNC_NEVER, 0, 0), // GDRAW_WRAP_mirror CELL_GCM_METHOD_DATA_TEXTURE_ADDRESS(CELL_GCM_TEXTURE_MIRROR, CELL_GCM_TEXTURE_MIRROR, CELL_GCM_TEXTURE_CLAMP_TO_EDGE, CELL_GCM_TEXTURE_UNSIGNED_REMAP_NORMAL, CELL_GCM_TEXTURE_ZFUNC_NEVER, 0, 0), }; static const U32 filtermodes[] = { CELL_GCM_METHOD_DATA_TEXTURE_FILTER(0, CELL_GCM_TEXTURE_LINEAR_LINEAR, CELL_GCM_TEXTURE_LINEAR, CELL_GCM_TEXTURE_CONVOLUTION_QUINCUNX), CELL_GCM_METHOD_DATA_TEXTURE_FILTER(0, CELL_GCM_TEXTURE_LINEAR_LINEAR, CELL_GCM_TEXTURE_NEAREST, CELL_GCM_TEXTURE_CONVOLUTION_QUINCUNX), }; assert(wrap < sizeof(addrmodes) / sizeof(addrmodes[0])); assert(nearest < sizeof(filtermodes) / sizeof(filtermodes[0])); CommandData * RADRESTRICT cmd = put_command(gcm, 11); cmd->w0 = CELL_GCM_METHOD_HEADER_TEXTURE_OFFSET(sampler, 8); cmd->w1 = tex->offset; cmd->w2 = tex->format; cmd->w3 = addrmodes[wrap]; cmd->w4 = CELL_GCM_METHOD_DATA_TEXTURE_CONTROL0(1, 0, 12<<8, 0); cmd->w5 = tex->remap; cmd->w6 = filtermodes[nearest]; cmd->w7 = tex->imagerect; cmd->w8 = CELL_GCM_METHOD_DATA_TEXTURE_BORDER_COLOR(0); cmd->w9 = CELL_GCM_METHOD_HEADER_TEXTURE_CONTROL3(sampler, 1); cmd->wa = tex->control3; } static inline void disable_scissor(bool force) { if (force || gdraw->scissor_state) { // set whole viewport as scissor test gdraw->scissor_state = 0; cellGcmSetScissor(gdraw->gcm, gdraw->cview.x, gdraw->cview.y, gdraw->cview.w, gdraw->cview.h); } } static void set_viewport_raw(S32 x, S32 y, S32 w, S32 h) { float scale[4] = { w*0.5f, -h*0.5f, 0.5f, 0.0f }; float offset[4] = { x + scale[0], y - scale[1], 0.5f, 0.0f }; cellGcmSetViewport(gdraw->gcm, x, y, w, h, 0.0f, 1.0f, scale, offset); gdraw->cview.x = x; gdraw->cview.y = y; gdraw->cview.w = w; gdraw->cview.h = h; disable_scissor(true); } static void set_projection_raw(S32 x0, S32 x1, S32 y0, S32 y1) { gdraw->projection[0] = 2.0f / (x1-x0); gdraw->projection[1] = 2.0f / (y1-y0); gdraw->projection[2] = (x1 + x0) / (F32) (x0 - x1); gdraw->projection[3] = (y1 + y0) / (F32) (y0 - y1); cellGcmSetVertexProgramConstants(gdraw->gcm, VVAR_viewproj, 4, gdraw->projection); } static void set_viewport(void) { if (gdraw->in_blur) { set_viewport_raw(gdraw->cview.x, gdraw->cview.y, gdraw->cview.w, gdraw->cview.h); return; } if (gdraw->cur == gdraw->frame) // if the rendering stack is empty // render a tile-sized region to the user-request tile location set_viewport_raw(gdraw->vx, gdraw->vy, gdraw->tw, gdraw->th); else if (gdraw->cur->cached) set_viewport_raw(0, 0, gdraw->cur->width, gdraw->cur->height); else // if on the render stack, draw a padded-tile-sized region at the origin set_viewport_raw(0, 0, gdraw->tpw, gdraw->tph); } static void set_projection(void) { if (gdraw->in_blur) return; if (gdraw->cur == gdraw->frame) // if the render stack is empty set_projection_raw(gdraw->tx0,gdraw->tx0+gdraw->tw,gdraw->ty0+gdraw->th,gdraw->ty0); else if (gdraw->cur->cached) set_projection_raw(gdraw->cur->base_x, gdraw->cur->base_x + gdraw->cur->width, gdraw->cur->base_y + gdraw->cur->height, gdraw->cur->base_y); else set_projection_raw(gdraw->tx0p,gdraw->tx0p+gdraw->tpw,gdraw->ty0p+gdraw->tph,gdraw->ty0p); } static void set_common_renderstate() { CellGcmContextData *gcm = gdraw->gcm; S32 i; // all the render states we never change while drawing cellGcmSetCullFaceEnable(gcm, CELL_GCM_FALSE); cellGcmSetCylindricalWrap(gcm, 0, 0); cellGcmSetDepthBoundsTestEnable(gcm, CELL_GCM_FALSE); cellGcmSetLogicOpEnable(gcm, CELL_GCM_FALSE); cellGcmSetPolygonOffset(gcm, 0.0f, 0.0f); cellGcmSetPolygonStippleEnable(gcm, CELL_GCM_FALSE); cellGcmSetPolySmoothEnable(gcm, CELL_GCM_FALSE); cellGcmSetShadeMode(gcm, CELL_GCM_SMOOTH); cellGcmSetUserClipPlaneControl(gcm, CELL_GCM_USER_CLIP_PLANE_DISABLE, CELL_GCM_USER_CLIP_PLANE_DISABLE, CELL_GCM_USER_CLIP_PLANE_DISABLE, CELL_GCM_USER_CLIP_PLANE_DISABLE, CELL_GCM_USER_CLIP_PLANE_DISABLE, CELL_GCM_USER_CLIP_PLANE_DISABLE); cellGcmSetStencilOp(gcm, CELL_GCM_KEEP, CELL_GCM_KEEP, CELL_GCM_REPLACE); cellGcmSetZcullEnable(gcm, 0, 0); cellGcmSetTwoSidedStencilTestEnable(gcm, CELL_GCM_FALSE); cellGcmSetTwoSideLightEnable(gcm, CELL_GCM_FALSE); cellGcmSetClearDepthStencil(gcm, 0xffffff00); cellGcmSetClearColor(gcm, 0); cellGcmSetBlendEquation(gcm, CELL_GCM_FUNC_ADD, CELL_GCM_FUNC_ADD); cellGcmSetColorMask(gcm, COLOR_MASK_ALL); cellGcmSetAlphaTestEnable(gcm, CELL_GCM_FALSE); // clear all vertex attr inputs for (i=0; i < 16; i++) cellGcmSetVertexDataArray(gcm, i, 0, 0, 0, CELL_GCM_VERTEX_F, CELL_GCM_LOCATION_LOCAL, 0); cellGcmSetTransferLocation(gcm,CELL_GCM_LOCATION_LOCAL); cellGcmSetVertexDataBase(gcm, 0, 0); // load all our vertex programs into their respective slots so we can switch between them // by just changing the start slot for (i=0; i < GDRAW_vformat__basic_count; i++) { ProgramWithCachedVariableLocations *p = &gdraw->vprog[i]; if (p->program) cellGcmSetVertexProgram(gcm, p->program, p->ucode); } cellGcmSetVertexAttribInputMask(gcm, 0x3); // enable position+attr1 input (all we ever use) // initialize all our vertex constants to zero void *vconst; cellGcmSetVertexProgramConstantsPointer(gcm, 0, VVAR_count, &vconst); memset(vconst, 0, 4 * sizeof(F32) * VVAR_count); // reset our state caching for (i=0; i < MAX_SAMPLERS; i++) { gdraw->active_tex[i] = NULL; cellGcmSetTextureControl(gcm, i, CELL_GCM_FALSE, 0, 0, 0); } assert(gdraw->aa_tex.offset != 0); // if you hit this, your initialization is screwed up. set_rsx_texture(gdraw->gcm, AATEX_SAMPLER, &gdraw->aa_tex, GDRAW_WRAP_clamp, 0); gdraw->cur_fprog = NULL; gdraw->vert_format = -1; gdraw->scissor_state = ~0u; gdraw->blend_mode = -1; gdraw->stencil_key = ~0u; gdraw->z_key = ~0u; } static void clear_renderstate(void) { CellGcmContextData *gcm = gdraw->gcm; cellGcmSetStencilTestEnable(gcm, CELL_GCM_FALSE); cellGcmSetColorMask(gcm, COLOR_MASK_ALL); cellGcmSetDepthTestEnable(gcm, CELL_GCM_FALSE); cellGcmSetDepthFunc(gcm, CELL_GCM_LESS); cellGcmSetDepthMask(gcm, CELL_GCM_FALSE); cellGcmSetBlendEnable(gcm, CELL_GCM_FALSE); disable_scissor(false); gdraw->scissor_state = 0; gdraw->blend_mode = GDRAW_BLEND_none; gdraw->stencil_key = 0; gdraw->z_key = 0; } static void set_render_target() { GcmTexture *tex = NULL; S32 i; CellGcmSurface surf = gdraw->main_surface; if (gdraw->cur->color_buffer) tex = gdraw->cur->color_buffer->handle.tex.gcm; if (tex) { surf.colorLocation[0] = gdraw->rt_loc; surf.colorOffset[0] = tex->offset; surf.colorPitch[0] = tex->pitch; surf.x = 0; surf.y = 0; surf.width = tex->width; surf.height = tex->height; } cellGcmSetSurface(gdraw->gcm, &surf); // invalidate current textures (need to reset them to force L1 texture cache flush) for (i=0; i < MAX_SAMPLERS; ++i) gdraw->active_tex[i] = NULL; } //////////////////////////////////////////////////////////////////////// // // Begin rendering for a frame // void gdraw_GCM_SetTileOrigin(CellGcmSurface *surf, S32 x, S32 y) { assert(surf->colorFormat == CELL_GCM_SURFACE_A8R8G8B8); assert(surf->depthFormat == CELL_GCM_SURFACE_Z24S8); gdraw->main_surface = *surf; gdraw->vx = x; gdraw->vy = y; for (int i=1; i<4; i++) { gdraw->main_surface.colorLocation[i] = CELL_GCM_LOCATION_LOCAL; gdraw->main_surface.colorOffset[i] = 0; gdraw->main_surface.colorPitch[i] = 64; } } static void RADLINK gdraw_SetViewSizeAndWorldScale(S32 w, S32 h, F32 scalex, F32 scaley) { memset(gdraw->frame, 0, sizeof(gdraw->frame)); gdraw->cur = gdraw->frame; gdraw->fw = w; gdraw->fh = h; gdraw->tw = w; gdraw->th = h; gdraw->world_to_pixel[0] = scalex; gdraw->world_to_pixel[1] = scaley; set_viewport(); } // must include anything necessary for texture creation/update static void RADLINK gdraw_RenderingBegin(void) { set_common_renderstate(); } static void RADLINK gdraw_RenderingEnd(void) { clear_renderstate(); } static void RADLINK gdraw_RenderTileBegin(S32 x0, S32 y0, S32 x1, S32 y1, S32 pad, GDrawStats *stats) { RR_UNUSED_VARIABLE(stats); if (x0 == 0 && y0 == 0 && x1 == gdraw->fw && y1 == gdraw->fh) pad = 0; gdraw->tx0 = x0; gdraw->ty0 = y0; gdraw->tw = x1-x0; gdraw->th = y1-y0; // padded region gdraw->tx0p = RR_MAX(x0 - pad, 0); gdraw->ty0p = RR_MAX(y0 - pad, 0); gdraw->tpw = RR_MIN(x1 + pad, gdraw->fw) - gdraw->tx0p; gdraw->tph = RR_MIN(y1 + pad, gdraw->fh) - gdraw->ty0p; // check if this fits inside our rendertarget buffers // if you hit this assert, the rendertarget_width / rendertarget_height // in your gdraw_GCM_MemoryConfig is too small! assert(gdraw->tpw <= gdraw->frametex_width && gdraw->tph <= gdraw->frametex_height); // need to clear both the requested area of the viewport // *and* the area the rendertargets will use... ideally // we'd work out the minimal rectangles to clear, or just // always clear the whole thing set_render_target(); set_viewport_raw(0, 0, gdraw->tpw, gdraw->tph); cellGcmSetStencilMask(gdraw->gcm, 0xff); cellGcmSetClearSurface(gdraw->gcm, CELL_GCM_CLEAR_Z | CELL_GCM_CLEAR_S); set_viewport(); set_projection(); // if the first clear didn't get the actual viewport region, clear it if (gdraw->tpw < gdraw->vx+gdraw->tw || gdraw->tph < gdraw->vy+gdraw->th) cellGcmSetClearSurface(gdraw->gcm, CELL_GCM_CLEAR_Z | CELL_GCM_CLEAR_S); } static void RADLINK gdraw_RenderTileEnd(GDrawStats *stats) { gdraw->tile_end_fence = put_fence(); // reap once per frame even if there are no allocs gdraw_res_reap(gdraw->texturecache, stats); gdraw_res_reap(gdraw->vbufcache, stats); } void gdraw_GCM_NoMoreGDrawThisFrame(void) { gdraw_HandleCacheTick(gdraw->texturecache, gdraw->tile_end_fence); gdraw_HandleCacheTick(gdraw->vbufcache, gdraw->tile_end_fence); } #define MAX_DEPTH_VALUE (1 << 13) static void RADLINK gdraw_GetInfo(GDrawInfo *d) { d->num_stencil_bits = 8; d->max_id = MAX_DEPTH_VALUE-2; // for floating point depth, just use mantissa, e.g. 16-20 bits d->max_texture_size = 4096; d->buffer_format = GDRAW_BFORMAT_vbib; d->shared_depth_stencil = 1; d->always_mipmap = 0; d->conditional_nonpow2 = 0; } //////////////////////////////////////////////////////////////////////// // // Enable/disable rendertargets in stack fashion // static rrbool RADLINK gdraw_TextureDrawBufferBegin(gswf_recti *region, gdraw_texture_format format, U32 flags, void *owner, GDrawStats *stats) { RR_UNUSED_VARIABLE(format); RR_UNUSED_VARIABLE(flags); GDrawFramebufferState *n = gdraw->cur+1; GDrawHandle *t; if (gdraw->tw == 0 || gdraw->th == 0) { IggyGDrawSendWarning(NULL, "GDraw warning: w=0,h=0 rendertarget"); return false; } if (n >= &gdraw->frame[MAX_RENDER_STACK_DEPTH]) { assert(0); IggyGDrawSendWarning(NULL, "GDraw rendertarget nesting exceeds MAX_RENDER_STACK_DEPTH"); return false; } if (owner) { // @TODO implement t = NULL; assert(0); // nyi } else { t = get_color_rendertarget(stats); if (!t) return false; } n->color_buffer = t; assert(n->color_buffer != NULL); // @GDRAW_ASSERT n->cached = owner != NULL; if (owner) { n->base_x = region->x0; n->base_y = region->y0; n->width = region->x1 - region->x0; n->height = region->y1 - region->y0; } assert(gdraw->frametex_width >= gdraw->tw && gdraw->frametex_height >= gdraw->th); // @GDRAW_ASSERT int k = t - gdraw->rendertargets.handle; if (region) { S32 ox, oy, pad = 2; // 2 pixels of border on all sides // 1 pixel turns out to be not quite enough with the interpolator precision we get. if (gdraw->in_blur) ox = oy = 0; else ox = gdraw->tx0p, oy = gdraw->ty0p; // clamp region to tile S32 xt0 = RR_MAX(region->x0 - ox, 0); S32 yt0 = RR_MAX(region->y0 - oy, 0); S32 xt1 = RR_MIN(region->x1 - ox, gdraw->tpw); S32 yt1 = RR_MIN(region->y1 - oy, gdraw->tph); // but the padding needs to clamp to render target bounds S32 x = RR_MAX(xt0 - pad, 0); S32 y = RR_MAX(yt0 - pad, 0); S32 w = RR_MIN(xt1 + pad, gdraw->frametex_width) - x; S32 h = RR_MIN(yt1 + pad, gdraw->frametex_height) - y; if (w <= 0 || h <= 0) { // region doesn't intersect with current tile gdraw_FreeTexture((GDrawTexture *) t, 0, stats); // note: don't send a warning since this will happen during regular tiled rendering return false; } if (!gdraw->in_blur) set_viewport_raw(x, y, w, h); // not strictly necessary (we only use this for a clear), just to avoid GPAD warnings cellGcmSetScissor(gdraw->gcm, x, y, w, h); gdraw->rt_valid[k].x0 = xt0; gdraw->rt_valid[k].y0 = yt0; gdraw->rt_valid[k].x1 = xt1; gdraw->rt_valid[k].y1 = yt1; } else { gdraw->rt_valid[k].x0 = 0; gdraw->rt_valid[k].y0 = 0; gdraw->rt_valid[k].x1 = gdraw->frametex_width; gdraw->rt_valid[k].y1 = gdraw->frametex_height; } // wait for all currently queued draw commands to finish reading textures // (in case a draw command is still using this rendertarget as a texture) GDrawFence fence = put_fence(); cellGcmFlush(gdraw->gcm); cellGcmSetWaitLabel(gdraw->gcm, gdraw->fence_label_index, (U32) fence.value); ++gdraw->cur; set_render_target(); cellGcmSetClearSurface(gdraw->gcm, CELL_GCM_CLEAR_R | CELL_GCM_CLEAR_G | CELL_GCM_CLEAR_B | CELL_GCM_CLEAR_A); set_viewport(); set_projection(); return true; } static GDrawTexture *RADLINK gdraw_TextureDrawBufferEnd(GDrawStats *stats) { RR_UNUSED_VARIABLE(stats); GDrawFramebufferState *n = gdraw->cur; GDrawFramebufferState *m = --gdraw->cur; if (gdraw->tw == 0 || gdraw->th == 0) return 0; if (n >= &gdraw->frame[MAX_RENDER_STACK_DEPTH]) return 0; // already returned a warning in Start...() assert(m >= gdraw->frame); // bug in Iggy -- unbalanced if (m != gdraw->frame) { assert(m->color_buffer != NULL); // @GDRAW_ASSERT } assert(n->color_buffer != NULL); // @GDRAW_ASSERT // wait for render to texture operation to finish // can't put down a backend fence directly, there might still be // a texture fence pending that completes after the backend fence // (can this really happen? not sure, but better safe than sorry). // // so: wait for completion of pending texture fence, then put down // backend label, then wait on that. cellGcmSetWaitLabel(gdraw->gcm, gdraw->fence_label_index, U32(gdraw->next_fence_index - 1)); GDrawFence fence = put_backend_fence(); cellGcmFlush(gdraw->gcm); cellGcmSetWaitLabel(gdraw->gcm, gdraw->fence_label_index, (U32) fence.value); cellGcmSetInvalidateTextureCache(gdraw->gcm, CELL_GCM_INVALIDATE_TEXTURE); n->color_buffer->fence = fence; // switch back to old rendertarget set_render_target(); set_viewport(); set_projection(); return (GDrawTexture *) n->color_buffer; } //////////////////////////////////////////////////////////////////////// // // Clear stencil/depth buffers // // Open question whether we'd be better off finding bounding boxes // and only clearing those; it depends exactly how fast clearing works. // static void RADLINK gdraw_ClearStencilBits(U32 bits) { // @TODO: actually only clear 'bits', not everything RR_UNUSED_VARIABLE(bits); cellGcmSetStencilMask(gdraw->gcm, 0xff); cellGcmSetClearSurface(gdraw->gcm, CELL_GCM_CLEAR_S); gdraw->stencil_key = ~0u; // invalid } // this only happens rarely (hopefully never) if we use the depth buffer, // so we can just clear the whole thing static void RADLINK gdraw_ClearID(void) { cellGcmSetClearSurface(gdraw->gcm, CELL_GCM_CLEAR_Z); } //////////////////////////////////////////////////////////////////////// // // Fragment shader setting / patching // static RADINLINE U64 microcode_endian_swap(U64 v) { U64 lo = v & 0x0000ffff0000ffffull; U64 hi = v ^ lo; return (lo << 16) | (hi >> 16); } static void set_fragment_shader(GDraw * RADRESTRICT gd, ProgramWithCachedVariableLocations *prg) { if (prg != gd->cur_fprog) { gd->cur_fprog = prg; cellGcmSetFragmentProgramLoad(gd->gcm, &prg->cfg); } } static void set_fragment_para(GDraw * RADRESTRICT gd, U32 ucode_offs, int para, const void *values, int count) { if (para == -1) return; gd->cur_fprog = NULL; // need to re-set shader after patching const U64 *inv = (const U64 *) values; const int *patch_offs = (const int *) para; int offs = *patch_offs; void *ptr; while (count--) { U64 v0 = microcode_endian_swap(*inv++); U64 v1 = microcode_endian_swap(*inv++); do { cellGcmSetInlineTransferPointer(gd->gcm, ucode_offs + offs, 4, &ptr); U64 * RADRESTRICT p64 = (U64 *) ptr; p64[0] = v0; p64[1] = v1; } while ((offs = *++patch_offs) != -1); offs = *++patch_offs; } } //////////////////////////////////////////////////////////////////////// // // Set all the render state from GDrawRenderState // // This also is responsible for getting the framebuffer into a texture // if the read-modify-write blend operation can't be expressed with // the native blend operators. (E.g. "screen") // static RADINLINE void set_texture(S32 texunit, GDrawTexture *tex) { assert(texunit < MAX_SAMPLERS); assert(tex != NULL); if (gdraw->active_tex[texunit] != tex) { gdraw->active_tex[texunit] = tex; GDrawHandle *h = (GDrawHandle *) tex; set_rsx_texture(gdraw->gcm, texunit, h->handle.tex.gcm, GDRAW_WRAP_clamp, 0); } } #define MAKEBLEND(src,dst) ((src << 16) | src), ((dst << 16) | dst) static struct gdraw_gcm_blendspec { U32 enable; U32 src; U32 dst; U32 _pad; } blend_states[] = { { 0, MAKEBLEND(CELL_GCM_ONE, CELL_GCM_ZERO), 0 }, // GDRAW_BLEND_none { 1, MAKEBLEND(CELL_GCM_ONE, CELL_GCM_ONE_MINUS_SRC_ALPHA), 0 }, // GDRAW_BLEND_alpha { 1, MAKEBLEND(CELL_GCM_DST_COLOR, CELL_GCM_ONE_MINUS_SRC_ALPHA), 0 }, // GDRAW_BLEND_multiply { 1, MAKEBLEND(CELL_GCM_ONE, CELL_GCM_ONE), 0 }, // GDRAW_BLEND_add { 0, MAKEBLEND(CELL_GCM_ONE, CELL_GCM_ZERO), 0 }, // GDRAW_BLEND_filter { 0, MAKEBLEND(CELL_GCM_ONE, CELL_GCM_ZERO), 0 }, // GDRAW_BLEND_special }; #undef MAKEBLEND // converts a depth id into a Z value static RADINLINE F64 depth_from_id(S64 id) { #ifndef __SNC__ return (1.0 - 1.0 / (F64) MAX_DEPTH_VALUE) - __fcfid(id) * (1.0 / (F64) MAX_DEPTH_VALUE); // = 1 - (id + 1) / MAX_DEPTH_VALUE #else return (1.0 - 1.0 / (F64) MAX_DEPTH_VALUE) - id * (1.0 / (F64) MAX_DEPTH_VALUE); // = 1 - (id + 1) / MAX_DEPTH_VALUE #endif } #define copy_vec4(d,s) { F32 x = s[0]; F32 y = s[1]; F32 z = s[2]; F32 w = s[3]; d.x = x; d.y = y; d.z = z; d.w = w; } static void set_renderstate_full(U32 vertex_slot, const GDrawRenderState * RADRESTRICT r, GDrawStats *stats) { RR_UNUSED_VARIABLE(stats); GDraw * RADRESTRICT gd = gdraw; CellGcmContextData * RADRESTRICT gcm = gd->gcm; volatile VertexVars * RADRESTRICT vvars; ProgramWithCachedVariableLocations *fprog; #ifndef __SNC__ volatile S64 depth_id = r->id; // load and sign extend here to avoid an LHS stall #else S64 depth_id = r->id; #endif // prepare commands for vertex program switch + constant upload int nconstants = (r->texgen0_enabled ? VVAR_count : VVAR_count_world_and_color) * 4; CommandData * RADRESTRICT cmd = put_command(gcm, 2 + // set vertex program start slot (2 + nconstants)); // vertex constant loads // set vertex program start slot cmd->w0 = CELL_GCM_METHOD(CELL_GCM_NV4097_SET_TRANSFORM_PROGRAM_START, 1); cmd->w1 = vertex_slot; // update vertex shader constants cmd->w2 = CELL_GCM_METHOD(CELL_GCM_NV4097_SET_TRANSFORM_CONSTANT_LOAD, nconstants + 1); cmd->w3 = 0; // load starting from constant 0 void *vvars_ptr = &cmd->w4; // vertex constants start here vvars = (volatile VertexVars *) vvars_ptr; int texgen0 = r->texgen0_enabled; int tex0mode = r->tex0_mode; int blend_mode = r->blend_mode; if (!r->use_world_space) gdraw_ObjectSpace(&vvars->world[0].x, r->o2w, depth_from_id(depth_id), 0.0f); else gdraw_WorldSpace(&vvars->world[0].x, gd->world_to_pixel, depth_from_id(depth_id), 0.0f); copy_vec4(vvars->x_off, r->edge_matrix); copy_vec4(vvars->color_mul, r->color); if (texgen0) { copy_vec4(vvars->s0_texgen, r->s0_texgen); copy_vec4(vvars->t0_texgen, r->t0_texgen); } // set the blend mode assert(blend_mode != GDRAW_BLEND_filter); // shouldn't come through this path assert(blend_mode >= 0 && blend_mode < (int) (sizeof(blend_states)/sizeof(*blend_states))); gdraw_gcm_blendspec *blend = &blend_states[blend_mode]; if (blend_mode != gd->blend_mode) { gd->blend_mode = blend_mode; cmd = reserve_command(gcm, 4); // must be enough for both cases! if (blend->enable) { cmd->w0 = CELL_GCM_METHOD(CELL_GCM_NV4097_SET_BLEND_ENABLE, 3); cmd->w1 = 1; // enable cmd->w2 = blend->src; // src factor cmd->w3 = blend->dst; // dst factor gcm->current = &cmd->w4; } else { cmd->w0 = CELL_GCM_METHOD(CELL_GCM_NV4097_SET_BLEND_ENABLE, 1); cmd->w1 = 0; gcm->current = &cmd->w2; } } // set the fragment program if (blend_mode != GDRAW_BLEND_special) { fprog = gd->basic_fprog[tex0mode]; if (r->cxf_add) { fprog++; // additive if (r->cxf_add[3]) fprog++; // additive alpha } } else fprog = &gd->exceptional_blend[r->special_blend]; // set textures if (tex0mode != GDRAW_TEXTURE_none) { if (!r->tex[0]) // this can happen if some allocs fail. the rendered image will be invalid and we don't care. return; if (gd->active_tex[0] != r->tex[0]) { gd->active_tex[0] = r->tex[0]; set_rsx_texture(gcm, 0, ((GDrawHandle *) r->tex[0])->handle.tex.gcm, r->wrap0, r->nearest0); } } // Set pixel shader and constants if (tex0mode == GDRAW_TEXTURE_focal_gradient) set_fragment_para(gd, fprog->cfg.offset, fprog->vars[VAR_focal], r->focal_point, 1); if (r->cxf_add) { float temp[4] = { r->cxf_add[0]/255.0f, r->cxf_add[1]/255.0f, r->cxf_add[2]/255.0f, r->cxf_add[3]/255.0f }; set_fragment_para(gd, fprog->cfg.offset, fprog->vars[VAR_cadd], temp, 1); } set_fragment_shader(gd, fprog); // Set pixel operation states if (r->scissor) { int x,y,w,h,xs,ys; if (gd->cur == gd->frame) { xs = gd->tx0 - gd->vx; ys = gd->ty0 - gd->vy; } else { xs = gd->tx0p; ys = gd->ty0p; } // clip against viewport x = RR_MAX(r->scissor_rect.x0 - xs, gd->cview.x); y = RR_MAX(r->scissor_rect.y0 - ys, gd->cview.y); w = RR_MIN(r->scissor_rect.x1 - xs, gd->cview.x + gd->cview.w) - x; h = RR_MIN(r->scissor_rect.y1 - ys, gd->cview.y + gd->cview.h) - y; if (w <= 0 || h <= 0) { // dummy scissor rect in case our actual scissor is empty x = gd->cview.x; y = gd->cview.y; w = h = 0; } cellGcmSetScissorInline(gcm, x, y, w, h); gd->scissor_state = 1; } else if (r->scissor != gd->scissor_state) disable_scissor(0); // stencil changed? U32 stencil_key = r->stencil_test | (r->stencil_set << 8); if (stencil_key != gd->stencil_key) { gd->stencil_key = stencil_key; // it just so happens that all the states we want to set are in a contiguous method index range! cmd = put_command(gcm, 7); cmd->w0 = CELL_GCM_METHOD(CELL_GCM_NV4097_SET_COLOR_MASK, 6); cmd->w1 = r->stencil_set ? 0 : COLOR_MASK_ALL; // COLOR_MASK cmd->w2 = (r->stencil_set | r->stencil_test) != 0; // STENCIL_TEST_ENABLE cmd->w3 = r->stencil_set; // STENCIL_MASK cmd->w4 = CELL_GCM_EQUAL; // STENCIL_FUNC cmd->w5 = 0xff; // STENCIL_FUNC_REF cmd->w6 = r->stencil_test; // STENCIL_FUNC_MASK } // z mode changed? U32 z_key = r->set_id | (r->test_id << 1); if (z_key != gd->z_key) { gd->z_key = z_key; cmd = put_command(gcm, 4); cmd->w0 = CELL_GCM_METHOD(CELL_GCM_NV4097_SET_DEPTH_FUNC, 3); cmd->w1 = r->test_id ? CELL_GCM_LESS : CELL_GCM_EQUAL; // DEPTH_FUNC cmd->w2 = r->set_id; // DEPTH_MASK cmd->w3 = r->test_id | r->set_id; // DEPTH_TEST_ENABLE } } static RADINLINE void set_renderstate(U32 vertex_slot, const GDrawRenderState * RADRESTRICT r, GDrawStats *stats) { if (r->identical_state) { // may need to switch vertex shader, but that's it - very quick to set up. CommandData * RADRESTRICT cmd = put_command(gdraw->gcm, 2); cmd->w0 = CELL_GCM_METHOD(CELL_GCM_NV4097_SET_TRANSFORM_PROGRAM_START, 1); cmd->w1 = vertex_slot; } else set_renderstate_full(vertex_slot, r, stats); } //////////////////////////////////////////////////////////////////////// // // Vertex formats // struct gdraw_gcm_vformat_desc { int size; int tex_offs; U32 format0; U32 format1; } vformats[GDRAW_vformat__basic_count] = { // sz toffs stride0 size0 type0 stride1 size1 type1 { 8, 0, ( 8 << 8) | (2 << 4) | CELL_GCM_VERTEX_F, ( 0 << 8) | (0 << 4) | CELL_GCM_VERTEX_F }, // GDRAW_vformat_v2 { 16, 8, (16 << 8) | (2 << 4) | CELL_GCM_VERTEX_F, (16 << 8) | (4 << 4) | CELL_GCM_VERTEX_S32K }, // GDRAW_vformat_v2aa { 16, 8, (16 << 8) | (2 << 4) | CELL_GCM_VERTEX_F, (16 << 8) | (2 << 4) | CELL_GCM_VERTEX_F }, // GDRAW_vformat_v2tc2 }; static RADINLINE void set_vertex_decl(GDraw * RADRESTRICT gd, CellGcmContextData * RADRESTRICT gcm, int format_index, int location, U32 vertaddr) { assert(format_index >= 0 && format_index < (int) (sizeof(vformats)/sizeof(*vformats))); CommandData * RADRESTRICT cmd = reserve_command(gcm, 6); // must be enough for the worst case! cmd->w0 = CELL_GCM_METHOD(CELL_GCM_NV4097_SET_VERTEX_DATA_ARRAY_OFFSET, 2); cmd->w1 = CELL_GCM_METHOD_DATA_VERTEX_DATA_ARRAY_OFFSET(location, vertaddr + 0); // offset for attr 0 cmd->w2 = CELL_GCM_METHOD_DATA_VERTEX_DATA_ARRAY_OFFSET(location, vertaddr + 8); // offset for attr 1 if (format_index != gd->vert_format) { gd->vert_format = format_index; gdraw_gcm_vformat_desc *fmt = &vformats[format_index]; U32 fmt0 = fmt->format0; U32 fmt1 = fmt->format1; cmd->w3 = CELL_GCM_METHOD(CELL_GCM_NV4097_SET_VERTEX_DATA_ARRAY_FORMAT, 2); cmd->w4 = fmt0; cmd->w5 = fmt1; gcm->current = &cmd->w6; } else gcm->current = &cmd->w3; } //////////////////////////////////////////////////////////////////////// // // Draw triangles with a given renderstate // static RADINLINE void fence_resources(GDraw * RADRESTRICT gd, void *r1, void *r2=NULL, void *r3=NULL, void *r4=NULL) { GDrawFence fence; fence.value = gd->next_fence_index; if (r1) ((GDrawHandle *) r1)->fence = fence; if (r2) ((GDrawHandle *) r2)->fence = fence; if (r3) ((GDrawHandle *) r3)->fence = fence; if (r4) ((GDrawHandle *) r4)->fence = fence; if (--gd->fence_batch_counter == 0) put_fence(); } static U32 vprog(S32 vertex_format, GDrawRenderState *r) { RR_UNUSED_VARIABLE(r); return gdraw->vslot[vertex_format]; } static void RADLINK gdraw_DrawIndexedTriangles(GDrawRenderState *r, GDrawPrimitive *p, GDrawVertexBuffer *buf, GDrawStats * RADRESTRICT stats) { GDrawHandle *vb = (GDrawHandle *) buf; GDraw * RADRESTRICT gd = gdraw; CellGcmContextData * RADRESTRICT gcm = gd->gcm; S32 vformat = p->vertex_format; set_renderstate(vprog(vformat, r), r, stats); if (vb) { U32 vertaddr = vbufaddr2offs(gd, vb->handle.vbuf.verts) + (U32) (UINTa) p->vertices; U32 indaddr = vbufaddr2offs(gd, vb->handle.vbuf.inds) + (U32) (UINTa) p->indices; int loc = gd->vbuf_loc; set_vertex_decl(gd, gcm, vformat, loc, vertaddr); cellGcmSetDrawIndexArray(gcm, CELL_GCM_PRIMITIVE_TRIANGLES, p->num_indices, CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16, loc, indaddr); } else if (p->indices) { S32 vertex_size = p->num_vertices * vformats[vformat].size; S32 index_size = p->num_indices * 2; U8 * RADRESTRICT ring_data = (U8 *) gdraw_bufring_alloc(&gd->dyn_vb, vertex_size + index_size, CELL_GCM_VERTEX_TEXTURE_CACHE_LINE_SIZE); if (ring_data) { // normal case: fits inside ring buffer memcpy(ring_data, p->vertices, vertex_size); memcpy(ring_data + vertex_size, p->indices, index_size); cellGcmSetInvalidateVertexCache(gcm); U32 vertaddr = dynvbaddr2offs(gd, ring_data); U32 indaddr = vertaddr + vertex_size; int loc = gd->dynvb_loc; set_vertex_decl(gd, gcm, vformat, loc, vertaddr); cellGcmSetDrawIndexArray(gcm, CELL_GCM_PRIMITIVE_TRIANGLES, p->num_indices, CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16, loc, indaddr); } else { // convert it into a non-indexed triangle list that we can chunk without extra mem // this is a fall-back path and, needless to say, it's slow. S32 vsize = vformats[p->vertex_format].size; S32 tris_per_chunk = gdraw->dyn_vb.seg_size / (3 * vsize); S32 verts_per_chunk = tris_per_chunk * 3; U16 * RADRESTRICT inds = (U16 *) p->indices; S32 pos = 0, i; while (pos < p->num_indices) { S32 vert_count = RR_MIN(p->num_indices - pos, verts_per_chunk); void *ring = gdraw_bufring_alloc(&gd->dyn_vb, vert_count * vsize, CELL_GCM_VERTEX_TEXTURE_CACHE_LINE_SIZE); assert(ring != NULL); // we specifically chunked so this alloc succeeds! // prepare for painting... cellGcmSetInvalidateVertexCache(gcm); set_vertex_decl(gd, gcm, p->vertex_format, gd->dynvb_loc, dynvbaddr2offs(gd, ring)); // build the triangle list (two versions for the two sizes since it's a small number of bytes being copied) if (vsize == 8) { ring = (void *) ((U8 *) ring - 8); for (i=0; i < vert_count; i++) __stdu(*((U64 *) p->vertices + *inds++), 8, ring); } else if (vsize == 16) { ring = (void *) ((U8 *) ring - 8); for (i=0; i < vert_count; i++) { U64 *src = (U64 *) p->vertices + *inds++ * 2; U64 v0 = __ld(0, src); U64 v1 = __ld(8, src); __std(v0, 8, ring); __stdu(v1, 16, ring); } } else assert(0); // unknown size, shouldn't happen // paint this batch cellGcmSetDrawArrays(gcm, CELL_GCM_PRIMITIVE_TRIANGLES, 0, vert_count); pos += vert_count; } } } else { // dynamic quads U32 vsize = vformats[p->vertex_format].size; S32 verts_per_chunk = (gdraw->dyn_vb.seg_size / vsize) & ~3; S32 pos = 0; while (pos < p->num_vertices) { S32 vert_count = RR_MIN(p->num_vertices - pos, verts_per_chunk); U32 chunk_bytes = vert_count * vsize; void *ring = gdraw_bufring_alloc(&gd->dyn_vb, chunk_bytes, CELL_GCM_VERTEX_TEXTURE_CACHE_LINE_SIZE); assert(ring != NULL); // we picked the chunk size so this alloc succeeds! memcpy(ring, (U8 *)p->vertices + pos * vsize, chunk_bytes); cellGcmSetInvalidateVertexCache(gcm); set_vertex_decl(gd, gcm, p->vertex_format, gd->dynvb_loc, dynvbaddr2offs(gd, ring)); cellGcmSetDrawArrays(gcm, CELL_GCM_PRIMITIVE_QUADS, 0, vert_count); pos += vert_count; } } fence_resources(gd, vb, r->tex[0], r->tex[1]); { // avoid LHS S16 oldflags = stats->nonzero_flags; S16 oldbatches = stats->num_batches; stats->nonzero_flags = oldflags | GDRAW_STATS_batches; stats->num_batches = oldbatches + 1; stats->drawn_indices += p->num_indices; stats->drawn_vertices += p->num_vertices; } } /////////////////////////////////////////////////////////////////////// // // Flash 8 filter effects // static void set_pixel_constant(U32 ucode_offs, S32 constant, F32 x, F32 y, F32 z, F32 w) { F32 values[4] = { x,y,z,w }; set_fragment_para(gdraw, ucode_offs, constant, values, 1); } // caller sets up texture coordinates static void do_screen_quad(gswf_recti *s, F32 *tc, GDrawStats *stats) { RR_UNUSED_VARIABLE(stats); F32 px0 = (F32) s->x0, py0 = (F32) s->y0, px1 = (F32) s->x1, py1 = (F32) s->y1; CellGcmContextData * RADRESTRICT gcm = gdraw->gcm; VertexVars * RADRESTRICT vvars; volatile F32 * RADRESTRICT vert; void *ptr; cellGcmSetVertexProgramStartSlot(gcm, gdraw->vslot[GDRAW_vformat_v2tc2]); cellGcmSetVertexProgramConstantsPointer(gcm, 0, VVAR_count_worldonly * 4, &ptr); vvars = (VertexVars *) ptr; gdraw_PixelSpace(&vvars->world[0].x); set_vertex_decl(gdraw, gcm, GDRAW_vformat_v2tc2, CELL_GCM_LOCATION_LOCAL, 0); cellGcmSetDrawBegin(gcm, CELL_GCM_PRIMITIVE_QUADS); cellGcmSetDrawInlineArrayPointer(gcm, 4 * 4, &ptr); // build vertex data vert = (volatile F32 *) ptr; vert[ 0] = px0; vert[ 1] = py0; vert[ 2] = tc[0]; vert[ 3] = tc[1]; vert[ 4] = px1; vert[ 5] = py0; vert[ 6] = tc[2]; vert[ 7] = tc[1]; vert[ 8] = px1; vert[ 9] = py1; vert[10] = tc[2]; vert[11] = tc[3]; vert[12] = px0; vert[13] = py1; vert[14] = tc[0]; vert[15] = tc[3]; cellGcmSetDrawEnd(gcm); } static void gdraw_DriverBlurPass(GDrawRenderState *r, int taps, float *data, gswf_recti *s, float *tc, float height_max, float *clamp, GDrawStats *stats) { RR_UNUSED_VARIABLE(height_max); ProgramWithCachedVariableLocations *prg = &gdraw->blur_prog[taps]; U32 ucode_offs = prg->cfg.offset; set_texture(0, r->tex[0]); set_fragment_para(gdraw, ucode_offs, prg->vars[VAR_blur_tap], data, taps); set_fragment_para(gdraw, ucode_offs, prg->vars[VAR_blur_clampv], clamp, 1); set_fragment_shader(gdraw, prg); do_screen_quad(s, tc, stats); fence_resources(gdraw, r->tex[0]); } static void gdraw_Colormatrix(GDrawRenderState *r, gswf_recti *s, float *tc, GDrawStats *stats) { ProgramWithCachedVariableLocations *prg = &gdraw->colormatrix; U32 ucode_offs = prg->cfg.offset; if (!gdraw_TextureDrawBufferBegin(s, GDRAW_TEXTURE_FORMAT_rgba32, GDRAW_TEXTUREDRAWBUFFER_FLAGS_needs_color | GDRAW_TEXTUREDRAWBUFFER_FLAGS_needs_alpha, 0, stats)) return; set_texture(0, r->tex[0]); set_fragment_para(gdraw, ucode_offs, prg->vars[VAR_colormatrix_data], r->shader_data, 5); set_fragment_shader(gdraw, prg); do_screen_quad(s, tc, stats); fence_resources(gdraw, r->tex[0]); r->tex[0] = gdraw_TextureDrawBufferEnd(stats); } static gswf_recti *get_valid_rect(GDrawTexture *tex) { GDrawHandle *h = (GDrawHandle *) tex; S32 n = (S32) (h - gdraw->rendertargets.handle); assert(n >= 0 && n <= MAX_RENDER_STACK_DEPTH+1); return &gdraw->rt_valid[n]; } static void set_clamp_constant(U32 ucode_offs, S32 constant, GDrawTexture *tex) { gswf_recti *s = get_valid_rect(tex); // when we make the valid data, we make sure there is an extra empty pixel at the border set_pixel_constant(ucode_offs, constant, (s->x0-0.5f) / gdraw->frametex_width, (s->y0-0.5f) / gdraw->frametex_height, (s->x1+0.5f) / gdraw->frametex_width, (s->y1+0.5f) / gdraw->frametex_height); } static void gdraw_Filter(GDrawRenderState *r, gswf_recti *s, float *tc, int isbevel, GDrawStats *stats) { ProgramWithCachedVariableLocations *prg = &gdraw->filter_prog[isbevel][r->filter_mode]; U32 ucode_offs = prg->cfg.offset; if (!gdraw_TextureDrawBufferBegin(s, GDRAW_TEXTURE_FORMAT_rgba32, GDRAW_TEXTUREDRAWBUFFER_FLAGS_needs_color | GDRAW_TEXTUREDRAWBUFFER_FLAGS_needs_alpha, NULL, stats)) return; set_texture(0, r->tex[0]); set_texture(1, r->tex[1]); if (r->tex[2]) set_texture(2, r->tex[2]); set_pixel_constant(ucode_offs, prg->vars[VAR_filter_color], r->shader_data[0], r->shader_data[1], r->shader_data[2], r->shader_data[3]); set_pixel_constant(ucode_offs, prg->vars[VAR_filter_tc_off], -r->shader_data[4] / (F32)gdraw->frametex_width, -r->shader_data[5] / (F32)gdraw->frametex_height, r->shader_data[6], 0); set_pixel_constant(ucode_offs, prg->vars[VAR_filter_color2], r->shader_data[8], r->shader_data[9], r->shader_data[10], r->shader_data[11]); set_clamp_constant(ucode_offs, prg->vars[VAR_filter_clamp0], r->tex[0]); set_clamp_constant(ucode_offs, prg->vars[VAR_filter_clamp1], r->tex[1]); set_fragment_shader(gdraw, prg); do_screen_quad(s, tc, stats); fence_resources(gdraw, r->tex[0], r->tex[1], r->tex[2]); r->tex[0] = gdraw_TextureDrawBufferEnd(stats); } static void RADLINK gdraw_FilterQuad(GDrawRenderState *r, S32 x0, S32 y0, S32 x1, S32 y1, GDrawStats *stats) { F32 tc[4]; gswf_recti s; // clip to tile boundaries s.x0 = RR_MAX(x0, gdraw->tx0p); s.y0 = RR_MAX(y0, gdraw->ty0p); s.x1 = RR_MIN(x1, gdraw->tx0p + gdraw->tpw); s.y1 = RR_MIN(y1, gdraw->ty0p + gdraw->tph); if (s.x1 < s.x0 || s.y1 < s.y0) return; // prepare for drawing tc[0] = (s.x0 - gdraw->tx0p) / (F32) gdraw->frametex_width; tc[1] = (s.y0 - gdraw->ty0p) / (F32) gdraw->frametex_height; tc[2] = (s.x1 - gdraw->tx0p) / (F32) gdraw->frametex_width; tc[3] = (s.y1 - gdraw->ty0p) / (F32) gdraw->frametex_height; clear_renderstate(); if (r->blend_mode == GDRAW_BLEND_filter) { switch (r->filter) { case GDRAW_FILTER_blur: { GDrawBlurInfo b; gswf_recti bounds = *get_valid_rect(r->tex[0]); gdraw_ShiftRect(&s, &s, -gdraw->tx0p, -gdraw->ty0p); // blur uses physical rendertarget coordinates b.BlurPass = gdraw_DriverBlurPass; b.w = gdraw->tpw; b.h = gdraw->tph; b.frametex_width = gdraw->frametex_width; b.frametex_height = gdraw->frametex_height; // blur needs to draw with multiple passes, so set up special state gdraw->in_blur = true; set_viewport_raw(0,0,gdraw->tpw,gdraw->tph); set_projection_raw(0,gdraw->tpw,gdraw->tph,0); // do the blur gdraw_Blur(&gdraw_funcs, &b,r, &s, &bounds, stats); // restore the normal state gdraw->in_blur = false; set_viewport(); set_projection(); break; } case GDRAW_FILTER_colormatrix: gdraw_Colormatrix(r, &s, tc, stats); break; case GDRAW_FILTER_dropshadow: gdraw_Filter(r, &s, tc, 0, stats); break; case GDRAW_FILTER_bevel: gdraw_Filter(r, &s, tc, 1, stats); break; default: assert(0); } } else { GDrawHandle *blend_tex = NULL; GDraw *gd = gdraw; // for crazy blend modes, we need to read back from the framebuffer // and do the blending in the pixel shader. so we need to copy the // relevant pixels from our active render target into a texture. if (r->blend_mode == GDRAW_BLEND_special && (blend_tex = get_color_rendertarget(stats)) != NULL) { // slightly different logic depending on whether we were rendering // to the main color buffer or a render target, because the former // has tile origin-based coordinates while the latter don't. also, // we don't have a texture for the main framebuffer lying around. int bpp = 4; GcmTexture *dest = blend_tex->handle.tex.gcm; if (gd->cur == gd->frame) { CellGcmSurface *src = &gd->main_surface; cellGcmSetTransferImage(gd->gcm, CELL_GCM_TRANSFER_LOCAL_TO_LOCAL, dest->offset, dest->pitch, gd->tx0 - gd->tx0p, gd->ty0 - gd->ty0p, src->colorOffset[0], src->colorPitch[0], src->x + gd->vx, src->y + gd->vy, gd->tw, gd->th, bpp); } else { GcmTexture *src = gd->cur->color_buffer->handle.tex.gcm; cellGcmSetTransferImage(gd->gcm, CELL_GCM_TRANSFER_LOCAL_TO_LOCAL, dest->offset, dest->pitch, 0, 0, src->offset, src->pitch, 0, 0, gd->tpw, gd->tph, bpp); } set_texture(1, (GDrawTexture *) blend_tex); } set_renderstate(vprog(GDRAW_vformat_v2tc2, r), r, stats); do_screen_quad(&s, tc, stats); fence_resources(gdraw, r->tex[0], r->tex[1]); if (blend_tex) gdraw_FreeTexture((GDrawTexture *) blend_tex, 0, stats); } } /////////////////////////////////////////////////////////////////////// // // Shaders // #include "gdraw_ps3gcm_shaders.inl" static void create_fragment_program(ProgramWithCachedVariableLocations *p, ProgramWithCachedVariableLocations *src) { *p = *src; if (p->program) { U32 ucode_size; void *ucode_main, *ucode_local; cellGcmCgInitProgram(p->program); cellGcmCgGetUCode(p->program, &ucode_main, &ucode_size); ucode_local = gdraw_arena_alloc(&gdraw->local_arena, ucode_size + 400, CELL_GCM_FRAGMENT_UCODE_LOCAL_ALIGN_OFFSET); // 400 for overfetch assert(ucode_local != NULL); // if this triggers, it's a GDraw bug memcpy(ucode_local, ucode_main, ucode_size); cellGcmCgGetCgbFragmentProgramConfiguration(p->program, &p->cfg, 0, 1, 0); p->cfg.offset = addr2offs(ucode_local); p->cfg.attributeInputMask &= ~CELL_GCM_ATTRIB_OUTPUT_MASK_POINTSIZE; // we don't use point sprites } } static void create_all_shaders() { S32 i; U32 slot = 0; for (i=0; i < GDRAW_TEXTURE__count*3; ++i) create_fragment_program(&gdraw->fprog[0][i], pshader_basic_arr + i); for (i=0; i < GDRAW_BLENDSPECIAL__count; ++i) create_fragment_program(&gdraw->exceptional_blend[i], pshader_exceptional_blend_arr + i); for (i=0; i < 32; ++i) create_fragment_program(&gdraw->filter_prog[0][i], pshader_filter_arr + i); for (i=0; i < MAX_TAPS+1; ++i) create_fragment_program(&gdraw->blur_prog[i], pshader_blur_arr + i); create_fragment_program(&gdraw->colormatrix, pshader_color_matrix_arr); for (i=0; i < GDRAW_TEXTURE__count; ++i) gdraw->basic_fprog[i] = &gdraw->fprog[i][0]; for (i=0; i < GDRAW_vformat__basic_count; i++) { ProgramWithCachedVariableLocations *p = &gdraw->vprog[i]; U32 ucode_size; *p = vshader_vsps3_arr[i]; if (p->program) { cellGcmCgInitProgram(p->program); cellGcmCgSetInstructionSlot(p->program, slot); cellGcmCgGetUCode(p->program, &p->ucode, &ucode_size); gdraw->vslot[i] = slot; slot += ucode_size / 16; } } assert(slot <= 512); // that's all the space we have for vertex shaders! } //////////////////////////////////////////////////////////////////////// // // Create and tear-down the state // typedef struct { S32 num_handles; S32 num_bytes; void *ptr; } GDrawResourceMem; static GDrawResourceMem gdraw_mem[GDRAW_GCM_RESOURCE__count]; static void *rt_mem; static S32 rt_size, rt_width, rt_height, rt_pitch; static GDrawHandleCache *make_handle_cache(gdraw_gcm_resourcetype type, U32 align) { S32 num_handles = gdraw_mem[type].num_handles; S32 num_bytes = gdraw_mem[type].num_bytes; rrbool is_vertex = type == GDRAW_GCM_RESOURCE_vertexbuffer; U32 cache_size = sizeof(GDrawHandleCache) + (num_handles - 1) * sizeof(GDrawHandle); U32 header_size = is_vertex ? 0 : sizeof(GcmTexture) * num_handles; if (!num_handles) return NULL; GDrawHandleCache *cache = (GDrawHandleCache *) IggyGDrawMalloc(cache_size + header_size); if (cache) { gdraw_HandleCacheInit(cache, num_handles, num_bytes); cache->is_vertex = is_vertex; // set up resource headers if (type != GDRAW_GCM_RESOURCE_vertexbuffer) { GcmTexture *tex = (GcmTexture *) ((U8 *) cache + cache_size); S32 i; for (i=0; i < num_handles; i++) cache->handle[i].handle.tex.gcm = &tex[i]; } // set up our allocator cache->alloc = gfxalloc_create(gdraw_mem[type].ptr, num_bytes, align, num_handles); if (!cache->alloc) { IggyGDrawFree(cache); cache = NULL; } } return cache; } static void free_handle_cache(GDrawHandleCache *c) { if (c) { if (c->alloc) IggyGDrawFree(c->alloc); IggyGDrawFree(c); } } static int get_location(void *ptr) { CellGcmConfig cfg; cellGcmGetConfiguration(&cfg); U8 *local_start = (U8 *) cfg.localAddress; U8 *local_end = local_start + cfg.localSize; return (ptr >= local_start && ptr < local_end) ? CELL_GCM_LOCATION_LOCAL : CELL_GCM_LOCATION_MAIN; } int gdraw_GCM_SetResourceMemory(gdraw_gcm_resourcetype type, S32 num_handles, void *ptr, S32 num_bytes) { GDrawStats stats; assert(type >= GDRAW_GCM_RESOURCE_texture && type < GDRAW_GCM_RESOURCE__count); assert(num_handles >= 0); assert(num_bytes >= 0); if (!num_handles) num_handles = 1; switch (type) { case GDRAW_GCM_RESOURCE_texture: make_pool_aligned(&ptr, &num_bytes, CELL_GCM_TEXTURE_SWIZZLE_ALIGN_OFFSET); break; case GDRAW_GCM_RESOURCE_vertexbuffer: case GDRAW_GCM_RESOURCE_dyn_vertexbuffer: make_pool_aligned(&ptr, &num_bytes, CELL_GCM_SURFACE_LINEAR_ALIGN_OFFSET); break; default: // avoid compiler warning break; } gdraw_mem[type].num_handles = num_handles; gdraw_mem[type].num_bytes = num_bytes; gdraw_mem[type].ptr = ptr; // if no gdraw context created, there's nothing to worry about if (!gdraw) return 1; // wait until GPU is done, then reap everything wait_on_fence(gdraw->tile_end_fence); memset(&stats, 0, sizeof(stats)); if (gdraw->texturecache) gdraw_res_reap(gdraw->texturecache, &stats); if (gdraw->vbufcache ) gdraw_res_reap(gdraw->vbufcache, &stats); // resize the appropriate pool switch (type) { case GDRAW_GCM_RESOURCE_texture: free_handle_cache(gdraw->texturecache); gdraw->texturecache = make_handle_cache(GDRAW_GCM_RESOURCE_texture, CELL_GCM_TEXTURE_SWIZZLE_ALIGN_OFFSET); gdraw->tex_loc = get_location(ptr); return gdraw->texturecache != NULL; case GDRAW_GCM_RESOURCE_vertexbuffer: free_handle_cache(gdraw->vbufcache); gdraw->vbufcache = make_handle_cache(GDRAW_GCM_RESOURCE_vertexbuffer, CELL_GCM_SURFACE_LINEAR_ALIGN_OFFSET); gdraw->vbuf_base = ptr ? (U8 *) ptr - addr2offs(ptr) : 0; gdraw->vbuf_loc = get_location(ptr); return gdraw->vbufcache != NULL; case GDRAW_GCM_RESOURCE_dyn_vertexbuffer: gdraw_bufring_shutdown(&gdraw->dyn_vb); gdraw_bufring_init(&gdraw->dyn_vb, ptr, num_bytes, 2, CELL_GCM_VERTEX_TEXTURE_CACHE_LINE_SIZE); gdraw->dynvb_base = ptr ? (U8 *) ptr - addr2offs(ptr) : 0; gdraw->dynvb_loc = get_location(ptr); return gdraw->dyn_vb.seg_size != 0; default: return 0; } } int gdraw_GCM_SetRendertargetMemory(void *ptr, S32 num_bytes, S32 width, S32 height, S32 pitch) { S32 i; assert(num_bytes >= 0); rt_mem = ptr; rt_size = num_bytes; rt_width = width; rt_height = height; rt_pitch = pitch; if (!gdraw) return 1; // make sure RSX is done first wait_on_fence(gdraw->tile_end_fence); gdraw->frametex_width = width; gdraw->frametex_height = height; gdraw->rt_pitch = pitch; gdraw_arena_init(&gdraw->rt_arena, ptr, rt_size); gdraw_arena_reset(&gdraw->rt_arena); // unnecessary, just to avoid warning about unused function gdraw->rt_loc = get_location(ptr); gdraw_HandleCacheInit(&gdraw->rendertargets, MAX_RENDER_STACK_DEPTH + 1, rt_size); for (i=0; i < MAX_RENDER_STACK_DEPTH + 1; ++i) gdraw->rendertargets.handle[i].handle.tex.gcm = &gdraw->rendertarget_textures[i]; return 1; } void gdraw_GCM_ResetAllResourceMemory() { gdraw_GCM_SetResourceMemory(GDRAW_GCM_RESOURCE_texture, 0, NULL, 0); gdraw_GCM_SetResourceMemory(GDRAW_GCM_RESOURCE_vertexbuffer, 0, NULL, 0); gdraw_GCM_SetResourceMemory(GDRAW_GCM_RESOURCE_dyn_vertexbuffer, 0, NULL, 0); gdraw_GCM_SetRendertargetMemory(NULL, 0, 0, 0, 0); } GDrawFunctions *gdraw_GCM_CreateContext(CellGcmContextData *gcm, void *local_workmem, U8 rsx_label_index) { S32 i; gdraw = (GDraw *) IggyGDrawMalloc(sizeof(*gdraw)); // make sure gdraw struct is PPU cache line aligned if (!gdraw) return NULL; memset(gdraw, 0, sizeof(*gdraw)); // set up local work mem pointers gdraw_arena_init(&gdraw->local_arena, local_workmem, GDRAW_GCM_LOCAL_WORKMEM_SIZE); // set up fence stuff gdraw->fence_label_index = rsx_label_index; gdraw->fence_label = cellGcmGetLabelAddress(gdraw->fence_label_index); *gdraw->fence_label = 0; gdraw->tile_end_fence.value = 0; gdraw->next_fence_index = 1; gdraw->fence_batch_counter = FENCE_BATCH_INTERVAL; // set up memory for all resource types for (i = 0; i < GDRAW_GCM_RESOURCE__count; ++i) gdraw_GCM_SetResourceMemory((gdraw_gcm_resourcetype) i, gdraw_mem[i].num_handles, gdraw_mem[i].ptr, gdraw_mem[i].num_bytes); gdraw_GCM_SetRendertargetMemory(rt_mem, rt_size, rt_width, rt_height, rt_pitch); gdraw->gcm = gcm; create_all_shaders(); gdraw_funcs.SetViewSizeAndWorldScale = gdraw_SetViewSizeAndWorldScale; gdraw_funcs.GetInfo = gdraw_GetInfo; gdraw_funcs.DescribeTexture = gdraw_DescribeTexture; gdraw_funcs.DescribeVertexBuffer = gdraw_DescribeVertexBuffer; gdraw_funcs.RenderingBegin = gdraw_RenderingBegin; gdraw_funcs.RenderingEnd = gdraw_RenderingEnd; gdraw_funcs.RenderTileBegin = gdraw_RenderTileBegin; gdraw_funcs.RenderTileEnd = gdraw_RenderTileEnd; gdraw_funcs.TextureDrawBufferBegin = gdraw_TextureDrawBufferBegin; gdraw_funcs.TextureDrawBufferEnd = gdraw_TextureDrawBufferEnd; gdraw_funcs.DrawIndexedTriangles = gdraw_DrawIndexedTriangles; gdraw_funcs.FilterQuad = gdraw_FilterQuad; gdraw_funcs.SetAntialiasTexture = gdraw_SetAntialiasTexture; gdraw_funcs.ClearStencilBits = gdraw_ClearStencilBits; gdraw_funcs.ClearID = gdraw_ClearID; gdraw_funcs.MakeTextureBegin = gdraw_MakeTextureBegin; gdraw_funcs.MakeTextureMore = gdraw_MakeTextureMore; gdraw_funcs.MakeTextureEnd = gdraw_MakeTextureEnd; gdraw_funcs.UpdateTextureBegin = gdraw_UpdateTextureBegin; gdraw_funcs.UpdateTextureRect = gdraw_UpdateTextureRect; gdraw_funcs.UpdateTextureEnd = gdraw_UpdateTextureEnd; gdraw_funcs.FreeTexture = gdraw_FreeTexture; gdraw_funcs.TryToLockTexture = gdraw_TryToLockTexture; gdraw_funcs.MakeTextureFromResource = (gdraw_make_texture_from_resource *) gdraw_GCM_MakeTextureFromResource; gdraw_funcs.FreeTextureFromResource = gdraw_GCM_DestroyTextureFromResource; gdraw_funcs.MakeVertexBufferBegin = gdraw_MakeVertexBufferBegin; gdraw_funcs.MakeVertexBufferMore = gdraw_MakeVertexBufferMore; gdraw_funcs.MakeVertexBufferEnd = gdraw_MakeVertexBufferEnd; gdraw_funcs.TryToLockVertexBuffer = gdraw_TryLockVertexBuffer; gdraw_funcs.FreeVertexBuffer = gdraw_FreeVertexBuffer; gdraw_funcs.UnlockHandles = gdraw_UnlockHandles; gdraw_funcs.SetTextureUniqueID = gdraw_SetTextureUniqueID; return &gdraw_funcs; } void gdraw_GCM_DestroyContext(void) { if (gdraw) { GDrawStats stats; memset(&stats, 0, sizeof(stats)); if (gdraw->texturecache) gdraw_res_flush(gdraw->texturecache, &stats); if (gdraw->vbufcache) gdraw_res_flush(gdraw->vbufcache, &stats); // make sure RSX is done first wait_on_fence(gdraw->tile_end_fence); gdraw_bufring_shutdown(&gdraw->dyn_vb); free_handle_cache(gdraw->texturecache); free_handle_cache(gdraw->vbufcache); IggyGDrawFree(gdraw); gdraw = NULL; } } void RADLINK gdraw_GCM_BeginCustomDraw(IggyCustomDrawCallbackRegion *region, float *matrix) { clear_renderstate(); gdraw_GetObjectSpaceMatrix(matrix, region->o2w, gdraw->projection, 0.0f, 0); } void RADLINK gdraw_GCM_CalculateCustomDraw_4J(IggyCustomDrawCallbackRegion * region, F32 mat[16]) { gdraw_GetObjectSpaceMatrix(mat, region->o2w, gdraw->projection, 0.0f, 0); } void RADLINK gdraw_GCM_EndCustomDraw(IggyCustomDrawCallbackRegion *region) { RR_UNUSED_VARIABLE(region); set_common_renderstate(); set_projection(); } GDrawTexture * RADLINK gdraw_GCM_MakeTextureFromResource(U8 *resource_file, S32 len, IggyFileTexturePS3 *texture) { CellGcmTexture *tex = (CellGcmTexture *) (&texture->texture); tex->offset = addr2offs(resource_file + texture->file_offset); RR_UNUSED_VARIABLE(len); // slightly more efficient if we'd done the following at bake time, // but doing it here lets us avoid having this knowledge visible // in the source to iggyconvert, which lets us avoid having a // mechanism for hiding some of that source from non-PS3 customers switch (texture->format) { case IFT_FORMAT_la_88: tex->remap = CELL_GCM_TEXTURE_REMAP_REMAP << 14 | CELL_GCM_TEXTURE_REMAP_REMAP << 12 | CELL_GCM_TEXTURE_REMAP_REMAP << 10 | CELL_GCM_TEXTURE_REMAP_REMAP << 8 | CELL_GCM_TEXTURE_REMAP_FROM_R << 6 | CELL_GCM_TEXTURE_REMAP_FROM_R << 4 | CELL_GCM_TEXTURE_REMAP_FROM_R << 2 | CELL_GCM_TEXTURE_REMAP_FROM_G; break; case IFT_FORMAT_i_8: tex->remap = CELL_GCM_TEXTURE_REMAP_REMAP << 14 | CELL_GCM_TEXTURE_REMAP_REMAP << 12 | CELL_GCM_TEXTURE_REMAP_REMAP << 10 | CELL_GCM_TEXTURE_REMAP_REMAP << 8 | CELL_GCM_TEXTURE_REMAP_FROM_R << 6 | CELL_GCM_TEXTURE_REMAP_FROM_R << 4 | CELL_GCM_TEXTURE_REMAP_FROM_R << 2 | CELL_GCM_TEXTURE_REMAP_FROM_R; break; } return gdraw_GCM_WrappedTextureCreate(tex); } void RADLINK gdraw_GCM_DestroyTextureFromResource(GDrawTexture *tex) { gdraw_GCM_WrappedTextureDestroy(tex); }