// gdraw_psp2.cpp - author: Fabian Giesen - copyright 2014 RAD Game Tools // // This implements the Iggy graphics driver layer for PSP2. // GDraw consists of several components that interact fairly loosely with each other; // e.g. the resource management, drawing and filtering parts are all fairly independent // of each other. If you want to modify some aspect of GDraw - say the texture allocation // logic - your best bet is usually to just look for one of the related entry points, // e.g. MakeTextureBegin, and take it from there. There's a bunch of code in this file, // but most of it isn't really complicated. The bits that are somewhat tricky have a more // detailed explanation at the top of the relevant section. #include #include #include #include #include #include "iggy.h" #include "gdraw.h" #include "gdraw_psp2.h" typedef union { struct { SceGxmTexture *gxm; } tex; struct { void *verts; void *inds; } vbuf; } GDrawNativeHandle; #define GDRAW_MANAGE_MEM #define GDRAW_MANAGE_MEM_TWOPOOL #define GDRAW_NO_BLURS #define GDRAW_MIN_FREE_AMOUNT (64*1024) // always try to free at least this many bytes when throwing out old textures #define GDRAW_MAYBE_UNUSED __attribute__((unused)) #include "gdraw_shared.inl" #define MAX_SAMPLERS 1 #define AATEX_SAMPLER 1 // sampler that aa_tex gets set in #define QUAD_IB_COUNT 1024 // quad index buffer has indices for this many quads #define ASSERT_COUNT(a,b) ((a) == (b) ? (b) : -1) #define MAX_TEXTURE2D_DIM 4096 #define MAX_AATEX_WIDTH 64 #define GPU_MEMCPY_ALIGN 16 // bytes static GDrawFunctions gdraw_funcs; struct ShaderCode { void *blob; union { void *dummy; SceGxmShaderPatcherId id; }; bool registered; }; // Canonical blends enum gdraw_canonical_blend { GDRAW_CBLEND_none, // direct copy GDRAW_CBLEND_alpha, // premultiplied alpha GDRAW_CBLEND_add, // add GDRAW_CBLEND_nowrite, // color writes disabled GDRAW_CBLEND__count }; enum gdraw_outstanding_transfer { GDRAW_TRANSFER_texture = 1 << 0, GDRAW_TRANSFER_vertex = 1 << 1, }; /////////////////////////////////////////////////////////////////////////////// // // GDraw data structure // // // This is the primary rendering abstraction, which hides all // the platform-specific rendering behavior from Iggy. It is // full of platform-specific graphics state, and also general // graphics state so that it doesn't have to callback into Iggy // to get at that graphics state. struct GDraw { // 16-byte aligned! F32 projection[4]; // always 2D scale+2D translate. first two are scale, last two are translate. // scale factor converting worldspace to viewspace <0,0>.. F32 world_to_pixel[2]; // graphics context SceGxmContext *gxm; // cached state U32 scissor_state; // 0=disabled, 1=enabled. U32 z_stencil_key; // field built from z/stencil test flags. 0 = no z/stencil test, ~0 is used for "unknown state" gswf_recti cur_scissor; GDrawTexture *active_tex[MAX_SAMPLERS]; SceGxmFragmentProgram *cur_fp; SceGxmVertexProgram *cur_vp; // viewport setting (in pixels) for the current tile S32 vx, vy; S32 fw, fh; // full width/height of virtual display S32 tw, th; // actual width/height of current tile S32 tpw, tph; // width/height of padded version of tile S32 tx0, ty0; S32 tx0p, ty0p; S32 tx0v, ty0v; // tile bounds relative to tile origin struct { S32 x0, y0, x1, y1; } cview; // current viewport gswf_recti screen_bounds; SceGxmTexture aa_tex; GDrawArena context_arena; // texture and vertex buffer pools GDrawHandleCache *texturecache; GDrawHandleCache *vbufcache; // dynamic data buffer GDrawArena dynamic; gdraw_psp2_dynamic_stats dynamic_stats; gdraw_psp2_dynamic_buffer *dyn_buf; // fragment programs SceGxmFragmentProgram *main_fp[GDRAW_TEXTURE__count][3][GDRAW_CBLEND__count]; // [texmode][additive][cblend] SceGxmFragmentProgram *clear_fp; SceGxmFragmentProgram *mask_update_fp; // vertex programs SceGxmVertexProgram *vp[GDRAW_vformat__basic_count]; SceGxmVertexProgram *mask_vp; // baked index buffers U16 *quad_ib; U16 *mask_ib; // precomputed mask draw SceGxmPrecomputedDraw mask_draw; void *mask_draw_gpu; // synchronization volatile U32 *fence_label; U64 next_fence_index; GDrawFence scene_end_fence; // mipmapping GDrawMipmapContext mipmap; // shader patcher SceGxmShaderPatcher *patcher; // clear color F32 clear_color_rgba[4]; const F32 *next_tile_clear; // transfers U32 outstanding_transfers; U32 draw_transfer_flush_mask; }; static GDraw *gdraw; static const F32 four_zeros[4] = { 0.0f, 0.0f, 0.0f, 0.0f }; //////////////////////////////////////////////////////////////////////// // // Synchronization, pointer wrangling and command buffer management // static RADINLINE GDrawFence get_next_fence() { GDrawFence fence; fence.value = gdraw->next_fence_index; return fence; } static SceGxmNotification scene_end_notification() { SceGxmNotification n; n.address = (uint32_t *)gdraw->fence_label; n.value = (uint32_t)gdraw->next_fence_index; gdraw->scene_end_fence.value = gdraw->next_fence_index; gdraw->next_fence_index++; return n; } static RADINLINE rrbool is_fence_pending(GDrawFence fence) { // if it's older than one full wrap of the fence counter, // we know it's retired. (can't have >=4 billion frames in // flight!) if (gdraw->next_fence_index - fence.value > 0xffffffffu) return false; // this is how far the GPU is. U32 retired = *gdraw->fence_label; // everything between "retired" (exclusive) and // "next_fence_index" (inclusive) is pending. everything else // is definitely done. // // this is a bit subtle; it uses unsigned wraparound to handle // the edge case where we've wrapped around recently. // number of pending fences (next_fence_index hasn't been submitted yet!) U32 num_pending = (U32)gdraw->next_fence_index - retired; return (U32)(gdraw->next_fence_index - fence.value) < num_pending; } static void wait_on_fence(GDrawFence fence) { if (is_fence_pending(fence)) { // Is the fence in a scene we haven't even submitted yet? We can't do that. // // NOTE: you might see this from user code if you try to do the following // seqeuence within a single GXM scene: // - Render an Iggy // - Tick it (or otherwise trigger some event that causes a font cache // update). // - Render the same Iggy again. // This is not supported. If you modify the state, you *have* to end the // scene first. if (fence.value >= gdraw->next_fence_index) // uh oh, it's in a scene we haven't even submitted yet. this is a bug! RR_BREAK(); IggyWaitOnFence(&fence, 0); } } extern "C" void gdraw_psp2_wait(U64 fence_val) { GDrawFence fence; fence.value = fence_val; // NOTE: if you see this function, either in Razor as a time hog or in the // Debugger because you're stuck here, there's a problem with how you're using // GDraw. These two cases are related but seperate, and I'll cover them here. // // PERF PROBLEM ("why do we spend a lot of time here?"): // GDraw calls this function when it wants to recycle memory in its // resource pools; to do so, it needs to wait until any previous GPU // commands using that memory have finished. // // Generally, GDraw will free resources in LRU (least recently used) order. // This means that whatever memory is being freed has (hopefully) not been // touched for a few frames, and GDraw should be able to reuse it without // having to wait. // // However, if you see this function show up in your profile, that's // clearly not the case. In that case, you might be thrashing the resource // pools. GDraw will warn about this - do you have an Iggy warning callback // installed? Anyway, if there are resource thrashing warnings, try increasing // the size of the corresponding resource pool and this stall should go away. // // The other reason why GDraw would wait is because you're passing a "dynamic // buffer" to gdraw_psp2_Begin() that the GPU is still reading from a previous // frame. If that's the case (just check whether your callstack contains // "WaitForDynamicBufferIdle"), you might be using just one dynamic buffer. // Consider double- or triple-buffering it instead to avoid this kind of stall. // // DEADLOCK ("the app is stuck here and never makes any progress"): // // GDraw uses this function when it needs to wait for the GPU to complete // rendering a previous scene, because we're trying to reuse memory we used // in that scene. // // If you end up here, this could mean one of several things: // // 1. The GPU isn't writing the scene completion notifications that GDraw is // waiting for. Did you remember to pass the SceGxmNotification returned // by gdraw_psp2_End as "fragmentNotification" to sceGxmEndScene? This is // necessary for GDraw's memory management to work! // 2. Check that the notification label passed to GDraw (on CreateContext) // is not being used by someone else. Again, GDraw's memory management // mechanism will break if we read unexpected valus! Just make sure that // GDraw has its own notification label. There are lots of notification // labels (SCE_GXM_NOTIFICATION_COUNT, 512 at the time of this writing) // so this shouldn't be an issue. // 3. You are using gdraw_psp2_Begin incorrectly. You may only have one call // to gdraw_psp2_Begin per scene (this should not be a limitation since // you can draw multiple Iggys or do other rendering during that time), // and you must make sure that the scene containing a gdraw_psp2_Begin // is submitted before you can call it again. // // The latter needs a bit of explanation. If you're on an immediate context, // it's pretty easy. In any GXM scene that's supposed to contain Iggy // rendering, adhere to the following pattern: // // sceGxmBeginScene(imm_ctx, ...) // // --- may issue rendering calls here (but not draw Iggys) // gdraw_psp2_Begin(...); // // --- may issue other rendering calls and/or call IggyPlayerDraw // // (once or several times) here // SceGxmNotification notify = gdraw_psp2_End(); // // --- again, may issue rendering calls here (but not draw Iggys) // sceGxmEndScene(imm_ctx, NULL, ¬ify); // // That is, exactly one gdraw_psp2_Begin/_End pair for that scene, and // all IggyPlayerDraws must be inside that pair. That's it. // // On a deferred context, *the same rules apply* - you must still make // sure to render a GXM scene using the notification returned by // gdraw_psp2_End before you call gdraw_psp2_Begin again. This means // that you can't prepare multiple independent command lists for // separate scenes on one thread and then issue them all later on // the main thread (sorry). // // It's definitely easier to stick with immediate contexts. If you // really need deferred contexts and are running into problems with // this, contact Iggy support. while (is_fence_pending(fence)) sceGxmWaitEvent(); } //////////////////////////////////////////////////////////////////////// // // Texture/vertex memory defragmentation support code // static void gdraw_gpu_memcpy(GDrawHandleCache *c, void *dst, void *src, U32 num_bytes) { // "If srcFormat is SCE_GXM_TRANSFER_FORMAT_RAW128, then a maximum of 262144 pixels can be copied (512x512)." static const U32 max_width = 512; static const U32 max_height = 512; static const U32 row_size = max_width * GPU_MEMCPY_ALIGN; U8 *dstp = (U8 *)dst; U8 *srcp = (U8 *)src; U32 offs = 0; // needs to be properly aligned assert(((UINTa)dstp & (GPU_MEMCPY_ALIGN - 1)) == 0); assert(((UINTa)srcp & (GPU_MEMCPY_ALIGN - 1)) == 0); // copy using 512-wide transfers for the bulk part while (num_bytes - offs >= row_size) { U32 num_rows = (num_bytes - offs) / row_size; num_rows = RR_MIN(num_rows, max_height); sceGxmTransferCopy(max_width, num_rows, 0, 0, SCE_GXM_TRANSFER_COLORKEY_NONE, SCE_GXM_TRANSFER_FORMAT_RAW128, SCE_GXM_TRANSFER_LINEAR, srcp + offs, 0, 0, row_size, SCE_GXM_TRANSFER_FORMAT_RAW128, SCE_GXM_TRANSFER_LINEAR, dstp + offs, 0, 0, row_size, NULL, 0, NULL); offs += num_rows * row_size; } // handle the rest // NOTE: our 16-byte alignment guarantees that despite rounding up, we're not going // to overwrite memory belonging to another resource. if (offs < num_bytes) { U32 remaining_pixels = (num_bytes - offs + GPU_MEMCPY_ALIGN - 1) / GPU_MEMCPY_ALIGN; sceGxmTransferCopy(remaining_pixels, 1, 0, 0, SCE_GXM_TRANSFER_COLORKEY_NONE, SCE_GXM_TRANSFER_FORMAT_RAW128, SCE_GXM_TRANSFER_LINEAR, srcp + offs, 0, 0, row_size, SCE_GXM_TRANSFER_FORMAT_RAW128, SCE_GXM_TRANSFER_LINEAR, dstp + offs, 0, 0, row_size, NULL, 0, NULL); } if (c->is_vertex) gdraw->outstanding_transfers |= GDRAW_TRANSFER_vertex; else gdraw->outstanding_transfers |= GDRAW_TRANSFER_texture; } static void gdraw_resource_moved(GDrawHandle *t) { if (!t->cache->is_vertex) sceGxmTextureSetData(t->handle.tex.gxm, t->raw_ptr); else { SINTa index_offs = (U8 *)t->handle.vbuf.inds - (U8 *)t->handle.vbuf.verts; t->handle.vbuf.verts = t->raw_ptr; t->handle.vbuf.inds = (U8 *)t->raw_ptr + index_offs; } } static void gdraw_gpu_wait_for_transfer_completion() { if (gdraw->outstanding_transfers) { sceGxmTransferFinish(); gdraw->outstanding_transfers = 0; } } static void gdraw_defragment_cache(GDrawHandleCache *c, GDrawStats *stats) { // wait until we're done with the previous frame wait_on_fence(gdraw->scene_end_fence); // reap; after this point, the only remaining dead resources // should be ones we've touched in this frame. gdraw_res_reap(c, stats); // at this point, the pool we're switching to should be empty; // we can still have outstanding references to the previous pool // (from this frame), but not anything to the pool from before then. if (!gfxalloc_is_empty(c->alloc_other)) { // if this triggers, there's a bug in GDraw's resource management. RR_BREAK(); } rrbool ok = gdraw_TwoPoolDefragmentMain(c, stats); if (!ok) // if this went wrong, we had some serious heap corruption. RR_BREAK(); } static void handle_cache_tick(GDrawHandleCache *c, GDrawFence now, GDrawStats *stats) { gdraw_PostDefragmentCleanup(c, stats); gdraw_HandleCacheTick(c, now); } static void api_free_resource(GDrawHandle *r) { if (!r->cache->is_vertex) { for (S32 i=0; i < MAX_SAMPLERS; i++) if (gdraw->active_tex[i] == (GDrawTexture *) r) gdraw->active_tex[i] = NULL; } } static void RADLINK gdraw_UnlockHandles(GDrawStats *stats) { // We're on a tiled renderer; the Iggy side doesn't get to unlock *anything*. // We do the unlock ourselves once we're done with the scene, in // gdraw_psp2_End. } //////////////////////////////////////////////////////////////////////// // // Various helpers // static void track_dynamic_alloc_attempt(U32 size, U32 align) { gdraw->dynamic_stats.allocs_attempted++; gdraw->dynamic_stats.bytes_attempted += size; gdraw->dynamic_stats.largest_bytes_attempted = RR_MAX(gdraw->dynamic_stats.largest_bytes_attempted, size); } static void track_dynamic_alloc_failed() { if (gdraw->dynamic_stats.allocs_attempted == gdraw->dynamic_stats.allocs_succeeded + 1) { // warn the first time we run out of mem IggyGDrawSendWarning(NULL, "GDraw out of dynamic memory"); } } static void *alloc_dynamic(U32 size, U32 align) { track_dynamic_alloc_attempt(size, align); void *ptr = gdraw_arena_alloc(&gdraw->dynamic, size, align); if (ptr) { gdraw->dynamic_stats.allocs_succeeded++; gdraw->dynamic_stats.bytes_succeeded += size; gdraw->dynamic_stats.largest_bytes_succeeded = RR_MAX(gdraw->dynamic_stats.largest_bytes_succeeded, size); } else track_dynamic_alloc_failed(); return ptr; } static void *alloc_and_set_fragment_uniforms(SceGxmContext *gxm, U32 slot, size_t size) { void *ptr = alloc_dynamic(size, sizeof(U32)); if (ptr) sceGxmSetFragmentUniformBuffer(gxm, 0, ptr); return ptr; } static void *alloc_and_set_vertex_uniforms(SceGxmContext *gxm, U32 slot, size_t size) { void *ptr = alloc_dynamic(size, sizeof(U32)); if (ptr) sceGxmSetVertexUniformBuffer(gxm, 0, ptr); return ptr; } //////////////////////////////////////////////////////////////////////// // // Texture creation/updating/deletion // GDrawTexture * RADLINK gdraw_psp2_WrappedTextureCreate(SceGxmTexture *tex) { GDrawStats stats = {}; GDrawHandle *p = gdraw_res_alloc_begin(gdraw->texturecache, 0, &stats); gdraw_HandleCacheAllocateEnd(p, 0, NULL, GDRAW_HANDLE_STATE_user_owned); gdraw_psp2_WrappedTextureChange((GDrawTexture *) p, tex); return (GDrawTexture *) p; } void RADLINK gdraw_psp2_WrappedTextureChange(GDrawTexture *handle, SceGxmTexture *tex) { GDrawHandle *p = (GDrawHandle *) handle; *p->handle.tex.gxm = *tex; } void RADLINK gdraw_psp2_WrappedTextureDestroy(GDrawTexture *handle) { GDrawStats stats = {}; gdraw_res_free((GDrawHandle *) handle, &stats); } static void RADLINK gdraw_SetTextureUniqueID(GDrawTexture *tex, void *old_id, void *new_id) { GDrawHandle *p = (GDrawHandle *) tex; // if this is still the handle it's thought to be, change the owner; // if the owner *doesn't* match, then they're changing a stale handle, so ignore if (p->owner == old_id) p->owner = new_id; } static U32 align_down(U32 x, U32 align) { return x & ~(align - 1); } static U32 align_up(U32 x, U32 align) { return (x + align - 1) & ~(align - 1); } static U32 round_up_to_pow2(U32 x) { x--; x |= x >> 16; x |= x >> 8; x |= x >> 4; x |= x >> 2; x |= x >> 1; return x + 1; } static U32 tex_linear_stride(U32 width) { return align_up(width, 8); } static rrbool RADLINK gdraw_MakeTextureBegin(void *owner, S32 width, S32 height, gdraw_texture_format gformat, U32 flags, GDraw_MakeTexture_ProcessingInfo *p, GDrawStats *stats) { S32 bytes_pixel = 4; GDrawHandle *t = NULL; SceGxmTextureFormat format = SCE_GXM_TEXTURE_FORMAT_U8U8U8U8_ABGR; if (width > MAX_TEXTURE2D_DIM || height > MAX_TEXTURE2D_DIM) { IggyGDrawSendWarning(NULL, "GDraw %d x %d texture not supported by hardware (dimension limit %d)", width, height, MAX_TEXTURE2D_DIM); return false; } if (gformat == GDRAW_TEXTURE_FORMAT_font) { format = SCE_GXM_TEXTURE_FORMAT_U8_RRRR; bytes_pixel = 1; } // determine the number of mipmaps to use and size of resulting surface U32 mipmaps = 0; U32 size = 0; U32 base_stride = tex_linear_stride(width); if (flags & GDRAW_MAKETEXTURE_FLAGS_mipmap) { U32 pow2_w = round_up_to_pow2(width); U32 pow2_h = round_up_to_pow2(height); do { // mip offsets are based on size rounded up to pow2, so we pay for that amount of space. size += RR_MAX(pow2_w >> mipmaps, 1) * RR_MAX(pow2_h >> mipmaps, 1) * bytes_pixel; mipmaps++; } while ((width >> mipmaps) || (height >> mipmaps)); } else { // no mips mipmaps = 1; size = base_stride * height * bytes_pixel; } // allocate a handle and make room in the cache for this much data t = gdraw_res_alloc_begin(gdraw->texturecache, size, stats); if (!t) return false; sceGxmTextureInitLinear(t->handle.tex.gxm, t->raw_ptr, format, width, height, mipmaps); gdraw_HandleCacheAllocateEnd(t, size, owner, (flags & GDRAW_MAKETEXTURE_FLAGS_never_flush) ? GDRAW_HANDLE_STATE_pinned : GDRAW_HANDLE_STATE_locked); stats->nonzero_flags |= GDRAW_STATS_alloc_tex; stats->alloc_tex += 1; stats->alloc_tex_bytes += size; p->texture_type = GDRAW_TEXTURE_TYPE_rgba; p->p0 = t; if (flags & GDRAW_MAKETEXTURE_FLAGS_mipmap) { rrbool ok; assert(p->temp_buffer != NULL); ok = gdraw_MipmapBegin(&gdraw->mipmap, width, height, mipmaps, bytes_pixel, p->temp_buffer, p->temp_buffer_bytes); if (!ok) RR_BREAK(); // this should never trigger unless the temp_buffer is way too small (Iggy bug) p->p1 = &gdraw->mipmap; p->texture_data = gdraw->mipmap.pixels[0]; p->num_rows = gdraw->mipmap.bheight; p->stride_in_bytes = gdraw->mipmap.pitch[0]; p->i0 = 0; // current output y p->i1 = width; p->i2 = height; } else { // non-mipmapped textures, we just upload straight to their destination p->p1 = NULL; p->texture_data = (U8 *)t->raw_ptr; p->num_rows = height; p->stride_in_bytes = base_stride * bytes_pixel; } return true; } static rrbool RADLINK gdraw_MakeTextureMore(GDraw_MakeTexture_ProcessingInfo *p) { GDrawHandle *t = (GDrawHandle *)p->p0; if (p->p1) { U8 *mipstart = (U8 *)t->raw_ptr; GDrawMipmapContext *c = (GDrawMipmapContext *)p->p1; U32 width = p->i1; U32 height = p->i2; U32 bheight = c->bheight; U32 w_pow2 = round_up_to_pow2(width); U32 h_pow2 = round_up_to_pow2(height); U32 level = 0; U32 outy = p->i0; if (outy >= height) // wait, we've already processed the whole texture! return false; do { U32 pitch = tex_linear_stride(width) * c->bpp; U32 srcpitch = c->pitch[level]; // copy image data to destination U8 *dest = mipstart + (outy >> level) * pitch; U8 *src = c->pixels[level]; for (U32 y=0; y < bheight; y++) memcpy(dest + y*pitch, src + y*srcpitch, width * c->bpp); // mip offsets are computed from pow2 base size mipstart += RR_MAX(w_pow2 >> level, 1) * RR_MAX(h_pow2 >> level, 1) * c->bpp; width = RR_MAX(width >> 1, 1); height = RR_MAX(height >> 1, 1); bheight = RR_MAX(bheight >> 1, 1); } while(gdraw_MipmapAddLines(c, ++level)); // next chunk please! p->i0 += p->num_rows; p->texture_data = c->pixels[0]; p->num_rows = c->bheight = RR_MIN(c->bheight, p->i2 - p->i0); return true; } else return false; // non-streaming upload; you got the full image first time! } static GDrawTexture * RADLINK gdraw_MakeTextureEnd(GDraw_MakeTexture_ProcessingInfo *p, GDrawStats *stats) { if (p->p1) gdraw_MakeTextureMore(p); // submit last piece of data RR_UNUSED_VARIABLE(stats); return (GDrawTexture *) p->p0; } static rrbool RADLINK gdraw_UpdateTextureBegin(GDrawTexture *t, void *unique_id, GDrawStats *stats) { return gdraw_HandleCacheLockStats((GDrawHandle *) t, unique_id, stats); } static void RADLINK gdraw_UpdateTextureRect(GDrawTexture *t, void *unique_id, S32 x, S32 y, S32 stride, S32 w, S32 h, U8 *samples, gdraw_texture_format format) { RR_UNUSED_VARIABLE(unique_id); GDrawHandle *s = (GDrawHandle *) t; U32 bpp = (format == GDRAW_TEXTURE_FORMAT_font) ? 1 : 4; // make sure texture is not active. note that we don't implement texture ghosting; // this is an actual wait and you can't update a texture you've already used during // the current frame. (we only use this path for font cache updates) wait_on_fence(s->fence); U32 bpl = w * bpp; U32 dpitch = tex_linear_stride(sceGxmTextureGetWidth(s->handle.tex.gxm)) * bpp; U8 *src = samples; U8 *dst = (U8 *)s->raw_ptr + y*dpitch + x*bpp; for (S32 row=0; row < h; row++) memcpy(dst + row*dpitch, src + row*stride, bpl); } static void RADLINK gdraw_UpdateTextureEnd(GDrawTexture *t, void *unique_id, GDrawStats *stats) { RR_UNUSED_VARIABLE(unique_id); RR_UNUSED_VARIABLE(stats); RR_UNUSED_VARIABLE(t); // no unlock! (tiled renderer again) } static void RADLINK gdraw_FreeTexture(GDrawTexture *tt, void *unique_id, GDrawStats *stats) { GDrawHandle *t = (GDrawHandle *) tt; assert(t != NULL); if (t->owner == unique_id || unique_id == NULL) { gdraw_res_kill(t, stats); } } static rrbool RADLINK gdraw_TryToLockTexture(GDrawTexture *t, void *unique_id, GDrawStats *stats) { return gdraw_HandleCacheLockStats((GDrawHandle *) t, unique_id, stats); } static void RADLINK gdraw_DescribeTexture(GDrawTexture *tex, GDraw_Texture_Description *desc) { GDrawHandle *p = (GDrawHandle *) tex; desc->width = sceGxmTextureGetWidth(p->handle.tex.gxm); desc->height = sceGxmTextureGetHeight(p->handle.tex.gxm); desc->size_in_bytes = p->bytes; } static void RADLINK gdraw_SetAntialiasTexture(S32 width, U8 *rgba) { if (sceGxmTextureGetData(&gdraw->aa_tex) != NULL) return; assert(width <= MAX_AATEX_WIDTH); void *ptr = gdraw_arena_alloc(&gdraw->context_arena, width * 4, GDRAW_PSP2_TEXTURE_ALIGNMENT); if (!ptr) return; sceGxmTextureInitLinear(&gdraw->aa_tex, ptr, SCE_GXM_TEXTURE_FORMAT_U8U8U8U8_ABGR, width, 1, 1); memcpy(ptr, rgba, width * 4); } //////////////////////////////////////////////////////////////////////// // // Vertex buffer creation/deletion // static rrbool RADLINK gdraw_MakeVertexBufferBegin(void *unique_id, gdraw_vformat vformat, S32 vbuf_size, S32 ibuf_size, GDraw_MakeVertexBuffer_ProcessingInfo *p, GDrawStats *stats) { GDrawHandle *vb; vb = gdraw_res_alloc_begin(gdraw->vbufcache, vbuf_size + ibuf_size, stats); if (!vb) return false; vb->handle.vbuf.verts = vb->raw_ptr; vb->handle.vbuf.inds = (U8 *) vb->raw_ptr + vbuf_size; p->p0 = vb; p->vertex_data = (U8 *)vb->handle.vbuf.verts; p->index_data = (U8 *)vb->handle.vbuf.inds; p->vertex_data_length = vbuf_size; p->index_data_length = ibuf_size; gdraw_HandleCacheAllocateEnd(vb, vbuf_size + ibuf_size, unique_id, GDRAW_HANDLE_STATE_locked); return true; } static rrbool RADLINK gdraw_MakeVertexBufferMore(GDraw_MakeVertexBuffer_ProcessingInfo *p) { RR_BREAK(); return false; } static GDrawVertexBuffer * RADLINK gdraw_MakeVertexBufferEnd(GDraw_MakeVertexBuffer_ProcessingInfo *p, GDrawStats *stats) { RR_UNUSED_VARIABLE(stats); return (GDrawVertexBuffer *)p->p0; } static rrbool RADLINK gdraw_TryLockVertexBuffer(GDrawVertexBuffer *vb, void *unique_id, GDrawStats *stats) { return gdraw_HandleCacheLockStats((GDrawHandle *) vb, unique_id, stats); } static void RADLINK gdraw_FreeVertexBuffer(GDrawVertexBuffer *vb, void *unique_id, GDrawStats *stats) { GDrawHandle *h = (GDrawHandle *) vb; assert(h != NULL); // @GDRAW_ASSERT if (h->owner == unique_id) gdraw_res_kill(h, stats); } static void RADLINK gdraw_DescribeVertexBuffer(GDrawVertexBuffer *vbuf, GDraw_VertexBuffer_Description *desc) { GDrawHandle *p = (GDrawHandle *) vbuf; desc->size_in_bytes = p->bytes; } //////////////////////////////////////////////////////////////////////// // // Constant buffer layouts // struct VertexVars { F32 world[2][4]; F32 x_offs[4]; F32 texgen_s[4]; F32 texgen_t[4]; F32 viewproj[4]; }; struct PixelCommonVars { F32 color_mul[4]; F32 color_add[4]; F32 focal[4]; }; //////////////////////////////////////////////////////////////////////// // // Rendering helpers // static void set_gxm_texture(U32 unit, SceGxmTexture *tex, U32 wrap, U32 nearest) { static const U32 addrbits[ASSERT_COUNT(GDRAW_WRAP__count, 4)] = { #define CLAMPMODE(x) ((SCE_GXM_PDS_DOUTT0_UADDRMODE_##x << SCE_GXM_PDS_DOUTT0_UADDRMODE_SHIFT) | (SCE_GXM_PDS_DOUTT0_VADDRMODE_##x << SCE_GXM_PDS_DOUTT0_VADDRMODE_SHIFT)) CLAMPMODE(CLAMP), // GDRAW_WRAP_clamp CLAMPMODE(REPEAT), // GDRAW_WRAP_repeat CLAMPMODE(MIRROR), // GDRAW_WRAP_mirror CLAMPMODE(CLAMP), // GDRAW_WRAP_clamp_to_border (unused in this impl - just use regular clamp) #undef CLAMPMODE }; static const U32 addrmask = SCE_GXM_PDS_DOUTT0_UADDRMODE_MASK | SCE_GXM_PDS_DOUTT0_VADDRMODE_MASK; static const U32 filterbits[2] = { // nearest off SCE_GXM_PDS_DOUTT0_MINFILTER_MASK | SCE_GXM_PDS_DOUTT0_MAGFILTER_MASK | SCE_GXM_PDS_DOUTT0_MIPFILTER_MASK, // nearest on SCE_GXM_PDS_DOUTT0_MINFILTER_MASK | SCE_GXM_PDS_DOUTT0_MIPFILTER_MASK, }; static const U32 filtermask = SCE_GXM_PDS_DOUTT0_MINFILTER_MASK | SCE_GXM_PDS_DOUTT0_MAGFILTER_MASK | SCE_GXM_PDS_DOUTT0_MIPFILTER_MASK; assert(wrap < GDRAW_WRAP__count); assert(nearest < 2); SceGxmTexture texv = *tex; texv.controlWords[0] = (tex->controlWords[0] & ~(addrmask | filtermask)) | addrbits[wrap] | filterbits[nearest]; sceGxmSetFragmentTexture(gdraw->gxm, unit, &texv); } static void remove_scissor(); static inline void disable_scissor(bool force) { if (gdraw->scissor_state) remove_scissor(); if (force || gdraw->scissor_state != 0) { gdraw->scissor_state = 0; sceGxmSetRegionClip(gdraw->gxm, SCE_GXM_REGION_CLIP_OUTSIDE, gdraw->cview.x0, gdraw->cview.y0, gdraw->cview.x1 - 1, gdraw->cview.y1 - 1); } } static void set_viewport_raw(S32 x, S32 y, S32 w, S32 h) { gdraw->cview.x0 = x; gdraw->cview.y0 = y; gdraw->cview.x1 = x + w; gdraw->cview.y1 = y + h; // AP - ZOffset/ZScale were set to 0.0/1.0 which is wrong. This fixed the bad poly draw order effect on the models in Skin Select sceGxmSetViewport(gdraw->gxm, (F32)x + (F32)w*0.5f, (F32)w * 0.5f, (F32)y + (F32)h*0.5f, (F32)h * -0.5f, 0.5f, 0.5f); } static void set_projection_raw(S32 x0, S32 x1, S32 y0, S32 y1) { gdraw->projection[0] = 2.0f / (x1-x0); gdraw->projection[1] = 2.0f / (y1-y0); gdraw->projection[2] = (x1 + x0) / (F32) (x0 - x1); gdraw->projection[3] = (y1 + y0) / (F32) (y0 - y1); } static void set_viewport() { set_viewport_raw(gdraw->vx, gdraw->vy, gdraw->tw, gdraw->th); } static void set_projection() { set_projection_raw(gdraw->tx0, gdraw->tx0 + gdraw->tw, gdraw->ty0 + gdraw->th, gdraw->ty0); } static void clear_renderstate() { SceGxmContext *gxm = gdraw->gxm; sceGxmSetFrontDepthFunc(gxm, SCE_GXM_DEPTH_FUNC_ALWAYS); sceGxmSetFrontDepthWriteEnable(gxm, SCE_GXM_DEPTH_WRITE_DISABLED); sceGxmSetFrontStencilFunc(gxm, SCE_GXM_STENCIL_FUNC_ALWAYS, SCE_GXM_STENCIL_OP_KEEP, SCE_GXM_STENCIL_OP_KEEP, SCE_GXM_STENCIL_OP_KEEP, 0, 0); // AP - Added this in to reset the clip region to full screen. This fixed the Splash text not appearing and the bad draw order on the 2x2 crafting cursor. disable_scissor(true); gdraw->z_stencil_key = 0; } static void set_common_renderstate() { SceGxmContext *gxm = gdraw->gxm; // clear our state caching memset(gdraw->active_tex, 0, sizeof(gdraw->active_tex)); gdraw->scissor_state = 0; gdraw->cur_fp = NULL; gdraw->cur_vp = NULL; // all the state we won't touch again until we're done rendering sceGxmSetCullMode(gxm, SCE_GXM_CULL_NONE); sceGxmSetFrontDepthBias(gxm, 0, 0); sceGxmSetFrontFragmentProgramEnable(gxm, SCE_GXM_FRAGMENT_PROGRAM_ENABLED); sceGxmSetFrontPolygonMode(gxm, SCE_GXM_POLYGON_MODE_TRIANGLE_FILL); sceGxmSetFrontVisibilityTestEnable(gxm, SCE_GXM_VISIBILITY_TEST_DISABLED); sceGxmSetFrontStencilRef(gxm, 255); sceGxmSetTwoSidedEnable(gxm, SCE_GXM_TWO_SIDED_DISABLED); sceGxmSetViewportEnable(gxm, SCE_GXM_VIEWPORT_ENABLED); sceGxmSetWBufferEnable(gxm, SCE_GXM_WBUFFER_DISABLED); sceGxmSetWClampValue(gxm, 0.00001f); sceGxmSetWClampEnable(gxm, SCE_GXM_WCLAMP_MODE_ENABLED); set_gxm_texture(AATEX_SAMPLER, &gdraw->aa_tex, GDRAW_WRAP_clamp, 0); // states we modify during regular rendering clear_renderstate(); set_viewport(); set_projection(); disable_scissor(true); } static void set_fragment_program(SceGxmFragmentProgram *fp); static void do_screen_quad(gswf_recti *s, const F32 *tc, F32 z, GDrawStats *stats); static void render_clear_quad(gswf_recti *r, GDrawStats *stats) { do_screen_quad(r, four_zeros, 1.0f, stats); stats->nonzero_flags |= GDRAW_STATS_clears; stats->num_clears++; stats->cleared_pixels += (r->x1 - r->x0) * (r->y1 - r->y0); } static void clear_whole_surf(bool clear_depth, bool clear_stencil, const F32 *clear_color, GDrawStats *stats) { SceGxmContext *gxm = gdraw->gxm; gdraw->z_stencil_key = ~0u; // force reset on next draw SceGxmStencilOp stencil_op = clear_stencil ? SCE_GXM_STENCIL_OP_ZERO : SCE_GXM_STENCIL_OP_KEEP; sceGxmSetFrontDepthFunc(gxm, SCE_GXM_DEPTH_FUNC_ALWAYS); sceGxmSetFrontDepthWriteEnable(gxm, clear_depth ? SCE_GXM_DEPTH_WRITE_ENABLED : SCE_GXM_DEPTH_WRITE_DISABLED); sceGxmSetFrontStencilFunc(gxm, SCE_GXM_STENCIL_FUNC_ALWAYS, stencil_op, stencil_op, stencil_op, 0xff, 0xff); set_fragment_program(gdraw->clear_fp); PixelCommonVars *para = (PixelCommonVars *)alloc_and_set_fragment_uniforms(gxm, 0, sizeof(PixelCommonVars)); if (!para) return; memset(para, 0, sizeof(*para)); if (clear_color) vst1q_f32(para->color_mul, vld1q_f32(clear_color)); else sceGxmSetFrontFragmentProgramEnable(gxm, SCE_GXM_FRAGMENT_PROGRAM_DISABLED); set_viewport_raw(0, 0, gdraw->screen_bounds.x1, gdraw->screen_bounds.y1); set_projection_raw(0, gdraw->screen_bounds.x1, gdraw->screen_bounds.y1, 0); render_clear_quad(&gdraw->screen_bounds, stats); if (!clear_color) sceGxmSetFrontFragmentProgramEnable(gxm, SCE_GXM_FRAGMENT_PROGRAM_ENABLED); set_viewport(); set_projection(); } //////////////////////////////////////////////////////////////////////// // // Begin rendering for a frame // void gdraw_psp2_SetTileOrigin(S32 x, S32 y) { gdraw->vx = x; gdraw->vy = y; } void gdraw_psp2_ClearBeforeNextRender(const F32 clear_color_rgba[4]) { for (U32 i=0; i < 4; i++) gdraw->clear_color_rgba[i] = clear_color_rgba[i]; gdraw->next_tile_clear = gdraw->clear_color_rgba; } static void RADLINK gdraw_SetViewSizeAndWorldScale(S32 w, S32 h, F32 scalex, F32 scaley) { gdraw->fw = w; gdraw->fh = h; gdraw->tw = w; gdraw->th = h; gdraw->world_to_pixel[0] = scalex; gdraw->world_to_pixel[1] = scaley; } // must include anything necessary for texture creation/update static void RADLINK gdraw_RenderingBegin(void) { assert(gdraw->gxm != NULL); // call after gdraw_psp2_Begin set_common_renderstate(); } static void RADLINK gdraw_RenderingEnd(void) { clear_renderstate(); } static void RADLINK gdraw_RenderTileBegin(S32 x0, S32 y0, S32 x1, S32 y1, S32 pad, GDrawStats *stats) { pad = 0; // no reason to ever pad, we don't do filters. gdraw->tx0 = x0; gdraw->ty0 = y0; gdraw->tw = x1-x0; gdraw->th = y1-y0; gdraw->tx0v = gdraw->tx0 - gdraw->vx; gdraw->ty0v = gdraw->ty0 - gdraw->vy; // padded region gdraw->tx0p = RR_MAX(x0 - pad, 0); gdraw->ty0p = RR_MAX(y0 - pad, 0); gdraw->tpw = RR_MIN(x1 + pad, gdraw->fw) - gdraw->tx0p; gdraw->tph = RR_MIN(y1 + pad, gdraw->fh) - gdraw->ty0p; // clear our depth/stencil buffers (and also color if requested) clear_whole_surf(true, true, gdraw->next_tile_clear, stats); gdraw->next_tile_clear = NULL; } static void RADLINK gdraw_RenderTileEnd(GDrawStats *stats) { // necessary to reset mask bits at end of tile // (if we had them set) disable_scissor(false); // reap once per frame even if there are no allocs gdraw_res_reap(gdraw->texturecache, stats); gdraw_res_reap(gdraw->vbufcache, stats); } void gdraw_psp2_Begin(SceGxmContext *context, const SceGxmColorSurface *color, const SceGxmDepthStencilSurface *depth, gdraw_psp2_dynamic_buffer *dynamic_buf) { U32 xmin, ymin, xmax, ymax; assert(gdraw->gxm == NULL); // may not nest Begin calls // need to wait for the buffer to become idle before we can use it! gdraw_psp2_WaitForDynamicBufferIdle(dynamic_buf); gdraw->gxm = context; gdraw->dyn_buf = dynamic_buf; gdraw_arena_init(&gdraw->dynamic, dynamic_buf->start, dynamic_buf->size_in_bytes); memset(&gdraw->dynamic_stats, 0, sizeof(gdraw->dynamic_stats)); sceGxmColorSurfaceGetClip(color, &xmin, &ymin, &xmax, &ymax); gdraw->screen_bounds.x0 = xmin; gdraw->screen_bounds.y0 = ymin; gdraw->screen_bounds.x1 = xmax + 1; gdraw->screen_bounds.y1 = ymax + 1; // If we don't have a depth/stencil surface with the right format, // Iggy rendering is not going to come out right. if (!depth || !sceGxmDepthStencilSurfaceIsEnabled(depth) || sceGxmDepthStencilSurfaceGetFormat(depth) != SCE_GXM_DEPTH_STENCIL_FORMAT_DF32M_S8) { // Why this format? // - We need the stencil buffer to support Flash masking operations. // - We need the mask bit to perform pixel-accurate scissor testing. // There's only one format that satisfies both requirements. IggyGDrawSendWarning(NULL, "Iggy rendering will not work correctly unless a depth/stencil buffer in DF32M_S8 format is provided."); } // For immediate contexts, we need to flush pending vertex transfers before // every draw because we might hit a mid-scene flush. On a deferred // context, we need not worry about this happening. SceGxmContextType type; SceGxmErrorCode err = sceGxmGetContextType(context, &type); if (err == SCE_OK && type == SCE_GXM_CONTEXT_TYPE_DEFERRED) gdraw->draw_transfer_flush_mask = 0; else gdraw->draw_transfer_flush_mask = GDRAW_TRANSFER_vertex; } SceGxmNotification gdraw_psp2_End() { GDrawStats gdraw_stats = {}; SceGxmNotification notify; assert(gdraw->gxm != NULL); // please keep Begin / End pairs properly matched notify = scene_end_notification(); gdraw->dyn_buf->sync = gdraw->scene_end_fence.value; gdraw->dyn_buf->stats = gdraw->dynamic_stats; gdraw_arena_init(&gdraw->dynamic, NULL, 0); gdraw->gxm = NULL; gdraw->dyn_buf = NULL; // NOTE: the stats from these go nowhere. That's a bit unfortunate, but the // GDrawStats model is that things can be accounted to something in the // display tree, and that's simply not the case with scene-global things // like this. With only one Iggy file, a sensible place would be to // accumulate stats in the root node. But the user can render multiple Iggys // in the same scene, and it's unclear what to do in that case. handle_cache_tick(gdraw->texturecache, gdraw->scene_end_fence, &gdraw_stats); handle_cache_tick(gdraw->vbufcache, gdraw->scene_end_fence, &gdraw_stats); // finally, unlock everything gdraw_HandleCacheUnlockAll(gdraw->texturecache); gdraw_HandleCacheUnlockAll(gdraw->vbufcache); return notify; } #define MAX_DEPTH_VALUE (1 << 22) static void RADLINK gdraw_GetInfo(GDrawInfo *d) { d->num_stencil_bits = 8; d->max_id = MAX_DEPTH_VALUE-2; // for floating point depth, just use mantissa, e.g. 16-20 bits d->max_texture_size = MAX_TEXTURE2D_DIM; d->buffer_format = GDRAW_BFORMAT_vbib; d->shared_depth_stencil = 1; d->always_mipmap = 0; d->conditional_nonpow2 = 0; d->has_rendertargets = 0; } //////////////////////////////////////////////////////////////////////// // // Render targets // static rrbool RADLINK gdraw_TextureDrawBufferBegin(gswf_recti *region, gdraw_texture_format format, U32 flags, void *owner, GDrawStats *stats) { IggyGDrawSendWarning(NULL, "GDraw no rendertarget support on PSP2"); return false; } static GDrawTexture *RADLINK gdraw_TextureDrawBufferEnd(GDrawStats *stats) { return NULL; } //////////////////////////////////////////////////////////////////////// // // Clear stencil/depth buffers // static void RADLINK gdraw_ClearStencilBits(U32 bits) { GDrawStats stats = {}; clear_whole_surf(false, true, NULL, &stats); } static void RADLINK gdraw_ClearID(void) { GDrawStats stats = {}; clear_whole_surf(true, false, NULL, &stats); } //////////////////////////////////////////////////////////////////////// // // Fragment programs and scissor mask // static RADINLINE void set_fragment_program(SceGxmFragmentProgram *fp) { if (gdraw->cur_fp != fp) { gdraw->cur_fp = fp; sceGxmSetFragmentProgram(gdraw->gxm, fp); } } static RADINLINE void set_vertex_program(SceGxmVertexProgram *vp) { if (gdraw->cur_vp != vp) { gdraw->cur_vp = vp; sceGxmSetVertexProgram(gdraw->gxm, vp); } } static void draw_scissor_region(gswf_recti *r, SceGxmStencilFunc func) { GDraw * RADRESTRICT gd = gdraw; // determine tile-aligned rect gswf_recti tile_rect; tile_rect.x0 = align_down(r->x0, SCE_GXM_TILE_SIZEX); tile_rect.y0 = align_down(r->y0, SCE_GXM_TILE_SIZEY); tile_rect.x1 = align_up(r->x1, SCE_GXM_TILE_SIZEX); tile_rect.y1 = align_up(r->y1, SCE_GXM_TILE_SIZEY); // set up vertex positions F32 *vpos = (F32 *)alloc_and_set_vertex_uniforms(gd->gxm, 0, 8 * sizeof(F32)); if (!vpos) return; vpos[0] = (F32)tile_rect.x0; vpos[1] = (F32)r->x0; vpos[2] = (F32)r->x1; vpos[3] = (F32)tile_rect.x1; vpos[4] = (F32)tile_rect.y0; vpos[5] = (F32)r->y0; vpos[6] = (F32)r->y1; vpos[7] = (F32)tile_rect.y1; // set our region clip; note gxm bounds are max-inclusive, hence the -1. sceGxmSetRegionClip(gd->gxm, SCE_GXM_REGION_CLIP_OUTSIDE, tile_rect.x0, tile_rect.y0, tile_rect.x1 - 1, tile_rect.y1 - 1); // set up programs and state set_vertex_program(gd->mask_vp); set_fragment_program(gd->mask_update_fp); // draw gdraw->z_stencil_key = ~0u; // invalidate z state -> force reset on next draw sceGxmSetFrontStencilFunc(gd->gxm, func, SCE_GXM_STENCIL_OP_KEEP, SCE_GXM_STENCIL_OP_KEEP, SCE_GXM_STENCIL_OP_KEEP, 0, 0); sceGxmSetViewportEnable(gd->gxm, SCE_GXM_VIEWPORT_DISABLED); sceGxmDrawPrecomputed(gd->gxm, &gd->mask_draw); sceGxmSetViewportEnable(gd->gxm, SCE_GXM_VIEWPORT_ENABLED); } static void remove_scissor() { // re-enable drawing in the exclusion region draw_scissor_region(&gdraw->cur_scissor, SCE_GXM_STENCIL_FUNC_ALWAYS); } static void materialize_scissor(int x0, int y0, int x1, int y1) { GDraw * RADRESTRICT gd = gdraw; // did we have scissor set? if (gd->scissor_state) { // are we about to set the same scissor again? if (gd->cur_scissor.x0 == x0 && gd->cur_scissor.y0 == y0 && gd->cur_scissor.x1 == x1 && gd->cur_scissor.y1 == y1) return; // nothing to do! remove_scissor(); } // draw the mask: disable drawing outside scissor region gd->cur_scissor.x0 = x0; gd->cur_scissor.y0 = y0; gd->cur_scissor.x1 = x1; gd->cur_scissor.y1 = y1; draw_scissor_region(&gdraw->cur_scissor, SCE_GXM_STENCIL_FUNC_NEVER); gd->scissor_state = 1; } //////////////////////////////////////////////////////////////////////// // // Set all the render state from GDrawRenderState // // converts a depth id into a Z value static inline F32 depth_from_id(S32 id) { return (1.0f - 1.0f / MAX_DEPTH_VALUE) - id * (1.0f / MAX_DEPTH_VALUE); // = 1 - (id + 1) / MAX_DEPTH_VALUE } static bool set_renderstate_full(const GDrawRenderState * RADRESTRICT r, GDrawStats *stats) { static const int canonical_blend[ASSERT_COUNT(GDRAW_BLEND__count, 6)] = { GDRAW_CBLEND_none, // GDRAW_BLEND_none GDRAW_CBLEND_alpha, // GDRAW_BLEND_alpha GDRAW_CBLEND_none, // GDRAW_BLEND_multiply - UNSUPPORTED on PSP2 (only occurs in layer blends which we don't allow) GDRAW_CBLEND_add, // GDRAW_BLEND_add GDRAW_CBLEND_none, // GDRAW_BLEND_filter GDRAW_CBLEND_none, // GDRAW_BLEND_special }; GDraw * RADRESTRICT gd = gdraw; SceGxmContext * RADRESTRICT gxm = gd->gxm; // we need to handle scissor first, since it might require us to draw things. if (r->scissor) { S32 xs = gd->tx0v; S32 ys = gd->ty0v; // clip against viewport S32 x0 = RR_MAX(r->scissor_rect.x0 - xs, gd->cview.x0); S32 y0 = RR_MAX(r->scissor_rect.y0 - ys, gd->cview.y0); S32 x1 = RR_MIN(r->scissor_rect.x1 - xs, gd->cview.x1); S32 y1 = RR_MIN(r->scissor_rect.y1 - ys, gd->cview.y1); // in case our actual scissor is empty, bail. if (x1 <= x0 || y1 <= y0) return false; materialize_scissor(x0, y0, x1, y1); } else if (r->scissor != gd->scissor_state) disable_scissor(0); // allocate dynamic uniform bufs void * dyn_uniform = alloc_dynamic(sizeof(VertexVars) + sizeof(PixelCommonVars), sizeof(U32)); if (!dyn_uniform) return false; VertexVars * RADRESTRICT vvars = (VertexVars *)dyn_uniform; PixelCommonVars * RADRESTRICT pvars = (PixelCommonVars *)(vvars + 1); // right after VertexVars in dyn_uniform alloc sceGxmSetVertexUniformBuffer(gxm, 0, vvars); sceGxmSetFragmentUniformBuffer(gxm, 0, pvars); // vertex uniforms F32 depth = depth_from_id(r->id); if (!r->use_world_space) { gdraw_ObjectSpace(vvars->world[0], r->o2w, depth, 0.0f); } else { gdraw_WorldSpace(vvars->world[0], gdraw->world_to_pixel, depth, 0.0f); } float32x4_t edge = vld1q_f32(r->edge_matrix); float32x4_t s0_texgen = vld1q_f32(r->s0_texgen); // always copy, even when unused. float32x4_t t0_texgen = vld1q_f32(r->t0_texgen); float32x4_t viewproj = vld1q_f32(gd->projection); vst1q_f32(vvars->x_offs, edge); vst1q_f32(vvars->texgen_s, s0_texgen); vst1q_f32(vvars->texgen_t, t0_texgen); vst1q_f32(vvars->viewproj, viewproj); // fragment uniforms float32x4_t col_mul = vld1q_f32(r->color); float32x4_t col_add = vdupq_n_f32(0.0f); float32x4_t focal = vld1q_f32(r->focal_point); if (r->cxf_add) col_add = vmulq_n_f32(vcvtq_f32_s32(vmovl_s16(vld1_s16(r->cxf_add))), 1.0f / 255.0f); vst1q_f32(pvars->color_mul, col_mul); vst1q_f32(pvars->color_add, col_add); vst1q_f32(pvars->focal, focal); // set the fragment program int tex0mode = r->tex0_mode; int cblend_mode = canonical_blend[r->blend_mode]; if (r->stencil_set) cblend_mode = GDRAW_CBLEND_nowrite; int additive_mode = 0; if (r->cxf_add) additive_mode = r->cxf_add[3] ? 2 : 1; set_fragment_program(gd->main_fp[tex0mode][additive_mode][cblend_mode]); // set textures if (tex0mode != GDRAW_TEXTURE_none) { if (!r->tex[0]) // this can happen if some allocs fail. just abort in that case. return false; if (gd->active_tex[0] != r->tex[0]) { gd->active_tex[0] = r->tex[0]; set_gxm_texture(0, ((GDrawHandle *) r->tex[0])->handle.tex.gxm, r->wrap0, r->nearest0); } } // z/stencil mode changed? U32 z_stencil_key = r->set_id | (r->test_id << 1) | (r->stencil_test << 16) | (r->stencil_set << 24); if (z_stencil_key != gd->z_stencil_key) { gd->z_stencil_key = z_stencil_key; sceGxmSetFrontDepthFunc(gxm, r->test_id ? SCE_GXM_DEPTH_FUNC_LESS : SCE_GXM_DEPTH_FUNC_ALWAYS); sceGxmSetFrontDepthWriteEnable(gxm, r->set_id ? SCE_GXM_DEPTH_WRITE_ENABLED : SCE_GXM_DEPTH_WRITE_DISABLED); sceGxmSetFrontStencilFunc(gxm, r->stencil_test ? SCE_GXM_STENCIL_FUNC_EQUAL : SCE_GXM_STENCIL_FUNC_ALWAYS, SCE_GXM_STENCIL_OP_KEEP, SCE_GXM_STENCIL_OP_KEEP, SCE_GXM_STENCIL_OP_REPLACE, r->stencil_test, r->stencil_set); } return true; } static RADINLINE bool set_renderstate(const GDrawRenderState * RADRESTRICT r, GDrawStats *stats) { if (!r->identical_state) return set_renderstate_full(r, stats); else return true; } //////////////////////////////////////////////////////////////////////// // // Draw triangles with a given renderstate // static const U32 vfmt_stride_bytes[ASSERT_COUNT(GDRAW_vformat__basic_count, 3)] = { 8, // GDRAW_vformat_v2 16, // GDRAW_vformat_v2aa 16, // GDRAW_vformat_v2tc2 }; #ifdef GDRAW_DEBUG static GDrawHandle *check_resource(void *ptr) { GDrawHandle *h = (GDrawHandle *)ptr; // This is our memory management invariant for tilers. assert(h->state == GDRAW_HANDLE_STATE_locked || h->state == GDRAW_HANDLE_STATE_pinned || h->state == GDRAW_HANDLE_STATE_user_owned); return h; } #else #define check_resource(ptr) ((GDrawHandle *)(ptr)) #endif static RADINLINE void fence_resources(void *r1, void *r2=NULL, void *r3=NULL) { GDrawFence fence = get_next_fence(); if (r1) check_resource(r1)->fence = fence; if (r2) check_resource(r2)->fence = fence; if (r3) check_resource(r3)->fence = fence; } static RADINLINE void draw_ind_tris_u16(SceGxmContext *gxm, S32 vfmt, const void *verts, const void *inds, S32 num_inds) { sceGxmSetVertexStream(gxm, 0, verts); sceGxmDraw(gxm, SCE_GXM_PRIMITIVE_TRIANGLES, SCE_GXM_INDEX_FORMAT_U16, inds, num_inds); } static void RADLINK gdraw_DrawIndexedTriangles(GDrawRenderState *r, GDrawPrimitive *p, GDrawVertexBuffer *buf, GDrawStats *stats) { SceGxmContext *gxm = gdraw->gxm; GDrawHandle *vb = (GDrawHandle *) buf; S32 vfmt = p->vertex_format; // AP only round the coords for type 2 vertex format (text) if( p->vertex_format == 2 ) { r->o2w->trans[0] = (int) r->o2w->trans[0]; r->o2w->trans[1] = (int) r->o2w->trans[1]; } assert(vfmt < GDRAW_vformat__basic_count); U32 stride = vfmt_stride_bytes[vfmt]; if (!set_renderstate(r, stats)) return; set_vertex_program(gdraw->vp[vfmt]); // do we have transfers we need to flush before we draw? if ((gdraw->outstanding_transfers & gdraw->draw_transfer_flush_mask) != 0) gdraw_gpu_wait_for_transfer_completion(); if (vb) draw_ind_tris_u16(gxm, vfmt, (U8 *)vb->handle.vbuf.verts + (UINTa)p->vertices, (U8 *)vb->handle.vbuf.inds + (UINTa)p->indices, p->num_indices); else if (p->indices) { U32 vbytes = p->num_vertices * stride; U32 ibytes = p->num_indices * sizeof(U16); U8 *buf = (U8 *)alloc_dynamic(vbytes + ibytes, sizeof(U32)); if (!buf) return; memcpy(buf, p->vertices, vbytes); memcpy(buf + vbytes, p->indices, ibytes); draw_ind_tris_u16(gxm, vfmt, buf, buf + vbytes, p->num_indices); } else { // dynamic quads assert(p->num_vertices % 4 == 0); U32 num_bytes = (U32)p->num_vertices * stride; U8 *buf = (U8 *)alloc_dynamic(num_bytes, sizeof(U32)); if (!buf) return; memcpy(buf, p->vertices, num_bytes); S32 pos = 0; while (pos < p->num_vertices) { S32 vert_count = RR_MIN(p->num_vertices - pos, QUAD_IB_COUNT * 4); draw_ind_tris_u16(gxm, vfmt, buf + pos*stride, gdraw->quad_ib, (vert_count >> 2) * 6); pos += vert_count; } } fence_resources(vb, r->tex[0], r->tex[1]); stats->nonzero_flags |= GDRAW_STATS_batches; stats->num_batches += 1; stats->drawn_indices += p->num_indices; stats->drawn_vertices += p->num_vertices; } /////////////////////////////////////////////////////////////////////// // // Flash 8 filter effects // static void do_screen_quad(gswf_recti *s, const F32 *tc, F32 z, GDrawStats *stats) { static const F32 worldv[2][4] = { { 1.0f, 0.0f, 0.0f, 0.0f }, { 0.0f, 1.0f, 0.0f, 0.0f }, }; set_vertex_program(gdraw->vp[GDRAW_vformat_v2tc2]); VertexVars *vvars = (VertexVars *)alloc_and_set_vertex_uniforms(gdraw->gxm, 0, sizeof(VertexVars)); if (!vvars) return; float32x4_t world0 = vld1q_f32(worldv[0]); float32x4_t world1 = vld1q_f32(worldv[1]); float32x4_t zero = vdupq_n_f32(0.0f); float32x4_t viewproj = vld1q_f32(gdraw->projection); world0 = vsetq_lane_f32(z, world0, 2); vst1q_f32(vvars->world[0], world0); vst1q_f32(vvars->world[1], world1); vst1q_f32(vvars->x_offs, zero); vst1q_f32(vvars->texgen_s, zero); vst1q_f32(vvars->texgen_t, zero); vst1q_f32(vvars->viewproj, viewproj); gswf_vertex_xyst * RADRESTRICT v = (gswf_vertex_xyst *)alloc_dynamic(4 * sizeof(gswf_vertex_xyst), 4); if (!v) return; F32 px0 = (F32) s->x0, py0 = (F32) s->y0, px1 = (F32) s->x1, py1 = (F32) s->y1; v[0].x = px0; v[0].y = py0; v[0].s = tc[0]; v[0].t = tc[1]; v[1].x = px1; v[1].y = py0; v[1].s = tc[2]; v[1].t = tc[1]; v[2].x = px1; v[2].y = py1; v[2].s = tc[2]; v[2].t = tc[3]; v[3].x = px0; v[3].y = py1; v[3].s = tc[0]; v[3].t = tc[3]; sceGxmSetVertexStream(gdraw->gxm, 0, v); sceGxmDraw(gdraw->gxm, SCE_GXM_PRIMITIVE_TRIANGLES, SCE_GXM_INDEX_FORMAT_U16, gdraw->quad_ib, 6); } static void RADLINK gdraw_FilterQuad(GDrawRenderState *r, S32 x0, S32 y0, S32 x1, S32 y1, GDrawStats *stats) { F32 tc[4]; gswf_recti s; // clip to tile boundaries s.x0 = RR_MAX(x0, gdraw->tx0p); s.y0 = RR_MAX(y0, gdraw->ty0p); s.x1 = RR_MIN(x1, gdraw->tx0p + gdraw->tpw); s.y1 = RR_MIN(y1, gdraw->ty0p + gdraw->tph); if (s.x1 < s.x0 || s.y1 < s.y0) return; // prepare for drawing tc[0] = (s.x0 - gdraw->tx0p) / (F32) gdraw->screen_bounds.x1; tc[1] = (s.y0 - gdraw->ty0p) / (F32) gdraw->screen_bounds.y1; tc[2] = (s.x1 - gdraw->tx0p) / (F32) gdraw->screen_bounds.x1; tc[3] = (s.y1 - gdraw->ty0p) / (F32) gdraw->screen_bounds.y1; // actual filter effects and special blends aren't supported on PSP2. if (r->blend_mode == GDRAW_BLEND_filter || r->blend_mode == GDRAW_BLEND_special) { IggyGDrawSendWarning(NULL, "GDraw no filter or special blend support on PSP2"); // just don't do anything. } else { // just a plain quad. if (!set_renderstate(r, stats)) return; do_screen_quad(&s, tc, 0.0f, stats); fence_resources(r->tex[0], r->tex[1]); } } //////////////////////////////////////////////////////////////////////// // // Shaders and state initialization // #include "gdraw_psp2_shaders.inl" static bool gxm_check(SceGxmErrorCode err) { if (err != SCE_OK) IggyGDrawSendWarning(NULL, "GXM error"); return err == SCE_OK; } static bool register_shader(SceGxmShaderPatcher *patcher, ShaderCode *shader) { if (!shader->blob) return SCE_OK; bool ok = gxm_check(sceGxmShaderPatcherRegisterProgram(patcher, (const SceGxmProgram *)shader->blob, &shader->id)); shader->registered = ok; return ok; } static void unregister_shader(SceGxmShaderPatcher *patcher, ShaderCode *shader) { if (shader->registered) { sceGxmShaderPatcherUnregisterProgram(patcher, shader->id); shader->registered = false; } } static bool register_and_create_vertex_prog(SceGxmVertexProgram **out_prog, SceGxmShaderPatcher *patcher, ShaderCode *shader, U32 attr_bytes) { SceGxmVertexAttribute attr; SceGxmVertexStream stream; if (!register_shader(patcher, shader)) return NULL; *out_prog = NULL; if (attr_bytes) { attr.streamIndex = 0; attr.offset = 0; attr.format = SCE_GXM_ATTRIBUTE_FORMAT_UNTYPED; attr.componentCount = attr_bytes / sizeof(U32); attr.regIndex = 0; stream.stride = attr_bytes; stream.indexSource = SCE_GXM_INDEX_SOURCE_INDEX_16BIT; return gxm_check(sceGxmShaderPatcherCreateVertexProgram(patcher, shader->id, &attr, 1, &stream, 1, out_prog)); } else return gxm_check(sceGxmShaderPatcherCreateVertexProgram(patcher, shader->id, NULL, 0, NULL, 0, out_prog)); } static void destroy_vertex_prog(SceGxmShaderPatcher *patcher, SceGxmVertexProgram *prog) { if (prog) sceGxmShaderPatcherReleaseVertexProgram(patcher, prog); } static bool create_fragment_prog(SceGxmFragmentProgram **out_prog, SceGxmShaderPatcher *patcher, ShaderCode *shader, const SceGxmBlendInfo *blend, SceGxmOutputRegisterFormat out_fmt) { *out_prog = NULL; return gxm_check(sceGxmShaderPatcherCreateFragmentProgram(patcher, shader->id, out_fmt, SCE_GXM_MULTISAMPLE_NONE, blend, NULL, out_prog)); } static void destroy_fragment_prog(SceGxmShaderPatcher *patcher, SceGxmFragmentProgram *prog) { if (prog) sceGxmShaderPatcherReleaseFragmentProgram(patcher, prog); } static bool create_all_programs(SceGxmOutputRegisterFormat reg_format) { SceGxmShaderPatcher *patcher = gdraw->patcher; // blend states static const SceGxmBlendInfo blends[ASSERT_COUNT(GDRAW_CBLEND__count, 4)] = { // GDRAW_CBLEND_none { SCE_GXM_COLOR_MASK_ALL, SCE_GXM_BLEND_FUNC_ADD, SCE_GXM_BLEND_FUNC_ADD, SCE_GXM_BLEND_FACTOR_ONE, SCE_GXM_BLEND_FACTOR_ZERO, SCE_GXM_BLEND_FACTOR_ONE, SCE_GXM_BLEND_FACTOR_ZERO }, // GDRAW_CBLEND_alpha { SCE_GXM_COLOR_MASK_ALL, SCE_GXM_BLEND_FUNC_ADD, SCE_GXM_BLEND_FUNC_ADD, SCE_GXM_BLEND_FACTOR_ONE, SCE_GXM_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, SCE_GXM_BLEND_FACTOR_ONE, SCE_GXM_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA }, // GDRAW_CBLEND_add { SCE_GXM_COLOR_MASK_ALL, SCE_GXM_BLEND_FUNC_ADD, SCE_GXM_BLEND_FUNC_ADD, SCE_GXM_BLEND_FACTOR_ONE, SCE_GXM_BLEND_FACTOR_ONE, SCE_GXM_BLEND_FACTOR_ONE, SCE_GXM_BLEND_FACTOR_ONE }, // GDRAW_CBLEND_nowrite { SCE_GXM_COLOR_MASK_NONE, SCE_GXM_BLEND_FUNC_ADD, SCE_GXM_BLEND_FUNC_ADD, SCE_GXM_BLEND_FACTOR_ONE, SCE_GXM_BLEND_FACTOR_ZERO, SCE_GXM_BLEND_FACTOR_ONE, SCE_GXM_BLEND_FACTOR_ZERO }, }; // vertex shaders for (int i=0; i < GDRAW_vformat__basic_count; i++) { if (!register_and_create_vertex_prog(&gdraw->vp[i], patcher, vshader_vspsp2_arr + i, vfmt_stride_bytes[i])) return false; } if (!register_and_create_vertex_prog(&gdraw->mask_vp, patcher, vshader_vspsp2_mask_arr, 0)) return false; // fragment shaders for (int i=0; i < GDRAW_TEXTURE__count; i++) { for (int j=0; j < 3; j++) { ShaderCode *sh = pshader_basic_arr + i*3 + j; if (!register_shader(patcher, sh)) return false; for (int k=0; k < GDRAW_CBLEND__count; k++) if (!create_fragment_prog(&gdraw->main_fp[i][j][k], patcher, sh, &blends[k], reg_format)) return false; } } if (!register_shader(patcher, pshader_manual_clear_arr) || !create_fragment_prog(&gdraw->clear_fp, patcher, pshader_manual_clear_arr, NULL, reg_format)) return false; gdraw->mask_update_fp = NULL; return gxm_check(sceGxmShaderPatcherCreateMaskUpdateFragmentProgram(patcher, &gdraw->mask_update_fp)); } static void destroy_all_programs() { SceGxmShaderPatcher *patcher = gdraw->patcher; // release all programs for (int i=0; i < GDRAW_vformat__basic_count; i++) destroy_vertex_prog(patcher, gdraw->vp[i]); destroy_vertex_prog(patcher, gdraw->mask_vp); for (int i=0; i < GDRAW_TEXTURE__count * 3 * GDRAW_CBLEND__count; i++) destroy_fragment_prog(patcher, gdraw->main_fp[0][0][i]); destroy_fragment_prog(patcher, gdraw->clear_fp); sceGxmShaderPatcherReleaseFragmentProgram(patcher, gdraw->mask_update_fp); // unregister shaders for (int i=0; i < GDRAW_vformat__basic_count; i++) unregister_shader(patcher, vshader_vspsp2_arr + i); for (int i=0; i < GDRAW_TEXTURE__count*3; i++) unregister_shader(patcher, pshader_basic_arr + i); unregister_shader(patcher, pshader_manual_clear_arr); } typedef struct { S32 num_handles; S32 num_bytes; void *ptr; } GDrawResourceLimit; // Resource limits used by GDraw. Change these using SetResouceLimits! static GDrawResourceLimit gdraw_limits[GDRAW_PSP2_RESOURCE__count]; static GDrawHandleCache *make_handle_cache(gdraw_psp2_resourcetype type, U32 align, rrbool use_twopool) { S32 num_handles = gdraw_limits[type].num_handles; S32 one_pool_bytes = gdraw_limits[type].num_bytes; U32 cache_size = sizeof(GDrawHandleCache) + (num_handles - 1) * sizeof(GDrawHandle); bool is_vertex = (type == GDRAW_PSP2_RESOURCE_vertexbuffer); U32 header_size = num_handles * (is_vertex ? 0 : sizeof(SceGxmTexture)); GDrawHandleCache *cache; if (use_twopool) one_pool_bytes = align_down(one_pool_bytes / 2, align); if (one_pool_bytes < (S32)align) return NULL; cache = (GDrawHandleCache *) IggyGDrawMalloc(cache_size + header_size); if (cache) { gdraw_HandleCacheInit(cache, num_handles, one_pool_bytes); cache->is_vertex = is_vertex; // set up resource headers void *header_start = (U8 *) cache + cache_size; if (!is_vertex) { SceGxmTexture *headers = (SceGxmTexture *) header_start; for (S32 i=0; i < num_handles; i++) cache->handle[i].handle.tex.gxm = &headers[i]; } // set up allocators cache->alloc = gfxalloc_create(gdraw_limits[type].ptr, one_pool_bytes, align, num_handles); if (!cache->alloc) { IggyGDrawFree(cache); return NULL; } if (use_twopool) { cache->alloc_other = gfxalloc_create((U8 *)gdraw_limits[type].ptr + one_pool_bytes, one_pool_bytes, align, num_handles); if (!cache->alloc_other) { IggyGDrawFree(cache->alloc); IggyGDrawFree(cache); return NULL; } // two dummy copies to make sure we have gpu read/write access assert(align >= GPU_MEMCPY_ALIGN); U8 *mem_begin = (U8 *)gdraw_limits[type].ptr; U8 *mem_near_end = (U8 *)gdraw_limits[type].ptr + 2*one_pool_bytes - GPU_MEMCPY_ALIGN; // reads near begin, writes near end gdraw_gpu_memcpy(cache, mem_near_end, mem_begin, GPU_MEMCPY_ALIGN); // reads near end, writes near begin gdraw_gpu_memcpy(cache, mem_begin, mem_near_end, GPU_MEMCPY_ALIGN); gdraw_gpu_wait_for_transfer_completion(); } } return cache; } static void free_handle_cache(GDrawHandleCache *c) { if (c) { if (c->alloc) IggyGDrawFree(c->alloc); if (c->alloc_other) IggyGDrawFree(c->alloc_other); IggyGDrawFree(c); } } void gdraw_psp2_InitDynamicBuffer(gdraw_psp2_dynamic_buffer *buf, void *ptr, U32 num_bytes) { memset(buf, 0, sizeof(*buf)); buf->start = ptr; buf->size_in_bytes = num_bytes; } void gdraw_psp2_WaitForDynamicBufferIdle(gdraw_psp2_dynamic_buffer *buf) { GDrawFence fence; fence.value = buf->sync; wait_on_fence(fence); } int gdraw_psp2_SetResourceMemory(gdraw_psp2_resourcetype type, S32 num_handles, void *ptr, S32 num_bytes) { GDrawStats stats={0}; assert(type >= GDRAW_PSP2_RESOURCE_texture && type < GDRAW_PSP2_RESOURCE__count); assert(num_handles >= 0); assert(num_bytes >= 0); if (!num_handles) num_handles = 1; switch (type) { case GDRAW_PSP2_RESOURCE_texture: make_pool_aligned(&ptr, &num_bytes, GDRAW_PSP2_TEXTURE_ALIGNMENT); break; case GDRAW_PSP2_RESOURCE_vertexbuffer: make_pool_aligned(&ptr, &num_bytes, GDRAW_PSP2_VERTEXBUFFER_ALIGNMENT); break; default: break; } gdraw_limits[type].num_handles = num_handles; gdraw_limits[type].num_bytes = num_bytes; gdraw_limits[type].ptr = ptr; // if no gdraw context created, there's nothing to worry about if (!gdraw) return 1; // make sure GPU is done first (assuming we're in a state where we can dispatch commands) assert(!is_fence_pending(gdraw->scene_end_fence)); // you may not call this while GPU is still busy with Iggy command buffers! if (gdraw->texturecache) gdraw_res_reap(gdraw->texturecache, &stats); if (gdraw->vbufcache) gdraw_res_reap(gdraw->vbufcache, &stats); // in theory we can now check that the given cache is really empty at this point // resize the appropriate pool switch (type) { case GDRAW_PSP2_RESOURCE_texture: free_handle_cache(gdraw->texturecache); gdraw->texturecache = make_handle_cache(GDRAW_PSP2_RESOURCE_texture, GDRAW_PSP2_TEXTURE_ALIGNMENT, true); return gdraw->texturecache != NULL; case GDRAW_PSP2_RESOURCE_vertexbuffer: free_handle_cache(gdraw->vbufcache); gdraw->vbufcache = make_handle_cache(GDRAW_PSP2_RESOURCE_vertexbuffer, GDRAW_PSP2_VERTEXBUFFER_ALIGNMENT, true); return gdraw->vbufcache != NULL; default: return 0; } } void gdraw_psp2_ResetAllResourceMemory() { gdraw_psp2_SetResourceMemory(GDRAW_PSP2_RESOURCE_texture, 0, NULL, 0); gdraw_psp2_SetResourceMemory(GDRAW_PSP2_RESOURCE_vertexbuffer, 0, NULL, 0); } GDrawFunctions *gdraw_psp2_CreateContext(SceGxmShaderPatcher *shader_patcher, void *context_mem, volatile U32 *notification, SceGxmOutputRegisterFormat reg_format) { // mask index buffer: // // 0-----------------------3 y0 // | \5---------------6 / | y1 // | | | | // | | | | // | 9---------------a | y2 // | / \ | // c-----------------------f y3 // // x0 x1 x2 x3 static const U16 mask_ib_data[5*2] = { // tri strip 0,5, 3,6, 15,10, 12,9, 0,5, }; gdraw = (GDraw *) IggyGDrawMalloc(sizeof(*gdraw)); if (!gdraw) return NULL; memset(gdraw, 0, sizeof(*gdraw)); // context shared memory gdraw_arena_init(&gdraw->context_arena, context_mem, GDRAW_PSP2_CONTEXT_MEM_SIZE); // notifications *notification = 0; gdraw->fence_label = notification; gdraw->next_fence_index = 1; gdraw->scene_end_fence.value = 0; // shader patcher gdraw->patcher = shader_patcher; // set up memory for all resource types for (int i=0; i < GDRAW_PSP2_RESOURCE__count; i++) gdraw_psp2_SetResourceMemory((gdraw_psp2_resourcetype) i, gdraw_limits[i].num_handles, gdraw_limits[i].ptr, gdraw_limits[i].num_bytes); // shaders and state gdraw->quad_ib = (U16 *)gdraw_arena_alloc(&gdraw->context_arena, QUAD_IB_COUNT * 6 * sizeof(U16), sizeof(U32)); gdraw->mask_ib = (U16 *)gdraw_arena_alloc(&gdraw->context_arena, sizeof(mask_ib_data), sizeof(U32)); if (!gdraw->quad_ib || !gdraw->mask_ib || !create_all_programs(reg_format)) { gdraw_psp2_DestroyContext(); return NULL; } // init quad index buffer for (int i=0; i < QUAD_IB_COUNT; i++) { U16 *out_ind = gdraw->quad_ib + i*6; U16 base = (U16)(i * 4); out_ind[0] = base + 0; out_ind[1] = base + 1; out_ind[2] = base + 2; out_ind[3] = base + 0; out_ind[4] = base + 2; out_ind[5] = base + 3; } // mask draw (can only alloc this here since we need mask_vp) gdraw->mask_draw_gpu = gdraw_arena_alloc(&gdraw->context_arena, sceGxmGetPrecomputedDrawSize(gdraw->mask_vp), SCE_GXM_PRECOMPUTED_ALIGNMENT); if (!gdraw->mask_draw_gpu) { gdraw_psp2_DestroyContext(); return NULL; } memcpy(gdraw->mask_ib, mask_ib_data, sizeof(mask_ib_data)); sceGxmPrecomputedDrawInit(&gdraw->mask_draw, gdraw->mask_vp, gdraw->mask_draw_gpu); sceGxmPrecomputedDrawSetParams(&gdraw->mask_draw, SCE_GXM_PRIMITIVE_TRIANGLE_STRIP, SCE_GXM_INDEX_FORMAT_U16, gdraw->mask_ib, 5*2); // API gdraw_funcs.SetViewSizeAndWorldScale = gdraw_SetViewSizeAndWorldScale; gdraw_funcs.GetInfo = gdraw_GetInfo; gdraw_funcs.DescribeTexture = gdraw_DescribeTexture; gdraw_funcs.DescribeVertexBuffer = gdraw_DescribeVertexBuffer; gdraw_funcs.RenderingBegin = gdraw_RenderingBegin; gdraw_funcs.RenderingEnd = gdraw_RenderingEnd; gdraw_funcs.RenderTileBegin = gdraw_RenderTileBegin; gdraw_funcs.RenderTileEnd = gdraw_RenderTileEnd; gdraw_funcs.TextureDrawBufferBegin = gdraw_TextureDrawBufferBegin; gdraw_funcs.TextureDrawBufferEnd = gdraw_TextureDrawBufferEnd; gdraw_funcs.DrawIndexedTriangles = gdraw_DrawIndexedTriangles; gdraw_funcs.FilterQuad = gdraw_FilterQuad; gdraw_funcs.SetAntialiasTexture = gdraw_SetAntialiasTexture; gdraw_funcs.ClearStencilBits = gdraw_ClearStencilBits; gdraw_funcs.ClearID = gdraw_ClearID; gdraw_funcs.MakeTextureBegin = gdraw_MakeTextureBegin; gdraw_funcs.MakeTextureMore = gdraw_MakeTextureMore; gdraw_funcs.MakeTextureEnd = gdraw_MakeTextureEnd; gdraw_funcs.UpdateTextureBegin = gdraw_UpdateTextureBegin; gdraw_funcs.UpdateTextureRect = gdraw_UpdateTextureRect; gdraw_funcs.UpdateTextureEnd = gdraw_UpdateTextureEnd; gdraw_funcs.FreeTexture = gdraw_FreeTexture; gdraw_funcs.TryToLockTexture = gdraw_TryToLockTexture; gdraw_funcs.MakeVertexBufferBegin = gdraw_MakeVertexBufferBegin; gdraw_funcs.MakeVertexBufferMore = gdraw_MakeVertexBufferMore; gdraw_funcs.MakeVertexBufferEnd = gdraw_MakeVertexBufferEnd; gdraw_funcs.TryToLockVertexBuffer = gdraw_TryLockVertexBuffer; gdraw_funcs.FreeVertexBuffer = gdraw_FreeVertexBuffer; gdraw_funcs.MakeTextureFromResource = (gdraw_make_texture_from_resource *) gdraw_psp2_MakeTextureFromResource; gdraw_funcs.FreeTextureFromResource = gdraw_psp2_DestroyTextureFromResource; gdraw_funcs.UnlockHandles = gdraw_UnlockHandles; gdraw_funcs.SetTextureUniqueID = gdraw_SetTextureUniqueID; return &gdraw_funcs; } void gdraw_psp2_DestroyContext(void) { if (gdraw) { GDrawStats stats; memset(&stats, 0, sizeof(stats)); if (gdraw->texturecache) gdraw_res_flush(gdraw->texturecache, &stats); if (gdraw->vbufcache) gdraw_res_flush(gdraw->vbufcache, &stats); // make sure the GPU is done first assert(!is_fence_pending(gdraw->scene_end_fence)); free_handle_cache(gdraw->texturecache); free_handle_cache(gdraw->vbufcache); destroy_all_programs(); IggyGDrawFree(gdraw); gdraw = NULL; } } void RADLINK gdraw_psp2_BeginCustomDraw(IggyCustomDrawCallbackRegion *region, float matrix[16]) { clear_renderstate(); gdraw_GetObjectSpaceMatrix(matrix, region->o2w, gdraw->projection, 0.0f, 0); } void RADLINK gdraw_psp2_CalculateCustomDraw_4J(IggyCustomDrawCallbackRegion * region, F32 mat[16]) { gdraw_GetObjectSpaceMatrix(mat, region->o2w, gdraw->projection, 0.0f, 0); } void RADLINK gdraw_psp2_EndCustomDraw(IggyCustomDrawCallbackRegion *region) { set_common_renderstate(); } GDrawTexture * RADLINK gdraw_psp2_MakeTextureFromResource(U8 *file_in_memory, S32 len, IggyFileTexturePSP2 *tex) { SceGxmErrorCode (*init_func)(SceGxmTexture *texture, const void *data, SceGxmTextureFormat texFormat, uint32_t width, uint32_t height, uint32_t mipCount) = NULL; switch (tex->texture.type) { case SCE_GXM_TEXTURE_SWIZZLED: init_func = sceGxmTextureInitSwizzled; break; case SCE_GXM_TEXTURE_LINEAR: init_func = sceGxmTextureInitLinear; break; case SCE_GXM_TEXTURE_TILED: init_func = sceGxmTextureInitTiled; break; case SCE_GXM_TEXTURE_SWIZZLED_ARBITRARY: init_func = sceGxmTextureInitSwizzledArbitrary; break; } if (!init_func) { IggyGDrawSendWarning(NULL, "Unsupported texture type in MakeTextureFromResource"); return NULL; } SceGxmTexture gxm; SceGxmErrorCode err = init_func(&gxm, file_in_memory + tex->file_offset, (SceGxmTextureFormat)tex->texture.format, tex->texture.width, tex->texture.height, tex->texture.mip_count); if (err != SCE_OK) { IggyGDrawSendWarning(NULL, "Texture init failed in MakeTextureFromResource (bad data?)"); return NULL; } return gdraw_psp2_WrappedTextureCreate(&gxm); } extern void RADLINK gdraw_psp2_DestroyTextureFromResource(GDrawTexture *tex) { gdraw_psp2_WrappedTextureDestroy(tex); }