#include "stdafx.h"
// gdraw_orbis.cpp - author: Fabian Giesen - copyright 2012 RAD Game Tools
//
// This implements the Iggy graphics driver layer for Orbis.

// GDraw consists of several components that interact fairly loosely with each other;
// e.g. the resource management, drawing and filtering parts are all fairly independent
// of each other. If you want to modify some aspect of GDraw - say the texture allocation
// logic - your best bet is usually to just look for one of the related entry points,
// e.g. MakeTextureBegin, and take it from there. There's a bunch of code in this file,
// but most of it isn't really complicated. The bits that are somewhat tricky have a more
// detailed explanation at the top of the relevant section.

// NOTE(review): the bare #include directives below have no header name; the
// names look like they were lost in extraction. Restore them from the
// original file before building.
#include
#include
#include
#include "iggy.h"
#include "gdraw.h"
#include
#include
#include
#include "gdraw_orbis.h"

using namespace sce;

// Per-resource native payload: a texture view or a vertex/index pointer pair.
typedef union {
    struct {
        Gnm::Texture *gnm;
        void *gnm_ptr;
    } tex;
    struct {
        void *verts;
        void *inds;
    } vbuf;
} GDrawNativeHandle;

// Configuration macros defined ahead of the shared implementation include below.
#define GDRAW_MANAGE_MEM
#define GDRAW_DEFRAGMENT
#define GDRAW_NO_STREAMING_MIPGEN
#define GDRAW_MIN_FREE_AMOUNT (64*1024) // always try to free at least this many bytes when throwing out old textures
#define GDRAW_MAYBE_UNUSED __attribute__((unused))
#include "gdraw_shared.inl"

// max rendertarget stack depth. this depends on the extent to which you
// use filters and non-standard blend modes, and how nested they are.
#define MAX_RENDER_STACK_DEPTH 8 // Iggy is hardcoded to a limit of 16... probably 1-3 is realistic!
#define MAX_SAMPLERS 3
#define MAX_ATTRS 2 // max number of attrs read by a vertex shader
#define AATEX_SAMPLER 7 // sampler that aa_tex gets set in

// Evaluates to b when a == b, otherwise to -1.
#define ASSERT_COUNT(a,b) ((a) == (b) ? (b) : -1)

#define MAX_TEXTURE2D_DIM 16384 // from GPU walkthrough
#define MAX_AATEX_WIDTH 64

static GDrawFunctions gdraw_funcs;

// render target state
typedef struct {
    GDrawHandle *color_buffer;
    S32 base_x, base_y, width, height;
    rrbool cached;
    rrbool needs_clear_eliminate;
    U32 clear_col[2];
} GDrawFramebufferState;

// A shader: its blob pointer plus a descriptor pointer viewable as the
// stage-specific Gnmx type.
struct ShaderCode {
    void *blob;
    union {
        void *desc;
        Gnmx::ShaderCommonData *common;
        Gnmx::PsShader *ps;
        Gnmx::VsShader *vs;
        Gnmx::CsShader *cs;
    };
};

///////////////////////////////////////////////////////////////////////////////
//
// GDraw data structure
//
//
// This is the primary rendering abstraction, which hides all
// the platform-specific rendering behavior from Iggy. It is
// full of platform-specific graphics state, and also general
// graphics state so that it doesn't have to callback into Iggy
// to get at that graphics state.

struct GDraw {
    // 16-byte aligned!
    F32 projection[4]; // always 2D scale+2D translate. first two are scale, last two are translate.

    // scale factor converting worldspace to viewspace <0,0>..
    F32 world_to_pixel[2];

    // graphics context
    Gnmx::GfxContext *gfxc;

    // cached state
    U32 scissor_state; // ~0 if unknown, otherwise 0 or 1
    int blend_mode; // active blend mode (-1 if unknown)
    int writes_masked; // are color writes masked or not? (-1 if unknown)
    U32 z_stencil_key; // field built from z/stencil test flags. 0 = no z/stencil test, ~0 is used for "unknown state"
    GDrawTexture *active_tex[MAX_SAMPLERS];
    ShaderCode *cur_ps;

    // pixel shader base pointers
    ShaderCode *basic_ps[GDRAW_TEXTURE__count];

    // render targets
    Gnm::RenderTarget main_colorbuffer;
    Gnm::DepthRenderTarget main_depthbuffer;
    GDrawHandleCache rendertargets;
    GDrawHandle rendertarget_handles[MAX_RENDER_STACK_DEPTH]; // not -1, because we use +1 to initialize
    Gnm::Texture rendertarget_textures[MAX_RENDER_STACK_DEPTH+1];

    gswf_recti rt_valid[MAX_RENDER_STACK_DEPTH+1]; // valid rect for texture clamping

    // size of our render targets
    S32 frametex_width, frametex_height;

    // viewport setting (in pixels) for the current tile
    S32 vx, vy;
    S32 fw, fh; // full width/height of virtual display
    S32 tw, th; // actual width/height of current tile
    S32 tpw, tph; // width/height of padded version of tile

    S32 tx0, ty0;
    S32 tx0p, ty0p;
    rrbool in_blur;
    struct {
        S32 x0, y0, x1, y1;
    } cview; // current viewport

    Gnm::Texture aa_tex;
    Gnm::Buffer pixel_common_zero_cbuf;
    GDrawArena vidshared_arena; // mainly for shaders

    // synchronization
    volatile U64 *label_ptr;
    U64 next_fence_index;

    // render target stack described above for 'temporary' rendering
    GDrawFramebufferState frame[MAX_RENDER_STACK_DEPTH];
    GDrawFramebufferState *cur;

    // texture and vertex buffer pools
    GDrawHandleCache *texturecache;
    GDrawHandleCache *vbufcache;

    // render target storage
    Gnm::RenderTarget rt_colorbuffer;
    Gnm::SizeAlign rt_colorbuffer_sa;
    GDrawArena rt_arena;

    // staging buffer
    GDrawArena staging;
    gdraw_orbis_staging_stats staging_stats;

    // upload temp texture
    Gnm::Texture upload_tex;

    // precompiled state
    Gnm::Sampler sampler_state[2][GDRAW_WRAP__count]; // [nearest][wrap]
    Gnm::DepthStencilControl depth_stencil_control[2][2][2]; // [set_id][test_id][stencil_enable]
    Gnm::BlendControl blend_control[GDRAW_BLEND__count];

    // pixel shaders
    ShaderCode main_ps[GDRAW_TEXTURE__count][3];
    ShaderCode exceptional_blend[GDRAW_BLENDSPECIAL__count];
    ShaderCode filter_ps[2][16];
    ShaderCode blur_ps[MAX_TAPS+1];
    ShaderCode colormatrix;
    ShaderCode clear_ps;

    // compute shaders
    ShaderCode texupload_cs;
    ShaderCode memset_cs;
    ShaderCode defragment_cs;
    ShaderCode mipgen_cs;

    // vertex formats
    struct VFormatDesc {
        U32 stride;
        U32 num_attribs;
        Gnm::DataFormat formats[MAX_ATTRS];
        U32 vb_offs[MAX_ATTRS];
    };
    VFormatDesc vfmt[GDRAW_vformat__count];

    // vertex shader
    ShaderCode vs;

    // for bookkeeping
    GDrawFence tile_end_fence;

    // antialias texture upload cache
    bool aatex_new;
    U8 aatex_data[MAX_AATEX_WIDTH * 4];
};

// The single driver instance every function in this file operates on.
static GDraw *gdraw;

static const F32 four_zeros[4] = { 0.0f, 0.0f, 0.0f, 0.0f };

////////////////////////////////////////////////////////////////////////
//
// Synchronization, pointer wrangling and command buffer management
//

// Returns a fence carrying the current next_fence_index; it does not advance
// the index or emit any GPU command.
static RADINLINE GDrawFence get_next_fence()
{
    GDrawFence fence;
    fence.value = gdraw->next_fence_index;
    return fence;
}

// A fence is pending while the value stored at label_ptr[0] is still below it.
static RADINLINE rrbool is_fence_pending(GDrawFence fence)
{
    return gdraw->label_ptr[0] < fence.value;
}

// Takes the current fence index (post-incrementing it) and queues an
// end-of-pipe write of that value to label_ptr.
static GDrawFence put_fence()
{
    GDrawFence fence = { gdraw->next_fence_index++ };
    gdraw->gfxc->writeImmediateAtEndOfPipe(Gnm::kEopFlushCbDbCaches, (void *)gdraw->label_ptr, fence.value, Gnm::kCacheActionNone);
    return fence;
}

static void *insert_cb_label();

static void wait_on_fence(GDrawFence fence)
{
    // we don't actually wait here, current Orbis GDraw memory management is
    // non-blocking by design. but the resource manager emits these when it's
    // about to free (and then reuse) a resource that was previously in use,
    // so when we get the call, make sure to finish shading first, because
    // the following commands are likely to stomp over (part of) the resource
    // pools.
    if (is_fence_pending(fence)) {
        // GPU-side wait: write 1 to a fresh label at end of pixel shading,
        // then have the command stream wait for the label to equal 1.
        void *label = insert_cb_label();
        gdraw->gfxc->writeAtEndOfShader(Gnm::kEosPsDone, label, 1);
        gdraw->gfxc->waitOnAddress(label, ~0, Gnm::kWaitCompareFuncEqual, 1);
    }
}

extern "C" void gdraw_ps4_wait(U64)
{
    // stub for Iggy - ignored.
} static void *insert_cb_label() { U64 *label = (U64 *) gdraw->gfxc->allocateFromCommandBuffer(sizeof(U64), Gnm::kEmbeddedDataAlignment8); *label = 0; return label; } // compute->compute sync (just wait for previous dispatch to finish) static void compute_to_compute_sync() { Gnmx::GfxContext *gfxc = gdraw->gfxc; void *label = insert_cb_label(); gfxc->writeAtEndOfShader(Gnm::kEosCsDone, label, 1); gfxc->waitOnAddress(label, ~0, Gnm::kWaitCompareFuncEqual, 1); } // compute->graphics sync static void compute_to_graphics_sync() { compute_to_compute_sync(); // compute writes made it to L2 but not all CU L1s, so need to wipe L1 before we go on. gdraw->gfxc->flushShaderCachesAndWait(Gnm::kCacheActionInvalidateL1, 0, Gnm::kStallCommandBufferParserDisable); } // render-to-texture sync static RADINLINE void rtt_sync(void *base, U32 size256) { UINTa addr = (UINTa) base; assert((addr & 0xff) == 0); U32 base256 = (U32) (addr >> 8); gdraw->gfxc->waitForGraphicsWrites(base256, size256, Gnm::kWaitTargetSlotCb0, Gnm::kCacheActionWriteBackAndInvalidateL1andL2, Gnm::kExtendedCacheActionFlushAndInvalidateCbCache, Gnm::kStallCommandBufferParserDisable); } //////////////////////////////////////////////////////////////////////// // // Texture/vertex memory defragmentation support code // static void gdraw_gpu_memcpy(GDrawHandleCache *c, void *dst, void *src, U32 num_bytes) { Gnmx::GfxContext *gfxc = gdraw->gfxc; Gnm::Buffer src_buf, dst_buf; U32 num_16b = (num_bytes + 15) / 16; src_buf.initAsDataBuffer(src, Gnm::kDataFormatR32G32B32A32Uint, num_16b); dst_buf.initAsDataBuffer(dst, Gnm::kDataFormatR32G32B32A32Uint, num_16b); src_buf.setResourceMemoryType(Gnm::kResourceMemoryTypeGC); dst_buf.setResourceMemoryType(Gnm::kResourceMemoryTypeGC); gfxc->setBuffers(Gnm::kShaderStageCs, 0, 1, &src_buf); gfxc->setRwBuffers(Gnm::kShaderStageCs, 0, 1, &dst_buf); gfxc->dispatch(1, 1, 1); // need to sync before the next one can start (because of potential overlaps) // no need to flush L1 because 
defragment doesn't read any data it just wrote. compute_to_compute_sync(); } static void gdraw_defragment_cache(GDrawHandleCache *c, GDrawStats *stats) { if (!gdraw_CanDefragment(c)) return; gdraw->gfxc->setShaderType(Gnm::kShaderTypeCompute); gdraw->gfxc->setCsShader(gdraw->defragment_cs.cs); // actual defragmentation... gdraw_DefragmentMain(c, GDRAW_DEFRAGMENT_may_overlap, stats); // go over all handles and adjust pointers. // pointer adjustment is different between textures and vertex buffers if (!c->is_vertex) { for (S32 i=0; i < c->max_handles; i++) { GDrawHandle *h = &c->handle[i]; if (gdraw_res_is_managed(h)) { Gnm::Texture *tex = h->handle.tex.gnm; tex->setBaseAddress(h->raw_ptr); } } } else { for (S32 i=0; i < c->max_handles; i++) { GDrawHandle *h = &c->handle[i]; if (gdraw_res_is_managed(h)) { SINTa index_offs = (U8 *)h->handle.vbuf.inds - (U8 *)h->handle.vbuf.verts; h->handle.vbuf.verts = h->raw_ptr; h->handle.vbuf.inds = (U8 *)h->raw_ptr + index_offs; } } } // synchronize compute_to_graphics_sync(); gdraw->gfxc->setCsShader(NULL); gdraw->gfxc->setShaderType(Gnm::kShaderTypeGraphics); // don't need to wait till GPU is done since we never access GPU memory from the // CPU, we always go through the staging buffer. 
} static void api_free_resource(GDrawHandle *r) { if (!r->cache->is_vertex) { for (S32 i=0; i < MAX_SAMPLERS; i++) if (gdraw->active_tex[i] == (GDrawTexture *) r) gdraw->active_tex[i] = NULL; } } static void RADLINK gdraw_UnlockHandles(GDrawStats *stats) { gdraw_HandleCacheUnlockAll(gdraw->texturecache); gdraw_HandleCacheUnlockAll(gdraw->vbufcache); } //////////////////////////////////////////////////////////////////////// // // Various helpers // struct TexUploadPara { U32 dest_pos[2]; U32 size[2]; }; struct MipGenPara { U32 sizeMinus1[2]; }; static bool can_staging_satisfy(U32 size, U32 align) { // determine aligned start addr UINTa start_addr = ((UINTa) gdraw->staging.current + align-1) & ~((UINTa) align - 1); U8 *ptr = (U8 *) start_addr; return ptr + size <= gdraw->staging.end; } static void track_staging_alloc_attempt(U32 size, U32 align) { // TODO: include alignment bytes in tracking! gdraw->staging_stats.allocs_attempted++; gdraw->staging_stats.bytes_attempted += size; gdraw->staging_stats.largest_bytes_attempted = RR_MAX(gdraw->staging_stats.largest_bytes_attempted, size); } static void track_staging_alloc_failed() { if (gdraw->staging_stats.allocs_attempted == gdraw->staging_stats.allocs_succeeded + 1) { // warn the first time we run out of mem IggyGDrawSendWarning(NULL, "GDraw out of staging memory"); } } static void *alloc_staging(U32 size, U32 align) { track_staging_alloc_attempt(size, align); void *ptr = gdraw_arena_alloc(&gdraw->staging, size, align); if (ptr) { gdraw->staging_stats.allocs_succeeded++; gdraw->staging_stats.bytes_succeeded += size; gdraw->staging_stats.largest_bytes_succeeded = RR_MAX(gdraw->staging_stats.largest_bytes_succeeded, size); } else track_staging_alloc_failed(); return ptr; } static void *embed_const_buffer_impl(Gnm::ShaderStage stage, U32 index, U32 size) { Gnm::Buffer buf; void *ptr = gdraw->gfxc->allocateFromCommandBuffer(size, Gnm::kEmbeddedDataAlignment4); buf.initAsConstantBuffer(ptr, size); 
gdraw->gfxc->setConstantBuffers(stage, index, 1, &buf); return ptr; } template static RADINLINE T *embed_const_buffer(Gnm::ShaderStage stage, U32 index) { return (T *) embed_const_buffer_impl(stage, index, sizeof(T)); } static void upload_begin() { gdraw->gfxc->setShaderType(Gnm::kShaderTypeCompute); gdraw->gfxc->setCsShader(gdraw->texupload_cs.cs); } static void upload_end() { // synchronize compute_to_graphics_sync(); gdraw->gfxc->setShaderType(Gnm::kShaderTypeGraphics); } static void upload_dispatch(const Gnm::Texture &dst_tex, const Gnm::Texture &src_tex, U32 dst_x, U32 dst_y, U32 ncols, U32 nrows) { auto para = embed_const_buffer(Gnm::kShaderStageCs, 0); para->dest_pos[0] = dst_x; para->dest_pos[1] = dst_y; para->size[0] = ncols; para->size[1] = nrows; Gnmx::GfxContext *gfxc = gdraw->gfxc; gfxc->setTextures(Gnm::kShaderStageCs, 0, 1, &src_tex); gfxc->setRwTextures(Gnm::kShaderStageCs, 0, 1, &dst_tex); gfxc->dispatch((ncols + 7) / 8, (nrows + 7) / 8, 1); } static void upload_tex_subrect(Gnm::Texture *dst, U32 dst_mip, U32 dst_x, U32 dst_y, const void *src, U32 pitch, U32 ncols, U32 nrows, U32 bpp) { // set up texture descriptions Gnm::Texture dst_tex = *dst; dst_tex.setMipLevelRange(dst_mip, dst_mip); dst_tex.setResourceMemoryType(Gnm::kResourceMemoryTypeGC); Gnm::Texture src_tex; Gnm::SizeAlign sa = src_tex.initAs2d(ncols, nrows, 1, dst_tex.getDataFormat(), Gnm::kTileModeDisplay_LinearAligned, Gnm::kNumSamples1); // alloc staging buffer for source texture U8 *staging = (U8 *) alloc_staging(sa.m_size, sa.m_align); if (!staging) // if out of staging mem, bail return; src_tex.setBaseAddress(staging); src_tex.setResourceMemoryType(Gnm::kResourceMemoryTypeGC); // copy source data into staging buffer U32 staging_pitch_bytes = src_tex.getPitch() * bpp; U32 copy_bytes = ncols * bpp; for (U32 y=0; y < nrows; y++) memcpy(staging + y * staging_pitch_bytes, (const U8 *) src + y * pitch, copy_bytes); upload_dispatch(dst_tex, src_tex, dst_x, dst_y, ncols, nrows); } static 
void gpu_compute_memset(void *ptr, U32 value, U32 size_in_bytes) { assert((size_in_bytes & 3) == 0); Gnmx::GfxContext *gfxc = gdraw->gfxc; gfxc->setShaderType(Gnm::kShaderTypeCompute); gfxc->setCsShader(gdraw->memset_cs.cs); auto para = embed_const_buffer(Gnm::kShaderStageCs, 0); *para = value; // we tag resources as SC (system coherent); i.e. no L1, write to L2, // tag as volatile so we can flush it cheaply. Gnm::Buffer buf; if (size_in_bytes >= 16) { U32 nelem = size_in_bytes / 16; buf.initAsDataBuffer(ptr, Gnm::kDataFormatR32G32B32A32Uint, nelem); buf.setResourceMemoryType(Gnm::kResourceMemoryTypeSC); gfxc->setRwBuffers(Gnm::kShaderStageCs, 0, 1, &buf); gfxc->dispatch((nelem + 255) / 256, 1, 1); } if (size_in_bytes & 0xf) { buf.initAsDataBuffer((U8 *)ptr + (size_in_bytes & ~0xf), Gnm::kDataFormatR32Uint, (size_in_bytes & 0xf) / 4); buf.setResourceMemoryType(Gnm::kResourceMemoryTypeSC); gfxc->setRwBuffers(Gnm::kShaderStageCs, 0, 1, &buf); gfxc->dispatch(1, 1, 1); } compute_to_compute_sync(); // write back L2 volatile here to make sure the values reach memory. // this is all necessary because this function is used to clear HTile // buffers which are written by the CB/DB blocks and not accessed // through the regular caches. 
gfxc->flushShaderCachesAndWait(Gnm::kCacheActionWriteBackL2Volatile, 0, Gnm::kStallCommandBufferParserDisable); gfxc->setShaderType(Gnm::kShaderTypeGraphics); gfxc->setCsShader(NULL); } //////////////////////////////////////////////////////////////////////// // // Texture creation/updating/deletion // GDrawTexture * RADLINK gdraw_orbis_WrappedTextureCreate(Gnm::Texture *tex) { GDrawStats stats = {}; GDrawHandle *p = gdraw_res_alloc_begin(gdraw->texturecache, 0, &stats); p->handle.tex.gnm_ptr = NULL; gdraw_HandleCacheAllocateEnd(p, 0, NULL, GDRAW_HANDLE_STATE_user_owned); gdraw_orbis_WrappedTextureChange((GDrawTexture *) p, tex); return (GDrawTexture *) p; } void RADLINK gdraw_orbis_WrappedTextureChange(GDrawTexture *handle, Gnm::Texture *tex) { GDrawHandle *p = (GDrawHandle *) handle; *p->handle.tex.gnm = *tex; } void RADLINK gdraw_orbis_WrappedTextureDestroy(GDrawTexture *handle) { // 4J Changed sce::Gnmx::GfxContext *oldContext = gdraw->gfxc; gdraw->gfxc = RenderManager.GetCurrentBackBufferContext(); GDrawStats stats = {}; gdraw_res_free((GDrawHandle *) handle, &stats); gdraw->gfxc = oldContext; } static void RADLINK gdraw_SetTextureUniqueID(GDrawTexture *tex, void *old_id, void *new_id) { GDrawHandle *p = (GDrawHandle *) tex; // if this is still the handle it's thought to be, change the owner; // if the owner *doesn't* match, then they're changing a stale handle, so ignore if (p->owner == old_id) p->owner = new_id; } static rrbool RADLINK gdraw_MakeTextureBegin(void *owner, S32 width, S32 height, gdraw_texture_format gformat, U32 flags, GDraw_MakeTexture_ProcessingInfo *p, GDrawStats *stats) { S32 bytes_pixel = 4; GDrawHandle *t = NULL; Gnm::Texture gt; Gnm::SizeAlign sa; Gnm::DataFormat format = Gnm::kDataFormatR8G8B8A8Unorm; if (width > MAX_TEXTURE2D_DIM || height > MAX_TEXTURE2D_DIM) { IggyGDrawSendWarning(NULL, "GDraw %d x %d texture not supported by hardware (dimension limit %d)", width, height, MAX_TEXTURE2D_DIM); return false; } if (gformat == 
GDRAW_TEXTURE_FORMAT_font) { format = Gnm::kDataFormatR8Unorm; bytes_pixel = 1; } // don't do mipmaps for gradients! if (height == 1) flags &= ~GDRAW_MAKETEXTURE_FLAGS_mipmap; // determine the number of mipmaps to use and size of resulting surface U32 mipmaps = 0; do mipmaps++; while ((flags & GDRAW_MAKETEXTURE_FLAGS_mipmap) && ((width >> mipmaps) || (height >> mipmaps))); sa = gt.initAs2d(width, height, mipmaps, format, (height == 1) ? Gnm::kTileModeDisplay_LinearAligned : Gnm::kTileModeThin_1dThin, Gnm::kNumSamples1); if (gformat == GDRAW_TEXTURE_FORMAT_font) // we want an A8 not R8 texture! gt.setChannelOrder(Gnm::kTextureChannelX, Gnm::kTextureChannelX, Gnm::kTextureChannelX, Gnm::kTextureChannelX); // Make sure we actually satisfy alignment requirements assert(sa.m_align <= GDRAW_ORBIS_TEXTURE_ALIGNMENT); // Determine space requirements for the upload texture and check if there's enough space // do this before gdraw_res_alloc_begin so we don't start freeing resources to make space // only to later discover that we can't proceed due to lack of staging mem anyway. Gnm::SizeAlign sa_up = gdraw->upload_tex.initAs2d(width, height, 1, format, Gnm::kTileModeDisplay_LinearAligned, Gnm::kNumSamples1); if (!can_staging_satisfy(sa_up.m_size, sa_up.m_align)) { track_staging_alloc_attempt(sa_up.m_size, sa_up.m_align); track_staging_alloc_failed(); return false; } // allocate a handle and make room in the cache for this much data U32 size = sa.m_size; t = gdraw_res_alloc_begin(gdraw->texturecache, size, stats); if (!t) return false; t->handle.tex.gnm_ptr = t->raw_ptr; gt.setBaseAddress(t->raw_ptr); *t->handle.tex.gnm = gt; // allocate staging texture (we checked that there was enough space earlier) void *upload_ptr = alloc_staging(sa_up.m_size, sa_up.m_align); if (!upload_ptr) { // not supposed to happen - we checked there was enough space earlier! // but if we ever get here, be sure to handle it properly anyway. 
assert(0); gdraw_HandleCacheAllocateFail(t); return false; } gdraw->upload_tex.setBaseAddress(upload_ptr); gdraw_HandleCacheAllocateEnd(t, size, owner, (flags & GDRAW_MAKETEXTURE_FLAGS_never_flush) ? GDRAW_HANDLE_STATE_pinned : GDRAW_HANDLE_STATE_locked); stats->nonzero_flags |= GDRAW_STATS_alloc_tex; stats->alloc_tex += 1; stats->alloc_tex_bytes += size; p->texture_type = GDRAW_TEXTURE_TYPE_rgba; p->p0 = t; p->texture_data = (U8 *) upload_ptr; p->num_rows = height; // just send the whole texture at once p->stride_in_bytes = gdraw->upload_tex.getPitch() * bytes_pixel; return true; } static rrbool RADLINK gdraw_MakeTextureMore(GDraw_MakeTexture_ProcessingInfo *p) { return false; // we always let the user write the full texture on the first try } static GDrawTexture * RADLINK gdraw_MakeTextureEnd(GDraw_MakeTexture_ProcessingInfo *p, GDrawStats *stats) { GDrawHandle *t = (GDrawHandle *) p->p0; Gnm::Texture *gnm_tex = t->handle.tex.gnm; Gnmx::GfxContext *gfxc = gdraw->gfxc; U32 width = gnm_tex->getWidth(); U32 height = gnm_tex->getHeight(); // upload the mip data upload_begin(); Gnm::Texture dst_tex = *gnm_tex; dst_tex.setResourceMemoryType(Gnm::kResourceMemoryTypeGC); Gnm::Texture src_tex = dst_tex; dst_tex.setMipLevelRange(0, 0); upload_dispatch(dst_tex, gdraw->upload_tex, 0, 0, width, height); upload_end(); // compute the mip maps gfxc->setShaderType(Gnm::kShaderTypeCompute); gfxc->setCsShader(gdraw->mipgen_cs.cs); for (U32 mip=1; mip <= gnm_tex->getLastMipLevel(); mip++) { U32 mipw = RR_MAX(gnm_tex->getWidth() >> mip, 1); U32 miph = RR_MAX(gnm_tex->getHeight() >> mip, 1); src_tex.setMipLevelRange(mip - 1, mip - 1); dst_tex.setMipLevelRange(mip, mip); auto para = embed_const_buffer(Gnm::kShaderStageCs, 0); para->sizeMinus1[0] = RR_MAX(gnm_tex->getWidth() >> (mip - 1), 1) - 1; para->sizeMinus1[1] = RR_MAX(gnm_tex->getHeight() >> (mip - 1), 1) - 1; gfxc->setTextures(Gnm::kShaderStageCs, 0, 1, &src_tex); gfxc->setRwTextures(Gnm::kShaderStageCs, 0, 1, &dst_tex); 
gfxc->dispatch((mipw + 7) / 8, (miph + 7) / 8, 1); if (mip < gnm_tex->getLastMipLevel()) compute_to_compute_sync(); else compute_to_graphics_sync(); } gfxc->setShaderType(Gnm::kShaderTypeGraphics); return (GDrawTexture *) p->p0; } static rrbool RADLINK gdraw_UpdateTextureBegin(GDrawTexture *t, void *unique_id, GDrawStats *stats) { if (gdraw_HandleCacheLock((GDrawHandle *) t, unique_id)) { upload_begin(); return true; } else return false; } static void RADLINK gdraw_UpdateTextureRect(GDrawTexture *t, void *unique_id, S32 x, S32 y, S32 stride, S32 w, S32 h, U8 *samples, gdraw_texture_format format) { GDrawHandle *s = (GDrawHandle *) t; Gnm::Texture *tex = s->handle.tex.gnm; U32 bpp = (format == GDRAW_TEXTURE_FORMAT_font) ? 1 : 4; upload_tex_subrect(tex, 0, x, y, samples, stride, w, h, bpp); } static void RADLINK gdraw_UpdateTextureEnd(GDrawTexture *t, void *unique_id, GDrawStats *stats) { GDrawHandle *s = (GDrawHandle *) t; upload_end(); gdraw_HandleCacheUnlock(s); } static void RADLINK gdraw_FreeTexture(GDrawTexture *tt, void *unique_id, GDrawStats *stats) { GDrawHandle *t = (GDrawHandle *) tt; assert(t != NULL); if (t->owner == unique_id || unique_id == NULL) { if (t->cache == &gdraw->rendertargets) { gdraw_HandleCacheUnlock(t); // cache it by simply not freeing it return; } gdraw_res_kill(t, stats); } } static rrbool RADLINK gdraw_TryToLockTexture(GDrawTexture *t, void *unique_id, GDrawStats *stats) { return gdraw_HandleCacheLock((GDrawHandle *) t, unique_id); } static void RADLINK gdraw_DescribeTexture(GDrawTexture *tex, GDraw_Texture_Description *desc) { GDrawHandle *p = (GDrawHandle *) tex; desc->width = p->handle.tex.gnm->getWidth(); desc->height = p->handle.tex.gnm->getHeight(); desc->size_in_bytes = p->bytes; } static void antialias_tex_upload() { if (!gdraw->aatex_new || !gdraw->gfxc) return; U32 width = gdraw->aa_tex.getWidth(); upload_begin(); upload_tex_subrect(&gdraw->aa_tex, 0, 0, 0, gdraw->aatex_data, width*4, width, 1, 4); upload_end(); 
gdraw->aatex_new = false; } static void RADLINK gdraw_SetAntialiasTexture(S32 width, U8 *rgba) { if (gdraw->aa_tex.isTexture()) return; Gnm::SizeAlign sa = gdraw->aa_tex.initAs2d(width, 1, 1, Gnm::kDataFormatR8G8B8A8Unorm, Gnm::kTileModeDisplay_LinearAligned, Gnm::kNumSamples1); void *ptr = gdraw_arena_alloc(&gdraw->vidshared_arena, sa.m_size, sa.m_align); if (!ptr) return; gdraw->aa_tex.setBaseAddress(ptr); assert(width <= MAX_AATEX_WIDTH); memcpy(gdraw->aatex_data, rgba, width*4); gdraw->aatex_new = true; antialias_tex_upload(); } //////////////////////////////////////////////////////////////////////// // // Vertex buffer creation/deletion // static rrbool RADLINK gdraw_MakeVertexBufferBegin(void *unique_id, gdraw_vformat vformat, S32 vbuf_size, S32 ibuf_size, GDraw_MakeVertexBuffer_ProcessingInfo *p, GDrawStats *stats) { GDrawHandle *vb; vb = gdraw_res_alloc_begin(gdraw->vbufcache, vbuf_size + ibuf_size, stats); if (!vb) return false; vb->handle.vbuf.verts = vb->raw_ptr; vb->handle.vbuf.inds = (U8 *) vb->raw_ptr + vbuf_size; p->p0 = vb; p->vertex_data_length = vbuf_size; p->index_data_length = ibuf_size; // need to go through staging buffer for uploads p->p1 = alloc_staging(vbuf_size + ibuf_size, Gnm::kAlignmentOfBufferInBytes); if (!p->p1) { gdraw_HandleCacheAllocateFail(vb); return false; } p->vertex_data = (U8 *) p->p1; p->index_data = (U8 *) p->p1 + vbuf_size; p->i0 = vbuf_size + ibuf_size; gdraw_HandleCacheAllocateEnd(vb, vbuf_size + ibuf_size, unique_id, GDRAW_HANDLE_STATE_locked); return true; } static rrbool RADLINK gdraw_MakeVertexBufferMore(GDraw_MakeVertexBuffer_ProcessingInfo *p) { assert(0); return false; } static GDrawVertexBuffer * RADLINK gdraw_MakeVertexBufferEnd(GDraw_MakeVertexBuffer_ProcessingInfo *p, GDrawStats *stats) { GDrawHandle *vb = (GDrawHandle *) p->p0; // DMA from staging buffer to actual target address. 
gdraw->gfxc->copyData(vb->raw_ptr, p->p1, p->i0, Gnm::kDmaDataBlockingEnable); // Flush shader L1 & L2 so we can safely use the updated VB // need to stall parsing of the command buffer because if the next // command is drawing this vertex buffer, PS4 gpu will prefetch // the index data, and since we haven't flushed yet, it can fetch // bogus index data gdraw->gfxc->flushShaderCachesAndWait(Gnm::kCacheActionWriteBackAndInvalidateL1andL2, 0, Gnm::kStallCommandBufferParserEnable); return (GDrawVertexBuffer *) vb; } static rrbool RADLINK gdraw_TryLockVertexBuffer(GDrawVertexBuffer *vb, void *unique_id, GDrawStats *stats) { return gdraw_HandleCacheLock((GDrawHandle *) vb, unique_id); } static void RADLINK gdraw_FreeVertexBuffer(GDrawVertexBuffer *vb, void *unique_id, GDrawStats *stats) { GDrawHandle *h = (GDrawHandle *) vb; assert(h != NULL); // @GDRAW_ASSERT if (h->owner == unique_id) gdraw_res_kill(h, stats); } static void RADLINK gdraw_DescribeVertexBuffer(GDrawVertexBuffer *vbuf, GDraw_VertexBuffer_Description *desc) { GDrawHandle *p = (GDrawHandle *) vbuf; desc->size_in_bytes = p->bytes; } //////////////////////////////////////////////////////////////////////// // // Create/free (or cache) framebuffer-sized textures // static GDrawHandle *get_color_rendertarget(GDrawStats *stats) { GDrawHandle *t; t = gdraw_HandleCacheGetLRU(&gdraw->rendertargets); if (t) { gdraw_HandleCacheLock(t, (void *) 1); return t; } t = gdraw_HandleCacheAllocateBegin(&gdraw->rendertargets); if (!t) { IggyGDrawSendWarning(NULL, "GDraw rendertarget allocation failed: hit handle limit"); return t; } U8 *ptr = (U8 *)gdraw_arena_alloc(&gdraw->rt_arena, gdraw->rt_colorbuffer_sa.m_size, gdraw->rt_colorbuffer_sa.m_align); if (!ptr) { IggyGDrawSendWarning(NULL, "GDraw rendertarget allocation failed: out of rendertarget texture memory"); gdraw_HandleCacheAllocateFail(t); return NULL; } t->fence = get_next_fence(); t->raw_ptr = NULL; t->handle.tex.gnm_ptr = ptr; 
t->handle.tex.gnm->initFromRenderTarget(&gdraw->rt_colorbuffer, false); t->handle.tex.gnm->setBaseAddress(ptr); gdraw_HandleCacheAllocateEnd(t, gdraw->rt_colorbuffer_sa.m_size, (void *) 1, GDRAW_HANDLE_STATE_locked); return t; } //////////////////////////////////////////////////////////////////////// // // Constant buffer layouts // struct VertexVars { F32 world[2][4]; F32 x_off[4]; F32 texgen_s[4]; F32 texgen_t[4]; F32 viewproj[4]; }; struct PixelCommonVars { F32 color_mul[4]; F32 color_add[4]; F32 focal[4]; }; struct PixelParaFilter { F32 clamp0[4], clamp1[4]; F32 color[4], color2[4]; F32 tc_off[4]; }; struct PixelParaBlur { F32 clamp[4]; F32 tap[9][4]; }; struct PixelParaColorMatrix { F32 data[5][4]; }; //////////////////////////////////////////////////////////////////////// // // Rendering helpers // static void set_gnm_texture(U32 unit, Gnm::Texture *tex, U32 wrap, U32 nearest) { assert(wrap < GDRAW_WRAP__count); assert(nearest < 2); Gnmx::GfxContext *gfxc = gdraw->gfxc; gfxc->setSamplers(Gnm::kShaderStagePs, unit, 1, &gdraw->sampler_state[nearest][wrap]); gfxc->setTextures(Gnm::kShaderStagePs, unit, 1, tex); } static inline void disable_scissor(bool force) { if (force || gdraw->scissor_state) { // set whole viewport as scissor rect gdraw->scissor_state = 0; gdraw->gfxc->setScreenScissor(gdraw->cview.x0, gdraw->cview.y0, gdraw->cview.x1, gdraw->cview.y1); } } static void set_viewport_raw(S32 x, S32 y, S32 w, S32 h) { // check against hardware limits assert(w >= 0 && w <= 16384); assert(h >= 0 && h <= 16384); gdraw->cview.x0 = x; gdraw->cview.y0 = y; gdraw->cview.x1 = x + w; gdraw->cview.y1 = y + h; F32 scale[3] = { (F32)w * 0.5f, -(F32)h * 0.5f, 1.0f }; F32 offset[3] = { (F32)x + (F32)w * 0.5f, (F32)y + (F32)h * 0.5f, 0.0f }; gdraw->gfxc->setViewport(0, 0.0f, 1.0f, scale, offset); disable_scissor(true); } static void set_projection_raw(S32 x0, S32 x1, S32 y0, S32 y1) { gdraw->projection[0] = 2.0f / (x1-x0); gdraw->projection[1] = 2.0f / (y1-y0); 
gdraw->projection[2] = (x1 + x0) / (F32) (x0 - x1); gdraw->projection[3] = (y1 + y0) / (F32) (y0 - y1); } static void set_viewport() { if (gdraw->in_blur) { // blur needs special setup set_viewport_raw(0, 0, gdraw->tpw, gdraw->tph); return; } if (gdraw->cur == gdraw->frame) // if the rendering stack is empty // render a tile-sized region to the user-request tile location set_viewport_raw(gdraw->vx, gdraw->vy, gdraw->tw, gdraw->th); else if (gdraw->cur->cached) set_viewport_raw(0, 0, gdraw->cur->width, gdraw->cur->height); else // if on the render stack, draw a padded-tile-sized region at the origin set_viewport_raw(0, 0, gdraw->tpw, gdraw->tph); } static void set_projection() { if (gdraw->in_blur) { // blur needs special setup set_projection_raw(0, gdraw->tpw, gdraw->tph, 0); return; } if (gdraw->cur == gdraw->frame) // if the render stack is empty set_projection_raw(gdraw->tx0,gdraw->tx0+gdraw->tw,gdraw->ty0+gdraw->th,gdraw->ty0); else if (gdraw->cur->cached) set_projection_raw(gdraw->cur->base_x, gdraw->cur->base_x + gdraw->cur->width, gdraw->cur->base_y + gdraw->cur->height, gdraw->cur->base_y); else set_projection_raw(gdraw->tx0p,gdraw->tx0p+gdraw->tpw,gdraw->ty0p+gdraw->tph,gdraw->ty0p); } static void set_render_target() { Gnmx::GfxContext *gfxc = gdraw->gfxc; if (GDrawHandle *color = gdraw->cur->color_buffer) { gdraw->rt_colorbuffer.setBaseAddress256ByteBlocks(color->handle.tex.gnm->getBaseAddress256ByteBlocks()); gfxc->setRenderTarget(0, &gdraw->rt_colorbuffer); } else gfxc->setRenderTarget(0, &gdraw->main_colorbuffer); gfxc->setDepthRenderTarget(&gdraw->main_depthbuffer); gfxc->setCmaskClearColor(0, gdraw->cur->clear_col); } static void clear_renderstate() { Gnmx::GfxContext *gfxc = gdraw->gfxc; gfxc->setDepthStencilControl(gdraw->depth_stencil_control[0][0][0]); gfxc->setRenderTargetMask(0xf); gfxc->setBlendControl(0, gdraw->blend_control[GDRAW_BLEND_none]); disable_scissor(false); gdraw->z_stencil_key = 0; gdraw->writes_masked = 0; gdraw->blend_mode = 
GDRAW_BLEND_none; } static void set_common_renderstate() { Gnmx::GfxContext *gfxc = gdraw->gfxc; // clear our state caching memset(gdraw->active_tex, 0, sizeof(gdraw->active_tex)); gdraw->cur_ps = NULL; gdraw->scissor_state = ~0u; gdraw->blend_mode = -1; // all the state we won't touch again until we're done rendering Gnm::ClipControl clip_control; clip_control.init(); clip_control.setClipSpace(Gnm::kClipControlClipSpaceDX); gfxc->setClipControl(clip_control); gfxc->setShaderType(Gnm::kShaderTypeGraphics); gfxc->setIndexSize(Gnm::kIndexSize16); gfxc->setStencilClearValue(0); gfxc->setDepthClearValue(1.0f); gfxc->setVsShader(gdraw->vs.vs, 0, (void*)0); Gnm::StencilOpControl stencil_op; stencil_op.init(); stencil_op.setStencilOps(Gnm::kStencilOpKeep, Gnm::kStencilOpReplaceTest, Gnm::kStencilOpKeep); gfxc->setStencilOpControl(stencil_op); Gnm::ViewportTransformControl vt_control; vt_control.init(); vt_control.setPassThroughEnable(false); gfxc->setViewportTransformControl(vt_control); // set up guard band and hardware screen offset once // we know ahead of time which viewports we're going to set: // * our viewport top/left corner is always >= (0,0) // * viewport bottom/right is <= size of the largest render target S32 min_x = 0; S32 min_y = 0; S32 max_x = RR_MAX(gdraw->main_colorbuffer.getWidth(), gdraw->rt_colorbuffer.getWidth()); S32 max_y = RR_MAX(gdraw->main_colorbuffer.getHeight(), gdraw->rt_colorbuffer.getHeight()); F32 offs_x = (F32) (min_x + max_x) * 0.5f; F32 offs_y = (F32) (min_y + max_y) * 0.5f; F32 abs_scale_x = (F32) (max_x - min_x) * 0.5f; F32 abs_scale_y = (F32) (max_y - min_y) * 0.5f; // set up guard band offset so we're centered around our viewport region // hardware offset must be a multiple of 16 pixels S32 hw_offset_x = (S32)floorf(offs_x/16.0f + 0.5f) * 16; S32 hw_offset_y = (S32)floorf(offs_y/16.0f + 0.5f) * 16; gfxc->setHardwareScreenOffset(hw_offset_x >> 4, hw_offset_y >> 4); // set up guard band clip and discard distances // NB both the values 
for hw_min and hw_max are slightly smaller than the actual min/max // (by about 1/256th) to keep a bit of a safety margin for FP round-off error F32 hw_min = -(F32)(0xff<<16) / (F32)(1<<8); F32 hw_max = (F32)(0xff<<16) / (F32)(1<<8); F32 gb_max_x = RR_MIN(hw_max - abs_scale_x - offs_x + hw_offset_x, -abs_scale_x + offs_x - hw_offset_x - hw_min); F32 gb_max_y = RR_MIN(hw_max - abs_scale_y - offs_y + hw_offset_y, -abs_scale_y + offs_y - hw_offset_y - hw_min); gfxc->setGuardBandClip(gb_max_x / abs_scale_x, gb_max_y / abs_scale_y); gfxc->setGuardBandDiscard(1.0f, 1.0f); assert(gdraw->aa_tex.isTexture()); // if this triggers, your initialization sequence is wrong. set_gnm_texture(AATEX_SAMPLER, &gdraw->aa_tex, GDRAW_WRAP_clamp, 0); // states we modify during regular rendering clear_renderstate(); set_render_target(); set_viewport(); set_projection(); } static void set_pixel_shader(ShaderCode *ps); static void do_screen_quad(gswf_recti *s, const F32 *tc, GDrawStats *stats); static void render_clear_quad(gswf_recti *r, GDrawStats *stats) { set_pixel_shader(&gdraw->clear_ps); do_screen_quad(r, four_zeros, stats); stats->nonzero_flags |= GDRAW_STATS_clears; stats->num_clears++; stats->cleared_pixels += (r->x1 - r->x0) * (r->y1 - r->y0); } static void manual_clear_color(S32 x, S32 y, S32 w, S32 h, GDrawStats *stats) { clear_renderstate(); set_viewport_raw(0, 0, gdraw->frametex_width, gdraw->frametex_height); set_projection_raw(0, gdraw->frametex_width, gdraw->frametex_height, 0); gswf_recti r = { x, y, x+w, y+h }; gdraw->gfxc->setConstantBuffers(Gnm::kShaderStagePs, 0, 1, &gdraw->pixel_common_zero_cbuf); render_clear_quad(&r, stats); } static void clear_whole_zs(bool clear_depth, bool clear_stencil, GDrawStats *stats) { Gnm::DepthRenderTarget &depthbuf = gdraw->main_depthbuffer; // to clear both depth and stencil, we can just set up the metadata in HTile with a // compute shader if a) there's a HTile to begin with and b) it contains stencil metadata. 
// if no stencil info in HTile, clearing HTile manually is a net perf loss. if (clear_depth && clear_stencil && depthbuf.getHtileAccelerationEnable() && !depthbuf.getHtileStencilDisable()) { gdraw->gfxc->triggerEvent(Gnm::kEventTypeFlushAndInvalidateDbMeta); gpu_compute_memset(depthbuf.getHtileAddress(), 0xfffc00f0, depthbuf.getHtileSizeInBytes()); } else { S32 w = gdraw->main_depthbuffer.getWidth(); S32 h = gdraw->main_depthbuffer.getHeight(); Gnmx::GfxContext *gfxc = gdraw->gfxc; Gnm::DbRenderControl db_control; db_control.init(); db_control.setDepthClearEnable(clear_depth); db_control.setStencilClearEnable(clear_stencil); gfxc->setDbRenderControl(db_control); Gnm::DepthStencilControl ds_control; ds_control.init(); ds_control.setDepthControl(clear_depth ? Gnm::kDepthControlZWriteEnable : Gnm::kDepthControlZWriteDisable, Gnm::kCompareFuncAlways); ds_control.setStencilFunction(Gnm::kCompareFuncAlways); ds_control.setDepthEnable(clear_depth); ds_control.setStencilEnable(clear_stencil); gfxc->setDepthStencilControl(ds_control); Gnm::StencilControl st_control; st_control.m_testVal = 255; st_control.m_mask = 255; st_control.m_writeMask = 255; st_control.m_opVal = 0; gfxc->setStencil(st_control); set_viewport_raw(0, 0, w, h); set_projection_raw(0, w, h, 0); gfxc->setRenderTargetMask(0); gswf_recti r = { 0, 0, w, h }; gfxc->setConstantBuffers(Gnm::kShaderStagePs, 0, 1, &gdraw->pixel_common_zero_cbuf); render_clear_quad(&r, stats); db_control.init(); gfxc->setDbRenderControl(db_control); clear_renderstate(); set_viewport(); set_projection(); } } static void eliminate_fast_clear() { if (!gdraw->cur->needs_clear_eliminate) return; clear_renderstate(); Gnmx::GfxContext *gfxc = gdraw->gfxc; gfxc->triggerEvent(Gnm::kEventTypeFlushAndInvalidateCbMeta); // flush CMask data gswf_recti r; if (gdraw->cur != gdraw->frame) { r.x0 = gdraw->cur->base_x; r.y0 = gdraw->cur->base_y; r.x1 = r.x0 + gdraw->cur->width; r.y1 = r.y0 + gdraw->cur->height; } else { r.x0 = 0; r.y0 = 0; r.x1 = 
gdraw->main_colorbuffer.getWidth(); r.y1 = gdraw->main_colorbuffer.getHeight(); } gfxc->setCbControl(Gnm::kCbModeEliminateFastClear, Gnm::kRasterOpSrcCopy); gfxc->setPsShader(NULL); set_viewport_raw(r.x0, r.y0, r.x1 - r.x0, r.y1 - r.y0); set_projection_raw(r.x0, r.x1, r.y1, r.y0); GDrawStats stats = {}; // we already counted these clears once, so don't add to main stats gfxc->setConstantBuffers(Gnm::kShaderStagePs, 0, 1, &gdraw->pixel_common_zero_cbuf); render_clear_quad(&r, &stats); void *label = insert_cb_label(); gfxc->writeImmediateAtEndOfPipe(Gnm::kEopFlushCbDbCaches, label, 1, Gnm::kCacheActionNone); gfxc->waitOnAddress(label, ~0u, Gnm::kWaitCompareFuncEqual, 1); gfxc->setCbControl(Gnm::kCbModeNormal, Gnm::kRasterOpSrcCopy); set_viewport(); set_projection(); gdraw->cur_ps = NULL; gdraw->cur->needs_clear_eliminate = false; } //////////////////////////////////////////////////////////////////////// // // Begin rendering for a frame // void gdraw_orbis_SetTileOrigin(Gnm::RenderTarget *color, Gnm::DepthRenderTarget *depth, S32 x, S32 y) { gdraw->main_colorbuffer = *color; gdraw->main_depthbuffer = *depth; gdraw->vx = x; gdraw->vy = y; } static inline U32 pack8(F32 v) { if (v < 0.0f) v = 0.0f; if (v > 1.0f) v = 1.0f; return (U32) (S32) (v * 255.0f + 0.5f); } static inline U32 pack_color_8888(F32 x, F32 y, F32 z, F32 w) { return (pack8(x) << 0) | (pack8(y) << 8) | (pack8(z) << 16) | (pack8(w) << 24); } void gdraw_orbis_ClearWholeRenderTarget(const F32 clear_color_rgba[4]) { assert(gdraw->gfxc != NULL); // call after gdraw_orbis_Begin gdraw->cur = gdraw->frame; set_common_renderstate(); clear_renderstate(); if (gdraw->main_colorbuffer.getCmaskFastClearEnable()) { Gnmx::GfxContext *gfxc = gdraw->gfxc; // CB flush before gfxc->triggerEvent(Gnm::kEventTypeFlushAndInvalidateCbPixelData); // Clear Cmask gfxc->fillData(gdraw->main_colorbuffer.getCmaskAddress(), 0, gdraw->main_colorbuffer.getCmaskSizeInBytes(), Gnm::kDmaDataBlockingEnable); // CB flush after void *label = 
insert_cb_label(); gfxc->writeImmediateAtEndOfPipe(Gnm::kEopFlushCbDbCaches, label, 1, Gnm::kCacheActionNone); gfxc->waitOnAddress(label, ~0u, Gnm::kWaitCompareFuncEqual, 1); // Set Cmask clear color Gnm::DataFormat fmt = gdraw->main_colorbuffer.getDataFormat(); if (fmt.m_asInt == Gnm::kDataFormatB8G8R8A8Unorm.m_asInt || fmt.m_asInt == Gnm::kDataFormatB8G8R8X8Unorm.m_asInt) { gdraw->cur->clear_col[0] = pack_color_8888(clear_color_rgba[2], clear_color_rgba[1], clear_color_rgba[0], clear_color_rgba[3]); gdraw->cur->clear_col[1] = 0; } else if (fmt.m_asInt == Gnm::kDataFormatR8G8B8A8Unorm.m_asInt) { gdraw->cur->clear_col[0] = pack_color_8888(clear_color_rgba[0], clear_color_rgba[1], clear_color_rgba[2], clear_color_rgba[3]); } else assert(0); // unsupported color format! gfxc->setCmaskClearColor(0, gdraw->cur->clear_col); gdraw->cur->needs_clear_eliminate = true; } else { auto para = embed_const_buffer(Gnm::kShaderStagePs, 0); memset(para, 0, sizeof(*para)); for (U32 i=0; i < 4; i++) para->color_mul[i] = clear_color_rgba[i]; GDrawStats stats = {}; gswf_recti r = { 0, 0, (S32) gdraw->main_colorbuffer.getWidth(), (S32) gdraw->main_colorbuffer.getHeight() }; set_viewport_raw(0, 0, r.x1, r.y1); set_projection_raw(0, r.x1, r.y1, 0); render_clear_quad(&r, &stats); } } static void RADLINK gdraw_SetViewSizeAndWorldScale(S32 w, S32 h, F32 scalex, F32 scaley) { gdraw->cur = gdraw->frame; gdraw->fw = w; gdraw->fh = h; gdraw->tw = w; gdraw->th = h; gdraw->world_to_pixel[0] = scalex; gdraw->world_to_pixel[1] = scaley; } // must include anything necessary for texture creation/update static void RADLINK gdraw_RenderingBegin(void) { assert(gdraw->gfxc != NULL); // call after gdraw_orbis_Begin // unbind all shaders Gnmx::GfxContext *gfxc = gdraw->gfxc; gfxc->setVsShader(NULL, 0, (void*)0); gfxc->setPsShader(NULL); gfxc->setCsShader(NULL); gfxc->setLsHsShaders(NULL, 0, (void*)0, NULL, 0); gfxc->setEsShader(NULL, 0, (void *) 0); gfxc->setGsVsShaders(NULL); set_common_renderstate(); } 
static void RADLINK gdraw_RenderingEnd(void) { clear_renderstate(); } static void RADLINK gdraw_RenderTileBegin(S32 x0, S32 y0, S32 x1, S32 y1, S32 pad, GDrawStats *stats) { if (x0 == 0 && y0 == 0 && x1 == gdraw->fw && y1 == gdraw->fh) pad = 0; gdraw->tx0 = x0; gdraw->ty0 = y0; gdraw->tw = x1-x0; gdraw->th = y1-y0; // padded region gdraw->tx0p = RR_MAX(x0 - pad, 0); gdraw->ty0p = RR_MAX(y0 - pad, 0); gdraw->tpw = RR_MIN(x1 + pad, gdraw->fw) - gdraw->tx0p; gdraw->tph = RR_MIN(y1 + pad, gdraw->fh) - gdraw->ty0p; // check if this fits inside our rendertarget buffers assert(gdraw->tpw <= gdraw->frametex_width && gdraw->tph <= gdraw->frametex_height); gdraw->frame->base_x = gdraw->tx0p; gdraw->frame->base_y = gdraw->ty0p; gdraw->frame->width = gdraw->tpw; gdraw->frame->height = gdraw->tph; // clear our depth/stencil buffers clear_whole_zs(true, true, stats); } static void RADLINK gdraw_RenderTileEnd(GDrawStats *stats) { gdraw->tile_end_fence = put_fence(); // reap once per frame even if there are no allocs gdraw_res_reap(gdraw->texturecache, stats); gdraw_res_reap(gdraw->vbufcache, stats); } GDRAW_MAYBE_UNUSED static bool mem_is_direct_and_write_combined_or_cached(const void *mem) { SceKernelVirtualQueryInfo info; if (sceKernelVirtualQuery(mem, 0, &info, sizeof(info)) != 0) return false; // if this errors, mem is likely not even mapped! if (!info.isDirectMemory) return false; return true; } void gdraw_orbis_Begin(sce::Gnmx::GfxContext *context, void *staging_buffer, U32 staging_buf_bytes) { assert(gdraw->gfxc == NULL); // may not nest Begin calls // make sure that the memory setup is sensible. // if any of these asserts fire, please relocate your command buffers // and staging buffers to direct memory that is either cached or // write-combined! 
assert(mem_is_direct_and_write_combined_or_cached(context->m_dcb.m_cmdptr)); assert(mem_is_direct_and_write_combined_or_cached(context->m_ccb.m_cmdptr)); assert(mem_is_direct_and_write_combined_or_cached(staging_buffer)); gdraw->gfxc = context; gdraw_arena_init(&gdraw->staging, staging_buffer, staging_buf_bytes); memset(&gdraw->staging_stats, 0, sizeof(gdraw->staging_stats)); context->initializeToDefaultContextState(); antialias_tex_upload(); } void gdraw_orbis_End(gdraw_orbis_staging_stats *stats) { assert(gdraw->gfxc != NULL); // please keep Begin / End pairs properly matched gdraw_HandleCacheTick(gdraw->texturecache, gdraw->tile_end_fence); gdraw_HandleCacheTick(gdraw->vbufcache, gdraw->tile_end_fence); gdraw_arena_init(&gdraw->staging, NULL, 0); gdraw->gfxc = NULL; if (stats) *stats = gdraw->staging_stats; } void gdraw_orbis_EliminateFastClears(void) { assert(gdraw->gfxc != NULL); // call between gdraw_orbis_Begin and gdraw_orbis_End eliminate_fast_clear(); } #define MAX_DEPTH_VALUE (1 << 14) static void RADLINK gdraw_GetInfo(GDrawInfo *d) { d->num_stencil_bits = 8; d->max_id = MAX_DEPTH_VALUE-2; // for floating point depth, just use mantissa, e.g. 
16-20 bits d->max_texture_size = MAX_TEXTURE2D_DIM; d->buffer_format = GDRAW_BFORMAT_vbib; d->shared_depth_stencil = 1; d->always_mipmap = 0; d->conditional_nonpow2 = 0; } //////////////////////////////////////////////////////////////////////// // // Render targets // static rrbool RADLINK gdraw_TextureDrawBufferBegin(gswf_recti *region, gdraw_texture_format format, U32 flags, void *owner, GDrawStats *stats) { GDrawFramebufferState *n = gdraw->cur+1; GDrawHandle *t; if (gdraw->tw == 0 || gdraw->th == 0) { IggyGDrawSendWarning(NULL, "GDraw warning: w=0,h=0 rendertarget"); return false; } if (n >= &gdraw->frame[MAX_RENDER_STACK_DEPTH]) { IggyGDrawSendWarning(NULL, "GDraw rendertarget nesting exceeds MAX_RENDER_STACK_DEPTH"); return false; } if (owner) { // @TODO implement t = NULL; assert(0); // nyi } else { t = get_color_rendertarget(stats); if (!t) return false; } n->color_buffer = t; assert(n->color_buffer != NULL); // @GDRAW_ASSERT n->cached = owner != NULL; if (owner) { n->base_x = region->x0; n->base_y = region->y0; n->width = region->x1 - region->x0; n->height = region->y1 - region->y0; } assert(gdraw->frametex_width >= gdraw->tw && gdraw->frametex_height >= gdraw->th); // @GDRAW_ASSERT int k = n->color_buffer - gdraw->rendertargets.handle; S32 x, y, w, h; if (region) { S32 ox, oy, pad = 2; // 2 pixels of border on all sides // 1 pixel turns out to be not quite enough with the interpolator precision we get. 
if (gdraw->in_blur) ox = oy = 0; else ox = gdraw->tx0p, oy = gdraw->ty0p; // clamp region to tile S32 xt0 = RR_MAX(region->x0 - ox, 0); S32 yt0 = RR_MAX(region->y0 - oy, 0); S32 xt1 = RR_MIN(region->x1 - ox, gdraw->tpw); S32 yt1 = RR_MIN(region->y1 - oy, gdraw->tph); // but the padding needs to clamp to render target bounds x = RR_MAX(xt0 - pad, 0); y = RR_MAX(yt0 - pad, 0); w = RR_MIN(xt1 + pad, gdraw->frametex_width) - x; h = RR_MIN(yt1 + pad, gdraw->frametex_height) - y; if (w <= 0 || h <= 0) { // region doesn't intersect with current tile gdraw_FreeTexture((GDrawTexture *) n->color_buffer, 0, stats); // note: don't send a warning since this will happen during regular tiled rendering return false; } gdraw->rt_valid[k].x0 = xt0; gdraw->rt_valid[k].y0 = yt0; gdraw->rt_valid[k].x1 = xt1; gdraw->rt_valid[k].y1 = yt1; } else { x = 0; y = 0; w = gdraw->frametex_width; h = gdraw->frametex_height; gdraw->rt_valid[k].x0 = 0; gdraw->rt_valid[k].y0 = 0; gdraw->rt_valid[k].x1 = w; gdraw->rt_valid[k].y1 = h; } stats->nonzero_flags |= GDRAW_STATS_rendtarg; stats->rendertarget_changes++; ++gdraw->cur; gdraw->rt_colorbuffer.setBaseAddress(n->color_buffer->handle.tex.gnm_ptr); set_render_target(); manual_clear_color(x, y, w, h, stats); set_viewport(); set_projection(); return true; } static GDrawTexture *RADLINK gdraw_TextureDrawBufferEnd(GDrawStats *stats) { GDrawFramebufferState *n = gdraw->cur; GDrawFramebufferState *m = --gdraw->cur; if (gdraw->tw == 0 || gdraw->th == 0) return 0; if (n >= &gdraw->frame[MAX_RENDER_STACK_DEPTH]) return 0; // already returned a warning in Start...() assert(m >= gdraw->frame); // bug in Iggy -- unbalanced if (m != gdraw->frame) { assert(m->color_buffer != NULL); // @GDRAW_ASSERT } assert(n->color_buffer != NULL); // @GDRAW_ASSERT // sync on draw completion for this render target rtt_sync(n->color_buffer->handle.tex.gnm_ptr, gdraw->rt_colorbuffer_sa.m_size >> 8); n->color_buffer->fence = get_next_fence(); // switch back to old rendertarget 
set_render_target(); set_viewport(); set_projection(); stats->nonzero_flags |= GDRAW_STATS_rendtarg; stats->rendertarget_changes++; return (GDrawTexture *) n->color_buffer; } //////////////////////////////////////////////////////////////////////// // // Clear stencil/depth buffers // static void RADLINK gdraw_ClearStencilBits(U32 bits) { GDrawStats stats = {}; clear_whole_zs(false, true, &stats); } static void RADLINK gdraw_ClearID(void) { GDrawStats stats = {}; clear_whole_zs(true, false, &stats); } //////////////////////////////////////////////////////////////////////// // // Set all the render state from GDrawRenderState // static RADINLINE void set_texture(U32 texunit, GDrawTexture *tex) { assert(texunit < MAX_SAMPLERS); assert(tex != NULL); if (gdraw->active_tex[texunit] != tex) { gdraw->active_tex[texunit] = tex; GDrawHandle *h = (GDrawHandle *) tex; set_gnm_texture(texunit, h->handle.tex.gnm, GDRAW_WRAP_clamp, 0); } } static RADINLINE void set_pixel_shader(ShaderCode *ps) { if (gdraw->cur_ps != ps) { gdraw->cur_ps = ps; gdraw->gfxc->setPsShader(ps->ps); } } // converts a depth id into a Z value static inline F32 depth_from_id(S32 id) { return (1.0f - 2.0f / MAX_DEPTH_VALUE) - id * (2.0f / MAX_DEPTH_VALUE); // = 1 - 2 * (id + 1) / MAX_DEPTH_VALUE } static void set_renderstate_full(const GDrawRenderState * RADRESTRICT r, GDrawStats *stats) { GDraw * RADRESTRICT gd = gdraw; Gnmx::GfxContext * RADRESTRICT gfxc = gd->gfxc; F32 depth = depth_from_id(r->id); // set vertex shader constants VertexVars * RADRESTRICT vvars = embed_const_buffer(Gnm::kShaderStageVs, 0); if (!r->use_world_space) gdraw_ObjectSpace(vvars->world[0], r->o2w, depth, 0.0f); else gdraw_WorldSpace(vvars->world[0], gdraw->world_to_pixel, depth, 0.0f); __m128 edge = _mm_loadu_ps(r->edge_matrix); __m128 s0_texgen = _mm_setzero_ps(); __m128 t0_texgen = _mm_setzero_ps(); __m128 viewproj = _mm_load_ps(gd->projection); if (r->texgen0_enabled) { s0_texgen = _mm_loadu_ps(&r->s0_texgen[0]); t0_texgen = 
_mm_loadu_ps(&r->t0_texgen[0]); } _mm_storeu_ps(&vvars->x_off[0], edge); _mm_storeu_ps(&vvars->texgen_s[0], s0_texgen); _mm_storeu_ps(&vvars->texgen_t[0], t0_texgen); _mm_storeu_ps(&vvars->viewproj[0], viewproj); // set the blend mode int tex0mode = r->tex0_mode; int blend_mode = r->blend_mode; if (blend_mode != gd->blend_mode) { gd->blend_mode = blend_mode; gfxc->setBlendControl(0, gd->blend_control[blend_mode]); } // color channel write mask: stencil set mode doesn't write color if (r->stencil_set != gd->writes_masked) { gd->writes_masked = r->stencil_set; gfxc->setRenderTargetMask(r->stencil_set ? 0 : 0xf); } // set the pixel shader if (blend_mode != GDRAW_BLEND_special) { assert(tex0mode >= 0 && tex0mode < sizeof(gd->main_ps) / sizeof(*gd->main_ps)); ShaderCode *ps = gd->basic_ps[tex0mode]; if (r->cxf_add) { ps++; if (r->cxf_add[3]) ps++; } set_pixel_shader(ps); } else // special blends have a special pixel shader. set_pixel_shader(&gd->exceptional_blend[r->special_blend]); // set textures if (tex0mode != GDRAW_TEXTURE_none) { if (!r->tex[0]) // this can happen if some allocs fail. just abort in that case. 
return; if (gd->active_tex[0] != r->tex[0]) { gd->active_tex[0] = r->tex[0]; set_gnm_texture(0, ((GDrawHandle *) r->tex[0])->handle.tex.gnm, r->wrap0, r->nearest0); } } // pixel shader constants PixelCommonVars * RADRESTRICT pvars = embed_const_buffer(Gnm::kShaderStagePs, 0); __m128 col_mul = _mm_loadu_ps(r->color); __m128 col_add = _mm_setzero_ps(); __m128 focal = _mm_loadu_ps(r->focal_point); if (r->cxf_add) { const float scalef = 1.0f / 255.0f; col_add = _mm_setr_ps((F32) r->cxf_add[0] * scalef, (F32) r->cxf_add[1] * scalef, (F32) r->cxf_add[2] * scalef, (F32) r->cxf_add[3] * scalef); } _mm_storeu_ps(&pvars->color_mul[0], col_mul); _mm_storeu_ps(&pvars->color_add[0], col_add); _mm_storeu_ps(&pvars->focal[0], focal); // set scissor if (r->scissor) { S32 xs,ys; if (gd->cur == gd->frame) { xs = gd->tx0 - gd->vx; ys = gd->ty0 - gd->vy; } else { xs = gd->tx0p; ys = gd->ty0p; } // clip against viewport S32 x0 = RR_MAX(r->scissor_rect.x0 - xs, gd->cview.x0); S32 y0 = RR_MAX(r->scissor_rect.y0 - ys, gd->cview.y0); S32 x1 = RR_MIN(r->scissor_rect.x1 - xs, gd->cview.x1); S32 y1 = RR_MIN(r->scissor_rect.y1 - ys, gd->cview.y1); if (x1 <= x0 || y1 <= y0) { // dummy scissor rect in case our actual scissor is empty x0 = x1 = gd->cview.x0; y0 = y1 = gd->cview.y0; } gfxc->setScreenScissor(x0, y0, x1, y1); gd->scissor_state = 1; } else if (r->scissor != gd->scissor_state) disable_scissor(0); // z/stencil mode changed? 
U32 z_stencil_key = r->set_id | (r->test_id << 1) | (r->stencil_test << 16) | (r->stencil_set << 24); if (z_stencil_key != gd->z_stencil_key) { gd->z_stencil_key = z_stencil_key; if (r->stencil_test | r->stencil_set) { Gnm::StencilControl ctl; ctl.m_testVal = 255; ctl.m_mask = r->stencil_test; ctl.m_writeMask = r->stencil_set; ctl.m_opVal = 255; gfxc->setDepthStencilControl(gd->depth_stencil_control[r->set_id][r->test_id][1]); gfxc->setStencil(ctl); } else gfxc->setDepthStencilControl(gd->depth_stencil_control[r->set_id][r->test_id][0]); } } static RADINLINE void set_renderstate(const GDrawRenderState * RADRESTRICT r, GDrawStats *stats) { if (!r->identical_state) set_renderstate_full(r, stats); } //////////////////////////////////////////////////////////////////////// // // Draw triangles with a given renderstate // static RADINLINE const GDraw::VFormatDesc *get_vertex_fmt(S32 vfmt) { assert(vfmt >= 0 && vfmt < GDRAW_vformat__count); return &gdraw->vfmt[vfmt]; } static void set_vertex_buffer(const GDraw::VFormatDesc *fmtdesc, void *ptr, U32 num_verts) { Gnm::Buffer bufs[MAX_ATTRS]; for (U32 i=0; i < fmtdesc->num_attribs; i++) bufs[i].initAsVertexBuffer((U8 *)ptr + fmtdesc->vb_offs[i], fmtdesc->formats[i], fmtdesc->stride, num_verts); gdraw->gfxc->setBuffers(Gnm::kShaderStageVs, 0, fmtdesc->num_attribs, bufs); } static RADINLINE void fence_resources(void *r1, void *r2=NULL, void *r3=NULL, void *r4=NULL) { GDrawFence fence = get_next_fence(); if (r1) ((GDrawHandle *) r1)->fence = fence; if (r2) ((GDrawHandle *) r2)->fence = fence; if (r3) ((GDrawHandle *) r3)->fence = fence; if (r4) ((GDrawHandle *) r4)->fence = fence; } static void RADLINK gdraw_DrawIndexedTriangles(GDrawRenderState *r, GDrawPrimitive *p, GDrawVertexBuffer *buf, GDrawStats *stats) { Gnmx::GfxContext * RADRESTRICT gfxc = gdraw->gfxc; GDrawHandle *vb = (GDrawHandle *) buf; const GDraw::VFormatDesc * RADRESTRICT vfmt = get_vertex_fmt(p->vertex_format); set_renderstate(r, stats); if (vb) { 
set_vertex_buffer(vfmt, (U8 *) vb->handle.vbuf.verts + (UINTa) p->vertices, p->num_vertices); gfxc->setPrimitiveType(Gnm::kPrimitiveTypeTriList); gfxc->drawIndex(p->num_indices, (U8 *)vb->handle.vbuf.inds + (UINTa) p->indices); } else if (p->indices) { U32 vbytes = p->num_vertices * vfmt->stride; U32 ibytes = p->num_indices * 2; gfxc->setPrimitiveType(Gnm::kPrimitiveTypeTriList); U8 *buf = (U8 *) alloc_staging(vbytes + ibytes, Gnm::kAlignmentOfBufferInBytes); if (!buf) return; memcpy(buf, p->vertices, vbytes); memcpy(buf + vbytes, p->indices, ibytes); set_vertex_buffer(vfmt, buf, p->num_vertices); gfxc->drawIndex(p->num_indices, buf + vbytes); } else { // dynamic quads assert(p->num_vertices % 4 == 0); U32 stride = vfmt->stride; U32 num_bytes = (U32)p->num_vertices * stride; gfxc->setPrimitiveType(Gnm::kPrimitiveTypeQuadList); U8 *buf = (U8 *) alloc_staging(num_bytes, Gnm::kAlignmentOfBufferInBytes); if (!buf) return; memcpy(buf, p->vertices, num_bytes); set_vertex_buffer(vfmt, buf, p->num_vertices); gfxc->drawIndexAuto(p->num_vertices); } fence_resources(vb, r->tex[0], r->tex[1]); stats->nonzero_flags |= GDRAW_STATS_batches; stats->num_batches += 1; stats->drawn_indices += p->num_indices; stats->drawn_vertices += p->num_vertices; } /////////////////////////////////////////////////////////////////////// // // Flash 8 filter effects // static void do_screen_quad(gswf_recti *s, const F32 *tc, GDrawStats *stats) { VertexVars *vvars = embed_const_buffer(Gnm::kShaderStageVs, 0); __m128 world0 = _mm_setr_ps(1.0f, 0.0f, 0.0f, 0.0f); __m128 world1 = _mm_setr_ps(0.0f, 1.0f, 0.0f, 0.0f); __m128 zero = _mm_setzero_ps(); __m128 viewproj = _mm_load_ps(gdraw->projection); _mm_storeu_ps(vvars->world[0], world0); _mm_storeu_ps(vvars->world[1], world1); _mm_storeu_ps(vvars->x_off, zero); _mm_storeu_ps(vvars->texgen_s, zero); _mm_storeu_ps(vvars->texgen_t, zero); _mm_storeu_ps(vvars->viewproj, viewproj); gswf_vertex_xyst * RADRESTRICT v = (gswf_vertex_xyst *) alloc_staging(3 * 
sizeof(gswf_vertex_xyst), Gnm::kAlignmentOfBufferInBytes); if (!v) return; F32 px0 = (F32) s->x0, py0 = (F32) s->y0, px1 = (F32) s->x1, py1 = (F32) s->y1; v[0].x = px0; v[0].y = py0; v[0].s = tc[0]; v[0].t = tc[1]; v[1].x = px1; v[1].y = py0; v[1].s = tc[2]; v[1].t = tc[1]; v[2].x = px0; v[2].y = py1; v[2].s = tc[0]; v[2].t = tc[3]; set_vertex_buffer(&gdraw->vfmt[GDRAW_vformat_v2tc2], v, 3); gdraw->gfxc->setPrimitiveType(Gnm::kPrimitiveTypeRectList); gdraw->gfxc->drawIndexAuto(3); } static void gdraw_DriverBlurPass(GDrawRenderState *r, int taps, float *data, gswf_recti *s, float *tc, float /*height_max*/, float *clamp, GDrawStats *gstats) { set_texture(0, r->tex[0]); set_pixel_shader(&gdraw->blur_ps[taps]); auto para = embed_const_buffer(Gnm::kShaderStagePs, 1); memcpy(para->clamp, clamp, 4 * sizeof(float)); memcpy(para->tap, data, taps * 4 * sizeof(float)); do_screen_quad(s, tc, gstats); fence_resources(r->tex[0]); } static void gdraw_Colormatrix(GDrawRenderState *r, gswf_recti *s, float *tc, GDrawStats *stats) { if (!gdraw_TextureDrawBufferBegin(s, GDRAW_TEXTURE_FORMAT_rgba32, GDRAW_TEXTUREDRAWBUFFER_FLAGS_needs_color | GDRAW_TEXTUREDRAWBUFFER_FLAGS_needs_alpha, 0, stats)) return; set_texture(0, r->tex[0]); set_pixel_shader(&gdraw->colormatrix); auto para = embed_const_buffer(Gnm::kShaderStagePs, 1); memcpy(para->data, r->shader_data, 5 * 4 * sizeof(float)); do_screen_quad(s, tc, stats); fence_resources(r->tex[0]); r->tex[0] = gdraw_TextureDrawBufferEnd(stats); } static gswf_recti *get_valid_rect(GDrawTexture *tex) { GDrawHandle *h = (GDrawHandle *) tex; S32 n = (S32) (h - gdraw->rendertargets.handle); assert(n >= 0 && n <= MAX_RENDER_STACK_DEPTH+1); return &gdraw->rt_valid[n]; } static void set_pixel_constant(F32 *constant, F32 x, F32 y, F32 z, F32 w) { constant[0] = x; constant[1] = y; constant[2] = z; constant[3] = w; } static void set_clamp_constant(F32 *constant, GDrawTexture *tex) { gswf_recti *s = get_valid_rect(tex); // when we make the valid data, we 
make sure there is an extra empty pixel at the border set_pixel_constant(constant, (s->x0-0.5f) / gdraw->frametex_width, (s->y0-0.5f) / gdraw->frametex_height, (s->x1+0.5f) / gdraw->frametex_width, (s->y1+0.5f) / gdraw->frametex_height); } static void gdraw_Filter(GDrawRenderState *r, gswf_recti *s, float *tc, int isbevel, GDrawStats *stats) { if (!gdraw_TextureDrawBufferBegin(s, GDRAW_TEXTURE_FORMAT_rgba32, GDRAW_TEXTUREDRAWBUFFER_FLAGS_needs_color | GDRAW_TEXTUREDRAWBUFFER_FLAGS_needs_alpha, NULL, stats)) return; set_texture(0, r->tex[0]); set_texture(1, r->tex[1]); if (r->tex[2]) set_texture(2, r->tex[2]); set_pixel_shader(&gdraw->filter_ps[isbevel][r->filter_mode]); auto para = embed_const_buffer(Gnm::kShaderStagePs, 1); set_clamp_constant(para->clamp0, r->tex[0]); set_clamp_constant(para->clamp1, r->tex[1]); set_pixel_constant(para->color, r->shader_data[0], r->shader_data[1], r->shader_data[2], r->shader_data[3]); set_pixel_constant(para->color2, r->shader_data[8], r->shader_data[9], r->shader_data[10], r->shader_data[11]); set_pixel_constant(para->tc_off, -r->shader_data[4] / (F32)gdraw->frametex_width, -r->shader_data[5] / (F32)gdraw->frametex_height, r->shader_data[6], 0); do_screen_quad(s, tc, stats); fence_resources(r->tex[0], r->tex[1], r->tex[2]); r->tex[0] = gdraw_TextureDrawBufferEnd(stats); } static void RADLINK gdraw_FilterQuad(GDrawRenderState *r, S32 x0, S32 y0, S32 x1, S32 y1, GDrawStats *stats) { F32 tc[4]; gswf_recti s; // clip to tile boundaries s.x0 = RR_MAX(x0, gdraw->tx0p); s.y0 = RR_MAX(y0, gdraw->ty0p); s.x1 = RR_MIN(x1, gdraw->tx0p + gdraw->tpw); s.y1 = RR_MIN(y1, gdraw->ty0p + gdraw->tph); if (s.x1 < s.x0 || s.y1 < s.y0) return; // prepare for drawing tc[0] = (s.x0 - gdraw->tx0p) / (F32) gdraw->frametex_width; tc[1] = (s.y0 - gdraw->ty0p) / (F32) gdraw->frametex_height; tc[2] = (s.x1 - gdraw->tx0p) / (F32) gdraw->frametex_width; tc[3] = (s.y1 - gdraw->ty0p) / (F32) gdraw->frametex_height; clear_renderstate(); if (r->blend_mode == 
GDRAW_BLEND_filter) { switch (r->filter) { case GDRAW_FILTER_blur: { GDrawBlurInfo b; gswf_recti bounds = *get_valid_rect(r->tex[0]); gdraw_ShiftRect(&s, &s, -gdraw->tx0p, -gdraw->ty0p); // blur uses physical rendertarget coordinates b.BlurPass = gdraw_DriverBlurPass; b.w = gdraw->tpw; b.h = gdraw->tph; b.frametex_width = gdraw->frametex_width; b.frametex_height = gdraw->frametex_height; // blur needs to draw with multiple passes, so set up special state gdraw->in_blur = true; set_viewport(); set_projection(); // do the blur gdraw_Blur(&gdraw_funcs, &b,r, &s, &bounds, stats); // restore the normal state gdraw->in_blur = false; set_viewport(); set_projection(); break; } case GDRAW_FILTER_colormatrix: gdraw_Colormatrix(r, &s, tc, stats); break; case GDRAW_FILTER_dropshadow: gdraw_Filter(r, &s, tc, 0, stats); break; case GDRAW_FILTER_bevel: gdraw_Filter(r, &s, tc, 1, stats); break; default: assert(0); } } else { // for crazy blend modes, we need to read back from the framebuffer // and do the blending in the pixel shader. because we render as // a RectList, no quad is ever shaded twice, so we can do this safely // while texturing from the render target that we're writing to. if (r->blend_mode == GDRAW_BLEND_special) { // before we texture from this RT, we need to eliminate fast clears. eliminate_fast_clear(); // input texture. slightly different logic between the main render // target and our scratch render targets, because they might have // different dimensions. 
Gnm::Texture rt_tex; if (gdraw->cur == gdraw->frame) { rt_tex.initFromRenderTarget(&gdraw->main_colorbuffer, false); rtt_sync(rt_tex.getBaseAddress(), gdraw->main_colorbuffer.getSizeInBytes() >> 8); } else { rt_tex = *gdraw->cur->color_buffer->handle.tex.gnm; rtt_sync(rt_tex.getBaseAddress(), gdraw->rt_colorbuffer_sa.m_size >> 8); } set_gnm_texture(1, &rt_tex, GDRAW_WRAP_clamp, 0); } set_renderstate(r, stats); do_screen_quad(&s, tc, stats); fence_resources(r->tex[0], r->tex[1]); } } //////////////////////////////////////////////////////////////////////// // // Shaders and state initialization // #include "gdraw_orbis_shaders.inl" static void init_shader(ShaderCode *shader, const ShaderCode *src) { *shader = *src; if (!shader->blob) return; Gnmx::ShaderFileHeader *shdr = (Gnmx::ShaderFileHeader *) shader->blob; shader->desc = shdr + 1; // grab gpu code and copy to arena const void *shader_code = (const U8 *) shader->desc + shdr->m_shaderHeaderSizeInDW * 4; void *gpu_ptr = gdraw_arena_alloc(&gdraw->vidshared_arena, shader->common->m_shaderSize, Gnm::kAlignmentOfShaderInBytes); memcpy(gpu_ptr, shader_code, shader->common->m_shaderSize); // patch the shader switch (shdr->m_type) { case Gnmx::kVertexShader: shader->vs->m_vsStageRegisters.m_spiShaderPgmHiVs = ~0u; shader->vs->patchShaderGpuAddress(gpu_ptr); break; case Gnmx::kPixelShader: shader->ps->m_psStageRegisters.m_spiShaderPgmHiPs = ~0u; shader->ps->patchShaderGpuAddress(gpu_ptr); break; case Gnmx::kComputeShader: shader->cs->m_csStageRegisters.m_computePgmHi = ~0u; shader->cs->patchShaderGpuAddress(gpu_ptr); break; default: assert(0); } } static void create_all_shaders_and_state() { // sampler state static const Gnm::WrapMode addrmodes[ASSERT_COUNT(GDRAW_WRAP__count, 4)] = { Gnm::kWrapModeClampLastTexel, // GDRAW_WRAP_clamp Gnm::kWrapModeWrap, // GDRAW_WRAP_repeat Gnm::kWrapModeMirror, // GDRAW_WRAP_mirror Gnm::kWrapModeClampBorder, // GDRAW_WRAP_clamp_to_border }; for (int nearest=0; nearest < 2; nearest++) for 
(int addr=0; addr < GDRAW_WRAP__count; addr++) { Gnm::Sampler *smp = &gdraw->sampler_state[nearest][addr]; smp->init(); smp->setWrapMode(addrmodes[addr], addrmodes[addr], addrmodes[addr]); smp->setBorderColor(Gnm::kBorderColorTransBlack); smp->setXyFilterMode(nearest ? Gnm::kFilterModePoint : Gnm::kFilterModeBilinear, Gnm::kFilterModeBilinear); smp->setMipFilterMode(Gnm::kMipFilterModeLinear); } // depth/stencil state for (int set_id=0; set_id < 2; set_id++) for (int test_id=0; test_id < 2; test_id++) for (int stencil_enable=0; stencil_enable < 2; stencil_enable++) { Gnm::DepthStencilControl *ctl = &gdraw->depth_stencil_control[set_id][test_id][stencil_enable]; ctl->init(); ctl->setDepthEnable(set_id || test_id); ctl->setDepthControl(set_id ? Gnm::kDepthControlZWriteEnable : Gnm::kDepthControlZWriteDisable, test_id ? Gnm::kCompareFuncLess : Gnm::kCompareFuncAlways); ctl->setStencilEnable(stencil_enable != 0); ctl->setStencilFunction(Gnm::kCompareFuncEqual); } // blend state static const struct { bool enable; Gnm::BlendMultiplier src; Gnm::BlendMultiplier dst; } blend_states[ASSERT_COUNT(GDRAW_BLEND__count, 6)] = { { false, Gnm::kBlendMultiplierOne, Gnm::kBlendMultiplierZero }, // GDRAW_BLEND_none { true, Gnm::kBlendMultiplierOne, Gnm::kBlendMultiplierOneMinusSrcAlpha }, // GDRAW_BLEND_alpha { true, Gnm::kBlendMultiplierDestColor, Gnm::kBlendMultiplierOneMinusSrcAlpha }, // GDRAW_BLEND_multiply { true, Gnm::kBlendMultiplierOne, Gnm::kBlendMultiplierOne }, // GDRAW_BLEND_add { false, Gnm::kBlendMultiplierOne, Gnm::kBlendMultiplierZero }, // GDRAW_BLEND_filter { false, Gnm::kBlendMultiplierOne, Gnm::kBlendMultiplierZero }, // GDRAW_BLEND_special }; for (int mode = 0; mode < GDRAW_BLEND__count; mode++) { Gnm::BlendControl *ctl = &gdraw->blend_control[mode]; ctl->init(); ctl->setBlendEnable(blend_states[mode].enable); ctl->setSeparateAlphaEnable(false); ctl->setColorEquation(blend_states[mode].src, Gnm::kBlendFuncAdd, blend_states[mode].dst); } // vertex shader 
init_shader(&gdraw->vs, vshader_vsps4_arr); // pixel shaders for (int i=0; i < GDRAW_TEXTURE__count*3; i++) init_shader(&gdraw->main_ps[0][i], pshader_basic_arr + i); for (int i=0; i < GDRAW_BLENDSPECIAL__count; i++) init_shader(&gdraw->exceptional_blend[i], pshader_exceptional_blend_arr + i); for (int i=0; i < 32; i++) init_shader(&gdraw->filter_ps[0][i], pshader_filter_arr + i); for (int i=0; i <= MAX_TAPS; i++) init_shader(&gdraw->blur_ps[i], pshader_blur_arr + i); init_shader(&gdraw->colormatrix, pshader_color_matrix_arr); init_shader(&gdraw->clear_ps, pshader_manual_clear_arr); for (int i=0; i < GDRAW_TEXTURE__count; i++) gdraw->basic_ps[i] = &gdraw->main_ps[i][0]; // compute shaders init_shader(&gdraw->texupload_cs, cshader_tex_upload_arr); init_shader(&gdraw->memset_cs, cshader_memset_arr); init_shader(&gdraw->defragment_cs, cshader_defragment_arr); init_shader(&gdraw->mipgen_cs, cshader_mipgen_arr); // vertex formats struct VAttrDesc { U32 offset; Gnm::DataFormat fmt; }; static const struct VFmtDesc { U32 stride; U32 num_attribs; VAttrDesc attribs[MAX_ATTRS]; } vformats[ASSERT_COUNT(GDRAW_vformat__basic_count, 3)] = { // GDRAW_vformat_v2 { 8, 2, { { 0, {{{ Gnm::kSurfaceFormat32_32, Gnm::kBufferChannelTypeFloat, Gnm::kBufferChannelX, Gnm::kBufferChannelY, Gnm::kBufferChannelConstant0, Gnm::kBufferChannelConstant1 }}} }, { 4, {{{ Gnm::kSurfaceFormat8_8_8_8, Gnm::kBufferChannelTypeUNorm, Gnm::kBufferChannelConstant0, Gnm::kBufferChannelConstant0, Gnm::kBufferChannelConstant1, Gnm::kBufferChannelConstant1 }}} }, } }, // GDRAW_vformat_v2aa { 16, 2, { { 0, {{{ Gnm::kSurfaceFormat32_32, Gnm::kBufferChannelTypeFloat, Gnm::kBufferChannelX, Gnm::kBufferChannelY, Gnm::kBufferChannelConstant0, Gnm::kBufferChannelConstant1 }}} }, { 8, {{{ Gnm::kSurfaceFormat16_16_16_16, Gnm::kBufferChannelTypeSScaled, Gnm::kBufferChannelX, Gnm::kBufferChannelY, Gnm::kBufferChannelZ, Gnm::kBufferChannelConstant0 }}} }, } }, // GDRAW_vformat_v2tc2 { 16, 2, { { 0, {{{ 
Gnm::kSurfaceFormat32_32, Gnm::kBufferChannelTypeFloat, Gnm::kBufferChannelX, Gnm::kBufferChannelY, Gnm::kBufferChannelConstant0, Gnm::kBufferChannelConstant1 }}} },
         { 8, {{{ Gnm::kSurfaceFormat32_32, Gnm::kBufferChannelTypeFloat, Gnm::kBufferChannelX, Gnm::kBufferChannelY, Gnm::kBufferChannelConstant0, Gnm::kBufferChannelConstant1 }}} },
      } },
   };

   // copy the static vertex format descriptions into the GDraw state
   for (int i=0; i < GDRAW_vformat__basic_count; i++) {
      gdraw->vfmt[i].stride = vformats[i].stride;
      gdraw->vfmt[i].num_attribs = vformats[i].num_attribs;
      for (U32 j=0; j < vformats[i].num_attribs; j++) {
         const VAttrDesc *desc = &vformats[i].attribs[j];
         gdraw->vfmt[i].formats[j] = desc->fmt;
         gdraw->vfmt[i].vb_offs[j] = desc->offset;
      }
   }

   // zero "pixel common" constant buffer: allocate a PixelCommonVars block from the
   // shared arena, clear it, and wrap it as a constant buffer.
   // NOTE(review): the gdraw_arena_alloc result is used without a NULL check.
   PixelCommonVars *pvars = (PixelCommonVars *) gdraw_arena_alloc(&gdraw->vidshared_arena, sizeof(PixelCommonVars), Gnm::kAlignmentOfBufferInBytes);
   memset(pvars, 0, sizeof(*pvars));
   gdraw->pixel_common_zero_cbuf.initAsConstantBuffer(pvars, sizeof(*pvars));
}

// Per-resource-type pool description: number of handles, pool size in bytes,
// and the start of the caller-provided memory.
typedef struct {
   S32 num_handles;
   S32 num_bytes;
   void *ptr;
} GDrawResourceLimit;

// Resource limits used by GDraw, indexed by gdraw_orbis_resourcetype.
// These are written by gdraw_orbis_SetResourceMemory.
static GDrawResourceLimit gdraw_limits[GDRAW_ORBIS_RESOURCE__count];

// Creates a handle cache for the given resource type, sized from gdraw_limits[type].
//
// The cache struct and (for non-vertex-buffer types) one Gnm::Texture header per
// handle live in a single IggyGDrawMalloc allocation; the headers follow the cache.
// An allocator covering gdraw_limits[type].ptr / num_bytes is created with the
// given alignment.
//
// Returns the new cache, or NULL if either the allocation or gfxalloc_create fails.
static GDrawHandleCache *make_handle_cache(gdraw_orbis_resourcetype type, U32 align)
{
   S32 num_handles = gdraw_limits[type].num_handles;
   S32 num_bytes = gdraw_limits[type].num_bytes;
   // presumably GDrawHandleCache already embeds one GDrawHandle, hence the "- 1";
   // NOTE(review): this assumes num_handles >= 1 (gdraw_orbis_SetResourceMemory enforces that).
   U32 cache_size = sizeof(GDrawHandleCache) + (num_handles - 1) * sizeof(GDrawHandle);
   bool is_vertex = (type == GDRAW_ORBIS_RESOURCE_vertexbuffer);
   // vertex buffers need no texture headers
   U32 header_size = num_handles * (is_vertex ? 0 : sizeof(Gnm::Texture));

   GDrawHandleCache *cache = (GDrawHandleCache *) IggyGDrawMalloc(cache_size + header_size);
   if (cache) {
      gdraw_HandleCacheInit(cache, num_handles, num_bytes);
      cache->is_vertex = is_vertex;

      // set up resource headers: they start right after the handle array
      void *header_start = (U8 *) cache + cache_size;
      if (!is_vertex) {
         Gnm::Texture *headers = (Gnm::Texture *) header_start;
         for (S32 i=0; i < num_handles; i++)
            cache->handle[i].handle.tex.gnm = &headers[i];
      }

      // set up allocator; on failure, release the cache and report NULL
      cache->alloc = gfxalloc_create(gdraw_limits[type].ptr, num_bytes, align, num_handles);
      if (!cache->alloc) {
         IggyGDrawFree(cache);
         cache = NULL;
      }
   }

   return cache;
}

// Releases a handle cache created by make_handle_cache, including its allocator.
// Passing NULL is allowed and does nothing.
static void free_handle_cache(GDrawHandleCache *c)
{
   if (c) {
      if (c->alloc) IggyGDrawFree(c->alloc);
      IggyGDrawFree(c);
   }
}

// Records the memory pool for one resource type and, if a GDraw context exists,
// rebuilds the matching pool/cache on top of it.
//
//   type        - which pool to configure (asserted to be a valid resource type)
//   num_handles - number of handles; 0 is treated as 1
//   ptr         - start of the pool memory
//   num_bytes   - size of the pool in bytes
//
// Returns 1 on success (including the case where no context exists yet and the
// limits are only recorded), 0 if the type is not handled or the new handle cache
// could not be created.
//
// Must not be called while the GPU is still busy with Iggy command buffers.
int gdraw_orbis_SetResourceMemory(gdraw_orbis_resourcetype type, S32 num_handles, void *ptr, S32 num_bytes)
{
   GDrawStats stats={0};

   assert(type >= GDRAW_ORBIS_RESOURCE_rendertarget && type < GDRAW_ORBIS_RESOURCE__count);
   assert(num_handles >= 0);
   assert(num_bytes >= 0);

   if (!num_handles) num_handles = 1;

   // make_pool_aligned takes ptr/num_bytes by address; presumably it trims the pool
   // to the required alignment - TODO confirm against its definition
   switch (type) {
   case GDRAW_ORBIS_RESOURCE_texture:
      make_pool_aligned(&ptr, &num_bytes, GDRAW_ORBIS_TEXTURE_ALIGNMENT);
      break;

   case GDRAW_ORBIS_RESOURCE_vertexbuffer:
      // NOTE(review): alignment used here is Gnm::kAlignmentOfBufferInBytes, while the
      // cache below is created with GDRAW_ORBIS_VERTEXBUFFER_ALIGNMENT - confirm they agree
      make_pool_aligned(&ptr, &num_bytes, Gnm::kAlignmentOfBufferInBytes);
      break;

   default:
      break;
   }

   gdraw_limits[type].num_handles = num_handles;
   gdraw_limits[type].num_bytes = num_bytes;
   gdraw_limits[type].ptr = ptr;

   // if no gdraw context created, there's nothing to worry about
   if (!gdraw)
      return 1;

   // make sure GPU is done first (assuming we're in a state where we can dispatch commands)
   assert(!is_fence_pending(gdraw->tile_end_fence)); // you may not call this while GPU is still busy with Iggy command buffers!

   if (gdraw->texturecache) gdraw_res_reap(gdraw->texturecache, &stats);
   if (gdraw->vbufcache) gdraw_res_reap(gdraw->vbufcache, &stats);
   // in theory we can now check that the given cache is really empty at this point

   // resize the appropriate pool
   switch (type) {
   case GDRAW_ORBIS_RESOURCE_rendertarget:
      // render targets use the fixed in-context handle cache and texture headers
      gdraw_HandleCacheInit(&gdraw->rendertargets, MAX_RENDER_STACK_DEPTH + 1, num_bytes);
      for (int i=0; i < MAX_RENDER_STACK_DEPTH + 1; i++)
         gdraw->rendertargets.handle[i].handle.tex.gnm = &gdraw->rendertarget_textures[i];
      gdraw_arena_init(&gdraw->rt_arena, ptr, num_bytes);
      return 1;

   case GDRAW_ORBIS_RESOURCE_texture:
      // drop the old cache and build a new one from the updated limits
      free_handle_cache(gdraw->texturecache);
      gdraw->texturecache = make_handle_cache(GDRAW_ORBIS_RESOURCE_texture, GDRAW_ORBIS_TEXTURE_ALIGNMENT);
      return gdraw->texturecache != NULL;

   case GDRAW_ORBIS_RESOURCE_vertexbuffer:
      free_handle_cache(gdraw->vbufcache);
      gdraw->vbufcache = make_handle_cache(GDRAW_ORBIS_RESOURCE_vertexbuffer, GDRAW_ORBIS_VERTEXBUFFER_ALIGNMENT);
      return gdraw->vbufcache != NULL;

   default:
      return 0;
   }
}

// Resets the render target, texture and vertex buffer pools by calling
// gdraw_orbis_SetResourceMemory with no handles, a NULL pointer and zero bytes.
// Return values of the individual calls are ignored.
void gdraw_orbis_ResetAllResourceMemory()
{
   gdraw_orbis_SetResourceMemory(GDRAW_ORBIS_RESOURCE_rendertarget, 0, NULL, 0);
   gdraw_orbis_SetResourceMemory(GDRAW_ORBIS_RESOURCE_texture, 0, NULL, 0);
   gdraw_orbis_SetResourceMemory(GDRAW_ORBIS_RESOURCE_vertexbuffer, 0, NULL, 0);
}

// Creates the global GDraw context and returns the filled-in function table.
//
//   w, h               - stored as frametex_width / frametex_height and used as the
//                        dimensions of the render target description
//   context_shared_mem - memory handed to the context's shared arena;
//                        GDRAW_ORBIS_CONTEXT_MEM_SIZE bytes are used
//
// Returns a pointer to the static gdraw_funcs table, or NULL if the context
// allocation fails.
GDrawFunctions *gdraw_orbis_CreateContext(S32 w, S32 h, void *context_shared_mem)
{
   // the allocation is the GDraw struct plus cpram_shadow_size extra bytes;
   // only the GDraw struct itself is zeroed below
   U32 cpram_shadow_size = Gnmx::ConstantUpdateEngine::computeCpRamShadowSize();
   gdraw = (GDraw *) IggyGDrawMalloc(sizeof(*gdraw) + cpram_shadow_size);
   if (!gdraw) return NULL;

   memset(gdraw, 0, sizeof(*gdraw));

   // context shared memory
   gdraw_arena_init(&gdraw->vidshared_arena, context_shared_mem, GDRAW_ORBIS_CONTEXT_MEM_SIZE);

   // labels
   // NOTE(review): the gdraw_arena_alloc result is dereferenced without a NULL check.
   gdraw->label_ptr = (volatile U64 *) gdraw_arena_alloc(&gdraw->vidshared_arena, sizeof(U64), sizeof(U64));
   *gdraw->label_ptr = 0;
   gdraw->next_fence_index = 1;
   gdraw->tile_end_fence.value = 0;

   // set up memory for all resource types, re-applying whatever limits were
   // recorded in gdraw_limits (return values are ignored here)
   for (int i=0; i < GDRAW_ORBIS_RESOURCE__count; i++)
      gdraw_orbis_SetResourceMemory((gdraw_orbis_resourcetype) i, gdraw_limits[i].num_handles, gdraw_limits[i].ptr, gdraw_limits[i].num_bytes);

   // initialize render target texture desc
   gdraw->frametex_width = w;
   gdraw->frametex_height = h;
   Gnm::DataFormat rtFormat = Gnm::kDataFormatR8G8B8A8Unorm;
   Gnm::TileMode tileMode;
   GpuAddress::computeSurfaceTileMode(&tileMode, GpuAddress::kSurfaceTypeRwTextureFlat, rtFormat, 1);
   gdraw->rt_colorbuffer_sa = gdraw->rt_colorbuffer.init(gdraw->frametex_width, gdraw->frametex_height, 1, rtFormat, tileMode, Gnm::kNumSamples1, Gnm::kNumFragments1, NULL, NULL);
   gdraw->rt_colorbuffer.setCmaskFastClearEnable(false);

   // shaders and state
   create_all_shaders_and_state();

   // API: fill in the function table handed back to the caller
   gdraw_funcs.SetViewSizeAndWorldScale = gdraw_SetViewSizeAndWorldScale;
   gdraw_funcs.GetInfo = gdraw_GetInfo;

   gdraw_funcs.DescribeTexture = gdraw_DescribeTexture;
   gdraw_funcs.DescribeVertexBuffer = gdraw_DescribeVertexBuffer;

   gdraw_funcs.RenderingBegin = gdraw_RenderingBegin;
   gdraw_funcs.RenderingEnd = gdraw_RenderingEnd;
   gdraw_funcs.RenderTileBegin = gdraw_RenderTileBegin;
   gdraw_funcs.RenderTileEnd = gdraw_RenderTileEnd;

   gdraw_funcs.TextureDrawBufferBegin = gdraw_TextureDrawBufferBegin;
   gdraw_funcs.TextureDrawBufferEnd = gdraw_TextureDrawBufferEnd;

   gdraw_funcs.DrawIndexedTriangles = gdraw_DrawIndexedTriangles;
   gdraw_funcs.FilterQuad = gdraw_FilterQuad;

   gdraw_funcs.SetAntialiasTexture = gdraw_SetAntialiasTexture;

   gdraw_funcs.ClearStencilBits = gdraw_ClearStencilBits;
   gdraw_funcs.ClearID = gdraw_ClearID;

   gdraw_funcs.MakeTextureBegin = gdraw_MakeTextureBegin;
   gdraw_funcs.MakeTextureMore = gdraw_MakeTextureMore;
   gdraw_funcs.MakeTextureEnd = gdraw_MakeTextureEnd;

   gdraw_funcs.UpdateTextureBegin = gdraw_UpdateTextureBegin;
   gdraw_funcs.UpdateTextureRect = gdraw_UpdateTextureRect;
   gdraw_funcs.UpdateTextureEnd = gdraw_UpdateTextureEnd;

   gdraw_funcs.FreeTexture = gdraw_FreeTexture;
   gdraw_funcs.TryToLockTexture = gdraw_TryToLockTexture;

   gdraw_funcs.MakeVertexBufferBegin = gdraw_MakeVertexBufferBegin;
   gdraw_funcs.MakeVertexBufferMore = gdraw_MakeVertexBufferMore;
   gdraw_funcs.MakeVertexBufferEnd = gdraw_MakeVertexBufferEnd;
   gdraw_funcs.TryToLockVertexBuffer = gdraw_TryLockVertexBuffer;
   gdraw_funcs.FreeVertexBuffer = gdraw_FreeVertexBuffer;

   gdraw_funcs.MakeTextureFromResource = (gdraw_make_texture_from_resource *) gdraw_orbis_MakeTextureFromResource;
   gdraw_funcs.FreeTextureFromResource = gdraw_orbis_DestroyTextureFromResource;

   gdraw_funcs.UnlockHandles = gdraw_UnlockHandles;
   gdraw_funcs.SetTextureUniqueID = gdraw_SetTextureUniqueID;

   return &gdraw_funcs;
}

// Destroys the global GDraw context, if there is one: flushes the texture and
// vertex buffer caches, frees both handle caches and the context itself, and
// resets the global pointer to NULL. Does nothing when no context exists.
void gdraw_orbis_DestroyContext(void)
{
   if (gdraw) {
      GDrawStats stats;
      memset(&stats, 0, sizeof(stats));
      if (gdraw->texturecache) gdraw_res_flush(gdraw->texturecache, &stats);
      if (gdraw->vbufcache) gdraw_res_flush(gdraw->vbufcache, &stats);

      // make sure the GPU is done first
      assert(!is_fence_pending(gdraw->tile_end_fence));

      free_handle_cache(gdraw->texturecache);
      free_handle_cache(gdraw->vbufcache);
      IggyGDrawFree(gdraw);
      gdraw = NULL;
   }
}

// Starts a custom draw region: clears GDraw's render state and writes the
// object-space matrix for the region (from region->o2w and the current
// projection) into 'matrix'.
void RADLINK gdraw_orbis_BeginCustomDraw(IggyCustomDrawCallbackRegion *region, float matrix[16])
{
   clear_renderstate();
   gdraw_GetObjectSpaceMatrix(matrix, region->o2w, gdraw->projection, 0.0f, 0);
}

// Computes the same matrix as gdraw_orbis_BeginCustomDraw into 'mat', but
// without touching the render state.
void RADLINK gdraw_orbis_CalculateCustomDraw_4J(IggyCustomDrawCallbackRegion * region, F32 mat[16])
{
   gdraw_GetObjectSpaceMatrix(mat, region->o2w, gdraw->projection, 0.0f, 0);
}

// Ends a custom draw region by re-applying GDraw's common render state.
// 'region' is not used.
void RADLINK gdraw_orbis_EndCustomDraw(IggyCustomDrawCallbackRegion *region)
{
   set_common_renderstate();
}

// Wraps a texture stored inside an in-memory Iggy file as a GDrawTexture.
//
// The Gnm::Texture header embedded in 'tex' is patched in place: its base
// address is set to file_in_memory + tex->file_offset, register 7 is cleared,
// and for the la_88 / i_8 / l_8 formats the channel order is overridden.
// Other formats keep the channel order already present in the header.
// 'len' is not used.
GDrawTexture * RADLINK gdraw_orbis_MakeTextureFromResource(U8 *file_in_memory, S32 len, IggyFileTexturePS4 *tex)
{
   Gnm::Texture *texture = (Gnm::Texture *) &tex->texture;
   texture->setBaseAddress(file_in_memory + tex->file_offset);
   texture->m_regs[7] = 0;

   switch (tex->format) {
   case IFT_FORMAT_la_88:
      // first three outputs from channel X, fourth from channel Y
      texture->setChannelOrder(Gnm::kTextureChannelX, Gnm::kTextureChannelX, Gnm::kTextureChannelX, Gnm::kTextureChannelY);
      break;

   case IFT_FORMAT_i_8:
      // channel X replicated to all four outputs
      texture->setChannelOrder(Gnm::kTextureChannelX, Gnm::kTextureChannelX, Gnm::kTextureChannelX, Gnm::kTextureChannelX);
      break;

   case IFT_FORMAT_l_8:
      // first three outputs from channel X, fourth is constant 1
      texture->setChannelOrder(Gnm::kTextureChannelX, Gnm::kTextureChannelX, Gnm::kTextureChannelX, Gnm::kTextureChannelConstant1);
      break;
   }

   return gdraw_orbis_WrappedTextureCreate(texture);
}

// Destroys a texture created by gdraw_orbis_MakeTextureFromResource by
// forwarding to gdraw_orbis_WrappedTextureDestroy.
extern void RADLINK gdraw_orbis_DestroyTextureFromResource(GDrawTexture *tex)
{
   gdraw_orbis_WrappedTextureDestroy(tex);
}

// 4J added - copy of set_viewport_raw that sets an opengl style z-range rather than the direct-x range used in set_viewport_raw
//
// Records the rectangle (x, y, w, h) in gdraw->cview, programs viewport 0 with a
// y-flipped scale (negative y scale) and a z scale/offset of 0.5 each, then
// calls disable_scissor(true).
static void set_viewport_raw_4J(S32 x, S32 y, S32 w, S32 h)
{
   // check against hardware limits
   assert(w >= 0 && w <= 16384);
   assert(h >= 0 && h <= 16384);

   gdraw->cview.x0 = x;
   gdraw->cview.y0 = y;
   gdraw->cview.x1 = x + w;
   gdraw->cview.y1 = y + h;

   F32 scale[3] = { (F32)w * 0.5f, -(F32)h * 0.5f, 0.5f };
   F32 offset[3] = { (F32)x + (F32)w * 0.5f, (F32)y + (F32)h * 0.5f, 0.5f };
   gdraw->gfxc->setViewport(0, 0.0f, 1.0f, scale, offset);
   disable_scissor(true);
}

// 4J added - copy of setViewport, that sets the current viewport but with an opengl-style z-range rather than the direct-x range that Iggy uses internally
// on PS4. We need this to set up a viewport to match Iggy when doing custom rendering
//
// The rectangle is chosen from the current GDraw state:
//   - during a blur:                     padded tile size (tpw x tph) at the origin
//   - rendering to the frame:            tile size (tw x th) at (vx, vy)
//   - rendering to a cached target:      that target's width x height at the origin
//   - any other render stack entry:      padded tile size (tpw x tph) at the origin
void gdraw_orbis_setViewport_4J()
{
   if (gdraw->in_blur) { // blur needs special setup
      set_viewport_raw_4J(0, 0, gdraw->tpw, gdraw->tph);
      return;
   }

   if (gdraw->cur == gdraw->frame) // if the rendering stack is empty
      // render a tile-sized region to the user-request tile location
      set_viewport_raw_4J(gdraw->vx, gdraw->vy, gdraw->tw, gdraw->th);
   else if (gdraw->cur->cached)
      set_viewport_raw_4J(0, 0, gdraw->cur->width, gdraw->cur->height);
   else
      // if on the render stack, draw a padded-tile-sized region at the origin
      set_viewport_raw_4J(0, 0, gdraw->tpw, gdraw->tph);
}