2496 lines
85 KiB
C++
2496 lines
85 KiB
C++
#include "stdafx.h"
|
|
// gdraw_orbis.cpp - author: Fabian Giesen - copyright 2012 RAD Game Tools
|
|
//
|
|
// This implements the Iggy graphics driver layer for Orbis.
|
|
|
|
// GDraw consists of several components that interact fairly loosely with each other;
|
|
// e.g. the resource management, drawing and filtering parts are all fairly independent
|
|
// of each other. If you want to modify some aspect of GDraw - say the texture allocation
|
|
// logic - your best bet is usually to just look for one of the related entry points,
|
|
// e.g. MakeTextureBegin, and take it from there. There's a bunch of code in this file,
|
|
// but most of it isn't really complicated. The bits that are somewhat tricky have a more
|
|
// detailed explanation at the top of the relevant section.
|
|
|
|
#include <kernel.h>
|
|
#include <gnm.h>
|
|
#include <gnmx.h>
|
|
#include "iggy.h"
|
|
#include "gdraw.h"
|
|
#include <math.h>
|
|
#include <kernel.h>
|
|
#include <xmmintrin.h>
|
|
|
|
#include "gdraw_orbis.h"
|
|
|
|
using namespace sce;
|
|
|
|
typedef union {
|
|
struct {
|
|
Gnm::Texture *gnm;
|
|
void *gnm_ptr;
|
|
} tex;
|
|
|
|
struct {
|
|
void *verts;
|
|
void *inds;
|
|
} vbuf;
|
|
} GDrawNativeHandle;
|
|
|
|
#define GDRAW_MANAGE_MEM
|
|
#define GDRAW_DEFRAGMENT
|
|
#define GDRAW_NO_STREAMING_MIPGEN
|
|
#define GDRAW_MIN_FREE_AMOUNT (64*1024) // always try to free at least this many bytes when throwing out old textures
|
|
#define GDRAW_MAYBE_UNUSED __attribute__((unused))
|
|
#include "gdraw_shared.inl"
|
|
|
|
// max rendertarget stack depth. this depends on the extent to which you
|
|
// use filters and non-standard blend modes, and how nested they are.
|
|
#define MAX_RENDER_STACK_DEPTH 8 // Iggy is hardcoded to a limit of 16... probably 1-3 is realistic!
|
|
#define MAX_SAMPLERS 3
|
|
#define MAX_ATTRS 2 // max number of attrs read by a vertex shader
|
|
#define AATEX_SAMPLER 7 // sampler that aa_tex gets set in
|
|
|
|
#define ASSERT_COUNT(a,b) ((a) == (b) ? (b) : -1)
|
|
|
|
#define MAX_TEXTURE2D_DIM 16384 // from GPU walkthrough
|
|
#define MAX_AATEX_WIDTH 64
|
|
|
|
static GDrawFunctions gdraw_funcs;
|
|
|
|
// render target state
|
|
typedef struct
|
|
{
|
|
GDrawHandle *color_buffer;
|
|
S32 base_x, base_y, width, height;
|
|
rrbool cached;
|
|
rrbool needs_clear_eliminate;
|
|
U32 clear_col[2];
|
|
} GDrawFramebufferState;
|
|
|
|
struct ShaderCode
|
|
{
|
|
void *blob;
|
|
union
|
|
{
|
|
void *desc;
|
|
Gnmx::ShaderCommonData *common;
|
|
Gnmx::PsShader *ps;
|
|
Gnmx::VsShader *vs;
|
|
Gnmx::CsShader *cs;
|
|
};
|
|
};
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// GDraw data structure
|
|
//
|
|
//
|
|
// This is the primary rendering abstraction, which hides all
|
|
// the platform-specific rendering behavior from Iggy. It is
|
|
// full of platform-specific graphics state, and also general
|
|
// graphics state so that it doesn't have to callback into Iggy
|
|
// to get at that graphics state.
|
|
|
|
struct GDraw
|
|
{
|
|
// 16-byte aligned!
|
|
F32 projection[4]; // always 2D scale+2D translate. first two are scale, last two are translate.
|
|
|
|
// scale factor converting worldspace to viewspace <0,0>..<w,h>
|
|
F32 world_to_pixel[2];
|
|
|
|
// graphics context
|
|
Gnmx::GfxContext *gfxc;
|
|
|
|
// cached state
|
|
U32 scissor_state; // ~0 if unknown, otherwise 0 or 1
|
|
int blend_mode; // active blend mode (-1 if unknown)
|
|
int writes_masked; // are color writes masked or not? (-1 if unknown)
|
|
U32 z_stencil_key; // field built from z/stencil test flags. 0 = no z/stencil test, ~0 is used for "unknown state"
|
|
|
|
GDrawTexture *active_tex[MAX_SAMPLERS];
|
|
ShaderCode *cur_ps;
|
|
|
|
// pixel shader base pointers
|
|
ShaderCode *basic_ps[GDRAW_TEXTURE__count];
|
|
|
|
// render targets
|
|
Gnm::RenderTarget main_colorbuffer;
|
|
Gnm::DepthRenderTarget main_depthbuffer;
|
|
GDrawHandleCache rendertargets;
|
|
GDrawHandle rendertarget_handles[MAX_RENDER_STACK_DEPTH]; // not -1, because we use +1 to initialize
|
|
Gnm::Texture rendertarget_textures[MAX_RENDER_STACK_DEPTH+1];
|
|
|
|
gswf_recti rt_valid[MAX_RENDER_STACK_DEPTH+1]; // valid rect for texture clamping
|
|
|
|
// size of our render targets
|
|
S32 frametex_width, frametex_height;
|
|
|
|
// viewport setting (in pixels) for the current tile
|
|
S32 vx, vy;
|
|
S32 fw, fh; // full width/height of virtual display
|
|
S32 tw, th; // actual width/height of current tile
|
|
S32 tpw, tph; // width/height of padded version of tile
|
|
|
|
S32 tx0, ty0;
|
|
S32 tx0p, ty0p;
|
|
|
|
rrbool in_blur;
|
|
struct {
|
|
S32 x0, y0, x1, y1;
|
|
} cview; // current viewport
|
|
|
|
Gnm::Texture aa_tex;
|
|
Gnm::Buffer pixel_common_zero_cbuf;
|
|
GDrawArena vidshared_arena; // mainly for shaders
|
|
|
|
// synchronization
|
|
volatile U64 *label_ptr;
|
|
U64 next_fence_index;
|
|
|
|
// render target stack described above for 'temporary' rendering
|
|
GDrawFramebufferState frame[MAX_RENDER_STACK_DEPTH];
|
|
GDrawFramebufferState *cur;
|
|
|
|
// texture and vertex buffer pools
|
|
GDrawHandleCache *texturecache;
|
|
GDrawHandleCache *vbufcache;
|
|
|
|
// render target storage
|
|
Gnm::RenderTarget rt_colorbuffer;
|
|
Gnm::SizeAlign rt_colorbuffer_sa;
|
|
GDrawArena rt_arena;
|
|
|
|
// staging buffer
|
|
GDrawArena staging;
|
|
gdraw_orbis_staging_stats staging_stats;
|
|
|
|
// upload temp texture
|
|
Gnm::Texture upload_tex;
|
|
|
|
// precompiled state
|
|
Gnm::Sampler sampler_state[2][GDRAW_WRAP__count]; // [nearest][wrap]
|
|
Gnm::DepthStencilControl depth_stencil_control[2][2][2]; // [set_id][test_id][stencil_enable]
|
|
Gnm::BlendControl blend_control[GDRAW_BLEND__count];
|
|
|
|
// pixel shaders
|
|
ShaderCode main_ps[GDRAW_TEXTURE__count][3];
|
|
ShaderCode exceptional_blend[GDRAW_BLENDSPECIAL__count];
|
|
ShaderCode filter_ps[2][16];
|
|
ShaderCode blur_ps[MAX_TAPS+1];
|
|
ShaderCode colormatrix;
|
|
ShaderCode clear_ps;
|
|
|
|
// compute shaders
|
|
ShaderCode texupload_cs;
|
|
ShaderCode memset_cs;
|
|
ShaderCode defragment_cs;
|
|
ShaderCode mipgen_cs;
|
|
|
|
// vertex formats
|
|
struct VFormatDesc {
|
|
U32 stride;
|
|
U32 num_attribs;
|
|
Gnm::DataFormat formats[MAX_ATTRS];
|
|
U32 vb_offs[MAX_ATTRS];
|
|
};
|
|
VFormatDesc vfmt[GDRAW_vformat__count];
|
|
|
|
// vertex shader
|
|
ShaderCode vs;
|
|
|
|
// for bookkeeping
|
|
GDrawFence tile_end_fence;
|
|
|
|
// antialias texture upload cache
|
|
bool aatex_new;
|
|
U8 aatex_data[MAX_AATEX_WIDTH * 4];
|
|
};
|
|
|
|
static GDraw *gdraw;
|
|
static const F32 four_zeros[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Synchronization, pointer wrangling and command buffer management
|
|
//
|
|
|
|
// Returns a fence carrying the index the next put_fence() will use, without
// emitting anything or advancing the counter.
static RADINLINE GDrawFence get_next_fence()
{
   GDrawFence upcoming;
   upcoming.value = gdraw->next_fence_index;
   return upcoming;
}
|
|
|
|
// A fence is pending while the value currently stored in the label is still
// below the fence's value.
static RADINLINE rrbool is_fence_pending(GDrawFence fence)
{
   const U64 reached = gdraw->label_ptr[0];
   return reached < fence.value;
}
|
|
|
|
// Consumes the next fence index and queues an end-of-pipe write of that value
// to the label. Returns the fence that was emitted.
static GDrawFence put_fence()
{
   const U64 index = gdraw->next_fence_index++;
   GDrawFence fence = { index };

   void *label = (void *) gdraw->label_ptr;
   gdraw->gfxc->writeImmediateAtEndOfPipe(Gnm::kEopFlushCbDbCaches, label, fence.value, Gnm::kCacheActionNone);
   return fence;
}
|
|
|
|
static void *insert_cb_label();
|
|
|
|
// Called by the resource manager right before it frees (and then reuses) a
// resource that was previously in use. Orbis GDraw memory management is
// non-blocking by design, so the CPU never waits here. If the fence is still
// pending, a GPU-side wait for pixel shading to finish is queued instead,
// because the commands that follow are likely to overwrite (part of) the
// resource pools.
static void wait_on_fence(GDrawFence fence)
{
   if (!is_fence_pending(fence))
      return;

   void *ps_done = insert_cb_label();
   Gnmx::GfxContext *ctx = gdraw->gfxc;
   ctx->writeAtEndOfShader(Gnm::kEosPsDone, ps_done, 1);
   ctx->waitOnAddress(ps_done, ~0, Gnm::kWaitCompareFuncEqual, 1);
}
|
|
|
|
// Stub for Iggy; the argument is ignored and nothing is done.
extern "C" void gdraw_ps4_wait(U64)
{
   // intentionally empty
}
|
|
|
|
static void *insert_cb_label()
|
|
{
|
|
U64 *label = (U64 *) gdraw->gfxc->allocateFromCommandBuffer(sizeof(U64), Gnm::kEmbeddedDataAlignment8);
|
|
*label = 0;
|
|
return label;
|
|
}
|
|
|
|
// compute->compute sync (just wait for previous dispatch to finish)
|
|
static void compute_to_compute_sync()
|
|
{
|
|
Gnmx::GfxContext *gfxc = gdraw->gfxc;
|
|
|
|
void *label = insert_cb_label();
|
|
gfxc->writeAtEndOfShader(Gnm::kEosCsDone, label, 1);
|
|
gfxc->waitOnAddress(label, ~0, Gnm::kWaitCompareFuncEqual, 1);
|
|
}
|
|
|
|
// compute->graphics sync
|
|
static void compute_to_graphics_sync()
|
|
{
|
|
compute_to_compute_sync();
|
|
// compute writes made it to L2 but not all CU L1s, so need to wipe L1 before we go on.
|
|
gdraw->gfxc->flushShaderCachesAndWait(Gnm::kCacheActionInvalidateL1, 0, Gnm::kStallCommandBufferParserDisable);
|
|
}
|
|
|
|
// render-to-texture sync
|
|
static RADINLINE void rtt_sync(void *base, U32 size256)
|
|
{
|
|
UINTa addr = (UINTa) base;
|
|
assert((addr & 0xff) == 0);
|
|
U32 base256 = (U32) (addr >> 8);
|
|
gdraw->gfxc->waitForGraphicsWrites(base256, size256,
|
|
Gnm::kWaitTargetSlotCb0,
|
|
Gnm::kCacheActionWriteBackAndInvalidateL1andL2,
|
|
Gnm::kExtendedCacheActionFlushAndInvalidateCbCache,
|
|
Gnm::kStallCommandBufferParserDisable);
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Texture/vertex memory defragmentation support code
|
|
//
|
|
|
|
// GPU copy used by the defragmenter: binds 'src' as read buffer and 'dst' as
// read/write buffer (both viewed as 16-byte elements, size rounded up) and
// dispatches the currently bound compute shader. 'c' is not used here.
static void gdraw_gpu_memcpy(GDrawHandleCache *c, void *dst, void *src, U32 num_bytes)
{
   const U32 num_16b = (num_bytes + 15) / 16;

   Gnm::Buffer src_buf;
   src_buf.initAsDataBuffer(src, Gnm::kDataFormatR32G32B32A32Uint, num_16b);

   Gnm::Buffer dst_buf;
   dst_buf.initAsDataBuffer(dst, Gnm::kDataFormatR32G32B32A32Uint, num_16b);

   src_buf.setResourceMemoryType(Gnm::kResourceMemoryTypeGC);
   dst_buf.setResourceMemoryType(Gnm::kResourceMemoryTypeGC);

   Gnmx::GfxContext *ctx = gdraw->gfxc;
   ctx->setBuffers(Gnm::kShaderStageCs, 0, 1, &src_buf);
   ctx->setRwBuffers(Gnm::kShaderStageCs, 0, 1, &dst_buf);
   ctx->dispatch(1, 1, 1);

   // copies may overlap, so each one has to finish before the next starts.
   // L1 does not need flushing: defragment never reads data it just wrote.
   compute_to_compute_sync();
}
|
|
|
|
// Defragments the memory of handle cache 'c' on the GPU (if the cache allows
// it) and then re-points every managed handle at its new location.
static void gdraw_defragment_cache(GDrawHandleCache *c, GDrawStats *stats)
{
   if (!gdraw_CanDefragment(c))
      return;

   Gnmx::GfxContext *ctx = gdraw->gfxc;
   ctx->setShaderType(Gnm::kShaderTypeCompute);
   ctx->setCsShader(gdraw->defragment_cs.cs);

   // actual defragmentation...
   gdraw_DefragmentMain(c, GDRAW_DEFRAGMENT_may_overlap, stats);

   // walk all handles and fix up pointers; textures and vertex buffers
   // store their addresses differently.
   for (S32 i = 0; i < c->max_handles; i++) {
      GDrawHandle *h = &c->handle[i];
      if (!gdraw_res_is_managed(h))
         continue;

      if (c->is_vertex) {
         // keep the index data at the same offset from the vertex data
         SINTa index_offs = (U8 *)h->handle.vbuf.inds - (U8 *)h->handle.vbuf.verts;
         h->handle.vbuf.verts = h->raw_ptr;
         h->handle.vbuf.inds = (U8 *)h->raw_ptr + index_offs;
      } else {
         h->handle.tex.gnm->setBaseAddress(h->raw_ptr);
      }
   }

   // synchronize, then restore graphics state
   compute_to_graphics_sync();
   gdraw->gfxc->setCsShader(NULL);
   gdraw->gfxc->setShaderType(Gnm::kShaderTypeGraphics);

   // no need to wait until the GPU is done: GPU memory is never accessed from
   // the CPU, everything goes through the staging buffer.
}
|
|
|
|
// Hook invoked when resource 'r' is freed: a texture that is still recorded
// as bound in a sampler slot gets its slot entry cleared. Vertex buffers need
// no work.
static void api_free_resource(GDrawHandle *r)
{
   if (r->cache->is_vertex)
      return;

   for (S32 slot = 0; slot < MAX_SAMPLERS; slot++) {
      if (gdraw->active_tex[slot] == (GDrawTexture *) r)
         gdraw->active_tex[slot] = NULL;
   }
}
|
|
|
|
// Unlocks every handle in the texture cache and then in the vertex buffer
// cache. 'stats' is not used.
static void RADLINK gdraw_UnlockHandles(GDrawStats *stats)
{
   GDrawHandleCache *caches[2] = { gdraw->texturecache, gdraw->vbufcache };
   for (int i = 0; i < 2; i++)
      gdraw_HandleCacheUnlockAll(caches[i]);
}
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Various helpers
|
|
//
|
|
|
|
struct TexUploadPara
|
|
{
|
|
U32 dest_pos[2];
|
|
U32 size[2];
|
|
};
|
|
|
|
struct MipGenPara
|
|
{
|
|
U32 sizeMinus1[2];
|
|
};
|
|
|
|
// Checks, without allocating, whether 'size' bytes at alignment 'align' still
// fit between the staging arena's current position and its end.
static bool can_staging_satisfy(U32 size, U32 align)
{
   // round the current position up to the requested alignment
   const UINTa mask = (UINTa) align - 1;
   const UINTa aligned = ((UINTa) gdraw->staging.current + mask) & ~mask;

   return (U8 *) aligned + size <= gdraw->staging.end;
}
|
|
|
|
// Records an attempted staging allocation in the staging statistics.
// 'align' is currently not accounted for.
static void track_staging_alloc_attempt(U32 size, U32 align)
{
   // TODO: include alignment bytes in tracking!
   gdraw_orbis_staging_stats &st = gdraw->staging_stats;

   st.allocs_attempted++;
   st.bytes_attempted += size;
   st.largest_bytes_attempted = RR_MAX(st.largest_bytes_attempted, size);
}
|
|
|
|
static void track_staging_alloc_failed()
|
|
{
|
|
if (gdraw->staging_stats.allocs_attempted == gdraw->staging_stats.allocs_succeeded + 1) { // warn the first time we run out of mem
|
|
IggyGDrawSendWarning(NULL, "GDraw out of staging memory");
|
|
}
|
|
}
|
|
|
|
// Allocates 'size' bytes with alignment 'align' from the staging arena and
// updates the staging statistics. Returns NULL when the arena is exhausted.
static void *alloc_staging(U32 size, U32 align)
{
   track_staging_alloc_attempt(size, align);

   void *mem = gdraw_arena_alloc(&gdraw->staging, size, align);
   if (!mem) {
      track_staging_alloc_failed();
      return mem;
   }

   gdraw_orbis_staging_stats &st = gdraw->staging_stats;
   st.allocs_succeeded++;
   st.bytes_succeeded += size;
   st.largest_bytes_succeeded = RR_MAX(st.largest_bytes_succeeded, size);
   return mem;
}
|
|
|
|
// Allocates 'size' bytes inside the command buffer, binds them as constant
// buffer 'index' of 'stage' and returns the memory for the caller to fill in.
static void *embed_const_buffer_impl(Gnm::ShaderStage stage, U32 index, U32 size)
{
   Gnmx::GfxContext *ctx = gdraw->gfxc;
   void *storage = ctx->allocateFromCommandBuffer(size, Gnm::kEmbeddedDataAlignment4);

   Gnm::Buffer cbuf;
   cbuf.initAsConstantBuffer(storage, size);
   ctx->setConstantBuffers(stage, index, 1, &cbuf);

   return storage;
}
|
|
|
|
template<typename T>
|
|
static RADINLINE T *embed_const_buffer(Gnm::ShaderStage stage, U32 index)
|
|
{
|
|
return (T *) embed_const_buffer_impl(stage, index, sizeof(T));
|
|
}
|
|
|
|
static void upload_begin()
|
|
{
|
|
gdraw->gfxc->setShaderType(Gnm::kShaderTypeCompute);
|
|
gdraw->gfxc->setCsShader(gdraw->texupload_cs.cs);
|
|
}
|
|
|
|
static void upload_end()
|
|
{
|
|
// synchronize
|
|
compute_to_graphics_sync();
|
|
gdraw->gfxc->setShaderType(Gnm::kShaderTypeGraphics);
|
|
}
|
|
|
|
// Queues one upload dispatch copying an ncols x nrows region from 'src_tex'
// into 'dst_tex' at (dst_x, dst_y). One thread group is launched per 8x8 tile
// of the region (rounded up).
static void upload_dispatch(const Gnm::Texture &dst_tex, const Gnm::Texture &src_tex, U32 dst_x, U32 dst_y, U32 ncols, U32 nrows)
{
   TexUploadPara *para = embed_const_buffer<TexUploadPara>(Gnm::kShaderStageCs, 0);
   para->dest_pos[0] = dst_x;
   para->dest_pos[1] = dst_y;
   para->size[0] = ncols;
   para->size[1] = nrows;

   const U32 groups_x = (ncols + 7) / 8;
   const U32 groups_y = (nrows + 7) / 8;

   Gnmx::GfxContext *ctx = gdraw->gfxc;
   ctx->setTextures(Gnm::kShaderStageCs, 0, 1, &src_tex);
   ctx->setRwTextures(Gnm::kShaderStageCs, 0, 1, &dst_tex);
   ctx->dispatch(groups_x, groups_y, 1);
}
|
|
|
|
// Uploads an ncols x nrows rectangle of CPU pixel data ('src', rows 'pitch'
// bytes apart, 'bpp' bytes per pixel) into mip level 'dst_mip' of 'dst' at
// (dst_x, dst_y). The data is first copied into a linear staging texture.
// If no staging memory is left, the upload is silently skipped.
static void upload_tex_subrect(Gnm::Texture *dst, U32 dst_mip, U32 dst_x, U32 dst_y, const void *src, U32 pitch, U32 ncols, U32 nrows, U32 bpp)
{
   // destination view restricted to the requested mip level
   Gnm::Texture dst_tex = *dst;
   dst_tex.setMipLevelRange(dst_mip, dst_mip);
   dst_tex.setResourceMemoryType(Gnm::kResourceMemoryTypeGC);

   // linear source texture in the destination's format
   Gnm::Texture src_tex;
   Gnm::SizeAlign sa = src_tex.initAs2d(ncols, nrows, 1, dst_tex.getDataFormat(), Gnm::kTileModeDisplay_LinearAligned, Gnm::kNumSamples1);

   // backing memory for the source texture comes from the staging buffer
   U8 *staging = (U8 *) alloc_staging(sa.m_size, sa.m_align);
   if (!staging)
      return; // out of staging mem, bail

   src_tex.setBaseAddress(staging);
   src_tex.setResourceMemoryType(Gnm::kResourceMemoryTypeGC);

   // copy row by row, the staging texture's pitch can differ from the caller's
   const U8 *src_bytes = (const U8 *) src;
   const U32 staging_pitch_bytes = src_tex.getPitch() * bpp;
   const U32 copy_bytes = ncols * bpp;
   U32 row = 0;
   while (row < nrows) {
      memcpy(staging + row * staging_pitch_bytes, src_bytes + row * pitch, copy_bytes);
      row++;
   }

   upload_dispatch(dst_tex, src_tex, dst_x, dst_y, ncols, nrows);
}
|
|
|
|
static void gpu_compute_memset(void *ptr, U32 value, U32 size_in_bytes)
|
|
{
|
|
assert((size_in_bytes & 3) == 0);
|
|
|
|
Gnmx::GfxContext *gfxc = gdraw->gfxc;
|
|
gfxc->setShaderType(Gnm::kShaderTypeCompute);
|
|
gfxc->setCsShader(gdraw->memset_cs.cs);
|
|
|
|
auto para = embed_const_buffer<U32>(Gnm::kShaderStageCs, 0);
|
|
*para = value;
|
|
|
|
// we tag resources as SC (system coherent); i.e. no L1, write to L2,
|
|
// tag as volatile so we can flush it cheaply.
|
|
|
|
Gnm::Buffer buf;
|
|
if (size_in_bytes >= 16) {
|
|
U32 nelem = size_in_bytes / 16;
|
|
|
|
buf.initAsDataBuffer(ptr, Gnm::kDataFormatR32G32B32A32Uint, nelem);
|
|
buf.setResourceMemoryType(Gnm::kResourceMemoryTypeSC);
|
|
gfxc->setRwBuffers(Gnm::kShaderStageCs, 0, 1, &buf);
|
|
gfxc->dispatch((nelem + 255) / 256, 1, 1);
|
|
}
|
|
|
|
if (size_in_bytes & 0xf) {
|
|
buf.initAsDataBuffer((U8 *)ptr + (size_in_bytes & ~0xf), Gnm::kDataFormatR32Uint, (size_in_bytes & 0xf) / 4);
|
|
buf.setResourceMemoryType(Gnm::kResourceMemoryTypeSC);
|
|
gfxc->setRwBuffers(Gnm::kShaderStageCs, 0, 1, &buf);
|
|
gfxc->dispatch(1, 1, 1);
|
|
}
|
|
|
|
compute_to_compute_sync();
|
|
|
|
// write back L2 volatile here to make sure the values reach memory.
|
|
// this is all necessary because this function is used to clear HTile
|
|
// buffers which are written by the CB/DB blocks and not accessed
|
|
// through the regular caches.
|
|
gfxc->flushShaderCachesAndWait(Gnm::kCacheActionWriteBackL2Volatile, 0, Gnm::kStallCommandBufferParserDisable);
|
|
gfxc->setShaderType(Gnm::kShaderTypeGraphics);
|
|
gfxc->setCsShader(NULL);
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Texture creation/updating/deletion
|
|
//
|
|
|
|
// Creates a user-owned GDraw texture handle that wraps the application's Gnm
// texture description 'tex'. No texture memory is allocated from the cache
// (size 0); the descriptor is copied into the handle.
// Returns NULL if no handle could be obtained from the texture cache.
GDrawTexture * RADLINK gdraw_orbis_WrappedTextureCreate(Gnm::Texture *tex)
{
   GDrawStats stats = {};
   GDrawHandle *p = gdraw_res_alloc_begin(gdraw->texturecache, 0, &stats);

   // gdraw_res_alloc_begin can fail (its other callers in this file check
   // for NULL); bail out instead of dereferencing a NULL handle.
   if (!p)
      return NULL;

   p->handle.tex.gnm_ptr = NULL; // wrapped textures own no memory in our pools
   gdraw_HandleCacheAllocateEnd(p, 0, NULL, GDRAW_HANDLE_STATE_user_owned);
   gdraw_orbis_WrappedTextureChange((GDrawTexture *) p, tex);
   return (GDrawTexture *) p;
}
|
|
|
|
// Replaces the Gnm texture description stored in a wrapped texture handle
// with a copy of '*tex'.
void RADLINK gdraw_orbis_WrappedTextureChange(GDrawTexture *handle, Gnm::Texture *tex)
{
   GDrawHandle *wrapped = (GDrawHandle *) handle;
   Gnm::Texture *stored = wrapped->handle.tex.gnm;
   *stored = *tex;
}
|
|
|
|
// Frees a wrapped texture handle.
// 4J change: for the duration of the free, the GDraw context is swapped to
// RenderManager's current back buffer context and restored afterwards.
void RADLINK gdraw_orbis_WrappedTextureDestroy(GDrawTexture *handle)
{
   sce::Gnmx::GfxContext *saved_ctx = gdraw->gfxc;
   gdraw->gfxc = RenderManager.GetCurrentBackBufferContext();

   GDrawStats stats = {};
   gdraw_res_free((GDrawHandle *) handle, &stats);

   gdraw->gfxc = saved_ctx;
}
|
|
|
|
// Transfers ownership of a texture handle from 'old_id' to 'new_id'.
// If the handle's owner is not 'old_id' the caller holds a stale handle and
// the request is ignored.
static void RADLINK gdraw_SetTextureUniqueID(GDrawTexture *tex, void *old_id, void *new_id)
{
   GDrawHandle *h = (GDrawHandle *) tex;
   if (h->owner != old_id)
      return; // stale handle

   h->owner = new_id;
}
|
|
|
|
// Starts creation of a texture: allocates a cache handle plus texture memory
// and a linear staging texture the caller writes the pixels into.
// On success fills in 'p' (p0 = handle, texture_data = staging memory,
// num_rows = full height, stride_in_bytes = staging pitch) and returns true.
// Returns false if the size exceeds the hardware limit, staging memory is
// insufficient, or the cache allocation fails.
static rrbool RADLINK gdraw_MakeTextureBegin(void *owner, S32 width, S32 height, gdraw_texture_format gformat, U32 flags, GDraw_MakeTexture_ProcessingInfo *p, GDrawStats *stats)
{
   if (width > MAX_TEXTURE2D_DIM || height > MAX_TEXTURE2D_DIM) {
      IggyGDrawSendWarning(NULL, "GDraw %d x %d texture not supported by hardware (dimension limit %d)", width, height, MAX_TEXTURE2D_DIM);
      return false;
   }

   const bool is_font = (gformat == GDRAW_TEXTURE_FORMAT_font);
   const bool is_gradient = (height == 1);

   // font textures are single channel, everything else is RGBA8
   Gnm::DataFormat format = is_font ? Gnm::kDataFormatR8Unorm : Gnm::kDataFormatR8G8B8A8Unorm;
   S32 bytes_pixel = is_font ? 1 : 4;

   // number of mip levels: 1 unless mipmaps were requested; never for gradients
   U32 mipmaps = 1;
   if (!is_gradient && (flags & GDRAW_MAKETEXTURE_FLAGS_mipmap)) {
      while ((width >> mipmaps) || (height >> mipmaps))
         mipmaps++;
   }

   Gnm::Texture gt;
   Gnm::SizeAlign sa = gt.initAs2d(width, height, mipmaps, format, is_gradient ? Gnm::kTileModeDisplay_LinearAligned : Gnm::kTileModeThin_1dThin, Gnm::kNumSamples1);

   if (is_font) // we want an A8 not R8 texture!
      gt.setChannelOrder(Gnm::kTextureChannelX, Gnm::kTextureChannelX, Gnm::kTextureChannelX, Gnm::kTextureChannelX);

   // Make sure we actually satisfy alignment requirements
   assert(sa.m_align <= GDRAW_ORBIS_TEXTURE_ALIGNMENT);

   // Work out what the upload texture needs and verify the staging buffer can
   // hold it *before* gdraw_res_alloc_begin, so we don't evict resources only
   // to find out afterwards that staging memory is missing anyway.
   Gnm::SizeAlign sa_up = gdraw->upload_tex.initAs2d(width, height, 1, format, Gnm::kTileModeDisplay_LinearAligned, Gnm::kNumSamples1);
   if (!can_staging_satisfy(sa_up.m_size, sa_up.m_align)) {
      track_staging_alloc_attempt(sa_up.m_size, sa_up.m_align);
      track_staging_alloc_failed();
      return false;
   }

   // allocate a handle and make room in the cache for this much data
   U32 size = sa.m_size;
   GDrawHandle *t = gdraw_res_alloc_begin(gdraw->texturecache, size, stats);
   if (!t)
      return false;

   t->handle.tex.gnm_ptr = t->raw_ptr;
   gt.setBaseAddress(t->raw_ptr);
   *t->handle.tex.gnm = gt;

   // allocate staging texture (we checked that there was enough space earlier)
   void *upload_ptr = alloc_staging(sa_up.m_size, sa_up.m_align);
   if (!upload_ptr) {
      // should be impossible after the check above, but clean up properly
      // if it happens regardless.
      assert(0);
      gdraw_HandleCacheAllocateFail(t);
      return false;
   }
   gdraw->upload_tex.setBaseAddress(upload_ptr);

   gdraw_HandleCacheAllocateEnd(t, size, owner, (flags & GDRAW_MAKETEXTURE_FLAGS_never_flush) ? GDRAW_HANDLE_STATE_pinned : GDRAW_HANDLE_STATE_locked);

   stats->nonzero_flags |= GDRAW_STATS_alloc_tex;
   stats->alloc_tex += 1;
   stats->alloc_tex_bytes += size;

   // tell the caller where and how to write the pixels
   p->texture_type = GDRAW_TEXTURE_TYPE_rgba;
   p->p0 = t;
   p->texture_data = (U8 *) upload_ptr;
   p->num_rows = height; // just send the whole texture at once
   p->stride_in_bytes = gdraw->upload_tex.getPitch() * bytes_pixel;

   return true;
}
|
|
|
|
// Always returns false: gdraw_MakeTextureBegin hands out the complete texture
// in one go, so there is never another chunk to process.
static rrbool RADLINK gdraw_MakeTextureMore(GDraw_MakeTexture_ProcessingInfo *p)
{
   return false;
}
|
|
|
|
// Finishes texture creation: copies the staged pixels into mip 0 of the real
// texture and then generates the remaining mip levels on the GPU, each from
// the level above it. Returns the texture handle stored in p->p0.
static GDrawTexture * RADLINK gdraw_MakeTextureEnd(GDraw_MakeTexture_ProcessingInfo *p, GDrawStats *stats)
{
   GDrawHandle *t = (GDrawHandle *) p->p0;
   Gnm::Texture *gnm_tex = t->handle.tex.gnm;
   Gnmx::GfxContext *gfxc = gdraw->gfxc;
   const U32 width = gnm_tex->getWidth();
   const U32 height = gnm_tex->getHeight();

   // --- upload the top mip from the staging texture ---
   upload_begin();

   Gnm::Texture dst_tex = *gnm_tex;
   dst_tex.setResourceMemoryType(Gnm::kResourceMemoryTypeGC);
   Gnm::Texture src_tex = dst_tex; // copied before the mip range is narrowed
   dst_tex.setMipLevelRange(0, 0);
   upload_dispatch(dst_tex, gdraw->upload_tex, 0, 0, width, height);

   upload_end();

   // --- compute the mip maps ---
   gfxc->setShaderType(Gnm::kShaderTypeCompute);
   gfxc->setCsShader(gdraw->mipgen_cs.cs);

   const U32 last_mip = gnm_tex->getLastMipLevel();
   for (U32 mip = 1; mip <= last_mip; mip++) {
      const U32 src_level = mip - 1;
      const U32 mipw = RR_MAX(width >> mip, 1);
      const U32 miph = RR_MAX(height >> mip, 1);

      src_tex.setMipLevelRange(src_level, src_level);
      dst_tex.setMipLevelRange(mip, mip);

      MipGenPara *para = embed_const_buffer<MipGenPara>(Gnm::kShaderStageCs, 0);
      para->sizeMinus1[0] = RR_MAX(width >> src_level, 1) - 1;
      para->sizeMinus1[1] = RR_MAX(height >> src_level, 1) - 1;

      gfxc->setTextures(Gnm::kShaderStageCs, 0, 1, &src_tex);
      gfxc->setRwTextures(Gnm::kShaderStageCs, 0, 1, &dst_tex);
      gfxc->dispatch((mipw + 7) / 8, (miph + 7) / 8, 1);

      // the next level reads what this one wrote; after the last level the
      // consumer is graphics instead.
      if (mip == last_mip)
         compute_to_graphics_sync();
      else
         compute_to_compute_sync();
   }

   gfxc->setShaderType(Gnm::kShaderTypeGraphics);

   return (GDrawTexture *) t;
}
|
|
|
|
// Begins a texture update: tries to lock the handle for 'unique_id' and, if
// that works, switches to the upload compute shader. Returns whether the
// update may proceed.
static rrbool RADLINK gdraw_UpdateTextureBegin(GDrawTexture *t, void *unique_id, GDrawStats *stats)
{
   if (!gdraw_HandleCacheLock((GDrawHandle *) t, unique_id))
      return false;

   upload_begin();
   return true;
}
|
|
|
|
// Uploads a w x h rectangle of 'samples' (rows 'stride' bytes apart) into mip 0
// of texture 't' at (x, y). Font textures use 1 byte per pixel, all other
// formats 4.
static void RADLINK gdraw_UpdateTextureRect(GDrawTexture *t, void *unique_id, S32 x, S32 y, S32 stride, S32 w, S32 h, U8 *samples, gdraw_texture_format format)
{
   GDrawHandle *handle = (GDrawHandle *) t;

   U32 bpp = 4;
   if (format == GDRAW_TEXTURE_FORMAT_font)
      bpp = 1;

   upload_tex_subrect(handle->handle.tex.gnm, 0, x, y, samples, stride, w, h, bpp);
}
|
|
|
|
// Ends a texture update: synchronizes the uploads, returns to graphics and
// unlocks the handle again.
static void RADLINK gdraw_UpdateTextureEnd(GDrawTexture *t, void *unique_id, GDrawStats *stats)
{
   upload_end();
   gdraw_HandleCacheUnlock((GDrawHandle *) t);
}
|
|
|
|
// Frees a texture if 'unique_id' matches its owner (or is NULL).
// Render target textures are only unlocked, not freed, which keeps them
// cached for reuse.
static void RADLINK gdraw_FreeTexture(GDrawTexture *tt, void *unique_id, GDrawStats *stats)
{
   GDrawHandle *t = (GDrawHandle *) tt;
   assert(t != NULL);

   const bool may_free = (t->owner == unique_id) || (unique_id == NULL);
   if (!may_free)
      return;

   if (t->cache == &gdraw->rendertargets) {
      // cache it by simply not freeing it
      gdraw_HandleCacheUnlock(t);
      return;
   }

   gdraw_res_kill(t, stats);
}
|
|
|
|
// Tries to lock texture 't' for 'unique_id'; returns the result of the
// handle cache lock.
static rrbool RADLINK gdraw_TryToLockTexture(GDrawTexture *t, void *unique_id, GDrawStats *stats)
{
   GDrawHandle *handle = (GDrawHandle *) t;
   return gdraw_HandleCacheLock(handle, unique_id);
}
|
|
|
|
// Fills 'desc' with the texture's width, height and size in bytes.
static void RADLINK gdraw_DescribeTexture(GDrawTexture *tex, GDraw_Texture_Description *desc)
{
   GDrawHandle *handle = (GDrawHandle *) tex;
   Gnm::Texture *gnm_tex = handle->handle.tex.gnm;

   desc->width = gnm_tex->getWidth();
   desc->height = gnm_tex->getHeight();
   desc->size_in_bytes = handle->bytes;
}
|
|
|
|
static void antialias_tex_upload()
|
|
{
|
|
if (!gdraw->aatex_new || !gdraw->gfxc)
|
|
return;
|
|
|
|
U32 width = gdraw->aa_tex.getWidth();
|
|
|
|
upload_begin();
|
|
upload_tex_subrect(&gdraw->aa_tex, 0, 0, 0, gdraw->aatex_data, width*4, width, 1, 4);
|
|
upload_end();
|
|
gdraw->aatex_new = false;
|
|
}
|
|
|
|
// Creates the antialias texture (width x 1, RGBA8) once and queues its texels
// for upload. 'rgba' must hold width*4 bytes. Later calls are ignored once
// aa_tex is a valid texture.
//
// 'width' is validated before anything is modified: the texels are cached in
// gdraw->aatex_data, which only holds MAX_AATEX_WIDTH*4 bytes, so a larger
// (or non-positive) width must never reach the memcpy, including in builds
// where assert() is compiled out.
static void RADLINK gdraw_SetAntialiasTexture(S32 width, U8 *rgba)
{
   if (gdraw->aa_tex.isTexture())
      return;

   assert(width <= MAX_AATEX_WIDTH);
   if (width <= 0 || width > MAX_AATEX_WIDTH || !rgba)
      return;

   Gnm::SizeAlign sa = gdraw->aa_tex.initAs2d(width, 1, 1, Gnm::kDataFormatR8G8B8A8Unorm, Gnm::kTileModeDisplay_LinearAligned, Gnm::kNumSamples1);
   void *ptr = gdraw_arena_alloc(&gdraw->vidshared_arena, sa.m_size, sa.m_align);
   if (!ptr)
      return;
   gdraw->aa_tex.setBaseAddress(ptr);

   // keep a CPU copy; antialias_tex_upload uploads it as soon as a graphics
   // context is available.
   memcpy(gdraw->aatex_data, rgba, width*4);
   gdraw->aatex_new = true;

   antialias_tex_upload();
}
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Vertex buffer creation/deletion
|
|
//
|
|
|
|
// Starts creation of a vertex buffer: allocates a cache handle with room for
// vertex and index data (indices directly after the vertices) and a staging
// area of the same size that the caller fills in.
// Returns false if the cache or the staging buffer is out of memory.
static rrbool RADLINK gdraw_MakeVertexBufferBegin(void *unique_id, gdraw_vformat vformat, S32 vbuf_size, S32 ibuf_size, GDraw_MakeVertexBuffer_ProcessingInfo *p, GDrawStats *stats)
{
   GDrawHandle *vb = gdraw_res_alloc_begin(gdraw->vbufcache, vbuf_size + ibuf_size, stats);
   if (!vb)
      return false;

   // final GPU location
   vb->handle.vbuf.verts = vb->raw_ptr;
   vb->handle.vbuf.inds = (U8 *) vb->raw_ptr + vbuf_size;

   p->p0 = vb;
   p->vertex_data_length = vbuf_size;
   p->index_data_length = ibuf_size;

   // uploads have to go through the staging buffer
   void *staged = alloc_staging(vbuf_size + ibuf_size, Gnm::kAlignmentOfBufferInBytes);
   p->p1 = staged;
   if (!staged) {
      gdraw_HandleCacheAllocateFail(vb);
      return false;
   }

   // where the caller writes; same layout as the final buffer
   p->vertex_data = (U8 *) staged;
   p->index_data = (U8 *) staged + vbuf_size;
   p->i0 = vbuf_size + ibuf_size; // total byte count, used by MakeVertexBufferEnd

   gdraw_HandleCacheAllocateEnd(vb, vbuf_size + ibuf_size, unique_id, GDRAW_HANDLE_STATE_locked);
   return true;
}
|
|
|
|
// Not expected to be called: asserts, and returns false otherwise.
static rrbool RADLINK gdraw_MakeVertexBufferMore(GDraw_MakeVertexBuffer_ProcessingInfo *p)
{
   assert(0);
   return false;
}
|
|
|
|
// Finishes vertex buffer creation: copies the staged data (p->p1, p->i0 bytes)
// to the buffer's final address and flushes the shader caches. Returns the
// handle stored in p->p0.
static GDrawVertexBuffer * RADLINK gdraw_MakeVertexBufferEnd(GDraw_MakeVertexBuffer_ProcessingInfo *p, GDrawStats *stats)
{
   GDrawHandle *vb = (GDrawHandle *) p->p0;
   Gnmx::GfxContext *ctx = gdraw->gfxc;

   // DMA from staging buffer to actual target address.
   ctx->copyData(vb->raw_ptr, p->p1, p->i0, Gnm::kDmaDataBlockingEnable);

   // Flush shader L1 & L2 so the updated VB can be used safely. Command
   // buffer parsing has to stall as well: if the very next command draws
   // from this buffer, the GPU prefetches the index data and, without the
   // flush having happened yet, could fetch bogus indices.
   ctx->flushShaderCachesAndWait(Gnm::kCacheActionWriteBackAndInvalidateL1andL2, 0, Gnm::kStallCommandBufferParserEnable);

   return (GDrawVertexBuffer *) vb;
}
|
|
|
|
// Tries to lock vertex buffer 'vb' for 'unique_id'; returns the result of the
// handle cache lock.
static rrbool RADLINK gdraw_TryLockVertexBuffer(GDrawVertexBuffer *vb, void *unique_id, GDrawStats *stats)
{
   GDrawHandle *handle = (GDrawHandle *) vb;
   return gdraw_HandleCacheLock(handle, unique_id);
}
|
|
|
|
// Frees vertex buffer 'vb', but only if 'unique_id' is its current owner.
static void RADLINK gdraw_FreeVertexBuffer(GDrawVertexBuffer *vb, void *unique_id, GDrawStats *stats)
{
   GDrawHandle *h = (GDrawHandle *) vb;
   assert(h != NULL); // @GDRAW_ASSERT

   if (h->owner != unique_id)
      return;

   gdraw_res_kill(h, stats);
}
|
|
|
|
// Fills 'desc' with the vertex buffer's size in bytes.
static void RADLINK gdraw_DescribeVertexBuffer(GDrawVertexBuffer *vbuf, GDraw_VertexBuffer_Description *desc)
{
   const GDrawHandle *handle = (const GDrawHandle *) vbuf;
   desc->size_in_bytes = handle->bytes;
}
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Create/free (or cache) framebuffer-sized textures
|
|
//
|
|
|
|
// Returns a locked render target texture handle: a cached one if the render
// target cache has one available, otherwise a newly allocated one.
// Returns NULL (after sending a warning) when the handle limit is hit or the
// render target arena is out of memory.
static GDrawHandle *get_color_rendertarget(GDrawStats *stats)
{
   GDrawHandleCache *cache = &gdraw->rendertargets;

   // reuse a cached target if possible
   GDrawHandle *t = gdraw_HandleCacheGetLRU(cache);
   if (t) {
      gdraw_HandleCacheLock(t, (void *) 1);
      return t;
   }

   // otherwise create a new one
   t = gdraw_HandleCacheAllocateBegin(cache);
   if (!t) {
      IggyGDrawSendWarning(NULL, "GDraw rendertarget allocation failed: hit handle limit");
      return t;
   }

   const Gnm::SizeAlign &sa = gdraw->rt_colorbuffer_sa;
   U8 *ptr = (U8 *)gdraw_arena_alloc(&gdraw->rt_arena, sa.m_size, sa.m_align);
   if (!ptr) {
      IggyGDrawSendWarning(NULL, "GDraw rendertarget allocation failed: out of rendertarget texture memory");
      gdraw_HandleCacheAllocateFail(t);
      return NULL;
   }

   t->fence = get_next_fence();
   t->raw_ptr = NULL;

   // the texture descriptor is derived from the template render target and
   // then pointed at the freshly allocated memory
   Gnm::Texture *tex = t->handle.tex.gnm;
   t->handle.tex.gnm_ptr = ptr;
   tex->initFromRenderTarget(&gdraw->rt_colorbuffer, false);
   tex->setBaseAddress(ptr);

   gdraw_HandleCacheAllocateEnd(t, sa.m_size, (void *) 1, GDRAW_HANDLE_STATE_locked);
   return t;
}
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Constant buffer layouts
|
|
//
|
|
|
|
struct VertexVars
|
|
{
|
|
F32 world[2][4];
|
|
F32 x_off[4];
|
|
F32 texgen_s[4];
|
|
F32 texgen_t[4];
|
|
F32 viewproj[4];
|
|
};
|
|
|
|
struct PixelCommonVars
|
|
{
|
|
F32 color_mul[4];
|
|
F32 color_add[4];
|
|
F32 focal[4];
|
|
};
|
|
|
|
struct PixelParaFilter
|
|
{
|
|
F32 clamp0[4], clamp1[4];
|
|
F32 color[4], color2[4];
|
|
F32 tc_off[4];
|
|
};
|
|
|
|
struct PixelParaBlur
|
|
{
|
|
F32 clamp[4];
|
|
F32 tap[9][4];
|
|
};
|
|
|
|
struct PixelParaColorMatrix
|
|
{
|
|
F32 data[5][4];
|
|
};
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Rendering helpers
|
|
//
|
|
|
|
// Binds 'tex' to pixel shader texture slot 'unit', together with the sampler
// that matches the requested wrap mode and filtering.
//   wrap    - one of the GDRAW_WRAP_* values
//   nearest - 0 or 1; selects the first index into gdraw->sampler_state
static void set_gnm_texture(U32 unit, Gnm::Texture *tex, U32 wrap, U32 nearest)
{
   assert(wrap < GDRAW_WRAP__count);
   assert(nearest < 2);

   Gnmx::GfxContext *ctx = gdraw->gfxc;
   const Gnm::Sampler *sampler = &gdraw->sampler_state[nearest][wrap];

   ctx->setSamplers(Gnm::kShaderStagePs, unit, 1, sampler);
   ctx->setTextures(Gnm::kShaderStagePs, unit, 1, tex);
}
|
|
|
|
static inline void disable_scissor(bool force)
|
|
{
|
|
if (force || gdraw->scissor_state) {
|
|
// set whole viewport as scissor rect
|
|
gdraw->scissor_state = 0;
|
|
gdraw->gfxc->setScreenScissor(gdraw->cview.x0, gdraw->cview.y0, gdraw->cview.x1, gdraw->cview.y1);
|
|
}
|
|
}
|
|
|
|
// Sets the hardware viewport to the pixel rectangle (x, y, w, h), records it in
// gdraw->cview, and resets the scissor to match.
static void set_viewport_raw(S32 x, S32 y, S32 w, S32 h)
{
   // check against hardware limits
   assert(w >= 0 && w <= 16384);
   assert(h >= 0 && h <= 16384);

   // remember the rectangle; scissor setup clips against it
   gdraw->cview.x0 = x;
   gdraw->cview.x1 = x + w;
   gdraw->cview.y0 = y;
   gdraw->cview.y1 = y + h;

   // the viewport transform is expressed as scale + offset; Y scale is negated
   const F32 half_w = (F32)w * 0.5f;
   const F32 half_h = (F32)h * 0.5f;
   F32 scale[3]  = { half_w, -half_h, 1.0f };
   F32 offset[3] = { (F32)x + half_w, (F32)y + half_h, 0.0f };
   gdraw->gfxc->setViewport(0, 0.0f, 1.0f, scale, offset);

   disable_scissor(true);
}
|
|
|
|
// Computes the 2D projection mapping x0..x1 and y0..y1 onto -1..1 and stores it
// in gdraw->projection as { x scale, y scale, x offset, y offset }.
// Callers must pass x0 != x1 and y0 != y1.
static void set_projection_raw(S32 x0, S32 x1, S32 y0, S32 y1)
{
   F32 *proj = gdraw->projection;

   // scale
   proj[0] = 2.0f / (x1-x0);
   proj[1] = 2.0f / (y1-y0);

   // offset
   proj[2] = (x1 + x0) / (F32) (x0 - x1);
   proj[3] = (y1 + y0) / (F32) (y0 - y1);
}
|
|
|
|
static void set_viewport()
|
|
{
|
|
if (gdraw->in_blur) { // blur needs special setup
|
|
set_viewport_raw(0, 0, gdraw->tpw, gdraw->tph);
|
|
return;
|
|
}
|
|
|
|
if (gdraw->cur == gdraw->frame) // if the rendering stack is empty
|
|
// render a tile-sized region to the user-request tile location
|
|
set_viewport_raw(gdraw->vx, gdraw->vy, gdraw->tw, gdraw->th);
|
|
else if (gdraw->cur->cached)
|
|
set_viewport_raw(0, 0, gdraw->cur->width, gdraw->cur->height);
|
|
else
|
|
// if on the render stack, draw a padded-tile-sized region at the origin
|
|
set_viewport_raw(0, 0, gdraw->tpw, gdraw->tph);
|
|
}
|
|
|
|
static void set_projection()
|
|
{
|
|
if (gdraw->in_blur) { // blur needs special setup
|
|
set_projection_raw(0, gdraw->tpw, gdraw->tph, 0);
|
|
return;
|
|
}
|
|
|
|
if (gdraw->cur == gdraw->frame) // if the render stack is empty
|
|
set_projection_raw(gdraw->tx0,gdraw->tx0+gdraw->tw,gdraw->ty0+gdraw->th,gdraw->ty0);
|
|
else if (gdraw->cur->cached)
|
|
set_projection_raw(gdraw->cur->base_x, gdraw->cur->base_x + gdraw->cur->width, gdraw->cur->base_y + gdraw->cur->height, gdraw->cur->base_y);
|
|
else
|
|
set_projection_raw(gdraw->tx0p,gdraw->tx0p+gdraw->tpw,gdraw->ty0p+gdraw->tph,gdraw->ty0p);
|
|
}
|
|
|
|
static void set_render_target()
|
|
{
|
|
Gnmx::GfxContext *gfxc = gdraw->gfxc;
|
|
if (GDrawHandle *color = gdraw->cur->color_buffer) {
|
|
gdraw->rt_colorbuffer.setBaseAddress256ByteBlocks(color->handle.tex.gnm->getBaseAddress256ByteBlocks());
|
|
gfxc->setRenderTarget(0, &gdraw->rt_colorbuffer);
|
|
} else
|
|
gfxc->setRenderTarget(0, &gdraw->main_colorbuffer);
|
|
|
|
gfxc->setDepthRenderTarget(&gdraw->main_depthbuffer);
|
|
gfxc->setCmaskClearColor(0, gdraw->cur->clear_col);
|
|
}
|
|
|
|
static void clear_renderstate()
|
|
{
|
|
Gnmx::GfxContext *gfxc = gdraw->gfxc;
|
|
gfxc->setDepthStencilControl(gdraw->depth_stencil_control[0][0][0]);
|
|
gfxc->setRenderTargetMask(0xf);
|
|
gfxc->setBlendControl(0, gdraw->blend_control[GDRAW_BLEND_none]);
|
|
|
|
disable_scissor(false);
|
|
gdraw->z_stencil_key = 0;
|
|
gdraw->writes_masked = 0;
|
|
gdraw->blend_mode = GDRAW_BLEND_none;
|
|
}
|
|
|
|
static void set_common_renderstate()
|
|
{
|
|
Gnmx::GfxContext *gfxc = gdraw->gfxc;
|
|
|
|
// clear our state caching
|
|
memset(gdraw->active_tex, 0, sizeof(gdraw->active_tex));
|
|
gdraw->cur_ps = NULL;
|
|
gdraw->scissor_state = ~0u;
|
|
gdraw->blend_mode = -1;
|
|
|
|
// all the state we won't touch again until we're done rendering
|
|
Gnm::ClipControl clip_control;
|
|
clip_control.init();
|
|
clip_control.setClipSpace(Gnm::kClipControlClipSpaceDX);
|
|
gfxc->setClipControl(clip_control);
|
|
gfxc->setShaderType(Gnm::kShaderTypeGraphics);
|
|
gfxc->setIndexSize(Gnm::kIndexSize16);
|
|
gfxc->setStencilClearValue(0);
|
|
gfxc->setDepthClearValue(1.0f);
|
|
gfxc->setVsShader(gdraw->vs.vs, 0, (void*)0);
|
|
|
|
Gnm::StencilOpControl stencil_op;
|
|
stencil_op.init();
|
|
stencil_op.setStencilOps(Gnm::kStencilOpKeep, Gnm::kStencilOpReplaceTest, Gnm::kStencilOpKeep);
|
|
gfxc->setStencilOpControl(stencil_op);
|
|
|
|
Gnm::ViewportTransformControl vt_control;
|
|
vt_control.init();
|
|
vt_control.setPassThroughEnable(false);
|
|
gfxc->setViewportTransformControl(vt_control);
|
|
|
|
// set up guard band and hardware screen offset once
|
|
// we know ahead of time which viewports we're going to set:
|
|
// * our viewport top/left corner is always >= (0,0)
|
|
// * viewport bottom/right is <= size of the largest render target
|
|
S32 min_x = 0;
|
|
S32 min_y = 0;
|
|
S32 max_x = RR_MAX(gdraw->main_colorbuffer.getWidth(), gdraw->rt_colorbuffer.getWidth());
|
|
S32 max_y = RR_MAX(gdraw->main_colorbuffer.getHeight(), gdraw->rt_colorbuffer.getHeight());
|
|
|
|
F32 offs_x = (F32) (min_x + max_x) * 0.5f;
|
|
F32 offs_y = (F32) (min_y + max_y) * 0.5f;
|
|
F32 abs_scale_x = (F32) (max_x - min_x) * 0.5f;
|
|
F32 abs_scale_y = (F32) (max_y - min_y) * 0.5f;
|
|
|
|
// set up guard band offset so we're centered around our viewport region
|
|
// hardware offset must be a multiple of 16 pixels
|
|
S32 hw_offset_x = (S32)floorf(offs_x/16.0f + 0.5f) * 16;
|
|
S32 hw_offset_y = (S32)floorf(offs_y/16.0f + 0.5f) * 16;
|
|
gfxc->setHardwareScreenOffset(hw_offset_x >> 4, hw_offset_y >> 4);
|
|
|
|
// set up guard band clip and discard distances
|
|
// NB both the values for hw_min and hw_max are slightly smaller than the actual min/max
|
|
// (by about 1/256th) to keep a bit of a safety margin for FP round-off error
|
|
F32 hw_min = -(F32)(0xff<<16) / (F32)(1<<8);
|
|
F32 hw_max = (F32)(0xff<<16) / (F32)(1<<8);
|
|
F32 gb_max_x = RR_MIN(hw_max - abs_scale_x - offs_x + hw_offset_x, -abs_scale_x + offs_x - hw_offset_x - hw_min);
|
|
F32 gb_max_y = RR_MIN(hw_max - abs_scale_y - offs_y + hw_offset_y, -abs_scale_y + offs_y - hw_offset_y - hw_min);
|
|
gfxc->setGuardBandClip(gb_max_x / abs_scale_x, gb_max_y / abs_scale_y);
|
|
gfxc->setGuardBandDiscard(1.0f, 1.0f);
|
|
|
|
assert(gdraw->aa_tex.isTexture()); // if this triggers, your initialization sequence is wrong.
|
|
set_gnm_texture(AATEX_SAMPLER, &gdraw->aa_tex, GDRAW_WRAP_clamp, 0);
|
|
|
|
// states we modify during regular rendering
|
|
clear_renderstate();
|
|
set_render_target();
|
|
set_viewport();
|
|
set_projection();
|
|
}
|
|
|
|
static void set_pixel_shader(ShaderCode *ps);
|
|
static void do_screen_quad(gswf_recti *s, const F32 *tc, GDrawStats *stats);
|
|
|
|
// Draws rectangle 'r' with the clear pixel shader and accounts for it in 'stats'.
// The caller is responsible for viewport, projection and PS constant buffer 0.
static void render_clear_quad(gswf_recti *r, GDrawStats *stats)
{
   set_pixel_shader(&gdraw->clear_ps);
   do_screen_quad(r, four_zeros, stats);

   // bookkeeping
   stats->nonzero_flags |= GDRAW_STATS_clears;
   stats->num_clears++;
   stats->cleared_pixels += (r->x1 - r->x0) * (r->y1 - r->y0);
}
|
|
|
|
// Clears the pixel rectangle (x, y, w, h) of the current color target by drawing
// a quad with the zeroed common pixel constants. Leaves viewport and projection
// set to the full frametex size; callers restore them afterwards.
static void manual_clear_color(S32 x, S32 y, S32 w, S32 h, GDrawStats *stats)
{
   const S32 full_w = gdraw->frametex_width;
   const S32 full_h = gdraw->frametex_height;

   clear_renderstate();
   set_viewport_raw(0, 0, full_w, full_h);
   set_projection_raw(0, full_w, full_h, 0);

   gswf_recti rect = { x, y, x+w, y+h };
   gdraw->gfxc->setConstantBuffers(Gnm::kShaderStagePs, 0, 1, &gdraw->pixel_common_zero_cbuf);
   render_clear_quad(&rect, stats);
}
|
|
|
|
static void clear_whole_zs(bool clear_depth, bool clear_stencil, GDrawStats *stats)
|
|
{
|
|
Gnm::DepthRenderTarget &depthbuf = gdraw->main_depthbuffer;
|
|
|
|
// to clear both depth and stencil, we can just set up the metadata in HTile with a
|
|
// compute shader if a) there's a HTile to begin with and b) it contains stencil metadata.
|
|
// if no stencil info in HTile, clearing HTile manually is a net perf loss.
|
|
if (clear_depth && clear_stencil && depthbuf.getHtileAccelerationEnable() && !depthbuf.getHtileStencilDisable()) {
|
|
gdraw->gfxc->triggerEvent(Gnm::kEventTypeFlushAndInvalidateDbMeta);
|
|
gpu_compute_memset(depthbuf.getHtileAddress(), 0xfffc00f0, depthbuf.getHtileSizeInBytes());
|
|
} else {
|
|
S32 w = gdraw->main_depthbuffer.getWidth();
|
|
S32 h = gdraw->main_depthbuffer.getHeight();
|
|
|
|
Gnmx::GfxContext *gfxc = gdraw->gfxc;
|
|
Gnm::DbRenderControl db_control;
|
|
db_control.init();
|
|
db_control.setDepthClearEnable(clear_depth);
|
|
db_control.setStencilClearEnable(clear_stencil);
|
|
gfxc->setDbRenderControl(db_control);
|
|
|
|
Gnm::DepthStencilControl ds_control;
|
|
ds_control.init();
|
|
ds_control.setDepthControl(clear_depth ? Gnm::kDepthControlZWriteEnable : Gnm::kDepthControlZWriteDisable, Gnm::kCompareFuncAlways);
|
|
ds_control.setStencilFunction(Gnm::kCompareFuncAlways);
|
|
ds_control.setDepthEnable(clear_depth);
|
|
ds_control.setStencilEnable(clear_stencil);
|
|
gfxc->setDepthStencilControl(ds_control);
|
|
|
|
Gnm::StencilControl st_control;
|
|
st_control.m_testVal = 255;
|
|
st_control.m_mask = 255;
|
|
st_control.m_writeMask = 255;
|
|
st_control.m_opVal = 0;
|
|
gfxc->setStencil(st_control);
|
|
|
|
set_viewport_raw(0, 0, w, h);
|
|
set_projection_raw(0, w, h, 0);
|
|
gfxc->setRenderTargetMask(0);
|
|
|
|
gswf_recti r = { 0, 0, w, h };
|
|
gfxc->setConstantBuffers(Gnm::kShaderStagePs, 0, 1, &gdraw->pixel_common_zero_cbuf);
|
|
render_clear_quad(&r, stats);
|
|
|
|
db_control.init();
|
|
gfxc->setDbRenderControl(db_control);
|
|
clear_renderstate();
|
|
set_viewport();
|
|
set_projection();
|
|
}
|
|
}
|
|
|
|
static void eliminate_fast_clear()
|
|
{
|
|
if (!gdraw->cur->needs_clear_eliminate)
|
|
return;
|
|
|
|
clear_renderstate();
|
|
|
|
Gnmx::GfxContext *gfxc = gdraw->gfxc;
|
|
gfxc->triggerEvent(Gnm::kEventTypeFlushAndInvalidateCbMeta); // flush CMask data
|
|
|
|
gswf_recti r;
|
|
if (gdraw->cur != gdraw->frame) {
|
|
r.x0 = gdraw->cur->base_x;
|
|
r.y0 = gdraw->cur->base_y;
|
|
r.x1 = r.x0 + gdraw->cur->width;
|
|
r.y1 = r.y0 + gdraw->cur->height;
|
|
} else {
|
|
r.x0 = 0;
|
|
r.y0 = 0;
|
|
r.x1 = gdraw->main_colorbuffer.getWidth();
|
|
r.y1 = gdraw->main_colorbuffer.getHeight();
|
|
}
|
|
|
|
gfxc->setCbControl(Gnm::kCbModeEliminateFastClear, Gnm::kRasterOpSrcCopy);
|
|
gfxc->setPsShader(NULL);
|
|
set_viewport_raw(r.x0, r.y0, r.x1 - r.x0, r.y1 - r.y0);
|
|
set_projection_raw(r.x0, r.x1, r.y1, r.y0);
|
|
GDrawStats stats = {}; // we already counted these clears once, so don't add to main stats
|
|
gfxc->setConstantBuffers(Gnm::kShaderStagePs, 0, 1, &gdraw->pixel_common_zero_cbuf);
|
|
render_clear_quad(&r, &stats);
|
|
|
|
void *label = insert_cb_label();
|
|
gfxc->writeImmediateAtEndOfPipe(Gnm::kEopFlushCbDbCaches, label, 1, Gnm::kCacheActionNone);
|
|
gfxc->waitOnAddress(label, ~0u, Gnm::kWaitCompareFuncEqual, 1);
|
|
|
|
gfxc->setCbControl(Gnm::kCbModeNormal, Gnm::kRasterOpSrcCopy);
|
|
set_viewport();
|
|
set_projection();
|
|
|
|
gdraw->cur_ps = NULL;
|
|
gdraw->cur->needs_clear_eliminate = false;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Begin rendering for a frame
|
|
//
|
|
|
|
// Tells GDraw which color/depth targets to render into and where (in pixels)
// the tile origin sits inside them. The descriptors are copied, so the caller's
// objects do not need to stay alive.
void gdraw_orbis_SetTileOrigin(Gnm::RenderTarget *color, Gnm::DepthRenderTarget *depth, S32 x, S32 y)
{
   gdraw->vx = x;
   gdraw->vy = y;
   gdraw->main_colorbuffer = *color;
   gdraw->main_depthbuffer = *depth;
}
|
|
|
|
// Converts a float to an 8-bit UNORM value: clamps to [0,1], scales by 255
// and rounds to nearest.
static inline U32 pack8(F32 v)
{
   v = (v < 0.0f) ? 0.0f : v;
   v = (v > 1.0f) ? 1.0f : v;
   return (U32) (S32) (v * 255.0f + 0.5f);
}
|
|
|
|
// Packs four floats into one 32-bit word of 8-bit UNORM channels;
// x ends up in the lowest byte, w in the highest.
static inline U32 pack_color_8888(F32 x, F32 y, F32 z, F32 w)
{
   const U32 b0 = pack8(x);
   const U32 b1 = pack8(y);
   const U32 b2 = pack8(z);
   const U32 b3 = pack8(w);
   return (b0 << 0) | (b1 << 8) | (b2 << 16) | (b3 << 24);
}
|
|
|
|
void gdraw_orbis_ClearWholeRenderTarget(const F32 clear_color_rgba[4])
|
|
{
|
|
assert(gdraw->gfxc != NULL); // call after gdraw_orbis_Begin
|
|
|
|
gdraw->cur = gdraw->frame;
|
|
set_common_renderstate();
|
|
clear_renderstate();
|
|
|
|
if (gdraw->main_colorbuffer.getCmaskFastClearEnable()) {
|
|
Gnmx::GfxContext *gfxc = gdraw->gfxc;
|
|
|
|
// CB flush before
|
|
gfxc->triggerEvent(Gnm::kEventTypeFlushAndInvalidateCbPixelData);
|
|
|
|
// Clear Cmask
|
|
gfxc->fillData(gdraw->main_colorbuffer.getCmaskAddress(), 0, gdraw->main_colorbuffer.getCmaskSizeInBytes(), Gnm::kDmaDataBlockingEnable);
|
|
|
|
// CB flush after
|
|
void *label = insert_cb_label();
|
|
gfxc->writeImmediateAtEndOfPipe(Gnm::kEopFlushCbDbCaches, label, 1, Gnm::kCacheActionNone);
|
|
gfxc->waitOnAddress(label, ~0u, Gnm::kWaitCompareFuncEqual, 1);
|
|
|
|
// Set Cmask clear color
|
|
Gnm::DataFormat fmt = gdraw->main_colorbuffer.getDataFormat();
|
|
if (fmt.m_asInt == Gnm::kDataFormatB8G8R8A8Unorm.m_asInt || fmt.m_asInt == Gnm::kDataFormatB8G8R8X8Unorm.m_asInt) {
|
|
gdraw->cur->clear_col[0] = pack_color_8888(clear_color_rgba[2], clear_color_rgba[1], clear_color_rgba[0], clear_color_rgba[3]);
|
|
gdraw->cur->clear_col[1] = 0;
|
|
} else if (fmt.m_asInt == Gnm::kDataFormatR8G8B8A8Unorm.m_asInt) {
|
|
gdraw->cur->clear_col[0] = pack_color_8888(clear_color_rgba[0], clear_color_rgba[1], clear_color_rgba[2], clear_color_rgba[3]);
|
|
} else
|
|
assert(0); // unsupported color format!
|
|
|
|
gfxc->setCmaskClearColor(0, gdraw->cur->clear_col);
|
|
gdraw->cur->needs_clear_eliminate = true;
|
|
} else {
|
|
auto para = embed_const_buffer<PixelCommonVars>(Gnm::kShaderStagePs, 0);
|
|
memset(para, 0, sizeof(*para));
|
|
for (U32 i=0; i < 4; i++)
|
|
para->color_mul[i] = clear_color_rgba[i];
|
|
|
|
GDrawStats stats = {};
|
|
gswf_recti r = { 0, 0, (S32) gdraw->main_colorbuffer.getWidth(), (S32) gdraw->main_colorbuffer.getHeight() };
|
|
set_viewport_raw(0, 0, r.x1, r.y1);
|
|
set_projection_raw(0, r.x1, r.y1, 0);
|
|
render_clear_quad(&r, &stats);
|
|
}
|
|
}
|
|
|
|
// Records the full view size and the world-to-pixel scale, and resets the
// render stack to the frame. The tile size starts out equal to the view size.
static void RADLINK gdraw_SetViewSizeAndWorldScale(S32 w, S32 h, F32 scalex, F32 scaley)
{
   gdraw->cur = gdraw->frame;

   // full view size and (initial) tile size
   gdraw->fw = gdraw->tw = w;
   gdraw->fh = gdraw->th = h;

   gdraw->world_to_pixel[0] = scalex;
   gdraw->world_to_pixel[1] = scaley;
}
|
|
|
|
// must include anything necessary for texture creation/update
|
|
// Prepares the graphics context for GDraw rendering: unbinds every shader
// stage, then installs GDraw's common render state.
static void RADLINK gdraw_RenderingBegin(void)
{
   assert(gdraw->gfxc != NULL); // call after gdraw_orbis_Begin

   // unbind all shaders
   Gnmx::GfxContext *ctx = gdraw->gfxc;
   ctx->setVsShader(NULL, 0, (void*)0);
   ctx->setPsShader(NULL);
   ctx->setCsShader(NULL);
   ctx->setLsHsShaders(NULL, 0, (void*)0, NULL, 0);
   ctx->setEsShader(NULL, 0, (void *) 0);
   ctx->setGsVsShaders(NULL);

   set_common_renderstate();
}
|
|
|
|
// Ends GDraw rendering by putting the state GDraw modifies per draw
// back to its defaults.
static void RADLINK gdraw_RenderingEnd(void)
{
   clear_renderstate();
}
|
|
|
|
// Starts rendering of the tile [x0,x1) x [y0,y1). 'pad' is the number of extra
// pixels to include around the tile (clamped to the frame) when rendering into
// offscreen targets; it is ignored when the tile covers the whole frame.
// Also clears the depth/stencil buffer.
static void RADLINK gdraw_RenderTileBegin(S32 x0, S32 y0, S32 x1, S32 y1, S32 pad, GDrawStats *stats)
{
   // a tile that spans the whole frame doesn't need any padding
   const bool whole_frame = (x0 == 0 && y0 == 0 && x1 == gdraw->fw && y1 == gdraw->fh);
   if (whole_frame)
      pad = 0;

   // unpadded tile
   gdraw->tx0 = x0;
   gdraw->ty0 = y0;
   gdraw->tw = x1-x0;
   gdraw->th = y1-y0;

   // padded region, clamped to the frame
   gdraw->tx0p = RR_MAX(x0 - pad, 0);
   gdraw->ty0p = RR_MAX(y0 - pad, 0);
   gdraw->tpw = RR_MIN(x1 + pad, gdraw->fw) - gdraw->tx0p;
   gdraw->tph = RR_MIN(y1 + pad, gdraw->fh) - gdraw->ty0p;

   // check if this fits inside our rendertarget buffers
   assert(gdraw->tpw <= gdraw->frametex_width && gdraw->tph <= gdraw->frametex_height);

   // the frame's framebuffer state describes the padded region
   GDrawFramebufferState *frame = gdraw->frame;
   frame->base_x = gdraw->tx0p;
   frame->base_y = gdraw->ty0p;
   frame->width = gdraw->tpw;
   frame->height = gdraw->tph;

   // clear our depth/stencil buffers
   clear_whole_zs(true, true, stats);
}
|
|
|
|
// Finishes a tile: records a fence for everything submitted so far and gives
// both resource caches a chance to reap.
static void RADLINK gdraw_RenderTileEnd(GDrawStats *stats)
{
   gdraw->tile_end_fence = put_fence();

   // reap once per frame even if there are no allocs
   gdraw_res_reap(gdraw->texturecache, stats);
   gdraw_res_reap(gdraw->vbufcache, stats);
}
|
|
|
|
// Sanity check used by asserts: returns true if 'mem' is mapped and lies in
// direct memory.
// NOTE(review): despite the name, only the isDirectMemory flag is inspected here;
// the cache/write-combine type of the mapping is not checked - confirm whether
// that is intentional.
GDRAW_MAYBE_UNUSED static bool mem_is_direct_and_write_combined_or_cached(const void *mem)
{
   SceKernelVirtualQueryInfo info;
   const bool query_ok = sceKernelVirtualQuery(mem, 0, &info, sizeof(info)) == 0;

   // if the query errors, mem is likely not even mapped!
   if (!query_ok)
      return false;

   return info.isDirectMemory ? true : false;
}
|
|
|
|
// Opens a GDraw rendering section on 'context'.
//   context           - graphics context all GDraw commands are recorded into
//   staging_buffer    - scratch memory GDraw sub-allocates from until gdraw_orbis_End
//   staging_buf_bytes - size of the staging buffer in bytes
// Resets the context to default state and uploads the antialiasing texture.
// Begin/End pairs may not be nested.
void gdraw_orbis_Begin(sce::Gnmx::GfxContext *context, void *staging_buffer, U32 staging_buf_bytes)
{
   assert(gdraw->gfxc == NULL); // may not nest Begin calls

   // Make sure that the memory setup is sensible.
   // If any of these asserts fire, please relocate your command buffers
   // and staging buffers to direct memory that is either cached or
   // write-combined!
   assert(mem_is_direct_and_write_combined_or_cached(context->m_dcb.m_cmdptr));
   assert(mem_is_direct_and_write_combined_or_cached(context->m_ccb.m_cmdptr));
   assert(mem_is_direct_and_write_combined_or_cached(staging_buffer));

   gdraw->gfxc = context;

   // fresh staging arena and statistics for this Begin/End section
   gdraw_arena_init(&gdraw->staging, staging_buffer, staging_buf_bytes);
   memset(&gdraw->staging_stats, 0, sizeof(gdraw->staging_stats));

   context->initializeToDefaultContextState();
   antialias_tex_upload();
}
|
|
|
|
// Closes the section opened by gdraw_orbis_Begin: ticks both resource caches
// with the last tile-end fence, detaches the staging buffer and the graphics
// context, and optionally reports staging statistics.
//   stats - may be NULL; otherwise receives a copy of the staging stats
void gdraw_orbis_End(gdraw_orbis_staging_stats *stats)
{
   assert(gdraw->gfxc != NULL); // please keep Begin / End pairs properly matched

   gdraw_HandleCacheTick(gdraw->texturecache, gdraw->tile_end_fence);
   gdraw_HandleCacheTick(gdraw->vbufcache, gdraw->tile_end_fence);

   // the staging buffer and context belong to the caller again
   gdraw_arena_init(&gdraw->staging, NULL, 0);
   gdraw->gfxc = NULL;

   if (stats != NULL)
      *stats = gdraw->staging_stats;
}
|
|
|
|
void gdraw_orbis_EliminateFastClears(void)
|
|
{
|
|
assert(gdraw->gfxc != NULL); // call between gdraw_orbis_Begin and gdraw_orbis_End
|
|
|
|
eliminate_fast_clear();
|
|
}
|
|
|
|
#define MAX_DEPTH_VALUE (1 << 14)
|
|
|
|
static void RADLINK gdraw_GetInfo(GDrawInfo *d)
|
|
{
|
|
d->num_stencil_bits = 8;
|
|
d->max_id = MAX_DEPTH_VALUE-2;
|
|
// for floating point depth, just use mantissa, e.g. 16-20 bits
|
|
d->max_texture_size = MAX_TEXTURE2D_DIM;
|
|
d->buffer_format = GDRAW_BFORMAT_vbib;
|
|
d->shared_depth_stencil = 1;
|
|
d->always_mipmap = 0;
|
|
d->conditional_nonpow2 = 0;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Render targets
|
|
//
|
|
|
|
// Pushes a new offscreen color target onto the render stack and makes it current.
//   region - area (in frame pixel coordinates) that will be rendered, or NULL for
//            the whole rendertarget
//   format, flags - currently ignored by this implementation
//   owner  - must be NULL; owner-keyed (cached) rendertargets are not implemented
//   stats  - receives rendertarget/clear statistics
// Returns false without touching the render stack if the tile is empty, the
// stack is full, no rendertarget could be allocated, 'owner' is set, or the
// region does not intersect the current tile. On success the target is cleared
// in the (padded) region and viewport/projection are set up for it.
static rrbool RADLINK gdraw_TextureDrawBufferBegin(gswf_recti *region, gdraw_texture_format format, U32 flags, void *owner, GDrawStats *stats)
{
   GDrawFramebufferState *n = gdraw->cur+1;
   GDrawHandle *t;
   if (gdraw->tw == 0 || gdraw->th == 0) {
      IggyGDrawSendWarning(NULL, "GDraw warning: w=0,h=0 rendertarget");
      return false;
   }

   if (n >= &gdraw->frame[MAX_RENDER_STACK_DEPTH]) {
      IggyGDrawSendWarning(NULL, "GDraw rendertarget nesting exceeds MAX_RENDER_STACK_DEPTH");
      return false;
   }

   if (owner) {
      // @TODO implement. A cached target additionally needs n->cached set and
      // n->base_x/base_y/width/height taken from 'region', since set_viewport()
      // and set_projection() read those for cached targets.
      assert(0); // nyi
      // With asserts compiled out we must not continue: there is no handle, and
      // everything below would index and dereference through a NULL pointer.
      return false;
   }

   t = get_color_rendertarget(stats);
   if (!t)
      return false;

   n->color_buffer = t;
   assert(n->color_buffer != NULL); // @GDRAW_ASSERT

   n->cached = false; // owner is always NULL here, see above

   assert(gdraw->frametex_width >= gdraw->tw && gdraw->frametex_height >= gdraw->th); // @GDRAW_ASSERT
   int k = (int) (n->color_buffer - gdraw->rendertargets.handle); // index of this handle, used for rt_valid
   S32 x, y, w, h;

   if (region) {
      S32 ox, oy, pad = 2; // 2 pixels of border on all sides
      // 1 pixel turns out to be not quite enough with the interpolator precision we get.

      if (gdraw->in_blur)
         ox = oy = 0;
      else
         ox = gdraw->tx0p, oy = gdraw->ty0p;

      // clamp region to tile
      S32 xt0 = RR_MAX(region->x0 - ox, 0);
      S32 yt0 = RR_MAX(region->y0 - oy, 0);
      S32 xt1 = RR_MIN(region->x1 - ox, gdraw->tpw);
      S32 yt1 = RR_MIN(region->y1 - oy, gdraw->tph);

      // but the padding needs to clamp to render target bounds
      x = RR_MAX(xt0 - pad, 0);
      y = RR_MAX(yt0 - pad, 0);
      w = RR_MIN(xt1 + pad, gdraw->frametex_width) - x;
      h = RR_MIN(yt1 + pad, gdraw->frametex_height) - y;

      if (w <= 0 || h <= 0) { // region doesn't intersect with current tile
         gdraw_FreeTexture((GDrawTexture *) n->color_buffer, 0, stats);
         // note: don't send a warning since this will happen during regular tiled rendering
         return false;
      }

      // remember which part of the target holds valid data (without padding)
      gdraw->rt_valid[k].x0 = xt0;
      gdraw->rt_valid[k].y0 = yt0;
      gdraw->rt_valid[k].x1 = xt1;
      gdraw->rt_valid[k].y1 = yt1;
   } else {
      x = 0;
      y = 0;
      w = gdraw->frametex_width;
      h = gdraw->frametex_height;

      gdraw->rt_valid[k].x0 = 0;
      gdraw->rt_valid[k].y0 = 0;
      gdraw->rt_valid[k].x1 = w;
      gdraw->rt_valid[k].y1 = h;
   }

   stats->nonzero_flags |= GDRAW_STATS_rendtarg;
   stats->rendertarget_changes++;

   // make the new state current, bind and clear it, then set up regular rendering
   ++gdraw->cur;
   gdraw->rt_colorbuffer.setBaseAddress(n->color_buffer->handle.tex.gnm_ptr);
   set_render_target();
   manual_clear_color(x, y, w, h, stats);
   set_viewport();
   set_projection();

   return true;
}
|
|
|
|
// Pops the current offscreen target off the render stack, syncs on its
// rendering, switches back to the previous target and returns the finished
// target as a texture. Returns 0 if the tile is empty or the matching Begin had
// already failed on stack depth.
static GDrawTexture *RADLINK gdraw_TextureDrawBufferEnd(GDrawStats *stats)
{
   GDrawFramebufferState *finished = gdraw->cur;
   GDrawFramebufferState *prev = --gdraw->cur; // the pop happens unconditionally

   if (gdraw->tw == 0 || gdraw->th == 0)
      return 0;

   if (finished >= &gdraw->frame[MAX_RENDER_STACK_DEPTH])
      return 0; // already returned a warning in Start...()

   assert(prev >= gdraw->frame); // bug in Iggy -- unbalanced

   if (prev != gdraw->frame) {
      assert(prev->color_buffer != NULL); // @GDRAW_ASSERT
   }
   assert(finished->color_buffer != NULL); // @GDRAW_ASSERT

   // sync on draw completion for this render target
   GDrawHandle *result = finished->color_buffer;
   rtt_sync(result->handle.tex.gnm_ptr, gdraw->rt_colorbuffer_sa.m_size >> 8);
   result->fence = get_next_fence();

   // switch back to old rendertarget
   set_render_target();
   set_viewport();
   set_projection();

   stats->nonzero_flags |= GDRAW_STATS_rendtarg;
   stats->rendertarget_changes++;

   return (GDrawTexture *) result;
}
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Clear stencil/depth buffers
|
|
//
|
|
|
|
// Clears the stencil buffer. 'bits' is ignored: the whole stencil buffer is
// cleared regardless of which bits were requested. Clear stats are discarded.
static void RADLINK gdraw_ClearStencilBits(U32 bits)
{
   GDrawStats discarded = {};
   clear_whole_zs(false, true, &discarded);
}
|
|
|
|
// Clears the depth buffer (which holds the object IDs); stencil is left alone.
// Clear stats are discarded.
static void RADLINK gdraw_ClearID(void)
{
   GDrawStats discarded = {};
   clear_whole_zs(true, false, &discarded);
}
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Set all the render state from GDrawRenderState
|
|
//
|
|
|
|
// Binds 'tex' to texture unit 'texunit' with clamped, non-nearest sampling,
// skipping the work if the state cache says it is already bound there.
static RADINLINE void set_texture(U32 texunit, GDrawTexture *tex)
{
   assert(texunit < MAX_SAMPLERS);
   assert(tex != NULL);

   if (gdraw->active_tex[texunit] == tex)
      return; // already bound

   gdraw->active_tex[texunit] = tex;
   GDrawHandle *handle = (GDrawHandle *) tex;
   set_gnm_texture(texunit, handle->handle.tex.gnm, GDRAW_WRAP_clamp, 0);
}
|
|
|
|
// Binds pixel shader 'ps' unless the state cache says it is already bound.
static RADINLINE void set_pixel_shader(ShaderCode *ps)
{
   if (gdraw->cur_ps == ps)
      return;

   gdraw->cur_ps = ps;
   gdraw->gfxc->setPsShader(ps->ps);
}
|
|
|
|
// converts a depth id into a Z value
|
|
// Maps a depth id to the Z value 1 - 2 * (id + 1) / MAX_DEPTH_VALUE.
static inline F32 depth_from_id(S32 id)
{
   const F32 step = 2.0f / MAX_DEPTH_VALUE; // Z distance between consecutive ids
   return (1.0f - step) - id * step;
}
|
|
|
|
static void set_renderstate_full(const GDrawRenderState * RADRESTRICT r, GDrawStats *stats)
|
|
{
|
|
GDraw * RADRESTRICT gd = gdraw;
|
|
Gnmx::GfxContext * RADRESTRICT gfxc = gd->gfxc;
|
|
F32 depth = depth_from_id(r->id);
|
|
|
|
// set vertex shader constants
|
|
VertexVars * RADRESTRICT vvars = embed_const_buffer<VertexVars>(Gnm::kShaderStageVs, 0);
|
|
if (!r->use_world_space)
|
|
gdraw_ObjectSpace(vvars->world[0], r->o2w, depth, 0.0f);
|
|
else
|
|
gdraw_WorldSpace(vvars->world[0], gdraw->world_to_pixel, depth, 0.0f);
|
|
|
|
__m128 edge = _mm_loadu_ps(r->edge_matrix);
|
|
__m128 s0_texgen = _mm_setzero_ps();
|
|
__m128 t0_texgen = _mm_setzero_ps();
|
|
__m128 viewproj = _mm_load_ps(gd->projection);
|
|
if (r->texgen0_enabled) {
|
|
s0_texgen = _mm_loadu_ps(&r->s0_texgen[0]);
|
|
t0_texgen = _mm_loadu_ps(&r->t0_texgen[0]);
|
|
}
|
|
|
|
_mm_storeu_ps(&vvars->x_off[0], edge);
|
|
_mm_storeu_ps(&vvars->texgen_s[0], s0_texgen);
|
|
_mm_storeu_ps(&vvars->texgen_t[0], t0_texgen);
|
|
_mm_storeu_ps(&vvars->viewproj[0], viewproj);
|
|
|
|
// set the blend mode
|
|
int tex0mode = r->tex0_mode;
|
|
int blend_mode = r->blend_mode;
|
|
if (blend_mode != gd->blend_mode) {
|
|
gd->blend_mode = blend_mode;
|
|
gfxc->setBlendControl(0, gd->blend_control[blend_mode]);
|
|
}
|
|
|
|
// color channel write mask: stencil set mode doesn't write color
|
|
if (r->stencil_set != gd->writes_masked) {
|
|
gd->writes_masked = r->stencil_set;
|
|
gfxc->setRenderTargetMask(r->stencil_set ? 0 : 0xf);
|
|
}
|
|
|
|
// set the pixel shader
|
|
if (blend_mode != GDRAW_BLEND_special) {
|
|
assert(tex0mode >= 0 && tex0mode < sizeof(gd->main_ps) / sizeof(*gd->main_ps));
|
|
ShaderCode *ps = gd->basic_ps[tex0mode];
|
|
|
|
if (r->cxf_add) {
|
|
ps++;
|
|
if (r->cxf_add[3]) ps++;
|
|
}
|
|
|
|
set_pixel_shader(ps);
|
|
} else // special blends have a special pixel shader.
|
|
set_pixel_shader(&gd->exceptional_blend[r->special_blend]);
|
|
|
|
// set textures
|
|
if (tex0mode != GDRAW_TEXTURE_none) {
|
|
if (!r->tex[0]) // this can happen if some allocs fail. just abort in that case.
|
|
return;
|
|
|
|
if (gd->active_tex[0] != r->tex[0]) {
|
|
gd->active_tex[0] = r->tex[0];
|
|
set_gnm_texture(0, ((GDrawHandle *) r->tex[0])->handle.tex.gnm, r->wrap0, r->nearest0);
|
|
}
|
|
}
|
|
|
|
// pixel shader constants
|
|
PixelCommonVars * RADRESTRICT pvars = embed_const_buffer<PixelCommonVars>(Gnm::kShaderStagePs, 0);
|
|
__m128 col_mul = _mm_loadu_ps(r->color);
|
|
__m128 col_add = _mm_setzero_ps();
|
|
__m128 focal = _mm_loadu_ps(r->focal_point);
|
|
if (r->cxf_add) {
|
|
const float scalef = 1.0f / 255.0f;
|
|
col_add = _mm_setr_ps((F32) r->cxf_add[0] * scalef, (F32) r->cxf_add[1] * scalef, (F32) r->cxf_add[2] * scalef, (F32) r->cxf_add[3] * scalef);
|
|
}
|
|
|
|
_mm_storeu_ps(&pvars->color_mul[0], col_mul);
|
|
_mm_storeu_ps(&pvars->color_add[0], col_add);
|
|
_mm_storeu_ps(&pvars->focal[0], focal);
|
|
|
|
// set scissor
|
|
if (r->scissor) {
|
|
S32 xs,ys;
|
|
if (gd->cur == gd->frame) {
|
|
xs = gd->tx0 - gd->vx;
|
|
ys = gd->ty0 - gd->vy;
|
|
} else {
|
|
xs = gd->tx0p;
|
|
ys = gd->ty0p;
|
|
}
|
|
|
|
// clip against viewport
|
|
S32 x0 = RR_MAX(r->scissor_rect.x0 - xs, gd->cview.x0);
|
|
S32 y0 = RR_MAX(r->scissor_rect.y0 - ys, gd->cview.y0);
|
|
S32 x1 = RR_MIN(r->scissor_rect.x1 - xs, gd->cview.x1);
|
|
S32 y1 = RR_MIN(r->scissor_rect.y1 - ys, gd->cview.y1);
|
|
|
|
if (x1 <= x0 || y1 <= y0) {
|
|
// dummy scissor rect in case our actual scissor is empty
|
|
x0 = x1 = gd->cview.x0;
|
|
y0 = y1 = gd->cview.y0;
|
|
}
|
|
|
|
gfxc->setScreenScissor(x0, y0, x1, y1);
|
|
gd->scissor_state = 1;
|
|
} else if (r->scissor != gd->scissor_state)
|
|
disable_scissor(0);
|
|
|
|
// z/stencil mode changed?
|
|
U32 z_stencil_key = r->set_id | (r->test_id << 1) | (r->stencil_test << 16) | (r->stencil_set << 24);
|
|
|
|
if (z_stencil_key != gd->z_stencil_key) {
|
|
gd->z_stencil_key = z_stencil_key;
|
|
if (r->stencil_test | r->stencil_set) {
|
|
Gnm::StencilControl ctl;
|
|
ctl.m_testVal = 255;
|
|
ctl.m_mask = r->stencil_test;
|
|
ctl.m_writeMask = r->stencil_set;
|
|
ctl.m_opVal = 255;
|
|
|
|
gfxc->setDepthStencilControl(gd->depth_stencil_control[r->set_id][r->test_id][1]);
|
|
gfxc->setStencil(ctl);
|
|
} else
|
|
gfxc->setDepthStencilControl(gd->depth_stencil_control[r->set_id][r->test_id][0]);
|
|
}
|
|
}
|
|
|
|
// Applies render state 'r' unless it is flagged as identical to the state that
// is already in effect.
static RADINLINE void set_renderstate(const GDrawRenderState * RADRESTRICT r, GDrawStats *stats)
{
   if (r->identical_state)
      return;

   set_renderstate_full(r, stats);
}
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Draw triangles with a given renderstate
|
|
//
|
|
|
|
// Looks up the vertex format description for a GDRAW_vformat_* value.
static RADINLINE const GDraw::VFormatDesc *get_vertex_fmt(S32 vfmt)
{
   assert(vfmt >= 0 && vfmt < GDRAW_vformat__count);

   const GDraw::VFormatDesc *desc = &gdraw->vfmt[vfmt];
   return desc;
}
|
|
|
|
// Binds the vertex data at 'ptr' for the vertex shader: one buffer descriptor
// per attribute of 'fmtdesc', all sharing the same stride and vertex count.
//   fmtdesc   - vertex format (attribute offsets, formats, stride)
//   ptr       - start of the vertex data
//   num_verts - number of vertices available at 'ptr'
static void set_vertex_buffer(const GDraw::VFormatDesc *fmtdesc, void *ptr, U32 num_verts)
{
   Gnm::Buffer bufs[MAX_ATTRS];

   // bufs lives on the stack with room for MAX_ATTRS entries only
   assert(fmtdesc->num_attribs <= MAX_ATTRS);

   for (U32 i=0; i < fmtdesc->num_attribs; i++)
      bufs[i].initAsVertexBuffer((U8 *)ptr + fmtdesc->vb_offs[i], fmtdesc->formats[i], fmtdesc->stride, num_verts);
   gdraw->gfxc->setBuffers(Gnm::kShaderStageVs, 0, fmtdesc->num_attribs, bufs);
}
|
|
|
|
// Stamps up to four resources (GDrawHandle pointers, NULL entries are skipped)
// with the next fence value, marking them as in use by the commands just issued.
static RADINLINE void fence_resources(void *r1, void *r2=NULL, void *r3=NULL, void *r4=NULL)
{
   GDrawFence fence = get_next_fence();
   void *resources[4] = { r1, r2, r3, r4 };

   for (U32 i = 0; i < 4; i++) {
      if (resources[i])
         ((GDrawHandle *) resources[i])->fence = fence;
   }
}
|
|
|
|
// Draws primitive 'p' with render state 'r'. Three sources are supported:
//   * 'buf' given:       vertices/indices live in the resident vertex buffer and
//                        p->vertices / p->indices are offsets into it
//   * p->indices given:  vertices and 16-bit indices are copied to staging memory
//   * neither:           p->vertices holds quads (4 vertices each), copied to
//                        staging memory and drawn as a quad list
// If staging memory runs out, the draw is skipped silently (no fences, no stats).
static void RADLINK gdraw_DrawIndexedTriangles(GDrawRenderState *r, GDrawPrimitive *p, GDrawVertexBuffer *buf, GDrawStats *stats)
{
   Gnmx::GfxContext * RADRESTRICT ctx = gdraw->gfxc;
   GDrawHandle *vb = (GDrawHandle *) buf;
   const GDraw::VFormatDesc * RADRESTRICT fmt = get_vertex_fmt(p->vertex_format);

   set_renderstate(r, stats);

   if (vb) {
      // resident buffers
      U8 *verts = (U8 *) vb->handle.vbuf.verts + (UINTa) p->vertices;
      U8 *inds = (U8 *) vb->handle.vbuf.inds + (UINTa) p->indices;

      set_vertex_buffer(fmt, verts, p->num_vertices);
      ctx->setPrimitiveType(Gnm::kPrimitiveTypeTriList);
      ctx->drawIndex(p->num_indices, inds);
   } else if (p->indices) {
      // dynamic indexed triangles: vertices first, indices right behind them
      U32 vert_bytes = p->num_vertices * fmt->stride;
      U32 index_bytes = p->num_indices * 2;

      ctx->setPrimitiveType(Gnm::kPrimitiveTypeTriList);
      U8 *staged = (U8 *) alloc_staging(vert_bytes + index_bytes, Gnm::kAlignmentOfBufferInBytes);
      if (!staged)
         return;

      memcpy(staged, p->vertices, vert_bytes);
      memcpy(staged + vert_bytes, p->indices, index_bytes);
      set_vertex_buffer(fmt, staged, p->num_vertices);
      ctx->drawIndex(p->num_indices, staged + vert_bytes);
   } else {
      // dynamic quads
      assert(p->num_vertices % 4 == 0);
      U32 stride = fmt->stride;
      U32 vert_bytes = (U32)p->num_vertices * stride;

      ctx->setPrimitiveType(Gnm::kPrimitiveTypeQuadList);
      U8 *staged = (U8 *) alloc_staging(vert_bytes, Gnm::kAlignmentOfBufferInBytes);
      if (!staged)
         return;

      memcpy(staged, p->vertices, vert_bytes);
      set_vertex_buffer(fmt, staged, p->num_vertices);
      ctx->drawIndexAuto(p->num_vertices);
   }

   // everything the draw referenced stays busy until the next fence
   fence_resources(vb, r->tex[0], r->tex[1]);

   stats->nonzero_flags |= GDRAW_STATS_batches;
   stats->num_batches += 1;
   stats->drawn_indices += p->num_indices;
   stats->drawn_vertices += p->num_vertices;
}
|
|
|
|
///////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Flash 8 filter effects
|
|
//
|
|
|
|
// Draws the axis-aligned rectangle 's' (in the current projection's coordinates)
// as a rect list, with texture coordinates tc = { s0, t0, s1, t1 }.
// Uses an identity world transform and zero texgen. The pixel shader and its
// constants must already be set. Silently draws nothing if staging memory is
// exhausted. 'stats' is currently unused.
static void do_screen_quad(gswf_recti *s, const F32 *tc, GDrawStats *stats)
{
   // vertex constants: identity transform plus the current projection
   VertexVars *vconst = embed_const_buffer<VertexVars>(Gnm::kShaderStageVs, 0);
   const __m128 zero = _mm_setzero_ps();
   _mm_storeu_ps(vconst->world[0], _mm_setr_ps(1.0f, 0.0f, 0.0f, 0.0f));
   _mm_storeu_ps(vconst->world[1], _mm_setr_ps(0.0f, 1.0f, 0.0f, 0.0f));
   _mm_storeu_ps(vconst->x_off, zero);
   _mm_storeu_ps(vconst->texgen_s, zero);
   _mm_storeu_ps(vconst->texgen_t, zero);
   _mm_storeu_ps(vconst->viewproj, _mm_load_ps(gdraw->projection));

   // a rect list needs three corners per rectangle
   gswf_vertex_xyst * RADRESTRICT v = (gswf_vertex_xyst *) alloc_staging(3 * sizeof(gswf_vertex_xyst), Gnm::kAlignmentOfBufferInBytes);
   if (!v)
      return;

   const F32 left = (F32) s->x0, top = (F32) s->y0;
   const F32 right = (F32) s->x1, bottom = (F32) s->y1;

   // top-left, top-right, bottom-left
   v[0].x = left;  v[0].y = top;    v[0].s = tc[0]; v[0].t = tc[1];
   v[1].x = right; v[1].y = top;    v[1].s = tc[2]; v[1].t = tc[1];
   v[2].x = left;  v[2].y = bottom; v[2].s = tc[0]; v[2].t = tc[3];

   set_vertex_buffer(&gdraw->vfmt[GDRAW_vformat_v2tc2], v, 3);
   gdraw->gfxc->setPrimitiveType(Gnm::kPrimitiveTypeRectList);
   gdraw->gfxc->drawIndexAuto(3);
}
|
|
|
|
// Single blur pass; installed as GDrawBlurInfo::BlurPass by gdraw_FilterQuad.
//
// Binds r->tex[0] to slot 0, selects the blur pixel shader for "taps" taps, uploads
// the clamp rectangle (4 floats) and the tap data (taps * 4 floats) as pixel shader
// constants, then draws rectangle "s" with texture coordinates "tc".
static void gdraw_DriverBlurPass(GDrawRenderState *r, int taps, float *data, gswf_recti *s, float *tc, float /*height_max*/, float *clamp, GDrawStats *gstats)
{
   set_texture(0, r->tex[0]);
   set_pixel_shader(&gdraw->blur_ps[taps]);

   PixelParaBlur *blur_consts = embed_const_buffer<PixelParaBlur>(Gnm::kShaderStagePs, 1);
   memcpy(blur_consts->clamp, clamp, 4 * sizeof(float));
   memcpy(blur_consts->tap, data, taps * 4 * sizeof(float));

   do_screen_quad(s, tc, gstats);

   // the source texture is referenced by the draw that was just issued
   fence_resources(r->tex[0]);
}
|
|
|
|
// Color matrix filter: renders r->tex[0] through the color matrix pixel shader into
// a fresh RGBA texture draw buffer and replaces r->tex[0] with the result.
// Does nothing if the draw buffer cannot be started.
static void gdraw_Colormatrix(GDrawRenderState *r, gswf_recti *s, float *tc, GDrawStats *stats)
{
   const bool started = gdraw_TextureDrawBufferBegin(
      s,
      GDRAW_TEXTURE_FORMAT_rgba32,
      GDRAW_TEXTUREDRAWBUFFER_FLAGS_needs_color | GDRAW_TEXTUREDRAWBUFFER_FLAGS_needs_alpha,
      0,
      stats) != 0;
   if (!started)
      return;

   set_texture(0, r->tex[0]);
   set_pixel_shader(&gdraw->colormatrix);

   // 5 float4 constants taken straight from the render state's shader data
   PixelParaColorMatrix *cm_consts = embed_const_buffer<PixelParaColorMatrix>(Gnm::kShaderStagePs, 1);
   memcpy(cm_consts->data, r->shader_data, 5 * 4 * sizeof(float));

   do_screen_quad(s, tc, stats);
   fence_resources(r->tex[0]);

   // hand the filtered result back to the caller
   r->tex[0] = gdraw_TextureDrawBufferEnd(stats);
}
|
|
|
|
// Returns the "valid data" rectangle tracked for a render target texture.
//
// "tex" must be a handle out of gdraw->rendertargets; its index within that cache's
// handle array selects the matching gdraw->rt_valid[] entry.
static gswf_recti *get_valid_rect(GDrawTexture *tex)
{
   GDrawHandle *h = (GDrawHandle *) tex;
   S32 n = (S32) (h - gdraw->rendertargets.handle);

   // The render target cache is set up with MAX_RENDER_STACK_DEPTH + 1 handles, so the
   // last valid index is MAX_RENDER_STACK_DEPTH; anything beyond that is not a render
   // target handle.
   assert(n >= 0 && n < MAX_RENDER_STACK_DEPTH + 1);
   return &gdraw->rt_valid[n];
}
|
|
|
|
// Writes the four components (x, y, z, w) into constant[0..3].
static void set_pixel_constant(F32 *constant, F32 x, F32 y, F32 z, F32 w)
{
   const F32 xyzw[4] = { x, y, z, w };
   for (int i = 0; i < 4; i++)
      constant[i] = xyzw[i];
}
|
|
|
|
// Fills "constant" with the texture-coordinate clamp rectangle for render target "tex":
// its valid rectangle grown by half a pixel on each side, divided by the frame texture
// dimensions.
static void set_clamp_constant(F32 *constant, GDrawTexture *tex)
{
   const gswf_recti *valid = get_valid_rect(tex);

   // when we make the valid data, we make sure there is an extra empty pixel at the border
   const F32 min_s = (valid->x0-0.5f) / gdraw->frametex_width;
   const F32 min_t = (valid->y0-0.5f) / gdraw->frametex_height;
   const F32 max_s = (valid->x1+0.5f) / gdraw->frametex_width;
   const F32 max_t = (valid->y1+0.5f) / gdraw->frametex_height;

   set_pixel_constant(constant, min_s, min_t, max_s, max_t);
}
|
|
|
|
// Drop shadow / bevel filter (isbevel selects the shader table row).
//
// Renders into a fresh RGBA texture draw buffer using r->tex[0], r->tex[1] and, when
// present, r->tex[2]; on completion r->tex[0] is replaced by the result. Does nothing
// if the draw buffer cannot be started.
static void gdraw_Filter(GDrawRenderState *r, gswf_recti *s, float *tc, int isbevel, GDrawStats *stats)
{
   const bool started = gdraw_TextureDrawBufferBegin(
      s,
      GDRAW_TEXTURE_FORMAT_rgba32,
      GDRAW_TEXTUREDRAWBUFFER_FLAGS_needs_color | GDRAW_TEXTUREDRAWBUFFER_FLAGS_needs_alpha,
      NULL,
      stats) != 0;
   if (!started)
      return;

   // inputs; the third texture is optional
   set_texture(0, r->tex[0]);
   set_texture(1, r->tex[1]);
   if (r->tex[2])
      set_texture(2, r->tex[2]);
   set_pixel_shader(&gdraw->filter_ps[isbevel][r->filter_mode]);

   // pixel shader constants
   PixelParaFilter *consts = embed_const_buffer<PixelParaFilter>(Gnm::kShaderStagePs, 1);
   const auto *sd = r->shader_data;

   set_clamp_constant(consts->clamp0, r->tex[0]);
   set_clamp_constant(consts->clamp1, r->tex[1]);
   set_pixel_constant(consts->color,  sd[0], sd[1], sd[2],  sd[3]);
   set_pixel_constant(consts->color2, sd[8], sd[9], sd[10], sd[11]);
   // shader_data[4..5] are negated and divided by the frame texture size;
   // shader_data[6] is passed through unchanged
   set_pixel_constant(consts->tc_off,
                      -sd[4] / (F32)gdraw->frametex_width,
                      -sd[5] / (F32)gdraw->frametex_height,
                      sd[6],
                      0);

   do_screen_quad(s, tc, stats);
   fence_resources(r->tex[0], r->tex[1], r->tex[2]);

   r->tex[0] = gdraw_TextureDrawBufferEnd(stats);
}
|
|
|
|
// GDraw FilterQuad entry point.
//
// Clips the rectangle (x0,y0)-(x1,y1) to the current padded tile and then either
//   - runs a filter effect (blur, color matrix, drop shadow, bevel) when
//     r->blend_mode is GDRAW_BLEND_filter, or
//   - draws the rectangle with the given render state, binding the current render
//     target as texture 1 first when r->blend_mode is GDRAW_BLEND_special.
// Returns without drawing if the clipped rectangle is inverted.
static void RADLINK gdraw_FilterQuad(GDrawRenderState *r, S32 x0, S32 y0, S32 x1, S32 y1, GDrawStats *stats)
{
   // clip to tile boundaries
   gswf_recti rect;
   rect.x0 = RR_MAX(x0, gdraw->tx0p);
   rect.y0 = RR_MAX(y0, gdraw->ty0p);
   rect.x1 = RR_MIN(x1, gdraw->tx0p + gdraw->tpw);
   rect.y1 = RR_MIN(y1, gdraw->ty0p + gdraw->tph);
   if (rect.x1 < rect.x0 || rect.y1 < rect.y0)
      return;

   // texture coordinates of the clipped rectangle, relative to the padded tile origin
   F32 uv[4];
   uv[0] = (rect.x0 - gdraw->tx0p) / (F32) gdraw->frametex_width;
   uv[1] = (rect.y0 - gdraw->ty0p) / (F32) gdraw->frametex_height;
   uv[2] = (rect.x1 - gdraw->tx0p) / (F32) gdraw->frametex_width;
   uv[3] = (rect.y1 - gdraw->ty0p) / (F32) gdraw->frametex_height;

   clear_renderstate();

   if (r->blend_mode != GDRAW_BLEND_filter) {
      // for crazy blend modes, we need to read back from the framebuffer
      // and do the blending in the pixel shader. because we render as
      // a RectList, no quad is ever shaded twice, so we can do this safely
      // while texturing from the render target that we're writing to.
      if (r->blend_mode == GDRAW_BLEND_special) {
         // before we texture from this RT, we need to eliminate fast clears.
         eliminate_fast_clear();

         // input texture. slightly different logic between the main render
         // target and our scratch render targets, because they might have
         // different dimensions.
         Gnm::Texture rt_tex;

         if (gdraw->cur == gdraw->frame) {
            rt_tex.initFromRenderTarget(&gdraw->main_colorbuffer, false);
            rtt_sync(rt_tex.getBaseAddress(), gdraw->main_colorbuffer.getSizeInBytes() >> 8);
         } else {
            rt_tex = *gdraw->cur->color_buffer->handle.tex.gnm;
            rtt_sync(rt_tex.getBaseAddress(), gdraw->rt_colorbuffer_sa.m_size >> 8);
         }

         set_gnm_texture(1, &rt_tex, GDRAW_WRAP_clamp, 0);
      }

      set_renderstate(r, stats);
      do_screen_quad(&rect, uv, stats);
      fence_resources(r->tex[0], r->tex[1]);
      return;
   }

   // filter effects
   switch (r->filter) {
      case GDRAW_FILTER_blur: {
         GDrawBlurInfo blur;
         gswf_recti bounds = *get_valid_rect(r->tex[0]);

         // blur uses physical rendertarget coordinates
         gdraw_ShiftRect(&rect, &rect, -gdraw->tx0p, -gdraw->ty0p);

         blur.BlurPass = gdraw_DriverBlurPass;
         blur.w = gdraw->tpw;
         blur.h = gdraw->tph;
         blur.frametex_width = gdraw->frametex_width;
         blur.frametex_height = gdraw->frametex_height;

         // blur needs to draw with multiple passes, so set up special state
         gdraw->in_blur = true;
         set_viewport();
         set_projection();

         // do the blur
         gdraw_Blur(&gdraw_funcs, &blur, r, &rect, &bounds, stats);

         // restore the normal state
         gdraw->in_blur = false;
         set_viewport();
         set_projection();
         break;
      }

      case GDRAW_FILTER_colormatrix:
         gdraw_Colormatrix(r, &rect, uv, stats);
         break;

      case GDRAW_FILTER_dropshadow:
         gdraw_Filter(r, &rect, uv, 0, stats);
         break;

      case GDRAW_FILTER_bevel:
         gdraw_Filter(r, &rect, uv, 1, stats);
         break;

      default:
         assert(0); // unknown filter type
   }
}
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Shaders and state initialization
|
|
//
|
|
|
|
#include "gdraw_orbis_shaders.inl"
|
|
|
|
// Initializes "shader" from the static descriptor "src".
//
// The descriptor is copied as-is. If it carries a shader blob, the blob is parsed as
// a Gnmx::ShaderFileHeader followed by the shader description, the GPU code that
// follows the description is copied into the context's shared arena, and the shader
// is patched to point at that copy. Descriptors without a blob are left untouched
// after the copy.
static void init_shader(ShaderCode *shader, const ShaderCode *src)
{
   *shader = *src;

   if (shader->blob) {
      Gnmx::ShaderFileHeader *file_hdr = (Gnmx::ShaderFileHeader *) shader->blob;

      // the shader description starts right after the file header
      shader->desc = file_hdr + 1;

      // grab gpu code (located m_shaderHeaderSizeInDW dwords past the description)
      // and copy to arena
      const U8 *code_src = (const U8 *) shader->desc + file_hdr->m_shaderHeaderSizeInDW * 4;
      void *code_dst = gdraw_arena_alloc(&gdraw->vidshared_arena, shader->common->m_shaderSize, Gnm::kAlignmentOfShaderInBytes);
      memcpy(code_dst, code_src, shader->common->m_shaderSize);

      // patch the shader: set the stage's "PgmHi" register field to ~0u, then let
      // Gnmx write the real GPU address
      switch (file_hdr->m_type) {
         case Gnmx::kVertexShader:
            shader->vs->m_vsStageRegisters.m_spiShaderPgmHiVs = ~0u;
            shader->vs->patchShaderGpuAddress(code_dst);
            break;

         case Gnmx::kPixelShader:
            shader->ps->m_psStageRegisters.m_spiShaderPgmHiPs = ~0u;
            shader->ps->patchShaderGpuAddress(code_dst);
            break;

         case Gnmx::kComputeShader:
            shader->cs->m_csStageRegisters.m_computePgmHi = ~0u;
            shader->cs->patchShaderGpuAddress(code_dst);
            break;

         default:
            assert(0); // shader type this driver does not use
      }
   }
}
|
|
|
|
static void create_all_shaders_and_state()
|
|
{
|
|
// sampler state
|
|
static const Gnm::WrapMode addrmodes[ASSERT_COUNT(GDRAW_WRAP__count, 4)] = {
|
|
Gnm::kWrapModeClampLastTexel, // GDRAW_WRAP_clamp
|
|
Gnm::kWrapModeWrap, // GDRAW_WRAP_repeat
|
|
Gnm::kWrapModeMirror, // GDRAW_WRAP_mirror
|
|
Gnm::kWrapModeClampBorder, // GDRAW_WRAP_clamp_to_border
|
|
};
|
|
|
|
for (int nearest=0; nearest < 2; nearest++)
|
|
for (int addr=0; addr < GDRAW_WRAP__count; addr++) {
|
|
Gnm::Sampler *smp = &gdraw->sampler_state[nearest][addr];
|
|
smp->init();
|
|
smp->setWrapMode(addrmodes[addr], addrmodes[addr], addrmodes[addr]);
|
|
smp->setBorderColor(Gnm::kBorderColorTransBlack);
|
|
smp->setXyFilterMode(nearest ? Gnm::kFilterModePoint : Gnm::kFilterModeBilinear, Gnm::kFilterModeBilinear);
|
|
smp->setMipFilterMode(Gnm::kMipFilterModeLinear);
|
|
}
|
|
|
|
// depth/stencil state
|
|
for (int set_id=0; set_id < 2; set_id++)
|
|
for (int test_id=0; test_id < 2; test_id++)
|
|
for (int stencil_enable=0; stencil_enable < 2; stencil_enable++) {
|
|
Gnm::DepthStencilControl *ctl = &gdraw->depth_stencil_control[set_id][test_id][stencil_enable];
|
|
ctl->init();
|
|
ctl->setDepthEnable(set_id || test_id);
|
|
ctl->setDepthControl(set_id ? Gnm::kDepthControlZWriteEnable : Gnm::kDepthControlZWriteDisable, test_id ? Gnm::kCompareFuncLess : Gnm::kCompareFuncAlways);
|
|
ctl->setStencilEnable(stencil_enable != 0);
|
|
ctl->setStencilFunction(Gnm::kCompareFuncEqual);
|
|
}
|
|
|
|
// blend state
|
|
static const struct {
|
|
bool enable;
|
|
Gnm::BlendMultiplier src;
|
|
Gnm::BlendMultiplier dst;
|
|
} blend_states[ASSERT_COUNT(GDRAW_BLEND__count, 6)] = {
|
|
{ false, Gnm::kBlendMultiplierOne, Gnm::kBlendMultiplierZero }, // GDRAW_BLEND_none
|
|
{ true, Gnm::kBlendMultiplierOne, Gnm::kBlendMultiplierOneMinusSrcAlpha }, // GDRAW_BLEND_alpha
|
|
{ true, Gnm::kBlendMultiplierDestColor, Gnm::kBlendMultiplierOneMinusSrcAlpha }, // GDRAW_BLEND_multiply
|
|
{ true, Gnm::kBlendMultiplierOne, Gnm::kBlendMultiplierOne }, // GDRAW_BLEND_add
|
|
|
|
{ false, Gnm::kBlendMultiplierOne, Gnm::kBlendMultiplierZero }, // GDRAW_BLEND_filter
|
|
{ false, Gnm::kBlendMultiplierOne, Gnm::kBlendMultiplierZero }, // GDRAW_BLEND_special
|
|
};
|
|
for (int mode = 0; mode < GDRAW_BLEND__count; mode++) {
|
|
Gnm::BlendControl *ctl = &gdraw->blend_control[mode];
|
|
ctl->init();
|
|
ctl->setBlendEnable(blend_states[mode].enable);
|
|
ctl->setSeparateAlphaEnable(false);
|
|
ctl->setColorEquation(blend_states[mode].src, Gnm::kBlendFuncAdd, blend_states[mode].dst);
|
|
}
|
|
|
|
// vertex shader
|
|
init_shader(&gdraw->vs, vshader_vsps4_arr);
|
|
|
|
// pixel shaders
|
|
for (int i=0; i < GDRAW_TEXTURE__count*3; i++) init_shader(&gdraw->main_ps[0][i], pshader_basic_arr + i);
|
|
for (int i=0; i < GDRAW_BLENDSPECIAL__count; i++) init_shader(&gdraw->exceptional_blend[i], pshader_exceptional_blend_arr + i);
|
|
for (int i=0; i < 32; i++) init_shader(&gdraw->filter_ps[0][i], pshader_filter_arr + i);
|
|
for (int i=0; i <= MAX_TAPS; i++) init_shader(&gdraw->blur_ps[i], pshader_blur_arr + i);
|
|
init_shader(&gdraw->colormatrix, pshader_color_matrix_arr);
|
|
init_shader(&gdraw->clear_ps, pshader_manual_clear_arr);
|
|
|
|
for (int i=0; i < GDRAW_TEXTURE__count; i++)
|
|
gdraw->basic_ps[i] = &gdraw->main_ps[i][0];
|
|
|
|
// compute shaders
|
|
init_shader(&gdraw->texupload_cs, cshader_tex_upload_arr);
|
|
init_shader(&gdraw->memset_cs, cshader_memset_arr);
|
|
init_shader(&gdraw->defragment_cs, cshader_defragment_arr);
|
|
init_shader(&gdraw->mipgen_cs, cshader_mipgen_arr);
|
|
|
|
// vertex formats
|
|
struct VAttrDesc
|
|
{
|
|
U32 offset;
|
|
Gnm::DataFormat fmt;
|
|
};
|
|
|
|
static const struct VFmtDesc {
|
|
U32 stride;
|
|
U32 num_attribs;
|
|
VAttrDesc attribs[MAX_ATTRS];
|
|
} vformats[ASSERT_COUNT(GDRAW_vformat__basic_count, 3)] = {
|
|
// GDRAW_vformat_v2
|
|
{ 8, 2, {
|
|
{ 0, {{{ Gnm::kSurfaceFormat32_32, Gnm::kBufferChannelTypeFloat, Gnm::kBufferChannelX, Gnm::kBufferChannelY, Gnm::kBufferChannelConstant0, Gnm::kBufferChannelConstant1 }}} },
|
|
{ 4, {{{ Gnm::kSurfaceFormat8_8_8_8, Gnm::kBufferChannelTypeUNorm, Gnm::kBufferChannelConstant0, Gnm::kBufferChannelConstant0, Gnm::kBufferChannelConstant1, Gnm::kBufferChannelConstant1 }}} },
|
|
} },
|
|
// GDRAW_vformat_v2aa
|
|
{ 16, 2, {
|
|
{ 0, {{{ Gnm::kSurfaceFormat32_32, Gnm::kBufferChannelTypeFloat, Gnm::kBufferChannelX, Gnm::kBufferChannelY, Gnm::kBufferChannelConstant0, Gnm::kBufferChannelConstant1 }}} },
|
|
{ 8, {{{ Gnm::kSurfaceFormat16_16_16_16, Gnm::kBufferChannelTypeSScaled, Gnm::kBufferChannelX, Gnm::kBufferChannelY, Gnm::kBufferChannelZ, Gnm::kBufferChannelConstant0 }}} },
|
|
} },
|
|
// GDRAW_vformat_v2tc2
|
|
{ 16, 2, {
|
|
{ 0, {{{ Gnm::kSurfaceFormat32_32, Gnm::kBufferChannelTypeFloat, Gnm::kBufferChannelX, Gnm::kBufferChannelY, Gnm::kBufferChannelConstant0, Gnm::kBufferChannelConstant1 }}} },
|
|
{ 8, {{{ Gnm::kSurfaceFormat32_32, Gnm::kBufferChannelTypeFloat, Gnm::kBufferChannelX, Gnm::kBufferChannelY, Gnm::kBufferChannelConstant0, Gnm::kBufferChannelConstant1 }}} },
|
|
} },
|
|
};
|
|
|
|
for (int i=0; i < GDRAW_vformat__basic_count; i++) {
|
|
gdraw->vfmt[i].stride = vformats[i].stride;
|
|
gdraw->vfmt[i].num_attribs = vformats[i].num_attribs;
|
|
for (U32 j=0; j < vformats[i].num_attribs; j++) {
|
|
const VAttrDesc *desc = &vformats[i].attribs[j];
|
|
gdraw->vfmt[i].formats[j] = desc->fmt;
|
|
gdraw->vfmt[i].vb_offs[j] = desc->offset;
|
|
}
|
|
}
|
|
|
|
// zero "pixel common" constant buffer
|
|
PixelCommonVars *pvars = (PixelCommonVars *) gdraw_arena_alloc(&gdraw->vidshared_arena, sizeof(PixelCommonVars), Gnm::kAlignmentOfBufferInBytes);
|
|
memset(pvars, 0, sizeof(*pvars));
|
|
gdraw->pixel_common_zero_cbuf.initAsConstantBuffer(pvars, sizeof(*pvars));
|
|
}
|
|
|
|
typedef struct
|
|
{
|
|
S32 num_handles;
|
|
S32 num_bytes;
|
|
void *ptr;
|
|
} GDrawResourceLimit;
|
|
|
|
// Resource limits used by GDraw. Change these using SetResouceLimits!
|
|
static GDrawResourceLimit gdraw_limits[GDRAW_ORBIS_RESOURCE__count];
|
|
|
|
// Creates the handle cache for resource pool "type", sized from gdraw_limits[type].
//
// The cache and (for non-vertex-buffer pools) one Gnm::Texture header per handle
// live in a single IggyGDrawMalloc block; the pool memory itself is managed by an
// allocator created with gfxalloc_create using alignment "align".
// Returns NULL if either the cache block or the allocator cannot be created.
static GDrawHandleCache *make_handle_cache(gdraw_orbis_resourcetype type, U32 align)
{
   const GDrawResourceLimit *limit = &gdraw_limits[type];
   S32 num_handles = limit->num_handles;
   S32 num_bytes = limit->num_bytes;
   bool is_vertex = (type == GDRAW_ORBIS_RESOURCE_vertexbuffer);

   // GDrawHandleCache already contains one handle, hence the "- 1"
   U32 cache_size = sizeof(GDrawHandleCache) + (num_handles - 1) * sizeof(GDrawHandle);
   // vertex buffers need no per-handle texture headers
   U32 header_size = num_handles * (is_vertex ? 0 : sizeof(Gnm::Texture));

   GDrawHandleCache *cache = (GDrawHandleCache *) IggyGDrawMalloc(cache_size + header_size);
   if (!cache)
      return NULL;

   gdraw_HandleCacheInit(cache, num_handles, num_bytes);
   cache->is_vertex = is_vertex;

   // set up resource headers: they are stored directly behind the handle array
   if (!is_vertex) {
      Gnm::Texture *tex_headers = (Gnm::Texture *) ((U8 *) cache + cache_size);
      for (S32 h = 0; h < num_handles; h++)
         cache->handle[h].handle.tex.gnm = &tex_headers[h];
   }

   // set up allocator
   cache->alloc = gfxalloc_create(limit->ptr, num_bytes, align, num_handles);
   if (!cache->alloc) {
      IggyGDrawFree(cache);
      return NULL;
   }

   return cache;
}
|
|
|
|
// Releases a handle cache created by make_handle_cache, together with its allocator.
// Passing NULL is allowed and does nothing.
static void free_handle_cache(GDrawHandleCache *c)
{
   if (!c)
      return;

   if (c->alloc)
      IggyGDrawFree(c->alloc);
   IggyGDrawFree(c);
}
|
|
|
|
|
|
// Sets the memory pool used for one GDraw resource type.
//
//   type        - which pool to configure (render targets, textures, vertex buffers)
//   num_handles - number of handles for the pool; 0 is treated as 1
//   ptr         - start of the pool memory
//   num_bytes   - size of the pool memory in bytes
//
// The limits are always recorded in gdraw_limits[]. If no GDraw context exists yet
// that is all that happens (gdraw_orbis_CreateContext applies the recorded limits
// later). Otherwise the corresponding pool is rebuilt right away; this must not be
// called while the GPU is still busy with Iggy command buffers.
//
// Returns 1 on success, 0 if the arguments are invalid or the pool could not be
// (re)created.
int gdraw_orbis_SetResourceMemory(gdraw_orbis_resourcetype type, S32 num_handles, void *ptr, S32 num_bytes)
{
   GDrawStats stats={0};

   assert(type >= GDRAW_ORBIS_RESOURCE_rendertarget && type < GDRAW_ORBIS_RESOURCE__count);
   assert(num_handles >= 0);
   assert(num_bytes >= 0);

   // The asserts vanish in builds without assertions; reject bad arguments explicitly
   // so an invalid type can never index outside gdraw_limits[] below.
   if (type < GDRAW_ORBIS_RESOURCE_rendertarget || type >= GDRAW_ORBIS_RESOURCE__count)
      return 0;
   if (num_handles < 0 || num_bytes < 0)
      return 0;

   if (!num_handles) num_handles = 1;

   // texture and vertex buffer pools get their start/size adjusted for alignment
   switch (type) {
      case GDRAW_ORBIS_RESOURCE_texture:
         make_pool_aligned(&ptr, &num_bytes, GDRAW_ORBIS_TEXTURE_ALIGNMENT);
         break;

      case GDRAW_ORBIS_RESOURCE_vertexbuffer:
         make_pool_aligned(&ptr, &num_bytes, Gnm::kAlignmentOfBufferInBytes);
         break;

      default:
         break;
   }

   gdraw_limits[type].num_handles = num_handles;
   gdraw_limits[type].num_bytes = num_bytes;
   gdraw_limits[type].ptr = ptr;

   // if no gdraw context created, there's nothing to worry about
   if (!gdraw)
      return 1;

   // make sure GPU is done first (assuming we're in a state where we can dispatch commands)
   assert(!is_fence_pending(gdraw->tile_end_fence)); // you may not call this while GPU is still busy with Iggy command buffers!

   if (gdraw->texturecache) gdraw_res_reap(gdraw->texturecache, &stats);
   if (gdraw->vbufcache) gdraw_res_reap(gdraw->vbufcache, &stats);
   // in theory we can now check that the given cache is really empty at this point

   // resize the appropriate pool
   switch (type) {
      case GDRAW_ORBIS_RESOURCE_rendertarget:
         // the render target pool always has MAX_RENDER_STACK_DEPTH + 1 handles,
         // independent of num_handles
         gdraw_HandleCacheInit(&gdraw->rendertargets, MAX_RENDER_STACK_DEPTH + 1, num_bytes);
         for (int i=0; i < MAX_RENDER_STACK_DEPTH + 1; i++)
            gdraw->rendertargets.handle[i].handle.tex.gnm = &gdraw->rendertarget_textures[i];
         gdraw_arena_init(&gdraw->rt_arena, ptr, num_bytes);
         return 1;

      case GDRAW_ORBIS_RESOURCE_texture:
         free_handle_cache(gdraw->texturecache);
         gdraw->texturecache = make_handle_cache(GDRAW_ORBIS_RESOURCE_texture, GDRAW_ORBIS_TEXTURE_ALIGNMENT);
         return gdraw->texturecache != NULL;

      case GDRAW_ORBIS_RESOURCE_vertexbuffer:
         free_handle_cache(gdraw->vbufcache);
         gdraw->vbufcache = make_handle_cache(GDRAW_ORBIS_RESOURCE_vertexbuffer, GDRAW_ORBIS_VERTEXBUFFER_ALIGNMENT);
         return gdraw->vbufcache != NULL;

      default:
         return 0;
   }
}
|
|
|
|
void gdraw_orbis_ResetAllResourceMemory()
|
|
{
|
|
gdraw_orbis_SetResourceMemory(GDRAW_ORBIS_RESOURCE_rendertarget, 0, NULL, 0);
|
|
gdraw_orbis_SetResourceMemory(GDRAW_ORBIS_RESOURCE_texture, 0, NULL, 0);
|
|
gdraw_orbis_SetResourceMemory(GDRAW_ORBIS_RESOURCE_vertexbuffer, 0, NULL, 0);
|
|
}
|
|
|
|
// Creates the global GDraw context.
//
//   w, h               - dimensions used for the frame texture / scratch render targets
//   context_shared_mem - memory for the context's shared arena
//                        (GDRAW_ORBIS_CONTEXT_MEM_SIZE bytes are used)
//
// Returns the GDraw function table, or NULL if the context allocation fails.
GDrawFunctions *gdraw_orbis_CreateContext(S32 w, S32 h, void *context_shared_mem)
{
   // the context allocation is followed by space for the CUE's CP RAM shadow
   U32 cpram_shadow_size = Gnmx::ConstantUpdateEngine::computeCpRamShadowSize();

   gdraw = (GDraw *) IggyGDrawMalloc(sizeof(*gdraw) + cpram_shadow_size);
   if (!gdraw)
      return NULL;

   memset(gdraw, 0, sizeof(*gdraw));

   // context shared memory
   gdraw_arena_init(&gdraw->vidshared_arena, context_shared_mem, GDRAW_ORBIS_CONTEXT_MEM_SIZE);

   // labels: the fence label lives in the shared arena and starts out at 0
   gdraw->label_ptr = (volatile U64 *) gdraw_arena_alloc(&gdraw->vidshared_arena, sizeof(U64), sizeof(U64));
   *gdraw->label_ptr = 0;
   gdraw->next_fence_index = 1;
   gdraw->tile_end_fence.value = 0;

   // set up memory for all resource types, using the limits recorded so far
   for (int res = 0; res < GDRAW_ORBIS_RESOURCE__count; res++) {
      const GDrawResourceLimit &limit = gdraw_limits[res];
      gdraw_orbis_SetResourceMemory((gdraw_orbis_resourcetype) res, limit.num_handles, limit.ptr, limit.num_bytes);
   }

   // initialize render target texture desc
   gdraw->frametex_width = w;
   gdraw->frametex_height = h;

   Gnm::DataFormat rt_format = Gnm::kDataFormatR8G8B8A8Unorm;
   Gnm::TileMode rt_tile_mode;
   GpuAddress::computeSurfaceTileMode(&rt_tile_mode, GpuAddress::kSurfaceTypeRwTextureFlat, rt_format, 1);
   gdraw->rt_colorbuffer_sa = gdraw->rt_colorbuffer.init(gdraw->frametex_width, gdraw->frametex_height, 1, rt_format, rt_tile_mode, Gnm::kNumSamples1, Gnm::kNumFragments1, NULL, NULL);
   gdraw->rt_colorbuffer.setCmaskFastClearEnable(false);

   // shaders and state
   create_all_shaders_and_state();

   // API: fill in the function table handed back to Iggy
   GDrawFunctions *api = &gdraw_funcs;

   api->SetViewSizeAndWorldScale = gdraw_SetViewSizeAndWorldScale;
   api->GetInfo                  = gdraw_GetInfo;

   api->DescribeTexture          = gdraw_DescribeTexture;
   api->DescribeVertexBuffer     = gdraw_DescribeVertexBuffer;

   api->RenderingBegin           = gdraw_RenderingBegin;
   api->RenderingEnd             = gdraw_RenderingEnd;
   api->RenderTileBegin          = gdraw_RenderTileBegin;
   api->RenderTileEnd            = gdraw_RenderTileEnd;

   api->TextureDrawBufferBegin   = gdraw_TextureDrawBufferBegin;
   api->TextureDrawBufferEnd     = gdraw_TextureDrawBufferEnd;

   api->DrawIndexedTriangles     = gdraw_DrawIndexedTriangles;
   api->FilterQuad               = gdraw_FilterQuad;

   api->SetAntialiasTexture      = gdraw_SetAntialiasTexture;

   api->ClearStencilBits         = gdraw_ClearStencilBits;
   api->ClearID                  = gdraw_ClearID;

   api->MakeTextureBegin         = gdraw_MakeTextureBegin;
   api->MakeTextureMore          = gdraw_MakeTextureMore;
   api->MakeTextureEnd           = gdraw_MakeTextureEnd;

   api->UpdateTextureBegin       = gdraw_UpdateTextureBegin;
   api->UpdateTextureRect        = gdraw_UpdateTextureRect;
   api->UpdateTextureEnd         = gdraw_UpdateTextureEnd;

   api->FreeTexture              = gdraw_FreeTexture;
   api->TryToLockTexture         = gdraw_TryToLockTexture;

   api->MakeVertexBufferBegin    = gdraw_MakeVertexBufferBegin;
   api->MakeVertexBufferMore     = gdraw_MakeVertexBufferMore;
   api->MakeVertexBufferEnd      = gdraw_MakeVertexBufferEnd;
   api->TryToLockVertexBuffer    = gdraw_TryLockVertexBuffer;
   api->FreeVertexBuffer         = gdraw_FreeVertexBuffer;

   api->MakeTextureFromResource  = (gdraw_make_texture_from_resource *) gdraw_orbis_MakeTextureFromResource;
   api->FreeTextureFromResource  = gdraw_orbis_DestroyTextureFromResource;

   api->UnlockHandles            = gdraw_UnlockHandles;
   api->SetTextureUniqueID       = gdraw_SetTextureUniqueID;

   return api;
}
|
|
|
|
void gdraw_orbis_DestroyContext(void)
|
|
{
|
|
if (gdraw) {
|
|
GDrawStats stats;
|
|
memset(&stats, 0, sizeof(stats));
|
|
if (gdraw->texturecache) gdraw_res_flush(gdraw->texturecache, &stats);
|
|
if (gdraw->vbufcache) gdraw_res_flush(gdraw->vbufcache, &stats);
|
|
|
|
// make sure the GPU is done first
|
|
assert(!is_fence_pending(gdraw->tile_end_fence));
|
|
|
|
free_handle_cache(gdraw->texturecache);
|
|
free_handle_cache(gdraw->vbufcache);
|
|
IggyGDrawFree(gdraw);
|
|
gdraw = NULL;
|
|
}
|
|
}
|
|
|
|
// Starts a custom draw region: clears GDraw's render state and writes the
// object-space matrix for "region" (its o2w transform combined with the current
// projection) into "matrix".
void RADLINK gdraw_orbis_BeginCustomDraw(IggyCustomDrawCallbackRegion *region, float matrix[16])
{
   clear_renderstate();

   gdraw_GetObjectSpaceMatrix(matrix,
                              region->o2w,
                              gdraw->projection,
                              0.0f,
                              0);
}
|
|
|
|
// Computes the same object-space matrix as gdraw_orbis_BeginCustomDraw into "mat",
// without clearing the render state.
void RADLINK gdraw_orbis_CalculateCustomDraw_4J(IggyCustomDrawCallbackRegion * region, F32 mat[16])
{
   gdraw_GetObjectSpaceMatrix(mat,
                              region->o2w,
                              gdraw->projection,
                              0.0f,
                              0);
}
|
|
|
|
// Ends a custom draw region by re-applying GDraw's common render state.
// "region" is not used.
void RADLINK gdraw_orbis_EndCustomDraw(IggyCustomDrawCallbackRegion *region)
{
   (void) region;
   set_common_renderstate();
}
|
|
|
|
// Creates a GDraw texture from a texture embedded in an Iggy file.
//
// The Gnm::Texture stored in "tex" is patched in place: its base address is pointed
// at the pixel data inside "file_in_memory" (at tex->file_offset), register word 7 is
// cleared, and for the luminance/intensity formats the channel order is set so the
// single-channel data is replicated into the expected channels. The patched
// texture is then wrapped as a GDraw texture. "len" is not used.
GDrawTexture * RADLINK gdraw_orbis_MakeTextureFromResource(U8 *file_in_memory, S32 len, IggyFileTexturePS4 *tex)
{
   Gnm::Texture *gnm_tex = (Gnm::Texture *) &tex->texture;

   gnm_tex->setBaseAddress(file_in_memory + tex->file_offset);
   gnm_tex->m_regs[7] = 0;

   switch (tex->format) {
      case IFT_FORMAT_la_88:
         // RGB = first channel, A = second channel
         gnm_tex->setChannelOrder(Gnm::kTextureChannelX, Gnm::kTextureChannelX, Gnm::kTextureChannelX, Gnm::kTextureChannelY);
         break;

      case IFT_FORMAT_i_8:
         // RGBA = single channel
         gnm_tex->setChannelOrder(Gnm::kTextureChannelX, Gnm::kTextureChannelX, Gnm::kTextureChannelX, Gnm::kTextureChannelX);
         break;

      case IFT_FORMAT_l_8:
         // RGB = single channel, A = constant 1
         gnm_tex->setChannelOrder(Gnm::kTextureChannelX, Gnm::kTextureChannelX, Gnm::kTextureChannelX, Gnm::kTextureChannelConstant1);
         break;

      default:
         // all other formats keep the channel order stored in the file
         break;
   }

   return gdraw_orbis_WrappedTextureCreate(gnm_tex);
}
|
|
|
|
// Counterpart of gdraw_orbis_MakeTextureFromResource: destroys the wrapped texture
// handle that was created for a file-embedded texture.
extern void RADLINK gdraw_orbis_DestroyTextureFromResource(GDrawTexture *tex)
{
   gdraw_orbis_WrappedTextureDestroy(tex);
}
|
|
|
|
|
|
// 4J added - copy of set_viewport_raw that sets an opengl style z-range rather than the direct-x range used in set_viewport_raw
|
|
static void set_viewport_raw_4J(S32 x, S32 y, S32 w, S32 h)
|
|
{
|
|
// check against hardware limits
|
|
assert(w >= 0 && w <= 16384);
|
|
assert(h >= 0 && h <= 16384);
|
|
|
|
gdraw->cview.x0 = x;
|
|
gdraw->cview.y0 = y;
|
|
gdraw->cview.x1 = x + w;
|
|
gdraw->cview.y1 = y + h;
|
|
|
|
F32 scale[3] = { (F32)w * 0.5f, -(F32)h * 0.5f, 0.5f };
|
|
F32 offset[3] = { (F32)x + (F32)w * 0.5f, (F32)y + (F32)h * 0.5f, 0.5f };
|
|
gdraw->gfxc->setViewport(0, 0.0f, 1.0f, scale, offset);
|
|
disable_scissor(true);
|
|
}
|
|
|
|
// 4J added - copy of setViewport, that sets the current viewport but with an opengl-style z-range rather than the direct-x range that Iggy uses internally
|
|
// on PS4. We need this to set up a viewport to match Iggy when doing custom rendering
|
|
void gdraw_orbis_setViewport_4J()
|
|
{
|
|
if (gdraw->in_blur) { // blur needs special setup
|
|
set_viewport_raw_4J(0, 0, gdraw->tpw, gdraw->tph);
|
|
return;
|
|
}
|
|
|
|
if (gdraw->cur == gdraw->frame) // if the rendering stack is empty
|
|
// render a tile-sized region to the user-request tile location
|
|
set_viewport_raw_4J(gdraw->vx, gdraw->vy, gdraw->tw, gdraw->th);
|
|
else if (gdraw->cur->cached)
|
|
set_viewport_raw_4J(0, 0, gdraw->cur->width, gdraw->cur->height);
|
|
else
|
|
// if on the render stack, draw a padded-tile-sized region at the origin
|
|
set_viewport_raw_4J(0, 0, gdraw->tpw, gdraw->tph);
|
|
}
|