2713 lines
97 KiB
C++
2713 lines
97 KiB
C++
#include "stdafx.h"
|
|
// gdraw_ps3gcm.cpp - author: Fabian Giesen - copyright 2010-2011 RAD Game Tools
|
|
//
|
|
// This implements the Iggy graphics driver layer for GCM.
|
|
|
|
// GDraw consists of several components that interact fairly loosely with each other;
|
|
// e.g. the resource management, drawing and filtering parts are all fairly independent
|
|
// of each other. If you want to modify some aspect of GDraw - say the texture allocation
|
|
// logic - your best bet is usually to just look for one of the related entry points,
|
|
// e.g. MakeTextureBegin, and take it from there. There's a bunch of code in this file,
|
|
// but most of it isn't really complicated.
|
|
//
|
|
// One bit you might be tempted to touch is GDraws state management, to integrate it with
|
|
// an existing state caching system. This is *not recommended*; Iggy tends to generate a fairly
|
|
// large number of batches, and the existing implementation was designed to keep PPU overhead to a
|
|
// minimum while also avoiding redundant state changes and keeping the command buffer small.
|
|
// Introducing any extra indirections is likely to cause notably degraded performance. Nonetheless,
|
|
// if you want to continue, here's the list of functions that modify RSX state in some way:
|
|
// - The video memory defragmentation logic
|
|
// - All of the rendering helpers
|
|
// - RenderTile*/TextureDrawBuffer* may change the active rendertarget and depth/stencil surface,
|
|
// as do GCM_NoMoreGDrawThisFrame and the rendertarget management code (see comments there)
|
|
// - set_rsx_texture / set_texture
|
|
// - set_renderstate and set_renderstate_full. These are the main places where render state changes occur;
|
|
// you should probably start here.
|
|
// - DrawIndexedTriangles calls set_vertex_decl which sets the active vertex/index buffers and vertex declaration
|
|
// - Most of the functions in the "filter effects" section modify RSX state, mostly pixel shader constants and textures
|
|
//
|
|
// On fences: GDraw makes use of fences to synchronize texture/vertex buffer updates
|
|
// that happen during the middle of a frame (which can happen when GDraw uses a texture
|
|
// or vertex buffer that hasn't been used before, or one that has since been freed to make
|
|
// space for other resources). This uses its own RSX label so you shouldn't generally notice
|
|
// or need to worry about it. It is something to keep in mind when modifying any of the
|
|
// resource-management code, though; you need to be careful not to mess up the synchronization
|
|
// that's there. The current fence index (stored as gdraw->next_fence_index) is just a 64-bit counter,
|
|
// incremented whenever a fence (usually a texture label) is inserted into the command buffer.
|
|
// wait_on_fence is then used to make sure that the RSX has passed such a fence whenever GDraw
|
|
// is about to start an operation that needs synchronization, e.g. modify or free a texture.
|
|
// (Technically, memory only needs to be synchronized when it's being re-allocated, not when
|
|
// it's freed, but the only case where GDraw frees textures in usual operation is to make
|
|
// space for new allocations, so we don't finesse this). This logic was carefully written (and
|
|
// tested) to make sure it works properly even in the presence of counter wraparound.
|
|
//
|
|
// One final thing to be aware about is RSX pipeline offsets between different type of fences.
|
|
// As mentioned above, most fences we insert into the command stream are implemented using
|
|
// (fairly light-weight) texture labels. But we do use backend labels during video memory
|
|
// defragmentation and to synchronize render-to-texture operations. Backend labels are written
|
|
// later in the pipeline than texture labels, causing a race condition in sequences like
|
|
//
|
|
// cellGcmSetWriteTextureLabel(label, 0)
|
|
// ...
|
|
// cellGcmSetWriteBackEndLabel(label, 1)
|
|
// ...
|
|
// cellGcmSetWriteTextureLabel(label, 2)
|
|
//
|
|
// where label might theoretically become first 2 then 1. Needless to say, this would
|
|
// be very bad. To ensure this can't happen, we make sure to wait on the last issued
|
|
// fence before we insert a backend fence. For defragmentation, this wait is on the PPU
|
|
// (we need to wait for rendering to finish before we can shuffle textures or vertex
|
|
// buffer around). For render-to-texture operations, we insert a WaitLabel command
|
|
// first. This may stall the RSX for a short amount of time, but it's no big deal,
|
|
// because if we didn't stall there, we'd still have to wait immediately afterwards
|
|
// since the render target change that follows it also causes a pipeline flush. This
|
|
// was benchmarked not to cause any notable performance degradation - our first
|
|
// implementation used two RSX labels, one only updated with texture labels and the
|
|
// other with backend labels, but this variant is somewhat simpler and uses just one
|
|
// RSX label, so we switched to it.
|
|
|
|
#define GDRAW_ASSERTS
|
|
|
|
#include <string.h>
|
|
#include <stdlib.h>
|
|
#include <cell/gcm.h>
|
|
#include <cell/gcm/gcm_method_data.h>
|
|
#include <ppu_asm_intrinsics.h>
|
|
#include <math.h>
|
|
#include "gdraw.h"
|
|
#include "iggy.h"
|
|
|
|
struct GcmTexture;
|
|
|
|
#include "gdraw_ps3gcm.h"
|
|
|
|
typedef union {
|
|
struct {
|
|
GcmTexture *gcm;
|
|
void *gcm_ptr;
|
|
} tex;
|
|
|
|
struct {
|
|
void *verts;
|
|
void *inds;
|
|
} vbuf;
|
|
} GDrawNativeHandle;
|
|
|
|
#define GDRAW_MANAGE_MEM
|
|
#define GDRAW_DEFRAGMENT
|
|
#define GDRAW_BUFFER_RING
|
|
#define GDRAW_MIN_FREE_AMOUNT (64*1024) // always try to free at least this many bytes when throwing out old textures
|
|
#include "gdraw_shared.inl"
|
|
|
|
// max rendertarget stack depth. this depends on the extent to which you
|
|
// use filters and non-standard blend modes, and how nested they are.
|
|
#define MAX_RENDER_STACK_DEPTH 8 // Iggy is hardcoded to a limit of 16... probably 1-3 is realistic
|
|
#define MAX_SAMPLERS 3 // max number of texture samplers used
|
|
#define AATEX_SAMPLER 7 // sampler that aa_tex gets set in
|
|
#define FENCE_BATCH_INTERVAL 64 // put a fence after every N'th batch
|
|
|
|
static GDrawFunctions gdraw_funcs;
|
|
|
|
// render target state
|
|
typedef struct
|
|
{
|
|
GDrawHandle *color_buffer;
|
|
S32 base_x, base_y, width, height;
|
|
rrbool cached;
|
|
} GDrawFramebufferState;
|
|
|
|
struct ProgramWithCachedVariableLocations
|
|
{
|
|
union {
|
|
unsigned char *prog_data;
|
|
CGprogram program;
|
|
};
|
|
union {
|
|
void *ucode; // used for vertex progs
|
|
CellCgbFragmentProgramConfiguration cfg; // used for fragment progs
|
|
};
|
|
int vars[MAX_VARS]; // it's unsigned in d3d, but we want an 'undefined' value
|
|
};
|
|
|
|
struct GcmTexture
|
|
{
|
|
// in hardware register format! (for quick texture switching)
|
|
U32 offset;
|
|
U32 format;
|
|
U32 remap;
|
|
U32 imagerect;
|
|
U32 control3;
|
|
|
|
// used internally
|
|
U32 width, height;
|
|
U32 pitch;
|
|
U32 swizzled;
|
|
};
|
|
|
|
#define CHAN_A 0
|
|
#define CHAN_R 1
|
|
#define CHAN_G 2
|
|
#define CHAN_B 3
|
|
#define TEXREMAP(chan,out,in) ((CELL_GCM_TEXTURE_REMAP_ ## out << (chan*2 + 8)) | ((CELL_GCM_TEXTURE_REMAP_ ## in << (chan*2))))
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// GDraw data structure
|
|
//
|
|
//
|
|
// This is the primary rendering abstraction, which hides all
|
|
// the platform-specific rendering behavior from Iggy. It is
|
|
// full of platform-specific graphics state, and also general
|
|
// graphics state so that it doesn't have to callback into Iggy
|
|
// to get at that graphics state.
|
|
|
|
typedef struct
|
|
{
|
|
CellGcmContextData *gcm;
|
|
|
|
// scale factor converting worldspace to viewspace <0,0>..<w,h>
|
|
F32 world_to_pixel[2];
|
|
F32 projection[4];
|
|
|
|
// cached state
|
|
int vert_format; // active vertex format (-1 if unknown)
|
|
U32 scissor_state; // ~0 if unknown, otherwise 0 or 1
|
|
int blend_mode; // active blend mode (-1 if unknown)
|
|
U32 stencil_key; // field built from stencil test flags. 0=no stencil, ~0 is used for "unknown state"
|
|
U32 z_key; // same for z write/z test
|
|
|
|
GDrawTexture *active_tex[MAX_SAMPLERS];
|
|
ProgramWithCachedVariableLocations *cur_fprog;
|
|
|
|
// fragment shader base pointers
|
|
ProgramWithCachedVariableLocations *basic_fprog[GDRAW_TEXTURE__count];
|
|
|
|
// render targets
|
|
CellGcmSurface main_surface;
|
|
GDrawHandleCache rendertargets;
|
|
GDrawHandle rendertarget_handles[MAX_RENDER_STACK_DEPTH]; // not -1, because we use +1 to initialize
|
|
GcmTexture rendertarget_textures[MAX_RENDER_STACK_DEPTH+1];
|
|
|
|
gswf_recti rt_valid[MAX_RENDER_STACK_DEPTH+1]; // valid rect for texture clamping
|
|
|
|
// size of our render targets
|
|
S32 frametex_width, frametex_height;
|
|
|
|
// viewport setting (in pixels) for current frame
|
|
S32 vx,vy;
|
|
S32 fw,fh; // full width/height of virtual display
|
|
S32 tw,th; // actual width/height of current tile
|
|
S32 tpw,tph; // width/height of padded version of tile
|
|
|
|
S32 tx0,ty0;
|
|
S32 tx0p,ty0p;
|
|
|
|
rrbool in_blur;
|
|
struct {
|
|
S32 x,y,w,h;
|
|
} cview; // current viewport
|
|
|
|
GcmTexture aa_tex;
|
|
|
|
// rsx local mem state
|
|
GDrawArena local_arena; // this is where shaders etc. land
|
|
U32 transfer_mode; // used for defragmentation
|
|
|
|
// render-state stack described above for 'temporary' rendering
|
|
GDrawFramebufferState frame[MAX_RENDER_STACK_DEPTH];
|
|
GDrawFramebufferState *cur;
|
|
|
|
// ppu/rsx sync
|
|
U32 fence_label_index;
|
|
volatile U32 *fence_label;
|
|
U64 next_fence_index; // next fence index we're going to write
|
|
U32 fence_batch_counter; // used to write fences every N batches
|
|
|
|
// texture and vertex buffer pools
|
|
GDrawHandleCache *texturecache;
|
|
int tex_loc; // CELL_GCM_LOCATION of textures
|
|
|
|
GDrawHandleCache *vbufcache;
|
|
U8 *vbuf_base; // used for fast addr2offs
|
|
int vbuf_loc; // CELL_GCM_LOCATION of vertex buffers
|
|
|
|
// rendertarget stuff
|
|
GDrawArena rt_arena;
|
|
S32 rt_pitch; // render target pitch
|
|
int rt_loc; // CELL_GCM_LOCATION of rendertargets
|
|
|
|
// dyn vertex buffer
|
|
gdraw_bufring dyn_vb;
|
|
U8 *dynvb_base; // used for fast addr2offs
|
|
int dynvb_loc; // CELL_GCM_LOCATION of dyn vertex buffer
|
|
|
|
// fragment shaders
|
|
ProgramWithCachedVariableLocations fprog[GDRAW_TEXTURE__count][3];
|
|
ProgramWithCachedVariableLocations exceptional_blend[GDRAW_BLENDSPECIAL__count];
|
|
ProgramWithCachedVariableLocations filter_prog[2][16];
|
|
ProgramWithCachedVariableLocations blur_prog[MAX_TAPS+1];
|
|
ProgramWithCachedVariableLocations colormatrix;
|
|
|
|
// vertex shaders
|
|
ProgramWithCachedVariableLocations vprog[GDRAW_vformat__basic_count];
|
|
U32 vslot[GDRAW_vformat__basic_count];
|
|
|
|
// mipmapping
|
|
GDrawMipmapContext mipmap;
|
|
|
|
// for bookkeeping
|
|
GDrawFence tile_end_fence;
|
|
} GDraw;
|
|
|
|
#define COLOR_MASK_ALL (CELL_GCM_COLOR_MASK_R | CELL_GCM_COLOR_MASK_G | CELL_GCM_COLOR_MASK_B | CELL_GCM_COLOR_MASK_A)
|
|
|
|
static GDraw *gdraw;
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Vertex program registers
|
|
//
|
|
|
|
// These are direct constant register indices
|
|
// NOTE: RSX can't set more than 8 vertex constants at once!
|
|
#define VVAR_world 0
|
|
#define VVAR_count_worldonly 2 // number of constants to update when we only change world matrix
|
|
#define VVAR_x_off 2
|
|
#define VVAR_color_mul 3
|
|
#define VVAR_count_world_and_color 4
|
|
#define VVAR_tex_s 4
|
|
#define VVAR_tex_t 5
|
|
#define VVAR_count 6 // number of vertex program registers to update when we change everything
|
|
|
|
#define VVAR_viewproj 6 // projection is only updated when it actually changes
|
|
|
|
struct Vec4
|
|
{
|
|
F32 x,y,z,w;
|
|
};
|
|
|
|
struct VertexVars
|
|
{
|
|
Vec4 world[2];
|
|
Vec4 x_off;
|
|
Vec4 color_mul;
|
|
Vec4 s0_texgen;
|
|
Vec4 t0_texgen;
|
|
Vec4 proj;
|
|
};
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Fence and command buffer helpers
|
|
//
|
|
|
|
// used to write to the command buffer: gcc won't generate clrldi
|
|
// instructions for struct member access, but it will for pointer
|
|
// arithmetic.
|
|
struct CommandData
|
|
{
|
|
U32 w0,w1,w2,w3,w4,w5,w6,w7;
|
|
U32 w8,w9,wa,wb,wc,wd,we,wf;
|
|
};
|
|
|
|
static RADINLINE CommandData *reserve_command(CellGcmContextData * RADRESTRICT gcm, U32 size)
|
|
{
|
|
cellGcmReserveMethodSizeInline(gcm, size);
|
|
return (CommandData *) gcm->current;
|
|
}
|
|
|
|
static RADINLINE CommandData *put_command(CellGcmContextData * RADRESTRICT gcm, U32 size)
|
|
{
|
|
cellGcmReserveMethodSizeInline(gcm, size);
|
|
CommandData *cmd = (CommandData *) gcm->current;
|
|
gcm->current += size;
|
|
return cmd;
|
|
}
|
|
|
|
static RADINLINE GDrawFence get_next_fence()
|
|
{
|
|
GDrawFence fence;
|
|
fence.value = gdraw->next_fence_index;
|
|
return fence;
|
|
}
|
|
|
|
static RADINLINE void flush_delayed_fence_updates()
|
|
{
|
|
}
|
|
|
|
static RADINLINE rrbool is_fence_pending(GDrawFence fence)
|
|
{
|
|
// if it's older than one full wrap of the fence counter,
|
|
// we know it's retired. (we can't have more than 4 billion
|
|
// fences pending in command buffers with only 256MB of
|
|
// main memory!)
|
|
if (gdraw->next_fence_index - fence.value > 0xffffffffu)
|
|
return false;
|
|
|
|
// this is how far the GPU is.
|
|
U32 retired = *gdraw->fence_label;
|
|
|
|
// everything between "retired" (exclusive) and "next_fence_index"
|
|
// (inclusive) is pending. everything else is definitely done.
|
|
//
|
|
// we need to be careful about this test since the "fence" value
|
|
// coming in is, for all practical purposes, an arbitrary U32. our
|
|
// fence counter might have wrapped around multiple times since we last
|
|
// used a resource that gets freed, for instance! so if we report a
|
|
// fence ID as pending that's not actually in flight, we might end up
|
|
// with an infinite wait.
|
|
//
|
|
// this is a bit subtle since it depends on unsigned wraparound for us
|
|
// to do the right thing.
|
|
|
|
// number of pending fences (next_fence_index has, by definition, not been written yet)
|
|
U32 num_pending = U32(gdraw->next_fence_index) - retired;
|
|
|
|
// position of the current fence in the "list of pending fences", counting
|
|
// from end (i.e. the "youngest" fence that we haven't even issued yet)
|
|
U32 pos_in_pending_list = U32(gdraw->next_fence_index - fence.value);
|
|
|
|
return pos_in_pending_list < num_pending;
|
|
}
|
|
|
|
static GDrawFence put_fence()
|
|
{
|
|
GDrawFence fence;
|
|
gdraw->fence_batch_counter = FENCE_BATCH_INTERVAL;
|
|
fence.value = gdraw->next_fence_index++;
|
|
cellGcmSetWriteTextureLabelInline(gdraw->gcm, gdraw->fence_label_index, (U32) fence.value);
|
|
return fence;
|
|
}
|
|
|
|
static GDrawFence put_backend_fence()
|
|
{
|
|
// careful with backend labels, they have no defined ordering wrt
|
|
// texture labels in the same command stream! if you use them, make
|
|
// sure there are no races.
|
|
GDrawFence fence;
|
|
gdraw->fence_batch_counter = FENCE_BATCH_INTERVAL;
|
|
fence.value = gdraw->next_fence_index++;
|
|
cellGcmSetWriteBackEndLabel(gdraw->gcm, gdraw->fence_label_index, (U32) fence.value);
|
|
return fence;
|
|
}
|
|
|
|
static void wait_on_fence(GDrawFence fence)
|
|
{
|
|
if (is_fence_pending(fence)) {
|
|
GDraw * RADRESTRICT gd = gdraw;
|
|
if (fence.value == gd->next_fence_index) // haven't even written this one yet!
|
|
put_fence();
|
|
cellGcmFlush(gd->gcm);
|
|
IggyWaitOnFence((void *) gd->fence_label, (U32) fence.value);
|
|
}
|
|
}
|
|
|
|
static U32 addr2offs(void *addr)
|
|
{
|
|
U32 offs;
|
|
int32_t res = cellGcmAddressToOffset(addr, &offs);
|
|
res = res; // avoid warning in release builds
|
|
assert(res == CELL_OK);
|
|
return offs;
|
|
}
|
|
|
|
static RADINLINE U32 vbufaddr2offs(GDraw * RADRESTRICT gd, void *addr)
|
|
{
|
|
return (U8 *) addr - gd->vbuf_base;
|
|
}
|
|
|
|
static RADINLINE U32 dynvbaddr2offs(GDraw * RADRESTRICT gd, void *addr)
|
|
{
|
|
return (U8 *) addr - gd->dynvb_base;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Texture/video memory defragmentation support code
|
|
//
|
|
|
|
static void gdraw_gpu_memcpy(GDrawHandleCache *c, void *dst, void *src, U32 num_bytes)
|
|
{
|
|
RR_UNUSED_VARIABLE(c);
|
|
U32 dstoffs = addr2offs(dst);
|
|
U32 srcoffs = addr2offs(src);
|
|
U32 pos = 0;
|
|
U32 edgelen = 4096;
|
|
U32 blocksize = edgelen*edgelen*4;
|
|
|
|
assert((num_bytes & 3) == 0);
|
|
|
|
while (pos < num_bytes) {
|
|
// peel off square image transfers of edgelen x edgelen as long as we can
|
|
while (pos + blocksize <= num_bytes) {
|
|
assert(((dstoffs + pos) & (CELL_GCM_SURFACE_LINEAR_ALIGN_OFFSET - 1)) == 0);
|
|
assert(((srcoffs + pos) & (CELL_GCM_SURFACE_LINEAR_ALIGN_OFFSET - 1)) == 0);
|
|
cellGcmSetTransferImage(gdraw->gcm, gdraw->transfer_mode,
|
|
dstoffs + pos, edgelen * 4, 0, 0,
|
|
srcoffs + pos, edgelen * 4, 0, 0,
|
|
edgelen, edgelen, 4);
|
|
pos += blocksize;
|
|
}
|
|
|
|
edgelen >>= 1;
|
|
blocksize >>= 2;
|
|
if (edgelen == 32)
|
|
break; // handle the rest (<4k "pixels") using a 1-line transfer
|
|
}
|
|
|
|
if (pos < num_bytes) {
|
|
U32 amount = num_bytes - pos;
|
|
assert(((dstoffs + pos) & (CELL_GCM_SURFACE_LINEAR_ALIGN_OFFSET - 1)) == 0);
|
|
assert(((srcoffs + pos) & (CELL_GCM_SURFACE_LINEAR_ALIGN_OFFSET - 1)) == 0);
|
|
cellGcmSetTransferImage(gdraw->gcm, gdraw->transfer_mode,
|
|
dstoffs + pos, amount, 0, 0,
|
|
srcoffs + pos, amount, 0, 0,
|
|
amount >> 2, 1, 4);
|
|
}
|
|
}
|
|
|
|
static void gdraw_defragment_cache(GDrawHandleCache *c, GDrawStats *stats)
|
|
{
|
|
GDrawFence fence;
|
|
S32 i;
|
|
int loc;
|
|
|
|
if (!gdraw_CanDefragment(c))
|
|
return;
|
|
|
|
// gpu needs to finish pending batches before it can start defragmenting
|
|
fence = put_fence();
|
|
cellGcmSetWaitLabel(gdraw->gcm, gdraw->fence_label_index, (U32) fence.value);
|
|
|
|
// actual defragmentation...
|
|
loc = c->is_vertex ? gdraw->vbuf_loc : gdraw->tex_loc;
|
|
gdraw->transfer_mode = (loc == CELL_GCM_LOCATION_LOCAL) ? CELL_GCM_TRANSFER_LOCAL_TO_LOCAL : CELL_GCM_TRANSFER_MAIN_TO_MAIN;
|
|
gdraw_DefragmentMain(c, 0, stats);
|
|
|
|
// go over all handles and adjustment pointers.
|
|
// pointer adjustment is different for textures than it is for vertex buffers
|
|
if (!c->is_vertex) {
|
|
for (i=0; i < c->max_handles; i++) {
|
|
GDrawHandle *h = &c->handle[i];
|
|
if (gdraw_res_is_managed(h)) {
|
|
h->handle.tex.gcm_ptr = h->raw_ptr;
|
|
h->handle.tex.gcm->offset = CELL_GCM_METHOD_DATA_TEXTURE_OFFSET(addr2offs(h->handle.tex.gcm_ptr));
|
|
}
|
|
}
|
|
} else {
|
|
for (i=0; i < c->max_handles; i++) {
|
|
GDrawHandle *h = &c->handle[i];
|
|
if (gdraw_res_is_managed(h)) {
|
|
SINTa ind_offs = ((U8 *) h->handle.vbuf.inds - (U8 *) h->handle.vbuf.verts);
|
|
h->handle.vbuf.verts = h->raw_ptr;
|
|
h->handle.vbuf.inds = (U8 *) h->raw_ptr + ind_offs;
|
|
}
|
|
}
|
|
}
|
|
|
|
// texture pointers have changed, so textures need to be reset
|
|
memset(&gdraw->active_tex, 0, sizeof(gdraw->active_tex));
|
|
|
|
// wait for RSX to finish, since we can't safely allocate memory in this cache
|
|
// while data is being moved around.
|
|
// could delay this until the next alloc, but the only place we ever call
|
|
// defragment from is just before an alloc, so there's no point.
|
|
// no backend fence race: we had RSX wait on completion of the last pending
|
|
// texture label before we started this.
|
|
cellGcmSetInvalidateTextureCache(gdraw->gcm, CELL_GCM_INVALIDATE_TEXTURE);
|
|
cellGcmSetInvalidateVertexCache(gdraw->gcm);
|
|
wait_on_fence(put_backend_fence());
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// RSX texture swizzling code.
|
|
//
|
|
// cellGcmConvertSwizzleFormat is too slow (one callback per pixel!)
|
|
// and RSX swizzle transfers require local mem for the unswizzled and
|
|
// swizzled versions at the same time during the conversion, which
|
|
// is unacceptable for large textures.
|
|
//
|
|
|
|
// NOTE:
|
|
//
|
|
// RSX texture swizzling uses Morton order inside squares of size
|
|
// minor x minor, where minor=min(w,h). If the texture is non-square,
|
|
// we might have multiple such squares, which are arranged linearly
|
|
// in memory.
|
|
//
|
|
// There's a nice way to step pixel coordinates given in Morton order
|
|
// incrementally (cf. "Morton-order Matrices Deserve Compilers' Support",
|
|
// D. S. Wise and J. D. Frens, https://www.cs.indiana.edu/cgi-bin/techreports/TRNNN.cgi?trnum=TR533).
|
|
//
|
|
|
|
// "insert" a 0 bit after each of the 16 low bits of x. used for morton encoding.
|
|
static U32 part1by1(U32 x)
|
|
{
|
|
x &= 0x0000ffff; // x = ---- ---- ---- ---- fedc ba98 7654 3210
|
|
x = (x ^ (x << 8)) & 0x00ff00ff; // x = ---- ---- fedc ba98 ---- ---- 7654 3210
|
|
x = (x ^ (x << 4)) & 0x0f0f0f0f; // x = ---- fedc ---- ba98 ---- 7654 ---- 3210
|
|
x = (x ^ (x << 2)) & 0x33333333; // x = --fe --dc --ba --98 --76 --54 --32 --10
|
|
x = (x ^ (x << 1)) & 0x55555555; // x = -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0
|
|
return x;
|
|
}
|
|
|
|
// same for the RSX modified morton encoding. here we need to know which axis we're dealing with.
|
|
static U32 rsx_morton_encode(S32 x, U32 axis, U32 minor)
|
|
{
|
|
// low bits are morton, high bits are linear
|
|
return (part1by1((U32) x & (minor - 1)) << axis) + ((U32) x & ~(minor - 1)) * minor;
|
|
}
|
|
|
|
#ifdef __SNC__
|
|
#define GCCSchedBarrier()
|
|
#else
|
|
#define GCCSchedBarrier() __asm__ volatile("")
|
|
#endif
|
|
|
|
// update a subrect of a 8bpp texture. w x h pixels are copied from src to
|
|
// dest at pixel position (dx,dy). the full destination texture is dw x dh pixels.
|
|
static void swizzle_subrect_8bpp(U8 * RADRESTRICT dest, U32 dx, U32 dy, U32 dw, U32 dh, U8 * RADRESTRICT src, U32 srcpitch, U32 w, U32 h)
|
|
{
|
|
// determine morton-order stepping constants.
|
|
U32 minor = RR_MIN(dw, dh);
|
|
S64 xinc = (S32) rsx_morton_encode(-1, 0, minor);
|
|
S64 yinc = (S32) rsx_morton_encode(-1, 1, minor);
|
|
|
|
// determine start offsets along x/y axis
|
|
U64 outx0 = rsx_morton_encode(dx, 0, minor);
|
|
U64 outy = rsx_morton_encode(dy, 1, minor);
|
|
|
|
while (h--) {
|
|
U8 *in = src - 1;
|
|
U8 *out = dest + outy;
|
|
U64 i,outx;
|
|
|
|
// copy and swizzle one line
|
|
outx = outx0;
|
|
#ifdef __SNC__
|
|
for (i=0; i < w; i++) {
|
|
out[outx] = *++in;
|
|
outx = (outx - xinc) & xinc; // outx++ with bit-interleaving
|
|
}
|
|
#else
|
|
// this loop does the same as the above but generates *way* better code on GCC.
|
|
for (i=0; i < w; i++) {
|
|
U64 v,ox;
|
|
|
|
v = __lbzu(1, in);
|
|
GCCSchedBarrier();
|
|
ox = outx;
|
|
outx = __subf(xinc, outx);
|
|
GCCSchedBarrier();
|
|
__stbx(v, out, ox);
|
|
outx = __and(outx, xinc);
|
|
}
|
|
#endif
|
|
|
|
src += srcpitch;
|
|
outy = (outy - yinc) & yinc; // outy++ with bit-interleaving
|
|
}
|
|
}
|
|
|
|
// update a subrect of a 32bpp texture (version for small rects)
|
|
static void swizzle_subrect_32bpp_small(U8 * RADRESTRICT dest, U32 dx, U32 dy, U32 dw, U32 dh, U8 * RADRESTRICT src, U32 srcpitch, U32 w, U32 h)
|
|
{
|
|
// determine morton-order stepping constants
|
|
U32 minor = RR_MIN(dw, dh);
|
|
S64 xinc = (S32) rsx_morton_encode(-1, 0, minor) * 4; // *4 since we work with byte offsets
|
|
S64 yinc = (S32) rsx_morton_encode(-1, 1, minor) * 4;
|
|
|
|
// determine start offsets along x/y axis
|
|
U64 outx0 = rsx_morton_encode(dx, 0, minor) * 4;
|
|
U64 outy = rsx_morton_encode(dy, 1, minor) * 4;
|
|
|
|
while (h--) {
|
|
U32 *in = (U32 *) src - 1;
|
|
U8 *out = dest + outy;
|
|
U64 i,outx;
|
|
|
|
// copy and swizzle one line
|
|
outx = outx0;
|
|
#ifdef __SNC__
|
|
for (i=0; i < w; i++) {
|
|
*((U32 *) (out + outx)) = *++in;
|
|
outx = (outx - xinc) & xinc; // outx++ with bit-interleaving
|
|
}
|
|
#else
|
|
// this loop does the same as the above but generates *way* better code on GCC.
|
|
for (i=0; i < w; i++) {
|
|
U64 v,ox;
|
|
|
|
v = __lwzu(4, in);
|
|
GCCSchedBarrier();
|
|
ox = outx;
|
|
outx = __subf(xinc, outx);
|
|
GCCSchedBarrier();
|
|
__stwx(v, out, ox);
|
|
outx = __and(outx, xinc);
|
|
}
|
|
#endif
|
|
|
|
src += srcpitch;
|
|
outy = (outy - yinc) & yinc; // outy++ with bit-interleaving
|
|
}
|
|
}
|
|
|
|
// update a subrect of a 32 bpp texture (main entry point)
|
|
static void swizzle_subrect_32bpp(U8 * RADRESTRICT dest, U32 dx, U32 dy, U32 dw, U32 dh, U8 * RADRESTRICT src, U32 srcpitch, U32 w, U32 h)
|
|
{
|
|
U32 minor;
|
|
U32 dx0,dy0,dx1,dy1;
|
|
U32 wa,ha;
|
|
U32 x,y;
|
|
S64 incx,incy; // output increment (per block)
|
|
U64 outx0,outx,outy; // output *offset* in x/y
|
|
U64 a,b,c,d;
|
|
|
|
// we have a fast path that updates aligned groups of 8x4 pixels at a time.
|
|
// use that for the bulk of the data, then puzzle the rest together from smaller rects.
|
|
if (w < 8 || h < 4) { // small block, don't bother
|
|
swizzle_subrect_32bpp_small(dest, dx, dy, dw, dh, src, srcpitch, w, h);
|
|
return;
|
|
}
|
|
|
|
// calculate the aligned part of the subrect that we process with the fast loop
|
|
// we cut the image up like this:
|
|
// +---------------------+ dy
|
|
// | |
|
|
// +---+---------------+-+ dy0
|
|
// | | | |
|
|
// | | | |
|
|
// +---+---------------+-+ dy1
|
|
// +---------------------+ dy+h
|
|
// dx dx0 dx1 dx+w
|
|
dx0 = (dx + 7) & ~7;
|
|
dy0 = (dy + 3) & ~3;
|
|
dx1 = (dx + w) & ~7;
|
|
dy1 = (dy + h) & ~3;
|
|
wa = dx1 - dx0;
|
|
ha = dy1 - dy0;
|
|
|
|
// if dy wasn't aligned, peel off the first couple of lines
|
|
if (dy < dy0) {
|
|
swizzle_subrect_32bpp_small(dest, dx, dy, dw, dh, src, srcpitch, w, dy0 - dy);
|
|
src += srcpitch * (dy0 - dy);
|
|
}
|
|
|
|
// take care of the left/right parts that aren't aligned
|
|
if (dx < dx0)
|
|
swizzle_subrect_32bpp_small(dest, dx, dy0, dw, dh, src, srcpitch, dx0 - dx, ha);
|
|
|
|
if (dx1 < dx+w)
|
|
swizzle_subrect_32bpp_small(dest, dx1, dy0, dw, dh, src + (dx1-dx)*4, srcpitch, (dx+w) - dx1, ha);
|
|
|
|
// main part: go through image in blocks of 8x4 pixels. (8x4 since that's one full cache line,
|
|
// so we write pixels one cache line at a time)
|
|
minor = RR_MIN(dw, dh);
|
|
incx = (S32) rsx_morton_encode(-8, 0, minor) * 4; // *4 since it's all byte offsets
|
|
incy = (S32) rsx_morton_encode(-4, 1, minor) * 4;
|
|
outx0 = rsx_morton_encode(dx0, 0, minor) * 4;
|
|
outy = rsx_morton_encode(dy0, 1, minor) * 4;
|
|
|
|
for (y=0; y < ha/4; ++y) {
|
|
// set up source line pointers for four lines
|
|
void *src0 = (src + (dx0 - dx) * 4 - 8);
|
|
void *src1 = ((U8 *) src0 + srcpitch);
|
|
void *src2 = ((U8 *) src1 + srcpitch);
|
|
void *src3 = ((U8 *) src2 + srcpitch);
|
|
outx = outx0;
|
|
|
|
// @TODO prefetches would probably be a good idea here, but need proper test data.
|
|
for (x=0; x < wa/8; ++x) {
|
|
void *out = (void *) (dest + outx + outy);
|
|
outx = (outx - incx) & incx; // advance pointer for next pixel
|
|
|
|
// just read 8x4 pixels and write them out in the z-order pattern
|
|
// we can use doubleword loads since even in Z-order, groups of two horizontal
|
|
// pixels don't get reordered. the rest is just the swizzle pattern expanded into offsets.
|
|
a = __ld(0x08, src0); b = __ld(0x08, src1); c = __ld(0x10, src0); d = __ld(0x10, src1);
|
|
__std(a, 0x00, out); __std(b, 0x08, out); __std(c, 0x10, out); __std(d, 0x18, out);
|
|
a = __ld(0x08, src2); b = __ld(0x08, src3); c = __ld(0x10, src2); d = __ld(0x10, src3);
|
|
__std(a, 0x20, out); __std(b, 0x28, out); __std(c, 0x30, out); __std(d, 0x38, out);
|
|
a = __ld(0x18, src0); b = __ld(0x18, src1); c = __ldu(0x20, src0); d = __ldu(0x20, src1);
|
|
__std(a, 0x40, out); __std(b, 0x48, out); __std(c, 0x50, out); __std(d, 0x58, out);
|
|
a = __ld(0x18, src2); b = __ld(0x18, src3); c = __ldu(0x20, src2); d = __ldu(0x20, src3);
|
|
__std(a, 0x60, out); __std(b, 0x68, out); __std(c, 0x70, out); __std(d, 0x78, out);
|
|
}
|
|
|
|
src += 4*srcpitch;
|
|
outy = (outy - incy) & incy;
|
|
}
|
|
|
|
// and finally, the last few lines
|
|
if (dy1 < dy+h)
|
|
swizzle_subrect_32bpp_small(dest, dx, dy1, dw, dh, src, srcpitch, w, (dy+h) - dy1);
|
|
}
|
|
|
|
static void api_free_resource(GDrawHandle *r)
|
|
{
|
|
// we just need to clean up our state cache
|
|
S32 i;
|
|
for (i=0; i < MAX_SAMPLERS; i++)
|
|
if (gdraw->active_tex[i] == (GDrawTexture *) r)
|
|
gdraw->active_tex[i] = NULL;
|
|
}
|
|
|
|
static void RADLINK gdraw_UnlockHandles(GDrawStats *stats)
|
|
{
|
|
RR_UNUSED_VARIABLE(stats);
|
|
gdraw_HandleCacheUnlockAll(gdraw->texturecache);
|
|
gdraw_HandleCacheUnlockAll(gdraw->vbufcache);
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Texture creation/updating/deletion
|
|
//
|
|
|
|
extern GDrawTexture *gdraw_GCM_WrappedTextureCreate(CellGcmTexture *gcm_tex)
|
|
{
|
|
GDrawStats stats;
|
|
memset(&stats, 0, sizeof(stats));
|
|
GDrawHandle *p = gdraw_res_alloc_begin(gdraw->texturecache, 0, &stats); // it may need to free one item to give us a handle
|
|
p->handle.tex.gcm_ptr = 0;
|
|
gdraw_HandleCacheAllocateEnd(p, 0, NULL, GDRAW_HANDLE_STATE_user_owned);
|
|
gdraw_GCM_WrappedTextureChange((GDrawTexture *) p, gcm_tex);
|
|
return (GDrawTexture *) p;
|
|
}
|
|
|
|
extern void gdraw_GCM_WrappedTextureChange(GDrawTexture *tex, CellGcmTexture *gcm_tex)
|
|
{
|
|
GDrawHandle *p = (GDrawHandle *) tex;
|
|
GcmTexture *gcm = p->handle.tex.gcm;
|
|
gcm->offset = CELL_GCM_METHOD_DATA_TEXTURE_OFFSET(gcm_tex->offset);
|
|
gcm->format = CELL_GCM_METHOD_DATA_TEXTURE_FORMAT(gcm_tex->location, gcm_tex->cubemap, gcm_tex->dimension, gcm_tex->format, gcm_tex->mipmap);
|
|
gcm->remap = gcm_tex->remap;
|
|
gcm->imagerect = CELL_GCM_METHOD_DATA_TEXTURE_IMAGE_RECT(gcm_tex->height, gcm_tex->width);
|
|
gcm->control3 = CELL_GCM_METHOD_DATA_TEXTURE_CONTROL3(gcm_tex->pitch, gcm_tex->depth);
|
|
gcm->width = gcm_tex->width;
|
|
gcm->height = gcm_tex->height;
|
|
gcm->pitch = gcm_tex->pitch;
|
|
gcm->swizzled = 0; // unused since we never upload to this
|
|
}
|
|
|
|
extern void gdraw_GCM_WrappedTextureDestroy(GDrawTexture *tex)
|
|
{
|
|
GDrawStats stats;
|
|
gdraw_res_free((GDrawHandle *) tex, &stats);
|
|
}
|
|
|
|
static void RADLINK gdraw_SetTextureUniqueID(GDrawTexture *tex, void *old_id, void *new_id)
|
|
{
|
|
GDrawHandle *p = (GDrawHandle *) tex;
|
|
// if this is still the handle it's thought to be, change the owner;
|
|
// if the owner *doesn't* match, then they're changing a stale handle, so ignore
|
|
if (p->owner == old_id)
|
|
p->owner = new_id;
|
|
}
|
|
|
|
static bool is_texture_swizzled(S32 w, S32 h)
|
|
{
|
|
// we swizzle a texture if it's pow2 and not a line texture
|
|
return h > 1 && (w & (w-1)) == 0 && (h & (h-1)) == 0;
|
|
}
|
|
|
|
static rrbool RADLINK gdraw_MakeTextureBegin(void *owner, S32 width, S32 height, gdraw_texture_format gformat, U32 flags, GDraw_MakeTexture_ProcessingInfo *p, GDrawStats *stats)
|
|
{
|
|
S32 bytes_pixel = 4;
|
|
GDrawHandle *t = NULL;
|
|
bool swizzled = false;
|
|
|
|
if (width > 4096 || height > 4096) {
|
|
IggyGDrawSendWarning(NULL, "GDraw %d x %d texture not supported by hardware (dimension size limit 4096)", width, height);
|
|
return false;
|
|
}
|
|
|
|
if (gformat == GDRAW_TEXTURE_FORMAT_font)
|
|
bytes_pixel = 1;
|
|
|
|
swizzled = is_texture_swizzled(width, height);
|
|
|
|
// determine the number of mipmaps to use and the size of the corresponding texture allocation
|
|
// this assumes linear texture memory layout
|
|
S32 pitch = width * bytes_pixel;
|
|
U32 mipmaps = 0;
|
|
S32 size = 0;
|
|
if (!swizzled) // RSX HW bug: linear texture pitch size should be at least 16 bytes
|
|
pitch = RR_MAX(pitch, 16);
|
|
|
|
do {
|
|
if (!swizzled)
|
|
size += pitch * (RR_MAX(height >> mipmaps, 1));
|
|
else
|
|
size += RR_MAX(pitch >> mipmaps, bytes_pixel) * RR_MAX(height >> mipmaps, 1);
|
|
mipmaps++;
|
|
} while ((flags & GDRAW_MAKETEXTURE_FLAGS_mipmap) && ((width >> mipmaps) || (height >> mipmaps)));
|
|
|
|
// allocate a handle and make room in the cache for this much data
|
|
assert(size != 0);
|
|
t = gdraw_res_alloc_begin(gdraw->texturecache, size, stats);
|
|
if (!t)
|
|
return false;
|
|
|
|
t->handle.tex.gcm_ptr = t->raw_ptr;
|
|
|
|
GcmTexture *tex = t->handle.tex.gcm;
|
|
U8 format;
|
|
U32 remap;
|
|
|
|
if (gformat == GDRAW_TEXTURE_FORMAT_font) {
|
|
format = CELL_GCM_TEXTURE_B8;
|
|
remap = TEXREMAP(CHAN_A, REMAP, FROM_B) | TEXREMAP(CHAN_R, ZERO, FROM_R) | TEXREMAP(CHAN_G, ZERO, FROM_G) | TEXREMAP(CHAN_B, ZERO, FROM_B);
|
|
} else {
|
|
format = CELL_GCM_TEXTURE_A8R8G8B8;
|
|
// remap rgba -> argb
|
|
remap = TEXREMAP(CHAN_A, REMAP, FROM_B) | TEXREMAP(CHAN_R, REMAP, FROM_A) | TEXREMAP(CHAN_G, REMAP, FROM_R) | TEXREMAP(CHAN_B, REMAP, FROM_G);
|
|
}
|
|
|
|
format |= (swizzled ? CELL_GCM_TEXTURE_SZ : CELL_GCM_TEXTURE_LN) | CELL_GCM_TEXTURE_NR;
|
|
U8 dimension = (height != 1) ? CELL_GCM_TEXTURE_DIMENSION_2 : CELL_GCM_TEXTURE_DIMENSION_1;
|
|
U8 cubemap = 0;
|
|
U8 depth = 1;
|
|
U8 location = gdraw->tex_loc;
|
|
|
|
tex->offset = CELL_GCM_METHOD_DATA_TEXTURE_OFFSET(addr2offs(t->handle.tex.gcm_ptr));
|
|
tex->format = CELL_GCM_METHOD_DATA_TEXTURE_FORMAT(location, cubemap, dimension, format, mipmaps);
|
|
tex->remap = remap;
|
|
tex->imagerect = CELL_GCM_METHOD_DATA_TEXTURE_IMAGE_RECT(height, width);
|
|
tex->control3 = CELL_GCM_METHOD_DATA_TEXTURE_CONTROL3(pitch, depth);
|
|
tex->width = width;
|
|
tex->height = height;
|
|
tex->pitch = pitch;
|
|
tex->swizzled = swizzled;
|
|
|
|
gdraw_HandleCacheAllocateEnd(t, size, owner, (flags & GDRAW_MAKETEXTURE_FLAGS_never_flush) ? GDRAW_HANDLE_STATE_pinned : GDRAW_HANDLE_STATE_locked);
|
|
stats->nonzero_flags |= GDRAW_STATS_alloc_tex;
|
|
stats->alloc_tex += 1;
|
|
stats->alloc_tex_bytes += size;
|
|
|
|
p->texture_type = GDRAW_TEXTURE_TYPE_rgba;
|
|
p->p0 = t;
|
|
|
|
if (swizzled || (flags & GDRAW_MAKETEXTURE_FLAGS_mipmap)) {
|
|
rrbool ok;
|
|
|
|
assert(p->temp_buffer != NULL);
|
|
ok = gdraw_MipmapBegin(&gdraw->mipmap, width, height, mipmaps,
|
|
bytes_pixel, p->temp_buffer, p->temp_buffer_bytes);
|
|
assert(ok); // this should never hit unless the temp_buffer is way too small
|
|
|
|
p->p1 = &gdraw->mipmap;
|
|
p->texture_data = gdraw->mipmap.pixels[0];
|
|
p->num_rows = gdraw->mipmap.bheight;
|
|
p->stride_in_bytes = gdraw->mipmap.pitch[0];
|
|
p->i0 = 0; // current output y
|
|
} else {
|
|
p->p1 = 0;
|
|
p->texture_data = (U8 *) t->handle.tex.gcm_ptr;
|
|
p->num_rows = height;
|
|
p->stride_in_bytes = t->handle.tex.gcm->pitch;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
static rrbool RADLINK gdraw_MakeTextureMore(GDraw_MakeTexture_ProcessingInfo *p)
|
|
{
|
|
GDrawHandle *t = (GDrawHandle *) p->p0;
|
|
|
|
if (p->p1) {
|
|
GcmTexture *tex = t->handle.tex.gcm;
|
|
GDrawMipmapContext *c = (GDrawMipmapContext *) p->p1;
|
|
U32 pitch = tex->pitch;
|
|
U32 width = tex->width;
|
|
U32 height = tex->height;
|
|
U32 bheight = c->bheight;
|
|
U32 level = 0;
|
|
U32 outy = p->i0;
|
|
U8 *mipstart = (U8 *) t->handle.tex.gcm_ptr;
|
|
|
|
if (outy >= tex->height) // wait, we've already processed the whole texture!
|
|
return false;
|
|
|
|
do {
|
|
// copy image data to destination
|
|
if (!tex->swizzled) {
|
|
U8 *dest = mipstart + (outy >> level) * pitch;
|
|
U8 *src = c->pixels[level];
|
|
U32 y;
|
|
|
|
for (y=0; y < bheight; y++) {
|
|
memcpy(dest, src, width * c->bpp);
|
|
dest += pitch;
|
|
src += c->pitch[level];
|
|
}
|
|
|
|
mipstart += pitch * height;
|
|
} else {
|
|
if (c->bpp == 4)
|
|
swizzle_subrect_32bpp(mipstart, 0, outy >> level, width, height, c->pixels[level], c->pitch[level], width, bheight);
|
|
else if (c->bpp == 1)
|
|
swizzle_subrect_8bpp(mipstart, 0, outy >> level, width, height, c->pixels[level], c->pitch[level], width, bheight);
|
|
else {
|
|
assert(c->bpp == 1 || c->bpp == 4);
|
|
}
|
|
|
|
mipstart += width * height * c->bpp;
|
|
}
|
|
|
|
width = RR_MAX(width >> 1, 1);
|
|
height = RR_MAX(height >> 1, 1);
|
|
bheight = RR_MAX(bheight >> 1, 1);
|
|
} while (gdraw_MipmapAddLines(c, ++level));
|
|
|
|
// next chunk please!
|
|
p->i0 += p->num_rows; // increment y
|
|
p->texture_data = c->pixels[0];
|
|
p->num_rows = c->bheight = RR_MIN(c->bheight, tex->height - p->i0);
|
|
return true;
|
|
} else
|
|
return false; // what do you mean, "more"? you got the whole image already!
|
|
}
|
|
|
|
static GDrawTexture * RADLINK gdraw_MakeTextureEnd(GDraw_MakeTexture_ProcessingInfo *p, GDrawStats *stats)
|
|
{
|
|
RR_UNUSED_VARIABLE(stats);
|
|
if (p->p1)
|
|
gdraw_MakeTextureMore(p); // submit last piece of data using more
|
|
|
|
return (GDrawTexture *) p->p0;
|
|
}
|
|
|
|
static rrbool RADLINK gdraw_UpdateTextureBegin(GDrawTexture *t, void *unique_id, GDrawStats *stats)
|
|
{
|
|
RR_UNUSED_VARIABLE(stats);
|
|
return gdraw_HandleCacheLock((GDrawHandle *) t, unique_id);
|
|
}
|
|
|
|
static void RADLINK gdraw_UpdateTextureRect(GDrawTexture *t, void *unique_id, S32 x, S32 y, S32 stride, S32 w, S32 h, U8 *samples, gdraw_texture_format format)
|
|
{
|
|
RR_UNUSED_VARIABLE(unique_id);
|
|
GDrawHandle *s = (GDrawHandle *) t;
|
|
S32 bpp = (format == GDRAW_TEXTURE_FORMAT_font) ? 1 : 4, bpl = bpp * w;
|
|
GcmTexture *tex = s->handle.tex.gcm;
|
|
U8 *texptr = (U8 *) s->handle.tex.gcm_ptr;
|
|
|
|
wait_on_fence(s->fence); // make sure it's not active
|
|
|
|
if (!tex->swizzled) {
|
|
S32 dpitch = tex->pitch;
|
|
U8 *src = samples;
|
|
U8 *dst = texptr + y * dpitch + x * bpp;
|
|
while (h--) {
|
|
memcpy(dst, src, bpl);
|
|
dst += dpitch;
|
|
src += stride;
|
|
}
|
|
} else {
|
|
if (format == GDRAW_TEXTURE_FORMAT_font)
|
|
swizzle_subrect_8bpp(texptr, x, y, tex->width, tex->height, samples, stride, w, h);
|
|
else
|
|
swizzle_subrect_32bpp(texptr, x, y, tex->width, tex->height, samples, stride, w, h);
|
|
}
|
|
}
|
|
|
|
static void RADLINK gdraw_UpdateTextureEnd(GDrawTexture *t, void *unique_id, GDrawStats *stats)
|
|
{
|
|
RR_UNUSED_VARIABLE(unique_id);
|
|
RR_UNUSED_VARIABLE(stats);
|
|
gdraw_HandleCacheUnlock((GDrawHandle *) t);
|
|
}
|
|
|
|
static void RADLINK gdraw_FreeTexture(GDrawTexture *tt, void *unique_id, GDrawStats *stats)
|
|
{
|
|
GDrawHandle *t = (GDrawHandle *) tt;
|
|
assert(t != NULL); // @GDRAW_ASSERT
|
|
if (t->owner == unique_id || unique_id == NULL) {
|
|
if (t->cache == &gdraw->rendertargets) {
|
|
gdraw_HandleCacheUnlock(t);
|
|
// cache it by simply not freeing it
|
|
return;
|
|
}
|
|
|
|
gdraw_res_kill(t, stats);
|
|
}
|
|
}
|
|
|
|
static rrbool RADLINK gdraw_TryToLockTexture(GDrawTexture *t, void *unique_id, GDrawStats *stats)
|
|
{
|
|
RR_UNUSED_VARIABLE(stats);
|
|
return gdraw_HandleCacheLock((GDrawHandle *) t, unique_id);
|
|
}
|
|
|
|
static void RADLINK gdraw_DescribeTexture(GDrawTexture *tex, GDraw_Texture_Description *desc)
|
|
{
|
|
GDrawHandle *p = (GDrawHandle *) tex;
|
|
desc->width = p->handle.tex.gcm->width;
|
|
desc->height = p->handle.tex.gcm->height;
|
|
desc->size_in_bytes = p->bytes;
|
|
}
|
|
|
|
static void RADLINK gdraw_SetAntialiasTexture(S32 width, U8 *rgba)
|
|
{
|
|
if (gdraw->aa_tex.offset)
|
|
return;
|
|
|
|
S32 pitch = RR_MAX(width * 4, 16); // RSX HW bug: linear textures with pitch<16 cause trouble (we're not swizzled, but play it safe anyway).
|
|
U8 *data = (U8 *)gdraw_arena_alloc(&gdraw->local_arena, pitch, CELL_GCM_TEXTURE_SWIZZLE_ALIGN_OFFSET);
|
|
if (!data)
|
|
return;
|
|
|
|
// since it's a line texture, we can safely mark it as swizzled.
|
|
gdraw->aa_tex.offset = CELL_GCM_METHOD_DATA_TEXTURE_OFFSET(addr2offs(data));
|
|
gdraw->aa_tex.format = CELL_GCM_METHOD_DATA_TEXTURE_FORMAT(CELL_GCM_LOCATION_LOCAL, 0, CELL_GCM_TEXTURE_DIMENSION_1,
|
|
CELL_GCM_TEXTURE_A8R8G8B8 | CELL_GCM_TEXTURE_SZ | CELL_GCM_TEXTURE_NR, 1);
|
|
gdraw->aa_tex.remap = TEXREMAP(CHAN_A, REMAP, FROM_B) | TEXREMAP(CHAN_R, REMAP, FROM_A) | TEXREMAP(CHAN_G, REMAP, FROM_R) | TEXREMAP(CHAN_B, REMAP, FROM_G);
|
|
gdraw->aa_tex.imagerect = CELL_GCM_METHOD_DATA_TEXTURE_IMAGE_RECT(1, width);
|
|
gdraw->aa_tex.control3 = CELL_GCM_METHOD_DATA_TEXTURE_CONTROL3(pitch, 1);
|
|
gdraw->aa_tex.width = width;
|
|
gdraw->aa_tex.height = 1;
|
|
gdraw->aa_tex.pitch = pitch;
|
|
memcpy(data, rgba, width * 4);
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Vertex buffer creation/deletion
|
|
//
|
|
|
|
static rrbool RADLINK gdraw_MakeVertexBufferBegin(void *unique_id, gdraw_vformat vformat, S32 vbuf_size, S32 ibuf_size, GDraw_MakeVertexBuffer_ProcessingInfo *p, GDrawStats *stats)
|
|
{
|
|
RR_UNUSED_VARIABLE(vformat);
|
|
GDrawHandle *vb;
|
|
vb = gdraw_res_alloc_begin(gdraw->vbufcache, vbuf_size + ibuf_size, stats);
|
|
if (!vb)
|
|
return false;
|
|
|
|
vb->handle.vbuf.verts = vb->raw_ptr;
|
|
vb->handle.vbuf.inds = (U8 *) vb->raw_ptr + vbuf_size;
|
|
|
|
p->p0 = vb;
|
|
p->vertex_data = (U8 *) vb->handle.vbuf.verts;
|
|
p->index_data = (U8 *) vb->handle.vbuf.inds;
|
|
p->vertex_data_length = vbuf_size;
|
|
p->index_data_length = ibuf_size;
|
|
|
|
gdraw_HandleCacheAllocateEnd(vb, vbuf_size + ibuf_size, unique_id, GDRAW_HANDLE_STATE_locked);
|
|
|
|
return true;
|
|
}
|
|
|
|
static rrbool RADLINK gdraw_MakeVertexBufferMore(GDraw_MakeVertexBuffer_ProcessingInfo *p)
|
|
{
|
|
RR_UNUSED_VARIABLE(p);
|
|
assert(0);
|
|
return false;
|
|
}
|
|
|
|
static GDrawVertexBuffer * RADLINK gdraw_MakeVertexBufferEnd(GDraw_MakeVertexBuffer_ProcessingInfo *p, GDrawStats *stats)
|
|
{
|
|
RR_UNUSED_VARIABLE(stats);
|
|
return (GDrawVertexBuffer *) p->p0;
|
|
}
|
|
|
|
static rrbool RADLINK gdraw_TryLockVertexBuffer(GDrawVertexBuffer *vb, void *unique_id, GDrawStats *stats)
|
|
{
|
|
RR_UNUSED_VARIABLE(stats);
|
|
return gdraw_HandleCacheLock((GDrawHandle *) vb, unique_id);
|
|
}
|
|
|
|
static void RADLINK gdraw_FreeVertexBuffer(GDrawVertexBuffer *vb, void *unique_id, GDrawStats *stats)
|
|
{
|
|
GDrawHandle *h = (GDrawHandle *) vb;
|
|
assert(h != NULL); // @GDRAW_ASSERT
|
|
if (h->owner == unique_id)
|
|
gdraw_res_kill(h, stats);
|
|
}
|
|
|
|
static void RADLINK gdraw_DescribeVertexBuffer(GDrawVertexBuffer *vbuf, GDraw_VertexBuffer_Description *desc)
|
|
{
|
|
GDrawHandle *p = (GDrawHandle *) vbuf;
|
|
desc->size_in_bytes = p->bytes;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Create/free (or cache) framebuffer-sized textures
|
|
//
|
|
|
|
static GDrawHandle *get_color_rendertarget(GDrawStats *stats)
|
|
{
|
|
RR_UNUSED_VARIABLE(stats);
|
|
GDrawHandle *t;
|
|
S32 pitch = gdraw->rt_pitch;
|
|
S32 size = pitch * gdraw->frametex_height;
|
|
|
|
t = gdraw_HandleCacheGetLRU(&gdraw->rendertargets);
|
|
if (t) {
|
|
gdraw_HandleCacheLock(t, (void *) 1);
|
|
return t;
|
|
}
|
|
|
|
t = gdraw_HandleCacheAllocateBegin(&gdraw->rendertargets);
|
|
if (!t) {
|
|
IggyGDrawSendWarning(NULL, "GDraw rendertarget allocation failed: hit handle limit");
|
|
return t;
|
|
}
|
|
|
|
void *ptr = gdraw_arena_alloc(&gdraw->rt_arena, size, 1);
|
|
if (!ptr) {
|
|
IggyGDrawSendWarning(NULL, "GDraw rendertarget allocation failed: out of rendertarget texture memory");
|
|
gdraw_HandleCacheAllocateFail(t);
|
|
return NULL;
|
|
}
|
|
|
|
t->fence = get_next_fence(); // we're about to start using it immediately, so...
|
|
t->raw_ptr = NULL;
|
|
|
|
t->handle.tex.gcm_ptr = ptr;
|
|
|
|
GcmTexture *tex = t->handle.tex.gcm;
|
|
tex->offset = CELL_GCM_METHOD_DATA_TEXTURE_OFFSET(addr2offs(t->handle.tex.gcm_ptr));
|
|
tex->format = CELL_GCM_METHOD_DATA_TEXTURE_FORMAT(gdraw->rt_loc, 0, CELL_GCM_TEXTURE_DIMENSION_2,
|
|
CELL_GCM_TEXTURE_A8R8G8B8 | CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_NR, 1);
|
|
tex->remap = TEXREMAP(CHAN_A, REMAP, FROM_A) | TEXREMAP(CHAN_R, REMAP, FROM_R) | TEXREMAP(CHAN_G, REMAP, FROM_G) | TEXREMAP(CHAN_B, REMAP, FROM_B);
|
|
tex->imagerect = CELL_GCM_METHOD_DATA_TEXTURE_IMAGE_RECT(gdraw->frametex_height, gdraw->frametex_width);
|
|
tex->control3 = CELL_GCM_METHOD_DATA_TEXTURE_CONTROL3(pitch, 1);
|
|
tex->width = gdraw->frametex_width;
|
|
tex->height = gdraw->frametex_height;
|
|
tex->pitch = pitch;
|
|
|
|
gdraw_HandleCacheAllocateEnd(t, size, (void *) 1, GDRAW_HANDLE_STATE_locked);
|
|
return t;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Rendering helpers
|
|
//
|
|
|
|
static void set_rsx_texture(CellGcmContextData * RADRESTRICT gcm, U32 sampler, GcmTexture * RADRESTRICT tex, U32 wrap, U32 nearest)
|
|
{
|
|
static const U32 addrmodes[] = {
|
|
// GDRAW_WRAP_clamp
|
|
CELL_GCM_METHOD_DATA_TEXTURE_ADDRESS(CELL_GCM_TEXTURE_CLAMP_TO_EDGE, CELL_GCM_TEXTURE_CLAMP_TO_EDGE, CELL_GCM_TEXTURE_CLAMP_TO_EDGE, CELL_GCM_TEXTURE_UNSIGNED_REMAP_NORMAL, CELL_GCM_TEXTURE_ZFUNC_NEVER, 0, 0),
|
|
// GDRAW_WRAP_repeat
|
|
CELL_GCM_METHOD_DATA_TEXTURE_ADDRESS(CELL_GCM_TEXTURE_WRAP, CELL_GCM_TEXTURE_WRAP, CELL_GCM_TEXTURE_CLAMP_TO_EDGE, CELL_GCM_TEXTURE_UNSIGNED_REMAP_NORMAL, CELL_GCM_TEXTURE_ZFUNC_NEVER, 0, 0),
|
|
// GDRAW_WRAP_mirror
|
|
CELL_GCM_METHOD_DATA_TEXTURE_ADDRESS(CELL_GCM_TEXTURE_MIRROR, CELL_GCM_TEXTURE_MIRROR, CELL_GCM_TEXTURE_CLAMP_TO_EDGE, CELL_GCM_TEXTURE_UNSIGNED_REMAP_NORMAL, CELL_GCM_TEXTURE_ZFUNC_NEVER, 0, 0),
|
|
};
|
|
|
|
static const U32 filtermodes[] = {
|
|
CELL_GCM_METHOD_DATA_TEXTURE_FILTER(0, CELL_GCM_TEXTURE_LINEAR_LINEAR, CELL_GCM_TEXTURE_LINEAR, CELL_GCM_TEXTURE_CONVOLUTION_QUINCUNX),
|
|
CELL_GCM_METHOD_DATA_TEXTURE_FILTER(0, CELL_GCM_TEXTURE_LINEAR_LINEAR, CELL_GCM_TEXTURE_NEAREST, CELL_GCM_TEXTURE_CONVOLUTION_QUINCUNX),
|
|
};
|
|
|
|
assert(wrap < sizeof(addrmodes) / sizeof(addrmodes[0]));
|
|
assert(nearest < sizeof(filtermodes) / sizeof(filtermodes[0]));
|
|
|
|
CommandData * RADRESTRICT cmd = put_command(gcm, 11);
|
|
cmd->w0 = CELL_GCM_METHOD_HEADER_TEXTURE_OFFSET(sampler, 8);
|
|
cmd->w1 = tex->offset;
|
|
cmd->w2 = tex->format;
|
|
cmd->w3 = addrmodes[wrap];
|
|
cmd->w4 = CELL_GCM_METHOD_DATA_TEXTURE_CONTROL0(1, 0, 12<<8, 0);
|
|
cmd->w5 = tex->remap;
|
|
cmd->w6 = filtermodes[nearest];
|
|
cmd->w7 = tex->imagerect;
|
|
cmd->w8 = CELL_GCM_METHOD_DATA_TEXTURE_BORDER_COLOR(0);
|
|
cmd->w9 = CELL_GCM_METHOD_HEADER_TEXTURE_CONTROL3(sampler, 1);
|
|
cmd->wa = tex->control3;
|
|
}
|
|
|
|
static inline void disable_scissor(bool force)
|
|
{
|
|
if (force || gdraw->scissor_state) {
|
|
// set whole viewport as scissor test
|
|
gdraw->scissor_state = 0;
|
|
cellGcmSetScissor(gdraw->gcm, gdraw->cview.x, gdraw->cview.y, gdraw->cview.w, gdraw->cview.h);
|
|
}
|
|
}
|
|
|
|
static void set_viewport_raw(S32 x, S32 y, S32 w, S32 h)
|
|
{
|
|
float scale[4] = { w*0.5f, -h*0.5f, 0.5f, 0.0f };
|
|
float offset[4] = { x + scale[0], y - scale[1], 0.5f, 0.0f };
|
|
cellGcmSetViewport(gdraw->gcm, x, y, w, h, 0.0f, 1.0f, scale, offset);
|
|
|
|
gdraw->cview.x = x;
|
|
gdraw->cview.y = y;
|
|
gdraw->cview.w = w;
|
|
gdraw->cview.h = h;
|
|
disable_scissor(true);
|
|
}
|
|
|
|
static void set_projection_raw(S32 x0, S32 x1, S32 y0, S32 y1)
|
|
{
|
|
gdraw->projection[0] = 2.0f / (x1-x0);
|
|
gdraw->projection[1] = 2.0f / (y1-y0);
|
|
gdraw->projection[2] = (x1 + x0) / (F32) (x0 - x1);
|
|
gdraw->projection[3] = (y1 + y0) / (F32) (y0 - y1);
|
|
cellGcmSetVertexProgramConstants(gdraw->gcm, VVAR_viewproj, 4, gdraw->projection);
|
|
}
|
|
|
|
static void set_viewport(void)
|
|
{
|
|
if (gdraw->in_blur) {
|
|
set_viewport_raw(gdraw->cview.x, gdraw->cview.y, gdraw->cview.w, gdraw->cview.h);
|
|
return;
|
|
}
|
|
|
|
if (gdraw->cur == gdraw->frame) // if the rendering stack is empty
|
|
// render a tile-sized region to the user-request tile location
|
|
set_viewport_raw(gdraw->vx, gdraw->vy, gdraw->tw, gdraw->th);
|
|
else if (gdraw->cur->cached)
|
|
set_viewport_raw(0, 0, gdraw->cur->width, gdraw->cur->height);
|
|
else
|
|
// if on the render stack, draw a padded-tile-sized region at the origin
|
|
set_viewport_raw(0, 0, gdraw->tpw, gdraw->tph);
|
|
}
|
|
|
|
static void set_projection(void)
|
|
{
|
|
if (gdraw->in_blur) return;
|
|
if (gdraw->cur == gdraw->frame) // if the render stack is empty
|
|
set_projection_raw(gdraw->tx0,gdraw->tx0+gdraw->tw,gdraw->ty0+gdraw->th,gdraw->ty0);
|
|
else if (gdraw->cur->cached)
|
|
set_projection_raw(gdraw->cur->base_x, gdraw->cur->base_x + gdraw->cur->width, gdraw->cur->base_y + gdraw->cur->height, gdraw->cur->base_y);
|
|
else
|
|
set_projection_raw(gdraw->tx0p,gdraw->tx0p+gdraw->tpw,gdraw->ty0p+gdraw->tph,gdraw->ty0p);
|
|
}
|
|
|
|
static void set_common_renderstate()
|
|
{
|
|
CellGcmContextData *gcm = gdraw->gcm;
|
|
S32 i;
|
|
|
|
// all the render states we never change while drawing
|
|
cellGcmSetCullFaceEnable(gcm, CELL_GCM_FALSE);
|
|
cellGcmSetCylindricalWrap(gcm, 0, 0);
|
|
cellGcmSetDepthBoundsTestEnable(gcm, CELL_GCM_FALSE);
|
|
cellGcmSetLogicOpEnable(gcm, CELL_GCM_FALSE);
|
|
cellGcmSetPolygonOffset(gcm, 0.0f, 0.0f);
|
|
cellGcmSetPolygonStippleEnable(gcm, CELL_GCM_FALSE);
|
|
cellGcmSetPolySmoothEnable(gcm, CELL_GCM_FALSE);
|
|
cellGcmSetShadeMode(gcm, CELL_GCM_SMOOTH);
|
|
cellGcmSetUserClipPlaneControl(gcm, CELL_GCM_USER_CLIP_PLANE_DISABLE, CELL_GCM_USER_CLIP_PLANE_DISABLE, CELL_GCM_USER_CLIP_PLANE_DISABLE, CELL_GCM_USER_CLIP_PLANE_DISABLE, CELL_GCM_USER_CLIP_PLANE_DISABLE, CELL_GCM_USER_CLIP_PLANE_DISABLE);
|
|
cellGcmSetStencilOp(gcm, CELL_GCM_KEEP, CELL_GCM_KEEP, CELL_GCM_REPLACE);
|
|
cellGcmSetZcullEnable(gcm, 0, 0);
|
|
cellGcmSetTwoSidedStencilTestEnable(gcm, CELL_GCM_FALSE);
|
|
cellGcmSetTwoSideLightEnable(gcm, CELL_GCM_FALSE);
|
|
cellGcmSetClearDepthStencil(gcm, 0xffffff00);
|
|
cellGcmSetClearColor(gcm, 0);
|
|
cellGcmSetBlendEquation(gcm, CELL_GCM_FUNC_ADD, CELL_GCM_FUNC_ADD);
|
|
cellGcmSetColorMask(gcm, COLOR_MASK_ALL);
|
|
cellGcmSetAlphaTestEnable(gcm, CELL_GCM_FALSE);
|
|
|
|
// clear all vertex attr inputs
|
|
for (i=0; i < 16; i++)
|
|
cellGcmSetVertexDataArray(gcm, i, 0, 0, 0, CELL_GCM_VERTEX_F, CELL_GCM_LOCATION_LOCAL, 0);
|
|
|
|
cellGcmSetTransferLocation(gcm,CELL_GCM_LOCATION_LOCAL);
|
|
|
|
cellGcmSetVertexDataBase(gcm, 0, 0);
|
|
|
|
// load all our vertex programs into their respective slots so we can switch between them
|
|
// by just changing the start slot
|
|
for (i=0; i < GDRAW_vformat__basic_count; i++) {
|
|
ProgramWithCachedVariableLocations *p = &gdraw->vprog[i];
|
|
if (p->program)
|
|
cellGcmSetVertexProgram(gcm, p->program, p->ucode);
|
|
}
|
|
cellGcmSetVertexAttribInputMask(gcm, 0x3); // enable position+attr1 input (all we ever use)
|
|
|
|
// initialize all our vertex constants to zero
|
|
void *vconst;
|
|
cellGcmSetVertexProgramConstantsPointer(gcm, 0, VVAR_count, &vconst);
|
|
memset(vconst, 0, 4 * sizeof(F32) * VVAR_count);
|
|
|
|
// reset our state caching
|
|
for (i=0; i < MAX_SAMPLERS; i++) {
|
|
gdraw->active_tex[i] = NULL;
|
|
cellGcmSetTextureControl(gcm, i, CELL_GCM_FALSE, 0, 0, 0);
|
|
}
|
|
|
|
assert(gdraw->aa_tex.offset != 0); // if you hit this, your initialization is screwed up.
|
|
set_rsx_texture(gdraw->gcm, AATEX_SAMPLER, &gdraw->aa_tex, GDRAW_WRAP_clamp, 0);
|
|
|
|
gdraw->cur_fprog = NULL;
|
|
gdraw->vert_format = -1;
|
|
gdraw->scissor_state = ~0u;
|
|
gdraw->blend_mode = -1;
|
|
gdraw->stencil_key = ~0u;
|
|
gdraw->z_key = ~0u;
|
|
}
|
|
|
|
static void clear_renderstate(void)
|
|
{
|
|
CellGcmContextData *gcm = gdraw->gcm;
|
|
cellGcmSetStencilTestEnable(gcm, CELL_GCM_FALSE);
|
|
cellGcmSetColorMask(gcm, COLOR_MASK_ALL);
|
|
cellGcmSetDepthTestEnable(gcm, CELL_GCM_FALSE);
|
|
cellGcmSetDepthFunc(gcm, CELL_GCM_LESS);
|
|
cellGcmSetDepthMask(gcm, CELL_GCM_FALSE);
|
|
cellGcmSetBlendEnable(gcm, CELL_GCM_FALSE);
|
|
disable_scissor(false);
|
|
|
|
gdraw->scissor_state = 0;
|
|
gdraw->blend_mode = GDRAW_BLEND_none;
|
|
gdraw->stencil_key = 0;
|
|
gdraw->z_key = 0;
|
|
}
|
|
|
|
static void set_render_target()
|
|
{
|
|
GcmTexture *tex = NULL;
|
|
S32 i;
|
|
CellGcmSurface surf = gdraw->main_surface;
|
|
|
|
if (gdraw->cur->color_buffer)
|
|
tex = gdraw->cur->color_buffer->handle.tex.gcm;
|
|
|
|
if (tex) {
|
|
surf.colorLocation[0] = gdraw->rt_loc;
|
|
surf.colorOffset[0] = tex->offset;
|
|
surf.colorPitch[0] = tex->pitch;
|
|
surf.x = 0;
|
|
surf.y = 0;
|
|
surf.width = tex->width;
|
|
surf.height = tex->height;
|
|
}
|
|
|
|
cellGcmSetSurface(gdraw->gcm, &surf);
|
|
|
|
// invalidate current textures (need to reset them to force L1 texture cache flush)
|
|
for (i=0; i < MAX_SAMPLERS; ++i)
|
|
gdraw->active_tex[i] = NULL;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Begin rendering for a frame
|
|
//
|
|
|
|
void gdraw_GCM_SetTileOrigin(CellGcmSurface *surf, S32 x, S32 y)
|
|
{
|
|
assert(surf->colorFormat == CELL_GCM_SURFACE_A8R8G8B8);
|
|
assert(surf->depthFormat == CELL_GCM_SURFACE_Z24S8);
|
|
|
|
gdraw->main_surface = *surf;
|
|
gdraw->vx = x;
|
|
gdraw->vy = y;
|
|
|
|
for (int i=1; i<4; i++) {
|
|
gdraw->main_surface.colorLocation[i] = CELL_GCM_LOCATION_LOCAL;
|
|
gdraw->main_surface.colorOffset[i] = 0;
|
|
gdraw->main_surface.colorPitch[i] = 64;
|
|
}
|
|
}
|
|
|
|
static void RADLINK gdraw_SetViewSizeAndWorldScale(S32 w, S32 h, F32 scalex, F32 scaley)
|
|
{
|
|
memset(gdraw->frame, 0, sizeof(gdraw->frame));
|
|
gdraw->cur = gdraw->frame;
|
|
gdraw->fw = w;
|
|
gdraw->fh = h;
|
|
gdraw->tw = w;
|
|
gdraw->th = h;
|
|
gdraw->world_to_pixel[0] = scalex;
|
|
gdraw->world_to_pixel[1] = scaley;
|
|
set_viewport();
|
|
}
|
|
|
|
// must include anything necessary for texture creation/update
|
|
static void RADLINK gdraw_RenderingBegin(void)
|
|
{
|
|
set_common_renderstate();
|
|
}
|
|
|
|
static void RADLINK gdraw_RenderingEnd(void)
|
|
{
|
|
clear_renderstate();
|
|
}
|
|
|
|
static void RADLINK gdraw_RenderTileBegin(S32 x0, S32 y0, S32 x1, S32 y1, S32 pad, GDrawStats *stats)
|
|
{
|
|
RR_UNUSED_VARIABLE(stats);
|
|
if (x0 == 0 && y0 == 0 && x1 == gdraw->fw && y1 == gdraw->fh)
|
|
pad = 0;
|
|
|
|
gdraw->tx0 = x0;
|
|
gdraw->ty0 = y0;
|
|
gdraw->tw = x1-x0;
|
|
gdraw->th = y1-y0;
|
|
|
|
// padded region
|
|
gdraw->tx0p = RR_MAX(x0 - pad, 0);
|
|
gdraw->ty0p = RR_MAX(y0 - pad, 0);
|
|
gdraw->tpw = RR_MIN(x1 + pad, gdraw->fw) - gdraw->tx0p;
|
|
gdraw->tph = RR_MIN(y1 + pad, gdraw->fh) - gdraw->ty0p;
|
|
|
|
// check if this fits inside our rendertarget buffers
|
|
// if you hit this assert, the rendertarget_width / rendertarget_height
|
|
// in your gdraw_GCM_MemoryConfig is too small!
|
|
assert(gdraw->tpw <= gdraw->frametex_width && gdraw->tph <= gdraw->frametex_height);
|
|
|
|
// need to clear both the requested area of the viewport
|
|
// *and* the area the rendertargets will use... ideally
|
|
// we'd work out the minimal rectangles to clear, or just
|
|
// always clear the whole thing
|
|
set_render_target();
|
|
set_viewport_raw(0, 0, gdraw->tpw, gdraw->tph);
|
|
cellGcmSetStencilMask(gdraw->gcm, 0xff);
|
|
cellGcmSetClearSurface(gdraw->gcm, CELL_GCM_CLEAR_Z | CELL_GCM_CLEAR_S);
|
|
|
|
set_viewport();
|
|
set_projection();
|
|
|
|
// if the first clear didn't get the actual viewport region, clear it
|
|
if (gdraw->tpw < gdraw->vx+gdraw->tw || gdraw->tph < gdraw->vy+gdraw->th)
|
|
cellGcmSetClearSurface(gdraw->gcm, CELL_GCM_CLEAR_Z | CELL_GCM_CLEAR_S);
|
|
}
|
|
|
|
static void RADLINK gdraw_RenderTileEnd(GDrawStats *stats)
|
|
{
|
|
gdraw->tile_end_fence = put_fence();
|
|
|
|
// reap once per frame even if there are no allocs
|
|
gdraw_res_reap(gdraw->texturecache, stats);
|
|
gdraw_res_reap(gdraw->vbufcache, stats);
|
|
}
|
|
|
|
void gdraw_GCM_NoMoreGDrawThisFrame(void)
|
|
{
|
|
gdraw_HandleCacheTick(gdraw->texturecache, gdraw->tile_end_fence);
|
|
gdraw_HandleCacheTick(gdraw->vbufcache, gdraw->tile_end_fence);
|
|
}
|
|
|
|
#define MAX_DEPTH_VALUE (1 << 13)
|
|
|
|
static void RADLINK gdraw_GetInfo(GDrawInfo *d)
|
|
{
|
|
d->num_stencil_bits = 8;
|
|
d->max_id = MAX_DEPTH_VALUE-2;
|
|
// for floating point depth, just use mantissa, e.g. 16-20 bits
|
|
d->max_texture_size = 4096;
|
|
d->buffer_format = GDRAW_BFORMAT_vbib;
|
|
d->shared_depth_stencil = 1;
|
|
d->always_mipmap = 0;
|
|
d->conditional_nonpow2 = 0;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Enable/disable rendertargets in stack fashion
|
|
//
|
|
|
|
static rrbool RADLINK gdraw_TextureDrawBufferBegin(gswf_recti *region, gdraw_texture_format format, U32 flags, void *owner, GDrawStats *stats)
|
|
{
|
|
RR_UNUSED_VARIABLE(format);
|
|
RR_UNUSED_VARIABLE(flags);
|
|
|
|
GDrawFramebufferState *n = gdraw->cur+1;
|
|
GDrawHandle *t;
|
|
if (gdraw->tw == 0 || gdraw->th == 0) {
|
|
IggyGDrawSendWarning(NULL, "GDraw warning: w=0,h=0 rendertarget");
|
|
return false;
|
|
}
|
|
|
|
if (n >= &gdraw->frame[MAX_RENDER_STACK_DEPTH]) {
|
|
assert(0);
|
|
IggyGDrawSendWarning(NULL, "GDraw rendertarget nesting exceeds MAX_RENDER_STACK_DEPTH");
|
|
return false;
|
|
}
|
|
|
|
if (owner) {
|
|
// @TODO implement
|
|
t = NULL;
|
|
assert(0); // nyi
|
|
} else {
|
|
t = get_color_rendertarget(stats);
|
|
if (!t)
|
|
return false;
|
|
}
|
|
|
|
n->color_buffer = t;
|
|
assert(n->color_buffer != NULL); // @GDRAW_ASSERT
|
|
|
|
n->cached = owner != NULL;
|
|
if (owner) {
|
|
n->base_x = region->x0;
|
|
n->base_y = region->y0;
|
|
n->width = region->x1 - region->x0;
|
|
n->height = region->y1 - region->y0;
|
|
}
|
|
|
|
assert(gdraw->frametex_width >= gdraw->tw && gdraw->frametex_height >= gdraw->th); // @GDRAW_ASSERT
|
|
int k = t - gdraw->rendertargets.handle;
|
|
|
|
if (region) {
|
|
S32 ox, oy, pad = 2; // 2 pixels of border on all sides
|
|
// 1 pixel turns out to be not quite enough with the interpolator precision we get.
|
|
|
|
if (gdraw->in_blur)
|
|
ox = oy = 0;
|
|
else
|
|
ox = gdraw->tx0p, oy = gdraw->ty0p;
|
|
|
|
// clamp region to tile
|
|
S32 xt0 = RR_MAX(region->x0 - ox, 0);
|
|
S32 yt0 = RR_MAX(region->y0 - oy, 0);
|
|
S32 xt1 = RR_MIN(region->x1 - ox, gdraw->tpw);
|
|
S32 yt1 = RR_MIN(region->y1 - oy, gdraw->tph);
|
|
|
|
// but the padding needs to clamp to render target bounds
|
|
S32 x = RR_MAX(xt0 - pad, 0);
|
|
S32 y = RR_MAX(yt0 - pad, 0);
|
|
S32 w = RR_MIN(xt1 + pad, gdraw->frametex_width) - x;
|
|
S32 h = RR_MIN(yt1 + pad, gdraw->frametex_height) - y;
|
|
|
|
if (w <= 0 || h <= 0) { // region doesn't intersect with current tile
|
|
gdraw_FreeTexture((GDrawTexture *) t, 0, stats);
|
|
// note: don't send a warning since this will happen during regular tiled rendering
|
|
return false;
|
|
}
|
|
|
|
if (!gdraw->in_blur)
|
|
set_viewport_raw(x, y, w, h); // not strictly necessary (we only use this for a clear), just to avoid GPAD warnings
|
|
|
|
cellGcmSetScissor(gdraw->gcm, x, y, w, h);
|
|
gdraw->rt_valid[k].x0 = xt0;
|
|
gdraw->rt_valid[k].y0 = yt0;
|
|
gdraw->rt_valid[k].x1 = xt1;
|
|
gdraw->rt_valid[k].y1 = yt1;
|
|
} else {
|
|
gdraw->rt_valid[k].x0 = 0;
|
|
gdraw->rt_valid[k].y0 = 0;
|
|
gdraw->rt_valid[k].x1 = gdraw->frametex_width;
|
|
gdraw->rt_valid[k].y1 = gdraw->frametex_height;
|
|
}
|
|
|
|
// wait for all currently queued draw commands to finish reading textures
|
|
// (in case a draw command is still using this rendertarget as a texture)
|
|
GDrawFence fence = put_fence();
|
|
cellGcmFlush(gdraw->gcm);
|
|
cellGcmSetWaitLabel(gdraw->gcm, gdraw->fence_label_index, (U32) fence.value);
|
|
|
|
++gdraw->cur;
|
|
set_render_target();
|
|
cellGcmSetClearSurface(gdraw->gcm, CELL_GCM_CLEAR_R | CELL_GCM_CLEAR_G | CELL_GCM_CLEAR_B | CELL_GCM_CLEAR_A);
|
|
set_viewport();
|
|
set_projection();
|
|
|
|
return true;
|
|
}
|
|
|
|
static GDrawTexture *RADLINK gdraw_TextureDrawBufferEnd(GDrawStats *stats)
|
|
{
|
|
RR_UNUSED_VARIABLE(stats);
|
|
GDrawFramebufferState *n = gdraw->cur;
|
|
GDrawFramebufferState *m = --gdraw->cur;
|
|
if (gdraw->tw == 0 || gdraw->th == 0) return 0;
|
|
|
|
if (n >= &gdraw->frame[MAX_RENDER_STACK_DEPTH])
|
|
return 0; // already returned a warning in Start...()
|
|
|
|
assert(m >= gdraw->frame); // bug in Iggy -- unbalanced
|
|
|
|
if (m != gdraw->frame) {
|
|
assert(m->color_buffer != NULL); // @GDRAW_ASSERT
|
|
}
|
|
assert(n->color_buffer != NULL); // @GDRAW_ASSERT
|
|
|
|
// wait for render to texture operation to finish
|
|
// can't put down a backend fence directly, there might still be
|
|
// a texture fence pending that completes after the backend fence
|
|
// (can this really happen? not sure, but better safe than sorry).
|
|
//
|
|
// so: wait for completion of pending texture fence, then put down
|
|
// backend label, then wait on that.
|
|
|
|
cellGcmSetWaitLabel(gdraw->gcm, gdraw->fence_label_index, U32(gdraw->next_fence_index - 1));
|
|
GDrawFence fence = put_backend_fence();
|
|
cellGcmFlush(gdraw->gcm);
|
|
cellGcmSetWaitLabel(gdraw->gcm, gdraw->fence_label_index, (U32) fence.value);
|
|
cellGcmSetInvalidateTextureCache(gdraw->gcm, CELL_GCM_INVALIDATE_TEXTURE);
|
|
n->color_buffer->fence = fence;
|
|
|
|
// switch back to old rendertarget
|
|
set_render_target();
|
|
set_viewport();
|
|
set_projection();
|
|
|
|
return (GDrawTexture *) n->color_buffer;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Clear stencil/depth buffers
|
|
//
|
|
// Open question whether we'd be better off finding bounding boxes
|
|
// and only clearing those; it depends exactly how fast clearing works.
|
|
//
|
|
|
|
static void RADLINK gdraw_ClearStencilBits(U32 bits)
|
|
{
|
|
// @TODO: actually only clear 'bits', not everything
|
|
RR_UNUSED_VARIABLE(bits);
|
|
cellGcmSetStencilMask(gdraw->gcm, 0xff);
|
|
cellGcmSetClearSurface(gdraw->gcm, CELL_GCM_CLEAR_S);
|
|
gdraw->stencil_key = ~0u; // invalid
|
|
}
|
|
|
|
// this only happens rarely (hopefully never) if we use the depth buffer,
|
|
// so we can just clear the whole thing
|
|
static void RADLINK gdraw_ClearID(void)
|
|
{
|
|
cellGcmSetClearSurface(gdraw->gcm, CELL_GCM_CLEAR_Z);
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Fragment shader setting / patching
|
|
//
|
|
|
|
static RADINLINE U64 microcode_endian_swap(U64 v)
|
|
{
|
|
U64 lo = v & 0x0000ffff0000ffffull;
|
|
U64 hi = v ^ lo;
|
|
|
|
return (lo << 16) | (hi >> 16);
|
|
}
|
|
|
|
static void set_fragment_shader(GDraw * RADRESTRICT gd, ProgramWithCachedVariableLocations *prg)
|
|
{
|
|
if (prg != gd->cur_fprog) {
|
|
gd->cur_fprog = prg;
|
|
cellGcmSetFragmentProgramLoad(gd->gcm, &prg->cfg);
|
|
}
|
|
}
|
|
|
|
static void set_fragment_para(GDraw * RADRESTRICT gd, U32 ucode_offs, int para, const void *values, int count)
|
|
{
|
|
if (para == -1)
|
|
return;
|
|
|
|
gd->cur_fprog = NULL; // need to re-set shader after patching
|
|
|
|
const U64 *inv = (const U64 *) values;
|
|
const int *patch_offs = (const int *) para;
|
|
int offs = *patch_offs;
|
|
void *ptr;
|
|
|
|
while (count--) {
|
|
U64 v0 = microcode_endian_swap(*inv++);
|
|
U64 v1 = microcode_endian_swap(*inv++);
|
|
|
|
do {
|
|
cellGcmSetInlineTransferPointer(gd->gcm, ucode_offs + offs, 4, &ptr);
|
|
U64 * RADRESTRICT p64 = (U64 *) ptr;
|
|
p64[0] = v0;
|
|
p64[1] = v1;
|
|
} while ((offs = *++patch_offs) != -1);
|
|
|
|
offs = *++patch_offs;
|
|
}
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Set all the render state from GDrawRenderState
|
|
//
|
|
// This also is responsible for getting the framebuffer into a texture
|
|
// if the read-modify-write blend operation can't be expressed with
|
|
// the native blend operators. (E.g. "screen")
|
|
//
|
|
|
|
static RADINLINE void set_texture(S32 texunit, GDrawTexture *tex)
|
|
{
|
|
assert(texunit < MAX_SAMPLERS);
|
|
assert(tex != NULL);
|
|
|
|
if (gdraw->active_tex[texunit] != tex) {
|
|
gdraw->active_tex[texunit] = tex;
|
|
GDrawHandle *h = (GDrawHandle *) tex;
|
|
set_rsx_texture(gdraw->gcm, texunit, h->handle.tex.gcm, GDRAW_WRAP_clamp, 0);
|
|
}
|
|
}
|
|
|
|
#define MAKEBLEND(src,dst) ((src << 16) | src), ((dst << 16) | dst)
|
|
|
|
static struct gdraw_gcm_blendspec {
|
|
U32 enable;
|
|
U32 src;
|
|
U32 dst;
|
|
U32 _pad;
|
|
} blend_states[] = {
|
|
{ 0, MAKEBLEND(CELL_GCM_ONE, CELL_GCM_ZERO), 0 }, // GDRAW_BLEND_none
|
|
{ 1, MAKEBLEND(CELL_GCM_ONE, CELL_GCM_ONE_MINUS_SRC_ALPHA), 0 }, // GDRAW_BLEND_alpha
|
|
{ 1, MAKEBLEND(CELL_GCM_DST_COLOR, CELL_GCM_ONE_MINUS_SRC_ALPHA), 0 }, // GDRAW_BLEND_multiply
|
|
{ 1, MAKEBLEND(CELL_GCM_ONE, CELL_GCM_ONE), 0 }, // GDRAW_BLEND_add
|
|
|
|
{ 0, MAKEBLEND(CELL_GCM_ONE, CELL_GCM_ZERO), 0 }, // GDRAW_BLEND_filter
|
|
{ 0, MAKEBLEND(CELL_GCM_ONE, CELL_GCM_ZERO), 0 }, // GDRAW_BLEND_special
|
|
};
|
|
|
|
#undef MAKEBLEND
|
|
|
|
// converts a depth id into a Z value
|
|
static RADINLINE F64 depth_from_id(S64 id)
|
|
{
|
|
#ifndef __SNC__
|
|
return (1.0 - 1.0 / (F64) MAX_DEPTH_VALUE) - __fcfid(id) * (1.0 / (F64) MAX_DEPTH_VALUE); // = 1 - (id + 1) / MAX_DEPTH_VALUE
|
|
#else
|
|
return (1.0 - 1.0 / (F64) MAX_DEPTH_VALUE) - id * (1.0 / (F64) MAX_DEPTH_VALUE); // = 1 - (id + 1) / MAX_DEPTH_VALUE
|
|
#endif
|
|
}
|
|
|
|
#define copy_vec4(d,s) { F32 x = s[0]; F32 y = s[1]; F32 z = s[2]; F32 w = s[3]; d.x = x; d.y = y; d.z = z; d.w = w; }
|
|
|
|
static void set_renderstate_full(U32 vertex_slot, const GDrawRenderState * RADRESTRICT r, GDrawStats *stats)
|
|
{
|
|
RR_UNUSED_VARIABLE(stats);
|
|
GDraw * RADRESTRICT gd = gdraw;
|
|
CellGcmContextData * RADRESTRICT gcm = gd->gcm;
|
|
volatile VertexVars * RADRESTRICT vvars;
|
|
ProgramWithCachedVariableLocations *fprog;
|
|
#ifndef __SNC__
|
|
volatile S64 depth_id = r->id; // load and sign extend here to avoid an LHS stall
|
|
#else
|
|
S64 depth_id = r->id;
|
|
#endif
|
|
|
|
// prepare commands for vertex program switch + constant upload
|
|
int nconstants = (r->texgen0_enabled ? VVAR_count : VVAR_count_world_and_color) * 4;
|
|
|
|
CommandData * RADRESTRICT cmd = put_command(gcm,
|
|
2 + // set vertex program start slot
|
|
(2 + nconstants)); // vertex constant loads
|
|
|
|
// set vertex program start slot
|
|
cmd->w0 = CELL_GCM_METHOD(CELL_GCM_NV4097_SET_TRANSFORM_PROGRAM_START, 1);
|
|
cmd->w1 = vertex_slot;
|
|
|
|
// update vertex shader constants
|
|
cmd->w2 = CELL_GCM_METHOD(CELL_GCM_NV4097_SET_TRANSFORM_CONSTANT_LOAD, nconstants + 1);
|
|
cmd->w3 = 0; // load starting from constant 0
|
|
void *vvars_ptr = &cmd->w4; // vertex constants start here
|
|
vvars = (volatile VertexVars *) vvars_ptr;
|
|
|
|
int texgen0 = r->texgen0_enabled;
|
|
int tex0mode = r->tex0_mode;
|
|
int blend_mode = r->blend_mode;
|
|
|
|
if (!r->use_world_space)
|
|
gdraw_ObjectSpace(&vvars->world[0].x, r->o2w, depth_from_id(depth_id), 0.0f);
|
|
else
|
|
gdraw_WorldSpace(&vvars->world[0].x, gd->world_to_pixel, depth_from_id(depth_id), 0.0f);
|
|
|
|
copy_vec4(vvars->x_off, r->edge_matrix);
|
|
copy_vec4(vvars->color_mul, r->color);
|
|
|
|
if (texgen0) {
|
|
copy_vec4(vvars->s0_texgen, r->s0_texgen);
|
|
copy_vec4(vvars->t0_texgen, r->t0_texgen);
|
|
}
|
|
|
|
// set the blend mode
|
|
assert(blend_mode != GDRAW_BLEND_filter); // shouldn't come through this path
|
|
assert(blend_mode >= 0 && blend_mode < (int) (sizeof(blend_states)/sizeof(*blend_states)));
|
|
gdraw_gcm_blendspec *blend = &blend_states[blend_mode];
|
|
if (blend_mode != gd->blend_mode) {
|
|
gd->blend_mode = blend_mode;
|
|
cmd = reserve_command(gcm, 4); // must be enough for both cases!
|
|
|
|
if (blend->enable) {
|
|
cmd->w0 = CELL_GCM_METHOD(CELL_GCM_NV4097_SET_BLEND_ENABLE, 3);
|
|
cmd->w1 = 1; // enable
|
|
cmd->w2 = blend->src; // src factor
|
|
cmd->w3 = blend->dst; // dst factor
|
|
gcm->current = &cmd->w4;
|
|
} else {
|
|
cmd->w0 = CELL_GCM_METHOD(CELL_GCM_NV4097_SET_BLEND_ENABLE, 1);
|
|
cmd->w1 = 0;
|
|
gcm->current = &cmd->w2;
|
|
}
|
|
}
|
|
|
|
// set the fragment program
|
|
if (blend_mode != GDRAW_BLEND_special) {
|
|
fprog = gd->basic_fprog[tex0mode];
|
|
|
|
if (r->cxf_add) {
|
|
fprog++; // additive
|
|
if (r->cxf_add[3]) fprog++; // additive alpha
|
|
}
|
|
} else
|
|
fprog = &gd->exceptional_blend[r->special_blend];
|
|
|
|
// set textures
|
|
if (tex0mode != GDRAW_TEXTURE_none) {
|
|
if (!r->tex[0]) // this can happen if some allocs fail. the rendered image will be invalid and we don't care.
|
|
return;
|
|
|
|
if (gd->active_tex[0] != r->tex[0]) {
|
|
gd->active_tex[0] = r->tex[0];
|
|
set_rsx_texture(gcm, 0, ((GDrawHandle *) r->tex[0])->handle.tex.gcm, r->wrap0, r->nearest0);
|
|
}
|
|
}
|
|
|
|
// Set pixel shader and constants
|
|
if (tex0mode == GDRAW_TEXTURE_focal_gradient)
|
|
set_fragment_para(gd, fprog->cfg.offset, fprog->vars[VAR_focal], r->focal_point, 1);
|
|
|
|
if (r->cxf_add) {
|
|
float temp[4] = { r->cxf_add[0]/255.0f, r->cxf_add[1]/255.0f, r->cxf_add[2]/255.0f, r->cxf_add[3]/255.0f };
|
|
set_fragment_para(gd, fprog->cfg.offset, fprog->vars[VAR_cadd], temp, 1);
|
|
}
|
|
|
|
set_fragment_shader(gd, fprog);
|
|
|
|
// Set pixel operation states
|
|
if (r->scissor) {
|
|
int x,y,w,h,xs,ys;
|
|
if (gd->cur == gd->frame) {
|
|
xs = gd->tx0 - gd->vx;
|
|
ys = gd->ty0 - gd->vy;
|
|
} else {
|
|
xs = gd->tx0p;
|
|
ys = gd->ty0p;
|
|
}
|
|
|
|
// clip against viewport
|
|
x = RR_MAX(r->scissor_rect.x0 - xs, gd->cview.x);
|
|
y = RR_MAX(r->scissor_rect.y0 - ys, gd->cview.y);
|
|
w = RR_MIN(r->scissor_rect.x1 - xs, gd->cview.x + gd->cview.w) - x;
|
|
h = RR_MIN(r->scissor_rect.y1 - ys, gd->cview.y + gd->cview.h) - y;
|
|
|
|
if (w <= 0 || h <= 0) {
|
|
// dummy scissor rect in case our actual scissor is empty
|
|
x = gd->cview.x;
|
|
y = gd->cview.y;
|
|
w = h = 0;
|
|
}
|
|
|
|
cellGcmSetScissorInline(gcm, x, y, w, h);
|
|
gd->scissor_state = 1;
|
|
} else if (r->scissor != gd->scissor_state)
|
|
disable_scissor(0);
|
|
|
|
// stencil changed?
|
|
U32 stencil_key = r->stencil_test | (r->stencil_set << 8);
|
|
if (stencil_key != gd->stencil_key) {
|
|
gd->stencil_key = stencil_key;
|
|
|
|
// it just so happens that all the states we want to set are in a contiguous method index range!
|
|
cmd = put_command(gcm, 7);
|
|
cmd->w0 = CELL_GCM_METHOD(CELL_GCM_NV4097_SET_COLOR_MASK, 6);
|
|
cmd->w1 = r->stencil_set ? 0 : COLOR_MASK_ALL; // COLOR_MASK
|
|
cmd->w2 = (r->stencil_set | r->stencil_test) != 0; // STENCIL_TEST_ENABLE
|
|
cmd->w3 = r->stencil_set; // STENCIL_MASK
|
|
cmd->w4 = CELL_GCM_EQUAL; // STENCIL_FUNC
|
|
cmd->w5 = 0xff; // STENCIL_FUNC_REF
|
|
cmd->w6 = r->stencil_test; // STENCIL_FUNC_MASK
|
|
}
|
|
|
|
// z mode changed?
|
|
U32 z_key = r->set_id | (r->test_id << 1);
|
|
if (z_key != gd->z_key) {
|
|
gd->z_key = z_key;
|
|
|
|
cmd = put_command(gcm, 4);
|
|
cmd->w0 = CELL_GCM_METHOD(CELL_GCM_NV4097_SET_DEPTH_FUNC, 3);
|
|
cmd->w1 = r->test_id ? CELL_GCM_LESS : CELL_GCM_EQUAL; // DEPTH_FUNC
|
|
cmd->w2 = r->set_id; // DEPTH_MASK
|
|
cmd->w3 = r->test_id | r->set_id; // DEPTH_TEST_ENABLE
|
|
}
|
|
}
|
|
|
|
static RADINLINE void set_renderstate(U32 vertex_slot, const GDrawRenderState * RADRESTRICT r, GDrawStats *stats)
|
|
{
|
|
if (r->identical_state) {
|
|
// may need to switch vertex shader, but that's it - very quick to set up.
|
|
CommandData * RADRESTRICT cmd = put_command(gdraw->gcm, 2);
|
|
cmd->w0 = CELL_GCM_METHOD(CELL_GCM_NV4097_SET_TRANSFORM_PROGRAM_START, 1);
|
|
cmd->w1 = vertex_slot;
|
|
} else
|
|
set_renderstate_full(vertex_slot, r, stats);
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Vertex formats
|
|
//
|
|
|
|
struct gdraw_gcm_vformat_desc {
|
|
int size;
|
|
int tex_offs;
|
|
U32 format0;
|
|
U32 format1;
|
|
} vformats[GDRAW_vformat__basic_count] = {
|
|
// sz toffs stride0 size0 type0 stride1 size1 type1
|
|
{ 8, 0, ( 8 << 8) | (2 << 4) | CELL_GCM_VERTEX_F, ( 0 << 8) | (0 << 4) | CELL_GCM_VERTEX_F }, // GDRAW_vformat_v2
|
|
{ 16, 8, (16 << 8) | (2 << 4) | CELL_GCM_VERTEX_F, (16 << 8) | (4 << 4) | CELL_GCM_VERTEX_S32K }, // GDRAW_vformat_v2aa
|
|
{ 16, 8, (16 << 8) | (2 << 4) | CELL_GCM_VERTEX_F, (16 << 8) | (2 << 4) | CELL_GCM_VERTEX_F }, // GDRAW_vformat_v2tc2
|
|
};
|
|
|
|
static RADINLINE void set_vertex_decl(GDraw * RADRESTRICT gd, CellGcmContextData * RADRESTRICT gcm, int format_index, int location, U32 vertaddr)
|
|
{
|
|
assert(format_index >= 0 && format_index < (int) (sizeof(vformats)/sizeof(*vformats)));
|
|
|
|
CommandData * RADRESTRICT cmd = reserve_command(gcm, 6); // must be enough for the worst case!
|
|
cmd->w0 = CELL_GCM_METHOD(CELL_GCM_NV4097_SET_VERTEX_DATA_ARRAY_OFFSET, 2);
|
|
cmd->w1 = CELL_GCM_METHOD_DATA_VERTEX_DATA_ARRAY_OFFSET(location, vertaddr + 0); // offset for attr 0
|
|
cmd->w2 = CELL_GCM_METHOD_DATA_VERTEX_DATA_ARRAY_OFFSET(location, vertaddr + 8); // offset for attr 1
|
|
|
|
if (format_index != gd->vert_format) {
|
|
gd->vert_format = format_index;
|
|
gdraw_gcm_vformat_desc *fmt = &vformats[format_index];
|
|
U32 fmt0 = fmt->format0;
|
|
U32 fmt1 = fmt->format1;
|
|
|
|
cmd->w3 = CELL_GCM_METHOD(CELL_GCM_NV4097_SET_VERTEX_DATA_ARRAY_FORMAT, 2);
|
|
cmd->w4 = fmt0;
|
|
cmd->w5 = fmt1;
|
|
gcm->current = &cmd->w6;
|
|
} else
|
|
gcm->current = &cmd->w3;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Draw triangles with a given renderstate
|
|
//
|
|
|
|
static RADINLINE void fence_resources(GDraw * RADRESTRICT gd, void *r1, void *r2=NULL, void *r3=NULL, void *r4=NULL)
|
|
{
|
|
GDrawFence fence;
|
|
fence.value = gd->next_fence_index;
|
|
|
|
if (r1) ((GDrawHandle *) r1)->fence = fence;
|
|
if (r2) ((GDrawHandle *) r2)->fence = fence;
|
|
if (r3) ((GDrawHandle *) r3)->fence = fence;
|
|
if (r4) ((GDrawHandle *) r4)->fence = fence;
|
|
|
|
if (--gd->fence_batch_counter == 0)
|
|
put_fence();
|
|
}
|
|
|
|
static U32 vprog(S32 vertex_format, GDrawRenderState *r)
|
|
{
|
|
RR_UNUSED_VARIABLE(r);
|
|
return gdraw->vslot[vertex_format];
|
|
}
|
|
|
|
static void RADLINK gdraw_DrawIndexedTriangles(GDrawRenderState *r, GDrawPrimitive *p, GDrawVertexBuffer *buf, GDrawStats * RADRESTRICT stats)
|
|
{
|
|
GDrawHandle *vb = (GDrawHandle *) buf;
|
|
GDraw * RADRESTRICT gd = gdraw;
|
|
CellGcmContextData * RADRESTRICT gcm = gd->gcm;
|
|
S32 vformat = p->vertex_format;
|
|
|
|
set_renderstate(vprog(vformat, r), r, stats);
|
|
|
|
if (vb) {
|
|
U32 vertaddr = vbufaddr2offs(gd, vb->handle.vbuf.verts) + (U32) (UINTa) p->vertices;
|
|
U32 indaddr = vbufaddr2offs(gd, vb->handle.vbuf.inds) + (U32) (UINTa) p->indices;
|
|
int loc = gd->vbuf_loc;
|
|
set_vertex_decl(gd, gcm, vformat, loc, vertaddr);
|
|
cellGcmSetDrawIndexArray(gcm, CELL_GCM_PRIMITIVE_TRIANGLES, p->num_indices, CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16, loc, indaddr);
|
|
} else if (p->indices) {
|
|
S32 vertex_size = p->num_vertices * vformats[vformat].size;
|
|
S32 index_size = p->num_indices * 2;
|
|
U8 * RADRESTRICT ring_data = (U8 *) gdraw_bufring_alloc(&gd->dyn_vb, vertex_size + index_size, CELL_GCM_VERTEX_TEXTURE_CACHE_LINE_SIZE);
|
|
|
|
if (ring_data) { // normal case: fits inside ring buffer
|
|
memcpy(ring_data, p->vertices, vertex_size);
|
|
memcpy(ring_data + vertex_size, p->indices, index_size);
|
|
|
|
cellGcmSetInvalidateVertexCache(gcm);
|
|
|
|
U32 vertaddr = dynvbaddr2offs(gd, ring_data);
|
|
U32 indaddr = vertaddr + vertex_size;
|
|
int loc = gd->dynvb_loc;
|
|
set_vertex_decl(gd, gcm, vformat, loc, vertaddr);
|
|
cellGcmSetDrawIndexArray(gcm, CELL_GCM_PRIMITIVE_TRIANGLES, p->num_indices, CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16, loc, indaddr);
|
|
} else {
|
|
// convert it into a non-indexed triangle list that we can chunk without extra mem
|
|
// this is a fall-back path and, needless to say, it's slow.
|
|
S32 vsize = vformats[p->vertex_format].size;
|
|
S32 tris_per_chunk = gdraw->dyn_vb.seg_size / (3 * vsize);
|
|
S32 verts_per_chunk = tris_per_chunk * 3;
|
|
U16 * RADRESTRICT inds = (U16 *) p->indices;
|
|
S32 pos = 0, i;
|
|
|
|
while (pos < p->num_indices) {
|
|
S32 vert_count = RR_MIN(p->num_indices - pos, verts_per_chunk);
|
|
void *ring = gdraw_bufring_alloc(&gd->dyn_vb, vert_count * vsize, CELL_GCM_VERTEX_TEXTURE_CACHE_LINE_SIZE);
|
|
assert(ring != NULL); // we specifically chunked so this alloc succeeds!
|
|
|
|
// prepare for painting...
|
|
cellGcmSetInvalidateVertexCache(gcm);
|
|
set_vertex_decl(gd, gcm, p->vertex_format, gd->dynvb_loc, dynvbaddr2offs(gd, ring));
|
|
|
|
// build the triangle list (two versions for the two sizes since it's a small number of bytes being copied)
|
|
if (vsize == 8) {
|
|
ring = (void *) ((U8 *) ring - 8);
|
|
for (i=0; i < vert_count; i++)
|
|
__stdu(*((U64 *) p->vertices + *inds++), 8, ring);
|
|
} else if (vsize == 16) {
|
|
ring = (void *) ((U8 *) ring - 8);
|
|
for (i=0; i < vert_count; i++) {
|
|
U64 *src = (U64 *) p->vertices + *inds++ * 2;
|
|
U64 v0 = __ld(0, src);
|
|
U64 v1 = __ld(8, src);
|
|
__std(v0, 8, ring);
|
|
__stdu(v1, 16, ring);
|
|
}
|
|
} else
|
|
assert(0); // unknown size, shouldn't happen
|
|
|
|
// paint this batch
|
|
cellGcmSetDrawArrays(gcm, CELL_GCM_PRIMITIVE_TRIANGLES, 0, vert_count);
|
|
pos += vert_count;
|
|
}
|
|
}
|
|
} else { // dynamic quads
|
|
U32 vsize = vformats[p->vertex_format].size;
|
|
S32 verts_per_chunk = (gdraw->dyn_vb.seg_size / vsize) & ~3;
|
|
S32 pos = 0;
|
|
|
|
while (pos < p->num_vertices) {
|
|
S32 vert_count = RR_MIN(p->num_vertices - pos, verts_per_chunk);
|
|
U32 chunk_bytes = vert_count * vsize;
|
|
void *ring = gdraw_bufring_alloc(&gd->dyn_vb, chunk_bytes, CELL_GCM_VERTEX_TEXTURE_CACHE_LINE_SIZE);
|
|
assert(ring != NULL); // we picked the chunk size so this alloc succeeds!
|
|
|
|
memcpy(ring, (U8 *)p->vertices + pos * vsize, chunk_bytes);
|
|
cellGcmSetInvalidateVertexCache(gcm);
|
|
set_vertex_decl(gd, gcm, p->vertex_format, gd->dynvb_loc, dynvbaddr2offs(gd, ring));
|
|
cellGcmSetDrawArrays(gcm, CELL_GCM_PRIMITIVE_QUADS, 0, vert_count);
|
|
pos += vert_count;
|
|
}
|
|
}
|
|
|
|
fence_resources(gd, vb, r->tex[0], r->tex[1]);
|
|
|
|
{
|
|
// avoid LHS
|
|
S16 oldflags = stats->nonzero_flags;
|
|
S16 oldbatches = stats->num_batches;
|
|
|
|
stats->nonzero_flags = oldflags | GDRAW_STATS_batches;
|
|
stats->num_batches = oldbatches + 1;
|
|
stats->drawn_indices += p->num_indices;
|
|
stats->drawn_vertices += p->num_vertices;
|
|
}
|
|
}
|
|
|
|
///////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Flash 8 filter effects
|
|
//
|
|
|
|
static void set_pixel_constant(U32 ucode_offs, S32 constant, F32 x, F32 y, F32 z, F32 w)
|
|
{
|
|
F32 values[4] = { x,y,z,w };
|
|
set_fragment_para(gdraw, ucode_offs, constant, values, 1);
|
|
}
|
|
|
|
// caller sets up texture coordinates
|
|
static void do_screen_quad(gswf_recti *s, F32 *tc, GDrawStats *stats)
|
|
{
|
|
RR_UNUSED_VARIABLE(stats);
|
|
|
|
F32 px0 = (F32) s->x0, py0 = (F32) s->y0, px1 = (F32) s->x1, py1 = (F32) s->y1;
|
|
CellGcmContextData * RADRESTRICT gcm = gdraw->gcm;
|
|
VertexVars * RADRESTRICT vvars;
|
|
volatile F32 * RADRESTRICT vert;
|
|
void *ptr;
|
|
|
|
cellGcmSetVertexProgramStartSlot(gcm, gdraw->vslot[GDRAW_vformat_v2tc2]);
|
|
cellGcmSetVertexProgramConstantsPointer(gcm, 0, VVAR_count_worldonly * 4, &ptr);
|
|
vvars = (VertexVars *) ptr;
|
|
gdraw_PixelSpace(&vvars->world[0].x);
|
|
|
|
set_vertex_decl(gdraw, gcm, GDRAW_vformat_v2tc2, CELL_GCM_LOCATION_LOCAL, 0);
|
|
cellGcmSetDrawBegin(gcm, CELL_GCM_PRIMITIVE_QUADS);
|
|
cellGcmSetDrawInlineArrayPointer(gcm, 4 * 4, &ptr);
|
|
|
|
// build vertex data
|
|
vert = (volatile F32 *) ptr;
|
|
vert[ 0] = px0; vert[ 1] = py0; vert[ 2] = tc[0]; vert[ 3] = tc[1];
|
|
vert[ 4] = px1; vert[ 5] = py0; vert[ 6] = tc[2]; vert[ 7] = tc[1];
|
|
vert[ 8] = px1; vert[ 9] = py1; vert[10] = tc[2]; vert[11] = tc[3];
|
|
vert[12] = px0; vert[13] = py1; vert[14] = tc[0]; vert[15] = tc[3];
|
|
|
|
cellGcmSetDrawEnd(gcm);
|
|
}
|
|
|
|
static void gdraw_DriverBlurPass(GDrawRenderState *r, int taps, float *data, gswf_recti *s, float *tc, float height_max, float *clamp, GDrawStats *stats)
|
|
{
|
|
RR_UNUSED_VARIABLE(height_max);
|
|
|
|
ProgramWithCachedVariableLocations *prg = &gdraw->blur_prog[taps];
|
|
U32 ucode_offs = prg->cfg.offset;
|
|
|
|
set_texture(0, r->tex[0]);
|
|
set_fragment_para(gdraw, ucode_offs, prg->vars[VAR_blur_tap], data, taps);
|
|
set_fragment_para(gdraw, ucode_offs, prg->vars[VAR_blur_clampv], clamp, 1);
|
|
set_fragment_shader(gdraw, prg);
|
|
|
|
do_screen_quad(s, tc, stats);
|
|
fence_resources(gdraw, r->tex[0]);
|
|
}
|
|
|
|
static void gdraw_Colormatrix(GDrawRenderState *r, gswf_recti *s, float *tc, GDrawStats *stats)
|
|
{
|
|
ProgramWithCachedVariableLocations *prg = &gdraw->colormatrix;
|
|
U32 ucode_offs = prg->cfg.offset;
|
|
if (!gdraw_TextureDrawBufferBegin(s, GDRAW_TEXTURE_FORMAT_rgba32, GDRAW_TEXTUREDRAWBUFFER_FLAGS_needs_color | GDRAW_TEXTUREDRAWBUFFER_FLAGS_needs_alpha, 0, stats))
|
|
return;
|
|
|
|
set_texture(0, r->tex[0]);
|
|
set_fragment_para(gdraw, ucode_offs, prg->vars[VAR_colormatrix_data], r->shader_data, 5);
|
|
set_fragment_shader(gdraw, prg);
|
|
|
|
do_screen_quad(s, tc, stats);
|
|
fence_resources(gdraw, r->tex[0]);
|
|
r->tex[0] = gdraw_TextureDrawBufferEnd(stats);
|
|
}
|
|
|
|
static gswf_recti *get_valid_rect(GDrawTexture *tex)
|
|
{
|
|
GDrawHandle *h = (GDrawHandle *) tex;
|
|
S32 n = (S32) (h - gdraw->rendertargets.handle);
|
|
assert(n >= 0 && n <= MAX_RENDER_STACK_DEPTH+1);
|
|
return &gdraw->rt_valid[n];
|
|
}
|
|
|
|
static void set_clamp_constant(U32 ucode_offs, S32 constant, GDrawTexture *tex)
|
|
{
|
|
gswf_recti *s = get_valid_rect(tex);
|
|
// when we make the valid data, we make sure there is an extra empty pixel at the border
|
|
set_pixel_constant(ucode_offs, constant,
|
|
(s->x0-0.5f) / gdraw->frametex_width,
|
|
(s->y0-0.5f) / gdraw->frametex_height,
|
|
(s->x1+0.5f) / gdraw->frametex_width,
|
|
(s->y1+0.5f) / gdraw->frametex_height);
|
|
}
|
|
|
|
static void gdraw_Filter(GDrawRenderState *r, gswf_recti *s, float *tc, int isbevel, GDrawStats *stats)
|
|
{
|
|
ProgramWithCachedVariableLocations *prg = &gdraw->filter_prog[isbevel][r->filter_mode];
|
|
U32 ucode_offs = prg->cfg.offset;
|
|
if (!gdraw_TextureDrawBufferBegin(s, GDRAW_TEXTURE_FORMAT_rgba32, GDRAW_TEXTUREDRAWBUFFER_FLAGS_needs_color | GDRAW_TEXTUREDRAWBUFFER_FLAGS_needs_alpha, NULL, stats))
|
|
return;
|
|
|
|
set_texture(0, r->tex[0]);
|
|
set_texture(1, r->tex[1]);
|
|
if (r->tex[2]) set_texture(2, r->tex[2]);
|
|
|
|
set_pixel_constant(ucode_offs, prg->vars[VAR_filter_color], r->shader_data[0], r->shader_data[1], r->shader_data[2], r->shader_data[3]);
|
|
set_pixel_constant(ucode_offs, prg->vars[VAR_filter_tc_off], -r->shader_data[4] / (F32)gdraw->frametex_width, -r->shader_data[5] / (F32)gdraw->frametex_height, r->shader_data[6], 0);
|
|
set_pixel_constant(ucode_offs, prg->vars[VAR_filter_color2], r->shader_data[8], r->shader_data[9], r->shader_data[10], r->shader_data[11]);
|
|
set_clamp_constant(ucode_offs, prg->vars[VAR_filter_clamp0], r->tex[0]);
|
|
set_clamp_constant(ucode_offs, prg->vars[VAR_filter_clamp1], r->tex[1]);
|
|
set_fragment_shader(gdraw, prg);
|
|
|
|
do_screen_quad(s, tc, stats);
|
|
fence_resources(gdraw, r->tex[0], r->tex[1], r->tex[2]);
|
|
r->tex[0] = gdraw_TextureDrawBufferEnd(stats);
|
|
}
|
|
|
|
static void RADLINK gdraw_FilterQuad(GDrawRenderState *r, S32 x0, S32 y0, S32 x1, S32 y1, GDrawStats *stats)
|
|
{
|
|
F32 tc[4];
|
|
gswf_recti s;
|
|
|
|
// clip to tile boundaries
|
|
s.x0 = RR_MAX(x0, gdraw->tx0p);
|
|
s.y0 = RR_MAX(y0, gdraw->ty0p);
|
|
s.x1 = RR_MIN(x1, gdraw->tx0p + gdraw->tpw);
|
|
s.y1 = RR_MIN(y1, gdraw->ty0p + gdraw->tph);
|
|
if (s.x1 < s.x0 || s.y1 < s.y0)
|
|
return;
|
|
|
|
// prepare for drawing
|
|
tc[0] = (s.x0 - gdraw->tx0p) / (F32) gdraw->frametex_width;
|
|
tc[1] = (s.y0 - gdraw->ty0p) / (F32) gdraw->frametex_height;
|
|
tc[2] = (s.x1 - gdraw->tx0p) / (F32) gdraw->frametex_width;
|
|
tc[3] = (s.y1 - gdraw->ty0p) / (F32) gdraw->frametex_height;
|
|
|
|
clear_renderstate();
|
|
|
|
if (r->blend_mode == GDRAW_BLEND_filter) {
|
|
switch (r->filter) {
|
|
case GDRAW_FILTER_blur: {
|
|
GDrawBlurInfo b;
|
|
gswf_recti bounds = *get_valid_rect(r->tex[0]);
|
|
gdraw_ShiftRect(&s, &s, -gdraw->tx0p, -gdraw->ty0p); // blur uses physical rendertarget coordinates
|
|
|
|
b.BlurPass = gdraw_DriverBlurPass;
|
|
b.w = gdraw->tpw;
|
|
b.h = gdraw->tph;
|
|
b.frametex_width = gdraw->frametex_width;
|
|
b.frametex_height = gdraw->frametex_height;
|
|
|
|
// blur needs to draw with multiple passes, so set up special state
|
|
gdraw->in_blur = true;
|
|
set_viewport_raw(0,0,gdraw->tpw,gdraw->tph);
|
|
set_projection_raw(0,gdraw->tpw,gdraw->tph,0);
|
|
|
|
// do the blur
|
|
gdraw_Blur(&gdraw_funcs, &b,r, &s, &bounds, stats);
|
|
|
|
// restore the normal state
|
|
gdraw->in_blur = false;
|
|
set_viewport();
|
|
set_projection();
|
|
break;
|
|
}
|
|
|
|
case GDRAW_FILTER_colormatrix:
|
|
gdraw_Colormatrix(r, &s, tc, stats);
|
|
break;
|
|
|
|
case GDRAW_FILTER_dropshadow:
|
|
gdraw_Filter(r, &s, tc, 0, stats);
|
|
break;
|
|
|
|
case GDRAW_FILTER_bevel:
|
|
gdraw_Filter(r, &s, tc, 1, stats);
|
|
break;
|
|
|
|
default:
|
|
assert(0);
|
|
}
|
|
} else {
|
|
GDrawHandle *blend_tex = NULL;
|
|
GDraw *gd = gdraw;
|
|
|
|
// for crazy blend modes, we need to read back from the framebuffer
|
|
// and do the blending in the pixel shader. so we need to copy the
|
|
// relevant pixels from our active render target into a texture.
|
|
if (r->blend_mode == GDRAW_BLEND_special &&
|
|
(blend_tex = get_color_rendertarget(stats)) != NULL) {
|
|
// slightly different logic depending on whether we were rendering
|
|
// to the main color buffer or a render target, because the former
|
|
// has tile origin-based coordinates while the latter don't. also,
|
|
// we don't have a texture for the main framebuffer lying around.
|
|
int bpp = 4;
|
|
GcmTexture *dest = blend_tex->handle.tex.gcm;
|
|
|
|
if (gd->cur == gd->frame) {
|
|
CellGcmSurface *src = &gd->main_surface;
|
|
cellGcmSetTransferImage(gd->gcm, CELL_GCM_TRANSFER_LOCAL_TO_LOCAL,
|
|
dest->offset, dest->pitch, gd->tx0 - gd->tx0p, gd->ty0 - gd->ty0p,
|
|
src->colorOffset[0], src->colorPitch[0], src->x + gd->vx, src->y + gd->vy, gd->tw, gd->th, bpp);
|
|
} else {
|
|
GcmTexture *src = gd->cur->color_buffer->handle.tex.gcm;
|
|
cellGcmSetTransferImage(gd->gcm, CELL_GCM_TRANSFER_LOCAL_TO_LOCAL,
|
|
dest->offset, dest->pitch, 0, 0,
|
|
src->offset, src->pitch, 0, 0, gd->tpw, gd->tph, bpp);
|
|
}
|
|
|
|
set_texture(1, (GDrawTexture *) blend_tex);
|
|
}
|
|
|
|
set_renderstate(vprog(GDRAW_vformat_v2tc2, r), r, stats);
|
|
do_screen_quad(&s, tc, stats);
|
|
fence_resources(gdraw, r->tex[0], r->tex[1]);
|
|
if (blend_tex)
|
|
gdraw_FreeTexture((GDrawTexture *) blend_tex, 0, stats);
|
|
}
|
|
}
|
|
|
|
///////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Shaders
|
|
//
|
|
|
|
#include "gdraw_ps3gcm_shaders.inl"
|
|
|
|
static void create_fragment_program(ProgramWithCachedVariableLocations *p, ProgramWithCachedVariableLocations *src)
|
|
{
|
|
*p = *src;
|
|
|
|
if (p->program) {
|
|
U32 ucode_size;
|
|
void *ucode_main, *ucode_local;
|
|
|
|
cellGcmCgInitProgram(p->program);
|
|
cellGcmCgGetUCode(p->program, &ucode_main, &ucode_size);
|
|
|
|
ucode_local = gdraw_arena_alloc(&gdraw->local_arena, ucode_size + 400, CELL_GCM_FRAGMENT_UCODE_LOCAL_ALIGN_OFFSET); // 400 for overfetch
|
|
assert(ucode_local != NULL); // if this triggers, it's a GDraw bug
|
|
memcpy(ucode_local, ucode_main, ucode_size);
|
|
|
|
cellGcmCgGetCgbFragmentProgramConfiguration(p->program, &p->cfg, 0, 1, 0);
|
|
p->cfg.offset = addr2offs(ucode_local);
|
|
p->cfg.attributeInputMask &= ~CELL_GCM_ATTRIB_OUTPUT_MASK_POINTSIZE; // we don't use point sprites
|
|
}
|
|
}
|
|
|
|
static void create_all_shaders()
|
|
{
|
|
S32 i;
|
|
U32 slot = 0;
|
|
|
|
for (i=0; i < GDRAW_TEXTURE__count*3; ++i) create_fragment_program(&gdraw->fprog[0][i], pshader_basic_arr + i);
|
|
for (i=0; i < GDRAW_BLENDSPECIAL__count; ++i) create_fragment_program(&gdraw->exceptional_blend[i], pshader_exceptional_blend_arr + i);
|
|
for (i=0; i < 32; ++i) create_fragment_program(&gdraw->filter_prog[0][i], pshader_filter_arr + i);
|
|
for (i=0; i < MAX_TAPS+1; ++i) create_fragment_program(&gdraw->blur_prog[i], pshader_blur_arr + i);
|
|
create_fragment_program(&gdraw->colormatrix, pshader_color_matrix_arr);
|
|
|
|
for (i=0; i < GDRAW_TEXTURE__count; ++i) gdraw->basic_fprog[i] = &gdraw->fprog[i][0];
|
|
|
|
for (i=0; i < GDRAW_vformat__basic_count; i++) {
|
|
ProgramWithCachedVariableLocations *p = &gdraw->vprog[i];
|
|
U32 ucode_size;
|
|
|
|
*p = vshader_vsps3_arr[i];
|
|
if (p->program) {
|
|
cellGcmCgInitProgram(p->program);
|
|
cellGcmCgSetInstructionSlot(p->program, slot);
|
|
cellGcmCgGetUCode(p->program, &p->ucode, &ucode_size);
|
|
gdraw->vslot[i] = slot;
|
|
slot += ucode_size / 16;
|
|
}
|
|
}
|
|
|
|
assert(slot <= 512); // that's all the space we have for vertex shaders!
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Create and tear-down the state
|
|
//
|
|
|
|
typedef struct
|
|
{
|
|
S32 num_handles;
|
|
S32 num_bytes;
|
|
void *ptr;
|
|
} GDrawResourceMem;
|
|
|
|
static GDrawResourceMem gdraw_mem[GDRAW_GCM_RESOURCE__count];
|
|
static void *rt_mem;
|
|
static S32 rt_size, rt_width, rt_height, rt_pitch;
|
|
|
|
static GDrawHandleCache *make_handle_cache(gdraw_gcm_resourcetype type, U32 align)
|
|
{
|
|
S32 num_handles = gdraw_mem[type].num_handles;
|
|
S32 num_bytes = gdraw_mem[type].num_bytes;
|
|
rrbool is_vertex = type == GDRAW_GCM_RESOURCE_vertexbuffer;
|
|
U32 cache_size = sizeof(GDrawHandleCache) + (num_handles - 1) * sizeof(GDrawHandle);
|
|
U32 header_size = is_vertex ? 0 : sizeof(GcmTexture) * num_handles;
|
|
|
|
if (!num_handles)
|
|
return NULL;
|
|
|
|
GDrawHandleCache *cache = (GDrawHandleCache *) IggyGDrawMalloc(cache_size + header_size);
|
|
if (cache) {
|
|
gdraw_HandleCacheInit(cache, num_handles, num_bytes);
|
|
cache->is_vertex = is_vertex;
|
|
|
|
// set up resource headers
|
|
if (type != GDRAW_GCM_RESOURCE_vertexbuffer) {
|
|
GcmTexture *tex = (GcmTexture *) ((U8 *) cache + cache_size);
|
|
S32 i;
|
|
for (i=0; i < num_handles; i++)
|
|
cache->handle[i].handle.tex.gcm = &tex[i];
|
|
}
|
|
|
|
// set up our allocator
|
|
cache->alloc = gfxalloc_create(gdraw_mem[type].ptr, num_bytes, align, num_handles);
|
|
if (!cache->alloc) {
|
|
IggyGDrawFree(cache);
|
|
cache = NULL;
|
|
}
|
|
}
|
|
|
|
return cache;
|
|
}
|
|
|
|
static void free_handle_cache(GDrawHandleCache *c)
|
|
{
|
|
if (c) {
|
|
if (c->alloc) IggyGDrawFree(c->alloc);
|
|
IggyGDrawFree(c);
|
|
}
|
|
}
|
|
|
|
static int get_location(void *ptr)
|
|
{
|
|
CellGcmConfig cfg;
|
|
cellGcmGetConfiguration(&cfg);
|
|
|
|
U8 *local_start = (U8 *) cfg.localAddress;
|
|
U8 *local_end = local_start + cfg.localSize;
|
|
return (ptr >= local_start && ptr < local_end) ? CELL_GCM_LOCATION_LOCAL : CELL_GCM_LOCATION_MAIN;
|
|
}
|
|
|
|
int gdraw_GCM_SetResourceMemory(gdraw_gcm_resourcetype type, S32 num_handles, void *ptr, S32 num_bytes)
|
|
{
|
|
GDrawStats stats;
|
|
|
|
assert(type >= GDRAW_GCM_RESOURCE_texture && type < GDRAW_GCM_RESOURCE__count);
|
|
assert(num_handles >= 0);
|
|
assert(num_bytes >= 0);
|
|
|
|
if (!num_handles) num_handles = 1;
|
|
|
|
switch (type) {
|
|
case GDRAW_GCM_RESOURCE_texture:
|
|
make_pool_aligned(&ptr, &num_bytes, CELL_GCM_TEXTURE_SWIZZLE_ALIGN_OFFSET);
|
|
break;
|
|
|
|
case GDRAW_GCM_RESOURCE_vertexbuffer:
|
|
case GDRAW_GCM_RESOURCE_dyn_vertexbuffer:
|
|
make_pool_aligned(&ptr, &num_bytes, CELL_GCM_SURFACE_LINEAR_ALIGN_OFFSET);
|
|
break;
|
|
|
|
default: // avoid compiler warning
|
|
break;
|
|
}
|
|
|
|
gdraw_mem[type].num_handles = num_handles;
|
|
gdraw_mem[type].num_bytes = num_bytes;
|
|
gdraw_mem[type].ptr = ptr;
|
|
|
|
// if no gdraw context created, there's nothing to worry about
|
|
if (!gdraw)
|
|
return 1;
|
|
|
|
// wait until GPU is done, then reap everything
|
|
wait_on_fence(gdraw->tile_end_fence);
|
|
memset(&stats, 0, sizeof(stats));
|
|
if (gdraw->texturecache) gdraw_res_reap(gdraw->texturecache, &stats);
|
|
if (gdraw->vbufcache ) gdraw_res_reap(gdraw->vbufcache, &stats);
|
|
|
|
// resize the appropriate pool
|
|
switch (type) {
|
|
case GDRAW_GCM_RESOURCE_texture:
|
|
free_handle_cache(gdraw->texturecache);
|
|
gdraw->texturecache = make_handle_cache(GDRAW_GCM_RESOURCE_texture, CELL_GCM_TEXTURE_SWIZZLE_ALIGN_OFFSET);
|
|
gdraw->tex_loc = get_location(ptr);
|
|
return gdraw->texturecache != NULL;
|
|
|
|
case GDRAW_GCM_RESOURCE_vertexbuffer:
|
|
free_handle_cache(gdraw->vbufcache);
|
|
gdraw->vbufcache = make_handle_cache(GDRAW_GCM_RESOURCE_vertexbuffer, CELL_GCM_SURFACE_LINEAR_ALIGN_OFFSET);
|
|
gdraw->vbuf_base = ptr ? (U8 *) ptr - addr2offs(ptr) : 0;
|
|
gdraw->vbuf_loc = get_location(ptr);
|
|
return gdraw->vbufcache != NULL;
|
|
|
|
case GDRAW_GCM_RESOURCE_dyn_vertexbuffer:
|
|
gdraw_bufring_shutdown(&gdraw->dyn_vb);
|
|
gdraw_bufring_init(&gdraw->dyn_vb, ptr, num_bytes, 2, CELL_GCM_VERTEX_TEXTURE_CACHE_LINE_SIZE);
|
|
gdraw->dynvb_base = ptr ? (U8 *) ptr - addr2offs(ptr) : 0;
|
|
gdraw->dynvb_loc = get_location(ptr);
|
|
return gdraw->dyn_vb.seg_size != 0;
|
|
|
|
default:
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
int gdraw_GCM_SetRendertargetMemory(void *ptr, S32 num_bytes, S32 width, S32 height, S32 pitch)
|
|
{
|
|
S32 i;
|
|
assert(num_bytes >= 0);
|
|
|
|
rt_mem = ptr;
|
|
rt_size = num_bytes;
|
|
rt_width = width;
|
|
rt_height = height;
|
|
rt_pitch = pitch;
|
|
|
|
if (!gdraw)
|
|
return 1;
|
|
|
|
// make sure RSX is done first
|
|
wait_on_fence(gdraw->tile_end_fence);
|
|
|
|
gdraw->frametex_width = width;
|
|
gdraw->frametex_height = height;
|
|
gdraw->rt_pitch = pitch;
|
|
gdraw_arena_init(&gdraw->rt_arena, ptr, rt_size);
|
|
gdraw_arena_reset(&gdraw->rt_arena); // unnecessary, just to avoid warning about unused function
|
|
gdraw->rt_loc = get_location(ptr);
|
|
gdraw_HandleCacheInit(&gdraw->rendertargets, MAX_RENDER_STACK_DEPTH + 1, rt_size);
|
|
for (i=0; i < MAX_RENDER_STACK_DEPTH + 1; ++i)
|
|
gdraw->rendertargets.handle[i].handle.tex.gcm = &gdraw->rendertarget_textures[i];
|
|
|
|
return 1;
|
|
}
|
|
|
|
void gdraw_GCM_ResetAllResourceMemory()
|
|
{
|
|
gdraw_GCM_SetResourceMemory(GDRAW_GCM_RESOURCE_texture, 0, NULL, 0);
|
|
gdraw_GCM_SetResourceMemory(GDRAW_GCM_RESOURCE_vertexbuffer, 0, NULL, 0);
|
|
gdraw_GCM_SetResourceMemory(GDRAW_GCM_RESOURCE_dyn_vertexbuffer, 0, NULL, 0);
|
|
gdraw_GCM_SetRendertargetMemory(NULL, 0, 0, 0, 0);
|
|
}
|
|
|
|
GDrawFunctions *gdraw_GCM_CreateContext(CellGcmContextData *gcm, void *local_workmem, U8 rsx_label_index)
|
|
{
|
|
S32 i;
|
|
|
|
gdraw = (GDraw *) IggyGDrawMalloc(sizeof(*gdraw)); // make sure gdraw struct is PPU cache line aligned
|
|
if (!gdraw) return NULL;
|
|
|
|
memset(gdraw, 0, sizeof(*gdraw));
|
|
|
|
// set up local work mem pointers
|
|
gdraw_arena_init(&gdraw->local_arena, local_workmem, GDRAW_GCM_LOCAL_WORKMEM_SIZE);
|
|
|
|
// set up fence stuff
|
|
gdraw->fence_label_index = rsx_label_index;
|
|
gdraw->fence_label = cellGcmGetLabelAddress(gdraw->fence_label_index);
|
|
*gdraw->fence_label = 0;
|
|
gdraw->tile_end_fence.value = 0;
|
|
gdraw->next_fence_index = 1;
|
|
gdraw->fence_batch_counter = FENCE_BATCH_INTERVAL;
|
|
|
|
// set up memory for all resource types
|
|
for (i = 0; i < GDRAW_GCM_RESOURCE__count; ++i)
|
|
gdraw_GCM_SetResourceMemory((gdraw_gcm_resourcetype) i, gdraw_mem[i].num_handles, gdraw_mem[i].ptr, gdraw_mem[i].num_bytes);
|
|
|
|
gdraw_GCM_SetRendertargetMemory(rt_mem, rt_size, rt_width, rt_height, rt_pitch);
|
|
|
|
gdraw->gcm = gcm;
|
|
create_all_shaders();
|
|
|
|
gdraw_funcs.SetViewSizeAndWorldScale = gdraw_SetViewSizeAndWorldScale;
|
|
gdraw_funcs.GetInfo = gdraw_GetInfo;
|
|
|
|
gdraw_funcs.DescribeTexture = gdraw_DescribeTexture;
|
|
gdraw_funcs.DescribeVertexBuffer = gdraw_DescribeVertexBuffer;
|
|
|
|
gdraw_funcs.RenderingBegin = gdraw_RenderingBegin;
|
|
gdraw_funcs.RenderingEnd = gdraw_RenderingEnd;
|
|
gdraw_funcs.RenderTileBegin = gdraw_RenderTileBegin;
|
|
gdraw_funcs.RenderTileEnd = gdraw_RenderTileEnd;
|
|
|
|
gdraw_funcs.TextureDrawBufferBegin = gdraw_TextureDrawBufferBegin;
|
|
gdraw_funcs.TextureDrawBufferEnd = gdraw_TextureDrawBufferEnd;
|
|
|
|
gdraw_funcs.DrawIndexedTriangles = gdraw_DrawIndexedTriangles;
|
|
gdraw_funcs.FilterQuad = gdraw_FilterQuad;
|
|
|
|
gdraw_funcs.SetAntialiasTexture = gdraw_SetAntialiasTexture;
|
|
|
|
gdraw_funcs.ClearStencilBits = gdraw_ClearStencilBits;
|
|
gdraw_funcs.ClearID = gdraw_ClearID;
|
|
|
|
gdraw_funcs.MakeTextureBegin = gdraw_MakeTextureBegin;
|
|
gdraw_funcs.MakeTextureMore = gdraw_MakeTextureMore;
|
|
gdraw_funcs.MakeTextureEnd = gdraw_MakeTextureEnd;
|
|
|
|
gdraw_funcs.UpdateTextureBegin = gdraw_UpdateTextureBegin;
|
|
gdraw_funcs.UpdateTextureRect = gdraw_UpdateTextureRect;
|
|
gdraw_funcs.UpdateTextureEnd = gdraw_UpdateTextureEnd;
|
|
|
|
gdraw_funcs.FreeTexture = gdraw_FreeTexture;
|
|
gdraw_funcs.TryToLockTexture = gdraw_TryToLockTexture;
|
|
|
|
gdraw_funcs.MakeTextureFromResource = (gdraw_make_texture_from_resource *) gdraw_GCM_MakeTextureFromResource;
|
|
gdraw_funcs.FreeTextureFromResource = gdraw_GCM_DestroyTextureFromResource;
|
|
|
|
gdraw_funcs.MakeVertexBufferBegin = gdraw_MakeVertexBufferBegin;
|
|
gdraw_funcs.MakeVertexBufferMore = gdraw_MakeVertexBufferMore;
|
|
gdraw_funcs.MakeVertexBufferEnd = gdraw_MakeVertexBufferEnd;
|
|
gdraw_funcs.TryToLockVertexBuffer = gdraw_TryLockVertexBuffer;
|
|
gdraw_funcs.FreeVertexBuffer = gdraw_FreeVertexBuffer;
|
|
|
|
gdraw_funcs.UnlockHandles = gdraw_UnlockHandles;
|
|
gdraw_funcs.SetTextureUniqueID = gdraw_SetTextureUniqueID;
|
|
|
|
return &gdraw_funcs;
|
|
}
|
|
|
|
void gdraw_GCM_DestroyContext(void)
|
|
{
|
|
if (gdraw) {
|
|
GDrawStats stats;
|
|
memset(&stats, 0, sizeof(stats));
|
|
|
|
if (gdraw->texturecache) gdraw_res_flush(gdraw->texturecache, &stats);
|
|
if (gdraw->vbufcache) gdraw_res_flush(gdraw->vbufcache, &stats);
|
|
|
|
// make sure RSX is done first
|
|
wait_on_fence(gdraw->tile_end_fence);
|
|
|
|
gdraw_bufring_shutdown(&gdraw->dyn_vb);
|
|
free_handle_cache(gdraw->texturecache);
|
|
free_handle_cache(gdraw->vbufcache);
|
|
IggyGDrawFree(gdraw);
|
|
gdraw = NULL;
|
|
}
|
|
}
|
|
|
|
void RADLINK gdraw_GCM_BeginCustomDraw(IggyCustomDrawCallbackRegion *region, float *matrix)
|
|
{
|
|
clear_renderstate();
|
|
gdraw_GetObjectSpaceMatrix(matrix, region->o2w, gdraw->projection, 0.0f, 0);
|
|
}
|
|
|
|
void RADLINK gdraw_GCM_CalculateCustomDraw_4J(IggyCustomDrawCallbackRegion * region, F32 mat[16])
|
|
{
|
|
gdraw_GetObjectSpaceMatrix(mat, region->o2w, gdraw->projection, 0.0f, 0);
|
|
}
|
|
|
|
void RADLINK gdraw_GCM_EndCustomDraw(IggyCustomDrawCallbackRegion *region)
|
|
{
|
|
RR_UNUSED_VARIABLE(region);
|
|
set_common_renderstate();
|
|
set_projection();
|
|
}
|
|
|
|
GDrawTexture * RADLINK gdraw_GCM_MakeTextureFromResource(U8 *resource_file, S32 len, IggyFileTexturePS3 *texture)
|
|
{
|
|
CellGcmTexture *tex = (CellGcmTexture *) (&texture->texture);
|
|
tex->offset = addr2offs(resource_file + texture->file_offset);
|
|
RR_UNUSED_VARIABLE(len);
|
|
|
|
// slightly more efficient if we'd done the following at bake time,
|
|
// but doing it here lets us avoid having this knowledge visible
|
|
// in the source to iggyconvert, which lets us avoid having a
|
|
// mechanism for hiding some of that source from non-PS3 customers
|
|
switch (texture->format) {
|
|
case IFT_FORMAT_la_88:
|
|
tex->remap = CELL_GCM_TEXTURE_REMAP_REMAP << 14 |
|
|
CELL_GCM_TEXTURE_REMAP_REMAP << 12 |
|
|
CELL_GCM_TEXTURE_REMAP_REMAP << 10 |
|
|
CELL_GCM_TEXTURE_REMAP_REMAP << 8 |
|
|
CELL_GCM_TEXTURE_REMAP_FROM_R << 6 |
|
|
CELL_GCM_TEXTURE_REMAP_FROM_R << 4 |
|
|
CELL_GCM_TEXTURE_REMAP_FROM_R << 2 |
|
|
CELL_GCM_TEXTURE_REMAP_FROM_G;
|
|
break;
|
|
case IFT_FORMAT_i_8:
|
|
tex->remap = CELL_GCM_TEXTURE_REMAP_REMAP << 14 |
|
|
CELL_GCM_TEXTURE_REMAP_REMAP << 12 |
|
|
CELL_GCM_TEXTURE_REMAP_REMAP << 10 |
|
|
CELL_GCM_TEXTURE_REMAP_REMAP << 8 |
|
|
CELL_GCM_TEXTURE_REMAP_FROM_R << 6 |
|
|
CELL_GCM_TEXTURE_REMAP_FROM_R << 4 |
|
|
CELL_GCM_TEXTURE_REMAP_FROM_R << 2 |
|
|
CELL_GCM_TEXTURE_REMAP_FROM_R;
|
|
break;
|
|
}
|
|
return gdraw_GCM_WrappedTextureCreate(tex);
|
|
}
|
|
|
|
void RADLINK gdraw_GCM_DestroyTextureFromResource(GDrawTexture *tex)
|
|
{
|
|
gdraw_GCM_WrappedTextureDestroy(tex);
|
|
}
|
|
|