diff options
| author | 3gg <3gg@shellblade.net> | 2025-12-27 12:03:39 -0800 |
|---|---|---|
| committer | 3gg <3gg@shellblade.net> | 2025-12-27 12:03:39 -0800 |
| commit | 5a079a2d114f96d4847d1ee305d5b7c16eeec50e (patch) | |
| tree | 8926ab44f168acf787d8e19608857b3af0f82758 /contrib/SDL-3.2.8/src/render/gpu/SDL_render_gpu.c | |
Initial commit
Diffstat (limited to 'contrib/SDL-3.2.8/src/render/gpu/SDL_render_gpu.c')
| -rw-r--r-- | contrib/SDL-3.2.8/src/render/gpu/SDL_render_gpu.c | 1276 |
1 files changed, 1276 insertions, 0 deletions
diff --git a/contrib/SDL-3.2.8/src/render/gpu/SDL_render_gpu.c b/contrib/SDL-3.2.8/src/render/gpu/SDL_render_gpu.c new file mode 100644 index 0000000..73c1dbe --- /dev/null +++ b/contrib/SDL-3.2.8/src/render/gpu/SDL_render_gpu.c | |||
| @@ -0,0 +1,1276 @@ | |||
| 1 | /* | ||
| 2 | Simple DirectMedia Layer | ||
| 3 | Copyright (C) 1997-2025 Sam Lantinga <slouken@libsdl.org> | ||
| 4 | |||
| 5 | This software is provided 'as-is', without any express or implied | ||
| 6 | warranty. In no event will the authors be held liable for any damages | ||
| 7 | arising from the use of this software. | ||
| 8 | |||
| 9 | Permission is granted to anyone to use this software for any purpose, | ||
| 10 | including commercial applications, and to alter it and redistribute it | ||
| 11 | freely, subject to the following restrictions: | ||
| 12 | |||
| 13 | 1. The origin of this software must not be misrepresented; you must not | ||
| 14 | claim that you wrote the original software. If you use this software | ||
| 15 | in a product, an acknowledgment in the product documentation would be | ||
| 16 | appreciated but is not required. | ||
| 17 | 2. Altered source versions must be plainly marked as such, and must not be | ||
| 18 | misrepresented as being the original software. | ||
| 19 | 3. This notice may not be removed or altered from any source distribution. | ||
| 20 | */ | ||
| 21 | #include "SDL_internal.h" | ||
| 22 | |||
| 23 | #ifdef SDL_VIDEO_RENDER_GPU | ||
| 24 | |||
| 25 | #include "../../video/SDL_pixels_c.h" | ||
| 26 | #include "../SDL_d3dmath.h" | ||
| 27 | #include "../SDL_sysrender.h" | ||
| 28 | #include "SDL_gpu_util.h" | ||
| 29 | #include "SDL_pipeline_gpu.h" | ||
| 30 | #include "SDL_shaders_gpu.h" | ||
| 31 | |||
// Uniform block pushed to the vertex shader for every draw call
// (see PushUniforms()).
typedef struct GPU_ShaderUniformData
{
    Float4X4 mvp;           // orthographic model-view-projection matrix built from the viewport
    SDL_FColor color;       // current draw color (already scaled/converted in GetDrawCmdColor)
    float texture_size[2];  // bound texture dimensions in texels; left zero when no texture
} GPU_ShaderUniformData;
| 38 | |||
// Per-renderer state for the SDL_GPU render backend.
typedef struct GPU_RenderData
{
    SDL_GPUDevice *device;            // owning GPU device; all resources are created from it
    GPU_Shaders shaders;              // shader set used by the pipeline cache
    GPU_PipelineCache pipeline_cache; // pipelines keyed by GPU_PipelineParameters (see Draw())

    // Backbuffer render target used when no texture target is set.
    // NOTE(review): presumably acquired from the swapchain each frame —
    // that code is outside this chunk.
    struct
    {
        SDL_GPUTexture *texture;
        SDL_GPUTextureFormat format;
        Uint32 width;
        Uint32 height;
    } backbuffer;

    // Requested swapchain configuration.
    struct
    {
        SDL_GPUSwapchainComposition composition;
        SDL_GPUPresentMode present_mode;
    } swapchain;

    // Shared vertex buffer, grown on demand by UploadVertices().
    struct
    {
        SDL_GPUTransferBuffer *transfer_buf; // CPU-visible staging buffer for uploads
        SDL_GPUBuffer *buffer;               // GPU-side vertex buffer bound in Draw()
        Uint32 buffer_size;                  // current capacity of both buffers, in bytes
    } vertices;

    // Mutable state for the command queue currently being executed.
    struct
    {
        SDL_GPURenderPass *render_pass;          // active pass; NULL between passes
        SDL_Texture *render_target;              // current target; NULL means backbuffer
        SDL_GPUCommandBuffer *command_buffer;    // command buffer all passes are recorded into
        SDL_GPUColorTargetInfo color_attachment; // attachment info for the next/current pass
        SDL_GPUViewport viewport;
        SDL_Rect scissor;
        SDL_FColor draw_color;
        bool scissor_enabled;     // scissor requested by SETCLIPRECT
        bool scissor_was_enabled; // scissor actually applied to the current pass
        GPU_ShaderUniformData shader_data;
    } state;

    SDL_GPUSampler *samplers[2][2]; // indexed [scale_mode][address_mode - 1], see SamplerPointer()
} GPU_RenderData;
| 82 | |||
// Per-texture state for the SDL_GPU render backend.
typedef struct GPU_TextureData
{
    SDL_GPUTexture *texture;     // GPU texture object
    SDL_GPUTextureFormat format; // GPU format derived from the SDL pixel format
    GPU_FragmentShaderID shader; // fragment shader chosen at creation time
    void *pixels;                // CPU staging pixels; only allocated for STREAMING access
    int pitch;                   // byte pitch of the staging pixels
    SDL_Rect locked_rect;        // region captured by GPU_LockTexture(), uploaded on unlock
} GPU_TextureData;
| 92 | |||
| 93 | static bool GPU_SupportsBlendMode(SDL_Renderer *renderer, SDL_BlendMode blendMode) | ||
| 94 | { | ||
| 95 | SDL_BlendFactor srcColorFactor = SDL_GetBlendModeSrcColorFactor(blendMode); | ||
| 96 | SDL_BlendFactor srcAlphaFactor = SDL_GetBlendModeSrcAlphaFactor(blendMode); | ||
| 97 | SDL_BlendOperation colorOperation = SDL_GetBlendModeColorOperation(blendMode); | ||
| 98 | SDL_BlendFactor dstColorFactor = SDL_GetBlendModeDstColorFactor(blendMode); | ||
| 99 | SDL_BlendFactor dstAlphaFactor = SDL_GetBlendModeDstAlphaFactor(blendMode); | ||
| 100 | SDL_BlendOperation alphaOperation = SDL_GetBlendModeAlphaOperation(blendMode); | ||
| 101 | |||
| 102 | if (GPU_ConvertBlendFactor(srcColorFactor) == SDL_GPU_BLENDFACTOR_INVALID || | ||
| 103 | GPU_ConvertBlendFactor(srcAlphaFactor) == SDL_GPU_BLENDFACTOR_INVALID || | ||
| 104 | GPU_ConvertBlendOperation(colorOperation) == SDL_GPU_BLENDOP_INVALID || | ||
| 105 | GPU_ConvertBlendFactor(dstColorFactor) == SDL_GPU_BLENDFACTOR_INVALID || | ||
| 106 | GPU_ConvertBlendFactor(dstAlphaFactor) == SDL_GPU_BLENDFACTOR_INVALID || | ||
| 107 | GPU_ConvertBlendOperation(alphaOperation) == SDL_GPU_BLENDOP_INVALID) { | ||
| 108 | return false; | ||
| 109 | } | ||
| 110 | |||
| 111 | return true; | ||
| 112 | } | ||
| 113 | |||
| 114 | static SDL_GPUTextureFormat PixFormatToTexFormat(SDL_PixelFormat pixel_format) | ||
| 115 | { | ||
| 116 | switch (pixel_format) { | ||
| 117 | case SDL_PIXELFORMAT_BGRA32: | ||
| 118 | case SDL_PIXELFORMAT_BGRX32: | ||
| 119 | return SDL_GPU_TEXTUREFORMAT_B8G8R8A8_UNORM; | ||
| 120 | case SDL_PIXELFORMAT_RGBA32: | ||
| 121 | case SDL_PIXELFORMAT_RGBX32: | ||
| 122 | return SDL_GPU_TEXTUREFORMAT_R8G8B8A8_UNORM; | ||
| 123 | |||
| 124 | // YUV TODO | ||
| 125 | case SDL_PIXELFORMAT_YV12: | ||
| 126 | case SDL_PIXELFORMAT_IYUV: | ||
| 127 | case SDL_PIXELFORMAT_NV12: | ||
| 128 | case SDL_PIXELFORMAT_NV21: | ||
| 129 | case SDL_PIXELFORMAT_UYVY: | ||
| 130 | default: | ||
| 131 | return SDL_GPU_TEXTUREFORMAT_INVALID; | ||
| 132 | } | ||
| 133 | } | ||
| 134 | |||
| 135 | static SDL_PixelFormat TexFormatToPixFormat(SDL_GPUTextureFormat tex_format) | ||
| 136 | { | ||
| 137 | switch (tex_format) { | ||
| 138 | case SDL_GPU_TEXTUREFORMAT_R8G8B8A8_UNORM: | ||
| 139 | return SDL_PIXELFORMAT_RGBA32; | ||
| 140 | case SDL_GPU_TEXTUREFORMAT_B8G8R8A8_UNORM: | ||
| 141 | return SDL_PIXELFORMAT_BGRA32; | ||
| 142 | case SDL_GPU_TEXTUREFORMAT_B5G6R5_UNORM: | ||
| 143 | return SDL_PIXELFORMAT_BGR565; | ||
| 144 | case SDL_GPU_TEXTUREFORMAT_B5G5R5A1_UNORM: | ||
| 145 | return SDL_PIXELFORMAT_BGRA5551; | ||
| 146 | case SDL_GPU_TEXTUREFORMAT_B4G4R4A4_UNORM: | ||
| 147 | return SDL_PIXELFORMAT_BGRA4444; | ||
| 148 | case SDL_GPU_TEXTUREFORMAT_R10G10B10A2_UNORM: | ||
| 149 | return SDL_PIXELFORMAT_ABGR2101010; | ||
| 150 | case SDL_GPU_TEXTUREFORMAT_R16G16B16A16_UNORM: | ||
| 151 | return SDL_PIXELFORMAT_RGBA64; | ||
| 152 | case SDL_GPU_TEXTUREFORMAT_R8G8B8A8_SNORM: | ||
| 153 | return SDL_PIXELFORMAT_RGBA32; | ||
| 154 | case SDL_GPU_TEXTUREFORMAT_R16G16B16A16_FLOAT: | ||
| 155 | return SDL_PIXELFORMAT_RGBA64_FLOAT; | ||
| 156 | case SDL_GPU_TEXTUREFORMAT_R32G32B32A32_FLOAT: | ||
| 157 | return SDL_PIXELFORMAT_RGBA128_FLOAT; | ||
| 158 | case SDL_GPU_TEXTUREFORMAT_R8G8B8A8_UINT: | ||
| 159 | return SDL_PIXELFORMAT_RGBA32; | ||
| 160 | case SDL_GPU_TEXTUREFORMAT_R16G16B16A16_UINT: | ||
| 161 | return SDL_PIXELFORMAT_RGBA64; | ||
| 162 | case SDL_GPU_TEXTUREFORMAT_R8G8B8A8_UNORM_SRGB: | ||
| 163 | return SDL_PIXELFORMAT_RGBA32; | ||
| 164 | case SDL_GPU_TEXTUREFORMAT_B8G8R8A8_UNORM_SRGB: | ||
| 165 | return SDL_PIXELFORMAT_BGRA32; | ||
| 166 | default: | ||
| 167 | return SDL_PIXELFORMAT_UNKNOWN; | ||
| 168 | } | ||
| 169 | } | ||
| 170 | |||
// Create the GPU texture (and, for streaming access, a CPU staging buffer)
// that backs an SDL_Texture. Returns false on unsupported format or
// allocation failure.
static bool GPU_CreateTexture(SDL_Renderer *renderer, SDL_Texture *texture, SDL_PropertiesID create_props)
{
    GPU_RenderData *renderdata = (GPU_RenderData *)renderer->internal;
    GPU_TextureData *data;
    SDL_GPUTextureFormat format;
    SDL_GPUTextureUsageFlags usage = SDL_GPU_TEXTUREUSAGE_SAMPLER;

    format = PixFormatToTexFormat(texture->format);

    if (format == SDL_GPU_TEXTUREFORMAT_INVALID) {
        return SDL_SetError("Texture format %s not supported by SDL_GPU",
                            SDL_GetPixelFormatName(texture->format));
    }

    data = (GPU_TextureData *)SDL_calloc(1, sizeof(*data));
    if (!data) {
        return false;
    }

    if (texture->access == SDL_TEXTUREACCESS_STREAMING) {
        // Streaming textures keep a CPU-side pixel buffer that Lock/Unlock
        // operate on; GPU_UnlockTexture() uploads it via GPU_UpdateTexture().
        size_t size;
        data->pitch = texture->w * SDL_BYTESPERPIXEL(texture->format);
        size = (size_t)texture->h * data->pitch;
        if (texture->format == SDL_PIXELFORMAT_YV12 ||
            texture->format == SDL_PIXELFORMAT_IYUV) {
            // Need to add size for the U and V planes
            size += 2 * ((texture->h + 1) / 2) * ((data->pitch + 1) / 2);
        }
        if (texture->format == SDL_PIXELFORMAT_NV12 ||
            texture->format == SDL_PIXELFORMAT_NV21) {
            // Need to add size for the U/V plane
            size += 2 * ((texture->h + 1) / 2) * ((data->pitch + 1) / 2);
        }
        data->pixels = SDL_calloc(1, size);
        if (!data->pixels) {
            SDL_free(data);
            return false;
        }

        // TODO allocate a persistent transfer buffer
    }

    if (texture->access == SDL_TEXTUREACCESS_TARGET) {
        usage |= SDL_GPU_TEXTUREUSAGE_COLOR_TARGET;
    }

    // texture->internal is assigned before the GPU texture is created.
    // NOTE(review): presumably the destroy path frees `data` (and pixels)
    // when creation fails below — that code is outside this chunk; confirm
    // there is no leak on the failure return.
    texture->internal = data;
    SDL_GPUTextureCreateInfo tci;
    SDL_zero(tci);
    tci.format = format;
    tci.layer_count_or_depth = 1;
    tci.num_levels = 1;
    tci.usage = usage;
    tci.width = texture->w;
    tci.height = texture->h;
    tci.sample_count = SDL_GPU_SAMPLECOUNT_1;

    data->format = format;
    data->texture = SDL_CreateGPUTexture(renderdata->device, &tci);

    if (!data->texture) {
        return false;
    }

    // Choose the fragment shader based on whether the SDL format carries
    // an alpha channel.
    if (texture->format == SDL_PIXELFORMAT_RGBA32 || texture->format == SDL_PIXELFORMAT_BGRA32) {
        data->shader = FRAG_SHADER_TEXTURE_RGBA;
    } else {
        data->shader = FRAG_SHADER_TEXTURE_RGB;
    }

    return true;
}
| 243 | |||
| 244 | static bool GPU_UpdateTexture(SDL_Renderer *renderer, SDL_Texture *texture, | ||
| 245 | const SDL_Rect *rect, const void *pixels, int pitch) | ||
| 246 | { | ||
| 247 | GPU_RenderData *renderdata = (GPU_RenderData *)renderer->internal; | ||
| 248 | GPU_TextureData *data = (GPU_TextureData *)texture->internal; | ||
| 249 | const Uint32 texturebpp = SDL_BYTESPERPIXEL(texture->format); | ||
| 250 | |||
| 251 | size_t row_size, data_size; | ||
| 252 | |||
| 253 | if (!SDL_size_mul_check_overflow(rect->w, texturebpp, &row_size) || | ||
| 254 | !SDL_size_mul_check_overflow(rect->h, row_size, &data_size)) { | ||
| 255 | return SDL_SetError("update size overflow"); | ||
| 256 | } | ||
| 257 | |||
| 258 | SDL_GPUTransferBufferCreateInfo tbci; | ||
| 259 | SDL_zero(tbci); | ||
| 260 | tbci.size = (Uint32)data_size; | ||
| 261 | tbci.usage = SDL_GPU_TRANSFERBUFFERUSAGE_UPLOAD; | ||
| 262 | |||
| 263 | SDL_GPUTransferBuffer *tbuf = SDL_CreateGPUTransferBuffer(renderdata->device, &tbci); | ||
| 264 | |||
| 265 | if (tbuf == NULL) { | ||
| 266 | return false; | ||
| 267 | } | ||
| 268 | |||
| 269 | Uint8 *output = SDL_MapGPUTransferBuffer(renderdata->device, tbuf, false); | ||
| 270 | |||
| 271 | if ((size_t)pitch == row_size) { | ||
| 272 | SDL_memcpy(output, pixels, data_size); | ||
| 273 | } else { | ||
| 274 | // FIXME is negative pitch supposed to work? | ||
| 275 | // If not, maybe use SDL_GPUTextureTransferInfo::pixels_per_row instead of this | ||
| 276 | const Uint8 *input = pixels; | ||
| 277 | |||
| 278 | for (int i = 0; i < rect->h; ++i) { | ||
| 279 | SDL_memcpy(output, input, row_size); | ||
| 280 | output += row_size; | ||
| 281 | input += pitch; | ||
| 282 | } | ||
| 283 | } | ||
| 284 | |||
| 285 | SDL_UnmapGPUTransferBuffer(renderdata->device, tbuf); | ||
| 286 | |||
| 287 | SDL_GPUCommandBuffer *cbuf = renderdata->state.command_buffer; | ||
| 288 | SDL_GPUCopyPass *cpass = SDL_BeginGPUCopyPass(cbuf); | ||
| 289 | |||
| 290 | SDL_GPUTextureTransferInfo tex_src; | ||
| 291 | SDL_zero(tex_src); | ||
| 292 | tex_src.transfer_buffer = tbuf; | ||
| 293 | tex_src.rows_per_layer = rect->h; | ||
| 294 | tex_src.pixels_per_row = rect->w; | ||
| 295 | |||
| 296 | SDL_GPUTextureRegion tex_dst; | ||
| 297 | SDL_zero(tex_dst); | ||
| 298 | tex_dst.texture = data->texture; | ||
| 299 | tex_dst.x = rect->x; | ||
| 300 | tex_dst.y = rect->y; | ||
| 301 | tex_dst.w = rect->w; | ||
| 302 | tex_dst.h = rect->h; | ||
| 303 | tex_dst.d = 1; | ||
| 304 | |||
| 305 | SDL_UploadToGPUTexture(cpass, &tex_src, &tex_dst, false); | ||
| 306 | SDL_EndGPUCopyPass(cpass); | ||
| 307 | SDL_ReleaseGPUTransferBuffer(renderdata->device, tbuf); | ||
| 308 | |||
| 309 | return true; | ||
| 310 | } | ||
| 311 | |||
| 312 | static bool GPU_LockTexture(SDL_Renderer *renderer, SDL_Texture *texture, | ||
| 313 | const SDL_Rect *rect, void **pixels, int *pitch) | ||
| 314 | { | ||
| 315 | GPU_TextureData *data = (GPU_TextureData *)texture->internal; | ||
| 316 | |||
| 317 | data->locked_rect = *rect; | ||
| 318 | *pixels = | ||
| 319 | (void *)((Uint8 *)data->pixels + rect->y * data->pitch + | ||
| 320 | rect->x * SDL_BYTESPERPIXEL(texture->format)); | ||
| 321 | *pitch = data->pitch; | ||
| 322 | return true; | ||
| 323 | } | ||
| 324 | |||
| 325 | static void GPU_UnlockTexture(SDL_Renderer *renderer, SDL_Texture *texture) | ||
| 326 | { | ||
| 327 | GPU_TextureData *data = (GPU_TextureData *)texture->internal; | ||
| 328 | const SDL_Rect *rect; | ||
| 329 | void *pixels; | ||
| 330 | |||
| 331 | rect = &data->locked_rect; | ||
| 332 | pixels = | ||
| 333 | (void *)((Uint8 *)data->pixels + rect->y * data->pitch + | ||
| 334 | rect->x * SDL_BYTESPERPIXEL(texture->format)); | ||
| 335 | GPU_UpdateTexture(renderer, texture, rect, pixels, data->pitch); | ||
| 336 | } | ||
| 337 | |||
| 338 | static void GPU_SetTextureScaleMode(SDL_Renderer *renderer, SDL_Texture *texture, SDL_ScaleMode scale_mode) | ||
| 339 | { | ||
| 340 | // nothing to do in this backend. | ||
| 341 | } | ||
| 342 | |||
| 343 | static bool GPU_SetRenderTarget(SDL_Renderer *renderer, SDL_Texture *texture) | ||
| 344 | { | ||
| 345 | GPU_RenderData *data = (GPU_RenderData *)renderer->internal; | ||
| 346 | |||
| 347 | data->state.render_target = texture; | ||
| 348 | |||
| 349 | return true; | ||
| 350 | } | ||
| 351 | |||
| 352 | static bool GPU_QueueNoOp(SDL_Renderer *renderer, SDL_RenderCommand *cmd) | ||
| 353 | { | ||
| 354 | return true; // nothing to do in this backend. | ||
| 355 | } | ||
| 356 | |||
| 357 | static SDL_FColor GetDrawCmdColor(SDL_Renderer *renderer, SDL_RenderCommand *cmd) | ||
| 358 | { | ||
| 359 | SDL_FColor color = cmd->data.color.color; | ||
| 360 | |||
| 361 | if (SDL_RenderingLinearSpace(renderer)) { | ||
| 362 | SDL_ConvertToLinear(&color); | ||
| 363 | } | ||
| 364 | |||
| 365 | color.r *= cmd->data.color.color_scale; | ||
| 366 | color.g *= cmd->data.color.color_scale; | ||
| 367 | color.b *= cmd->data.color.color_scale; | ||
| 368 | |||
| 369 | return color; | ||
| 370 | } | ||
| 371 | |||
| 372 | static bool GPU_QueueDrawPoints(SDL_Renderer *renderer, SDL_RenderCommand *cmd, const SDL_FPoint *points, int count) | ||
| 373 | { | ||
| 374 | float *verts = (float *)SDL_AllocateRenderVertices(renderer, count * 2 * sizeof(float), 0, &cmd->data.draw.first); | ||
| 375 | |||
| 376 | if (!verts) { | ||
| 377 | return false; | ||
| 378 | } | ||
| 379 | |||
| 380 | cmd->data.draw.count = count; | ||
| 381 | for (int i = 0; i < count; i++) { | ||
| 382 | *(verts++) = 0.5f + points[i].x; | ||
| 383 | *(verts++) = 0.5f + points[i].y; | ||
| 384 | } | ||
| 385 | |||
| 386 | return true; | ||
| 387 | } | ||
| 388 | |||
| 389 | static bool GPU_QueueGeometry(SDL_Renderer *renderer, SDL_RenderCommand *cmd, SDL_Texture *texture, | ||
| 390 | const float *xy, int xy_stride, const SDL_FColor *color, int color_stride, const float *uv, int uv_stride, | ||
| 391 | int num_vertices, const void *indices, int num_indices, int size_indices, | ||
| 392 | float scale_x, float scale_y) | ||
| 393 | { | ||
| 394 | int i; | ||
| 395 | int count = indices ? num_indices : num_vertices; | ||
| 396 | float *verts; | ||
| 397 | size_t sz = 2 * sizeof(float) + 4 * sizeof(float) + (texture ? 2 : 0) * sizeof(float); | ||
| 398 | const float color_scale = cmd->data.draw.color_scale; | ||
| 399 | bool convert_color = SDL_RenderingLinearSpace(renderer); | ||
| 400 | |||
| 401 | verts = (float *)SDL_AllocateRenderVertices(renderer, count * sz, 0, &cmd->data.draw.first); | ||
| 402 | if (!verts) { | ||
| 403 | return false; | ||
| 404 | } | ||
| 405 | |||
| 406 | cmd->data.draw.count = count; | ||
| 407 | size_indices = indices ? size_indices : 0; | ||
| 408 | |||
| 409 | for (i = 0; i < count; i++) { | ||
| 410 | int j; | ||
| 411 | float *xy_; | ||
| 412 | SDL_FColor col_; | ||
| 413 | if (size_indices == 4) { | ||
| 414 | j = ((const Uint32 *)indices)[i]; | ||
| 415 | } else if (size_indices == 2) { | ||
| 416 | j = ((const Uint16 *)indices)[i]; | ||
| 417 | } else if (size_indices == 1) { | ||
| 418 | j = ((const Uint8 *)indices)[i]; | ||
| 419 | } else { | ||
| 420 | j = i; | ||
| 421 | } | ||
| 422 | |||
| 423 | xy_ = (float *)((char *)xy + j * xy_stride); | ||
| 424 | |||
| 425 | *(verts++) = xy_[0] * scale_x; | ||
| 426 | *(verts++) = xy_[1] * scale_y; | ||
| 427 | |||
| 428 | col_ = *(SDL_FColor *)((char *)color + j * color_stride); | ||
| 429 | if (convert_color) { | ||
| 430 | SDL_ConvertToLinear(&col_); | ||
| 431 | } | ||
| 432 | |||
| 433 | // FIXME: The Vulkan backend doesn't multiply by color_scale. GL does. I'm not sure which one is wrong. | ||
| 434 | // ANSWER: The color scale should be applied in linear space when using the scRGB colorspace. This is done in shaders in the Vulkan backend. | ||
| 435 | *(verts++) = col_.r * color_scale; | ||
| 436 | *(verts++) = col_.g * color_scale; | ||
| 437 | *(verts++) = col_.b * color_scale; | ||
| 438 | *(verts++) = col_.a; | ||
| 439 | |||
| 440 | if (texture) { | ||
| 441 | float *uv_ = (float *)((char *)uv + j * uv_stride); | ||
| 442 | *(verts++) = uv_[0] * texture->w; | ||
| 443 | *(verts++) = uv_[1] * texture->h; | ||
| 444 | } | ||
| 445 | } | ||
| 446 | return true; | ||
| 447 | } | ||
| 448 | |||
| 449 | static void GPU_InvalidateCachedState(SDL_Renderer *renderer) | ||
| 450 | { | ||
| 451 | GPU_RenderData *data = (GPU_RenderData *)renderer->internal; | ||
| 452 | |||
| 453 | data->state.render_target = NULL; | ||
| 454 | data->state.scissor_enabled = false; | ||
| 455 | } | ||
| 456 | |||
// End the current render pass (if any) and begin a new one with the
// current color attachment settings. Returns the new pass.
static SDL_GPURenderPass *RestartRenderPass(GPU_RenderData *data)
{
    if (data->state.render_pass) {
        SDL_EndGPURenderPass(data->state.render_pass);
    }

    data->state.render_pass = SDL_BeginGPURenderPass(
        data->state.command_buffer, &data->state.color_attachment, 1, NULL);

    // *** FIXME ***
    // This is busted. We should be able to know which load op to use.
    // LOAD is incorrect behavior most of the time, unless we had to break a render pass.
    // -cosmonaut
    // Reset to LOAD after the pass begins so a one-shot CLEAR request is
    // consumed and not repeated by subsequent restarts.
    data->state.color_attachment.load_op = SDL_GPU_LOADOP_LOAD;
    // A fresh pass has no scissor applied yet.
    data->state.scissor_was_enabled = false;

    return data->state.render_pass;
}
| 475 | |||
| 476 | static void PushUniforms(GPU_RenderData *data, SDL_RenderCommand *cmd) | ||
| 477 | { | ||
| 478 | GPU_ShaderUniformData uniforms; | ||
| 479 | SDL_zero(uniforms); | ||
| 480 | uniforms.mvp.m[0][0] = 2.0f / data->state.viewport.w; | ||
| 481 | uniforms.mvp.m[1][1] = -2.0f / data->state.viewport.h; | ||
| 482 | uniforms.mvp.m[2][2] = 1.0f; | ||
| 483 | uniforms.mvp.m[3][0] = -1.0f; | ||
| 484 | uniforms.mvp.m[3][1] = 1.0f; | ||
| 485 | uniforms.mvp.m[3][3] = 1.0f; | ||
| 486 | |||
| 487 | uniforms.color = data->state.draw_color; | ||
| 488 | |||
| 489 | if (cmd->data.draw.texture) { | ||
| 490 | uniforms.texture_size[0] = cmd->data.draw.texture->w; | ||
| 491 | uniforms.texture_size[1] = cmd->data.draw.texture->h; | ||
| 492 | } | ||
| 493 | |||
| 494 | SDL_PushGPUVertexUniformData(data->state.command_buffer, 0, &uniforms, sizeof(uniforms)); | ||
| 495 | } | ||
| 496 | |||
// Look up the slot for the cached sampler matching the given address and
// scale modes. NOTE(review): assumes scale_mode indexes 0..1 and
// address_mode indexes 1..2 to fit samplers[2][2] — the sampler creation
// code is outside this chunk; confirm the valid ranges there.
static SDL_GPUSampler **SamplerPointer(
    GPU_RenderData *data, SDL_TextureAddressMode address_mode, SDL_ScaleMode scale_mode)
{
    return &data->samplers[scale_mode][address_mode - 1];
}
| 502 | |||
| 503 | static void SetViewportAndScissor(GPU_RenderData *data) | ||
| 504 | { | ||
| 505 | SDL_SetGPUViewport(data->state.render_pass, &data->state.viewport); | ||
| 506 | |||
| 507 | if (data->state.scissor_enabled) { | ||
| 508 | SDL_SetGPUScissor(data->state.render_pass, &data->state.scissor); | ||
| 509 | data->state.scissor_was_enabled = true; | ||
| 510 | } else if (data->state.scissor_was_enabled) { | ||
| 511 | SDL_Rect r; | ||
| 512 | r.x = (int)data->state.viewport.x; | ||
| 513 | r.y = (int)data->state.viewport.y; | ||
| 514 | r.w = (int)data->state.viewport.w; | ||
| 515 | r.h = (int)data->state.viewport.h; | ||
| 516 | SDL_SetGPUScissor(data->state.render_pass, &r); | ||
| 517 | data->state.scissor_was_enabled = false; | ||
| 518 | } | ||
| 519 | } | ||
| 520 | |||
// Issue one draw call for num_verts vertices starting at byte `offset` in
// the shared vertex buffer: selects shaders from the command, fetches a
// matching pipeline from the cache, binds state and draws. Silently
// returns if no pipeline could be obtained.
static void Draw(
    GPU_RenderData *data, SDL_RenderCommand *cmd,
    Uint32 num_verts,
    Uint32 offset,
    SDL_GPUPrimitiveType prim)
{
    // Start a pass if none is active; a pending CLEAR load op also forces
    // a restart so the clear actually happens.
    if (!data->state.render_pass || data->state.color_attachment.load_op == SDL_GPU_LOADOP_CLEAR) {
        RestartRenderPass(data);
    }

    GPU_VertexShaderID v_shader;
    GPU_FragmentShaderID f_shader;
    SDL_GPURenderPass *pass = data->state.render_pass;
    GPU_TextureData *tdata = NULL;

    if (cmd->data.draw.texture) {
        tdata = (GPU_TextureData *)cmd->data.draw.texture->internal;
    }

    // Shader selection: triangles are either textured or flat-colored;
    // lines and points always use the line/point shader pair.
    if (prim == SDL_GPU_PRIMITIVETYPE_TRIANGLELIST) {
        if (cmd->data.draw.texture) {
            v_shader = VERT_SHADER_TRI_TEXTURE;
            f_shader = tdata->shader;
        } else {
            v_shader = VERT_SHADER_TRI_COLOR;
            f_shader = FRAG_SHADER_COLOR;
        }
    } else {
        v_shader = VERT_SHADER_LINEPOINT;
        f_shader = FRAG_SHADER_COLOR;
    }

    GPU_PipelineParameters pipe_params;
    SDL_zero(pipe_params);
    pipe_params.blend_mode = cmd->data.draw.blend;
    pipe_params.vert_shader = v_shader;
    pipe_params.frag_shader = f_shader;
    pipe_params.primitive_type = prim;

    // The pipeline's attachment format must match whatever we render into.
    if (data->state.render_target) {
        pipe_params.attachment_format = ((GPU_TextureData *)data->state.render_target->internal)->format;
    } else {
        pipe_params.attachment_format = data->backbuffer.format;
    }

    SDL_GPUGraphicsPipeline *pipe = GPU_GetPipeline(&data->pipeline_cache, &data->shaders, data->device, &pipe_params);

    if (!pipe) {
        return;
    }

    SetViewportAndScissor(data);
    SDL_BindGPUGraphicsPipeline(data->state.render_pass, pipe);

    if (tdata) {
        // Bind the texture with the sampler matching its address/scale mode.
        SDL_GPUTextureSamplerBinding sampler_bind;
        SDL_zero(sampler_bind);
        sampler_bind.sampler = *SamplerPointer(data, cmd->data.draw.texture_address_mode, cmd->data.draw.texture->scaleMode);
        sampler_bind.texture = tdata->texture;
        SDL_BindGPUFragmentSamplers(pass, 0, &sampler_bind, 1);
    }

    // Bind the shared vertex buffer at this command's byte offset.
    SDL_GPUBufferBinding buffer_bind;
    SDL_zero(buffer_bind);
    buffer_bind.buffer = data->vertices.buffer;
    buffer_bind.offset = offset;

    SDL_BindGPUVertexBuffers(pass, 0, &buffer_bind, 1);
    PushUniforms(data, cmd);
    SDL_DrawGPUPrimitives(data->state.render_pass, num_verts, 1, 0, 0);
}
| 592 | |||
| 593 | static void ReleaseVertexBuffer(GPU_RenderData *data) | ||
| 594 | { | ||
| 595 | if (data->vertices.buffer) { | ||
| 596 | SDL_ReleaseGPUBuffer(data->device, data->vertices.buffer); | ||
| 597 | } | ||
| 598 | |||
| 599 | if (data->vertices.transfer_buf) { | ||
| 600 | SDL_ReleaseGPUTransferBuffer(data->device, data->vertices.transfer_buf); | ||
| 601 | } | ||
| 602 | |||
| 603 | data->vertices.buffer_size = 0; | ||
| 604 | } | ||
| 605 | |||
| 606 | static bool InitVertexBuffer(GPU_RenderData *data, Uint32 size) | ||
| 607 | { | ||
| 608 | SDL_GPUBufferCreateInfo bci; | ||
| 609 | SDL_zero(bci); | ||
| 610 | bci.size = size; | ||
| 611 | bci.usage = SDL_GPU_BUFFERUSAGE_VERTEX; | ||
| 612 | |||
| 613 | data->vertices.buffer = SDL_CreateGPUBuffer(data->device, &bci); | ||
| 614 | |||
| 615 | if (!data->vertices.buffer) { | ||
| 616 | return false; | ||
| 617 | } | ||
| 618 | |||
| 619 | SDL_GPUTransferBufferCreateInfo tbci; | ||
| 620 | SDL_zero(tbci); | ||
| 621 | tbci.size = size; | ||
| 622 | tbci.usage = SDL_GPU_TRANSFERBUFFERUSAGE_UPLOAD; | ||
| 623 | |||
| 624 | data->vertices.transfer_buf = SDL_CreateGPUTransferBuffer(data->device, &tbci); | ||
| 625 | |||
| 626 | if (!data->vertices.transfer_buf) { | ||
| 627 | return false; | ||
| 628 | } | ||
| 629 | |||
| 630 | return true; | ||
| 631 | } | ||
| 632 | |||
| 633 | static bool UploadVertices(GPU_RenderData *data, void *vertices, size_t vertsize) | ||
| 634 | { | ||
| 635 | if (vertsize == 0) { | ||
| 636 | return true; | ||
| 637 | } | ||
| 638 | |||
| 639 | if (vertsize > data->vertices.buffer_size) { | ||
| 640 | ReleaseVertexBuffer(data); | ||
| 641 | if (!InitVertexBuffer(data, (Uint32)vertsize)) { | ||
| 642 | return false; | ||
| 643 | } | ||
| 644 | } | ||
| 645 | |||
| 646 | void *staging_buf = SDL_MapGPUTransferBuffer(data->device, data->vertices.transfer_buf, true); | ||
| 647 | SDL_memcpy(staging_buf, vertices, vertsize); | ||
| 648 | SDL_UnmapGPUTransferBuffer(data->device, data->vertices.transfer_buf); | ||
| 649 | |||
| 650 | SDL_GPUCopyPass *pass = SDL_BeginGPUCopyPass(data->state.command_buffer); | ||
| 651 | |||
| 652 | if (!pass) { | ||
| 653 | return false; | ||
| 654 | } | ||
| 655 | |||
| 656 | SDL_GPUTransferBufferLocation src; | ||
| 657 | SDL_zero(src); | ||
| 658 | src.transfer_buffer = data->vertices.transfer_buf; | ||
| 659 | |||
| 660 | SDL_GPUBufferRegion dst; | ||
| 661 | SDL_zero(dst); | ||
| 662 | dst.buffer = data->vertices.buffer; | ||
| 663 | dst.size = (Uint32)vertsize; | ||
| 664 | |||
| 665 | SDL_UploadToGPUBuffer(pass, &src, &dst, true); | ||
| 666 | SDL_EndGPUCopyPass(pass); | ||
| 667 | |||
| 668 | return true; | ||
| 669 | } | ||
| 670 | |||
| 671 | // *** FIXME *** | ||
| 672 | // We might be able to run these data uploads on a separate command buffer | ||
| 673 | // which would allow us to avoid breaking render passes. | ||
| 674 | // Honestly I'm a little skeptical of this entire approach, | ||
| 675 | // we already have a command buffer structure | ||
| 676 | // so it feels weird to be deferring the operations manually. | ||
| 677 | // We could also fairly easily run the geometry transformations | ||
| 678 | // on compute shaders instead of the CPU, which would be a HUGE performance win. | ||
| 679 | // -cosmonaut | ||
| 680 | static bool GPU_RunCommandQueue(SDL_Renderer *renderer, SDL_RenderCommand *cmd, void *vertices, size_t vertsize) | ||
| 681 | { | ||
| 682 | GPU_RenderData *data = (GPU_RenderData *)renderer->internal; | ||
| 683 | |||
| 684 | if (!UploadVertices(data, vertices, vertsize)) { | ||
| 685 | return false; | ||
| 686 | } | ||
| 687 | |||
| 688 | data->state.color_attachment.load_op = SDL_GPU_LOADOP_LOAD; | ||
| 689 | |||
| 690 | if (renderer->target) { | ||
| 691 | GPU_TextureData *tdata = renderer->target->internal; | ||
| 692 | data->state.color_attachment.texture = tdata->texture; | ||
| 693 | } else { | ||
| 694 | data->state.color_attachment.texture = data->backbuffer.texture; | ||
| 695 | } | ||
| 696 | |||
| 697 | if (!data->state.color_attachment.texture) { | ||
| 698 | return SDL_SetError("Render target texture is NULL"); | ||
| 699 | } | ||
| 700 | |||
| 701 | while (cmd) { | ||
| 702 | switch (cmd->command) { | ||
| 703 | case SDL_RENDERCMD_SETDRAWCOLOR: | ||
| 704 | { | ||
| 705 | data->state.draw_color = GetDrawCmdColor(renderer, cmd); | ||
| 706 | break; | ||
| 707 | } | ||
| 708 | |||
| 709 | case SDL_RENDERCMD_SETVIEWPORT: | ||
| 710 | { | ||
| 711 | SDL_Rect *viewport = &cmd->data.viewport.rect; | ||
| 712 | data->state.viewport.x = viewport->x; | ||
| 713 | data->state.viewport.y = viewport->y; | ||
| 714 | data->state.viewport.w = viewport->w; | ||
| 715 | data->state.viewport.h = viewport->h; | ||
| 716 | break; | ||
| 717 | } | ||
| 718 | |||
| 719 | case SDL_RENDERCMD_SETCLIPRECT: | ||
| 720 | { | ||
| 721 | const SDL_Rect *rect = &cmd->data.cliprect.rect; | ||
| 722 | data->state.scissor.x = (int)data->state.viewport.x + rect->x; | ||
| 723 | data->state.scissor.y = (int)data->state.viewport.y + rect->y; | ||
| 724 | data->state.scissor.w = rect->w; | ||
| 725 | data->state.scissor.h = rect->h; | ||
| 726 | data->state.scissor_enabled = cmd->data.cliprect.enabled; | ||
| 727 | break; | ||
| 728 | } | ||
| 729 | |||
| 730 | case SDL_RENDERCMD_CLEAR: | ||
| 731 | { | ||
| 732 | data->state.color_attachment.clear_color = GetDrawCmdColor(renderer, cmd); | ||
| 733 | data->state.color_attachment.load_op = SDL_GPU_LOADOP_CLEAR; | ||
| 734 | break; | ||
| 735 | } | ||
| 736 | |||
| 737 | case SDL_RENDERCMD_FILL_RECTS: // unused | ||
| 738 | break; | ||
| 739 | |||
| 740 | case SDL_RENDERCMD_COPY: // unused | ||
| 741 | break; | ||
| 742 | |||
| 743 | case SDL_RENDERCMD_COPY_EX: // unused | ||
| 744 | break; | ||
| 745 | |||
| 746 | case SDL_RENDERCMD_DRAW_LINES: | ||
| 747 | { | ||
| 748 | Uint32 count = (Uint32)cmd->data.draw.count; | ||
| 749 | Uint32 offset = (Uint32)cmd->data.draw.first; | ||
| 750 | |||
| 751 | if (count > 2) { | ||
| 752 | // joined lines cannot be grouped | ||
| 753 | Draw(data, cmd, count, offset, SDL_GPU_PRIMITIVETYPE_LINESTRIP); | ||
| 754 | } else { | ||
| 755 | // let's group non joined lines | ||
| 756 | SDL_RenderCommand *finalcmd = cmd; | ||
| 757 | SDL_RenderCommand *nextcmd = cmd->next; | ||
| 758 | SDL_BlendMode thisblend = cmd->data.draw.blend; | ||
| 759 | |||
| 760 | while (nextcmd) { | ||
| 761 | const SDL_RenderCommandType nextcmdtype = nextcmd->command; | ||
| 762 | if (nextcmdtype != SDL_RENDERCMD_DRAW_LINES) { | ||
| 763 | break; // can't go any further on this draw call, different render command up next. | ||
| 764 | } else if (nextcmd->data.draw.count != 2) { | ||
| 765 | break; // can't go any further on this draw call, those are joined lines | ||
| 766 | } else if (nextcmd->data.draw.blend != thisblend) { | ||
| 767 | break; // can't go any further on this draw call, different blendmode copy up next. | ||
| 768 | } else { | ||
| 769 | finalcmd = nextcmd; // we can combine copy operations here. Mark this one as the furthest okay command. | ||
| 770 | count += (Uint32)nextcmd->data.draw.count; | ||
| 771 | } | ||
| 772 | nextcmd = nextcmd->next; | ||
| 773 | } | ||
| 774 | |||
| 775 | Draw(data, cmd, count, offset, SDL_GPU_PRIMITIVETYPE_LINELIST); | ||
| 776 | cmd = finalcmd; // skip any copy commands we just combined in here. | ||
| 777 | } | ||
| 778 | break; | ||
| 779 | } | ||
| 780 | |||
| 781 | case SDL_RENDERCMD_DRAW_POINTS: | ||
| 782 | case SDL_RENDERCMD_GEOMETRY: | ||
| 783 | { | ||
| 784 | /* as long as we have the same copy command in a row, with the | ||
| 785 | same texture, we can combine them all into a single draw call. */ | ||
| 786 | SDL_Texture *thistexture = cmd->data.draw.texture; | ||
| 787 | SDL_BlendMode thisblend = cmd->data.draw.blend; | ||
| 788 | const SDL_RenderCommandType thiscmdtype = cmd->command; | ||
| 789 | SDL_RenderCommand *finalcmd = cmd; | ||
| 790 | SDL_RenderCommand *nextcmd = cmd->next; | ||
| 791 | Uint32 count = (Uint32)cmd->data.draw.count; | ||
| 792 | Uint32 offset = (Uint32)cmd->data.draw.first; | ||
| 793 | |||
| 794 | while (nextcmd) { | ||
| 795 | const SDL_RenderCommandType nextcmdtype = nextcmd->command; | ||
| 796 | if (nextcmdtype != thiscmdtype) { | ||
| 797 | break; // can't go any further on this draw call, different render command up next. | ||
| 798 | } else if (nextcmd->data.draw.texture != thistexture || nextcmd->data.draw.blend != thisblend) { | ||
| 799 | // FIXME should we check address mode too? | ||
| 800 | break; // can't go any further on this draw call, different texture/blendmode copy up next. | ||
| 801 | } else { | ||
| 802 | finalcmd = nextcmd; // we can combine copy operations here. Mark this one as the furthest okay command. | ||
| 803 | count += (Uint32)nextcmd->data.draw.count; | ||
| 804 | } | ||
| 805 | nextcmd = nextcmd->next; | ||
| 806 | } | ||
| 807 | |||
| 808 | SDL_GPUPrimitiveType prim = SDL_GPU_PRIMITIVETYPE_TRIANGLELIST; // SDL_RENDERCMD_GEOMETRY | ||
| 809 | if (thiscmdtype == SDL_RENDERCMD_DRAW_POINTS) { | ||
| 810 | prim = SDL_GPU_PRIMITIVETYPE_POINTLIST; | ||
| 811 | } | ||
| 812 | |||
| 813 | Draw(data, cmd, count, offset, prim); | ||
| 814 | |||
| 815 | cmd = finalcmd; // skip any copy commands we just combined in here. | ||
| 816 | break; | ||
| 817 | } | ||
| 818 | |||
| 819 | case SDL_RENDERCMD_NO_OP: | ||
| 820 | break; | ||
| 821 | } | ||
| 822 | |||
| 823 | cmd = cmd->next; | ||
| 824 | } | ||
| 825 | |||
| 826 | if (data->state.color_attachment.load_op == SDL_GPU_LOADOP_CLEAR) { | ||
| 827 | RestartRenderPass(data); | ||
| 828 | } | ||
| 829 | |||
| 830 | if (data->state.render_pass) { | ||
| 831 | SDL_EndGPURenderPass(data->state.render_pass); | ||
| 832 | data->state.render_pass = NULL; | ||
| 833 | } | ||
| 834 | |||
| 835 | return true; | ||
| 836 | } | ||
| 837 | |||
| 838 | static SDL_Surface *GPU_RenderReadPixels(SDL_Renderer *renderer, const SDL_Rect *rect) | ||
| 839 | { | ||
| 840 | GPU_RenderData *data = (GPU_RenderData *)renderer->internal; | ||
| 841 | SDL_GPUTexture *gpu_tex; | ||
| 842 | SDL_PixelFormat pixfmt; | ||
| 843 | |||
| 844 | if (data->state.render_target) { | ||
| 845 | SDL_Texture *texture = data->state.render_target; | ||
| 846 | GPU_TextureData *texdata = texture->internal; | ||
| 847 | gpu_tex = texdata->texture; | ||
| 848 | pixfmt = texture->format; | ||
| 849 | } else { | ||
| 850 | gpu_tex = data->backbuffer.texture; | ||
| 851 | pixfmt = TexFormatToPixFormat(data->backbuffer.format); | ||
| 852 | |||
| 853 | if (pixfmt == SDL_PIXELFORMAT_UNKNOWN) { | ||
| 854 | SDL_SetError("Unsupported backbuffer format"); | ||
| 855 | return NULL; | ||
| 856 | } | ||
| 857 | } | ||
| 858 | |||
| 859 | Uint32 bpp = SDL_BYTESPERPIXEL(pixfmt); | ||
| 860 | size_t row_size, image_size; | ||
| 861 | |||
| 862 | if (!SDL_size_mul_check_overflow(rect->w, bpp, &row_size) || | ||
| 863 | !SDL_size_mul_check_overflow(rect->h, row_size, &image_size)) { | ||
| 864 | SDL_SetError("read size overflow"); | ||
| 865 | return NULL; | ||
| 866 | } | ||
| 867 | |||
| 868 | SDL_Surface *surface = SDL_CreateSurface(rect->w, rect->h, pixfmt); | ||
| 869 | |||
| 870 | if (!surface) { | ||
| 871 | return NULL; | ||
| 872 | } | ||
| 873 | |||
| 874 | SDL_GPUTransferBufferCreateInfo tbci; | ||
| 875 | SDL_zero(tbci); | ||
| 876 | tbci.size = (Uint32)image_size; | ||
| 877 | tbci.usage = SDL_GPU_TRANSFERBUFFERUSAGE_DOWNLOAD; | ||
| 878 | |||
| 879 | SDL_GPUTransferBuffer *tbuf = SDL_CreateGPUTransferBuffer(data->device, &tbci); | ||
| 880 | |||
| 881 | if (!tbuf) { | ||
| 882 | return NULL; | ||
| 883 | } | ||
| 884 | |||
| 885 | SDL_GPUCopyPass *pass = SDL_BeginGPUCopyPass(data->state.command_buffer); | ||
| 886 | |||
| 887 | SDL_GPUTextureRegion src; | ||
| 888 | SDL_zero(src); | ||
| 889 | src.texture = gpu_tex; | ||
| 890 | src.x = rect->x; | ||
| 891 | src.y = rect->y; | ||
| 892 | src.w = rect->w; | ||
| 893 | src.h = rect->h; | ||
| 894 | src.d = 1; | ||
| 895 | |||
| 896 | SDL_GPUTextureTransferInfo dst; | ||
| 897 | SDL_zero(dst); | ||
| 898 | dst.transfer_buffer = tbuf; | ||
| 899 | dst.rows_per_layer = rect->h; | ||
| 900 | dst.pixels_per_row = rect->w; | ||
| 901 | |||
| 902 | SDL_DownloadFromGPUTexture(pass, &src, &dst); | ||
| 903 | SDL_EndGPUCopyPass(pass); | ||
| 904 | |||
| 905 | SDL_GPUFence *fence = SDL_SubmitGPUCommandBufferAndAcquireFence(data->state.command_buffer); | ||
| 906 | SDL_WaitForGPUFences(data->device, true, &fence, 1); | ||
| 907 | SDL_ReleaseGPUFence(data->device, fence); | ||
| 908 | data->state.command_buffer = SDL_AcquireGPUCommandBuffer(data->device); | ||
| 909 | |||
| 910 | void *mapped_tbuf = SDL_MapGPUTransferBuffer(data->device, tbuf, false); | ||
| 911 | |||
| 912 | if ((size_t)surface->pitch == row_size) { | ||
| 913 | SDL_memcpy(surface->pixels, mapped_tbuf, image_size); | ||
| 914 | } else { | ||
| 915 | Uint8 *input = mapped_tbuf; | ||
| 916 | Uint8 *output = surface->pixels; | ||
| 917 | |||
| 918 | for (int row = 0; row < rect->h; ++row) { | ||
| 919 | SDL_memcpy(output, input, row_size); | ||
| 920 | output += surface->pitch; | ||
| 921 | input += row_size; | ||
| 922 | } | ||
| 923 | } | ||
| 924 | |||
| 925 | SDL_UnmapGPUTransferBuffer(data->device, tbuf); | ||
| 926 | SDL_ReleaseGPUTransferBuffer(data->device, tbuf); | ||
| 927 | |||
| 928 | return surface; | ||
| 929 | } | ||
| 930 | |||
| 931 | static bool CreateBackbuffer(GPU_RenderData *data, Uint32 w, Uint32 h, SDL_GPUTextureFormat fmt) | ||
| 932 | { | ||
| 933 | SDL_GPUTextureCreateInfo tci; | ||
| 934 | SDL_zero(tci); | ||
| 935 | tci.width = w; | ||
| 936 | tci.height = h; | ||
| 937 | tci.format = fmt; | ||
| 938 | tci.layer_count_or_depth = 1; | ||
| 939 | tci.num_levels = 1; | ||
| 940 | tci.sample_count = SDL_GPU_SAMPLECOUNT_1; | ||
| 941 | tci.usage = SDL_GPU_TEXTUREUSAGE_COLOR_TARGET | SDL_GPU_TEXTUREUSAGE_SAMPLER; | ||
| 942 | |||
| 943 | data->backbuffer.texture = SDL_CreateGPUTexture(data->device, &tci); | ||
| 944 | data->backbuffer.width = w; | ||
| 945 | data->backbuffer.height = h; | ||
| 946 | data->backbuffer.format = fmt; | ||
| 947 | |||
| 948 | if (!data->backbuffer.texture) { | ||
| 949 | return false; | ||
| 950 | } | ||
| 951 | |||
| 952 | return true; | ||
| 953 | } | ||
| 954 | |||
// Present a frame: acquire the swapchain texture, blit the backbuffer onto
// it (linear-filtered stretch to the swapchain size), submit the command
// buffer, and start a fresh command buffer for the next frame. If the
// swapchain size changed, the backbuffer is recreated to match.
static bool GPU_RenderPresent(SDL_Renderer *renderer)
{
    GPU_RenderData *data = (GPU_RenderData *)renderer->internal;

    SDL_GPUTexture *swapchain;
    Uint32 swapchain_texture_width, swapchain_texture_height;
    bool result = SDL_WaitAndAcquireGPUSwapchainTexture(data->state.command_buffer, renderer->window, &swapchain, &swapchain_texture_width, &swapchain_texture_height);

    if (!result) {
        // Acquisition failure is logged but not fatal; we still submit the
        // command buffer below so queued work is not lost.
        SDL_LogError(SDL_LOG_CATEGORY_RENDER, "Failed to acquire swapchain texture: %s", SDL_GetError());
    }

    if (swapchain != NULL) {
        SDL_GPUBlitInfo blit_info;
        SDL_zero(blit_info);

        blit_info.source.texture = data->backbuffer.texture;
        blit_info.source.w = data->backbuffer.width;
        blit_info.source.h = data->backbuffer.height;
        blit_info.destination.texture = swapchain;
        blit_info.destination.w = swapchain_texture_width;
        blit_info.destination.h = swapchain_texture_height;
        // DONT_CARE: the blit overwrites the whole swapchain image.
        blit_info.load_op = SDL_GPU_LOADOP_DONT_CARE;
        blit_info.filter = SDL_GPU_FILTER_LINEAR;

        SDL_BlitGPUTexture(data->state.command_buffer, &blit_info);

        SDL_SubmitGPUCommandBuffer(data->state.command_buffer);

        // Resize the backbuffer AFTER submitting, so the blit above still
        // reads the old texture.
        if (swapchain_texture_width != data->backbuffer.width || swapchain_texture_height != data->backbuffer.height) {
            SDL_ReleaseGPUTexture(data->device, data->backbuffer.texture);
            // NOTE(review): CreateBackbuffer's result is not checked here; on
            // failure backbuffer.texture is NULL — confirm downstream guards.
            CreateBackbuffer(data, swapchain_texture_width, swapchain_texture_height, SDL_GetGPUSwapchainTextureFormat(data->device, renderer->window));
        }
    } else {
        // No swapchain image this frame; flush pending work anyway.
        SDL_SubmitGPUCommandBuffer(data->state.command_buffer);
    }

    data->state.command_buffer = SDL_AcquireGPUCommandBuffer(data->device);

    // NOTE(review): always returns true, even when acquisition failed —
    // present appears to be intentionally best-effort.
    return true;
}
| 996 | |||
| 997 | static void GPU_DestroyTexture(SDL_Renderer *renderer, SDL_Texture *texture) | ||
| 998 | { | ||
| 999 | GPU_RenderData *renderdata = (GPU_RenderData *)renderer->internal; | ||
| 1000 | GPU_TextureData *data = (GPU_TextureData *)texture->internal; | ||
| 1001 | |||
| 1002 | if (renderdata->state.render_target == texture) { | ||
| 1003 | renderdata->state.render_target = NULL; | ||
| 1004 | } | ||
| 1005 | |||
| 1006 | if (!data) { | ||
| 1007 | return; | ||
| 1008 | } | ||
| 1009 | |||
| 1010 | SDL_ReleaseGPUTexture(renderdata->device, data->texture); | ||
| 1011 | SDL_free(data->pixels); | ||
| 1012 | SDL_free(data); | ||
| 1013 | texture->internal = NULL; | ||
| 1014 | } | ||
| 1015 | |||
// Tear down the GPU renderer: flush pending GPU work, then release every
// owned GPU object (samplers, backbuffer, window claim, vertex buffer,
// pipelines, shaders) before destroying the device itself.
static void GPU_DestroyRenderer(SDL_Renderer *renderer)
{
    GPU_RenderData *data = (GPU_RenderData *)renderer->internal;

    if (!data) {
        return;
    }

    // Submit (rather than cancel) the in-flight command buffer so any
    // queued work completes before resources are released.
    if (data->state.command_buffer) {
        SDL_SubmitGPUCommandBuffer(data->state.command_buffer);
        data->state.command_buffer = NULL;
    }

    // data->samplers is a nested array; iterate it as a flat array of
    // SDL_GPUSampler* to release every entry.
    for (Uint32 i = 0; i < sizeof(data->samplers) / sizeof(SDL_GPUSampler *); ++i) {
        SDL_ReleaseGPUSampler(data->device, ((SDL_GPUSampler **)data->samplers)[i]);
    }

    if (data->backbuffer.texture) {
        SDL_ReleaseGPUTexture(data->device, data->backbuffer.texture);
    }

    if (renderer->window) {
        SDL_ReleaseWindowFromGPUDevice(data->device, renderer->window);
    }

    // Device-owned helpers must be released before the device is destroyed.
    ReleaseVertexBuffer(data);
    GPU_DestroyPipelineCache(&data->pipeline_cache);
    GPU_ReleaseShaders(&data->shaders, data->device);
    SDL_DestroyGPUDevice(data->device);

    SDL_free(data);
}
| 1048 | |||
| 1049 | static bool ChoosePresentMode(SDL_GPUDevice *device, SDL_Window *window, const int vsync, SDL_GPUPresentMode *out_mode) | ||
| 1050 | { | ||
| 1051 | SDL_GPUPresentMode mode; | ||
| 1052 | |||
| 1053 | switch (vsync) { | ||
| 1054 | case 0: | ||
| 1055 | mode = SDL_GPU_PRESENTMODE_MAILBOX; | ||
| 1056 | |||
| 1057 | if (!SDL_WindowSupportsGPUPresentMode(device, window, mode)) { | ||
| 1058 | mode = SDL_GPU_PRESENTMODE_IMMEDIATE; | ||
| 1059 | |||
| 1060 | if (!SDL_WindowSupportsGPUPresentMode(device, window, mode)) { | ||
| 1061 | mode = SDL_GPU_PRESENTMODE_VSYNC; | ||
| 1062 | } | ||
| 1063 | } | ||
| 1064 | |||
| 1065 | // FIXME should we return an error if both mailbox and immediate fail? | ||
| 1066 | break; | ||
| 1067 | |||
| 1068 | case 1: | ||
| 1069 | mode = SDL_GPU_PRESENTMODE_VSYNC; | ||
| 1070 | break; | ||
| 1071 | |||
| 1072 | default: | ||
| 1073 | return SDL_Unsupported(); | ||
| 1074 | } | ||
| 1075 | |||
| 1076 | *out_mode = mode; | ||
| 1077 | return true; | ||
| 1078 | } | ||
| 1079 | |||
| 1080 | static bool GPU_SetVSync(SDL_Renderer *renderer, const int vsync) | ||
| 1081 | { | ||
| 1082 | GPU_RenderData *data = (GPU_RenderData *)renderer->internal; | ||
| 1083 | SDL_GPUPresentMode mode = SDL_GPU_PRESENTMODE_VSYNC; | ||
| 1084 | |||
| 1085 | if (!ChoosePresentMode(data->device, renderer->window, vsync, &mode)) { | ||
| 1086 | return false; | ||
| 1087 | } | ||
| 1088 | |||
| 1089 | if (mode != data->swapchain.present_mode) { | ||
| 1090 | // XXX returns bool instead of SDL-style error code | ||
| 1091 | if (SDL_SetGPUSwapchainParameters(data->device, renderer->window, data->swapchain.composition, mode)) { | ||
| 1092 | data->swapchain.present_mode = mode; | ||
| 1093 | return true; | ||
| 1094 | } else { | ||
| 1095 | return false; | ||
| 1096 | } | ||
| 1097 | } | ||
| 1098 | |||
| 1099 | return true; | ||
| 1100 | } | ||
| 1101 | |||
| 1102 | static bool InitSamplers(GPU_RenderData *data) | ||
| 1103 | { | ||
| 1104 | struct | ||
| 1105 | { | ||
| 1106 | struct | ||
| 1107 | { | ||
| 1108 | SDL_TextureAddressMode address_mode; | ||
| 1109 | SDL_ScaleMode scale_mode; | ||
| 1110 | } sdl; | ||
| 1111 | struct | ||
| 1112 | { | ||
| 1113 | SDL_GPUSamplerAddressMode address_mode; | ||
| 1114 | SDL_GPUFilter filter; | ||
| 1115 | SDL_GPUSamplerMipmapMode mipmap_mode; | ||
| 1116 | Uint32 anisotropy; | ||
| 1117 | } gpu; | ||
| 1118 | } configs[] = { | ||
| 1119 | { | ||
| 1120 | { SDL_TEXTURE_ADDRESS_CLAMP, SDL_SCALEMODE_NEAREST }, | ||
| 1121 | { SDL_GPU_SAMPLERADDRESSMODE_REPEAT, SDL_GPU_FILTER_NEAREST, SDL_GPU_SAMPLERMIPMAPMODE_NEAREST, 0 }, | ||
| 1122 | }, | ||
| 1123 | { | ||
| 1124 | { SDL_TEXTURE_ADDRESS_CLAMP, SDL_SCALEMODE_LINEAR }, | ||
| 1125 | { SDL_GPU_SAMPLERADDRESSMODE_REPEAT, SDL_GPU_FILTER_LINEAR, SDL_GPU_SAMPLERMIPMAPMODE_LINEAR, 0 }, | ||
| 1126 | }, | ||
| 1127 | { | ||
| 1128 | { SDL_TEXTURE_ADDRESS_WRAP, SDL_SCALEMODE_NEAREST }, | ||
| 1129 | { SDL_GPU_SAMPLERADDRESSMODE_REPEAT, SDL_GPU_FILTER_NEAREST, SDL_GPU_SAMPLERMIPMAPMODE_NEAREST, 0 }, | ||
| 1130 | }, | ||
| 1131 | { | ||
| 1132 | { SDL_TEXTURE_ADDRESS_WRAP, SDL_SCALEMODE_LINEAR }, | ||
| 1133 | { SDL_GPU_SAMPLERADDRESSMODE_REPEAT, SDL_GPU_FILTER_LINEAR, SDL_GPU_SAMPLERMIPMAPMODE_LINEAR, 0 }, | ||
| 1134 | }, | ||
| 1135 | }; | ||
| 1136 | |||
| 1137 | for (Uint32 i = 0; i < SDL_arraysize(configs); ++i) { | ||
| 1138 | SDL_GPUSamplerCreateInfo sci; | ||
| 1139 | SDL_zero(sci); | ||
| 1140 | sci.max_anisotropy = configs[i].gpu.anisotropy; | ||
| 1141 | sci.enable_anisotropy = configs[i].gpu.anisotropy > 0; | ||
| 1142 | sci.address_mode_u = sci.address_mode_v = sci.address_mode_w = configs[i].gpu.address_mode; | ||
| 1143 | sci.min_filter = sci.mag_filter = configs[i].gpu.filter; | ||
| 1144 | sci.mipmap_mode = configs[i].gpu.mipmap_mode; | ||
| 1145 | |||
| 1146 | SDL_GPUSampler *sampler = SDL_CreateGPUSampler(data->device, &sci); | ||
| 1147 | |||
| 1148 | if (sampler == NULL) { | ||
| 1149 | return false; | ||
| 1150 | } | ||
| 1151 | |||
| 1152 | *SamplerPointer(data, configs[i].sdl.address_mode, configs[i].sdl.scale_mode) = sampler; | ||
| 1153 | } | ||
| 1154 | |||
| 1155 | return true; | ||
| 1156 | } | ||
| 1157 | |||
// Create and initialize the GPU render backend: install the function table,
// create the SDL_GPUDevice, shaders, pipeline cache, vertex buffer and
// samplers, claim the window, configure the swapchain, and create the
// initial backbuffer sized to the window.
// NOTE(review): failure paths return false without releasing what was
// already created — presumably the render core calls GPU_DestroyRenderer on
// failure; confirm against the caller.
static bool GPU_CreateRenderer(SDL_Renderer *renderer, SDL_Window *window, SDL_PropertiesID create_props)
{
    GPU_RenderData *data = NULL;

    SDL_SetupRendererColorspace(renderer, create_props);

    // Only SRGB output is implemented so far.
    if (renderer->output_colorspace != SDL_COLORSPACE_SRGB) {
        // TODO
        return SDL_SetError("Unsupported output colorspace");
    }

    data = (GPU_RenderData *)SDL_calloc(1, sizeof(*data));
    if (!data) {
        return false;
    }

    // Wire up the renderer vtable.
    renderer->SupportsBlendMode = GPU_SupportsBlendMode;
    renderer->CreateTexture = GPU_CreateTexture;
    renderer->UpdateTexture = GPU_UpdateTexture;
    renderer->LockTexture = GPU_LockTexture;
    renderer->UnlockTexture = GPU_UnlockTexture;
    renderer->SetTextureScaleMode = GPU_SetTextureScaleMode;
    renderer->SetRenderTarget = GPU_SetRenderTarget;
    renderer->QueueSetViewport = GPU_QueueNoOp;
    renderer->QueueSetDrawColor = GPU_QueueNoOp;
    renderer->QueueDrawPoints = GPU_QueueDrawPoints;
    renderer->QueueDrawLines = GPU_QueueDrawPoints; // lines and points queue vertices the same way.
    renderer->QueueGeometry = GPU_QueueGeometry;
    renderer->InvalidateCachedState = GPU_InvalidateCachedState;
    renderer->RunCommandQueue = GPU_RunCommandQueue;
    renderer->RenderReadPixels = GPU_RenderReadPixels;
    renderer->RenderPresent = GPU_RenderPresent;
    renderer->DestroyTexture = GPU_DestroyTexture;
    renderer->DestroyRenderer = GPU_DestroyRenderer;
    renderer->SetVSync = GPU_SetVSync;
    renderer->internal = data;
    renderer->window = window;
    renderer->name = GPU_RenderDriver.name;

    bool debug = SDL_GetBooleanProperty(create_props, SDL_PROP_GPU_DEVICE_CREATE_DEBUGMODE_BOOLEAN, false);
    bool lowpower = SDL_GetBooleanProperty(create_props, SDL_PROP_GPU_DEVICE_CREATE_PREFERLOWPOWER_BOOLEAN, false);

    // Prefer environment variables/hints if they exist, otherwise defer to properties
    debug = SDL_GetHintBoolean(SDL_HINT_RENDER_GPU_DEBUG, debug);
    lowpower = SDL_GetHintBoolean(SDL_HINT_RENDER_GPU_LOW_POWER, lowpower);

    // Write the resolved values back so device creation sees them.
    SDL_SetBooleanProperty(create_props, SDL_PROP_GPU_DEVICE_CREATE_DEBUGMODE_BOOLEAN, debug);
    SDL_SetBooleanProperty(create_props, SDL_PROP_GPU_DEVICE_CREATE_PREFERLOWPOWER_BOOLEAN, lowpower);

    GPU_FillSupportedShaderFormats(create_props);
    data->device = SDL_CreateGPUDeviceWithProperties(create_props);

    if (!data->device) {
        return false;
    }

    if (!GPU_InitShaders(&data->shaders, data->device)) {
        return false;
    }

    if (!GPU_InitPipelineCache(&data->pipeline_cache, data->device)) {
        return false;
    }

    // XXX what's a good initial size?
    if (!InitVertexBuffer(data, 1 << 16)) {
        return false;
    }

    if (!InitSamplers(data)) {
        return false;
    }

    if (!SDL_ClaimWindowForGPUDevice(data->device, window)) {
        return false;
    }

    data->swapchain.composition = SDL_GPU_SWAPCHAINCOMPOSITION_SDR;
    data->swapchain.present_mode = SDL_GPU_PRESENTMODE_VSYNC;

    // Honor the requested vsync setting; on failure the VSYNC default above
    // remains in effect (ChoosePresentMode's result is intentionally unchecked).
    int vsync = (int)SDL_GetNumberProperty(create_props, SDL_PROP_RENDERER_CREATE_PRESENT_VSYNC_NUMBER, 0);
    ChoosePresentMode(data->device, window, vsync, &data->swapchain.present_mode);

    SDL_SetGPUSwapchainParameters(data->device, window, data->swapchain.composition, data->swapchain.present_mode);

    SDL_SetGPUAllowedFramesInFlight(data->device, 1);

    SDL_AddSupportedTextureFormat(renderer, SDL_PIXELFORMAT_BGRA32);
    SDL_AddSupportedTextureFormat(renderer, SDL_PIXELFORMAT_RGBA32);
    SDL_AddSupportedTextureFormat(renderer, SDL_PIXELFORMAT_BGRX32);
    SDL_AddSupportedTextureFormat(renderer, SDL_PIXELFORMAT_RGBX32);

    SDL_SetNumberProperty(SDL_GetRendererProperties(renderer), SDL_PROP_RENDERER_MAX_TEXTURE_SIZE_NUMBER, 16384);

    // Initial draw state: opaque white, full 0..1 depth range.
    data->state.draw_color.r = 1.0f;
    data->state.draw_color.g = 1.0f;
    data->state.draw_color.b = 1.0f;
    data->state.draw_color.a = 1.0f;
    data->state.viewport.min_depth = 0;
    data->state.viewport.max_depth = 1;
    data->state.command_buffer = SDL_AcquireGPUCommandBuffer(data->device);

    // Size the backbuffer to the window's pixel size, matching the
    // swapchain's texture format.
    int w, h;
    SDL_GetWindowSizeInPixels(window, &w, &h);

    if (!CreateBackbuffer(data, w, h, SDL_GetGPUSwapchainTextureFormat(data->device, window))) {
        return false;
    }

    // Expose the device so applications can interoperate with the GPU API.
    SDL_SetPointerProperty(SDL_GetRendererProperties(renderer), SDL_PROP_RENDERER_GPU_DEVICE_POINTER, data->device);

    return true;
}
| 1271 | |||
// Render driver descriptor: the factory function plus the name ("gpu")
// used by SDL's render core for driver selection.
SDL_RenderDriver GPU_RenderDriver = {
    GPU_CreateRenderer, "gpu"
};
| 1275 | |||
| 1276 | #endif // SDL_VIDEO_RENDER_GPU | ||
