From 5a079a2d114f96d4847d1ee305d5b7c16eeec50e Mon Sep 17 00:00:00 2001 From: 3gg <3gg@shellblade.net> Date: Sat, 27 Dec 2025 12:03:39 -0800 Subject: Initial commit --- contrib/SDL-3.2.8/src/gpu/metal/SDL_gpu_metal.m | 4580 +++++++++++++++++++++++ 1 file changed, 4580 insertions(+) create mode 100644 contrib/SDL-3.2.8/src/gpu/metal/SDL_gpu_metal.m (limited to 'contrib/SDL-3.2.8/src/gpu/metal/SDL_gpu_metal.m') diff --git a/contrib/SDL-3.2.8/src/gpu/metal/SDL_gpu_metal.m b/contrib/SDL-3.2.8/src/gpu/metal/SDL_gpu_metal.m new file mode 100644 index 0000000..365b344 --- /dev/null +++ b/contrib/SDL-3.2.8/src/gpu/metal/SDL_gpu_metal.m @@ -0,0 +1,4580 @@ +/* + Simple DirectMedia Layer + Copyright (C) 1997-2025 Sam Lantinga + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. +*/ + +#include "SDL_internal.h" + +#ifdef SDL_GPU_METAL + +#include +#include + +#include "../SDL_sysgpu.h" + +// Defines + +#define METAL_FIRST_VERTEX_BUFFER_SLOT 14 +#define WINDOW_PROPERTY_DATA "SDL_GPUMetalWindowPropertyData" +#define SDL_GPU_SHADERSTAGE_COMPUTE 2 + +#define TRACK_RESOURCE(resource, type, array, count, capacity) \ + do { \ + Uint32 i; \ + \ + for (i = 0; i < commandBuffer->count; i += 1) { \ + if (commandBuffer->array[i] == (resource)) { \ + return; \ + } \ + } \ + \ + if (commandBuffer->count == commandBuffer->capacity) { \ + commandBuffer->capacity += 1; \ + commandBuffer->array = SDL_realloc( \ + commandBuffer->array, \ + commandBuffer->capacity * sizeof(type)); \ + } \ + commandBuffer->array[commandBuffer->count] = (resource); \ + commandBuffer->count += 1; \ + SDL_AtomicIncRef(&(resource)->referenceCount); \ + } while (0) + +#define SET_ERROR_AND_RETURN(fmt, msg, ret) \ + do { \ + if (renderer->debugMode) { \ + SDL_LogError(SDL_LOG_CATEGORY_GPU, fmt, msg); \ + } \ + SDL_SetError(fmt, msg); \ + return ret; \ + } while (0) + +#define SET_STRING_ERROR_AND_RETURN(msg, ret) SET_ERROR_AND_RETURN("%s", msg, ret) + +// Blit Shaders + +#include "Metal_Blit.h" + +// Forward Declarations + +static bool METAL_Wait(SDL_GPURenderer *driverData); +static void METAL_ReleaseWindow( + SDL_GPURenderer *driverData, + SDL_Window *window); +static void METAL_INTERNAL_DestroyBlitResources(SDL_GPURenderer *driverData); + +// Conversions + +#define RETURN_FORMAT(availability, format) \ + if (availability) { return format; } else { return MTLPixelFormatInvalid; } + +static MTLPixelFormat SDLToMetal_TextureFormat(SDL_GPUTextureFormat format) +{ + switch (format) { + case SDL_GPU_TEXTUREFORMAT_INVALID: return MTLPixelFormatInvalid; + case SDL_GPU_TEXTUREFORMAT_A8_UNORM: return MTLPixelFormatA8Unorm; + case SDL_GPU_TEXTUREFORMAT_R8_UNORM: return MTLPixelFormatR8Unorm; + case SDL_GPU_TEXTUREFORMAT_R8G8_UNORM: return MTLPixelFormatRG8Unorm; + case SDL_GPU_TEXTUREFORMAT_R8G8B8A8_UNORM: return MTLPixelFormatRGBA8Unorm; + case SDL_GPU_TEXTUREFORMAT_R16_UNORM: return MTLPixelFormatR16Unorm; + case SDL_GPU_TEXTUREFORMAT_R16G16_UNORM: return MTLPixelFormatRG16Unorm; + case SDL_GPU_TEXTUREFORMAT_R16G16B16A16_UNORM: return MTLPixelFormatRGBA16Unorm; + case SDL_GPU_TEXTUREFORMAT_R10G10B10A2_UNORM: return MTLPixelFormatRGB10A2Unorm; + case SDL_GPU_TEXTUREFORMAT_B5G6R5_UNORM: RETURN_FORMAT(@available(macOS 11.0, *), MTLPixelFormatB5G6R5Unorm); + case SDL_GPU_TEXTUREFORMAT_B5G5R5A1_UNORM: RETURN_FORMAT(@available(macOS 11.0, *), MTLPixelFormatBGR5A1Unorm); + case SDL_GPU_TEXTUREFORMAT_B4G4R4A4_UNORM: RETURN_FORMAT(@available(macOS 11.0, *), MTLPixelFormatABGR4Unorm); + case SDL_GPU_TEXTUREFORMAT_B8G8R8A8_UNORM: return MTLPixelFormatBGRA8Unorm; + case SDL_GPU_TEXTUREFORMAT_BC1_RGBA_UNORM: RETURN_FORMAT(@available(iOS 16.4, tvOS 16.4, *), MTLPixelFormatBC1_RGBA); + case SDL_GPU_TEXTUREFORMAT_BC2_RGBA_UNORM: RETURN_FORMAT(@available(iOS 16.4, tvOS 16.4, *), MTLPixelFormatBC2_RGBA); + case SDL_GPU_TEXTUREFORMAT_BC3_RGBA_UNORM: RETURN_FORMAT(@available(iOS 16.4, tvOS 16.4, *), MTLPixelFormatBC3_RGBA); + case SDL_GPU_TEXTUREFORMAT_BC4_R_UNORM: RETURN_FORMAT(@available(iOS 16.4, tvOS 16.4, *), MTLPixelFormatBC4_RUnorm); + case SDL_GPU_TEXTUREFORMAT_BC5_RG_UNORM: RETURN_FORMAT(@available(iOS 16.4, tvOS 16.4, *), MTLPixelFormatBC5_RGUnorm); + case SDL_GPU_TEXTUREFORMAT_BC7_RGBA_UNORM: RETURN_FORMAT(@available(iOS 16.4, tvOS 16.4, *), MTLPixelFormatBC7_RGBAUnorm); + case SDL_GPU_TEXTUREFORMAT_BC6H_RGB_FLOAT: RETURN_FORMAT(@available(iOS 16.4, tvOS 16.4, *), MTLPixelFormatBC6H_RGBFloat); + case SDL_GPU_TEXTUREFORMAT_BC6H_RGB_UFLOAT: RETURN_FORMAT(@available(iOS 16.4, tvOS 16.4, *), MTLPixelFormatBC6H_RGBUfloat); + case SDL_GPU_TEXTUREFORMAT_R8_SNORM: return MTLPixelFormatR8Snorm; + case SDL_GPU_TEXTUREFORMAT_R8G8_SNORM: return MTLPixelFormatRG8Snorm; + case SDL_GPU_TEXTUREFORMAT_R8G8B8A8_SNORM: return MTLPixelFormatRGBA8Snorm; + case SDL_GPU_TEXTUREFORMAT_R16_SNORM: return MTLPixelFormatR16Snorm; + case SDL_GPU_TEXTUREFORMAT_R16G16_SNORM: return MTLPixelFormatRG16Snorm; + case SDL_GPU_TEXTUREFORMAT_R16G16B16A16_SNORM: return MTLPixelFormatRGBA16Snorm; + case SDL_GPU_TEXTUREFORMAT_R16_FLOAT: return MTLPixelFormatR16Float; + case SDL_GPU_TEXTUREFORMAT_R16G16_FLOAT: return MTLPixelFormatRG16Float; + case SDL_GPU_TEXTUREFORMAT_R16G16B16A16_FLOAT: return MTLPixelFormatRGBA16Float; + case SDL_GPU_TEXTUREFORMAT_R32_FLOAT: return MTLPixelFormatR32Float; + case SDL_GPU_TEXTUREFORMAT_R32G32_FLOAT: return MTLPixelFormatRG32Float; + case SDL_GPU_TEXTUREFORMAT_R32G32B32A32_FLOAT: return MTLPixelFormatRGBA32Float; + case SDL_GPU_TEXTUREFORMAT_R11G11B10_UFLOAT: return MTLPixelFormatRG11B10Float; + case SDL_GPU_TEXTUREFORMAT_R8_UINT: return MTLPixelFormatR8Uint; + case SDL_GPU_TEXTUREFORMAT_R8G8_UINT: return MTLPixelFormatRG8Uint; + case SDL_GPU_TEXTUREFORMAT_R8G8B8A8_UINT: return MTLPixelFormatRGBA8Uint; + case SDL_GPU_TEXTUREFORMAT_R16_UINT: return MTLPixelFormatR16Uint; + case SDL_GPU_TEXTUREFORMAT_R16G16_UINT: return MTLPixelFormatRG16Uint; + case SDL_GPU_TEXTUREFORMAT_R16G16B16A16_UINT: return MTLPixelFormatRGBA16Uint; + case SDL_GPU_TEXTUREFORMAT_R32_UINT: return MTLPixelFormatR32Uint; + case SDL_GPU_TEXTUREFORMAT_R32G32_UINT: return MTLPixelFormatRG32Uint; + case SDL_GPU_TEXTUREFORMAT_R32G32B32A32_UINT: return MTLPixelFormatRGBA32Uint; + case SDL_GPU_TEXTUREFORMAT_R8_INT: return MTLPixelFormatR8Sint; + case SDL_GPU_TEXTUREFORMAT_R8G8_INT: return MTLPixelFormatRG8Sint; + case SDL_GPU_TEXTUREFORMAT_R8G8B8A8_INT: return MTLPixelFormatRGBA8Sint; + case SDL_GPU_TEXTUREFORMAT_R16_INT: return MTLPixelFormatR16Sint; + case SDL_GPU_TEXTUREFORMAT_R16G16_INT: return MTLPixelFormatRG16Sint; + case SDL_GPU_TEXTUREFORMAT_R16G16B16A16_INT: return MTLPixelFormatRGBA16Sint; + case SDL_GPU_TEXTUREFORMAT_R32_INT: return MTLPixelFormatR32Sint; + case SDL_GPU_TEXTUREFORMAT_R32G32_INT: return MTLPixelFormatRG32Sint; + case SDL_GPU_TEXTUREFORMAT_R32G32B32A32_INT: return MTLPixelFormatRGBA32Sint; + case SDL_GPU_TEXTUREFORMAT_R8G8B8A8_UNORM_SRGB: return MTLPixelFormatRGBA8Unorm_sRGB; + case SDL_GPU_TEXTUREFORMAT_B8G8R8A8_UNORM_SRGB: return MTLPixelFormatBGRA8Unorm_sRGB; + case SDL_GPU_TEXTUREFORMAT_BC1_RGBA_UNORM_SRGB: RETURN_FORMAT(@available(iOS 16.4, tvOS 16.4, *), MTLPixelFormatBC1_RGBA_sRGB); + case SDL_GPU_TEXTUREFORMAT_BC2_RGBA_UNORM_SRGB: RETURN_FORMAT(@available(iOS 16.4, tvOS 16.4, *), MTLPixelFormatBC2_RGBA_sRGB); + case SDL_GPU_TEXTUREFORMAT_BC3_RGBA_UNORM_SRGB: RETURN_FORMAT(@available(iOS 16.4, tvOS 16.4, *), MTLPixelFormatBC3_RGBA_sRGB); + case SDL_GPU_TEXTUREFORMAT_BC7_RGBA_UNORM_SRGB: RETURN_FORMAT(@available(iOS 16.4, tvOS 16.4, *), MTLPixelFormatBC7_RGBAUnorm_sRGB); + case SDL_GPU_TEXTUREFORMAT_D16_UNORM: RETURN_FORMAT(@available(iOS 13.0, tvOS 13.0, *), MTLPixelFormatDepth16Unorm); + case SDL_GPU_TEXTUREFORMAT_D24_UNORM: +#ifdef SDL_PLATFORM_MACOS + return MTLPixelFormatDepth24Unorm_Stencil8; +#else + return MTLPixelFormatInvalid; +#endif + case SDL_GPU_TEXTUREFORMAT_D32_FLOAT: return MTLPixelFormatDepth32Float; + case SDL_GPU_TEXTUREFORMAT_D24_UNORM_S8_UINT: +#ifdef SDL_PLATFORM_MACOS + return MTLPixelFormatDepth24Unorm_Stencil8; +#else + return MTLPixelFormatInvalid; +#endif + case SDL_GPU_TEXTUREFORMAT_D32_FLOAT_S8_UINT: return MTLPixelFormatDepth32Float_Stencil8; + case SDL_GPU_TEXTUREFORMAT_ASTC_4x4_UNORM: RETURN_FORMAT(@available(macOS 11.0, *), MTLPixelFormatASTC_4x4_LDR); + case SDL_GPU_TEXTUREFORMAT_ASTC_5x4_UNORM: RETURN_FORMAT(@available(macOS 11.0, *), MTLPixelFormatASTC_5x4_LDR); + case SDL_GPU_TEXTUREFORMAT_ASTC_5x5_UNORM: RETURN_FORMAT(@available(macOS 11.0, *), MTLPixelFormatASTC_5x5_LDR); + case SDL_GPU_TEXTUREFORMAT_ASTC_6x5_UNORM: RETURN_FORMAT(@available(macOS 11.0, *), MTLPixelFormatASTC_6x5_LDR); + case SDL_GPU_TEXTUREFORMAT_ASTC_6x6_UNORM: RETURN_FORMAT(@available(macOS 11.0, *), MTLPixelFormatASTC_6x6_LDR); + case SDL_GPU_TEXTUREFORMAT_ASTC_8x5_UNORM: RETURN_FORMAT(@available(macOS 11.0, *), MTLPixelFormatASTC_8x5_LDR); + case SDL_GPU_TEXTUREFORMAT_ASTC_8x6_UNORM: RETURN_FORMAT(@available(macOS 11.0, *), MTLPixelFormatASTC_8x6_LDR); + case SDL_GPU_TEXTUREFORMAT_ASTC_8x8_UNORM: RETURN_FORMAT(@available(macOS 11.0, *), MTLPixelFormatASTC_8x8_LDR); + case SDL_GPU_TEXTUREFORMAT_ASTC_10x5_UNORM: RETURN_FORMAT(@available(macOS 11.0, *), MTLPixelFormatASTC_10x5_LDR); + case SDL_GPU_TEXTUREFORMAT_ASTC_10x6_UNORM: RETURN_FORMAT(@available(macOS 11.0, *), MTLPixelFormatASTC_10x6_LDR); + case SDL_GPU_TEXTUREFORMAT_ASTC_10x8_UNORM: RETURN_FORMAT(@available(macOS 11.0, *), MTLPixelFormatASTC_10x8_LDR); + case SDL_GPU_TEXTUREFORMAT_ASTC_10x10_UNORM: RETURN_FORMAT(@available(macOS 11.0, *), MTLPixelFormatASTC_10x10_LDR); + case SDL_GPU_TEXTUREFORMAT_ASTC_12x10_UNORM: RETURN_FORMAT(@available(macOS 11.0, *), MTLPixelFormatASTC_12x10_LDR); + case SDL_GPU_TEXTUREFORMAT_ASTC_12x12_UNORM: RETURN_FORMAT(@available(macOS 11.0, *), MTLPixelFormatASTC_12x12_LDR); + case SDL_GPU_TEXTUREFORMAT_ASTC_4x4_UNORM_SRGB: RETURN_FORMAT(@available(macOS 11.0, *), MTLPixelFormatASTC_4x4_sRGB); + case SDL_GPU_TEXTUREFORMAT_ASTC_5x4_UNORM_SRGB: RETURN_FORMAT(@available(macOS 11.0, *), MTLPixelFormatASTC_5x4_sRGB); + case SDL_GPU_TEXTUREFORMAT_ASTC_5x5_UNORM_SRGB: RETURN_FORMAT(@available(macOS 11.0, *), MTLPixelFormatASTC_5x5_sRGB); + case SDL_GPU_TEXTUREFORMAT_ASTC_6x5_UNORM_SRGB: RETURN_FORMAT(@available(macOS 11.0, *), MTLPixelFormatASTC_6x5_sRGB); + case SDL_GPU_TEXTUREFORMAT_ASTC_6x6_UNORM_SRGB: RETURN_FORMAT(@available(macOS 11.0, *), MTLPixelFormatASTC_6x6_sRGB); + case SDL_GPU_TEXTUREFORMAT_ASTC_8x5_UNORM_SRGB: RETURN_FORMAT(@available(macOS 11.0, *), MTLPixelFormatASTC_8x5_sRGB); + case SDL_GPU_TEXTUREFORMAT_ASTC_8x6_UNORM_SRGB: RETURN_FORMAT(@available(macOS 11.0, *), MTLPixelFormatASTC_8x6_sRGB); + case SDL_GPU_TEXTUREFORMAT_ASTC_8x8_UNORM_SRGB: RETURN_FORMAT(@available(macOS 11.0, *), MTLPixelFormatASTC_8x8_sRGB); + case SDL_GPU_TEXTUREFORMAT_ASTC_10x5_UNORM_SRGB: RETURN_FORMAT(@available(macOS 11.0, *), MTLPixelFormatASTC_10x5_sRGB); + case SDL_GPU_TEXTUREFORMAT_ASTC_10x6_UNORM_SRGB: RETURN_FORMAT(@available(macOS 11.0, *), MTLPixelFormatASTC_10x6_sRGB); + case SDL_GPU_TEXTUREFORMAT_ASTC_10x8_UNORM_SRGB: RETURN_FORMAT(@available(macOS 11.0, *), MTLPixelFormatASTC_10x8_sRGB); + case SDL_GPU_TEXTUREFORMAT_ASTC_10x10_UNORM_SRGB: RETURN_FORMAT(@available(macOS 11.0, *), MTLPixelFormatASTC_10x10_sRGB); + case SDL_GPU_TEXTUREFORMAT_ASTC_12x10_UNORM_SRGB: RETURN_FORMAT(@available(macOS 11.0, *), MTLPixelFormatASTC_12x10_sRGB); + case SDL_GPU_TEXTUREFORMAT_ASTC_12x12_UNORM_SRGB: RETURN_FORMAT(@available(macOS 11.0, *), MTLPixelFormatASTC_12x12_sRGB); + case SDL_GPU_TEXTUREFORMAT_ASTC_4x4_FLOAT: RETURN_FORMAT(@available(macOS 11.0, iOS 13.0, tvOS 16.0, *), MTLPixelFormatASTC_4x4_HDR); + case SDL_GPU_TEXTUREFORMAT_ASTC_5x4_FLOAT: RETURN_FORMAT(@available(macOS 11.0, iOS 13.0, tvOS 16.0, *), MTLPixelFormatASTC_5x4_HDR); + case SDL_GPU_TEXTUREFORMAT_ASTC_5x5_FLOAT: RETURN_FORMAT(@available(macOS 11.0, iOS 13.0, tvOS 16.0, *), MTLPixelFormatASTC_5x5_HDR); + case SDL_GPU_TEXTUREFORMAT_ASTC_6x5_FLOAT: RETURN_FORMAT(@available(macOS 11.0, iOS 13.0, tvOS 16.0, *), MTLPixelFormatASTC_6x5_HDR); + case SDL_GPU_TEXTUREFORMAT_ASTC_6x6_FLOAT: RETURN_FORMAT(@available(macOS 11.0, iOS 13.0, tvOS 16.0, *), MTLPixelFormatASTC_6x6_HDR); + case SDL_GPU_TEXTUREFORMAT_ASTC_8x5_FLOAT: RETURN_FORMAT(@available(macOS 11.0, iOS 13.0, tvOS 16.0, *), MTLPixelFormatASTC_8x5_HDR); + case SDL_GPU_TEXTUREFORMAT_ASTC_8x6_FLOAT: RETURN_FORMAT(@available(macOS 11.0, iOS 13.0, tvOS 16.0, *), MTLPixelFormatASTC_8x6_HDR); + case SDL_GPU_TEXTUREFORMAT_ASTC_8x8_FLOAT: RETURN_FORMAT(@available(macOS 11.0, iOS 13.0, tvOS 16.0, *), MTLPixelFormatASTC_8x8_HDR); + case SDL_GPU_TEXTUREFORMAT_ASTC_10x5_FLOAT: RETURN_FORMAT(@available(macOS 11.0, iOS 13.0, tvOS 16.0, *), MTLPixelFormatASTC_10x5_HDR); + case SDL_GPU_TEXTUREFORMAT_ASTC_10x6_FLOAT: RETURN_FORMAT(@available(macOS 11.0, iOS 13.0, tvOS 16.0, *), MTLPixelFormatASTC_10x6_HDR); + case SDL_GPU_TEXTUREFORMAT_ASTC_10x8_FLOAT: RETURN_FORMAT(@available(macOS 11.0, iOS 13.0, tvOS 16.0, *), MTLPixelFormatASTC_10x8_HDR); + case SDL_GPU_TEXTUREFORMAT_ASTC_10x10_FLOAT: RETURN_FORMAT(@available(macOS 11.0, iOS 13.0, tvOS 16.0, *), MTLPixelFormatASTC_10x10_HDR); + case SDL_GPU_TEXTUREFORMAT_ASTC_12x10_FLOAT: RETURN_FORMAT(@available(macOS 11.0, iOS 13.0, tvOS 16.0, *), MTLPixelFormatASTC_12x10_HDR); + case SDL_GPU_TEXTUREFORMAT_ASTC_12x12_FLOAT: RETURN_FORMAT(@available(macOS 11.0, iOS 13.0, tvOS 16.0, *), MTLPixelFormatASTC_12x12_HDR); + } +} + +#undef RETURN_FORMAT + +static MTLVertexFormat SDLToMetal_VertexFormat[] = { + MTLVertexFormatInvalid, // INVALID + MTLVertexFormatInt, // INT + MTLVertexFormatInt2, // INT2 + MTLVertexFormatInt3, // INT3 + MTLVertexFormatInt4, // INT4 + MTLVertexFormatUInt, // UINT + MTLVertexFormatUInt2, // UINT2 + MTLVertexFormatUInt3, // UINT3 + MTLVertexFormatUInt4, // UINT4 + MTLVertexFormatFloat, // FLOAT + MTLVertexFormatFloat2, // FLOAT2 + MTLVertexFormatFloat3, // FLOAT3 + MTLVertexFormatFloat4, // FLOAT4 + MTLVertexFormatChar2, // BYTE2 + MTLVertexFormatChar4, // BYTE4 + MTLVertexFormatUChar2, // UBYTE2 + MTLVertexFormatUChar4, // UBYTE4 + MTLVertexFormatChar2Normalized, // BYTE2_NORM + MTLVertexFormatChar4Normalized, // BYTE4_NORM + MTLVertexFormatUChar2Normalized, // UBYTE2_NORM + MTLVertexFormatUChar4Normalized, // UBYTE4_NORM + MTLVertexFormatShort2, // SHORT2 + MTLVertexFormatShort4, // SHORT4 + MTLVertexFormatUShort2, // USHORT2 + MTLVertexFormatUShort4, // USHORT4 + MTLVertexFormatShort2Normalized, // SHORT2_NORM + MTLVertexFormatShort4Normalized, // SHORT4_NORM + MTLVertexFormatUShort2Normalized, // USHORT2_NORM + MTLVertexFormatUShort4Normalized, // USHORT4_NORM + MTLVertexFormatHalf2, // HALF2 + MTLVertexFormatHalf4 // HALF4 +}; +SDL_COMPILE_TIME_ASSERT(SDLToMetal_VertexFormat, SDL_arraysize(SDLToMetal_VertexFormat) == SDL_GPU_VERTEXELEMENTFORMAT_MAX_ENUM_VALUE); + +static MTLIndexType SDLToMetal_IndexType[] = { + MTLIndexTypeUInt16, // 16BIT + MTLIndexTypeUInt32, // 32BIT +}; + +static MTLPrimitiveType SDLToMetal_PrimitiveType[] = { + MTLPrimitiveTypeTriangle, // TRIANGLELIST + MTLPrimitiveTypeTriangleStrip, // TRIANGLESTRIP + MTLPrimitiveTypeLine, // LINELIST + MTLPrimitiveTypeLineStrip, // LINESTRIP + MTLPrimitiveTypePoint // POINTLIST +}; + +static MTLTriangleFillMode SDLToMetal_PolygonMode[] = { + MTLTriangleFillModeFill, // FILL + MTLTriangleFillModeLines, // LINE +}; + +static MTLCullMode SDLToMetal_CullMode[] = { + MTLCullModeNone, // NONE + MTLCullModeFront, // FRONT + MTLCullModeBack, // BACK +}; + +static MTLWinding SDLToMetal_FrontFace[] = { + MTLWindingCounterClockwise, // COUNTER_CLOCKWISE + MTLWindingClockwise, // CLOCKWISE +}; + +static MTLBlendFactor SDLToMetal_BlendFactor[] = { + MTLBlendFactorZero, // INVALID + MTLBlendFactorZero, // ZERO + MTLBlendFactorOne, // ONE + MTLBlendFactorSourceColor, // SRC_COLOR + MTLBlendFactorOneMinusSourceColor, // ONE_MINUS_SRC_COLOR + MTLBlendFactorDestinationColor, // DST_COLOR + MTLBlendFactorOneMinusDestinationColor, // ONE_MINUS_DST_COLOR + MTLBlendFactorSourceAlpha, // SRC_ALPHA + MTLBlendFactorOneMinusSourceAlpha, // ONE_MINUS_SRC_ALPHA + MTLBlendFactorDestinationAlpha, // DST_ALPHA + MTLBlendFactorOneMinusDestinationAlpha, // ONE_MINUS_DST_ALPHA + MTLBlendFactorBlendColor, // CONSTANT_COLOR + MTLBlendFactorOneMinusBlendColor, // ONE_MINUS_CONSTANT_COLOR + MTLBlendFactorSourceAlphaSaturated, // SRC_ALPHA_SATURATE +}; +SDL_COMPILE_TIME_ASSERT(SDLToMetal_BlendFactor, SDL_arraysize(SDLToMetal_BlendFactor) == SDL_GPU_BLENDFACTOR_MAX_ENUM_VALUE); + +static MTLBlendOperation SDLToMetal_BlendOp[] = { + MTLBlendOperationAdd, // INVALID + MTLBlendOperationAdd, // ADD + MTLBlendOperationSubtract, // SUBTRACT + MTLBlendOperationReverseSubtract, // REVERSE_SUBTRACT + MTLBlendOperationMin, // MIN + MTLBlendOperationMax, // MAX +}; +SDL_COMPILE_TIME_ASSERT(SDLToMetal_BlendOp, SDL_arraysize(SDLToMetal_BlendOp) == SDL_GPU_BLENDOP_MAX_ENUM_VALUE); + +static MTLCompareFunction SDLToMetal_CompareOp[] = { + MTLCompareFunctionNever, // INVALID + MTLCompareFunctionNever, // NEVER + MTLCompareFunctionLess, // LESS + MTLCompareFunctionEqual, // EQUAL + MTLCompareFunctionLessEqual, // LESS_OR_EQUAL + MTLCompareFunctionGreater, // GREATER + MTLCompareFunctionNotEqual, // NOT_EQUAL + MTLCompareFunctionGreaterEqual, // GREATER_OR_EQUAL + MTLCompareFunctionAlways, // ALWAYS +}; +SDL_COMPILE_TIME_ASSERT(SDLToMetal_CompareOp, SDL_arraysize(SDLToMetal_CompareOp) == SDL_GPU_COMPAREOP_MAX_ENUM_VALUE); + +static MTLStencilOperation SDLToMetal_StencilOp[] = { + MTLStencilOperationKeep, // INVALID + MTLStencilOperationKeep, // KEEP + MTLStencilOperationZero, // ZERO + MTLStencilOperationReplace, // REPLACE + MTLStencilOperationIncrementClamp, // INCREMENT_AND_CLAMP + MTLStencilOperationDecrementClamp, // DECREMENT_AND_CLAMP + MTLStencilOperationInvert, // INVERT + MTLStencilOperationIncrementWrap, // INCREMENT_AND_WRAP + MTLStencilOperationDecrementWrap, // DECREMENT_AND_WRAP +}; +SDL_COMPILE_TIME_ASSERT(SDLToMetal_StencilOp, SDL_arraysize(SDLToMetal_StencilOp) == SDL_GPU_STENCILOP_MAX_ENUM_VALUE); + +static MTLSamplerAddressMode SDLToMetal_SamplerAddressMode[] = { + MTLSamplerAddressModeRepeat, // REPEAT + MTLSamplerAddressModeMirrorRepeat, // MIRRORED_REPEAT + MTLSamplerAddressModeClampToEdge // CLAMP_TO_EDGE +}; + +static MTLSamplerMinMagFilter SDLToMetal_MinMagFilter[] = { + MTLSamplerMinMagFilterNearest, // NEAREST + MTLSamplerMinMagFilterLinear, // LINEAR +}; + +static MTLSamplerMipFilter SDLToMetal_MipFilter[] = { + MTLSamplerMipFilterNearest, // NEAREST + MTLSamplerMipFilterLinear, // LINEAR +}; + +static MTLLoadAction SDLToMetal_LoadOp[] = { + MTLLoadActionLoad, // LOAD + MTLLoadActionClear, // CLEAR + MTLLoadActionDontCare, // DONT_CARE +}; + +static MTLStoreAction SDLToMetal_StoreOp[] = { + MTLStoreActionStore, + MTLStoreActionDontCare, + MTLStoreActionMultisampleResolve, + MTLStoreActionStoreAndMultisampleResolve +}; + +static MTLVertexStepFunction SDLToMetal_StepFunction[] = { + MTLVertexStepFunctionPerVertex, + MTLVertexStepFunctionPerInstance, +}; + +static NSUInteger SDLToMetal_SampleCount[] = { + 1, // SDL_GPU_SAMPLECOUNT_1 + 2, // SDL_GPU_SAMPLECOUNT_2 + 4, // SDL_GPU_SAMPLECOUNT_4 + 8 // SDL_GPU_SAMPLECOUNT_8 +}; + +static SDL_GPUTextureFormat SwapchainCompositionToFormat[] = { + SDL_GPU_TEXTUREFORMAT_B8G8R8A8_UNORM, // SDR + SDL_GPU_TEXTUREFORMAT_B8G8R8A8_UNORM_SRGB, // SDR_LINEAR + SDL_GPU_TEXTUREFORMAT_R16G16B16A16_FLOAT, // HDR_EXTENDED_LINEAR + SDL_GPU_TEXTUREFORMAT_R10G10B10A2_UNORM, // HDR10_ST2084 +}; + +static CFStringRef SwapchainCompositionToColorSpace[4]; // initialized on device creation + +static MTLTextureType SDLToMetal_TextureType(SDL_GPUTextureType textureType, bool isMSAA) +{ + switch (textureType) { + case SDL_GPU_TEXTURETYPE_2D: + return isMSAA ? MTLTextureType2DMultisample : MTLTextureType2D; + case SDL_GPU_TEXTURETYPE_2D_ARRAY: + return MTLTextureType2DArray; + case SDL_GPU_TEXTURETYPE_3D: + return MTLTextureType3D; + case SDL_GPU_TEXTURETYPE_CUBE: + return MTLTextureTypeCube; + case SDL_GPU_TEXTURETYPE_CUBE_ARRAY: + return MTLTextureTypeCubeArray; + default: + return MTLTextureType2D; + } +} + +static MTLColorWriteMask SDLToMetal_ColorWriteMask( + SDL_GPUColorComponentFlags mask) +{ + MTLColorWriteMask result = 0; + if (mask & SDL_GPU_COLORCOMPONENT_R) { + result |= MTLColorWriteMaskRed; + } + if (mask & SDL_GPU_COLORCOMPONENT_G) { + result |= MTLColorWriteMaskGreen; + } + if (mask & SDL_GPU_COLORCOMPONENT_B) { + result |= MTLColorWriteMaskBlue; + } + if (mask & SDL_GPU_COLORCOMPONENT_A) { + result |= MTLColorWriteMaskAlpha; + } + return result; +} + +static MTLDepthClipMode SDLToMetal_DepthClipMode( + bool enableDepthClip +) { + if (enableDepthClip) { + return MTLDepthClipModeClip; + } else { + return MTLDepthClipModeClamp; + } +} + +// Structs + +typedef struct MetalTexture +{ + id handle; + SDL_AtomicInt referenceCount; +} MetalTexture; + +typedef struct MetalTextureContainer +{ + TextureCommonHeader header; + + MetalTexture *activeTexture; + Uint8 canBeCycled; + + Uint32 textureCapacity; + Uint32 textureCount; + MetalTexture **textures; + + char *debugName; +} MetalTextureContainer; + +typedef struct MetalFence +{ + SDL_AtomicInt complete; + SDL_AtomicInt referenceCount; +} MetalFence; + +typedef struct MetalWindowData +{ + SDL_Window *window; + SDL_MetalView view; + CAMetalLayer *layer; + SDL_GPUPresentMode presentMode; + id drawable; + MetalTexture texture; + MetalTextureContainer textureContainer; + SDL_GPUFence *inFlightFences[MAX_FRAMES_IN_FLIGHT]; + Uint32 frameCounter; +} MetalWindowData; + +typedef struct MetalShader +{ + id library; + id function; + + SDL_GPUShaderStage stage; + Uint32 numSamplers; + Uint32 numUniformBuffers; + Uint32 numStorageBuffers; + Uint32 numStorageTextures; +} MetalShader; + +typedef struct MetalGraphicsPipeline +{ + id handle; + + SDL_GPURasterizerState rasterizerState; + SDL_GPUPrimitiveType primitiveType; + + id depth_stencil_state; + + Uint32 vertexSamplerCount; + Uint32 vertexUniformBufferCount; + Uint32 vertexStorageBufferCount; + Uint32 vertexStorageTextureCount; + + Uint32 fragmentSamplerCount; + Uint32 fragmentUniformBufferCount; + Uint32 fragmentStorageBufferCount; + Uint32 fragmentStorageTextureCount; +} MetalGraphicsPipeline; + +typedef struct MetalComputePipeline +{ + id handle; + Uint32 numSamplers; + Uint32 numReadonlyStorageTextures; + Uint32 numReadWriteStorageTextures; + Uint32 numReadonlyStorageBuffers; + Uint32 numReadWriteStorageBuffers; + Uint32 numUniformBuffers; + Uint32 threadcountX; + Uint32 threadcountY; + Uint32 threadcountZ; +} MetalComputePipeline; + +typedef struct MetalBuffer +{ + id handle; + SDL_AtomicInt referenceCount; +} MetalBuffer; + +typedef struct MetalBufferContainer +{ + MetalBuffer *activeBuffer; + Uint32 size; + + Uint32 bufferCapacity; + Uint32 bufferCount; + MetalBuffer **buffers; + + bool isPrivate; + bool isWriteOnly; + char *debugName; +} MetalBufferContainer; + +typedef struct MetalUniformBuffer +{ + id handle; + Uint32 writeOffset; + Uint32 drawOffset; +} MetalUniformBuffer; + +typedef struct MetalRenderer MetalRenderer; + +typedef struct MetalCommandBuffer +{ + CommandBufferCommonHeader common; + MetalRenderer *renderer; + + // Native Handle + id handle; + + // Presentation + MetalWindowData **windowDatas; + Uint32 windowDataCount; + Uint32 windowDataCapacity; + + // Render Pass + id renderEncoder; + MetalGraphicsPipeline *graphics_pipeline; + MetalBuffer *indexBuffer; + Uint32 indexBufferOffset; + SDL_GPUIndexElementSize index_element_size; + + // Copy Pass + id blitEncoder; + + // Compute Pass + id computeEncoder; + MetalComputePipeline *compute_pipeline; + + // Resource slot state + bool needVertexBufferBind; + bool needVertexSamplerBind; + bool needVertexStorageTextureBind; + bool needVertexStorageBufferBind; + bool needVertexUniformBufferBind[MAX_UNIFORM_BUFFERS_PER_STAGE]; + + bool needFragmentSamplerBind; + bool needFragmentStorageTextureBind; + bool needFragmentStorageBufferBind; + bool needFragmentUniformBufferBind[MAX_UNIFORM_BUFFERS_PER_STAGE]; + + bool needComputeSamplerBind; + bool needComputeReadOnlyStorageTextureBind; + bool needComputeReadOnlyStorageBufferBind; + bool needComputeUniformBufferBind[MAX_UNIFORM_BUFFERS_PER_STAGE]; + + id vertexBuffers[MAX_VERTEX_BUFFERS]; + Uint32 vertexBufferOffsets[MAX_VERTEX_BUFFERS]; + Uint32 vertexBufferCount; + + id vertexSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE]; + id vertexTextures[MAX_TEXTURE_SAMPLERS_PER_STAGE]; + id vertexStorageTextures[MAX_STORAGE_TEXTURES_PER_STAGE]; + id vertexStorageBuffers[MAX_STORAGE_BUFFERS_PER_STAGE]; + MetalUniformBuffer *vertexUniformBuffers[MAX_UNIFORM_BUFFERS_PER_STAGE]; + + id fragmentSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE]; + id fragmentTextures[MAX_TEXTURE_SAMPLERS_PER_STAGE]; + id fragmentStorageTextures[MAX_STORAGE_TEXTURES_PER_STAGE]; + id fragmentStorageBuffers[MAX_STORAGE_BUFFERS_PER_STAGE]; + MetalUniformBuffer *fragmentUniformBuffers[MAX_UNIFORM_BUFFERS_PER_STAGE]; + + id computeSamplerTextures[MAX_TEXTURE_SAMPLERS_PER_STAGE]; + id computeSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE]; + id computeReadOnlyTextures[MAX_STORAGE_TEXTURES_PER_STAGE]; + id computeReadOnlyBuffers[MAX_STORAGE_BUFFERS_PER_STAGE]; + id computeReadWriteTextures[MAX_COMPUTE_WRITE_TEXTURES]; + id computeReadWriteBuffers[MAX_COMPUTE_WRITE_BUFFERS]; + MetalUniformBuffer *computeUniformBuffers[MAX_UNIFORM_BUFFERS_PER_STAGE]; + + MetalUniformBuffer **usedUniformBuffers; + Uint32 usedUniformBufferCount; + Uint32 usedUniformBufferCapacity; + + // Fences + MetalFence *fence; + bool autoReleaseFence; + + // Reference Counting + MetalBuffer **usedBuffers; + Uint32 usedBufferCount; + Uint32 usedBufferCapacity; + + MetalTexture **usedTextures; + Uint32 usedTextureCount; + Uint32 usedTextureCapacity; +} MetalCommandBuffer; + +typedef struct MetalSampler +{ + id handle; +} MetalSampler; + +typedef struct BlitPipeline +{ + SDL_GPUGraphicsPipeline *pipeline; + SDL_GPUTextureFormat format; +} BlitPipeline; + +struct MetalRenderer +{ + // Reference to the parent device + SDL_GPUDevice *sdlGPUDevice; + + id device; + id queue; + + bool debugMode; + Uint32 allowedFramesInFlight; + + MetalWindowData **claimedWindows; + Uint32 claimedWindowCount; + Uint32 claimedWindowCapacity; + + MetalCommandBuffer **availableCommandBuffers; + Uint32 availableCommandBufferCount; + Uint32 availableCommandBufferCapacity; + + MetalCommandBuffer **submittedCommandBuffers; + Uint32 submittedCommandBufferCount; + Uint32 submittedCommandBufferCapacity; + + MetalFence **availableFences; + Uint32 availableFenceCount; + Uint32 availableFenceCapacity; + + MetalUniformBuffer **uniformBufferPool; + Uint32 uniformBufferPoolCount; + Uint32 uniformBufferPoolCapacity; + + MetalBufferContainer **bufferContainersToDestroy; + Uint32 bufferContainersToDestroyCount; + Uint32 bufferContainersToDestroyCapacity; + + MetalTextureContainer **textureContainersToDestroy; + Uint32 textureContainersToDestroyCount; + Uint32 textureContainersToDestroyCapacity; + + // Blit + SDL_GPUShader *blitVertexShader; + SDL_GPUShader *blitFrom2DShader; + SDL_GPUShader *blitFrom2DArrayShader; + SDL_GPUShader *blitFrom3DShader; + SDL_GPUShader *blitFromCubeShader; + SDL_GPUShader *blitFromCubeArrayShader; + + SDL_GPUSampler *blitNearestSampler; + SDL_GPUSampler *blitLinearSampler; + + BlitPipelineCacheEntry *blitPipelines; + Uint32 blitPipelineCount; + Uint32 blitPipelineCapacity; + + // Mutexes + SDL_Mutex *submitLock; + SDL_Mutex *acquireCommandBufferLock; + SDL_Mutex *acquireUniformBufferLock; + SDL_Mutex *disposeLock; + SDL_Mutex *fenceLock; + SDL_Mutex *windowLock; +}; + +// Helper Functions + +// FIXME: This should be moved into SDL_sysgpu.h +static inline Uint32 METAL_INTERNAL_NextHighestAlignment( + Uint32 n, + Uint32 align) +{ + return align * ((n + align - 1) / align); +} + +// Quit + +static void METAL_DestroyDevice(SDL_GPUDevice *device) +{ + MetalRenderer *renderer = (MetalRenderer *)device->driverData; + + // Flush any remaining GPU work... + METAL_Wait(device->driverData); + + // Release the window data + for (Sint32 i = renderer->claimedWindowCount - 1; i >= 0; i -= 1) { + METAL_ReleaseWindow(device->driverData, renderer->claimedWindows[i]->window); + } + SDL_free(renderer->claimedWindows); + + // Release the blit resources + METAL_INTERNAL_DestroyBlitResources(device->driverData); + + // Release uniform buffers + for (Uint32 i = 0; i < renderer->uniformBufferPoolCount; i += 1) { + renderer->uniformBufferPool[i]->handle = nil; + SDL_free(renderer->uniformBufferPool[i]); + } + SDL_free(renderer->uniformBufferPool); + + // Release destroyed resource lists + SDL_free(renderer->bufferContainersToDestroy); + SDL_free(renderer->textureContainersToDestroy); + + // Release command buffer infrastructure + for (Uint32 i = 0; i < renderer->availableCommandBufferCount; i += 1) { + MetalCommandBuffer *commandBuffer = renderer->availableCommandBuffers[i]; + SDL_free(commandBuffer->usedBuffers); + SDL_free(commandBuffer->usedTextures); + SDL_free(commandBuffer->usedUniformBuffers); + SDL_free(commandBuffer->windowDatas); + SDL_free(commandBuffer); + } + SDL_free(renderer->availableCommandBuffers); + SDL_free(renderer->submittedCommandBuffers); + + // Release fence infrastructure + for (Uint32 i = 0; i < renderer->availableFenceCount; i += 1) { + SDL_free(renderer->availableFences[i]); + } + SDL_free(renderer->availableFences); + + // Release the mutexes + SDL_DestroyMutex(renderer->submitLock); + SDL_DestroyMutex(renderer->acquireCommandBufferLock); + SDL_DestroyMutex(renderer->acquireUniformBufferLock); + SDL_DestroyMutex(renderer->disposeLock); + SDL_DestroyMutex(renderer->fenceLock); + SDL_DestroyMutex(renderer->windowLock); + + // Release the command queue + renderer->queue = nil; + + // Free the primary structures + SDL_free(renderer); + SDL_free(device); +} + +// Resource tracking + +static void METAL_INTERNAL_TrackBuffer( + MetalCommandBuffer *commandBuffer, + MetalBuffer *buffer) +{ + TRACK_RESOURCE( + buffer, + MetalBuffer *, + usedBuffers, + usedBufferCount, + usedBufferCapacity); +} + +static void METAL_INTERNAL_TrackTexture( + MetalCommandBuffer *commandBuffer, + MetalTexture *texture) +{ + TRACK_RESOURCE( + texture, + MetalTexture *, + usedTextures, + usedTextureCount, + usedTextureCapacity); +} + +static void METAL_INTERNAL_TrackUniformBuffer( + MetalCommandBuffer *commandBuffer, + MetalUniformBuffer *uniformBuffer) +{ + Uint32 i; + for (i = 0; i < commandBuffer->usedUniformBufferCount; i += 1) { + if (commandBuffer->usedUniformBuffers[i] == uniformBuffer) { + return; + } + } + + if (commandBuffer->usedUniformBufferCount == commandBuffer->usedUniformBufferCapacity) { + commandBuffer->usedUniformBufferCapacity += 1; + commandBuffer->usedUniformBuffers = SDL_realloc( + commandBuffer->usedUniformBuffers, + commandBuffer->usedUniformBufferCapacity * sizeof(MetalUniformBuffer *)); + } + + commandBuffer->usedUniformBuffers[commandBuffer->usedUniformBufferCount] = uniformBuffer; + commandBuffer->usedUniformBufferCount += 1; +} + +// Shader Compilation + +typedef struct MetalLibraryFunction +{ + id library; + id function; +} MetalLibraryFunction; + +// This function assumes that it's called from within an autorelease pool +static MetalLibraryFunction METAL_INTERNAL_CompileShader( + MetalRenderer *renderer, + SDL_GPUShaderFormat format, + const Uint8 *code, + size_t codeSize, + const char *entrypoint) +{ + MetalLibraryFunction libraryFunction = { nil, nil }; + id library; + NSError *error; + dispatch_data_t data; + id function; + + if (format == SDL_GPU_SHADERFORMAT_MSL) { + NSString *codeString = [[NSString alloc] + initWithBytes:code + length:codeSize + encoding:NSUTF8StringEncoding]; + library = [renderer->device + newLibraryWithSource:codeString + options:nil + error:&error]; + } else if (format == SDL_GPU_SHADERFORMAT_METALLIB) { + data = dispatch_data_create( + code, + codeSize, + dispatch_get_global_queue(0, 0), + ^{ /* do nothing */ }); + library = [renderer->device newLibraryWithData:data error:&error]; + } else { + SDL_assert(!"SDL_gpu.c should have already validated this!"); + return libraryFunction; + } + + if (library == nil) { + SDL_LogError( + SDL_LOG_CATEGORY_GPU, + "Creating MTLLibrary failed: %s", + [[error description] cStringUsingEncoding:[NSString defaultCStringEncoding]]); + return libraryFunction; + } else if (error != nil) { + SDL_LogWarn( + SDL_LOG_CATEGORY_GPU, + "Creating MTLLibrary failed: %s", + [[error description] cStringUsingEncoding:[NSString defaultCStringEncoding]]); + } + + function = [library newFunctionWithName:@(entrypoint)]; + if (function == nil) { + SDL_LogError( + SDL_LOG_CATEGORY_GPU, + "Creating MTLFunction failed"); + return libraryFunction; + } + + libraryFunction.library = library; + libraryFunction.function = function; + return libraryFunction; +} + +// Disposal + +static void METAL_INTERNAL_DestroyTextureContainer( + MetalTextureContainer *container) +{ + for (Uint32 i = 0; i < container->textureCount; i += 1) { + container->textures[i]->handle = nil; + SDL_free(container->textures[i]); + } + if (container->debugName != NULL) { + SDL_free(container->debugName); + } + SDL_free(container->textures); + SDL_free(container); +} + +static void METAL_ReleaseTexture( + SDL_GPURenderer *driverData, + SDL_GPUTexture *texture) +{ + MetalRenderer *renderer = (MetalRenderer *)driverData; + MetalTextureContainer *container = (MetalTextureContainer *)texture; + + SDL_LockMutex(renderer->disposeLock); + + EXPAND_ARRAY_IF_NEEDED( + renderer->textureContainersToDestroy, + MetalTextureContainer *, + renderer->textureContainersToDestroyCount + 1, + renderer->textureContainersToDestroyCapacity, + renderer->textureContainersToDestroyCapacity + 1); + + renderer->textureContainersToDestroy[renderer->textureContainersToDestroyCount] = container; + renderer->textureContainersToDestroyCount += 1; + + SDL_UnlockMutex(renderer->disposeLock); +} + +static void METAL_ReleaseSampler( + SDL_GPURenderer *driverData, + SDL_GPUSampler *sampler) +{ + @autoreleasepool { + MetalSampler *metalSampler = (MetalSampler *)sampler; + metalSampler->handle = nil; + SDL_free(metalSampler); + } +} + +static void METAL_INTERNAL_DestroyBufferContainer( + MetalBufferContainer *container) +{ + for (Uint32 i = 0; i < container->bufferCount; i += 1) { + container->buffers[i]->handle = nil; + SDL_free(container->buffers[i]); + } + if (container->debugName != NULL) { + SDL_free(container->debugName); + } + SDL_free(container->buffers); + SDL_free(container); +} + +static void METAL_ReleaseBuffer( + SDL_GPURenderer *driverData, + SDL_GPUBuffer *buffer) +{ + MetalRenderer *renderer = (MetalRenderer *)driverData; + MetalBufferContainer *container = (MetalBufferContainer *)buffer; + + SDL_LockMutex(renderer->disposeLock); + + EXPAND_ARRAY_IF_NEEDED( + renderer->bufferContainersToDestroy, + MetalBufferContainer *, + renderer->bufferContainersToDestroyCount + 1, + renderer->bufferContainersToDestroyCapacity, + renderer->bufferContainersToDestroyCapacity + 1); + + renderer->bufferContainersToDestroy[renderer->bufferContainersToDestroyCount] = container; + renderer->bufferContainersToDestroyCount += 1; + + SDL_UnlockMutex(renderer->disposeLock); +} + +static void METAL_ReleaseTransferBuffer( + SDL_GPURenderer *driverData, + SDL_GPUTransferBuffer *transferBuffer) +{ + METAL_ReleaseBuffer( + driverData, + (SDL_GPUBuffer *)transferBuffer); +} + +static void METAL_ReleaseShader( + SDL_GPURenderer *driverData, + SDL_GPUShader *shader) +{ + @autoreleasepool { + MetalShader *metalShader = (MetalShader *)shader; + metalShader->function = nil; + metalShader->library = nil; + SDL_free(metalShader); + } +} + +static void METAL_ReleaseComputePipeline( + SDL_GPURenderer *driverData, + SDL_GPUComputePipeline *computePipeline) +{ + @autoreleasepool { + MetalComputePipeline *metalComputePipeline = (MetalComputePipeline *)computePipeline; + metalComputePipeline->handle = nil; + SDL_free(metalComputePipeline); + } +} + +static void METAL_ReleaseGraphicsPipeline( + SDL_GPURenderer *driverData, + SDL_GPUGraphicsPipeline *graphicsPipeline) +{ + @autoreleasepool { + MetalGraphicsPipeline *metalGraphicsPipeline = (MetalGraphicsPipeline *)graphicsPipeline; + metalGraphicsPipeline->handle = nil; + metalGraphicsPipeline->depth_stencil_state = nil; + SDL_free(metalGraphicsPipeline); + } +} + +// Pipeline Creation + +static SDL_GPUComputePipeline *METAL_CreateComputePipeline( + SDL_GPURenderer *driverData, + const SDL_GPUComputePipelineCreateInfo *createinfo) +{ + @autoreleasepool { + MetalRenderer *renderer = (MetalRenderer *)driverData; + MetalLibraryFunction libraryFunction; + id handle; + MetalComputePipeline *pipeline; + NSError *error; + + libraryFunction = METAL_INTERNAL_CompileShader( + renderer, + createinfo->format, + createinfo->code, + createinfo->code_size, + createinfo->entrypoint); + + if (libraryFunction.library == nil || libraryFunction.function == nil) { + return NULL; + } + + MTLComputePipelineDescriptor *descriptor = [MTLComputePipelineDescriptor new]; + descriptor.computeFunction = libraryFunction.function; + + if (renderer->debugMode && SDL_HasProperty(createinfo->props, SDL_PROP_GPU_COMPUTEPIPELINE_CREATE_NAME_STRING)) { + const char *name = SDL_GetStringProperty(createinfo->props, SDL_PROP_GPU_COMPUTEPIPELINE_CREATE_NAME_STRING, NULL); + descriptor.label = @(name); + } + + handle = [renderer->device newComputePipelineStateWithDescriptor:descriptor options:MTLPipelineOptionNone reflection: nil error:&error]; + if (error != NULL) { + SET_ERROR_AND_RETURN("Creating compute pipeline failed: %s", [[error description] UTF8String], NULL); + } + + pipeline = SDL_calloc(1, sizeof(MetalComputePipeline)); + pipeline->handle = handle; + pipeline->numSamplers = createinfo->num_samplers; + pipeline->numReadonlyStorageTextures = createinfo->num_readonly_storage_textures; + pipeline->numReadWriteStorageTextures = createinfo->num_readwrite_storage_textures; + pipeline->numReadonlyStorageBuffers = createinfo->num_readonly_storage_buffers; + pipeline->numReadWriteStorageBuffers = createinfo->num_readwrite_storage_buffers; + pipeline->numUniformBuffers = createinfo->num_uniform_buffers; + pipeline->threadcountX = createinfo->threadcount_x; + pipeline->threadcountY = createinfo->threadcount_y; + pipeline->threadcountZ = createinfo->threadcount_z; + + return (SDL_GPUComputePipeline *)pipeline; + } +} + +static SDL_GPUGraphicsPipeline *METAL_CreateGraphicsPipeline( + SDL_GPURenderer *driverData, + const SDL_GPUGraphicsPipelineCreateInfo *createinfo) +{ + @autoreleasepool { + MetalRenderer *renderer = (MetalRenderer *)driverData; + MetalShader *vertexShader = (MetalShader *)createinfo->vertex_shader; + MetalShader *fragmentShader = (MetalShader *)createinfo->fragment_shader; + MTLRenderPipelineDescriptor *pipelineDescriptor; + const SDL_GPUColorTargetBlendState *blendState; + MTLVertexDescriptor *vertexDescriptor; + Uint32 binding; + MTLDepthStencilDescriptor *depthStencilDescriptor; + MTLStencilDescriptor *frontStencilDescriptor = NULL; + MTLStencilDescriptor *backStencilDescriptor = NULL; + id depthStencilState = nil; + id pipelineState = nil; + NSError *error = NULL; + MetalGraphicsPipeline *result = NULL; + + if (renderer->debugMode) { + if (vertexShader->stage != SDL_GPU_SHADERSTAGE_VERTEX) { + SDL_assert_release(!"CreateGraphicsPipeline was passed a fragment shader for the vertex stage"); + } + if (fragmentShader->stage != SDL_GPU_SHADERSTAGE_FRAGMENT) { + SDL_assert_release(!"CreateGraphicsPipeline was passed a vertex shader for the fragment stage"); + } + } + pipelineDescriptor = [MTLRenderPipelineDescriptor new]; + + // Blend + + for (Uint32 i = 0; i < createinfo->target_info.num_color_targets; i += 1) { + blendState = &createinfo->target_info.color_target_descriptions[i].blend_state; + SDL_GPUColorComponentFlags colorWriteMask = blendState->enable_color_write_mask ? + blendState->color_write_mask : + 0xF; + + pipelineDescriptor.colorAttachments[i].pixelFormat = SDLToMetal_TextureFormat(createinfo->target_info.color_target_descriptions[i].format); + pipelineDescriptor.colorAttachments[i].writeMask = SDLToMetal_ColorWriteMask(colorWriteMask); + pipelineDescriptor.colorAttachments[i].blendingEnabled = blendState->enable_blend; + pipelineDescriptor.colorAttachments[i].rgbBlendOperation = SDLToMetal_BlendOp[blendState->color_blend_op]; + pipelineDescriptor.colorAttachments[i].alphaBlendOperation = SDLToMetal_BlendOp[blendState->alpha_blend_op]; + pipelineDescriptor.colorAttachments[i].sourceRGBBlendFactor = SDLToMetal_BlendFactor[blendState->src_color_blendfactor]; + pipelineDescriptor.colorAttachments[i].sourceAlphaBlendFactor = SDLToMetal_BlendFactor[blendState->src_alpha_blendfactor]; + pipelineDescriptor.colorAttachments[i].destinationRGBBlendFactor = SDLToMetal_BlendFactor[blendState->dst_color_blendfactor]; + pipelineDescriptor.colorAttachments[i].destinationAlphaBlendFactor = SDLToMetal_BlendFactor[blendState->dst_alpha_blendfactor]; + } + + // Multisample + + pipelineDescriptor.rasterSampleCount = SDLToMetal_SampleCount[createinfo->multisample_state.sample_count]; + + // Depth Stencil + + if (createinfo->target_info.has_depth_stencil_target) { + pipelineDescriptor.depthAttachmentPixelFormat = SDLToMetal_TextureFormat(createinfo->target_info.depth_stencil_format); + if (IsStencilFormat(createinfo->target_info.depth_stencil_format)) { + pipelineDescriptor.stencilAttachmentPixelFormat = SDLToMetal_TextureFormat(createinfo->target_info.depth_stencil_format); + } + + if (createinfo->depth_stencil_state.enable_stencil_test) { + frontStencilDescriptor = [MTLStencilDescriptor new]; + frontStencilDescriptor.stencilCompareFunction = SDLToMetal_CompareOp[createinfo->depth_stencil_state.front_stencil_state.compare_op]; + frontStencilDescriptor.stencilFailureOperation = SDLToMetal_StencilOp[createinfo->depth_stencil_state.front_stencil_state.fail_op]; + frontStencilDescriptor.depthStencilPassOperation = SDLToMetal_StencilOp[createinfo->depth_stencil_state.front_stencil_state.pass_op]; + frontStencilDescriptor.depthFailureOperation = SDLToMetal_StencilOp[createinfo->depth_stencil_state.front_stencil_state.depth_fail_op]; + frontStencilDescriptor.readMask = createinfo->depth_stencil_state.compare_mask; + frontStencilDescriptor.writeMask = createinfo->depth_stencil_state.write_mask; + + backStencilDescriptor = [MTLStencilDescriptor new]; + backStencilDescriptor.stencilCompareFunction = SDLToMetal_CompareOp[createinfo->depth_stencil_state.back_stencil_state.compare_op]; + backStencilDescriptor.stencilFailureOperation = SDLToMetal_StencilOp[createinfo->depth_stencil_state.back_stencil_state.fail_op]; + backStencilDescriptor.depthStencilPassOperation = SDLToMetal_StencilOp[createinfo->depth_stencil_state.back_stencil_state.pass_op]; + backStencilDescriptor.depthFailureOperation = SDLToMetal_StencilOp[createinfo->depth_stencil_state.back_stencil_state.depth_fail_op]; + backStencilDescriptor.readMask = createinfo->depth_stencil_state.compare_mask; + backStencilDescriptor.writeMask = createinfo->depth_stencil_state.write_mask; + } + + depthStencilDescriptor = [MTLDepthStencilDescriptor new]; + depthStencilDescriptor.depthCompareFunction = createinfo->depth_stencil_state.enable_depth_test ? SDLToMetal_CompareOp[createinfo->depth_stencil_state.compare_op] : MTLCompareFunctionAlways; + // Disable write when test is disabled, to match other APIs' behavior + depthStencilDescriptor.depthWriteEnabled = createinfo->depth_stencil_state.enable_depth_write && createinfo->depth_stencil_state.enable_depth_test; + depthStencilDescriptor.frontFaceStencil = frontStencilDescriptor; + depthStencilDescriptor.backFaceStencil = backStencilDescriptor; + + depthStencilState = [renderer->device newDepthStencilStateWithDescriptor:depthStencilDescriptor]; + } + + // Shaders + + pipelineDescriptor.vertexFunction = vertexShader->function; + pipelineDescriptor.fragmentFunction = fragmentShader->function; + + // Vertex Descriptor + + if (createinfo->vertex_input_state.num_vertex_buffers > 0) { + vertexDescriptor = [MTLVertexDescriptor vertexDescriptor]; + + for (Uint32 i = 0; i < createinfo->vertex_input_state.num_vertex_attributes; i += 1) { + Uint32 loc = createinfo->vertex_input_state.vertex_attributes[i].location; + vertexDescriptor.attributes[loc].format = SDLToMetal_VertexFormat[createinfo->vertex_input_state.vertex_attributes[i].format]; + vertexDescriptor.attributes[loc].offset = createinfo->vertex_input_state.vertex_attributes[i].offset; + vertexDescriptor.attributes[loc].bufferIndex = + METAL_FIRST_VERTEX_BUFFER_SLOT + createinfo->vertex_input_state.vertex_attributes[i].buffer_slot; + } + + for (Uint32 i = 0; i < createinfo->vertex_input_state.num_vertex_buffers; i += 1) { + binding = METAL_FIRST_VERTEX_BUFFER_SLOT + createinfo->vertex_input_state.vertex_buffer_descriptions[i].slot; + vertexDescriptor.layouts[binding].stepFunction = SDLToMetal_StepFunction[createinfo->vertex_input_state.vertex_buffer_descriptions[i].input_rate]; + vertexDescriptor.layouts[binding].stepRate = 1; + vertexDescriptor.layouts[binding].stride = createinfo->vertex_input_state.vertex_buffer_descriptions[i].pitch; + } + + pipelineDescriptor.vertexDescriptor = vertexDescriptor; + } + + if (renderer->debugMode && SDL_HasProperty(createinfo->props, SDL_PROP_GPU_GRAPHICSPIPELINE_CREATE_NAME_STRING)) { + const char *name = SDL_GetStringProperty(createinfo->props, SDL_PROP_GPU_GRAPHICSPIPELINE_CREATE_NAME_STRING, NULL); + pipelineDescriptor.label = @(name); + } + + // Create the graphics pipeline + + pipelineState = [renderer->device newRenderPipelineStateWithDescriptor:pipelineDescriptor error:&error]; + if (error != NULL) { + SET_ERROR_AND_RETURN("Creating render pipeline failed: %s", [[error description] UTF8String], NULL); + } + + result = SDL_calloc(1, sizeof(MetalGraphicsPipeline)); + result->handle = pipelineState; + result->depth_stencil_state = depthStencilState; + result->rasterizerState = createinfo->rasterizer_state; + result->primitiveType = createinfo->primitive_type; + result->vertexSamplerCount = vertexShader->numSamplers; + result->vertexUniformBufferCount = vertexShader->numUniformBuffers; + result->vertexStorageBufferCount = vertexShader->numStorageBuffers; + result->vertexStorageTextureCount = vertexShader->numStorageTextures; + result->fragmentSamplerCount = fragmentShader->numSamplers; + result->fragmentUniformBufferCount = fragmentShader->numUniformBuffers; + result->fragmentStorageBufferCount = fragmentShader->numStorageBuffers; + result->fragmentStorageTextureCount = fragmentShader->numStorageTextures; + return (SDL_GPUGraphicsPipeline *)result; + } +} + +// Debug Naming + +static void METAL_SetBufferName( + SDL_GPURenderer *driverData, + SDL_GPUBuffer *buffer, + const char *text) +{ + @autoreleasepool { + MetalRenderer *renderer = (MetalRenderer *)driverData; + MetalBufferContainer *container = (MetalBufferContainer *)buffer; + + if (renderer->debugMode && text != NULL) { + if (container->debugName != NULL) { + SDL_free(container->debugName); + } + + container->debugName = SDL_strdup(text); + + for (Uint32 i = 0; i < container->bufferCount; i += 1) { + container->buffers[i]->handle.label = @(text); + } + } + } +} + +static void METAL_SetTextureName( + SDL_GPURenderer *driverData, + SDL_GPUTexture *texture, + const char *text) +{ + @autoreleasepool { + MetalRenderer *renderer = (MetalRenderer *)driverData; + MetalTextureContainer *container = (MetalTextureContainer *)texture; + + if (renderer->debugMode && text != NULL) { + if (container->debugName != NULL) { + SDL_free(container->debugName); + } + + container->debugName = SDL_strdup(text); + + for (Uint32 i = 0; i < container->textureCount; i += 1) { + container->textures[i]->handle.label = @(text); + } + } + } +} + +static void METAL_InsertDebugLabel( + SDL_GPUCommandBuffer *commandBuffer, + const char *text) +{ + @autoreleasepool { + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + NSString *label = @(text); + + if (metalCommandBuffer->renderEncoder) { + [metalCommandBuffer->renderEncoder insertDebugSignpost:label]; + } else if (metalCommandBuffer->blitEncoder) { + [metalCommandBuffer->blitEncoder insertDebugSignpost:label]; + } else if (metalCommandBuffer->computeEncoder) { + [metalCommandBuffer->computeEncoder insertDebugSignpost:label]; + } else { + // Metal doesn't have insertDebugSignpost for command buffers... + [metalCommandBuffer->handle pushDebugGroup:label]; + [metalCommandBuffer->handle popDebugGroup]; + } + } +} + +static void METAL_PushDebugGroup( + SDL_GPUCommandBuffer *commandBuffer, + const char *name) +{ + @autoreleasepool { + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + NSString *label = @(name); + + if (metalCommandBuffer->renderEncoder) { + [metalCommandBuffer->renderEncoder pushDebugGroup:label]; + } else if (metalCommandBuffer->blitEncoder) { + [metalCommandBuffer->blitEncoder pushDebugGroup:label]; + } else if (metalCommandBuffer->computeEncoder) { + [metalCommandBuffer->computeEncoder pushDebugGroup:label]; + } else { + [metalCommandBuffer->handle pushDebugGroup:label]; + } + } +} + +static void METAL_PopDebugGroup( + SDL_GPUCommandBuffer *commandBuffer) +{ + @autoreleasepool { + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + + if (metalCommandBuffer->renderEncoder) { + [metalCommandBuffer->renderEncoder popDebugGroup]; + } else if (metalCommandBuffer->blitEncoder) { + [metalCommandBuffer->blitEncoder popDebugGroup]; + } else if (metalCommandBuffer->computeEncoder) { + [metalCommandBuffer->computeEncoder popDebugGroup]; + } else { + [metalCommandBuffer->handle popDebugGroup]; + } + } +} + +// Resource Creation + +static SDL_GPUSampler *METAL_CreateSampler( + SDL_GPURenderer *driverData, + const SDL_GPUSamplerCreateInfo *createinfo) +{ + @autoreleasepool { + MetalRenderer *renderer = (MetalRenderer *)driverData; + MTLSamplerDescriptor *samplerDesc = [MTLSamplerDescriptor new]; + id sampler; + MetalSampler *metalSampler; + + samplerDesc.sAddressMode = SDLToMetal_SamplerAddressMode[createinfo->address_mode_u]; + samplerDesc.tAddressMode = SDLToMetal_SamplerAddressMode[createinfo->address_mode_v]; + samplerDesc.rAddressMode = SDLToMetal_SamplerAddressMode[createinfo->address_mode_w]; + samplerDesc.minFilter = SDLToMetal_MinMagFilter[createinfo->min_filter]; + samplerDesc.magFilter = SDLToMetal_MinMagFilter[createinfo->mag_filter]; + samplerDesc.mipFilter = SDLToMetal_MipFilter[createinfo->mipmap_mode]; // FIXME: Is this right with non-mipmapped samplers? + samplerDesc.lodMinClamp = createinfo->min_lod; + samplerDesc.lodMaxClamp = createinfo->max_lod; + samplerDesc.maxAnisotropy = (NSUInteger)((createinfo->enable_anisotropy) ? createinfo->max_anisotropy : 1); + samplerDesc.compareFunction = (createinfo->enable_compare) ? SDLToMetal_CompareOp[createinfo->compare_op] : MTLCompareFunctionAlways; + + if (renderer->debugMode && SDL_HasProperty(createinfo->props, SDL_PROP_GPU_SAMPLER_CREATE_NAME_STRING)) { + const char *name = SDL_GetStringProperty(createinfo->props, SDL_PROP_GPU_SAMPLER_CREATE_NAME_STRING, NULL); + samplerDesc.label = @(name); + } + + sampler = [renderer->device newSamplerStateWithDescriptor:samplerDesc]; + if (sampler == NULL) { + SET_STRING_ERROR_AND_RETURN("Failed to create sampler", NULL); + } + + metalSampler = (MetalSampler *)SDL_calloc(1, sizeof(MetalSampler)); + metalSampler->handle = sampler; + return (SDL_GPUSampler *)metalSampler; + } +} + +static SDL_GPUShader *METAL_CreateShader( + SDL_GPURenderer *driverData, + const SDL_GPUShaderCreateInfo *createinfo) +{ + @autoreleasepool { + MetalLibraryFunction libraryFunction; + MetalShader *result; + + libraryFunction = METAL_INTERNAL_CompileShader( + (MetalRenderer *)driverData, + createinfo->format, + createinfo->code, + createinfo->code_size, + createinfo->entrypoint); + + if (libraryFunction.library == nil || libraryFunction.function == nil) { + return NULL; + } + + result = SDL_calloc(1, sizeof(MetalShader)); + result->library = libraryFunction.library; + result->function = libraryFunction.function; + result->stage = createinfo->stage; + result->numSamplers = createinfo->num_samplers; + result->numStorageBuffers = createinfo->num_storage_buffers; + result->numStorageTextures = createinfo->num_storage_textures; + result->numUniformBuffers = createinfo->num_uniform_buffers; + return (SDL_GPUShader *)result; + } +} + +// This function assumes that it's called from within an autorelease pool +static MetalTexture *METAL_INTERNAL_CreateTexture( + MetalRenderer *renderer, + const SDL_GPUTextureCreateInfo *createinfo) +{ + MTLTextureDescriptor *textureDescriptor = [MTLTextureDescriptor new]; + id texture; + MetalTexture *metalTexture; + + textureDescriptor.textureType = SDLToMetal_TextureType(createinfo->type, createinfo->sample_count > SDL_GPU_SAMPLECOUNT_1); + textureDescriptor.pixelFormat = SDLToMetal_TextureFormat(createinfo->format); + // This format isn't natively supported so let's swizzle! + if (createinfo->format == SDL_GPU_TEXTUREFORMAT_B4G4R4A4_UNORM) { + if (@available(macOS 10.15, iOS 13.0, tvOS 13.0, *)) { + textureDescriptor.swizzle = MTLTextureSwizzleChannelsMake(MTLTextureSwizzleBlue, + MTLTextureSwizzleGreen, + MTLTextureSwizzleRed, + MTLTextureSwizzleAlpha); + } else { + SET_STRING_ERROR_AND_RETURN("SDL_GPU_TEXTUREFORMAT_B4G4R4A4_UNORM is not supported", NULL); + } + } + + textureDescriptor.width = createinfo->width; + textureDescriptor.height = createinfo->height; + textureDescriptor.depth = (createinfo->type == SDL_GPU_TEXTURETYPE_3D) ? createinfo->layer_count_or_depth : 1; + textureDescriptor.mipmapLevelCount = createinfo->num_levels; + textureDescriptor.sampleCount = SDLToMetal_SampleCount[createinfo->sample_count]; + textureDescriptor.arrayLength = + (createinfo->type == SDL_GPU_TEXTURETYPE_2D_ARRAY || createinfo->type == SDL_GPU_TEXTURETYPE_CUBE_ARRAY) + ? createinfo->layer_count_or_depth + : 1; + textureDescriptor.storageMode = MTLStorageModePrivate; + + textureDescriptor.usage = 0; + if (createinfo->usage & (SDL_GPU_TEXTUREUSAGE_COLOR_TARGET | + SDL_GPU_TEXTUREUSAGE_DEPTH_STENCIL_TARGET)) { + textureDescriptor.usage |= MTLTextureUsageRenderTarget; + } + if (createinfo->usage & (SDL_GPU_TEXTUREUSAGE_SAMPLER | + SDL_GPU_TEXTUREUSAGE_GRAPHICS_STORAGE_READ | + SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_READ)) { + textureDescriptor.usage |= MTLTextureUsageShaderRead; + } + if (createinfo->usage & (SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE | + SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE)) { + textureDescriptor.usage |= MTLTextureUsageShaderWrite; + } + + texture = [renderer->device newTextureWithDescriptor:textureDescriptor]; + if (texture == NULL) { + SDL_LogError(SDL_LOG_CATEGORY_GPU, "Failed to create MTLTexture!"); + return NULL; + } + + metalTexture = (MetalTexture *)SDL_calloc(1, sizeof(MetalTexture)); + metalTexture->handle = texture; + SDL_SetAtomicInt(&metalTexture->referenceCount, 0); + + if (renderer->debugMode && SDL_HasProperty(createinfo->props, SDL_PROP_GPU_TEXTURE_CREATE_NAME_STRING)) { + metalTexture->handle.label = @(SDL_GetStringProperty(createinfo->props, SDL_PROP_GPU_TEXTURE_CREATE_NAME_STRING, NULL)); + } + + return metalTexture; +} + +static bool METAL_SupportsSampleCount( + SDL_GPURenderer *driverData, + SDL_GPUTextureFormat format, + SDL_GPUSampleCount sampleCount) +{ + @autoreleasepool { + MetalRenderer *renderer = (MetalRenderer *)driverData; + NSUInteger mtlSampleCount = SDLToMetal_SampleCount[sampleCount]; + return [renderer->device supportsTextureSampleCount:mtlSampleCount]; + } +} + +static SDL_GPUTexture *METAL_CreateTexture( + SDL_GPURenderer *driverData, + const SDL_GPUTextureCreateInfo *createinfo) +{ + @autoreleasepool { + MetalRenderer *renderer = (MetalRenderer *)driverData; + MetalTextureContainer *container; + MetalTexture *texture; + + texture = METAL_INTERNAL_CreateTexture( + renderer, + createinfo); + + if (texture == NULL) { + SET_STRING_ERROR_AND_RETURN("Failed to create texture", NULL); + } + + container = SDL_calloc(1, sizeof(MetalTextureContainer)); + container->canBeCycled = 1; + + // Copy properties so we don't lose information when the client destroys them + container->header.info = *createinfo; + container->header.info.props = SDL_CreateProperties(); + SDL_CopyProperties(createinfo->props, container->header.info.props); + + container->activeTexture = texture; + container->textureCapacity = 1; + container->textureCount = 1; + container->textures = SDL_calloc( + container->textureCapacity, sizeof(MetalTexture *)); + container->textures[0] = texture; + container->debugName = NULL; + + if (SDL_HasProperty(createinfo->props, SDL_PROP_GPU_TEXTURE_CREATE_NAME_STRING)) { + container->debugName = SDL_strdup(SDL_GetStringProperty(createinfo->props, SDL_PROP_GPU_TEXTURE_CREATE_NAME_STRING, NULL)); + } + + return (SDL_GPUTexture *)container; + } +} + +// This function assumes that it's called from within an autorelease pool +static MetalTexture *METAL_INTERNAL_PrepareTextureForWrite( + MetalRenderer *renderer, + MetalTextureContainer *container, + bool cycle) +{ + Uint32 i; + + // Cycle the active texture handle if needed + if (cycle && container->canBeCycled) { + for (i = 0; i < container->textureCount; i += 1) { + if (SDL_GetAtomicInt(&container->textures[i]->referenceCount) == 0) { + container->activeTexture = container->textures[i]; + return container->activeTexture; + } + } + + EXPAND_ARRAY_IF_NEEDED( + container->textures, + MetalTexture *, + container->textureCount + 1, + container->textureCapacity, + container->textureCapacity + 1); + + container->textures[container->textureCount] = METAL_INTERNAL_CreateTexture( + renderer, + &container->header.info); + container->textureCount += 1; + + container->activeTexture = container->textures[container->textureCount - 1]; + } + + return container->activeTexture; +} + +// This function assumes that it's called from within an autorelease pool +static MetalBuffer *METAL_INTERNAL_CreateBuffer( + MetalRenderer *renderer, + Uint32 size, + MTLResourceOptions resourceOptions, + const char *debugName) +{ + id bufferHandle; + MetalBuffer *metalBuffer; + + // Storage buffers have to be 4-aligned, so might as well align them all + size = METAL_INTERNAL_NextHighestAlignment(size, 4); + + bufferHandle = [renderer->device newBufferWithLength:size options:resourceOptions]; + if (bufferHandle == NULL) { + SDL_LogError(SDL_LOG_CATEGORY_GPU, "Could not create buffer"); + return NULL; + } + + metalBuffer = SDL_calloc(1, sizeof(MetalBuffer)); + metalBuffer->handle = bufferHandle; + SDL_SetAtomicInt(&metalBuffer->referenceCount, 0); + + if (debugName != NULL) { + metalBuffer->handle.label = @(debugName); + } + + return metalBuffer; +} + +// This function assumes that it's called from within an autorelease pool +static MetalBufferContainer *METAL_INTERNAL_CreateBufferContainer( + MetalRenderer *renderer, + Uint32 size, + bool isPrivate, + bool isWriteOnly, + const char *debugName) +{ + MetalBufferContainer *container = SDL_calloc(1, sizeof(MetalBufferContainer)); + MTLResourceOptions resourceOptions; + + container->size = size; + container->bufferCapacity = 1; + container->bufferCount = 1; + container->buffers = SDL_calloc( + container->bufferCapacity, sizeof(MetalBuffer *)); + container->isPrivate = isPrivate; + container->isWriteOnly = isWriteOnly; + container->debugName = NULL; + if (container->debugName != NULL) { + container->debugName = SDL_strdup(debugName); + } + + if (isPrivate) { + resourceOptions = MTLResourceStorageModePrivate; + } else { + if (isWriteOnly) { + resourceOptions = MTLResourceCPUCacheModeWriteCombined; + } else { + resourceOptions = MTLResourceCPUCacheModeDefaultCache; + } + } + + container->buffers[0] = METAL_INTERNAL_CreateBuffer( + renderer, + size, + resourceOptions, + debugName); + + container->activeBuffer = container->buffers[0]; + + return container; +} + +static SDL_GPUBuffer *METAL_CreateBuffer( + SDL_GPURenderer *driverData, + SDL_GPUBufferUsageFlags usage, + Uint32 size, + const char *debugName) +{ + @autoreleasepool { + return (SDL_GPUBuffer *)METAL_INTERNAL_CreateBufferContainer( + (MetalRenderer *)driverData, + size, + true, + false, + debugName); + } +} + +static SDL_GPUTransferBuffer *METAL_CreateTransferBuffer( + SDL_GPURenderer *driverData, + SDL_GPUTransferBufferUsage usage, + Uint32 size, + const char *debugName) +{ + @autoreleasepool { + return (SDL_GPUTransferBuffer *)METAL_INTERNAL_CreateBufferContainer( + (MetalRenderer *)driverData, + size, + false, + usage == SDL_GPU_TRANSFERBUFFERUSAGE_UPLOAD, + debugName); + } +} + +// This function assumes that it's called from within an autorelease pool +static MetalUniformBuffer *METAL_INTERNAL_CreateUniformBuffer( + MetalRenderer *renderer, + Uint32 size) +{ + MetalUniformBuffer *uniformBuffer; + id bufferHandle; + + bufferHandle = [renderer->device newBufferWithLength:size options:MTLResourceCPUCacheModeWriteCombined]; + if (bufferHandle == nil) { + SDL_LogError(SDL_LOG_CATEGORY_GPU, "Could not create uniform buffer"); + return NULL; + } + + uniformBuffer = SDL_calloc(1, sizeof(MetalUniformBuffer)); + uniformBuffer->handle = bufferHandle; + uniformBuffer->writeOffset = 0; + uniformBuffer->drawOffset = 0; + + return uniformBuffer; +} + +// This function assumes that it's called from within an autorelease pool +static MetalBuffer *METAL_INTERNAL_PrepareBufferForWrite( + MetalRenderer *renderer, + MetalBufferContainer *container, + bool cycle) +{ + MTLResourceOptions resourceOptions; + Uint32 i; + + // Cycle if needed + if (cycle && SDL_GetAtomicInt(&container->activeBuffer->referenceCount) > 0) { + for (i = 0; i < container->bufferCount; i += 1) { + if (SDL_GetAtomicInt(&container->buffers[i]->referenceCount) == 0) { + container->activeBuffer = container->buffers[i]; + return container->activeBuffer; + } + } + + EXPAND_ARRAY_IF_NEEDED( + container->buffers, + MetalBuffer *, + container->bufferCount + 1, + container->bufferCapacity, + container->bufferCapacity + 1); + + if (container->isPrivate) { + resourceOptions = MTLResourceStorageModePrivate; + } else { + if (container->isWriteOnly) { + resourceOptions = MTLResourceCPUCacheModeWriteCombined; + } else { + resourceOptions = MTLResourceCPUCacheModeDefaultCache; + } + } + + container->buffers[container->bufferCount] = METAL_INTERNAL_CreateBuffer( + renderer, + container->size, + resourceOptions, + container->debugName); + container->bufferCount += 1; + + container->activeBuffer = container->buffers[container->bufferCount - 1]; + } + + return container->activeBuffer; +} + +// TransferBuffer Data + +static void *METAL_MapTransferBuffer( + SDL_GPURenderer *driverData, + SDL_GPUTransferBuffer *transferBuffer, + bool cycle) +{ + @autoreleasepool { + MetalRenderer *renderer = (MetalRenderer *)driverData; + MetalBufferContainer *container = (MetalBufferContainer *)transferBuffer; + MetalBuffer *buffer = METAL_INTERNAL_PrepareBufferForWrite(renderer, container, cycle); + return [buffer->handle contents]; + } +} + +static void METAL_UnmapTransferBuffer( + SDL_GPURenderer *driverData, + SDL_GPUTransferBuffer *transferBuffer) +{ +#ifdef SDL_PLATFORM_MACOS + @autoreleasepool { + // FIXME: Is this necessary? + MetalBufferContainer *container = (MetalBufferContainer *)transferBuffer; + MetalBuffer *buffer = container->activeBuffer; + if (buffer->handle.storageMode == MTLStorageModeManaged) { + [buffer->handle didModifyRange:NSMakeRange(0, container->size)]; + } + } +#endif +} + +// Copy Pass + +static void METAL_BeginCopyPass( + SDL_GPUCommandBuffer *commandBuffer) +{ + @autoreleasepool { + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + metalCommandBuffer->blitEncoder = [metalCommandBuffer->handle blitCommandEncoder]; + } +} + +static void METAL_UploadToTexture( + SDL_GPUCommandBuffer *commandBuffer, + const SDL_GPUTextureTransferInfo *source, + const SDL_GPUTextureRegion *destination, + bool cycle) +{ + @autoreleasepool { + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + MetalRenderer *renderer = metalCommandBuffer->renderer; + MetalBufferContainer *bufferContainer = (MetalBufferContainer *)source->transfer_buffer; + MetalTextureContainer *textureContainer = (MetalTextureContainer *)destination->texture; + + MetalTexture *metalTexture = METAL_INTERNAL_PrepareTextureForWrite(renderer, textureContainer, cycle); + + [metalCommandBuffer->blitEncoder + copyFromBuffer:bufferContainer->activeBuffer->handle + sourceOffset:source->offset + sourceBytesPerRow:BytesPerRow(destination->w, textureContainer->header.info.format) + sourceBytesPerImage:SDL_CalculateGPUTextureFormatSize(textureContainer->header.info.format, destination->w, destination->h, destination->d) + sourceSize:MTLSizeMake(destination->w, destination->h, destination->d) + toTexture:metalTexture->handle + destinationSlice:destination->layer + destinationLevel:destination->mip_level + destinationOrigin:MTLOriginMake(destination->x, destination->y, destination->z)]; + + METAL_INTERNAL_TrackTexture(metalCommandBuffer, metalTexture); + METAL_INTERNAL_TrackBuffer(metalCommandBuffer, bufferContainer->activeBuffer); + } +} + +static void METAL_UploadToBuffer( + SDL_GPUCommandBuffer *commandBuffer, + const SDL_GPUTransferBufferLocation *source, + const SDL_GPUBufferRegion *destination, + bool cycle) +{ + @autoreleasepool { + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + MetalRenderer *renderer = metalCommandBuffer->renderer; + MetalBufferContainer *transferContainer = (MetalBufferContainer *)source->transfer_buffer; + MetalBufferContainer *bufferContainer = (MetalBufferContainer *)destination->buffer; + + MetalBuffer *metalBuffer = METAL_INTERNAL_PrepareBufferForWrite( + renderer, + bufferContainer, + cycle); + + [metalCommandBuffer->blitEncoder + copyFromBuffer:transferContainer->activeBuffer->handle + sourceOffset:source->offset + toBuffer:metalBuffer->handle + destinationOffset:destination->offset + size:destination->size]; + + METAL_INTERNAL_TrackBuffer(metalCommandBuffer, metalBuffer); + METAL_INTERNAL_TrackBuffer(metalCommandBuffer, transferContainer->activeBuffer); + } +} + +static void METAL_CopyTextureToTexture( + SDL_GPUCommandBuffer *commandBuffer, + const SDL_GPUTextureLocation *source, + const SDL_GPUTextureLocation *destination, + Uint32 w, + Uint32 h, + Uint32 d, + bool cycle) +{ + @autoreleasepool { + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + MetalRenderer *renderer = metalCommandBuffer->renderer; + MetalTextureContainer *srcContainer = (MetalTextureContainer *)source->texture; + MetalTextureContainer *dstContainer = (MetalTextureContainer *)destination->texture; + + MetalTexture *srcTexture = srcContainer->activeTexture; + MetalTexture *dstTexture = METAL_INTERNAL_PrepareTextureForWrite( + renderer, + dstContainer, + cycle); + + [metalCommandBuffer->blitEncoder + copyFromTexture:srcTexture->handle + sourceSlice:source->layer + sourceLevel:source->mip_level + sourceOrigin:MTLOriginMake(source->x, source->y, source->z) + sourceSize:MTLSizeMake(w, h, d) + toTexture:dstTexture->handle + destinationSlice:destination->layer + destinationLevel:destination->mip_level + destinationOrigin:MTLOriginMake(destination->x, destination->y, destination->z)]; + + METAL_INTERNAL_TrackTexture(metalCommandBuffer, srcTexture); + METAL_INTERNAL_TrackTexture(metalCommandBuffer, dstTexture); + } +} + +static void METAL_CopyBufferToBuffer( + SDL_GPUCommandBuffer *commandBuffer, + const SDL_GPUBufferLocation *source, + const SDL_GPUBufferLocation *destination, + Uint32 size, + bool cycle) +{ + @autoreleasepool { + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + MetalRenderer *renderer = metalCommandBuffer->renderer; + MetalBufferContainer *srcContainer = (MetalBufferContainer *)source->buffer; + MetalBufferContainer *dstContainer = (MetalBufferContainer *)destination->buffer; + + MetalBuffer *srcBuffer = srcContainer->activeBuffer; + MetalBuffer *dstBuffer = METAL_INTERNAL_PrepareBufferForWrite( + renderer, + dstContainer, + cycle); + + [metalCommandBuffer->blitEncoder + copyFromBuffer:srcBuffer->handle + sourceOffset:source->offset + toBuffer:dstBuffer->handle + destinationOffset:destination->offset + size:size]; + + METAL_INTERNAL_TrackBuffer(metalCommandBuffer, srcBuffer); + METAL_INTERNAL_TrackBuffer(metalCommandBuffer, dstBuffer); + } +} + +static void METAL_DownloadFromTexture( + SDL_GPUCommandBuffer *commandBuffer, + const SDL_GPUTextureRegion *source, + const SDL_GPUTextureTransferInfo *destination) +{ + @autoreleasepool { + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + MetalRenderer *renderer = metalCommandBuffer->renderer; + MetalTextureContainer *textureContainer = (MetalTextureContainer *)source->texture; + MetalTexture *metalTexture = textureContainer->activeTexture; + MetalBufferContainer *bufferContainer = (MetalBufferContainer *)destination->transfer_buffer; + Uint32 bufferStride = destination->pixels_per_row; + Uint32 bufferImageHeight = destination->rows_per_layer; + Uint32 bytesPerRow, bytesPerDepthSlice; + + MetalBuffer *dstBuffer = METAL_INTERNAL_PrepareBufferForWrite( + renderer, + bufferContainer, + false); + + MTLOrigin regionOrigin = MTLOriginMake( + source->x, + source->y, + source->z); + + MTLSize regionSize = MTLSizeMake( + source->w, + source->h, + source->d); + + if (bufferStride == 0 || bufferImageHeight == 0) { + bufferStride = source->w; + bufferImageHeight = source->h; + } + + bytesPerRow = BytesPerRow(bufferStride, textureContainer->header.info.format); + bytesPerDepthSlice = bytesPerRow * bufferImageHeight; + + [metalCommandBuffer->blitEncoder + copyFromTexture:metalTexture->handle + sourceSlice:source->layer + sourceLevel:source->mip_level + sourceOrigin:regionOrigin + sourceSize:regionSize + toBuffer:dstBuffer->handle + destinationOffset:destination->offset + destinationBytesPerRow:bytesPerRow + destinationBytesPerImage:bytesPerDepthSlice]; + + METAL_INTERNAL_TrackTexture(metalCommandBuffer, metalTexture); + METAL_INTERNAL_TrackBuffer(metalCommandBuffer, dstBuffer); + } +} + +static void METAL_DownloadFromBuffer( + SDL_GPUCommandBuffer *commandBuffer, + const SDL_GPUBufferRegion *source, + const SDL_GPUTransferBufferLocation *destination) +{ + SDL_GPUBufferLocation sourceLocation; + sourceLocation.buffer = source->buffer; + sourceLocation.offset = source->offset; + + METAL_CopyBufferToBuffer( + commandBuffer, + &sourceLocation, + (SDL_GPUBufferLocation *)destination, + source->size, + false); +} + +static void METAL_EndCopyPass( + SDL_GPUCommandBuffer *commandBuffer) +{ + @autoreleasepool { + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + [metalCommandBuffer->blitEncoder endEncoding]; + metalCommandBuffer->blitEncoder = nil; + } +} + +static void METAL_GenerateMipmaps( + SDL_GPUCommandBuffer *commandBuffer, + SDL_GPUTexture *texture) +{ + @autoreleasepool { + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + MetalTextureContainer *container = (MetalTextureContainer *)texture; + MetalTexture *metalTexture = container->activeTexture; + + METAL_BeginCopyPass(commandBuffer); + [metalCommandBuffer->blitEncoder + generateMipmapsForTexture:metalTexture->handle]; + METAL_EndCopyPass(commandBuffer); + + METAL_INTERNAL_TrackTexture(metalCommandBuffer, metalTexture); + } +} + +// Graphics State + +static void METAL_INTERNAL_AllocateCommandBuffers( + MetalRenderer *renderer, + Uint32 allocateCount) +{ + MetalCommandBuffer *commandBuffer; + + renderer->availableCommandBufferCapacity += allocateCount; + + renderer->availableCommandBuffers = SDL_realloc( + renderer->availableCommandBuffers, + sizeof(MetalCommandBuffer *) * renderer->availableCommandBufferCapacity); + + for (Uint32 i = 0; i < allocateCount; i += 1) { + commandBuffer = SDL_calloc(1, sizeof(MetalCommandBuffer)); + commandBuffer->renderer = renderer; + + // The native Metal command buffer is created in METAL_AcquireCommandBuffer + + commandBuffer->windowDataCapacity = 1; + commandBuffer->windowDataCount = 0; + commandBuffer->windowDatas = SDL_calloc( + commandBuffer->windowDataCapacity, sizeof(MetalWindowData *)); + + // Reference Counting + commandBuffer->usedBufferCapacity = 4; + commandBuffer->usedBufferCount = 0; + commandBuffer->usedBuffers = SDL_calloc( + commandBuffer->usedBufferCapacity, sizeof(MetalBuffer *)); + + commandBuffer->usedTextureCapacity = 4; + commandBuffer->usedTextureCount = 0; + commandBuffer->usedTextures = SDL_calloc( + commandBuffer->usedTextureCapacity, sizeof(MetalTexture *)); + + renderer->availableCommandBuffers[renderer->availableCommandBufferCount] = commandBuffer; + renderer->availableCommandBufferCount += 1; + } +} + +static MetalCommandBuffer *METAL_INTERNAL_GetInactiveCommandBufferFromPool( + MetalRenderer *renderer) +{ + MetalCommandBuffer *commandBuffer; + + if (renderer->availableCommandBufferCount == 0) { + METAL_INTERNAL_AllocateCommandBuffers( + renderer, + renderer->availableCommandBufferCapacity); + } + + commandBuffer = renderer->availableCommandBuffers[renderer->availableCommandBufferCount - 1]; + renderer->availableCommandBufferCount -= 1; + + return commandBuffer; +} + +static Uint8 METAL_INTERNAL_CreateFence( + MetalRenderer *renderer) +{ + MetalFence *fence; + + fence = SDL_calloc(1, sizeof(MetalFence)); + SDL_SetAtomicInt(&fence->complete, 0); + SDL_SetAtomicInt(&fence->referenceCount, 0); + + // Add it to the available pool + // FIXME: Should this be EXPAND_IF_NEEDED? + if (renderer->availableFenceCount >= renderer->availableFenceCapacity) { + renderer->availableFenceCapacity *= 2; + + renderer->availableFences = SDL_realloc( + renderer->availableFences, + sizeof(MetalFence *) * renderer->availableFenceCapacity); + } + + renderer->availableFences[renderer->availableFenceCount] = fence; + renderer->availableFenceCount += 1; + + return 1; +} + +static bool METAL_INTERNAL_AcquireFence( + MetalRenderer *renderer, + MetalCommandBuffer *commandBuffer) +{ + MetalFence *fence; + + // Acquire a fence from the pool + SDL_LockMutex(renderer->fenceLock); + + if (renderer->availableFenceCount == 0) { + if (!METAL_INTERNAL_CreateFence(renderer)) { + SDL_UnlockMutex(renderer->fenceLock); + SDL_LogError(SDL_LOG_CATEGORY_GPU, "Failed to create fence!"); + return false; + } + } + + fence = renderer->availableFences[renderer->availableFenceCount - 1]; + renderer->availableFenceCount -= 1; + + SDL_UnlockMutex(renderer->fenceLock); + + // Associate the fence with the command buffer + commandBuffer->fence = fence; + SDL_SetAtomicInt(&fence->complete, 0); // FIXME: Is this right? + (void)SDL_AtomicIncRef(&commandBuffer->fence->referenceCount); + + return true; +} + +static SDL_GPUCommandBuffer *METAL_AcquireCommandBuffer( + SDL_GPURenderer *driverData) +{ + @autoreleasepool { + MetalRenderer *renderer = (MetalRenderer *)driverData; + MetalCommandBuffer *commandBuffer; + + SDL_LockMutex(renderer->acquireCommandBufferLock); + + commandBuffer = METAL_INTERNAL_GetInactiveCommandBufferFromPool(renderer); + commandBuffer->handle = [renderer->queue commandBuffer]; + + commandBuffer->graphics_pipeline = NULL; + commandBuffer->compute_pipeline = NULL; + for (Uint32 i = 0; i < MAX_UNIFORM_BUFFERS_PER_STAGE; i += 1) { + commandBuffer->vertexUniformBuffers[i] = NULL; + commandBuffer->fragmentUniformBuffers[i] = NULL; + commandBuffer->computeUniformBuffers[i] = NULL; + } + + commandBuffer->autoReleaseFence = true; + + SDL_UnlockMutex(renderer->acquireCommandBufferLock); + + return (SDL_GPUCommandBuffer *)commandBuffer; + } +} + +// This function assumes that it's called from within an autorelease pool +static MetalUniformBuffer *METAL_INTERNAL_AcquireUniformBufferFromPool( + MetalCommandBuffer *commandBuffer) +{ + MetalRenderer *renderer = commandBuffer->renderer; + MetalUniformBuffer *uniformBuffer; + + SDL_LockMutex(renderer->acquireUniformBufferLock); + + if (renderer->uniformBufferPoolCount > 0) { + uniformBuffer = renderer->uniformBufferPool[renderer->uniformBufferPoolCount - 1]; + renderer->uniformBufferPoolCount -= 1; + } else { + uniformBuffer = METAL_INTERNAL_CreateUniformBuffer( + renderer, + UNIFORM_BUFFER_SIZE); + } + + SDL_UnlockMutex(renderer->acquireUniformBufferLock); + + METAL_INTERNAL_TrackUniformBuffer(commandBuffer, uniformBuffer); + + return uniformBuffer; +} + +static void METAL_INTERNAL_ReturnUniformBufferToPool( + MetalRenderer *renderer, + MetalUniformBuffer *uniformBuffer) +{ + if (renderer->uniformBufferPoolCount >= renderer->uniformBufferPoolCapacity) { + renderer->uniformBufferPoolCapacity *= 2; + renderer->uniformBufferPool = SDL_realloc( + renderer->uniformBufferPool, + renderer->uniformBufferPoolCapacity * sizeof(MetalUniformBuffer *)); + } + + renderer->uniformBufferPool[renderer->uniformBufferPoolCount] = uniformBuffer; + renderer->uniformBufferPoolCount += 1; + + uniformBuffer->writeOffset = 0; + uniformBuffer->drawOffset = 0; +} + +static void METAL_SetViewport( + SDL_GPUCommandBuffer *commandBuffer, + const SDL_GPUViewport *viewport) +{ + @autoreleasepool { + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + MTLViewport metalViewport; + + metalViewport.originX = viewport->x; + metalViewport.originY = viewport->y; + metalViewport.width = viewport->w; + metalViewport.height = viewport->h; + metalViewport.znear = viewport->min_depth; + metalViewport.zfar = viewport->max_depth; + + [metalCommandBuffer->renderEncoder setViewport:metalViewport]; + } +} + +static void METAL_SetScissor( + SDL_GPUCommandBuffer *commandBuffer, + const SDL_Rect *scissor) +{ + @autoreleasepool { + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + MTLScissorRect metalScissor; + + metalScissor.x = scissor->x; + metalScissor.y = scissor->y; + metalScissor.width = scissor->w; + metalScissor.height = scissor->h; + + [metalCommandBuffer->renderEncoder setScissorRect:metalScissor]; + } +} + +static void METAL_SetBlendConstants( + SDL_GPUCommandBuffer *commandBuffer, + SDL_FColor blendConstants) +{ + @autoreleasepool { + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + [metalCommandBuffer->renderEncoder setBlendColorRed:blendConstants.r + green:blendConstants.g + blue:blendConstants.b + alpha:blendConstants.a]; + } +} + +static void METAL_SetStencilReference( + SDL_GPUCommandBuffer *commandBuffer, + Uint8 reference) +{ + @autoreleasepool { + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + [metalCommandBuffer->renderEncoder setStencilReferenceValue:reference]; + } +} + +static void METAL_BeginRenderPass( + SDL_GPUCommandBuffer *commandBuffer, + const SDL_GPUColorTargetInfo *colorTargetInfos, + Uint32 numColorTargets, + const SDL_GPUDepthStencilTargetInfo *depthStencilTargetInfo) +{ + @autoreleasepool { + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + MetalRenderer *renderer = metalCommandBuffer->renderer; + MTLRenderPassDescriptor *passDescriptor = [MTLRenderPassDescriptor renderPassDescriptor]; + Uint32 vpWidth = UINT_MAX; + Uint32 vpHeight = UINT_MAX; + SDL_GPUViewport viewport; + SDL_Rect scissorRect; + SDL_FColor blendConstants; + + for (Uint32 i = 0; i < numColorTargets; i += 1) { + MetalTextureContainer *container = (MetalTextureContainer *)colorTargetInfos[i].texture; + MetalTexture *texture = METAL_INTERNAL_PrepareTextureForWrite( + renderer, + container, + colorTargetInfos[i].cycle); + + passDescriptor.colorAttachments[i].texture = texture->handle; + passDescriptor.colorAttachments[i].level = colorTargetInfos[i].mip_level; + if (container->header.info.type == SDL_GPU_TEXTURETYPE_3D) { + passDescriptor.colorAttachments[i].depthPlane = colorTargetInfos[i].layer_or_depth_plane; + } else { + passDescriptor.colorAttachments[i].slice = colorTargetInfos[i].layer_or_depth_plane; + } + passDescriptor.colorAttachments[i].clearColor = MTLClearColorMake( + colorTargetInfos[i].clear_color.r, + colorTargetInfos[i].clear_color.g, + colorTargetInfos[i].clear_color.b, + colorTargetInfos[i].clear_color.a); + passDescriptor.colorAttachments[i].loadAction = SDLToMetal_LoadOp[colorTargetInfos[i].load_op]; + passDescriptor.colorAttachments[i].storeAction = SDLToMetal_StoreOp[colorTargetInfos[i].store_op]; + + METAL_INTERNAL_TrackTexture(metalCommandBuffer, texture); + + if (colorTargetInfos[i].store_op == SDL_GPU_STOREOP_RESOLVE || colorTargetInfos[i].store_op == SDL_GPU_STOREOP_RESOLVE_AND_STORE) { + MetalTextureContainer *resolveContainer = (MetalTextureContainer *)colorTargetInfos[i].resolve_texture; + MetalTexture *resolveTexture = METAL_INTERNAL_PrepareTextureForWrite( + renderer, + resolveContainer, + colorTargetInfos[i].cycle_resolve_texture); + + passDescriptor.colorAttachments[i].resolveTexture = resolveTexture->handle; + passDescriptor.colorAttachments[i].resolveSlice = colorTargetInfos[i].resolve_layer; + passDescriptor.colorAttachments[i].resolveLevel = colorTargetInfos[i].resolve_mip_level; + + METAL_INTERNAL_TrackTexture(metalCommandBuffer, resolveTexture); + } + } + + if (depthStencilTargetInfo != NULL) { + MetalTextureContainer *container = (MetalTextureContainer *)depthStencilTargetInfo->texture; + MetalTexture *texture = METAL_INTERNAL_PrepareTextureForWrite( + renderer, + container, + depthStencilTargetInfo->cycle); + + passDescriptor.depthAttachment.texture = texture->handle; + passDescriptor.depthAttachment.loadAction = SDLToMetal_LoadOp[depthStencilTargetInfo->load_op]; + passDescriptor.depthAttachment.storeAction = SDLToMetal_StoreOp[depthStencilTargetInfo->store_op]; + passDescriptor.depthAttachment.clearDepth = depthStencilTargetInfo->clear_depth; + + if (IsStencilFormat(container->header.info.format)) { + passDescriptor.stencilAttachment.texture = texture->handle; + passDescriptor.stencilAttachment.loadAction = SDLToMetal_LoadOp[depthStencilTargetInfo->stencil_load_op]; + passDescriptor.stencilAttachment.storeAction = SDLToMetal_StoreOp[depthStencilTargetInfo->stencil_store_op]; + passDescriptor.stencilAttachment.clearStencil = depthStencilTargetInfo->clear_stencil; + } + + METAL_INTERNAL_TrackTexture(metalCommandBuffer, texture); + } + + metalCommandBuffer->renderEncoder = [metalCommandBuffer->handle renderCommandEncoderWithDescriptor:passDescriptor]; + + // The viewport cannot be larger than the smallest target. + for (Uint32 i = 0; i < numColorTargets; i += 1) { + MetalTextureContainer *container = (MetalTextureContainer *)colorTargetInfos[i].texture; + Uint32 w = container->header.info.width >> colorTargetInfos[i].mip_level; + Uint32 h = container->header.info.height >> colorTargetInfos[i].mip_level; + + if (w < vpWidth) { + vpWidth = w; + } + + if (h < vpHeight) { + vpHeight = h; + } + } + + if (depthStencilTargetInfo != NULL) { + MetalTextureContainer *container = (MetalTextureContainer *)depthStencilTargetInfo->texture; + Uint32 w = container->header.info.width; + Uint32 h = container->header.info.height; + + if (w < vpWidth) { + vpWidth = w; + } + + if (h < vpHeight) { + vpHeight = h; + } + } + + // Set sensible default states + viewport.x = 0; + viewport.y = 0; + viewport.w = vpWidth; + viewport.h = vpHeight; + viewport.min_depth = 0; + viewport.max_depth = 1; + METAL_SetViewport(commandBuffer, &viewport); + + scissorRect.x = 0; + scissorRect.y = 0; + scissorRect.w = vpWidth; + scissorRect.h = vpHeight; + METAL_SetScissor(commandBuffer, &scissorRect); + + blendConstants.r = 1.0f; + blendConstants.g = 1.0f; + blendConstants.b = 1.0f; + blendConstants.a = 1.0f; + METAL_SetBlendConstants( + commandBuffer, + blendConstants); + + METAL_SetStencilReference( + commandBuffer, + 0); + } +} + +static void METAL_BindGraphicsPipeline( + SDL_GPUCommandBuffer *commandBuffer, + SDL_GPUGraphicsPipeline *graphicsPipeline) +{ + @autoreleasepool { + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + MetalGraphicsPipeline *pipeline = (MetalGraphicsPipeline *)graphicsPipeline; + SDL_GPURasterizerState *rast = &pipeline->rasterizerState; + Uint32 i; + + metalCommandBuffer->graphics_pipeline = pipeline; + + [metalCommandBuffer->renderEncoder setRenderPipelineState:pipeline->handle]; + + // Apply rasterizer state + [metalCommandBuffer->renderEncoder setTriangleFillMode:SDLToMetal_PolygonMode[pipeline->rasterizerState.fill_mode]]; + [metalCommandBuffer->renderEncoder setCullMode:SDLToMetal_CullMode[pipeline->rasterizerState.cull_mode]]; + [metalCommandBuffer->renderEncoder setFrontFacingWinding:SDLToMetal_FrontFace[pipeline->rasterizerState.front_face]]; + [metalCommandBuffer->renderEncoder setDepthClipMode:SDLToMetal_DepthClipMode(pipeline->rasterizerState.enable_depth_clip)]; + [metalCommandBuffer->renderEncoder + setDepthBias:((rast->enable_depth_bias) ? rast->depth_bias_constant_factor : 0) + slopeScale:((rast->enable_depth_bias) ? rast->depth_bias_slope_factor : 0) + clamp:((rast->enable_depth_bias) ? rast->depth_bias_clamp : 0)]; + + // Apply depth-stencil state + if (pipeline->depth_stencil_state != NULL) { + [metalCommandBuffer->renderEncoder + setDepthStencilState:pipeline->depth_stencil_state]; + } + + for (i = 0; i < MAX_UNIFORM_BUFFERS_PER_STAGE; i += 1) { + metalCommandBuffer->needVertexUniformBufferBind[i] = true; + metalCommandBuffer->needFragmentUniformBufferBind[i] = true; + } + + for (i = 0; i < pipeline->vertexUniformBufferCount; i += 1) { + if (metalCommandBuffer->vertexUniformBuffers[i] == NULL) { + metalCommandBuffer->vertexUniformBuffers[i] = METAL_INTERNAL_AcquireUniformBufferFromPool( + metalCommandBuffer); + } + } + + for (i = 0; i < pipeline->fragmentUniformBufferCount; i += 1) { + if (metalCommandBuffer->fragmentUniformBuffers[i] == NULL) { + metalCommandBuffer->fragmentUniformBuffers[i] = METAL_INTERNAL_AcquireUniformBufferFromPool( + metalCommandBuffer); + } + } + } +} + +static void METAL_BindVertexBuffers( + SDL_GPUCommandBuffer *commandBuffer, + Uint32 firstSlot, + const SDL_GPUBufferBinding *bindings, + Uint32 numBindings) +{ + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + + for (Uint32 i = 0; i < numBindings; i += 1) { + MetalBuffer *currentBuffer = ((MetalBufferContainer *)bindings[i].buffer)->activeBuffer; + if (metalCommandBuffer->vertexBuffers[firstSlot + i] != currentBuffer->handle || metalCommandBuffer->vertexBufferOffsets[firstSlot + i] != bindings[i].offset) { + metalCommandBuffer->vertexBuffers[firstSlot + i] = currentBuffer->handle; + metalCommandBuffer->vertexBufferOffsets[firstSlot + i] = bindings[i].offset; + metalCommandBuffer->needVertexBufferBind = true; + METAL_INTERNAL_TrackBuffer(metalCommandBuffer, currentBuffer); + } + } + + metalCommandBuffer->vertexBufferCount = + SDL_max(metalCommandBuffer->vertexBufferCount, firstSlot + numBindings); +} + +static void METAL_BindIndexBuffer( + SDL_GPUCommandBuffer *commandBuffer, + const SDL_GPUBufferBinding *binding, + SDL_GPUIndexElementSize indexElementSize) +{ + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + metalCommandBuffer->indexBuffer = ((MetalBufferContainer *)binding->buffer)->activeBuffer; + metalCommandBuffer->indexBufferOffset = binding->offset; + metalCommandBuffer->index_element_size = indexElementSize; + + METAL_INTERNAL_TrackBuffer(metalCommandBuffer, metalCommandBuffer->indexBuffer); +} + +static void METAL_BindVertexSamplers( + SDL_GPUCommandBuffer *commandBuffer, + Uint32 firstSlot, + const SDL_GPUTextureSamplerBinding *textureSamplerBindings, + Uint32 numBindings) +{ + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + MetalTextureContainer *textureContainer; + MetalSampler *sampler; + + for (Uint32 i = 0; i < numBindings; i += 1) { + textureContainer = (MetalTextureContainer *)textureSamplerBindings[i].texture; + sampler = (MetalSampler *)textureSamplerBindings[i].sampler; + + if (metalCommandBuffer->vertexSamplers[firstSlot + i] != sampler->handle) { + metalCommandBuffer->vertexSamplers[firstSlot + i] = sampler->handle; + metalCommandBuffer->needVertexSamplerBind = true; + } + + if (metalCommandBuffer->vertexTextures[firstSlot + i] != textureContainer->activeTexture->handle) { + METAL_INTERNAL_TrackTexture( + metalCommandBuffer, + textureContainer->activeTexture); + + metalCommandBuffer->vertexTextures[firstSlot + i] = + textureContainer->activeTexture->handle; + + metalCommandBuffer->needVertexSamplerBind = true; + } + } +} + +static void METAL_BindVertexStorageTextures( + SDL_GPUCommandBuffer *commandBuffer, + Uint32 firstSlot, + SDL_GPUTexture *const *storageTextures, + Uint32 numBindings) +{ + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + MetalTextureContainer *textureContainer; + + for (Uint32 i = 0; i < numBindings; i += 1) { + textureContainer = (MetalTextureContainer *)storageTextures[i]; + + if (metalCommandBuffer->vertexStorageTextures[firstSlot + i] != textureContainer->activeTexture->handle) { + METAL_INTERNAL_TrackTexture( + metalCommandBuffer, + textureContainer->activeTexture); + + metalCommandBuffer->vertexStorageTextures[firstSlot + i] = + textureContainer->activeTexture->handle; + + metalCommandBuffer->needVertexStorageTextureBind = true; + } + } +} + +static void METAL_BindVertexStorageBuffers( + SDL_GPUCommandBuffer *commandBuffer, + Uint32 firstSlot, + SDL_GPUBuffer *const *storageBuffers, + Uint32 numBindings) +{ + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + MetalBufferContainer *bufferContainer; + + for (Uint32 i = 0; i < numBindings; i += 1) { + bufferContainer = (MetalBufferContainer *)storageBuffers[i]; + + if (metalCommandBuffer->vertexStorageBuffers[firstSlot + i] != bufferContainer->activeBuffer->handle) { + METAL_INTERNAL_TrackBuffer( + metalCommandBuffer, + bufferContainer->activeBuffer); + + metalCommandBuffer->vertexStorageBuffers[firstSlot + i] = + bufferContainer->activeBuffer->handle; + + metalCommandBuffer->needVertexStorageBufferBind = true; + } + } +} + +static void METAL_BindFragmentSamplers( + SDL_GPUCommandBuffer *commandBuffer, + Uint32 firstSlot, + const SDL_GPUTextureSamplerBinding *textureSamplerBindings, + Uint32 numBindings) +{ + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + MetalTextureContainer *textureContainer; + MetalSampler *sampler; + + for (Uint32 i = 0; i < numBindings; i += 1) { + textureContainer = (MetalTextureContainer *)textureSamplerBindings[i].texture; + sampler = (MetalSampler *)textureSamplerBindings[i].sampler; + + if (metalCommandBuffer->fragmentSamplers[firstSlot + i] != sampler->handle) { + metalCommandBuffer->fragmentSamplers[firstSlot + i] = sampler->handle; + metalCommandBuffer->needFragmentSamplerBind = true; + } + + if (metalCommandBuffer->fragmentTextures[firstSlot + i] != textureContainer->activeTexture->handle) { + METAL_INTERNAL_TrackTexture( + metalCommandBuffer, + textureContainer->activeTexture); + + metalCommandBuffer->fragmentTextures[firstSlot + i] = + textureContainer->activeTexture->handle; + + metalCommandBuffer->needFragmentSamplerBind = true; + } + } +} + +static void METAL_BindFragmentStorageTextures( + SDL_GPUCommandBuffer *commandBuffer, + Uint32 firstSlot, + SDL_GPUTexture *const *storageTextures, + Uint32 numBindings) +{ + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + MetalTextureContainer *textureContainer; + + for (Uint32 i = 0; i < numBindings; i += 1) { + textureContainer = (MetalTextureContainer *)storageTextures[i]; + + if (metalCommandBuffer->fragmentStorageTextures[firstSlot + i] != textureContainer->activeTexture->handle) { + METAL_INTERNAL_TrackTexture( + metalCommandBuffer, + textureContainer->activeTexture); + + metalCommandBuffer->fragmentStorageTextures[firstSlot + i] = + textureContainer->activeTexture->handle; + + metalCommandBuffer->needFragmentStorageTextureBind = true; + } + } +} + +static void METAL_BindFragmentStorageBuffers( + SDL_GPUCommandBuffer *commandBuffer, + Uint32 firstSlot, + SDL_GPUBuffer *const *storageBuffers, + Uint32 numBindings) +{ + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + MetalBufferContainer *bufferContainer; + + for (Uint32 i = 0; i < numBindings; i += 1) { + bufferContainer = (MetalBufferContainer *)storageBuffers[i]; + + if (metalCommandBuffer->fragmentStorageBuffers[firstSlot + i] != bufferContainer->activeBuffer->handle) { + METAL_INTERNAL_TrackBuffer( + metalCommandBuffer, + bufferContainer->activeBuffer); + + metalCommandBuffer->fragmentStorageBuffers[firstSlot + i] = + bufferContainer->activeBuffer->handle; + + metalCommandBuffer->needFragmentStorageBufferBind = true; + } + } +} + +// This function assumes that it's called from within an autorelease pool +static void METAL_INTERNAL_BindGraphicsResources( + MetalCommandBuffer *commandBuffer) +{ + MetalGraphicsPipeline *graphicsPipeline = commandBuffer->graphics_pipeline; + NSUInteger offsets[MAX_STORAGE_BUFFERS_PER_STAGE] = { 0 }; + + // Vertex Buffers + if (commandBuffer->needVertexBufferBind) { + id metalBuffers[MAX_VERTEX_BUFFERS]; + NSUInteger bufferOffsets[MAX_VERTEX_BUFFERS]; + NSRange range = NSMakeRange(METAL_FIRST_VERTEX_BUFFER_SLOT, commandBuffer->vertexBufferCount); + for (Uint32 i = 0; i < commandBuffer->vertexBufferCount; i += 1) { + metalBuffers[i] = commandBuffer->vertexBuffers[i]; + bufferOffsets[i] = commandBuffer->vertexBufferOffsets[i]; + } + [commandBuffer->renderEncoder setVertexBuffers:metalBuffers offsets:bufferOffsets withRange:range]; + commandBuffer->needVertexBufferBind = false; + } + + // Vertex Samplers+Textures + + if (commandBuffer->needVertexSamplerBind) { + if (graphicsPipeline->vertexSamplerCount > 0) { + [commandBuffer->renderEncoder setVertexSamplerStates:commandBuffer->vertexSamplers + withRange:NSMakeRange(0, graphicsPipeline->vertexSamplerCount)]; + [commandBuffer->renderEncoder setVertexTextures:commandBuffer->vertexTextures + withRange:NSMakeRange(0, graphicsPipeline->vertexSamplerCount)]; + } + commandBuffer->needVertexSamplerBind = false; + } + + // Vertex Storage Textures + + if (commandBuffer->needVertexStorageTextureBind) { + if (graphicsPipeline->vertexStorageTextureCount > 0) { + [commandBuffer->renderEncoder setVertexTextures:commandBuffer->vertexStorageTextures + withRange:NSMakeRange(graphicsPipeline->vertexSamplerCount, + graphicsPipeline->vertexStorageTextureCount)]; + } + commandBuffer->needVertexStorageTextureBind = false; + } + + // Vertex Storage Buffers + + if (commandBuffer->needVertexStorageBufferBind) { + if (graphicsPipeline->vertexStorageBufferCount > 0) { + [commandBuffer->renderEncoder setVertexBuffers:commandBuffer->vertexStorageBuffers + offsets:offsets + withRange:NSMakeRange(graphicsPipeline->vertexUniformBufferCount, + graphicsPipeline->vertexStorageBufferCount)]; + } + commandBuffer->needVertexStorageBufferBind = false; + } + + // Vertex Uniform Buffers + + for (Uint32 i = 0; i < graphicsPipeline->vertexUniformBufferCount; i += 1) { + if (commandBuffer->needVertexUniformBufferBind[i]) { + if (graphicsPipeline->vertexUniformBufferCount > i) { + [commandBuffer->renderEncoder + setVertexBuffer:commandBuffer->vertexUniformBuffers[i]->handle + offset:commandBuffer->vertexUniformBuffers[i]->drawOffset + atIndex:i]; + } + commandBuffer->needVertexUniformBufferBind[i] = false; + } + } + + // Fragment Samplers+Textures + + if (commandBuffer->needFragmentSamplerBind) { + if (graphicsPipeline->fragmentSamplerCount > 0) { + [commandBuffer->renderEncoder setFragmentSamplerStates:commandBuffer->fragmentSamplers + withRange:NSMakeRange(0, graphicsPipeline->fragmentSamplerCount)]; + [commandBuffer->renderEncoder setFragmentTextures:commandBuffer->fragmentTextures + withRange:NSMakeRange(0, graphicsPipeline->fragmentSamplerCount)]; + } + commandBuffer->needFragmentSamplerBind = false; + } + + // Fragment Storage Textures + + if (commandBuffer->needFragmentStorageTextureBind) { + if (graphicsPipeline->fragmentStorageTextureCount > 0) { + [commandBuffer->renderEncoder setFragmentTextures:commandBuffer->fragmentStorageTextures + withRange:NSMakeRange(graphicsPipeline->fragmentSamplerCount, + graphicsPipeline->fragmentStorageTextureCount)]; + } + commandBuffer->needFragmentStorageTextureBind = false; + } + + // Fragment Storage Buffers + + if (commandBuffer->needFragmentStorageBufferBind) { + if (graphicsPipeline->fragmentStorageBufferCount > 0) { + [commandBuffer->renderEncoder setFragmentBuffers:commandBuffer->fragmentStorageBuffers + offsets:offsets + withRange:NSMakeRange(graphicsPipeline->fragmentUniformBufferCount, + graphicsPipeline->fragmentStorageBufferCount)]; + } + commandBuffer->needFragmentStorageBufferBind = false; + } + + // Fragment Uniform Buffers + + for (Uint32 i = 0; i < graphicsPipeline->fragmentUniformBufferCount; i += 1) { + if (commandBuffer->needFragmentUniformBufferBind[i]) { + if (graphicsPipeline->fragmentUniformBufferCount > i) { + [commandBuffer->renderEncoder + setFragmentBuffer:commandBuffer->fragmentUniformBuffers[i]->handle + offset:commandBuffer->fragmentUniformBuffers[i]->drawOffset + atIndex:i]; + } + commandBuffer->needFragmentUniformBufferBind[i] = false; + } + } +} + +// This function assumes that it's called from within an autorelease pool +static void METAL_INTERNAL_BindComputeResources( + MetalCommandBuffer *commandBuffer) +{ + MetalComputePipeline *computePipeline = commandBuffer->compute_pipeline; + NSUInteger offsets[MAX_STORAGE_BUFFERS_PER_STAGE] = { 0 }; + + if (commandBuffer->needComputeSamplerBind) { + if (computePipeline->numSamplers > 0) { + [commandBuffer->computeEncoder setTextures:commandBuffer->computeSamplerTextures + withRange:NSMakeRange(0, computePipeline->numSamplers)]; + [commandBuffer->computeEncoder setSamplerStates:commandBuffer->computeSamplers + withRange:NSMakeRange(0, computePipeline->numSamplers)]; + } + commandBuffer->needComputeSamplerBind = false; + } + + if (commandBuffer->needComputeReadOnlyStorageTextureBind) { + if (computePipeline->numReadonlyStorageTextures > 0) { + [commandBuffer->computeEncoder setTextures:commandBuffer->computeReadOnlyTextures + withRange:NSMakeRange( + computePipeline->numSamplers, + computePipeline->numReadonlyStorageTextures)]; + } + commandBuffer->needComputeReadOnlyStorageTextureBind = false; + } + + if (commandBuffer->needComputeReadOnlyStorageBufferBind) { + if (computePipeline->numReadonlyStorageBuffers > 0) { + [commandBuffer->computeEncoder setBuffers:commandBuffer->computeReadOnlyBuffers + offsets:offsets + withRange:NSMakeRange(computePipeline->numUniformBuffers, + computePipeline->numReadonlyStorageBuffers)]; + } + commandBuffer->needComputeReadOnlyStorageBufferBind = false; + } + + for (Uint32 i = 0; i < MAX_UNIFORM_BUFFERS_PER_STAGE; i += 1) { + if (commandBuffer->needComputeUniformBufferBind[i]) { + if (computePipeline->numUniformBuffers > i) { + [commandBuffer->computeEncoder + setBuffer:commandBuffer->computeUniformBuffers[i]->handle + offset:commandBuffer->computeUniformBuffers[i]->drawOffset + atIndex:i]; + } + } + commandBuffer->needComputeUniformBufferBind[i] = false; + } +} + +static void METAL_DrawIndexedPrimitives( + SDL_GPUCommandBuffer *commandBuffer, + Uint32 numIndices, + Uint32 numInstances, + Uint32 firstIndex, + Sint32 vertexOffset, + Uint32 firstInstance) +{ + @autoreleasepool { + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + SDL_GPUPrimitiveType primitiveType = metalCommandBuffer->graphics_pipeline->primitiveType; + Uint32 indexSize = IndexSize(metalCommandBuffer->index_element_size); + + METAL_INTERNAL_BindGraphicsResources(metalCommandBuffer); + + [metalCommandBuffer->renderEncoder + drawIndexedPrimitives:SDLToMetal_PrimitiveType[primitiveType] + indexCount:numIndices + indexType:SDLToMetal_IndexType[metalCommandBuffer->index_element_size] + indexBuffer:metalCommandBuffer->indexBuffer->handle + indexBufferOffset:metalCommandBuffer->indexBufferOffset + (firstIndex * indexSize) + instanceCount:numInstances + baseVertex:vertexOffset + baseInstance:firstInstance]; + } +} + +static void METAL_DrawPrimitives( + SDL_GPUCommandBuffer *commandBuffer, + Uint32 numVertices, + Uint32 numInstances, + Uint32 firstVertex, + Uint32 firstInstance) +{ + @autoreleasepool { + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + SDL_GPUPrimitiveType primitiveType = metalCommandBuffer->graphics_pipeline->primitiveType; + + METAL_INTERNAL_BindGraphicsResources(metalCommandBuffer); + + [metalCommandBuffer->renderEncoder + drawPrimitives:SDLToMetal_PrimitiveType[primitiveType] + vertexStart:firstVertex + vertexCount:numVertices + instanceCount:numInstances + baseInstance:firstInstance]; + } +} + +static void METAL_DrawPrimitivesIndirect( + SDL_GPUCommandBuffer *commandBuffer, + SDL_GPUBuffer *buffer, + Uint32 offset, + Uint32 drawCount) +{ + @autoreleasepool { + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + MetalBuffer *metalBuffer = ((MetalBufferContainer *)buffer)->activeBuffer; + SDL_GPUPrimitiveType primitiveType = metalCommandBuffer->graphics_pipeline->primitiveType; + + METAL_INTERNAL_BindGraphicsResources(metalCommandBuffer); + + /* Metal: "We have multi-draw at home!" + * Multi-draw at home: + */ + for (Uint32 i = 0; i < drawCount; i += 1) { + [metalCommandBuffer->renderEncoder + drawPrimitives:SDLToMetal_PrimitiveType[primitiveType] + indirectBuffer:metalBuffer->handle + indirectBufferOffset:offset + (sizeof(SDL_GPUIndirectDrawCommand) * i)]; + } + + METAL_INTERNAL_TrackBuffer(metalCommandBuffer, metalBuffer); + } +} + +static void METAL_DrawIndexedPrimitivesIndirect( + SDL_GPUCommandBuffer *commandBuffer, + SDL_GPUBuffer *buffer, + Uint32 offset, + Uint32 drawCount) +{ + @autoreleasepool { + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + MetalBuffer *metalBuffer = ((MetalBufferContainer *)buffer)->activeBuffer; + SDL_GPUPrimitiveType primitiveType = metalCommandBuffer->graphics_pipeline->primitiveType; + + METAL_INTERNAL_BindGraphicsResources(metalCommandBuffer); + + for (Uint32 i = 0; i < drawCount; i += 1) { + [metalCommandBuffer->renderEncoder + drawIndexedPrimitives:SDLToMetal_PrimitiveType[primitiveType] + indexType:SDLToMetal_IndexType[metalCommandBuffer->index_element_size] + indexBuffer:metalCommandBuffer->indexBuffer->handle + indexBufferOffset:metalCommandBuffer->indexBufferOffset + indirectBuffer:metalBuffer->handle + indirectBufferOffset:offset + (sizeof(SDL_GPUIndexedIndirectDrawCommand) * i)]; + } + + METAL_INTERNAL_TrackBuffer(metalCommandBuffer, metalBuffer); + } +} + +static void METAL_EndRenderPass( + SDL_GPUCommandBuffer *commandBuffer) +{ + @autoreleasepool { + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + [metalCommandBuffer->renderEncoder endEncoding]; + metalCommandBuffer->renderEncoder = nil; + + for (Uint32 i = 0; i < MAX_VERTEX_BUFFERS; i += 1) { + metalCommandBuffer->vertexBuffers[i] = nil; + metalCommandBuffer->vertexBufferOffsets[i] = 0; + metalCommandBuffer->vertexBufferCount = 0; + } + for (Uint32 i = 0; i < MAX_TEXTURE_SAMPLERS_PER_STAGE; i += 1) { + metalCommandBuffer->vertexSamplers[i] = nil; + metalCommandBuffer->vertexTextures[i] = nil; + metalCommandBuffer->fragmentSamplers[i] = nil; + metalCommandBuffer->fragmentTextures[i] = nil; + } + for (Uint32 i = 0; i < MAX_STORAGE_TEXTURES_PER_STAGE; i += 1) { + metalCommandBuffer->vertexStorageTextures[i] = nil; + metalCommandBuffer->fragmentStorageTextures[i] = nil; + } + for (Uint32 i = 0; i < MAX_STORAGE_BUFFERS_PER_STAGE; i += 1) { + metalCommandBuffer->vertexStorageBuffers[i] = nil; + metalCommandBuffer->fragmentStorageBuffers[i] = nil; + } + } +} + +// This function assumes that it's called from within an autorelease pool +static void METAL_INTERNAL_PushUniformData( + MetalCommandBuffer *metalCommandBuffer, + SDL_GPUShaderStage shaderStage, + Uint32 slotIndex, + const void *data, + Uint32 length) +{ + MetalUniformBuffer *metalUniformBuffer; + Uint32 alignedDataLength; + + if (shaderStage == SDL_GPU_SHADERSTAGE_VERTEX) { + if (metalCommandBuffer->vertexUniformBuffers[slotIndex] == NULL) { + metalCommandBuffer->vertexUniformBuffers[slotIndex] = METAL_INTERNAL_AcquireUniformBufferFromPool( + metalCommandBuffer); + } + metalUniformBuffer = metalCommandBuffer->vertexUniformBuffers[slotIndex]; + } else if (shaderStage == SDL_GPU_SHADERSTAGE_FRAGMENT) { + if (metalCommandBuffer->fragmentUniformBuffers[slotIndex] == NULL) { + metalCommandBuffer->fragmentUniformBuffers[slotIndex] = METAL_INTERNAL_AcquireUniformBufferFromPool( + metalCommandBuffer); + } + metalUniformBuffer = metalCommandBuffer->fragmentUniformBuffers[slotIndex]; + } else if (shaderStage == SDL_GPU_SHADERSTAGE_COMPUTE) { + if (metalCommandBuffer->computeUniformBuffers[slotIndex] == NULL) { + metalCommandBuffer->computeUniformBuffers[slotIndex] = METAL_INTERNAL_AcquireUniformBufferFromPool( + metalCommandBuffer); + } + metalUniformBuffer = metalCommandBuffer->computeUniformBuffers[slotIndex]; + } else { + SDL_LogError(SDL_LOG_CATEGORY_GPU, "Unrecognized shader stage!"); + return; + } + + alignedDataLength = METAL_INTERNAL_NextHighestAlignment( + length, + 256); + + if (metalUniformBuffer->writeOffset + alignedDataLength >= UNIFORM_BUFFER_SIZE) { + metalUniformBuffer = METAL_INTERNAL_AcquireUniformBufferFromPool( + metalCommandBuffer); + + metalUniformBuffer->writeOffset = 0; + metalUniformBuffer->drawOffset = 0; + + if (shaderStage == SDL_GPU_SHADERSTAGE_VERTEX) { + metalCommandBuffer->vertexUniformBuffers[slotIndex] = metalUniformBuffer; + } else if (shaderStage == SDL_GPU_SHADERSTAGE_FRAGMENT) { + metalCommandBuffer->fragmentUniformBuffers[slotIndex] = metalUniformBuffer; + } else if (shaderStage == SDL_GPU_SHADERSTAGE_COMPUTE) { + metalCommandBuffer->computeUniformBuffers[slotIndex] = metalUniformBuffer; + } else { + SDL_LogError(SDL_LOG_CATEGORY_GPU, "Unrecognized shader stage!"); + return; + } + } + + metalUniformBuffer->drawOffset = metalUniformBuffer->writeOffset; + + SDL_memcpy( + (metalUniformBuffer->handle).contents + metalUniformBuffer->writeOffset, + data, + length); + + metalUniformBuffer->writeOffset += alignedDataLength; + + if (shaderStage == SDL_GPU_SHADERSTAGE_VERTEX) { + metalCommandBuffer->needVertexUniformBufferBind[slotIndex] = true; + } else if (shaderStage == SDL_GPU_SHADERSTAGE_FRAGMENT) { + metalCommandBuffer->needFragmentUniformBufferBind[slotIndex] = true; + } else if (shaderStage == SDL_GPU_SHADERSTAGE_COMPUTE) { + metalCommandBuffer->needComputeUniformBufferBind[slotIndex] = true; + } else { + SDL_LogError(SDL_LOG_CATEGORY_GPU, "Unrecognized shader stage!"); + } +} + +static void METAL_PushVertexUniformData( + SDL_GPUCommandBuffer *commandBuffer, + Uint32 slotIndex, + const void *data, + Uint32 length) +{ + @autoreleasepool { + METAL_INTERNAL_PushUniformData( + (MetalCommandBuffer *)commandBuffer, + SDL_GPU_SHADERSTAGE_VERTEX, + slotIndex, + data, + length); + } +} + +static void METAL_PushFragmentUniformData( + SDL_GPUCommandBuffer *commandBuffer, + Uint32 slotIndex, + const void *data, + Uint32 length) +{ + @autoreleasepool { + METAL_INTERNAL_PushUniformData( + (MetalCommandBuffer *)commandBuffer, + SDL_GPU_SHADERSTAGE_FRAGMENT, + slotIndex, + data, + length); + } +} + +// Blit + +static void METAL_Blit( + SDL_GPUCommandBuffer *commandBuffer, + const SDL_GPUBlitInfo *info) +{ + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + MetalRenderer *renderer = (MetalRenderer *)metalCommandBuffer->renderer; + + SDL_GPU_BlitCommon( + commandBuffer, + info, + renderer->blitLinearSampler, + renderer->blitNearestSampler, + renderer->blitVertexShader, + renderer->blitFrom2DShader, + renderer->blitFrom2DArrayShader, + renderer->blitFrom3DShader, + renderer->blitFromCubeShader, + renderer->blitFromCubeArrayShader, + &renderer->blitPipelines, + &renderer->blitPipelineCount, + &renderer->blitPipelineCapacity); +} + +// Compute State + +static void METAL_BeginComputePass( + SDL_GPUCommandBuffer *commandBuffer, + const SDL_GPUStorageTextureReadWriteBinding *storageTextureBindings, + Uint32 numStorageTextureBindings, + const SDL_GPUStorageBufferReadWriteBinding *storageBufferBindings, + Uint32 numStorageBufferBindings) +{ + @autoreleasepool { + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + MetalTextureContainer *textureContainer; + MetalTexture *texture; + id textureView; + MetalBufferContainer *bufferContainer; + MetalBuffer *buffer; + + metalCommandBuffer->computeEncoder = [metalCommandBuffer->handle computeCommandEncoder]; + + for (Uint32 i = 0; i < numStorageTextureBindings; i += 1) { + textureContainer = (MetalTextureContainer *)storageTextureBindings[i].texture; + + texture = METAL_INTERNAL_PrepareTextureForWrite( + metalCommandBuffer->renderer, + textureContainer, + storageTextureBindings[i].cycle); + + METAL_INTERNAL_TrackTexture(metalCommandBuffer, texture); + + textureView = [texture->handle newTextureViewWithPixelFormat:SDLToMetal_TextureFormat(textureContainer->header.info.format) + textureType:SDLToMetal_TextureType(textureContainer->header.info.type, false) + levels:NSMakeRange(storageTextureBindings[i].mip_level, 1) + slices:NSMakeRange(storageTextureBindings[i].layer, 1)]; + + metalCommandBuffer->computeReadWriteTextures[i] = textureView; + } + + for (Uint32 i = 0; i < numStorageBufferBindings; i += 1) { + bufferContainer = (MetalBufferContainer *)storageBufferBindings[i].buffer; + + buffer = METAL_INTERNAL_PrepareBufferForWrite( + metalCommandBuffer->renderer, + bufferContainer, + storageBufferBindings[i].cycle); + + METAL_INTERNAL_TrackBuffer( + metalCommandBuffer, + buffer); + + metalCommandBuffer->computeReadWriteBuffers[i] = buffer->handle; + } + } +} + +static void METAL_BindComputePipeline( + SDL_GPUCommandBuffer *commandBuffer, + SDL_GPUComputePipeline *computePipeline) +{ + @autoreleasepool { + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + MetalComputePipeline *pipeline = (MetalComputePipeline *)computePipeline; + + metalCommandBuffer->compute_pipeline = pipeline; + + [metalCommandBuffer->computeEncoder setComputePipelineState:pipeline->handle]; + + for (Uint32 i = 0; i < MAX_UNIFORM_BUFFERS_PER_STAGE; i += 1) { + metalCommandBuffer->needComputeUniformBufferBind[i] = true; + } + + for (Uint32 i = 0; i < pipeline->numUniformBuffers; i += 1) { + if (metalCommandBuffer->computeUniformBuffers[i] == NULL) { + metalCommandBuffer->computeUniformBuffers[i] = METAL_INTERNAL_AcquireUniformBufferFromPool( + metalCommandBuffer); + } + } + + // Bind write-only resources + if (pipeline->numReadWriteStorageTextures > 0) { + [metalCommandBuffer->computeEncoder setTextures:metalCommandBuffer->computeReadWriteTextures + withRange:NSMakeRange( + pipeline->numSamplers + + pipeline->numReadonlyStorageTextures, + pipeline->numReadWriteStorageTextures)]; + } + + NSUInteger offsets[MAX_COMPUTE_WRITE_BUFFERS] = { 0 }; + if (pipeline->numReadWriteStorageBuffers > 0) { + [metalCommandBuffer->computeEncoder setBuffers:metalCommandBuffer->computeReadWriteBuffers + offsets:offsets + withRange:NSMakeRange( + pipeline->numUniformBuffers + + pipeline->numReadonlyStorageBuffers, + pipeline->numReadWriteStorageBuffers)]; + } + } +} + +static void METAL_BindComputeSamplers( + SDL_GPUCommandBuffer *commandBuffer, + Uint32 firstSlot, + const SDL_GPUTextureSamplerBinding *textureSamplerBindings, + Uint32 numBindings) +{ + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + MetalTextureContainer *textureContainer; + MetalSampler *sampler; + + for (Uint32 i = 0; i < numBindings; i += 1) { + textureContainer = (MetalTextureContainer *)textureSamplerBindings[i].texture; + sampler = (MetalSampler *)textureSamplerBindings[i].sampler; + + if (metalCommandBuffer->computeSamplers[firstSlot + i] != sampler->handle) { + metalCommandBuffer->computeSamplers[firstSlot + i] = sampler->handle; + metalCommandBuffer->needComputeSamplerBind = true; + } + + if (metalCommandBuffer->computeSamplerTextures[firstSlot + i] != textureContainer->activeTexture->handle) { + METAL_INTERNAL_TrackTexture( + metalCommandBuffer, + textureContainer->activeTexture); + + metalCommandBuffer->computeSamplerTextures[firstSlot + i] = + textureContainer->activeTexture->handle; + + metalCommandBuffer->needComputeSamplerBind = true; + } + } +} + +static void METAL_BindComputeStorageTextures( + SDL_GPUCommandBuffer *commandBuffer, + Uint32 firstSlot, + SDL_GPUTexture *const *storageTextures, + Uint32 numBindings) +{ + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + MetalTextureContainer *textureContainer; + + for (Uint32 i = 0; i < numBindings; i += 1) { + textureContainer = (MetalTextureContainer *)storageTextures[i]; + + if (metalCommandBuffer->computeReadOnlyTextures[firstSlot + i] != textureContainer->activeTexture->handle) { + METAL_INTERNAL_TrackTexture( + metalCommandBuffer, + textureContainer->activeTexture); + + metalCommandBuffer->computeReadOnlyTextures[firstSlot + i] = + textureContainer->activeTexture->handle; + + metalCommandBuffer->needComputeReadOnlyStorageTextureBind = true; + } + } +} + +static void METAL_BindComputeStorageBuffers( + SDL_GPUCommandBuffer *commandBuffer, + Uint32 firstSlot, + SDL_GPUBuffer *const *storageBuffers, + Uint32 numBindings) +{ + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + MetalBufferContainer *bufferContainer; + + for (Uint32 i = 0; i < numBindings; i += 1) { + bufferContainer = (MetalBufferContainer *)storageBuffers[i]; + + if (metalCommandBuffer->computeReadOnlyBuffers[firstSlot + i] != bufferContainer->activeBuffer->handle) { + METAL_INTERNAL_TrackBuffer( + metalCommandBuffer, + bufferContainer->activeBuffer); + + metalCommandBuffer->computeReadOnlyBuffers[firstSlot + i] = + bufferContainer->activeBuffer->handle; + + metalCommandBuffer->needComputeReadOnlyStorageBufferBind = true; + } + } +} + +static void METAL_PushComputeUniformData( + SDL_GPUCommandBuffer *commandBuffer, + Uint32 slotIndex, + const void *data, + Uint32 length) +{ + @autoreleasepool { + METAL_INTERNAL_PushUniformData( + (MetalCommandBuffer *)commandBuffer, + SDL_GPU_SHADERSTAGE_COMPUTE, + slotIndex, + data, + length); + } +} + +static void METAL_DispatchCompute( + SDL_GPUCommandBuffer *commandBuffer, + Uint32 groupcountX, + Uint32 groupcountY, + Uint32 groupcountZ) +{ + @autoreleasepool { + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + MTLSize threadgroups = MTLSizeMake(groupcountX, groupcountY, groupcountZ); + MTLSize threadsPerThreadgroup = MTLSizeMake( + metalCommandBuffer->compute_pipeline->threadcountX, + metalCommandBuffer->compute_pipeline->threadcountY, + metalCommandBuffer->compute_pipeline->threadcountZ); + + METAL_INTERNAL_BindComputeResources(metalCommandBuffer); + + [metalCommandBuffer->computeEncoder + dispatchThreadgroups:threadgroups + threadsPerThreadgroup:threadsPerThreadgroup]; + } +} + +static void METAL_DispatchComputeIndirect( + SDL_GPUCommandBuffer *commandBuffer, + SDL_GPUBuffer *buffer, + Uint32 offset) +{ + @autoreleasepool { + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + MetalBuffer *metalBuffer = ((MetalBufferContainer *)buffer)->activeBuffer; + MTLSize threadsPerThreadgroup = MTLSizeMake( + metalCommandBuffer->compute_pipeline->threadcountX, + metalCommandBuffer->compute_pipeline->threadcountY, + metalCommandBuffer->compute_pipeline->threadcountZ); + + METAL_INTERNAL_BindComputeResources(metalCommandBuffer); + + [metalCommandBuffer->computeEncoder + dispatchThreadgroupsWithIndirectBuffer:metalBuffer->handle + indirectBufferOffset:offset + threadsPerThreadgroup:threadsPerThreadgroup]; + + METAL_INTERNAL_TrackBuffer(metalCommandBuffer, metalBuffer); + } +} + +static void METAL_EndComputePass( + SDL_GPUCommandBuffer *commandBuffer) +{ + @autoreleasepool { + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + [metalCommandBuffer->computeEncoder endEncoding]; + metalCommandBuffer->computeEncoder = nil; + + for (Uint32 i = 0; i < MAX_TEXTURE_SAMPLERS_PER_STAGE; i += 1) { + metalCommandBuffer->computeSamplers[i] = nil; + metalCommandBuffer->computeSamplerTextures[i] = nil; + } + for (Uint32 i = 0; i < MAX_COMPUTE_WRITE_TEXTURES; i += 1) { + metalCommandBuffer->computeReadWriteTextures[i] = nil; + } + for (Uint32 i = 0; i < MAX_COMPUTE_WRITE_BUFFERS; i += 1) { + metalCommandBuffer->computeReadWriteBuffers[i] = nil; + } + for (Uint32 i = 0; i < MAX_STORAGE_TEXTURES_PER_STAGE; i += 1) { + metalCommandBuffer->computeReadOnlyTextures[i] = nil; + } + for (Uint32 i = 0; i < MAX_STORAGE_BUFFERS_PER_STAGE; i += 1) { + metalCommandBuffer->computeReadOnlyBuffers[i] = nil; + } + } +} + +// Fence Cleanup + +static void METAL_INTERNAL_ReleaseFenceToPool( + MetalRenderer *renderer, + MetalFence *fence) +{ + SDL_LockMutex(renderer->fenceLock); + + // FIXME: Should this use EXPAND_IF_NEEDED? + if (renderer->availableFenceCount == renderer->availableFenceCapacity) { + renderer->availableFenceCapacity *= 2; + renderer->availableFences = SDL_realloc( + renderer->availableFences, + renderer->availableFenceCapacity * sizeof(MetalFence *)); + } + renderer->availableFences[renderer->availableFenceCount] = fence; + renderer->availableFenceCount += 1; + + SDL_UnlockMutex(renderer->fenceLock); +} + +static void METAL_ReleaseFence( + SDL_GPURenderer *driverData, + SDL_GPUFence *fence) +{ + MetalFence *metalFence = (MetalFence *)fence; + if (SDL_AtomicDecRef(&metalFence->referenceCount)) { + METAL_INTERNAL_ReleaseFenceToPool( + (MetalRenderer *)driverData, + (MetalFence *)fence); + } +} + +// Cleanup + +static void METAL_INTERNAL_CleanCommandBuffer( + MetalRenderer *renderer, + MetalCommandBuffer *commandBuffer, + bool cancel) +{ + Uint32 i; + + // End any active passes + if (commandBuffer->renderEncoder) { + [commandBuffer->renderEncoder endEncoding]; + commandBuffer->renderEncoder = nil; + } + if (commandBuffer->computeEncoder) { + [commandBuffer->computeEncoder endEncoding]; + commandBuffer->computeEncoder = nil; + } + if (commandBuffer->blitEncoder) { + [commandBuffer->blitEncoder endEncoding]; + commandBuffer->blitEncoder = nil; + } + + // Uniform buffers are now available + + SDL_LockMutex(renderer->acquireUniformBufferLock); + + for (i = 0; i < commandBuffer->usedUniformBufferCount; i += 1) { + METAL_INTERNAL_ReturnUniformBufferToPool( + renderer, + commandBuffer->usedUniformBuffers[i]); + } + commandBuffer->usedUniformBufferCount = 0; + + SDL_UnlockMutex(renderer->acquireUniformBufferLock); + + // Reference Counting + + for (i = 0; i < commandBuffer->usedBufferCount; i += 1) { + (void)SDL_AtomicDecRef(&commandBuffer->usedBuffers[i]->referenceCount); + } + commandBuffer->usedBufferCount = 0; + + for (i = 0; i < commandBuffer->usedTextureCount; i += 1) { + (void)SDL_AtomicDecRef(&commandBuffer->usedTextures[i]->referenceCount); + } + commandBuffer->usedTextureCount = 0; + + // Reset presentation + commandBuffer->windowDataCount = 0; + + // Reset bindings + for (i = 0; i < MAX_VERTEX_BUFFERS; i += 1) { + commandBuffer->vertexBuffers[i] = nil; + commandBuffer->vertexBufferOffsets[i] = 0; + } + commandBuffer->vertexBufferCount = 0; + commandBuffer->indexBuffer = NULL; + for (i = 0; i < MAX_TEXTURE_SAMPLERS_PER_STAGE; i += 1) { + commandBuffer->vertexSamplers[i] = nil; + commandBuffer->vertexTextures[i] = nil; + commandBuffer->fragmentSamplers[i] = nil; + commandBuffer->fragmentTextures[i] = nil; + commandBuffer->computeSamplers[i] = nil; + commandBuffer->computeSamplerTextures[i] = nil; + } + for (i = 0; i < MAX_STORAGE_TEXTURES_PER_STAGE; i += 1) { + commandBuffer->vertexStorageTextures[i] = nil; + commandBuffer->fragmentStorageTextures[i] = nil; + commandBuffer->computeReadOnlyTextures[i] = nil; + } + for (i = 0; i < MAX_STORAGE_BUFFERS_PER_STAGE; i += 1) { + commandBuffer->vertexStorageBuffers[i] = nil; + commandBuffer->fragmentStorageBuffers[i] = nil; + commandBuffer->computeReadOnlyBuffers[i] = nil; + } + for (i = 0; i < MAX_COMPUTE_WRITE_TEXTURES; i += 1) { + commandBuffer->computeReadWriteTextures[i] = nil; + } + for (i = 0; i < MAX_COMPUTE_WRITE_BUFFERS; i += 1) { + commandBuffer->computeReadWriteBuffers[i] = nil; + } + + commandBuffer->needVertexBufferBind = false; + commandBuffer->needVertexSamplerBind = false; + commandBuffer->needVertexStorageBufferBind = false; + commandBuffer->needVertexStorageTextureBind = false; + SDL_zeroa(commandBuffer->needVertexUniformBufferBind); + + commandBuffer->needFragmentSamplerBind = false; + commandBuffer->needFragmentStorageBufferBind = false; + commandBuffer->needFragmentStorageTextureBind = false; + SDL_zeroa(commandBuffer->needFragmentUniformBufferBind); + + commandBuffer->needComputeSamplerBind = false; + commandBuffer->needComputeReadOnlyStorageBufferBind = false; + commandBuffer->needComputeReadOnlyStorageTextureBind = false; + SDL_zeroa(commandBuffer->needComputeUniformBufferBind); + + // The fence is now available (unless SubmitAndAcquireFence was called) + if (commandBuffer->autoReleaseFence) { + METAL_ReleaseFence( + (SDL_GPURenderer *)renderer, + (SDL_GPUFence *)commandBuffer->fence); + } + + // Return command buffer to pool + SDL_LockMutex(renderer->acquireCommandBufferLock); + // FIXME: Should this use EXPAND_IF_NEEDED? + if (renderer->availableCommandBufferCount == renderer->availableCommandBufferCapacity) { + renderer->availableCommandBufferCapacity += 1; + renderer->availableCommandBuffers = SDL_realloc( + renderer->availableCommandBuffers, + renderer->availableCommandBufferCapacity * sizeof(MetalCommandBuffer *)); + } + renderer->availableCommandBuffers[renderer->availableCommandBufferCount] = commandBuffer; + renderer->availableCommandBufferCount += 1; + SDL_UnlockMutex(renderer->acquireCommandBufferLock); + + // Remove this command buffer from the submitted list + if (!cancel) { + for (i = 0; i < renderer->submittedCommandBufferCount; i += 1) { + if (renderer->submittedCommandBuffers[i] == commandBuffer) { + renderer->submittedCommandBuffers[i] = renderer->submittedCommandBuffers[renderer->submittedCommandBufferCount - 1]; + renderer->submittedCommandBufferCount -= 1; + } + } + } +} + +// This function assumes that it's called from within an autorelease pool +static void METAL_INTERNAL_PerformPendingDestroys( + MetalRenderer *renderer) +{ + Sint32 referenceCount = 0; + Sint32 i; + Uint32 j; + + for (i = renderer->bufferContainersToDestroyCount - 1; i >= 0; i -= 1) { + referenceCount = 0; + for (j = 0; j < renderer->bufferContainersToDestroy[i]->bufferCount; j += 1) { + referenceCount += SDL_GetAtomicInt(&renderer->bufferContainersToDestroy[i]->buffers[j]->referenceCount); + } + + if (referenceCount == 0) { + METAL_INTERNAL_DestroyBufferContainer( + renderer->bufferContainersToDestroy[i]); + + renderer->bufferContainersToDestroy[i] = renderer->bufferContainersToDestroy[renderer->bufferContainersToDestroyCount - 1]; + renderer->bufferContainersToDestroyCount -= 1; + } + } + + for (i = renderer->textureContainersToDestroyCount - 1; i >= 0; i -= 1) { + referenceCount = 0; + for (j = 0; j < renderer->textureContainersToDestroy[i]->textureCount; j += 1) { + referenceCount += SDL_GetAtomicInt(&renderer->textureContainersToDestroy[i]->textures[j]->referenceCount); + } + + if (referenceCount == 0) { + METAL_INTERNAL_DestroyTextureContainer( + renderer->textureContainersToDestroy[i]); + + renderer->textureContainersToDestroy[i] = renderer->textureContainersToDestroy[renderer->textureContainersToDestroyCount - 1]; + renderer->textureContainersToDestroyCount -= 1; + } + } +} + +// Fences + +static bool METAL_WaitForFences( + SDL_GPURenderer *driverData, + bool waitAll, + SDL_GPUFence *const *fences, + Uint32 numFences) +{ + @autoreleasepool { + MetalRenderer *renderer = (MetalRenderer *)driverData; + bool waiting; + + if (waitAll) { + for (Uint32 i = 0; i < numFences; i += 1) { + while (!SDL_GetAtomicInt(&((MetalFence *)fences[i])->complete)) { + // Spin! + } + } + } else { + waiting = 1; + while (waiting) { + for (Uint32 i = 0; i < numFences; i += 1) { + if (SDL_GetAtomicInt(&((MetalFence *)fences[i])->complete) > 0) { + waiting = 0; + break; + } + } + } + } + + METAL_INTERNAL_PerformPendingDestroys(renderer); + + return true; + } +} + +static bool METAL_QueryFence( + SDL_GPURenderer *driverData, + SDL_GPUFence *fence) +{ + MetalFence *metalFence = (MetalFence *)fence; + return SDL_GetAtomicInt(&metalFence->complete) == 1; +} + +// Window and Swapchain Management + +static MetalWindowData *METAL_INTERNAL_FetchWindowData(SDL_Window *window) +{ + SDL_PropertiesID properties = SDL_GetWindowProperties(window); + return (MetalWindowData *)SDL_GetPointerProperty(properties, WINDOW_PROPERTY_DATA, NULL); +} + +static bool METAL_SupportsSwapchainComposition( + SDL_GPURenderer *driverData, + SDL_Window *window, + SDL_GPUSwapchainComposition swapchainComposition) +{ +#ifndef SDL_PLATFORM_MACOS + if (swapchainComposition == SDL_GPU_SWAPCHAINCOMPOSITION_HDR10_ST2084) { + return false; + } +#endif + + if (@available(macOS 11.0, *)) { + return true; + } else { + return swapchainComposition != SDL_GPU_SWAPCHAINCOMPOSITION_HDR10_ST2084; + } +} + +// This function assumes that it's called from within an autorelease pool +static Uint8 METAL_INTERNAL_CreateSwapchain( + MetalRenderer *renderer, + MetalWindowData *windowData, + SDL_GPUSwapchainComposition swapchainComposition, + SDL_GPUPresentMode presentMode) +{ + CGColorSpaceRef colorspace; + CGSize drawableSize; + + windowData->view = SDL_Metal_CreateView(windowData->window); + windowData->drawable = nil; + windowData->presentMode = SDL_GPU_PRESENTMODE_VSYNC; + windowData->frameCounter = 0; + + for (int i = 0; i < MAX_FRAMES_IN_FLIGHT; i += 1) { + windowData->inFlightFences[i] = NULL; + } + + windowData->layer = (__bridge CAMetalLayer *)(SDL_Metal_GetLayer(windowData->view)); + windowData->layer.device = renderer->device; +#ifdef SDL_PLATFORM_MACOS + if (@available(macOS 10.13, *)) { + windowData->layer.displaySyncEnabled = (presentMode != SDL_GPU_PRESENTMODE_IMMEDIATE); + windowData->presentMode = presentMode; + } +#endif + windowData->layer.pixelFormat = SDLToMetal_TextureFormat(SwapchainCompositionToFormat[swapchainComposition]); +#ifndef SDL_PLATFORM_TVOS + if (@available(iOS 16.0, *)) { + windowData->layer.wantsExtendedDynamicRangeContent = (swapchainComposition != SDL_GPU_SWAPCHAINCOMPOSITION_SDR); + } +#endif + + colorspace = CGColorSpaceCreateWithName(SwapchainCompositionToColorSpace[swapchainComposition]); + windowData->layer.colorspace = colorspace; + CGColorSpaceRelease(colorspace); + + windowData->texture.handle = nil; // This will be set in AcquireSwapchainTexture. + + // Precache blit pipelines for the swapchain format + for (Uint32 i = 0; i < 4; i += 1) { + SDL_GPU_FetchBlitPipeline( + renderer->sdlGPUDevice, + (SDL_GPUTextureType)i, + SwapchainCompositionToFormat[swapchainComposition], + renderer->blitVertexShader, + renderer->blitFrom2DShader, + renderer->blitFrom2DArrayShader, + renderer->blitFrom3DShader, + renderer->blitFromCubeShader, + renderer->blitFromCubeArrayShader, + &renderer->blitPipelines, + &renderer->blitPipelineCount, + &renderer->blitPipelineCapacity); + } + + // Set up the texture container + SDL_zero(windowData->textureContainer); + windowData->textureContainer.canBeCycled = 0; + windowData->textureContainer.activeTexture = &windowData->texture; + windowData->textureContainer.textureCapacity = 1; + windowData->textureContainer.textureCount = 1; + windowData->textureContainer.header.info.format = SwapchainCompositionToFormat[swapchainComposition]; + windowData->textureContainer.header.info.num_levels = 1; + windowData->textureContainer.header.info.layer_count_or_depth = 1; + windowData->textureContainer.header.info.type = SDL_GPU_TEXTURETYPE_2D; + windowData->textureContainer.header.info.usage = SDL_GPU_TEXTUREUSAGE_COLOR_TARGET; + + drawableSize = windowData->layer.drawableSize; + windowData->textureContainer.header.info.width = (Uint32)drawableSize.width; + windowData->textureContainer.header.info.height = (Uint32)drawableSize.height; + + return 1; +} + +static bool METAL_SupportsPresentMode( + SDL_GPURenderer *driverData, + SDL_Window *window, + SDL_GPUPresentMode presentMode) +{ + switch (presentMode) { +#ifdef SDL_PLATFORM_MACOS + case SDL_GPU_PRESENTMODE_IMMEDIATE: +#endif + case SDL_GPU_PRESENTMODE_VSYNC: + return true; + default: + return false; + } +} + +static bool METAL_ClaimWindow( + SDL_GPURenderer *driverData, + SDL_Window *window) +{ + @autoreleasepool { + MetalRenderer *renderer = (MetalRenderer *)driverData; + MetalWindowData *windowData = METAL_INTERNAL_FetchWindowData(window); + + if (windowData == NULL) { + windowData = (MetalWindowData *)SDL_calloc(1, sizeof(MetalWindowData)); + windowData->window = window; + + if (METAL_INTERNAL_CreateSwapchain(renderer, windowData, SDL_GPU_SWAPCHAINCOMPOSITION_SDR, SDL_GPU_PRESENTMODE_VSYNC)) { + SDL_SetPointerProperty(SDL_GetWindowProperties(window), WINDOW_PROPERTY_DATA, windowData); + + SDL_LockMutex(renderer->windowLock); + + if (renderer->claimedWindowCount >= renderer->claimedWindowCapacity) { + renderer->claimedWindowCapacity *= 2; + renderer->claimedWindows = SDL_realloc( + renderer->claimedWindows, + renderer->claimedWindowCapacity * sizeof(MetalWindowData *)); + } + renderer->claimedWindows[renderer->claimedWindowCount] = windowData; + renderer->claimedWindowCount += 1; + + SDL_UnlockMutex(renderer->windowLock); + + return true; + } else { + SDL_free(windowData); + SET_STRING_ERROR_AND_RETURN("Could not create swapchain, failed to claim window", false); + } + } else { + SET_ERROR_AND_RETURN("%s", "Window already claimed!", false); + } + } +} + +static void METAL_ReleaseWindow( + SDL_GPURenderer *driverData, + SDL_Window *window) +{ + @autoreleasepool { + MetalRenderer *renderer = (MetalRenderer *)driverData; + MetalWindowData *windowData = METAL_INTERNAL_FetchWindowData(window); + + if (windowData == NULL) { + SET_STRING_ERROR_AND_RETURN("Window is not claimed by this SDL_GpuDevice", ); + } + + METAL_Wait(driverData); + SDL_Metal_DestroyView(windowData->view); + for (int i = 0; i < MAX_FRAMES_IN_FLIGHT; i += 1) { + if (windowData->inFlightFences[i] != NULL) { + METAL_ReleaseFence( + (SDL_GPURenderer *)renderer, + windowData->inFlightFences[i]); + } + } + + SDL_LockMutex(renderer->windowLock); + for (Uint32 i = 0; i < renderer->claimedWindowCount; i += 1) { + if (renderer->claimedWindows[i]->window == window) { + renderer->claimedWindows[i] = renderer->claimedWindows[renderer->claimedWindowCount - 1]; + renderer->claimedWindowCount -= 1; + break; + } + } + SDL_UnlockMutex(renderer->windowLock); + + SDL_free(windowData); + + SDL_ClearProperty(SDL_GetWindowProperties(window), WINDOW_PROPERTY_DATA); + } +} + +static bool METAL_WaitForSwapchain( + SDL_GPURenderer *driverData, + SDL_Window *window) +{ + @autoreleasepool { + MetalRenderer *renderer = (MetalRenderer *)driverData; + MetalWindowData *windowData = METAL_INTERNAL_FetchWindowData(window); + + if (windowData == NULL) { + SET_STRING_ERROR_AND_RETURN("Cannot wait for a swapchain from an unclaimed window!", false); + } + + if (windowData->inFlightFences[windowData->frameCounter] != NULL) { + if (!METAL_WaitForFences( + driverData, + true, + &windowData->inFlightFences[windowData->frameCounter], + 1)) { + return false; + } + } + + return true; + } +} + +static bool METAL_INTERNAL_AcquireSwapchainTexture( + bool block, + SDL_GPUCommandBuffer *commandBuffer, + SDL_Window *window, + SDL_GPUTexture **texture, + Uint32 *swapchainTextureWidth, + Uint32 *swapchainTextureHeight) +{ + @autoreleasepool { + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + MetalRenderer *renderer = metalCommandBuffer->renderer; + MetalWindowData *windowData; + CGSize drawableSize; + + *texture = NULL; + if (swapchainTextureWidth) { + *swapchainTextureWidth = 0; + } + if (swapchainTextureHeight) { + *swapchainTextureHeight = 0; + } + + windowData = METAL_INTERNAL_FetchWindowData(window); + if (windowData == NULL) { + SET_STRING_ERROR_AND_RETURN("Window is not claimed by this SDL_GpuDevice", false); + } + + // Update the window size + drawableSize = windowData->layer.drawableSize; + windowData->textureContainer.header.info.width = (Uint32)drawableSize.width; + windowData->textureContainer.header.info.height = (Uint32)drawableSize.height; + if (swapchainTextureWidth) { + *swapchainTextureWidth = (Uint32)drawableSize.width; + } + if (swapchainTextureHeight) { + *swapchainTextureHeight = (Uint32)drawableSize.height; + } + + if (windowData->inFlightFences[windowData->frameCounter] != NULL) { + if (block) { + // If we are blocking, just wait for the fence! + if (!METAL_WaitForFences( + (SDL_GPURenderer *)renderer, + true, + &windowData->inFlightFences[windowData->frameCounter], + 1)) { + return false; + } + } else { + // If we are not blocking and the least recent fence is not signaled, + // return true to indicate that there is no error but rendering should be skipped. + if (!METAL_QueryFence( + (SDL_GPURenderer *)metalCommandBuffer->renderer, + windowData->inFlightFences[windowData->frameCounter])) { + return true; + } + } + + METAL_ReleaseFence( + (SDL_GPURenderer *)metalCommandBuffer->renderer, + windowData->inFlightFences[windowData->frameCounter]); + + windowData->inFlightFences[windowData->frameCounter] = NULL; + } + + // Get the drawable and its underlying texture + windowData->drawable = [windowData->layer nextDrawable]; + windowData->texture.handle = [windowData->drawable texture]; + + // Set up presentation + if (metalCommandBuffer->windowDataCount == metalCommandBuffer->windowDataCapacity) { + metalCommandBuffer->windowDataCapacity += 1; + metalCommandBuffer->windowDatas = SDL_realloc( + metalCommandBuffer->windowDatas, + metalCommandBuffer->windowDataCapacity * sizeof(MetalWindowData *)); + } + metalCommandBuffer->windowDatas[metalCommandBuffer->windowDataCount] = windowData; + metalCommandBuffer->windowDataCount += 1; + + // Return the swapchain texture + *texture = (SDL_GPUTexture *)&windowData->textureContainer; + return true; + } +} + +static bool METAL_AcquireSwapchainTexture( + SDL_GPUCommandBuffer *command_buffer, + SDL_Window *window, + SDL_GPUTexture **swapchain_texture, + Uint32 *swapchain_texture_width, + Uint32 *swapchain_texture_height +) { + return METAL_INTERNAL_AcquireSwapchainTexture( + false, + command_buffer, + window, + swapchain_texture, + swapchain_texture_width, + swapchain_texture_height); +} + +static bool METAL_WaitAndAcquireSwapchainTexture( + SDL_GPUCommandBuffer *command_buffer, + SDL_Window *window, + SDL_GPUTexture **swapchain_texture, + Uint32 *swapchain_texture_width, + Uint32 *swapchain_texture_height +) { + return METAL_INTERNAL_AcquireSwapchainTexture( + true, + command_buffer, + window, + swapchain_texture, + swapchain_texture_width, + swapchain_texture_height); +} + +static SDL_GPUTextureFormat METAL_GetSwapchainTextureFormat( + SDL_GPURenderer *driverData, + SDL_Window *window) +{ + MetalRenderer *renderer = (MetalRenderer *)driverData; + MetalWindowData *windowData = METAL_INTERNAL_FetchWindowData(window); + + if (windowData == NULL) { + SET_STRING_ERROR_AND_RETURN("Cannot get swapchain format, window has not been claimed", SDL_GPU_TEXTUREFORMAT_INVALID); + } + + return windowData->textureContainer.header.info.format; +} + +static bool METAL_SetSwapchainParameters( + SDL_GPURenderer *driverData, + SDL_Window *window, + SDL_GPUSwapchainComposition swapchainComposition, + SDL_GPUPresentMode presentMode) +{ + @autoreleasepool { + MetalRenderer *renderer = (MetalRenderer *)driverData; + MetalWindowData *windowData = METAL_INTERNAL_FetchWindowData(window); + CGColorSpaceRef colorspace; + + if (windowData == NULL) { + SET_STRING_ERROR_AND_RETURN("Cannot set swapchain parameters, window has not been claimed!", false); + } + + if (!METAL_SupportsSwapchainComposition(driverData, window, swapchainComposition)) { + SET_STRING_ERROR_AND_RETURN("Swapchain composition not supported", false); + } + + if (!METAL_SupportsPresentMode(driverData, window, presentMode)) { + SET_STRING_ERROR_AND_RETURN("Present mode not supported", false); + } + + METAL_Wait(driverData); + + windowData->presentMode = SDL_GPU_PRESENTMODE_VSYNC; + +#ifdef SDL_PLATFORM_MACOS + if (@available(macOS 10.13, *)) { + windowData->layer.displaySyncEnabled = (presentMode != SDL_GPU_PRESENTMODE_IMMEDIATE); + windowData->presentMode = presentMode; + } +#endif + windowData->layer.pixelFormat = SDLToMetal_TextureFormat(SwapchainCompositionToFormat[swapchainComposition]); +#ifndef SDL_PLATFORM_TVOS + if (@available(iOS 16.0, *)) { + windowData->layer.wantsExtendedDynamicRangeContent = (swapchainComposition != SDL_GPU_SWAPCHAINCOMPOSITION_SDR); + } +#endif + + colorspace = CGColorSpaceCreateWithName(SwapchainCompositionToColorSpace[swapchainComposition]); + windowData->layer.colorspace = colorspace; + CGColorSpaceRelease(colorspace); + + windowData->textureContainer.header.info.format = SwapchainCompositionToFormat[swapchainComposition]; + + return true; + } +} + +static bool METAL_SetAllowedFramesInFlight( + SDL_GPURenderer *driverData, + Uint32 allowedFramesInFlight) +{ + @autoreleasepool { + MetalRenderer *renderer = (MetalRenderer *)driverData; + + if (!METAL_Wait(driverData)) { + return false; + } + + renderer->allowedFramesInFlight = allowedFramesInFlight; + return true; + } +} + +// Submission + +static bool METAL_Submit( + SDL_GPUCommandBuffer *commandBuffer) +{ + @autoreleasepool { + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + MetalRenderer *renderer = metalCommandBuffer->renderer; + + SDL_LockMutex(renderer->submitLock); + + if (!METAL_INTERNAL_AcquireFence(renderer, metalCommandBuffer)) { + SDL_UnlockMutex(renderer->submitLock); + return false; + } + + // Enqueue present requests, if applicable + for (Uint32 i = 0; i < metalCommandBuffer->windowDataCount; i += 1) { + MetalWindowData *windowData = metalCommandBuffer->windowDatas[i]; + [metalCommandBuffer->handle presentDrawable:windowData->drawable]; + windowData->drawable = nil; + + windowData->inFlightFences[windowData->frameCounter] = (SDL_GPUFence *)metalCommandBuffer->fence; + + (void)SDL_AtomicIncRef(&metalCommandBuffer->fence->referenceCount); + + windowData->frameCounter = (windowData->frameCounter + 1) % renderer->allowedFramesInFlight; + } + + // Notify the fence when the command buffer has completed + [metalCommandBuffer->handle addCompletedHandler:^(id buffer) { + SDL_AtomicIncRef(&metalCommandBuffer->fence->complete); + }]; + + // Submit the command buffer + [metalCommandBuffer->handle commit]; + metalCommandBuffer->handle = nil; + + // Mark the command buffer as submitted + if (renderer->submittedCommandBufferCount >= renderer->submittedCommandBufferCapacity) { + renderer->submittedCommandBufferCapacity = renderer->submittedCommandBufferCount + 1; + + renderer->submittedCommandBuffers = SDL_realloc( + renderer->submittedCommandBuffers, + sizeof(MetalCommandBuffer *) * renderer->submittedCommandBufferCapacity); + } + renderer->submittedCommandBuffers[renderer->submittedCommandBufferCount] = metalCommandBuffer; + renderer->submittedCommandBufferCount += 1; + + // Check if we can perform any cleanups + for (Sint32 i = renderer->submittedCommandBufferCount - 1; i >= 0; i -= 1) { + if (SDL_GetAtomicInt(&renderer->submittedCommandBuffers[i]->fence->complete)) { + METAL_INTERNAL_CleanCommandBuffer( + renderer, + renderer->submittedCommandBuffers[i], + false); + } + } + + METAL_INTERNAL_PerformPendingDestroys(renderer); + + SDL_UnlockMutex(renderer->submitLock); + + return true; + } +} + +static SDL_GPUFence *METAL_SubmitAndAcquireFence( + SDL_GPUCommandBuffer *commandBuffer) +{ + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + metalCommandBuffer->autoReleaseFence = false; + if (!METAL_Submit(commandBuffer)) { + return NULL; + } + return (SDL_GPUFence *)metalCommandBuffer->fence; +} + +static bool METAL_Cancel( + SDL_GPUCommandBuffer *commandBuffer) +{ + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + MetalRenderer *renderer = metalCommandBuffer->renderer; + + metalCommandBuffer->autoReleaseFence = false; + SDL_LockMutex(renderer->submitLock); + METAL_INTERNAL_CleanCommandBuffer(renderer, metalCommandBuffer, true); + SDL_UnlockMutex(renderer->submitLock); + + return true; +} + +static bool METAL_Wait( + SDL_GPURenderer *driverData) +{ + @autoreleasepool { + MetalRenderer *renderer = (MetalRenderer *)driverData; + MetalCommandBuffer *commandBuffer; + + /* + * Wait for all submitted command buffers to complete. + * Sort of equivalent to vkDeviceWaitIdle. + */ + for (Uint32 i = 0; i < renderer->submittedCommandBufferCount; i += 1) { + while (!SDL_GetAtomicInt(&renderer->submittedCommandBuffers[i]->fence->complete)) { + // Spin! + } + } + + SDL_LockMutex(renderer->submitLock); + + for (Sint32 i = renderer->submittedCommandBufferCount - 1; i >= 0; i -= 1) { + commandBuffer = renderer->submittedCommandBuffers[i]; + METAL_INTERNAL_CleanCommandBuffer(renderer, commandBuffer, false); + } + + METAL_INTERNAL_PerformPendingDestroys(renderer); + + SDL_UnlockMutex(renderer->submitLock); + + return true; + } +} + +// Format Info + +// FIXME: Check simultaneous read-write support +static bool METAL_SupportsTextureFormat( + SDL_GPURenderer *driverData, + SDL_GPUTextureFormat format, + SDL_GPUTextureType type, + SDL_GPUTextureUsageFlags usage) +{ + @autoreleasepool { + MetalRenderer *renderer = (MetalRenderer *)driverData; + + // Only depth textures can be used as... depth textures + if ((usage & SDL_GPU_TEXTUREUSAGE_DEPTH_STENCIL_TARGET)) { + if (!IsDepthFormat(format)) { + return false; + } + } + + // Cube arrays are not supported on older iOS devices + if (type == SDL_GPU_TEXTURETYPE_CUBE_ARRAY) { +#ifdef SDL_PLATFORM_MACOS + return true; +#else + if (@available(iOS 13.0, tvOS 13.0, *)) { + if (!([renderer->device supportsFamily:MTLGPUFamilyCommon2] || + [renderer->device supportsFamily:MTLGPUFamilyApple4])) { + return false; + } + } else { + return false; + } +#endif + } + + switch (format) { + // Apple GPU exclusive + case SDL_GPU_TEXTUREFORMAT_B5G6R5_UNORM: + case SDL_GPU_TEXTUREFORMAT_B5G5R5A1_UNORM: + case SDL_GPU_TEXTUREFORMAT_B4G4R4A4_UNORM: + if (@available(macOS 10.15, iOS 13.0, tvOS 13.0, *)) { + return [renderer->device supportsFamily:MTLGPUFamilyApple1]; + } else { + return false; + } + + // Requires BC compression support + case SDL_GPU_TEXTUREFORMAT_BC1_RGBA_UNORM: + case SDL_GPU_TEXTUREFORMAT_BC2_RGBA_UNORM: + case SDL_GPU_TEXTUREFORMAT_BC3_RGBA_UNORM: + case SDL_GPU_TEXTUREFORMAT_BC4_R_UNORM: + case SDL_GPU_TEXTUREFORMAT_BC5_RG_UNORM: + case SDL_GPU_TEXTUREFORMAT_BC7_RGBA_UNORM: + case SDL_GPU_TEXTUREFORMAT_BC6H_RGB_FLOAT: + case SDL_GPU_TEXTUREFORMAT_BC6H_RGB_UFLOAT: + case SDL_GPU_TEXTUREFORMAT_BC1_RGBA_UNORM_SRGB: + case SDL_GPU_TEXTUREFORMAT_BC2_RGBA_UNORM_SRGB: + case SDL_GPU_TEXTUREFORMAT_BC3_RGBA_UNORM_SRGB: + case SDL_GPU_TEXTUREFORMAT_BC7_RGBA_UNORM_SRGB: + if (@available(iOS 16.4, tvOS 16.4, *)) { + if (usage & SDL_GPU_TEXTUREUSAGE_COLOR_TARGET) { + return false; + } + if (@available(macOS 11.0, *)) { + return [renderer->device supportsBCTextureCompression]; + } else { + return true; + } + } else { + return false; + } + + // Requires D24S8 support + case SDL_GPU_TEXTUREFORMAT_D24_UNORM: + case SDL_GPU_TEXTUREFORMAT_D24_UNORM_S8_UINT: +#ifdef SDL_PLATFORM_MACOS + return [renderer->device isDepth24Stencil8PixelFormatSupported]; +#else + return false; +#endif + + case SDL_GPU_TEXTUREFORMAT_D16_UNORM: + if (@available(macOS 10.12, iOS 13.0, tvOS 13.0, *)) { + return true; + } else { + return false; + } + + case SDL_GPU_TEXTUREFORMAT_ASTC_4x4_UNORM: + case SDL_GPU_TEXTUREFORMAT_ASTC_5x4_UNORM: + case SDL_GPU_TEXTUREFORMAT_ASTC_5x5_UNORM: + case SDL_GPU_TEXTUREFORMAT_ASTC_6x5_UNORM: + case SDL_GPU_TEXTUREFORMAT_ASTC_6x6_UNORM: + case SDL_GPU_TEXTUREFORMAT_ASTC_8x5_UNORM: + case SDL_GPU_TEXTUREFORMAT_ASTC_8x6_UNORM: + case SDL_GPU_TEXTUREFORMAT_ASTC_8x8_UNORM: + case SDL_GPU_TEXTUREFORMAT_ASTC_10x5_UNORM: + case SDL_GPU_TEXTUREFORMAT_ASTC_10x6_UNORM: + case SDL_GPU_TEXTUREFORMAT_ASTC_10x8_UNORM: + case SDL_GPU_TEXTUREFORMAT_ASTC_10x10_UNORM: + case SDL_GPU_TEXTUREFORMAT_ASTC_12x10_UNORM: + case SDL_GPU_TEXTUREFORMAT_ASTC_12x12_UNORM: + case SDL_GPU_TEXTUREFORMAT_ASTC_4x4_UNORM_SRGB: + case SDL_GPU_TEXTUREFORMAT_ASTC_5x4_UNORM_SRGB: + case SDL_GPU_TEXTUREFORMAT_ASTC_5x5_UNORM_SRGB: + case SDL_GPU_TEXTUREFORMAT_ASTC_6x5_UNORM_SRGB: + case SDL_GPU_TEXTUREFORMAT_ASTC_6x6_UNORM_SRGB: + case SDL_GPU_TEXTUREFORMAT_ASTC_8x5_UNORM_SRGB: + case SDL_GPU_TEXTUREFORMAT_ASTC_8x6_UNORM_SRGB: + case SDL_GPU_TEXTUREFORMAT_ASTC_8x8_UNORM_SRGB: + case SDL_GPU_TEXTUREFORMAT_ASTC_10x5_UNORM_SRGB: + case SDL_GPU_TEXTUREFORMAT_ASTC_10x6_UNORM_SRGB: + case SDL_GPU_TEXTUREFORMAT_ASTC_10x8_UNORM_SRGB: + case SDL_GPU_TEXTUREFORMAT_ASTC_10x10_UNORM_SRGB: + case SDL_GPU_TEXTUREFORMAT_ASTC_12x10_UNORM_SRGB: + case SDL_GPU_TEXTUREFORMAT_ASTC_12x12_UNORM_SRGB: +#ifdef SDL_PLATFORM_MACOS + if (@available(macOS 11.0, *)) { + return [renderer->device supportsFamily:MTLGPUFamilyApple7]; + } else { + return false; + } +#else + return true; +#endif + case SDL_GPU_TEXTUREFORMAT_ASTC_4x4_FLOAT: + case SDL_GPU_TEXTUREFORMAT_ASTC_5x4_FLOAT: + case SDL_GPU_TEXTUREFORMAT_ASTC_5x5_FLOAT: + case SDL_GPU_TEXTUREFORMAT_ASTC_6x5_FLOAT: + case SDL_GPU_TEXTUREFORMAT_ASTC_6x6_FLOAT: + case SDL_GPU_TEXTUREFORMAT_ASTC_8x5_FLOAT: + case SDL_GPU_TEXTUREFORMAT_ASTC_8x6_FLOAT: + case SDL_GPU_TEXTUREFORMAT_ASTC_8x8_FLOAT: + case SDL_GPU_TEXTUREFORMAT_ASTC_10x5_FLOAT: + case SDL_GPU_TEXTUREFORMAT_ASTC_10x6_FLOAT: + case SDL_GPU_TEXTUREFORMAT_ASTC_10x8_FLOAT: + case SDL_GPU_TEXTUREFORMAT_ASTC_10x10_FLOAT: + case SDL_GPU_TEXTUREFORMAT_ASTC_12x10_FLOAT: + case SDL_GPU_TEXTUREFORMAT_ASTC_12x12_FLOAT: +#ifdef SDL_PLATFORM_MACOS + if (@available(macOS 11.0, *)) { + return [renderer->device supportsFamily:MTLGPUFamilyApple7]; + } else { + return false; + } +#else + if (@available(iOS 13.0, tvOS 13.0, *)) { + return [renderer->device supportsFamily:MTLGPUFamilyApple6]; + } else { + return false; + } +#endif + default: + return true; + } + } +} + +// Device Creation + +static bool METAL_PrepareDriver(SDL_VideoDevice *this) +{ + if (@available(macOS 10.14, iOS 13.0, tvOS 13.0, *)) { + return (this->Metal_CreateView != NULL); + } + return false; +} + +static void METAL_INTERNAL_InitBlitResources( + MetalRenderer *renderer) +{ + SDL_GPUShaderCreateInfo shaderModuleCreateInfo; + SDL_GPUSamplerCreateInfo createinfo; + + // Allocate the dynamic blit pipeline list + renderer->blitPipelineCapacity = 2; + renderer->blitPipelineCount = 0; + renderer->blitPipelines = SDL_calloc( + renderer->blitPipelineCapacity, sizeof(BlitPipelineCacheEntry)); + + // Fullscreen vertex shader + SDL_zero(shaderModuleCreateInfo); + shaderModuleCreateInfo.code = FullscreenVert_metallib; + shaderModuleCreateInfo.code_size = FullscreenVert_metallib_len; + shaderModuleCreateInfo.stage = SDL_GPU_SHADERSTAGE_VERTEX; + shaderModuleCreateInfo.format = SDL_GPU_SHADERFORMAT_METALLIB; + shaderModuleCreateInfo.entrypoint = "FullscreenVert"; + + renderer->blitVertexShader = METAL_CreateShader( + (SDL_GPURenderer *)renderer, + &shaderModuleCreateInfo); + + if (renderer->blitVertexShader == NULL) { + SDL_LogError(SDL_LOG_CATEGORY_GPU, "Failed to compile vertex shader for blit!"); + } + + // BlitFrom2D fragment shader + shaderModuleCreateInfo.code = BlitFrom2D_metallib; + shaderModuleCreateInfo.code_size = BlitFrom2D_metallib_len; + shaderModuleCreateInfo.stage = SDL_GPU_SHADERSTAGE_FRAGMENT; + shaderModuleCreateInfo.entrypoint = "BlitFrom2D"; + shaderModuleCreateInfo.num_samplers = 1; + shaderModuleCreateInfo.num_uniform_buffers = 1; + + renderer->blitFrom2DShader = METAL_CreateShader( + (SDL_GPURenderer *)renderer, + &shaderModuleCreateInfo); + + if (renderer->blitFrom2DShader == NULL) { + SDL_LogError(SDL_LOG_CATEGORY_GPU, "Failed to compile BlitFrom2D fragment shader!"); + } + + // BlitFrom2DArray fragment shader + shaderModuleCreateInfo.code = BlitFrom2DArray_metallib; + shaderModuleCreateInfo.code_size = BlitFrom2DArray_metallib_len; + shaderModuleCreateInfo.entrypoint = "BlitFrom2DArray"; + + renderer->blitFrom2DArrayShader = METAL_CreateShader( + (SDL_GPURenderer *)renderer, + &shaderModuleCreateInfo); + + if (renderer->blitFrom2DArrayShader == NULL) { + SDL_LogError(SDL_LOG_CATEGORY_GPU, "Failed to compile BlitFrom2DArray fragment shader!"); + } + + // BlitFrom3D fragment shader + shaderModuleCreateInfo.code = BlitFrom3D_metallib; + shaderModuleCreateInfo.code_size = BlitFrom3D_metallib_len; + shaderModuleCreateInfo.entrypoint = "BlitFrom3D"; + + renderer->blitFrom3DShader = METAL_CreateShader( + (SDL_GPURenderer *)renderer, + &shaderModuleCreateInfo); + + if (renderer->blitFrom3DShader == NULL) { + SDL_LogError(SDL_LOG_CATEGORY_GPU, "Failed to compile BlitFrom3D fragment shader!"); + } + + // BlitFromCube fragment shader + shaderModuleCreateInfo.code = BlitFromCube_metallib; + shaderModuleCreateInfo.code_size = BlitFromCube_metallib_len; + shaderModuleCreateInfo.entrypoint = "BlitFromCube"; + + renderer->blitFromCubeShader = METAL_CreateShader( + (SDL_GPURenderer *)renderer, + &shaderModuleCreateInfo); + + if (renderer->blitFromCubeShader == NULL) { + SDL_LogError(SDL_LOG_CATEGORY_GPU, "Failed to compile BlitFromCube fragment shader!"); + } + + // BlitFromCubeArray fragment shader + shaderModuleCreateInfo.code = BlitFromCubeArray_metallib; + shaderModuleCreateInfo.code_size = BlitFromCubeArray_metallib_len; + shaderModuleCreateInfo.entrypoint = "BlitFromCubeArray"; + + renderer->blitFromCubeArrayShader = METAL_CreateShader( + (SDL_GPURenderer *)renderer, + &shaderModuleCreateInfo); + + if (renderer->blitFromCubeArrayShader == NULL) { + SDL_LogError(SDL_LOG_CATEGORY_GPU, "Failed to compile BlitFromCubeArray fragment shader!"); + } + + // Create samplers + createinfo.address_mode_u = SDL_GPU_SAMPLERADDRESSMODE_CLAMP_TO_EDGE; + createinfo.address_mode_v = SDL_GPU_SAMPLERADDRESSMODE_CLAMP_TO_EDGE; + createinfo.address_mode_w = SDL_GPU_SAMPLERADDRESSMODE_CLAMP_TO_EDGE; + createinfo.enable_anisotropy = 0; + createinfo.enable_compare = 0; + createinfo.mag_filter = SDL_GPU_FILTER_NEAREST; + createinfo.min_filter = SDL_GPU_FILTER_NEAREST; + createinfo.mipmap_mode = SDL_GPU_SAMPLERMIPMAPMODE_NEAREST; + createinfo.mip_lod_bias = 0.0f; + createinfo.min_lod = 0; + createinfo.max_lod = 1000; + createinfo.max_anisotropy = 1.0f; + createinfo.compare_op = SDL_GPU_COMPAREOP_ALWAYS; + + renderer->blitNearestSampler = METAL_CreateSampler( + (SDL_GPURenderer *)renderer, + &createinfo); + + if (renderer->blitNearestSampler == NULL) { + SDL_LogError(SDL_LOG_CATEGORY_GPU, "Failed to create blit nearest sampler!"); + } + + createinfo.mag_filter = SDL_GPU_FILTER_LINEAR; + createinfo.min_filter = SDL_GPU_FILTER_LINEAR; + createinfo.mipmap_mode = SDL_GPU_SAMPLERMIPMAPMODE_LINEAR; + + renderer->blitLinearSampler = METAL_CreateSampler( + (SDL_GPURenderer *)renderer, + &createinfo); + + if (renderer->blitLinearSampler == NULL) { + SDL_LogError(SDL_LOG_CATEGORY_GPU, "Failed to create blit linear sampler!"); + } +} + +static void METAL_INTERNAL_DestroyBlitResources( + SDL_GPURenderer *driverData) +{ + MetalRenderer *renderer = (MetalRenderer *)driverData; + METAL_ReleaseSampler(driverData, renderer->blitLinearSampler); + METAL_ReleaseSampler(driverData, renderer->blitNearestSampler); + METAL_ReleaseShader(driverData, renderer->blitVertexShader); + METAL_ReleaseShader(driverData, renderer->blitFrom2DShader); + METAL_ReleaseShader(driverData, renderer->blitFrom2DArrayShader); + METAL_ReleaseShader(driverData, renderer->blitFrom3DShader); + METAL_ReleaseShader(driverData, renderer->blitFromCubeShader); + METAL_ReleaseShader(driverData, renderer->blitFromCubeArrayShader); + + for (Uint32 i = 0; i < renderer->blitPipelineCount; i += 1) { + METAL_ReleaseGraphicsPipeline(driverData, renderer->blitPipelines[i].pipeline); + } + SDL_free(renderer->blitPipelines); +} + +static SDL_GPUDevice *METAL_CreateDevice(bool debugMode, bool preferLowPower, SDL_PropertiesID props) +{ + @autoreleasepool { + MetalRenderer *renderer; + id device = NULL; + bool hasHardwareSupport = false; + + if (debugMode) { + /* Due to a Metal driver quirk, once a MTLDevice has been created + * with this environment variable set, the Metal validation layers + * will remain enabled for the rest of the application's lifespan, + * even if the device is destroyed and recreated. + */ + SDL_setenv_unsafe("MTL_DEBUG_LAYER", "1", 0); + } + + // Create the Metal device and command queue +#ifdef SDL_PLATFORM_MACOS + if (preferLowPower) { + NSArray> *devices = MTLCopyAllDevices(); + for (id candidate in devices) { + if (candidate.isLowPower) { + device = candidate; + break; + } + } + } +#endif + if (device == NULL) { + device = MTLCreateSystemDefaultDevice(); + if (device == NULL) { + SDL_SetError("Failed to create Metal device"); + return NULL; + } + } + +#ifdef SDL_PLATFORM_MACOS + hasHardwareSupport = true; + if (@available(macOS 10.15, *)) { + hasHardwareSupport = [device supportsFamily:MTLGPUFamilyMac2]; + } else if (@available(macOS 10.14, *)) { + hasHardwareSupport = [device supportsFeatureSet:MTLFeatureSet_macOS_GPUFamily2_v1]; + } +#else + if (@available(iOS 13.0, tvOS 13.0, *)) { + hasHardwareSupport = [device supportsFamily:MTLGPUFamilyApple3]; + } +#endif + + if (!hasHardwareSupport) { + SDL_SetError("Device does not meet the hardware requirements for SDL_GPU Metal"); + return NULL; + } + + // Allocate and zero out the renderer + renderer = (MetalRenderer *)SDL_calloc(1, sizeof(MetalRenderer)); + + renderer->device = device; + renderer->queue = [device newCommandQueue]; + + // Print driver info + SDL_LogInfo(SDL_LOG_CATEGORY_GPU, "SDL_GPU Driver: Metal"); + SDL_LogInfo( + SDL_LOG_CATEGORY_GPU, + "Metal Device: %s", + [device.name UTF8String]); + + // Remember debug mode + renderer->debugMode = debugMode; + renderer->allowedFramesInFlight = 2; + + // Set up colorspace array + SwapchainCompositionToColorSpace[0] = kCGColorSpaceSRGB; + SwapchainCompositionToColorSpace[1] = kCGColorSpaceSRGB; + SwapchainCompositionToColorSpace[2] = kCGColorSpaceExtendedLinearSRGB; + if (@available(macOS 11.0, iOS 14.0, tvOS 14.0, *)) { + SwapchainCompositionToColorSpace[3] = kCGColorSpaceITUR_2100_PQ; + } else { + SwapchainCompositionToColorSpace[3] = NULL; + } + + // Create mutexes + renderer->submitLock = SDL_CreateMutex(); + renderer->acquireCommandBufferLock = SDL_CreateMutex(); + renderer->acquireUniformBufferLock = SDL_CreateMutex(); + renderer->disposeLock = SDL_CreateMutex(); + renderer->fenceLock = SDL_CreateMutex(); + renderer->windowLock = SDL_CreateMutex(); + + // Create command buffer pool + METAL_INTERNAL_AllocateCommandBuffers(renderer, 2); + + // Create fence pool + renderer->availableFenceCapacity = 2; + renderer->availableFences = SDL_calloc( + renderer->availableFenceCapacity, sizeof(MetalFence *)); + + // Create uniform buffer pool + renderer->uniformBufferPoolCapacity = 32; + renderer->uniformBufferPoolCount = 32; + renderer->uniformBufferPool = SDL_calloc( + renderer->uniformBufferPoolCapacity, sizeof(MetalUniformBuffer *)); + + for (Uint32 i = 0; i < renderer->uniformBufferPoolCount; i += 1) { + renderer->uniformBufferPool[i] = METAL_INTERNAL_CreateUniformBuffer( + renderer, + UNIFORM_BUFFER_SIZE); + } + + // Create deferred destroy arrays + renderer->bufferContainersToDestroyCapacity = 2; + renderer->bufferContainersToDestroyCount = 0; + renderer->bufferContainersToDestroy = SDL_calloc( + renderer->bufferContainersToDestroyCapacity, sizeof(MetalBufferContainer *)); + + renderer->textureContainersToDestroyCapacity = 2; + renderer->textureContainersToDestroyCount = 0; + renderer->textureContainersToDestroy = SDL_calloc( + renderer->textureContainersToDestroyCapacity, sizeof(MetalTextureContainer *)); + + // Create claimed window list + renderer->claimedWindowCapacity = 1; + renderer->claimedWindows = SDL_calloc( + renderer->claimedWindowCapacity, sizeof(MetalWindowData *)); + + // Initialize blit resources + METAL_INTERNAL_InitBlitResources(renderer); + + SDL_GPUDevice *result = SDL_calloc(1, sizeof(SDL_GPUDevice)); + ASSIGN_DRIVER(METAL) + result->driverData = (SDL_GPURenderer *)renderer; + renderer->sdlGPUDevice = result; + + return result; + } +} + +SDL_GPUBootstrap MetalDriver = { + "metal", + SDL_GPU_SHADERFORMAT_MSL | SDL_GPU_SHADERFORMAT_METALLIB, + METAL_PrepareDriver, + METAL_CreateDevice +}; + +#endif // SDL_GPU_METAL -- cgit v1.2.3