From 5a079a2d114f96d4847d1ee305d5b7c16eeec50e Mon Sep 17 00:00:00 2001
From: 3gg <3gg@shellblade.net>
Date: Sat, 27 Dec 2025 12:03:39 -0800
Subject: Initial commit

---
 contrib/SDL-3.2.8/src/video/SDL_blit.h | 751 +++++++++++++++++++++++++++++++++
 1 file changed, 751 insertions(+)
 create mode 100644 contrib/SDL-3.2.8/src/video/SDL_blit.h

(limited to 'contrib/SDL-3.2.8/src/video/SDL_blit.h')

diff --git a/contrib/SDL-3.2.8/src/video/SDL_blit.h b/contrib/SDL-3.2.8/src/video/SDL_blit.h
new file mode 100644
index 0000000..714feeb
--- /dev/null
+++ b/contrib/SDL-3.2.8/src/video/SDL_blit.h
@@ -0,0 +1,751 @@
+/*
+  Simple DirectMedia Layer
+  Copyright (C) 1997-2025 Sam Lantinga <slouken@libsdl.org>
+
+  This software is provided 'as-is', without any express or implied
+  warranty.  In no event will the authors be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+*/
+#include "SDL_internal.h"
+
+#ifndef SDL_blit_h_
+#define SDL_blit_h_
+
+// Table to do pixel byte expansion
+extern const Uint8 *SDL_expand_byte[9];
+extern const Uint16 SDL_expand_byte_10[];
+
+// SDL blit copy flags
+#define SDL_COPY_MODULATE_COLOR         0x00000001
+#define SDL_COPY_MODULATE_ALPHA         0x00000002
+#define SDL_COPY_MODULATE_MASK		(SDL_COPY_MODULATE_COLOR | SDL_COPY_MODULATE_ALPHA)
+#define SDL_COPY_BLEND                  0x00000010
+#define SDL_COPY_BLEND_PREMULTIPLIED    0x00000020
+#define SDL_COPY_ADD                    0x00000040
+#define SDL_COPY_ADD_PREMULTIPLIED      0x00000080
+#define SDL_COPY_MOD                    0x00000100
+#define SDL_COPY_MUL                    0x00000200
+#define SDL_COPY_BLEND_MASK		(SDL_COPY_BLEND | SDL_COPY_BLEND_PREMULTIPLIED | SDL_COPY_ADD | SDL_COPY_ADD_PREMULTIPLIED | SDL_COPY_MOD | SDL_COPY_MUL)
+#define SDL_COPY_COLORKEY               0x00000400
+#define SDL_COPY_NEAREST                0x00000800
+#define SDL_COPY_RLE_DESIRED            0x00001000
+#define SDL_COPY_RLE_COLORKEY           0x00002000
+#define SDL_COPY_RLE_ALPHAKEY           0x00004000
+#define SDL_COPY_RLE_MASK               (SDL_COPY_RLE_DESIRED | SDL_COPY_RLE_COLORKEY | SDL_COPY_RLE_ALPHAKEY)
+
+// SDL blit CPU flags
+#define SDL_CPU_ANY                0x00000000
+#define SDL_CPU_MMX                0x00000001
+#define SDL_CPU_SSE                0x00000002
+#define SDL_CPU_SSE2               0x00000004
+#define SDL_CPU_ALTIVEC_PREFETCH   0x00000008
+#define SDL_CPU_ALTIVEC_NOPREFETCH 0x00000010
+
+typedef struct
+{
+    SDL_Surface *src_surface;
+    Uint8 *src;
+    int src_w, src_h;
+    int src_pitch;
+    int src_skip;
+    SDL_Surface *dst_surface;
+    Uint8 *dst;
+    int dst_w, dst_h;
+    int dst_pitch;
+    int dst_skip;
+    const SDL_PixelFormatDetails *src_fmt;
+    const SDL_Palette *src_pal;
+    const SDL_PixelFormatDetails *dst_fmt;
+    const SDL_Palette *dst_pal;
+    Uint8 *table;
+    SDL_HashTable *palette_map;
+    int flags;
+    Uint32 colorkey;
+    Uint8 r, g, b, a;
+} SDL_BlitInfo;
+
+typedef void (*SDL_BlitFunc)(SDL_BlitInfo *info);
+
+typedef struct
+{
+    SDL_PixelFormat src_format;
+    SDL_PixelFormat dst_format;
+    int flags;
+    unsigned int cpu;
+    SDL_BlitFunc func;
+} SDL_BlitFuncEntry;
+
+typedef bool (SDLCALL *SDL_Blit) (struct SDL_Surface *src, const SDL_Rect *srcrect, struct SDL_Surface *dst, const SDL_Rect *dstrect);
+
+// Blit mapping definition
+typedef struct SDL_BlitMap
+{
+    int identity;
+    SDL_Blit blit;
+    void *data;
+    SDL_BlitInfo info;
+
+    /* the version count matches the destination; mismatch indicates
+       an invalid mapping */
+    Uint32 dst_palette_version;
+    Uint32 src_palette_version;
+} SDL_BlitMap;
+
+// Functions found in SDL_blit.c
+extern bool SDL_CalculateBlit(SDL_Surface *surface, SDL_Surface *dst);
+
+/* Functions found in SDL_blit_*.c */
+extern SDL_BlitFunc SDL_CalculateBlit0(SDL_Surface *surface);
+extern SDL_BlitFunc SDL_CalculateBlit1(SDL_Surface *surface);
+extern SDL_BlitFunc SDL_CalculateBlitN(SDL_Surface *surface);
+extern SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface);
+
+/*
+ * Useful macros for blitting routines
+ */
+
+#ifdef __GNUC__
+#define DECLARE_ALIGNED(t, v, a) t __attribute__((aligned(a))) v
+#elif defined(_MSC_VER)
+#define DECLARE_ALIGNED(t, v, a) __declspec(align(a)) t v
+#else
+#define DECLARE_ALIGNED(t, v, a) t v
+#endif
+
+// Load pixel of the specified format from a buffer and get its R-G-B values
+#define RGB_FROM_PIXEL(Pixel, fmt, r, g, b)                                     \
+    {                                                                           \
+        r = SDL_expand_byte[fmt->Rbits][((Pixel & fmt->Rmask) >> fmt->Rshift)]; \
+        g = SDL_expand_byte[fmt->Gbits][((Pixel & fmt->Gmask) >> fmt->Gshift)]; \
+        b = SDL_expand_byte[fmt->Bbits][((Pixel & fmt->Bmask) >> fmt->Bshift)]; \
+    }
+#define RGB_FROM_RGB565(Pixel, r, g, b)                   \
+    {                                                     \
+        r = SDL_expand_byte[5][((Pixel & 0xF800) >> 11)]; \
+        g = SDL_expand_byte[6][((Pixel & 0x07E0) >> 5)];  \
+        b = SDL_expand_byte[5][(Pixel & 0x001F)];         \
+    }
+#define RGB_FROM_RGB555(Pixel, r, g, b)                   \
+    {                                                     \
+        r = SDL_expand_byte[5][((Pixel & 0x7C00) >> 10)]; \
+        g = SDL_expand_byte[5][((Pixel & 0x03E0) >> 5)];  \
+        b = SDL_expand_byte[5][(Pixel & 0x001F)];         \
+    }
+#define RGB_FROM_XRGB8888(Pixel, r, g, b) \
+    {                                   \
+        r = ((Pixel & 0xFF0000) >> 16); \
+        g = ((Pixel & 0xFF00) >> 8);    \
+        b = (Pixel & 0xFF);             \
+    }
+#define RETRIEVE_RGB_PIXEL(buf, bpp, Pixel)                \
+    do {                                                   \
+        switch (bpp) {                                     \
+        case 1:                                            \
+            Pixel = *((Uint8 *)(buf));                     \
+            break;                                         \
+                                                           \
+        case 2:                                            \
+            Pixel = *((Uint16 *)(buf));                    \
+            break;                                         \
+                                                           \
+        case 3:                                            \
+        {                                                  \
+            Uint8 *B = (Uint8 *)(buf);                     \
+            if (SDL_BYTEORDER == SDL_LIL_ENDIAN) {         \
+                Pixel = B[0] + (B[1] << 8) + (B[2] << 16); \
+            } else {                                       \
+                Pixel = (B[0] << 16) + (B[1] << 8) + B[2]; \
+            }                                              \
+        } break;                                           \
+                                                           \
+        case 4:                                            \
+            Pixel = *((Uint32 *)(buf));                    \
+            break;                                         \
+                                                           \
+        default:                                           \
+            Pixel = 0; /* stop gcc complaints */           \
+            break;                                         \
+        }                                                  \
+    } while (0)
+
+#define DISEMBLE_RGB(buf, bpp, fmt, Pixel, r, g, b) \
+    do {                                            \
+        switch (bpp) {                              \
+        case 1:                                     \
+            Pixel = *((Uint8 *)(buf));              \
+            RGB_FROM_PIXEL(Pixel, fmt, r, g, b);    \
+            break;                                  \
+                                                    \
+        case 2:                                     \
+            Pixel = *((Uint16 *)(buf));             \
+            RGB_FROM_PIXEL(Pixel, fmt, r, g, b);    \
+            break;                                  \
+                                                    \
+        case 3:                                     \
+        {                                           \
+            Pixel = 0;                              \
+            if (SDL_BYTEORDER == SDL_LIL_ENDIAN) {  \
+                r = *((buf) + fmt->Rshift / 8);     \
+                g = *((buf) + fmt->Gshift / 8);     \
+                b = *((buf) + fmt->Bshift / 8);     \
+            } else {                                \
+                r = *((buf) + 2 - fmt->Rshift / 8); \
+                g = *((buf) + 2 - fmt->Gshift / 8); \
+                b = *((buf) + 2 - fmt->Bshift / 8); \
+            }                                       \
+        } break;                                    \
+                                                    \
+        case 4:                                     \
+            Pixel = *((Uint32 *)(buf));             \
+            RGB_FROM_PIXEL(Pixel, fmt, r, g, b);    \
+            break;                                  \
+                                                    \
+        default:                                    \
+            /* stop gcc complaints */               \
+            Pixel = 0;                              \
+            r = g = b = 0;                          \
+            break;                                  \
+        }                                           \
+    } while (0)
+
+// Assemble R-G-B values into a specified pixel format and store them
+#define PIXEL_FROM_RGB(Pixel, fmt, r, g, b)                 \
+    {                                                       \
+        Pixel = ((r >> (8 - fmt->Rbits)) << fmt->Rshift) |  \
+                ((g >> (8 - fmt->Gbits)) << fmt->Gshift) |  \
+                ((b >> (8 - fmt->Bbits)) << fmt->Bshift) |  \
+                fmt->Amask;                                 \
+    }
+#define RGB332_FROM_RGB(Pixel, r, g, b)                        \
+    {                                                          \
+        Pixel = (Uint8)(((r >> 5) << 5) | ((g >> 5) << 2) | (b >> 6)); \
+    }
+#define RGB565_FROM_RGB(Pixel, r, g, b)                        \
+    {                                                          \
+        Pixel = (Uint16)(((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3)); \
+    }
+#define RGB555_FROM_RGB(Pixel, r, g, b)                        \
+    {                                                          \
+        Pixel = (Uint16)(((r >> 3) << 10) | ((g >> 3) << 5) | (b >> 3)); \
+    }
+#define XRGB8888_FROM_RGB(Pixel, r, g, b)   \
+    {                                     \
+        Pixel = (r << 16) | (g << 8) | b; \
+    }
+#define ARGB8888_FROM_RGBA(Pixel, r, g, b, a)         \
+    {                                                 \
+        Pixel = (a << 24) | (r << 16) | (g << 8) | b; \
+    }
+#define RGBA8888_FROM_RGBA(Pixel, r, g, b, a)         \
+    {                                                 \
+        Pixel = (r << 24) | (g << 16) | (b << 8) | a; \
+    }
+#define ABGR8888_FROM_RGBA(Pixel, r, g, b, a)         \
+    {                                                 \
+        Pixel = (a << 24) | (b << 16) | (g << 8) | r; \
+    }
+#define BGRA8888_FROM_RGBA(Pixel, r, g, b, a)         \
+    {                                                 \
+        Pixel = (b << 24) | (g << 16) | (r << 8) | a; \
+    }
+#define ARGB2101010_FROM_RGBA(Pixel, r, g, b, a)       \
+    {                                                  \
+        r = r ? ((r << 2) | 0x3) : 0;                  \
+        g = g ? ((g << 2) | 0x3) : 0;                  \
+        b = b ? ((b << 2) | 0x3) : 0;                  \
+        a = (a * 3) / 255;                             \
+        Pixel = (a << 30) | (r << 20) | (g << 10) | b; \
+    }
+#define ARGB2101010_FROM_RGBAFLOAT(Pixel, r, g, b, a) \
+    {                                                 \
+        r = SDL_clamp(r, 0.0f, 1.0f) * 1023.0f;       \
+        g = SDL_clamp(g, 0.0f, 1.0f) * 1023.0f;       \
+        b = SDL_clamp(b, 0.0f, 1.0f) * 1023.0f;       \
+        a = SDL_clamp(a, 0.0f, 1.0f) * 3.0f;          \
+        Pixel = (((Uint32)SDL_roundf(a)) << 30) |     \
+                (((Uint32)SDL_roundf(r)) << 20) |     \
+                (((Uint32)SDL_roundf(g)) << 10) |     \
+                (Uint32)SDL_roundf(b);                \
+    }
+#define ABGR2101010_FROM_RGBA(Pixel, r, g, b, a)       \
+    {                                                  \
+        r = r ? ((r << 2) | 0x3) : 0;                  \
+        g = g ? ((g << 2) | 0x3) : 0;                  \
+        b = b ? ((b << 2) | 0x3) : 0;                  \
+        a = (a * 3) / 255;                             \
+        Pixel = (a << 30) | (b << 20) | (g << 10) | r; \
+    }
+#define ABGR2101010_FROM_RGBAFLOAT(Pixel, r, g, b, a) \
+    {                                                 \
+        r = SDL_clamp(r, 0.0f, 1.0f) * 1023.0f;       \
+        g = SDL_clamp(g, 0.0f, 1.0f) * 1023.0f;       \
+        b = SDL_clamp(b, 0.0f, 1.0f) * 1023.0f;       \
+        a = SDL_clamp(a, 0.0f, 1.0f) * 3.0f;          \
+        Pixel = (((Uint32)SDL_roundf(a)) << 30) |     \
+                (((Uint32)SDL_roundf(b)) << 20) |     \
+                (((Uint32)SDL_roundf(g)) << 10) |     \
+                (Uint32)SDL_roundf(r);                \
+    }
+#define ASSEMBLE_RGB(buf, bpp, fmt, r, g, b)        \
+    {                                               \
+        switch (bpp) {                              \
+        case 1:                                     \
+        {                                           \
+            Uint8 _pixel;                           \
+                                                    \
+            PIXEL_FROM_RGB(_pixel, fmt, r, g, b);   \
+            *((Uint8 *)(buf)) = _pixel;             \
+        } break;                                    \
+                                                    \
+        case 2:                                     \
+        {                                           \
+            Uint16 _pixel;                          \
+                                                    \
+            PIXEL_FROM_RGB(_pixel, fmt, r, g, b);   \
+            *((Uint16 *)(buf)) = _pixel;            \
+        } break;                                    \
+                                                    \
+        case 3:                                     \
+        {                                           \
+            if (SDL_BYTEORDER == SDL_LIL_ENDIAN) {  \
+                *((buf) + fmt->Rshift / 8) = r;     \
+                *((buf) + fmt->Gshift / 8) = g;     \
+                *((buf) + fmt->Bshift / 8) = b;     \
+            } else {                                \
+                *((buf) + 2 - fmt->Rshift / 8) = r; \
+                *((buf) + 2 - fmt->Gshift / 8) = g; \
+                *((buf) + 2 - fmt->Bshift / 8) = b; \
+            }                                       \
+        } break;                                    \
+                                                    \
+        case 4:                                     \
+        {                                           \
+            Uint32 _pixel;                          \
+                                                    \
+            PIXEL_FROM_RGB(_pixel, fmt, r, g, b);   \
+            *((Uint32 *)(buf)) = _pixel;            \
+        } break;                                    \
+        }                                           \
+    }
+
+// FIXME: Should we rescale alpha into 0..255 here?
+#define RGBA_FROM_PIXEL(Pixel, fmt, r, g, b, a)                                 \
+    {                                                                           \
+        r = SDL_expand_byte[fmt->Rbits][((Pixel & fmt->Rmask) >> fmt->Rshift)]; \
+        g = SDL_expand_byte[fmt->Gbits][((Pixel & fmt->Gmask) >> fmt->Gshift)]; \
+        b = SDL_expand_byte[fmt->Bbits][((Pixel & fmt->Bmask) >> fmt->Bshift)]; \
+        a = SDL_expand_byte[fmt->Abits][((Pixel & fmt->Amask) >> fmt->Ashift)]; \
+    }
+#define RGBA_FROM_8888(Pixel, fmt, r, g, b, a)   \
+    {                                            \
+        r = (Pixel & fmt->Rmask) >> fmt->Rshift; \
+        g = (Pixel & fmt->Gmask) >> fmt->Gshift; \
+        b = (Pixel & fmt->Bmask) >> fmt->Bshift; \
+        a = (Pixel & fmt->Amask) >> fmt->Ashift; \
+    }
+#define RGBA_FROM_RGBA8888(Pixel, r, g, b, a) \
+    {                                         \
+        r = (Pixel >> 24);                    \
+        g = ((Pixel >> 16) & 0xFF);           \
+        b = ((Pixel >> 8) & 0xFF);            \
+        a = (Pixel & 0xFF);                   \
+    }
+#define RGBA_FROM_ARGB8888(Pixel, r, g, b, a) \
+    {                                         \
+        r = ((Pixel >> 16) & 0xFF);           \
+        g = ((Pixel >> 8) & 0xFF);            \
+        b = (Pixel & 0xFF);                   \
+        a = (Pixel >> 24);                    \
+    }
+#define RGBA_FROM_ABGR8888(Pixel, r, g, b, a) \
+    {                                         \
+        r = (Pixel & 0xFF);                   \
+        g = ((Pixel >> 8) & 0xFF);            \
+        b = ((Pixel >> 16) & 0xFF);           \
+        a = (Pixel >> 24);                    \
+    }
+#define RGBA_FROM_BGRA8888(Pixel, r, g, b, a) \
+    {                                         \
+        r = ((Pixel >> 8) & 0xFF);            \
+        g = ((Pixel >> 16) & 0xFF);           \
+        b = (Pixel >> 24);                    \
+        a = (Pixel & 0xFF);                   \
+    }
+#define RGBA_FROM_ARGB2101010(Pixel, r, g, b, a) \
+    {                                            \
+        r = ((Pixel >> 22) & 0xFF);              \
+        g = ((Pixel >> 12) & 0xFF);              \
+        b = ((Pixel >> 2) & 0xFF);               \
+        a = SDL_expand_byte[2][(Pixel >> 30)];   \
+    }
+#define RGBAFLOAT_FROM_ARGB2101010(Pixel, r, g, b, a)   \
+    {                                                   \
+        r = (float)((Pixel >> 20) & 0x3FF) / 1023.0f;   \
+        g = (float)((Pixel >> 10) & 0x3FF) / 1023.0f;   \
+        b = (float)((Pixel >> 0) & 0x3FF) / 1023.0f;    \
+        a = (float)(Pixel >> 30) / 3.0f;                \
+    }
+#define RGBA_FROM_ABGR2101010(Pixel, r, g, b, a) \
+    {                                            \
+        r = ((Pixel >> 2) & 0xFF);               \
+        g = ((Pixel >> 12) & 0xFF);              \
+        b = ((Pixel >> 22) & 0xFF);              \
+        a = SDL_expand_byte[2][(Pixel >> 30)];   \
+    }
+#define RGBAFLOAT_FROM_ABGR2101010(Pixel, r, g, b, a)   \
+    {                                                   \
+        r = (float)((Pixel >> 0) & 0x3FF) / 1023.0f;    \
+        g = (float)((Pixel >> 10) & 0x3FF) / 1023.0f;   \
+        b = (float)((Pixel >> 20) & 0x3FF) / 1023.0f;   \
+        a = (float)(Pixel >> 30) / 3.0f;                \
+    }
+#define DISEMBLE_RGBA(buf, bpp, fmt, Pixel, r, g, b, a) \
+    do {                                                \
+        switch (bpp) {                                  \
+        case 1:                                         \
+            Pixel = *((Uint8 *)(buf));                  \
+            RGBA_FROM_PIXEL(Pixel, fmt, r, g, b, a);    \
+            break;                                      \
+                                                        \
+        case 2:                                         \
+            Pixel = *((Uint16 *)(buf));                 \
+            RGBA_FROM_PIXEL(Pixel, fmt, r, g, b, a);    \
+            break;                                      \
+                                                        \
+        case 3:                                         \
+        {                                               \
+            Pixel = 0;                                  \
+            if (SDL_BYTEORDER == SDL_LIL_ENDIAN) {      \
+                r = *((buf) + fmt->Rshift / 8);         \
+                g = *((buf) + fmt->Gshift / 8);         \
+                b = *((buf) + fmt->Bshift / 8);         \
+            } else {                                    \
+                r = *((buf) + 2 - fmt->Rshift / 8);     \
+                g = *((buf) + 2 - fmt->Gshift / 8);     \
+                b = *((buf) + 2 - fmt->Bshift / 8);     \
+            }                                           \
+            a = 0xFF;                                   \
+        } break;                                        \
+                                                        \
+        case 4:                                         \
+            Pixel = *((Uint32 *)(buf));                 \
+            RGBA_FROM_PIXEL(Pixel, fmt, r, g, b, a);    \
+            break;                                      \
+                                                        \
+        default:                                        \
+            /* stop gcc complaints */                   \
+            Pixel = 0;                                  \
+            r = g = b = a = 0;                          \
+            break;                                      \
+        }                                               \
+    } while (0)
+
+// FIXME: this isn't correct, especially for Alpha (maximum != 255)
+#define PIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a)             \
+    {                                                       \
+        Pixel = ((r >> (8 - fmt->Rbits)) << fmt->Rshift) |  \
+                ((g >> (8 - fmt->Gbits)) << fmt->Gshift) |  \
+                ((b >> (8 - fmt->Bbits)) << fmt->Bshift) |  \
+                ((a >> (8 - fmt->Abits)) << fmt->Ashift);   \
+    }
+#define ASSEMBLE_RGBA(buf, bpp, fmt, r, g, b, a)      \
+    {                                                 \
+        switch (bpp) {                                \
+        case 1:                                       \
+        {                                             \
+            Uint8 _pixel;                             \
+                                                      \
+            PIXEL_FROM_RGBA(_pixel, fmt, r, g, b, a); \
+            *((Uint8 *)(buf)) = _pixel;               \
+        } break;                                      \
+                                                      \
+        case 2:                                       \
+        {                                             \
+            Uint16 _pixel;                            \
+                                                      \
+            PIXEL_FROM_RGBA(_pixel, fmt, r, g, b, a); \
+            *((Uint16 *)(buf)) = _pixel;              \
+        } break;                                      \
+                                                      \
+        case 3:                                       \
+        {                                             \
+            if (SDL_BYTEORDER == SDL_LIL_ENDIAN) {    \
+                *((buf) + fmt->Rshift / 8) = r;       \
+                *((buf) + fmt->Gshift / 8) = g;       \
+                *((buf) + fmt->Bshift / 8) = b;       \
+            } else {                                  \
+                *((buf) + 2 - fmt->Rshift / 8) = r;   \
+                *((buf) + 2 - fmt->Gshift / 8) = g;   \
+                *((buf) + 2 - fmt->Bshift / 8) = b;   \
+            }                                         \
+        } break;                                      \
+                                                      \
+        case 4:                                       \
+        {                                             \
+            Uint32 _pixel;                            \
+                                                      \
+            PIXEL_FROM_RGBA(_pixel, fmt, r, g, b, a); \
+            *((Uint32 *)(buf)) = _pixel;              \
+        } break;                                      \
+        }                                             \
+    }
+
+// Convert any 32-bit 4-bpp pixel to ARGB format
+#define PIXEL_TO_ARGB_PIXEL(src, srcfmt, dst)         \
+    do {                                              \
+        Uint8 a, r, g, b;                         \
+        RGBA_FROM_PIXEL(src, srcfmt, r, g, b, a); \
+        dst = a << 24 | r << 16 | g << 8 | b;     \
+    } while (0)
+// Blend a single color channel or alpha value
+/* dC = ((sC * sA) + (dC * (255 - sA))) / 255 */
+#define ALPHA_BLEND_CHANNEL(sC, dC, sA)                  \
+    do {                                                 \
+        Uint16 x;                                        \
+        x = ((sC - dC) * sA) + ((dC << 8) - dC);         \
+        x += 0x1U;                                       \
+        x += x >> 8;                                     \
+        dC = x >> 8;                                     \
+    } while (0)
+// Perform a division by 255 after a multiplication of two 8-bit color channels
+/* out = (sC * dC) / 255 */
+#define MULT_DIV_255(sC, dC, out) \
+    do {                          \
+        Uint16 x = sC * dC;       \
+        x += 0x1U;                \
+        x += x >> 8;              \
+        out = x >> 8;             \
+    } while (0)
+// Blend the RGB values of two pixels with an alpha value
+#define ALPHA_BLEND_RGB(sR, sG, sB, A, dR, dG, dB)            \
+    do {                                                      \
+        ALPHA_BLEND_CHANNEL(sR, dR, A);                       \
+        ALPHA_BLEND_CHANNEL(sG, dG, A);                       \
+        ALPHA_BLEND_CHANNEL(sB, dB, A);                       \
+    } while (0)
+
+// Blend two 8888 pixels with the same format
+/* Calculates dst = ((src * factor) + (dst * (255 - factor))) / 255 */
+// FIXME: SDL_SIZE_MAX might not be an integer literal
+#if defined(SIZE_MAX) && (SIZE_MAX == 0xffffffffffffffff)
+#define FACTOR_BLEND_8888(src, dst, factor)                        \
+    do {                                                           \
+        Uint64 src64 = src;                                        \
+        src64 = (src64 | (src64 << 24)) & 0x00FF00FF00FF00FF;      \
+                                                                   \
+        Uint64 dst64 = dst;                                        \
+        dst64 = (dst64 | (dst64 << 24)) & 0x00FF00FF00FF00FF;      \
+                                                                   \
+        dst64 = ((src64 - dst64) * factor) + (dst64 << 8) - dst64; \
+        dst64 += 0x0001000100010001;                               \
+        dst64 += (dst64 >> 8) & 0x00FF00FF00FF00FF;                \
+        dst64 &= 0xFF00FF00FF00FF00;                               \
+                                                                   \
+        dst = (Uint32)((dst64 >> 8) | (dst64 >> 32));              \
+    } while (0)
+#else
+#define FACTOR_BLEND_8888(src, dst, factor)                               \
+    do {                                                                  \
+        Uint32 src02 = src & 0x00FF00FF;                                  \
+        Uint32 dst02 = dst & 0x00FF00FF;                                  \
+                                                                          \
+        Uint32 src13 = (src >> 8) & 0x00FF00FF;                           \
+        Uint32 dst13 = (dst >> 8) & 0x00FF00FF;                           \
+                                                                          \
+        Uint32 res02 = ((src02 - dst02) * factor) + (dst02 << 8) - dst02; \
+        res02 += 0x00010001;                                              \
+        res02 += (res02 >> 8) & 0x00FF00FF;                               \
+        res02 = (res02 >> 8) & 0x00FF00FF;                                \
+                                                                          \
+        Uint32 res13 = ((src13 - dst13) * factor) + (dst13 << 8) - dst13; \
+        res13 += 0x00010001;                                              \
+        res13 += (res13 >> 8) & 0x00FF00FF;                               \
+        res13 &= 0xFF00FF00;                                              \
+        dst = res02 | res13;                                              \
+    } while (0)
+#endif
+
+// Alpha blend two 8888 pixels with the same formats.
+#define ALPHA_BLEND_8888(src, dst, fmt)            \
+    do {                                           \
+        Uint32 srcA = (src >> fmt->Ashift) & 0xFF; \
+        Uint32 tmp = src | fmt->Amask;             \
+        FACTOR_BLEND_8888(tmp, dst, srcA);         \
+    } while (0)
+
+// Alpha blend two 8888 pixels with differing formats.
+#define ALPHA_BLEND_SWIZZLE_8888(src, dst, srcfmt, dstfmt)                  \
+    do {                                                                    \
+        Uint32 srcA = (src >> srcfmt->Ashift) & 0xFF;                       \
+        Uint32 tmp = (((src >> srcfmt->Rshift) & 0xFF) << dstfmt->Rshift) | \
+                     (((src >> srcfmt->Gshift) & 0xFF) << dstfmt->Gshift) | \
+                     (((src >> srcfmt->Bshift) & 0xFF) << dstfmt->Bshift) | \
+                     dstfmt->Amask;                                         \
+        FACTOR_BLEND_8888(tmp, dst, srcA);                                  \
+    } while (0)
+// Blend the RGBA values of two pixels
+#define ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA) \
+    do {                                                 \
+        ALPHA_BLEND_CHANNEL(sR, dR, sA);                 \
+        ALPHA_BLEND_CHANNEL(sG, dG, sA);                 \
+        ALPHA_BLEND_CHANNEL(sB, dB, sA);                 \
+        ALPHA_BLEND_CHANNEL(255, dA, sA);                \
+    } while (0)
+
+// This is a very useful loop for optimizing blitters
+#if defined(_MSC_VER) && (_MSC_VER == 1300)
+// There's a bug in the Visual C++ 7 optimizer when compiling this code
+#else
+#define USE_DUFFS_LOOP
+#endif
+
+#define DUFFS_LOOP1(pixel_copy_increment, width) \
+    {                                            \
+        int n;                                   \
+        for (n = width; n > 0; --n) {            \
+            pixel_copy_increment;                \
+        }                                        \
+    }
+
+#ifdef USE_DUFFS_LOOP
+
+// 8-times unrolled loop
+#define DUFFS_LOOP8(pixel_copy_increment, width) \
+    {                                            \
+        int n = (width + 7) / 8;                 \
+        switch (width & 7) {                     \
+        case 0:                                  \
+            do {                                 \
+                pixel_copy_increment;            \
+                SDL_FALLTHROUGH;                 \
+            case 7:                              \
+                pixel_copy_increment;            \
+                SDL_FALLTHROUGH;                 \
+            case 6:                              \
+                pixel_copy_increment;            \
+                SDL_FALLTHROUGH;                 \
+            case 5:                              \
+                pixel_copy_increment;            \
+                SDL_FALLTHROUGH;                 \
+            case 4:                              \
+                pixel_copy_increment;            \
+                SDL_FALLTHROUGH;                 \
+            case 3:                              \
+                pixel_copy_increment;            \
+                SDL_FALLTHROUGH;                 \
+            case 2:                              \
+                pixel_copy_increment;            \
+                SDL_FALLTHROUGH;                 \
+            case 1:                              \
+                pixel_copy_increment;            \
+            } while (--n > 0);                   \
+        }                                        \
+    }
+
+// 4-times unrolled loop
+#define DUFFS_LOOP4(pixel_copy_increment, width) \
+    {                                            \
+        int n = (width + 3) / 4;                 \
+        switch (width & 3) {                     \
+        case 0:                                  \
+            do {                                 \
+                pixel_copy_increment;            \
+                SDL_FALLTHROUGH;                 \
+            case 3:                              \
+                pixel_copy_increment;            \
+                SDL_FALLTHROUGH;                 \
+            case 2:                              \
+                pixel_copy_increment;            \
+                SDL_FALLTHROUGH;                 \
+            case 1:                              \
+                pixel_copy_increment;            \
+            } while (--n > 0);                   \
+        }                                        \
+    }
+
+// 2-times unrolled loop
+#define DUFFS_LOOP2(pixel_copy_increment, width) \
+    {                                            \
+        int n = (width + 1) / 2;                 \
+        switch (width & 1) {                     \
+        case 0:                                  \
+            do {                                 \
+                pixel_copy_increment;            \
+                SDL_FALLTHROUGH;                 \
+            case 1:                              \
+                pixel_copy_increment;            \
+            } while (--n > 0);                   \
+        }                                        \
+    }
+
+// Use the 4-times version of the loop by default
+#define DUFFS_LOOP(pixel_copy_increment, width) \
+    DUFFS_LOOP4(pixel_copy_increment, width)
+// Use the 8-times version of the loop for simple routines
+#define DUFFS_LOOP_TRIVIAL(pixel_copy_increment, width) \
+    DUFFS_LOOP8(pixel_copy_increment, width)
+
+// Special version of Duff's device for even more optimization
+#define DUFFS_LOOP_124(pixel_copy_increment1,        \
+                       pixel_copy_increment2,        \
+                       pixel_copy_increment4, width) \
+    {                                                \
+        int n = width;                               \
+        if (n & 1) {                                 \
+            pixel_copy_increment1;                   \
+            n -= 1;                                  \
+        }                                            \
+        if (n & 2) {                                 \
+            pixel_copy_increment2;                   \
+            n -= 2;                                  \
+        }                                            \
+        if (n & 4) {                                 \
+            pixel_copy_increment4;                   \
+            n -= 4;                                  \
+        }                                            \
+        if (n) {                                     \
+            n /= 8;                                  \
+            do {                                     \
+                pixel_copy_increment4;               \
+                pixel_copy_increment4;               \
+            } while (--n > 0);                       \
+        }                                            \
+    }
+
+#else
+
+// Don't use Duff's device to unroll loops
+#define DUFFS_LOOP(pixel_copy_increment, width) \
+    DUFFS_LOOP1(pixel_copy_increment, width)
+#define DUFFS_LOOP_TRIVIAL(pixel_copy_increment, width) \
+    DUFFS_LOOP1(pixel_copy_increment, width)
+#define DUFFS_LOOP8(pixel_copy_increment, width) \
+    DUFFS_LOOP1(pixel_copy_increment, width)
+#define DUFFS_LOOP4(pixel_copy_increment, width) \
+    DUFFS_LOOP1(pixel_copy_increment, width)
+#define DUFFS_LOOP2(pixel_copy_increment, width) \
+    DUFFS_LOOP1(pixel_copy_increment, width)
+#define DUFFS_LOOP_124(pixel_copy_increment1,        \
+                       pixel_copy_increment2,        \
+                       pixel_copy_increment4, width) \
+    DUFFS_LOOP1(pixel_copy_increment1, width)
+
+#endif // USE_DUFFS_LOOP
+
+#if defined(_MSC_VER) && (_MSC_VER >= 600)
+#pragma warning(disable : 4244) // '=': conversion from 'X' to 'Y', possible loss of data
+#endif
+
+#endif // SDL_blit_h_
-- 
cgit v1.2.3