/*
Matrices:
  - Column-major math convention.
  - Column-major memory storage.

Coordinate systems:
  - Right-handed.
  - NDC in [-1, +1].
  - Viewport goes up and to the right.
  - Window goes down and to the right.
  - (x,y) is the center of a pixel.
    - Top-left:     (x - 1/2, y - 1/2)
    - Bottom-right: (x + 1/2, y + 1/2)
*/
#include <swgfx.h>

#include <assert.h>
#include <math.h>   // sqrt
#include <stdint.h>
#include <string.h>

// Id of the built-in default texture. It occupies the register slot just past
// the ids accepted by sgTextureRegister(), which are [0, SWGFX_MAX_TEXTURES).
static constexpr sgTextureId DefaultTextureId = SWGFX_MAX_TEXTURES;
// Number of slots in the texture register: SWGFX_MAX_TEXTURES plus the default.
static constexpr size_t SWGFX_TEXTURE_REGISTER_SIZE = SWGFX_MAX_TEXTURES + 1;

// Value written to every depth-buffer entry by sgClear().
static constexpr R DepthClearValue = 1.0f;

// Up direction (+Y) handed to Mat4Look() by sgView().
static constexpr sgVec3 Up3 = (sgVec3){0,1,0};

// Viewport rectangle: origin (x0, y0) plus size, as stored by sgViewport().
typedef struct sgViewport_t { int x0, y0, width, height; } sgViewport_t;
// 2D axis-aligned bounding box described by its component-wise min and max.
typedef struct sgAABB2      { sgVec2 pmin, pmax; }         sgAABB2;

// 4x4 matrix. Column-major math, column-major storage: val[c] holds column c,
// so the element at (row r, column c) is val[c][r] (see Mat4At()).
typedef struct sgMat4 {
  R val[4][4]; // (col, row)
} sgMat4;

// One entry of the texture register: an image plus the filter used to sample
// it. The image is referenced, not copied.
typedef struct sgTexture {
  const sgImage*  image;  // Pixel data to sample from.
  sgTextureFilter filter; // Filter passed to Sample() for this texture.
} sgTexture;

// Renderer state. The per-pixel buffers (colour, depth, texture id, texture
// coords) each hold dims.x * dims.y entries and are carved out of the caller's
// memory block by sgNew().
typedef struct swgfx {
  sgVec2i       dims;      // Colour buffer dimensions.
  sgPixel*      colour;    // Colour buffer.
  R*            depth;     // Depth  buffer.
  sgTextureId*  texture;   // Texture ID buffer.
  sgVec2*       texcoords; // Texture coords buffer.
  sgViewport_t  viewport;
  sgMat4        model;     // Model matrix.
  sgMat4        view;      // View matrix.
  sgMat4        proj;      // Projection matrix.
  // Pre-multiplied matrices.
  // The model matrix changes once per object, more frequently than view or
  // projection. View and projection are expected to change infrequently, maybe
  // once per frame.
  // Make it so that changing the model matrix only requires one matrix
  // multiplication (mvp = viewProj * model) and not two
  // (mvp = proj * view * model) before rendering the model's triangles.
  sgMat4          viewProj;        // View-projection matrix (proj * view).
  sgMat4          mvp;             // Model-view-projection matrix (viewProj * model).
  sgTexture*      textureRegister; // Indexed by texture id.
  sgTextureId     activeTexture;
  sgPixel         defaultPixel;    // The single-pixel of the default texture.
  sgImage         defaultImage;    // Image for the default texture.
  sgCounters      counters;
} swgfx;

// Wrap 'a' into [0, m) for positive m, so negative values wrap around instead
// of yielding a negative remainder.
static inline int mod(int a, int m) {
  const int remainder = a % m;
  return (m + remainder) % m;
}
// Part of 'a' left after removing its integer part (truncated towards zero),
// so the result keeps the sign of 'a'.
static inline R frac(R a) {
  const int whole = (int)a;
  return a - (R)whole;
}
// Smaller of two ints.
static inline int imin(int a, int b) {
  if (a <= b) {
    return a;
  }
  return b;
}
// Larger of two ints.
static inline int imax(int a, int b) {
  if (a >= b) {
    return a;
  }
  return b;
}

// Smaller of two reals; returns 'b' when the comparison fails.
static inline R rmin(R a, R b) {
  if (a <= b) {
    return a;
  }
  return b;
}
// Larger of two reals; returns 'b' when the comparison fails.
static inline R rmax(R a, R b) {
  if (a >= b) {
    return a;
  }
  return b;
}
// Linear interpolation: 'a' at t = 0, 'b' at t = 1.
static inline R lerp(R a, R b, R t) {
  const R span = b - a;
  return a + t * span;
}
// Scalar floating-point counterpart of mod(): wrap 'a' into [0, m) for
// positive m. The outer wrap uses 'm' (it was hard-coded to 1), so the result
// is a true modulo for any positive 'm'; for m = 1 the result is unchanged.
static inline R   mod1(R a, R m) { return fmodf(m + fmodf(a, m), m); }

// Component-wise minimum of two integer vectors.
static inline sgVec2i min2i(sgVec2i a, sgVec2i b) {
  return (sgVec2i){imin(a.x, b.x), imin(a.y, b.y)};
}
// Component-wise maximum of two integer vectors.
static inline sgVec2i max2i(sgVec2i a, sgVec2i b) {
  return (sgVec2i){imax(a.x, b.x), imax(a.y, b.y)};
}
// Component-wise minimum of two real vectors.
static inline sgVec2 min2(sgVec2 a, sgVec2 b) {
  return (sgVec2){rmin(a.x, b.x), rmin(a.y, b.y)};
}
// Component-wise maximum of two real vectors.
static inline sgVec2 max2(sgVec2 a, sgVec2 b) {
  return (sgVec2){rmax(a.x, b.x), rmax(a.y, b.y)};
}
// a + b.
static inline sgVec2 add2(sgVec2 a, sgVec2 b) {
  return (sgVec2){.x = a.x + b.x, .y = a.y + b.y};
}
// a - b.
static inline sgVec2 sub2(sgVec2 a, sgVec2 b) {
  return (sgVec2){.x = a.x - b.x, .y = a.y - b.y};
}
// v scaled uniformly by s.
static inline sgVec2 scale2(sgVec2 v, R s) {
  return (sgVec2){.x = v.x * s, .y = v.y * s};
}
// frac() applied to each component.
static inline sgVec2 frac2(sgVec2 v) {
  return (sgVec2){.x = frac(v.x), .y = frac(v.y)};
}
// Linear interpolation from 'a' (t = 0) to 'b' (t = 1).
static inline sgVec2 lerp2(sgVec2 a, sgVec2 b, R t) {
  const sgVec2 delta = sub2(b, a);
  return add2(a, scale2(delta, t));
}
// mod1() applied to each component.
static inline sgVec2 mod2(sgVec2 v, R m) {
  return (sgVec2){.x = mod1(v.x, m), .y = mod1(v.y, m)};
}

// a + b.
static inline sgVec3 add3(sgVec3 a, sgVec3 b) {
  return (sgVec3){.x = a.x + b.x, .y = a.y + b.y, .z = a.z + b.z};
}
// -v.
static inline sgVec3 neg3(sgVec3 v) {
  return (sgVec3){.x = -v.x, .y = -v.y, .z = -v.z};
}
// a - b.
static inline sgVec3 sub3(sgVec3 a, sgVec3 b) {
  return (sgVec3){.x = a.x - b.x, .y = a.y - b.y, .z = a.z - b.z};
}
// Component-wise quotient a / b.
static inline sgVec3 div3(sgVec3 a, sgVec3 b) {
  return (sgVec3){.x = a.x / b.x, .y = a.y / b.y, .z = a.z / b.z};
}
// v scaled uniformly by s.
static inline sgVec3 scale3(sgVec3 v, R s) {
  return (sgVec3){.x = v.x * s, .y = v.y * s, .z = v.z * s};
}
// Each component of v raised to the power 'exp'.
static inline sgVec3 exp3(sgVec3 v, R exp) {
  return (sgVec3){.x = powf(v.x, exp), .y = powf(v.y, exp), .z = powf(v.z, exp)};
}
// Dot product a . b.
static inline R dot3(sgVec3 a, sgVec3 b) {
  const R xx = a.x * b.x;
  const R yy = a.y * b.y;
  const R zz = a.z * b.z;
  return xx + yy + zz;
}
// Squared Euclidean length of v.
static inline R normsq3(sgVec3 v) {
  const R xx = v.x * v.x;
  const R yy = v.y * v.y;
  const R zz = v.z * v.z;
  return xx + yy + zz;
}
// Euclidean length of v.
static inline R norm3(sgVec3 v) {
  const R lengthSquared = normsq3(v);
  return (R)sqrt(lengthSquared);
}

// a + b.
static inline sgVec4 add4(sgVec4 a, sgVec4 b) {
  return (sgVec4){.x = a.x + b.x, .y = a.y + b.y, .z = a.z + b.z, .w = a.w + b.w};
}
// a - b.
static inline sgVec4 sub4(sgVec4 a, sgVec4 b) {
  return (sgVec4){.x = a.x - b.x, .y = a.y - b.y, .z = a.z - b.z, .w = a.w - b.w};
}
// v scaled uniformly by s.
static inline sgVec4 scale4(sgVec4 v, R s) {
  return (sgVec4){.x = v.x * s, .y = v.y * s, .z = v.z * s, .w = v.w * s};
}
// Component-wise linear interpolation from 'a' (t = 0) to 'b' (t = 1).
static inline sgVec4 lerp4(sgVec4 a, sgVec4 b, R t) {
  return (sgVec4){
    lerp(a.x, b.x, t),
    lerp(a.y, b.y, t),
    lerp(a.z, b.z, t),
    lerp(a.w, b.w, t)};
}

/// 2D "curl" of 'a' towards 'b': the determinant det(a,b), which equals the
/// z-coordinate of the 3D cross product a x b.
///
/// The sign is positive when 'a' turns towards 'b' the same way the positive
/// x-axis turns towards the positive y-axis.
static inline R curl2(sgVec2 a, sgVec2 b) {
  const R diagonal     = a.x * b.y;
  const R antidiagonal = a.y * b.x;
  return diagonal - antidiagonal;
}
// Cross product a x b.
static inline sgVec3 cross3(sgVec3 a, sgVec3 b) {
  sgVec3 c;
  c.x = a.y * b.z - a.z * b.y;
  c.y = a.z * b.x - a.x * b.z;
  c.z = a.x * b.y - a.y * b.x;
  return c;
}

// Return v scaled to unit length, or the zero vector when its length is not
// positive.
static inline sgVec3 normalize3(sgVec3 v) {
  const R length = norm3(v);
  if (length > 0) {
    return (sgVec3){v.x / length, v.y / length, v.z / length};
  }
  return (sgVec3){0, 0, 0};
}

// Keep the (x, y) components of v; z and w are dropped.
static inline sgVec2 Vec2FromVec4(sgVec4 v) {
  return (sgVec2){.x = v.x, .y = v.y};
}
// Extend v with the given fourth component w.
static inline sgVec4 Vec4FromVec3(sgVec3 v, R w) {
  return (sgVec4){.x = v.x, .y = v.y, .z = v.z, .w = w};
}

// Build a matrix from its elements written out in row-major reading order
// (mRC = row R, column C). Storage is column-major, so mRC lands in val[C][R].
static inline sgMat4 Mat4(
    R m00, R m01, R m02, R m03,   // v0.x v1.x v2.x v3.x
    R m10, R m11, R m12, R m13,   // v0.y v1.y v2.y v3.y
    R m20, R m21, R m22, R m23,   // v0.z v1.z v2.z v3.z
    R m30, R m31, R m32, R m33) { // v0.w v1.w v2.w v3.w
  sgMat4 m;
  // Column 0.
  m.val[0][0] = m00; m.val[0][1] = m10; m.val[0][2] = m20; m.val[0][3] = m30;
  // Column 1.
  m.val[1][0] = m01; m.val[1][1] = m11; m.val[1][2] = m21; m.val[1][3] = m31;
  // Column 2.
  m.val[2][0] = m02; m.val[2][1] = m12; m.val[2][2] = m22; m.val[2][3] = m32;
  // Column 3.
  m.val[3][0] = m03; m.val[3][1] = m13; m.val[3][2] = m23; m.val[3][3] = m33;
  return m;
}

// Build an affine transform whose columns are the three basis vectors and the
// position; the bottom row is (0, 0, 0, 1).
static inline sgMat4 Mat4FromVec3(sgVec3 right, sgVec3 up, sgVec3 forward, sgVec3 position) {
  const sgVec3 columns[4] = {right, up, forward, position};
  sgMat4 m;
  for (int c = 0; c < 4; ++c) {
    m.val[c][0] = columns[c].x;
    m.val[c][1] = columns[c].y;
    m.val[c][2] = columns[c].z;
    // Only the position column carries w = 1.
    m.val[c][3] = (R)((c == 3) ? 1 : 0);
  }
  return m;
}

// Element at (row, col). Storage is column-major, so the column is the outer
// index.
static inline R Mat4At(sgMat4 m, int row, int col) {
  const R* const column = m.val[col];
  return column[row];
}
// Column accessors: the xyz part (first three rows) of column 0..3.
// The vectors are built member by member rather than by reading the R[4]
// column through a cast sgVec3*, which relied on struct layout and broke the
// strict-aliasing rule.
static inline sgVec3 Mat4v0(sgMat4 m) { return (sgVec3){m.val[0][0], m.val[0][1], m.val[0][2]}; }
static inline sgVec3 Mat4v1(sgMat4 m) { return (sgVec3){m.val[1][0], m.val[1][1], m.val[1][2]}; }
static inline sgVec3 Mat4v2(sgMat4 m) { return (sgVec3){m.val[2][0], m.val[2][1], m.val[2][2]}; }
static inline sgVec3 Mat4v3(sgMat4 m) { return (sgVec3){m.val[3][0], m.val[3][1], m.val[3][2]}; }

// Matrix product A * B.
// Element (row, col) of the result is the dot product of row 'row' of A with
// column 'col' of B. With column-major storage, A(row, k) is A.val[k][row] and
// B(k, col) is B.val[col][k].
static inline sgMat4 Mat4Mul(sgMat4 A, sgMat4 B) {
  sgMat4 product;
  for (int row = 0; row < 4; ++row) {
    for (int col = 0; col < 4; ++col) {
      // Seed with the k = 0 term and accumulate the rest in increasing k, so
      // the terms are added in the same left-to-right order as a written-out
      // sum.
      R sum = A.val[0][row] * B.val[col][0];
      for (int k = 1; k < 4; ++k) {
        sum = sum + A.val[k][row] * B.val[col][k];
      }
      product.val[col][row] = sum;
    }
  }
  return product;
}

// Transform (v, w) by m and return the xyz part of the result; the fourth row
// of m is not used. Pass w = 1 for points and w = 0 for directions.
static inline sgVec3 Mat4MulVec3(sgMat4 m, sgVec3 v, R w) {
  sgVec3 out;
  out.x = m.val[0][0] * v.x + m.val[1][0] * v.y + m.val[2][0] * v.z + m.val[3][0] * w;
  out.y = m.val[0][1] * v.x + m.val[1][1] * v.y + m.val[2][1] * v.z + m.val[3][1] * w;
  out.z = m.val[0][2] * v.x + m.val[1][2] * v.y + m.val[2][2] * v.z + m.val[3][2] * w;
  return out;
}

// Matrix-vector product m * v (v treated as a column vector).
static inline sgVec4 Mat4MulVec4(sgMat4 m, sgVec4 v) {
  // m.val[c][r] is the element at row r, column c.
  return (sgVec4){
    .x = m.val[0][0] * v.x + m.val[1][0] * v.y + m.val[2][0] * v.z + m.val[3][0] * v.w,
    .y = m.val[0][1] * v.x + m.val[1][1] * v.y + m.val[2][1] * v.z + m.val[3][1] * v.w,
    .z = m.val[0][2] * v.x + m.val[1][2] * v.y + m.val[2][2] * v.z + m.val[3][2] * v.w,
    .w = m.val[0][3] * v.x + m.val[1][3] * v.y + m.val[2][3] * v.z + m.val[3][3] * v.w};
}

// Inverse of a transform made of a 3x3 basis (columns 0..2) and a translation
// (column 3). The basis is transposed and the translation becomes
// -(basis^T * t), which is the true inverse only when the basis is
// orthonormal.
static inline sgMat4 Mat4InverseTransform(sgMat4 m) {
  const sgVec3 right       = Mat4v0(m);
  const sgVec3 up          = Mat4v1(m);
  const sgVec3 forward     = Mat4v2(m);
  const sgVec3 translation = Mat4v3(m);
  const R tx = -dot3(right,   translation);
  const R ty = -dot3(up,      translation);
  const R tz = -dot3(forward, translation);
  return Mat4(
      right.x,   right.y,   right.z,   tx,
      up.x,      up.y,      up.z,      ty,
      forward.x, forward.y, forward.z, tz,
      0.f,       0.f,       0.f,       1.f);
}

// Build a camera transform located at 'position' and looking along 'forward',
// using 'up' as a hint for the vertical direction.
//
// The resulting basis is (right, up, -forward): the camera looks down its own
// -Z axis. 'forward' is normalised here so that the basis is orthonormal even
// when the caller passes a non-unit direction; Mat4InverseTransform() relies
// on that to invert the matrix by transposition.
static inline sgMat4 Mat4Look(sgVec3 position, sgVec3 forward, sgVec3 up) {
  forward            = normalize3(forward);
  const sgVec3 right = normalize3(cross3(forward, up));
  // Re-derive 'up' so it is perpendicular to both 'right' and 'forward'.
  up                 = normalize3(cross3(right, forward));
  return Mat4FromVec3(right, up, neg3(forward), position);
}

// Perspective projection matrix.
//   fovy   - vertical field of view; passed to tanf(), so in radians.
//   aspect - divides the x scale (presumably width / height; confirm with
//            callers).
//   near, far - plane distances; must satisfy near < far.
// The bottom row is (0, 0, -1, 0), so the output w is the negated input z.
static inline sgMat4 Mat4Perspective(R fovy, R aspect, R near, R far) {
  assert(fovy > 0.f);
  assert(near < far);
  const R focal      = 1.f / tanf(fovy / 2.f);
  const R depthRange = near - far;
  const R xScale     = focal / aspect;
  const R zScale     = (far + near) / depthRange;
  const R zOffset    = (2 * far * near / depthRange);
  return Mat4(
    xScale, 0,     0,      0,
    0,      focal, 0,      0,
    0,      0,     zScale, zOffset,
    0,      0,     -1,     0);
}

// Convert a normalized channel value to a byte.
// The value is scaled by 255, rounded to nearest and saturated to [0, 255].
// Rounding keeps a byte -> float -> byte round trip exact, and the clamp
// avoids the undefined behaviour of casting an out-of-range (or NaN) float to
// uint8_t.
static inline uint8_t UnitToByte(R v) {
  const R scaled = v * 255.f + 0.5f;
  if (!(scaled > 0.f)) { // Also catches NaN.
    return 0;
  }
  if (scaled >= 255.f) {
    return 255;
  }
  return (uint8_t)scaled;
}

// RGB channels of p, normalized to [0,1]. Alpha is dropped.
static inline sgVec3 PixelToVec3(sgPixel p) {
  return (sgVec3){(R)p.r / 255.f, (R)p.g / 255.f, (R)p.b / 255.f};
}
// Pack normalized RGB 'p' and alpha 'a' into a pixel.
static inline sgPixel Vec3ToPixel(sgVec3 p, R a) {
  return (sgPixel){UnitToByte(p.x), UnitToByte(p.y), UnitToByte(p.z), UnitToByte(a)};
}
// RGBA channels of p, normalized to [0,1].
static inline sgVec4 PixelToVec4(sgPixel p) {
  return (sgVec4){(R)p.r / 255.f, (R)p.g / 255.f, (R)p.b / 255.f, (R)p.a / 255.f};
}
// Pack normalized RGBA 'p' (x,y,z,w = r,g,b,a) into a pixel.
static inline sgPixel Vec4ToPixel(sgVec4 p) {
  return (sgPixel){UnitToByte(p.x), UnitToByte(p.y), UnitToByte(p.z), UnitToByte(p.w)};
}

// InBounds() is only referenced from assert() expressions, so it is compiled
// only when assertions are enabled. The standard macro that disables assert()
// is NDEBUG; the previously used _NDEBUG is a reserved identifier that no
// toolchain defines for this purpose.
#ifndef NDEBUG
/// Return true if (x, y) lies inside a width x height grid, i.e.
/// 0 <= x < width and 0 <= y < height.
static bool InBounds(int width, int height, int x, int y) {
  return (0 <= x) && (x < width) &&
         (0 <= y) && (y < height);
}
#endif // NDEBUG

// Address of the colour-buffer entry at (x, y). Rows are dims.x pixels wide.
static inline sgPixel* Pixel(swgfx* gfx, int x, int y) {
  assert(gfx);
  assert(gfx->colour);
  assert(InBounds(gfx->dims.x, gfx->dims.y, x, y));
  const int index = (y * gfx->dims.x) + x;
  return &gfx->colour[index];
}

// Read-only address of the depth-buffer entry at (x, y). The depth buffer has
// the same dimensions and row layout as the colour buffer.
static inline const R* Depth(swgfx* gfx, int x, int y) {
  assert(gfx);
  assert(gfx->depth);
  assert(InBounds(gfx->dims.x, gfx->dims.y, x, y));
  const int index = (y * gfx->dims.x) + x;
  return &gfx->depth[index];
}

// Write 'colour' to the colour buffer at p. With SWGFX_PROFILING enabled, also
// bump the pixel counter.
static inline void SetPixelColour(swgfx* gfx, const sgVec2i p, sgPixel colour) {
  assert(gfx);
  sgPixel* const target = Pixel(gfx, p.x, p.y);
  *target = colour;
#if SWGFX_PROFILING
  gfx->counters.pixels++;
#endif // SWGFX_PROFILING
}

// Record the deferred attributes of the fragment at p: its depth, the id of
// the texture to sample and the texture coordinates. The colour buffer is not
// touched here. With SWGFX_PROFILING enabled, also bump the pixel counter.
static inline void SetPixelDeferred(swgfx* gfx, const sgVec2i p, R depth, sgTextureId texid, sgVec2 uv) {
  assert(gfx);
  // All three buffers share the colour buffer's row-major layout.
  const int index = (p.y * gfx->dims.x) + p.x;
  gfx->depth[index]     = depth;
  gfx->texture[index]   = texid;
  gfx->texcoords[index] = uv;
#if SWGFX_PROFILING
  gfx->counters.pixels++;
#endif // SWGFX_PROFILING
}

// Fetch the texel at integer coordinates xy. The image is stored row-major,
// 'width' pixels per row; xy must be inside the image.
static inline sgPixel ReadTexture(const sgImage* texture, sgVec2i xy) {
  assert(texture);
  assert(texture->pixels);
  assert(InBounds(texture->width, texture->height, xy.x, xy.y));
  const int index = xy.y * texture->width + xy.x;
  return texture->pixels[index];
}
// Same as ReadTexture(), with each channel normalized to [0,1].
static inline sgVec4 ReadTextureFloat(const sgImage* texture, sgVec2i xy) {
  const sgPixel texel = ReadTexture(texture, xy);
  return PixelToVec4(texel);
}

// Map texture coordinates to integer texel coordinates: uv is scaled by
// (size - 1) per axis and truncated towards zero. No wrapping is applied.
static inline sgVec2i UvToIndex(const sgImage* texture, sgVec2 uv) {
  assert(texture);
  const R maxX = (R)(texture->width - 1);
  const R maxY = (R)(texture->height - 1);
  return (sgVec2i){.x = (int)(uv.x * maxX), .y = (int)(uv.y * maxY)};
}

// Repeat addressing: wrap texel coordinates into the image, so coordinates
// past either edge come back in on the opposite side.
static inline sgVec2i TextureRepeat(const sgImage* texture, sgVec2i p) {
  const int wrappedX = mod(p.x, texture->width);
  const int wrappedY = mod(p.y, texture->height);
  return (sgVec2i){wrappedX, wrappedY};
}

// Point sampling: convert uv to a texel index, wrap it into the image and
// return that texel unmodified.
static inline sgPixel FilterNearest(const sgImage* texture, sgVec2 uv) {
  assert(texture);
  assert(texture->pixels);
  const sgVec2i texel = TextureRepeat(texture, UvToIndex(texture, uv));
  return ReadTexture(texture, texel);
}

// Bilinear sampling with repeat addressing.
//
// uv is wrapped into [0,1) and scaled to texel space; the four texels around
// that position are blended using the fractional part of the texel-space
// position as weights. (The weights used to be taken from the fractional part
// of the wrapped uv itself, which is just uv and does not describe the
// position between neighbouring texels.)
static inline sgPixel FilterBilinear(const sgImage* texture, sgVec2 uv) {
  assert(texture);
  assert(texture->pixels);
#define ADDR(x,y) TextureRepeat(texture, (sgVec2i){x,y})
  const sgVec2 uv01 = mod2(uv, 1.f);
  // Continuous texel-space position, using the same scale as UvToIndex().
  const sgVec2 st = (sgVec2){
    uv01.x * (R)(texture->width  - 1),
    uv01.y * (R)(texture->height - 1)};
  // Find the closest grid vertex, then interpolate the 4 neighbouring pixel
  // centers. 'tl' is already inside the image because uv01 is in [0,1); the
  // neighbours may step past the edge and are wrapped.
  const sgVec2i tl = (sgVec2i){(int)st.x, (int)st.y};
  const sgVec2i tr = ADDR(tl.x+1, tl.y);
  const sgVec2i bl = ADDR(tl.x,   tl.y+1);
  const sgVec2i br = ADDR(tl.x+1, tl.y+1);
  // Blend weights: how far the sample lies from 'tl' towards its neighbours.
  const sgVec2   t = frac2(st);
  const sgVec4 tl_pix = ReadTextureFloat(texture, tl);
  const sgVec4 tr_pix = ReadTextureFloat(texture, tr);
  const sgVec4 bl_pix = ReadTextureFloat(texture, bl);
  const sgVec4 br_pix = ReadTextureFloat(texture, br);
  const sgVec4 x1 = lerp4(tl_pix, tr_pix, t.x);
  const sgVec4 x2 = lerp4(bl_pix, br_pix, t.x);
  const sgVec4  y = lerp4(x1, x2, t.y);
  return Vec4ToPixel(y);
#undef ADDR
}

// TODO: Mipmapping.
// TODO: Clamping and other addressing strategies.
// Sample 'texture' at 'uv' with the requested filter. An unknown filter trips
// an assertion and, with assertions disabled, yields a zeroed pixel.
static inline sgPixel Sample(const sgImage* texture, sgTextureFilter filter, sgVec2 uv) {
  if (filter == sgNearest) {
    return FilterNearest(texture, uv);
  }
  if (filter == sgBilinear) {
    return FilterBilinear(texture, uv);
  }
  assert(false);
  return (sgPixel){0};
}

// Axis-aligned bounding box of the triangle (p0, p1, p2).
static inline sgAABB2 TriangleAabb2(sgVec2 p0, sgVec2 p1, sgVec2 p2) {
  sgAABB2 box;
  box.pmin = min2(min2(p0, p1), p2);
  box.pmax = max2(max2(p0, p1), p2);
  return box;
}

static inline sgVec2i Clip(const swgfx* gfx, const sgVec2i p) {
  assert(gfx);
  constexpr sgVec2i lower = (sgVec2i){0,0};
  const     sgVec2i upper = (sgVec2i){gfx->viewport.width  - 1,
                                      gfx->viewport.height - 1};
  return max2i(lower, min2i(upper, p));
}

// Blend three scalars with barycentric weights (bar.x, bar.y, bar.z).
static inline R BarycentricInterp(sgVec3 bar, R a, R b, R c) {
  const R fromA = bar.x * a;
  const R fromB = bar.y * b;
  const R fromC = bar.z * c;
  return fromA + fromB + fromC;
}
// Blend three 2D vectors with barycentric weights (bar.x, bar.y, bar.z).
static inline sgVec2 BarycentricInterp2(sgVec3 bar, sgVec2 a, sgVec2 b, sgVec2 c) {
  const sgVec2 fromA = scale2(a, bar.x);
  const sgVec2 fromB = scale2(b, bar.y);
  const sgVec2 fromC = scale2(c, bar.z);
  return add2(add2(fromA, fromB), fromC);
}

// Implicit equation of the line through a and b, evaluated at p. The value is
// zero when p lies on the line and changes sign across it.
static inline R f(sgVec2 a, sgVec2 b, sgVec2 p) {
  const R xCoeff = a.y - b.y;
  const R yCoeff = b.x - a.x;
  return xCoeff*p.x + yCoeff*p.y + a.x*b.y - b.x*a.y;
}

// Barycentric coordinates of p with respect to the triangle (p0, p1, p2),
// returned as (weight of p0, weight of p1, weight of p2).
static inline sgVec3 Barycentric(sgVec2 p0, sgVec2 p1, sgVec2 p2, sgVec2 p) {
  // The weights of p1 and p2 are computed from the line equations of the
  // opposite edges. Division by zero is only possible if the triangle has
  // zero area.
  const R w1 = f(p0, p2, p) / f(p0, p2, p1);
  const R w2 = f(p0, p1, p) / f(p0, p1, p2);
  // The weights sum to 1, so the weight of p0 is derived rather than computed
  // from its own edge. Deriving it rasterizes one of the edges slightly worse;
  // the small epsilon patches that up.
  const R w0 = 1.f - w1 - w2 - (R)1e-7;
  return (sgVec3){w0, w1, w2};
}

/// Rasterize a textured 2D triangle straight into the colour buffer.
///
/// Vertex positions are used as given (no transform), the active texture is
/// sampled with barycentrically interpolated uv, and no depth test is done.
/// The scanned pixel range is the triangle's bounding box clamped by Clip().
static void DrawTriangle2(swgfx* gfx, const sgTri2* const tri) {
  assert(gfx);
  assert(tri);
  const sgVec2 p0 = (sgVec2){tri->p0.pos.x, tri->p0.pos.y};
  const sgVec2 p1 = (sgVec2){tri->p1.pos.x, tri->p1.pos.y};
  const sgVec2 p2 = (sgVec2){tri->p2.pos.x, tri->p2.pos.y};
  const sgAABB2 bbox = TriangleAabb2(p0, p1, p2);
  // We consider (x,y) to be the pixel center.
  // Draw all pixels touched by the bounding box. TODO: Multi-sampling.
  sgVec2i pmin = (sgVec2i){(int)bbox.pmin.x, (int)bbox.pmin.y};
  sgVec2i pmax = (sgVec2i){(int)(bbox.pmax.x + 0.5f), (int)(bbox.pmax.y + 0.5f)};
  // Clip to screen space.
  pmin = Clip(gfx, pmin);
  pmax = Clip(gfx, pmax);
  const sgTexture* texture = &gfx->textureRegister[gfx->activeTexture];
  // Draw.
  for   (int y = pmin.y; y <= pmax.y; ++y) {
    for (int x = pmin.x; x <= pmax.x; ++x) {
      const sgVec2 p = (sgVec2){(R)x, (R)y};
      // TODO: there is an incremental optimization to computing barycentric coordinates;
      //  read more about it.
      const sgVec3 bar = Barycentric(p0, p1, p2, p);
      // We need to check the third coordinate.
      //   a + b + c = 1
      //   So, e.g., if a >= 0 and b >= 0, then we have c <= 1, but we could also have c <= 0.
      //   In the case c <= 0, then point is outside the triangle.
      if ((bar.x >= 0) && (bar.y >= 0) && (bar.z >= 0)) {
        // Sanity check: the weights sum to 1 up to rounding. The tolerance is
        // deliberately looser than float rounding so the check cannot fire
        // spuriously. (It used to subtract 1e7, which made it always pass.)
        assert((bar.x + bar.y + bar.z) <= 1.f + 1e-5f);
        const sgVec2 uv = BarycentricInterp2(bar, tri->p0.uv, tri->p1.uv, tri->p2.uv);
        const sgPixel colour = Sample(texture->image, texture->filter, uv);
        SetPixelColour(gfx, (sgVec2i){x,y}, colour);
      }
    }
  }
#if SWGFX_PROFILING
  gfx->counters.triangles2++;
#endif // SWGFX_PROFILING
}

// Perspective divide: x, y and z are divided by w. The original w is kept in
// the result rather than being set to 1.
static inline sgVec4 PerspDivide(sgVec4 v) {
  return (sgVec4){.x = v.x / v.w, .y = v.y / v.w, .z = v.z / v.w, .w = v.w};
}

// TODO: Compute a viewport matrix in sgViewport() instead.
// Map NDC to viewport coordinates: x and y go from [-1, +1] to the viewport
// rectangle, z goes from [-1, +1] to [0, 1], and w is passed through.
static inline sgVec4 ViewportTransform(sgViewport_t vp, sgVec4 ndc) {
  const R halfWidth  = (R)vp.width/2.f;
  const R halfHeight = (R)vp.height/2.f;
  sgVec4 out;
  out.x = (ndc.x+1.f) * halfWidth  + (R)vp.x0;
  out.y = (ndc.y+1.f) * halfHeight + (R)vp.y0;
  out.z = ndc.z*0.5f + 0.5f;
  out.w = ndc.w;
  return out;
}

// Flip y from viewport space (y up) to window space (y down):
// y' = vp.height - y. The other components are passed through.
static inline sgVec4 ViewportToWindow(sgViewport_t vp, sgVec4 p) {
  const R flippedY = (R)vp.height - p.y;
  return (sgVec4){.x = p.x, .y = flippedY, .z = p.z, .w = p.w};
}

/// Intersect the segment a->b with the plane z = -near and return the
/// parameter t of the hit point, where hit = a + t*(b-a).
///
/// Only the z components are involved: the plane normal is (0, 0, 1), so the
/// ray-plane formula reduces to t = (-near - a.z) / (b.z - a.z).
/// The endpoints are expected to lie on opposite sides of the plane, which
/// keeps t within [0, 1] (asserted).
static inline R IntersectSegmentPlane(R near, const sgVec3* const a, const sgVec3* const b) {
  const R toPlane = -near - a->z; // Signed z distance from 'a' to the plane.
  const R extentZ = b->z - a->z;  // z extent of the segment.
  const R t = toPlane / extentZ;
  assert(t >= 0.f);
  assert(t <= 1.f);
  return t;
}

/// Write into 'out' the vertex found at parameter t along the edge a->b:
/// both the position and the texture coordinates are linearly interpolated.
/// t must be in [0, 1].
static void InterpolateAttributes(const sgVert4* const a, const sgVert4* const b, R t, sgVert4* out) {
  assert(a);
  assert(b);
  assert(out);
  assert(t >= 0.f);
  assert(t <= 1.f);
  out->pos = lerp4(a->pos, b->pos, t);
  out->uv  = lerp2(a->uv, b->uv, t);
}

/// Clip a triangle against the plane z = -near. Return the number of output
/// triangles written to 'out' (0, 1 or 2).
///
/// A vertex counts as "in front" when its z is >= -near.
///
/// 4 possible cases:
/// 1. All vertices in front => output the triangle unchanged.
/// 2. All vertices behind   => discard.
/// 3. One vertex in front   => output 1 clipped triangle.
/// 4. Two vertices in front => output 2 clipped triangles.
///
/// New vertices created on the plane get interpolated positions and uvs, and
/// the winding order of the input is preserved in the outputs.
static inline int ClipTriangle(R near, const sgTri4* const tri, sgTri4 out[2]) {
#define VERTEX(IDX) (&tri->p0)[IDX]
#define VALID(X) ((0 <= (X)) && ((X) < 3))
#define IN_FRONT(P) ((P).z >= -near) // +Z points into the screen in clip space.
  const bool f[3] = {IN_FRONT(tri->p0.pos), IN_FRONT(tri->p1.pos), IN_FRONT(tri->p2.pos)};
  const int numFront = f[0] + f[1] + f[2];
  int numTris;
  if (numFront == 3) {
    numTris = 1;
    out[0] = *tri;
  } else if (numFront == 2) {
    numTris = 2;
    // Find the single vertex behind the plane. The index is bounds-checked
    // before f[] is read.
    int back = 0;
    for (; (back < 3) && f[back]; ++back) {}
    assert(VALID(back));
    assert(!f[back]);
    int front[2] = {(back+1)%3, (back+2)%3};
    assert(VALID(front[0]));
    assert(VALID(front[1]));
    const sgVert4* const backVert = &VERTEX(back);
    // p[i] is where the edge back->front[i] crosses the plane.
    sgVert4 p[2];
    for (int i = 0; i < 2; ++i) {
      const R t = IntersectSegmentPlane(near, (const sgVec3*)&backVert->pos, (const sgVec3*)&VERTEX(front[i]).pos);
      InterpolateAttributes(backVert, &VERTEX(front[i]), t, &p[i]);
    }
    // We must preserve the winding order here for culling.
    // Note that p[i] corresponds to front[i] = back+(i+1).
    out[0] = (sgTri4){p[1], p[0], VERTEX(front[1])};
    out[1] = (sgTri4){p[0], VERTEX(front[0]), VERTEX(front[1])};
  } else if (numFront == 1) {
    numTris = 1;
    // Find the single vertex in front of the plane. The index is
    // bounds-checked before f[] is read.
    int front = 0;
    for (; (front < 3) && !f[front]; ++front){}
    assert(VALID(front));
    assert(f[front]);
    int back[2] = {(front+1)%3, (front+2)%3};
    assert(VALID(back[0]));
    assert(VALID(back[1]));
    const sgVert4* const frontVert = &VERTEX(front);
    // p[i] is where the edge front->back[i] crosses the plane.
    sgVert4 p[2];
    for (int i = 0; i < 2; ++i) {
      const R t = IntersectSegmentPlane(near, (const sgVec3*)&frontVert->pos, (const sgVec3*)&VERTEX(back[i]).pos);
      InterpolateAttributes(frontVert, &VERTEX(back[i]), t, &p[i]);
    }
    // We must preserve the winding order here for culling.
    // Note that p[i] corresponds to back[i] = front+(i+1).
    out[0] = (sgTri4){*frontVert, p[0], p[1]};
  } else {
    numTris = 0;
  }
  return numTris;
#undef IN_FRONT
#undef VALID
#undef VERTEX
}

static inline int TransformTri(const swgfx* gfx, const sgTri3* const tri, sgTri4 out[2]) {
  assert(gfx);
  assert(tri);
  // Model to clip space.
  const sgVec4 p0_clip = Mat4MulVec4(gfx->mvp, Vec4FromVec3(tri->p0.pos, 1));
  const sgVec4 p1_clip = Mat4MulVec4(gfx->mvp, Vec4FromVec3(tri->p1.pos, 1));
  const sgVec4 p2_clip = Mat4MulVec4(gfx->mvp, Vec4FromVec3(tri->p2.pos, 1));
  const sgTri4 tri_clip = {
    (sgVert4){ p0_clip, tri->p0.uv },
    (sgVert4){ p1_clip, tri->p1.uv },
    (sgVert4){ p2_clip, tri->p2.uv }};
  // Clip.
  // Our perspective matrix maps the near plane to z=-1 in clip space.
  constexpr R near_clip = -1.f;
  const int numTris = ClipTriangle(near_clip, &tri_clip, out);
  assert((0 <= numTris) && (numTris <= 2));
  for (int i = 0; i < numTris; ++i) {
    sgTri4* const tri4 = &out[i];
    // Perspective divide.
    const sgVec4 p0_ndc = PerspDivide(tri4->p0.pos);
    const sgVec4 p1_ndc = PerspDivide(tri4->p1.pos);
    const sgVec4 p2_ndc = PerspDivide(tri4->p2.pos);
    // To viewport.
    const sgVec4 p0_vp = ViewportTransform(gfx->viewport, p0_ndc);
    const sgVec4 p1_vp = ViewportTransform(gfx->viewport, p1_ndc);
    const sgVec4 p2_vp = ViewportTransform(gfx->viewport, p2_ndc);
    // To window.
    const sgVec4 p0_wn = ViewportToWindow(gfx->viewport, p0_vp);
    const sgVec4 p1_wn = ViewportToWindow(gfx->viewport, p1_vp);
    const sgVec4 p2_wn = ViewportToWindow(gfx->viewport, p2_vp);
    // Output.
    tri4->p0.pos = p0_wn;
    tri4->p1.pos = p1_wn;
    tri4->p2.pos = p2_wn;
  }
  return numTris;
}

/// Rasterize one window-space triangle produced by TransformTri() into the
/// deferred buffers (depth, texture id, texture coords).
///
/// Steps: backface culling, bounding-box scan with barycentric inside test,
/// perspective-correct uv interpolation, then a depth test against the depth
/// buffer. The colour buffer is not written here.
static void DrawTriangle3PostClip(swgfx* gfx, const sgTri4* const tri) {
  assert(gfx);
  assert(tri);
  const sgVec4 p0 = tri->p0.pos;
  const sgVec4 p1 = tri->p1.pos;
  const sgVec4 p2 = tri->p2.pos;
  const sgVec2 p0_2d = (sgVec2){p0.x, p0.y};
  const sgVec2 p1_2d = (sgVec2){p1.x, p1.y};
  const sgVec2 p2_2d = (sgVec2){p2.x, p2.y};
  // Backface culling, assume front face = ccw.
  // In screen space, +Y goes down.
  // p0p1p2 is ccw <=> p0p1 curls negatively towards p0p2. If the curl is
  // positive (cw winding), cull.
  if (curl2(sub2(p1_2d, p0_2d),
            sub2(p2_2d, p0_2d)) > 0.f) {
    return;
  }
  const sgAABB2 bbox = TriangleAabb2(p0_2d, p1_2d, p2_2d);
  // We consider (x,y) to be the pixel center.
  // Draw all pixels touched by the bounding box. TODO: Multi-sampling.
  sgVec2i pmin = (sgVec2i){(int)bbox.pmin.x, (int)bbox.pmin.y};
  sgVec2i pmax = (sgVec2i){(int)(bbox.pmax.x + 0.5f), (int)(bbox.pmax.y + 0.5f)};
  // Clip to screen space.
  pmin = Clip(gfx, pmin);
  pmax = Clip(gfx, pmax);
  // Setup for perspective texture mapping.
  // 'w' is view-space z.
  // 1/z, u/z and v/z are interpolated linearly in screen space; u and v are
  // then recovered per pixel by dividing by the interpolated 1/z.
  const sgVec3 depths      = (sgVec3){p0.z, p1.z, p2.z};
  const sgVec3 one_over_zs = (sgVec3){1.f / p0.w, 1.f / p1.w, 1.f/ p2.w};
  const sgVec3 u_over_zs   = (sgVec3){tri->p0.uv.x / p0.w, tri->p1.uv.x / p1.w, tri->p2.uv.x / p2.w};
  const sgVec3 v_over_zs   = (sgVec3){tri->p0.uv.y / p0.w, tri->p1.uv.y / p1.w, tri->p2.uv.y / p2.w};
  // Draw.
  for   (int y = pmin.y; y <= pmax.y; ++y) {
    for (int x = pmin.x; x <= pmax.x; ++x) {
      const sgVec2 p = (sgVec2){(R)x, (R)y};
      // TODO: there is an incremental optimization to computing barycentric coordinates;
      //  read more about it.
      const sgVec3 bar = Barycentric(p0_2d, p1_2d, p2_2d, p);
      // We need to check the third coordinate.
      //   a + b + c = 1
      //   So, e.g., if a >= 0 and b >= 0, then we have c <= 1, but we could also have c <= 0.
      //   In the case c <= 0, then point is outside the triangle.
      if ((bar.x >= 0) && (bar.y >= 0) && (bar.z >= 0)) {
        // Sanity check: the weights sum to 1 up to rounding. The tolerance is
        // deliberately looser than float rounding so the check cannot fire
        // spuriously. (It used to subtract 1e7, which made it always pass.)
        assert((bar.x + bar.y + bar.z) <= 1.f + 1e-5f);
        const R p_one_over_z = dot3(bar, one_over_zs);
        const R p_u_over_z   = dot3(bar, u_over_zs);
        const R p_v_over_z   = dot3(bar, v_over_zs);
        const R p_depth      = dot3(bar, depths);
        const R z            = 1.f / p_one_over_z;
        const sgVec2 uv      = (sgVec2){p_u_over_z * z, p_v_over_z * z};
        const R* depth = Depth(gfx, x, y);
        // Keep the fragment only if its depth is within [0, 1] and it is not
        // behind what is already stored.
        if ((0.f <= p_depth) && (p_depth <= 1.f) && (p_depth <= *depth)) {
          // TODO: When doing lighting, need to tone-map here and apply inverse
          //  gamma here.
          SetPixelDeferred(gfx, (sgVec2i){x,y}, p_depth, gfx->activeTexture, uv);
        }
      }
    }
  }
}

// Draw one 3D triangle: transform and clip it (which may yield up to two
// triangles), then rasterize each result. With SWGFX_PROFILING enabled, the
// 3D-triangle counter is bumped once per input triangle.
static void DrawTriangle3(swgfx* gfx, const sgTri3* const tri) {
  assert(gfx);
  assert(tri);
  sgTri4 tris[2];
  const int numTris = TransformTri(gfx, tri, tris);
  assert((0 <= numTris) && (numTris <= 2));
  const sgTri4* current = tris;
  const sgTri4* const end = tris + numTris;
  while (current != end) {
    DrawTriangle3PostClip(gfx, current);
    ++current;
  }
#if SWGFX_PROFILING
  gfx->counters.triangles3++;
#endif // SWGFX_PROFILING
}

// True if X is zero or a power of two. The argument is parenthesized so that
// expressions such as 'a | b' are tested as a whole.
#define is_pow2_or_0(X) ((((X)) & ((X) - 1)) == 0)
// Alignment, in bytes, of every allocation carved out of the caller's buffer.
#define SG_ALIGN 64
// Allocate COUNT zero-initialized objects of TYPE from the bump pointer that
// PP_MEM points to.
#define SG_ALLOC(PP_MEM, COUNT, TYPE) (TYPE*)Alloc((PP_MEM), (COUNT), sizeof(TYPE))

// Round 'address' up to the next multiple of SG_ALIGN. An address that is
// already aligned is returned unchanged.
static void* AlignPtr(void* address) {
  assert(is_pow2_or_0(SG_ALIGN));
  constexpr size_t mask = SG_ALIGN - 1;
  const uintptr_t bumped = (uintptr_t)address + mask;
  return (void*)(bumped & ~mask);
}

// Round 'size' up to the next multiple of SG_ALIGN. A size that is already a
// multiple is returned unchanged.
static size_t Align(size_t size) {
  static_assert(is_pow2_or_0(SG_ALIGN));
  constexpr size_t mask = SG_ALIGN - 1;
  const size_t bumped = size + mask;
  return bumped & (~mask);
}

// Bump allocator over caller-provided memory.
//
// Returns the current position *ppMem as the start of a zero-filled block
// large enough for 'count' objects of 'size' bytes, and advances *ppMem past
// it. The block size is rounded up to SG_ALIGN, so *ppMem stays aligned for
// the next call. There is no capacity check: the caller must have sized the
// buffer beforehand.
static void* Alloc(void** ppMem, size_t count, size_t size) {
  assert(ppMem);
  assert(*ppMem);
  assert(*ppMem == AlignPtr(*ppMem)); // Should already be aligned.
  assert((size == 0) || (count <= (SIZE_MAX / size))); // count * size must not wrap.
  const size_t total = Align(count * size);
  // Step in bytes through a character pointer; arithmetic on void* is a
  // compiler extension, not standard C.
  unsigned char* const ptr = (unsigned char*)*ppMem;
  *ppMem = ptr + total;
  memset(ptr, 0, total);
  return ptr;
}

// Number of bytes of memory that sgNew() needs for a width x height renderer.
//
// The total covers the swgfx struct, the four per-pixel buffers and the
// texture register, each rounded up to SG_ALIGN, plus slack so that sgNew()
// can align the start of an arbitrary buffer.
// The pixel count is computed in size_t so that large dimensions do not
// overflow 'int'.
size_t sgMem(int width, int height) {
  assert(width  >= 0);
  assert(height >= 0);
  const size_t N = (size_t)width * (size_t)height;
  return Align(sizeof(swgfx)) +
         Align(N * sizeof(sgPixel)) +     // Colour buffer.
         Align(N * sizeof(R)) +           // Depth buffer.
         Align(N * sizeof(sgTextureId)) + // Texture ID buffer.
         Align(N * sizeof(sgVec2)) +      // Texture coords buffer.
         Align(SWGFX_TEXTURE_REGISTER_SIZE * sizeof(sgTexture)) + // Texture register.
         (SG_ALIGN - 1); // To make room to align allocations within the buffer.
}

swgfx* sgNew(int width, int height, void* mem) {
  const int N = width * height;
  void* aligned  = AlignPtr(mem); // Uses the extra room we made in sgMem().
  swgfx* gfx     = SG_ALLOC(&aligned, 1, swgfx);
  gfx->dims      = (sgVec2i){width, height};
  gfx->colour    = SG_ALLOC(&aligned, N, sgPixel);
  gfx->depth     = SG_ALLOC(&aligned, N, R);
  gfx->texture   = SG_ALLOC(&aligned, N, sgTextureId);
  gfx->texcoords = SG_ALLOC(&aligned, N, sgVec2);
  gfx->textureRegister = SG_ALLOC(&aligned, SWGFX_TEXTURE_REGISTER_SIZE, sgTexture);
  gfx->activeTexture = DefaultTextureId;
  gfx->defaultPixel  = (sgPixel){255, 255, 255, 255};
  gfx->defaultImage = (sgImage){
    .width  = 1,
    .height = 1,
    .pixels = &gfx->defaultPixel,
  };
  gfx->textureRegister[DefaultTextureId] = (sgTexture){
    .image  = &gfx->defaultImage,
    .filter = sgNearest,
  };
  return gfx;
}

// Release a renderer handle by setting the caller's pointer to null. No
// memory is freed: the renderer lives in the block the caller passed to
// sgNew().
void sgDel(swgfx** ppSwgfx) {
  assert(ppSwgfx);
  *ppSwgfx = nullptr;
}

// Direct access to the colour buffer: dims.x * dims.y pixels, row-major.
sgPixel* sgColourBuffer(swgfx* gfx) {
  assert(gfx);
  sgPixel* const buffer = gfx->colour;
  return buffer;
}

// Copy the colour buffer to 'screen', enlarging it by an integer factor.
//
// 'dimensions' is the size of the destination in pixels and must be a whole
// multiple of the colour buffer size on each axis. Every source pixel becomes
// an sx-by-sy block of identical destination pixels. The destination is
// written sequentially, row after row. With SWGFX_PROFILING enabled, the
// frame counter is bumped.
void sgPresent(swgfx* gfx, sgVec2i dimensions, sgScreenPixel* screen) {
  assert(gfx);
  assert(screen);
  // Integer scaling only.
  assert((dimensions.x % gfx->dims.x) == 0);
  assert((dimensions.y % gfx->dims.y) == 0);

  const int sx = dimensions.x / gfx->dims.x;
  const int sy = dimensions.y / gfx->dims.y;

  sgScreenPixel* dst = screen;
  for (int y = 0; y < gfx->dims.y; ++y) {
    const sgPixel* const row = gfx->colour + (y * gfx->dims.x);
    // Emit the same source row 'sy' times.
    for (int rowCopy = 0; rowCopy < sy; ++rowCopy) {
      for (int x = 0; x < gfx->dims.x; ++x) {
        const sgPixel* const texel = &row[x];
        // Emit the same source pixel 'sx' times.
        for (int colCopy = 0; colCopy < sx; ++colCopy) {
          dst->r = texel->r;
          dst->g = texel->g;
          dst->b = texel->b;
          dst->a = texel->a;
          ++dst;
        }
      }
    }
  }

#if SWGFX_PROFILING
  gfx->counters.frames++;
#endif // SWGFX_PROFILING
}

// Recompute the cached view-projection matrix: viewProj = proj * view.
static void sgUpdateViewProjection(swgfx* gfx) {
  assert(gfx);
  const sgMat4 combined = Mat4Mul(gfx->proj, gfx->view);
  gfx->viewProj = combined;
}

// Recompute the cached model-view-projection matrix: mvp = viewProj * model.
// viewProj must be up to date.
static void sgUpdateMvp(swgfx* gfx) {
  assert(gfx);
  const sgMat4 combined = Mat4Mul(gfx->viewProj, gfx->model);
  gfx->mvp = combined;
}

// Set the model transform from the origin and the unit X, Y and Z axes.
void sgModelId(swgfx* gfx) {
  assert(gfx);
  const sgVec3 origin = (sgVec3){0, 0, 0};
  const sgVec3 axisX  = (sgVec3){1, 0, 0};
  const sgVec3 axisY  = (sgVec3){0, 1, 0};
  const sgVec3 axisZ  = (sgVec3){0, 0, 1};
  sgModel(gfx, origin, axisX, axisY, axisZ);
}

// Set the model matrix from a position and three basis vectors, then refresh
// the cached model-view-projection matrix.
void sgModel(swgfx* gfx, sgVec3 position, sgVec3 right, sgVec3 up, sgVec3 forward) {
  assert(gfx);
  const sgMat4 model = Mat4FromVec3(right, up, forward, position);
  gfx->model = model;
  sgUpdateMvp(gfx);
}

// Set the view matrix from a camera position and forward direction.
// The camera matrix is built with Mat4Look using the global Up3 vector, and
// the view matrix is the result of Mat4InverseTransform on it. Both cached
// products (viewProj, then mvp) are refreshed afterwards.
void sgView(swgfx* gfx, sgVec3 position, sgVec3 forward) {
  assert(gfx);
  gfx->view = Mat4InverseTransform(Mat4Look(position, forward, Up3));
  // Order matters: mvp is derived from viewProj.
  sgUpdateViewProjection(gfx);
  sgUpdateMvp(gfx);
}

// Set the projection matrix via Mat4Perspective and refresh both cached
// products (viewProj, then mvp).
void sgPerspective(swgfx* gfx, R fovy, R aspect, R near, R far) {
  assert(gfx);
  const sgMat4 projection = Mat4Perspective(fovy, aspect, near, far);
  gfx->proj = projection;
  // Order matters: mvp is derived from viewProj.
  sgUpdateViewProjection(gfx);
  sgUpdateMvp(gfx);
}

// Set the viewport rectangle.
// The origin must be non-negative and the rectangle must not extend past the
// colour buffer dimensions (checked with asserts).
void sgViewport(swgfx* gfx, int x0, int y0, int width, int height) {
  assert(gfx);
  assert(x0 >= 0);
  assert(y0 >= 0);
  assert((x0 + width)  <= gfx->dims.x);
  assert((y0 + height) <= gfx->dims.y);
  gfx->viewport = (sgViewport_t){
    .x0     = x0,
    .y0     = y0,
    .width  = width,
    .height = height,
  };
}

// Store a texture (image + filter) in slot 'id' of the texture register.
// 'id' must be below SWGFX_MAX_TEXTURES; the slot used for the default texture
// cannot be overwritten. Only the image pointer is stored, not a copy.
void sgTextureRegister(swgfx* gfx, sgTextureId id, const sgImage* image, sgTextureFilter filter) {
  assert(gfx);
  assert(id < SWGFX_MAX_TEXTURES);
  assert(id != DefaultTextureId);
  assert(image);
  gfx->textureRegister[id] = (sgTexture){
    .image  = image,
    .filter = filter,
  };
}

// Select the texture slot recorded as the active texture.
// 'id' must be below SWGFX_MAX_TEXTURES, so the default texture slot (which
// equals SWGFX_MAX_TEXTURES) cannot be selected through this function.
void sgTextureActivate(swgfx* gfx, sgTextureId id) {
  assert(gfx);
  assert(id < SWGFX_MAX_TEXTURES);
  gfx->activeTexture = id;
}

// Reset the colour buffer to all-zero bytes and every depth value to
// DepthClearValue. The texture ID and texture coordinate buffers are not
// touched.
void sgClear(swgfx* gfx) {
  assert(gfx);
  const int numPixels = gfx->dims.x * gfx->dims.y;
  memset(gfx->colour, 0, numPixels * sizeof(*gfx->colour));
  R* const depth = gfx->depth;
  for (int px = 0; px < numPixels; ++px) {
    depth[px] = DepthClearValue;
  }
}

// Set 'count' pixels, at the given positions, to a single colour.
void sgPixels(swgfx* gfx, size_t count, const sgVec2i* positions, sgPixel colour) {
  assert(gfx);
  // Walk the array with a cursor; 'remaining' counts down to zero.
  const sgVec2i* cursor = positions;
  for (size_t remaining = count; remaining > 0; --remaining) {
    SetPixelColour(gfx, *cursor, colour);
    ++cursor;
  }
}

// TODO: DrawTriangle3 with clipping. Leave DrawTriangle2 to not clip for
//       performance; assume that 2D triangles are within bounds.
// TODO: If the triangle is out of bounds, skip entirely.
// TODO: Otherwise, rasterize the triangle the simple way and check whether each
//       individual pixel is within bounds; do not explicitly clip the triangle.
// TODO: Actually, I think we can just clip the triangle's AABB and then walk
//       over those pixels instead of checking every individual pixel in the
//       non-clipped AABB. Edit: I think this doesn't work; draw it and you'll
//       see. Some pixels that should be rasterized will fall out of the clipped
//       AABB.

// Draw 'count' 2D triangles, in array order, with DrawTriangle2.
void sgTriangles2(swgfx* gfx, size_t count, const sgTri2* tris) {
  assert(gfx);
  const sgTri2* tri = tris;
  for (size_t remaining = count; remaining > 0; --remaining) {
    DrawTriangle2(gfx, tri);
    ++tri;
  }
}

// Draw 'count' 3D triangles, in array order, with DrawTriangle3.
// The normals argument is currently unused.
void sgTriangles(swgfx* gfx, size_t count, const sgTri3* tris, const sgNormal*) {
  assert(gfx);
  assert(tris);
  size_t drawn = 0;
  while (drawn < count) {
    DrawTriangle3(gfx, tris + drawn);
    ++drawn;
  }
}

// Draw an indexed triangle list.
// Each consecutive group of three entries in 'indices' forms one triangle. A
// single index selects both the position and the texture coordinate of a
// vertex.
//   numIndices: number of entries in 'indices'; must be a multiple of 3.
// With asserts disabled, trailing indices that do not form a complete triangle
// are ignored instead of being read past the end of the array.
void sgTrianglesIndexed(swgfx* gfx, size_t numIndices, const sgIdx* indices, const sgVec3* positions, const sgVec2* texcoords) {
  assert(gfx);
  assert(indices);
  assert(positions);
  assert(texcoords);
  assert((numIndices % 3) == 0);
  // Iterate over whole triangles only, so the three index reads per triangle
  // are always within [0, numIndices).
  const size_t numTris = numIndices / 3;
  for (size_t t = 0; t < numTris; ++t) {
    const sgIdx* idx = &indices[3 * t];
    const sgIdx i0 = idx[0];
    const sgIdx i1 = idx[1];
    const sgIdx i2 = idx[2];
    const sgVec3 p0 = positions[i0];
    const sgVec3 p1 = positions[i1];
    const sgVec3 p2 = positions[i2];
    const sgVec2 uv0 = texcoords[i0];
    const sgVec2 uv1 = texcoords[i1];
    const sgVec2 uv2 = texcoords[i2];
    const sgTri3 tri = (sgTri3){
      (sgVert3){p0, uv0},
      (sgVert3){p1, uv1},
      (sgVert3){p2, uv2}};
    DrawTriangle3(gfx, &tri);
  }
}

// Draw an indexed triangle list in which every vertex carries separate indices
// for its position ('pos') and its texture coordinate ('uv').
void sgTrianglesIndexedNonUniform(swgfx* gfx, size_t numTris, const sgTriIdx* tris, const sgVec3* positions, const sgVec2* texcoords) {
  assert(gfx);
  assert(tris);
  assert(positions);
  assert(texcoords);
  for (size_t t = 0; t < numTris; ++t) {
    const sgTriIdx* idx = tris + t;
    // Gather each vertex's attributes from the two attribute arrays.
    const sgVert3 a = (sgVert3){positions[idx->v0.pos], texcoords[idx->v0.uv]};
    const sgVert3 b = (sgVert3){positions[idx->v1.pos], texcoords[idx->v1.uv]};
    const sgVert3 c = (sgVert3){positions[idx->v2.pos], texcoords[idx->v2.uv]};
    const sgTri3 tri = (sgTri3){a, b, c};
    DrawTriangle3(gfx, &tri);
  }
}

// Rewrite every pixel of a width x height image in place: the colour channels
// go through PixelToVec3 -> exp3(..., exp) -> Vec3ToPixel, and the pixel's
// existing alpha is passed back to Vec3ToPixel.
static void ImageExp(sgPixel* pixels, int width, int height, R exp) {
  assert(pixels);
  const int numPixels = width * height;
  for (int i = 0; i < numPixels; ++i) {
    const sgPixel in = pixels[i];
    pixels[i] = Vec3ToPixel(exp3(PixelToVec3(in), exp), in.a);
  }
}

// Resolve the final colour of every covered pixel.
// A pixel whose depth still equals DepthClearValue is treated as not drawn and
// is left untouched. For every other pixel, the texture recorded in the
// texture ID buffer is sampled at the recorded texture coordinates and the
// result is written to the colour buffer.
void sgLighting(swgfx* gfx) {
  assert(gfx);
  const int numPixels = gfx->dims.x * gfx->dims.y;
  for (int px = 0; px < numPixels; ++px) {
    if (gfx->depth[px] == DepthClearValue) {
      continue; // Nothing was rasterized here.
    }
    const sgTexture* tex = &gfx->textureRegister[gfx->texture[px]];
    // TODO: Actual lighting.
    gfx->colour[px] = Sample(tex->image, tex->filter, gfx->texcoords[px]);
  }
}

// Run ImageExp over a width x height image, in place, with exponent 2.2.
// 'gfx' is only checked for null.
void sgGamma(swgfx* gfx, sgPixel* pixels, int width, int height) {
  assert(gfx);
  assert(pixels);
  const R exponent = 2.2f;
  ImageExp(pixels, width, height, exponent);
}

// Run ImageExp over a width x height image, in place, with exponent 1/2.2
// (the reciprocal of the exponent used by sgGamma). 'gfx' is only checked for
// null.
void sgGammaInv(swgfx* gfx, sgPixel* pixels, int width, int height) {
  assert(gfx);
  assert(pixels);
  const R exponent = 1.0f/2.2f;
  ImageExp(pixels, width, height, exponent);
}

// Return a copy of the context's counters.
sgCounters sgGetCounters(const swgfx* gfx) {
  assert(gfx);
  const sgCounters snapshot = gfx->counters;
  return snapshot;
}