/*
Matrices:
  - Column-major math convention.
  - Column-major memory storage.

Coordinate systems:
  - Right-handed.
  - NDC in [-1, +1].
*/
#include <swgfx.h>

#include <assert.h>
#include <math.h>   // sqrt
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

// World up direction (+Y); sgCam passes it to Mat4Look as the up hint.
// Initialized with a plain brace list: a compound literal is not guaranteed to
// be a constant expression in ISO C, so using one to initialize an object with
// static storage duration relies on a compiler extension.
static const sgVec3 Up3 = {0, 1, 0};

// Viewport rectangle: origin (x0, y0) and size (width, height).
// sgCheck asserts that it fits inside the colour buffer.
typedef struct sgViewport_t {
  int x0;
  int y0;
  int width;
  int height;
} sgViewport_t;
// 2D triangle given by its three vertices.
typedef struct sgTri2 {
  sgVec2 p0;
  sgVec2 p1;
  sgVec2 p2;
} sgTri2;
// 2D axis-aligned bounding box: component-wise minimum and maximum corners.
typedef struct sgAABB2 {
  sgVec2 pmin;
  sgVec2 pmax;
} sgAABB2;

// 4x4 matrix of R.
// Column-major math, column-major storage: the first array index selects the
// column and the second the row, so element (row, col) lives at val[col][row].
// Use Mat4At() to read elements in the usual (row, col) order.
typedef struct sgMat4 {
  R val[4][4]; // (col, row)
} sgMat4;

// Renderer state.
typedef struct swgfx {
  // Dimensions of the colour buffer; dims.x is used as the row stride.
  sgVec2i dims;
  // Colour buffer, as handed in through sgColourBuffer. sgDel frees only this
  // struct, not the buffer.
  sgPixel* colour;
  // Viewport rectangle set by sgViewport.
  sgViewport_t viewport;
  // View matrix set by sgCam.
  sgMat4 view;
  // Projection matrix set by sgPerspective.
  sgMat4 proj;
} swgfx;

// Component-wise negation of v.
static inline sgVec3 neg3(sgVec3 v) {
  const R x = -v.x;
  const R y = -v.y;
  const R z = -v.z;
  return (sgVec3){x, y, z};
}

// Component-wise difference a - b.
static inline sgVec3 sub3(sgVec3 a, sgVec3 b) {
  const R dx = a.x - b.x;
  const R dy = a.y - b.y;
  const R dz = a.z - b.z;
  return (sgVec3){dx, dy, dz};
}

// Cross product a x b.
static inline sgVec3 cross3(sgVec3 a, sgVec3 b) {
  const R cx = a.y * b.z - a.z * b.y;
  const R cy = a.z * b.x - a.x * b.z;
  const R cz = a.x * b.y - a.y * b.x;
  return (sgVec3){cx, cy, cz};
}

// Squared Euclidean length of v (dot product of v with itself).
static inline R normsq3(sgVec3 v) {
  return (v.x * v.x) + (v.y * v.y) + (v.z * v.z);
}

// Euclidean length of v.
static inline R norm3(sgVec3 v) {
  const R length_squared = normsq3(v);
  return sqrt(length_squared);
}

// Returns v scaled to unit length.
// Asserts that v is not the zero vector; with assertions disabled a zero
// vector divides by zero.
static inline sgVec3 normalize3(sgVec3 v) {
  const R length = norm3(v);
  assert(length > 0);
  const R ux = v.x / length;
  const R uy = v.y / length;
  const R uz = v.z / length;
  return (sgVec3){ux, uy, uz};
}

// Builds a matrix from its 16 elements written out in conventional row-by-row
// reading order (mRC = row R, column C), and stores them column-major.
static inline sgMat4 Mat4(
    R m00, R m01, R m02, R m03,
    R m10, R m11, R m12, R m13,
    R m20, R m21, R m22, R m23,
    R m30, R m31, R m32, R m33) {
  sgMat4 m;

  // Column 0.
  m.val[0][0] = m00;  m.val[0][1] = m10;  m.val[0][2] = m20;  m.val[0][3] = m30;
  // Column 1.
  m.val[1][0] = m01;  m.val[1][1] = m11;  m.val[1][2] = m21;  m.val[1][3] = m31;
  // Column 2.
  m.val[2][0] = m02;  m.val[2][1] = m12;  m.val[2][2] = m22;  m.val[2][3] = m32;
  // Column 3.
  m.val[3][0] = m03;  m.val[3][1] = m13;  m.val[3][2] = m23;  m.val[3][3] = m33;

  return m;
}

// Builds an affine matrix whose columns are, in order, right, up, forward and
// position. The three direction columns get w = 0 and the position column w = 1.
static inline sgMat4 Mat4FromVec3(sgVec3 right, sgVec3 up, sgVec3 forward, sgVec3 position) {
  // Storage is column-major, so each inner brace list is one column.
  return (sgMat4) {
    .val = {{right.x,    right.y,    right.z,    0},
            {up.x,       up.y,       up.z,       0},
            {forward.x,  forward.y,  forward.z,  0},
            {position.x, position.y, position.z, 1}}};
}

// Element of m at (row, col). Storage is indexed column first.
static inline R Mat4At(sgMat4 m, int row, int col) {
  const R* const column = m.val[col];
  return column[row];
}

// Matrix product A * B.
// Element (row, col) of the result is the dot product of row `row` of A with
// column `col` of B.
static inline sgMat4 Mat4Mul(sgMat4 A, sgMat4 B) {
  sgMat4 product;
  for (int col = 0; col < 4; ++col) {
    for (int row = 0; row < 4; ++row) {
      // Seed the sum with the k = 0 term (rather than with zero) and add the
      // remaining terms left to right, so the summation order matches the
      // written-out form a0*b0 + a1*b1 + a2*b2 + a3*b3.
      R dot = Mat4At(A, row, 0) * Mat4At(B, 0, col);
      for (int k = 1; k < 4; ++k) {
        dot = dot + Mat4At(A, row, k) * Mat4At(B, k, col);
      }
      product.val[col][row] = dot;
    }
  }
  return product;
}

// Multiplies m by the homogeneous vector (v.x, v.y, v.z, w) and returns the
// first three components of the result. The fourth row of m is not used and
// no division by the resulting w is performed.
static inline sgVec3 Mat4MulVec3(sgMat4 m, sgVec3 v, R w) {
  const R x = Mat4At(m, 0, 0) * v.x + Mat4At(m, 0, 1) * v.y + Mat4At(m, 0, 2) * v.z + Mat4At(m, 0, 3) * w;
  const R y = Mat4At(m, 1, 0) * v.x + Mat4At(m, 1, 1) * v.y + Mat4At(m, 1, 2) * v.z + Mat4At(m, 1, 3) * w;
  const R z = Mat4At(m, 2, 0) * v.x + Mat4At(m, 2, 1) * v.y + Mat4At(m, 2, 2) * v.z + Mat4At(m, 2, 3) * w;
  return (sgVec3){.x = x, .y = y, .z = z};
}

// Builds a matrix from a position, a viewing direction and an up hint.
// The columns of the result are: right, the re-orthogonalized up, -forward,
// and position.
// `forward` is used as given; it is not normalized here.
// Asserts (through normalize3) if forward and up are parallel.
// NOTE(review): the result has the camera basis and position as columns and is
// stored as the view matrix by sgCam -- confirm whether an inverse is intended.
static inline sgMat4 Mat4Look(sgVec3 position, sgVec3 forward, sgVec3 up) {
  const sgVec3 side    = normalize3(cross3(forward, up));
  // Recompute up so that it is perpendicular to both side and forward.
  const sgVec3 true_up = normalize3(cross3(side, forward));
  const sgVec3 back    = neg3(forward);
  return Mat4FromVec3(side, true_up, back, position);
}

// Builds a perspective projection matrix.
//   fovy   - vertical field of view, in radians (it is passed to tan()).
//   aspect - divides the x scale; presumably width / height -- confirm with callers.
//   near, far - clip plane distances; they must differ, since the matrix
//               divides by (near - far).
// Asserts that tan(fovy / 2) is positive.
static inline sgMat4 Mat4Perspective(R fovy, R aspect, R near, R far) {
  const R half_tan = tan(fovy / 2.0);
  assert(half_tan > 0.0);
  const R focal = 1.0 / half_tan;
  const R depth = near - far;

  const R sx = focal / aspect;
  const R sz = (far + near) / depth;
  const R tz = (2 * far * near / depth);

  return Mat4(
    sx, 0,      0,  0,
    0,  focal,  0,  0,
    0,  0,     sz, tz,
    0,  0,     -1,  0);
}

// Pointer to the first pixel of row y in an image whose rows are `width`
// pixels long.
static inline sgPixel* PixelRow(sgPixel* image, int width, int y) {
  const int offset = y * width;
  return image + offset;
}

// Pointer to pixel (x, y) in an image whose rows are `width` pixels long.
// No bounds checking is performed.
static inline sgPixel* Pixel(sgPixel* image, int width, int x, int y) {
  sgPixel* const row = image + (y * width);
  return row + x;
}

// Pointer to pixel (X, Y) of the colour buffer.
// Expects a `swgfx* gfx` to be in scope at the point of use.
#define XY(X, Y) Pixel(gfx->colour, gfx->dims.x, (X), (Y))

// Smaller of a and b; returns b when the comparison a <= b is false.
static inline R rmin(R a, R b) {
  if (a <= b) {
    return a;
  }
  return b;
}
// Larger of a and b; returns b when the comparison a >= b is false.
static inline R rmax(R a, R b) {
  if (a >= b) {
    return a;
  }
  return b;
}

// Component-wise minimum of a and b.
static inline sgVec2 min2(sgVec2 a, sgVec2 b) {
  const R lo_x = rmin(a.x, b.x);
  const R lo_y = rmin(a.y, b.y);
  return (sgVec2){.x = lo_x, .y = lo_y};
}

// Component-wise maximum of a and b.
static inline sgVec2 max2(sgVec2 a, sgVec2 b) {
  const R hi_x = rmax(a.x, b.x);
  const R hi_y = rmax(a.y, b.y);
  return (sgVec2){.x = hi_x, .y = hi_y};
}

static inline sgAABB2 TriangleAabb2(const sgTri2 tri) {
  return (sgAABB2){.pmin = min2(min2(tri.p0, tri.p1), tri.p2),
                   .pmax = max2(max2(tri.p0, tri.p1), tri.p2)};
}

// Implicit equation of the line through a and b, evaluated at p.
// The value is zero when p lies on that line (in particular at p = a and
// p = b) and varies linearly with p, so its sign tells which side of the line
// p is on. Barycentric() uses ratios of this function to compute triangle
// weights.
static inline R f(sgVec2 a, sgVec2 b, sgVec2 p) {
  return (a.y - b.y)*p.x + (b.x - a.x)*p.y + a.x*b.y - b.x*a.y;
}

// Barycentric coordinates of p with respect to tri, returned as
// (x, y, z) = (a, b, c), the weights of tri.p0, tri.p1 and tri.p2.
//   b is 0 on the line p0-p2 and 1 at p1.
//   c is 0 on the line p0-p1 and 1 at p2.
//   a is derived from the other two (minus a small epsilon, see below).
static inline sgVec3 Barycentric(const sgTri2 tri, sgVec2 p) {
  // There is no need to compute the third coordinate explicitly: a + b + c = 1.
  // But this results in a worse rasterization of the triangle along one of the edges.
  // It seems we can patch it with a small epsilon, though.
  // ---
  // Division by zero is only possible if the triangle has zero area.
  //
  // Fully explicit alternative, kept for reference:
  /*return (sgVec3){
    f(tri.p1, tri.p2, p) / f(tri.p1, tri.p2, tri.p0),
    f(tri.p2, tri.p0, p) / f(tri.p2, tri.p0, tri.p1),
    f(tri.p0, tri.p1, p) / f(tri.p0, tri.p1, tri.p2)};*/
  const R b = f(tri.p0, tri.p2, p) / f(tri.p0, tri.p2, tri.p1);
  const R c = f(tri.p0, tri.p1, p) / f(tri.p0, tri.p1, tri.p2);
  // The epsilon biases a slightly downwards so that points where a would be
  // exactly 0 fail the caller's strict (a > 0) test.
  const R a = /*f(tri.p1, tri.p2, p) / f(tri.p1, tri.p2, tri.p0);*/1 - b - c - 1e-7;
  return (sgVec3){a,b,c};
}

// True when X is zero or has exactly one bit set.
// X is parenthesized so that arguments containing operators of lower
// precedence than `&` or `-` (e.g. `a | b`) are evaluated as a unit.
// X is evaluated twice; do not pass expressions with side effects.
#define is_pow2_or_0(X) ((((X)) & ((X) - 1)) == 0)

// Rounds size up to the next multiple of SG_ALIGN.
// SG_ALIGN must be a non-zero power of two: with SG_ALIGN == 0 the mask would
// be all-ones and every size would round to 0.
// The addition wraps for sizes within SG_ALIGN - 1 of SIZE_MAX, in which case
// the result is smaller than size.
static size_t align(size_t size) {
  static_assert((SG_ALIGN > 0) && is_pow2_or_0(SG_ALIGN));
  constexpr size_t mask = SG_ALIGN - 1;
  return (size + mask) & (~mask);
}

// Allocates zero-initialized, SG_ALIGN-aligned storage for `count` elements of
// `size` bytes each. The byte count is rounded up to a multiple of SG_ALIGN.
// Returns a null pointer if the byte count overflows size_t or the allocation
// fails. Release the memory with sgFree() / free().
void* sgAlloc(size_t count, size_t size) {
  // Reject count * size overflow before multiplying.
  if ((size != 0) && (count > (SIZE_MAX / size))) {
    return NULL;
  }
  const size_t bytes = count * size;
  const size_t total = align(bytes);
  // align() wraps around for sizes close to SIZE_MAX.
  if (total < bytes) {
    return NULL;
  }
  void* const ptr = aligned_alloc(SG_ALIGN, total);
  if (ptr != NULL) {
    memset(ptr, 0, total);
  }
  return ptr;
}

// Frees the memory *pp points to and resets *pp to a null pointer, so that a
// repeated call is harmless. pp itself must not be null.
void sgFree(void** pp) {
  assert(pp);
  // free() accepts a null pointer, so no guard is needed.
  free(*pp);
  *pp = NULL;
}

swgfx* sgNew() {
  swgfx* gfx = SG_ALIGN_ALLOC(1, swgfx);
  return gfx;
}

// Destroys the renderer *ppSwgfx points to and resets *ppSwgfx to a null
// pointer, so that a repeated call is harmless. ppSwgfx itself must not be null.
// Only the renderer struct is freed, not the colour buffer it refers to.
void sgDel(swgfx** ppSwgfx) {
  assert(ppSwgfx);
  // free() accepts a null pointer, so no guard is needed.
  free(*ppSwgfx);
  *ppSwgfx = NULL;
}

// Sets the colour buffer to draw into, together with its dimensions.
// Only the pointer is stored; the pixels are not copied.
void sgColourBuffer(swgfx* gfx, sgVec2i dimensions, sgPixel* buffer) {
  assert(gfx);
  gfx->colour = buffer;
  gfx->dims   = dimensions;
}

// Copies the colour buffer to `screen`, enlarging it by an integer factor.
//   dimensions - size of `screen`; each component must be a multiple of the
//                corresponding colour buffer dimension (asserted).
//   screen     - destination, written densely row by row.
// Every source pixel becomes a block of (dimensions.x / dims.x) by
// (dimensions.y / dims.y) identical destination pixels.
void sgPresent(swgfx* gfx, sgVec2i dimensions, sgPixel* screen) {
  assert(gfx);
  assert(screen);
  // Integer scaling only.
  assert((dimensions.x % gfx->dims.x) == 0);
  assert((dimensions.y % gfx->dims.y) == 0);

  const int src_w   = gfx->dims.x;
  const int src_h   = gfx->dims.y;
  const int scale_x = dimensions.x / src_w;
  const int scale_y = dimensions.y / src_h;

  // The destination is filled strictly sequentially.
  sgPixel* out = screen;

  for (int row = 0; row < src_h; ++row) {
    const sgPixel* const src_row = gfx->colour + (row * src_w);
    // Emit this source row scale_y times.
    for (int rep_y = 0; rep_y < scale_y; ++rep_y) {
      for (int col = 0; col < src_w; ++col) {
        // Emit this source pixel scale_x times.
        for (int rep_x = 0; rep_x < scale_x; ++rep_x) {
          *out++ = src_row[col];
        }
      }
    }
  }
}

// Sets the view matrix from a camera position and viewing direction, using
// Up3 as the up hint.
void sgCam(swgfx* gfx, sgVec3 position, sgVec3 forward) {
  assert(gfx);
  const sgMat4 look = Mat4Look(position, forward, Up3);
  gfx->view = look;
}

// Sets the projection matrix to the perspective projection built by
// Mat4Perspective from the given parameters.
void sgPerspective(swgfx* gfx, R fovy, R aspect, R near, R far) {
  assert(gfx);
  const sgMat4 projection = Mat4Perspective(fovy, aspect, near, far);
  gfx->proj = projection;
}

// Sets the viewport rectangle. The values are stored as given; use sgCheck()
// to verify that the viewport fits inside the colour buffer.
void sgViewport(swgfx* gfx, int x0, int y0, int width, int height) {
  assert(gfx);
  gfx->viewport.x0     = x0;
  gfx->viewport.y0     = y0;
  gfx->viewport.width  = width;
  gfx->viewport.height = height;
}

// Sets every byte of the colour buffer to zero.
void sgClear(swgfx* gfx) {
  assert(gfx);
  // Widen before multiplying: dims.x * dims.y evaluated in int can overflow
  // for large buffers.
  const size_t pixel_count = (size_t)gfx->dims.x * (size_t)gfx->dims.y;
  if (pixel_count == 0) {
    // Nothing to clear; also avoids calling memset on a buffer pointer that
    // may not have been set yet.
    return;
  }
  memset(gfx->colour, 0, pixel_count * sizeof(sgPixel));
}

// Writes `colour` to each of the `count` positions in the colour buffer.
// Positions are not bounds-checked; they must lie inside the buffer.
void sgPixels(swgfx* gfx, size_t count, const sgVec2i* positions, sgPixel colour) {
  assert(gfx);
  size_t index = 0;
  while (index < count) {
    const sgVec2i where = positions[index];
    sgPixel* const target = Pixel(gfx->colour, gfx->dims.x, where.x, where.y);
    *target = colour;
    ++index;
  }
}

// Rasterizes one 2D triangle in white.
// Visits every integer point of the triangle's bounding box and plots those
// whose three barycentric coordinates are all strictly positive.
// No clipping is done; the triangle is expected to lie inside the colour buffer.
static void DrawTriangle2(swgfx* gfx, const sgTri2* tri) {
  assert(gfx);
  assert(tri);
  const sgPixel white  = (sgPixel){255, 255, 255, 255};
  const sgAABB2 bounds = TriangleAabb2(*tri);
  for (int row = bounds.pmin.y; row <= bounds.pmax.y; ++row) {
    for (int col = bounds.pmin.x; col <= bounds.pmax.x; ++col) {
      // TODO: there is an incremental optimization to computing barycentric coordinates;
      // read more about it.
      const sgVec3 weights = Barycentric(*tri, (sgVec2){col, row});
      // All three weights must be tested: they sum to 1, so two positive
      // weights only bound the third from above; it can still be negative,
      // which puts the point outside the triangle.
      if (!(weights.x > 0) || !(weights.y > 0) || !(weights.z > 0)) {
        continue;
      }
      const sgVec2i pixel = (sgVec2i){col, row};
      sgPixels(gfx, 1, &pixel, white);
    }
  }
}

// TODO: DrawTriangle3 with clipping. Leave DrawTriangle2 to not clip for
//       performance; assume that 2D triangles are within bounds.
// TODO: If the triangle is out of bounds, skip entirely.
// TODO: Otherwise, rasterize the triangle the simple way and check whether each
//       individual pixel is within bounds; do not explicitly clip the triangle.
// TODO: Actually, I think we can just clip the triangle's AABB and then walk
//       over those pixels instead of checking every individual pixel in the
//       non-clipped AABB. Edit: I think this doesn't work; draw it and you'll
//       see. Some pixels that should be rasterized will fall out of the clipped
//       AABB.

// Draws `count` 2D triangles, in array order.
void sgTriangles2(swgfx* gfx, size_t count, const sgTri2* tris) {
  assert(gfx);
  size_t index = 0;
  while (index < count) {
    DrawTriangle2(gfx, tris + index);
    ++index;
  }
}

// Draws `count` 3D triangles, in array order.
// For now each triangle is flattened by dropping z and rasterized as a 2D
// triangle; the view and projection matrices are not applied, and the normals
// argument is ignored.
void sgTriangles(swgfx* gfx, size_t count, const sgTri3* tris, const sgNormal*) {
  assert(gfx);
  for (size_t i = 0; i < count; ++i) {
    const sgTri3* const src = tris + i;
    // Ignore projection matrix for now. Rasterize 2D triangles.
    sgTri2 flat;
    flat.p0 = (sgVec2){src->p0.x, src->p0.y};
    flat.p1 = (sgVec2){src->p1.x, src->p1.y};
    flat.p2 = (sgVec2){src->p2.x, src->p2.y};
    DrawTriangle2(gfx, &flat);
  }
}

// Asserts that the viewport rectangle lies entirely inside the colour buffer:
// non-negative origin and size, and far edges not past the buffer dimensions.
static inline void AssertViewportWithinBuffer(swgfx* gfx) {
  assert(gfx);
  const sgViewport_t vp = gfx->viewport;
  (void)vp; // Only read by assertions; silences the warning under NDEBUG.
  // A negative origin or size would pass the upper-bound checks below while
  // still addressing pixels outside the buffer.
  assert(vp.x0 >= 0);
  assert(vp.y0 >= 0);
  assert(vp.width  >= 0);
  assert(vp.height >= 0);
  assert((vp.x0 + vp.width)  <= gfx->dims.x);
  assert((vp.y0 + vp.height) <= gfx->dims.y);
}

// Validates the renderer state with assertions. Currently this checks that
// the viewport lies within the colour buffer. It has no effect when
// assertions are compiled out (NDEBUG).
void sgCheck(swgfx* gfx) {
  assert(gfx);
  AssertViewportWithinBuffer(gfx);
}