/* Matrices: - Column-major math convention. - Column-major memory storage. Coordinate systems: - Right-handed. - NDC in [-1, +1]. - Viewport goes up and to the right. - Window goes down and to the right. - (x,y) is the center of a pixel. - Top-left: (x - 1/2, y - 1/2) - Bottom-right: (x + 1/2, y + 1/2) */ #include #include #include // sqrt #include #include static constexpr sgTextureId DefaultTextureId = SWGFX_MAX_TEXTURES; static constexpr size_t SWGFX_TEXTURE_REGISTER_SIZE = SWGFX_MAX_TEXTURES + 1; static constexpr R DepthClearValue = 1.0f; static constexpr sgVec3 Up3 = (sgVec3){0,1,0}; typedef struct sgViewport_t { int x0, y0, width, height; } sgViewport_t; typedef struct sgAABB2 { sgVec2 pmin, pmax; } sgAABB2; // Column-major math, column-major storage. typedef struct sgMat4 { R val[4][4]; // (col, row) } sgMat4; typedef struct sgTexture { const sgImage* image; sgTextureFilter filter; } sgTexture; typedef struct swgfx { sgVec2i dims; // Colour buffer dimensions. sgPixel* colour; // Colour buffer. R* depth; // Depth buffer. sgTextureId* texture; // Texture ID buffer. sgVec2* texcoords; // Texture coords buffer. sgViewport_t viewport; sgMat4 model; // Model matrix. sgMat4 view; // View matrix. sgMat4 proj; // Projection matrix. // Pre-multiplied matrices. // The model matrix changes once per object, more frequently than view or // projection. View and projection are expected to change infrequently, maybe // once per frame. // Make it so that changing the model matrix only requires one matrix // multiplication (mvp = model * viewProj) and not two (mvp = model * view * projection) // before rendering the model's triangles. sgMat4 viewProj; // View-projection matrix. sgMat4 mvp; // Model-view-projection matrix. sgTexture* textureRegister; // Indexed by texture id. sgTextureId activeTexture; sgPixel defaultPixel; // The single-pixel of the default texture. sgImage defaultImage; // Image for the default texture. sgCounters counters; } swgfx; static inline int mod(int a, int m) { return (m + (a % m)) % m; } static inline R frac(R a) { return a - (R)((int)a); } static inline int imin(int a, int b) { return (a <= b) ? a : b; } static inline int imax(int a, int b) { return (a >= b) ? a : b; } static inline R rmin(R a, R b) { return (a <= b) ? a : b; } static inline R rmax(R a, R b) { return (a >= b) ? a : b; } static inline R lerp(R a, R b, R t) { return a + t*(b-a); } static inline R mod1(R a, R m) { return fmodf(1.f + fmodf(a, m), 1.f); } static inline sgVec2i min2i(sgVec2i a, sgVec2i b) { return (sgVec2i){.x = imin(a.x, b.x), .y = imin(a.y, b.y) }; } static inline sgVec2i max2i(sgVec2i a, sgVec2i b) { return (sgVec2i){.x = imax(a.x, b.x), .y = imax(a.y, b.y) }; } static inline sgVec2 min2(sgVec2 a, sgVec2 b) { return (sgVec2){.x = rmin(a.x, b.x), .y = rmin(a.y, b.y) }; } static inline sgVec2 max2(sgVec2 a, sgVec2 b) { return (sgVec2){.x = rmax(a.x, b.x), .y = rmax(a.y, b.y) }; } static inline sgVec2 add2(sgVec2 a, sgVec2 b) { return (sgVec2){a.x + b.x, a.y + b.y}; } static inline sgVec2 sub2(sgVec2 a, sgVec2 b) { return (sgVec2){a.x - b.x, a.y - b.y}; } static inline sgVec2 scale2(sgVec2 v, R s) { return (sgVec2){v.x * s, v.y * s}; } static inline sgVec2 frac2(sgVec2 v) { return (sgVec2){frac(v.x), frac(v.y)}; } static inline sgVec2 lerp2(sgVec2 a, sgVec2 b, R t) { return add2(a, scale2(sub2(b,a), t)); } static inline sgVec2 mod2(sgVec2 v, R m) { return (sgVec2){mod1(v.x, m), mod1(v.y, m)}; } static inline sgVec3 add3(sgVec3 a, sgVec3 b) { return (sgVec3){a.x + b.x, a.y + b.y, a.z + b.z}; } static inline sgVec3 neg3(sgVec3 v) { return (sgVec3){-v.x, -v.y, -v.z}; } static inline sgVec3 sub3(sgVec3 a, sgVec3 b) { return (sgVec3){a.x - b.x, a.y - b.y, a.z - b.z}; } static inline sgVec3 div3(sgVec3 a, sgVec3 b) { return (sgVec3){a.x / b.x, a.y / b.y, a.z / b.z}; } static inline sgVec3 scale3(sgVec3 v, R s) { return (sgVec3){v.x * s, v.y * s, v.z * s}; } static inline sgVec3 exp3(sgVec3 v, R exp) { return (sgVec3){powf(v.x, exp), powf(v.y, exp), powf(v.z, exp)};} static inline R dot3(sgVec3 a, sgVec3 b) { return a.x * b.x + a.y * b.y + a.z * b.z; } static inline R normsq3(sgVec3 v) { return v.x * v.x + v.y * v.y + v.z * v.z; } static inline R norm3 (sgVec3 v) { return (R)sqrt(normsq3(v)); } static inline sgVec4 add4(sgVec4 a, sgVec4 b) { return (sgVec4){a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w}; } static inline sgVec4 sub4(sgVec4 a, sgVec4 b) { return (sgVec4){a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w}; } static inline sgVec4 scale4(sgVec4 v, R s) { return (sgVec4){v.x * s, v.y * s, v.z * s, v.w * s}; } static inline sgVec4 lerp4(sgVec4 a, sgVec4 b, R t) { return (sgVec4){ .x = a.x + t * (b.x - a.x), .y = a.y + t * (b.y - a.y), .z = a.z + t * (b.z - a.z), .w = a.w + t * (b.w - a.w)}; } /// Return the curl of 'a' towards 'b', which is defined as the z-coordinate of /// the cross product a x b, or as the determinant det(a,b). /// /// The curl of 'a' towards 'b' is positive if 'a' curls towards 'b' like the /// positive x-axis curls towards the positive y-axis. static inline R curl2(sgVec2 a, sgVec2 b) { return (a.x * b.y) - (a.y * b.x); } static inline sgVec3 cross3(sgVec3 a, sgVec3 b) { return (sgVec3) { a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x}; } static inline sgVec3 normalize3(sgVec3 v) { const R n = norm3(v); return (n > 0) ? (sgVec3){v.x / n, v.y / n, v.z / n} : (sgVec3){0, 0, 0}; } static inline sgVec2 Vec2FromVec4(sgVec4 v) { return (sgVec2){v.x, v.y}; } static inline sgVec4 Vec4FromVec3(sgVec3 v, R w) { return (sgVec4){v.x, v.y, v.z, w}; } static inline sgMat4 Mat4( R m00, R m01, R m02, R m03, // v0.x v1.x v2.x v3.x R m10, R m11, R m12, R m13, // v0.y v1.y v2.y v3.y R m20, R m21, R m22, R m23, // v0.z v1.z v2.z v3.z R m30, R m31, R m32, R m33) { // v0.w v1.w v2.w v3.w return (sgMat4) { .val = {{m00, m10, m20, m30}, // col 0 {m01, m11, m21, m31}, // col 1 {m02, m12, m22, m32}, // col 2 {m03, m13, m23, m33}}}; // col 3 } static inline sgMat4 Mat4FromVec3(sgVec3 right, sgVec3 up, sgVec3 forward, sgVec3 position) { return Mat4( right.x, up.x, forward.x, position.x, right.y, up.y, forward.y, position.y, right.z, up.z, forward.z, position.z, 0, 0, 0, 1); } static inline R Mat4At(sgMat4 m, int row, int col) { return m.val[col][row]; } static inline sgVec3 Mat4v0(sgMat4 m) { return *((sgVec3*)m.val[0]); } static inline sgVec3 Mat4v1(sgMat4 m) { return *((sgVec3*)m.val[1]); } static inline sgVec3 Mat4v2(sgMat4 m) { return *((sgVec3*)m.val[2]); } static inline sgVec3 Mat4v3(sgMat4 m) { return *((sgVec3*)m.val[3]); } static inline sgMat4 Mat4Mul(sgMat4 A, sgMat4 B) { R m00 = Mat4At(A, 0, 0) * Mat4At(B, 0, 0) + Mat4At(A, 0, 1) * Mat4At(B, 1, 0) + Mat4At(A, 0, 2) * Mat4At(B, 2, 0) + Mat4At(A, 0, 3) * Mat4At(B, 3, 0); R m01 = Mat4At(A, 0, 0) * Mat4At(B, 0, 1) + Mat4At(A, 0, 1) * Mat4At(B, 1, 1) + Mat4At(A, 0, 2) * Mat4At(B, 2, 1) + Mat4At(A, 0, 3) * Mat4At(B, 3, 1); R m02 = Mat4At(A, 0, 0) * Mat4At(B, 0, 2) + Mat4At(A, 0, 1) * Mat4At(B, 1, 2) + Mat4At(A, 0, 2) * Mat4At(B, 2, 2) + Mat4At(A, 0, 3) * Mat4At(B, 3, 2); R m03 = Mat4At(A, 0, 0) * Mat4At(B, 0, 3) + Mat4At(A, 0, 1) * Mat4At(B, 1, 3) + Mat4At(A, 0, 2) * Mat4At(B, 2, 3) + Mat4At(A, 0, 3) * Mat4At(B, 3, 3); R m10 = Mat4At(A, 1, 0) * Mat4At(B, 0, 0) + Mat4At(A, 1, 1) * Mat4At(B, 1, 0) + Mat4At(A, 1, 2) * Mat4At(B, 2, 0) + Mat4At(A, 1, 3) * Mat4At(B, 3, 0); R m11 = Mat4At(A, 1, 0) * Mat4At(B, 0, 1) + Mat4At(A, 1, 1) * Mat4At(B, 1, 1) + Mat4At(A, 1, 2) * Mat4At(B, 2, 1) + Mat4At(A, 1, 3) * Mat4At(B, 3, 1); R m12 = Mat4At(A, 1, 0) * Mat4At(B, 0, 2) + Mat4At(A, 1, 1) * Mat4At(B, 1, 2) + Mat4At(A, 1, 2) * Mat4At(B, 2, 2) + Mat4At(A, 1, 3) * Mat4At(B, 3, 2); R m13 = Mat4At(A, 1, 0) * Mat4At(B, 0, 3) + Mat4At(A, 1, 1) * Mat4At(B, 1, 3) + Mat4At(A, 1, 2) * Mat4At(B, 2, 3) + Mat4At(A, 1, 3) * Mat4At(B, 3, 3); R m20 = Mat4At(A, 2, 0) * Mat4At(B, 0, 0) + Mat4At(A, 2, 1) * Mat4At(B, 1, 0) + Mat4At(A, 2, 2) * Mat4At(B, 2, 0) + Mat4At(A, 2, 3) * Mat4At(B, 3, 0); R m21 = Mat4At(A, 2, 0) * Mat4At(B, 0, 1) + Mat4At(A, 2, 1) * Mat4At(B, 1, 1) + Mat4At(A, 2, 2) * Mat4At(B, 2, 1) + Mat4At(A, 2, 3) * Mat4At(B, 3, 1); R m22 = Mat4At(A, 2, 0) * Mat4At(B, 0, 2) + Mat4At(A, 2, 1) * Mat4At(B, 1, 2) + Mat4At(A, 2, 2) * Mat4At(B, 2, 2) + Mat4At(A, 2, 3) * Mat4At(B, 3, 2); R m23 = Mat4At(A, 2, 0) * Mat4At(B, 0, 3) + Mat4At(A, 2, 1) * Mat4At(B, 1, 3) + Mat4At(A, 2, 2) * Mat4At(B, 2, 3) + Mat4At(A, 2, 3) * Mat4At(B, 3, 3); R m30 = Mat4At(A, 3, 0) * Mat4At(B, 0, 0) + Mat4At(A, 3, 1) * Mat4At(B, 1, 0) + Mat4At(A, 3, 2) * Mat4At(B, 2, 0) + Mat4At(A, 3, 3) * Mat4At(B, 3, 0); R m31 = Mat4At(A, 3, 0) * Mat4At(B, 0, 1) + Mat4At(A, 3, 1) * Mat4At(B, 1, 1) + Mat4At(A, 3, 2) * Mat4At(B, 2, 1) + Mat4At(A, 3, 3) * Mat4At(B, 3, 1); R m32 = Mat4At(A, 3, 0) * Mat4At(B, 0, 2) + Mat4At(A, 3, 1) * Mat4At(B, 1, 2) + Mat4At(A, 3, 2) * Mat4At(B, 2, 2) + Mat4At(A, 3, 3) * Mat4At(B, 3, 2); R m33 = Mat4At(A, 3, 0) * Mat4At(B, 0, 3) + Mat4At(A, 3, 1) * Mat4At(B, 1, 3) + Mat4At(A, 3, 2) * Mat4At(B, 2, 3) + Mat4At(A, 3, 3) * Mat4At(B, 3, 3); return Mat4( m00, m01, m02, m03, m10, m11, m12, m13, m20, m21, m22, m23, m30, m31, m32, m33); } static inline sgVec3 Mat4MulVec3(sgMat4 m, sgVec3 v, R w) { return (sgVec3) { .x = Mat4At(m, 0, 0) * v.x + Mat4At(m, 0, 1) * v.y + Mat4At(m, 0, 2) * v.z + Mat4At(m, 0, 3) * w, .y = Mat4At(m, 1, 0) * v.x + Mat4At(m, 1, 1) * v.y + Mat4At(m, 1, 2) * v.z + Mat4At(m, 1, 3) * w, .z = Mat4At(m, 2, 0) * v.x + Mat4At(m, 2, 1) * v.y + Mat4At(m, 2, 2) * v.z + Mat4At(m, 2, 3) * w}; } static inline sgVec4 Mat4MulVec4(sgMat4 m, sgVec4 v) { sgVec4 u; u.x = Mat4At(m, 0, 0) * v.x + Mat4At(m, 0, 1) * v.y + Mat4At(m, 0, 2) * v.z + Mat4At(m, 0, 3) * v.w; u.y = Mat4At(m, 1, 0) * v.x + Mat4At(m, 1, 1) * v.y + Mat4At(m, 1, 2) * v.z + Mat4At(m, 1, 3) * v.w; u.z = Mat4At(m, 2, 0) * v.x + Mat4At(m, 2, 1) * v.y + Mat4At(m, 2, 2) * v.z + Mat4At(m, 2, 3) * v.w; u.w = Mat4At(m, 3, 0) * v.x + Mat4At(m, 3, 1) * v.y + Mat4At(m, 3, 2) * v.z + Mat4At(m, 3, 3) * v.w; return u; } static inline sgMat4 Mat4InverseTransform(sgMat4 m) { const sgVec3 r = Mat4v0(m); const sgVec3 u = Mat4v1(m); const sgVec3 f = Mat4v2(m); const sgVec3 t = Mat4v3(m); return Mat4( r.x, r.y, r.z, -dot3(r, t), u.x, u.y, u.z, -dot3(u, t), f.x, f.y, f.z, -dot3(f, t), 0.f, 0.f, 0.f, 1.f); } static inline sgMat4 Mat4Look(sgVec3 position, sgVec3 forward, sgVec3 up) { const sgVec3 right = normalize3(cross3(forward, up)); up = normalize3(cross3(right, forward)); return Mat4FromVec3(right, up, neg3(forward), position); } static inline sgMat4 Mat4Perspective(R fovy, R aspect, R near, R far) { assert(fovy > 0.f); assert(near < far); const R f = 1.f / tanf(fovy / 2.f); const R a = near - far; return Mat4( f / aspect, 0, 0, 0, 0, f, 0, 0, 0, 0, (far + near) / a, (2 * far * near / a), 0, 0, -1, 0); } static inline sgVec3 PixelToVec3(sgPixel p) { return (sgVec3){(R)p.r / 255.f, (R)p.g / 255.f, (R)p.b / 255.f}; } static inline sgPixel Vec3ToPixel(sgVec3 p, R a) { return (sgPixel){(uint8_t)(p.x * 255.f), (uint8_t)(p.y * 255.f), (uint8_t)(p.z * 255.f), (uint8_t)(a * 255.f)}; } static inline sgVec4 PixelToVec4(sgPixel p) { return (sgVec4){(R)p.r / 255.f, (R)p.g / 255.f, (R)p.b / 255.f, (R)p.a / 255.f}; } static inline sgPixel Vec4ToPixel(sgVec4 p) { return (sgPixel){(uint8_t)(p.x * 255.f), (uint8_t)(p.y * 255.f), (uint8_t)(p.z * 255.f), (uint8_t)(p.w * 255.f)}; } #ifndef _NDEBUG static bool InBounds(int width, int height, int x, int y) { return (0 <= x) && (x < width) && (0 <= y) && (y < height); } #endif // _NDEBUG static inline sgPixel* Pixel(swgfx* gfx, int x, int y) { assert(gfx); assert(gfx->colour); assert(InBounds(gfx->dims.x, gfx->dims.y, x, y)); return gfx->colour + (y * gfx->dims.x) + x; } static inline const R* Depth(swgfx* gfx, int x, int y) { assert(gfx); assert(gfx->depth); assert(InBounds(gfx->dims.x, gfx->dims.y, x, y)); return gfx->depth + (y * gfx->dims.x) + x; } static inline void SetPixelColour(swgfx* gfx, const sgVec2i p, sgPixel colour) { assert(gfx); *Pixel(gfx, p.x, p.y) = colour; #if SWGFX_PROFILING gfx->counters.pixels++; #endif // SWGFX_PROFILING } static inline void SetPixelDeferred(swgfx* gfx, const sgVec2i p, R depth, sgTextureId texid, sgVec2 uv) { assert(gfx); gfx->depth[(p.y * gfx->dims.x) + p.x] = depth; gfx->texture[(p.y * gfx->dims.x) + p.x] = texid; gfx->texcoords[(p.y * gfx->dims.x) + p.x] = uv; #if SWGFX_PROFILING gfx->counters.pixels++; #endif // SWGFX_PROFILING } static inline sgPixel ReadTexture(const sgImage* texture, sgVec2i xy) { assert(texture); assert(texture->pixels); assert(InBounds(texture->width, texture->height, xy.x, xy.y)); return texture->pixels[xy.y * texture->width + xy.x]; } // Output normalized to [0,1]. static inline sgVec4 ReadTextureFloat(const sgImage* texture, sgVec2i xy) { return PixelToVec4(ReadTexture(texture, xy)); } static inline sgVec2i UvToIndex(const sgImage* texture, sgVec2 uv) { assert(texture); return (sgVec2i){ (int)(uv.x * (R)(texture->width - 1)), (int)(uv.y * (R)(texture->height - 1))}; } static inline sgVec2i TextureRepeat(const sgImage* texture, sgVec2i p) { return (sgVec2i){mod(p.x, texture->width), mod(p.y, texture->height)}; } static inline sgPixel FilterNearest(const sgImage* texture, sgVec2 uv) { assert(texture); assert(texture->pixels); const sgVec2i xy = UvToIndex(texture, uv); const sgVec2i xy2 = TextureRepeat(texture, xy); return ReadTexture(texture, xy2); } static inline sgPixel FilterBilinear(const sgImage* texture, sgVec2 uv) { assert(texture); assert(texture->pixels); #define ADDR(x,y) TextureRepeat(texture, (sgVec2i){x,y}) const sgVec2 uv01 = mod2(uv, 1.f); // Find the closest grid vertex, then interpolate the 4 neighbouring pixel // centers. const sgVec2i tl = UvToIndex(texture, uv01); const sgVec2i tr = ADDR(tl.x+1, tl.y); const sgVec2i bl = ADDR(tl.x, tl.y+1); const sgVec2i br = ADDR(tl.x+1, tl.y+1); const sgVec2 t = frac2(uv01); const sgVec4 tl_pix = ReadTextureFloat(texture, tl); const sgVec4 tr_pix = ReadTextureFloat(texture, tr); const sgVec4 bl_pix = ReadTextureFloat(texture, bl); const sgVec4 br_pix = ReadTextureFloat(texture, br); const sgVec4 x1 = lerp4(tl_pix, tr_pix, t.x); const sgVec4 x2 = lerp4(bl_pix, br_pix, t.x); const sgVec4 y = lerp4(x1, x2, t.y); return Vec4ToPixel(y); } // TODO: Mipmapping. // TODO: Clamping and other addressing strategies. static inline sgPixel Sample(const sgImage* texture, sgTextureFilter filter, sgVec2 uv) { switch (filter) { case sgNearest: return FilterNearest(texture, uv); case sgBilinear: return FilterBilinear(texture, uv); default: assert(false); return (sgPixel){0}; } } static inline sgAABB2 TriangleAabb2(sgVec2 p0, sgVec2 p1, sgVec2 p2) { return (sgAABB2){.pmin = min2(min2(p0, p1), p2), .pmax = max2(max2(p0, p1), p2)}; } static inline sgVec2i Clip(const swgfx* gfx, const sgVec2i p) { assert(gfx); constexpr sgVec2i lower = (sgVec2i){0,0}; const sgVec2i upper = (sgVec2i){gfx->viewport.width - 1, gfx->viewport.height - 1}; return max2i(lower, min2i(upper, p)); } static inline R BarycentricInterp(sgVec3 bar, R a, R b, R c) { return bar.x*a + bar.y*b + bar.z*c; } static inline sgVec2 BarycentricInterp2(sgVec3 bar, sgVec2 a, sgVec2 b, sgVec2 c) { return add2(add2(scale2(a, bar.x), scale2(b, bar.y)), scale2(c, bar.z)); } static inline R f(sgVec2 a, sgVec2 b, sgVec2 p) { return (a.y - b.y)*p.x + (b.x - a.x)*p.y + a.x*b.y - b.x*a.y; } static inline sgVec3 Barycentric(sgVec2 p0, sgVec2 p1, sgVec2 p2, sgVec2 p) { // There is no need to compute the third coordinate explicitly: a + b + c = 1. // But this results in a worse rasterization of the triangle along one of the edges. // It seems we can patch it with a small epsilon, though. // --- // Division by zero is only possible if the triangle has zero area. /*return (sgVec3){ f(p1, p2, p) / f(p1, p2, p0), f(p2, p0, p) / f(p2, p0, p1), f(p0, p1, p) / f(p0, p1, p2)};*/ const R b = f(p0, p2, p) / f(p0, p2, p1); const R c = f(p0, p1, p) / f(p0, p1, p2); const R a = /*f(p1, p2, p) / f(p1, p2, p0);*/1.f - b - c - (R)1e-7; return (sgVec3){a,b,c}; } static void DrawTriangle2(swgfx* gfx, const sgTri2* const tri) { assert(gfx); assert(tri); const sgVec2 p0 = (sgVec2){tri->p0.pos.x, tri->p0.pos.y}; const sgVec2 p1 = (sgVec2){tri->p1.pos.x, tri->p1.pos.y}; const sgVec2 p2 = (sgVec2){tri->p2.pos.x, tri->p2.pos.y}; const sgAABB2 bbox = TriangleAabb2(p0, p1, p2); // We consider (x,y) to be the pixel center. // Draw all pixels touched by the bounding box. TODO: Multi-sampling. sgVec2i pmin = (sgVec2i){(int)bbox.pmin.x, (int)bbox.pmin.y}; sgVec2i pmax = (sgVec2i){(int)(bbox.pmax.x + 0.5f), (int)(bbox.pmax.y + 0.5f)}; // Clip to screen space. pmin = Clip(gfx, pmin); pmax = Clip(gfx, pmax); const sgTexture* texture = &gfx->textureRegister[gfx->activeTexture]; // Draw. for (int y = pmin.y; y <= pmax.y; ++y) { for (int x = pmin.x; x <= pmax.x; ++x) { const sgVec2 p = (sgVec2){(R)x, (R)y}; // TODO: there is an incremental optimization to computing barycentric coordinates; // read more about it. const sgVec3 bar = Barycentric(p0, p1, p2, p); // We need to check the third coordinate. // a + b + c = 1 // So, e.g., if a >= 0 and b >= 0, then we have c <= 1, but we could also have c <= 0. // In the case c <= 0, then point is outside the triangle. if ((bar.x >= 0) && (bar.y >= 0) && (bar.z >= 0)) { assert((bar.x + bar.y + bar.z - 1e7) <= 1.f); const sgVec2 uv = BarycentricInterp2(bar, tri->p0.uv, tri->p1.uv, tri->p2.uv); const sgPixel colour = Sample(texture->image, texture->filter, uv); SetPixelColour(gfx, (sgVec2i){x,y}, colour); } } } #if SWGFX_PROFILING gfx->counters.triangles2++; #endif // SWGFX_PROFILING } static inline sgVec4 PerspDivide(sgVec4 v) { return (sgVec4){v.x / v.w, v.y / v.w, v.z / v.w, v.w}; } // TODO: Compute a viewport matrix in sgViewport() instead. static inline sgVec4 ViewportTransform(sgViewport_t vp, sgVec4 ndc) { return (sgVec4){ .x = (ndc.x+1.f) * ((R)vp.width/2.f) + (R)vp.x0, .y = (ndc.y+1.f) * ((R)vp.height/2.f) + (R)vp.y0, .z = ndc.z*0.5f + 0.5f, .w = ndc.w}; } static inline sgVec4 ViewportToWindow(sgViewport_t vp, sgVec4 p) { return (sgVec4){p.x, (R)vp.height - p.y, p.z, p.w}; } /// Line segment-plane intersection special-case for the near camera plane. /// All quantities assumed to be in camera space. /// outP = a + outT*(b-a) static inline R IntersectSegmentPlane(R near, const sgVec3* const a, const sgVec3* const b) { // D = near plane distance = perpendicular distance from the origin to the plane. // o = line origin = a // d = line direction = b-a // Plane normal = (0, 0, +1) --- Could be -1, need to be consistent with D. // Point in plane: p=(0, 0, -near) // <=> p dot n + D = 0 // === -near * n.z + D = 0 // === -near * 1 + D = 0 // === D = near // Denominator = n dot d = (0,0,1) dot d = d.z = (b.z - a.z) const R t = (-near - a->z) / (b->z - a->z); assert(t >= 0.f); assert(t <= 1.f); return t; } /// Interpolate depth and vertex attributes at the in/out vertex 'out'. static void InterpolateAttributes(const sgVert4* const a, const sgVert4* const b, R t, sgVert4* out) { assert(a); assert(b); assert(out); assert(t >= 0.f); assert(t <= 1.f); const sgVec4 d = sub4(b->pos, a->pos); // Line direction. out->pos = add4(a->pos, scale4(d, t)); out->uv = lerp2(a->uv, b->uv, t); } /// Clip a triangle, vertices in clip space. Return the number of output /// triangles. /// /// 4 possible cases: /// 1. All vertices in front of the camera near plane => draw. /// 2. All vertices behind => discard. /// 3. One vertex in front => draw 1 clipped triangle. /// 4. Two vertices in front => draw 2 clipped triangles. static inline int ClipTriangle(R near, const sgTri4* const tri, sgTri4 out[2]) { #define VERTEX(IDX) (&tri->p0)[IDX] #define VALID(X) ((0 <= (X)) && ((X) < 3)) #define IN_FRONT(P) (P.z >= -near) // +Z points into the screen in clip space. const bool f[3] = {IN_FRONT(tri->p0.pos), IN_FRONT(tri->p1.pos), IN_FRONT(tri->p2.pos)}; const int numFront = f[0] + f[1] + f[2]; int numTris; if (numFront == 3) { numTris = 1; out[0] = *tri; } else if (numFront == 2) { numTris = 2; int back = 0; for (; f[back] && (back < 3); ++back) {} assert(VALID(back)); assert(!f[back]); int front[2] = {(back+1)%3, (back+2)%3}; assert(VALID(front[0])); assert(VALID(front[1])); const sgVert4* const backVert = &VERTEX(back); sgVert4 p[2]; for (int i = 0; i < 2; ++i) { const R t = IntersectSegmentPlane(near, (const sgVec3*)&backVert->pos, (const sgVec3*)&VERTEX(front[i]).pos); InterpolateAttributes(backVert, &VERTEX(front[i]), t, &p[i]); } // We must preserve the winding order here for culling. // Note that p[i] corresponds to front[i] = back+(i+1). out[0] = (sgTri4){p[1], p[0], VERTEX(front[1])}; out[1] = (sgTri4){p[0], VERTEX(front[0]), VERTEX(front[1])}; } else if (numFront == 1) { numTris = 1; int front = 0; for (; !f[front] && (front < 3); ++front){} assert(VALID(front)); assert(f[front]); int back[2] = {(front+1)%3, (front+2)%3}; assert(VALID(back[0])); assert(VALID(back[1])); const sgVert4* const frontVert = &VERTEX(front); sgVert4 p[2]; for (int i = 0; i < 2; ++i) { const R t = IntersectSegmentPlane(near, (const sgVec3*)&frontVert->pos, (const sgVec3*)&VERTEX(back[i]).pos); InterpolateAttributes(frontVert, &VERTEX(back[i]), t, &p[i]); } // We must preserve the winding order here for culling. // Note that p[i] corresponds to back[i] = front+(i+1). out[0] = (sgTri4){*frontVert, p[0], p[1]}; } else { numTris = 0; } return numTris; #undef IN_FRONT #undef VALID #undef VERTEX } static inline int TransformTri(const swgfx* gfx, const sgTri3* const tri, sgTri4 out[2]) { assert(gfx); assert(tri); // Model to clip space. const sgVec4 p0_clip = Mat4MulVec4(gfx->mvp, Vec4FromVec3(tri->p0.pos, 1)); const sgVec4 p1_clip = Mat4MulVec4(gfx->mvp, Vec4FromVec3(tri->p1.pos, 1)); const sgVec4 p2_clip = Mat4MulVec4(gfx->mvp, Vec4FromVec3(tri->p2.pos, 1)); const sgTri4 tri_clip = { (sgVert4){ p0_clip, tri->p0.uv }, (sgVert4){ p1_clip, tri->p1.uv }, (sgVert4){ p2_clip, tri->p2.uv }}; // Clip. // Our perspective matrix maps the near plane to z=-1 in clip space. constexpr R near_clip = -1.f; const int numTris = ClipTriangle(near_clip, &tri_clip, out); assert((0 <= numTris) && (numTris <= 2)); for (int i = 0; i < numTris; ++i) { sgTri4* const tri4 = &out[i]; // Perspective divide. const sgVec4 p0_ndc = PerspDivide(tri4->p0.pos); const sgVec4 p1_ndc = PerspDivide(tri4->p1.pos); const sgVec4 p2_ndc = PerspDivide(tri4->p2.pos); // To viewport. const sgVec4 p0_vp = ViewportTransform(gfx->viewport, p0_ndc); const sgVec4 p1_vp = ViewportTransform(gfx->viewport, p1_ndc); const sgVec4 p2_vp = ViewportTransform(gfx->viewport, p2_ndc); // To window. const sgVec4 p0_wn = ViewportToWindow(gfx->viewport, p0_vp); const sgVec4 p1_wn = ViewportToWindow(gfx->viewport, p1_vp); const sgVec4 p2_wn = ViewportToWindow(gfx->viewport, p2_vp); // Output. tri4->p0.pos = p0_wn; tri4->p1.pos = p1_wn; tri4->p2.pos = p2_wn; } return numTris; } static void DrawTriangle3PostClip(swgfx* gfx, const sgTri4* const tri) { assert(gfx); assert(tri); const sgVec4 p0 = tri->p0.pos; const sgVec4 p1 = tri->p1.pos; const sgVec4 p2 = tri->p2.pos; const sgVec2 p0_2d = (sgVec2){p0.x, p0.y}; const sgVec2 p1_2d = (sgVec2){p1.x, p1.y}; const sgVec2 p2_2d = (sgVec2){p2.x, p2.y}; // Backface culling, assume front face = ccw. // In screen space, +Y goes down. // p0p1p2 is ccw <=> p0p1 curls negatively towards p0p2. If the curl is // positive (cw winding), cull. if (curl2(sub2(p1_2d, p0_2d), sub2(p2_2d, p0_2d)) > 0.f) { return; } const sgAABB2 bbox = TriangleAabb2(p0_2d, p1_2d, p2_2d); // We consider (x,y) to be the pixel center. // Draw all pixels touched by the bounding box. TODO: Multi-sampling. sgVec2i pmin = (sgVec2i){(int)bbox.pmin.x, (int)bbox.pmin.y}; sgVec2i pmax = (sgVec2i){(int)(bbox.pmax.x + 0.5f), (int)(bbox.pmax.y + 0.5f)}; // Clip to screen space. pmin = Clip(gfx, pmin); pmax = Clip(gfx, pmax); // Setup for perspective texture mapping. // 'w' is view-space z. const sgVec3 depths = (sgVec3){p0.z, p1.z, p2.z}; const sgVec3 one_over_zs = (sgVec3){1.f / p0.w, 1.f / p1.w, 1.f/ p2.w}; const sgVec3 u_over_zs = (sgVec3){tri->p0.uv.x / p0.w, tri->p1.uv.x / p1.w, tri->p2.uv.x / p2.w}; const sgVec3 v_over_zs = (sgVec3){tri->p0.uv.y / p0.w, tri->p1.uv.y / p1.w, tri->p2.uv.y / p2.w}; // Draw. for (int y = pmin.y; y <= pmax.y; ++y) { for (int x = pmin.x; x <= pmax.x; ++x) { const sgVec2 p = (sgVec2){(R)x, (R)y}; // TODO: there is an incremental optimization to computing barycentric coordinates; // read more about it. const sgVec3 bar = Barycentric(p0_2d, p1_2d, p2_2d, p); // We need to check the third coordinate. // a + b + c = 1 // So, e.g., if a >= 0 and b >= 0, then we have c <= 1, but we could also have c <= 0. // In the case c <= 0, then point is outside the triangle. if ((bar.x >= 0) && (bar.y >= 0) && (bar.z >= 0)) { assert((bar.x + bar.y + bar.z - 1e7) <= 1.f); const R p_one_over_z = dot3(bar, one_over_zs); const R p_u_over_z = dot3(bar, u_over_zs); const R p_v_over_z = dot3(bar, v_over_zs); const R p_depth = dot3(bar, depths); const R z = 1.f / p_one_over_z; const sgVec2 uv = (sgVec2){p_u_over_z * z, p_v_over_z * z}; const R* depth = Depth(gfx, x, y); if ((0.f <= p_depth) && (p_depth <= 1.f) && (p_depth <= *depth)) { // TODO: When doing lighting, need to tone-map here and apply inverse // gamma here. //const sgPixel colour = {(uint8_t)(bar.x*255.f), (uint8_t)(bar.y*255.f), (uint8_t)(bar.z*255.f), 255}; //const sgPixel colour = {(int)(z*255.f), (int)(z*255.f), (int)(z*255.f), 255}; //const sgPixel colour = {255, 0, 255, 255}; //const sgPixel colour = {(int)(uv.x * 255.f), (int)(uv.y * 255.f), 255, 255}; SetPixelDeferred(gfx, (sgVec2i){x,y}, p_depth, gfx->activeTexture, uv); } } } } } static void DrawTriangle3(swgfx* gfx, const sgTri3* const tri) { assert(gfx); assert(tri); sgTri4 tris[2]; const int numTris = TransformTri(gfx, tri, tris); assert((0 <= numTris) && (numTris <= 2)); for (int i = 0; i < numTris; ++i) { DrawTriangle3PostClip(gfx, &tris[i]); } #if SWGFX_PROFILING gfx->counters.triangles3++; #endif // SWGFX_PROFILING } #define is_pow2_or_0(X) ((X & (X - 1)) == 0) #define SG_ALIGN 64 #define SG_ALLOC(PP_MEM, COUNT, TYPE) (TYPE*)Alloc(PP_MEM, COUNT, sizeof(TYPE)) static void* AlignPtr(void* address) { assert(is_pow2_or_0(SG_ALIGN)); constexpr size_t mask = SG_ALIGN - 1; return (void*)(((uintptr_t)address + mask) & ~mask); } static size_t Align(size_t size) { static_assert(is_pow2_or_0(SG_ALIGN)); constexpr size_t mask = SG_ALIGN - 1; return (size + mask) & (~mask); } static void* Alloc(void** ppMem, size_t count, size_t size) { assert(ppMem); assert(*ppMem); assert(*ppMem == AlignPtr(*ppMem)); // Should already be aligned. const size_t total = Align(count * size); void* ptr = *ppMem; *ppMem = ptr + total; memset(ptr, 0, total); return ptr; } size_t sgMem(int width, int height) { const int N = width * height; return Align(sizeof(swgfx)) + Align(N * sizeof(sgPixel)) + // Colour buffer. Align(N * sizeof(R)) + // Depth buffer. Align(N * sizeof(sgTextureId)) + // Texture ID buffer. Align(N * sizeof(sgVec2)) + // Texture coords buffer. Align(SWGFX_TEXTURE_REGISTER_SIZE * sizeof(sgTexture)) + // Texture register. (SG_ALIGN - 1); // To make room to align allocations within the buffer. } swgfx* sgNew(int width, int height, void* mem) { const int N = width * height; void* aligned = AlignPtr(mem); // Uses the extra room we made in sgMem(). swgfx* gfx = SG_ALLOC(&aligned, 1, swgfx); gfx->dims = (sgVec2i){width, height}; gfx->colour = SG_ALLOC(&aligned, N, sgPixel); gfx->depth = SG_ALLOC(&aligned, N, R); gfx->texture = SG_ALLOC(&aligned, N, sgTextureId); gfx->texcoords = SG_ALLOC(&aligned, N, sgVec2); gfx->textureRegister = SG_ALLOC(&aligned, SWGFX_TEXTURE_REGISTER_SIZE, sgTexture); gfx->activeTexture = DefaultTextureId; gfx->defaultPixel = (sgPixel){255, 255, 255, 255}; gfx->defaultImage = (sgImage){ .width = 1, .height = 1, .pixels = &gfx->defaultPixel, }; gfx->textureRegister[DefaultTextureId] = (sgTexture){ .image = &gfx->defaultImage, .filter = sgNearest, }; return gfx; } void sgDel(swgfx** ppSwgfx) { assert(ppSwgfx); if (*ppSwgfx) { *ppSwgfx = nullptr; } } sgPixel* sgColourBuffer(swgfx* gfx) { assert(gfx); return gfx->colour; } void sgPresent(swgfx* gfx, sgVec2i dimensions, sgScreenPixel* screen) { assert(gfx); assert(screen); // Integer scaling only. assert((dimensions.x % gfx->dims.x) == 0); assert((dimensions.y % gfx->dims.y) == 0); const int sx = dimensions.x / gfx->dims.x; const int sy = dimensions.y / gfx->dims.y; const sgPixel* src = gfx->colour; for (int y = 0; y < gfx->dims.y; ++y, src += gfx->dims.x) { // Replicate each row 'sy' times. for (int yy = 0; yy < sy; ++yy) { const sgPixel* src_col = src; for (int x = 0; x < gfx->dims.x; ++x, ++src_col) { // Replicate each column 'sx' times. for (int xx = 0; xx < sx; ++xx, ++screen) { screen->r = src_col->r; screen->g = src_col->g; screen->b = src_col->b; screen->a = src_col->a; } } } } #if SWGFX_PROFILING gfx->counters.frames++; #endif // SWGFX_PROFILING } static void sgUpdateViewProjection(swgfx* gfx) { assert(gfx); gfx->viewProj = Mat4Mul(gfx->proj, gfx->view); } static void sgUpdateMvp(swgfx* gfx) { assert(gfx); gfx->mvp = Mat4Mul(gfx->viewProj, gfx->model); } void sgModelId(swgfx* gfx) { assert(gfx); sgModel(gfx, (sgVec3){0,0,0}, (sgVec3){1, 0, 0}, (sgVec3){0, 1, 0}, (sgVec3){0, 0, 1}); } void sgModel(swgfx* gfx, sgVec3 position, sgVec3 right, sgVec3 up, sgVec3 forward) { assert(gfx); gfx->model = Mat4FromVec3(right, up, forward, position); sgUpdateMvp(gfx); } void sgView(swgfx* gfx, sgVec3 position, sgVec3 forward) { assert(gfx); const sgMat4 camera = Mat4Look(position, forward, Up3); gfx->view = Mat4InverseTransform(camera); sgUpdateViewProjection(gfx); sgUpdateMvp(gfx); } void sgPerspective(swgfx* gfx, R fovy, R aspect, R near, R far) { assert(gfx); gfx->proj = Mat4Perspective(fovy, aspect, near, far); sgUpdateViewProjection(gfx); sgUpdateMvp(gfx); } void sgViewport(swgfx* gfx, int x0, int y0, int width, int height) { assert(gfx); assert(x0 >= 0); assert(y0 >= 0); assert((x0 + width) <= gfx->dims.x); assert((y0 + height) <= gfx->dims.y); gfx->viewport = (sgViewport_t){x0, y0, width, height}; } void sgTextureRegister(swgfx* gfx, sgTextureId id, const sgImage* image, sgTextureFilter filter) { assert(gfx); assert(id < SWGFX_MAX_TEXTURES); assert(id != DefaultTextureId); assert(image); gfx->textureRegister[id] = (sgTexture){image, filter}; } void sgTextureActivate(swgfx* gfx, sgTextureId id) { assert(gfx); assert(id < SWGFX_MAX_TEXTURES); gfx->activeTexture = id; } void sgClear(swgfx* gfx) { assert(gfx); const int N = gfx->dims.x * gfx->dims.y; memset(gfx->colour, 0, N * sizeof(*gfx->colour)); for (int i = 0; i < N; ++i) { gfx->depth[i] = DepthClearValue; } } void sgPixels(swgfx* gfx, size_t count, const sgVec2i* positions, sgPixel colour) { assert(gfx); for (size_t i = 0; i < count; ++i) { SetPixelColour(gfx, positions[i], colour); } } // TODO: DrawTriangle3 with clipping. Leave DrawTriangle2 to not clip for // performance; assume that 2D triangles are within bounds. // TODO: If the triangle is out of bounds, skip entirely. // TODO: Otherwise, rasterize the triangle the simple way and check whether each // individual pixel is within bounds; do not explicitly clip the triangle. // TODO: Actually, I think we can just clip the triangle's AABB and then walk // over those pixels instead of checking every individual pixel in the // non-clipped AABB. Edit: I think this doesn't work; draw it and you'll // see. Some pixels that should be rasterized will fall out of the clipped // AABB. void sgTriangles2(swgfx* gfx, size_t count, const sgTri2* tris) { assert(gfx); for (size_t i = 0; i < count; ++i) { DrawTriangle2(gfx, &tris[i]); } } void sgTriangles(swgfx* gfx, size_t count, const sgTri3* tris, const sgNormal*) { assert(gfx); assert(tris); for (size_t i = 0; i < count; ++i) { const sgTri3* tri = &tris[i]; DrawTriangle3(gfx, tri); } } void sgTrianglesIndexed(swgfx* gfx, size_t numIndices, const sgIdx* indices, const sgVec3* positions, const sgVec2* texcoords) { assert(gfx); assert(indices); assert(positions); assert(texcoords); for (size_t i = 0; i < numIndices; i+=3) { const sgIdx i0 = indices[i]; const sgIdx i1 = indices[i+1]; const sgIdx i2 = indices[i+2]; const sgVec3 p0 = positions[i0]; const sgVec3 p1 = positions[i1]; const sgVec3 p2 = positions[i2]; const sgVec2 uv0 = texcoords[i0]; const sgVec2 uv1 = texcoords[i1]; const sgVec2 uv2 = texcoords[i2]; const sgTri3 tri = (sgTri3){ (sgVert3){p0, uv0}, (sgVert3){p1, uv1}, (sgVert3){p2, uv2}}; DrawTriangle3(gfx, &tri); } } void sgTrianglesIndexedNonUniform(swgfx* gfx, size_t numTris, const sgTriIdx* tris, const sgVec3* positions, const sgVec2* texcoords) { assert(gfx); assert(tris); assert(positions); assert(texcoords); for (size_t t = 0; t < numTris; ++t) { const sgTriIdx* triIdx = &tris[t]; const sgTri3 tri = (sgTri3){ (sgVert3){positions[triIdx->v0.pos], texcoords[triIdx->v0.uv]}, (sgVert3){positions[triIdx->v1.pos], texcoords[triIdx->v1.uv]}, (sgVert3){positions[triIdx->v2.pos], texcoords[triIdx->v2.uv]}}; DrawTriangle3(gfx, &tri); } } static void ImageExp(sgPixel* pixels, int width, int height, R exp) { assert(pixels); for (int i = 0; i < width * height; ++i) { sgPixel* p = &pixels[i]; *p = Vec3ToPixel(exp3(PixelToVec3(*p), exp), p->a); } } void sgLighting(swgfx* gfx) { assert(gfx); const int N = gfx->dims.x * gfx->dims.y; for (int i = 0; i < N; ++i) { const R depth = gfx->depth[i]; if (depth != DepthClearValue) { const sgTextureId texid = gfx->texture[i]; const sgTexture* texture = &gfx->textureRegister[texid]; const sgVec2 uv = gfx->texcoords[i]; sgPixel* colour = &gfx->colour[i]; // TODO: Actual lighting. const sgPixel albedo = Sample(texture->image, texture->filter, uv); *colour = albedo; } } } void sgGamma(swgfx* gfx, sgPixel* pixels, int width, int height) { assert(gfx); assert(pixels); ImageExp(pixels, width, height, 2.2f); } void sgGammaInv(swgfx* gfx, sgPixel* pixels, int width, int height) { assert(gfx); assert(pixels); ImageExp(pixels, width, height, 1.0f/2.2f); } sgCounters sgGetCounters(const swgfx* gfx) { assert(gfx); return gfx->counters; }