From 5a079a2d114f96d4847d1ee305d5b7c16eeec50e Mon Sep 17 00:00:00 2001 From: 3gg <3gg@shellblade.net> Date: Sat, 27 Dec 2025 12:03:39 -0800 Subject: Initial commit --- contrib/SDL-3.2.8/src/audio/SDL_audioresample.c | 706 ++++++++++++++++++++++++ 1 file changed, 706 insertions(+) create mode 100644 contrib/SDL-3.2.8/src/audio/SDL_audioresample.c (limited to 'contrib/SDL-3.2.8/src/audio/SDL_audioresample.c') diff --git a/contrib/SDL-3.2.8/src/audio/SDL_audioresample.c b/contrib/SDL-3.2.8/src/audio/SDL_audioresample.c new file mode 100644 index 0000000..371002e --- /dev/null +++ b/contrib/SDL-3.2.8/src/audio/SDL_audioresample.c @@ -0,0 +1,706 @@ +/* + Simple DirectMedia Layer + Copyright (C) 1997-2025 Sam Lantinga + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. +*/ +#include "SDL_internal.h" + +#include "SDL_sysaudio.h" + +#include "SDL_audioresample.h" + +// SDL's resampler uses a "bandlimited interpolation" algorithm: +// https://ccrma.stanford.edu/~jos/resample/ + +// TODO: Support changing this at runtime? 
#if defined(SDL_SSE_INTRINSICS) || defined(SDL_NEON_INTRINSICS)
// In <current year>, SSE is basically mandatory anyway
// We want RESAMPLER_SAMPLES_PER_FRAME to be a multiple of 4, to make SIMD easier
#define RESAMPLER_ZERO_CROSSINGS 6
#else
#define RESAMPLER_ZERO_CROSSINGS 5
#endif

// Number of input frames combined to produce one output frame (both wings of the filter).
#define RESAMPLER_SAMPLES_PER_FRAME (RESAMPLER_ZERO_CROSSINGS * 2)

// For a given srcpos, `srcpos + frame` are sampled, where `-RESAMPLER_ZERO_CROSSINGS < frame <= RESAMPLER_ZERO_CROSSINGS`.
// Note, when upsampling, it is also possible to start sampling from `srcpos = -1`.
#define RESAMPLER_MAX_PADDING_FRAMES (RESAMPLER_ZERO_CROSSINGS + 1)

// More bits gives more precision, at the cost of a larger table.
#define RESAMPLER_BITS_PER_ZERO_CROSSING 3
#define RESAMPLER_SAMPLES_PER_ZERO_CROSSING (1 << RESAMPLER_BITS_PER_ZERO_CROSSING)

// The top RESAMPLER_BITS_PER_ZERO_CROSSING bits of the 32-bit source fraction select a filter,
// the remaining RESAMPLER_FILTER_INTERP_BITS bits are the interpolation position within it.
#define RESAMPLER_FILTER_INTERP_BITS (32 - RESAMPLER_BITS_PER_ZERO_CROSSING)
#define RESAMPLER_FILTER_INTERP_RANGE (1 << RESAMPLER_FILTER_INTERP_BITS)

// ResampleFrame is just a vector/matrix/matrix multiplication.
// It performs cubic interpolation of the filter, then multiplies that with the input.
+// dst = [1, frac, frac^2, frac^3] * filter * src + +// Cubic Polynomial +typedef union Cubic +{ + float v[4]; + +#ifdef SDL_SSE_INTRINSICS + // Aligned loads can be used directly as memory operands for mul/add + __m128 v128; +#endif + +#ifdef SDL_NEON_INTRINSICS + float32x4_t v128; +#endif + +} Cubic; + +static void ResampleFrame_Generic(const float *src, float *dst, const Cubic *filter, float frac, int chans) +{ + const float frac2 = frac * frac; + const float frac3 = frac * frac2; + + int i, chan; + float scales[RESAMPLER_SAMPLES_PER_FRAME]; + + for (i = 0; i < RESAMPLER_SAMPLES_PER_FRAME; ++i, ++filter) { + scales[i] = filter->v[0] + (filter->v[1] * frac) + (filter->v[2] * frac2) + (filter->v[3] * frac3); + } + + for (chan = 0; chan < chans; ++chan) { + float out = 0.0f; + + for (i = 0; i < RESAMPLER_SAMPLES_PER_FRAME; ++i) { + out += src[i * chans + chan] * scales[i]; + } + + dst[chan] = out; + } +} + +static void ResampleFrame_Mono(const float *src, float *dst, const Cubic *filter, float frac, int chans) +{ + const float frac2 = frac * frac; + const float frac3 = frac * frac2; + + int i; + float out = 0.0f; + + for (i = 0; i < RESAMPLER_SAMPLES_PER_FRAME; ++i, ++filter) { + // Interpolate between the nearest two filters + const float scale = filter->v[0] + (filter->v[1] * frac) + (filter->v[2] * frac2) + (filter->v[3] * frac3); + + out += src[i] * scale; + } + + dst[0] = out; +} + +static void ResampleFrame_Stereo(const float *src, float *dst, const Cubic *filter, float frac, int chans) +{ + const float frac2 = frac * frac; + const float frac3 = frac * frac2; + + int i; + float out0 = 0.0f; + float out1 = 0.0f; + + for (i = 0; i < RESAMPLER_SAMPLES_PER_FRAME; ++i, ++filter) { + // Interpolate between the nearest two filters + const float scale = filter->v[0] + (filter->v[1] * frac) + (filter->v[2] * frac2) + (filter->v[3] * frac3); + + out0 += src[i * 2 + 0] * scale; + out1 += src[i * 2 + 1] * scale; + } + + dst[0] = out0; + dst[1] = out1; +} + +#ifdef 
SDL_SSE_INTRINSICS +#define sdl_madd_ps(a, b, c) _mm_add_ps(a, _mm_mul_ps(b, c)) // Not-so-fused multiply-add + +static void SDL_TARGETING("sse") ResampleFrame_Generic_SSE(const float *src, float *dst, const Cubic *filter, float frac, int chans) +{ +#if RESAMPLER_SAMPLES_PER_FRAME != 12 +#error Invalid samples per frame +#endif + + __m128 f0, f1, f2; + + { + const __m128 frac1 = _mm_set1_ps(frac); + const __m128 frac2 = _mm_mul_ps(frac1, frac1); + const __m128 frac3 = _mm_mul_ps(frac1, frac2); + +// Transposed in SetupAudioResampler +// Explicitly use _mm_load_ps to workaround ICE in GCC 4.9.4 accessing Cubic.v128 +#define X(out) \ + out = _mm_load_ps(filter[0].v); \ + out = sdl_madd_ps(out, frac1, _mm_load_ps(filter[1].v)); \ + out = sdl_madd_ps(out, frac2, _mm_load_ps(filter[2].v)); \ + out = sdl_madd_ps(out, frac3, _mm_load_ps(filter[3].v)); \ + filter += 4 + + X(f0); + X(f1); + X(f2); + +#undef X + } + + if (chans == 2) { + // Duplicate each of the filter elements and multiply by the input + // Use two accumulators to improve throughput + __m128 out0 = _mm_mul_ps(_mm_loadu_ps(src + 0), _mm_unpacklo_ps(f0, f0)); + __m128 out1 = _mm_mul_ps(_mm_loadu_ps(src + 4), _mm_unpackhi_ps(f0, f0)); + out0 = sdl_madd_ps(out0, _mm_loadu_ps(src + 8), _mm_unpacklo_ps(f1, f1)); + out1 = sdl_madd_ps(out1, _mm_loadu_ps(src + 12), _mm_unpackhi_ps(f1, f1)); + out0 = sdl_madd_ps(out0, _mm_loadu_ps(src + 16), _mm_unpacklo_ps(f2, f2)); + out1 = sdl_madd_ps(out1, _mm_loadu_ps(src + 20), _mm_unpackhi_ps(f2, f2)); + + // Add the accumulators together + __m128 out = _mm_add_ps(out0, out1); + + // Add the lower and upper pairs together + out = _mm_add_ps(out, _mm_movehl_ps(out, out)); + + // Store the result + _mm_storel_pi((__m64 *)dst, out); + return; + } + + if (chans == 1) { + // Multiply the filter by the input + __m128 out = _mm_mul_ps(f0, _mm_loadu_ps(src + 0)); + out = sdl_madd_ps(out, f1, _mm_loadu_ps(src + 4)); + out = sdl_madd_ps(out, f2, _mm_loadu_ps(src + 8)); + + // Horizontal 
sum + __m128 shuf = _mm_shuffle_ps(out, out, _MM_SHUFFLE(2, 3, 0, 1)); + out = _mm_add_ps(out, shuf); + out = _mm_add_ss(out, _mm_movehl_ps(shuf, out)); + + _mm_store_ss(dst, out); + return; + } + + int chan = 0; + + // Process 4 channels at once + for (; chan + 4 <= chans; chan += 4) { + const float *in = &src[chan]; + __m128 out0 = _mm_setzero_ps(); + __m128 out1 = _mm_setzero_ps(); + +#define X(a, b, out) \ + out = sdl_madd_ps(out, _mm_loadu_ps(in), _mm_shuffle_ps(a, a, _MM_SHUFFLE(b, b, b, b))); \ + in += chans + +#define Y(a) \ + X(a, 0, out0); \ + X(a, 1, out1); \ + X(a, 2, out0); \ + X(a, 3, out1) + + Y(f0); + Y(f1); + Y(f2); + +#undef X +#undef Y + + // Add the accumulators together + __m128 out = _mm_add_ps(out0, out1); + + _mm_storeu_ps(&dst[chan], out); + } + + // Process the remaining channels one at a time. + // Channel counts 1,2,4,8 are already handled above, leaving 3,5,6,7 to deal with (looping 3,1,2,3 times). + // Without vgatherdps (AVX2), this gets quite messy. + for (; chan < chans; ++chan) { + const float *in = &src[chan]; + __m128 v0, v1, v2; + +#define X(x) \ + x = _mm_unpacklo_ps(_mm_load_ss(in), _mm_load_ss(in + chans)); \ + in += chans + chans; \ + x = _mm_movelh_ps(x, _mm_unpacklo_ps(_mm_load_ss(in), _mm_load_ss(in + chans))); \ + in += chans + chans + + X(v0); + X(v1); + X(v2); + +#undef X + + __m128 out = _mm_mul_ps(f0, v0); + out = sdl_madd_ps(out, f1, v1); + out = sdl_madd_ps(out, f2, v2); + + // Horizontal sum + __m128 shuf = _mm_shuffle_ps(out, out, _MM_SHUFFLE(2, 3, 0, 1)); + out = _mm_add_ps(out, shuf); + out = _mm_add_ss(out, _mm_movehl_ps(shuf, out)); + + _mm_store_ss(&dst[chan], out); + } +} + +#undef sdl_madd_ps +#endif + +#ifdef SDL_NEON_INTRINSICS +static void ResampleFrame_Generic_NEON(const float *src, float *dst, const Cubic *filter, float frac, int chans) +{ +#if RESAMPLER_SAMPLES_PER_FRAME != 12 +#error Invalid samples per frame +#endif + + float32x4_t f0, f1, f2; + + { + const float32x4_t frac1 = vdupq_n_f32(frac); + 
const float32x4_t frac2 = vmulq_f32(frac1, frac1); + const float32x4_t frac3 = vmulq_f32(frac1, frac2); + +// Transposed in SetupAudioResampler +#define X(out) \ + out = vmlaq_f32(vmlaq_f32(vmlaq_f32(filter[0].v128, filter[1].v128, frac1), filter[2].v128, frac2), filter[3].v128, frac3); \ + filter += 4 + + X(f0); + X(f1); + X(f2); + +#undef X + } + + if (chans == 2) { + float32x4x2_t g0 = vzipq_f32(f0, f0); + float32x4x2_t g1 = vzipq_f32(f1, f1); + float32x4x2_t g2 = vzipq_f32(f2, f2); + + // Duplicate each of the filter elements and multiply by the input + // Use two accumulators to improve throughput + float32x4_t out0 = vmulq_f32(vld1q_f32(src + 0), g0.val[0]); + float32x4_t out1 = vmulq_f32(vld1q_f32(src + 4), g0.val[1]); + out0 = vmlaq_f32(out0, vld1q_f32(src + 8), g1.val[0]); + out1 = vmlaq_f32(out1, vld1q_f32(src + 12), g1.val[1]); + out0 = vmlaq_f32(out0, vld1q_f32(src + 16), g2.val[0]); + out1 = vmlaq_f32(out1, vld1q_f32(src + 20), g2.val[1]); + + // Add the accumulators together + out0 = vaddq_f32(out0, out1); + + // Add the lower and upper pairs together + float32x2_t out = vadd_f32(vget_low_f32(out0), vget_high_f32(out0)); + + // Store the result + vst1_f32(dst, out); + return; + } + + if (chans == 1) { + // Multiply the filter by the input + float32x4_t out = vmulq_f32(f0, vld1q_f32(src + 0)); + out = vmlaq_f32(out, f1, vld1q_f32(src + 4)); + out = vmlaq_f32(out, f2, vld1q_f32(src + 8)); + + // Horizontal sum + float32x2_t sum = vadd_f32(vget_low_f32(out), vget_high_f32(out)); + sum = vpadd_f32(sum, sum); + + vst1_lane_f32(dst, sum, 0); + return; + } + + int chan = 0; + + // Process 4 channels at once + for (; chan + 4 <= chans; chan += 4) { + const float *in = &src[chan]; + float32x4_t out0 = vdupq_n_f32(0); + float32x4_t out1 = vdupq_n_f32(0); + +#define X(a, b, out) \ + out = vmlaq_f32(out, vld1q_f32(in), vdupq_lane_f32(a, b)); \ + in += chans + +#define Y(a) \ + X(vget_low_f32(a), 0, out0); \ + X(vget_low_f32(a), 1, out1); \ + X(vget_high_f32(a), 
0, out0); \ + X(vget_high_f32(a), 1, out1) + + Y(f0); + Y(f1); + Y(f2); + +#undef X +#undef Y + + // Add the accumulators together + float32x4_t out = vaddq_f32(out0, out1); + + vst1q_f32(&dst[chan], out); + } + + // Process the remaining channels one at a time. + // Channel counts 1,2,4,8 are already handled above, leaving 3,5,6,7 to deal with (looping 3,1,2,3 times). + for (; chan < chans; ++chan) { + const float *in = &src[chan]; + float32x4_t v0, v1, v2; + +#define X(x) \ + x = vld1q_dup_f32(in); \ + in += chans; \ + x = vld1q_lane_f32(in, x, 1); \ + in += chans; \ + x = vld1q_lane_f32(in, x, 2); \ + in += chans; \ + x = vld1q_lane_f32(in, x, 3); \ + in += chans + + X(v0); + X(v1); + X(v2); + +#undef X + + float32x4_t out = vmulq_f32(f0, v0); + out = vmlaq_f32(out, f1, v1); + out = vmlaq_f32(out, f2, v2); + + // Horizontal sum + float32x2_t sum = vadd_f32(vget_low_f32(out), vget_high_f32(out)); + sum = vpadd_f32(sum, sum); + + vst1_lane_f32(&dst[chan], sum, 0); + } +} +#endif + +// Calculate the cubic equation which passes through all four points. 
+// https://en.wikipedia.org/wiki/Ordinary_least_squares +// https://en.wikipedia.org/wiki/Polynomial_regression +static void CubicLeastSquares(Cubic *coeffs, float y0, float y1, float y2, float y3) +{ + // Least squares matrix for xs = [0, 1/3, 2/3, 1] + // [ 1.0 0.0 0.0 0.0 ] + // [ -5.5 9.0 -4.5 1.0 ] + // [ 9.0 -22.5 18.0 -4.5 ] + // [ -4.5 13.5 -13.5 4.5 ] + + coeffs->v[0] = y0; + coeffs->v[1] = -5.5f * y0 + 9.0f * y1 - 4.5f * y2 + y3; + coeffs->v[2] = 9.0f * y0 - 22.5f * y1 + 18.0f * y2 - 4.5f * y3; + coeffs->v[3] = -4.5f * y0 + 13.5f * y1 - 13.5f * y2 + 4.5f * y3; +} + +// Zeroth-order modified Bessel function of the first kind +// https://mathworld.wolfram.com/ModifiedBesselFunctionoftheFirstKind.html +static float BesselI0(float x) +{ + float sum = 0.0f; + float i = 1.0f; + float t = 1.0f; + x *= x * 0.25f; + + while (t >= sum * SDL_FLT_EPSILON) { + sum += t; + t *= x / (i * i); + ++i; + } + + return sum; +} + +// Pre-calculate 180 degrees of sin(pi * x) / pi +// The speedup from this isn't huge, but it also avoids precision issues. +// If sinf isn't available, SDL_sinf just calls SDL_sin. +// Know what SDL_sin(SDL_PI_F) equals? Not quite zero. +static void SincTable(float *table, int len) +{ + int i; + + for (i = 0; i < len; ++i) { + table[i] = SDL_sinf(i * (SDL_PI_F / len)) / SDL_PI_F; + } +} + +// Calculate Sinc(x/y), using a lookup table +static float Sinc(const float *table, int x, int y) +{ + float s = table[x % y]; + s = ((x / y) & 1) ? 
-s : s; + return (s * y) / x; +} + +static Cubic ResamplerFilter[RESAMPLER_SAMPLES_PER_ZERO_CROSSING][RESAMPLER_SAMPLES_PER_FRAME]; + +static void GenerateResamplerFilter(void) +{ + enum + { + // Generate samples at 3x the target resolution, so that we have samples at [0, 1/3, 2/3, 1] of each position + TABLE_SAMPLES_PER_ZERO_CROSSING = RESAMPLER_SAMPLES_PER_ZERO_CROSSING * 3, + TABLE_SIZE = RESAMPLER_ZERO_CROSSINGS * TABLE_SAMPLES_PER_ZERO_CROSSING, + }; + + // if dB > 50, beta=(0.1102 * (dB - 8.7)), according to Matlab. + const float dB = 80.0f; + const float beta = 0.1102f * (dB - 8.7f); + const float bessel_beta = BesselI0(beta); + const float lensqr = TABLE_SIZE * TABLE_SIZE; + + int i, j; + + float sinc[TABLE_SAMPLES_PER_ZERO_CROSSING]; + SincTable(sinc, TABLE_SAMPLES_PER_ZERO_CROSSING); + + // Generate one wing of the filter + // https://en.wikipedia.org/wiki/Kaiser_window + // https://en.wikipedia.org/wiki/Whittaker%E2%80%93Shannon_interpolation_formula + float filter[TABLE_SIZE + 1]; + filter[0] = 1.0f; + + for (i = 1; i <= TABLE_SIZE; ++i) { + float b = BesselI0(beta * SDL_sqrtf((lensqr - (i * i)) / lensqr)) / bessel_beta; + float s = Sinc(sinc, i, TABLE_SAMPLES_PER_ZERO_CROSSING); + filter[i] = b * s; + } + + // Generate the coefficients for each point + // When interpolating, the fraction represents how far we are between input samples, + // so we need to align the filter by "moving" it to the right. 
+ // + // For the left wing, this means interpolating "forwards" (away from the center) + // For the right wing, this means interpolating "backwards" (towards the center) + // + // The center of the filter is at the end of the left wing (RESAMPLER_ZERO_CROSSINGS - 1) + // The left wing is the filter, but reversed + // The right wing is the filter, but offset by 1 + // + // Since the right wing is offset by 1, this just means we interpolate backwards + // between the same points, instead of forwards + // interp(p[n], p[n+1], t) = interp(p[n+1], p[n+1-1], 1 - t) = interp(p[n+1], p[n], 1 - t) + for (i = 0; i < RESAMPLER_SAMPLES_PER_ZERO_CROSSING; ++i) { + for (j = 0; j < RESAMPLER_ZERO_CROSSINGS; ++j) { + const float *ys = &filter[((j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING) + i) * 3]; + + Cubic *fwd = &ResamplerFilter[i][RESAMPLER_ZERO_CROSSINGS - j - 1]; + Cubic *rev = &ResamplerFilter[RESAMPLER_SAMPLES_PER_ZERO_CROSSING - i - 1][RESAMPLER_ZERO_CROSSINGS + j]; + + // Calculate the cubic equation of the 4 points + CubicLeastSquares(fwd, ys[0], ys[1], ys[2], ys[3]); + CubicLeastSquares(rev, ys[3], ys[2], ys[1], ys[0]); + } + } +} + +typedef void (*ResampleFrameFunc)(const float *src, float *dst, const Cubic *filter, float frac, int chans); +static ResampleFrameFunc ResampleFrame[8]; + +// Transpose 4x4 floats +static void Transpose4x4(Cubic *data) +{ + int i, j; + + Cubic temp[4] = { data[0], data[1], data[2], data[3] }; + + for (i = 0; i < 4; ++i) { + for (j = 0; j < 4; ++j) { + data[i].v[j] = temp[j].v[i]; + } + } +} + +static void SetupAudioResampler(void) +{ + int i, j; + bool transpose = false; + + GenerateResamplerFilter(); + +#ifdef SDL_SSE_INTRINSICS + if (SDL_HasSSE()) { + for (i = 0; i < 8; ++i) { + ResampleFrame[i] = ResampleFrame_Generic_SSE; + } + transpose = true; + } else +#endif +#ifdef SDL_NEON_INTRINSICS + if (SDL_HasNEON()) { + for (i = 0; i < 8; ++i) { + ResampleFrame[i] = ResampleFrame_Generic_NEON; + } + transpose = true; + } else +#endif + { + for 
(i = 0; i < 8; ++i) { + ResampleFrame[i] = ResampleFrame_Generic; + } + + ResampleFrame[0] = ResampleFrame_Mono; + ResampleFrame[1] = ResampleFrame_Stereo; + } + + if (transpose) { + // Transpose each set of 4 coefficients, to reduce work when resampling + for (i = 0; i < RESAMPLER_SAMPLES_PER_ZERO_CROSSING; ++i) { + for (j = 0; j + 4 <= RESAMPLER_SAMPLES_PER_FRAME; j += 4) { + Transpose4x4(&ResamplerFilter[i][j]); + } + } + } +} + +void SDL_SetupAudioResampler(void) +{ + static SDL_InitState init; + + if (SDL_ShouldInit(&init)) { + SetupAudioResampler(); + SDL_SetInitialized(&init, true); + } +} + +Sint64 SDL_GetResampleRate(int src_rate, int dst_rate) +{ + SDL_assert(src_rate > 0); + SDL_assert(dst_rate > 0); + + Sint64 numerator = (Sint64)src_rate << 32; + Sint64 denominator = (Sint64)dst_rate; + + // Generally it's expected that `dst_frames = (src_frames * dst_rate) / src_rate` + // To match this as closely as possible without infinite precision, always round up the resample rate. + // For example, without rounding up, a sample ratio of 2:3 would have `sample_rate = 0xAAAAAAAA` + // After 3 frames, the position would be 0x1.FFFFFFFE, meaning we haven't fully consumed the second input frame. + // By rounding up to 0xAAAAAAAB, we would instead reach 0x2.00000001, fulling consuming the second frame. + // Technically you could say this is kicking the can 0x100000000 steps down the road, but I'm fine with that :) + // sample_rate = div_ceil(numerator, denominator) + Sint64 sample_rate = ((numerator - 1) / denominator) + 1; + + SDL_assert(sample_rate > 0); + + return sample_rate; +} + +int SDL_GetResamplerHistoryFrames(void) +{ + // Even if we aren't currently resampling, make sure to keep enough history in case we need to later. + + return RESAMPLER_MAX_PADDING_FRAMES; +} + +int SDL_GetResamplerPaddingFrames(Sint64 resample_rate) +{ + // This must always be <= SDL_GetResamplerHistoryFrames() + + return resample_rate ? 
RESAMPLER_MAX_PADDING_FRAMES : 0; +} + +// These are not general purpose. They do not check for all possible underflow/overflow +SDL_FORCE_INLINE bool ResamplerAdd(Sint64 a, Sint64 b, Sint64 *ret) +{ + if ((b > 0) && (a > SDL_MAX_SINT64 - b)) { + return false; + } + + *ret = a + b; + return true; +} + +SDL_FORCE_INLINE bool ResamplerMul(Sint64 a, Sint64 b, Sint64 *ret) +{ + if ((b > 0) && (a > SDL_MAX_SINT64 / b)) { + return false; + } + + *ret = a * b; + return true; +} + +Sint64 SDL_GetResamplerInputFrames(Sint64 output_frames, Sint64 resample_rate, Sint64 resample_offset) +{ + // Calculate the index of the last input frame, then add 1. + // ((((output_frames - 1) * resample_rate) + resample_offset) >> 32) + 1 + + Sint64 output_offset; + if (!ResamplerMul(output_frames, resample_rate, &output_offset) || + !ResamplerAdd(output_offset, -resample_rate + resample_offset + 0x100000000, &output_offset)) { + output_offset = SDL_MAX_SINT64; + } + + Sint64 input_frames = (Sint64)(Sint32)(output_offset >> 32); + input_frames = SDL_max(input_frames, 0); + + return input_frames; +} + +Sint64 SDL_GetResamplerOutputFrames(Sint64 input_frames, Sint64 resample_rate, Sint64 *inout_resample_offset) +{ + Sint64 resample_offset = *inout_resample_offset; + + // input_offset = (input_frames << 32) - resample_offset; + Sint64 input_offset; + if (!ResamplerMul(input_frames, 0x100000000, &input_offset) || + !ResamplerAdd(input_offset, -resample_offset, &input_offset)) { + input_offset = SDL_MAX_SINT64; + } + + // output_frames = div_ceil(input_offset, resample_rate) + Sint64 output_frames = (input_offset > 0) ? 
((input_offset - 1) / resample_rate) + 1 : 0; + + *inout_resample_offset = (output_frames * resample_rate) - input_offset; + + return output_frames; +} + +void SDL_ResampleAudio(int chans, const float *src, int inframes, float *dst, int outframes, + Sint64 resample_rate, Sint64 *inout_resample_offset) +{ + int i; + Sint64 srcpos = *inout_resample_offset; + ResampleFrameFunc resample_frame = ResampleFrame[chans - 1]; + + SDL_assert(resample_rate > 0); + + src -= (RESAMPLER_ZERO_CROSSINGS - 1) * chans; + + for (i = 0; i < outframes; ++i) { + int srcindex = (int)(Sint32)(srcpos >> 32); + Uint32 srcfraction = (Uint32)(srcpos & 0xFFFFFFFF); + srcpos += resample_rate; + + SDL_assert(srcindex >= -1 && srcindex < inframes); + + const Cubic *filter = ResamplerFilter[srcfraction >> RESAMPLER_FILTER_INTERP_BITS]; + const float frac = (float)(srcfraction & (RESAMPLER_FILTER_INTERP_RANGE - 1)) * (1.0f / RESAMPLER_FILTER_INTERP_RANGE); + + const float *frame = &src[srcindex * chans]; + resample_frame(frame, dst, filter, frac, chans); + + dst += chans; + } + + *inout_resample_offset = srcpos - ((Sint64)inframes << 32); +} -- cgit v1.2.3