audio: Resampler optimizations.

- Calculate `j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING` once per loop iteration, since we use it multiple times.
- Do the left-wing loop in two sections: while `srcframe < 0`, and then the remaining calculations when `srcframe >= 0`. This bubbles a conditional out of every iteration of a tight loop, giving us a boost. We could _probably_ do this to the right-wing loop too, but it's less straightforward there.
- The real win: use floats instead of doubles. This almost doubles the speed of the entire function on Intel CPUs, and for embedded things without hardware-level support for doubles, the speedup is enormous. This might, in theory, reduce audio quality, though, and I had to put a check in place to avoid a division-by-zero that we avoided at higher precision, but this is likely to be worth keeping for at least the Sony PSP and other smaller platforms, if not everyone.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83
diff --git a/src/audio/SDL_audiocvt.c b/src/audio/SDL_audiocvt.c
index 2682131..6ecc2c9 100644
--- a/src/audio/SDL_audiocvt.c
+++ b/src/audio/SDL_audiocvt.c
@@ -725,46 +725,64 @@ SDL_ResampleAudio(const int chans, const int inrate, const int outrate,
const float *inbuf, const int inbuflen,
float *outbuf, const int outbuflen)
{
- const double finrate = (double) inrate;
- const double outtimeincr = 1.0 / ((float) outrate);
- const double ratio = ((float) outrate) / ((float) inrate);
+ /* Note that this used to be double, but it looks like we can get by with float in most cases at
+ almost twice the speed on Intel processors, and orders of magnitude more
+ on CPUs that need a software fallback for double calculations. */
+ typedef float ResampleFloatType;
+
+ const ResampleFloatType finrate = (ResampleFloatType) inrate;
+ const ResampleFloatType outtimeincr = ((ResampleFloatType) 1.0f) / ((ResampleFloatType) outrate);
+ const ResampleFloatType ratio = ((float) outrate) / ((float) inrate);
const int paddinglen = ResamplerPadding(inrate, outrate);
const int framelen = chans * (int)sizeof (float);
const int inframes = inbuflen / framelen;
const int wantedoutframes = (int) ((inbuflen / framelen) * ratio); /* outbuflen isn't total to write, it's total available. */
const int maxoutframes = outbuflen / framelen;
const int outframes = SDL_min(wantedoutframes, maxoutframes);
+ ResampleFloatType outtime = 0.0f;
float *dst = outbuf;
- double outtime = 0.0;
int i, j, chan;
for (i = 0; i < outframes; i++) {
const int srcindex = (int) (outtime * inrate);
- const double intime = ((double) srcindex) / finrate;
- const double innexttime = ((double) (srcindex + 1)) / finrate;
- const double interpolation1 = 1.0 - ((innexttime - outtime) / (innexttime - intime));
+ const ResampleFloatType intime = ((ResampleFloatType) srcindex) / finrate;
+ const ResampleFloatType innexttime = ((ResampleFloatType) (srcindex + 1)) / finrate;
+ const ResampleFloatType indeltatime = innexttime - intime;
+ const ResampleFloatType interpolation1 = (indeltatime == 0.0f) ? 1.0f : (1.0f - ((innexttime - outtime) / indeltatime));
const int filterindex1 = (int) (interpolation1 * RESAMPLER_SAMPLES_PER_ZERO_CROSSING);
- const double interpolation2 = 1.0 - interpolation1;
+ const ResampleFloatType interpolation2 = 1.0f - interpolation1;
const int filterindex2 = (int) (interpolation2 * RESAMPLER_SAMPLES_PER_ZERO_CROSSING);
for (chan = 0; chan < chans; chan++) {
float outsample = 0.0f;
/* do this twice to calculate the sample, once for the "left wing" and then same for the right. */
- /* !!! FIXME: do both wings in one loop */
- for (j = 0; (filterindex1 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)) < RESAMPLER_FILTER_SIZE; j++) {
+
+ /* Left wing! split the "srcframe < 0" condition out into a preloop. */
+ for (j = 0; srcindex < j; j++) {
+ const int jsamples = j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING;
+ const int srcframe = srcindex - j;
+ const float insample = lpadding[((paddinglen + srcframe) * chans) + chan];
+ outsample += (float)(insample * (ResamplerFilter[filterindex1 + jsamples] + (interpolation1 * ResamplerFilterDifference[filterindex1 + jsamples])));
+ }
+
+ /* Finish the left wing now that srcframe >= 0 */
+ for (; (filterindex1 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)) < RESAMPLER_FILTER_SIZE; j++) {
+ const int jsamples = j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING;
const int srcframe = srcindex - j;
- /* !!! FIXME: we can bubble this conditional out of here by doing a pre loop. */
- const float insample = (srcframe < 0) ? lpadding[((paddinglen + srcframe) * chans) + chan] : inbuf[(srcframe * chans) + chan];
- outsample += (float)(insample * (ResamplerFilter[filterindex1 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)] + (interpolation1 * ResamplerFilterDifference[filterindex1 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)])));
+ const float insample = inbuf[(srcframe * chans) + chan];
+ outsample += (float)(insample * (ResamplerFilter[filterindex1 + jsamples] + (interpolation1 * ResamplerFilterDifference[filterindex1 + jsamples])));
}
+ /* Do the right wing! */
for (j = 0; (filterindex2 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)) < RESAMPLER_FILTER_SIZE; j++) {
+ const int jsamples = j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING;
const int srcframe = srcindex + 1 + j;
/* !!! FIXME: we can bubble this conditional out of here by doing a post loop. */
const float insample = (srcframe >= inframes) ? rpadding[((srcframe - inframes) * chans) + chan] : inbuf[(srcframe * chans) + chan];
- outsample += (float)(insample * (ResamplerFilter[filterindex2 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)] + (interpolation2 * ResamplerFilterDifference[filterindex2 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)])));
+ outsample += (float)(insample * (ResamplerFilter[filterindex2 + jsamples] + (interpolation2 * ResamplerFilterDifference[filterindex2 + jsamples])));
}
+
*(dst++) = outsample;
}