audiocvt: 5.1 to Stereo conversion utilizing SSE
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103
diff --git a/src/audio/SDL_audiocvt.c b/src/audio/SDL_audiocvt.c
index efeb538..7c3e5bf 100644
--- a/src/audio/SDL_audiocvt.c
+++ b/src/audio/SDL_audiocvt.c
@@ -35,6 +35,10 @@
#define DEBUG_AUDIOSTREAM 0
+#ifdef __SSE__
+#define HAVE_SSE_INTRINSICS 1 /* compile-time gate for the SSE converters; SDL_HasSSE() is still checked at runtime */
+#endif /* __SSE__ */
+
#ifdef __SSE3__
#define HAVE_SSE3_INTRINSICS 1
#endif
@@ -97,6 +101,66 @@ SDL_ConvertStereoToMono(SDL_AudioCVT * cvt, SDL_AudioFormat format)
}
+#if HAVE_SSE_INTRINSICS
+/* Convert from 5.1 to stereo, in place, using SSE. For each side, mix the
+ * front and back channels with half of the center and scale the sum by
+ * 1/2.5. The LFE channel is discarded.
+ *
+ * cvt:    conversion state. cvt->buf is read as interleaved 6-channel float
+ *         frames and overwritten with the 2-channel result. cvt->len_cvt is
+ *         the input length in bytes and is divided by 3 before chaining.
+ * format: must be AUDIO_F32SYS (asserted below).
+ *
+ * When done, the next entry in cvt->filters is invoked if it is set.
+ */
+static void SDLCALL
+SDL_Convert51ToStereo_SSE(SDL_AudioCVT * cvt, SDL_AudioFormat format)
+{
+ float *dst = (float *) cvt->buf;
+ const float *src = dst; /* reads and writes share the same buffer */
+ int i = cvt->len_cvt / (sizeof (float) * 6); /* number of whole 6-channel frames */
+ const float two_fifths_f = 1.0f / 2.5f; /* normalizes the 1 + 1 + 0.5 mix weights */
+ const __m128 two_fifths_v = _mm_set1_ps(two_fifths_f);
+ const __m128 half = _mm_set1_ps(0.5f); /* weight applied to the center channel */
+
+ LOG_DEBUG_CONVERT("5.1", "stereo (using SSE)");
+ SDL_assert(format == AUDIO_F32SYS);
+
+ /* SDL's 5.1 layout: FL+FR+FC+LFE+BL+BR */
+ /* Just use unaligned load/stores; if the memory at runtime is */
+ /* aligned it'll be just as fast on modern processors. */
+ while (i >= 2) {
+ /* Two 5.1 samples (12 floats) fit nicely in three 128bit */
+ /* registers. Using shuffles they can be rearranged so that */
+ /* the conversion math can be vectorized. All three loads come
+  * before the store, and each pass reads 12 floats but writes only 4,
+  * so dst never overtakes src and converting in place is safe. */
+ __m128 in0 = _mm_loadu_ps(src); /* 0FL 0FR 0FC 0LF */
+ __m128 in1 = _mm_loadu_ps(src + 4); /* 0BL 0BR 1FL 1FR */
+ __m128 in2 = _mm_loadu_ps(src + 8); /* 1FC 1LF 1BL 1BR */
+
+ /* Half of each center, duplicated for left and right: 0FC 0FC 1FC 1FC */
+ __m128 fc_distributed = _mm_mul_ps(half, _mm_shuffle_ps(in0, in2, _MM_SHUFFLE(0, 0, 2, 2)));
+
+ /* Low two lanes of in0, high two lanes of in2: 0FL 0FR 1BL 1BR */
+ __m128 blended = _mm_shuffle_ps(in0, in2, _MM_SHUFFLE(3, 2, 1, 0));
+
+ /* 0FL 0FR 1BL 1BR */
+ /* + 0BL 0BR 1FL 1FR */
+ /* = 0L 0R 1L 1R (front + back per side; the LFE lanes were never selected) */
+ __m128 out = _mm_add_ps(blended, in1);
+ out = _mm_add_ps(out, fc_distributed);
+ out = _mm_mul_ps(out, two_fifths_v);
+
+ _mm_storeu_ps(dst, out); /* two stereo frames */
+
+ i -= 2; src += 12; dst += 4;
+ }
+
+
+ /* Finish off any leftover frame (fewer than two remain here) with scalar operations. */
+ while (i) {
+ const float front_center_distributed = src[2] * 0.5f;
+ dst[0] = (src[0] + front_center_distributed + src[4]) * two_fifths_f; /* left */
+ dst[1] = (src[1] + front_center_distributed + src[5]) * two_fifths_f; /* right */
+ i--; src += 6; dst+=2;
+ }
+
+ cvt->len_cvt /= 3; /* 6 channels in, 2 channels out */
+ if (cvt->filters[++cvt->filter_index]) { /* advance to the next filter and run it if one is set */
+ cvt->filters[cvt->filter_index] (cvt, format);
+ }
+}
+#endif
+
/* Convert from 5.1 to stereo. Average left and right, distribute center, discard LFE. */
static void SDLCALL
SDL_Convert51ToStereo(SDL_AudioCVT * cvt, SDL_AudioFormat format)
@@ -1020,7 +1084,19 @@ SDL_BuildAudioCVT(SDL_AudioCVT * cvt,
}
/* [7.1 ->] 5.1 -> Stereo [-> Mono] */
if ((src_channels == 6) && (dst_channels <= 2)) {
- if (SDL_AddAudioCVTFilter(cvt, SDL_Convert51ToStereo) < 0) {
+ SDL_AudioFilter filter = NULL;
+
+ #if HAVE_SSE_INTRINSICS
+ if (SDL_HasSSE()) {
+ filter = SDL_Convert51ToStereo_SSE;
+ }
+ #endif
+
+ if (!filter) {
+ filter = SDL_Convert51ToStereo;
+ }
+
+ if (SDL_AddAudioCVTFilter(cvt, filter) < 0) {
return -1;
}
src_channels = 2;