Commit f70e197363f21e8734fd28452c19f9f9bf89dc11

Cameron Gutman 2021-02-08T18:31:08

Fix waiting on condition variables with the SRW lock implmentation When SleepConditionVariableSRW() releases the SRW lock internally, it causes our SDL_mutex_srw state to become inconsistent. The lock is unowned yet inside, the owner is still the sleeping thread and more importantly the owner count is still 1. The next time someone acquires the lock, they will bump the owner count from 1 to 2. At that point, the lock is hosed. From the internal lock state, it looks to us like that owner has acquired the lock recursively, even though they have not. When they call SDL_UnlockMutex(), it will see the owner count > 0 and not call ReleaseSRWLockExclusive(). Now when someone calls SDL_CondSignal(), SleepConditionVariableSRW() will start the wakeup process by attempting to re-acquire the SRW lock. This will deadlock because the lock was never released after the other thread had used it. The thread waiting on the condition variable will never be able to wake up, even if the SDL_CondWaitTimeout() function is used and the timeout expires.

diff --git a/src/thread/windows/SDL_syscond_srw.c b/src/thread/windows/SDL_syscond_srw.c
index 139bacc..763a546 100644
--- a/src/thread/windows/SDL_syscond_srw.c
+++ b/src/thread/windows/SDL_syscond_srw.c
@@ -133,6 +133,7 @@ SDL_CondWaitTimeout_srw(SDL_cond * _cond, SDL_mutex * _mutex, Uint32 ms)
     SDL_cond_srw *cond = (SDL_cond_srw *)_cond;
     SDL_mutex_srw *mutex = (SDL_mutex_srw *)_mutex;
     DWORD timeout;
+    int ret;
 
     if (!cond) {
         return SDL_SetError("Passed a NULL condition variable");
@@ -141,7 +142,7 @@ SDL_CondWaitTimeout_srw(SDL_cond * _cond, SDL_mutex * _mutex, Uint32 ms)
         return SDL_SetError("Passed a NULL mutex");
     }
 
-    if (mutex->count != 1) {
+    if (mutex->count != 1 || mutex->owner != GetCurrentThreadId()) {
         return SDL_SetError("Passed mutex is not locked or locked recursively");
     }
 
@@ -151,14 +152,26 @@ SDL_CondWaitTimeout_srw(SDL_cond * _cond, SDL_mutex * _mutex, Uint32 ms)
         timeout = (DWORD) ms;
     }
 
+    /* The mutex must be updated to the released state */
+    mutex->count = 0;
+    mutex->owner = 0;
+
     if (pSleepConditionVariableSRW(&cond->cond, &mutex->srw, timeout, 0) == FALSE) {
         if (GetLastError() == ERROR_TIMEOUT) {
-            return SDL_MUTEX_TIMEDOUT;
+            ret = SDL_MUTEX_TIMEDOUT;
+        } else {
+            ret = SDL_SetError("SleepConditionVariableSRW() failed");
         }
-        return SDL_SetError("SleepConditionVariableSRW() failed");
+    } else {
+        ret = 0;
     }
 
-    return 0;
+    /* The mutex is owned by us again, regardless of status of the wait */
+    SDL_assert(mutex->count == 0 && mutex->owner == 0);
+    mutex->count = 1;
+    mutex->owner = GetCurrentThreadId();
+
+    return ret;
 }
 
 static int
diff --git a/src/thread/windows/SDL_sysmutex.c b/src/thread/windows/SDL_sysmutex.c
index bac5ea5..6bf9f83 100644
--- a/src/thread/windows/SDL_sysmutex.c
+++ b/src/thread/windows/SDL_sysmutex.c
@@ -100,8 +100,9 @@ SDL_LockMutex_srw(SDL_mutex * _mutex)
            so unlocks from other threads will fail.
          */
         pAcquireSRWLockExclusive(&mutex->srw);
+        SDL_assert(mutex->count == 0 && mutex->owner == 0);
         mutex->owner = this_thread;
-        ++mutex->count;
+        mutex->count = 1;
     }
     return 0;
 }
@@ -122,8 +123,9 @@ SDL_TryLockMutex_srw(SDL_mutex * _mutex)
         ++mutex->count;
     } else {
         if (pTryAcquireSRWLockExclusive(&mutex->srw) != 0) {
+            SDL_assert(mutex->count == 0 && mutex->owner == 0);
             mutex->owner = this_thread;
-            ++mutex->count;
+            mutex->count = 1;
         } else {
             retval = SDL_MUTEX_TIMEDOUT;
         }