From f4d76853b9d921e3645295f9bebc39eb73661e67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20J=2E=20Est=C3=A9banez?= Date: Thu, 18 Jul 2024 14:54:58 +0200 Subject: [PATCH] WorkerThreadPool (plus friends): Overhaul unlock allowance zones This fixes a rare but possible deadlock, maybe due to undefined behavior. The new implementation is safer, at the cost of some added boilerplate. --- SConstruct | 2 +- core/io/resource_loader.cpp | 6 ++- core/io/resource_loader.h | 3 ++ core/object/worker_thread_pool.cpp | 60 +++++++++-------------- core/object/worker_thread_pool.h | 19 +++++--- core/os/condition_variable.h | 8 ++- core/os/mutex.h | 9 +++- core/os/safe_binary_mutex.h | 76 ++++++++++++++--------------- core/templates/command_queue_mt.cpp | 8 --- core/templates/command_queue_mt.h | 18 +++---- modules/gdscript/gdscript_cache.cpp | 10 +++- modules/gdscript/gdscript_cache.h | 9 +++- 12 files changed, 118 insertions(+), 110 deletions(-) diff --git a/SConstruct b/SConstruct index ca160c1762b..73e95896ebb 100644 --- a/SConstruct +++ b/SConstruct @@ -861,7 +861,7 @@ else: # GCC, Clang if cc_version_major >= 11: # Broke on MethodBind templates before GCC 11. env.Append(CCFLAGS=["-Wlogical-op"]) elif methods.using_clang(env) or methods.using_emcc(env): - env.Append(CCFLAGS=["-Wimplicit-fallthrough"]) + env.Append(CCFLAGS=["-Wimplicit-fallthrough", "-Wno-undefined-var-template"]) elif env["warnings"] == "all": env.Append(CCFLAGS=["-Wall"] + common_warnings) elif env["warnings"] == "moderate": diff --git a/core/io/resource_loader.cpp b/core/io/resource_loader.cpp index a7c5c80ad29..eaee849a876 100644 --- a/core/io/resource_loader.cpp +++ b/core/io/resource_loader.cpp @@ -1357,8 +1357,12 @@ thread_local int ResourceLoader::load_nesting = 0; thread_local Vector ResourceLoader::load_paths_stack; thread_local HashMap>> ResourceLoader::res_ref_overrides; +SafeBinaryMutex &_get_res_loader_mutex() { + return ResourceLoader::thread_load_mutex; +} + template <> -thread_local uint32_t SafeBinaryMutex::count = 0; +thread_local SafeBinaryMutex::TLSData SafeBinaryMutex::tls_data(_get_res_loader_mutex()); SafeBinaryMutex ResourceLoader::thread_load_mutex; HashMap ResourceLoader::thread_load_tasks; bool ResourceLoader::cleaning_tasks = false; diff --git a/core/io/resource_loader.h b/core/io/resource_loader.h index 217c14d7a38..f75bf019fb5 100644 --- a/core/io/resource_loader.h +++ b/core/io/resource_loader.h @@ -194,7 +194,10 @@ private: static thread_local int load_nesting; static thread_local HashMap>> res_ref_overrides; // Outermost key is nesting level. static thread_local Vector load_paths_stack; + static SafeBinaryMutex thread_load_mutex; + friend SafeBinaryMutex &_get_res_loader_mutex(); + static HashMap thread_load_tasks; static bool cleaning_tasks; diff --git a/core/object/worker_thread_pool.cpp b/core/object/worker_thread_pool.cpp index 155d963a2bd..7fd43c40949 100644 --- a/core/object/worker_thread_pool.cpp +++ b/core/object/worker_thread_pool.cpp @@ -32,6 +32,7 @@ #include "core/object/script_language.h" #include "core/os/os.h" +#include "core/os/safe_binary_mutex.h" #include "core/os/thread_safe.h" WorkerThreadPool::Task *const WorkerThreadPool::ThreadData::YIELDING = (Task *)1; @@ -46,7 +47,7 @@ void WorkerThreadPool::Task::free_template_userdata() { WorkerThreadPool *WorkerThreadPool::singleton = nullptr; #ifdef THREADS_ENABLED -thread_local uintptr_t WorkerThreadPool::unlockable_mutexes[MAX_UNLOCKABLE_MUTEXES] = {}; +thread_local WorkerThreadPool::UnlockableLocks WorkerThreadPool::unlockable_locks[MAX_UNLOCKABLE_LOCKS]; #endif void WorkerThreadPool::_process_task(Task *p_task) { @@ -428,13 +429,9 @@ Error WorkerThreadPool::wait_for_task_completion(TaskID p_task_id) { void WorkerThreadPool::_lock_unlockable_mutexes() { #ifdef THREADS_ENABLED - for (uint32_t i = 0; i < MAX_UNLOCKABLE_MUTEXES; i++) { - if (unlockable_mutexes[i]) { - if ((((uintptr_t)unlockable_mutexes[i]) & 1) == 0) { - ((Mutex *)unlockable_mutexes[i])->lock(); - } else { - ((BinaryMutex *)(unlockable_mutexes[i] & ~1))->lock(); - } + for (uint32_t i = 0; i < MAX_UNLOCKABLE_LOCKS; i++) { + if (unlockable_locks[i].ulock) { + unlockable_locks[i].ulock->lock(); } } #endif @@ -442,13 +439,9 @@ void WorkerThreadPool::_lock_unlockable_mutexes() { void WorkerThreadPool::_unlock_unlockable_mutexes() { #ifdef THREADS_ENABLED - for (uint32_t i = 0; i < MAX_UNLOCKABLE_MUTEXES; i++) { - if (unlockable_mutexes[i]) { - if ((((uintptr_t)unlockable_mutexes[i]) & 1) == 0) { - ((Mutex *)unlockable_mutexes[i])->unlock(); - } else { - ((BinaryMutex *)(unlockable_mutexes[i] & ~1))->unlock(); - } + for (uint32_t i = 0; i < MAX_UNLOCKABLE_LOCKS; i++) { + if (unlockable_locks[i].ulock) { + unlockable_locks[i].ulock->unlock(); } } #endif @@ -675,37 +668,28 @@ WorkerThreadPool::TaskID WorkerThreadPool::get_caller_task_id() { } #ifdef THREADS_ENABLED -uint32_t WorkerThreadPool::thread_enter_unlock_allowance_zone(Mutex *p_mutex) { - return _thread_enter_unlock_allowance_zone(p_mutex, false); -} - -uint32_t WorkerThreadPool::thread_enter_unlock_allowance_zone(BinaryMutex *p_mutex) { - return _thread_enter_unlock_allowance_zone(p_mutex, true); -} - -uint32_t WorkerThreadPool::_thread_enter_unlock_allowance_zone(void *p_mutex, bool p_is_binary) { - for (uint32_t i = 0; i < MAX_UNLOCKABLE_MUTEXES; i++) { - if (unlikely((unlockable_mutexes[i] & ~1) == (uintptr_t)p_mutex)) { +uint32_t WorkerThreadPool::_thread_enter_unlock_allowance_zone(THREADING_NAMESPACE::unique_lock &p_ulock) { + for (uint32_t i = 0; i < MAX_UNLOCKABLE_LOCKS; i++) { + DEV_ASSERT((bool)unlockable_locks[i].ulock == (bool)unlockable_locks[i].rc); + if (unlockable_locks[i].ulock == &p_ulock) { // Already registered in the current thread. - return UINT32_MAX; - } - if (!unlockable_mutexes[i]) { - unlockable_mutexes[i] = (uintptr_t)p_mutex; - if (p_is_binary) { - unlockable_mutexes[i] |= 1; - } + unlockable_locks[i].rc++; + return i; + } else if (!unlockable_locks[i].ulock) { + unlockable_locks[i].ulock = &p_ulock; + unlockable_locks[i].rc = 1; return i; } } - ERR_FAIL_V_MSG(UINT32_MAX, "No more unlockable mutex slots available. Engine bug."); + ERR_FAIL_V_MSG(UINT32_MAX, "No more unlockable lock slots available. Engine bug."); } void WorkerThreadPool::thread_exit_unlock_allowance_zone(uint32_t p_zone_id) { - if (p_zone_id == UINT32_MAX) { - return; + DEV_ASSERT(unlockable_locks[p_zone_id].ulock && unlockable_locks[p_zone_id].rc); + unlockable_locks[p_zone_id].rc--; + if (unlockable_locks[p_zone_id].rc == 0) { + unlockable_locks[p_zone_id].ulock = nullptr; } - DEV_ASSERT(unlockable_mutexes[p_zone_id]); - unlockable_mutexes[p_zone_id] = 0; } #endif diff --git a/core/object/worker_thread_pool.h b/core/object/worker_thread_pool.h index 57b67b32fad..5be4f209270 100644 --- a/core/object/worker_thread_pool.h +++ b/core/object/worker_thread_pool.h @@ -162,8 +162,12 @@ private: static WorkerThreadPool *singleton; #ifdef THREADS_ENABLED - static const uint32_t MAX_UNLOCKABLE_MUTEXES = 2; - static thread_local uintptr_t unlockable_mutexes[MAX_UNLOCKABLE_MUTEXES]; + static const uint32_t MAX_UNLOCKABLE_LOCKS = 2; + struct UnlockableLocks { + THREADING_NAMESPACE::unique_lock *ulock = nullptr; + uint32_t rc = 0; + }; + static thread_local UnlockableLocks unlockable_locks[MAX_UNLOCKABLE_LOCKS]; #endif TaskID _add_task(const Callable &p_callable, void (*p_func)(void *), void *p_userdata, BaseTemplateUserdata *p_template_userdata, bool p_high_priority, const String &p_description); @@ -192,7 +196,7 @@ private: void _wait_collaboratively(ThreadData *p_caller_pool_thread, Task *p_task); #ifdef THREADS_ENABLED - static uint32_t _thread_enter_unlock_allowance_zone(void *p_mutex, bool p_is_binary); + static uint32_t _thread_enter_unlock_allowance_zone(THREADING_NAMESPACE::unique_lock &p_ulock); #endif void _lock_unlockable_mutexes(); @@ -242,11 +246,14 @@ public: static TaskID get_caller_task_id(); #ifdef THREADS_ENABLED - static uint32_t thread_enter_unlock_allowance_zone(Mutex *p_mutex); - static uint32_t thread_enter_unlock_allowance_zone(BinaryMutex *p_mutex); + _ALWAYS_INLINE_ static uint32_t thread_enter_unlock_allowance_zone(const MutexLock &p_lock) { return _thread_enter_unlock_allowance_zone(p_lock._get_lock()); } + template + _ALWAYS_INLINE_ static uint32_t thread_enter_unlock_allowance_zone(const SafeBinaryMutex &p_mutex) { return _thread_enter_unlock_allowance_zone(p_mutex._get_lock()); } static void thread_exit_unlock_allowance_zone(uint32_t p_zone_id); #else - static uint32_t thread_enter_unlock_allowance_zone(void *p_mutex) { return UINT32_MAX; } + static uint32_t thread_enter_unlock_allowance_zone(const MutexLock &p_lock) { return UINT32_MAX; } + template + static uint32_t thread_enter_unlock_allowance_zone(const SafeBinaryMutex &p_mutex) { return UINT32_MAX; } static void thread_exit_unlock_allowance_zone(uint32_t p_zone_id) {} #endif diff --git a/core/os/condition_variable.h b/core/os/condition_variable.h index fa1355e98c7..c819fa6b402 100644 --- a/core/os/condition_variable.h +++ b/core/os/condition_variable.h @@ -32,6 +32,7 @@ #define CONDITION_VARIABLE_H #include "core/os/mutex.h" +#include "core/os/safe_binary_mutex.h" #ifdef THREADS_ENABLED @@ -56,7 +57,12 @@ class ConditionVariable { public: template _ALWAYS_INLINE_ void wait(const MutexLock &p_lock) const { - condition.wait(const_cast &>(p_lock.lock)); + condition.wait(const_cast &>(p_lock._get_lock())); + } + + template + _ALWAYS_INLINE_ void wait(const MutexLock> &p_lock) const { + condition.wait(const_cast &>(p_lock.mutex._get_lock())); } _ALWAYS_INLINE_ void notify_one() const { diff --git a/core/os/mutex.h b/core/os/mutex.h index 3e7aa81bc1a..773b31828dc 100644 --- a/core/os/mutex.h +++ b/core/os/mutex.h @@ -72,13 +72,18 @@ public: template class MutexLock { - friend class ConditionVariable; - THREADING_NAMESPACE::unique_lock lock; public: explicit MutexLock(const MutexT &p_mutex) : lock(p_mutex.mutex) {} + + // Clarification: all the funny syntax is needed so this function exists only for binary mutexes. + template + _ALWAYS_INLINE_ THREADING_NAMESPACE::unique_lock &_get_lock( + typename std::enable_if::value> * = nullptr) const { + return const_cast &>(lock); + } }; using Mutex = MutexImpl; // Recursive, for general use diff --git a/core/os/safe_binary_mutex.h b/core/os/safe_binary_mutex.h index 1e98cc074cd..4ca4b50b02c 100644 --- a/core/os/safe_binary_mutex.h +++ b/core/os/safe_binary_mutex.h @@ -47,76 +47,76 @@ // Also, don't forget to declare the thread_local variable on each use. template class SafeBinaryMutex { - friend class MutexLock; + friend class MutexLock>; using StdMutexType = THREADING_NAMESPACE::mutex; mutable THREADING_NAMESPACE::mutex mutex; - static thread_local uint32_t count; + + struct TLSData { + mutable THREADING_NAMESPACE::unique_lock lock; + uint32_t count = 0; + + TLSData(SafeBinaryMutex &p_mutex) : + lock(p_mutex.mutex, THREADING_NAMESPACE::defer_lock) {} + }; + static thread_local TLSData tls_data; public: _ALWAYS_INLINE_ void lock() const { - if (++count == 1) { - mutex.lock(); + if (++tls_data.count == 1) { + tls_data.lock.lock(); } } _ALWAYS_INLINE_ void unlock() const { - DEV_ASSERT(count); - if (--count == 0) { - mutex.unlock(); + DEV_ASSERT(tls_data.count); + if (--tls_data.count == 0) { + tls_data.lock.unlock(); } } - _ALWAYS_INLINE_ bool try_lock() const { - if (count) { - count++; - return true; - } else { - if (mutex.try_lock()) { - count++; - return true; - } else { - return false; - } - } + _ALWAYS_INLINE_ THREADING_NAMESPACE::unique_lock &_get_lock() const { + return const_cast &>(tls_data.lock); } - ~SafeBinaryMutex() { - DEV_ASSERT(!count); + _ALWAYS_INLINE_ SafeBinaryMutex() { + } + + _ALWAYS_INLINE_ ~SafeBinaryMutex() { + DEV_ASSERT(!tls_data.count); } }; -// This specialization is needed so manual locking and MutexLock can be used -// at the same time on a SafeBinaryMutex. template class MutexLock> { friend class ConditionVariable; - THREADING_NAMESPACE::unique_lock lock; + const SafeBinaryMutex &mutex; public: - _ALWAYS_INLINE_ explicit MutexLock(const SafeBinaryMutex &p_mutex) : - lock(p_mutex.mutex) { - SafeBinaryMutex::count++; - }; - _ALWAYS_INLINE_ ~MutexLock() { - SafeBinaryMutex::count--; - }; + explicit MutexLock(const SafeBinaryMutex &p_mutex) : + mutex(p_mutex) { + mutex.lock(); + } + + ~MutexLock() { + mutex.unlock(); + } }; #else // No threads. template -class SafeBinaryMutex : public MutexImpl { - static thread_local uint32_t count; -}; +class SafeBinaryMutex { + struct TLSData { + TLSData(SafeBinaryMutex &p_mutex) {} + }; + static thread_local TLSData tls_data; -template -class MutexLock> { public: - MutexLock(const SafeBinaryMutex &p_mutex) {} - ~MutexLock() {} + void lock() const {} + void unlock() const {} }; #endif // THREADS_ENABLED diff --git a/core/templates/command_queue_mt.cpp b/core/templates/command_queue_mt.cpp index ef75a70868e..5fa767263f9 100644 --- a/core/templates/command_queue_mt.cpp +++ b/core/templates/command_queue_mt.cpp @@ -33,14 +33,6 @@ #include "core/config/project_settings.h" #include "core/os/os.h" -void CommandQueueMT::lock() { - mutex.lock(); -} - -void CommandQueueMT::unlock() { - mutex.unlock(); -} - CommandQueueMT::CommandQueueMT() { command_mem.reserve(DEFAULT_COMMAND_MEM_SIZE_KB * 1024); } diff --git a/core/templates/command_queue_mt.h b/core/templates/command_queue_mt.h index 1e6c6e42a96..8ef5dd30642 100644 --- a/core/templates/command_queue_mt.h +++ b/core/templates/command_queue_mt.h @@ -362,23 +362,24 @@ class CommandQueueMT { return; } - lock(); + MutexLock lock(mutex); - uint32_t allowance_id = WorkerThreadPool::thread_enter_unlock_allowance_zone(&mutex); while (flush_read_ptr < command_mem.size()) { uint64_t size = *(uint64_t *)&command_mem[flush_read_ptr]; flush_read_ptr += 8; CommandBase *cmd = reinterpret_cast(&command_mem[flush_read_ptr]); + uint32_t allowance_id = WorkerThreadPool::thread_enter_unlock_allowance_zone(lock); cmd->call(); + WorkerThreadPool::thread_exit_unlock_allowance_zone(allowance_id); // Handle potential realloc due to the command and unlock allowance. cmd = reinterpret_cast(&command_mem[flush_read_ptr]); if (unlikely(cmd->sync)) { sync_head++; - unlock(); // Give an opportunity to awaiters right away. + lock.~MutexLock(); // Give an opportunity to awaiters right away. sync_cond_var.notify_all(); - lock(); + new (&lock) MutexLock(mutex); // Handle potential realloc happened during unlock. cmd = reinterpret_cast(&command_mem[flush_read_ptr]); } @@ -387,14 +388,11 @@ class CommandQueueMT { flush_read_ptr += size; } - WorkerThreadPool::thread_exit_unlock_allowance_zone(allowance_id); command_mem.clear(); flush_read_ptr = 0; _prevent_sync_wraparound(); - - unlock(); } _FORCE_INLINE_ void _wait_for_sync(MutexLock &p_lock) { @@ -410,9 +408,6 @@ class CommandQueueMT { void _no_op() {} public: - void lock(); - void unlock(); - /* NORMAL PUSH COMMANDS */ DECL_PUSH(0) SPACE_SEP_LIST(DECL_PUSH, 15) @@ -446,9 +441,8 @@ public: } void set_pump_task_id(WorkerThreadPool::TaskID p_task_id) { - lock(); + MutexLock lock(mutex); pump_task_id = p_task_id; - unlock(); } CommandQueueMT(); diff --git a/modules/gdscript/gdscript_cache.cpp b/modules/gdscript/gdscript_cache.cpp index 3b6526ffd91..b3c0744bdf2 100644 --- a/modules/gdscript/gdscript_cache.cpp +++ b/modules/gdscript/gdscript_cache.cpp @@ -144,6 +144,14 @@ GDScriptParserRef::~GDScriptParserRef() { GDScriptCache *GDScriptCache::singleton = nullptr; +SafeBinaryMutex &_get_gdscript_cache_mutex() { + return GDScriptCache::mutex; +} + +template <> +thread_local SafeBinaryMutex::TLSData SafeBinaryMutex::tls_data(_get_gdscript_cache_mutex()); +SafeBinaryMutex GDScriptCache::mutex; + void GDScriptCache::move_script(const String &p_from, const String &p_to) { if (singleton == nullptr || p_from == p_to) { return; @@ -369,7 +377,7 @@ Ref GDScriptCache::get_full_script(const String &p_path, Error &r_erro // Allowing lifting the lock might cause a script to be reloaded multiple times, // which, as a last resort deadlock prevention strategy, is a good tradeoff. - uint32_t allowance_id = WorkerThreadPool::thread_enter_unlock_allowance_zone(&singleton->mutex); + uint32_t allowance_id = WorkerThreadPool::thread_enter_unlock_allowance_zone(singleton->mutex); r_error = script->reload(true); WorkerThreadPool::thread_exit_unlock_allowance_zone(allowance_id); if (r_error) { diff --git a/modules/gdscript/gdscript_cache.h b/modules/gdscript/gdscript_cache.h index f7f2cd90e9e..4903da92b4f 100644 --- a/modules/gdscript/gdscript_cache.h +++ b/modules/gdscript/gdscript_cache.h @@ -34,7 +34,7 @@ #include "gdscript.h" #include "core/object/ref_counted.h" -#include "core/os/mutex.h" +#include "core/os/safe_binary_mutex.h" #include "core/templates/hash_map.h" #include "core/templates/hash_set.h" @@ -95,7 +95,12 @@ class GDScriptCache { bool cleared = false; - Mutex mutex; +public: + static const int BINARY_MUTEX_TAG = 2; + +private: + static SafeBinaryMutex mutex; + friend SafeBinaryMutex &_get_gdscript_cache_mutex(); public: static void move_script(const String &p_from, const String &p_to);