diff --git a/SConstruct b/SConstruct index 94574aacb22..569ee24d850 100644 --- a/SConstruct +++ b/SConstruct @@ -861,7 +861,7 @@ else: # GCC, Clang if cc_version_major >= 11: # Broke on MethodBind templates before GCC 11. env.Append(CCFLAGS=["-Wlogical-op"]) elif methods.using_clang(env) or methods.using_emcc(env): - env.Append(CCFLAGS=["-Wimplicit-fallthrough"]) + env.Append(CCFLAGS=["-Wimplicit-fallthrough", "-Wno-undefined-var-template"]) elif env["warnings"] == "all": env.Append(CCFLAGS=["-Wall"] + common_warnings) elif env["warnings"] == "moderate": diff --git a/core/io/resource_loader.cpp b/core/io/resource_loader.cpp index c9ed4e27d9d..eaee849a876 100644 --- a/core/io/resource_loader.cpp +++ b/core/io/resource_loader.cpp @@ -207,34 +207,53 @@ void ResourceFormatLoader::_bind_methods() { /////////////////////////////////// +// These are used before and after a wait for a WorkerThreadPool task +// because that can lead to another load started in the same thread, +// something we must treat as a different stack for the purposes +// of tracking nesting. + +#define PREPARE_FOR_WTP_WAIT \ + int load_nesting_backup = ResourceLoader::load_nesting; \ + Vector load_paths_stack_backup = ResourceLoader::load_paths_stack; \ + ResourceLoader::load_nesting = 0; \ + ResourceLoader::load_paths_stack.clear(); + +#define RESTORE_AFTER_WTP_WAIT \ + DEV_ASSERT(ResourceLoader::load_nesting == 0); \ + DEV_ASSERT(ResourceLoader::load_paths_stack.is_empty()); \ + ResourceLoader::load_nesting = load_nesting_backup; \ + ResourceLoader::load_paths_stack = load_paths_stack_backup; \ + load_paths_stack_backup.clear(); + // This should be robust enough to be called redundantly without issues. void ResourceLoader::LoadToken::clear() { thread_load_mutex.lock(); WorkerThreadPool::TaskID task_to_await = 0; + // User-facing tokens shouldn't be deleted until completely claimed. + DEV_ASSERT(user_rc == 0 && user_path.is_empty()); + if (!local_path.is_empty()) { // Empty is used for the special case where the load task is not registered. DEV_ASSERT(thread_load_tasks.has(local_path)); ThreadLoadTask &load_task = thread_load_tasks[local_path]; - if (!load_task.awaited) { + if (load_task.task_id && !load_task.awaited) { task_to_await = load_task.task_id; - load_task.awaited = true; } + // Removing a task which is still in progress would be catastrophic. + // Tokens must be alive until the task thread function is done. + DEV_ASSERT(load_task.status == THREAD_LOAD_FAILED || load_task.status == THREAD_LOAD_LOADED); thread_load_tasks.erase(local_path); local_path.clear(); } - if (!user_path.is_empty()) { - DEV_ASSERT(user_load_tokens.has(user_path)); - user_load_tokens.erase(user_path); - user_path.clear(); - } - thread_load_mutex.unlock(); // If task is unused, await it here, locally, now the token data is consistent. if (task_to_await) { + PREPARE_FOR_WTP_WAIT WorkerThreadPool::get_singleton()->wait_for_task_completion(task_to_await); + RESTORE_AFTER_WTP_WAIT } } @@ -295,11 +314,11 @@ Ref ResourceLoader::_load(const String &p_path, const String &p_origin ERR_FAIL_V_MSG(Ref(), vformat("No loader found for resource: %s (expected type: %s)", p_path, p_type_hint)); } -void ResourceLoader::_thread_load_function(void *p_userdata) { +// This implementation must allow re-entrancy for a task that started awaiting in a deeper stack frame. +void ResourceLoader::_run_load_task(void *p_userdata) { ThreadLoadTask &load_task = *(ThreadLoadTask *)p_userdata; thread_load_mutex.lock(); - caller_task_id = load_task.task_id; if (cleaning_tasks) { load_task.status = THREAD_LOAD_FAILED; thread_load_mutex.unlock(); @@ -322,8 +341,10 @@ void ResourceLoader::_thread_load_function(void *p_userdata) { } // -- + bool xl_remapped = false; + const String &remapped_path = _path_remap(load_task.local_path, &xl_remapped); Error load_err = OK; - Ref res = _load(load_task.remapped_path, load_task.remapped_path != load_task.local_path ? load_task.local_path : String(), load_task.type_hint, load_task.cache_mode, &load_err, load_task.use_sub_threads, &load_task.progress); + Ref res = _load(remapped_path, remapped_path != load_task.local_path ? load_task.local_path : String(), load_task.type_hint, load_task.cache_mode, &load_err, load_task.use_sub_threads, &load_task.progress); if (MessageQueue::get_singleton() != MessageQueue::get_main_singleton()) { MessageQueue::get_singleton()->flush(); } @@ -356,27 +377,40 @@ void ResourceLoader::_thread_load_function(void *p_userdata) { unlock_pending = false; if (!ignoring) { - if (replacing) { - Ref old_res = ResourceCache::get_ref(load_task.local_path); - if (old_res.is_valid() && old_res != load_task.resource) { - // If resource is already loaded, only replace its data, to avoid existing invalidating instances. - old_res->copy_from(load_task.resource); + ResourceCache::lock.lock(); // Check and operations must happen atomically. + bool pending_unlock = true; + Ref old_res = ResourceCache::get_ref(load_task.local_path); + if (old_res.is_valid()) { + if (old_res != load_task.resource) { + // Resource can already exists at this point for two reasons: + // a) The load uses replace mode. + // b) There were more than one load in flight for the same path because of deadlock prevention. + // Either case, we want to keep the resource that was already there. + ResourceCache::lock.unlock(); + pending_unlock = false; + if (replacing) { + old_res->copy_from(load_task.resource); + } load_task.resource = old_res; } + } else { + load_task.resource->set_path(load_task.local_path); + } + if (pending_unlock) { + ResourceCache::lock.unlock(); } - load_task.resource->set_path(load_task.local_path, replacing); } else { load_task.resource->set_path_cache(load_task.local_path); } - if (load_task.xl_remapped) { + if (xl_remapped) { load_task.resource->set_as_translation_remapped(true); } #ifdef TOOLS_ENABLED load_task.resource->set_edited(false); if (timestamp_on_load) { - uint64_t mt = FileAccess::get_modified_time(load_task.remapped_path); + uint64_t mt = FileAccess::get_modified_time(remapped_path); //printf("mt %s: %lli\n",remapped_path.utf8().get_data(),mt); load_task.resource->set_last_modified_time(mt); } @@ -426,36 +460,44 @@ static String _validate_local_path(const String &p_path) { } Error ResourceLoader::load_threaded_request(const String &p_path, const String &p_type_hint, bool p_use_sub_threads, ResourceFormatLoader::CacheMode p_cache_mode) { - thread_load_mutex.lock(); - if (user_load_tokens.has(p_path)) { - print_verbose("load_threaded_request(): Another threaded load for resource path '" + p_path + "' has been initiated. Not an error."); - user_load_tokens[p_path]->reference(); // Additional request. - thread_load_mutex.unlock(); - return OK; - } - user_load_tokens[p_path] = nullptr; - thread_load_mutex.unlock(); + Ref token = _load_start(p_path, p_type_hint, p_use_sub_threads ? LOAD_THREAD_DISTRIBUTE : LOAD_THREAD_SPAWN_SINGLE, p_cache_mode, true); + return token.is_valid() ? OK : FAILED; +} - Ref token = _load_start(p_path, p_type_hint, p_use_sub_threads ? LOAD_THREAD_DISTRIBUTE : LOAD_THREAD_SPAWN_SINGLE, p_cache_mode); - if (token.is_valid()) { - thread_load_mutex.lock(); - token->user_path = p_path; - token->reference(); // First request. - user_load_tokens[p_path] = token.ptr(); - print_lt("REQUEST: user load tokens: " + itos(user_load_tokens.size())); - thread_load_mutex.unlock(); - return OK; +ResourceLoader::LoadToken *ResourceLoader::_load_threaded_request_reuse_user_token(const String &p_path) { + HashMap::Iterator E = user_load_tokens.find(p_path); + if (E) { + print_verbose("load_threaded_request(): Another threaded load for resource path '" + p_path + "' has been initiated. Not an error."); + LoadToken *token = E->value; + token->user_rc++; + return token; } else { - return FAILED; + return nullptr; } } +void ResourceLoader::_load_threaded_request_setup_user_token(LoadToken *p_token, const String &p_path) { + p_token->user_path = p_path; + p_token->reference(); // Extra RC until all user requests have been gotten. + p_token->user_rc = 1; + user_load_tokens[p_path] = p_token; + print_lt("REQUEST: user load tokens: " + itos(user_load_tokens.size())); +} + Ref ResourceLoader::load(const String &p_path, const String &p_type_hint, ResourceFormatLoader::CacheMode p_cache_mode, Error *r_error) { if (r_error) { *r_error = OK; } - Ref load_token = _load_start(p_path, p_type_hint, LOAD_THREAD_FROM_CURRENT, p_cache_mode); + LoadThreadMode thread_mode = LOAD_THREAD_FROM_CURRENT; + if (WorkerThreadPool::get_singleton()->get_caller_task_id() != WorkerThreadPool::INVALID_TASK_ID) { + // If user is initiating a single-threaded load from a WorkerThreadPool task, + // we instead spawn a new task so there's a precondition that a load in a pool task + // is always initiated by the engine. That makes certain aspects simpler, such as + // cyclic load detection and awaiting. + thread_mode = LOAD_THREAD_SPAWN_SINGLE; + } + Ref load_token = _load_start(p_path, p_type_hint, thread_mode, p_cache_mode); if (!load_token.is_valid()) { if (r_error) { *r_error = FAILED; @@ -467,7 +509,7 @@ Ref ResourceLoader::load(const String &p_path, const String &p_type_hi return res; } -Ref ResourceLoader::_load_start(const String &p_path, const String &p_type_hint, LoadThreadMode p_thread_mode, ResourceFormatLoader::CacheMode p_cache_mode) { +Ref ResourceLoader::_load_start(const String &p_path, const String &p_type_hint, LoadThreadMode p_thread_mode, ResourceFormatLoader::CacheMode p_cache_mode, bool p_for_user) { String local_path = _validate_local_path(p_path); bool ignoring_cache = p_cache_mode == ResourceFormatLoader::CACHE_MODE_IGNORE || p_cache_mode == ResourceFormatLoader::CACHE_MODE_IGNORE_DEEP; @@ -480,6 +522,13 @@ Ref ResourceLoader::_load_start(const String &p_path, { MutexLock thread_load_lock(thread_load_mutex); + if (p_for_user) { + LoadToken *existing_token = _load_threaded_request_reuse_user_token(p_path); + if (existing_token) { + return Ref(existing_token); + } + } + if (!ignoring_cache && thread_load_tasks.has(local_path)) { load_token = Ref(thread_load_tasks[local_path].load_token); if (load_token.is_valid()) { @@ -493,12 +542,14 @@ Ref ResourceLoader::_load_start(const String &p_path, load_token.instantiate(); load_token->local_path = local_path; + if (p_for_user) { + _load_threaded_request_setup_user_token(load_token.ptr(), p_path); + } //create load task { ThreadLoadTask load_task; - load_task.remapped_path = _path_remap(local_path, &load_task.xl_remapped); load_task.load_token = load_token.ptr(); load_task.local_path = local_path; load_task.type_hint = p_type_hint; @@ -511,6 +562,7 @@ Ref ResourceLoader::_load_start(const String &p_path, load_task.resource = existing; load_task.status = THREAD_LOAD_LOADED; load_task.progress = 1.0; + DEV_ASSERT(!thread_load_tasks.has(local_path)); thread_load_tasks[local_path] = load_task; return load_token; } @@ -532,14 +584,20 @@ Ref ResourceLoader::_load_start(const String &p_path, run_on_current_thread = must_not_register || p_thread_mode == LOAD_THREAD_FROM_CURRENT; if (run_on_current_thread) { - load_task_ptr->thread_id = Thread::get_caller_id(); + // The current thread may happen to be a thread from the pool. + WorkerThreadPool::TaskID tid = WorkerThreadPool::get_singleton()->get_caller_task_id(); + if (tid != WorkerThreadPool::INVALID_TASK_ID) { + load_task_ptr->task_id = tid; + } else { + load_task_ptr->thread_id = Thread::get_caller_id(); + } } else { - load_task_ptr->task_id = WorkerThreadPool::get_singleton()->add_native_task(&ResourceLoader::_thread_load_function, load_task_ptr); + load_task_ptr->task_id = WorkerThreadPool::get_singleton()->add_native_task(&ResourceLoader::_run_load_task, load_task_ptr); } - } + } // MutexLock(thread_load_mutex). if (run_on_current_thread) { - _thread_load_function(load_task_ptr); + _run_load_task(load_task_ptr); if (must_not_register) { load_token->res_if_unregistered = load_task_ptr->resource; } @@ -626,13 +684,7 @@ Ref ResourceLoader::load_threaded_get(const String &p_path, Error *r_e } LoadToken *load_token = user_load_tokens[p_path]; - if (!load_token) { - // This happens if requested from one thread and rapidly querying from another. - if (r_error) { - *r_error = ERR_BUSY; - } - return Ref(); - } + DEV_ASSERT(load_token->user_rc >= 1); // Support userland requesting on the main thread before the load is reported to be complete. if (Thread::is_main_thread() && !load_token->local_path.is_empty()) { @@ -649,8 +701,15 @@ Ref ResourceLoader::load_threaded_get(const String &p_path, Error *r_e } res = _load_complete_inner(*load_token, r_error, thread_load_lock); - if (load_token->unreference()) { - memdelete(load_token); + + load_token->user_rc--; + if (load_token->user_rc == 0) { + load_token->user_path.clear(); + user_load_tokens.erase(p_path); + if (load_token->unreference()) { + memdelete(load_token); + load_token = nullptr; + } } } @@ -682,7 +741,7 @@ Ref ResourceLoader::_load_complete_inner(LoadToken &p_load_token, Erro if (load_task.status == THREAD_LOAD_IN_PROGRESS) { DEV_ASSERT((load_task.task_id == 0) != (load_task.thread_id == 0)); - if ((load_task.task_id != 0 && load_task.task_id == caller_task_id) || + if ((load_task.task_id != 0 && load_task.task_id == WorkerThreadPool::get_singleton()->get_caller_task_id()) || (load_task.thread_id != 0 && load_task.thread_id == Thread::get_caller_id())) { // Load is in progress, but it's precisely this thread the one in charge. // That means this is a cyclic load. @@ -693,55 +752,45 @@ Ref ResourceLoader::_load_complete_inner(LoadToken &p_load_token, Erro } bool loader_is_wtp = load_task.task_id != 0; - Error wtp_task_err = FAILED; if (loader_is_wtp) { // Loading thread is in the worker pool. - load_task.awaited = true; thread_load_mutex.unlock(); - wtp_task_err = WorkerThreadPool::get_singleton()->wait_for_task_completion(load_task.task_id); - } - if (load_task.status == THREAD_LOAD_IN_PROGRESS) { // If early errored, awaiting would deadlock. - if (loader_is_wtp) { - if (wtp_task_err == ERR_BUSY) { - // The WorkerThreadPool has reported that the current task wants to await on an older one. - // That't not allowed for safety, to avoid deadlocks. Fortunately, though, in the context of - // resource loading that means that the task to wait for can be restarted here to break the - // cycle, with as much recursion into this process as needed. - // When the stack is eventually unrolled, the original load will have been notified to go on. - // CACHE_MODE_IGNORE is needed because, otherwise, the new request would just see there's - // an ongoing load for that resource and wait for it again. This value forces a new load. - Ref token = _load_start(load_task.local_path, load_task.type_hint, LOAD_THREAD_DISTRIBUTE, ResourceFormatLoader::CACHE_MODE_IGNORE); - Ref resource = _load_complete(*token.ptr(), &wtp_task_err); - if (r_error) { - *r_error = wtp_task_err; - } - thread_load_mutex.lock(); - return resource; - } else { - DEV_ASSERT(wtp_task_err == OK); - thread_load_mutex.lock(); - } - } else if (load_task.need_wait) { - // Loading thread is main or user thread. - if (!load_task.cond_var) { - load_task.cond_var = memnew(ConditionVariable); - } - load_task.awaiters_count++; - do { - load_task.cond_var->wait(p_thread_load_lock); - DEV_ASSERT(thread_load_tasks.has(p_load_token.local_path) && p_load_token.get_reference_count()); - } while (load_task.need_wait); - load_task.awaiters_count--; - if (load_task.awaiters_count == 0) { - memdelete(load_task.cond_var); - load_task.cond_var = nullptr; - } + PREPARE_FOR_WTP_WAIT + Error wait_err = WorkerThreadPool::get_singleton()->wait_for_task_completion(load_task.task_id); + RESTORE_AFTER_WTP_WAIT + + DEV_ASSERT(!wait_err || wait_err == ERR_BUSY); + if (wait_err == ERR_BUSY) { + // The WorkerThreadPool has reported that the current task wants to await on an older one. + // That't not allowed for safety, to avoid deadlocks. Fortunately, though, in the context of + // resource loading that means that the task to wait for can be restarted here to break the + // cycle, with as much recursion into this process as needed. + // When the stack is eventually unrolled, the original load will have been notified to go on. + _run_load_task(&load_task); } - } else { - if (loader_is_wtp) { - thread_load_mutex.lock(); + + thread_load_mutex.lock(); + load_task.awaited = true; + + DEV_ASSERT(load_task.status == THREAD_LOAD_FAILED || load_task.status == THREAD_LOAD_LOADED); + } else if (load_task.need_wait) { + // Loading thread is main or user thread. + if (!load_task.cond_var) { + load_task.cond_var = memnew(ConditionVariable); } + load_task.awaiters_count++; + do { + load_task.cond_var->wait(p_thread_load_lock); + DEV_ASSERT(thread_load_tasks.has(p_load_token.local_path) && p_load_token.get_reference_count()); + } while (load_task.need_wait); + load_task.awaiters_count--; + if (load_task.awaiters_count == 0) { + memdelete(load_task.cond_var); + load_task.cond_var = nullptr; + } + + DEV_ASSERT(load_task.status == THREAD_LOAD_FAILED || load_task.status == THREAD_LOAD_LOADED); } } @@ -1181,10 +1230,13 @@ void ResourceLoader::clear_thread_load_tasks() { } while (user_load_tokens.begin()) { - // User load tokens remove themselves from the map on destruction. - memdelete(user_load_tokens.begin()->value); + LoadToken *user_token = user_load_tokens.begin()->value; + user_load_tokens.remove(user_load_tokens.begin()); + DEV_ASSERT(user_token->user_rc > 0 && !user_token->user_path.is_empty()); + user_token->user_path.clear(); + user_token->user_rc = 0; + user_token->unreference(); } - user_load_tokens.clear(); thread_load_tasks.clear(); @@ -1302,12 +1354,15 @@ bool ResourceLoader::abort_on_missing_resource = true; bool ResourceLoader::timestamp_on_load = false; thread_local int ResourceLoader::load_nesting = 0; -thread_local WorkerThreadPool::TaskID ResourceLoader::caller_task_id = 0; thread_local Vector ResourceLoader::load_paths_stack; thread_local HashMap>> ResourceLoader::res_ref_overrides; +SafeBinaryMutex &_get_res_loader_mutex() { + return ResourceLoader::thread_load_mutex; +} + template <> -thread_local uint32_t SafeBinaryMutex::count = 0; +thread_local SafeBinaryMutex::TLSData SafeBinaryMutex::tls_data(_get_res_loader_mutex()); SafeBinaryMutex ResourceLoader::thread_load_mutex; HashMap ResourceLoader::thread_load_tasks; bool ResourceLoader::cleaning_tasks = false; diff --git a/core/io/resource_loader.h b/core/io/resource_loader.h index ec9997891ea..f75bf019fb5 100644 --- a/core/io/resource_loader.h +++ b/core/io/resource_loader.h @@ -100,6 +100,8 @@ typedef Error (*ResourceLoaderImport)(const String &p_path); typedef void (*ResourceLoadedCallback)(Ref p_resource, const String &p_path); class ResourceLoader { + friend class LoadToken; + enum { MAX_LOADERS = 64 }; @@ -121,6 +123,7 @@ public: struct LoadToken : public RefCounted { String local_path; String user_path; + uint32_t user_rc = 0; // Having user RC implies regular RC incremented in one, until the user RC reaches zero. Ref res_if_unregistered; void clear(); @@ -130,10 +133,13 @@ public: static const int BINARY_MUTEX_TAG = 1; - static Ref _load_start(const String &p_path, const String &p_type_hint, LoadThreadMode p_thread_mode, ResourceFormatLoader::CacheMode p_cache_mode); + static Ref _load_start(const String &p_path, const String &p_type_hint, LoadThreadMode p_thread_mode, ResourceFormatLoader::CacheMode p_cache_mode, bool p_for_user = false); static Ref _load_complete(LoadToken &p_load_token, Error *r_error); private: + static LoadToken *_load_threaded_request_reuse_user_token(const String &p_path); + static void _load_threaded_request_setup_user_token(LoadToken *p_token, const String &p_path); + static Ref _load_complete_inner(LoadToken &p_load_token, Error *r_error, MutexLock> &p_thread_load_lock); static Ref loader[MAX_LOADERS]; @@ -171,7 +177,6 @@ private: bool need_wait = true; LoadToken *load_token = nullptr; String local_path; - String remapped_path; String type_hint; float progress = 0.0f; float max_reported_progress = 0.0f; @@ -180,18 +185,19 @@ private: ResourceFormatLoader::CacheMode cache_mode = ResourceFormatLoader::CACHE_MODE_REUSE; Error error = OK; Ref resource; - bool xl_remapped = false; bool use_sub_threads = false; HashSet sub_tasks; }; - static void _thread_load_function(void *p_userdata); + static void _run_load_task(void *p_userdata); static thread_local int load_nesting; - static thread_local WorkerThreadPool::TaskID caller_task_id; static thread_local HashMap>> res_ref_overrides; // Outermost key is nesting level. static thread_local Vector load_paths_stack; + static SafeBinaryMutex thread_load_mutex; + friend SafeBinaryMutex &_get_res_loader_mutex(); + static HashMap thread_load_tasks; static bool cleaning_tasks; diff --git a/core/object/worker_thread_pool.cpp b/core/object/worker_thread_pool.cpp index 56b9fa84756..7fd43c40949 100644 --- a/core/object/worker_thread_pool.cpp +++ b/core/object/worker_thread_pool.cpp @@ -32,6 +32,7 @@ #include "core/object/script_language.h" #include "core/os/os.h" +#include "core/os/safe_binary_mutex.h" #include "core/os/thread_safe.h" WorkerThreadPool::Task *const WorkerThreadPool::ThreadData::YIELDING = (Task *)1; @@ -46,7 +47,7 @@ void WorkerThreadPool::Task::free_template_userdata() { WorkerThreadPool *WorkerThreadPool::singleton = nullptr; #ifdef THREADS_ENABLED -thread_local uintptr_t WorkerThreadPool::unlockable_mutexes[MAX_UNLOCKABLE_MUTEXES] = {}; +thread_local WorkerThreadPool::UnlockableLocks WorkerThreadPool::unlockable_locks[MAX_UNLOCKABLE_LOCKS]; #endif void WorkerThreadPool::_process_task(Task *p_task) { @@ -428,13 +429,9 @@ Error WorkerThreadPool::wait_for_task_completion(TaskID p_task_id) { void WorkerThreadPool::_lock_unlockable_mutexes() { #ifdef THREADS_ENABLED - for (uint32_t i = 0; i < MAX_UNLOCKABLE_MUTEXES; i++) { - if (unlockable_mutexes[i]) { - if ((((uintptr_t)unlockable_mutexes[i]) & 1) == 0) { - ((Mutex *)unlockable_mutexes[i])->lock(); - } else { - ((BinaryMutex *)(unlockable_mutexes[i] & ~1))->lock(); - } + for (uint32_t i = 0; i < MAX_UNLOCKABLE_LOCKS; i++) { + if (unlockable_locks[i].ulock) { + unlockable_locks[i].ulock->lock(); } } #endif @@ -442,13 +439,9 @@ void WorkerThreadPool::_lock_unlockable_mutexes() { void WorkerThreadPool::_unlock_unlockable_mutexes() { #ifdef THREADS_ENABLED - for (uint32_t i = 0; i < MAX_UNLOCKABLE_MUTEXES; i++) { - if (unlockable_mutexes[i]) { - if ((((uintptr_t)unlockable_mutexes[i]) & 1) == 0) { - ((Mutex *)unlockable_mutexes[i])->unlock(); - } else { - ((BinaryMutex *)(unlockable_mutexes[i] & ~1))->unlock(); - } + for (uint32_t i = 0; i < MAX_UNLOCKABLE_LOCKS; i++) { + if (unlockable_locks[i].ulock) { + unlockable_locks[i].ulock->unlock(); } } #endif @@ -665,38 +658,38 @@ int WorkerThreadPool::get_thread_index() { return singleton->thread_ids.has(tid) ? singleton->thread_ids[tid] : -1; } +WorkerThreadPool::TaskID WorkerThreadPool::get_caller_task_id() { + int th_index = get_thread_index(); + if (th_index != -1 && singleton->threads[th_index].current_task) { + return singleton->threads[th_index].current_task->self; + } else { + return INVALID_TASK_ID; + } +} + #ifdef THREADS_ENABLED -uint32_t WorkerThreadPool::thread_enter_unlock_allowance_zone(Mutex *p_mutex) { - return _thread_enter_unlock_allowance_zone(p_mutex, false); -} - -uint32_t WorkerThreadPool::thread_enter_unlock_allowance_zone(BinaryMutex *p_mutex) { - return _thread_enter_unlock_allowance_zone(p_mutex, true); -} - -uint32_t WorkerThreadPool::_thread_enter_unlock_allowance_zone(void *p_mutex, bool p_is_binary) { - for (uint32_t i = 0; i < MAX_UNLOCKABLE_MUTEXES; i++) { - if (unlikely((unlockable_mutexes[i] & ~1) == (uintptr_t)p_mutex)) { +uint32_t WorkerThreadPool::_thread_enter_unlock_allowance_zone(THREADING_NAMESPACE::unique_lock &p_ulock) { + for (uint32_t i = 0; i < MAX_UNLOCKABLE_LOCKS; i++) { + DEV_ASSERT((bool)unlockable_locks[i].ulock == (bool)unlockable_locks[i].rc); + if (unlockable_locks[i].ulock == &p_ulock) { // Already registered in the current thread. - return UINT32_MAX; - } - if (!unlockable_mutexes[i]) { - unlockable_mutexes[i] = (uintptr_t)p_mutex; - if (p_is_binary) { - unlockable_mutexes[i] |= 1; - } + unlockable_locks[i].rc++; + return i; + } else if (!unlockable_locks[i].ulock) { + unlockable_locks[i].ulock = &p_ulock; + unlockable_locks[i].rc = 1; return i; } } - ERR_FAIL_V_MSG(UINT32_MAX, "No more unlockable mutex slots available. Engine bug."); + ERR_FAIL_V_MSG(UINT32_MAX, "No more unlockable lock slots available. Engine bug."); } void WorkerThreadPool::thread_exit_unlock_allowance_zone(uint32_t p_zone_id) { - if (p_zone_id == UINT32_MAX) { - return; + DEV_ASSERT(unlockable_locks[p_zone_id].ulock && unlockable_locks[p_zone_id].rc); + unlockable_locks[p_zone_id].rc--; + if (unlockable_locks[p_zone_id].rc == 0) { + unlockable_locks[p_zone_id].ulock = nullptr; } - DEV_ASSERT(unlockable_mutexes[p_zone_id]); - unlockable_mutexes[p_zone_id] = 0; } #endif diff --git a/core/object/worker_thread_pool.h b/core/object/worker_thread_pool.h index 8774143abfe..5be4f209270 100644 --- a/core/object/worker_thread_pool.h +++ b/core/object/worker_thread_pool.h @@ -162,8 +162,12 @@ private: static WorkerThreadPool *singleton; #ifdef THREADS_ENABLED - static const uint32_t MAX_UNLOCKABLE_MUTEXES = 2; - static thread_local uintptr_t unlockable_mutexes[MAX_UNLOCKABLE_MUTEXES]; + static const uint32_t MAX_UNLOCKABLE_LOCKS = 2; + struct UnlockableLocks { + THREADING_NAMESPACE::unique_lock *ulock = nullptr; + uint32_t rc = 0; + }; + static thread_local UnlockableLocks unlockable_locks[MAX_UNLOCKABLE_LOCKS]; #endif TaskID _add_task(const Callable &p_callable, void (*p_func)(void *), void *p_userdata, BaseTemplateUserdata *p_template_userdata, bool p_high_priority, const String &p_description); @@ -192,7 +196,7 @@ private: void _wait_collaboratively(ThreadData *p_caller_pool_thread, Task *p_task); #ifdef THREADS_ENABLED - static uint32_t _thread_enter_unlock_allowance_zone(void *p_mutex, bool p_is_binary); + static uint32_t _thread_enter_unlock_allowance_zone(THREADING_NAMESPACE::unique_lock &p_ulock); #endif void _lock_unlockable_mutexes(); @@ -239,13 +243,17 @@ public: static WorkerThreadPool *get_singleton() { return singleton; } static int get_thread_index(); + static TaskID get_caller_task_id(); #ifdef THREADS_ENABLED - static uint32_t thread_enter_unlock_allowance_zone(Mutex *p_mutex); - static uint32_t thread_enter_unlock_allowance_zone(BinaryMutex *p_mutex); + _ALWAYS_INLINE_ static uint32_t thread_enter_unlock_allowance_zone(const MutexLock &p_lock) { return _thread_enter_unlock_allowance_zone(p_lock._get_lock()); } + template + _ALWAYS_INLINE_ static uint32_t thread_enter_unlock_allowance_zone(const SafeBinaryMutex &p_mutex) { return _thread_enter_unlock_allowance_zone(p_mutex._get_lock()); } static void thread_exit_unlock_allowance_zone(uint32_t p_zone_id); #else - static uint32_t thread_enter_unlock_allowance_zone(void *p_mutex) { return UINT32_MAX; } + static uint32_t thread_enter_unlock_allowance_zone(const MutexLock &p_lock) { return UINT32_MAX; } + template + static uint32_t thread_enter_unlock_allowance_zone(const SafeBinaryMutex &p_mutex) { return UINT32_MAX; } static void thread_exit_unlock_allowance_zone(uint32_t p_zone_id) {} #endif diff --git a/core/os/condition_variable.h b/core/os/condition_variable.h index fa1355e98c7..c819fa6b402 100644 --- a/core/os/condition_variable.h +++ b/core/os/condition_variable.h @@ -32,6 +32,7 @@ #define CONDITION_VARIABLE_H #include "core/os/mutex.h" +#include "core/os/safe_binary_mutex.h" #ifdef THREADS_ENABLED @@ -56,7 +57,12 @@ class ConditionVariable { public: template _ALWAYS_INLINE_ void wait(const MutexLock &p_lock) const { - condition.wait(const_cast &>(p_lock.lock)); + condition.wait(const_cast &>(p_lock._get_lock())); + } + + template + _ALWAYS_INLINE_ void wait(const MutexLock> &p_lock) const { + condition.wait(const_cast &>(p_lock.mutex._get_lock())); } _ALWAYS_INLINE_ void notify_one() const { diff --git a/core/os/mutex.h b/core/os/mutex.h index 3e7aa81bc1a..773b31828dc 100644 --- a/core/os/mutex.h +++ b/core/os/mutex.h @@ -72,13 +72,18 @@ public: template class MutexLock { - friend class ConditionVariable; - THREADING_NAMESPACE::unique_lock lock; public: explicit MutexLock(const MutexT &p_mutex) : lock(p_mutex.mutex) {} + + // Clarification: all the funny syntax is needed so this function exists only for binary mutexes. + template + _ALWAYS_INLINE_ THREADING_NAMESPACE::unique_lock &_get_lock( + typename std::enable_if::value> * = nullptr) const { + return const_cast &>(lock); + } }; using Mutex = MutexImpl; // Recursive, for general use diff --git a/core/os/safe_binary_mutex.h b/core/os/safe_binary_mutex.h index 1e98cc074cd..4ca4b50b02c 100644 --- a/core/os/safe_binary_mutex.h +++ b/core/os/safe_binary_mutex.h @@ -47,76 +47,76 @@ // Also, don't forget to declare the thread_local variable on each use. template class SafeBinaryMutex { - friend class MutexLock; + friend class MutexLock>; using StdMutexType = THREADING_NAMESPACE::mutex; mutable THREADING_NAMESPACE::mutex mutex; - static thread_local uint32_t count; + + struct TLSData { + mutable THREADING_NAMESPACE::unique_lock lock; + uint32_t count = 0; + + TLSData(SafeBinaryMutex &p_mutex) : + lock(p_mutex.mutex, THREADING_NAMESPACE::defer_lock) {} + }; + static thread_local TLSData tls_data; public: _ALWAYS_INLINE_ void lock() const { - if (++count == 1) { - mutex.lock(); + if (++tls_data.count == 1) { + tls_data.lock.lock(); } } _ALWAYS_INLINE_ void unlock() const { - DEV_ASSERT(count); - if (--count == 0) { - mutex.unlock(); + DEV_ASSERT(tls_data.count); + if (--tls_data.count == 0) { + tls_data.lock.unlock(); } } - _ALWAYS_INLINE_ bool try_lock() const { - if (count) { - count++; - return true; - } else { - if (mutex.try_lock()) { - count++; - return true; - } else { - return false; - } - } + _ALWAYS_INLINE_ THREADING_NAMESPACE::unique_lock &_get_lock() const { + return const_cast &>(tls_data.lock); } - ~SafeBinaryMutex() { - DEV_ASSERT(!count); + _ALWAYS_INLINE_ SafeBinaryMutex() { + } + + _ALWAYS_INLINE_ ~SafeBinaryMutex() { + DEV_ASSERT(!tls_data.count); } }; -// This specialization is needed so manual locking and MutexLock can be used -// at the same time on a SafeBinaryMutex. template class MutexLock> { friend class ConditionVariable; - THREADING_NAMESPACE::unique_lock lock; + const SafeBinaryMutex &mutex; public: - _ALWAYS_INLINE_ explicit MutexLock(const SafeBinaryMutex &p_mutex) : - lock(p_mutex.mutex) { - SafeBinaryMutex::count++; - }; - _ALWAYS_INLINE_ ~MutexLock() { - SafeBinaryMutex::count--; - }; + explicit MutexLock(const SafeBinaryMutex &p_mutex) : + mutex(p_mutex) { + mutex.lock(); + } + + ~MutexLock() { + mutex.unlock(); + } }; #else // No threads. template -class SafeBinaryMutex : public MutexImpl { - static thread_local uint32_t count; -}; +class SafeBinaryMutex { + struct TLSData { + TLSData(SafeBinaryMutex &p_mutex) {} + }; + static thread_local TLSData tls_data; -template -class MutexLock> { public: - MutexLock(const SafeBinaryMutex &p_mutex) {} - ~MutexLock() {} + void lock() const {} + void unlock() const {} }; #endif // THREADS_ENABLED diff --git a/core/templates/command_queue_mt.cpp b/core/templates/command_queue_mt.cpp index ef75a70868e..5fa767263f9 100644 --- a/core/templates/command_queue_mt.cpp +++ b/core/templates/command_queue_mt.cpp @@ -33,14 +33,6 @@ #include "core/config/project_settings.h" #include "core/os/os.h" -void CommandQueueMT::lock() { - mutex.lock(); -} - -void CommandQueueMT::unlock() { - mutex.unlock(); -} - CommandQueueMT::CommandQueueMT() { command_mem.reserve(DEFAULT_COMMAND_MEM_SIZE_KB * 1024); } diff --git a/core/templates/command_queue_mt.h b/core/templates/command_queue_mt.h index 1e6c6e42a96..8ef5dd30642 100644 --- a/core/templates/command_queue_mt.h +++ b/core/templates/command_queue_mt.h @@ -362,23 +362,24 @@ class CommandQueueMT { return; } - lock(); + MutexLock lock(mutex); - uint32_t allowance_id = WorkerThreadPool::thread_enter_unlock_allowance_zone(&mutex); while (flush_read_ptr < command_mem.size()) { uint64_t size = *(uint64_t *)&command_mem[flush_read_ptr]; flush_read_ptr += 8; CommandBase *cmd = reinterpret_cast(&command_mem[flush_read_ptr]); + uint32_t allowance_id = WorkerThreadPool::thread_enter_unlock_allowance_zone(lock); cmd->call(); + WorkerThreadPool::thread_exit_unlock_allowance_zone(allowance_id); // Handle potential realloc due to the command and unlock allowance. cmd = reinterpret_cast(&command_mem[flush_read_ptr]); if (unlikely(cmd->sync)) { sync_head++; - unlock(); // Give an opportunity to awaiters right away. + lock.~MutexLock(); // Give an opportunity to awaiters right away. sync_cond_var.notify_all(); - lock(); + new (&lock) MutexLock(mutex); // Handle potential realloc happened during unlock. cmd = reinterpret_cast(&command_mem[flush_read_ptr]); } @@ -387,14 +388,11 @@ class CommandQueueMT { flush_read_ptr += size; } - WorkerThreadPool::thread_exit_unlock_allowance_zone(allowance_id); command_mem.clear(); flush_read_ptr = 0; _prevent_sync_wraparound(); - - unlock(); } _FORCE_INLINE_ void _wait_for_sync(MutexLock &p_lock) { @@ -410,9 +408,6 @@ class CommandQueueMT { void _no_op() {} public: - void lock(); - void unlock(); - /* NORMAL PUSH COMMANDS */ DECL_PUSH(0) SPACE_SEP_LIST(DECL_PUSH, 15) @@ -446,9 +441,8 @@ public: } void set_pump_task_id(WorkerThreadPool::TaskID p_task_id) { - lock(); + MutexLock lock(mutex); pump_task_id = p_task_id; - unlock(); } CommandQueueMT(); diff --git a/modules/gdscript/gdscript_cache.cpp b/modules/gdscript/gdscript_cache.cpp index 3b6526ffd91..b3c0744bdf2 100644 --- a/modules/gdscript/gdscript_cache.cpp +++ b/modules/gdscript/gdscript_cache.cpp @@ -144,6 +144,14 @@ GDScriptParserRef::~GDScriptParserRef() { GDScriptCache *GDScriptCache::singleton = nullptr; +SafeBinaryMutex &_get_gdscript_cache_mutex() { + return GDScriptCache::mutex; +} + +template <> +thread_local SafeBinaryMutex::TLSData SafeBinaryMutex::tls_data(_get_gdscript_cache_mutex()); +SafeBinaryMutex GDScriptCache::mutex; + void GDScriptCache::move_script(const String &p_from, const String &p_to) { if (singleton == nullptr || p_from == p_to) { return; @@ -369,7 +377,7 @@ Ref GDScriptCache::get_full_script(const String &p_path, Error &r_erro // Allowing lifting the lock might cause a script to be reloaded multiple times, // which, as a last resort deadlock prevention strategy, is a good tradeoff. - uint32_t allowance_id = WorkerThreadPool::thread_enter_unlock_allowance_zone(&singleton->mutex); + uint32_t allowance_id = WorkerThreadPool::thread_enter_unlock_allowance_zone(singleton->mutex); r_error = script->reload(true); WorkerThreadPool::thread_exit_unlock_allowance_zone(allowance_id); if (r_error) { diff --git a/modules/gdscript/gdscript_cache.h b/modules/gdscript/gdscript_cache.h index f7f2cd90e9e..4903da92b4f 100644 --- a/modules/gdscript/gdscript_cache.h +++ b/modules/gdscript/gdscript_cache.h @@ -34,7 +34,7 @@ #include "gdscript.h" #include "core/object/ref_counted.h" -#include "core/os/mutex.h" +#include "core/os/safe_binary_mutex.h" #include "core/templates/hash_map.h" #include "core/templates/hash_set.h" @@ -95,7 +95,12 @@ class GDScriptCache { bool cleared = false; - Mutex mutex; +public: + static const int BINARY_MUTEX_TAG = 2; + +private: + static SafeBinaryMutex mutex; + friend SafeBinaryMutex &_get_gdscript_cache_mutex(); public: static void move_script(const String &p_from, const String &p_to);