From 47dea96f8650955cdef9f2b29ec8bbf27a157493 Mon Sep 17 00:00:00 2001 From: iwabuchi Date: Mon, 9 Oct 2023 16:02:14 -0700 Subject: [PATCH] Add single thread allocation mode (experimental). --- CMakeLists.txt | 9 +++++++ include/metall/kernel/object_cache.hpp | 36 +++++++++++++++++++------- test/kernel/CMakeLists.txt | 3 +++ 3 files changed, 39 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 16c14325..7b268253 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -101,6 +101,10 @@ set(INITIAL_SEGMENT_SIZE "0" CACHE STRING "Set the initial segment size (use the internally defined value if 0 is specified)") # ---------- Experimental options ---------- # +# This mode still uses multiple threads inside Metall. +# However, applications must not use metall with multiple threads. +option(SINGLE_THREAD_ALLOC "Optimize Metall kernel for single thread usage" OFF) + option(USE_ANONYMOUS_NEW_MAP "Use the anonymous map when creating a new map region" OFF) set(UMAP_ROOT "" CACHE PATH "UMap installed root directory") @@ -211,6 +215,11 @@ if (USE_ANONYMOUS_NEW_MAP) message(STATUS "Use the anonymous map for new map region") endif () +if (SINGLE_THREAD_ALLOC) + list(APPEND METALL_DEFS "METALL_SINGLE_THREAD_ALLOC") + message(STATUS "Optimize Metall kernel for single thread usage") +endif () + # Requirements for GCC if (NOT RUN_BUILD_AND_TEST_WITH_CI) if (("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU") OR ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")) diff --git a/include/metall/kernel/object_cache.hpp b/include/metall/kernel/object_cache.hpp index 8218a088..4c2890a0 100644 --- a/include/metall/kernel/object_cache.hpp +++ b/include/metall/kernel/object_cache.hpp @@ -1,4 +1,4 @@ -// Copyright 2019 Lawrence Livermore National Security, LLC and other Metall +// Copyright 2023 Lawrence Livermore National Security, LLC and other Metall // Project Developers. See the top-level COPYRIGHT file for details. // // SPDX-License-Identifier: (Apache-2.0 OR MIT) @@ -19,10 +19,13 @@ #include #include #include -#define ENABLE_MUTEX_IN_METALL_OBJECT_CACHE 1 -#if ENABLE_MUTEX_IN_METALL_OBJECT_CACHE + +#ifndef METALL_SINGLE_THREAD_ALLOC +#define METALL_ENABLE_MUTEX_IN_OBJECT_CACHE +#ifdef METALL_ENABLE_MUTEX_IN_OBJECT_CACHE #include #endif +#endif namespace metall::kernel { @@ -54,7 +57,12 @@ class object_cache { // Private types and static values // -------------------- // - static constexpr std::size_t k_num_cache_per_core = 4; + static constexpr std::size_t k_num_cache_per_core = +#ifdef METALL_SINGLE_THREAD_ALLOC + 1; +#else + 4; +#endif static constexpr std::size_t k_cache_bin_size = 1ULL << 20ULL; static constexpr std::size_t k_max_cache_block_size = 64; // Add and remove caches by up to this size @@ -69,7 +77,7 @@ class object_cache { difference_type, bin_no_manager>; using cache_table_type = std::vector; -#if ENABLE_MUTEX_IN_METALL_OBJECT_CACHE +#ifdef METALL_ENABLE_MUTEX_IN_OBJECT_CACHE using mutex_type = mdtl::mutex; using lock_guard_type = mdtl::mutex_lock_guard; #endif @@ -85,7 +93,7 @@ class object_cache { // -------------------- // object_cache() : m_cache_table(get_num_cores() * k_num_cache_per_core) -#if ENABLE_MUTEX_IN_METALL_OBJECT_CACHE +#ifdef METALL_ENABLE_MUTEX_IN_OBJECT_CACHE , m_mutex(m_cache_table.size()) #endif @@ -112,7 +120,7 @@ class object_cache { if (bin_no > max_bin_no()) return -1; const auto cache_no = priv_comp_cache_no(); -#if ENABLE_MUTEX_IN_METALL_OBJECT_CACHE +#ifdef METALL_ENABLE_MUTEX_IN_OBJECT_CACHE lock_guard_type guard(m_mutex[cache_no]); #endif if (m_cache_table[cache_no].empty(bin_no)) { @@ -140,7 +148,7 @@ class object_cache { if (bin_no > max_bin_no()) return false; // Error const auto cache_no = priv_comp_cache_no(); -#if ENABLE_MUTEX_IN_METALL_OBJECT_CACHE +#ifdef METALL_ENABLE_MUTEX_IN_OBJECT_CACHE lock_guard_type guard(m_mutex[cache_no]); #endif m_cache_table[cache_no].push(bin_no, object_offset); @@ -224,6 +232,9 @@ class object_cache { } std::size_t priv_comp_cache_no() const { +#ifdef METALL_SINGLE_THREAD_ALLOC + return 0; +#endif #if SUPPORT_GET_CPU_CORE_NO thread_local static const auto sub_cache_no = std::hash{}(std::this_thread::get_id()) % @@ -241,6 +252,9 @@ class object_cache { /// \brief Get CPU core number. /// This function does not call the system call every time as it is slow. static std::size_t priv_get_core_no() { +#ifdef METALL_SINGLE_THREAD_ALLOC + return 0; +#endif thread_local static int cached_core_no = 0; thread_local static int cached_count = 0; if (cached_core_no == 0) { @@ -251,14 +265,18 @@ class object_cache { } static std::size_t get_num_cores() { +#ifdef METALL_SINGLE_THREAD_ALLOC + return 1; +#else return std::thread::hardware_concurrency(); +#endif } // -------------------- // // Private fields // -------------------- // cache_table_type m_cache_table; -#if ENABLE_MUTEX_IN_METALL_OBJECT_CACHE +#ifdef METALL_ENABLE_MUTEX_IN_OBJECT_CACHE std::vector m_mutex; #endif }; diff --git a/test/kernel/CMakeLists.txt b/test/kernel/CMakeLists.txt index 46507536..ee042dd8 100644 --- a/test/kernel/CMakeLists.txt +++ b/test/kernel/CMakeLists.txt @@ -10,6 +10,9 @@ add_metall_test_executable(chunk_directory_test chunk_directory_test.cpp) add_metall_test_executable(manager_test manager_test.cpp) +add_metall_test_executable(manager_test_single_thread manager_test.cpp) +target_compile_definitions(manager_test_single_thread PRIVATE METALL_SINGLE_THREAD_ALLOC) + add_metall_test_executable(snapshot_test snapshot_test.cpp) add_metall_test_executable(copy_file_test copy_file_test.cpp)