diff --git a/src/third_party/wiredtiger/cmake/configs/base.cmake b/src/third_party/wiredtiger/cmake/configs/base.cmake index 7b62ec3377f..fbaf21ccc16 100644 --- a/src/third_party/wiredtiger/cmake/configs/base.cmake +++ b/src/third_party/wiredtiger/cmake/configs/base.cmake @@ -413,34 +413,34 @@ if(ENABLE_DEBUG_INFO AND NOT WT_DEBUG_FLAGS_INITIALIZED) set(BUILD_TYPES_WITH_DEBUG_INFO ${BUILD_MODES}) list(REMOVE_ITEM BUILD_TYPES_WITH_DEBUG_INFO Release) - set(DEBUG_INFO_FLAGS) if(GNU_C_COMPILER OR CLANG_C_COMPILER) # Higher debug levels `-g3`/`-ggdb3` emit additional debug information, including # macro definitions that allow us to evaluate macros such as `p S2C(session)` inside of gdb. - # This needs to be in DWARF version 2 format or later - and should be by default - but - # we'll specify version 4 here to be safe. - list(APPEND DEBUG_INFO_FLAGS -g3 -gdwarf-4) + # DWARF v4 is supplied explicitly to be safe across toolchain defaults. + set(debug_info_flags "-g3 -gdwarf-4") if(CLANG_C_COMPILER) - # Clang requires one additional flag to output macro debug information. - list(APPEND DEBUG_INFO_FLAGS -glldb -fdebug-macro) + string(APPEND debug_info_flags " -glldb -fdebug-macro") else() - list(APPEND DEBUG_INFO_FLAGS -ggdb3) + string(APPEND debug_info_flags " -ggdb3") endif() - - add_cmake_compiler_flags( - FLAGS ${DEBUG_INFO_FLAGS} - LANGUAGES C CXX - BUILD_TYPES ${BUILD_TYPES_WITH_DEBUG_INFO} - ) + foreach(build_type IN LISTS BUILD_TYPES_WITH_DEBUG_INFO) + string(TOUPPER "${build_type}" BT) + set(CMAKE_C_FLAGS_${BT} + "${CMAKE_C_FLAGS_${BT}} ${debug_info_flags}" CACHE STRING "" FORCE) + set(CMAKE_CXX_FLAGS_${BT} + "${CMAKE_CXX_FLAGS_${BT}} ${debug_info_flags}" CACHE STRING "" FORCE) + endforeach() endif() # MSVC: ensure linker produces PDBs. 
if(MSVC_C_COMPILER) - add_cmake_linker_flags( - FLAGS "/DEBUG" - BINARIES EXE SHARED - BUILD_TYPES ${BUILD_TYPES_WITH_DEBUG_INFO} - ) + foreach(build_type IN LISTS BUILD_TYPES_WITH_DEBUG_INFO) + string(TOUPPER "${build_type}" BT) + set(CMAKE_EXE_LINKER_FLAGS_${BT} + "${CMAKE_EXE_LINKER_FLAGS_${BT}} /DEBUG" CACHE STRING "" FORCE) + set(CMAKE_SHARED_LINKER_FLAGS_${BT} + "${CMAKE_SHARED_LINKER_FLAGS_${BT}} /DEBUG" CACHE STRING "" FORCE) + endforeach() endif() # Mark that we've set the initial debug flags diff --git a/src/third_party/wiredtiger/cmake/configs/modes.cmake b/src/third_party/wiredtiger/cmake/configs/modes.cmake index 4ea618d1a05..8f195b2c1ce 100644 --- a/src/third_party/wiredtiger/cmake/configs/modes.cmake +++ b/src/third_party/wiredtiger/cmake/configs/modes.cmake @@ -80,29 +80,36 @@ function(define_build_mode mode) string(REPLACE ";" " " cxx_flags "${DEFINE_BUILD_CXX_COMPILER_FLAGS}") string(REPLACE ";" " " linker_flags "${linker_flags}") string(TOUPPER ${mode} build_mode) - set(CMAKE_C_FLAGS_${build_mode} - "${c_flags}" CACHE STRING - "Flags used by the C compiler for ${mode} build type or configuration." FORCE) - set(CMAKE_CXX_FLAGS_${build_mode} - "${cxx_flags}" CACHE STRING - "Flags used by the C++ compiler for ${mode} build type or configuration." FORCE) + # Seed the default flags for this build mode exactly once per build dir. + if(NOT WT_BUILD_MODE_${build_mode}_FLAGS_INITIALIZED) + set(CMAKE_C_FLAGS_${build_mode} + "${c_flags}" CACHE STRING + "Flags used by the C compiler for ${mode} build type or configuration." FORCE) - set(CMAKE_EXE_LINKER_FLAGS_${build_mode} - "${linker_flags}" CACHE STRING - "Linker flags to be used to create executables for ${mode} build type." FORCE) + set(CMAKE_CXX_FLAGS_${build_mode} + "${cxx_flags}" CACHE STRING + "Flags used by the C++ compiler for ${mode} build type or configuration." 
FORCE) - set(CMAKE_SHARED_LINKER_FLAGS_${build_mode} - "${linker_flags}" CACHE STRING - "Linker flags to be used to create shared libraries for ${mode} build type." FORCE) + set(CMAKE_EXE_LINKER_FLAGS_${build_mode} + "${linker_flags}" CACHE STRING + "Linker flags to be used to create executables for ${mode} build type." FORCE) - set(CMAKE_MODULE_LINKER_FLAGS_${build_mode} - "${linker_flags}" CACHE STRING - "Linker flags to be used to create shared modules for ${mode} build type." FORCE) + set(CMAKE_SHARED_LINKER_FLAGS_${build_mode} + "${linker_flags}" CACHE STRING + "Linker flags to be used to create shared libraries for ${mode} build type." FORCE) + + set(CMAKE_MODULE_LINKER_FLAGS_${build_mode} + "${linker_flags}" CACHE STRING + "Linker flags to be used to create shared modules for ${mode} build type." FORCE) + + set(WT_BUILD_MODE_${build_mode}_FLAGS_INITIALIZED TRUE CACHE INTERNAL + "WiredTiger ${mode} build mode flags have been initialized") + endif() mark_as_advanced( - CMAKE_CXX_FLAGS_${build_mode} CMAKE_C_FLAGS_${build_mode} + CMAKE_CXX_FLAGS_${build_mode} CMAKE_EXE_LINKER_FLAGS_${build_mode} CMAKE_SHARED_LINKER_FLAGS_${build_mode} CMAKE_MODULE_LINKER_FLAGS_${build_mode} diff --git a/src/third_party/wiredtiger/cmake/helpers.cmake b/src/third_party/wiredtiger/cmake/helpers.cmake index a1594c4f2c8..1c557a7166a 100644 --- a/src/third_party/wiredtiger/cmake/helpers.cmake +++ b/src/third_party/wiredtiger/cmake/helpers.cmake @@ -441,99 +441,6 @@ function(add_cmake_flag included_flags flag) endif() endfunction() -# add_cmake_compiler_flags(FLAGS LANGUAGES BUILD_TYPES ) -# A helper function that adds one or more compiler flags to specified languages and build types, -# avoiding duplication by using the existing add_cmake_flag function. -# FLAGS - one or more compilation flags to add -# LANGUAGES - one or more languages (C, CXX, etc.) -# BUILD_TYPES - one or more build types (Debug, RelWithDebInfo, Release, etc.) 
-function(add_cmake_compiler_flags) - cmake_parse_arguments( - PARSE_ARGV - 0 - "COMPILER_FLAGS" - "" - "" - "FLAGS;LANGUAGES;BUILD_TYPES" - ) - - # Validate required arguments - if(NOT COMPILER_FLAGS_FLAGS) - message(FATAL_ERROR "add_cmake_compiler_flags: FLAGS argument is required") - endif() - if(NOT COMPILER_FLAGS_LANGUAGES) - message(FATAL_ERROR "add_cmake_compiler_flags: LANGUAGES argument is required") - endif() - if(NOT COMPILER_FLAGS_BUILD_TYPES) - message(FATAL_ERROR "add_cmake_compiler_flags: BUILD_TYPES argument is required") - endif() - - # Add each flag to each language/build_type combination - foreach(lang ${COMPILER_FLAGS_LANGUAGES}) - foreach(build_type ${COMPILER_FLAGS_BUILD_TYPES}) - # Convert build type to uppercase for CMAKE variable names - string(TOUPPER "${build_type}" build_type_upper) - - # Initialize the flags variable if not already defined - if(NOT DEFINED CMAKE_${lang}_FLAGS_${build_type_upper}) - set(CMAKE_${lang}_FLAGS_${build_type_upper} "") - endif() - - # Add each flag while avoiding duplication - foreach(flag ${COMPILER_FLAGS_FLAGS}) - add_cmake_flag(CMAKE_${lang}_FLAGS_${build_type_upper} "${flag}") - endforeach() - endforeach() - endforeach() -endfunction() - -# add_cmake_linker_flags(FLAGS BINARIES BUILD_TYPES ) -# A helper function that adds one or more linker flags to specified binary types and build types, -# avoiding duplication by using the existing add_cmake_flag function. -# FLAGS - one or more linker flags to add -# BINARIES - one or more binary types (EXE, SHARED, MODULE, etc.) -# BUILD_TYPES - one or more build types (Debug, RelWithDebInfo, Release, etc.) 
-function(add_cmake_linker_flags) - cmake_parse_arguments( - PARSE_ARGV - 0 - "LINKER_FLAGS" - "" - "" - "FLAGS;BINARIES;BUILD_TYPES" - ) - - # Validate required arguments - if(NOT LINKER_FLAGS_FLAGS) - message(FATAL_ERROR "add_cmake_linker_flags: FLAGS argument is required") - endif() - if(NOT LINKER_FLAGS_BINARIES) - message(FATAL_ERROR "add_cmake_linker_flags: BINARIES argument is required") - endif() - if(NOT LINKER_FLAGS_BUILD_TYPES) - message(FATAL_ERROR "add_cmake_linker_flags: BUILD_TYPES argument is required") - endif() - - # Add each flag to each binary_type/build_type combination - foreach(binary ${LINKER_FLAGS_BINARIES}) - foreach(build_type ${LINKER_FLAGS_BUILD_TYPES}) - # Convert build type to uppercase for CMAKE variable names - string(TOUPPER "${build_type}" build_type_upper) - - # Initialize the flags variable if not already defined - if(NOT DEFINED CMAKE_${binary}_LINKER_FLAGS_${build_type_upper}) - set(CMAKE_${binary}_LINKER_FLAGS_${build_type_upper} "") - endif() - - # Add each flag while avoiding duplication - foreach(flag ${LINKER_FLAGS_FLAGS}) - add_cmake_flag(CMAKE_${binary}_LINKER_FLAGS_${build_type_upper} "${flag}") - endforeach() - endforeach() - endforeach() -endfunction() - - # replace_compile_options(flag_var [REMOVE ] [ADD ]) # A helper function that removes specified compiler flags from a flag variable and optionally adds new ones. # This is useful for replacing default compiler flags with custom ones while maintaining clean flag strings. diff --git a/src/third_party/wiredtiger/cmake/platform/arch/aarch64.cmake b/src/third_party/wiredtiger/cmake/platform/arch/aarch64.cmake index 2263a5602f6..f15706eb2d6 100644 --- a/src/third_party/wiredtiger/cmake/platform/arch/aarch64.cmake +++ b/src/third_party/wiredtiger/cmake/platform/arch/aarch64.cmake @@ -4,11 +4,9 @@ include(cmake/rcpc_test.cmake) # ARMv8-A is the 64-bit ARM architecture, turn on the optional CRC. 
# If the compilation check in rcpc_test passes also turn on the RCpc instructions. if(HAVE_RCPC) - add_cmake_flag(CMAKE_C_FLAGS -march=armv8.2-a+rcpc+crc) - add_cmake_flag(CMAKE_CXX_FLAGS -march=armv8.2-a+rcpc+crc) + add_compile_options(-march=armv8.2-a+rcpc+crc) else() - add_cmake_flag(CMAKE_C_FLAGS -march=armv8-a+crc) - add_cmake_flag(CMAKE_CXX_FLAGS -march=armv8-a+crc) + add_compile_options(-march=armv8-a+crc) endif() # moutline-atomics preserves backwards compatibility with Arm v8.0 systems but also supports @@ -17,6 +15,6 @@ endif() # the flag. check_c_compiler_flag("-moutline-atomics" has_moutline_atomics) if(has_moutline_atomics) - add_cmake_flag(CMAKE_C_FLAGS -moutline-atomics) + add_compile_options(-moutline-atomics) endif() unset(has_moutline_atomics CACHE) diff --git a/src/third_party/wiredtiger/cmake/platform/arch/riscv64.cmake b/src/third_party/wiredtiger/cmake/platform/arch/riscv64.cmake index 1b3bd6e881e..684f4cd3dd7 100644 --- a/src/third_party/wiredtiger/cmake/platform/arch/riscv64.cmake +++ b/src/third_party/wiredtiger/cmake/platform/arch/riscv64.cmake @@ -1,4 +1,3 @@ # See https://www.sifive.com/blog/all-aboard-part-1-compiler-args # for background on the `rv64imafdc` and `lp64d` arguments here. -add_cmake_flag(CMAKE_C_FLAGS -march=rv64imafdc) -add_cmake_flag(CMAKE_C_FLAGS -mabi=lp64d) +add_compile_options(-march=rv64imafdc -mabi=lp64d) diff --git a/src/third_party/wiredtiger/cmake/platform/os/linux.cmake b/src/third_party/wiredtiger/cmake/platform/os/linux.cmake index 102b9b5cd21..ef6a54002df 100644 --- a/src/third_party/wiredtiger/cmake/platform/os/linux.cmake +++ b/src/third_party/wiredtiger/cmake/platform/os/linux.cmake @@ -1,6 +1,5 @@ set(WT_POSIX ON CACHE BOOL "") # Linux requires '_GNU_SOURCE' to be defined for access to GNU/Linux extension functions -# e.g. Access to 'pthread_setname_np' on Linux. Append this macro to our compiler flags -# for Linux-based builds. -add_cmake_flag(CMAKE_C_FLAGS -D_GNU_SOURCE) +# e.g. 'pthread_setname_np'. 
+add_compile_definitions(_GNU_SOURCE) diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py index c780f67a17d..342f2a15123 100755 --- a/src/third_party/wiredtiger/dist/api_data.py +++ b/src/third_party/wiredtiger/dist/api_data.py @@ -689,8 +689,7 @@ connection_runtime_config = [ if true, for operations with snapshot isolation the cursor temporarily releases any page that requires force eviction, then repositions back to the page for further operations. A page release encourages eviction of hot or large pages, which is more likely to - succeed without a cursor keeping the page pinned. Note: This setting is not compatible - with disaggregated storage.''', + succeed without a cursor keeping the page pinned.''', type='boolean'), Config('disagg_address_cookie_upgrade', 'none', r''' modify the disaggregated block manager to pretend that it is a newer version to test @@ -2238,7 +2237,13 @@ methods = { ), 'WT_CONNECTION.set_file_system' : Method([]), -'WT_CONNECTION.set_key_provider' : Method([]), +'WT_CONNECTION.set_key_provider' : Method([ + Config('version', '0', r''' + the key provider API version. Version 0 uses the pull model + (WiredTiger calls WT_KEY_PROVIDER::get_key). Version 1 uses + the push model''', + min=0, max=1), +]), 'WT_CONNECTION.load_extension' : Method([ Config('config', '', r''' diff --git a/src/third_party/wiredtiger/dist/s_copyright b/src/third_party/wiredtiger/dist/s_copyright index 963ef2fcc00..b39dc0a99a9 100755 --- a/src/third_party/wiredtiger/dist/s_copyright +++ b/src/third_party/wiredtiger/dist/s_copyright @@ -125,21 +125,23 @@ ENDOFTEXT # Parallel execution: if it's the main invocation of the script, collect the file names # to process and run them in subprocesses. -# Search for files, skipping some well-known 3rd party directories. -find [a-z]* -name '*.[ch]' \ +# Search for files in explicit source directories, skipping any absent in this tree. 
+dirs=() +for d in bench dist docs examples ext lang oss src test tools; do + [ -d "$d" ] && dirs+=("$d") +done +find "${dirs[@]}" \ + -name '*.[ch]' \ -o -name '*.cpp' \ -o -name '*.in' \ -o -name '*.py' \ -o -name '*.swig' | sed -e '/Makefile.in/d' \ - -e '/^build\//d' \ - -e '/^cmake\//d' \ -e '/checksum\/power8\//d' \ -e '/checksum\/zseries\//d' \ -e '/\/3rdparty\//d' \ -e '/\/node_modules\//d' \ -e '/^tools\/wt-mcp\/\.venv\//d' \ - -e '/^venv\//d' \ -e '/dist\/__/d' \ -e 's/^\.\///' | do_in_parallel || RET=1 diff --git a/src/third_party/wiredtiger/dist/s_mentions b/src/third_party/wiredtiger/dist/s_mentions index f0504459de0..a51f54b7bce 100755 --- a/src/third_party/wiredtiger/dist/s_mentions +++ b/src/third_party/wiredtiger/dist/s_mentions @@ -26,23 +26,12 @@ fi # Get what could be the ticket id. ticket_id=$(echo "$branch_name" | cut -d "-" -f-2) -search_function="grep -Iinr --exclude-dir=.git" - -# Find the name of the build folders WiredTiger has been compiled in. -# Users can name this folder anything, but it needs to be in the rootdir and to contain CMakeFiles -build_files=$(find ../ -maxdepth 2 -name CMakeFiles) -for build_dir in $build_files; do - build_folder=$(basename $(dirname $build_dir)) - search_function="$search_function --exclude-dir=$build_folder" -done - -search_function="$search_function $ticket_id ../ 2>&1" - -# Check for comments related to the ticket. -if eval "$search_function >/dev/null" ; then - echo "There are comments mentioning $ticket_id in the code, please check if they need to be \ -resolved:" - eval "$search_function" +# Check for comments related to the ticket. git grep searches only tracked files, so build +# directories and temporary files are excluded automatically. Note: newly created files that are +# not yet added to git will be missed, but they can be checked once they are tracked. +if git -C .. 
grep -Iin "$ticket_id" > /dev/null 2>&1; then + echo "There are comments mentioning $ticket_id in the code, please check if they need to be resolved:" + git -C .. grep -Iin "$ticket_id" fi exit 0 diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index a5d5a6bef4c..eca7da70c07 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -2,5 +2,5 @@ "vendor": "wiredtiger", "github": "wiredtiger/wiredtiger", "branch": "mongodb-master", - "commit": "6f3dbbf2ed12faffad4a3e274d012c61e58874f5" + "commit": "9d2e4ce64fa8248ce21daa252e79528da59bc5d5" } diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c index d008ac92559..f381a49321c 100644 --- a/src/third_party/wiredtiger/src/btree/bt_debug.c +++ b/src/third_party/wiredtiger/src/btree/bt_debug.c @@ -1612,10 +1612,6 @@ __debug_update_dump_flags(WT_DBG *ds, WT_UPDATE *upd) ds->f(ds, ", prepare-restored-from-ds")); ++flag_num; } - if (F_ISSET(upd, WT_UPDATE_PREPARE_ROLLBACK)) { - WT_RET(flag_num == 0 ? ds->f(ds, "prepare-rollback") : ds->f(ds, ", prepare-rollback")); - ++flag_num; - } if (F_ISSET(upd, WT_UPDATE_RESTORED_FAST_TRUNCATE)) { WT_RET(flag_num == 0 ? ds->f(ds, "fast-truncate") : ds->f(ds, ", fast-truncate")); ++flag_num; diff --git a/src/third_party/wiredtiger/src/btree/row_modify.c b/src/third_party/wiredtiger/src/btree/row_modify.c index 2959f6c077c..c1283bf73ee 100644 --- a/src/third_party/wiredtiger/src/btree/row_modify.c +++ b/src/third_party/wiredtiger/src/btree/row_modify.c @@ -396,18 +396,6 @@ __wt_update_obsolete_check(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UP if (__wt_atomic_load_uint64_v_relaxed(&upd->txnid) == WT_TXN_ABORTED) continue; - /* - * Prepare transaction rollback adds a globally visible tombstone to the update chain to - * remove the entire key. 
Treating these globally visible tombstones as obsolete and - * trimming update list can cause problems if the update chain is getting accessed somewhere - * else. To avoid this problem, skip these globally visible tombstones from the update - * obsolete check. - */ - if (F_ISSET(upd, WT_UPDATE_PREPARE_ROLLBACK)) { - first = NULL; - continue; - } - /* Cannot truncate the updates if we need to remove the updates from the history store. */ if (F_ISSET(upd, WT_UPDATE_HS_MAX_STOP)) { first = NULL; diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c index fa527bbebc8..19ae90a1d28 100644 --- a/src/third_party/wiredtiger/src/config/config_def.c +++ b/src/third_party/wiredtiger/src/config/config_def.c @@ -901,6 +901,17 @@ static const uint8_t confchk_WT_CONNECTION_rollback_to_stable_jump[WT_CONFIG_JUM 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}; +static const WT_CONFIG_CHECK confchk_WT_CONNECTION_set_key_provider[] = { + {"version", "int", NULL, "min=0,max=1", NULL, 0, NULL, WT_CONFIG_COMPILED_TYPE_INT, 70, 0, 1, + NULL}, + {NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0, 0, 0, NULL}}; + +static const uint8_t confchk_WT_CONNECTION_set_key_provider_jump[WT_CONFIG_JUMP_TABLE_SIZE] = {0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + static const WT_CONFIG_CHECK confchk_WT_CONNECTION_set_timestamp[] = { {"durable_timestamp", "string", NULL, NULL, NULL, 0, NULL, WT_CONFIG_COMPILED_TYPE_STRING, 3, INT64_MIN, INT64_MAX, NULL}, @@ -4210,7 
+4221,8 @@ static const WT_CONFIG_ENTRY config_entries[] = { confchk_WT_CONNECTION_rollback_to_stable, 2, confchk_WT_CONNECTION_rollback_to_stable_jump, 12, WT_CONF_SIZING_NONE, false}, {"WT_CONNECTION.set_file_system", "", NULL, 0, NULL, 13, WT_CONF_SIZING_NONE, false}, - {"WT_CONNECTION.set_key_provider", "", NULL, 0, NULL, 14, WT_CONF_SIZING_NONE, false}, + {"WT_CONNECTION.set_key_provider", "version=0", confchk_WT_CONNECTION_set_key_provider, 1, + confchk_WT_CONNECTION_set_key_provider_jump, 14, WT_CONF_SIZING_NONE, false}, {"WT_CONNECTION.set_timestamp", "durable_timestamp=,force=false,oldest_timestamp=," "stable_disaggregated_schema_epoch=,stable_timestamp=", diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c index 3375c51dcb9..3f5624c1a03 100644 --- a/src/third_party/wiredtiger/src/conn/conn_api.c +++ b/src/third_party/wiredtiger/src/conn/conn_api.c @@ -1448,7 +1448,7 @@ __conn_open_session(WT_CONNECTION *wt_conn, WT_EVENT_HANDLER *event_handler, con session_ret = NULL; WT_ERR(__wt_open_session(conn, event_handler, config, true, &session_ret)); - session_ret->name = "connection-open-session"; + __wt_atomic_store_ptr_relaxed(&session_ret->name, "connection-open-session"); *wt_sessionp = &session_ret->iface; err: @@ -2907,16 +2907,13 @@ err: static int __conn_set_key_provider(WT_CONNECTION *wt_conn, WT_KEY_PROVIDER *key_provider, const char *config) { + WT_CONFIG_ITEM cval; WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_SESSION_IMPL *session; conn = (WT_CONNECTION_IMPL *)wt_conn; - CONNECTION_API_CALL_NOCONF(conn, session, set_key_provider); - - /* The configuration string has no use but may be useful at a later time. */ - if (config != NULL) - WT_ERR_MSG(session, EINVAL, "key provider configuration currently not supported."); + CONNECTION_API_CALL(conn, session, set_key_provider, config, cfg); /* You can only enable the key provider system in disaggregated mode. 
*/ if (__wt_conn_is_disagg(session)) @@ -2928,6 +2925,10 @@ __conn_set_key_provider(WT_CONNECTION *wt_conn, WT_KEY_PROVIDER *key_provider, c if (conn->key_provider != NULL) WT_ERR_MSG(session, EINVAL, "key provider system must be configured with early_load set"); + WT_ERR(__wt_config_gets(session, cfg, "version", &cval)); + if (cval.val == 1) + F_SET(conn, WT_CONN_KEY_PROVIDER_PUSH); + conn->key_provider = key_provider; err: diff --git a/src/third_party/wiredtiger/src/conn/conn_layered_page_log.c b/src/third_party/wiredtiger/src/conn/conn_layered_page_log.c index 8719457bd7c..d60af6cd0a1 100644 --- a/src/third_party/wiredtiger/src/conn/conn_layered_page_log.c +++ b/src/third_party/wiredtiger/src/conn/conn_layered_page_log.c @@ -435,6 +435,10 @@ __wt_disagg_put_crypt_helper(WT_SESSION_IMPL *session) if (session->ckpt.crash_trigger_point == KEY_PROVIDER_CRASH_BEFORE_KEY_ROTATION) __wt_debug_crash(session); + /* The pull-model get_key API is disabled when the push-model is configured. */ + if (F_ISSET(conn, WT_CONN_KEY_PROVIDER_PUSH)) + return (ENOTSUP); + /* Check for a new encryption key data. If the size is 0, there is none so we can skip. */ WT_ERR(key_provider->get_key(key_provider, (WT_SESSION *)session, &crypt)); if (crypt.keys.size == 0) diff --git a/src/third_party/wiredtiger/src/cursor/cur_layered.c b/src/third_party/wiredtiger/src/cursor/cur_layered.c index 017dde9fa75..2f0b957ef72 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_layered.c +++ b/src/third_party/wiredtiger/src/cursor/cur_layered.c @@ -1155,13 +1155,11 @@ __clayered_iterate_constituents(WT_CURSOR_LAYERED *clayered, uint32_t iter_flag) * prepared conflict occurs. Prepared updates are always ignored on the stable cursor, making it * safe to check the WT_CURSTD_KEY_INT flag. 
*/ - if (((WT_CURSOR_BTREE *)c_ingest)->ref == NULL && !F_ISSET(c_stable, WT_CURSTD_KEY_INT)) { - /* - * Move the stable cursor first to ensure it is advanced, even if a prepared conflict occurs - * on the ingest cursor. - */ - WT_ERR_NOTFOUND_OK(__clayered_constituent_iter_helper(clayered, c_stable, forward), false); + bool fresh_start = + (((WT_CURSOR_BTREE *)c_ingest)->ref == NULL && !F_ISSET(c_stable, WT_CURSTD_KEY_INT)); + if (fresh_start) { WT_ERR_NOTFOUND_OK(__clayered_constituent_iter_helper(clayered, c_ingest, forward), false); + WT_ERR_NOTFOUND_OK(__clayered_constituent_iter_helper(clayered, c_stable, forward), false); goto done; } @@ -1226,7 +1224,13 @@ __clayered_iterate_constituents(WT_CURSOR_LAYERED *clayered, uint32_t iter_flag) done: err: - if (ret == 0 || ret == WT_PREPARE_CONFLICT) { + if (ret == WT_PREPARE_CONFLICT && fresh_start) + /* + * Prepare conflict on the very first key of a fresh walk: ingest is blocked before stable + * has advanced. Reset ingest so the next call restarts cleanly. 
+ */ + WT_TRET(__clayered_reset_cursors(clayered, false)); + else if (ret == 0 || ret == WT_PREPARE_CONFLICT) { if (!F_ISSET(clayered, iter_flag)) { F_CLR(clayered, WT_CLAYERED_ITERATE_NEXT | WT_CLAYERED_ITERATE_PREV); F_SET(clayered, iter_flag); @@ -2961,14 +2965,11 @@ __wt_clayered_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, WT_RET(__wt_config_gets_def(session, cfg, "checkpoint", 0, &cval)); if (cval.len != 0) - WT_RET_MSG(session, ENOTSUP, "Layered trees do not support opening by checkpoint"); + WT_RET_MSG(session, EINVAL, "Layered trees do not support opening by checkpoint"); WT_RET(__wt_config_gets_def(session, cfg, "bulk", 0, &cval)); if (cval.val != 0) - WT_RET_MSG(session, ENOTSUP, "Layered trees do not support bulk loading"); - - if (FLD_ISSET(S2C(session)->debug.flags, WT_CONN_DEBUG_CURSOR_REPOSITION)) - WT_RET_MSG(session, ENOTSUP, "Layered trees do not support cursor reposition"); + WT_RET_MSG(session, EINVAL, "Layered trees do not support bulk loading"); /* Get the layered tree, and hold a reference to it until the cursor is closed. */ WT_RET(__wt_session_get_dhandle(session, uri, NULL, cfg, 0)); diff --git a/src/third_party/wiredtiger/src/cursor/cur_version.c b/src/third_party/wiredtiger/src/cursor/cur_version.c index ca558a16f0f..c4e9be201de 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_version.c +++ b/src/third_party/wiredtiger/src/cursor/cur_version.c @@ -186,13 +186,7 @@ static WT_INLINE WT_UPDATE * __curversion_tombstone_next_upd( WT_SESSION_IMPL *session, WT_CURSOR_VERSION *version_cursor, WT_UPDATE *tombstone) { - /* - * show_prepared_rollback currently targets ingest-table style rollback updates (in-memory - * trees), where rollback metadata lives on aborted prepared value updates and no globally - * visible tombstone with PREPARE_ROLLBACK flag is prepended. 
If this feature is extended to - * non-in-memory trees, we need additional handling for globally visible PREPARE_ROLLBACK - * tombstones and their underlying aborted value updates. - */ + /* Stop at a globally visible tombstone; nothing older is relevant. */ if (__wt_txn_upd_visible_all(session, tombstone)) return (NULL); diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h index 6cc8630c778..ea6d619dcbb 100644 --- a/src/third_party/wiredtiger/src/include/btmem.h +++ b/src/third_party/wiredtiger/src/include/btmem.h @@ -1549,20 +1549,19 @@ struct __wt_update { /* When introducing a new flag, consider adding it to WT_UPDATE_SELECT_FOR_DS. */ /* AUTOMATIC FLAG VALUE GENERATION START 0 */ -#define WT_UPDATE_DELETE_DURABLE 0x0001u /* Key has been removed from disk image. */ -#define WT_UPDATE_DS 0x0002u /* Update has been chosen to the data store. */ -#define WT_UPDATE_DURABLE 0x0004u /* Update has been durable. */ -#define WT_UPDATE_HS 0x0008u /* Update has been written to hs. */ -#define WT_UPDATE_HS_MAX_STOP 0x0010u /* Update has been written to hs with a max stop. */ -#define WT_UPDATE_PREPARE_DURABLE 0x0020u /* Prepared update has been durable. */ -#define WT_UPDATE_PREPARE_RESTORED_FROM_DS 0x0040u /* Prepared update restored from data store. */ -#define WT_UPDATE_PREPARE_ROLLBACK 0x0080u /* Tombstone that rolled back by a prepared update.*/ -#define WT_UPDATE_RESTORED_FAST_TRUNCATE 0x0100u /* Fast truncate instantiation. */ -#define WT_UPDATE_RESTORED_FROM_DS 0x0200u /* Update restored from data store. */ -#define WT_UPDATE_RESTORED_FROM_HS 0x0400u /* Update restored from history store. */ -#define WT_UPDATE_RESTORED_FROM_INGEST 0x0800u /* Update restored from ingest btree. */ -#define WT_UPDATE_RTS_DRYRUN_ABORT 0x1000u /* Used by dry run to mark a would-be abort. */ - /* AUTOMATIC FLAG VALUE GENERATION STOP 16 */ +#define WT_UPDATE_DELETE_DURABLE 0x001u /* Key has been removed from disk image. 
*/ +#define WT_UPDATE_DS 0x002u /* Update has been chosen to the data store. */ +#define WT_UPDATE_DURABLE 0x004u /* Update has been durable. */ +#define WT_UPDATE_HS 0x008u /* Update has been written to hs. */ +#define WT_UPDATE_HS_MAX_STOP 0x010u /* Update has been written to hs with a max stop. */ +#define WT_UPDATE_PREPARE_DURABLE 0x020u /* Prepared update has been durable. */ +#define WT_UPDATE_PREPARE_RESTORED_FROM_DS 0x040u /* Prepared update restored from data store. */ +#define WT_UPDATE_RESTORED_FAST_TRUNCATE 0x080u /* Fast truncate instantiation. */ +#define WT_UPDATE_RESTORED_FROM_DS 0x100u /* Update restored from data store. */ +#define WT_UPDATE_RESTORED_FROM_HS 0x200u /* Update restored from history store. */ +#define WT_UPDATE_RESTORED_FROM_INGEST 0x400u /* Update restored from ingest btree. */ +#define WT_UPDATE_RTS_DRYRUN_ABORT 0x800u /* Used by dry run to mark a would-be abort. */ + /* AUTOMATIC FLAG VALUE GENERATION STOP 16 */ uint16_t flags; /* There are several cases we should select the update irrespective of visibility to write to the diff --git a/src/third_party/wiredtiger/src/include/cell_inline.h b/src/third_party/wiredtiger/src/include/cell_inline.h index b5d9f1022b5..0a94248161b 100644 --- a/src/third_party/wiredtiger/src/include/cell_inline.h +++ b/src/third_party/wiredtiger/src/include/cell_inline.h @@ -1083,6 +1083,280 @@ __wt_cell_leaf_value_parse(WT_PAGE *page, WT_CELL *cell) return (WT_ERROR); \ } while (0) +/* + * __cell_unpack_addr_cell -- + * Unpack the validity window and optional fast-truncate record for an addr cell. + */ +static WT_INLINE int +__cell_unpack_addr_cell(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_CELL *cell, + const uint8_t **pp, const void *end, WT_CELL_UNPACK_ADDR *unpack_addr) +{ + WT_PAGE_DELETED *page_del; + WT_TIME_AGGREGATE *ta; + uint8_t flags; + bool has_fast_truncate, prepare_fast_truncate; + + /* Return an error if we're not unpacking a cell of this type. 
*/ + if (unpack_addr == NULL) + return (WT_ERROR); + + ta = &unpack_addr->ta; + has_fast_truncate = unpack_addr->raw == WT_CELL_ADDR_DEL && F_ISSET(dsk, WT_PAGE_FT_UPDATE); + prepare_fast_truncate = false; + + if ((cell->__chunk[0] & WT_CELL_SECOND_DESC) != 0) { + flags = *(*pp)++; /* skip second descriptor byte */ + WT_CELL_LEN_CHK(*pp, 0, dsk, end); + + if (LF_ISSET(WT_CELL_PREPARE)) { + if (has_fast_truncate) + prepare_fast_truncate = true; + else + ta->prepare = 1; + } + if (LF_ISSET(WT_CELL_TS_START)) + WT_RET( + __wt_vunpack_uint(pp, end == NULL ? 0 : WT_PTRDIFF(end, *pp), &ta->oldest_start_ts)); + if (LF_ISSET(WT_CELL_TXN_START)) + WT_RET(__wt_vunpack_uint(pp, end == NULL ? 0 : WT_PTRDIFF(end, *pp), &ta->newest_txn)); + if (LF_ISSET(WT_CELL_TS_DURABLE_START)) { + WT_RET(__wt_vunpack_uint( + pp, end == NULL ? 0 : WT_PTRDIFF(end, *pp), &ta->newest_start_durable_ts)); + ta->newest_start_durable_ts += ta->oldest_start_ts; + } + if (LF_ISSET(WT_CELL_TS_STOP)) { + WT_RET( + __wt_vunpack_uint(pp, end == NULL ? 0 : WT_PTRDIFF(end, *pp), &ta->newest_stop_ts)); + ta->newest_stop_ts += ta->oldest_start_ts; + } + if (LF_ISSET(WT_CELL_TXN_STOP)) { + WT_RET( + __wt_vunpack_uint(pp, end == NULL ? 0 : WT_PTRDIFF(end, *pp), &ta->newest_stop_txn)); + ta->newest_stop_txn += ta->newest_txn; + } + if (LF_ISSET(WT_CELL_TS_DURABLE_STOP)) { + WT_RET(__wt_vunpack_uint( + pp, end == NULL ? 0 : WT_PTRDIFF(end, *pp), &ta->newest_stop_durable_ts)); + ta->newest_stop_durable_ts += ta->newest_stop_ts; + } + WT_RET(__wt_check_addr_validity(session, ta, end != NULL)); + } + + if (!has_fast_truncate) + return (0); + + /* Unpack the fast-truncate page_del record. */ + page_del = &unpack_addr->page_del; + WT_RET( + __wt_vunpack_uint(pp, end == NULL ? 0 : WT_PTRDIFF(end, *pp), (uint64_t *)&page_del->txnid)); + if (prepare_fast_truncate) { + page_del->prepare_state = WT_PREPARE_INPROGRESS; + page_del->committed = false; + WT_RET( + __wt_vunpack_uint(pp, end == NULL ? 
0 : WT_PTRDIFF(end, *pp), &page_del->prepare_ts)); + page_del->pg_del_start_ts = page_del->prepare_ts; + WT_RET( + __wt_vunpack_uint(pp, end == NULL ? 0 : WT_PTRDIFF(end, *pp), &page_del->prepared_id)); + /* Explicitly initialize the durable timestamp to WT_TS_NONE. */ + page_del->pg_del_durable_ts = WT_TS_NONE; + WT_ASSERT_ALWAYS(session, + !F_ISSET(S2C(session), WT_CONN_PRESERVE_PREPARED) || + page_del->prepared_id != WT_PREPARED_ID_NONE, + "Read prepared record with no prepared id when preserve prepared is enabled."); + } else { + page_del->prepare_state = WT_PREPARE_INIT; + page_del->committed = true; + WT_RET(__wt_vunpack_uint( + pp, end == NULL ? 0 : WT_PTRDIFF(end, *pp), &page_del->pg_del_start_ts)); + WT_RET(__wt_vunpack_uint( + pp, end == NULL ? 0 : WT_PTRDIFF(end, *pp), &page_del->pg_del_durable_ts)); + } + page_del->selected_for_write = true; + return (0); +} + +/* + * __cell_unpack_value_window -- + * Unpack the validity window for a value cell (called when WT_CELL_SECOND_DESC is set). + */ +static WT_INLINE int +__cell_unpack_value_window( + WT_SESSION_IMPL *session, const uint8_t **pp, const void *end, uint8_t flags, WT_TIME_WINDOW *tw) +{ + wt_timestamp_t temp_start_ts, temp_durable_start_ts, temp_stop_ts, temp_durable_stop_ts; + + temp_start_ts = temp_durable_start_ts = temp_durable_stop_ts = WT_TS_NONE; + temp_stop_ts = WT_TS_MAX; + + if (LF_ISSET(WT_CELL_TS_START)) + WT_RET(__wt_vunpack_uint(pp, end == NULL ? 0 : WT_PTRDIFF(end, *pp), &temp_start_ts)); + if (LF_ISSET(WT_CELL_TXN_START)) + WT_RET(__wt_vunpack_uint(pp, end == NULL ? 0 : WT_PTRDIFF(end, *pp), &tw->start_txn)); + if (LF_ISSET(WT_CELL_TS_DURABLE_START)) + WT_RET( + __wt_vunpack_uint(pp, end == NULL ? 0 : WT_PTRDIFF(end, *pp), &temp_durable_start_ts)); + + if (LF_ISSET(WT_CELL_TS_STOP)) + WT_RET(__wt_vunpack_uint(pp, end == NULL ? 0 : WT_PTRDIFF(end, *pp), &temp_stop_ts)); + + if (LF_ISSET(WT_CELL_TXN_STOP)) { + WT_RET(__wt_vunpack_uint(pp, end == NULL ? 
0 : WT_PTRDIFF(end, *pp), &tw->stop_txn)); + tw->stop_txn += tw->start_txn; + } + if (LF_ISSET(WT_CELL_TS_DURABLE_STOP)) + WT_RET( + __wt_vunpack_uint(pp, end == NULL ? 0 : WT_PTRDIFF(end, *pp), &temp_durable_stop_ts)); + + /* Load temporary values to the right fields. */ + if (LF_ISSET(WT_CELL_PREPARE)) { + bool preserve_prepared = F_ISSET(S2C(session), WT_CONN_PRESERVE_PREPARED); + /* + * We can compare the txn_id only here, but cannot do it everywhere else because when + * recovering, all transaction ids are reset to WT_TXN_NONE, so we cannot compare the + * transaction ids. + */ + if (tw->start_txn == tw->stop_txn && temp_stop_ts == WT_TS_NONE) { + /* + * This is a special case where both transaction start and stop are in prepared state. + * The prepared record is written with the preserve prepared config enabled. The same + * prepared id is packed to WT_CELL_TS_DURABLE_START. Since temp_stop_ts here stores the + * difference between the start and stop prepare timestamps, temp_stop_ts must be 0. + */ + if (temp_durable_start_ts != WT_TS_NONE) { + WT_ASSERT(session, temp_durable_stop_ts == WT_TS_NONE); + tw->start_prepare_ts = temp_start_ts; + tw->start_prepared_id = temp_durable_start_ts; + tw->stop_prepare_ts = temp_start_ts; + tw->stop_prepared_id = temp_durable_start_ts; + } else { + WT_ASSERT_ALWAYS(session, !preserve_prepared, + "Read prepared record with no prepared id when preserve prepared is " + "enabled."); + WT_ASSERT(session, temp_durable_start_ts == temp_durable_stop_ts); + tw->start_prepare_ts = tw->stop_prepare_ts = temp_start_ts; + } + } else if (tw->stop_txn != WT_TXN_MAX) { + /* + * This case happens where the transaction start is committed, but the transaction stop + * is prepared. In this case, we store the start timestamp and durable start timestamp + * in WT_CELL_TS_START and WT_CELL_TS_DURABLE_START, prepare ts in WT_CELL_TS_STOP. 
+ */ + tw->start_ts = temp_start_ts; + /* + * If the prepared record was written with the preserve prepared config enabled, the + * prepared id is stored in WT_CELL_TS_DURABLE_STOP; it is loaded further below. + */ + if (temp_durable_start_ts != WT_TS_NONE) + tw->durable_start_ts = temp_durable_start_ts + tw->start_ts; + else + tw->durable_start_ts = tw->start_ts; + + WT_ASSERT(session, temp_stop_ts != WT_TS_MAX); + tw->stop_prepare_ts = tw->start_ts + temp_stop_ts; + + if (temp_durable_stop_ts != WT_TS_NONE) + tw->stop_prepared_id = temp_durable_stop_ts; + else + WT_ASSERT_ALWAYS(session, !preserve_prepared, + "Read prepared record with no prepared id when preserve prepared is " + "enabled."); + } else { + WT_ASSERT(session, tw->start_ts == WT_TS_NONE); + /* + * This case happens when only transaction start is prepared, and there is no + * transaction stop. In this case, we store the prepare ts in WT_CELL_TS_START. + */ + tw->start_prepare_ts = temp_start_ts; + /* + * The prepared record is written with the preserve prepared config enabled. We store + * prepared id in WT_CELL_TS_DURABLE_START. 
+ */ + if (temp_durable_start_ts != WT_TS_NONE) + tw->start_prepared_id = temp_durable_start_ts; + else + WT_ASSERT_ALWAYS(session, !preserve_prepared, + "Read prepared record with no prepared id when preserve prepared is " + "enabled."); + } + } else { + if (LF_ISSET(WT_CELL_TS_START)) + tw->start_ts = temp_start_ts; + if (LF_ISSET(WT_CELL_TS_DURABLE_START)) + tw->durable_start_ts = temp_durable_start_ts + tw->start_ts; + else + tw->durable_start_ts = tw->start_ts; + + if (LF_ISSET(WT_CELL_TS_STOP)) + tw->stop_ts = temp_stop_ts + tw->start_ts; + if (LF_ISSET(WT_CELL_TS_DURABLE_STOP)) + tw->durable_stop_ts = temp_durable_stop_ts + tw->stop_ts; + else if (tw->stop_ts != WT_TS_MAX) + tw->durable_stop_ts = tw->stop_ts; + } + + __cell_assert_tw_has_ts_for_garbage_collection_table(session, tw); + + WT_RET(__cell_check_value_validity(session, tw, end != NULL)); + return (0); +} + +/* + * __cell_unpack_data_len -- + * Unpack the data length for a cell (all cases except WT_CELL_VALUE_COPY). + */ +static WT_INLINE int +__cell_unpack_data_len( + WT_CELL *cell, WT_CELL_UNPACK_COMMON *unpack, const uint8_t **pp, const void *end) +{ + uint64_t v; + + switch (unpack->raw) { + case WT_CELL_KEY_OVFL: + case WT_CELL_KEY_OVFL_RM: + case WT_CELL_VALUE_OVFL: + case WT_CELL_VALUE_OVFL_RM: + /* + * Set overflow flag. + */ + F_SET(unpack, WT_CELL_UNPACK_OVERFLOW); + /* FALLTHROUGH */ + + case WT_CELL_ADDR_DEL: + case WT_CELL_ADDR_DEL_VISIBLE_ALL: + case WT_CELL_ADDR_INT: + case WT_CELL_ADDR_LEAF: + case WT_CELL_ADDR_LEAF_NO: + case WT_CELL_KEY: + case WT_CELL_KEY_PFX: + case WT_CELL_VALUE: + /* + * The cell is followed by a 4B data length and a chunk of data. + */ + WT_RET(__wt_vunpack_uint(pp, end == NULL ? 0 : WT_PTRDIFF(end, *pp), &v)); + + /* + * If the size was what prevented us from using a short cell, it's larger than the + * adjustment size. Decrement/increment it when packing/unpacking so it takes up less room. 
+ */ + if (unpack->raw == WT_CELL_KEY || unpack->raw == WT_CELL_KEY_PFX || + (unpack->raw == WT_CELL_VALUE && unpack->v == 0 && + (cell->__chunk[0] & WT_CELL_SECOND_DESC) == 0)) + v += WT_CELL_SIZE_ADJUST; + + unpack->data = *pp; + unpack->size = (uint32_t)v; + unpack->__len = WT_PTRDIFF32(*pp, cell) + unpack->size; + break; + + case WT_CELL_DEL: + unpack->__len = WT_PTRDIFF32(*pp, cell); + break; + default: + return (WT_ERROR); /* Unknown cell type. */ + } + return (0); +} + /* * __wt_cell_unpack_safe -- * Unpack a WT_CELL into a structure, with optional boundary checks. @@ -1097,15 +1371,13 @@ __wt_cell_unpack_safe(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_CE WT_TIME_WINDOW tw; } copy; WT_CELL_UNPACK_COMMON *unpack; - WT_PAGE_DELETED *page_del; - WT_TIME_AGGREGATE *ta; WT_TIME_WINDOW *tw; uint64_t v; const uint8_t *p; uint8_t flags; - bool copy_cell, has_fast_truncate, prepare_fast_truncate; + bool copy_cell; - copy_cell = has_fast_truncate = prepare_fast_truncate = false; + copy_cell = false; copy.len = 0; /* [-Wconditional-uninitialized] */ copy.v = 0; /* [-Wconditional-uninitialized] */ @@ -1113,13 +1385,11 @@ __wt_cell_unpack_safe(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_CE unpack = (WT_CELL_UNPACK_COMMON *)unpack_value; tw = &unpack_value->tw; WT_TIME_WINDOW_INIT(tw); - ta = NULL; } else { WT_ASSERT(session, unpack_value == NULL); unpack = (WT_CELL_UNPACK_COMMON *)unpack_addr; - ta = &unpack_addr->ta; - WT_TIME_AGGREGATE_INIT(ta); + WT_TIME_AGGREGATE_INIT(&unpack_addr->ta); tw = NULL; } @@ -1189,60 +1459,7 @@ copy_cell_restart: case WT_CELL_ADDR_INT: case WT_CELL_ADDR_LEAF: case WT_CELL_ADDR_LEAF_NO: - /* Return an error if we're not unpacking a cell of this type. */ - if (unpack_addr == NULL) - return (WT_ERROR); - - /* - * A committed fast-truncate cell may be written without WT_CELL_SECOND_DESC when its time - * aggregate is globally visible. 
Compute this flag before the SECOND_DESC early-exit so the - * page_del block is always unpacked for fast-truncate addr-del cells. - */ - has_fast_truncate = unpack->raw == WT_CELL_ADDR_DEL && F_ISSET(dsk, WT_PAGE_FT_UPDATE); - - if ((cell->__chunk[0] & WT_CELL_SECOND_DESC) == 0) - break; - flags = *p++; /* skip second descriptor byte */ - WT_CELL_LEN_CHK(p, 0, dsk, end); - - if (LF_ISSET(WT_CELL_PREPARE)) { - /* - * For a prepared fast-truncate, the prepare state is recorded in the time aggregate. We - * cannot have a prepared fast-truncate and a prepared time aggregate at the same time. - * Otherwise, it would be a write conflict. - */ - if (has_fast_truncate) - prepare_fast_truncate = true; - else - ta->prepare = 1; - } - if (LF_ISSET(WT_CELL_TS_START)) - WT_RET( - __wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &ta->oldest_start_ts)); - if (LF_ISSET(WT_CELL_TXN_START)) - WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &ta->newest_txn)); - if (LF_ISSET(WT_CELL_TS_DURABLE_START)) { - WT_RET(__wt_vunpack_uint( - &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &ta->newest_start_durable_ts)); - ta->newest_start_durable_ts += ta->oldest_start_ts; - } - - if (LF_ISSET(WT_CELL_TS_STOP)) { - WT_RET( - __wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &ta->newest_stop_ts)); - ta->newest_stop_ts += ta->oldest_start_ts; - } - if (LF_ISSET(WT_CELL_TXN_STOP)) { - WT_RET( - __wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &ta->newest_stop_txn)); - ta->newest_stop_txn += ta->newest_txn; - } - if (LF_ISSET(WT_CELL_TS_DURABLE_STOP)) { - WT_RET(__wt_vunpack_uint( - &p, end == NULL ? 
0 : WT_PTRDIFF(end, p), &ta->newest_stop_durable_ts)); - ta->newest_stop_durable_ts += ta->newest_stop_ts; - } - WT_RET(__wt_check_addr_validity(session, ta, end != NULL)); + WT_RET(__cell_unpack_addr_cell(session, dsk, cell, &p, end, unpack_addr)); break; case WT_CELL_DEL: case WT_CELL_VALUE: @@ -1257,158 +1474,10 @@ copy_cell_restart: break; flags = *p++; /* skip second descriptor byte */ WT_CELL_LEN_CHK(p, 0, dsk, end); - wt_timestamp_t temp_start_ts, temp_durable_start_ts, temp_stop_ts, temp_durable_stop_ts; - temp_start_ts = temp_durable_start_ts = temp_durable_stop_ts = WT_TS_NONE; - temp_stop_ts = WT_TS_MAX; - - if (LF_ISSET(WT_CELL_TS_START)) - WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &temp_start_ts)); - if (LF_ISSET(WT_CELL_TXN_START)) - WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &tw->start_txn)); - if (LF_ISSET(WT_CELL_TS_DURABLE_START)) - WT_RET( - __wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &temp_durable_start_ts)); - - if (LF_ISSET(WT_CELL_TS_STOP)) - WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &temp_stop_ts)); - - if (LF_ISSET(WT_CELL_TXN_STOP)) { - WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &tw->stop_txn)); - tw->stop_txn += tw->start_txn; - } - if (LF_ISSET(WT_CELL_TS_DURABLE_STOP)) - WT_RET( - __wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &temp_durable_stop_ts)); - - /* Load temporary values to the right fields. */ - if (LF_ISSET(WT_CELL_PREPARE)) { - bool preserve_prepared = F_ISSET(S2C(session), WT_CONN_PRESERVE_PREPARED); - /* - * We can compare the txn_id only here, but cannot do it everywhere else because when - * recovering, all transaction ids are reset to WT_TXN_NONE, so we cannot compare the - * transaction ids. - */ - if (tw->start_txn == tw->stop_txn && temp_stop_ts == WT_TS_NONE) { - /* - * This is a special case where both transaction start and stop are in prepared - * state. 
The prepared record is written with the preserve prepared config enabled. - * The same prepared id is packed to WT_CELL_TS_DURABLE_START. Since temp_stop_ts - * here stores the difference between start_prepared_id and stop_prepared_id, - * temp_stop_ts must be 0. - */ - if (temp_durable_start_ts != WT_TS_NONE) { - WT_ASSERT(session, temp_durable_stop_ts == WT_TS_NONE); - tw->start_prepare_ts = temp_start_ts; - tw->start_prepared_id = temp_durable_start_ts; - tw->stop_prepare_ts = temp_start_ts; - tw->stop_prepared_id = temp_durable_start_ts; - } else { - WT_ASSERT_ALWAYS(session, !preserve_prepared, - "Read prepared record with no prepared id when preserve prepared is " - "enabled."); - WT_ASSERT(session, temp_durable_start_ts == temp_durable_stop_ts); - tw->start_prepare_ts = tw->stop_prepare_ts = temp_start_ts; - } - } else if (tw->stop_txn != WT_TXN_MAX) { - /* - * This case happens where the transaction start is committed, but the transaction - * stop is prepared. In this case, we store the start timestamp and durable start - * timestamp in WT_CELL_TS_START and WT_CELL_TS_DURABLE_START, prepare ts in - * WT_CELL_TS_STOP. - */ - tw->start_ts = temp_start_ts; - /* - * The prepared record is written with the preserve prepared config enabled. We - * store the prepared id in WT_CELL_TS_DURABLE_STOP. - */ - if (temp_durable_start_ts != WT_TS_NONE) - tw->durable_start_ts = temp_durable_start_ts + tw->start_ts; - else - tw->durable_start_ts = tw->start_ts; - - WT_ASSERT(session, temp_stop_ts != WT_TS_MAX); - tw->stop_prepare_ts = tw->start_ts + temp_stop_ts; - - if (temp_durable_stop_ts != WT_TS_NONE) - tw->stop_prepared_id = temp_durable_stop_ts; - else - WT_ASSERT_ALWAYS(session, !preserve_prepared, - "Read prepared record with no prepared id when preserve prepared is " - "enabled."); - } else { - WT_ASSERT(session, tw->start_ts == WT_TS_NONE); - /* - * This case happens when only transaction start is prepared, and there is no - * transaction stop. 
In this case, we store the prepare ts in WT_CELL_TS_START. - */ - tw->start_prepare_ts = temp_start_ts; - /* - * The prepared record is written with the preserve prepared config enabled. We - * store prepared id in WT_CELL_TS_DURABLE_START. - */ - if (temp_durable_start_ts != WT_TS_NONE) - tw->start_prepared_id = temp_durable_start_ts; - else - WT_ASSERT_ALWAYS(session, !preserve_prepared, - "Read prepared record with no prepared id when preserve prepared is " - "enabled."); - } - } else { - if (LF_ISSET(WT_CELL_TS_START)) - tw->start_ts = temp_start_ts; - if (LF_ISSET(WT_CELL_TS_DURABLE_START)) - tw->durable_start_ts = temp_durable_start_ts + tw->start_ts; - else - tw->durable_start_ts = tw->start_ts; - - if (LF_ISSET(WT_CELL_TS_STOP)) - tw->stop_ts = temp_stop_ts + tw->start_ts; - if (LF_ISSET(WT_CELL_TS_DURABLE_STOP)) - tw->durable_stop_ts = temp_durable_stop_ts + tw->stop_ts; - else if (tw->stop_ts != WT_TS_MAX) - tw->durable_stop_ts = tw->stop_ts; - } - - __cell_assert_tw_has_ts_for_garbage_collection_table(session, tw); - - WT_RET(__cell_check_value_validity(session, tw, end != NULL)); + WT_RET(__cell_unpack_value_window(session, &p, end, flags, tw)); break; } - /* Unpack any fast-truncate information. */ - if (has_fast_truncate) { - page_del = &unpack_addr->page_del; - WT_RET(__wt_vunpack_uint( - &p, end == NULL ? 0 : WT_PTRDIFF(end, p), (uint64_t *)&page_del->txnid)); - if (prepare_fast_truncate) { - page_del->prepare_state = WT_PREPARE_INPROGRESS; - page_del->committed = false; - /* - * For prepared fast-truncates, the prepared state is shared with the time aggregate but - * the prepare timestamp and the prepared id are stored in the page_del block. - */ - WT_RET( - __wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &page_del->prepare_ts)); - page_del->pg_del_start_ts = page_del->prepare_ts; - WT_RET( - __wt_vunpack_uint(&p, end == NULL ? 
0 : WT_PTRDIFF(end, p), &page_del->prepared_id)); - /* Explicitly initialize the durable timestamp to WT_TS_NONE. */ - page_del->pg_del_durable_ts = WT_TS_NONE; - WT_ASSERT_ALWAYS(session, - !F_ISSET(S2C(session), WT_CONN_PRESERVE_PREPARED) || - page_del->prepared_id != WT_PREPARED_ID_NONE, - "Read prepared record with no prepared id when preserve prepared is enabled."); - } else { - page_del->prepare_state = WT_PREPARE_INIT; - page_del->committed = true; - WT_RET(__wt_vunpack_uint( - &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &page_del->pg_del_start_ts)); - WT_RET(__wt_vunpack_uint( - &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &page_del->pg_del_durable_ts)); - } - page_del->selected_for_write = true; - } - /* * Check for an RLE count or record number that optionally follows the cell descriptor byte on * column-store variable-length pages. @@ -1441,48 +1510,9 @@ copy_cell_restart: cell = (WT_CELL *)((uint8_t *)cell - v); goto copy_cell_restart; - case WT_CELL_KEY_OVFL: - case WT_CELL_KEY_OVFL_RM: - case WT_CELL_VALUE_OVFL: - case WT_CELL_VALUE_OVFL_RM: - /* - * Set overflow flag. - */ - F_SET(unpack, WT_CELL_UNPACK_OVERFLOW); - /* FALLTHROUGH */ - - case WT_CELL_ADDR_DEL: - case WT_CELL_ADDR_DEL_VISIBLE_ALL: - case WT_CELL_ADDR_INT: - case WT_CELL_ADDR_LEAF: - case WT_CELL_ADDR_LEAF_NO: - case WT_CELL_KEY: - case WT_CELL_KEY_PFX: - case WT_CELL_VALUE: - /* - * The cell is followed by a 4B data length and a chunk of data. - */ - WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &v)); - - /* - * If the size was what prevented us from using a short cell, it's larger than the - * adjustment size. Decrement/increment it when packing/unpacking so it takes up less room. 
- */ - if (unpack->raw == WT_CELL_KEY || unpack->raw == WT_CELL_KEY_PFX || - (unpack->raw == WT_CELL_VALUE && unpack->v == 0 && - (cell->__chunk[0] & WT_CELL_SECOND_DESC) == 0)) - v += WT_CELL_SIZE_ADJUST; - - unpack->data = p; - unpack->size = (uint32_t)v; - unpack->__len = WT_PTRDIFF32(p, cell) + unpack->size; - break; - - case WT_CELL_DEL: - unpack->__len = WT_PTRDIFF32(p, cell); - break; default: - return (WT_ERROR); /* Unknown cell type. */ + WT_RET(__cell_unpack_data_len(cell, unpack, &p, end)); + break; } done: diff --git a/src/third_party/wiredtiger/src/include/conf.h b/src/third_party/wiredtiger/src/include/conf.h index 1ff6dcd60cf..e3a1a469d65 100644 --- a/src/third_party/wiredtiger/src/include/conf.h +++ b/src/third_party/wiredtiger/src/include/conf.h @@ -157,6 +157,7 @@ WT_CONF_API_DECLARE(WT_CONNECTION, open_session, 3, 9); WT_CONF_API_DECLARE(WT_CONNECTION, query_timestamp, 1, 1); WT_CONF_API_DECLARE(WT_CONNECTION, reconfigure, 21, 132); WT_CONF_API_DECLARE(WT_CONNECTION, rollback_to_stable, 1, 2); +WT_CONF_API_DECLARE(WT_CONNECTION, set_key_provider, 1, 1); WT_CONF_API_DECLARE(WT_CONNECTION, set_timestamp, 1, 5); WT_CONF_API_DECLARE(WT_CURSOR, bound, 1, 3); WT_CONF_API_DECLARE(WT_CURSOR, reconfigure, 1, 3); diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h index 2da34bd4105..157b35728f1 100644 --- a/src/third_party/wiredtiger/src/include/connection.h +++ b/src/third_party/wiredtiger/src/include/connection.h @@ -1198,15 +1198,16 @@ struct __wt_connection_impl { #define WT_CONN_CKPT_CLEANUP_RECLAIM_SPACE 0x0008u #define WT_CONN_CKPT_SYNC 0x0010u #define WT_CONN_IN_MEMORY 0x0020u -#define WT_CONN_LIVE_RESTORE_FS 0x0040u -#define WT_CONN_PRECISE_CHECKPOINT 0x0080u -#define WT_CONN_PRESERVE_PREPARED 0x0100u -#define WT_CONN_READONLY 0x0200u -#define WT_CONN_RECOVERING 0x0400u -#define WT_CONN_RECOVERING_METADATA 0x0800u -#define WT_CONN_RECOVERY_COMPLETE 0x1000u -#define WT_CONN_SALVAGE 
0x2000u -#define WT_CONN_WAS_BACKUP 0x4000u +#define WT_CONN_KEY_PROVIDER_PUSH 0x0040u +#define WT_CONN_LIVE_RESTORE_FS 0x0080u +#define WT_CONN_PRECISE_CHECKPOINT 0x0100u +#define WT_CONN_PRESERVE_PREPARED 0x0200u +#define WT_CONN_READONLY 0x0400u +#define WT_CONN_RECOVERING 0x0800u +#define WT_CONN_RECOVERING_METADATA 0x1000u +#define WT_CONN_RECOVERY_COMPLETE 0x2000u +#define WT_CONN_SALVAGE 0x4000u +#define WT_CONN_WAS_BACKUP 0x8000u /* AUTOMATIC FLAG VALUE GENERATION STOP 32 */ wt_shared uint32_t flags; diff --git a/src/third_party/wiredtiger/src/include/session_inline.h b/src/third_party/wiredtiger/src/include/session_inline.h index 8061b6dd8d9..42379d9bc91 100644 --- a/src/third_party/wiredtiger/src/include/session_inline.h +++ b/src/third_party/wiredtiger/src/include/session_inline.h @@ -32,14 +32,15 @@ __wt_single_thread_check_start(WT_SESSION_IMPL *s) if (!WT_SESSION_IS_DEFAULT(s) && s->thread_check.owning_thread != current_tid) { ret = __wt_spin_trylock(s, &s->thread_check.lock); + const char *session_name = __wt_atomic_load_ptr_relaxed(&s->name); WT_ASSERT_ALWAYS(s, ret == 0, "Session %" PRIu32 " is accessed concurrently by multiple threads: " "current thread %" PRIuMAX ", owning thread %" PRIuMAX " (active op: %s, last op: %s, api depth: %u, dhandle: %s)", - s->id, current_tid, s->thread_check.owning_thread, s->name != NULL ? s->name : "none", - s->lastop != NULL ? s->lastop : "none", s->api_call_counter, - s->dhandle != NULL ? s->dhandle->name : "none"); + s->id, current_tid, s->thread_check.owning_thread, + session_name != NULL ? session_name : "none", s->lastop != NULL ? s->lastop : "none", + s->api_call_counter, s->dhandle != NULL ? 
s->dhandle->name : "none"); s->thread_check.owning_thread = current_tid; } diff --git a/src/third_party/wiredtiger/src/include/txn_inline.h b/src/third_party/wiredtiger/src/include/txn_inline.h index f56e40cada5..f0f067b9c2b 100644 --- a/src/third_party/wiredtiger/src/include/txn_inline.h +++ b/src/third_party/wiredtiger/src/include/txn_inline.h @@ -1514,7 +1514,7 @@ __wt_txn_read_upd_list_internal(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, *restored_updp = NULL; __wt_upd_value_clear(cbt->upd_value); - for (; upd != NULL; upd = upd->next) { + for (; upd != NULL; upd = __wt_atomic_load_ptr_relaxed(&upd->next)) { /* Skip reserved place-holders, they're never visible. */ if (upd->type == WT_UPDATE_RESERVE) continue; diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.h.in b/src/third_party/wiredtiger/src/include/wiredtiger.h.in index 695d0c5a4ee..1a5aafdc679 100644 --- a/src/third_party/wiredtiger/src/include/wiredtiger.h.in +++ b/src/third_party/wiredtiger/src/include/wiredtiger.h.in @@ -2258,12 +2258,11 @@ struct __wt_connection { * isolation the cursor temporarily releases any page that requires force eviction\, then * repositions back to the page for further operations. A page release encourages eviction of * hot or large pages\, which is more likely to succeed without a cursor keeping the page - * pinned. Note: This setting is not compatible with disaggregated storage., a boolean flag; - * default \c false.} - * @config{    eviction, if true\, modify internal - * algorithms to change skew to force history store eviction to happen more aggressively. This - * includes but is not limited to not skewing newest\, not favoring leaf pages\, and modifying - * the eviction score mechanism., a boolean flag; default \c false.} + * pinned., a boolean flag; default \c false.} + * @config{    eviction, if + * true\, modify internal algorithms to change skew to force history store eviction to happen + * more aggressively. 
This includes but is not limited to not skewing newest\, not favoring + * leaf pages\, and modifying the eviction score mechanism., a boolean flag; default \c false.} * @config{    eviction_checkpoint_ts_ordering, if true\, act as if eviction * is being run in parallel to checkpoint. We should return EBUSY in eviction if we detect any * timestamp ordering issue., a boolean flag; default \c false.} @@ -3013,7 +3012,11 @@ struct __wt_connection { * * @param connection the connection handle * @param km the key provider structure - * @configempty{WT_CONNECTION.set_key_provider, see dist/api_data.py} + * @configstart{WT_CONNECTION.set_key_provider, see dist/api_data.py} + * @config{version, the key provider API version. Version 0 uses the pull model (WiredTiger + * calls WT_KEY_PROVIDER::get_key). Version 1 uses the push model., an integer between \c 0 and + * \c 1; default \c 0.} + * @configend * @errors */ int __F(set_key_provider)( @@ -3191,27 +3194,26 @@ struct __wt_connection { * cursor_reposition, if true\, for operations with snapshot isolation the cursor temporarily * releases any page that requires force eviction\, then repositions back to the page for further * operations. A page release encourages eviction of hot or large pages\, which is more likely to - * succeed without a cursor keeping the page pinned. Note: This setting is not compatible with - * disaggregated storage., a boolean flag; default \c false.} + * succeed without a cursor keeping the page pinned., a boolean flag; default \c false.} + * @config{    eviction, if true\, modify internal algorithms to change skew to + * force history store eviction to happen more aggressively. This includes but is not limited to + * not skewing newest\, not favoring leaf pages\, and modifying the eviction score mechanism., a + * boolean flag; default \c false.} + * @config{    eviction_checkpoint_ts_ordering, + * if true\, act as if eviction is being run in parallel to checkpoint. 
We should return EBUSY in + * eviction if we detect any timestamp ordering issue., a boolean flag; default \c false.} + * @config{    log_retention, adjust log removal to retain at least this number + * of log files. (Warning: this option can remove log files required for recovery if no checkpoints + * have yet been done and the number of log files exceeds the configured value. As WiredTiger + * cannot detect the difference between a system that has not yet checkpointed and one that will + * never checkpoint\, it might discard log files before any checkpoint is done.) Ignored if set to + * 0., an integer between \c 0 and \c 1024; default \c 0.} * @config{     - * eviction, if true\, modify internal algorithms to change skew to force history store eviction to - * happen more aggressively. This includes but is not limited to not skewing newest\, not favoring - * leaf pages\, and modifying the eviction score mechanism., a boolean flag; default \c false.} - * @config{    eviction_checkpoint_ts_ordering, if true\, act as if eviction is - * being run in parallel to checkpoint. We should return EBUSY in eviction if we detect any - * timestamp ordering issue., a boolean flag; default \c false.} - * @config{     - * log_retention, adjust log removal to retain at least this number of log files. (Warning: this - * option can remove log files required for recovery if no checkpoints have yet been done and the - * number of log files exceeds the configured value. As WiredTiger cannot detect the difference - * between a system that has not yet checkpointed and one that will never checkpoint\, it might - * discard log files before any checkpoint is done.) Ignored if set to 0., an integer between \c 0 - * and \c 1024; default \c 0.} - * @config{    realloc_exact, if true\, reallocation - * of memory will only provide the exact amount requested. 
This will help with spotting memory - * allocation issues more easily., a boolean flag; default \c false.} - * @config{    realloc_malloc, if true\, every realloc call will force a new - * memory allocation by using malloc., a boolean flag; default \c false.} + * realloc_exact, if true\, reallocation of memory will only provide the exact amount requested. + * This will help with spotting memory allocation issues more easily., a boolean flag; default \c + * false.} + * @config{    realloc_malloc, if true\, every realloc call will force a + * new memory allocation by using malloc., a boolean flag; default \c false.} * @config{    rollback_error, return a WT_ROLLBACK error from a transaction * operation about every Nth operation to simulate a collision., an integer between \c 0 and \c 10M; * default \c 0.} diff --git a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c index ae4123f8512..fbc1463cdcb 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c @@ -349,18 +349,6 @@ __rec_save_delete_hs_upd_and_free_obs_updates(WT_SESSION_IMPL *session, WTI_RECO break; } - /* - * Prepare transaction rollback adds a globally visible tombstone to the update chain to - * remove the entire key. Treating these globally visible tombstones as obsolete and - * trimming update list can cause problems if the update chain is getting accessed somewhere - * else. To avoid this problem, skip these globally visible tombstones from the update - * obsolete check. - */ - if (F_ISSET(delete_upd, WT_UPDATE_PREPARE_ROLLBACK)) { - visible_all_upd = NULL; - break; - } - /* Track the first self-contained value that is globally visible. 
*/ if (F_ISSET(r, WT_REC_CHECKPOINT) && visible_all_upd == NULL && delete_upd->next != NULL && WT_UPDATE_DATA_VALUE(delete_upd) && __wt_txn_upd_visible_all(session, delete_upd)) @@ -737,14 +725,13 @@ __rec_upd_select(WT_SESSION_IMPL *session, WTI_RECONCILE *r, WT_CELL_UNPACK_KV * bool *has_newer_updatesp, bool *write_prepare, size_t *upd_memsizep) { WT_CONNECTION_IMPL *conn; - WT_UPDATE *upd, *prepare_rollback_tombstone; + WT_UPDATE *upd; wt_timestamp_t max_ts; uint64_t max_txn, session_txnid, txnid; uint8_t prepare_state; bool is_hs_page; conn = S2C(session); - prepare_rollback_tombstone = NULL; max_ts = WT_TS_NONE; max_txn = WT_TXN_NONE; is_hs_page = F_ISSET(session->dhandle, WT_DHANDLE_HS); @@ -773,15 +760,8 @@ __rec_upd_select(WT_SESSION_IMPL *session, WTI_RECONCILE *r, WT_CELL_UNPACK_KV * upd->prepare_state == WT_PREPARE_INPROGRESS); /* Ignore the prepared update if the rollback timestamp is stable. */ if (upd->upd_rollback_ts != WT_TS_NONE && - upd->upd_rollback_ts <= r->rec_start_pinned_stable_ts) { - /* - * If we have seen a tombstone that rolled back the prepared update, delete the key - * from the disk. 
- */ - if (prepare_rollback_tombstone != NULL) - break; + upd->upd_rollback_ts <= r->rec_start_pinned_stable_ts) continue; - } txnid = upd->upd_saved_txnid; } @@ -807,7 +787,6 @@ __rec_upd_select(WT_SESSION_IMPL *session, WTI_RECONCILE *r, WT_CELL_UNPACK_KV * session_txnid != WT_TXN_NONE && txnid == session_txnid) { *upd_memsizep += WT_UPDATE_MEMSIZE(upd); *has_newer_updatesp = true; - WT_ASSERT(session, prepare_rollback_tombstone == NULL); WT_ASSERT(session, !upd_select->skip_aborted_prepared_value); continue; } @@ -841,16 +820,6 @@ __rec_upd_select(WT_SESSION_IMPL *session, WTI_RECONCILE *r, WT_CELL_UNPACK_KV * *upd_memsizep += WT_UPDATE_MEMSIZE(upd); *has_newer_updatesp = true; - /* - * If we have already seen a globally visible tombstone from prepared rollback, the - * update we are now skipping is the aborted prepared update that the tombstone rolled - * back, and its rollback is not yet stable (otherwise we would have broken out of the - * loop above). The rollback decision is not durable, so the rollback tombstone is not - * safe to write to disk. Drop it from consideration so the fallback after the loop does - * not select it for write; we will revisit this key in a later reconcile once the - * rollback becomes stable. - */ - prepare_rollback_tombstone = NULL; /* * Same reason as the aborted-prepared skip earlier: this rolled-back prepared value has * no in-chain fallback, so the on-disk cell must not be dropped on this reconciliation. @@ -884,8 +853,6 @@ __rec_upd_select(WT_SESSION_IMPL *session, WTI_RECONCILE *r, WT_CELL_UNPACK_KV * WT_ASSERT(session, !is_hs_page); *upd_memsizep += WT_UPDATE_MEMSIZE(upd); *has_newer_updatesp = true; - /* We should write nothing to disk. */ - prepare_rollback_tombstone = NULL; /* * Same reason as the aborted-prepared skip earlier: this rolled-back prepared @@ -925,8 +892,7 @@ __rec_upd_select(WT_SESSION_IMPL *session, WTI_RECONCILE *r, WT_CELL_UNPACK_KV * * commit/rollback. But it is enough to help us catch some issues. 
*/ WT_ASSERT_ALWAYS(session, - !F_ISSET(r, WT_REC_EVICT) || prepare_rollback_tombstone != NULL || - upd->next != NULL || + !F_ISSET(r, WT_REC_EVICT) || upd->next != NULL || (WT_REC_HAS_ON_DISK(vpack) && !WT_TIME_WINDOW_HAS_PREPARE(&vpack->tw)), "leaked prepared update."); } else @@ -999,46 +965,12 @@ __rec_upd_select(WT_SESSION_IMPL *session, WTI_RECONCILE *r, WT_CELL_UNPACK_KV * } } - if (F_ISSET(conn, WT_CONN_PRESERVE_PREPARED) && F_ISSET(upd, WT_UPDATE_PREPARE_ROLLBACK) && - !F_ISSET(upd, WT_UPDATE_SELECT_FOR_DS)) - prepare_rollback_tombstone = upd; /* * Always select the newest visible update if precise checkpoint is not enabled. Otherwise, * select the first update that is smaller or equal to the pinned timestamp. */ - else if (upd_select->upd == NULL) { + if (upd_select->upd == NULL) upd_select->upd = upd; - if (prepare_rollback_tombstone != NULL) { - /* - * Not checking upd->txnid == WT_TXN_ABORTED here because when doing prepared - * rollback, we first insert the rollback tombstone then mark the prepare aborted, - * so this assert can fire if we race with prepared rollback. - */ - WT_ASSERT(session, - *write_prepare && - (prepare_state == WT_PREPARE_INPROGRESS || prepare_state == WT_PREPARE_LOCKED)); -#ifdef HAVE_DIAGNOSTIC - /* - * Walk from the rollback tombstone to the current prepared update; the only updates - * permitted in between are reserve updates. Any other update would mean an unknown - * entry slipped in front of the prepared update we are about to select. - */ - WT_UPDATE *scan; - for (scan = prepare_rollback_tombstone->next; scan != NULL && scan != upd; - scan = scan->next) - WT_ASSERT( - session, scan->type == WT_UPDATE_RESERVE && scan->txnid == WT_TXN_ABORTED); - WT_ASSERT(session, scan == upd); -#endif - /* We skipped the prepare rollback tombstone. */ - WT_ASSERT(session, *has_newer_updatesp); - /* - * If we have seen a tombstone that rolled back the prepared update, this must be - * the prepared update. No need to walk further. 
- */ - prepare_rollback_tombstone = NULL; - } - } /* Track the selected update transaction id and timestamp. */ if (max_txn < txnid) @@ -1052,13 +984,6 @@ __rec_upd_select(WT_SESSION_IMPL *session, WTI_RECONCILE *r, WT_CELL_UNPACK_KV * max_ts = upd->upd_start_ts; } - /* - * If we see a globally visible tombstone that deletes a key because of prepared rollback, - * keep walking to see if we should write the prepared update instead. - */ - if (prepare_rollback_tombstone != NULL) - continue; - /* * We only need to walk the whole update chain if we are evicting metadata as it is written * with read uncommitted isolation and we may see a committed update followed by uncommitted @@ -1068,10 +993,6 @@ __rec_upd_select(WT_SESSION_IMPL *session, WTI_RECONCILE *r, WT_CELL_UNPACK_KV * break; } - /* The prepare rollback is stable. Delete the key by selecting the rollback tombstone. */ - if (upd_select->upd == NULL && prepare_rollback_tombstone != NULL) - upd_select->upd = prepare_rollback_tombstone; - /* * Track the most recent transaction in the page. We store this in the tree at the end of * reconciliation in the service of checkpoints, it is used to avoid discarding trees from diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c index ac9914a6335..94ae2efde6f 100644 --- a/src/third_party/wiredtiger/src/session/session_api.c +++ b/src/third_party/wiredtiger/src/session/session_api.c @@ -2631,7 +2631,7 @@ __open_session(WT_CONNECTION_IMPL *conn, WT_EVENT_HANDLER *event_handler, const session_ret->iface = F_ISSET(conn, WT_CONN_READONLY) ? 
stds_readonly : stds; session_ret->iface.connection = &conn->iface; - session_ret->name = NULL; + __wt_atomic_store_ptr_relaxed(&session_ret->name, NULL); session_ret->id = i; #ifdef HAVE_UNITTEST_ASSERTS diff --git a/src/third_party/wiredtiger/src/session/session_helper.c b/src/third_party/wiredtiger/src/session/session_helper.c index eedb7dc10e1..6eae2910195 100644 --- a/src/third_party/wiredtiger/src/session/session_helper.c +++ b/src/third_party/wiredtiger/src/session/session_helper.c @@ -78,13 +78,15 @@ __wt_session_dump(WT_SESSION_IMPL *session, WT_SESSION_IMPL *dump_session, bool WT_CURSOR *cursor; WT_DECL_ITEM(buf); WT_DECL_RET; + const char *session_name; WT_ERR(__wt_scr_alloc(session, 0, &buf)); WT_ERR(__wt_msg( session, "Session: ID: %" PRIu32 " @: 0x%p", dump_session->id, (void *)dump_session)); - WT_ERR( - __wt_msg(session, " Name: %s", dump_session->name == NULL ? "EMPTY" : dump_session->name)); + + session_name = __wt_atomic_load_ptr_relaxed(&dump_session->name); + WT_ERR(__wt_msg(session, " Name: %s", session_name == NULL ? "EMPTY" : session_name)); WT_ERR(__wt_msg(session, " Last operation: %s", dump_session->lastop == NULL ? "NONE" : dump_session->lastop)); WT_ERR(__wt_msg(session, " Current dhandle: %s", diff --git a/src/third_party/wiredtiger/src/support/hazard.c b/src/third_party/wiredtiger/src/support/hazard.c index 715afcce9c2..12f273f9414 100644 --- a/src/third_party/wiredtiger/src/support/hazard.c +++ b/src/third_party/wiredtiger/src/support/hazard.c @@ -415,6 +415,8 @@ __wt_hazard_check_assert(WT_SESSION_IMPL *session, void *ref, bool waitfor) break; __wt_sleep(0, 10 * WT_THOUSAND); } + + const char *session_name = __wt_atomic_load_ptr_relaxed(&s->name); #ifdef HAVE_DIAGNOSTIC /* * In diagnostic mode we also track the file and line where the hazard pointer is set. 
If this @@ -422,10 +424,11 @@ __wt_hazard_check_assert(WT_SESSION_IMPL *session, void *ref, bool waitfor) */ __wt_errx(session, "hazard pointer reference to discarded object: (%p: session %p name %s: %s, line %d)", - (void *)hp->ref, (void *)s, s->name == NULL ? "UNKNOWN" : s->name, hp->func, hp->line); + (void *)hp->ref, (void *)s, session_name == NULL ? "UNKNOWN" : session_name, hp->func, + hp->line); #else __wt_errx(session, "hazard pointer reference to discarded object: (%p: session %p name %s)", - (void *)hp->ref, (void *)s, s->name == NULL ? "UNKNOWN" : s->name); + (void *)hp->ref, (void *)s, session_name == NULL ? "UNKNOWN" : session_name); #endif return (false); } diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c index 72957fc8347..4e83c2ba35d 100644 --- a/src/third_party/wiredtiger/src/txn/txn.c +++ b/src/third_party/wiredtiger/src/txn/txn.c @@ -958,7 +958,7 @@ __txn_prepare_rollback_restore_hs_update( } /* Append the update to the end of the chain. */ - WT_RELEASE_WRITE_WITH_BARRIER(upd_chain->next, upd); + __wt_atomic_store_ptr_relaxed(&upd_chain->next, upd); __wt_cache_page_inmem_incr(session, page, total_size, false); @@ -1034,30 +1034,35 @@ __txn_search_prepared_op(WT_SESSION_IMPL *session, WT_BTREE *btree, WT_ITEM *key /* * __txn_prepare_rollback_delete_key -- - * Prepend a global visible tombstone to the head of the update chain to delete the key for - * prepare rollback. + * Append a globally visible tombstone to the tail of the update chain to delete the key for + * prepare rollback. Placing the tombstone below the prepared update encodes its role by + * position so reconciliation and pruning see a normal globally visible tombstone without + * needing a distinguishing flag. 
*/ static int -__txn_prepare_rollback_delete_key(WT_SESSION_IMPL *session, WT_BTREE *btree, WT_CURSOR_BTREE *cbt) +__txn_prepare_rollback_delete_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd_chain) { - WT_DECL_RET; WT_UPDATE *tombstone; - size_t not_used; + size_t size; - tombstone = NULL; + WT_ASSERT(session, upd_chain != NULL); - WT_ERR(__wt_upd_alloc_tombstone(session, &tombstone, ¬_used)); - F_SET(tombstone, WT_UPDATE_PREPARE_ROLLBACK); - WT_WITH_BTREE(session, btree, - ret = btree->type == BTREE_ROW ? - __wt_row_modify(cbt, &cbt->iface.key, NULL, &tombstone, WT_UPDATE_INVALID, false, false) : - __wt_col_modify(cbt, cbt->recno, NULL, &tombstone, WT_UPDATE_INVALID, false, false)); - WT_ERR(ret); - tombstone = NULL; + size = 0; + WT_RET(__wt_upd_alloc_tombstone(session, &tombstone, &size)); -err: - __wt_free(session, tombstone); - return (ret); + /* + * Walk to the end of the chain. The caller guarantees that the chain at this point consists + * only of updates from the resolving prepared transaction (and aborted reserve entries), so the + * tail's next pointer is stable. + */ + while (upd_chain->next != NULL) + upd_chain = upd_chain->next; + + __wt_atomic_store_ptr_relaxed(&upd_chain->next, tombstone); + + __wt_cache_page_inmem_incr(session, page, size, false); + + return (0); } /* @@ -1218,28 +1223,24 @@ __wt_txn_resolve_prepared_op(WT_SESSION_IMPL *session, WT_BTREE *btree, * If the prepared update is a single tombstone, we don't need to do anything special and we can * directly resolve it in memory. * - * If the prepared update is not a tombstone or we have multiple prepared updates in the same - * transaction. There are four base cases: + * Otherwise there are three resolve cases: * - * 1) Prepared updates are on the update chain. - * commit: simply resolve the updates on chain. - * rollback: simply resolve the updates on chain. + * 1) Prepared updates are on the update chain (RESOLVE_UPDATE_CHAIN). + * commit: resolve the updates on chain. 
+ * rollback: if the prepared update is the only update and there is no on-disk value, + * append a globally visible tombstone to delete the key. * - * 2) Prepared updates are written to the data store. - * If there is no older updates written to the history store: - * commit: simply resolve the prepared updates in memory. - * rollback: delete the whole key. - * - * If there are older updates written to the history store: + * 2) Prepared updates are written to the data store (RESOLVE_PREPARE_ON_DISK). + * If there are older updates in the history store: * commit: restore the newest history store update with a max stop time point to the - * update chain. Reconciliation should know when to delete it from the history - * store. - * rollback:restore the newest update in the history store to the update chain. - * Reconciliation should know when to delete it from the history store. + * update chain. + * rollback: restore the newest history store update to the update chain. + * If there are no older updates in the history store: + * commit: resolve the prepared updates in memory. + * rollback: append a globally visible tombstone to delete the key. * - * 4) We are running an in-memory database: - * commit: resolve the prepared updates in memory. - * rollback: if the prepared update is written to the disk image, delete the whole key. + * 3) We are running an in-memory database (RESOLVE_IN_MEMORY). + * commit/rollback: resolve the prepared updates in memory only. 
*/ /* @@ -1264,7 +1265,7 @@ __wt_txn_resolve_prepared_op(WT_SESSION_IMPL *session, WT_BTREE *btree, if (!commit && first_committed_upd == NULL) { tw_found = __wt_read_cell_time_window(cbt, &tw); if (!tw_found) - WT_ERR(__txn_prepare_rollback_delete_key(session, btree, cbt)); + WT_ERR(__txn_prepare_rollback_delete_key(session, page, head_upd)); else WT_ASSERT_ALWAYS( session, !WT_TIME_WINDOW_HAS_PREPARE(&tw), "no committed update to fallback to."); @@ -1303,12 +1304,12 @@ __wt_txn_resolve_prepared_op(WT_SESSION_IMPL *session, WT_BTREE *btree, else { ret = 0; /* - * Allocate a tombstone and prepend it to the row so when we reconcile the update chain - * we don't copy the prepared cell, which is now associated with a rolled back prepare, - * and instead write nothing. + * Append a globally visible tombstone to the end of the chain. When reconciliation + * later drops the rolled-back prepared cell, the tombstone remains as the correct + * post-rollback state for the key. */ if (!commit) - WT_ERR(__txn_prepare_rollback_delete_key(session, btree, cbt)); + WT_ERR(__txn_prepare_rollback_delete_key(session, page, head_upd)); } break; case RESOLVE_IN_MEMORY: @@ -2764,13 +2765,15 @@ __wt_verbose_dump_txn_one( buf_len = 512; WT_RET(__wt_scr_alloc(session, buf_len, &buf)); + + const char *session_name = __wt_atomic_load_ptr_relaxed(&txn_session->name); WT_ERR(__wt_snprintf((char *)buf->data, buf_len, "session ID: %" PRIu32 ", txn ID: %" PRIu64 ", pinned ID: %" PRIu64 ", metadata pinned ID: %" PRIu64 ", name: %s", txn_session->id, __wt_atomic_load_uint64_v_relaxed(&txn_shared->id), __wt_atomic_load_uint64_v_relaxed(&txn_shared->pinned_id), __wt_atomic_load_uint64_v_relaxed(&txn_shared->metadata_pinned), - txn_session->name == NULL ? "EMPTY" : txn_session->name)); + session_name == NULL ? 
"EMPTY" : session_name)); if (error_code != 0) WT_ERR_MSG(session, error_code, "%s, %s", (char *)buf->data, diff --git a/src/third_party/wiredtiger/test/catch2/ext/test_key_provider.cpp b/src/third_party/wiredtiger/test/catch2/ext/test_key_provider.cpp index bc19125288d..c46af83cc28 100644 --- a/src/third_party/wiredtiger/test/catch2/ext/test_key_provider.cpp +++ b/src/third_party/wiredtiger/test/catch2/ext/test_key_provider.cpp @@ -326,6 +326,26 @@ TEST_CASE_METHOD(kp_fixture, "Persist key, failure", "[key_provider]") free(const_cast(crypt.keys.data)); } +TEST_CASE_METHOD(kp_fixture, "set_key_provider version selects push mode", "[key_provider]") +{ + WT_CONNECTION *wt_conn = conn.get_wt_connection(); + WT_CONNECTION_IMPL *conn_impl = conn.get_wt_connection_impl(); + WT_KEY_PROVIDER stub = {}; + + /* version=0 (default): push flag stays clear. */ + REQUIRE(wt_conn->set_key_provider(wt_conn, &stub, "version=0") == 0); + REQUIRE(!F_ISSET(conn_impl, WT_CONN_KEY_PROVIDER_PUSH)); + conn_impl->key_provider = nullptr; /* Allow reconfiguration. */ + + /* version=1: push flag is set. */ + REQUIRE(wt_conn->set_key_provider(wt_conn, &stub, "version=1") == 0); + REQUIRE(F_ISSET(conn_impl, WT_CONN_KEY_PROVIDER_PUSH)); + + /* Cleanup so the fixture destructor doesn't see a stale provider. */ + conn_impl->key_provider = nullptr; + F_CLR(conn_impl, WT_CONN_KEY_PROVIDER_PUSH); +} + TEST_CASE_METHOD(kp_fixture, "Key always expires", "[key_provider]") { kp_ptr_t kp = kp_init("key_expires=0"); diff --git a/src/third_party/wiredtiger/test/format/format_config.c b/src/third_party/wiredtiger/test/format/format_config.c index fd7f3928958..0e6905afdac 100644 --- a/src/third_party/wiredtiger/test/format/format_config.c +++ b/src/third_party/wiredtiger/test/format/format_config.c @@ -1489,10 +1489,6 @@ config_disagg_storage(void) config_off(NULL, "ops.compaction"); config_off(NULL, "background_compact"); - /* Cursor reposition is not supported for disaggregated storage. 
*/ - config_off(NULL, "debug.cursor_reposition"); - config_off(NULL, "stress.evict_reposition"); - /* Tiered storage is not supported with disagg */ config_single(NULL, "tiered_storage.storage_source=off", true); } diff --git a/src/third_party/wiredtiger/test/suite/helper_layered_fast_truncate.py b/src/third_party/wiredtiger/test/suite/helper_layered_fast_truncate.py new file mode 100644 index 00000000000..fbc075b2621 --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/helper_layered_fast_truncate.py @@ -0,0 +1,208 @@ +#!/usr/bin/env python3 +# +# Public Domain 2014-present MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +# helper_layered_fast_truncate.py +# Shared helpers for the layered fast truncate Python tests. 
+ +from contextlib import closing +from itertools import chain +from typing import Iterable + +import wiredtiger + + +def concat(*iterables): + """Concatenate any number of iterables into a single list.""" + return list(chain.from_iterable(iterables)) + + +def range_inclusive(start, stop): + """Return a range covering [start, stop] inclusive.""" + return range(start, stop + 1) + + +class LayeredFastTruncateConfigMixin: + """Shared helpers for the layered fast truncate test suite.""" + + def key(self, n): + """ + Convert an int into a key; override in subclasses that use a different + key format. + """ + return n + + def session_create_config(self): + """ + Return the session.create() config string, and, for layered URIs, the + disaggregated storage options. + """ + cfg = 'key_format=i,value_format=S' + uri = getattr(self, 'uri', '') + if uri.startswith('table'): + cfg += ',block_manager=disagg,type=layered' + return cfg + + def auto_closing_cursor(self, config=None): + """Return a cursor that auto-closes as it goes out of scope.""" + return closing(self.session.open_cursor(self.uri, None, config)) + + def populate(self, keys, value='v'): + """Insert each key with a placeholder value in a single transaction.""" + with self.auto_closing_cursor() as cursor: + with self.transaction(): + for key in keys: + cursor[self.key(key)] = value + + def setup_leader(self, keys=None, extra_cfg=''): + """ + Create the table on the leader and optionally populate stable. The + follower picks up these keys via the initial checkpoint. 
+ """ + self.session.create(self.uri, self.session_create_config() + extra_cfg) + if keys is not None: + self.populate(keys) + self.session.checkpoint() + + def setup_follower(self, keys=None): + """Switch to follower role and optionally write keys to ingest.""" + self.reopen_disagg_conn('disaggregated=(role="follower"),') + if keys is not None: + self.populate(keys) + + def truncate(self, start_key=None, stop_key=None, commit_timestamp=None): + """ + Truncate [start_key, stop_key] inclusive on self.uri. Either bound + may be None for an open-ended side. If commit_timestamp is set, + the truncate transaction commits at that timestamp. + """ + start = stop = None + try: + if start_key is not None: + start = self.session.open_cursor(self.uri) + start.set_key(self.key(start_key)) + if stop_key is not None: + stop = self.session.open_cursor(self.uri) + stop.set_key(self.key(stop_key)) + # session.truncate() needs a URI iff both cursors are NULL. + uri = self.uri if (start is None and stop is None) else None + with self.transaction(commit_timestamp=commit_timestamp): + self.session.truncate(uri, start, stop, None) + finally: + if start is not None: + start.close() + if stop is not None: + stop.close() + + def visible_keys(self, forward=True): + """Return all keys visible via a scan (forward or backward).""" + result = [] + with self.auto_closing_cursor() as cursor: + step = cursor.next if forward else cursor.prev + with self.transaction(rollback=True): + while step() == 0: + result.append(cursor.get_key()) + return result + + def key_exists(self, key): + """Return True if key is visible to a search in its own transaction.""" + with self.auto_closing_cursor() as cursor: + with self.transaction(rollback=True): + cursor.set_key(self.key(key)) + return cursor.search() == 0 + + def search_near_key(self, key): + """ + Run search_near. Returns (exact, found_key). 
exact follows WT + convention: 0 = exact, 1 = positioned above, -1 = positioned + below, or WT_NOTFOUND if no visible keys exist (in which case + found_key is None). + """ + with self.auto_closing_cursor() as cursor: + with self.transaction(rollback=True): + cursor.set_key(self.key(key)) + exact = cursor.search_near() + if exact == wiredtiger.WT_NOTFOUND: + return exact, None + return exact, cursor.get_key() + + def leader_checkpoint(self, ts=None): + """Set timestamps and checkpoint on the leader.""" + if ts is not None: + self.conn.set_timestamp('stable_timestamp=' + self.timestamp_str(ts) + + ',oldest_timestamp=' + self.timestamp_str(1)) + self.session.checkpoint() + + def step_up(self): + """Promote self.conn_follow to leader; the original leader steps down.""" + self.ignoreStdoutPattern('Picking up the same checkpoint') + self.disagg_switch_follower_and_leader(self.conn_follow) + + def open_follower(self, table_config='key_format=i,value_format=S'): + """ + Open a separate follower connection (distinct from setup_follower + which reopens the existing connection). Returns (conn, session). 
+ """ + conn = self.wiredtiger_open( + 'follower', + self.extensionsConfig() + + ',create,cache_size=50MB,statistics=(all),disaggregated=(role="follower")') + session = conn.open_session('') + session.create(self.uri, table_config) + self.disagg_advance_checkpoint(conn, self.conn) + return conn, session + + def search_at(self, session, key, ts): + """Search for key under a read_timestamp; return (ret, value).""" + cur = session.open_cursor(self.uri) + try: + with self.transaction(session=session, read_timestamp=ts, rollback=True): + cur.set_key(key) + ret = cur.search() + val = cur.get_value() if ret == 0 else None + return ret, val + finally: + cur.close() + + def evict_range(self, session, start, stop, step=1): + """Evict the page(s) backing keys [start, stop] on the given session.""" + evict_cur = session.open_cursor(self.uri, None, 'debug=(release_evict)') + try: + with self.transaction(session=session, read_timestamp=10, rollback=True): + for i in range(start, stop + 1, step): + evict_cur.set_key(i) + evict_cur.search() + evict_cur.reset() + finally: + evict_cur.close() + + def get_stat(self, conn, stat_key): + """Read a connection statistic on the given connection.""" + s = conn.open_session('') + val = s.open_cursor('statistics:')[stat_key][2] + s.close() + return val diff --git a/src/third_party/wiredtiger/test/suite/hook_disagg.fail b/src/third_party/wiredtiger/test/suite/hook_disagg.fail index 549071fa3c8..274dba796c3 100644 --- a/src/third_party/wiredtiger/test/suite/hook_disagg.fail +++ b/src/third_party/wiredtiger/test/suite/hook_disagg.fail @@ -4,6 +4,8 @@ test_autoclose.py test_config02.py test_config09.py +test_cursor13.py # FIXME: WT-15369 +test_cursor21.py # FIXME: WT-15369 test_drop03.py test_dump.py test_dump01.py diff --git a/src/third_party/wiredtiger/test/suite/test_cursor13.py b/src/third_party/wiredtiger/test/suite/test_cursor13.py index fc21e07c598..b8a94f40226 100644 --- a/src/third_party/wiredtiger/test/suite/test_cursor13.py +++ 
b/src/third_party/wiredtiger/test/suite/test_cursor13.py @@ -56,15 +56,7 @@ class test_cursor13_base(wttest.WiredTigerTestCase): def caching_stats(self): hs_stats_uri = 'statistics:file:WiredTigerHS.wt' max_tries = 100 - # Cursor cache/reopen stats are updated with plain (non-atomic) int64 add/subtract - # operations. - # A recent increment by another core may not yet be visible to this reader. Re-reading - # in a tight Python loop cannot force coherence; the fix is to pause briefly on retry - # so store buffers drain and cache lines propagate. - retry_sleep = 0.005 # seconds for i in range(max_tries): - if i > 0: - time.sleep(retry_sleep) hs_stats_before = self.session.open_cursor(hs_stats_uri, None, None) conn_stats = self.session.open_cursor('statistics:', None, None) hs_stats_after = self.session.open_cursor(hs_stats_uri, None, None) @@ -86,14 +78,7 @@ class test_cursor13_base(wttest.WiredTigerTestCase): hs_after[0] += hs_disagg_stat_after[stat.dsrc.cursor_cache][2] hs_after[1] += hs_disagg_stat_after[stat.dsrc.cursor_reopen][2] - report = [totals[0], - hs_before[0], - hs_disagg_stat_before[stat.dsrc.cursor_cache][2], - hs_stats_before[stat.dsrc.cursor_cache][2]] - self.pr(' '.join(map(str, report))) - - hs_disagg_stat_before.close() - hs_disagg_stat_after.close() + self.pr(str(totals[0]) + " " + str(hs_before[0]) + " " + str(hs_disagg_stat_before[stat.dsrc.cursor_cache][2]) + " " + str(hs_stats_before[stat.dsrc.cursor_cache][2])) hs_stats_before.close() hs_stats_after.close() @@ -526,7 +511,6 @@ class test_cursor13_big(test_cursor13_big_base): self.assertEqual(end_stats[0] - begin_stats[0], self.closecount) self.assertEqual(end_stats[1] - begin_stats[1], self.opencount) -@wttest.skip_for_hook("disagg", "layered dhandles are never swept: FIXME-WT-16982") class test_cursor13_sweep(test_cursor13_big_base): # Set dhandle sweep configuration so that dhandles should be closed within # two seconds of all the cursors for the dhandle being closed (cached). 
diff --git a/src/third_party/wiredtiger/test/suite/test_cursor21.py b/src/third_party/wiredtiger/test/suite/test_cursor21.py index cfeb2f1de00..5467cef920e 100644 --- a/src/third_party/wiredtiger/test/suite/test_cursor21.py +++ b/src/third_party/wiredtiger/test/suite/test_cursor21.py @@ -31,7 +31,7 @@ import wttest from wtscenario import make_scenarios -from wiredtiger import stat, WiredTigerError +from wiredtiger import stat class test_cursor21(wttest.WiredTigerTestCase): uri = "table:test_cursor21" @@ -71,7 +71,6 @@ class test_cursor21(wttest.WiredTigerTestCase): self.assertEqual(reposition_count, 0) return reposition_count - @wttest.skip_for_hook("disagg", "layered tables don't support cursor reposition") def test_cursor21(self): format = 'key_format={},value_format={}'.format(self.key_format, self.value_format) reposition_count = 0 @@ -126,15 +125,3 @@ class test_cursor21(wttest.WiredTigerTestCase): reposition_count += self.check_reposition(reposition_count) cursor.close() self.session.close() - - @wttest.only_for_hook("disagg", "check reposition is disabled for disaggregated storage") - def test_cursor21_dsc(self): - # Skip the test if reposition is disabled or it's column store (unsupported in disagg). 
- if not self.reposition or self.scenario_name == 'column.reposition': - return - - format = 'key_format={},value_format={}'.format(self.key_format, self.value_format) - self.session.create(self.uri, format) - msg = '/Operation not supported/' - self.assertRaisesWithMessage(WiredTigerError, - lambda: self.session.open_cursor(self.uri), msg) diff --git a/src/third_party/wiredtiger/test/suite/test_cursor24.py b/src/third_party/wiredtiger/test/suite/test_cursor24.py index 670bc8ce73a..221bafaea1d 100644 --- a/src/third_party/wiredtiger/test/suite/test_cursor24.py +++ b/src/third_party/wiredtiger/test/suite/test_cursor24.py @@ -34,7 +34,6 @@ import wiredtiger from wtscenario import make_scenarios WT_TS_MAX = 2**64 - 1 -WT_UPDATE_PREPARE_ROLLBACK = 0x080 class test_cursor24(wttest.WiredTigerTestCase): uri = 'file:test_cursor24.wt' diff --git a/src/third_party/wiredtiger/test/suite/test_cursor25.py b/src/third_party/wiredtiger/test/suite/test_cursor25.py index add1ce3b91e..c0606d86dd0 100644 --- a/src/third_party/wiredtiger/test/suite/test_cursor25.py +++ b/src/third_party/wiredtiger/test/suite/test_cursor25.py @@ -181,7 +181,7 @@ class test_cursor25(wttest.WiredTigerTestCase): cursor[1] = 10 self.session.commit_transaction("commit_timestamp=" + self.timestamp_str(1)) - # Prepared overwrite + rollback. No PREPARE_ROLLBACK tombstone because + # Prepared overwrite + rollback. No rollback tombstone appended because # first_committed_upd != NULL. session2 = self.conn.open_session() cursor2 = session2.open_cursor(self.uri, None) @@ -229,7 +229,7 @@ class test_cursor25(wttest.WiredTigerTestCase): cursor[1] = 10 self.session.commit_transaction("commit_timestamp=" + self.timestamp_str(1)) - # Prepared delete + rollback. No PREPARE_ROLLBACK tombstone because + # Prepared delete + rollback. No rollback tombstone appended because # first_committed_upd != NULL. 
session2 = self.conn.open_session() cursor2 = session2.open_cursor(self.uri, None) diff --git a/src/third_party/wiredtiger/test/suite/test_layered69.py b/src/third_party/wiredtiger/test/suite/test_layered69.py index e390eb9ea98..b2f3aba9146 100644 --- a/src/third_party/wiredtiger/test/suite/test_layered69.py +++ b/src/third_party/wiredtiger/test/suite/test_layered69.py @@ -199,10 +199,13 @@ class test_layered69(test_prepare_preserve_prepare_base): session_prepare.rollback_transaction(f'rollback_timestamp={self.timestamp_str(45)}') session_prepare.close() - # Verify checkpoint skips writing a page to disk + # Verify checkpoint skips writing a page to disk. When the page was evicted before the + # prepare, the prior committed delete tombstone is gone from memory, so the prepare + # rollback appends a fresh tail tombstone with no durable flag set; that tombstone gets + # re-saved and causes one extra write here. self.checkpoint_and_verify_stats({ wiredtiger.stat.dsrc.rec_time_window_prepared: False, - stat: False, + stat: self.evict, }, self.uri) # Make stable timestamp equal to prepare timestamp - this should allow checkpoint to reconcile prepared update diff --git a/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate01.py b/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate01.py index de3466280a1..96c32ac5a09 100644 --- a/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate01.py +++ b/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate01.py @@ -28,12 +28,13 @@ import unittest, wttest, wiredtiger from helper_disagg import disagg_test_class, gen_disagg_storages +from helper_layered_fast_truncate import LayeredFastTruncateConfigMixin from wtscenario import make_scenarios # test_layered_fast_truncate01.py # Test basic fast truncate functionality. 
@disagg_test_class -class test_layered_fast_truncate01(wttest.WiredTigerTestCase): +class test_layered_fast_truncate01(LayeredFastTruncateConfigMixin, wttest.WiredTigerTestCase): conn_config = 'disaggregated=(role="leader"),' @@ -48,6 +49,9 @@ class test_layered_fast_truncate01(wttest.WiredTigerTestCase): nitems = 1000 + def key(self, n): + return str(n) + def session_create_config(self): cfg = 'key_format=S,value_format=S' if self.uri.startswith('table'): diff --git a/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate02.py b/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate02.py index e8927285f2b..01bb26f4b48 100644 --- a/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate02.py +++ b/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate02.py @@ -32,10 +32,11 @@ import wiredtiger, wttest from helper_disagg import disagg_test_class, gen_disagg_storages +from helper_layered_fast_truncate import LayeredFastTruncateConfigMixin from wtscenario import make_scenarios @disagg_test_class -class test_layered_fast_truncate02(wttest.WiredTigerTestCase): +class test_layered_fast_truncate02(LayeredFastTruncateConfigMixin, wttest.WiredTigerTestCase): uri = 'layered:test_layered_fast_truncate02' nrows = 5000 @@ -48,11 +49,6 @@ class test_layered_fast_truncate02(wttest.WiredTigerTestCase): disagg_storages = gen_disagg_storages('test_layered_fast_truncate02', disagg_only=True) scenarios = make_scenarios(disagg_storages) - def leader_checkpoint(self, ts): - self.conn.set_timestamp('stable_timestamp=' + self.timestamp_str(ts) + - ',oldest_timestamp=' + self.timestamp_str(1)) - self.session.checkpoint() - def setup_leader(self): self.conn.set_timestamp('oldest_timestamp=' + self.timestamp_str(1)) self.session.create(self.uri, 'key_format=i,value_format=S') @@ -74,44 +70,12 @@ class test_layered_fast_truncate02(wttest.WiredTigerTestCase): evict_cur.close() self.session.rollback_transaction() - def truncate_and_checkpoint(self, 
trunc_start, trunc_stop, ts): - # Fast-truncate rows [trunc_start, trunc_stop] on the leader and checkpoint. - c_start = self.session.open_cursor(self.uri) - c_start.set_key(trunc_start) - c_stop = self.session.open_cursor(self.uri) - c_stop.set_key(trunc_stop) - self.session.begin_transaction() - self.session.truncate(None, c_start, c_stop, None) - self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(ts)) - c_start.close() - c_stop.close() - self.leader_checkpoint(ts) - - def open_follower(self): - conn = self.wiredtiger_open( - 'follower', - self.extensionsConfig() + ',create,cache_size=50MB,statistics=(all),disaggregated=(role="follower")') - sess = conn.open_session('') - sess.create(self.uri, 'key_format=i,value_format=S') - self.disagg_advance_checkpoint(conn, self.conn) - return conn, sess - - def search_at(self, sess, key, ts): - cur = sess.open_cursor(self.uri) - txn_cfg = ('read_timestamp=' + self.timestamp_str(ts)) - sess.begin_transaction(txn_cfg) - cur.set_key(key) - ret = cur.search() - val = cur.get_value() if ret == 0 else None - sess.rollback_transaction() - cur.close() - return ret, val - def test_visibility(self): # At ts=20 (equal to truncation at ts=20): truncated keys return WT_NOTFOUND, boundary and # exterior keys return their values. At ts=15 (before truncation): all keys are visible. self.setup_leader() - self.truncate_and_checkpoint(self.trunc_start, self.trunc_stop, 20) + self.truncate(self.trunc_start, self.trunc_stop, commit_timestamp=20) + self.leader_checkpoint(20) conn, sess = self.open_follower() # Truncation is visible: deleted keys are gone, surrounding keys survive. @@ -137,7 +101,8 @@ class test_layered_fast_truncate02(wttest.WiredTigerTestCase): # Reading at a timestamp before the truncation must still find all rows, including those # later deleted. Verifies mvcc correctness across the follower checkpoint boundary. 
self.setup_leader() - self.truncate_and_checkpoint(self.trunc_start, self.trunc_stop, 20) + self.truncate(self.trunc_start, self.trunc_stop, commit_timestamp=20) + self.leader_checkpoint(20) conn, sess = self.open_follower() for key in [self.trunc_start, self.trunc_mid, self.trunc_stop]: @@ -161,7 +126,8 @@ class test_layered_fast_truncate02(wttest.WiredTigerTestCase): # Forward and backward scans must skip the entire truncated range without visiting any # deleted key. search_near on a deleted key must land outside the range. self.setup_leader() - self.truncate_and_checkpoint(self.trunc_start, self.trunc_stop, 20) + self.truncate(self.trunc_start, self.trunc_stop, commit_timestamp=20) + self.leader_checkpoint(20) conn, sess = self.open_follower() expected = self.nrows - (self.trunc_stop - self.trunc_start + 1) diff --git a/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate03.py b/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate03.py index caface03c78..ef576bb87e0 100644 --- a/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate03.py +++ b/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate03.py @@ -33,11 +33,12 @@ import wiredtiger, wttest from helper_disagg import disagg_test_class, gen_disagg_storages +from helper_layered_fast_truncate import LayeredFastTruncateConfigMixin from wtscenario import make_scenarios from wiredtiger import stat @disagg_test_class -class test_layered_fast_truncate03(wttest.WiredTigerTestCase): +class test_layered_fast_truncate03(LayeredFastTruncateConfigMixin, wttest.WiredTigerTestCase): uri = 'layered:test_layered_fast_truncate03' nrows = 5000 @@ -49,17 +50,6 @@ class test_layered_fast_truncate03(wttest.WiredTigerTestCase): disagg_storages = gen_disagg_storages('test_layered_fast_truncate03', disagg_only=True) scenarios = make_scenarios(disagg_storages) - def get_stat(self, conn, stat_key): - s = conn.open_session('') - val = s.open_cursor('statistics:')[stat_key][2] - s.close() - 
return val - - def leader_checkpoint(self, ts): - self.conn.set_timestamp('stable_timestamp=' + self.timestamp_str(ts) + - ',oldest_timestamp=' + self.timestamp_str(1)) - self.session.checkpoint() - def setup_leader(self, extra_cfg=''): self.conn.set_timestamp('oldest_timestamp=' + self.timestamp_str(1)) self.session.create(self.uri, 'key_format=i,value_format=S' + extra_cfg) @@ -81,58 +71,16 @@ class test_layered_fast_truncate03(wttest.WiredTigerTestCase): evict_cur.close() self.session.rollback_transaction() - def truncate_and_checkpoint(self, trunc_start, trunc_stop, ts): - # Fast-truncate rows [trunc_start, trunc_stop] on the leader and checkpoint. - c_start = self.session.open_cursor(self.uri) - c_start.set_key(trunc_start) - c_stop = self.session.open_cursor(self.uri) - c_stop.set_key(trunc_stop) - self.session.begin_transaction() - self.session.truncate(None, c_start, c_stop, None) - self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(ts)) - c_start.close() - c_stop.close() - self.leader_checkpoint(ts) - - def open_follower(self): - conn = self.wiredtiger_open( - 'follower', - self.extensionsConfig() + ',create,cache_size=50MB,statistics=(all),disaggregated=(role="follower")') - sess = conn.open_session('') - sess.create(self.uri, 'key_format=i,value_format=S') - self.disagg_advance_checkpoint(conn, self.conn) - return conn, sess - def advance_follower(self, conn): self.leader_checkpoint(20) self.disagg_advance_checkpoint(conn, self.conn) - def evict_range(self, sess, start, stop, step=1): - evict_cur = sess.open_cursor(self.uri, None, 'debug=(release_evict)') - sess.begin_transaction('read_timestamp=' + self.timestamp_str(10)) - for i in range(start, stop + 1, step): - evict_cur.set_key(i) - evict_cur.search() - evict_cur.reset() - evict_cur.close() - sess.rollback_transaction() - - def search_at(self, sess, key, ts): - cur = sess.open_cursor(self.uri) - txn_cfg = ('read_timestamp=' + self.timestamp_str(ts)) - 
sess.begin_transaction(txn_cfg) - cur.set_key(key) - ret = cur.search() - val = cur.get_value() if ret == 0 else None - sess.rollback_transaction() - cur.close() - return ret, val - def test_no_dirty_on_read(self): # Reading fast-truncated pages on the follower must never dirty them. Verifies this holds # across a full load-evict-reload cycle for both single and bulk page reads. self.setup_leader() - self.truncate_and_checkpoint(self.trunc_start, self.trunc_stop, 20) + self.truncate(self.trunc_start, self.trunc_stop, commit_timestamp=20) + self.leader_checkpoint(20) conn, sess = self.open_follower() sample = list(range(self.trunc_start, self.trunc_stop + 1, 10)) dirty_before = self.get_stat(conn, stat.conn.cache_pages_dirty) @@ -168,7 +116,8 @@ class test_layered_fast_truncate03(wttest.WiredTigerTestCase): # restore a subset of truncated keys, those keys must be visible while the rest # remain deleted. self.setup_leader(',leaf_page_max=4096') - self.truncate_and_checkpoint(self.trunc_start, self.trunc_stop, 20) + self.truncate(self.trunc_start, self.trunc_stop, commit_timestamp=20) + self.leader_checkpoint(20) conn, sess = self.open_follower() sample = list(range(self.trunc_start, self.trunc_stop + 1, 10)) dirty_before = self.get_stat(conn, stat.conn.cache_pages_dirty) @@ -226,7 +175,8 @@ class test_layered_fast_truncate03(wttest.WiredTigerTestCase): # Closing and reopening the follower connection must not lose the deleted state. # The same checkpoint must still show truncated keys as WT_NOTFOUND after a cold start. 
self.setup_leader() - self.truncate_and_checkpoint(self.trunc_start, self.trunc_stop, 20) + self.truncate(self.trunc_start, self.trunc_stop, commit_timestamp=20) + self.leader_checkpoint(20) truncated_keys = [self.trunc_start, self.trunc_start + 100, self.trunc_stop] non_truncated_keys = [1, self.trunc_start - 1, self.trunc_stop + 1, self.nrows] @@ -250,7 +200,8 @@ class test_layered_fast_truncate03(wttest.WiredTigerTestCase): # Reading a deleted page at a timestamp before the truncation forces it to load from disk. # The key must be found, cache_read_deleted must increment, and the page must not be dirtied. self.setup_leader() - self.truncate_and_checkpoint(self.trunc_start, self.trunc_stop, 20) + self.truncate(self.trunc_start, self.trunc_stop, commit_timestamp=20) + self.leader_checkpoint(20) conn, sess = self.open_follower() dirty_before = self.get_stat(conn, stat.conn.cache_pages_dirty) diff --git a/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate04.py b/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate04.py index b4dec4eb05b..58e89e0913d 100644 --- a/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate04.py +++ b/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate04.py @@ -26,9 +26,9 @@ # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR # OTHER DEALINGS IN THE SOFTWARE. -import unittest -import wttest, wiredtiger +import wttest from helper_disagg import disagg_test_class, gen_disagg_storages +from helper_layered_fast_truncate import LayeredFastTruncateConfigMixin from wtscenario import make_scenarios # test_layered_fast_truncate04.py @@ -37,7 +37,7 @@ from wtscenario import make_scenarios # open-ended truncation, multiple truncated ranges, and mixed # update-then-truncate workloads. 
@disagg_test_class -class test_layered_fast_truncate04(wttest.WiredTigerTestCase): +class test_layered_fast_truncate04(LayeredFastTruncateConfigMixin, wttest.WiredTigerTestCase): conn_config = 'disaggregated=(role="leader"),' @@ -54,8 +54,7 @@ class test_layered_fast_truncate04(wttest.WiredTigerTestCase): # digits so that lexicographic order matches numeric order. nitems = 1000 - @staticmethod - def key(n): + def key(self, n): return f'{n:04d}' def session_create_config(self): @@ -66,104 +65,35 @@ class test_layered_fast_truncate04(wttest.WiredTigerTestCase): # Populate the table on the leader, checkpoint, then reopen as follower. def setup_follower(self): - self.session.create(self.uri, self.session_create_config()) - cursor = self.session.open_cursor(self.uri) - for i in range(self.nitems): - self.session.begin_transaction() - cursor[self.key(i)] = 'value' - self.session.commit_transaction() - cursor.close() - self.session.checkpoint() + self.setup_leader(keys=range(self.nitems)) + super().setup_follower() - follower_config = ( - 'disaggregated=(role="follower",' - f'checkpoint_meta="{self.disagg_get_complete_checkpoint_meta()}")' - ) - self.reopen_conn(config=follower_config) - - # Truncate the range [start, stop] (inclusive). If stop is None, truncate - # from start to the end of the table. - def truncate_range(self, start, stop): - c1 = self.session.open_cursor(self.uri) - c1.set_key(self.key(start)) - c2 = None - if stop is not None: - c2 = self.session.open_cursor(self.uri) - c2.set_key(self.key(stop)) - self.session.begin_transaction() - self.session.truncate(None, c1, c2, None) - self.session.commit_transaction() - c1.close() - if c2 is not None: - c2.close() - - # Return all keys visible via a forward scan. 
- def scan_forward(self): - cursor = self.session.open_cursor(self.uri) - self.session.begin_transaction() - keys = [] - while cursor.next() == 0: - keys.append(cursor.get_key()) - self.session.rollback_transaction() - cursor.close() - return keys - - # Return all keys visible via a backward scan. - def scan_backward(self): - cursor = self.session.open_cursor(self.uri) - self.session.begin_transaction() - keys = [] - while cursor.prev() == 0: - keys.append(cursor.get_key()) - self.session.rollback_transaction() - cursor.close() - return list(reversed(keys)) # reverse so order matches forward scan + # Return all keys visible via a forward and a backward scan; assert both + # match the expected list. + def assert_scan(self, expected): + self.assertEqual(self.visible_keys(), expected, 'forward scan mismatch') + self.assertEqual(list(reversed(self.visible_keys(forward=False))), expected, + 'backward scan mismatch') # Run search_near in its own transaction; return (exact, landed_key). def search_near(self, key): - cursor = self.session.open_cursor(self.uri) - self.session.begin_transaction() - cursor.set_key(self.key(key)) - exact = cursor.search_near() - landed = cursor.get_key() - self.session.rollback_transaction() - cursor.close() - return exact, landed - - # Run search in its own transaction; return the return value (0 or WT_NOTFOUND). - def search(self, key): - cursor = self.session.open_cursor(self.uri) - self.session.begin_transaction() - cursor.set_key(self.key(key)) - ret = cursor.search() - self.session.rollback_transaction() - cursor.close() - return ret - - # Assert forward and backward scans both return the expected key list. - def assert_scan(self, expected): - self.assertEqual(self.scan_forward(), expected, 'forward scan mismatch') - self.assertEqual(self.scan_backward(), expected, 'backward scan mismatch') + return self.search_near_key(key) # Write a single key/value pair in its own transaction. 
def put(self, key, value='v'): - cursor = self.session.open_cursor(self.uri) - self.session.begin_transaction() - cursor[self.key(key)] = value - self.session.commit_transaction() - cursor.close() + self.populate([key], value=value) def test_cursor_scan_skips_truncated_range(self): # Forward and backward scans must skip every key in the truncated range. self.setup_follower() - self.truncate_range(100, 700) + self.truncate(100, 700) self.assert_scan([self.key(i) for i in range(self.nitems) if i < 100 or i > 700]) def test_search_near_inside_truncated_range(self): # search_near for a key deep inside a truncated range must land outside # the range and must not report an exact match. self.setup_follower() - self.truncate_range(100, 700) + self.truncate(100, 700) exact, landed = self.search_near(400) self.assertFalse(self.key(100) <= landed <= self.key(700), @@ -175,7 +105,7 @@ class test_layered_fast_truncate04(wttest.WiredTigerTestCase): # as candidates for search_near. Test both directions by placing the # single visible ingest key above or below the search key. self.setup_follower() - self.truncate_range(0, self.nitems - 1) + self.truncate(0, self.nitems - 1) # Scenario 1: ingest 0600 above search key 0500 forward (exact=1). self.put(600, 'ingest-live') @@ -197,7 +127,7 @@ class test_layered_fast_truncate04(wttest.WiredTigerTestCase): # The start and stop keys of the range are inclusive, so search_near at # either boundary must land strictly outside the range. self.setup_follower() - self.truncate_range(100, 700) + self.truncate(100, 700) for boundary in (100, 700): _, landed = self.search_near(boundary) @@ -207,22 +137,22 @@ class test_layered_fast_truncate04(wttest.WiredTigerTestCase): def test_truncate_to_end_of_table(self): # Open-ended truncate from key 500; only 0-499 remain visible. 
self.setup_follower() - self.truncate_range(500, None) + self.truncate(500, None) self.assert_scan([self.key(i) for i in range(500)]) def test_multiple_truncate_ranges(self): # Two disjoint bounded ranges; scans must skip both. self.setup_follower() - self.truncate_range(100, 300) - self.truncate_range(600, 800) + self.truncate(100, 300) + self.truncate(600, 800) self.assert_scan([self.key(i) for i in range(self.nitems) if not (100 <= i <= 300) and not (600 <= i <= 800)]) def test_mixed_bounded_and_open_ended_truncates(self): # Bounded [100, 300] combined with open-ended [600, end]; 0-99 and 301-599 visible. self.setup_follower() - self.truncate_range(100, 300) - self.truncate_range(600, None) + self.truncate(100, 300) + self.truncate(600, None) self.assert_scan([self.key(i) for i in range(self.nitems) if i < 100 or (301 <= i <= 599)]) @@ -230,7 +160,7 @@ class test_layered_fast_truncate04(wttest.WiredTigerTestCase): # Open-ended truncate captures a snapshot of "end" at commit time. Keys # appended afterwards are new data and must remain visible. self.setup_follower() - self.truncate_range(800, None) + self.truncate(800, None) for i in range(1000, 1100): self.put(i, 'appended') @@ -244,23 +174,23 @@ class test_layered_fast_truncate04(wttest.WiredTigerTestCase): self.setup_follower() for i in range(200, 401): self.put(i, 'updated') - self.truncate_range(100, 700) + self.truncate(100, 700) self.assert_scan([self.key(i) for i in range(self.nitems) if i < 100 or i > 700]) - self.assertEqual(self.search(300), wiredtiger.WT_NOTFOUND, + self.assertFalse(self.key_exists(300), 'search must hide an updated-then-truncated key') def test_search_returns_not_found_in_truncated_range(self): # search() goes through a different read path than scans and search_near; # both boundaries and interior keys must return WT_NOTFOUND. 
self.setup_follower() - self.truncate_range(100, 700) + self.truncate(100, 700) for k in (400, 100, 700): - self.assertEqual(self.search(k), wiredtiger.WT_NOTFOUND, + self.assertFalse(self.key_exists(k), f'search({self.key(k)}) inside range must be hidden') for k in (99, 701): - self.assertEqual(self.search(k), 0, + self.assertTrue(self.key_exists(k), f'search({self.key(k)}) outside range must succeed') def test_search_near_direction_in_truncated_range(self): @@ -269,24 +199,24 @@ class test_layered_fast_truncate04(wttest.WiredTigerTestCase): self.setup_follower() # Bounded range [100, 700]. Forward finds 0701. - self.truncate_range(100, 700) + self.truncate(100, 700) self.assertEqual(self.search_near(400), (1, self.key(701)), 'forward scenario') # Add open-ended truncate [800, end]. Forward exhausts, falls back to 0799. - self.truncate_range(800, None) + self.truncate(800, None) self.assertEqual(self.search_near(900), (-1, self.key(799)), 'backward scenario') def test_overlapping_truncated_ranges_scan(self): # Two overlapping ranges [100, 400] and [300, 700]: scans must skip the # full union [100, 700], not just one range at a time. self.setup_follower() - self.truncate_range(100, 400) - self.truncate_range(300, 700) + self.truncate(100, 400) + self.truncate(300, 700) self.assert_scan([self.key(i) for i in range(self.nitems) if i < 100 or i > 700]) def test_entire_table_truncated(self): # Truncate every key; both scans must be empty. 
self.setup_follower() - self.truncate_range(0, self.nitems - 1) + self.truncate(0, self.nitems - 1) self.assert_scan([]) diff --git a/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate05.py b/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate05.py index 106b7a6cfcf..eae34916d54 100644 --- a/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate05.py +++ b/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate05.py @@ -28,6 +28,7 @@ import wttest from helper_disagg import disagg_test_class, gen_disagg_storages +from helper_layered_fast_truncate import LayeredFastTruncateConfigMixin from wtscenario import make_scenarios # test_layered_fast_truncate05.py @@ -35,7 +36,7 @@ from wtscenario import make_scenarios # standby (follower) node. @disagg_test_class -class test_layered_fast_truncate05(wttest.WiredTigerTestCase): +class test_layered_fast_truncate05(LayeredFastTruncateConfigMixin, wttest.WiredTigerTestCase): conn_config = 'disaggregated=(role="leader"),' @@ -52,8 +53,7 @@ class test_layered_fast_truncate05(wttest.WiredTigerTestCase): # digits so that lexicographic order matches numeric order. nitems = 1000 - @staticmethod - def key(n): + def key(self, n): return f'{n:04d}' def session_create_config(self): @@ -64,36 +64,8 @@ class test_layered_fast_truncate05(wttest.WiredTigerTestCase): # Populate the table on the leader, checkpoint, then reopen as follower. def setup_follower(self): - self.session.create(self.uri, self.session_create_config()) - cursor = self.session.open_cursor(self.uri) - for i in range(self.nitems): - self.session.begin_transaction() - cursor[self.key(i)] = 'value' - self.session.commit_transaction() - cursor.close() - self.session.checkpoint() - - follower_config = ( - 'disaggregated=(role="follower",' - f'checkpoint_meta="{self.disagg_get_complete_checkpoint_meta()}")' - ) - self.reopen_conn(config=follower_config) - - # Truncate the range [start, stop] (inclusive). 
If stop is None, truncate - # from start to the end of the table. - def truncate_range(self, start, stop): - c1 = self.session.open_cursor(self.uri) - c1.set_key(self.key(start)) - c2 = None - if stop is not None: - c2 = self.session.open_cursor(self.uri) - c2.set_key(self.key(stop)) - self.session.begin_transaction() - self.session.truncate(None, c1, c2, None) - self.session.commit_transaction() - c1.close() - if c2 is not None: - c2.close() + self.setup_leader(keys=range(self.nitems)) + super().setup_follower() # Draw `samples` random keys and assert none fall inside [low, high]. def sample_assert_random(self, low, high, samples=200): @@ -110,7 +82,7 @@ class test_layered_fast_truncate05(wttest.WiredTigerTestCase): def test_random_cursor_skips_truncated_range(self): # 200 random samples must all land outside the truncated range. self.setup_follower() - self.truncate_range(100, 700) + self.truncate(100, 700) self.sample_assert_random(100, 700) def test_random_cursor_skips_truncated_range_with_live_ingest(self): @@ -125,5 +97,5 @@ class test_layered_fast_truncate05(wttest.WiredTigerTestCase): self.session.commit_transaction() cursor.close() - self.truncate_range(100, 700) + self.truncate(100, 700) self.sample_assert_random(100, 700) diff --git a/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate06.py b/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate06.py index a68711d28f4..c266ecabf7e 100644 --- a/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate06.py +++ b/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate06.py @@ -34,10 +34,11 @@ import wttest from helper_disagg import disagg_test_class, gen_disagg_storages +from helper_layered_fast_truncate import LayeredFastTruncateConfigMixin from wtscenario import make_scenarios @disagg_test_class -class test_layered_fast_truncate06(wttest.WiredTigerTestCase): +class test_layered_fast_truncate06(LayeredFastTruncateConfigMixin, wttest.WiredTigerTestCase): conn_config 
= 'disaggregated=(role="leader"),' nrows = 100 @@ -50,14 +51,6 @@ class test_layered_fast_truncate06(wttest.WiredTigerTestCase): 'test_layered_fast_truncate06', disagg_only=True) scenarios = make_scenarios(disagg_storages, uris) - def visible_keys(self): - c = self.session.open_cursor(self.uri) - keys = [] - while c.next() == 0: - keys.append(c.get_key()) - c.close() - return keys - def session_create_config(self): cfg = 'key_format=i,value_format=S' if self.uri.startswith('table:'): @@ -65,8 +58,8 @@ class test_layered_fast_truncate06(wttest.WiredTigerTestCase): return cfg def setup_follower(self): - # Create the table on the leader, load nrows, checkpoint, then reopen the - # connection as a follower picking up that checkpoint. + # Create the table on the leader, load nrows with per-row commit timestamps, + # checkpoint, then reopen the connection as a follower picking up that checkpoint. self.session.create(self.uri, self.session_create_config()) cursor = self.session.open_cursor(self.uri) @@ -77,32 +70,29 @@ class test_layered_fast_truncate06(wttest.WiredTigerTestCase): cursor.close() self.session.checkpoint() - follower_config = ('disaggregated=(role="follower",' - f'checkpoint_meta="{self.disagg_get_complete_checkpoint_meta()}")') - self.reopen_conn(config=follower_config) + super().setup_follower() - def follower_truncate(self, start, stop): - c_start = self.session.open_cursor(self.uri) - c_start.set_key(start) - c_stop = self.session.open_cursor(self.uri) - c_stop.set_key(stop) - self.session.begin_transaction() - self.session.truncate(None, c_start, c_stop, None) - self.session.commit_transaction() - c_start.close() - c_stop.close() + def visible_keys_simple(self): + # The test verifies a scan outside a transaction; use a simple inline scan + # to match the original semantics (no transaction wrapping). 
+ c = self.session.open_cursor(self.uri) + keys = [] + while c.next() == 0: + keys.append(c.get_key()) + c.close() + return keys def test_verify_preserves_follower_truncate(self): self.setup_follower() - self.follower_truncate(30, 60) + self.truncate(30, 60) expected = [i for i in range(1, self.nrows + 1) if i < 30 or i > 60] # Before verify: a scan does not return the truncated rows. - self.assertEqual(self.visible_keys(), expected) + self.assertEqual(self.visible_keys_simple(), expected) # Verify the layered URI. This triggers a close + reopen of the dhandle. self.session.verify(self.uri) # After verify: a scan must still not return the truncated rows. - self.assertEqual(self.visible_keys(), expected) + self.assertEqual(self.visible_keys_simple(), expected) diff --git a/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate07.py b/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate07.py index fd25acac51b..52ef0338a75 100644 --- a/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate07.py +++ b/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate07.py @@ -26,7 +26,7 @@ # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR # OTHER DEALINGS IN THE SOFTWARE. -# test_layered_fast_truncate06.py +# test_layered_fast_truncate07.py # Follower-initiated truncate stores a bounded range in the truncate list. 
# Verifies NULL start/stop from the session API are resolved to the table's # first/last visible key, both via the verbose log line and by the row set @@ -34,19 +34,20 @@ import wttest from helper_disagg import disagg_test_class, gen_disagg_storages +from helper_layered_fast_truncate import LayeredFastTruncateConfigMixin from wtscenario import make_scenarios @disagg_test_class -class test_layered_fast_truncate06(wttest.WiredTigerTestCase): +class test_layered_fast_truncate07(LayeredFastTruncateConfigMixin, wttest.WiredTigerTestCase): conn_config = 'verbose=[layered:3],disaggregated=(role="leader"),' - uri = 'layered:test_layered_fast_truncate06' + uri = 'layered:test_layered_fast_truncate07' key_formats = [ ('string', dict(key_format='S')), ('int', dict(key_format='i')), ] - disagg_storages = gen_disagg_storages('test_layered_fast_truncate06', disagg_only=True) + disagg_storages = gen_disagg_storages('test_layered_fast_truncate07', disagg_only=True) scenarios = make_scenarios(disagg_storages, key_formats) nitems = 100 @@ -59,42 +60,17 @@ class test_layered_fast_truncate06(wttest.WiredTigerTestCase): def key_str(self, n): return f'{n:04d}' if self.key_format == 'S' else str(n) + def session_create_config(self): + return f'key_format={self.key_format},value_format=S' + def setup_follower(self): - self.session.create(self.uri, f'key_format={self.key_format},value_format=S') + self.session.create(self.uri, self.session_create_config()) self.insert_range(1, self.nitems) self.session.checkpoint() follower_config = ('verbose=[layered:3],disaggregated=(role="follower",' f'checkpoint_meta="{self.disagg_get_complete_checkpoint_meta()}")') self.reopen_conn(config=follower_config) - def truncate(self, start=None, stop=None): - c_start = c_stop = None - if start is not None: - c_start = self.session.open_cursor(self.uri) - c_start.set_key(self.key(start)) - if stop is not None: - c_stop = self.session.open_cursor(self.uri) - c_stop.set_key(self.key(stop)) - - # Use the table 
uri if both start and stop cursors are not given. - uri = self.uri if (c_start is None and c_stop is None) else None - self.session.begin_transaction() - self.session.truncate(uri, c_start, c_stop, None) - self.session.commit_transaction() - if c_start is not None: - c_start.close() - if c_stop is not None: - c_stop.close() - - def visible_keys(self, forward=True): - c = self.session.open_cursor(self.uri) - step = c.next if forward else c.prev - keys = [] - while step() == 0: - keys.append(c.get_key()) - c.close() - return keys - def insert_range(self, lo, hi): c = self.session.open_cursor(self.uri) for i in range(lo, hi + 1): @@ -103,6 +79,16 @@ class test_layered_fast_truncate06(wttest.WiredTigerTestCase): self.session.commit_transaction() c.close() + def follower_visible_keys(self, forward=True): + # Simple inline scan without a transaction wrapper to match the original behavior. + c = self.session.open_cursor(self.uri) + step = c.next if forward else c.prev + keys = [] + while step() == 0: + keys.append(c.get_key()) + c.close() + return keys + # Keys in [1, nitems] minus [start, stop] (inclusive on both ends). 
def expected_keys(self, start, stop): return [self.key(i) for i in range(1, self.nitems + 1) @@ -117,59 +103,59 @@ class test_layered_fast_truncate06(wttest.WiredTigerTestCase): def test_bounded_range(self): self.setup_follower() - self.truncate(start=30, stop=60) + self.truncate(start_key=30, stop_key=60) self.assert_trunc_log(30, 60) - self.assertEqual(self.visible_keys(), self.expected_keys(30, 60)) + self.assertEqual(self.follower_visible_keys(), self.expected_keys(30, 60)) def test_null_start_resolves_to_first_key(self): self.setup_follower() - self.truncate(start=None, stop=60) + self.truncate(start_key=None, stop_key=60) self.assert_trunc_log(1, 60) - self.assertEqual(self.visible_keys(), self.expected_keys(1, 60)) + self.assertEqual(self.follower_visible_keys(), self.expected_keys(1, 60)) def test_null_stop_resolves_to_last_key(self): self.setup_follower() - self.truncate(start=30, stop=None) + self.truncate(start_key=30, stop_key=None) self.assert_trunc_log(30, self.nitems) - self.assertEqual(self.visible_keys(), self.expected_keys(30, self.nitems)) + self.assertEqual(self.follower_visible_keys(), self.expected_keys(30, self.nitems)) def test_both_null_is_full_table(self): self.setup_follower() - self.truncate(start=None, stop=None) + self.truncate(start_key=None, stop_key=None) self.assert_trunc_log(1, self.nitems) - self.assertEqual(self.visible_keys(), []) + self.assertEqual(self.follower_visible_keys(), []) # An open-ended truncate captures "end" at commit time, not dynamically. Keys appended # after stop should be visible. 
def test_open_ended_truncate_does_not_hide_later_appends(self): self.setup_follower() - self.truncate(start=80, stop=None) + self.truncate(start_key=80, stop_key=None) self.assert_trunc_log(80, self.nitems) self.insert_range(200, 210) expected = [self.key(i) for i in range(1, 80)] + \ [self.key(i) for i in range(200, 211)] - self.assertEqual(self.visible_keys(), expected) + self.assertEqual(self.follower_visible_keys(), expected) def test_bounded_and_end_open_ended_overlap(self): self.setup_follower() - self.truncate(start=20, stop=60) + self.truncate(start_key=20, stop_key=60) self.assert_trunc_log(20, 60) - self.truncate(start=50, stop=None) + self.truncate(start_key=50, stop_key=None) # key 50-60 was deleted by the first truncate; search_near positions it on the # nearest in-bound key, 61. self.assert_trunc_log(61, self.nitems) expected = [self.key(i) for i in range(1, 20)] - self.assertEqual(self.visible_keys(), expected) - self.assertEqual(self.visible_keys(forward=False), list(reversed(expected))) + self.assertEqual(self.follower_visible_keys(), expected) + self.assertEqual(self.follower_visible_keys(forward=False), list(reversed(expected))) def test_bounded_and_start_open_ended_overlap(self): self.setup_follower() - self.truncate(start=20, stop=60) + self.truncate(start_key=20, stop_key=60) self.assert_trunc_log(20, 60) - self.truncate(start=0, stop=30) + self.truncate(start_key=0, stop_key=30) # key 20-30 was deleted by the first truncate; search_near positions it on the # nearest live key, 19. 
self.assert_trunc_log(1, 19) expected = [self.key(i) for i in range(61, self.nitems + 1)] - self.assertEqual(self.visible_keys(), expected) - self.assertEqual(self.visible_keys(forward=False), list(reversed(expected))) + self.assertEqual(self.follower_visible_keys(), expected) + self.assertEqual(self.follower_visible_keys(forward=False), list(reversed(expected))) diff --git a/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate08.py b/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate08.py index e5b8df5d008..deea4cafe3c 100644 --- a/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate08.py +++ b/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate08.py @@ -33,68 +33,59 @@ from contextlib import closing from helper_disagg import disagg_test_class, gen_disagg_storages +from helper_layered_fast_truncate import LayeredFastTruncateConfigMixin from wtscenario import make_scenarios import wttest @disagg_test_class -class test_layered_fast_truncate08(wttest.WiredTigerTestCase): +class test_layered_fast_truncate08(LayeredFastTruncateConfigMixin, wttest.WiredTigerTestCase): test_name = __qualname__ disagg_storages = gen_disagg_storages(test_name, disagg_only=True) scenarios = make_scenarios(disagg_storages) conn_config = 'disaggregated=(role="leader"),' - def setup_layered_table(self, layered_uri: str): + uri = f"layered:{test_name}" + + def session_create_config(self): + return "key_format=i,value_format=u" + + def populate(self, keys, value=b"v"): + with closing(self.session.open_cursor(self.uri)) as cursor: + with self.transaction(): + for key in keys: + cursor[key] = value + + def setup_layered_table(self): # Create the table and produce the initial checkpoint that the follower # will attach to. 
- session_config = "key_format=i,value_format=u" - self.session.create(layered_uri, session_config) - self.session.checkpoint() - - def setup_follower(self, layered_uri: str): - self.reopen_disagg_conn('disaggregated=(role="follower"),') + self.setup_leader() + def setup_follower(self, keys=range(100)): + super().setup_follower() # Add updates on the ingest that can be truncated later. - with closing(self.session.open_cursor(layered_uri)) as cursor: - with self.transaction(): - for i in range(100): - cursor[i] = b"v" + self.populate(keys) - def truncate(self, layered_uri: str, start_key: int, stop_key: int): - # Truncate between start and stop keys inclusive. - with ( - closing(self.session.open_cursor(layered_uri)) as start_cursor, - closing(self.session.open_cursor(layered_uri)) as stop_cursor, - ): - start_cursor.set_key(start_key) - stop_cursor.set_key(stop_key) - - with self.transaction(): - self.session.truncate(None, start_cursor, stop_cursor, None) - - def get_values(self, uri: str, start_key: int, stop_key: int): + def get_values(self, uri, start_key, stop_key): # Return values of any keys between start and stop inclusive that exist. values = [] - with closing(self.session.open_cursor(uri)) as cursor: for i in range(start_key, stop_key + 1): cursor.set_key(i) if cursor.search() == 0: values.append(cursor.get_value()) - return values def test_follower_truncate_writes_tombstone_to_ingest(self): # Set up a follower with existing ingest updates. - layered_uri = f"layered:{self.test_name}" - self.setup_layered_table(layered_uri) - self.setup_follower(layered_uri) + self.setup_layered_table() + self.setup_follower() # Truncate a range of keys. start_key = 20 stop_key = 80 - self.truncate(layered_uri, start_key, stop_key) + self.truncate(start_key, stop_key) # Examine what the truncate actually wrote to the ingest file. 
ingest_uri = f"file:{self.test_name}.wt_ingest" diff --git a/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate09.py b/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate09.py index 9f71bd52ef9..fbae594b353 100644 --- a/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate09.py +++ b/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate09.py @@ -28,12 +28,13 @@ import wiredtiger, wttest from helper_disagg import disagg_test_class, gen_disagg_storages +from helper_layered_fast_truncate import LayeredFastTruncateConfigMixin from wtscenario import make_scenarios # test_layered_fast_truncate09.py # Follower truncate-list visibility coverage. @disagg_test_class -class test_layered_fast_truncate09(wttest.WiredTigerTestCase): +class test_layered_fast_truncate09(LayeredFastTruncateConfigMixin, wttest.WiredTigerTestCase): conn_config = 'disaggregated=(role="leader"),' @@ -49,7 +50,6 @@ class test_layered_fast_truncate09(wttest.WiredTigerTestCase): def setUp(self): super().setUp() - self.setup_follower() def session_create_config(self): @@ -85,7 +85,7 @@ class test_layered_fast_truncate09(wttest.WiredTigerTestCase): c_start.close() c_stop.close() - def search_key(self, session, key): + def search_in(self, session, key): cursor = session.open_cursor(self.uri) cursor.set_key(key) ret = cursor.search() @@ -93,7 +93,7 @@ class test_layered_fast_truncate09(wttest.WiredTigerTestCase): cursor.close() return ret, value - def search_near_key(self, session, key): + def search_near_in(self, session, key): cursor = session.open_cursor(self.uri) cursor.set_key(key) exact = cursor.search_near() @@ -118,9 +118,9 @@ class test_layered_fast_truncate09(wttest.WiredTigerTestCase): with self.transaction(session=self.session, rollback=True): self.truncate_range(self.session, 100, 700) - ret = self.search_key(self.session, 150)[0] + ret = self.search_in(self.session, 150)[0] self.assertEqual(ret, wiredtiger.WT_NOTFOUND) - exact, landed = 
self.search_near_key(self.session, 150) + exact, landed = self.search_near_in(self.session, 150) self.assertNotEqual(exact, 0) if exact < 0: self.assertEqual(landed, 99) @@ -136,8 +136,8 @@ class test_layered_fast_truncate09(wttest.WiredTigerTestCase): session2 = self.conn.open_session() try: with self.transaction(session=session2, rollback=True): - self.assertEqual(self.search_key(session2, 150), (0, 'value')) - self.assertEqual(self.search_near_key(session2, 150), (0, 150)) + self.assertEqual(self.search_in(session2, 150), (0, 'value')) + self.assertEqual(self.search_near_in(session2, 150), (0, 150)) self.assertEqual(self.next_key_after(session2, 149), 150) finally: session2.close() @@ -145,14 +145,14 @@ class test_layered_fast_truncate09(wttest.WiredTigerTestCase): def test_rollback_restores_visibility(self): with self.transaction(session=self.session, rollback=True): self.truncate_range(self.session, 100, 700) - ret = self.search_key(self.session, 150)[0] + ret = self.search_in(self.session, 150)[0] self.assertEqual(ret, wiredtiger.WT_NOTFOUND) session2 = self.conn.open_session() try: with self.transaction(session=session2, rollback=True): - self.assertEqual(self.search_key(session2, 150), (0, 'value')) - self.assertEqual(self.search_near_key(session2, 150), (0, 150)) + self.assertEqual(self.search_in(session2, 150), (0, 'value')) + self.assertEqual(self.search_near_in(session2, 150), (0, 150)) self.assertEqual(self.next_key_after(session2, 149), 150) finally: session2.close() @@ -163,14 +163,14 @@ class test_layered_fast_truncate09(wttest.WiredTigerTestCase): session2 = self.conn.open_session() try: with self.transaction(session=session2, read_timestamp=20, rollback=True): - self.assertEqual(self.search_key(session2, 150), (0, 'value')) - self.assertEqual(self.search_near_key(session2, 150), (0, 150)) + self.assertEqual(self.search_in(session2, 150), (0, 'value')) + self.assertEqual(self.search_near_in(session2, 150), (0, 150)) 
self.assertEqual(self.next_key_after(session2, 149), 150) with self.transaction(session=session2, read_timestamp=30, rollback=True): - ret = self.search_key(session2, 150)[0] + ret = self.search_in(session2, 150)[0] self.assertEqual(ret, wiredtiger.WT_NOTFOUND) - exact, landed = self.search_near_key(session2, 150) + exact, landed = self.search_near_in(session2, 150) self.assertNotEqual(exact, 0) if exact < 0: self.assertEqual(landed, 99) @@ -188,16 +188,16 @@ class test_layered_fast_truncate09(wttest.WiredTigerTestCase): session2 = self.conn.open_session() try: with self.transaction(session=session2, read_timestamp=30, rollback=True): - ret = self.search_key(session2, 350)[0] + ret = self.search_in(session2, 350)[0] self.assertEqual(ret, wiredtiger.WT_NOTFOUND) - self.assertEqual(self.search_key(session2, 500), (0, 'value')) + self.assertEqual(self.search_in(session2, 500), (0, 'value')) with self.transaction(session=session2, read_timestamp=40, rollback=True): - ret = self.search_key(session2, 350)[0] + ret = self.search_in(session2, 350)[0] self.assertEqual(ret, wiredtiger.WT_NOTFOUND) - ret = self.search_key(session2, 500)[0] + ret = self.search_in(session2, 500)[0] self.assertEqual(ret, wiredtiger.WT_NOTFOUND) - exact, landed = self.search_near_key(session2, 150) + exact, landed = self.search_near_in(session2, 150) self.assertNotEqual(exact, 0) if exact < 0: self.assertEqual(landed, 99) diff --git a/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate10.py b/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate10.py index b3ea0b0d229..6f57795d41e 100644 --- a/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate10.py +++ b/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate10.py @@ -33,26 +33,16 @@ # the logical union of the stable and ingest tables, independent of which # table any given key actually lives in. 
-from contextlib import closing -from itertools import chain -from typing import Iterable from helper_disagg import disagg_test_class, gen_disagg_storages +from helper_layered_fast_truncate import ( + LayeredFastTruncateConfigMixin, concat, range_inclusive, +) from wtscenario import make_scenarios import wttest -def concat(*iterables: Iterable[int]) -> list[int]: - """Concatenate any number of iterables into a single list.""" - return list(chain.from_iterable(iterables)) - - -def range_inclusive(start: int, stop: int) -> range: - """Return a range covering [start, stop] inclusive.""" - return range(start, stop + 1) - - @disagg_test_class -class test_layered_fast_truncate10(wttest.WiredTigerTestCase): +class test_layered_fast_truncate10(LayeredFastTruncateConfigMixin, wttest.WiredTigerTestCase): """ Data location semantics (stable vs ingest). @@ -70,60 +60,6 @@ class test_layered_fast_truncate10(wttest.WiredTigerTestCase): scenarios = make_scenarios(disagg_storages, uris) conn_config = 'disaggregated=(role="leader"),' - def session_create_config(self): - cfg = "key_format=i,value_format=S" - if self.uri.startswith("table"): - cfg += ",block_manager=disagg,type=layered" - return cfg - - def auto_closing_cursor(self): - """Return a cursor that auto-closes as it goes out of scope.""" - return closing(self.session.open_cursor(self.uri)) - - def populate(self, keys: Iterable[int]): - """Insert each key with a placeholder value in a single transaction.""" - with self.auto_closing_cursor() as cursor: - with self.transaction(): - for key in keys: - cursor[key] = "v" - - def setup_leader(self, keys: Iterable[int] | None = None): - """ - Create the table on the leader and optionally pre-populate stable. - The follower will pick up these keys via the initial checkpoint. 
- """ - self.session.create(self.uri, self.session_create_config()) - if keys is not None: - self.populate(keys) - self.session.checkpoint() - - def setup_follower(self, keys: Iterable[int] | None = None): - """Switch to follower role and optionally write keys to ingest.""" - self.reopen_disagg_conn('disaggregated=(role="follower"),') - if keys is not None: - self.populate(keys) - - def truncate(self, start_key: int, stop_key: int): - """Truncate between start and stop keys inclusive.""" - with ( - self.auto_closing_cursor() as start_cursor, - self.auto_closing_cursor() as stop_cursor, - ): - start_cursor.set_key(start_key) - stop_cursor.set_key(stop_key) - - with self.transaction(): - self.session.truncate(None, start_cursor, stop_cursor, None) - - def visible_keys(self) -> list[int]: - """Return all keys visible via a forward scan, in key order.""" - result = [] - with self.auto_closing_cursor() as cursor: - with self.transaction(rollback=True): - while cursor.next() == 0: - result.append(cursor.get_key()) - return result - def test_truncate_range_with_both_tables_empty(self): # Stable and ingest are both empty. self.setup_leader() diff --git a/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate11.py b/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate11.py index 72e545a8f3d..bea30b92a8f 100644 --- a/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate11.py +++ b/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate11.py @@ -34,27 +34,17 @@ # Open-ended truncates should not apply to keys written after the truncate # commits. 
-from contextlib import closing, nullcontext -from itertools import chain -from typing import Iterable from helper_disagg import disagg_test_class, gen_disagg_storages +from helper_layered_fast_truncate import ( + LayeredFastTruncateConfigMixin, concat, range_inclusive, +) from wiredtiger import WiredTigerError from wtscenario import make_scenarios import wttest -def concat(*iterables: Iterable[int]) -> list[int]: - """Concatenate any number of iterables into a single list.""" - return list(chain.from_iterable(iterables)) - - -def range_inclusive(start: int, stop: int) -> range: - """Return a range covering [start, stop] inclusive.""" - return range(start, stop + 1) - - @disagg_test_class -class test_layered_fast_truncate11(wttest.WiredTigerTestCase): +class test_layered_fast_truncate11(LayeredFastTruncateConfigMixin, wttest.WiredTigerTestCase): """ Range specification (start / end / open-ended). @@ -73,67 +63,6 @@ class test_layered_fast_truncate11(wttest.WiredTigerTestCase): scenarios = make_scenarios(disagg_storages, uris) conn_config = 'disaggregated=(role="leader"),' - def session_create_config(self): - cfg = "key_format=i,value_format=S" - if self.uri.startswith("table"): - cfg += ",block_manager=disagg,type=layered" - return cfg - - def auto_closing_cursor(self) -> closing: - """Return a cursor that auto-closes as it goes out of scope.""" - return closing(self.session.open_cursor(self.uri)) - - def populate(self, keys: Iterable[int]): - """Insert each key with a placeholder value in a single transaction.""" - with self.auto_closing_cursor() as cursor: - with self.transaction(): - for key in keys: - cursor[key] = "v" - - def setup_leader(self, keys: Iterable[int] | None = None): - """ - Create the table on the leader and optionally pre-populate stable. - The follower will pick up these keys via the initial checkpoint. 
- """ - self.session.create(self.uri, self.session_create_config()) - if keys is not None: - self.populate(keys) - self.session.checkpoint() - - def setup_follower(self, keys: Iterable[int] | None = None): - """Switch to follower role and optionally write keys to ingest.""" - self.reopen_disagg_conn('disaggregated=(role="follower"),') - if keys is not None: - self.populate(keys) - - def cursor_for_key(self, key: int | None): - """Return a cursor with its key set, or None if key is None.""" - if key is None: - return nullcontext(None) # Open-ended truncate. - cursor = self.auto_closing_cursor() - cursor.thing.set_key(key) - return cursor - - def truncate(self, start_key: int | None, stop_key: int | None): - """Truncate [start_key, stop_key] inclusive; None means open end.""" - with ( - self.cursor_for_key(start_key) as start, - self.cursor_for_key(stop_key) as stop, - ): - # WT requires a URI when both cursors are absent. - uri = self.uri if (start is None and stop is None) else None - with self.transaction(): - self.session.truncate(uri, start, stop, None) - - def visible_keys(self) -> list[int]: - """Return all keys visible via a forward scan, in key order.""" - result = [] - with self.auto_closing_cursor() as cursor: - with self.transaction(rollback=True): - while cursor.next() == 0: - result.append(cursor.get_key()) - return result - def test_truncate_with_null_start_key(self): # Set up a follower with keys 1-100. self.setup_leader() diff --git a/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate12.py b/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate12.py index ad4fb31d2ab..fb67c6a29d8 100644 --- a/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate12.py +++ b/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate12.py @@ -32,27 +32,16 @@ # Verify that forward scans, backward scans, next_random, search, and # search_near all treat truncated keys as non-existent on a follower. 
-from contextlib import closing, nullcontext -from itertools import chain -from typing import Iterable from helper_disagg import disagg_test_class, gen_disagg_storages -from wiredtiger import WT_NOTFOUND +from helper_layered_fast_truncate import ( + LayeredFastTruncateConfigMixin, concat, range_inclusive, +) from wtscenario import make_scenarios import wttest -def concat(*iterables: Iterable[int]) -> list[int]: - """Concatenate any number of iterables into a single list.""" - return list(chain.from_iterable(iterables)) - - -def range_inclusive(start: int, stop: int) -> range: - """Return a range covering [start, stop] inclusive.""" - return range(start, stop + 1) - - @disagg_test_class -class test_layered_fast_truncate12(wttest.WiredTigerTestCase): +class test_layered_fast_truncate12(LayeredFastTruncateConfigMixin, wttest.WiredTigerTestCase): """ Cursor iteration and searches over truncated ranges. @@ -69,76 +58,7 @@ class test_layered_fast_truncate12(wttest.WiredTigerTestCase): scenarios = make_scenarios(disagg_storages, uris) conn_config = 'disaggregated=(role="leader"),' - def session_create_config(self): - cfg = "key_format=i,value_format=S" - if self.uri.startswith("table"): - cfg += ",block_manager=disagg,type=layered" - return cfg - - def auto_closing_cursor(self, config: str | None = None) -> closing: - """Return a cursor that auto-closes as it goes out of scope.""" - return closing(self.session.open_cursor(self.uri, None, config)) - - def populate(self, keys: Iterable[int]): - """Insert each key with a placeholder value in a single transaction.""" - with self.auto_closing_cursor() as cursor: - with self.transaction(): - for key in keys: - cursor[key] = "v" - - def setup_leader(self, keys: Iterable[int] | None = None): - """ - Create the table on the leader and optionally pre-populate stable. - The follower will pick up these keys via the initial checkpoint. 
- """ - self.session.create(self.uri, self.session_create_config()) - if keys is not None: - self.populate(keys) - self.session.checkpoint() - - def setup_follower(self, keys: Iterable[int] | None = None): - """Switch to follower role and optionally write keys to ingest.""" - self.reopen_disagg_conn('disaggregated=(role="follower"),') - if keys is not None: - self.populate(keys) - - def cursor_for_key(self, key: int | None): - """Return a cursor with its key set, or None if key is None.""" - if key is None: - return nullcontext(None) - cursor = self.auto_closing_cursor() - cursor.thing.set_key(key) - return cursor - - def truncate(self, start_key: int | None, stop_key: int | None): - """Truncate [start_key, stop_key] inclusive; None means open end.""" - with ( - self.cursor_for_key(start_key) as start, - self.cursor_for_key(stop_key) as stop, - ): - uri = self.uri if (start is None and stop is None) else None - with self.transaction(): - self.session.truncate(uri, start, stop, None) - - def visible_keys(self) -> list[int]: - """Return all keys visible via a forward scan, in key order.""" - result = [] - with self.auto_closing_cursor() as cursor: - with self.transaction(rollback=True): - while cursor.next() == 0: - result.append(cursor.get_key()) - return result - - def backward_visible_keys(self) -> list[int]: - """Return all keys visible via a backward scan.""" - result = [] - with self.auto_closing_cursor() as cursor: - with self.transaction(rollback=True): - while cursor.prev() == 0: - result.append(cursor.get_key()) - return result - - def random_sample_keys(self, n: int) -> list[int]: + def random_sample_keys(self, n): """Return n keys drawn from a next_random cursor.""" result = [] with self.auto_closing_cursor("next_random=true") as cursor: @@ -148,27 +68,6 @@ class test_layered_fast_truncate12(wttest.WiredTigerTestCase): result.append(cursor.get_key()) return result - def search_key(self, key: int) -> int: - """Search for key; return 0 on exact match or 
WT_NOTFOUND.""" - with self.cursor_for_key(key) as cursor: - with self.transaction(rollback=True): - return cursor.search() - - def search_near_key(self, key: int) -> tuple[int, int | None]: - """ - Call search_near for a key. - - Returns (exact, found_key). exact follows WT convention: 0 = exact, - 1 = positioned above, -1 = positioned below, or WT_NOTFOUND if no - visible keys exist. - """ - with self.cursor_for_key(key) as cursor: - with self.transaction(rollback=True): - exact = cursor.search_near() - if exact == WT_NOTFOUND: - return exact, None - return exact, cursor.get_key() - def test_forward_scan_skips_truncated_range(self): # Set up a follower with keys 1-100. self.setup_leader() @@ -194,7 +93,7 @@ class test_layered_fast_truncate12(wttest.WiredTigerTestCase): reversed(range_inclusive(61, 100)), reversed(range_inclusive(1, 29)), ) - self.assertEqual(self.backward_visible_keys(), expected) + self.assertEqual(self.visible_keys(forward=False), expected) def test_next_random_never_lands_in_truncated_range(self): # Set up a follower with keys 1-100. @@ -219,7 +118,7 @@ class test_layered_fast_truncate12(wttest.WiredTigerTestCase): # Searching for a key inside the truncated range should return # WT_NOTFOUND. - self.assertEqual(self.search_key(45), WT_NOTFOUND) + self.assertFalse(self.key_exists(45)) def test_search_at_inclusive_truncate_boundary(self): # Set up a follower with keys 1-100. @@ -230,12 +129,12 @@ class test_layered_fast_truncate12(wttest.WiredTigerTestCase): self.truncate(30, 60) # The boundary keys should be invisible. - self.assertEqual(self.search_key(30), WT_NOTFOUND) - self.assertEqual(self.search_key(60), WT_NOTFOUND) + self.assertFalse(self.key_exists(30)) + self.assertFalse(self.key_exists(60)) # The keys just outside the truncated range should still be found. 
- self.assertEqual(self.search_key(29), 0) - self.assertEqual(self.search_key(61), 0) + self.assertTrue(self.key_exists(29)) + self.assertTrue(self.key_exists(61)) def test_search_near_inside_truncated_range(self): # Set up a follower with keys 1-100. diff --git a/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate13.py b/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate13.py index 607bb55b944..4ea8b244c0d 100644 --- a/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate13.py +++ b/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate13.py @@ -32,26 +32,16 @@ # Verify that subsequent operations - additional truncates, per-key removes, # and reinsertion - compose correctly with a prior committed truncate. -from contextlib import closing, nullcontext -from itertools import chain -from typing import Iterable from helper_disagg import disagg_test_class, gen_disagg_storages +from helper_layered_fast_truncate import ( + LayeredFastTruncateConfigMixin, concat, range_inclusive, +) from wtscenario import make_scenarios import wttest -def concat(*iterables: Iterable[int]) -> list[int]: - """Concatenate any number of iterables into a single list.""" - return list(chain.from_iterable(iterables)) - - -def range_inclusive(start: int, stop: int) -> range: - """Return a range covering [start, stop] inclusive.""" - return range(start, stop + 1) - - @disagg_test_class -class test_layered_fast_truncate13(wttest.WiredTigerTestCase): +class test_layered_fast_truncate13(LayeredFastTruncateConfigMixin, wttest.WiredTigerTestCase): """ Interactions with existing truncates. 
@@ -68,72 +58,13 @@ class test_layered_fast_truncate13(wttest.WiredTigerTestCase): scenarios = make_scenarios(disagg_storages, uris) conn_config = 'disaggregated=(role="leader"),' - def session_create_config(self): - cfg = "key_format=i,value_format=S" - if self.uri.startswith("table"): - cfg += ",block_manager=disagg,type=layered" - return cfg - - def auto_closing_cursor(self, config: str | None = None) -> closing: - """Return a cursor that auto-closes as it goes out of scope.""" - return closing(self.session.open_cursor(self.uri, None, config)) - - def populate(self, keys: Iterable[int]): - """Insert each key with a placeholder value in a single transaction.""" - with self.auto_closing_cursor() as cursor: - with self.transaction(): - for key in keys: - cursor[key] = "v" - - def setup_leader(self, keys: Iterable[int] | None = None): - """ - Create the table on the leader and optionally pre-populate stable. The - follower will pick up these keys via the initial checkpoint. - """ - self.session.create(self.uri, self.session_create_config()) - if keys is not None: - self.populate(keys) - self.session.checkpoint() - - def setup_follower(self, keys: Iterable[int] | None = None): - """Switch to follower role and optionally write keys to ingest.""" - self.reopen_disagg_conn('disaggregated=(role="follower"),') - if keys is not None: - self.populate(keys) - - def cursor_for_key(self, key: int | None): - """Return a cursor with its key set, or None if key is None.""" - if key is None: - return nullcontext(None) - cursor = self.auto_closing_cursor() - cursor.thing.set_key(key) - return cursor - - def truncate(self, start_key: int | None, stop_key: int | None): - """Truncate [start_key, stop_key] inclusive; None means open end.""" - with ( - self.cursor_for_key(start_key) as start, - self.cursor_for_key(stop_key) as stop, - ): - uri = self.uri if (start is None and stop is None) else None - with self.transaction(): - self.session.truncate(uri, start, stop, None) - - def 
remove_key(self, key: int): + def remove_key(self, key): """Remove a single key in a transaction.""" - with self.cursor_for_key(key) as cursor: + with self.auto_closing_cursor() as cursor: + cursor.set_key(self.key(key)) with self.transaction(): cursor.remove() - def visible_keys(self) -> list[int]: - """Return all keys visible via a forward scan, in key order.""" - result = [] - with self.auto_closing_cursor() as cursor: - with self.transaction(rollback=True): - while cursor.next() == 0: - result.append(cursor.get_key()) - return result - def test_per_key_removes_before_truncate(self): # Set up a follower with keys 1-100. self.setup_leader() @@ -226,10 +157,12 @@ class test_layered_fast_truncate13(wttest.WiredTigerTestCase): # Truncate keys 30-60 and reinsert key 45 within the same transaction. with self.transaction(): with ( - self.cursor_for_key(30) as start, - self.cursor_for_key(60) as stop, + self.auto_closing_cursor() as start, + self.auto_closing_cursor() as stop, self.auto_closing_cursor() as cursor, ): + start.set_key(self.key(30)) + stop.set_key(self.key(60)) self.session.truncate(None, start, stop, None) cursor[45] = "v" diff --git a/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate14.py b/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate14.py index ff1420689b3..9e50311cb5f 100644 --- a/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate14.py +++ b/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate14.py @@ -29,15 +29,14 @@ # test_layered_fast_truncate14.py # Ensure next() skips truncated stable keys after search_near lands on an ingest key. 
-from contextlib import closing -from typing import Iterable from helper_disagg import disagg_test_class, gen_disagg_storages +from helper_layered_fast_truncate import LayeredFastTruncateConfigMixin from wtscenario import make_scenarios import wttest @disagg_test_class -class test_layered_fast_truncate14(wttest.WiredTigerTestCase): +class test_layered_fast_truncate14(LayeredFastTruncateConfigMixin, wttest.WiredTigerTestCase): """next() skips truncated stable keys after search_near lands on an ingest key.""" uris = [ @@ -49,43 +48,7 @@ class test_layered_fast_truncate14(wttest.WiredTigerTestCase): scenarios = make_scenarios(disagg_storages, uris) conn_config = 'disaggregated=(role="leader"),' - def session_create_config(self): - cfg = "key_format=i,value_format=S" - if self.uri.startswith("table"): - cfg += ",block_manager=disagg,type=layered" - return cfg - - def auto_closing_cursor(self): - return closing(self.session.open_cursor(self.uri)) - - def populate(self, keys: Iterable[int]): - with self.auto_closing_cursor() as cursor: - with self.transaction(): - for key in keys: - cursor[key] = "v" - - def setup_leader(self, keys: Iterable[int] | None = None): - self.session.create(self.uri, self.session_create_config()) - if keys is not None: - self.populate(keys) - self.session.checkpoint() - - def setup_follower(self, keys: Iterable[int] | None = None): - self.reopen_disagg_conn('disaggregated=(role="follower"),') - if keys is not None: - self.populate(keys) - - def truncate(self, start_key: int, stop_key: int): - with ( - self.auto_closing_cursor() as start, - self.auto_closing_cursor() as stop, - ): - start.set_key(start_key) - stop.set_key(stop_key) - with self.transaction(): - self.session.truncate(None, start, stop, None) - - def keys_after_search_near(self, search_key: int) -> list[int]: + def keys_after_search_near(self, search_key): """ Position on search_key via search_near (must be an exact match), then return all keys yielded by subsequent next() calls. 
diff --git a/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate15.py b/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate15.py index f2e2173c0aa..b887603e2f9 100644 --- a/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate15.py +++ b/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate15.py @@ -30,16 +30,14 @@ # Validate edge scenario where no tombstones are written when ingest keys sit outside # the range. Follower truncate tombstones ingest keys only inside the range. -from contextlib import closing -from typing import Iterable from helper_disagg import disagg_test_class, gen_disagg_storages -from wiredtiger import WT_NOTFOUND +from helper_layered_fast_truncate import LayeredFastTruncateConfigMixin from wtscenario import make_scenarios import wttest @disagg_test_class -class test_layered_fast_truncate15(wttest.WiredTigerTestCase): +class test_layered_fast_truncate15(LayeredFastTruncateConfigMixin, wttest.WiredTigerTestCase): """Follower truncate tombstones only ingest keys inside the range.""" uris = [ @@ -51,65 +49,15 @@ class test_layered_fast_truncate15(wttest.WiredTigerTestCase): scenarios = make_scenarios(disagg_storages, uris) conn_config = 'disaggregated=(role="leader"),' - def session_create_config(self): - cfg = "key_format=i,value_format=S" - if self.uri.startswith("table"): - cfg += ",block_manager=disagg,type=layered" - return cfg - - def auto_closing_cursor(self): - return closing(self.session.open_cursor(self.uri)) - - def populate(self, keys: Iterable[int]): - with self.auto_closing_cursor() as cursor: - with self.transaction(): - for key in keys: - cursor[key] = "v" - - def setup_leader(self, keys: Iterable[int] | None = None): - self.session.create(self.uri, self.session_create_config()) - if keys is not None: - self.populate(keys) - self.session.checkpoint() - - def setup_follower(self, keys: Iterable[int] | None = None): - self.reopen_disagg_conn('disaggregated=(role="follower"),') - if keys 
is not None: - self.populate(keys) - - def truncate(self, start_key: int, stop_key: int): - with ( - self.auto_closing_cursor() as start, - self.auto_closing_cursor() as stop, - ): - start.set_key(start_key) - stop.set_key(stop_key) - with self.transaction(): - self.session.truncate(None, start, stop, None) - - def search_key(self, key: int) -> int: - with self.auto_closing_cursor() as cursor: - with self.transaction(rollback=True): - cursor.set_key(key) - return cursor.search() - - def visible_keys(self) -> list[int]: - result = [] - with self.auto_closing_cursor() as cursor: - with self.transaction(rollback=True): - while cursor.next() == 0: - result.append(cursor.get_key()) - return result - def test_ingest_keys_flanking_range_not_tombstoned(self): # Ingest keys flank the range on both sides with none inside; neither should be tombstoned. self.setup_leader(keys=[0, 10, 20, 30]) self.setup_follower(keys=[5, 25]) self.truncate(10, 20) - self.assertEqual(self.search_key(10), WT_NOTFOUND, + self.assertFalse(self.key_exists(10), "key 10 must be deleted (stable-only, inside truncate range)") - self.assertEqual(self.search_key(25), 0, + self.assertTrue(self.key_exists(25), "key 25 must be visible (ingest key, outside truncate range)") def test_scan_correct_when_ingest_keys_flank_range(self): @@ -126,10 +74,8 @@ class test_layered_fast_truncate15(wttest.WiredTigerTestCase): self.setup_follower(keys=[5]) self.truncate(10, 15) - self.assertEqual(self.search_key(10), WT_NOTFOUND, - "key 10 must be deleted") - self.assertEqual(self.search_key(5), 0, - "key 5 must be visible") + self.assertFalse(self.key_exists(10), "key 10 must be deleted") + self.assertTrue(self.key_exists(5), "key 5 must be visible") def test_ingest_key_only_above_range(self): # All ingest keys are above the range; none should be tombstoned. 
@@ -137,10 +83,8 @@ class test_layered_fast_truncate15(wttest.WiredTigerTestCase): self.setup_follower(keys=[15]) self.truncate(5, 10) - self.assertEqual(self.search_key(10), WT_NOTFOUND, - "key 10 must be deleted") - self.assertEqual(self.search_key(15), 0, - "key 15 must be visible") + self.assertFalse(self.key_exists(10), "key 10 must be deleted") + self.assertTrue(self.key_exists(15), "key 15 must be visible") def test_multiple_ingest_keys_both_sides_no_ingest_in_range(self): # Multiple ingest keys on both sides of the range; none inside; all should stay visible. @@ -149,10 +93,10 @@ class test_layered_fast_truncate15(wttest.WiredTigerTestCase): self.truncate(10, 15) for k in [10, 15]: - self.assertEqual(self.search_key(k), WT_NOTFOUND, + self.assertFalse(self.key_exists(k), f"key {k} must be deleted (stable-only, inside truncate range)") for k in [3, 7, 18, 22]: - self.assertEqual(self.search_key(k), 0, + self.assertTrue(self.key_exists(k), f"key {k} must be visible (ingest key, outside truncate range)") if __name__ == "__main__": diff --git a/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate16.py b/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate16.py index 5b8d0e7adc8..d18f97e10c5 100644 --- a/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate16.py +++ b/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate16.py @@ -28,13 +28,14 @@ import wttest, wiredtiger from helper_disagg import disagg_test_class, gen_disagg_storages +from helper_layered_fast_truncate import LayeredFastTruncateConfigMixin from wtscenario import make_scenarios # test_layered_fast_truncate16.py # Verify that pending follower truncates land on stable when the follower steps up, # across the variety of per-key shapes and edge cases. 
@disagg_test_class -class test_layered_fast_truncate_stepup(wttest.WiredTigerTestCase): +class test_layered_fast_truncate_stepup(LayeredFastTruncateConfigMixin, wttest.WiredTigerTestCase): conn_config = 'disaggregated=(role="leader")' uri = 'layered:test_layered_fast_truncate_stepup' @@ -53,22 +54,10 @@ class test_layered_fast_truncate_stepup(wttest.WiredTigerTestCase): self.conn.set_timestamp('stable_timestamp=' + self.timestamp_str(ts)) self.session.checkpoint() - # Open a separate follower connection, create the table on both sides, leader populates, - # follower picks up the checkpoint. After this, follower-side ops run on session_follow. def setup_follower(self): - self.conn_follow = self.wiredtiger_open( - 'follower', - self.extensionsConfig() + ',create,disaggregated=(role="follower")') - self.session_follow = self.conn_follow.open_session('') self.session.create(self.uri, 'key_format=i,value_format=S') - self.session_follow.create(self.uri, 'key_format=i,value_format=S') self.populate_on_leader() - self.disagg_advance_checkpoint(self.conn_follow) - - # Step up the follower (which becomes the new leader) and step the original leader down. 
- def step_up(self): - self.ignoreStdoutPattern('Picking up the same checkpoint') - self.disagg_switch_follower_and_leader(self.conn_follow) + self.conn_follow, self.session_follow = self.open_follower() def write_kv(self, key, value, ts): cursor = self.session_follow.open_cursor(self.uri) @@ -97,26 +86,18 @@ class test_layered_fast_truncate_stepup(wttest.WiredTigerTestCase): c_stop.close() def assert_visible(self, keys, value=None, ts=None): - self.session_follow.begin_transaction('read_timestamp=' + self.timestamp_str(ts)) - cursor = self.session_follow.open_cursor(self.uri) for k in keys: - cursor.set_key(k) - self.assertEqual(cursor.search(), 0, f"key {k} should be visible at ts={ts}") + ret, val = self.search_at(self.session_follow, k, ts) + self.assertEqual(ret, 0, f"key {k} should be visible at ts={ts}") if value is not None: expected = value(k) if callable(value) else value - self.assertEqual(cursor.get_value(), expected) - cursor.close() - self.session_follow.rollback_transaction() + self.assertEqual(val, expected) def assert_deleted(self, keys, ts): - self.session_follow.begin_transaction('read_timestamp=' + self.timestamp_str(ts)) - cursor = self.session_follow.open_cursor(self.uri) for k in keys: - cursor.set_key(k) - self.assertEqual(cursor.search(), wiredtiger.WT_NOTFOUND, + ret, _ = self.search_at(self.session_follow, k, ts) + self.assertEqual(ret, wiredtiger.WT_NOTFOUND, f"key {k} should be deleted at ts={ts}") - cursor.close() - self.session_follow.rollback_transaction() def assert_keys_gone(self, ranges): # Sweep the populated key space: keys inside any (lo, hi) inclusive range must be diff --git a/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate17.py b/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate17.py index a343d3a4739..b10bf0dadaa 100644 --- a/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate17.py +++ b/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate17.py @@ -28,6 +28,7 @@ 
import wiredtiger, wttest from helper_disagg import disagg_test_class, gen_disagg_storages +from helper_layered_fast_truncate import LayeredFastTruncateConfigMixin from wtscenario import make_scenarios from wiredtiger import stat @@ -35,7 +36,7 @@ from wiredtiger import stat # Verify that step-up replay uses fast page truncation (WT_REF_DELETED) when # replaying follower truncates. @disagg_test_class -class test_layered_fast_truncate17(wttest.WiredTigerTestCase): +class test_layered_fast_truncate17(LayeredFastTruncateConfigMixin, wttest.WiredTigerTestCase): conn_config = 'disaggregated=(role="leader")' uri = 'layered:test_layered_ft_replay' @@ -45,12 +46,6 @@ class test_layered_fast_truncate17(wttest.WiredTigerTestCase): disagg_storages = gen_disagg_storages('test_layered_ft_replay', disagg_only=True) scenarios = make_scenarios(disagg_storages) - def get_stat(self, conn, stat_key): - s = conn.open_session('') - val = s.open_cursor('statistics:')[stat_key][2] - s.close() - return val - def populate_on_leader(self, ts=10): cursor = self.session.open_cursor(self.uri) for i in range(self.nitems): @@ -58,23 +53,12 @@ class test_layered_fast_truncate17(wttest.WiredTigerTestCase): cursor[i] = 'v' self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(ts)) cursor.close() - self.conn.set_timestamp('stable_timestamp=' + self.timestamp_str(ts) + - ',oldest_timestamp=' + self.timestamp_str(1)) - self.session.checkpoint() + self.leader_checkpoint(ts) def setup_follower(self): - self.conn_follow = self.wiredtiger_open( - 'follower', - self.extensionsConfig() + ',create,statistics=(all),disaggregated=(role="follower")') - self.session_follow = self.conn_follow.open_session('') self.session.create(self.uri, self.table_config) - self.session_follow.create(self.uri, self.table_config) self.populate_on_leader() - self.disagg_advance_checkpoint(self.conn_follow) - - def step_up(self): - self.ignoreStdoutPattern('Picking up the same checkpoint') - 
self.disagg_switch_follower_and_leader(self.conn_follow) + self.conn_follow, self.session_follow = self.open_follower(self.table_config) def truncate_range(self, start_key, stop_key, ts): c_start = self.session_follow.open_cursor(self.uri) diff --git a/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate18.py b/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate18.py index d4b65e96bb2..3cee3d86b9f 100644 --- a/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate18.py +++ b/src/third_party/wiredtiger/test/suite/test_layered_fast_truncate18.py @@ -30,22 +30,16 @@ # Write conflict detection for follower fast truncate (truncate-truncate # conflicts only). -import unittest from contextlib import closing, nullcontext -from typing import Iterable from helper_disagg import disagg_test_class, gen_disagg_storages +from helper_layered_fast_truncate import LayeredFastTruncateConfigMixin, range_inclusive from wiredtiger import WiredTigerError from wtscenario import make_scenarios import wttest -def range_inclusive(start: int, stop: int) -> range: - """Return a range covering [start, stop] inclusive.""" - return range(start, stop + 1) - - @disagg_test_class -class test_layered_fast_truncate18(wttest.WiredTigerTestCase): +class test_layered_fast_truncate18(LayeredFastTruncateConfigMixin, wttest.WiredTigerTestCase): """ Write conflict detection for follower fast truncate (truncate-truncate conflicts only). @@ -62,51 +56,32 @@ class test_layered_fast_truncate18(wttest.WiredTigerTestCase): CONFLICT_MSG = "/conflict between concurrent operations/" - def session_create_config(self) -> str: - """Return a config string for session.create() based on table URI.""" - cfg = "key_format=i,value_format=S" - if self.uri.startswith("table"): - cfg += ",block_manager=disagg,type=layered" - return cfg + # These helpers are local to 18 because they all take an explicit session + # (the conflict tests drive two sessions concurrently). 
The equivalent + # mixin helpers are bound to self.session and so are not reusable here. - def auto_closing_cursor(self, session) -> closing: - """Return a cursor that auto-closes as it goes out of scope.""" + def cursor_on(self, session): + """Return a cursor on the given session that auto-closes.""" return closing(session.open_cursor(self.uri)) - def auto_closing_session(self) -> closing: + def auto_closing_session(self): """Return a session that auto-closes as it goes out of scope.""" return closing(self.conn.open_session()) - def populate(self, keys: Iterable[int]): - """Insert each key with a placeholder value in a single transaction.""" - with self.auto_closing_cursor(self.session) as cursor: - with self.transaction(): - for key in keys: - cursor[key] = "v" - - def setup_leader(self, keys: Iterable[int] | None = None): - """Create the table on the leader and optionally populate stable.""" - self.session.create(self.uri, self.session_create_config()) - if keys is not None: - self.populate(keys) - self.session.checkpoint() - - def setup_follower(self, keys: Iterable[int] | None = None): - """Switch to follower role and optionally write keys to ingest.""" - self.reopen_disagg_conn('disaggregated=(role="follower"),') - if keys is not None: - self.populate(keys) - - def cursor_for_key(self, key: int | None, session): + def cursor_for_key(self, key, session): """Return a cursor with its key set, or None if key is None.""" if key is None: return nullcontext(None) - cursor = self.auto_closing_cursor(session) + cursor = self.cursor_on(session) cursor.thing.set_key(key) return cursor - def truncate(self, session, start_key: int | None, stop_key: int | None): - """Execute a truncate from start to stop key inclusive.""" + def truncate_on(self, session, start_key, stop_key): + """ + Truncate [start_key, stop_key] inclusive on the given session. + Caller manages the transaction (the conflict tests inspect the + truncate's failure/success inside a hand-managed txn). 
+ """ with ( self.cursor_for_key(start_key, session) as start, self.cursor_for_key(stop_key, session) as stop, @@ -121,8 +96,8 @@ class test_layered_fast_truncate18(wttest.WiredTigerTestCase): # Within a single transaction: truncate 30-60, then truncate 40-80. with self.transaction(): - self.truncate(self.session, 30, 60) - self.truncate(self.session, 40, 80) + self.truncate_on(self.session, 30, 60) + self.truncate_on(self.session, 40, 80) # The transaction committed; no WT_ROLLBACK raised. @@ -134,7 +109,7 @@ class test_layered_fast_truncate18(wttest.WiredTigerTestCase): # txn A begins a truncate over 30-60 and leaves it uncommitted. session_a = self.session session_a.begin_transaction() - self.truncate(session_a, 30, 60) + self.truncate_on(session_a, 30, 60) # txn B truncates overlapping range 40-70 and gets WT_ROLLBACK. with ( @@ -143,7 +118,7 @@ class test_layered_fast_truncate18(wttest.WiredTigerTestCase): ): self.assertRaisesException( WiredTigerError, - lambda: self.truncate(session_b, 40, 70), + lambda: self.truncate_on(session_b, 40, 70), self.CONFLICT_MSG, ) @@ -155,7 +130,7 @@ class test_layered_fast_truncate18(wttest.WiredTigerTestCase): # txn A begins a truncate over 30-60 and leaves it uncommitted. session_a = self.session session_a.begin_transaction() - self.truncate(session_a, 30, 60) + self.truncate_on(session_a, 30, 60) # txn B truncates overlapping range 40-70 and gets WT_ROLLBACK. with ( @@ -164,7 +139,7 @@ class test_layered_fast_truncate18(wttest.WiredTigerTestCase): ): self.assertRaisesException( WiredTigerError, - lambda: self.truncate(session_b, 40, 70), + lambda: self.truncate_on(session_b, 40, 70), self.CONFLICT_MSG, ) @@ -176,14 +151,14 @@ class test_layered_fast_truncate18(wttest.WiredTigerTestCase): # txn A truncates 10-30 and leaves it uncommitted. 
session_a = self.session session_a.begin_transaction() - self.truncate(session_a, 10, 30) + self.truncate_on(session_a, 10, 30) # txn B truncates 50-70 (no overlap) and commits successfully. with ( self.auto_closing_session() as session_b, self.transaction(session=session_b), ): - self.truncate(session_b, 50, 70) + self.truncate_on(session_b, 50, 70) def test_rolled_back_truncate_no_residual(self): # A follower with stable keys 1-100. @@ -193,14 +168,14 @@ class test_layered_fast_truncate18(wttest.WiredTigerTestCase): # txn A truncates 30-60 then explicitly rolls back. session_a = self.session with self.transaction(session=session_a, rollback=True): - self.truncate(session_a, 30, 60) + self.truncate_on(session_a, 30, 60) # txn B truncates the same range 30-60 and commits without WT_ROLLBACK. with ( self.auto_closing_session() as session_b, self.transaction(session=session_b), ): - self.truncate(session_b, 30, 60) + self.truncate_on(session_b, 30, 60) def test_invisible_committed_truncate_conflicts(self): # A follower with stable keys 1-100. @@ -210,7 +185,7 @@ class test_layered_fast_truncate18(wttest.WiredTigerTestCase): # txn A commits a truncate over 30-60 at ts=10 (invisible to txn B). self.conn.set_timestamp("oldest_timestamp=" + self.timestamp_str(1)) with self.transaction(commit_timestamp=10): - self.truncate(self.session, 30, 60) + self.truncate_on(self.session, 30, 60) # txn B (read_ts=5) truncates overlapping range 40-70 and gets # WT_ROLLBACK. @@ -222,7 +197,7 @@ class test_layered_fast_truncate18(wttest.WiredTigerTestCase): ): self.assertRaisesException( WiredTigerError, - lambda: self.truncate(session_b, 40, 70), + lambda: self.truncate_on(session_b, 40, 70), self.CONFLICT_MSG, ) @@ -234,7 +209,7 @@ class test_layered_fast_truncate18(wttest.WiredTigerTestCase): # txn A commits a truncate over 30-60 at ts=5 (visible to txn B). 
self.conn.set_timestamp("oldest_timestamp=" + self.timestamp_str(1)) with self.transaction(commit_timestamp=5): - self.truncate(self.session, 30, 60) + self.truncate_on(self.session, 30, 60) # txn B (read_ts=10) truncates overlapping range 40-70 without # WT_ROLLBACK. @@ -242,7 +217,7 @@ class test_layered_fast_truncate18(wttest.WiredTigerTestCase): self.auto_closing_session() as session_b, self.transaction(session=session_b, read_timestamp=10), ): - self.truncate(session_b, 40, 70) + self.truncate_on(session_b, 40, 70) if __name__ == "__main__": diff --git a/src/third_party/wiredtiger/test/suite/test_layered_prepare03.py b/src/third_party/wiredtiger/test/suite/test_layered_prepare03.py new file mode 100644 index 00000000000..308cce47b9c --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_layered_prepare03.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python3 +# +# Public Domain 2014-present MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +import wiredtiger, wttest +from helper_disagg import disagg_test_class + +# test_layered_prepare03.py +# Forward iteration on a layered cursor after the very first next() returns +# WT_PREPARE_CONFLICT must resume correctly and return all visible keys. + +@disagg_test_class +class test_layered_prepare03(wttest.WiredTigerTestCase): + + conn_base_config = 'precise_checkpoint=true,' + conn_config = conn_base_config + 'disaggregated=(role="leader")' + + def safe_next(self, cursor): + try: + return cursor.next() + except wiredtiger.WiredTigerError as e: + if 'WT_PREPARE_CONFLICT' in str(e): + return wiredtiger.WT_PREPARE_CONFLICT + raise + + def test_iterate_after_prepare_conflict_on_first_key(self): + ''' + A layered cursor that encounters WT_PREPARE_CONFLICT on its very first + next() call must resume from the beginning after the conflict is resolved + and return all stable keys. + ''' + uri = 'table:test_layered_prepare03' + stable_keys = ['1', '2', '3'] + + # Write stable keys on the leader and checkpoint. + self.session.create( + uri, 'key_format=S,value_format=S,block_manager=disagg,type=layered') + with self.transaction(session=self.session, commit_timestamp=100): + c = self.session.open_cursor(uri) + for k in stable_keys: + c[k] = 'stable_' + k + c.close() + self.conn.set_timestamp(f'stable_timestamp={self.timestamp_str(200)}') + self.session.checkpoint() + + # Open a follower and pull in the stable checkpoint. 
+ conn_follow = self.wiredtiger_open( + 'follower', + self.extensionsConfig() + ',create,' + self.conn_base_config + + 'disaggregated=(role="follower")') + self.disagg_advance_checkpoint(conn_follow) + + # Prepare an ingest update for key '1' so that the first next() on the + # layered cursor returns WT_PREPARE_CONFLICT. + prep_session = conn_follow.open_session('') + prep_cursor = prep_session.open_cursor(uri) + prep_session.begin_transaction() + prep_cursor['1'] = 'prepared_update' + prep_cursor.close() + prep_session.prepare_transaction( + f'prepare_timestamp={self.timestamp_str(300)}' + + f',prepared_id={self.prepared_id_str(1)}') + + # Read-committed isolation: the transaction sees the prepared update as + # a conflict on the very first next() call. + iter_session = conn_follow.open_session('') + iter_session.begin_transaction('isolation=read-committed') + iter_cursor = iter_session.open_cursor(uri) + + # First next() must hit the prepared key and return WT_PREPARE_CONFLICT. + self.assertEqual(self.safe_next(iter_cursor), wiredtiger.WT_PREPARE_CONFLICT) + + # Resolve the conflict and verify that subsequent iteration returns all + # stable keys from the beginning. 
+ prep_session.rollback_transaction() + + got = [] + ret = iter_cursor.next() + while ret == 0: + got.append(iter_cursor.get_key()) + ret = iter_cursor.next() + self.assertEqual(ret, wiredtiger.WT_NOTFOUND) + self.assertEqual(got, stable_keys) + + iter_cursor.close() + iter_session.rollback_transaction() + prep_session.close() + conn_follow.close() diff --git a/src/third_party/wiredtiger/test/suite/test_prepare35.py b/src/third_party/wiredtiger/test/suite/test_prepare35.py index 40ae5f56851..533a06fe63a 100644 --- a/src/third_party/wiredtiger/test/suite/test_prepare35.py +++ b/src/third_party/wiredtiger/test/suite/test_prepare35.py @@ -87,7 +87,7 @@ class test_prepare35(test_prepare_preserve_prepare_base): session_evict.close() # Step 4: Rollback the first prepared transaction - # This prepends a globally visible tombstone + # This appends a globally visible tombstone to the tail of the update chain session_prepare.rollback_transaction("rollback_timestamp=" + self.timestamp_str(35)) session_prepare.close() diff --git a/src/third_party/wiredtiger/test/suite/test_prepare47.py b/src/third_party/wiredtiger/test/suite/test_prepare47.py index f9689d7e983..95115f81585 100644 --- a/src/third_party/wiredtiger/test/suite/test_prepare47.py +++ b/src/third_party/wiredtiger/test/suite/test_prepare47.py @@ -160,13 +160,13 @@ class test_prepare47(wttest.WiredTigerTestCase): evict_session.close() def test_aborted_prepared_with_lost_disk_fallback(self): - # Theory: at rollback time, first_committed_upd is NULL (no committed update behind - # the prepared insert) but tw_found is true (on-disk cell with stop is the fallback), - # so __txn_prepare_rollback_delete_key is not called and no rollback tombstone is - # prepended. Later, a reconcile drops the on-disk cell (its stop is globally visible - # and nothing is selected for the key), erasing the only fallback. 
A subsequent - # reconcile that walks the surviving aborted prepared update has neither a rollback - # tombstone nor an on-disk fallback, tripping the leaked-prepared-update assertion. + # Theory: at rollback time there is no committed update behind the prepared insert, + # but there is an on-disk cell with a stop that serves as the fallback, so no rollback + # tombstone is appended to the chain. Later, a reconcile drops the on-disk cell (its + # stop is globally visible and nothing is selected for the key), erasing the only + # fallback. A subsequent reconcile that walks the surviving aborted prepared update has + # neither a rollback tombstone nor an on-disk fallback, tripping the + # leaked-prepared-update assertion. insert_ts = 20 delete_ts = 30 oldest_after_delete = 31 @@ -224,8 +224,9 @@ class test_prepare47(wttest.WiredTigerTestCase): self.conn.set_timestamp( 'stable_timestamp=' + self.timestamp_str(stable_unstable)) - # Roll back with rollback_ts ahead of stable; first_committed_upd is NULL but - # tw_found is true so no rollback tombstone is prepended. + # Roll back with rollback_ts ahead of stable; there is no committed update behind the + # prepared insert but the on-disk cell exists, so no rollback tombstone is appended to + # the chain. self.session.rollback_transaction( 'rollback_timestamp=' + self.timestamp_str(rollback_ts))