Import wiredtiger: 9d2e4ce64fa8248ce21daa252e79528da59bc5d5 from branch mongodb-master (#54508)
GitOrigin-RevId: f0a6209b8e95fa30174606036a240e325e2d1947
This commit is contained in:
parent
837806ab05
commit
e7ebec401e
@ -413,34 +413,34 @@ if(ENABLE_DEBUG_INFO AND NOT WT_DEBUG_FLAGS_INITIALIZED)
|
||||
set(BUILD_TYPES_WITH_DEBUG_INFO ${BUILD_MODES})
|
||||
list(REMOVE_ITEM BUILD_TYPES_WITH_DEBUG_INFO Release)
|
||||
|
||||
set(DEBUG_INFO_FLAGS)
|
||||
if(GNU_C_COMPILER OR CLANG_C_COMPILER)
|
||||
# Higher debug levels `-g3`/`-ggdb3` emit additional debug information, including
|
||||
# macro definitions that allow us to evaluate macros such as `p S2C(session)` inside of gdb.
|
||||
# This needs to be in DWARF version 2 format or later - and should be by default - but
|
||||
# we'll specify version 4 here to be safe.
|
||||
list(APPEND DEBUG_INFO_FLAGS -g3 -gdwarf-4)
|
||||
# DWARF v4 is supplied explicitly to be safe across toolchain defaults.
|
||||
set(debug_info_flags "-g3 -gdwarf-4")
|
||||
if(CLANG_C_COMPILER)
|
||||
# Clang requires one additional flag to output macro debug information.
|
||||
list(APPEND DEBUG_INFO_FLAGS -glldb -fdebug-macro)
|
||||
string(APPEND debug_info_flags " -glldb -fdebug-macro")
|
||||
else()
|
||||
list(APPEND DEBUG_INFO_FLAGS -ggdb3)
|
||||
string(APPEND debug_info_flags " -ggdb3")
|
||||
endif()
|
||||
|
||||
add_cmake_compiler_flags(
|
||||
FLAGS ${DEBUG_INFO_FLAGS}
|
||||
LANGUAGES C CXX
|
||||
BUILD_TYPES ${BUILD_TYPES_WITH_DEBUG_INFO}
|
||||
)
|
||||
foreach(build_type IN LISTS BUILD_TYPES_WITH_DEBUG_INFO)
|
||||
string(TOUPPER "${build_type}" BT)
|
||||
set(CMAKE_C_FLAGS_${BT}
|
||||
"${CMAKE_C_FLAGS_${BT}} ${debug_info_flags}" CACHE STRING "" FORCE)
|
||||
set(CMAKE_CXX_FLAGS_${BT}
|
||||
"${CMAKE_CXX_FLAGS_${BT}} ${debug_info_flags}" CACHE STRING "" FORCE)
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
# MSVC: ensure linker produces PDBs.
|
||||
if(MSVC_C_COMPILER)
|
||||
add_cmake_linker_flags(
|
||||
FLAGS "/DEBUG"
|
||||
BINARIES EXE SHARED
|
||||
BUILD_TYPES ${BUILD_TYPES_WITH_DEBUG_INFO}
|
||||
)
|
||||
foreach(build_type IN LISTS BUILD_TYPES_WITH_DEBUG_INFO)
|
||||
string(TOUPPER "${build_type}" BT)
|
||||
set(CMAKE_EXE_LINKER_FLAGS_${BT}
|
||||
"${CMAKE_EXE_LINKER_FLAGS_${BT}} /DEBUG" CACHE STRING "" FORCE)
|
||||
set(CMAKE_SHARED_LINKER_FLAGS_${BT}
|
||||
"${CMAKE_SHARED_LINKER_FLAGS_${BT}} /DEBUG" CACHE STRING "" FORCE)
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
# Mark that we've set the initial debug flags
|
||||
|
||||
@ -80,29 +80,36 @@ function(define_build_mode mode)
|
||||
string(REPLACE ";" " " cxx_flags "${DEFINE_BUILD_CXX_COMPILER_FLAGS}")
|
||||
string(REPLACE ";" " " linker_flags "${linker_flags}")
|
||||
string(TOUPPER ${mode} build_mode)
|
||||
set(CMAKE_C_FLAGS_${build_mode}
|
||||
"${c_flags}" CACHE STRING
|
||||
"Flags used by the C compiler for ${mode} build type or configuration." FORCE)
|
||||
|
||||
set(CMAKE_CXX_FLAGS_${build_mode}
|
||||
"${cxx_flags}" CACHE STRING
|
||||
"Flags used by the C++ compiler for ${mode} build type or configuration." FORCE)
|
||||
# Seed the default flags for this build mode exactly once per build dir.
|
||||
if(NOT WT_BUILD_MODE_${build_mode}_FLAGS_INITIALIZED)
|
||||
set(CMAKE_C_FLAGS_${build_mode}
|
||||
"${c_flags}" CACHE STRING
|
||||
"Flags used by the C compiler for ${mode} build type or configuration." FORCE)
|
||||
|
||||
set(CMAKE_EXE_LINKER_FLAGS_${build_mode}
|
||||
"${linker_flags}" CACHE STRING
|
||||
"Linker flags to be used to create executables for ${mode} build type." FORCE)
|
||||
set(CMAKE_CXX_FLAGS_${build_mode}
|
||||
"${cxx_flags}" CACHE STRING
|
||||
"Flags used by the C++ compiler for ${mode} build type or configuration." FORCE)
|
||||
|
||||
set(CMAKE_SHARED_LINKER_FLAGS_${build_mode}
|
||||
"${linker_flags}" CACHE STRING
|
||||
"Linker flags to be used to create shared libraries for ${mode} build type." FORCE)
|
||||
set(CMAKE_EXE_LINKER_FLAGS_${build_mode}
|
||||
"${linker_flags}" CACHE STRING
|
||||
"Linker flags to be used to create executables for ${mode} build type." FORCE)
|
||||
|
||||
set(CMAKE_MODULE_LINKER_FLAGS_${build_mode}
|
||||
"${linker_flags}" CACHE STRING
|
||||
"Linker flags to be used to create shared modules for ${mode} build type." FORCE)
|
||||
set(CMAKE_SHARED_LINKER_FLAGS_${build_mode}
|
||||
"${linker_flags}" CACHE STRING
|
||||
"Linker flags to be used to create shared libraries for ${mode} build type." FORCE)
|
||||
|
||||
set(CMAKE_MODULE_LINKER_FLAGS_${build_mode}
|
||||
"${linker_flags}" CACHE STRING
|
||||
"Linker flags to be used to create shared modules for ${mode} build type." FORCE)
|
||||
|
||||
set(WT_BUILD_MODE_${build_mode}_FLAGS_INITIALIZED TRUE CACHE INTERNAL
|
||||
"WiredTiger ${mode} build mode flags have been initialized")
|
||||
endif()
|
||||
|
||||
mark_as_advanced(
|
||||
CMAKE_CXX_FLAGS_${build_mode}
|
||||
CMAKE_C_FLAGS_${build_mode}
|
||||
CMAKE_CXX_FLAGS_${build_mode}
|
||||
CMAKE_EXE_LINKER_FLAGS_${build_mode}
|
||||
CMAKE_SHARED_LINKER_FLAGS_${build_mode}
|
||||
CMAKE_MODULE_LINKER_FLAGS_${build_mode}
|
||||
|
||||
93
src/third_party/wiredtiger/cmake/helpers.cmake
vendored
93
src/third_party/wiredtiger/cmake/helpers.cmake
vendored
@ -441,99 +441,6 @@ function(add_cmake_flag included_flags flag)
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
# add_cmake_compiler_flags(FLAGS <flags...> LANGUAGES <languages...> BUILD_TYPES <build_types...>)
|
||||
# A helper function that adds one or more compiler flags to specified languages and build types,
|
||||
# avoiding duplication by using the existing add_cmake_flag function.
|
||||
# FLAGS <flags...> - one or more compilation flags to add
|
||||
# LANGUAGES <languages...> - one or more languages (C, CXX, etc.)
|
||||
# BUILD_TYPES <build_types...> - one or more build types (Debug, RelWithDebInfo, Release, etc.)
|
||||
function(add_cmake_compiler_flags)
|
||||
cmake_parse_arguments(
|
||||
PARSE_ARGV
|
||||
0
|
||||
"COMPILER_FLAGS"
|
||||
""
|
||||
""
|
||||
"FLAGS;LANGUAGES;BUILD_TYPES"
|
||||
)
|
||||
|
||||
# Validate required arguments
|
||||
if(NOT COMPILER_FLAGS_FLAGS)
|
||||
message(FATAL_ERROR "add_cmake_compiler_flags: FLAGS argument is required")
|
||||
endif()
|
||||
if(NOT COMPILER_FLAGS_LANGUAGES)
|
||||
message(FATAL_ERROR "add_cmake_compiler_flags: LANGUAGES argument is required")
|
||||
endif()
|
||||
if(NOT COMPILER_FLAGS_BUILD_TYPES)
|
||||
message(FATAL_ERROR "add_cmake_compiler_flags: BUILD_TYPES argument is required")
|
||||
endif()
|
||||
|
||||
# Add each flag to each language/build_type combination
|
||||
foreach(lang ${COMPILER_FLAGS_LANGUAGES})
|
||||
foreach(build_type ${COMPILER_FLAGS_BUILD_TYPES})
|
||||
# Convert build type to uppercase for CMAKE variable names
|
||||
string(TOUPPER "${build_type}" build_type_upper)
|
||||
|
||||
# Initialize the flags variable if not already defined
|
||||
if(NOT DEFINED CMAKE_${lang}_FLAGS_${build_type_upper})
|
||||
set(CMAKE_${lang}_FLAGS_${build_type_upper} "")
|
||||
endif()
|
||||
|
||||
# Add each flag while avoiding duplication
|
||||
foreach(flag ${COMPILER_FLAGS_FLAGS})
|
||||
add_cmake_flag(CMAKE_${lang}_FLAGS_${build_type_upper} "${flag}")
|
||||
endforeach()
|
||||
endforeach()
|
||||
endforeach()
|
||||
endfunction()
|
||||
|
||||
# add_cmake_linker_flags(FLAGS <flags...> BINARIES <binaries...> BUILD_TYPES <build_types...>)
|
||||
# A helper function that adds one or more linker flags to specified binary types and build types,
|
||||
# avoiding duplication by using the existing add_cmake_flag function.
|
||||
# FLAGS <flags...> - one or more linker flags to add
|
||||
# BINARIES <binaries...> - one or more binary types (EXE, SHARED, MODULE, etc.)
|
||||
# BUILD_TYPES <build_types...> - one or more build types (Debug, RelWithDebInfo, Release, etc.)
|
||||
function(add_cmake_linker_flags)
|
||||
cmake_parse_arguments(
|
||||
PARSE_ARGV
|
||||
0
|
||||
"LINKER_FLAGS"
|
||||
""
|
||||
""
|
||||
"FLAGS;BINARIES;BUILD_TYPES"
|
||||
)
|
||||
|
||||
# Validate required arguments
|
||||
if(NOT LINKER_FLAGS_FLAGS)
|
||||
message(FATAL_ERROR "add_cmake_linker_flags: FLAGS argument is required")
|
||||
endif()
|
||||
if(NOT LINKER_FLAGS_BINARIES)
|
||||
message(FATAL_ERROR "add_cmake_linker_flags: BINARIES argument is required")
|
||||
endif()
|
||||
if(NOT LINKER_FLAGS_BUILD_TYPES)
|
||||
message(FATAL_ERROR "add_cmake_linker_flags: BUILD_TYPES argument is required")
|
||||
endif()
|
||||
|
||||
# Add each flag to each binary_type/build_type combination
|
||||
foreach(binary ${LINKER_FLAGS_BINARIES})
|
||||
foreach(build_type ${LINKER_FLAGS_BUILD_TYPES})
|
||||
# Convert build type to uppercase for CMAKE variable names
|
||||
string(TOUPPER "${build_type}" build_type_upper)
|
||||
|
||||
# Initialize the flags variable if not already defined
|
||||
if(NOT DEFINED CMAKE_${binary}_LINKER_FLAGS_${build_type_upper})
|
||||
set(CMAKE_${binary}_LINKER_FLAGS_${build_type_upper} "")
|
||||
endif()
|
||||
|
||||
# Add each flag while avoiding duplication
|
||||
foreach(flag ${LINKER_FLAGS_FLAGS})
|
||||
add_cmake_flag(CMAKE_${binary}_LINKER_FLAGS_${build_type_upper} "${flag}")
|
||||
endforeach()
|
||||
endforeach()
|
||||
endforeach()
|
||||
endfunction()
|
||||
|
||||
|
||||
# replace_compile_options(flag_var [REMOVE <flags...>] [ADD <flags...>])
|
||||
# A helper function that removes specified compiler flags from a flag variable and optionally adds new ones.
|
||||
# This is useful for replacing default compiler flags with custom ones while maintaining clean flag strings.
|
||||
|
||||
@ -4,11 +4,9 @@ include(cmake/rcpc_test.cmake)
|
||||
# ARMv8-A is the 64-bit ARM architecture, turn on the optional CRC.
|
||||
# If the compilation check in rcpc_test passes also turn on the RCpc instructions.
|
||||
if(HAVE_RCPC)
|
||||
add_cmake_flag(CMAKE_C_FLAGS -march=armv8.2-a+rcpc+crc)
|
||||
add_cmake_flag(CMAKE_CXX_FLAGS -march=armv8.2-a+rcpc+crc)
|
||||
add_compile_options(-march=armv8.2-a+rcpc+crc)
|
||||
else()
|
||||
add_cmake_flag(CMAKE_C_FLAGS -march=armv8-a+crc)
|
||||
add_cmake_flag(CMAKE_CXX_FLAGS -march=armv8-a+crc)
|
||||
add_compile_options(-march=armv8-a+crc)
|
||||
endif()
|
||||
|
||||
# moutline-atomics preserves backwards compatibility with Arm v8.0 systems but also supports
|
||||
@ -17,6 +15,6 @@ endif()
|
||||
# the flag.
|
||||
check_c_compiler_flag("-moutline-atomics" has_moutline_atomics)
|
||||
if(has_moutline_atomics)
|
||||
add_cmake_flag(CMAKE_C_FLAGS -moutline-atomics)
|
||||
add_compile_options(-moutline-atomics)
|
||||
endif()
|
||||
unset(has_moutline_atomics CACHE)
|
||||
|
||||
@ -1,4 +1,3 @@
|
||||
# See https://www.sifive.com/blog/all-aboard-part-1-compiler-args
|
||||
# for background on the `rv64imafdc` and `lp64d` arguments here.
|
||||
add_cmake_flag(CMAKE_C_FLAGS -march=rv64imafdc)
|
||||
add_cmake_flag(CMAKE_C_FLAGS -mabi=lp64d)
|
||||
add_compile_options(-march=rv64imafdc -mabi=lp64d)
|
||||
|
||||
@ -1,6 +1,5 @@
|
||||
set(WT_POSIX ON CACHE BOOL "")
|
||||
|
||||
# Linux requires '_GNU_SOURCE' to be defined for access to GNU/Linux extension functions
|
||||
# e.g. Access to 'pthread_setname_np' on Linux. Append this macro to our compiler flags
|
||||
# for Linux-based builds.
|
||||
add_cmake_flag(CMAKE_C_FLAGS -D_GNU_SOURCE)
|
||||
# e.g. 'pthread_setname_np'.
|
||||
add_compile_definitions(_GNU_SOURCE)
|
||||
|
||||
11
src/third_party/wiredtiger/dist/api_data.py
vendored
11
src/third_party/wiredtiger/dist/api_data.py
vendored
@ -689,8 +689,7 @@ connection_runtime_config = [
|
||||
if true, for operations with snapshot isolation the cursor temporarily releases any page
|
||||
that requires force eviction, then repositions back to the page for further operations.
|
||||
A page release encourages eviction of hot or large pages, which is more likely to
|
||||
succeed without a cursor keeping the page pinned. Note: This setting is not compatible
|
||||
with disaggregated storage.''',
|
||||
succeed without a cursor keeping the page pinned.''',
|
||||
type='boolean'),
|
||||
Config('disagg_address_cookie_upgrade', 'none', r'''
|
||||
modify the disaggregated block manager to pretend that it is a newer version to test
|
||||
@ -2238,7 +2237,13 @@ methods = {
|
||||
),
|
||||
'WT_CONNECTION.set_file_system' : Method([]),
|
||||
|
||||
'WT_CONNECTION.set_key_provider' : Method([]),
|
||||
'WT_CONNECTION.set_key_provider' : Method([
|
||||
Config('version', '0', r'''
|
||||
the key provider API version. Version 0 uses the pull model
|
||||
(WiredTiger calls WT_KEY_PROVIDER::get_key). Version 1 uses
|
||||
the push model''',
|
||||
min=0, max=1),
|
||||
]),
|
||||
|
||||
'WT_CONNECTION.load_extension' : Method([
|
||||
Config('config', '', r'''
|
||||
|
||||
12
src/third_party/wiredtiger/dist/s_copyright
vendored
12
src/third_party/wiredtiger/dist/s_copyright
vendored
@ -125,21 +125,23 @@ ENDOFTEXT
|
||||
# Parallel execution: if it's the main invocation of the script, collect the file names
|
||||
# to process and run them in subprocesses.
|
||||
|
||||
# Search for files, skipping some well-known 3rd party directories.
|
||||
find [a-z]* -name '*.[ch]' \
|
||||
# Search for files in explicit source directories, skipping any absent in this tree.
|
||||
dirs=()
|
||||
for d in bench dist docs examples ext lang oss src test tools; do
|
||||
[ -d "$d" ] && dirs+=("$d")
|
||||
done
|
||||
find "${dirs[@]}" \
|
||||
-name '*.[ch]' \
|
||||
-o -name '*.cpp' \
|
||||
-o -name '*.in' \
|
||||
-o -name '*.py' \
|
||||
-o -name '*.swig' |
|
||||
sed -e '/Makefile.in/d' \
|
||||
-e '/^build\//d' \
|
||||
-e '/^cmake\//d' \
|
||||
-e '/checksum\/power8\//d' \
|
||||
-e '/checksum\/zseries\//d' \
|
||||
-e '/\/3rdparty\//d' \
|
||||
-e '/\/node_modules\//d' \
|
||||
-e '/^tools\/wt-mcp\/\.venv\//d' \
|
||||
-e '/^venv\//d' \
|
||||
-e '/dist\/__/d' \
|
||||
-e 's/^\.\///' |
|
||||
do_in_parallel || RET=1
|
||||
|
||||
23
src/third_party/wiredtiger/dist/s_mentions
vendored
23
src/third_party/wiredtiger/dist/s_mentions
vendored
@ -26,23 +26,12 @@ fi
|
||||
# Get what could be the ticket id.
|
||||
ticket_id=$(echo "$branch_name" | cut -d "-" -f-2)
|
||||
|
||||
search_function="grep -Iinr --exclude-dir=.git"
|
||||
|
||||
# Find the name of the build folders WiredTiger has been compiled in.
|
||||
# Users can name this folder anything, but it needs to be in the rootdir and to contain CMakeFiles
|
||||
build_files=$(find ../ -maxdepth 2 -name CMakeFiles)
|
||||
for build_dir in $build_files; do
|
||||
build_folder=$(basename $(dirname $build_dir))
|
||||
search_function="$search_function --exclude-dir=$build_folder"
|
||||
done
|
||||
|
||||
search_function="$search_function $ticket_id ../ 2>&1"
|
||||
|
||||
# Check for comments related to the ticket.
|
||||
if eval "$search_function >/dev/null" ; then
|
||||
echo "There are comments mentioning $ticket_id in the code, please check if they need to be \
|
||||
resolved:"
|
||||
eval "$search_function"
|
||||
# Check for comments related to the ticket. git grep searches only tracked files, so build
|
||||
# directories and temporary files are excluded automatically. Note: newly created files that are
|
||||
# not yet added to git will be missed, but they can be checked once they are tracked.
|
||||
if git -C .. grep -Iin "$ticket_id" > /dev/null 2>&1; then
|
||||
echo "There are comments mentioning $ticket_id in the code, please check if they need to be resolved:"
|
||||
git -C .. grep -Iin "$ticket_id"
|
||||
fi
|
||||
|
||||
exit 0
|
||||
|
||||
2
src/third_party/wiredtiger/import.data
vendored
2
src/third_party/wiredtiger/import.data
vendored
@ -2,5 +2,5 @@
|
||||
"vendor": "wiredtiger",
|
||||
"github": "wiredtiger/wiredtiger",
|
||||
"branch": "mongodb-master",
|
||||
"commit": "6f3dbbf2ed12faffad4a3e274d012c61e58874f5"
|
||||
"commit": "9d2e4ce64fa8248ce21daa252e79528da59bc5d5"
|
||||
}
|
||||
|
||||
@ -1612,10 +1612,6 @@ __debug_update_dump_flags(WT_DBG *ds, WT_UPDATE *upd)
|
||||
ds->f(ds, ", prepare-restored-from-ds"));
|
||||
++flag_num;
|
||||
}
|
||||
if (F_ISSET(upd, WT_UPDATE_PREPARE_ROLLBACK)) {
|
||||
WT_RET(flag_num == 0 ? ds->f(ds, "prepare-rollback") : ds->f(ds, ", prepare-rollback"));
|
||||
++flag_num;
|
||||
}
|
||||
if (F_ISSET(upd, WT_UPDATE_RESTORED_FAST_TRUNCATE)) {
|
||||
WT_RET(flag_num == 0 ? ds->f(ds, "fast-truncate") : ds->f(ds, ", fast-truncate"));
|
||||
++flag_num;
|
||||
|
||||
@ -396,18 +396,6 @@ __wt_update_obsolete_check(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UP
|
||||
if (__wt_atomic_load_uint64_v_relaxed(&upd->txnid) == WT_TXN_ABORTED)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Prepare transaction rollback adds a globally visible tombstone to the update chain to
|
||||
* remove the entire key. Treating these globally visible tombstones as obsolete and
|
||||
* trimming update list can cause problems if the update chain is getting accessed somewhere
|
||||
* else. To avoid this problem, skip these globally visible tombstones from the update
|
||||
* obsolete check.
|
||||
*/
|
||||
if (F_ISSET(upd, WT_UPDATE_PREPARE_ROLLBACK)) {
|
||||
first = NULL;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Cannot truncate the updates if we need to remove the updates from the history store. */
|
||||
if (F_ISSET(upd, WT_UPDATE_HS_MAX_STOP)) {
|
||||
first = NULL;
|
||||
|
||||
@ -901,6 +901,17 @@ static const uint8_t confchk_WT_CONNECTION_rollback_to_stable_jump[WT_CONFIG_JUM
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2};
|
||||
|
||||
static const WT_CONFIG_CHECK confchk_WT_CONNECTION_set_key_provider[] = {
|
||||
{"version", "int", NULL, "min=0,max=1", NULL, 0, NULL, WT_CONFIG_COMPILED_TYPE_INT, 70, 0, 1,
|
||||
NULL},
|
||||
{NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0, 0, 0, NULL}};
|
||||
|
||||
static const uint8_t confchk_WT_CONNECTION_set_key_provider_jump[WT_CONFIG_JUMP_TABLE_SIZE] = {0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
||||
|
||||
static const WT_CONFIG_CHECK confchk_WT_CONNECTION_set_timestamp[] = {
|
||||
{"durable_timestamp", "string", NULL, NULL, NULL, 0, NULL, WT_CONFIG_COMPILED_TYPE_STRING, 3,
|
||||
INT64_MIN, INT64_MAX, NULL},
|
||||
@ -4210,7 +4221,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {
|
||||
confchk_WT_CONNECTION_rollback_to_stable, 2, confchk_WT_CONNECTION_rollback_to_stable_jump, 12,
|
||||
WT_CONF_SIZING_NONE, false},
|
||||
{"WT_CONNECTION.set_file_system", "", NULL, 0, NULL, 13, WT_CONF_SIZING_NONE, false},
|
||||
{"WT_CONNECTION.set_key_provider", "", NULL, 0, NULL, 14, WT_CONF_SIZING_NONE, false},
|
||||
{"WT_CONNECTION.set_key_provider", "version=0", confchk_WT_CONNECTION_set_key_provider, 1,
|
||||
confchk_WT_CONNECTION_set_key_provider_jump, 14, WT_CONF_SIZING_NONE, false},
|
||||
{"WT_CONNECTION.set_timestamp",
|
||||
"durable_timestamp=,force=false,oldest_timestamp=,"
|
||||
"stable_disaggregated_schema_epoch=,stable_timestamp=",
|
||||
|
||||
13
src/third_party/wiredtiger/src/conn/conn_api.c
vendored
13
src/third_party/wiredtiger/src/conn/conn_api.c
vendored
@ -1448,7 +1448,7 @@ __conn_open_session(WT_CONNECTION *wt_conn, WT_EVENT_HANDLER *event_handler, con
|
||||
|
||||
session_ret = NULL;
|
||||
WT_ERR(__wt_open_session(conn, event_handler, config, true, &session_ret));
|
||||
session_ret->name = "connection-open-session";
|
||||
__wt_atomic_store_ptr_relaxed(&session_ret->name, "connection-open-session");
|
||||
*wt_sessionp = &session_ret->iface;
|
||||
|
||||
err:
|
||||
@ -2907,16 +2907,13 @@ err:
|
||||
static int
|
||||
__conn_set_key_provider(WT_CONNECTION *wt_conn, WT_KEY_PROVIDER *key_provider, const char *config)
|
||||
{
|
||||
WT_CONFIG_ITEM cval;
|
||||
WT_CONNECTION_IMPL *conn;
|
||||
WT_DECL_RET;
|
||||
WT_SESSION_IMPL *session;
|
||||
|
||||
conn = (WT_CONNECTION_IMPL *)wt_conn;
|
||||
CONNECTION_API_CALL_NOCONF(conn, session, set_key_provider);
|
||||
|
||||
/* The configuration string has no use but may be useful at a later time. */
|
||||
if (config != NULL)
|
||||
WT_ERR_MSG(session, EINVAL, "key provider configuration currently not supported.");
|
||||
CONNECTION_API_CALL(conn, session, set_key_provider, config, cfg);
|
||||
|
||||
/* You can only enable the key provider system in disaggregated mode. */
|
||||
if (__wt_conn_is_disagg(session))
|
||||
@ -2928,6 +2925,10 @@ __conn_set_key_provider(WT_CONNECTION *wt_conn, WT_KEY_PROVIDER *key_provider, c
|
||||
if (conn->key_provider != NULL)
|
||||
WT_ERR_MSG(session, EINVAL, "key provider system must be configured with early_load set");
|
||||
|
||||
WT_ERR(__wt_config_gets(session, cfg, "version", &cval));
|
||||
if (cval.val == 1)
|
||||
F_SET(conn, WT_CONN_KEY_PROVIDER_PUSH);
|
||||
|
||||
conn->key_provider = key_provider;
|
||||
|
||||
err:
|
||||
|
||||
@ -435,6 +435,10 @@ __wt_disagg_put_crypt_helper(WT_SESSION_IMPL *session)
|
||||
if (session->ckpt.crash_trigger_point == KEY_PROVIDER_CRASH_BEFORE_KEY_ROTATION)
|
||||
__wt_debug_crash(session);
|
||||
|
||||
/* The pull-model get_key API is disabled when the push-model is configured. */
|
||||
if (F_ISSET(conn, WT_CONN_KEY_PROVIDER_PUSH))
|
||||
return (ENOTSUP);
|
||||
|
||||
/* Check for a new encryption key data. If the size is 0, there is none so we can skip. */
|
||||
WT_ERR(key_provider->get_key(key_provider, (WT_SESSION *)session, &crypt));
|
||||
if (crypt.keys.size == 0)
|
||||
|
||||
@ -1155,13 +1155,11 @@ __clayered_iterate_constituents(WT_CURSOR_LAYERED *clayered, uint32_t iter_flag)
|
||||
* prepared conflict occurs. Prepared updates are always ignored on the stable cursor, making it
|
||||
* safe to check the WT_CURSTD_KEY_INT flag.
|
||||
*/
|
||||
if (((WT_CURSOR_BTREE *)c_ingest)->ref == NULL && !F_ISSET(c_stable, WT_CURSTD_KEY_INT)) {
|
||||
/*
|
||||
* Move the stable cursor first to ensure it is advanced, even if a prepared conflict occurs
|
||||
* on the ingest cursor.
|
||||
*/
|
||||
WT_ERR_NOTFOUND_OK(__clayered_constituent_iter_helper(clayered, c_stable, forward), false);
|
||||
bool fresh_start =
|
||||
(((WT_CURSOR_BTREE *)c_ingest)->ref == NULL && !F_ISSET(c_stable, WT_CURSTD_KEY_INT));
|
||||
if (fresh_start) {
|
||||
WT_ERR_NOTFOUND_OK(__clayered_constituent_iter_helper(clayered, c_ingest, forward), false);
|
||||
WT_ERR_NOTFOUND_OK(__clayered_constituent_iter_helper(clayered, c_stable, forward), false);
|
||||
goto done;
|
||||
}
|
||||
|
||||
@ -1226,7 +1224,13 @@ __clayered_iterate_constituents(WT_CURSOR_LAYERED *clayered, uint32_t iter_flag)
|
||||
|
||||
done:
|
||||
err:
|
||||
if (ret == 0 || ret == WT_PREPARE_CONFLICT) {
|
||||
if (ret == WT_PREPARE_CONFLICT && fresh_start)
|
||||
/*
|
||||
* Prepare conflict on the very first key of a fresh walk: ingest is blocked before stable
|
||||
* has advanced. Reset ingest so the next call restarts cleanly.
|
||||
*/
|
||||
WT_TRET(__clayered_reset_cursors(clayered, false));
|
||||
else if (ret == 0 || ret == WT_PREPARE_CONFLICT) {
|
||||
if (!F_ISSET(clayered, iter_flag)) {
|
||||
F_CLR(clayered, WT_CLAYERED_ITERATE_NEXT | WT_CLAYERED_ITERATE_PREV);
|
||||
F_SET(clayered, iter_flag);
|
||||
@ -2961,14 +2965,11 @@ __wt_clayered_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner,
|
||||
|
||||
WT_RET(__wt_config_gets_def(session, cfg, "checkpoint", 0, &cval));
|
||||
if (cval.len != 0)
|
||||
WT_RET_MSG(session, ENOTSUP, "Layered trees do not support opening by checkpoint");
|
||||
WT_RET_MSG(session, EINVAL, "Layered trees do not support opening by checkpoint");
|
||||
|
||||
WT_RET(__wt_config_gets_def(session, cfg, "bulk", 0, &cval));
|
||||
if (cval.val != 0)
|
||||
WT_RET_MSG(session, ENOTSUP, "Layered trees do not support bulk loading");
|
||||
|
||||
if (FLD_ISSET(S2C(session)->debug.flags, WT_CONN_DEBUG_CURSOR_REPOSITION))
|
||||
WT_RET_MSG(session, ENOTSUP, "Layered trees do not support cursor reposition");
|
||||
WT_RET_MSG(session, EINVAL, "Layered trees do not support bulk loading");
|
||||
|
||||
/* Get the layered tree, and hold a reference to it until the cursor is closed. */
|
||||
WT_RET(__wt_session_get_dhandle(session, uri, NULL, cfg, 0));
|
||||
|
||||
@ -186,13 +186,7 @@ static WT_INLINE WT_UPDATE *
|
||||
__curversion_tombstone_next_upd(
|
||||
WT_SESSION_IMPL *session, WT_CURSOR_VERSION *version_cursor, WT_UPDATE *tombstone)
|
||||
{
|
||||
/*
|
||||
* show_prepared_rollback currently targets ingest-table style rollback updates (in-memory
|
||||
* trees), where rollback metadata lives on aborted prepared value updates and no globally
|
||||
* visible tombstone with PREPARE_ROLLBACK flag is prepended. If this feature is extended to
|
||||
* non-in-memory trees, we need additional handling for globally visible PREPARE_ROLLBACK
|
||||
* tombstones and their underlying aborted value updates.
|
||||
*/
|
||||
/* Stop at a globally visible tombstone nothing older is relevant. */
|
||||
if (__wt_txn_upd_visible_all(session, tombstone))
|
||||
return (NULL);
|
||||
|
||||
|
||||
27
src/third_party/wiredtiger/src/include/btmem.h
vendored
27
src/third_party/wiredtiger/src/include/btmem.h
vendored
@ -1549,20 +1549,19 @@ struct __wt_update {
|
||||
|
||||
/* When introducing a new flag, consider adding it to WT_UPDATE_SELECT_FOR_DS. */
|
||||
/* AUTOMATIC FLAG VALUE GENERATION START 0 */
|
||||
#define WT_UPDATE_DELETE_DURABLE 0x0001u /* Key has been removed from disk image. */
|
||||
#define WT_UPDATE_DS 0x0002u /* Update has been chosen to the data store. */
|
||||
#define WT_UPDATE_DURABLE 0x0004u /* Update has been durable. */
|
||||
#define WT_UPDATE_HS 0x0008u /* Update has been written to hs. */
|
||||
#define WT_UPDATE_HS_MAX_STOP 0x0010u /* Update has been written to hs with a max stop. */
|
||||
#define WT_UPDATE_PREPARE_DURABLE 0x0020u /* Prepared update has been durable. */
|
||||
#define WT_UPDATE_PREPARE_RESTORED_FROM_DS 0x0040u /* Prepared update restored from data store. */
|
||||
#define WT_UPDATE_PREPARE_ROLLBACK 0x0080u /* Tombstone that rolled back by a prepared update.*/
|
||||
#define WT_UPDATE_RESTORED_FAST_TRUNCATE 0x0100u /* Fast truncate instantiation. */
|
||||
#define WT_UPDATE_RESTORED_FROM_DS 0x0200u /* Update restored from data store. */
|
||||
#define WT_UPDATE_RESTORED_FROM_HS 0x0400u /* Update restored from history store. */
|
||||
#define WT_UPDATE_RESTORED_FROM_INGEST 0x0800u /* Update restored from ingest btree. */
|
||||
#define WT_UPDATE_RTS_DRYRUN_ABORT 0x1000u /* Used by dry run to mark a would-be abort. */
|
||||
/* AUTOMATIC FLAG VALUE GENERATION STOP 16 */
|
||||
#define WT_UPDATE_DELETE_DURABLE 0x001u /* Key has been removed from disk image. */
|
||||
#define WT_UPDATE_DS 0x002u /* Update has been chosen to the data store. */
|
||||
#define WT_UPDATE_DURABLE 0x004u /* Update has been durable. */
|
||||
#define WT_UPDATE_HS 0x008u /* Update has been written to hs. */
|
||||
#define WT_UPDATE_HS_MAX_STOP 0x010u /* Update has been written to hs with a max stop. */
|
||||
#define WT_UPDATE_PREPARE_DURABLE 0x020u /* Prepared update has been durable. */
|
||||
#define WT_UPDATE_PREPARE_RESTORED_FROM_DS 0x040u /* Prepared update restored from data store. */
|
||||
#define WT_UPDATE_RESTORED_FAST_TRUNCATE 0x080u /* Fast truncate instantiation. */
|
||||
#define WT_UPDATE_RESTORED_FROM_DS 0x100u /* Update restored from data store. */
|
||||
#define WT_UPDATE_RESTORED_FROM_HS 0x200u /* Update restored from history store. */
|
||||
#define WT_UPDATE_RESTORED_FROM_INGEST 0x400u /* Update restored from ingest btree. */
|
||||
#define WT_UPDATE_RTS_DRYRUN_ABORT 0x800u /* Used by dry run to mark a would-be abort. */
|
||||
/* AUTOMATIC FLAG VALUE GENERATION STOP 16 */
|
||||
uint16_t flags;
|
||||
|
||||
/* There are several cases we should select the update irrespective of visibility to write to the
|
||||
|
||||
532
src/third_party/wiredtiger/src/include/cell_inline.h
vendored
532
src/third_party/wiredtiger/src/include/cell_inline.h
vendored
@ -1083,6 +1083,280 @@ __wt_cell_leaf_value_parse(WT_PAGE *page, WT_CELL *cell)
|
||||
return (WT_ERROR); \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* __cell_unpack_addr_cell --
|
||||
* Unpack the validity window and optional fast-truncate record for an addr cell.
|
||||
*/
|
||||
static WT_INLINE int
|
||||
__cell_unpack_addr_cell(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_CELL *cell,
|
||||
const uint8_t **pp, const void *end, WT_CELL_UNPACK_ADDR *unpack_addr)
|
||||
{
|
||||
WT_PAGE_DELETED *page_del;
|
||||
WT_TIME_AGGREGATE *ta;
|
||||
uint8_t flags;
|
||||
bool has_fast_truncate, prepare_fast_truncate;
|
||||
|
||||
/* Return an error if we're not unpacking a cell of this type. */
|
||||
if (unpack_addr == NULL)
|
||||
return (WT_ERROR);
|
||||
|
||||
ta = &unpack_addr->ta;
|
||||
has_fast_truncate = unpack_addr->raw == WT_CELL_ADDR_DEL && F_ISSET(dsk, WT_PAGE_FT_UPDATE);
|
||||
prepare_fast_truncate = false;
|
||||
|
||||
if ((cell->__chunk[0] & WT_CELL_SECOND_DESC) != 0) {
|
||||
flags = *(*pp)++; /* skip second descriptor byte */
|
||||
WT_CELL_LEN_CHK(*pp, 0, dsk, end);
|
||||
|
||||
if (LF_ISSET(WT_CELL_PREPARE)) {
|
||||
if (has_fast_truncate)
|
||||
prepare_fast_truncate = true;
|
||||
else
|
||||
ta->prepare = 1;
|
||||
}
|
||||
if (LF_ISSET(WT_CELL_TS_START))
|
||||
WT_RET(
|
||||
__wt_vunpack_uint(pp, end == NULL ? 0 : WT_PTRDIFF(end, *pp), &ta->oldest_start_ts));
|
||||
if (LF_ISSET(WT_CELL_TXN_START))
|
||||
WT_RET(__wt_vunpack_uint(pp, end == NULL ? 0 : WT_PTRDIFF(end, *pp), &ta->newest_txn));
|
||||
if (LF_ISSET(WT_CELL_TS_DURABLE_START)) {
|
||||
WT_RET(__wt_vunpack_uint(
|
||||
pp, end == NULL ? 0 : WT_PTRDIFF(end, *pp), &ta->newest_start_durable_ts));
|
||||
ta->newest_start_durable_ts += ta->oldest_start_ts;
|
||||
}
|
||||
if (LF_ISSET(WT_CELL_TS_STOP)) {
|
||||
WT_RET(
|
||||
__wt_vunpack_uint(pp, end == NULL ? 0 : WT_PTRDIFF(end, *pp), &ta->newest_stop_ts));
|
||||
ta->newest_stop_ts += ta->oldest_start_ts;
|
||||
}
|
||||
if (LF_ISSET(WT_CELL_TXN_STOP)) {
|
||||
WT_RET(
|
||||
__wt_vunpack_uint(pp, end == NULL ? 0 : WT_PTRDIFF(end, *pp), &ta->newest_stop_txn));
|
||||
ta->newest_stop_txn += ta->newest_txn;
|
||||
}
|
||||
if (LF_ISSET(WT_CELL_TS_DURABLE_STOP)) {
|
||||
WT_RET(__wt_vunpack_uint(
|
||||
pp, end == NULL ? 0 : WT_PTRDIFF(end, *pp), &ta->newest_stop_durable_ts));
|
||||
ta->newest_stop_durable_ts += ta->newest_stop_ts;
|
||||
}
|
||||
WT_RET(__wt_check_addr_validity(session, ta, end != NULL));
|
||||
}
|
||||
|
||||
if (!has_fast_truncate)
|
||||
return (0);
|
||||
|
||||
/* Unpack the fast-truncate page_del record. */
|
||||
page_del = &unpack_addr->page_del;
|
||||
WT_RET(
|
||||
__wt_vunpack_uint(pp, end == NULL ? 0 : WT_PTRDIFF(end, *pp), (uint64_t *)&page_del->txnid));
|
||||
if (prepare_fast_truncate) {
|
||||
page_del->prepare_state = WT_PREPARE_INPROGRESS;
|
||||
page_del->committed = false;
|
||||
WT_RET(
|
||||
__wt_vunpack_uint(pp, end == NULL ? 0 : WT_PTRDIFF(end, *pp), &page_del->prepare_ts));
|
||||
page_del->pg_del_start_ts = page_del->prepare_ts;
|
||||
WT_RET(
|
||||
__wt_vunpack_uint(pp, end == NULL ? 0 : WT_PTRDIFF(end, *pp), &page_del->prepared_id));
|
||||
/* Explicitly initialize the durable timestamp to WT_TS_NONE. */
|
||||
page_del->pg_del_durable_ts = WT_TS_NONE;
|
||||
WT_ASSERT_ALWAYS(session,
|
||||
!F_ISSET(S2C(session), WT_CONN_PRESERVE_PREPARED) ||
|
||||
page_del->prepared_id != WT_PREPARED_ID_NONE,
|
||||
"Read prepared record with no prepared id when preserve prepared is enabled.");
|
||||
} else {
|
||||
page_del->prepare_state = WT_PREPARE_INIT;
|
||||
page_del->committed = true;
|
||||
WT_RET(__wt_vunpack_uint(
|
||||
pp, end == NULL ? 0 : WT_PTRDIFF(end, *pp), &page_del->pg_del_start_ts));
|
||||
WT_RET(__wt_vunpack_uint(
|
||||
pp, end == NULL ? 0 : WT_PTRDIFF(end, *pp), &page_del->pg_del_durable_ts));
|
||||
}
|
||||
page_del->selected_for_write = true;
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* __cell_unpack_value_window --
|
||||
* Unpack the validity window for a value cell (called when WT_CELL_SECOND_DESC is set).
|
||||
*/
|
||||
static WT_INLINE int
|
||||
__cell_unpack_value_window(
|
||||
WT_SESSION_IMPL *session, const uint8_t **pp, const void *end, uint8_t flags, WT_TIME_WINDOW *tw)
|
||||
{
|
||||
wt_timestamp_t temp_start_ts, temp_durable_start_ts, temp_stop_ts, temp_durable_stop_ts;
|
||||
|
||||
temp_start_ts = temp_durable_start_ts = temp_durable_stop_ts = WT_TS_NONE;
|
||||
temp_stop_ts = WT_TS_MAX;
|
||||
|
||||
if (LF_ISSET(WT_CELL_TS_START))
|
||||
WT_RET(__wt_vunpack_uint(pp, end == NULL ? 0 : WT_PTRDIFF(end, *pp), &temp_start_ts));
|
||||
if (LF_ISSET(WT_CELL_TXN_START))
|
||||
WT_RET(__wt_vunpack_uint(pp, end == NULL ? 0 : WT_PTRDIFF(end, *pp), &tw->start_txn));
|
||||
if (LF_ISSET(WT_CELL_TS_DURABLE_START))
|
||||
WT_RET(
|
||||
__wt_vunpack_uint(pp, end == NULL ? 0 : WT_PTRDIFF(end, *pp), &temp_durable_start_ts));
|
||||
|
||||
if (LF_ISSET(WT_CELL_TS_STOP))
|
||||
WT_RET(__wt_vunpack_uint(pp, end == NULL ? 0 : WT_PTRDIFF(end, *pp), &temp_stop_ts));
|
||||
|
||||
if (LF_ISSET(WT_CELL_TXN_STOP)) {
|
||||
WT_RET(__wt_vunpack_uint(pp, end == NULL ? 0 : WT_PTRDIFF(end, *pp), &tw->stop_txn));
|
||||
tw->stop_txn += tw->start_txn;
|
||||
}
|
||||
if (LF_ISSET(WT_CELL_TS_DURABLE_STOP))
|
||||
WT_RET(
|
||||
__wt_vunpack_uint(pp, end == NULL ? 0 : WT_PTRDIFF(end, *pp), &temp_durable_stop_ts));
|
||||
|
||||
/* Load temporary values to the right fields. */
|
||||
if (LF_ISSET(WT_CELL_PREPARE)) {
|
||||
bool preserve_prepared = F_ISSET(S2C(session), WT_CONN_PRESERVE_PREPARED);
|
||||
/*
|
||||
* We can compare the txn_id only here, but cannot do it everywhere else because when
|
||||
* recovering, all transaction ids are reset to WT_TXN_NONE, so we cannot compare the
|
||||
* transaction ids.
|
||||
*/
|
||||
if (tw->start_txn == tw->stop_txn && temp_stop_ts == WT_TS_NONE) {
|
||||
/*
|
||||
* This is a special case where both transaction start and stop are in prepared state.
|
||||
* The prepared record is written with the preserve prepared config enabled. The same
|
||||
* prepared id is packed to WT_CELL_TS_DURABLE_START. Since temp_stop_ts here stores the
|
||||
* difference between start_prepared_id and stop_prepared_id, temp_stop_ts must be 0.
|
||||
*/
|
||||
if (temp_durable_start_ts != WT_TS_NONE) {
|
||||
WT_ASSERT(session, temp_durable_stop_ts == WT_TS_NONE);
|
||||
tw->start_prepare_ts = temp_start_ts;
|
||||
tw->start_prepared_id = temp_durable_start_ts;
|
||||
tw->stop_prepare_ts = temp_start_ts;
|
||||
tw->stop_prepared_id = temp_durable_start_ts;
|
||||
} else {
|
||||
WT_ASSERT_ALWAYS(session, !preserve_prepared,
|
||||
"Read prepared record with no prepared id when preserve prepared is "
|
||||
"enabled.");
|
||||
WT_ASSERT(session, temp_durable_start_ts == temp_durable_stop_ts);
|
||||
tw->start_prepare_ts = tw->stop_prepare_ts = temp_start_ts;
|
||||
}
|
||||
} else if (tw->stop_txn != WT_TXN_MAX) {
|
||||
/*
|
||||
* This case happens where the transaction start is committed, but the transaction stop
|
||||
* is prepared. In this case, we store the start timestamp and durable start timestamp
|
||||
* in WT_CELL_TS_START and WT_CELL_TS_DURABLE_START, prepare ts in WT_CELL_TS_STOP.
|
||||
*/
|
||||
tw->start_ts = temp_start_ts;
|
||||
/*
|
||||
* The prepared record is written with the preserve prepared config enabled. We store
|
||||
* the prepared id in WT_CELL_TS_DURABLE_STOP.
|
||||
*/
|
||||
if (temp_durable_start_ts != WT_TS_NONE)
|
||||
tw->durable_start_ts = temp_durable_start_ts + tw->start_ts;
|
||||
else
|
||||
tw->durable_start_ts = tw->start_ts;
|
||||
|
||||
WT_ASSERT(session, temp_stop_ts != WT_TS_MAX);
|
||||
tw->stop_prepare_ts = tw->start_ts + temp_stop_ts;
|
||||
|
||||
if (temp_durable_stop_ts != WT_TS_NONE)
|
||||
tw->stop_prepared_id = temp_durable_stop_ts;
|
||||
else
|
||||
WT_ASSERT_ALWAYS(session, !preserve_prepared,
|
||||
"Read prepared record with no prepared id when preserve prepared is "
|
||||
"enabled.");
|
||||
} else {
|
||||
WT_ASSERT(session, tw->start_ts == WT_TS_NONE);
|
||||
/*
|
||||
* This case happens when only transaction start is prepared, and there is no
|
||||
* transaction stop. In this case, we store the prepare ts in WT_CELL_TS_START.
|
||||
*/
|
||||
tw->start_prepare_ts = temp_start_ts;
|
||||
/*
|
||||
* The prepared record is written with the preserve prepared config enabled. We store
|
||||
* prepared id in WT_CELL_TS_DURABLE_START.
|
||||
*/
|
||||
if (temp_durable_start_ts != WT_TS_NONE)
|
||||
tw->start_prepared_id = temp_durable_start_ts;
|
||||
else
|
||||
WT_ASSERT_ALWAYS(session, !preserve_prepared,
|
||||
"Read prepared record with no prepared id when preserve prepared is "
|
||||
"enabled.");
|
||||
}
|
||||
} else {
|
||||
if (LF_ISSET(WT_CELL_TS_START))
|
||||
tw->start_ts = temp_start_ts;
|
||||
if (LF_ISSET(WT_CELL_TS_DURABLE_START))
|
||||
tw->durable_start_ts = temp_durable_start_ts + tw->start_ts;
|
||||
else
|
||||
tw->durable_start_ts = tw->start_ts;
|
||||
|
||||
if (LF_ISSET(WT_CELL_TS_STOP))
|
||||
tw->stop_ts = temp_stop_ts + tw->start_ts;
|
||||
if (LF_ISSET(WT_CELL_TS_DURABLE_STOP))
|
||||
tw->durable_stop_ts = temp_durable_stop_ts + tw->stop_ts;
|
||||
else if (tw->stop_ts != WT_TS_MAX)
|
||||
tw->durable_stop_ts = tw->stop_ts;
|
||||
}
|
||||
|
||||
__cell_assert_tw_has_ts_for_garbage_collection_table(session, tw);
|
||||
|
||||
WT_RET(__cell_check_value_validity(session, tw, end != NULL));
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* __cell_unpack_data_len --
|
||||
* Unpack the data length for a cell (all cases except WT_CELL_VALUE_COPY).
|
||||
*/
|
||||
static WT_INLINE int
|
||||
__cell_unpack_data_len(
|
||||
WT_CELL *cell, WT_CELL_UNPACK_COMMON *unpack, const uint8_t **pp, const void *end)
|
||||
{
|
||||
uint64_t v;
|
||||
|
||||
switch (unpack->raw) {
|
||||
case WT_CELL_KEY_OVFL:
|
||||
case WT_CELL_KEY_OVFL_RM:
|
||||
case WT_CELL_VALUE_OVFL:
|
||||
case WT_CELL_VALUE_OVFL_RM:
|
||||
/*
|
||||
* Set overflow flag.
|
||||
*/
|
||||
F_SET(unpack, WT_CELL_UNPACK_OVERFLOW);
|
||||
/* FALLTHROUGH */
|
||||
|
||||
case WT_CELL_ADDR_DEL:
|
||||
case WT_CELL_ADDR_DEL_VISIBLE_ALL:
|
||||
case WT_CELL_ADDR_INT:
|
||||
case WT_CELL_ADDR_LEAF:
|
||||
case WT_CELL_ADDR_LEAF_NO:
|
||||
case WT_CELL_KEY:
|
||||
case WT_CELL_KEY_PFX:
|
||||
case WT_CELL_VALUE:
|
||||
/*
|
||||
* The cell is followed by a 4B data length and a chunk of data.
|
||||
*/
|
||||
WT_RET(__wt_vunpack_uint(pp, end == NULL ? 0 : WT_PTRDIFF(end, *pp), &v));
|
||||
|
||||
/*
|
||||
* If the size was what prevented us from using a short cell, it's larger than the
|
||||
* adjustment size. Decrement/increment it when packing/unpacking so it takes up less room.
|
||||
*/
|
||||
if (unpack->raw == WT_CELL_KEY || unpack->raw == WT_CELL_KEY_PFX ||
|
||||
(unpack->raw == WT_CELL_VALUE && unpack->v == 0 &&
|
||||
(cell->__chunk[0] & WT_CELL_SECOND_DESC) == 0))
|
||||
v += WT_CELL_SIZE_ADJUST;
|
||||
|
||||
unpack->data = *pp;
|
||||
unpack->size = (uint32_t)v;
|
||||
unpack->__len = WT_PTRDIFF32(*pp, cell) + unpack->size;
|
||||
break;
|
||||
|
||||
case WT_CELL_DEL:
|
||||
unpack->__len = WT_PTRDIFF32(*pp, cell);
|
||||
break;
|
||||
default:
|
||||
return (WT_ERROR); /* Unknown cell type. */
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* __wt_cell_unpack_safe --
|
||||
* Unpack a WT_CELL into a structure, with optional boundary checks.
|
||||
@ -1097,15 +1371,13 @@ __wt_cell_unpack_safe(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_CE
|
||||
WT_TIME_WINDOW tw;
|
||||
} copy;
|
||||
WT_CELL_UNPACK_COMMON *unpack;
|
||||
WT_PAGE_DELETED *page_del;
|
||||
WT_TIME_AGGREGATE *ta;
|
||||
WT_TIME_WINDOW *tw;
|
||||
uint64_t v;
|
||||
const uint8_t *p;
|
||||
uint8_t flags;
|
||||
bool copy_cell, has_fast_truncate, prepare_fast_truncate;
|
||||
bool copy_cell;
|
||||
|
||||
copy_cell = has_fast_truncate = prepare_fast_truncate = false;
|
||||
copy_cell = false;
|
||||
copy.len = 0; /* [-Wconditional-uninitialized] */
|
||||
copy.v = 0; /* [-Wconditional-uninitialized] */
|
||||
|
||||
@ -1113,13 +1385,11 @@ __wt_cell_unpack_safe(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_CE
|
||||
unpack = (WT_CELL_UNPACK_COMMON *)unpack_value;
|
||||
tw = &unpack_value->tw;
|
||||
WT_TIME_WINDOW_INIT(tw);
|
||||
ta = NULL;
|
||||
} else {
|
||||
WT_ASSERT(session, unpack_value == NULL);
|
||||
|
||||
unpack = (WT_CELL_UNPACK_COMMON *)unpack_addr;
|
||||
ta = &unpack_addr->ta;
|
||||
WT_TIME_AGGREGATE_INIT(ta);
|
||||
WT_TIME_AGGREGATE_INIT(&unpack_addr->ta);
|
||||
tw = NULL;
|
||||
}
|
||||
|
||||
@ -1189,60 +1459,7 @@ copy_cell_restart:
|
||||
case WT_CELL_ADDR_INT:
|
||||
case WT_CELL_ADDR_LEAF:
|
||||
case WT_CELL_ADDR_LEAF_NO:
|
||||
/* Return an error if we're not unpacking a cell of this type. */
|
||||
if (unpack_addr == NULL)
|
||||
return (WT_ERROR);
|
||||
|
||||
/*
|
||||
* A committed fast-truncate cell may be written without WT_CELL_SECOND_DESC when its time
|
||||
* aggregate is globally visible. Compute this flag before the SECOND_DESC early-exit so the
|
||||
* page_del block is always unpacked for fast-truncate addr-del cells.
|
||||
*/
|
||||
has_fast_truncate = unpack->raw == WT_CELL_ADDR_DEL && F_ISSET(dsk, WT_PAGE_FT_UPDATE);
|
||||
|
||||
if ((cell->__chunk[0] & WT_CELL_SECOND_DESC) == 0)
|
||||
break;
|
||||
flags = *p++; /* skip second descriptor byte */
|
||||
WT_CELL_LEN_CHK(p, 0, dsk, end);
|
||||
|
||||
if (LF_ISSET(WT_CELL_PREPARE)) {
|
||||
/*
|
||||
* For a prepared fast-truncate, the prepare state is recorded in the time aggregate. We
|
||||
* cannot have a prepared fast-truncate and a prepared time aggregate at the same time.
|
||||
* Otherwise, it would be a write conflict.
|
||||
*/
|
||||
if (has_fast_truncate)
|
||||
prepare_fast_truncate = true;
|
||||
else
|
||||
ta->prepare = 1;
|
||||
}
|
||||
if (LF_ISSET(WT_CELL_TS_START))
|
||||
WT_RET(
|
||||
__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &ta->oldest_start_ts));
|
||||
if (LF_ISSET(WT_CELL_TXN_START))
|
||||
WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &ta->newest_txn));
|
||||
if (LF_ISSET(WT_CELL_TS_DURABLE_START)) {
|
||||
WT_RET(__wt_vunpack_uint(
|
||||
&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &ta->newest_start_durable_ts));
|
||||
ta->newest_start_durable_ts += ta->oldest_start_ts;
|
||||
}
|
||||
|
||||
if (LF_ISSET(WT_CELL_TS_STOP)) {
|
||||
WT_RET(
|
||||
__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &ta->newest_stop_ts));
|
||||
ta->newest_stop_ts += ta->oldest_start_ts;
|
||||
}
|
||||
if (LF_ISSET(WT_CELL_TXN_STOP)) {
|
||||
WT_RET(
|
||||
__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &ta->newest_stop_txn));
|
||||
ta->newest_stop_txn += ta->newest_txn;
|
||||
}
|
||||
if (LF_ISSET(WT_CELL_TS_DURABLE_STOP)) {
|
||||
WT_RET(__wt_vunpack_uint(
|
||||
&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &ta->newest_stop_durable_ts));
|
||||
ta->newest_stop_durable_ts += ta->newest_stop_ts;
|
||||
}
|
||||
WT_RET(__wt_check_addr_validity(session, ta, end != NULL));
|
||||
WT_RET(__cell_unpack_addr_cell(session, dsk, cell, &p, end, unpack_addr));
|
||||
break;
|
||||
case WT_CELL_DEL:
|
||||
case WT_CELL_VALUE:
|
||||
@ -1257,158 +1474,10 @@ copy_cell_restart:
|
||||
break;
|
||||
flags = *p++; /* skip second descriptor byte */
|
||||
WT_CELL_LEN_CHK(p, 0, dsk, end);
|
||||
wt_timestamp_t temp_start_ts, temp_durable_start_ts, temp_stop_ts, temp_durable_stop_ts;
|
||||
temp_start_ts = temp_durable_start_ts = temp_durable_stop_ts = WT_TS_NONE;
|
||||
temp_stop_ts = WT_TS_MAX;
|
||||
|
||||
if (LF_ISSET(WT_CELL_TS_START))
|
||||
WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &temp_start_ts));
|
||||
if (LF_ISSET(WT_CELL_TXN_START))
|
||||
WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &tw->start_txn));
|
||||
if (LF_ISSET(WT_CELL_TS_DURABLE_START))
|
||||
WT_RET(
|
||||
__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &temp_durable_start_ts));
|
||||
|
||||
if (LF_ISSET(WT_CELL_TS_STOP))
|
||||
WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &temp_stop_ts));
|
||||
|
||||
if (LF_ISSET(WT_CELL_TXN_STOP)) {
|
||||
WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &tw->stop_txn));
|
||||
tw->stop_txn += tw->start_txn;
|
||||
}
|
||||
if (LF_ISSET(WT_CELL_TS_DURABLE_STOP))
|
||||
WT_RET(
|
||||
__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &temp_durable_stop_ts));
|
||||
|
||||
/* Load temporary values to the right fields. */
|
||||
if (LF_ISSET(WT_CELL_PREPARE)) {
|
||||
bool preserve_prepared = F_ISSET(S2C(session), WT_CONN_PRESERVE_PREPARED);
|
||||
/*
|
||||
* We can compare the txn_id only here, but cannot do it everywhere else because when
|
||||
* recovering, all transaction ids are reset to WT_TXN_NONE, so we cannot compare the
|
||||
* transaction ids.
|
||||
*/
|
||||
if (tw->start_txn == tw->stop_txn && temp_stop_ts == WT_TS_NONE) {
|
||||
/*
|
||||
* This is a special case where both transaction start and stop are in prepared
|
||||
* state. The prepared record is written with the preserve prepared config enabled.
|
||||
* The same prepared id is packed to WT_CELL_TS_DURABLE_START. Since temp_stop_ts
|
||||
* here stores the difference between start_prepared_id and stop_prepared_id,
|
||||
* temp_stop_ts must be 0.
|
||||
*/
|
||||
if (temp_durable_start_ts != WT_TS_NONE) {
|
||||
WT_ASSERT(session, temp_durable_stop_ts == WT_TS_NONE);
|
||||
tw->start_prepare_ts = temp_start_ts;
|
||||
tw->start_prepared_id = temp_durable_start_ts;
|
||||
tw->stop_prepare_ts = temp_start_ts;
|
||||
tw->stop_prepared_id = temp_durable_start_ts;
|
||||
} else {
|
||||
WT_ASSERT_ALWAYS(session, !preserve_prepared,
|
||||
"Read prepared record with no prepared id when preserve prepared is "
|
||||
"enabled.");
|
||||
WT_ASSERT(session, temp_durable_start_ts == temp_durable_stop_ts);
|
||||
tw->start_prepare_ts = tw->stop_prepare_ts = temp_start_ts;
|
||||
}
|
||||
} else if (tw->stop_txn != WT_TXN_MAX) {
|
||||
/*
|
||||
* This case happens where the transaction start is committed, but the transaction
|
||||
* stop is prepared. In this case, we store the start timestamp and durable start
|
||||
* timestamp in WT_CELL_TS_START and WT_CELL_TS_DURABLE_START, prepare ts in
|
||||
* WT_CELL_TS_STOP.
|
||||
*/
|
||||
tw->start_ts = temp_start_ts;
|
||||
/*
|
||||
* The prepared record is written with the preserve prepared config enabled. We
|
||||
* store the prepared id in WT_CELL_TS_DURABLE_STOP.
|
||||
*/
|
||||
if (temp_durable_start_ts != WT_TS_NONE)
|
||||
tw->durable_start_ts = temp_durable_start_ts + tw->start_ts;
|
||||
else
|
||||
tw->durable_start_ts = tw->start_ts;
|
||||
|
||||
WT_ASSERT(session, temp_stop_ts != WT_TS_MAX);
|
||||
tw->stop_prepare_ts = tw->start_ts + temp_stop_ts;
|
||||
|
||||
if (temp_durable_stop_ts != WT_TS_NONE)
|
||||
tw->stop_prepared_id = temp_durable_stop_ts;
|
||||
else
|
||||
WT_ASSERT_ALWAYS(session, !preserve_prepared,
|
||||
"Read prepared record with no prepared id when preserve prepared is "
|
||||
"enabled.");
|
||||
} else {
|
||||
WT_ASSERT(session, tw->start_ts == WT_TS_NONE);
|
||||
/*
|
||||
* This case happens when only transaction start is prepared, and there is no
|
||||
* transaction stop. In this case, we store the prepare ts in WT_CELL_TS_START.
|
||||
*/
|
||||
tw->start_prepare_ts = temp_start_ts;
|
||||
/*
|
||||
* The prepared record is written with the preserve prepared config enabled. We
|
||||
* store prepared id in WT_CELL_TS_DURABLE_START.
|
||||
*/
|
||||
if (temp_durable_start_ts != WT_TS_NONE)
|
||||
tw->start_prepared_id = temp_durable_start_ts;
|
||||
else
|
||||
WT_ASSERT_ALWAYS(session, !preserve_prepared,
|
||||
"Read prepared record with no prepared id when preserve prepared is "
|
||||
"enabled.");
|
||||
}
|
||||
} else {
|
||||
if (LF_ISSET(WT_CELL_TS_START))
|
||||
tw->start_ts = temp_start_ts;
|
||||
if (LF_ISSET(WT_CELL_TS_DURABLE_START))
|
||||
tw->durable_start_ts = temp_durable_start_ts + tw->start_ts;
|
||||
else
|
||||
tw->durable_start_ts = tw->start_ts;
|
||||
|
||||
if (LF_ISSET(WT_CELL_TS_STOP))
|
||||
tw->stop_ts = temp_stop_ts + tw->start_ts;
|
||||
if (LF_ISSET(WT_CELL_TS_DURABLE_STOP))
|
||||
tw->durable_stop_ts = temp_durable_stop_ts + tw->stop_ts;
|
||||
else if (tw->stop_ts != WT_TS_MAX)
|
||||
tw->durable_stop_ts = tw->stop_ts;
|
||||
}
|
||||
|
||||
__cell_assert_tw_has_ts_for_garbage_collection_table(session, tw);
|
||||
|
||||
WT_RET(__cell_check_value_validity(session, tw, end != NULL));
|
||||
WT_RET(__cell_unpack_value_window(session, &p, end, flags, tw));
|
||||
break;
|
||||
}
|
||||
|
||||
/* Unpack any fast-truncate information. */
|
||||
if (has_fast_truncate) {
|
||||
page_del = &unpack_addr->page_del;
|
||||
WT_RET(__wt_vunpack_uint(
|
||||
&p, end == NULL ? 0 : WT_PTRDIFF(end, p), (uint64_t *)&page_del->txnid));
|
||||
if (prepare_fast_truncate) {
|
||||
page_del->prepare_state = WT_PREPARE_INPROGRESS;
|
||||
page_del->committed = false;
|
||||
/*
|
||||
* For prepared fast-truncates, the prepared state is shared with the time aggregate but
|
||||
* the prepare timestamp and the prepared id are stored in the page_del block.
|
||||
*/
|
||||
WT_RET(
|
||||
__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &page_del->prepare_ts));
|
||||
page_del->pg_del_start_ts = page_del->prepare_ts;
|
||||
WT_RET(
|
||||
__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &page_del->prepared_id));
|
||||
/* Explicitly initialize the durable timestamp to WT_TS_NONE. */
|
||||
page_del->pg_del_durable_ts = WT_TS_NONE;
|
||||
WT_ASSERT_ALWAYS(session,
|
||||
!F_ISSET(S2C(session), WT_CONN_PRESERVE_PREPARED) ||
|
||||
page_del->prepared_id != WT_PREPARED_ID_NONE,
|
||||
"Read prepared record with no prepared id when preserve prepared is enabled.");
|
||||
} else {
|
||||
page_del->prepare_state = WT_PREPARE_INIT;
|
||||
page_del->committed = true;
|
||||
WT_RET(__wt_vunpack_uint(
|
||||
&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &page_del->pg_del_start_ts));
|
||||
WT_RET(__wt_vunpack_uint(
|
||||
&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &page_del->pg_del_durable_ts));
|
||||
}
|
||||
page_del->selected_for_write = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check for an RLE count or record number that optionally follows the cell descriptor byte on
|
||||
* column-store variable-length pages.
|
||||
@ -1441,48 +1510,9 @@ copy_cell_restart:
|
||||
cell = (WT_CELL *)((uint8_t *)cell - v);
|
||||
goto copy_cell_restart;
|
||||
|
||||
case WT_CELL_KEY_OVFL:
|
||||
case WT_CELL_KEY_OVFL_RM:
|
||||
case WT_CELL_VALUE_OVFL:
|
||||
case WT_CELL_VALUE_OVFL_RM:
|
||||
/*
|
||||
* Set overflow flag.
|
||||
*/
|
||||
F_SET(unpack, WT_CELL_UNPACK_OVERFLOW);
|
||||
/* FALLTHROUGH */
|
||||
|
||||
case WT_CELL_ADDR_DEL:
|
||||
case WT_CELL_ADDR_DEL_VISIBLE_ALL:
|
||||
case WT_CELL_ADDR_INT:
|
||||
case WT_CELL_ADDR_LEAF:
|
||||
case WT_CELL_ADDR_LEAF_NO:
|
||||
case WT_CELL_KEY:
|
||||
case WT_CELL_KEY_PFX:
|
||||
case WT_CELL_VALUE:
|
||||
/*
|
||||
* The cell is followed by a 4B data length and a chunk of data.
|
||||
*/
|
||||
WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &v));
|
||||
|
||||
/*
|
||||
* If the size was what prevented us from using a short cell, it's larger than the
|
||||
* adjustment size. Decrement/increment it when packing/unpacking so it takes up less room.
|
||||
*/
|
||||
if (unpack->raw == WT_CELL_KEY || unpack->raw == WT_CELL_KEY_PFX ||
|
||||
(unpack->raw == WT_CELL_VALUE && unpack->v == 0 &&
|
||||
(cell->__chunk[0] & WT_CELL_SECOND_DESC) == 0))
|
||||
v += WT_CELL_SIZE_ADJUST;
|
||||
|
||||
unpack->data = p;
|
||||
unpack->size = (uint32_t)v;
|
||||
unpack->__len = WT_PTRDIFF32(p, cell) + unpack->size;
|
||||
break;
|
||||
|
||||
case WT_CELL_DEL:
|
||||
unpack->__len = WT_PTRDIFF32(p, cell);
|
||||
break;
|
||||
default:
|
||||
return (WT_ERROR); /* Unknown cell type. */
|
||||
WT_RET(__cell_unpack_data_len(cell, unpack, &p, end));
|
||||
break;
|
||||
}
|
||||
|
||||
done:
|
||||
|
||||
@ -157,6 +157,7 @@ WT_CONF_API_DECLARE(WT_CONNECTION, open_session, 3, 9);
|
||||
WT_CONF_API_DECLARE(WT_CONNECTION, query_timestamp, 1, 1);
|
||||
WT_CONF_API_DECLARE(WT_CONNECTION, reconfigure, 21, 132);
|
||||
WT_CONF_API_DECLARE(WT_CONNECTION, rollback_to_stable, 1, 2);
|
||||
WT_CONF_API_DECLARE(WT_CONNECTION, set_key_provider, 1, 1);
|
||||
WT_CONF_API_DECLARE(WT_CONNECTION, set_timestamp, 1, 5);
|
||||
WT_CONF_API_DECLARE(WT_CURSOR, bound, 1, 3);
|
||||
WT_CONF_API_DECLARE(WT_CURSOR, reconfigure, 1, 3);
|
||||
|
||||
@ -1198,15 +1198,16 @@ struct __wt_connection_impl {
|
||||
#define WT_CONN_CKPT_CLEANUP_RECLAIM_SPACE 0x0008u
|
||||
#define WT_CONN_CKPT_SYNC 0x0010u
|
||||
#define WT_CONN_IN_MEMORY 0x0020u
|
||||
#define WT_CONN_LIVE_RESTORE_FS 0x0040u
|
||||
#define WT_CONN_PRECISE_CHECKPOINT 0x0080u
|
||||
#define WT_CONN_PRESERVE_PREPARED 0x0100u
|
||||
#define WT_CONN_READONLY 0x0200u
|
||||
#define WT_CONN_RECOVERING 0x0400u
|
||||
#define WT_CONN_RECOVERING_METADATA 0x0800u
|
||||
#define WT_CONN_RECOVERY_COMPLETE 0x1000u
|
||||
#define WT_CONN_SALVAGE 0x2000u
|
||||
#define WT_CONN_WAS_BACKUP 0x4000u
|
||||
#define WT_CONN_KEY_PROVIDER_PUSH 0x0040u
|
||||
#define WT_CONN_LIVE_RESTORE_FS 0x0080u
|
||||
#define WT_CONN_PRECISE_CHECKPOINT 0x0100u
|
||||
#define WT_CONN_PRESERVE_PREPARED 0x0200u
|
||||
#define WT_CONN_READONLY 0x0400u
|
||||
#define WT_CONN_RECOVERING 0x0800u
|
||||
#define WT_CONN_RECOVERING_METADATA 0x1000u
|
||||
#define WT_CONN_RECOVERY_COMPLETE 0x2000u
|
||||
#define WT_CONN_SALVAGE 0x4000u
|
||||
#define WT_CONN_WAS_BACKUP 0x8000u
|
||||
/* AUTOMATIC FLAG VALUE GENERATION STOP 32 */
|
||||
wt_shared uint32_t flags;
|
||||
|
||||
|
||||
@ -32,14 +32,15 @@ __wt_single_thread_check_start(WT_SESSION_IMPL *s)
|
||||
if (!WT_SESSION_IS_DEFAULT(s) && s->thread_check.owning_thread != current_tid) {
|
||||
ret = __wt_spin_trylock(s, &s->thread_check.lock);
|
||||
|
||||
const char *session_name = __wt_atomic_load_ptr_relaxed(&s->name);
|
||||
WT_ASSERT_ALWAYS(s, ret == 0,
|
||||
"Session %" PRIu32
|
||||
" is accessed concurrently by multiple threads: "
|
||||
"current thread %" PRIuMAX ", owning thread %" PRIuMAX
|
||||
" (active op: %s, last op: %s, api depth: %u, dhandle: %s)",
|
||||
s->id, current_tid, s->thread_check.owning_thread, s->name != NULL ? s->name : "none",
|
||||
s->lastop != NULL ? s->lastop : "none", s->api_call_counter,
|
||||
s->dhandle != NULL ? s->dhandle->name : "none");
|
||||
s->id, current_tid, s->thread_check.owning_thread,
|
||||
session_name != NULL ? session_name : "none", s->lastop != NULL ? s->lastop : "none",
|
||||
s->api_call_counter, s->dhandle != NULL ? s->dhandle->name : "none");
|
||||
|
||||
s->thread_check.owning_thread = current_tid;
|
||||
}
|
||||
|
||||
@ -1514,7 +1514,7 @@ __wt_txn_read_upd_list_internal(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
|
||||
*restored_updp = NULL;
|
||||
__wt_upd_value_clear(cbt->upd_value);
|
||||
|
||||
for (; upd != NULL; upd = upd->next) {
|
||||
for (; upd != NULL; upd = __wt_atomic_load_ptr_relaxed(&upd->next)) {
|
||||
/* Skip reserved place-holders, they're never visible. */
|
||||
if (upd->type == WT_UPDATE_RESERVE)
|
||||
continue;
|
||||
|
||||
@ -2258,12 +2258,11 @@ struct __wt_connection {
|
||||
* isolation the cursor temporarily releases any page that requires force eviction\, then
|
||||
* repositions back to the page for further operations. A page release encourages eviction of
|
||||
* hot or large pages\, which is more likely to succeed without a cursor keeping the page
|
||||
* pinned. Note: This setting is not compatible with disaggregated storage., a boolean flag;
|
||||
* default \c false.}
|
||||
* @config{ eviction, if true\, modify internal
|
||||
* algorithms to change skew to force history store eviction to happen more aggressively. This
|
||||
* includes but is not limited to not skewing newest\, not favoring leaf pages\, and modifying
|
||||
* the eviction score mechanism., a boolean flag; default \c false.}
|
||||
* pinned., a boolean flag; default \c false.}
|
||||
* @config{ eviction, if
|
||||
* true\, modify internal algorithms to change skew to force history store eviction to happen
|
||||
* more aggressively. This includes but is not limited to not skewing newest\, not favoring
|
||||
* leaf pages\, and modifying the eviction score mechanism., a boolean flag; default \c false.}
|
||||
* @config{ eviction_checkpoint_ts_ordering, if true\, act as if eviction
|
||||
* is being run in parallel to checkpoint. We should return EBUSY in eviction if we detect any
|
||||
* timestamp ordering issue., a boolean flag; default \c false.}
|
||||
@ -3013,7 +3012,11 @@ struct __wt_connection {
|
||||
*
|
||||
* @param connection the connection handle
|
||||
* @param km the key provider structure
|
||||
* @configempty{WT_CONNECTION.set_key_provider, see dist/api_data.py}
|
||||
* @configstart{WT_CONNECTION.set_key_provider, see dist/api_data.py}
|
||||
* @config{version, the key provider API version. Version 0 uses the pull model (WiredTiger
|
||||
* calls WT_KEY_PROVIDER::get_key). Version 1 uses the push model., an integer between \c 0 and
|
||||
* \c 1; default \c 0.}
|
||||
* @configend
|
||||
* @errors
|
||||
*/
|
||||
int __F(set_key_provider)(
|
||||
@ -3191,27 +3194,26 @@ struct __wt_connection {
|
||||
* cursor_reposition, if true\, for operations with snapshot isolation the cursor temporarily
|
||||
* releases any page that requires force eviction\, then repositions back to the page for further
|
||||
* operations. A page release encourages eviction of hot or large pages\, which is more likely to
|
||||
* succeed without a cursor keeping the page pinned. Note: This setting is not compatible with
|
||||
* disaggregated storage., a boolean flag; default \c false.}
|
||||
* succeed without a cursor keeping the page pinned., a boolean flag; default \c false.}
|
||||
* @config{ eviction, if true\, modify internal algorithms to change skew to
|
||||
* force history store eviction to happen more aggressively. This includes but is not limited to
|
||||
* not skewing newest\, not favoring leaf pages\, and modifying the eviction score mechanism., a
|
||||
* boolean flag; default \c false.}
|
||||
* @config{ eviction_checkpoint_ts_ordering,
|
||||
* if true\, act as if eviction is being run in parallel to checkpoint. We should return EBUSY in
|
||||
* eviction if we detect any timestamp ordering issue., a boolean flag; default \c false.}
|
||||
* @config{ log_retention, adjust log removal to retain at least this number
|
||||
* of log files. (Warning: this option can remove log files required for recovery if no checkpoints
|
||||
* have yet been done and the number of log files exceeds the configured value. As WiredTiger
|
||||
* cannot detect the difference between a system that has not yet checkpointed and one that will
|
||||
* never checkpoint\, it might discard log files before any checkpoint is done.) Ignored if set to
|
||||
* 0., an integer between \c 0 and \c 1024; default \c 0.}
|
||||
* @config{
|
||||
* eviction, if true\, modify internal algorithms to change skew to force history store eviction to
|
||||
* happen more aggressively. This includes but is not limited to not skewing newest\, not favoring
|
||||
* leaf pages\, and modifying the eviction score mechanism., a boolean flag; default \c false.}
|
||||
* @config{ eviction_checkpoint_ts_ordering, if true\, act as if eviction is
|
||||
* being run in parallel to checkpoint. We should return EBUSY in eviction if we detect any
|
||||
* timestamp ordering issue., a boolean flag; default \c false.}
|
||||
* @config{
|
||||
* log_retention, adjust log removal to retain at least this number of log files. (Warning: this
|
||||
* option can remove log files required for recovery if no checkpoints have yet been done and the
|
||||
* number of log files exceeds the configured value. As WiredTiger cannot detect the difference
|
||||
* between a system that has not yet checkpointed and one that will never checkpoint\, it might
|
||||
* discard log files before any checkpoint is done.) Ignored if set to 0., an integer between \c 0
|
||||
* and \c 1024; default \c 0.}
|
||||
* @config{ realloc_exact, if true\, reallocation
|
||||
* of memory will only provide the exact amount requested. This will help with spotting memory
|
||||
* allocation issues more easily., a boolean flag; default \c false.}
|
||||
* @config{ realloc_malloc, if true\, every realloc call will force a new
|
||||
* memory allocation by using malloc., a boolean flag; default \c false.}
|
||||
* realloc_exact, if true\, reallocation of memory will only provide the exact amount requested.
|
||||
* This will help with spotting memory allocation issues more easily., a boolean flag; default \c
|
||||
* false.}
|
||||
* @config{ realloc_malloc, if true\, every realloc call will force a
|
||||
* new memory allocation by using malloc., a boolean flag; default \c false.}
|
||||
* @config{ rollback_error, return a WT_ROLLBACK error from a transaction
|
||||
* operation about every Nth operation to simulate a collision., an integer between \c 0 and \c 10M;
|
||||
* default \c 0.}
|
||||
|
||||
@ -349,18 +349,6 @@ __rec_save_delete_hs_upd_and_free_obs_updates(WT_SESSION_IMPL *session, WTI_RECO
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Prepare transaction rollback adds a globally visible tombstone to the update chain to
|
||||
* remove the entire key. Treating these globally visible tombstones as obsolete and
|
||||
* trimming update list can cause problems if the update chain is getting accessed somewhere
|
||||
* else. To avoid this problem, skip these globally visible tombstones from the update
|
||||
* obsolete check.
|
||||
*/
|
||||
if (F_ISSET(delete_upd, WT_UPDATE_PREPARE_ROLLBACK)) {
|
||||
visible_all_upd = NULL;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Track the first self-contained value that is globally visible. */
|
||||
if (F_ISSET(r, WT_REC_CHECKPOINT) && visible_all_upd == NULL && delete_upd->next != NULL &&
|
||||
WT_UPDATE_DATA_VALUE(delete_upd) && __wt_txn_upd_visible_all(session, delete_upd))
|
||||
@ -737,14 +725,13 @@ __rec_upd_select(WT_SESSION_IMPL *session, WTI_RECONCILE *r, WT_CELL_UNPACK_KV *
|
||||
bool *has_newer_updatesp, bool *write_prepare, size_t *upd_memsizep)
|
||||
{
|
||||
WT_CONNECTION_IMPL *conn;
|
||||
WT_UPDATE *upd, *prepare_rollback_tombstone;
|
||||
WT_UPDATE *upd;
|
||||
wt_timestamp_t max_ts;
|
||||
uint64_t max_txn, session_txnid, txnid;
|
||||
uint8_t prepare_state;
|
||||
bool is_hs_page;
|
||||
|
||||
conn = S2C(session);
|
||||
prepare_rollback_tombstone = NULL;
|
||||
max_ts = WT_TS_NONE;
|
||||
max_txn = WT_TXN_NONE;
|
||||
is_hs_page = F_ISSET(session->dhandle, WT_DHANDLE_HS);
|
||||
@ -773,15 +760,8 @@ __rec_upd_select(WT_SESSION_IMPL *session, WTI_RECONCILE *r, WT_CELL_UNPACK_KV *
|
||||
upd->prepare_state == WT_PREPARE_INPROGRESS);
|
||||
/* Ignore the prepared update if the rollback timestamp is stable. */
|
||||
if (upd->upd_rollback_ts != WT_TS_NONE &&
|
||||
upd->upd_rollback_ts <= r->rec_start_pinned_stable_ts) {
|
||||
/*
|
||||
* If we have seen a tombstone that rolled back the prepared update, delete the key
|
||||
* from the disk.
|
||||
*/
|
||||
if (prepare_rollback_tombstone != NULL)
|
||||
break;
|
||||
upd->upd_rollback_ts <= r->rec_start_pinned_stable_ts)
|
||||
continue;
|
||||
}
|
||||
|
||||
txnid = upd->upd_saved_txnid;
|
||||
}
|
||||
@ -807,7 +787,6 @@ __rec_upd_select(WT_SESSION_IMPL *session, WTI_RECONCILE *r, WT_CELL_UNPACK_KV *
|
||||
session_txnid != WT_TXN_NONE && txnid == session_txnid) {
|
||||
*upd_memsizep += WT_UPDATE_MEMSIZE(upd);
|
||||
*has_newer_updatesp = true;
|
||||
WT_ASSERT(session, prepare_rollback_tombstone == NULL);
|
||||
WT_ASSERT(session, !upd_select->skip_aborted_prepared_value);
|
||||
continue;
|
||||
}
|
||||
@ -841,16 +820,6 @@ __rec_upd_select(WT_SESSION_IMPL *session, WTI_RECONCILE *r, WT_CELL_UNPACK_KV *
|
||||
|
||||
*upd_memsizep += WT_UPDATE_MEMSIZE(upd);
|
||||
*has_newer_updatesp = true;
|
||||
/*
|
||||
* If we have already seen a globally visible tombstone from prepared rollback, the
|
||||
* update we are now skipping is the aborted prepared update that the tombstone rolled
|
||||
* back, and its rollback is not yet stable (otherwise we would have broken out of the
|
||||
* loop above). The rollback decision is not durable, so the rollback tombstone is not
|
||||
* safe to write to disk. Drop it from consideration so the fallback after the loop does
|
||||
* not select it for write; we will revisit this key in a later reconcile once the
|
||||
* rollback becomes stable.
|
||||
*/
|
||||
prepare_rollback_tombstone = NULL;
|
||||
/*
|
||||
* Same reason as the aborted-prepared skip earlier: this rolled-back prepared value has
|
||||
* no in-chain fallback, so the on-disk cell must not be dropped on this reconciliation.
|
||||
@ -884,8 +853,6 @@ __rec_upd_select(WT_SESSION_IMPL *session, WTI_RECONCILE *r, WT_CELL_UNPACK_KV *
|
||||
WT_ASSERT(session, !is_hs_page);
|
||||
*upd_memsizep += WT_UPDATE_MEMSIZE(upd);
|
||||
*has_newer_updatesp = true;
|
||||
/* We should write nothing to disk. */
|
||||
prepare_rollback_tombstone = NULL;
|
||||
|
||||
/*
|
||||
* Same reason as the aborted-prepared skip earlier: this rolled-back prepared
|
||||
@ -925,8 +892,7 @@ __rec_upd_select(WT_SESSION_IMPL *session, WTI_RECONCILE *r, WT_CELL_UNPACK_KV *
|
||||
* commit/rollback. But it is enough to help us catch some issues.
|
||||
*/
|
||||
WT_ASSERT_ALWAYS(session,
|
||||
!F_ISSET(r, WT_REC_EVICT) || prepare_rollback_tombstone != NULL ||
|
||||
upd->next != NULL ||
|
||||
!F_ISSET(r, WT_REC_EVICT) || upd->next != NULL ||
|
||||
(WT_REC_HAS_ON_DISK(vpack) && !WT_TIME_WINDOW_HAS_PREPARE(&vpack->tw)),
|
||||
"leaked prepared update.");
|
||||
} else
|
||||
@ -999,46 +965,12 @@ __rec_upd_select(WT_SESSION_IMPL *session, WTI_RECONCILE *r, WT_CELL_UNPACK_KV *
|
||||
}
|
||||
}
|
||||
|
||||
if (F_ISSET(conn, WT_CONN_PRESERVE_PREPARED) && F_ISSET(upd, WT_UPDATE_PREPARE_ROLLBACK) &&
|
||||
!F_ISSET(upd, WT_UPDATE_SELECT_FOR_DS))
|
||||
prepare_rollback_tombstone = upd;
|
||||
/*
|
||||
* Always select the newest visible update if precise checkpoint is not enabled. Otherwise,
|
||||
* select the first update that is smaller or equal to the pinned timestamp.
|
||||
*/
|
||||
else if (upd_select->upd == NULL) {
|
||||
if (upd_select->upd == NULL)
|
||||
upd_select->upd = upd;
|
||||
if (prepare_rollback_tombstone != NULL) {
|
||||
/*
|
||||
* Not checking upd->txnid == WT_TXN_ABORTED here because when doing prepared
|
||||
* rollback, we first insert the rollback tombstone then mark the prepare aborted,
|
||||
* so this assert can fire if we race with prepared rollback.
|
||||
*/
|
||||
WT_ASSERT(session,
|
||||
*write_prepare &&
|
||||
(prepare_state == WT_PREPARE_INPROGRESS || prepare_state == WT_PREPARE_LOCKED));
|
||||
#ifdef HAVE_DIAGNOSTIC
|
||||
/*
|
||||
* Walk from the rollback tombstone to the current prepared update; the only updates
|
||||
* permitted in between are reserve updates. Any other update would mean an unknown
|
||||
* entry slipped in front of the prepared update we are about to select.
|
||||
*/
|
||||
WT_UPDATE *scan;
|
||||
for (scan = prepare_rollback_tombstone->next; scan != NULL && scan != upd;
|
||||
scan = scan->next)
|
||||
WT_ASSERT(
|
||||
session, scan->type == WT_UPDATE_RESERVE && scan->txnid == WT_TXN_ABORTED);
|
||||
WT_ASSERT(session, scan == upd);
|
||||
#endif
|
||||
/* We skipped the prepare rollback tombstone. */
|
||||
WT_ASSERT(session, *has_newer_updatesp);
|
||||
/*
|
||||
* If we have seen a tombstone that rolled back the prepared update, this must be
|
||||
* the prepared update. No need to walk further.
|
||||
*/
|
||||
prepare_rollback_tombstone = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* Track the selected update transaction id and timestamp. */
|
||||
if (max_txn < txnid)
|
||||
@ -1052,13 +984,6 @@ __rec_upd_select(WT_SESSION_IMPL *session, WTI_RECONCILE *r, WT_CELL_UNPACK_KV *
|
||||
max_ts = upd->upd_start_ts;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we see a globally visible tombstone that deletes a key because of prepared rollback,
|
||||
* keep walking to see if we should write the prepared update instead.
|
||||
*/
|
||||
if (prepare_rollback_tombstone != NULL)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* We only need to walk the whole update chain if we are evicting metadata as it is written
|
||||
* with read uncommitted isolation and we may see a committed update followed by uncommitted
|
||||
@ -1068,10 +993,6 @@ __rec_upd_select(WT_SESSION_IMPL *session, WTI_RECONCILE *r, WT_CELL_UNPACK_KV *
|
||||
break;
|
||||
}
|
||||
|
||||
/* The prepare rollback is stable. Delete the key by selecting the rollback tombstone. */
|
||||
if (upd_select->upd == NULL && prepare_rollback_tombstone != NULL)
|
||||
upd_select->upd = prepare_rollback_tombstone;
|
||||
|
||||
/*
|
||||
* Track the most recent transaction in the page. We store this in the tree at the end of
|
||||
* reconciliation in the service of checkpoints, it is used to avoid discarding trees from
|
||||
|
||||
@ -2631,7 +2631,7 @@ __open_session(WT_CONNECTION_IMPL *conn, WT_EVENT_HANDLER *event_handler, const
|
||||
session_ret->iface = F_ISSET(conn, WT_CONN_READONLY) ? stds_readonly : stds;
|
||||
session_ret->iface.connection = &conn->iface;
|
||||
|
||||
session_ret->name = NULL;
|
||||
__wt_atomic_store_ptr_relaxed(&session_ret->name, NULL);
|
||||
session_ret->id = i;
|
||||
|
||||
#ifdef HAVE_UNITTEST_ASSERTS
|
||||
|
||||
@ -78,13 +78,15 @@ __wt_session_dump(WT_SESSION_IMPL *session, WT_SESSION_IMPL *dump_session, bool
|
||||
WT_CURSOR *cursor;
|
||||
WT_DECL_ITEM(buf);
|
||||
WT_DECL_RET;
|
||||
const char *session_name;
|
||||
|
||||
WT_ERR(__wt_scr_alloc(session, 0, &buf));
|
||||
|
||||
WT_ERR(__wt_msg(
|
||||
session, "Session: ID: %" PRIu32 " @: 0x%p", dump_session->id, (void *)dump_session));
|
||||
WT_ERR(
|
||||
__wt_msg(session, " Name: %s", dump_session->name == NULL ? "EMPTY" : dump_session->name));
|
||||
|
||||
session_name = __wt_atomic_load_ptr_relaxed(&dump_session->name);
|
||||
WT_ERR(__wt_msg(session, " Name: %s", session_name == NULL ? "EMPTY" : session_name));
|
||||
WT_ERR(__wt_msg(session, " Last operation: %s",
|
||||
dump_session->lastop == NULL ? "NONE" : dump_session->lastop));
|
||||
WT_ERR(__wt_msg(session, " Current dhandle: %s",
|
||||
|
||||
@ -415,6 +415,8 @@ __wt_hazard_check_assert(WT_SESSION_IMPL *session, void *ref, bool waitfor)
|
||||
break;
|
||||
__wt_sleep(0, 10 * WT_THOUSAND);
|
||||
}
|
||||
|
||||
const char *session_name = __wt_atomic_load_ptr_relaxed(&s->name);
|
||||
#ifdef HAVE_DIAGNOSTIC
|
||||
/*
|
||||
* In diagnostic mode we also track the file and line where the hazard pointer is set. If this
|
||||
@ -422,10 +424,11 @@ __wt_hazard_check_assert(WT_SESSION_IMPL *session, void *ref, bool waitfor)
|
||||
*/
|
||||
__wt_errx(session,
|
||||
"hazard pointer reference to discarded object: (%p: session %p name %s: %s, line %d)",
|
||||
(void *)hp->ref, (void *)s, s->name == NULL ? "UNKNOWN" : s->name, hp->func, hp->line);
|
||||
(void *)hp->ref, (void *)s, session_name == NULL ? "UNKNOWN" : session_name, hp->func,
|
||||
hp->line);
|
||||
#else
|
||||
__wt_errx(session, "hazard pointer reference to discarded object: (%p: session %p name %s)",
|
||||
(void *)hp->ref, (void *)s, s->name == NULL ? "UNKNOWN" : s->name);
|
||||
(void *)hp->ref, (void *)s, session_name == NULL ? "UNKNOWN" : session_name);
|
||||
#endif
|
||||
return (false);
|
||||
}
|
||||
|
||||
87
src/third_party/wiredtiger/src/txn/txn.c
vendored
87
src/third_party/wiredtiger/src/txn/txn.c
vendored
@ -958,7 +958,7 @@ __txn_prepare_rollback_restore_hs_update(
|
||||
}
|
||||
|
||||
/* Append the update to the end of the chain. */
|
||||
WT_RELEASE_WRITE_WITH_BARRIER(upd_chain->next, upd);
|
||||
__wt_atomic_store_ptr_relaxed(&upd_chain->next, upd);
|
||||
|
||||
__wt_cache_page_inmem_incr(session, page, total_size, false);
|
||||
|
||||
@ -1034,30 +1034,35 @@ __txn_search_prepared_op(WT_SESSION_IMPL *session, WT_BTREE *btree, WT_ITEM *key
|
||||
|
||||
/*
|
||||
* __txn_prepare_rollback_delete_key --
|
||||
* Prepend a global visible tombstone to the head of the update chain to delete the key for
|
||||
* prepare rollback.
|
||||
* Append a globally visible tombstone to the tail of the update chain to delete the key for
|
||||
* prepare rollback. Placing the tombstone below the prepared update encodes its role by
|
||||
* position so reconciliation and pruning see a normal globally visible tombstone without
|
||||
* needing a distinguishing flag.
|
||||
*/
|
||||
static int
|
||||
__txn_prepare_rollback_delete_key(WT_SESSION_IMPL *session, WT_BTREE *btree, WT_CURSOR_BTREE *cbt)
|
||||
__txn_prepare_rollback_delete_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd_chain)
|
||||
{
|
||||
WT_DECL_RET;
|
||||
WT_UPDATE *tombstone;
|
||||
size_t not_used;
|
||||
size_t size;
|
||||
|
||||
tombstone = NULL;
|
||||
WT_ASSERT(session, upd_chain != NULL);
|
||||
|
||||
WT_ERR(__wt_upd_alloc_tombstone(session, &tombstone, ¬_used));
|
||||
F_SET(tombstone, WT_UPDATE_PREPARE_ROLLBACK);
|
||||
WT_WITH_BTREE(session, btree,
|
||||
ret = btree->type == BTREE_ROW ?
|
||||
__wt_row_modify(cbt, &cbt->iface.key, NULL, &tombstone, WT_UPDATE_INVALID, false, false) :
|
||||
__wt_col_modify(cbt, cbt->recno, NULL, &tombstone, WT_UPDATE_INVALID, false, false));
|
||||
WT_ERR(ret);
|
||||
tombstone = NULL;
|
||||
size = 0;
|
||||
WT_RET(__wt_upd_alloc_tombstone(session, &tombstone, &size));
|
||||
|
||||
err:
|
||||
__wt_free(session, tombstone);
|
||||
return (ret);
|
||||
/*
|
||||
* Walk to the end of the chain. The caller guarantees that the chain at this point consists
|
||||
* only of updates from the resolving prepared transaction (and aborted reserve entries), so the
|
||||
* tail's next pointer is stable.
|
||||
*/
|
||||
while (upd_chain->next != NULL)
|
||||
upd_chain = upd_chain->next;
|
||||
|
||||
__wt_atomic_store_ptr_relaxed(&upd_chain->next, tombstone);
|
||||
|
||||
__wt_cache_page_inmem_incr(session, page, size, false);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1218,28 +1223,24 @@ __wt_txn_resolve_prepared_op(WT_SESSION_IMPL *session, WT_BTREE *btree,
|
||||
* If the prepared update is a single tombstone, we don't need to do anything special and we can
|
||||
* directly resolve it in memory.
|
||||
*
|
||||
* If the prepared update is not a tombstone or we have multiple prepared updates in the same
|
||||
* transaction. There are four base cases:
|
||||
* Otherwise there are three resolve cases:
|
||||
*
|
||||
* 1) Prepared updates are on the update chain.
|
||||
* commit: simply resolve the updates on chain.
|
||||
* rollback: simply resolve the updates on chain.
|
||||
* 1) Prepared updates are on the update chain (RESOLVE_UPDATE_CHAIN).
|
||||
* commit: resolve the updates on chain.
|
||||
* rollback: if the prepared update is the only update and there is no on-disk value,
|
||||
* append a globally visible tombstone to delete the key.
|
||||
*
|
||||
* 2) Prepared updates are written to the data store.
|
||||
* If there is no older updates written to the history store:
|
||||
* commit: simply resolve the prepared updates in memory.
|
||||
* rollback: delete the whole key.
|
||||
*
|
||||
* If there are older updates written to the history store:
|
||||
* 2) Prepared updates are written to the data store (RESOLVE_PREPARE_ON_DISK).
|
||||
* If there are older updates in the history store:
|
||||
* commit: restore the newest history store update with a max stop time point to the
|
||||
* update chain. Reconciliation should know when to delete it from the history
|
||||
* store.
|
||||
* rollback:restore the newest update in the history store to the update chain.
|
||||
* Reconciliation should know when to delete it from the history store.
|
||||
* update chain.
|
||||
* rollback: restore the newest history store update to the update chain.
|
||||
* If there are no older updates in the history store:
|
||||
* commit: resolve the prepared updates in memory.
|
||||
* rollback: append a globally visible tombstone to delete the key.
|
||||
*
|
||||
* 4) We are running an in-memory database:
|
||||
* commit: resolve the prepared updates in memory.
|
||||
* rollback: if the prepared update is written to the disk image, delete the whole key.
|
||||
* 3) We are running an in-memory database (RESOLVE_IN_MEMORY).
|
||||
* commit/rollback: resolve the prepared updates in memory only.
|
||||
*/
|
||||
|
||||
/*
|
||||
@ -1264,7 +1265,7 @@ __wt_txn_resolve_prepared_op(WT_SESSION_IMPL *session, WT_BTREE *btree,
|
||||
if (!commit && first_committed_upd == NULL) {
|
||||
tw_found = __wt_read_cell_time_window(cbt, &tw);
|
||||
if (!tw_found)
|
||||
WT_ERR(__txn_prepare_rollback_delete_key(session, btree, cbt));
|
||||
WT_ERR(__txn_prepare_rollback_delete_key(session, page, head_upd));
|
||||
else
|
||||
WT_ASSERT_ALWAYS(
|
||||
session, !WT_TIME_WINDOW_HAS_PREPARE(&tw), "no committed update to fallback to.");
|
||||
@ -1303,12 +1304,12 @@ __wt_txn_resolve_prepared_op(WT_SESSION_IMPL *session, WT_BTREE *btree,
|
||||
else {
|
||||
ret = 0;
|
||||
/*
|
||||
* Allocate a tombstone and prepend it to the row so when we reconcile the update chain
|
||||
* we don't copy the prepared cell, which is now associated with a rolled back prepare,
|
||||
* and instead write nothing.
|
||||
* Append a globally visible tombstone to the end of the chain. When reconciliation
|
||||
* later drops the rolled-back prepared cell, the tombstone remains as the correct
|
||||
* post-rollback state for the key.
|
||||
*/
|
||||
if (!commit)
|
||||
WT_ERR(__txn_prepare_rollback_delete_key(session, btree, cbt));
|
||||
WT_ERR(__txn_prepare_rollback_delete_key(session, page, head_upd));
|
||||
}
|
||||
break;
|
||||
case RESOLVE_IN_MEMORY:
|
||||
@ -2764,13 +2765,15 @@ __wt_verbose_dump_txn_one(
|
||||
|
||||
buf_len = 512;
|
||||
WT_RET(__wt_scr_alloc(session, buf_len, &buf));
|
||||
|
||||
const char *session_name = __wt_atomic_load_ptr_relaxed(&txn_session->name);
|
||||
WT_ERR(__wt_snprintf((char *)buf->data, buf_len,
|
||||
"session ID: %" PRIu32 ", txn ID: %" PRIu64 ", pinned ID: %" PRIu64
|
||||
", metadata pinned ID: %" PRIu64 ", name: %s",
|
||||
txn_session->id, __wt_atomic_load_uint64_v_relaxed(&txn_shared->id),
|
||||
__wt_atomic_load_uint64_v_relaxed(&txn_shared->pinned_id),
|
||||
__wt_atomic_load_uint64_v_relaxed(&txn_shared->metadata_pinned),
|
||||
txn_session->name == NULL ? "EMPTY" : txn_session->name));
|
||||
session_name == NULL ? "EMPTY" : session_name));
|
||||
|
||||
if (error_code != 0)
|
||||
WT_ERR_MSG(session, error_code, "%s, %s", (char *)buf->data,
|
||||
|
||||
@ -326,6 +326,26 @@ TEST_CASE_METHOD(kp_fixture, "Persist key, failure", "[key_provider]")
|
||||
free(const_cast<void *>(crypt.keys.data));
|
||||
}
|
||||
|
||||
TEST_CASE_METHOD(kp_fixture, "set_key_provider version selects push mode", "[key_provider]")
|
||||
{
|
||||
WT_CONNECTION *wt_conn = conn.get_wt_connection();
|
||||
WT_CONNECTION_IMPL *conn_impl = conn.get_wt_connection_impl();
|
||||
WT_KEY_PROVIDER stub = {};
|
||||
|
||||
/* version=0 (default): push flag stays clear. */
|
||||
REQUIRE(wt_conn->set_key_provider(wt_conn, &stub, "version=0") == 0);
|
||||
REQUIRE(!F_ISSET(conn_impl, WT_CONN_KEY_PROVIDER_PUSH));
|
||||
conn_impl->key_provider = nullptr; /* Allow reconfiguration. */
|
||||
|
||||
/* version=1: push flag is set. */
|
||||
REQUIRE(wt_conn->set_key_provider(wt_conn, &stub, "version=1") == 0);
|
||||
REQUIRE(F_ISSET(conn_impl, WT_CONN_KEY_PROVIDER_PUSH));
|
||||
|
||||
/* Cleanup so the fixture destructor doesn't see a stale provider. */
|
||||
conn_impl->key_provider = nullptr;
|
||||
F_CLR(conn_impl, WT_CONN_KEY_PROVIDER_PUSH);
|
||||
}
|
||||
|
||||
TEST_CASE_METHOD(kp_fixture, "Key always expires", "[key_provider]")
|
||||
{
|
||||
kp_ptr_t kp = kp_init("key_expires=0");
|
||||
|
||||
@ -1489,10 +1489,6 @@ config_disagg_storage(void)
|
||||
config_off(NULL, "ops.compaction");
|
||||
config_off(NULL, "background_compact");
|
||||
|
||||
/* Cursor reposition is not supported for disaggregated storage. */
|
||||
config_off(NULL, "debug.cursor_reposition");
|
||||
config_off(NULL, "stress.evict_reposition");
|
||||
|
||||
/* Tiered storage is not supported with disagg */
|
||||
config_single(NULL, "tiered_storage.storage_source=off", true);
|
||||
}
|
||||
|
||||
208
src/third_party/wiredtiger/test/suite/helper_layered_fast_truncate.py
vendored
Normal file
208
src/third_party/wiredtiger/test/suite/helper_layered_fast_truncate.py
vendored
Normal file
@ -0,0 +1,208 @@
|
||||
#!/usr/bin/env python3
|
||||
#
|
||||
# Public Domain 2014-present MongoDB, Inc.
|
||||
# Public Domain 2008-2014 WiredTiger, Inc.
|
||||
#
|
||||
# This is free and unencumbered software released into the public domain.
|
||||
#
|
||||
# Anyone is free to copy, modify, publish, use, compile, sell, or
|
||||
# distribute this software, either in source code form or as a compiled
|
||||
# binary, for any purpose, commercial or non-commercial, and by any
|
||||
# means.
|
||||
#
|
||||
# In jurisdictions that recognize copyright laws, the author or authors
|
||||
# of this software dedicate any and all copyright interest in the
|
||||
# software to the public domain. We make this dedication for the benefit
|
||||
# of the public at large and to the detriment of our heirs and
|
||||
# successors. We intend this dedication to be an overt act of
|
||||
# relinquishment in perpetuity of all present and future rights to this
|
||||
# software under copyright law.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
# OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
# helper_layered_fast_truncate.py
|
||||
# Shared helpers for the layered fast truncate Python tests.
|
||||
|
||||
from contextlib import closing
|
||||
from itertools import chain
|
||||
from typing import Iterable
|
||||
|
||||
import wiredtiger
|
||||
|
||||
|
||||
def concat(*iterables):
    """
    Concatenate any number of iterables into a single list.

    The inputs are consumed in argument order; calling with no arguments
    returns an empty list.
    """
    return list(chain.from_iterable(iterables))
|
||||
|
||||
|
||||
def range_inclusive(start, stop):
    """
    Return a range covering [start, stop] inclusive.

    The result is empty when stop < start.
    """
    return range(start, stop + 1)
|
||||
|
||||
|
||||
class LayeredFastTruncateConfigMixin:
    """
    Shared helpers for the layered fast truncate test suite.

    The methods rely on attributes and helpers supplied by the class this is
    mixed into: self.session, self.conn, self.uri, self.transaction(),
    self.timestamp_str(), and the disaggregated-storage helpers called below.
    """

    def key(self, n):
        """
        Convert an int into a key; override in subclasses that use a different
        key format.
        """
        return n

    def session_create_config(self):
        """
        Return the session.create() config string. When self.uri starts with
        'table', the block_manager=disagg,type=layered options are appended.
        A missing self.uri is treated as an empty string.
        """
        cfg = 'key_format=i,value_format=S'
        uri = getattr(self, 'uri', '')
        if uri.startswith('table'):
            cfg += ',block_manager=disagg,type=layered'
        return cfg

    def auto_closing_cursor(self, config=None):
        """
        Return a context manager wrapping a new cursor on self.uri; the cursor
        is closed when the 'with' block exits.
        """
        return closing(self.session.open_cursor(self.uri, None, config))

    def populate(self, keys, value='v'):
        """Insert each key with a placeholder value in a single transaction."""
        with self.auto_closing_cursor() as cursor:
            with self.transaction():
                for key in keys:
                    cursor[self.key(key)] = value

    def setup_leader(self, keys=None, extra_cfg=''):
        """
        Create the table on the leader and optionally populate stable. The
        follower picks up these keys via the initial checkpoint.
        """
        self.session.create(self.uri, self.session_create_config() + extra_cfg)
        if keys is not None:
            self.populate(keys)
        # NOTE(review): the checkpoint is assumed to run whether or not keys
        # were supplied -- confirm against the upstream file.
        self.session.checkpoint()

    def setup_follower(self, keys=None):
        """Switch to follower role and optionally write keys to ingest."""
        self.reopen_disagg_conn('disaggregated=(role="follower"),')
        if keys is not None:
            self.populate(keys)

    def truncate(self, start_key=None, stop_key=None, commit_timestamp=None):
        """
        Truncate [start_key, stop_key] inclusive on self.uri. Either bound
        may be None for an open-ended side. If commit_timestamp is set,
        the truncate transaction commits at that timestamp.
        """
        start = stop = None
        try:
            if start_key is not None:
                start = self.session.open_cursor(self.uri)
                start.set_key(self.key(start_key))
            if stop_key is not None:
                stop = self.session.open_cursor(self.uri)
                stop.set_key(self.key(stop_key))
            # session.truncate() needs a URI iff both cursors are NULL.
            uri = self.uri if (start is None and stop is None) else None
            with self.transaction(commit_timestamp=commit_timestamp):
                self.session.truncate(uri, start, stop, None)
        finally:
            # Close whichever bound cursors were opened, even on failure.
            if start is not None:
                start.close()
            if stop is not None:
                stop.close()

    def visible_keys(self, forward=True):
        """Return all keys visible via a scan (forward or backward)."""
        result = []
        with self.auto_closing_cursor() as cursor:
            step = cursor.next if forward else cursor.prev
            with self.transaction(rollback=True):
                while step() == 0:
                    result.append(cursor.get_key())
        return result

    def key_exists(self, key):
        """Return True if key is visible to a search in its own transaction."""
        with self.auto_closing_cursor() as cursor:
            with self.transaction(rollback=True):
                cursor.set_key(self.key(key))
                return cursor.search() == 0

    def search_near_key(self, key):
        """
        Run search_near. Returns (exact, found_key). exact follows WT
        convention: 0 = exact, 1 = positioned above, -1 = positioned
        below, or WT_NOTFOUND if no visible keys exist (in which case
        found_key is None).
        """
        with self.auto_closing_cursor() as cursor:
            with self.transaction(rollback=True):
                cursor.set_key(self.key(key))
                exact = cursor.search_near()
                if exact == wiredtiger.WT_NOTFOUND:
                    return exact, None
                # Read the key while the cursor is still positioned.
                return exact, cursor.get_key()

    def leader_checkpoint(self, ts=None):
        """
        Checkpoint on the leader. When ts is given, first set the stable
        timestamp to ts and the oldest timestamp to 1.
        """
        if ts is not None:
            self.conn.set_timestamp('stable_timestamp=' + self.timestamp_str(ts) +
                ',oldest_timestamp=' + self.timestamp_str(1))
        self.session.checkpoint()

    def step_up(self):
        """Promote self.conn_follow to leader; the original leader steps down."""
        self.ignoreStdoutPattern('Picking up the same checkpoint')
        self.disagg_switch_follower_and_leader(self.conn_follow)

    def open_follower(self, table_config='key_format=i,value_format=S'):
        """
        Open a separate follower connection (distinct from setup_follower
        which reopens the existing connection). Returns (conn, session).
        """
        conn = self.wiredtiger_open(
            'follower',
            self.extensionsConfig() +
            ',create,cache_size=50MB,statistics=(all),disaggregated=(role="follower")')
        session = conn.open_session('')
        session.create(self.uri, table_config)
        self.disagg_advance_checkpoint(conn, self.conn)
        return conn, session

    def search_at(self, session, key, ts):
        """
        Search for key under a read_timestamp; return (ret, value). The key is
        passed to the cursor as given (self.key() is not applied); value is
        None unless the search returned 0.
        """
        cur = session.open_cursor(self.uri)
        try:
            with self.transaction(session=session, read_timestamp=ts, rollback=True):
                cur.set_key(key)
                ret = cur.search()
                val = cur.get_value() if ret == 0 else None
            return ret, val
        finally:
            cur.close()

    def evict_range(self, session, start, stop, step=1, read_timestamp=10):
        """
        Evict the page(s) backing keys [start, stop] on the given session.

        Every step-th key is searched with a debug=(release_evict) cursor and
        the cursor is reset after each search. Keys are used as given
        (self.key() is not applied). read_timestamp is the timestamp the
        enclosing read transaction runs at; it defaults to 10.
        """
        evict_cur = session.open_cursor(self.uri, None, 'debug=(release_evict)')
        try:
            with self.transaction(session=session, read_timestamp=read_timestamp,
              rollback=True):
                for i in range(start, stop + 1, step):
                    evict_cur.set_key(i)
                    evict_cur.search()
                    evict_cur.reset()
        finally:
            evict_cur.close()

    def get_stat(self, conn, stat_key):
        """
        Read a connection statistic on the given connection.

        A temporary session is opened for the lookup and always closed, even
        if the statistic lookup raises.
        """
        s = conn.open_session('')
        try:
            # Index 2 of the statistics cursor entry is the value returned.
            return s.open_cursor('statistics:')[stat_key][2]
        finally:
            s.close()
|
||||
@ -4,6 +4,8 @@
|
||||
test_autoclose.py
|
||||
test_config02.py
|
||||
test_config09.py
|
||||
test_cursor13.py # FIXME: WT-15369
|
||||
test_cursor21.py # FIXME: WT-15369
|
||||
test_drop03.py
|
||||
test_dump.py
|
||||
test_dump01.py
|
||||
|
||||
@ -56,15 +56,7 @@ class test_cursor13_base(wttest.WiredTigerTestCase):
|
||||
def caching_stats(self):
|
||||
hs_stats_uri = 'statistics:file:WiredTigerHS.wt'
|
||||
max_tries = 100
|
||||
# Cursor cache/reopen stats are updated with plain (non-atomic) int64 add/subtract
|
||||
# operations.
|
||||
# A recent increment by another core may not yet be visible to this reader. Re-reading
|
||||
# in a tight Python loop cannot force coherence; the fix is to pause briefly on retry
|
||||
# so store buffers drain and cache lines propagate.
|
||||
retry_sleep = 0.005 # seconds
|
||||
for i in range(max_tries):
|
||||
if i > 0:
|
||||
time.sleep(retry_sleep)
|
||||
hs_stats_before = self.session.open_cursor(hs_stats_uri, None, None)
|
||||
conn_stats = self.session.open_cursor('statistics:', None, None)
|
||||
hs_stats_after = self.session.open_cursor(hs_stats_uri, None, None)
|
||||
@ -86,14 +78,7 @@ class test_cursor13_base(wttest.WiredTigerTestCase):
|
||||
hs_after[0] += hs_disagg_stat_after[stat.dsrc.cursor_cache][2]
|
||||
hs_after[1] += hs_disagg_stat_after[stat.dsrc.cursor_reopen][2]
|
||||
|
||||
report = [totals[0],
|
||||
hs_before[0],
|
||||
hs_disagg_stat_before[stat.dsrc.cursor_cache][2],
|
||||
hs_stats_before[stat.dsrc.cursor_cache][2]]
|
||||
self.pr(' '.join(map(str, report)))
|
||||
|
||||
hs_disagg_stat_before.close()
|
||||
hs_disagg_stat_after.close()
|
||||
self.pr(str(totals[0]) + " " + str(hs_before[0]) + " " + str(hs_disagg_stat_before[stat.dsrc.cursor_cache][2]) + " " + str(hs_stats_before[stat.dsrc.cursor_cache][2]))
|
||||
|
||||
hs_stats_before.close()
|
||||
hs_stats_after.close()
|
||||
@ -526,7 +511,6 @@ class test_cursor13_big(test_cursor13_big_base):
|
||||
self.assertEqual(end_stats[0] - begin_stats[0], self.closecount)
|
||||
self.assertEqual(end_stats[1] - begin_stats[1], self.opencount)
|
||||
|
||||
@wttest.skip_for_hook("disagg", "layered dhandles are never swept: FIXME-WT-16982")
|
||||
class test_cursor13_sweep(test_cursor13_big_base):
|
||||
# Set dhandle sweep configuration so that dhandles should be closed within
|
||||
# two seconds of all the cursors for the dhandle being closed (cached).
|
||||
|
||||
@ -31,7 +31,7 @@
|
||||
|
||||
import wttest
|
||||
from wtscenario import make_scenarios
|
||||
from wiredtiger import stat, WiredTigerError
|
||||
from wiredtiger import stat
|
||||
|
||||
class test_cursor21(wttest.WiredTigerTestCase):
|
||||
uri = "table:test_cursor21"
|
||||
@ -71,7 +71,6 @@ class test_cursor21(wttest.WiredTigerTestCase):
|
||||
self.assertEqual(reposition_count, 0)
|
||||
return reposition_count
|
||||
|
||||
@wttest.skip_for_hook("disagg", "layered tables don't support cursor reposition")
|
||||
def test_cursor21(self):
|
||||
format = 'key_format={},value_format={}'.format(self.key_format, self.value_format)
|
||||
reposition_count = 0
|
||||
@ -126,15 +125,3 @@ class test_cursor21(wttest.WiredTigerTestCase):
|
||||
reposition_count += self.check_reposition(reposition_count)
|
||||
cursor.close()
|
||||
self.session.close()
|
||||
|
||||
@wttest.only_for_hook("disagg", "check reposition is disabled for disaggregated storage")
|
||||
def test_cursor21_dsc(self):
|
||||
# Skip the test if reposition is disabled or it's column store (unsupported in disagg).
|
||||
if not self.reposition or self.scenario_name == 'column.reposition':
|
||||
return
|
||||
|
||||
format = 'key_format={},value_format={}'.format(self.key_format, self.value_format)
|
||||
self.session.create(self.uri, format)
|
||||
msg = '/Operation not supported/'
|
||||
self.assertRaisesWithMessage(WiredTigerError,
|
||||
lambda: self.session.open_cursor(self.uri), msg)
|
||||
|
||||
@ -34,7 +34,6 @@ import wiredtiger
|
||||
from wtscenario import make_scenarios
|
||||
|
||||
WT_TS_MAX = 2**64 - 1
|
||||
WT_UPDATE_PREPARE_ROLLBACK = 0x080
|
||||
|
||||
class test_cursor24(wttest.WiredTigerTestCase):
|
||||
uri = 'file:test_cursor24.wt'
|
||||
|
||||
@ -181,7 +181,7 @@ class test_cursor25(wttest.WiredTigerTestCase):
|
||||
cursor[1] = 10
|
||||
self.session.commit_transaction("commit_timestamp=" + self.timestamp_str(1))
|
||||
|
||||
# Prepared overwrite + rollback. No PREPARE_ROLLBACK tombstone because
|
||||
# Prepared overwrite + rollback. No rollback tombstone appended because
|
||||
# first_committed_upd != NULL.
|
||||
session2 = self.conn.open_session()
|
||||
cursor2 = session2.open_cursor(self.uri, None)
|
||||
@ -229,7 +229,7 @@ class test_cursor25(wttest.WiredTigerTestCase):
|
||||
cursor[1] = 10
|
||||
self.session.commit_transaction("commit_timestamp=" + self.timestamp_str(1))
|
||||
|
||||
# Prepared delete + rollback. No PREPARE_ROLLBACK tombstone because
|
||||
# Prepared delete + rollback. No rollback tombstone appended because
|
||||
# first_committed_upd != NULL.
|
||||
session2 = self.conn.open_session()
|
||||
cursor2 = session2.open_cursor(self.uri, None)
|
||||
|
||||
@ -199,10 +199,13 @@ class test_layered69(test_prepare_preserve_prepare_base):
|
||||
session_prepare.rollback_transaction(f'rollback_timestamp={self.timestamp_str(45)}')
|
||||
session_prepare.close()
|
||||
|
||||
# Verify checkpoint skips writing a page to disk
|
||||
# Verify checkpoint skips writing a page to disk. When the page was evicted before the
|
||||
# prepare, the prior committed delete tombstone is gone from memory, so the prepare
|
||||
# rollback appends a fresh tail tombstone with no durable flag set; that tombstone gets
|
||||
# re-saved and causes one extra write here.
|
||||
self.checkpoint_and_verify_stats({
|
||||
wiredtiger.stat.dsrc.rec_time_window_prepared: False,
|
||||
stat: False,
|
||||
stat: self.evict,
|
||||
}, self.uri)
|
||||
|
||||
# Make stable timestamp equal to prepare timestamp - this should allow checkpoint to reconcile prepared update
|
||||
|
||||
@ -28,12 +28,13 @@
|
||||
|
||||
import unittest, wttest, wiredtiger
|
||||
from helper_disagg import disagg_test_class, gen_disagg_storages
|
||||
from helper_layered_fast_truncate import LayeredFastTruncateConfigMixin
|
||||
from wtscenario import make_scenarios
|
||||
|
||||
# test_layered_fast_truncate01.py
|
||||
# Test basic fast truncate functionality.
|
||||
@disagg_test_class
|
||||
class test_layered_fast_truncate01(wttest.WiredTigerTestCase):
|
||||
class test_layered_fast_truncate01(LayeredFastTruncateConfigMixin, wttest.WiredTigerTestCase):
|
||||
|
||||
conn_config = 'disaggregated=(role="leader"),'
|
||||
|
||||
@ -48,6 +49,9 @@ class test_layered_fast_truncate01(wttest.WiredTigerTestCase):
|
||||
|
||||
nitems = 1000
|
||||
|
||||
def key(self, n):
|
||||
return str(n)
|
||||
|
||||
def session_create_config(self):
|
||||
cfg = 'key_format=S,value_format=S'
|
||||
if self.uri.startswith('table'):
|
||||
|
||||
@ -32,10 +32,11 @@
|
||||
|
||||
import wiredtiger, wttest
|
||||
from helper_disagg import disagg_test_class, gen_disagg_storages
|
||||
from helper_layered_fast_truncate import LayeredFastTruncateConfigMixin
|
||||
from wtscenario import make_scenarios
|
||||
|
||||
@disagg_test_class
|
||||
class test_layered_fast_truncate02(wttest.WiredTigerTestCase):
|
||||
class test_layered_fast_truncate02(LayeredFastTruncateConfigMixin, wttest.WiredTigerTestCase):
|
||||
|
||||
uri = 'layered:test_layered_fast_truncate02'
|
||||
nrows = 5000
|
||||
@ -48,11 +49,6 @@ class test_layered_fast_truncate02(wttest.WiredTigerTestCase):
|
||||
disagg_storages = gen_disagg_storages('test_layered_fast_truncate02', disagg_only=True)
|
||||
scenarios = make_scenarios(disagg_storages)
|
||||
|
||||
def leader_checkpoint(self, ts):
|
||||
self.conn.set_timestamp('stable_timestamp=' + self.timestamp_str(ts) +
|
||||
',oldest_timestamp=' + self.timestamp_str(1))
|
||||
self.session.checkpoint()
|
||||
|
||||
def setup_leader(self):
|
||||
self.conn.set_timestamp('oldest_timestamp=' + self.timestamp_str(1))
|
||||
self.session.create(self.uri, 'key_format=i,value_format=S')
|
||||
@ -74,44 +70,12 @@ class test_layered_fast_truncate02(wttest.WiredTigerTestCase):
|
||||
evict_cur.close()
|
||||
self.session.rollback_transaction()
|
||||
|
||||
def truncate_and_checkpoint(self, trunc_start, trunc_stop, ts):
|
||||
# Fast-truncate rows [trunc_start, trunc_stop] on the leader and checkpoint.
|
||||
c_start = self.session.open_cursor(self.uri)
|
||||
c_start.set_key(trunc_start)
|
||||
c_stop = self.session.open_cursor(self.uri)
|
||||
c_stop.set_key(trunc_stop)
|
||||
self.session.begin_transaction()
|
||||
self.session.truncate(None, c_start, c_stop, None)
|
||||
self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(ts))
|
||||
c_start.close()
|
||||
c_stop.close()
|
||||
self.leader_checkpoint(ts)
|
||||
|
||||
def open_follower(self):
|
||||
conn = self.wiredtiger_open(
|
||||
'follower',
|
||||
self.extensionsConfig() + ',create,cache_size=50MB,statistics=(all),disaggregated=(role="follower")')
|
||||
sess = conn.open_session('')
|
||||
sess.create(self.uri, 'key_format=i,value_format=S')
|
||||
self.disagg_advance_checkpoint(conn, self.conn)
|
||||
return conn, sess
|
||||
|
||||
def search_at(self, sess, key, ts):
|
||||
cur = sess.open_cursor(self.uri)
|
||||
txn_cfg = ('read_timestamp=' + self.timestamp_str(ts))
|
||||
sess.begin_transaction(txn_cfg)
|
||||
cur.set_key(key)
|
||||
ret = cur.search()
|
||||
val = cur.get_value() if ret == 0 else None
|
||||
sess.rollback_transaction()
|
||||
cur.close()
|
||||
return ret, val
|
||||
|
||||
def test_visibility(self):
|
||||
# At ts=20 (equal to truncation at ts=20): truncated keys return WT_NOTFOUND, boundary and
|
||||
# exterior keys return their values. At ts=15 (before truncation): all keys are visible.
|
||||
self.setup_leader()
|
||||
self.truncate_and_checkpoint(self.trunc_start, self.trunc_stop, 20)
|
||||
self.truncate(self.trunc_start, self.trunc_stop, commit_timestamp=20)
|
||||
self.leader_checkpoint(20)
|
||||
conn, sess = self.open_follower()
|
||||
|
||||
# Truncation is visible: deleted keys are gone, surrounding keys survive.
|
||||
@ -137,7 +101,8 @@ class test_layered_fast_truncate02(wttest.WiredTigerTestCase):
|
||||
# Reading at a timestamp before the truncation must still find all rows, including those
|
||||
# later deleted. Verifies mvcc correctness across the follower checkpoint boundary.
|
||||
self.setup_leader()
|
||||
self.truncate_and_checkpoint(self.trunc_start, self.trunc_stop, 20)
|
||||
self.truncate(self.trunc_start, self.trunc_stop, commit_timestamp=20)
|
||||
self.leader_checkpoint(20)
|
||||
conn, sess = self.open_follower()
|
||||
|
||||
for key in [self.trunc_start, self.trunc_mid, self.trunc_stop]:
|
||||
@ -161,7 +126,8 @@ class test_layered_fast_truncate02(wttest.WiredTigerTestCase):
|
||||
# Forward and backward scans must skip the entire truncated range without visiting any
|
||||
# deleted key. search_near on a deleted key must land outside the range.
|
||||
self.setup_leader()
|
||||
self.truncate_and_checkpoint(self.trunc_start, self.trunc_stop, 20)
|
||||
self.truncate(self.trunc_start, self.trunc_stop, commit_timestamp=20)
|
||||
self.leader_checkpoint(20)
|
||||
conn, sess = self.open_follower()
|
||||
|
||||
expected = self.nrows - (self.trunc_stop - self.trunc_start + 1)
|
||||
|
||||
@ -33,11 +33,12 @@
|
||||
|
||||
import wiredtiger, wttest
|
||||
from helper_disagg import disagg_test_class, gen_disagg_storages
|
||||
from helper_layered_fast_truncate import LayeredFastTruncateConfigMixin
|
||||
from wtscenario import make_scenarios
|
||||
from wiredtiger import stat
|
||||
|
||||
@disagg_test_class
|
||||
class test_layered_fast_truncate03(wttest.WiredTigerTestCase):
|
||||
class test_layered_fast_truncate03(LayeredFastTruncateConfigMixin, wttest.WiredTigerTestCase):
|
||||
|
||||
uri = 'layered:test_layered_fast_truncate03'
|
||||
nrows = 5000
|
||||
@ -49,17 +50,6 @@ class test_layered_fast_truncate03(wttest.WiredTigerTestCase):
|
||||
disagg_storages = gen_disagg_storages('test_layered_fast_truncate03', disagg_only=True)
|
||||
scenarios = make_scenarios(disagg_storages)
|
||||
|
||||
def get_stat(self, conn, stat_key):
|
||||
s = conn.open_session('')
|
||||
val = s.open_cursor('statistics:')[stat_key][2]
|
||||
s.close()
|
||||
return val
|
||||
|
||||
def leader_checkpoint(self, ts):
|
||||
self.conn.set_timestamp('stable_timestamp=' + self.timestamp_str(ts) +
|
||||
',oldest_timestamp=' + self.timestamp_str(1))
|
||||
self.session.checkpoint()
|
||||
|
||||
def setup_leader(self, extra_cfg=''):
|
||||
self.conn.set_timestamp('oldest_timestamp=' + self.timestamp_str(1))
|
||||
self.session.create(self.uri, 'key_format=i,value_format=S' + extra_cfg)
|
||||
@ -81,58 +71,16 @@ class test_layered_fast_truncate03(wttest.WiredTigerTestCase):
|
||||
evict_cur.close()
|
||||
self.session.rollback_transaction()
|
||||
|
||||
def truncate_and_checkpoint(self, trunc_start, trunc_stop, ts):
|
||||
# Fast-truncate rows [trunc_start, trunc_stop] on the leader and checkpoint.
|
||||
c_start = self.session.open_cursor(self.uri)
|
||||
c_start.set_key(trunc_start)
|
||||
c_stop = self.session.open_cursor(self.uri)
|
||||
c_stop.set_key(trunc_stop)
|
||||
self.session.begin_transaction()
|
||||
self.session.truncate(None, c_start, c_stop, None)
|
||||
self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(ts))
|
||||
c_start.close()
|
||||
c_stop.close()
|
||||
self.leader_checkpoint(ts)
|
||||
|
||||
def open_follower(self):
|
||||
conn = self.wiredtiger_open(
|
||||
'follower',
|
||||
self.extensionsConfig() + ',create,cache_size=50MB,statistics=(all),disaggregated=(role="follower")')
|
||||
sess = conn.open_session('')
|
||||
sess.create(self.uri, 'key_format=i,value_format=S')
|
||||
self.disagg_advance_checkpoint(conn, self.conn)
|
||||
return conn, sess
|
||||
|
||||
def advance_follower(self, conn):
|
||||
self.leader_checkpoint(20)
|
||||
self.disagg_advance_checkpoint(conn, self.conn)
|
||||
|
||||
def evict_range(self, sess, start, stop, step=1):
|
||||
evict_cur = sess.open_cursor(self.uri, None, 'debug=(release_evict)')
|
||||
sess.begin_transaction('read_timestamp=' + self.timestamp_str(10))
|
||||
for i in range(start, stop + 1, step):
|
||||
evict_cur.set_key(i)
|
||||
evict_cur.search()
|
||||
evict_cur.reset()
|
||||
evict_cur.close()
|
||||
sess.rollback_transaction()
|
||||
|
||||
def search_at(self, sess, key, ts):
|
||||
cur = sess.open_cursor(self.uri)
|
||||
txn_cfg = ('read_timestamp=' + self.timestamp_str(ts))
|
||||
sess.begin_transaction(txn_cfg)
|
||||
cur.set_key(key)
|
||||
ret = cur.search()
|
||||
val = cur.get_value() if ret == 0 else None
|
||||
sess.rollback_transaction()
|
||||
cur.close()
|
||||
return ret, val
|
||||
|
||||
def test_no_dirty_on_read(self):
|
||||
# Reading fast-truncated pages on the follower must never dirty them. Verifies this holds
|
||||
# across a full load-evict-reload cycle for both single and bulk page reads.
|
||||
self.setup_leader()
|
||||
self.truncate_and_checkpoint(self.trunc_start, self.trunc_stop, 20)
|
||||
self.truncate(self.trunc_start, self.trunc_stop, commit_timestamp=20)
|
||||
self.leader_checkpoint(20)
|
||||
conn, sess = self.open_follower()
|
||||
sample = list(range(self.trunc_start, self.trunc_stop + 1, 10))
|
||||
dirty_before = self.get_stat(conn, stat.conn.cache_pages_dirty)
|
||||
@ -168,7 +116,8 @@ class test_layered_fast_truncate03(wttest.WiredTigerTestCase):
|
||||
# restore a subset of truncated keys, those keys must be visible while the rest
|
||||
# remain deleted.
|
||||
self.setup_leader(',leaf_page_max=4096')
|
||||
self.truncate_and_checkpoint(self.trunc_start, self.trunc_stop, 20)
|
||||
self.truncate(self.trunc_start, self.trunc_stop, commit_timestamp=20)
|
||||
self.leader_checkpoint(20)
|
||||
conn, sess = self.open_follower()
|
||||
sample = list(range(self.trunc_start, self.trunc_stop + 1, 10))
|
||||
dirty_before = self.get_stat(conn, stat.conn.cache_pages_dirty)
|
||||
@ -226,7 +175,8 @@ class test_layered_fast_truncate03(wttest.WiredTigerTestCase):
|
||||
# Closing and reopening the follower connection must not lose the deleted state.
|
||||
# The same checkpoint must still show truncated keys as WT_NOTFOUND after a cold start.
|
||||
self.setup_leader()
|
||||
self.truncate_and_checkpoint(self.trunc_start, self.trunc_stop, 20)
|
||||
self.truncate(self.trunc_start, self.trunc_stop, commit_timestamp=20)
|
||||
self.leader_checkpoint(20)
|
||||
|
||||
truncated_keys = [self.trunc_start, self.trunc_start + 100, self.trunc_stop]
|
||||
non_truncated_keys = [1, self.trunc_start - 1, self.trunc_stop + 1, self.nrows]
|
||||
@ -250,7 +200,8 @@ class test_layered_fast_truncate03(wttest.WiredTigerTestCase):
|
||||
# Reading a deleted page at a timestamp before the truncation forces it to load from disk.
|
||||
# The key must be found, cache_read_deleted must increment, and the page must not be dirtied.
|
||||
self.setup_leader()
|
||||
self.truncate_and_checkpoint(self.trunc_start, self.trunc_stop, 20)
|
||||
self.truncate(self.trunc_start, self.trunc_stop, commit_timestamp=20)
|
||||
self.leader_checkpoint(20)
|
||||
conn, sess = self.open_follower()
|
||||
|
||||
dirty_before = self.get_stat(conn, stat.conn.cache_pages_dirty)
|
||||
|
||||
@ -26,9 +26,9 @@
|
||||
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
# OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
import unittest
|
||||
import wttest, wiredtiger
|
||||
import wttest
|
||||
from helper_disagg import disagg_test_class, gen_disagg_storages
|
||||
from helper_layered_fast_truncate import LayeredFastTruncateConfigMixin
|
||||
from wtscenario import make_scenarios
|
||||
|
||||
# test_layered_fast_truncate04.py
|
||||
@ -37,7 +37,7 @@ from wtscenario import make_scenarios
|
||||
# open-ended truncation, multiple truncated ranges, and mixed
|
||||
# update-then-truncate workloads.
|
||||
@disagg_test_class
|
||||
class test_layered_fast_truncate04(wttest.WiredTigerTestCase):
|
||||
class test_layered_fast_truncate04(LayeredFastTruncateConfigMixin, wttest.WiredTigerTestCase):
|
||||
|
||||
conn_config = 'disaggregated=(role="leader"),'
|
||||
|
||||
@ -54,8 +54,7 @@ class test_layered_fast_truncate04(wttest.WiredTigerTestCase):
|
||||
# digits so that lexicographic order matches numeric order.
|
||||
nitems = 1000
|
||||
|
||||
@staticmethod
|
||||
def key(n):
|
||||
def key(self, n):
|
||||
return f'{n:04d}'
|
||||
|
||||
def session_create_config(self):
|
||||
@ -66,104 +65,35 @@ class test_layered_fast_truncate04(wttest.WiredTigerTestCase):
|
||||
|
||||
# Populate the table on the leader, checkpoint, then reopen as follower.
|
||||
def setup_follower(self):
|
||||
self.session.create(self.uri, self.session_create_config())
|
||||
cursor = self.session.open_cursor(self.uri)
|
||||
for i in range(self.nitems):
|
||||
self.session.begin_transaction()
|
||||
cursor[self.key(i)] = 'value'
|
||||
self.session.commit_transaction()
|
||||
cursor.close()
|
||||
self.session.checkpoint()
|
||||
self.setup_leader(keys=range(self.nitems))
|
||||
super().setup_follower()
|
||||
|
||||
follower_config = (
|
||||
'disaggregated=(role="follower",'
|
||||
f'checkpoint_meta="{self.disagg_get_complete_checkpoint_meta()}")'
|
||||
)
|
||||
self.reopen_conn(config=follower_config)
|
||||
|
||||
# Truncate the range [start, stop] (inclusive). If stop is None, truncate
|
||||
# from start to the end of the table.
|
||||
def truncate_range(self, start, stop):
|
||||
c1 = self.session.open_cursor(self.uri)
|
||||
c1.set_key(self.key(start))
|
||||
c2 = None
|
||||
if stop is not None:
|
||||
c2 = self.session.open_cursor(self.uri)
|
||||
c2.set_key(self.key(stop))
|
||||
self.session.begin_transaction()
|
||||
self.session.truncate(None, c1, c2, None)
|
||||
self.session.commit_transaction()
|
||||
c1.close()
|
||||
if c2 is not None:
|
||||
c2.close()
|
||||
|
||||
# Return all keys visible via a forward scan.
|
||||
def scan_forward(self):
|
||||
cursor = self.session.open_cursor(self.uri)
|
||||
self.session.begin_transaction()
|
||||
keys = []
|
||||
while cursor.next() == 0:
|
||||
keys.append(cursor.get_key())
|
||||
self.session.rollback_transaction()
|
||||
cursor.close()
|
||||
return keys
|
||||
|
||||
# Return all keys visible via a backward scan.
|
||||
def scan_backward(self):
|
||||
cursor = self.session.open_cursor(self.uri)
|
||||
self.session.begin_transaction()
|
||||
keys = []
|
||||
while cursor.prev() == 0:
|
||||
keys.append(cursor.get_key())
|
||||
self.session.rollback_transaction()
|
||||
cursor.close()
|
||||
return list(reversed(keys)) # reverse so order matches forward scan
|
||||
# Return all keys visible via a forward and a backward scan; assert both
|
||||
# match the expected list.
|
||||
def assert_scan(self, expected):
|
||||
self.assertEqual(self.visible_keys(), expected, 'forward scan mismatch')
|
||||
self.assertEqual(list(reversed(self.visible_keys(forward=False))), expected,
|
||||
'backward scan mismatch')
|
||||
|
||||
# Run search_near in its own transaction; return (exact, landed_key).
|
||||
def search_near(self, key):
|
||||
cursor = self.session.open_cursor(self.uri)
|
||||
self.session.begin_transaction()
|
||||
cursor.set_key(self.key(key))
|
||||
exact = cursor.search_near()
|
||||
landed = cursor.get_key()
|
||||
self.session.rollback_transaction()
|
||||
cursor.close()
|
||||
return exact, landed
|
||||
|
||||
# Run search in its own transaction; return the return value (0 or WT_NOTFOUND).
|
||||
def search(self, key):
|
||||
cursor = self.session.open_cursor(self.uri)
|
||||
self.session.begin_transaction()
|
||||
cursor.set_key(self.key(key))
|
||||
ret = cursor.search()
|
||||
self.session.rollback_transaction()
|
||||
cursor.close()
|
||||
return ret
|
||||
|
||||
# Assert forward and backward scans both return the expected key list.
|
||||
def assert_scan(self, expected):
|
||||
self.assertEqual(self.scan_forward(), expected, 'forward scan mismatch')
|
||||
self.assertEqual(self.scan_backward(), expected, 'backward scan mismatch')
|
||||
return self.search_near_key(key)
|
||||
|
||||
# Write a single key/value pair in its own transaction.
|
||||
def put(self, key, value='v'):
|
||||
cursor = self.session.open_cursor(self.uri)
|
||||
self.session.begin_transaction()
|
||||
cursor[self.key(key)] = value
|
||||
self.session.commit_transaction()
|
||||
cursor.close()
|
||||
self.populate([key], value=value)
|
||||
|
||||
def test_cursor_scan_skips_truncated_range(self):
|
||||
# Forward and backward scans must skip every key in the truncated range.
|
||||
self.setup_follower()
|
||||
self.truncate_range(100, 700)
|
||||
self.truncate(100, 700)
|
||||
self.assert_scan([self.key(i) for i in range(self.nitems) if i < 100 or i > 700])
|
||||
|
||||
def test_search_near_inside_truncated_range(self):
|
||||
# search_near for a key deep inside a truncated range must land outside
|
||||
# the range and must not report an exact match.
|
||||
self.setup_follower()
|
||||
self.truncate_range(100, 700)
|
||||
self.truncate(100, 700)
|
||||
|
||||
exact, landed = self.search_near(400)
|
||||
self.assertFalse(self.key(100) <= landed <= self.key(700),
|
||||
@ -175,7 +105,7 @@ class test_layered_fast_truncate04(wttest.WiredTigerTestCase):
|
||||
# as candidates for search_near. Test both directions by placing the
|
||||
# single visible ingest key above or below the search key.
|
||||
self.setup_follower()
|
||||
self.truncate_range(0, self.nitems - 1)
|
||||
self.truncate(0, self.nitems - 1)
|
||||
|
||||
# Scenario 1: ingest 0600 above search key 0500 forward (exact=1).
|
||||
self.put(600, 'ingest-live')
|
||||
@ -197,7 +127,7 @@ class test_layered_fast_truncate04(wttest.WiredTigerTestCase):
|
||||
# The start and stop keys of the range are inclusive, so search_near at
|
||||
# either boundary must land strictly outside the range.
|
||||
self.setup_follower()
|
||||
self.truncate_range(100, 700)
|
||||
self.truncate(100, 700)
|
||||
|
||||
for boundary in (100, 700):
|
||||
_, landed = self.search_near(boundary)
|
||||
@ -207,22 +137,22 @@ class test_layered_fast_truncate04(wttest.WiredTigerTestCase):
|
||||
def test_truncate_to_end_of_table(self):
|
||||
# Open-ended truncate from key 500; only 0-499 remain visible.
|
||||
self.setup_follower()
|
||||
self.truncate_range(500, None)
|
||||
self.truncate(500, None)
|
||||
self.assert_scan([self.key(i) for i in range(500)])
|
||||
|
||||
def test_multiple_truncate_ranges(self):
|
||||
# Two disjoint bounded ranges; scans must skip both.
|
||||
self.setup_follower()
|
||||
self.truncate_range(100, 300)
|
||||
self.truncate_range(600, 800)
|
||||
self.truncate(100, 300)
|
||||
self.truncate(600, 800)
|
||||
self.assert_scan([self.key(i) for i in range(self.nitems)
|
||||
if not (100 <= i <= 300) and not (600 <= i <= 800)])
|
||||
|
||||
def test_mixed_bounded_and_open_ended_truncates(self):
|
||||
# Bounded [100, 300] combined with open-ended [600, end]; 0-99 and 301-599 visible.
|
||||
self.setup_follower()
|
||||
self.truncate_range(100, 300)
|
||||
self.truncate_range(600, None)
|
||||
self.truncate(100, 300)
|
||||
self.truncate(600, None)
|
||||
self.assert_scan([self.key(i) for i in range(self.nitems)
|
||||
if i < 100 or (301 <= i <= 599)])
|
||||
|
||||
@ -230,7 +160,7 @@ class test_layered_fast_truncate04(wttest.WiredTigerTestCase):
|
||||
# Open-ended truncate captures a snapshot of "end" at commit time. Keys
|
||||
# appended afterwards are new data and must remain visible.
|
||||
self.setup_follower()
|
||||
self.truncate_range(800, None)
|
||||
self.truncate(800, None)
|
||||
|
||||
for i in range(1000, 1100):
|
||||
self.put(i, 'appended')
|
||||
@ -244,23 +174,23 @@ class test_layered_fast_truncate04(wttest.WiredTigerTestCase):
|
||||
self.setup_follower()
|
||||
for i in range(200, 401):
|
||||
self.put(i, 'updated')
|
||||
self.truncate_range(100, 700)
|
||||
self.truncate(100, 700)
|
||||
|
||||
self.assert_scan([self.key(i) for i in range(self.nitems) if i < 100 or i > 700])
|
||||
self.assertEqual(self.search(300), wiredtiger.WT_NOTFOUND,
|
||||
self.assertFalse(self.key_exists(300),
|
||||
'search must hide an updated-then-truncated key')
|
||||
|
||||
def test_search_returns_not_found_in_truncated_range(self):
|
||||
# search() goes through a different read path than scans and search_near;
|
||||
# both boundaries and interior keys must return WT_NOTFOUND.
|
||||
self.setup_follower()
|
||||
self.truncate_range(100, 700)
|
||||
self.truncate(100, 700)
|
||||
|
||||
for k in (400, 100, 700):
|
||||
self.assertEqual(self.search(k), wiredtiger.WT_NOTFOUND,
|
||||
self.assertFalse(self.key_exists(k),
|
||||
f'search({self.key(k)}) inside range must be hidden')
|
||||
for k in (99, 701):
|
||||
self.assertEqual(self.search(k), 0,
|
||||
self.assertTrue(self.key_exists(k),
|
||||
f'search({self.key(k)}) outside range must succeed')
|
||||
|
||||
def test_search_near_direction_in_truncated_range(self):
|
||||
@ -269,24 +199,24 @@ class test_layered_fast_truncate04(wttest.WiredTigerTestCase):
|
||||
self.setup_follower()
|
||||
|
||||
# Bounded range [100, 700]. Forward finds 0701.
|
||||
self.truncate_range(100, 700)
|
||||
self.truncate(100, 700)
|
||||
self.assertEqual(self.search_near(400), (1, self.key(701)), 'forward scenario')
|
||||
|
||||
# Add open-ended truncate [800, end]. Forward exhausts, falls back to 0799.
|
||||
self.truncate_range(800, None)
|
||||
self.truncate(800, None)
|
||||
self.assertEqual(self.search_near(900), (-1, self.key(799)), 'backward scenario')
|
||||
|
||||
def test_overlapping_truncated_ranges_scan(self):
|
||||
# Two overlapping ranges [100, 400] and [300, 700]: scans must skip the
|
||||
# full union [100, 700], not just one range at a time.
|
||||
self.setup_follower()
|
||||
self.truncate_range(100, 400)
|
||||
self.truncate_range(300, 700)
|
||||
self.truncate(100, 400)
|
||||
self.truncate(300, 700)
|
||||
self.assert_scan([self.key(i) for i in range(self.nitems)
|
||||
if i < 100 or i > 700])
|
||||
|
||||
def test_entire_table_truncated(self):
|
||||
# Truncate every key; both scans must be empty.
|
||||
self.setup_follower()
|
||||
self.truncate_range(0, self.nitems - 1)
|
||||
self.truncate(0, self.nitems - 1)
|
||||
self.assert_scan([])
|
||||
|
||||
@ -28,6 +28,7 @@
|
||||
|
||||
import wttest
|
||||
from helper_disagg import disagg_test_class, gen_disagg_storages
|
||||
from helper_layered_fast_truncate import LayeredFastTruncateConfigMixin
|
||||
from wtscenario import make_scenarios
|
||||
|
||||
# test_layered_fast_truncate05.py
|
||||
@ -35,7 +36,7 @@ from wtscenario import make_scenarios
|
||||
# standby (follower) node.
|
||||
|
||||
@disagg_test_class
|
||||
class test_layered_fast_truncate05(wttest.WiredTigerTestCase):
|
||||
class test_layered_fast_truncate05(LayeredFastTruncateConfigMixin, wttest.WiredTigerTestCase):
|
||||
|
||||
conn_config = 'disaggregated=(role="leader"),'
|
||||
|
||||
@ -52,8 +53,7 @@ class test_layered_fast_truncate05(wttest.WiredTigerTestCase):
|
||||
# digits so that lexicographic order matches numeric order.
|
||||
nitems = 1000
|
||||
|
||||
@staticmethod
|
||||
def key(n):
|
||||
def key(self, n):
|
||||
return f'{n:04d}'
|
||||
|
||||
def session_create_config(self):
|
||||
@ -64,36 +64,8 @@ class test_layered_fast_truncate05(wttest.WiredTigerTestCase):
|
||||
|
||||
# Populate the table on the leader, checkpoint, then reopen as follower.
|
||||
def setup_follower(self):
|
||||
self.session.create(self.uri, self.session_create_config())
|
||||
cursor = self.session.open_cursor(self.uri)
|
||||
for i in range(self.nitems):
|
||||
self.session.begin_transaction()
|
||||
cursor[self.key(i)] = 'value'
|
||||
self.session.commit_transaction()
|
||||
cursor.close()
|
||||
self.session.checkpoint()
|
||||
|
||||
follower_config = (
|
||||
'disaggregated=(role="follower",'
|
||||
f'checkpoint_meta="{self.disagg_get_complete_checkpoint_meta()}")'
|
||||
)
|
||||
self.reopen_conn(config=follower_config)
|
||||
|
||||
# Truncate the range [start, stop] (inclusive). If stop is None, truncate
|
||||
# from start to the end of the table.
|
||||
def truncate_range(self, start, stop):
|
||||
c1 = self.session.open_cursor(self.uri)
|
||||
c1.set_key(self.key(start))
|
||||
c2 = None
|
||||
if stop is not None:
|
||||
c2 = self.session.open_cursor(self.uri)
|
||||
c2.set_key(self.key(stop))
|
||||
self.session.begin_transaction()
|
||||
self.session.truncate(None, c1, c2, None)
|
||||
self.session.commit_transaction()
|
||||
c1.close()
|
||||
if c2 is not None:
|
||||
c2.close()
|
||||
self.setup_leader(keys=range(self.nitems))
|
||||
super().setup_follower()
|
||||
|
||||
# Draw `samples` random keys and assert none fall inside [low, high].
|
||||
def sample_assert_random(self, low, high, samples=200):
|
||||
@ -110,7 +82,7 @@ class test_layered_fast_truncate05(wttest.WiredTigerTestCase):
|
||||
def test_random_cursor_skips_truncated_range(self):
|
||||
# 200 random samples must all land outside the truncated range.
|
||||
self.setup_follower()
|
||||
self.truncate_range(100, 700)
|
||||
self.truncate(100, 700)
|
||||
self.sample_assert_random(100, 700)
|
||||
|
||||
def test_random_cursor_skips_truncated_range_with_live_ingest(self):
|
||||
@ -125,5 +97,5 @@ class test_layered_fast_truncate05(wttest.WiredTigerTestCase):
|
||||
self.session.commit_transaction()
|
||||
cursor.close()
|
||||
|
||||
self.truncate_range(100, 700)
|
||||
self.truncate(100, 700)
|
||||
self.sample_assert_random(100, 700)
|
||||
|
||||
@ -34,10 +34,11 @@
|
||||
|
||||
import wttest
|
||||
from helper_disagg import disagg_test_class, gen_disagg_storages
|
||||
from helper_layered_fast_truncate import LayeredFastTruncateConfigMixin
|
||||
from wtscenario import make_scenarios
|
||||
|
||||
@disagg_test_class
|
||||
class test_layered_fast_truncate06(wttest.WiredTigerTestCase):
|
||||
class test_layered_fast_truncate06(LayeredFastTruncateConfigMixin, wttest.WiredTigerTestCase):
|
||||
conn_config = 'disaggregated=(role="leader"),'
|
||||
nrows = 100
|
||||
|
||||
@ -50,14 +51,6 @@ class test_layered_fast_truncate06(wttest.WiredTigerTestCase):
|
||||
'test_layered_fast_truncate06', disagg_only=True)
|
||||
scenarios = make_scenarios(disagg_storages, uris)
|
||||
|
||||
def visible_keys(self):
|
||||
c = self.session.open_cursor(self.uri)
|
||||
keys = []
|
||||
while c.next() == 0:
|
||||
keys.append(c.get_key())
|
||||
c.close()
|
||||
return keys
|
||||
|
||||
def session_create_config(self):
|
||||
cfg = 'key_format=i,value_format=S'
|
||||
if self.uri.startswith('table:'):
|
||||
@ -65,8 +58,8 @@ class test_layered_fast_truncate06(wttest.WiredTigerTestCase):
|
||||
return cfg
|
||||
|
||||
def setup_follower(self):
|
||||
# Create the table on the leader, load nrows, checkpoint, then reopen the
|
||||
# connection as a follower picking up that checkpoint.
|
||||
# Create the table on the leader, load nrows with per-row commit timestamps,
|
||||
# checkpoint, then reopen the connection as a follower picking up that checkpoint.
|
||||
self.session.create(self.uri, self.session_create_config())
|
||||
|
||||
cursor = self.session.open_cursor(self.uri)
|
||||
@ -77,32 +70,29 @@ class test_layered_fast_truncate06(wttest.WiredTigerTestCase):
|
||||
cursor.close()
|
||||
self.session.checkpoint()
|
||||
|
||||
follower_config = ('disaggregated=(role="follower",'
|
||||
f'checkpoint_meta="{self.disagg_get_complete_checkpoint_meta()}")')
|
||||
self.reopen_conn(config=follower_config)
|
||||
super().setup_follower()
|
||||
|
||||
def follower_truncate(self, start, stop):
|
||||
c_start = self.session.open_cursor(self.uri)
|
||||
c_start.set_key(start)
|
||||
c_stop = self.session.open_cursor(self.uri)
|
||||
c_stop.set_key(stop)
|
||||
self.session.begin_transaction()
|
||||
self.session.truncate(None, c_start, c_stop, None)
|
||||
self.session.commit_transaction()
|
||||
c_start.close()
|
||||
c_stop.close()
|
||||
def visible_keys_simple(self):
|
||||
# The test verifies a scan outside a transaction; use a simple inline scan
|
||||
# to match the original semantics (no transaction wrapping).
|
||||
c = self.session.open_cursor(self.uri)
|
||||
keys = []
|
||||
while c.next() == 0:
|
||||
keys.append(c.get_key())
|
||||
c.close()
|
||||
return keys
|
||||
|
||||
def test_verify_preserves_follower_truncate(self):
|
||||
self.setup_follower()
|
||||
self.follower_truncate(30, 60)
|
||||
self.truncate(30, 60)
|
||||
|
||||
expected = [i for i in range(1, self.nrows + 1) if i < 30 or i > 60]
|
||||
|
||||
# Before verify: a scan does not return the truncated rows.
|
||||
self.assertEqual(self.visible_keys(), expected)
|
||||
self.assertEqual(self.visible_keys_simple(), expected)
|
||||
|
||||
# Verify the layered URI. This triggers a close + reopen of the dhandle.
|
||||
self.session.verify(self.uri)
|
||||
|
||||
# After verify: a scan must still not return the truncated rows.
|
||||
self.assertEqual(self.visible_keys(), expected)
|
||||
self.assertEqual(self.visible_keys_simple(), expected)
|
||||
|
||||
@ -26,7 +26,7 @@
|
||||
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
# OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
# test_layered_fast_truncate06.py
|
||||
# test_layered_fast_truncate07.py
|
||||
# Follower-initiated truncate stores a bounded range in the truncate list.
|
||||
# Verifies NULL start/stop from the session API are resolved to the table's
|
||||
# first/last visible key, both via the verbose log line and by the row set
|
||||
@ -34,19 +34,20 @@
|
||||
|
||||
import wttest
|
||||
from helper_disagg import disagg_test_class, gen_disagg_storages
|
||||
from helper_layered_fast_truncate import LayeredFastTruncateConfigMixin
|
||||
from wtscenario import make_scenarios
|
||||
|
||||
@disagg_test_class
|
||||
class test_layered_fast_truncate06(wttest.WiredTigerTestCase):
|
||||
class test_layered_fast_truncate07(LayeredFastTruncateConfigMixin, wttest.WiredTigerTestCase):
|
||||
|
||||
conn_config = 'verbose=[layered:3],disaggregated=(role="leader"),'
|
||||
uri = 'layered:test_layered_fast_truncate06'
|
||||
uri = 'layered:test_layered_fast_truncate07'
|
||||
|
||||
key_formats = [
|
||||
('string', dict(key_format='S')),
|
||||
('int', dict(key_format='i')),
|
||||
]
|
||||
disagg_storages = gen_disagg_storages('test_layered_fast_truncate06', disagg_only=True)
|
||||
disagg_storages = gen_disagg_storages('test_layered_fast_truncate07', disagg_only=True)
|
||||
scenarios = make_scenarios(disagg_storages, key_formats)
|
||||
|
||||
nitems = 100
|
||||
@ -59,42 +60,17 @@ class test_layered_fast_truncate06(wttest.WiredTigerTestCase):
|
||||
def key_str(self, n):
|
||||
return f'{n:04d}' if self.key_format == 'S' else str(n)
|
||||
|
||||
def session_create_config(self):
|
||||
return f'key_format={self.key_format},value_format=S'
|
||||
|
||||
def setup_follower(self):
|
||||
self.session.create(self.uri, f'key_format={self.key_format},value_format=S')
|
||||
self.session.create(self.uri, self.session_create_config())
|
||||
self.insert_range(1, self.nitems)
|
||||
self.session.checkpoint()
|
||||
follower_config = ('verbose=[layered:3],disaggregated=(role="follower",'
|
||||
f'checkpoint_meta="{self.disagg_get_complete_checkpoint_meta()}")')
|
||||
self.reopen_conn(config=follower_config)
|
||||
|
||||
def truncate(self, start=None, stop=None):
|
||||
c_start = c_stop = None
|
||||
if start is not None:
|
||||
c_start = self.session.open_cursor(self.uri)
|
||||
c_start.set_key(self.key(start))
|
||||
if stop is not None:
|
||||
c_stop = self.session.open_cursor(self.uri)
|
||||
c_stop.set_key(self.key(stop))
|
||||
|
||||
# Use the table uri if both start and stop cursors are not given.
|
||||
uri = self.uri if (c_start is None and c_stop is None) else None
|
||||
self.session.begin_transaction()
|
||||
self.session.truncate(uri, c_start, c_stop, None)
|
||||
self.session.commit_transaction()
|
||||
if c_start is not None:
|
||||
c_start.close()
|
||||
if c_stop is not None:
|
||||
c_stop.close()
|
||||
|
||||
def visible_keys(self, forward=True):
|
||||
c = self.session.open_cursor(self.uri)
|
||||
step = c.next if forward else c.prev
|
||||
keys = []
|
||||
while step() == 0:
|
||||
keys.append(c.get_key())
|
||||
c.close()
|
||||
return keys
|
||||
|
||||
def insert_range(self, lo, hi):
|
||||
c = self.session.open_cursor(self.uri)
|
||||
for i in range(lo, hi + 1):
|
||||
@ -103,6 +79,16 @@ class test_layered_fast_truncate06(wttest.WiredTigerTestCase):
|
||||
self.session.commit_transaction()
|
||||
c.close()
|
||||
|
||||
def follower_visible_keys(self, forward=True):
|
||||
# Simple inline scan without a transaction wrapper to match the original behavior.
|
||||
c = self.session.open_cursor(self.uri)
|
||||
step = c.next if forward else c.prev
|
||||
keys = []
|
||||
while step() == 0:
|
||||
keys.append(c.get_key())
|
||||
c.close()
|
||||
return keys
|
||||
|
||||
# Keys in [1, nitems] minus [start, stop] (inclusive on both ends).
|
||||
def expected_keys(self, start, stop):
|
||||
return [self.key(i) for i in range(1, self.nitems + 1)
|
||||
@ -117,59 +103,59 @@ class test_layered_fast_truncate06(wttest.WiredTigerTestCase):
|
||||
|
||||
def test_bounded_range(self):
|
||||
self.setup_follower()
|
||||
self.truncate(start=30, stop=60)
|
||||
self.truncate(start_key=30, stop_key=60)
|
||||
self.assert_trunc_log(30, 60)
|
||||
self.assertEqual(self.visible_keys(), self.expected_keys(30, 60))
|
||||
self.assertEqual(self.follower_visible_keys(), self.expected_keys(30, 60))
|
||||
|
||||
def test_null_start_resolves_to_first_key(self):
|
||||
self.setup_follower()
|
||||
self.truncate(start=None, stop=60)
|
||||
self.truncate(start_key=None, stop_key=60)
|
||||
self.assert_trunc_log(1, 60)
|
||||
self.assertEqual(self.visible_keys(), self.expected_keys(1, 60))
|
||||
self.assertEqual(self.follower_visible_keys(), self.expected_keys(1, 60))
|
||||
|
||||
def test_null_stop_resolves_to_last_key(self):
|
||||
self.setup_follower()
|
||||
self.truncate(start=30, stop=None)
|
||||
self.truncate(start_key=30, stop_key=None)
|
||||
self.assert_trunc_log(30, self.nitems)
|
||||
self.assertEqual(self.visible_keys(), self.expected_keys(30, self.nitems))
|
||||
self.assertEqual(self.follower_visible_keys(), self.expected_keys(30, self.nitems))
|
||||
|
||||
def test_both_null_is_full_table(self):
|
||||
self.setup_follower()
|
||||
self.truncate(start=None, stop=None)
|
||||
self.truncate(start_key=None, stop_key=None)
|
||||
self.assert_trunc_log(1, self.nitems)
|
||||
self.assertEqual(self.visible_keys(), [])
|
||||
self.assertEqual(self.follower_visible_keys(), [])
|
||||
|
||||
# An open-ended truncate captures "end" at commit time, not dynamically. Keys appended
|
||||
# after stop should be visible.
|
||||
def test_open_ended_truncate_does_not_hide_later_appends(self):
|
||||
self.setup_follower()
|
||||
self.truncate(start=80, stop=None)
|
||||
self.truncate(start_key=80, stop_key=None)
|
||||
self.assert_trunc_log(80, self.nitems)
|
||||
self.insert_range(200, 210)
|
||||
expected = [self.key(i) for i in range(1, 80)] + \
|
||||
[self.key(i) for i in range(200, 211)]
|
||||
self.assertEqual(self.visible_keys(), expected)
|
||||
self.assertEqual(self.follower_visible_keys(), expected)
|
||||
|
||||
def test_bounded_and_end_open_ended_overlap(self):
|
||||
self.setup_follower()
|
||||
self.truncate(start=20, stop=60)
|
||||
self.truncate(start_key=20, stop_key=60)
|
||||
self.assert_trunc_log(20, 60)
|
||||
self.truncate(start=50, stop=None)
|
||||
self.truncate(start_key=50, stop_key=None)
|
||||
# key 50-60 was deleted by the first truncate; search_near positions it on the
|
||||
# nearest in-bound key, 61.
|
||||
self.assert_trunc_log(61, self.nitems)
|
||||
expected = [self.key(i) for i in range(1, 20)]
|
||||
self.assertEqual(self.visible_keys(), expected)
|
||||
self.assertEqual(self.visible_keys(forward=False), list(reversed(expected)))
|
||||
self.assertEqual(self.follower_visible_keys(), expected)
|
||||
self.assertEqual(self.follower_visible_keys(forward=False), list(reversed(expected)))
|
||||
|
||||
def test_bounded_and_start_open_ended_overlap(self):
|
||||
self.setup_follower()
|
||||
self.truncate(start=20, stop=60)
|
||||
self.truncate(start_key=20, stop_key=60)
|
||||
self.assert_trunc_log(20, 60)
|
||||
self.truncate(start=0, stop=30)
|
||||
self.truncate(start_key=0, stop_key=30)
|
||||
# key 20-30 was deleted by the first truncate; search_near positions it on the
|
||||
# nearest live key, 19.
|
||||
self.assert_trunc_log(1, 19)
|
||||
expected = [self.key(i) for i in range(61, self.nitems + 1)]
|
||||
self.assertEqual(self.visible_keys(), expected)
|
||||
self.assertEqual(self.visible_keys(forward=False), list(reversed(expected)))
|
||||
self.assertEqual(self.follower_visible_keys(), expected)
|
||||
self.assertEqual(self.follower_visible_keys(forward=False), list(reversed(expected)))
|
||||
|
||||
@ -33,68 +33,59 @@
|
||||
|
||||
from contextlib import closing
|
||||
from helper_disagg import disagg_test_class, gen_disagg_storages
|
||||
from helper_layered_fast_truncate import LayeredFastTruncateConfigMixin
|
||||
from wtscenario import make_scenarios
|
||||
import wttest
|
||||
|
||||
|
||||
@disagg_test_class
|
||||
class test_layered_fast_truncate08(wttest.WiredTigerTestCase):
|
||||
class test_layered_fast_truncate08(LayeredFastTruncateConfigMixin, wttest.WiredTigerTestCase):
|
||||
test_name = __qualname__
|
||||
|
||||
disagg_storages = gen_disagg_storages(test_name, disagg_only=True)
|
||||
scenarios = make_scenarios(disagg_storages)
|
||||
conn_config = 'disaggregated=(role="leader"),'
|
||||
|
||||
def setup_layered_table(self, layered_uri: str):
|
||||
uri = f"layered:{test_name}"
|
||||
|
||||
def session_create_config(self):
|
||||
return "key_format=i,value_format=u"
|
||||
|
||||
def populate(self, keys, value=b"v"):
|
||||
with closing(self.session.open_cursor(self.uri)) as cursor:
|
||||
with self.transaction():
|
||||
for key in keys:
|
||||
cursor[key] = value
|
||||
|
||||
def setup_layered_table(self):
|
||||
# Create the table and produce the initial checkpoint that the follower
|
||||
# will attach to.
|
||||
session_config = "key_format=i,value_format=u"
|
||||
self.session.create(layered_uri, session_config)
|
||||
self.session.checkpoint()
|
||||
|
||||
def setup_follower(self, layered_uri: str):
|
||||
self.reopen_disagg_conn('disaggregated=(role="follower"),')
|
||||
self.setup_leader()
|
||||
|
||||
def setup_follower(self, keys=range(100)):
|
||||
super().setup_follower()
|
||||
# Add updates on the ingest that can be truncated later.
|
||||
with closing(self.session.open_cursor(layered_uri)) as cursor:
|
||||
with self.transaction():
|
||||
for i in range(100):
|
||||
cursor[i] = b"v"
|
||||
self.populate(keys)
|
||||
|
||||
def truncate(self, layered_uri: str, start_key: int, stop_key: int):
|
||||
# Truncate between start and stop keys inclusive.
|
||||
with (
|
||||
closing(self.session.open_cursor(layered_uri)) as start_cursor,
|
||||
closing(self.session.open_cursor(layered_uri)) as stop_cursor,
|
||||
):
|
||||
start_cursor.set_key(start_key)
|
||||
stop_cursor.set_key(stop_key)
|
||||
|
||||
with self.transaction():
|
||||
self.session.truncate(None, start_cursor, stop_cursor, None)
|
||||
|
||||
def get_values(self, uri: str, start_key: int, stop_key: int):
|
||||
def get_values(self, uri, start_key, stop_key):
|
||||
# Return values of any keys between start and stop inclusive that exist.
|
||||
values = []
|
||||
|
||||
with closing(self.session.open_cursor(uri)) as cursor:
|
||||
for i in range(start_key, stop_key + 1):
|
||||
cursor.set_key(i)
|
||||
if cursor.search() == 0:
|
||||
values.append(cursor.get_value())
|
||||
|
||||
return values
|
||||
|
||||
def test_follower_truncate_writes_tombstone_to_ingest(self):
|
||||
# Set up a follower with existing ingest updates.
|
||||
layered_uri = f"layered:{self.test_name}"
|
||||
self.setup_layered_table(layered_uri)
|
||||
self.setup_follower(layered_uri)
|
||||
self.setup_layered_table()
|
||||
self.setup_follower()
|
||||
|
||||
# Truncate a range of keys.
|
||||
start_key = 20
|
||||
stop_key = 80
|
||||
self.truncate(layered_uri, start_key, stop_key)
|
||||
self.truncate(start_key, stop_key)
|
||||
|
||||
# Examine what the truncate actually wrote to the ingest file.
|
||||
ingest_uri = f"file:{self.test_name}.wt_ingest"
|
||||
|
||||
@ -28,12 +28,13 @@
|
||||
|
||||
import wiredtiger, wttest
|
||||
from helper_disagg import disagg_test_class, gen_disagg_storages
|
||||
from helper_layered_fast_truncate import LayeredFastTruncateConfigMixin
|
||||
from wtscenario import make_scenarios
|
||||
|
||||
# test_layered_fast_truncate09.py
|
||||
# Follower truncate-list visibility coverage.
|
||||
@disagg_test_class
|
||||
class test_layered_fast_truncate09(wttest.WiredTigerTestCase):
|
||||
class test_layered_fast_truncate09(LayeredFastTruncateConfigMixin, wttest.WiredTigerTestCase):
|
||||
|
||||
conn_config = 'disaggregated=(role="leader"),'
|
||||
|
||||
@ -49,7 +50,6 @@ class test_layered_fast_truncate09(wttest.WiredTigerTestCase):
|
||||
|
||||
def setUp(self):
|
||||
super().setUp()
|
||||
|
||||
self.setup_follower()
|
||||
|
||||
def session_create_config(self):
|
||||
@ -85,7 +85,7 @@ class test_layered_fast_truncate09(wttest.WiredTigerTestCase):
|
||||
c_start.close()
|
||||
c_stop.close()
|
||||
|
||||
def search_key(self, session, key):
|
||||
def search_in(self, session, key):
|
||||
cursor = session.open_cursor(self.uri)
|
||||
cursor.set_key(key)
|
||||
ret = cursor.search()
|
||||
@ -93,7 +93,7 @@ class test_layered_fast_truncate09(wttest.WiredTigerTestCase):
|
||||
cursor.close()
|
||||
return ret, value
|
||||
|
||||
def search_near_key(self, session, key):
|
||||
def search_near_in(self, session, key):
|
||||
cursor = session.open_cursor(self.uri)
|
||||
cursor.set_key(key)
|
||||
exact = cursor.search_near()
|
||||
@ -118,9 +118,9 @@ class test_layered_fast_truncate09(wttest.WiredTigerTestCase):
|
||||
with self.transaction(session=self.session, rollback=True):
|
||||
self.truncate_range(self.session, 100, 700)
|
||||
|
||||
ret = self.search_key(self.session, 150)[0]
|
||||
ret = self.search_in(self.session, 150)[0]
|
||||
self.assertEqual(ret, wiredtiger.WT_NOTFOUND)
|
||||
exact, landed = self.search_near_key(self.session, 150)
|
||||
exact, landed = self.search_near_in(self.session, 150)
|
||||
self.assertNotEqual(exact, 0)
|
||||
if exact < 0:
|
||||
self.assertEqual(landed, 99)
|
||||
@ -136,8 +136,8 @@ class test_layered_fast_truncate09(wttest.WiredTigerTestCase):
|
||||
session2 = self.conn.open_session()
|
||||
try:
|
||||
with self.transaction(session=session2, rollback=True):
|
||||
self.assertEqual(self.search_key(session2, 150), (0, 'value'))
|
||||
self.assertEqual(self.search_near_key(session2, 150), (0, 150))
|
||||
self.assertEqual(self.search_in(session2, 150), (0, 'value'))
|
||||
self.assertEqual(self.search_near_in(session2, 150), (0, 150))
|
||||
self.assertEqual(self.next_key_after(session2, 149), 150)
|
||||
finally:
|
||||
session2.close()
|
||||
@ -145,14 +145,14 @@ class test_layered_fast_truncate09(wttest.WiredTigerTestCase):
|
||||
def test_rollback_restores_visibility(self):
|
||||
with self.transaction(session=self.session, rollback=True):
|
||||
self.truncate_range(self.session, 100, 700)
|
||||
ret = self.search_key(self.session, 150)[0]
|
||||
ret = self.search_in(self.session, 150)[0]
|
||||
self.assertEqual(ret, wiredtiger.WT_NOTFOUND)
|
||||
|
||||
session2 = self.conn.open_session()
|
||||
try:
|
||||
with self.transaction(session=session2, rollback=True):
|
||||
self.assertEqual(self.search_key(session2, 150), (0, 'value'))
|
||||
self.assertEqual(self.search_near_key(session2, 150), (0, 150))
|
||||
self.assertEqual(self.search_in(session2, 150), (0, 'value'))
|
||||
self.assertEqual(self.search_near_in(session2, 150), (0, 150))
|
||||
self.assertEqual(self.next_key_after(session2, 149), 150)
|
||||
finally:
|
||||
session2.close()
|
||||
@ -163,14 +163,14 @@ class test_layered_fast_truncate09(wttest.WiredTigerTestCase):
|
||||
session2 = self.conn.open_session()
|
||||
try:
|
||||
with self.transaction(session=session2, read_timestamp=20, rollback=True):
|
||||
self.assertEqual(self.search_key(session2, 150), (0, 'value'))
|
||||
self.assertEqual(self.search_near_key(session2, 150), (0, 150))
|
||||
self.assertEqual(self.search_in(session2, 150), (0, 'value'))
|
||||
self.assertEqual(self.search_near_in(session2, 150), (0, 150))
|
||||
self.assertEqual(self.next_key_after(session2, 149), 150)
|
||||
|
||||
with self.transaction(session=session2, read_timestamp=30, rollback=True):
|
||||
ret = self.search_key(session2, 150)[0]
|
||||
ret = self.search_in(session2, 150)[0]
|
||||
self.assertEqual(ret, wiredtiger.WT_NOTFOUND)
|
||||
exact, landed = self.search_near_key(session2, 150)
|
||||
exact, landed = self.search_near_in(session2, 150)
|
||||
self.assertNotEqual(exact, 0)
|
||||
if exact < 0:
|
||||
self.assertEqual(landed, 99)
|
||||
@ -188,16 +188,16 @@ class test_layered_fast_truncate09(wttest.WiredTigerTestCase):
|
||||
session2 = self.conn.open_session()
|
||||
try:
|
||||
with self.transaction(session=session2, read_timestamp=30, rollback=True):
|
||||
ret = self.search_key(session2, 350)[0]
|
||||
ret = self.search_in(session2, 350)[0]
|
||||
self.assertEqual(ret, wiredtiger.WT_NOTFOUND)
|
||||
self.assertEqual(self.search_key(session2, 500), (0, 'value'))
|
||||
self.assertEqual(self.search_in(session2, 500), (0, 'value'))
|
||||
|
||||
with self.transaction(session=session2, read_timestamp=40, rollback=True):
|
||||
ret = self.search_key(session2, 350)[0]
|
||||
ret = self.search_in(session2, 350)[0]
|
||||
self.assertEqual(ret, wiredtiger.WT_NOTFOUND)
|
||||
ret = self.search_key(session2, 500)[0]
|
||||
ret = self.search_in(session2, 500)[0]
|
||||
self.assertEqual(ret, wiredtiger.WT_NOTFOUND)
|
||||
exact, landed = self.search_near_key(session2, 150)
|
||||
exact, landed = self.search_near_in(session2, 150)
|
||||
self.assertNotEqual(exact, 0)
|
||||
if exact < 0:
|
||||
self.assertEqual(landed, 99)
|
||||
|
||||
@ -33,26 +33,16 @@
|
||||
# the logical union of the stable and ingest tables, independent of which
|
||||
# table any given key actually lives in.
|
||||
|
||||
from contextlib import closing
|
||||
from itertools import chain
|
||||
from typing import Iterable
|
||||
from helper_disagg import disagg_test_class, gen_disagg_storages
|
||||
from helper_layered_fast_truncate import (
|
||||
LayeredFastTruncateConfigMixin, concat, range_inclusive,
|
||||
)
|
||||
from wtscenario import make_scenarios
|
||||
import wttest
|
||||
|
||||
|
||||
def concat(*iterables: Iterable[int]) -> list[int]:
|
||||
"""Concatenate any number of iterables into a single list."""
|
||||
return list(chain.from_iterable(iterables))
|
||||
|
||||
|
||||
def range_inclusive(start: int, stop: int) -> range:
|
||||
"""Return a range covering [start, stop] inclusive."""
|
||||
return range(start, stop + 1)
|
||||
|
||||
|
||||
@disagg_test_class
|
||||
class test_layered_fast_truncate10(wttest.WiredTigerTestCase):
|
||||
class test_layered_fast_truncate10(LayeredFastTruncateConfigMixin, wttest.WiredTigerTestCase):
|
||||
"""
|
||||
Data location semantics (stable vs ingest).
|
||||
|
||||
@ -70,60 +60,6 @@ class test_layered_fast_truncate10(wttest.WiredTigerTestCase):
|
||||
scenarios = make_scenarios(disagg_storages, uris)
|
||||
conn_config = 'disaggregated=(role="leader"),'
|
||||
|
||||
def session_create_config(self):
|
||||
cfg = "key_format=i,value_format=S"
|
||||
if self.uri.startswith("table"):
|
||||
cfg += ",block_manager=disagg,type=layered"
|
||||
return cfg
|
||||
|
||||
def auto_closing_cursor(self):
|
||||
"""Return a cursor that auto-closes as it goes out of scope."""
|
||||
return closing(self.session.open_cursor(self.uri))
|
||||
|
||||
def populate(self, keys: Iterable[int]):
|
||||
"""Insert each key with a placeholder value in a single transaction."""
|
||||
with self.auto_closing_cursor() as cursor:
|
||||
with self.transaction():
|
||||
for key in keys:
|
||||
cursor[key] = "v"
|
||||
|
||||
def setup_leader(self, keys: Iterable[int] | None = None):
|
||||
"""
|
||||
Create the table on the leader and optionally pre-populate stable.
|
||||
The follower will pick up these keys via the initial checkpoint.
|
||||
"""
|
||||
self.session.create(self.uri, self.session_create_config())
|
||||
if keys is not None:
|
||||
self.populate(keys)
|
||||
self.session.checkpoint()
|
||||
|
||||
def setup_follower(self, keys: Iterable[int] | None = None):
|
||||
"""Switch to follower role and optionally write keys to ingest."""
|
||||
self.reopen_disagg_conn('disaggregated=(role="follower"),')
|
||||
if keys is not None:
|
||||
self.populate(keys)
|
||||
|
||||
def truncate(self, start_key: int, stop_key: int):
|
||||
"""Truncate between start and stop keys inclusive."""
|
||||
with (
|
||||
self.auto_closing_cursor() as start_cursor,
|
||||
self.auto_closing_cursor() as stop_cursor,
|
||||
):
|
||||
start_cursor.set_key(start_key)
|
||||
stop_cursor.set_key(stop_key)
|
||||
|
||||
with self.transaction():
|
||||
self.session.truncate(None, start_cursor, stop_cursor, None)
|
||||
|
||||
def visible_keys(self) -> list[int]:
|
||||
"""Return all keys visible via a forward scan, in key order."""
|
||||
result = []
|
||||
with self.auto_closing_cursor() as cursor:
|
||||
with self.transaction(rollback=True):
|
||||
while cursor.next() == 0:
|
||||
result.append(cursor.get_key())
|
||||
return result
|
||||
|
||||
def test_truncate_range_with_both_tables_empty(self):
|
||||
# Stable and ingest are both empty.
|
||||
self.setup_leader()
|
||||
|
||||
@ -34,27 +34,17 @@
|
||||
# Open-ended truncates should not apply to keys written after the truncate
|
||||
# commits.
|
||||
|
||||
from contextlib import closing, nullcontext
|
||||
from itertools import chain
|
||||
from typing import Iterable
|
||||
from helper_disagg import disagg_test_class, gen_disagg_storages
|
||||
from helper_layered_fast_truncate import (
|
||||
LayeredFastTruncateConfigMixin, concat, range_inclusive,
|
||||
)
|
||||
from wiredtiger import WiredTigerError
|
||||
from wtscenario import make_scenarios
|
||||
import wttest
|
||||
|
||||
|
||||
def concat(*iterables: Iterable[int]) -> list[int]:
|
||||
"""Concatenate any number of iterables into a single list."""
|
||||
return list(chain.from_iterable(iterables))
|
||||
|
||||
|
||||
def range_inclusive(start: int, stop: int) -> range:
|
||||
"""Return a range covering [start, stop] inclusive."""
|
||||
return range(start, stop + 1)
|
||||
|
||||
|
||||
@disagg_test_class
|
||||
class test_layered_fast_truncate11(wttest.WiredTigerTestCase):
|
||||
class test_layered_fast_truncate11(LayeredFastTruncateConfigMixin, wttest.WiredTigerTestCase):
|
||||
"""
|
||||
Range specification (start / end / open-ended).
|
||||
|
||||
@ -73,67 +63,6 @@ class test_layered_fast_truncate11(wttest.WiredTigerTestCase):
|
||||
scenarios = make_scenarios(disagg_storages, uris)
|
||||
conn_config = 'disaggregated=(role="leader"),'
|
||||
|
||||
def session_create_config(self):
|
||||
cfg = "key_format=i,value_format=S"
|
||||
if self.uri.startswith("table"):
|
||||
cfg += ",block_manager=disagg,type=layered"
|
||||
return cfg
|
||||
|
||||
def auto_closing_cursor(self) -> closing:
|
||||
"""Return a cursor that auto-closes as it goes out of scope."""
|
||||
return closing(self.session.open_cursor(self.uri))
|
||||
|
||||
def populate(self, keys: Iterable[int]):
|
||||
"""Insert each key with a placeholder value in a single transaction."""
|
||||
with self.auto_closing_cursor() as cursor:
|
||||
with self.transaction():
|
||||
for key in keys:
|
||||
cursor[key] = "v"
|
||||
|
||||
def setup_leader(self, keys: Iterable[int] | None = None):
|
||||
"""
|
||||
Create the table on the leader and optionally pre-populate stable.
|
||||
The follower will pick up these keys via the initial checkpoint.
|
||||
"""
|
||||
self.session.create(self.uri, self.session_create_config())
|
||||
if keys is not None:
|
||||
self.populate(keys)
|
||||
self.session.checkpoint()
|
||||
|
||||
def setup_follower(self, keys: Iterable[int] | None = None):
|
||||
"""Switch to follower role and optionally write keys to ingest."""
|
||||
self.reopen_disagg_conn('disaggregated=(role="follower"),')
|
||||
if keys is not None:
|
||||
self.populate(keys)
|
||||
|
||||
def cursor_for_key(self, key: int | None):
|
||||
"""Return a cursor with its key set, or None if key is None."""
|
||||
if key is None:
|
||||
return nullcontext(None) # Open-ended truncate.
|
||||
cursor = self.auto_closing_cursor()
|
||||
cursor.thing.set_key(key)
|
||||
return cursor
|
||||
|
||||
def truncate(self, start_key: int | None, stop_key: int | None):
|
||||
"""Truncate [start_key, stop_key] inclusive; None means open end."""
|
||||
with (
|
||||
self.cursor_for_key(start_key) as start,
|
||||
self.cursor_for_key(stop_key) as stop,
|
||||
):
|
||||
# WT requires a URI when both cursors are absent.
|
||||
uri = self.uri if (start is None and stop is None) else None
|
||||
with self.transaction():
|
||||
self.session.truncate(uri, start, stop, None)
|
||||
|
||||
def visible_keys(self) -> list[int]:
|
||||
"""Return all keys visible via a forward scan, in key order."""
|
||||
result = []
|
||||
with self.auto_closing_cursor() as cursor:
|
||||
with self.transaction(rollback=True):
|
||||
while cursor.next() == 0:
|
||||
result.append(cursor.get_key())
|
||||
return result
|
||||
|
||||
def test_truncate_with_null_start_key(self):
|
||||
# Set up a follower with keys 1-100.
|
||||
self.setup_leader()
|
||||
|
||||
@ -32,27 +32,16 @@
|
||||
# Verify that forward scans, backward scans, next_random, search, and
|
||||
# search_near all treat truncated keys as non-existent on a follower.
|
||||
|
||||
from contextlib import closing, nullcontext
|
||||
from itertools import chain
|
||||
from typing import Iterable
|
||||
from helper_disagg import disagg_test_class, gen_disagg_storages
|
||||
from wiredtiger import WT_NOTFOUND
|
||||
from helper_layered_fast_truncate import (
|
||||
LayeredFastTruncateConfigMixin, concat, range_inclusive,
|
||||
)
|
||||
from wtscenario import make_scenarios
|
||||
import wttest
|
||||
|
||||
|
||||
def concat(*iterables: Iterable[int]) -> list[int]:
|
||||
"""Concatenate any number of iterables into a single list."""
|
||||
return list(chain.from_iterable(iterables))
|
||||
|
||||
|
||||
def range_inclusive(start: int, stop: int) -> range:
|
||||
"""Return a range covering [start, stop] inclusive."""
|
||||
return range(start, stop + 1)
|
||||
|
||||
|
||||
@disagg_test_class
|
||||
class test_layered_fast_truncate12(wttest.WiredTigerTestCase):
|
||||
class test_layered_fast_truncate12(LayeredFastTruncateConfigMixin, wttest.WiredTigerTestCase):
|
||||
"""
|
||||
Cursor iteration and searches over truncated ranges.
|
||||
|
||||
@ -69,76 +58,7 @@ class test_layered_fast_truncate12(wttest.WiredTigerTestCase):
|
||||
scenarios = make_scenarios(disagg_storages, uris)
|
||||
conn_config = 'disaggregated=(role="leader"),'
|
||||
|
||||
def session_create_config(self):
|
||||
cfg = "key_format=i,value_format=S"
|
||||
if self.uri.startswith("table"):
|
||||
cfg += ",block_manager=disagg,type=layered"
|
||||
return cfg
|
||||
|
||||
def auto_closing_cursor(self, config: str | None = None) -> closing:
|
||||
"""Return a cursor that auto-closes as it goes out of scope."""
|
||||
return closing(self.session.open_cursor(self.uri, None, config))
|
||||
|
||||
def populate(self, keys: Iterable[int]):
|
||||
"""Insert each key with a placeholder value in a single transaction."""
|
||||
with self.auto_closing_cursor() as cursor:
|
||||
with self.transaction():
|
||||
for key in keys:
|
||||
cursor[key] = "v"
|
||||
|
||||
def setup_leader(self, keys: Iterable[int] | None = None):
|
||||
"""
|
||||
Create the table on the leader and optionally pre-populate stable.
|
||||
The follower will pick up these keys via the initial checkpoint.
|
||||
"""
|
||||
self.session.create(self.uri, self.session_create_config())
|
||||
if keys is not None:
|
||||
self.populate(keys)
|
||||
self.session.checkpoint()
|
||||
|
||||
def setup_follower(self, keys: Iterable[int] | None = None):
|
||||
"""Switch to follower role and optionally write keys to ingest."""
|
||||
self.reopen_disagg_conn('disaggregated=(role="follower"),')
|
||||
if keys is not None:
|
||||
self.populate(keys)
|
||||
|
||||
def cursor_for_key(self, key: int | None):
|
||||
"""Return a cursor with its key set, or None if key is None."""
|
||||
if key is None:
|
||||
return nullcontext(None)
|
||||
cursor = self.auto_closing_cursor()
|
||||
cursor.thing.set_key(key)
|
||||
return cursor
|
||||
|
||||
def truncate(self, start_key: int | None, stop_key: int | None):
|
||||
"""Truncate [start_key, stop_key] inclusive; None means open end."""
|
||||
with (
|
||||
self.cursor_for_key(start_key) as start,
|
||||
self.cursor_for_key(stop_key) as stop,
|
||||
):
|
||||
uri = self.uri if (start is None and stop is None) else None
|
||||
with self.transaction():
|
||||
self.session.truncate(uri, start, stop, None)
|
||||
|
||||
def visible_keys(self) -> list[int]:
|
||||
"""Return all keys visible via a forward scan, in key order."""
|
||||
result = []
|
||||
with self.auto_closing_cursor() as cursor:
|
||||
with self.transaction(rollback=True):
|
||||
while cursor.next() == 0:
|
||||
result.append(cursor.get_key())
|
||||
return result
|
||||
|
||||
def backward_visible_keys(self) -> list[int]:
|
||||
"""Return all keys visible via a backward scan."""
|
||||
result = []
|
||||
with self.auto_closing_cursor() as cursor:
|
||||
with self.transaction(rollback=True):
|
||||
while cursor.prev() == 0:
|
||||
result.append(cursor.get_key())
|
||||
return result
|
||||
|
||||
def random_sample_keys(self, n: int) -> list[int]:
|
||||
def random_sample_keys(self, n):
|
||||
"""Return n keys drawn from a next_random cursor."""
|
||||
result = []
|
||||
with self.auto_closing_cursor("next_random=true") as cursor:
|
||||
@ -148,27 +68,6 @@ class test_layered_fast_truncate12(wttest.WiredTigerTestCase):
|
||||
result.append(cursor.get_key())
|
||||
return result
|
||||
|
||||
def search_key(self, key: int) -> int:
|
||||
"""Search for key; return 0 on exact match or WT_NOTFOUND."""
|
||||
with self.cursor_for_key(key) as cursor:
|
||||
with self.transaction(rollback=True):
|
||||
return cursor.search()
|
||||
|
||||
def search_near_key(self, key: int) -> tuple[int, int | None]:
|
||||
"""
|
||||
Call search_near for a key.
|
||||
|
||||
Returns (exact, found_key). exact follows WT convention: 0 = exact,
|
||||
1 = positioned above, -1 = positioned below, or WT_NOTFOUND if no
|
||||
visible keys exist.
|
||||
"""
|
||||
with self.cursor_for_key(key) as cursor:
|
||||
with self.transaction(rollback=True):
|
||||
exact = cursor.search_near()
|
||||
if exact == WT_NOTFOUND:
|
||||
return exact, None
|
||||
return exact, cursor.get_key()
|
||||
|
||||
def test_forward_scan_skips_truncated_range(self):
|
||||
# Set up a follower with keys 1-100.
|
||||
self.setup_leader()
|
||||
@ -194,7 +93,7 @@ class test_layered_fast_truncate12(wttest.WiredTigerTestCase):
|
||||
reversed(range_inclusive(61, 100)),
|
||||
reversed(range_inclusive(1, 29)),
|
||||
)
|
||||
self.assertEqual(self.backward_visible_keys(), expected)
|
||||
self.assertEqual(self.visible_keys(forward=False), expected)
|
||||
|
||||
def test_next_random_never_lands_in_truncated_range(self):
|
||||
# Set up a follower with keys 1-100.
|
||||
@ -219,7 +118,7 @@ class test_layered_fast_truncate12(wttest.WiredTigerTestCase):
|
||||
|
||||
# Searching for a key inside the truncated range should return
|
||||
# WT_NOTFOUND.
|
||||
self.assertEqual(self.search_key(45), WT_NOTFOUND)
|
||||
self.assertFalse(self.key_exists(45))
|
||||
|
||||
def test_search_at_inclusive_truncate_boundary(self):
|
||||
# Set up a follower with keys 1-100.
|
||||
@ -230,12 +129,12 @@ class test_layered_fast_truncate12(wttest.WiredTigerTestCase):
|
||||
self.truncate(30, 60)
|
||||
|
||||
# The boundary keys should be invisible.
|
||||
self.assertEqual(self.search_key(30), WT_NOTFOUND)
|
||||
self.assertEqual(self.search_key(60), WT_NOTFOUND)
|
||||
self.assertFalse(self.key_exists(30))
|
||||
self.assertFalse(self.key_exists(60))
|
||||
|
||||
# The keys just outside the truncated range should still be found.
|
||||
self.assertEqual(self.search_key(29), 0)
|
||||
self.assertEqual(self.search_key(61), 0)
|
||||
self.assertTrue(self.key_exists(29))
|
||||
self.assertTrue(self.key_exists(61))
|
||||
|
||||
def test_search_near_inside_truncated_range(self):
|
||||
# Set up a follower with keys 1-100.
|
||||
|
||||
@ -32,26 +32,16 @@
|
||||
# Verify that subsequent operations - additional truncates, per-key removes,
|
||||
# and reinsertion - compose correctly with a prior committed truncate.
|
||||
|
||||
from contextlib import closing, nullcontext
|
||||
from itertools import chain
|
||||
from typing import Iterable
|
||||
from helper_disagg import disagg_test_class, gen_disagg_storages
|
||||
from helper_layered_fast_truncate import (
|
||||
LayeredFastTruncateConfigMixin, concat, range_inclusive,
|
||||
)
|
||||
from wtscenario import make_scenarios
|
||||
import wttest
|
||||
|
||||
|
||||
def concat(*iterables: Iterable[int]) -> list[int]:
|
||||
"""Concatenate any number of iterables into a single list."""
|
||||
return list(chain.from_iterable(iterables))
|
||||
|
||||
|
||||
def range_inclusive(start: int, stop: int) -> range:
|
||||
"""Return a range covering [start, stop] inclusive."""
|
||||
return range(start, stop + 1)
|
||||
|
||||
|
||||
@disagg_test_class
|
||||
class test_layered_fast_truncate13(wttest.WiredTigerTestCase):
|
||||
class test_layered_fast_truncate13(LayeredFastTruncateConfigMixin, wttest.WiredTigerTestCase):
|
||||
"""
|
||||
Interactions with existing truncates.
|
||||
|
||||
@ -68,72 +58,13 @@ class test_layered_fast_truncate13(wttest.WiredTigerTestCase):
|
||||
scenarios = make_scenarios(disagg_storages, uris)
|
||||
conn_config = 'disaggregated=(role="leader"),'
|
||||
|
||||
def session_create_config(self):
|
||||
cfg = "key_format=i,value_format=S"
|
||||
if self.uri.startswith("table"):
|
||||
cfg += ",block_manager=disagg,type=layered"
|
||||
return cfg
|
||||
|
||||
def auto_closing_cursor(self, config: str | None = None) -> closing:
|
||||
"""Return a cursor that auto-closes as it goes out of scope."""
|
||||
return closing(self.session.open_cursor(self.uri, None, config))
|
||||
|
||||
def populate(self, keys: Iterable[int]):
|
||||
"""Insert each key with a placeholder value in a single transaction."""
|
||||
with self.auto_closing_cursor() as cursor:
|
||||
with self.transaction():
|
||||
for key in keys:
|
||||
cursor[key] = "v"
|
||||
|
||||
def setup_leader(self, keys: Iterable[int] | None = None):
|
||||
"""
|
||||
Create the table on the leader and optionally pre-populate stable. The
|
||||
follower will pick up these keys via the initial checkpoint.
|
||||
"""
|
||||
self.session.create(self.uri, self.session_create_config())
|
||||
if keys is not None:
|
||||
self.populate(keys)
|
||||
self.session.checkpoint()
|
||||
|
||||
def setup_follower(self, keys: Iterable[int] | None = None):
|
||||
"""Switch to follower role and optionally write keys to ingest."""
|
||||
self.reopen_disagg_conn('disaggregated=(role="follower"),')
|
||||
if keys is not None:
|
||||
self.populate(keys)
|
||||
|
||||
def cursor_for_key(self, key: int | None):
|
||||
"""Return a cursor with its key set, or None if key is None."""
|
||||
if key is None:
|
||||
return nullcontext(None)
|
||||
cursor = self.auto_closing_cursor()
|
||||
cursor.thing.set_key(key)
|
||||
return cursor
|
||||
|
||||
def truncate(self, start_key: int | None, stop_key: int | None):
|
||||
"""Truncate [start_key, stop_key] inclusive; None means open end."""
|
||||
with (
|
||||
self.cursor_for_key(start_key) as start,
|
||||
self.cursor_for_key(stop_key) as stop,
|
||||
):
|
||||
uri = self.uri if (start is None and stop is None) else None
|
||||
with self.transaction():
|
||||
self.session.truncate(uri, start, stop, None)
|
||||
|
||||
def remove_key(self, key: int):
|
||||
def remove_key(self, key):
|
||||
"""Remove a single key in a transaction."""
|
||||
with self.cursor_for_key(key) as cursor:
|
||||
with self.auto_closing_cursor() as cursor:
|
||||
cursor.set_key(self.key(key))
|
||||
with self.transaction():
|
||||
cursor.remove()
|
||||
|
||||
def visible_keys(self) -> list[int]:
    """Scan forward from the start of the table and return every key seen, in scan order."""
    seen = []
    # The scan runs inside a transaction that is rolled back when done.
    with self.auto_closing_cursor() as scan, self.transaction(rollback=True):
        while True:
            if scan.next() != 0:
                break
            seen.append(scan.get_key())
    return seen
|
||||
|
||||
def test_per_key_removes_before_truncate(self):
|
||||
# Set up a follower with keys 1-100.
|
||||
self.setup_leader()
|
||||
@ -226,10 +157,12 @@ class test_layered_fast_truncate13(wttest.WiredTigerTestCase):
|
||||
# Truncate keys 30-60 and reinsert key 45 within the same transaction.
|
||||
with self.transaction():
|
||||
with (
|
||||
self.cursor_for_key(30) as start,
|
||||
self.cursor_for_key(60) as stop,
|
||||
self.auto_closing_cursor() as start,
|
||||
self.auto_closing_cursor() as stop,
|
||||
self.auto_closing_cursor() as cursor,
|
||||
):
|
||||
start.set_key(self.key(30))
|
||||
stop.set_key(self.key(60))
|
||||
self.session.truncate(None, start, stop, None)
|
||||
cursor[45] = "v"
|
||||
|
||||
|
||||
@ -29,15 +29,14 @@
|
||||
# test_layered_fast_truncate14.py
|
||||
# Ensure next() skips truncated stable keys after search_near lands on an ingest key.
|
||||
|
||||
from contextlib import closing
|
||||
from typing import Iterable
|
||||
from helper_disagg import disagg_test_class, gen_disagg_storages
|
||||
from helper_layered_fast_truncate import LayeredFastTruncateConfigMixin
|
||||
from wtscenario import make_scenarios
|
||||
import wttest
|
||||
|
||||
|
||||
@disagg_test_class
|
||||
class test_layered_fast_truncate14(wttest.WiredTigerTestCase):
|
||||
class test_layered_fast_truncate14(LayeredFastTruncateConfigMixin, wttest.WiredTigerTestCase):
|
||||
"""next() skips truncated stable keys after search_near lands on an ingest key."""
|
||||
|
||||
uris = [
|
||||
@ -49,43 +48,7 @@ class test_layered_fast_truncate14(wttest.WiredTigerTestCase):
|
||||
scenarios = make_scenarios(disagg_storages, uris)
|
||||
conn_config = 'disaggregated=(role="leader"),'
|
||||
|
||||
def session_create_config(self):
    """
    Build the session.create() config string for ``self.uri``: integer keys
    and string values, plus the disagg/layered options when the URI starts
    with "table".
    """
    parts = ["key_format=i,value_format=S"]
    if self.uri.startswith("table"):
        parts.append("block_manager=disagg,type=layered")
    return ",".join(parts)
|
||||
|
||||
def auto_closing_cursor(self):
    """Open a cursor on ``self.uri`` and wrap it in ``closing`` so a ``with`` block closes it."""
    opened = self.session.open_cursor(self.uri)
    return closing(opened)
|
||||
|
||||
def populate(self, keys: Iterable[int]):
    """Insert every key with the placeholder value "v" inside a single transaction."""
    placeholder = "v"
    with (
        self.auto_closing_cursor() as out,
        self.transaction(),
    ):
        for item in keys:
            out[item] = placeholder
|
||||
|
||||
def setup_leader(self, keys: Iterable[int] | None = None):
|
||||
self.session.create(self.uri, self.session_create_config())
|
||||
if keys is not None:
|
||||
self.populate(keys)
|
||||
self.session.checkpoint()
|
||||
|
||||
def setup_follower(self, keys: Iterable[int] | None = None):
|
||||
self.reopen_disagg_conn('disaggregated=(role="follower"),')
|
||||
if keys is not None:
|
||||
self.populate(keys)
|
||||
|
||||
def truncate(self, start_key: int, stop_key: int):
    """
    Truncate between ``start_key`` and ``stop_key`` in its own transaction,
    using two cursors positioned on the bounds.
    """
    with self.auto_closing_cursor() as lower:
        with self.auto_closing_cursor() as upper:
            lower.set_key(start_key)
            upper.set_key(stop_key)
            with self.transaction():
                self.session.truncate(None, lower, upper, None)
|
||||
|
||||
def keys_after_search_near(self, search_key: int) -> list[int]:
|
||||
def keys_after_search_near(self, search_key):
|
||||
"""
|
||||
Position on search_key via search_near (must be an exact match), then
|
||||
return all keys yielded by subsequent next() calls.
|
||||
|
||||
@ -30,16 +30,14 @@
|
||||
# Validate edge scenario where no tombstones are written when ingest keys sit outside
|
||||
# the range. Follower truncate tombstones ingest keys only inside the range.
|
||||
|
||||
from contextlib import closing
|
||||
from typing import Iterable
|
||||
from helper_disagg import disagg_test_class, gen_disagg_storages
|
||||
from wiredtiger import WT_NOTFOUND
|
||||
from helper_layered_fast_truncate import LayeredFastTruncateConfigMixin
|
||||
from wtscenario import make_scenarios
|
||||
import wttest
|
||||
|
||||
|
||||
@disagg_test_class
|
||||
class test_layered_fast_truncate15(wttest.WiredTigerTestCase):
|
||||
class test_layered_fast_truncate15(LayeredFastTruncateConfigMixin, wttest.WiredTigerTestCase):
|
||||
"""Follower truncate tombstones only ingest keys inside the range."""
|
||||
|
||||
uris = [
|
||||
@ -51,65 +49,15 @@ class test_layered_fast_truncate15(wttest.WiredTigerTestCase):
|
||||
scenarios = make_scenarios(disagg_storages, uris)
|
||||
conn_config = 'disaggregated=(role="leader"),'
|
||||
|
||||
def session_create_config(self):
    """
    Return the session.create() config for ``self.uri``; the disagg/layered
    options are appended only for URIs that start with "table".
    """
    base = "key_format=i,value_format=S"
    if not self.uri.startswith("table"):
        return base
    return base + ",block_manager=disagg,type=layered"
|
||||
|
||||
def auto_closing_cursor(self):
    """Return a ``closing`` wrapper around a new cursor on ``self.uri``."""
    session, uri = self.session, self.uri
    return closing(session.open_cursor(uri))
|
||||
|
||||
def populate(self, keys: Iterable[int]):
    """Write "v" for each key in ``keys``; all writes share one transaction."""
    with self.auto_closing_cursor() as sink, self.transaction():
        for entry in keys:
            sink[entry] = "v"
|
||||
|
||||
def setup_leader(self, keys: Iterable[int] | None = None):
|
||||
self.session.create(self.uri, self.session_create_config())
|
||||
if keys is not None:
|
||||
self.populate(keys)
|
||||
self.session.checkpoint()
|
||||
|
||||
def setup_follower(self, keys: Iterable[int] | None = None):
|
||||
self.reopen_disagg_conn('disaggregated=(role="follower"),')
|
||||
if keys is not None:
|
||||
self.populate(keys)
|
||||
|
||||
def truncate(self, start_key: int, stop_key: int):
    """Position one cursor on each bound and truncate between them inside a transaction."""
    with self.auto_closing_cursor() as first, self.auto_closing_cursor() as last:
        first.set_key(start_key)
        last.set_key(stop_key)
        with self.transaction():
            # No URI is passed: the range is defined by the two cursors.
            self.session.truncate(None, first, last, None)
|
||||
|
||||
def search_key(self, key: int) -> int:
    """
    Search for ``key`` and return whatever cursor.search() returns. The
    lookup runs inside a transaction that is rolled back.
    """
    with self.auto_closing_cursor() as probe, self.transaction(rollback=True):
        probe.set_key(key)
        status = probe.search()
        return status
|
||||
|
||||
def visible_keys(self) -> list[int]:
    """Collect every key a forward scan returns, using a transaction that is rolled back."""
    collected = []
    with self.auto_closing_cursor() as walker, self.transaction(rollback=True):
        status = walker.next()
        while status == 0:
            collected.append(walker.get_key())
            status = walker.next()
    return collected
|
||||
|
||||
def test_ingest_keys_flanking_range_not_tombstoned(self):
|
||||
# Ingest keys flank the range on both sides with none inside; neither should be tombstoned.
|
||||
self.setup_leader(keys=[0, 10, 20, 30])
|
||||
self.setup_follower(keys=[5, 25])
|
||||
self.truncate(10, 20)
|
||||
|
||||
self.assertEqual(self.search_key(10), WT_NOTFOUND,
|
||||
self.assertFalse(self.key_exists(10),
|
||||
"key 10 must be deleted (stable-only, inside truncate range)")
|
||||
self.assertEqual(self.search_key(25), 0,
|
||||
self.assertTrue(self.key_exists(25),
|
||||
"key 25 must be visible (ingest key, outside truncate range)")
|
||||
|
||||
def test_scan_correct_when_ingest_keys_flank_range(self):
|
||||
@ -126,10 +74,8 @@ class test_layered_fast_truncate15(wttest.WiredTigerTestCase):
|
||||
self.setup_follower(keys=[5])
|
||||
self.truncate(10, 15)
|
||||
|
||||
self.assertEqual(self.search_key(10), WT_NOTFOUND,
|
||||
"key 10 must be deleted")
|
||||
self.assertEqual(self.search_key(5), 0,
|
||||
"key 5 must be visible")
|
||||
self.assertFalse(self.key_exists(10), "key 10 must be deleted")
|
||||
self.assertTrue(self.key_exists(5), "key 5 must be visible")
|
||||
|
||||
def test_ingest_key_only_above_range(self):
|
||||
# All ingest keys are above the range; none should be tombstoned.
|
||||
@ -137,10 +83,8 @@ class test_layered_fast_truncate15(wttest.WiredTigerTestCase):
|
||||
self.setup_follower(keys=[15])
|
||||
self.truncate(5, 10)
|
||||
|
||||
self.assertEqual(self.search_key(10), WT_NOTFOUND,
|
||||
"key 10 must be deleted")
|
||||
self.assertEqual(self.search_key(15), 0,
|
||||
"key 15 must be visible")
|
||||
self.assertFalse(self.key_exists(10), "key 10 must be deleted")
|
||||
self.assertTrue(self.key_exists(15), "key 15 must be visible")
|
||||
|
||||
def test_multiple_ingest_keys_both_sides_no_ingest_in_range(self):
|
||||
# Multiple ingest keys on both sides of the range; none inside; all should stay visible.
|
||||
@ -149,10 +93,10 @@ class test_layered_fast_truncate15(wttest.WiredTigerTestCase):
|
||||
self.truncate(10, 15)
|
||||
|
||||
for k in [10, 15]:
|
||||
self.assertEqual(self.search_key(k), WT_NOTFOUND,
|
||||
self.assertFalse(self.key_exists(k),
|
||||
f"key {k} must be deleted (stable-only, inside truncate range)")
|
||||
for k in [3, 7, 18, 22]:
|
||||
self.assertEqual(self.search_key(k), 0,
|
||||
self.assertTrue(self.key_exists(k),
|
||||
f"key {k} must be visible (ingest key, outside truncate range)")
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@ -28,13 +28,14 @@
|
||||
|
||||
import wttest, wiredtiger
|
||||
from helper_disagg import disagg_test_class, gen_disagg_storages
|
||||
from helper_layered_fast_truncate import LayeredFastTruncateConfigMixin
|
||||
from wtscenario import make_scenarios
|
||||
|
||||
# test_layered_fast_truncate16.py
|
||||
# Verify that pending follower truncates land on stable when the follower steps up,
|
||||
# across the variety of per-key shapes and edge cases.
|
||||
@disagg_test_class
|
||||
class test_layered_fast_truncate_stepup(wttest.WiredTigerTestCase):
|
||||
class test_layered_fast_truncate_stepup(LayeredFastTruncateConfigMixin, wttest.WiredTigerTestCase):
|
||||
|
||||
conn_config = 'disaggregated=(role="leader")'
|
||||
uri = 'layered:test_layered_fast_truncate_stepup'
|
||||
@ -53,22 +54,10 @@ class test_layered_fast_truncate_stepup(wttest.WiredTigerTestCase):
|
||||
self.conn.set_timestamp('stable_timestamp=' + self.timestamp_str(ts))
|
||||
self.session.checkpoint()
|
||||
|
||||
# Open a separate follower connection, create the table on both sides, leader populates,
|
||||
# follower picks up the checkpoint. After this, follower-side ops run on session_follow.
|
||||
def setup_follower(self):
    """
    Open a second connection in the follower role with a session on it,
    create the table on both the leader and the follower session, populate
    through the leader, and advance the follower's checkpoint.
    """
    follower_config = self.extensionsConfig() + ',create,disaggregated=(role="follower")'
    self.conn_follow = self.wiredtiger_open('follower', follower_config)
    self.session_follow = self.conn_follow.open_session('')

    # Same table definition on both sides.
    for sess in (self.session, self.session_follow):
        sess.create(self.uri, 'key_format=i,value_format=S')

    self.populate_on_leader()
    self.disagg_advance_checkpoint(self.conn_follow)
|
||||
|
||||
# Step up the follower (which becomes the new leader) and step the original leader down.
|
||||
def step_up(self):
|
||||
self.ignoreStdoutPattern('Picking up the same checkpoint')
|
||||
self.disagg_switch_follower_and_leader(self.conn_follow)
|
||||
self.conn_follow, self.session_follow = self.open_follower()
|
||||
|
||||
def write_kv(self, key, value, ts):
|
||||
cursor = self.session_follow.open_cursor(self.uri)
|
||||
@ -97,26 +86,18 @@ class test_layered_fast_truncate_stepup(wttest.WiredTigerTestCase):
|
||||
c_stop.close()
|
||||
|
||||
def assert_visible(self, keys, value=None, ts=None):
|
||||
self.session_follow.begin_transaction('read_timestamp=' + self.timestamp_str(ts))
|
||||
cursor = self.session_follow.open_cursor(self.uri)
|
||||
for k in keys:
|
||||
cursor.set_key(k)
|
||||
self.assertEqual(cursor.search(), 0, f"key {k} should be visible at ts={ts}")
|
||||
ret, val = self.search_at(self.session_follow, k, ts)
|
||||
self.assertEqual(ret, 0, f"key {k} should be visible at ts={ts}")
|
||||
if value is not None:
|
||||
expected = value(k) if callable(value) else value
|
||||
self.assertEqual(cursor.get_value(), expected)
|
||||
cursor.close()
|
||||
self.session_follow.rollback_transaction()
|
||||
self.assertEqual(val, expected)
|
||||
|
||||
def assert_deleted(self, keys, ts):
|
||||
self.session_follow.begin_transaction('read_timestamp=' + self.timestamp_str(ts))
|
||||
cursor = self.session_follow.open_cursor(self.uri)
|
||||
for k in keys:
|
||||
cursor.set_key(k)
|
||||
self.assertEqual(cursor.search(), wiredtiger.WT_NOTFOUND,
|
||||
ret, _ = self.search_at(self.session_follow, k, ts)
|
||||
self.assertEqual(ret, wiredtiger.WT_NOTFOUND,
|
||||
f"key {k} should be deleted at ts={ts}")
|
||||
cursor.close()
|
||||
self.session_follow.rollback_transaction()
|
||||
|
||||
def assert_keys_gone(self, ranges):
|
||||
# Sweep the populated key space: keys inside any (lo, hi) inclusive range must be
|
||||
|
||||
@ -28,6 +28,7 @@
|
||||
|
||||
import wiredtiger, wttest
|
||||
from helper_disagg import disagg_test_class, gen_disagg_storages
|
||||
from helper_layered_fast_truncate import LayeredFastTruncateConfigMixin
|
||||
from wtscenario import make_scenarios
|
||||
from wiredtiger import stat
|
||||
|
||||
@ -35,7 +36,7 @@ from wiredtiger import stat
|
||||
# Verify that step-up replay uses fast page truncation (WT_REF_DELETED) when
|
||||
# replaying follower truncates.
|
||||
@disagg_test_class
|
||||
class test_layered_fast_truncate17(wttest.WiredTigerTestCase):
|
||||
class test_layered_fast_truncate17(LayeredFastTruncateConfigMixin, wttest.WiredTigerTestCase):
|
||||
|
||||
conn_config = 'disaggregated=(role="leader")'
|
||||
uri = 'layered:test_layered_ft_replay'
|
||||
@ -45,12 +46,6 @@ class test_layered_fast_truncate17(wttest.WiredTigerTestCase):
|
||||
disagg_storages = gen_disagg_storages('test_layered_ft_replay', disagg_only=True)
|
||||
scenarios = make_scenarios(disagg_storages)
|
||||
|
||||
def get_stat(self, conn, stat_key):
    """
    Read one connection statistic.

    Opens a temporary session on ``conn``, looks up ``stat_key`` in a
    'statistics:' cursor and returns element [2] of that entry. The
    temporary session is closed even if opening the cursor or the lookup
    raises, so a failed read does not leave a session open on ``conn``.
    """
    stat_session = conn.open_session('')
    try:
        stat_cursor = stat_session.open_cursor('statistics:')
        return stat_cursor[stat_key][2]
    finally:
        stat_session.close()
|
||||
|
||||
def populate_on_leader(self, ts=10):
|
||||
cursor = self.session.open_cursor(self.uri)
|
||||
for i in range(self.nitems):
|
||||
@ -58,23 +53,12 @@ class test_layered_fast_truncate17(wttest.WiredTigerTestCase):
|
||||
cursor[i] = 'v'
|
||||
self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(ts))
|
||||
cursor.close()
|
||||
self.conn.set_timestamp('stable_timestamp=' + self.timestamp_str(ts) +
|
||||
',oldest_timestamp=' + self.timestamp_str(1))
|
||||
self.session.checkpoint()
|
||||
self.leader_checkpoint(ts)
|
||||
|
||||
def setup_follower(self):
    """
    Open a follower connection (statistics enabled) with a session on it,
    create the table with ``self.table_config`` on leader and follower,
    populate through the leader, and advance the follower's checkpoint.
    """
    follower_config = (
        self.extensionsConfig() + ',create,statistics=(all),disaggregated=(role="follower")'
    )
    self.conn_follow = self.wiredtiger_open('follower', follower_config)
    self.session_follow = self.conn_follow.open_session('')

    for sess in (self.session, self.session_follow):
        sess.create(self.uri, self.table_config)

    self.populate_on_leader()
    self.disagg_advance_checkpoint(self.conn_follow)
|
||||
|
||||
def step_up(self):
|
||||
self.ignoreStdoutPattern('Picking up the same checkpoint')
|
||||
self.disagg_switch_follower_and_leader(self.conn_follow)
|
||||
self.conn_follow, self.session_follow = self.open_follower(self.table_config)
|
||||
|
||||
def truncate_range(self, start_key, stop_key, ts):
|
||||
c_start = self.session_follow.open_cursor(self.uri)
|
||||
|
||||
@ -30,22 +30,16 @@
|
||||
# Write conflict detection for follower fast truncate (truncate-truncate
|
||||
# conflicts only).
|
||||
|
||||
import unittest
|
||||
from contextlib import closing, nullcontext
|
||||
from typing import Iterable
|
||||
from helper_disagg import disagg_test_class, gen_disagg_storages
|
||||
from helper_layered_fast_truncate import LayeredFastTruncateConfigMixin, range_inclusive
|
||||
from wiredtiger import WiredTigerError
|
||||
from wtscenario import make_scenarios
|
||||
import wttest
|
||||
|
||||
|
||||
def range_inclusive(start: int, stop: int) -> range:
    """Build the range start, start + 1, ..., stop, with both endpoints included."""
    past_the_end = stop + 1
    return range(start, past_the_end)
|
||||
|
||||
|
||||
@disagg_test_class
|
||||
class test_layered_fast_truncate18(wttest.WiredTigerTestCase):
|
||||
class test_layered_fast_truncate18(LayeredFastTruncateConfigMixin, wttest.WiredTigerTestCase):
|
||||
"""
|
||||
Write conflict detection for follower fast truncate (truncate-truncate
|
||||
conflicts only).
|
||||
@ -62,51 +56,32 @@ class test_layered_fast_truncate18(wttest.WiredTigerTestCase):
|
||||
|
||||
CONFLICT_MSG = "/conflict between concurrent operations/"
|
||||
|
||||
def session_create_config(self) -> str:
    """
    Return the session.create() config for ``self.uri``: integer keys and
    string values, with the disagg/layered options added for "table" URIs.
    """
    is_table = self.uri.startswith("table")
    suffix = ",block_manager=disagg,type=layered" if is_table else ""
    return "key_format=i,value_format=S" + suffix
|
||||
# These helpers are local to 18 because they all take an explicit session
|
||||
# (the conflict tests drive two sessions concurrently). The equivalent
|
||||
# mixin helpers are bound to self.session and so are not reusable here.
|
||||
|
||||
def auto_closing_cursor(self, session) -> closing:
|
||||
"""Return a cursor that auto-closes as it goes out of scope."""
|
||||
def cursor_on(self, session):
|
||||
"""Return a cursor on the given session that auto-closes."""
|
||||
return closing(session.open_cursor(self.uri))
|
||||
|
||||
def auto_closing_session(self) -> closing:
|
||||
def auto_closing_session(self):
|
||||
"""Return a session that auto-closes as it goes out of scope."""
|
||||
return closing(self.conn.open_session())
|
||||
|
||||
def populate(self, keys: Iterable[int]):
    """Insert each key with the value "v" through ``self.session``, in one transaction."""
    with self.auto_closing_cursor(self.session) as writer, self.transaction():
        for k in keys:
            writer[k] = "v"
|
||||
|
||||
def setup_leader(self, keys: Iterable[int] | None = None):
|
||||
"""Create the table on the leader and optionally populate stable."""
|
||||
self.session.create(self.uri, self.session_create_config())
|
||||
if keys is not None:
|
||||
self.populate(keys)
|
||||
self.session.checkpoint()
|
||||
|
||||
def setup_follower(self, keys: Iterable[int] | None = None):
|
||||
"""Switch to follower role and optionally write keys to ingest."""
|
||||
self.reopen_disagg_conn('disaggregated=(role="follower"),')
|
||||
if keys is not None:
|
||||
self.populate(keys)
|
||||
|
||||
def cursor_for_key(self, key: int | None, session):
|
||||
def cursor_for_key(self, key, session):
|
||||
"""Return a cursor with its key set, or None if key is None."""
|
||||
if key is None:
|
||||
return nullcontext(None)
|
||||
cursor = self.auto_closing_cursor(session)
|
||||
cursor = self.cursor_on(session)
|
||||
cursor.thing.set_key(key)
|
||||
return cursor
|
||||
|
||||
def truncate(self, session, start_key: int | None, stop_key: int | None):
|
||||
"""Execute a truncate from start to stop key inclusive."""
|
||||
def truncate_on(self, session, start_key, stop_key):
|
||||
"""
|
||||
Truncate [start_key, stop_key] inclusive on the given session.
|
||||
Caller manages the transaction (the conflict tests inspect the
|
||||
truncate's failure/success inside a hand-managed txn).
|
||||
"""
|
||||
with (
|
||||
self.cursor_for_key(start_key, session) as start,
|
||||
self.cursor_for_key(stop_key, session) as stop,
|
||||
@ -121,8 +96,8 @@ class test_layered_fast_truncate18(wttest.WiredTigerTestCase):
|
||||
|
||||
# Within a single transaction: truncate 30-60, then truncate 40-80.
|
||||
with self.transaction():
|
||||
self.truncate(self.session, 30, 60)
|
||||
self.truncate(self.session, 40, 80)
|
||||
self.truncate_on(self.session, 30, 60)
|
||||
self.truncate_on(self.session, 40, 80)
|
||||
|
||||
# The transaction committed; no WT_ROLLBACK raised.
|
||||
|
||||
@ -134,7 +109,7 @@ class test_layered_fast_truncate18(wttest.WiredTigerTestCase):
|
||||
# txn A begins a truncate over 30-60 and leaves it uncommitted.
|
||||
session_a = self.session
|
||||
session_a.begin_transaction()
|
||||
self.truncate(session_a, 30, 60)
|
||||
self.truncate_on(session_a, 30, 60)
|
||||
|
||||
# txn B truncates overlapping range 40-70 and gets WT_ROLLBACK.
|
||||
with (
|
||||
@ -143,7 +118,7 @@ class test_layered_fast_truncate18(wttest.WiredTigerTestCase):
|
||||
):
|
||||
self.assertRaisesException(
|
||||
WiredTigerError,
|
||||
lambda: self.truncate(session_b, 40, 70),
|
||||
lambda: self.truncate_on(session_b, 40, 70),
|
||||
self.CONFLICT_MSG,
|
||||
)
|
||||
|
||||
@ -155,7 +130,7 @@ class test_layered_fast_truncate18(wttest.WiredTigerTestCase):
|
||||
# txn A begins a truncate over 30-60 and leaves it uncommitted.
|
||||
session_a = self.session
|
||||
session_a.begin_transaction()
|
||||
self.truncate(session_a, 30, 60)
|
||||
self.truncate_on(session_a, 30, 60)
|
||||
|
||||
# txn B truncates overlapping range 40-70 and gets WT_ROLLBACK.
|
||||
with (
|
||||
@ -164,7 +139,7 @@ class test_layered_fast_truncate18(wttest.WiredTigerTestCase):
|
||||
):
|
||||
self.assertRaisesException(
|
||||
WiredTigerError,
|
||||
lambda: self.truncate(session_b, 40, 70),
|
||||
lambda: self.truncate_on(session_b, 40, 70),
|
||||
self.CONFLICT_MSG,
|
||||
)
|
||||
|
||||
@ -176,14 +151,14 @@ class test_layered_fast_truncate18(wttest.WiredTigerTestCase):
|
||||
# txn A truncates 10-30 and leaves it uncommitted.
|
||||
session_a = self.session
|
||||
session_a.begin_transaction()
|
||||
self.truncate(session_a, 10, 30)
|
||||
self.truncate_on(session_a, 10, 30)
|
||||
|
||||
# txn B truncates 50-70 (no overlap) and commits successfully.
|
||||
with (
|
||||
self.auto_closing_session() as session_b,
|
||||
self.transaction(session=session_b),
|
||||
):
|
||||
self.truncate(session_b, 50, 70)
|
||||
self.truncate_on(session_b, 50, 70)
|
||||
|
||||
def test_rolled_back_truncate_no_residual(self):
|
||||
# A follower with stable keys 1-100.
|
||||
@ -193,14 +168,14 @@ class test_layered_fast_truncate18(wttest.WiredTigerTestCase):
|
||||
# txn A truncates 30-60 then explicitly rolls back.
|
||||
session_a = self.session
|
||||
with self.transaction(session=session_a, rollback=True):
|
||||
self.truncate(session_a, 30, 60)
|
||||
self.truncate_on(session_a, 30, 60)
|
||||
|
||||
# txn B truncates the same range 30-60 and commits without WT_ROLLBACK.
|
||||
with (
|
||||
self.auto_closing_session() as session_b,
|
||||
self.transaction(session=session_b),
|
||||
):
|
||||
self.truncate(session_b, 30, 60)
|
||||
self.truncate_on(session_b, 30, 60)
|
||||
|
||||
def test_invisible_committed_truncate_conflicts(self):
|
||||
# A follower with stable keys 1-100.
|
||||
@ -210,7 +185,7 @@ class test_layered_fast_truncate18(wttest.WiredTigerTestCase):
|
||||
# txn A commits a truncate over 30-60 at ts=10 (invisible to txn B).
|
||||
self.conn.set_timestamp("oldest_timestamp=" + self.timestamp_str(1))
|
||||
with self.transaction(commit_timestamp=10):
|
||||
self.truncate(self.session, 30, 60)
|
||||
self.truncate_on(self.session, 30, 60)
|
||||
|
||||
# txn B (read_ts=5) truncates overlapping range 40-70 and gets
|
||||
# WT_ROLLBACK.
|
||||
@ -222,7 +197,7 @@ class test_layered_fast_truncate18(wttest.WiredTigerTestCase):
|
||||
):
|
||||
self.assertRaisesException(
|
||||
WiredTigerError,
|
||||
lambda: self.truncate(session_b, 40, 70),
|
||||
lambda: self.truncate_on(session_b, 40, 70),
|
||||
self.CONFLICT_MSG,
|
||||
)
|
||||
|
||||
@ -234,7 +209,7 @@ class test_layered_fast_truncate18(wttest.WiredTigerTestCase):
|
||||
# txn A commits a truncate over 30-60 at ts=5 (visible to txn B).
|
||||
self.conn.set_timestamp("oldest_timestamp=" + self.timestamp_str(1))
|
||||
with self.transaction(commit_timestamp=5):
|
||||
self.truncate(self.session, 30, 60)
|
||||
self.truncate_on(self.session, 30, 60)
|
||||
|
||||
# txn B (read_ts=10) truncates overlapping range 40-70 without
|
||||
# WT_ROLLBACK.
|
||||
@ -242,7 +217,7 @@ class test_layered_fast_truncate18(wttest.WiredTigerTestCase):
|
||||
self.auto_closing_session() as session_b,
|
||||
self.transaction(session=session_b, read_timestamp=10),
|
||||
):
|
||||
self.truncate(session_b, 40, 70)
|
||||
self.truncate_on(session_b, 40, 70)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
112
src/third_party/wiredtiger/test/suite/test_layered_prepare03.py
vendored
Normal file
112
src/third_party/wiredtiger/test/suite/test_layered_prepare03.py
vendored
Normal file
@ -0,0 +1,112 @@
|
||||
#!/usr/bin/env python3
|
||||
#
|
||||
# Public Domain 2014-present MongoDB, Inc.
|
||||
# Public Domain 2008-2014 WiredTiger, Inc.
|
||||
#
|
||||
# This is free and unencumbered software released into the public domain.
|
||||
#
|
||||
# Anyone is free to copy, modify, publish, use, compile, sell, or
|
||||
# distribute this software, either in source code form or as a compiled
|
||||
# binary, for any purpose, commercial or non-commercial, and by any
|
||||
# means.
|
||||
#
|
||||
# In jurisdictions that recognize copyright laws, the author or authors
|
||||
# of this software dedicate any and all copyright interest in the
|
||||
# software to the public domain. We make this dedication for the benefit
|
||||
# of the public at large and to the detriment of our heirs and
|
||||
# successors. We intend this dedication to be an overt act of
|
||||
# relinquishment in perpetuity of all present and future rights to this
|
||||
# software under copyright law.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
# OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
import wiredtiger, wttest
|
||||
from helper_disagg import disagg_test_class
|
||||
|
||||
# test_layered_prepare03.py
|
||||
# Forward iteration on a layered cursor after the very first next() returns
|
||||
# WT_PREPARE_CONFLICT must resume correctly and return all visible keys.
|
||||
|
||||
@disagg_test_class
class test_layered_prepare03(wttest.WiredTigerTestCase):
    """
    Forward scan on a layered cursor whose very first next() reports a prepare
    conflict: once the conflict is gone, the scan has to yield every stable key.
    """

    conn_base_config = 'precise_checkpoint=true,'
    conn_config = conn_base_config + 'disaggregated=(role="leader")'

    def safe_next(self, cursor):
        """
        Advance ``cursor`` by one record and return the result of next().

        A WiredTigerError whose text mentions WT_PREPARE_CONFLICT is turned
        into the WT_PREPARE_CONFLICT code; any other error is re-raised.
        """
        try:
            ret = cursor.next()
        except wiredtiger.WiredTigerError as err:
            if 'WT_PREPARE_CONFLICT' not in str(err):
                raise
            ret = wiredtiger.WT_PREPARE_CONFLICT
        return ret

    def test_iterate_after_prepare_conflict_on_first_key(self):
        """
        Hit WT_PREPARE_CONFLICT on the first next() of a layered cursor, roll
        the prepared transaction back, and check that continuing the scan
        returns all stable keys from the beginning.
        """
        uri = 'table:test_layered_prepare03'
        stable_keys = ['1', '2', '3']

        # Leader: create the table, commit the stable keys at timestamp 100,
        # move stable to 200 and checkpoint.
        self.session.create(
            uri, 'key_format=S,value_format=S,block_manager=disagg,type=layered')
        with self.transaction(session=self.session, commit_timestamp=100):
            writer = self.session.open_cursor(uri)
            for key in stable_keys:
                writer[key] = 'stable_' + key
            writer.close()
        self.conn.set_timestamp(f'stable_timestamp={self.timestamp_str(200)}')
        self.session.checkpoint()

        # Follower: open a second connection and advance it to the leader's checkpoint.
        follower_config = (
            self.extensionsConfig() + ',create,' + self.conn_base_config +
            'disaggregated=(role="follower")')
        conn_follow = self.wiredtiger_open('follower', follower_config)
        self.disagg_advance_checkpoint(conn_follow)

        # Leave a prepared (unresolved) update on key '1', the first key a
        # forward scan reaches.
        prep_session = conn_follow.open_session('')
        prep_cursor = prep_session.open_cursor(uri)
        prep_session.begin_transaction()
        prep_cursor['1'] = 'prepared_update'
        prep_cursor.close()
        prep_session.prepare_transaction(
            f'prepare_timestamp={self.timestamp_str(300)}'
            f',prepared_id={self.prepared_id_str(1)}')

        # Scan from a separate read-committed transaction; the first step must
        # report the conflict.
        iter_session = conn_follow.open_session('')
        iter_session.begin_transaction('isolation=read-committed')
        iter_cursor = iter_session.open_cursor(uri)
        first_step = self.safe_next(iter_cursor)
        self.assertEqual(first_step, wiredtiger.WT_PREPARE_CONFLICT)

        # Remove the conflict, then keep iterating with the same cursor.
        prep_session.rollback_transaction()

        got = []
        while True:
            ret = iter_cursor.next()
            if ret != 0:
                break
            got.append(iter_cursor.get_key())
        self.assertEqual(ret, wiredtiger.WT_NOTFOUND)
        self.assertEqual(got, stable_keys)

        iter_cursor.close()
        iter_session.rollback_transaction()
        prep_session.close()
        conn_follow.close()
|
||||
@ -87,7 +87,7 @@ class test_prepare35(test_prepare_preserve_prepare_base):
|
||||
session_evict.close()
|
||||
|
||||
# Step 4: Rollback the first prepared transaction
|
||||
# This prepends a globally visible tombstone
|
||||
# This appends a globally visible tombstone to the tail of the update chain
|
||||
session_prepare.rollback_transaction("rollback_timestamp=" + self.timestamp_str(35))
|
||||
session_prepare.close()
|
||||
|
||||
|
||||
@ -160,13 +160,13 @@ class test_prepare47(wttest.WiredTigerTestCase):
|
||||
evict_session.close()
|
||||
|
||||
def test_aborted_prepared_with_lost_disk_fallback(self):
|
||||
# Theory: at rollback time, first_committed_upd is NULL (no committed update behind
|
||||
# the prepared insert) but tw_found is true (on-disk cell with stop is the fallback),
|
||||
# so __txn_prepare_rollback_delete_key is not called and no rollback tombstone is
|
||||
# prepended. Later, a reconcile drops the on-disk cell (its stop is globally visible
|
||||
# and nothing is selected for the key), erasing the only fallback. A subsequent
|
||||
# reconcile that walks the surviving aborted prepared update has neither a rollback
|
||||
# tombstone nor an on-disk fallback, tripping the leaked-prepared-update assertion.
|
||||
# Theory: at rollback time there is no committed update behind the prepared insert,
|
||||
# but there is an on-disk cell with a stop that serves as the fallback, so no rollback
|
||||
# tombstone is appended to the chain. Later, a reconcile drops the on-disk cell (its
|
||||
# stop is globally visible and nothing is selected for the key), erasing the only
|
||||
# fallback. A subsequent reconcile that walks the surviving aborted prepared update has
|
||||
# neither a rollback tombstone nor an on-disk fallback, tripping the
|
||||
# leaked-prepared-update assertion.
|
||||
insert_ts = 20
|
||||
delete_ts = 30
|
||||
oldest_after_delete = 31
|
||||
@ -224,8 +224,9 @@ class test_prepare47(wttest.WiredTigerTestCase):
|
||||
self.conn.set_timestamp(
|
||||
'stable_timestamp=' + self.timestamp_str(stable_unstable))
|
||||
|
||||
# Roll back with rollback_ts ahead of stable; first_committed_upd is NULL but
|
||||
# tw_found is true so no rollback tombstone is prepended.
|
||||
# Roll back with rollback_ts ahead of stable; there is no committed update behind the
|
||||
# prepared insert but the on-disk cell exists, so no rollback tombstone is appended to
|
||||
# the chain.
|
||||
self.session.rollback_transaction(
|
||||
'rollback_timestamp=' + self.timestamp_str(rollback_ts))
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user