Import wiredtiger: 6f3dbbf2ed12faffad4a3e274d012c61e58874f5 from branch mongodb-master (#54433)
GitOrigin-RevId: 36bf4d0bf6ecb584bdbe873d873f281bc5cc72a6
This commit is contained in:
parent
e7ead1c8cf
commit
c5ae18cad2
21
src/third_party/wiredtiger/.devcontainer/Dockerfile
vendored
Normal file
21
src/third_party/wiredtiger/.devcontainer/Dockerfile
vendored
Normal file
@ -0,0 +1,21 @@
|
||||
FROM ubuntu:26.04
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
aspell aspell-en ca-certificates ccache clang clang-format clang-tidy \
|
||||
clangd cmake curl doxygen ed file g++ gcc gdb git liblz4-dev libsnappy-dev \
|
||||
libsodium-dev libzstd-dev lld lldb llvm make ninja-build python-is-python3 \
|
||||
python3 python3-dev python3-pip python3-venv swig universal-ctags \
|
||||
zlib1g-dev \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN useradd -m -s /bin/bash wiredtiger
|
||||
USER wiredtiger
|
||||
|
||||
RUN python -m venv /home/wiredtiger/venv
|
||||
ENV PATH="/home/wiredtiger/venv/bin:$PATH"
|
||||
|
||||
RUN pip install --no-cache-dir \
|
||||
find_libpython==0.4.0 gcovr==5.0 psutil==5.9.4 ruff==0.4.5
|
||||
|
||||
WORKDIR /workdir
|
||||
21
src/third_party/wiredtiger/.devcontainer/devcontainer.json
vendored
Normal file
21
src/third_party/wiredtiger/.devcontainer/devcontainer.json
vendored
Normal file
@ -0,0 +1,21 @@
|
||||
{
|
||||
"name": "WiredTiger",
|
||||
"build": {
|
||||
"dockerfile": "Dockerfile"
|
||||
},
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [
|
||||
"llvm-vs-code-extensions.vscode-clangd",
|
||||
"ms-python.python",
|
||||
"ms-vscode.cmake-tools",
|
||||
"ms-vscode.cpptools",
|
||||
"redhat.vscode-yaml"
|
||||
],
|
||||
"settings": {
|
||||
"C_Cpp.intelliSenseEngine": "disabled",
|
||||
"clangd.path": "clangd"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
1
src/third_party/wiredtiger/.gitignore
vendored
1
src/third_party/wiredtiger/.gitignore
vendored
@ -12,7 +12,6 @@ cscope.out
|
||||
cscope.po.out
|
||||
build/
|
||||
build_*/
|
||||
dist/clang-format
|
||||
/docs/
|
||||
test-compatibility-run/
|
||||
COMPATIBILITY_TEST/
|
||||
|
||||
14
src/third_party/wiredtiger/cmake/README.md
vendored
14
src/third_party/wiredtiger/cmake/README.md
vendored
@ -58,6 +58,20 @@ choco install swig
|
||||
choco install python --pre
|
||||
```
|
||||
|
||||
##### Docker
|
||||
|
||||
Alternatively, the repository ships a Dockerfile in `.devcontainer/Dockerfile`
|
||||
with all required and optional build dependencies pre-installed. Build the image
|
||||
and mount the repository:
|
||||
|
||||
```bash
|
||||
docker build -t wiredtiger-dev .devcontainer
|
||||
docker run -it -v "$PWD":/workdir wiredtiger-dev
|
||||
```
|
||||
|
||||
The `.devcontainer/devcontainer.json` also makes this image usable as a
|
||||
[Dev Container](https://containers.dev/) in supporting editors (e.g. VS Code).
|
||||
|
||||
### Building the WiredTiger Library
|
||||
|
||||
Building the WiredTiger library is relatively straightforward. Navigate to the top level of the WiredTiger repository and run the following commands:
|
||||
|
||||
32
src/third_party/wiredtiger/dist/s_clang_format
vendored
32
src/third_party/wiredtiger/dist/s_clang_format
vendored
@ -6,38 +6,16 @@ setup_trap
|
||||
cd_top
|
||||
check_fast_mode_flag
|
||||
|
||||
venv="$TOP_DIR/.venv"
|
||||
|
||||
download_clang_format() {
|
||||
version=$1
|
||||
arch_and_os="$(uname -m)-$(uname)"
|
||||
archive=dist/clang-format.tar.gz
|
||||
|
||||
# Adding more clang-format binaries requires uploading them to boxes.10gen
|
||||
# You can either get the clang-format binary from the llvm releases page
|
||||
# (https://github.com/llvm/llvm-project/releases) or compile clang-format yourself.
|
||||
# Place the binary in dist/ and confirm that s_clang_format runs correctly, then
|
||||
# tar a folder containing just the clang-format binary with the format:
|
||||
# clang-format-llvm-${version}-${arch_and_os}/
|
||||
# clang-format
|
||||
# into a tarball named clang-format-llvm-${version}-${arch_and_os}.tar.gz
|
||||
# The tarball should extract using the tar command below.
|
||||
# This tarball can then be uploaded via a Jira request to the BUILD team.
|
||||
if [[ "$arch_and_os" =~ ^("aarch64-Linux"|"x86_64-Darwin"|"arm64-Darwin"|"x86_64-Linux")$ ]] ; then
|
||||
curl -s https://s3.amazonaws.com/boxes.10gen.com/build/clang-format-llvm-"$version"-"$arch_and_os".tar.gz -o $archive
|
||||
tar --strip=1 -C dist/ -xf $archive clang-format-llvm-"$version"-"$arch_and_os"/clang-format && rm $archive
|
||||
chmod +x ./dist/clang-format
|
||||
|
||||
if [[ "$arch_and_os" =~ ^("x86_64-Darwin"|"arm64-Darwin")$ ]] ; then
|
||||
# Needed to get around the macOS code signing issue.
|
||||
xattr -c ./dist/clang-format
|
||||
fi
|
||||
else
|
||||
echo "$0: unsupported architecture and OS combination '$arch_and_os' to run clang_format"
|
||||
return 1
|
||||
fi
|
||||
python3 -m venv "$venv"
|
||||
"$venv/bin/pip" install --quiet --disable-pip-version-check "clang-format==$version"
|
||||
}
|
||||
|
||||
# Override existing Clang-Format versions in the PATH.
|
||||
export PATH="${PWD}/dist":$PATH
|
||||
export PATH="$venv/bin":$PATH
|
||||
|
||||
# Check if Clang-Format is already available with the desired version.
|
||||
desired_version="12.0.1"
|
||||
|
||||
31
src/third_party/wiredtiger/dist/s_docs
vendored
31
src/third_party/wiredtiger/dist/s_docs
vendored
@ -194,17 +194,28 @@ setup_doxygen()
|
||||
exit 0
|
||||
}
|
||||
|
||||
# Do not build the documentation if doxygen version is not compatible.
|
||||
# Select Doxyfile based on doxygen major.minor version.
|
||||
v=$(doxygen --version)
|
||||
case "$v" in
|
||||
1.8.17) doxyfile="Doxyfile";;
|
||||
1.9.1) doxyfile="Doxyfile.9";;
|
||||
1.9.3) doxyfile="Doxyfile.9";;
|
||||
1.11.*) doxyfile="Doxyfile.11";;
|
||||
*)
|
||||
echo "$0 skipped: unsupported version of doxygen: $v, not 1.8.17, 1.9.1, 1.9.3 or 1.11.*"
|
||||
exit 0
|
||||
esac
|
||||
major=$(cut -d. -f1 <<< "$v")
|
||||
minor=$(cut -d. -f2 <<< "$v")
|
||||
|
||||
# Doxygen < 1.8:
|
||||
if [ "$major" -lt 1 ] || { [ "$major" -eq 1 ] && [ "$minor" -lt 8 ]; }; then
|
||||
echo "$0 failed: doxygen $v is too old, need 1.8 or later"
|
||||
exit 1
|
||||
|
||||
# Doxygen 1.8:
|
||||
elif [ "$major" -eq 1 ] && [ "$minor" -eq 8 ]; then
|
||||
doxyfile="Doxyfile"
|
||||
|
||||
# Doxygen 1.9 - 1.10:
|
||||
elif [ "$major" -eq 1 ] && [ "$minor" -lt 11 ]; then
|
||||
doxyfile="Doxyfile.9"
|
||||
|
||||
# Doxygen >= 1.11:
|
||||
else
|
||||
doxyfile="Doxyfile.11"
|
||||
fi
|
||||
}
|
||||
|
||||
build()
|
||||
|
||||
1
src/third_party/wiredtiger/dist/stat_data.py
vendored
1
src/third_party/wiredtiger/dist/stat_data.py
vendored
@ -418,6 +418,7 @@ conn_stats = [
|
||||
# Note eviction_server_evict_attempt - eviction_server_evict_fail = evict page successes by eviction server.
|
||||
EvictStat('eviction_server_skip_checkpointing_trees', 'eviction server skips trees that are being checkpointed'),
|
||||
EvictStat('eviction_server_skip_dirty_pages_during_checkpoint', 'eviction server skips dirty pages during a running checkpoint'),
|
||||
EvictStat('eviction_server_skip_disagg_trees_checkpointed', 'eviction server skips disaggregated trees already visited by the ongoing checkpoint'),
|
||||
EvictStat('eviction_server_skip_history_store_pages_with_updates_during_checkpoint', 'eviction server skips clean history store pages with updates when a precise checkpoint is in progress'),
|
||||
EvictStat('eviction_server_skip_ingest_trees', 'eviction server skips ingest btrees in disagg'),
|
||||
EvictStat('eviction_server_skip_intl_page_non_aggressive', 'eviction server skipped the internal pages if eviction is not in aggressive mode'),
|
||||
|
||||
2
src/third_party/wiredtiger/import.data
vendored
2
src/third_party/wiredtiger/import.data
vendored
@ -2,5 +2,5 @@
|
||||
"vendor": "wiredtiger",
|
||||
"github": "wiredtiger/wiredtiger",
|
||||
"branch": "mongodb-master",
|
||||
"commit": "3c6ec61a01a4dbdb3351121c0a624d3996e28c03"
|
||||
"commit": "6f3dbbf2ed12faffad4a3e274d012c61e58874f5"
|
||||
}
|
||||
|
||||
@ -1202,10 +1202,6 @@ typedef int int_void;
|
||||
};
|
||||
%enddef
|
||||
|
||||
SIDESTEP_METHOD(__wt_page_log, pl_begin_checkpoint,
|
||||
(WT_SESSION *session, int checkpoint_id),
|
||||
(self, session, checkpoint_id))
|
||||
|
||||
SIDESTEP_METHOD(__wt_page_log, pl_complete_checkpoint,
|
||||
(WT_SESSION *session, WT_PAGE_LOG_COMPLETE_CHECKPOINT_ARGS *args),
|
||||
(self, session, args))
|
||||
|
||||
@ -105,15 +105,14 @@ __wt_conn_calc_read_load(WT_SESSION_IMPL *session)
|
||||
uint8_t load;
|
||||
|
||||
load_control = &S2C(session)->load_control;
|
||||
/*
|
||||
* Avoid division by zero if the cache size has not yet been set in a shared cache.
|
||||
*/
|
||||
bytes_max = load_control->read_load_max;
|
||||
bytes_inuse = __wt_cache_bytes_inuse(S2C(session)->cache);
|
||||
if (F_ISSET(load_control, WT_CONN_LOAD_CONTROL)) {
|
||||
bytes_max = load_control->read_load_max;
|
||||
bytes_inuse = __wt_cache_bytes_inuse(S2C(session)->cache);
|
||||
|
||||
load = __conn_calc_load_pct(bytes_inuse, bytes_max);
|
||||
__wt_atomic_store_uint8_relaxed(&load_control->read_load, load);
|
||||
WT_STAT_CONN_SET(session, read_load, load);
|
||||
load = __conn_calc_load_pct(bytes_inuse, bytes_max);
|
||||
__wt_atomic_store_uint8_relaxed(&load_control->read_load, load);
|
||||
WT_STAT_CONN_SET(session, read_load, load);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
@ -128,15 +127,14 @@ __wt_conn_calc_write_load(WT_SESSION_IMPL *session)
|
||||
uint64_t bytes_dirty, bytes_max;
|
||||
uint8_t load;
|
||||
|
||||
/*
|
||||
* Avoid division by zero if the cache size has not yet been set in a shared cache.
|
||||
*/
|
||||
load_control = &S2C(session)->load_control;
|
||||
bytes_max = load_control->write_load_max;
|
||||
bytes_dirty = __wt_cache_dirty_inuse(S2C(session)->cache);
|
||||
if (F_ISSET(load_control, WT_CONN_LOAD_CONTROL)) {
|
||||
bytes_max = load_control->write_load_max;
|
||||
bytes_dirty = __wt_cache_dirty_inuse(S2C(session)->cache);
|
||||
|
||||
load = __conn_calc_load_pct(bytes_dirty, bytes_max);
|
||||
__wt_atomic_store_uint8_relaxed(&load_control->write_load, load);
|
||||
WT_STAT_CONN_SET(session, write_load, load);
|
||||
load = __conn_calc_load_pct(bytes_dirty, bytes_max);
|
||||
__wt_atomic_store_uint8_relaxed(&load_control->write_load, load);
|
||||
WT_STAT_CONN_SET(session, write_load, load);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
@ -2106,7 +2106,7 @@ __clayered_put(WT_SESSION_IMPL *session, WT_CURSOR_LAYERED *clayered, const WT_I
|
||||
|
||||
if (!leader) {
|
||||
/*
|
||||
* FIXME-WT-16812: Investigate whether this function can be called below the cursor layer.
|
||||
* FIXME-WT-17425: Investigate whether this function can be called below the cursor layer.
|
||||
* Doing so would remove the cursor write operation dependency on the truncate list.
|
||||
*/
|
||||
WT_RET(__wt_layered_table_truncate_detect_write_conflict(
|
||||
@ -2188,7 +2188,7 @@ __clayered_remove_follower(
|
||||
WT_RET(__clayered_reset_cursors(clayered, true));
|
||||
|
||||
/*
|
||||
* FIXME-WT-16812: Investigate whether this function can be called below the cursor layer. Doing
|
||||
* FIXME-WT-17425: Investigate whether this function can be called below the cursor layer. Doing
|
||||
* so would remove the write cursor operations dependency on the truncate list.
|
||||
*/
|
||||
WT_RET(__wt_layered_table_truncate_detect_write_conflict(
|
||||
|
||||
@ -703,7 +703,7 @@ Get an empty page instead and mark it instantiated.}
|
||||
Return \c WT_NOTFOUND instead of reading the page if we are skipping deleted pages.}
|
||||
|
||||
@row{bt_split.c, Hook, in \c __split_parent_discard_ref,
|
||||
Discard the \c page_del field of the }\c WT_REF.
|
||||
Discard the \c page_del field of the \c WT_REF.}
|
||||
@row{bt_split.c, Hook, in \c __split_parent,
|
||||
For VLCS trees\, avoid discarding the leftmost child even if it's deleted.}
|
||||
|
||||
|
||||
@ -330,7 +330,7 @@ __wti_evict_walk(WT_SESSION_IMPL *session, WTI_EVICT_QUEUE *queue)
|
||||
uint32_t evict_walk_period;
|
||||
u_int loop_count, max_entries, retries, slot, start_slot;
|
||||
u_int total_candidates;
|
||||
bool dhandle_list_locked;
|
||||
bool aggressive, dhandle_list_locked;
|
||||
|
||||
WT_TRACK_OP_INIT(session);
|
||||
|
||||
@ -429,13 +429,29 @@ retry:
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* Skip disaggregated btrees that have already been visited by the ongoing checkpoint when
|
||||
* we are looking only for dirty pages and the cache is not under pressure. Every modified
|
||||
* page in such a tree belongs to the next checkpoint and would fail the post-lock recheck,
|
||||
* so walking only inflates the worker failure rate. When looking for clean or update pages,
|
||||
* or when eviction is aggressive, walk anyway: any candidates the workers can lay hands on
|
||||
* are better than starving the cache.
|
||||
*/
|
||||
aggressive = __wt_evict_aggressive(session);
|
||||
if (!F_ISSET(evict, WT_EVICT_CACHE_CLEAN | WT_EVICT_CACHE_UPDATES) && !aggressive &&
|
||||
__wt_btree_disagg_checkpointed(session, btree)) {
|
||||
WT_STAT_CONN_INCR(session, eviction_server_skip_disagg_trees_checkpointed);
|
||||
__evict_disagg_btree_skip_count(session, btree);
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* Skip files that are configured to stick in cache until we become aggressive.
|
||||
*
|
||||
* If the file is contributing heavily to our cache usage then ignore the "stickiness" of
|
||||
* its pages.
|
||||
*/
|
||||
if (btree->evict_priority != 0 && !__wt_evict_aggressive(session) &&
|
||||
if (btree->evict_priority != 0 && !aggressive &&
|
||||
!__evict_btree_dominating_cache(session, btree)) {
|
||||
WT_STAT_CONN_INCR(session, eviction_server_skip_trees_stick_in_cache);
|
||||
__evict_disagg_btree_skip_count(session, btree);
|
||||
@ -524,7 +540,7 @@ retry:
|
||||
* If eviction is not in aggressive mode, sleep a bit to give the checkpoint thread a
|
||||
* chance to gather its handles.
|
||||
*/
|
||||
if (F_ISSET_ATOMIC_32(conn, WT_CONN_CKPT_GATHER) && !__wt_evict_aggressive(session)) {
|
||||
if (F_ISSET_ATOMIC_32(conn, WT_CONN_CKPT_GATHER) && !aggressive) {
|
||||
__wt_sleep(0, 10);
|
||||
WT_STAT_CONN_INCR(session, eviction_walk_sleeps);
|
||||
}
|
||||
|
||||
@ -2242,6 +2242,21 @@ __wt_btree_can_discard(WT_SESSION_IMPL *session)
|
||||
return (__wt_materialization_check(session, rec_lsn_max));
|
||||
}
|
||||
|
||||
/*
|
||||
* __wt_btree_disagg_checkpointed --
|
||||
* Return true when a disaggregated btree has been visited by the current global checkpoint and
|
||||
* that checkpoint is still running. While this holds, every modified page in the btree belongs
|
||||
* to the next checkpoint and cannot be evicted.
|
||||
*/
|
||||
static WT_INLINE bool
|
||||
__wt_btree_disagg_checkpointed(WT_SESSION_IMPL *session, WT_BTREE *btree)
|
||||
{
|
||||
return (F_ISSET(btree, WT_BTREE_DISAGGREGATED) &&
|
||||
__wt_atomic_load_uint64_acquire(&btree->checkpoint_gen) ==
|
||||
__wt_gen(session, WT_GEN_CHECKPOINT) &&
|
||||
__wt_atomic_load_bool_v_acquire(&S2C(session)->txn_global.checkpoint_running));
|
||||
}
|
||||
|
||||
/*
|
||||
* __wt_page_can_evict --
|
||||
* Check whether a page can be evicted.
|
||||
@ -2252,8 +2267,7 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp)
|
||||
WT_BTREE *btree;
|
||||
WT_PAGE *page;
|
||||
WT_PAGE_MODIFY *mod;
|
||||
uint64_t checkpoint_gen;
|
||||
bool checkpoint_running, modified;
|
||||
bool modified;
|
||||
|
||||
if (inmem_splitp != NULL)
|
||||
*inmem_splitp = false;
|
||||
@ -2374,16 +2388,10 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp)
|
||||
* It is safe to evict when checkpoint is not running because we have opened a new checkpoint
|
||||
* before we set the checkpoint running flag to false.
|
||||
*/
|
||||
if (modified && F_ISSET(btree, WT_BTREE_DISAGGREGATED) && !WT_SESSION_BTREE_SYNC(session)) {
|
||||
checkpoint_gen = __wt_atomic_load_uint64_acquire(&btree->checkpoint_gen);
|
||||
if (checkpoint_gen == __wt_gen(session, WT_GEN_CHECKPOINT)) {
|
||||
checkpoint_running =
|
||||
__wt_atomic_load_bool_v_acquire(&S2C(session)->txn_global.checkpoint_running);
|
||||
if (checkpoint_running) {
|
||||
WT_STAT_CONN_DSRC_INCR(session, cache_eviction_blocked_disagg_next_checkpoint);
|
||||
return (false);
|
||||
}
|
||||
}
|
||||
if (modified && !WT_SESSION_BTREE_SYNC(session) &&
|
||||
__wt_btree_disagg_checkpointed(session, btree)) {
|
||||
WT_STAT_CONN_DSRC_INCR(session, cache_eviction_blocked_disagg_next_checkpoint);
|
||||
return (false);
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@ -1988,6 +1988,8 @@ static WT_INLINE bool __wt_block_eligible_for_sweep(WT_BM *bm, WT_BLOCK *block)
|
||||
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
static WT_INLINE bool __wt_btree_can_discard(WT_SESSION_IMPL *session)
|
||||
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
static WT_INLINE bool __wt_btree_disagg_checkpointed(WT_SESSION_IMPL *session, WT_BTREE *btree)
|
||||
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
static WT_INLINE bool __wt_btree_syncing_by_other_sessions(WT_SESSION_IMPL *session)
|
||||
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
static WT_INLINE bool __wt_cache_full(WT_SESSION_IMPL *session)
|
||||
|
||||
@ -576,6 +576,7 @@ struct __wt_connection_stats {
|
||||
int64_t eviction_root_pages_skipped;
|
||||
int64_t eviction_server_skip_history_store_pages_with_updates_during_checkpoint;
|
||||
int64_t eviction_server_skip_dirty_pages_during_checkpoint;
|
||||
int64_t eviction_server_skip_disagg_trees_checkpointed;
|
||||
int64_t eviction_server_skip_ingest_trees;
|
||||
int64_t eviction_server_skip_intl_page_with_active_child;
|
||||
int64_t eviction_server_skip_metatdata_with_history;
|
||||
|
||||
1738
src/third_party/wiredtiger/src/include/wiredtiger.h.in
vendored
1738
src/third_party/wiredtiger/src/include/wiredtiger.h.in
vendored
File diff suppressed because it is too large
Load Diff
@ -2023,6 +2023,7 @@ static const char *const __stats_connection_desc[] = {
|
||||
"cache: eviction server skips clean history store pages with updates when a precise checkpoint "
|
||||
"is in progress",
|
||||
"cache: eviction server skips dirty pages during a running checkpoint",
|
||||
"cache: eviction server skips disaggregated trees already visited by the ongoing checkpoint",
|
||||
"cache: eviction server skips ingest btrees in disagg",
|
||||
"cache: eviction server skips internal pages as it has an active child",
|
||||
"cache: eviction server skips metadata pages with history",
|
||||
@ -3107,6 +3108,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
|
||||
stats->eviction_root_pages_skipped = 0;
|
||||
stats->eviction_server_skip_history_store_pages_with_updates_during_checkpoint = 0;
|
||||
stats->eviction_server_skip_dirty_pages_during_checkpoint = 0;
|
||||
stats->eviction_server_skip_disagg_trees_checkpointed = 0;
|
||||
stats->eviction_server_skip_ingest_trees = 0;
|
||||
stats->eviction_server_skip_intl_page_with_active_child = 0;
|
||||
stats->eviction_server_skip_metatdata_with_history = 0;
|
||||
@ -4179,6 +4181,8 @@ __wt_stat_connection_aggregate(WT_CONNECTION_STATS **from, WT_CONNECTION_STATS *
|
||||
from, eviction_server_skip_history_store_pages_with_updates_during_checkpoint);
|
||||
to->eviction_server_skip_dirty_pages_during_checkpoint +=
|
||||
WT_STAT_CONN_READ(from, eviction_server_skip_dirty_pages_during_checkpoint);
|
||||
to->eviction_server_skip_disagg_trees_checkpointed +=
|
||||
WT_STAT_CONN_READ(from, eviction_server_skip_disagg_trees_checkpointed);
|
||||
to->eviction_server_skip_ingest_trees +=
|
||||
WT_STAT_CONN_READ(from, eviction_server_skip_ingest_trees);
|
||||
to->eviction_server_skip_intl_page_with_active_child +=
|
||||
|
||||
@ -10,8 +10,8 @@
|
||||
|
||||
/*
|
||||
* Selects which truncate-list entries __truncate_search considers: those visible to the calling
|
||||
* transaction (committed truncates we may need to honor) or those not visible (uncommitted
|
||||
* truncates that may conflict with our writes).
|
||||
* transaction (committed and within its read timestamp) or those not visible (uncommitted, or
|
||||
* committed at a timestamp beyond its read timestamp) that may conflict with our writes.
|
||||
*/
|
||||
typedef enum { WT_TRUNCATE_SEARCH_VISIBLE, WT_TRUNCATE_SEARCH_NOT_VISIBLE } WT_TRUNCATE_SEARCH_MODE;
|
||||
|
||||
@ -259,15 +259,14 @@ __truncate_search(WT_SESSION_IMPL *session, WT_LAYERED_TABLE *layered_table, con
|
||||
TAILQ_FOREACH (entry, &layered_table->truncateqh, q) {
|
||||
WT_STAT_CONN_INCR(session, layered_truncate_list_search_entries_walked);
|
||||
|
||||
if (mode == WT_TRUNCATE_SEARCH_VISIBLE) {
|
||||
wt_timestamp_t start_ts, durable_ts;
|
||||
__truncate_read_entry_timestamps(entry, &start_ts, &durable_ts);
|
||||
if (!__wt_txn_visible(session, entry->txn_id, start_ts, durable_ts))
|
||||
continue;
|
||||
} else if (mode == WT_TRUNCATE_SEARCH_NOT_VISIBLE) {
|
||||
if (__wt_txn_visible_id(session, entry->txn_id))
|
||||
continue;
|
||||
}
|
||||
wt_timestamp_t start_ts, durable_ts;
|
||||
__truncate_read_entry_timestamps(entry, &start_ts, &durable_ts);
|
||||
|
||||
const bool visible = __wt_txn_visible(session, entry->txn_id, start_ts, durable_ts);
|
||||
const bool want_visible = (mode == WT_TRUNCATE_SEARCH_VISIBLE);
|
||||
|
||||
if (visible != want_visible)
|
||||
continue;
|
||||
|
||||
WT_RET(__key_within_truncate_range(
|
||||
session, collator, &entry->start_key, &entry->stop_key, key, is_foundp));
|
||||
@ -287,7 +286,7 @@ __truncate_search(WT_SESSION_IMPL *session, WT_LAYERED_TABLE *layered_table, con
|
||||
* Search if the current key we are modifying conflicts with any uncommitted truncates in the
|
||||
* layered table truncate list.
|
||||
*
|
||||
* FIXME-WT-16812: Investigate whether this function can be called below the cursor layer. Doing so
|
||||
* FIXME-WT-17425: Investigate whether this function can be called below the cursor layer. Doing so
|
||||
* would remove the write cursor operations dependency on the truncate list.
|
||||
*/
|
||||
int
|
||||
@ -347,7 +346,10 @@ __wt_layered_table_truncate_detect_non_ingest_write_conflict(WT_SESSION_IMPL *se
|
||||
TAILQ_FOREACH (entry, &layered_table->truncateqh, q) {
|
||||
WT_STAT_CONN_INCR(session, layered_truncate_list_search_entries_walked);
|
||||
|
||||
if (__wt_txn_visible_id(session, entry->txn_id))
|
||||
wt_timestamp_t start_ts, durable_ts;
|
||||
__truncate_read_entry_timestamps(entry, &start_ts, &durable_ts);
|
||||
|
||||
if (__wt_txn_visible(session, entry->txn_id, start_ts, durable_ts))
|
||||
continue;
|
||||
|
||||
/* Does the new range start within an existing range? */
|
||||
|
||||
1069
src/third_party/wiredtiger/test/catch2/truncate/test_truncate_write_conflict.cpp
vendored
Normal file
1069
src/third_party/wiredtiger/test/catch2/truncate/test_truncate_write_conflict.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
@ -92,13 +92,26 @@ class test_config09(wttest.WiredTigerTestCase):
|
||||
self.assertEqual(val, 1024)
|
||||
|
||||
self.update_tables()
|
||||
val = self.get_stat(stat.conn.checkpoint_handle_applied)
|
||||
applied = self.get_stat(stat.conn.checkpoint_handle_applied)
|
||||
# We cannot assert it is equal to half because there could be other
|
||||
# internal tables in the count. Assert it is less than 75% and at least
|
||||
# half.
|
||||
self.assertGreaterEqual(val, self.ntables // 2)
|
||||
self.assertLess(val, self.ntables // 4 * 3)
|
||||
val = self.get_stat(stat.conn.checkpoint_handle_skipped)
|
||||
self.assertNotEqual(val, 0)
|
||||
self.assertGreaterEqual(applied, self.ntables // 2)
|
||||
self.assertLess(applied, self.ntables // 4 * 3)
|
||||
skipped = self.get_stat(stat.conn.checkpoint_handle_skipped)
|
||||
self.assertNotEqual(skipped, 0)
|
||||
|
||||
# The handle stats should be reset at the start of each gather and
|
||||
# then set to the per-checkpoint value. Two back-to-back checkpoints
|
||||
# over the same working set must therefore publish the same value;
|
||||
# if the reset is missing, the second value will be roughly double
|
||||
# the first.
|
||||
locked_1 = self.get_stat(stat.conn.checkpoint_handle_locked)
|
||||
meta_checked_1 = self.get_stat(stat.conn.checkpoint_handle_meta_checked)
|
||||
self.session.checkpoint()
|
||||
locked_2 = self.get_stat(stat.conn.checkpoint_handle_locked)
|
||||
meta_checked_2 = self.get_stat(stat.conn.checkpoint_handle_meta_checked)
|
||||
self.assertEqual(locked_1, locked_2)
|
||||
self.assertEqual(meta_checked_1, meta_checked_2)
|
||||
|
||||
self.conn.close()
|
||||
|
||||
249
src/third_party/wiredtiger/test/suite/test_layered_fast_truncate18.py
vendored
Normal file
249
src/third_party/wiredtiger/test/suite/test_layered_fast_truncate18.py
vendored
Normal file
@ -0,0 +1,249 @@
|
||||
#!/usr/bin/env python3
|
||||
#
|
||||
# Public Domain 2014-present MongoDB, Inc.
|
||||
# Public Domain 2008-2014 WiredTiger, Inc.
|
||||
#
|
||||
# This is free and unencumbered software released into the public domain.
|
||||
#
|
||||
# Anyone is free to copy, modify, publish, use, compile, sell, or
|
||||
# distribute this software, either in source code form or as a compiled
|
||||
# binary, for any purpose, commercial or non-commercial, and by any
|
||||
# means.
|
||||
#
|
||||
# In jurisdictions that recognize copyright laws, the author or authors
|
||||
# of this software dedicate any and all copyright interest in the
|
||||
# software to the public domain. We make this dedication for the benefit
|
||||
# of the public at large and to the detriment of our heirs and
|
||||
# successors. We intend this dedication to be an overt act of
|
||||
# relinquishment in perpetuity of all present and future rights to this
|
||||
# software under copyright law.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
# OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
# test_layered_fast_truncate18.py
|
||||
# Write conflict detection for follower fast truncate (truncate-truncate
|
||||
# conflicts only).
|
||||
|
||||
import unittest
|
||||
from contextlib import closing, nullcontext
|
||||
from typing import Iterable
|
||||
from helper_disagg import disagg_test_class, gen_disagg_storages
|
||||
from wiredtiger import WiredTigerError
|
||||
from wtscenario import make_scenarios
|
||||
import wttest
|
||||
|
||||
|
||||
def range_inclusive(start: int, stop: int) -> range:
|
||||
"""Return a range covering [start, stop] inclusive."""
|
||||
return range(start, stop + 1)
|
||||
|
||||
|
||||
@disagg_test_class
|
||||
class test_layered_fast_truncate18(wttest.WiredTigerTestCase):
|
||||
"""
|
||||
Write conflict detection for follower fast truncate (truncate-truncate
|
||||
conflicts only).
|
||||
"""
|
||||
|
||||
uris = [
|
||||
("layered", {"uri": "layered:fast_truncate"}),
|
||||
("table", {"uri": "table:fast_truncate"}),
|
||||
]
|
||||
|
||||
disagg_storages = gen_disagg_storages(disagg_only=True)
|
||||
scenarios = make_scenarios(disagg_storages, uris)
|
||||
conn_config = 'disaggregated=(role="leader"),'
|
||||
|
||||
CONFLICT_MSG = "/conflict between concurrent operations/"
|
||||
|
||||
def session_create_config(self) -> str:
|
||||
"""Return a config string for session.create() based on table URI."""
|
||||
cfg = "key_format=i,value_format=S"
|
||||
if self.uri.startswith("table"):
|
||||
cfg += ",block_manager=disagg,type=layered"
|
||||
return cfg
|
||||
|
||||
def auto_closing_cursor(self, session) -> closing:
|
||||
"""Return a cursor that auto-closes as it goes out of scope."""
|
||||
return closing(session.open_cursor(self.uri))
|
||||
|
||||
def auto_closing_session(self) -> closing:
|
||||
"""Return a session that auto-closes as it goes out of scope."""
|
||||
return closing(self.conn.open_session())
|
||||
|
||||
def populate(self, keys: Iterable[int]):
|
||||
"""Insert each key with a placeholder value in a single transaction."""
|
||||
with self.auto_closing_cursor(self.session) as cursor:
|
||||
with self.transaction():
|
||||
for key in keys:
|
||||
cursor[key] = "v"
|
||||
|
||||
def setup_leader(self, keys: Iterable[int] | None = None):
|
||||
"""Create the table on the leader and optionally populate stable."""
|
||||
self.session.create(self.uri, self.session_create_config())
|
||||
if keys is not None:
|
||||
self.populate(keys)
|
||||
self.session.checkpoint()
|
||||
|
||||
def setup_follower(self, keys: Iterable[int] | None = None):
|
||||
"""Switch to follower role and optionally write keys to ingest."""
|
||||
self.reopen_disagg_conn('disaggregated=(role="follower"),')
|
||||
if keys is not None:
|
||||
self.populate(keys)
|
||||
|
||||
def cursor_for_key(self, key: int | None, session):
|
||||
"""Return a cursor with its key set, or None if key is None."""
|
||||
if key is None:
|
||||
return nullcontext(None)
|
||||
cursor = self.auto_closing_cursor(session)
|
||||
cursor.thing.set_key(key)
|
||||
return cursor
|
||||
|
||||
def truncate(self, session, start_key: int | None, stop_key: int | None):
|
||||
"""Execute a truncate from start to stop key inclusive."""
|
||||
with (
|
||||
self.cursor_for_key(start_key, session) as start,
|
||||
self.cursor_for_key(stop_key, session) as stop,
|
||||
):
|
||||
uri = self.uri if (start is None and stop is None) else None
|
||||
session.truncate(uri, start, stop, None)
|
||||
|
||||
def test_same_txn_truncates_no_self_conflict(self):
|
||||
# A follower with stable keys 1-100.
|
||||
self.setup_leader(keys=range_inclusive(1, 100))
|
||||
self.setup_follower()
|
||||
|
||||
# Within a single transaction: truncate 30-60, then truncate 40-80.
|
||||
with self.transaction():
|
||||
self.truncate(self.session, 30, 60)
|
||||
self.truncate(self.session, 40, 80)
|
||||
|
||||
# The transaction committed; no WT_ROLLBACK raised.
|
||||
|
||||
def test_overlapping_truncates_conflict_with_ingest(self):
|
||||
# A follower with stable keys 1-100 and ingest key 45.
|
||||
self.setup_leader(keys=range_inclusive(1, 100))
|
||||
self.setup_follower(keys=[45])
|
||||
|
||||
# txn A begins a truncate over 30-60 and leaves it uncommitted.
|
||||
session_a = self.session
|
||||
session_a.begin_transaction()
|
||||
self.truncate(session_a, 30, 60)
|
||||
|
||||
# txn B truncates overlapping range 40-70 and gets WT_ROLLBACK.
|
||||
with (
|
||||
self.auto_closing_session() as session_b,
|
||||
self.transaction(session=session_b, rollback=True),
|
||||
):
|
||||
self.assertRaisesException(
|
||||
WiredTigerError,
|
||||
lambda: self.truncate(session_b, 40, 70),
|
||||
self.CONFLICT_MSG,
|
||||
)
|
||||
|
||||
def test_overlapping_truncates_conflict_no_ingest(self):
|
||||
# A follower with stable keys 1-100 and an empty ingest table.
|
||||
self.setup_leader(keys=range_inclusive(1, 100))
|
||||
self.setup_follower()
|
||||
|
||||
# txn A begins a truncate over 30-60 and leaves it uncommitted.
|
||||
session_a = self.session
|
||||
session_a.begin_transaction()
|
||||
self.truncate(session_a, 30, 60)
|
||||
|
||||
# txn B truncates overlapping range 40-70 and gets WT_ROLLBACK.
|
||||
with (
|
||||
self.auto_closing_session() as session_b,
|
||||
self.transaction(session=session_b, rollback=True),
|
||||
):
|
||||
self.assertRaisesException(
|
||||
WiredTigerError,
|
||||
lambda: self.truncate(session_b, 40, 70),
|
||||
self.CONFLICT_MSG,
|
||||
)
|
||||
|
||||
def test_non_overlapping_truncates_no_conflict(self):
|
||||
# A follower with stable keys 1-100.
|
||||
self.setup_leader(keys=range_inclusive(1, 100))
|
||||
self.setup_follower()
|
||||
|
||||
# txn A truncates 10-30 and leaves it uncommitted.
|
||||
session_a = self.session
|
||||
session_a.begin_transaction()
|
||||
self.truncate(session_a, 10, 30)
|
||||
|
||||
# txn B truncates 50-70 (no overlap) and commits successfully.
|
||||
with (
|
||||
self.auto_closing_session() as session_b,
|
||||
self.transaction(session=session_b),
|
||||
):
|
||||
self.truncate(session_b, 50, 70)
|
||||
|
||||
def test_rolled_back_truncate_no_residual(self):
|
||||
# A follower with stable keys 1-100.
|
||||
self.setup_leader(keys=range_inclusive(1, 100))
|
||||
self.setup_follower()
|
||||
|
||||
# txn A truncates 30-60 then explicitly rolls back.
|
||||
session_a = self.session
|
||||
with self.transaction(session=session_a, rollback=True):
|
||||
self.truncate(session_a, 30, 60)
|
||||
|
||||
# txn B truncates the same range 30-60 and commits without WT_ROLLBACK.
|
||||
with (
|
||||
self.auto_closing_session() as session_b,
|
||||
self.transaction(session=session_b),
|
||||
):
|
||||
self.truncate(session_b, 30, 60)
|
||||
|
||||
def test_invisible_committed_truncate_conflicts(self):
|
||||
# A follower with stable keys 1-100.
|
||||
self.setup_leader(keys=range_inclusive(1, 100))
|
||||
self.setup_follower()
|
||||
|
||||
# txn A commits a truncate over 30-60 at ts=10 (invisible to txn B).
|
||||
self.conn.set_timestamp("oldest_timestamp=" + self.timestamp_str(1))
|
||||
with self.transaction(commit_timestamp=10):
|
||||
self.truncate(self.session, 30, 60)
|
||||
|
||||
# txn B (read_ts=5) truncates overlapping range 40-70 and gets
|
||||
# WT_ROLLBACK.
|
||||
with (
|
||||
self.auto_closing_session() as session_b,
|
||||
self.transaction(
|
||||
session=session_b, read_timestamp=5, rollback=True
|
||||
),
|
||||
):
|
||||
self.assertRaisesException(
|
||||
WiredTigerError,
|
||||
lambda: self.truncate(session_b, 40, 70),
|
||||
self.CONFLICT_MSG,
|
||||
)
|
||||
|
||||
def test_visible_committed_truncate_no_conflict(self):
|
||||
# A follower with stable keys 1-100.
|
||||
self.setup_leader(keys=range_inclusive(1, 100))
|
||||
self.setup_follower()
|
||||
|
||||
# txn A commits a truncate over 30-60 at ts=5 (visible to txn B).
|
||||
self.conn.set_timestamp("oldest_timestamp=" + self.timestamp_str(1))
|
||||
with self.transaction(commit_timestamp=5):
|
||||
self.truncate(self.session, 30, 60)
|
||||
|
||||
# txn B (read_ts=10) truncates overlapping range 40-70 without
|
||||
# WT_ROLLBACK.
|
||||
with (
|
||||
self.auto_closing_session() as session_b,
|
||||
self.transaction(session=session_b, read_timestamp=10),
|
||||
):
|
||||
self.truncate(session_b, 40, 70)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
wttest.run()
|
||||
Loading…
Reference in New Issue
Block a user