SERVER-79547 Introduce snowball libstemmer_c import script (#29667)
GitOrigin-RevId: 8a52f04b302e9dfaa8f52e42b0db560111927a86
This commit is contained in:
parent
056dacce1a
commit
6aa2cbb084
@ -21,57 +21,57 @@ not authored by MongoDB, and has a license which requires reproduction,
|
||||
a notice will be included in
|
||||
`THIRD-PARTY-NOTICES`.
|
||||
|
||||
| Name | License | Vendored Version | Emits persisted data | Distributed in Release Binaries |
|
||||
| ---------------------------------------------------- | --------------------------------------------------------------------------------------------------- | -------------------------- | -------------------- | ------------------------------- |
|
||||
| [Abseil] | Apache-2.0 | 20230802.1 | | ✗ |
|
||||
| [arximboldi/immer] | BSL-1.0 | Unknown | | ✗ |
|
||||
| [Asio C++ Library] | BSL-1.0 | 1.12.2 | | ✗ |
|
||||
| [benchmark] | Apache-2.0 | v1.5.2 | | |
|
||||
| [Boost C++ Libraries - boost] | BSL-1.0 | 1.79.0 | | ✗ |
|
||||
| [c-ares] | MIT | 1.19.1 | | ✗ |
|
||||
| [concurrencytest] | GPL-3.0-or-later | 0.1.2 | unknown | |
|
||||
| [Cyrus SASL] | BSD-Attribution-HPND-disclaimer | 2.1.28 | unknown | |
|
||||
| [dcleblanc/SafeInt] | MIT | 3.0.26 | | ✗ |
|
||||
| [derickr/timelib] | MIT | 2022.10 | | ✗ |
|
||||
| [discover] | BSD-3-Clause | 0.4.0 | unknown | |
|
||||
| [fmtlib/fmt] | MIT | 7.1.3 | | ✗ |
|
||||
| [google-re2] | BSD-3-Clause | 2023-11-01 | | ✗ |
|
||||
| [google-snappy] | BSD-3-Clause | 1.1.10 | ✗ | ✗ |
|
||||
| [google/s2geometry] | Apache-2.0 | Unknown | ✗ | ✗ |
|
||||
| [gperftools] | BSD-3-Clause | 2.9.1 | | ✗ |
|
||||
| [grpc] | Apache-2.0 | 1.59.2 | | ✗ |
|
||||
| [ICU for C/C++ (ICU4C)] | BSD-3-Clause, MIT v2 with Ad Clause License, Public Domain, BSD-2-Clause | 57.1 | ✗ | ✗ |
|
||||
| [Intel Decimal Floating-Point Math Library] | BSD-3-Clause | v2.0 U1 | | ✗ |
|
||||
| [jbeder/yaml-cpp] | MIT | 0.6.3 | | ✗ |
|
||||
| [JSON-Schema-Test-Suite] | Unknown License | Unknown | | |
|
||||
| [libmongocrypt] | Apache-2.0 | 1.12.0 | ✗ | ✗ |
|
||||
| [librdkafka - the Apache Kafka C/C++ client library] | BSD-3-Clause, Xmlproc License, ISC, MIT, Public Domain, Zlib, BSD-2-Clause, Andreas Stolcke License | 2.0.2 | | ✗ |
|
||||
| [LibTomCrypt] | WTFPL, Public Domain | 1.18.2 | ✗ | ✗ |
|
||||
| [libunwind/libunwind] | MIT | v1.6.2 | | ✗ |
|
||||
| [linenoise] | BSD-2-Clause | Unknown | | ✗ |
|
||||
| [MongoDB C Driver] | Apache-2.0 | 1.27.6 | ✗ | ✗ |
|
||||
| [Mozilla Firefox] | MPL-2.0 | 115.7.0esr | unknown | ✗ |
|
||||
| [nlohmann.json.decomposed] | MIT | 3.10.5 | unknown | |
|
||||
| [node] | ISC | 22.1.0 | unknown | |
|
||||
| [ocspbuilder] | MIT | 0.10.2 | | |
|
||||
| [ocspresponder] | Apache-2.0 | 0.5.0 | | |
|
||||
| [PCRE2] | BSD-3-Clause, Public Domain | 10.40 | | ✗ |
|
||||
| [Protobuf] | BSD-3-Clause | v4.25.0 | | ✗ |
|
||||
| [pyiso8601] | MIT | 2.1.0 | unknown | |
|
||||
| [RoaringBitmap/CRoaring] | Unknown License | v3.0.1 | | ✗ |
|
||||
| [SchemaStore/schemastore] | Apache-2.0 | Unknown | | |
|
||||
| [SCons - a Software Construction tool] | MIT | 3.1.2 | | ✗ |
|
||||
| [smhasher] | Unknown License | Unknown | unknown | ✗ |
|
||||
| [Snowball Stemming Algorithms] | BSD-3-Clause | Unknown | unknown | ✗ |
|
||||
| [subunit] | BSD-3-Clause, Apache-2.0 | 1.4.4 | unknown | |
|
||||
| [tcmalloc] | Apache-2.0 | 20230227-snapshot-093ba93c | | ✗ |
|
||||
| [testing-cabal/extras] | MIT | 0.0.3 | unknown | |
|
||||
| [testscenarios] | BSD-3-Clause, Apache-2.0 | 0.4 | unknown | |
|
||||
| [testtools] | MIT | 2.7.1 | unknown | |
|
||||
| [unicode-data] | Unicode-DFS-2016 | 8.0 | ✗ | ✗ |
|
||||
| [valgrind] | GPL-2.0-or-later | Unknown | | ✗ |
|
||||
| [zlib] | Zlib | v1.3 | ✗ | ✗ |
|
||||
| [zstd] | BSD-3-Clause, GPL-2.0-or-later | 1.5.5 | ✗ | ✗ |
|
||||
| Name | License | Vendored Version | Emits persisted data | Distributed in Release Binaries |
|
||||
| ---------------------------------------------------- | --------------------------------------------------------------------------------------------------- | ---------------------------------------- | -------------------- | ------------------------------- |
|
||||
| [Abseil] | Apache-2.0 | 20230802.1 | | ✗ |
|
||||
| [arximboldi/immer] | BSL-1.0 | Unknown | | ✗ |
|
||||
| [Asio C++ Library] | BSL-1.0 | 1.12.2 | | ✗ |
|
||||
| [benchmark] | Apache-2.0 | v1.5.2 | | |
|
||||
| [Boost C++ Libraries - boost] | BSL-1.0 | 1.79.0 | | ✗ |
|
||||
| [c-ares] | MIT | 1.19.1 | | ✗ |
|
||||
| [concurrencytest] | GPL-3.0-or-later | 0.1.2 | unknown | |
|
||||
| [Cyrus SASL] | BSD-Attribution-HPND-disclaimer | 2.1.28 | unknown | |
|
||||
| [dcleblanc/SafeInt] | MIT | 3.0.26 | | ✗ |
|
||||
| [derickr/timelib] | MIT | 2022.10 | | ✗ |
|
||||
| [discover] | BSD-3-Clause | 0.4.0 | unknown | |
|
||||
| [fmtlib/fmt] | MIT | 7.1.3 | | ✗ |
|
||||
| [google-re2] | BSD-3-Clause | 2023-11-01 | | ✗ |
|
||||
| [google-snappy] | BSD-3-Clause | 1.1.10 | ✗ | ✗ |
|
||||
| [google/s2geometry] | Apache-2.0 | Unknown | ✗ | ✗ |
|
||||
| [gperftools] | BSD-3-Clause | 2.9.1 | | ✗ |
|
||||
| [grpc] | Apache-2.0 | 1.59.2 | | ✗ |
|
||||
| [ICU for C/C++ (ICU4C)] | BSD-3-Clause, MIT v2 with Ad Clause License, Public Domain, BSD-2-Clause | 57.1 | ✗ | ✗ |
|
||||
| [Intel Decimal Floating-Point Math Library] | BSD-3-Clause | v2.0 U1 | | ✗ |
|
||||
| [jbeder/yaml-cpp] | MIT | 0.6.3 | | ✗ |
|
||||
| [JSON-Schema-Test-Suite] | Unknown License | Unknown | | |
|
||||
| [libmongocrypt] | Apache-2.0 | 1.12.0 | ✗ | ✗ |
|
||||
| [librdkafka - the Apache Kafka C/C++ client library] | BSD-3-Clause, Xmlproc License, ISC, MIT, Public Domain, Zlib, BSD-2-Clause, Andreas Stolcke License | 2.0.2 | | ✗ |
|
||||
| [LibTomCrypt] | WTFPL, Public Domain | 1.18.2 | ✗ | ✗ |
|
||||
| [libunwind/libunwind] | MIT | v1.6.2 | | ✗ |
|
||||
| [linenoise] | BSD-2-Clause | Unknown | | ✗ |
|
||||
| [MongoDB C Driver] | Apache-2.0 | 1.27.6 | ✗ | ✗ |
|
||||
| [Mozilla Firefox] | MPL-2.0 | 115.7.0esr | unknown | ✗ |
|
||||
| [nlohmann.json.decomposed] | MIT | 3.10.5 | unknown | |
|
||||
| [node] | ISC | 22.1.0 | unknown | |
|
||||
| [ocspbuilder] | MIT | 0.10.2 | | |
|
||||
| [ocspresponder] | Apache-2.0 | 0.5.0 | | |
|
||||
| [PCRE2] | BSD-3-Clause, Public Domain | 10.40 | | ✗ |
|
||||
| [Protobuf] | BSD-3-Clause | v4.25.0 | | ✗ |
|
||||
| [pyiso8601] | MIT | 2.1.0 | unknown | |
|
||||
| [RoaringBitmap/CRoaring] | Unknown License | v3.0.1 | | ✗ |
|
||||
| [SchemaStore/schemastore] | Apache-2.0 | Unknown | | |
|
||||
| [SCons - a Software Construction tool] | MIT | 3.1.2 | | ✗ |
|
||||
| [smhasher] | Unknown License | Unknown | unknown | ✗ |
|
||||
| [Snowball Stemming Algorithms] | BSD-3-Clause | 7b264ffa0f767c579d052fd8142558dc8264d795 | ✗ | ✗ |
|
||||
| [subunit] | BSD-3-Clause, Apache-2.0 | 1.4.4 | unknown | |
|
||||
| [tcmalloc] | Apache-2.0 | 20230227-snapshot-093ba93c | | ✗ |
|
||||
| [testing-cabal/extras] | MIT | 0.0.3 | unknown | |
|
||||
| [testscenarios] | BSD-3-Clause, Apache-2.0 | 0.4 | unknown | |
|
||||
| [testtools] | MIT | 2.7.1 | unknown | |
|
||||
| [unicode-data] | Unicode-DFS-2016 | 8.0 | ✗ | ✗ |
|
||||
| [valgrind] | GPL-2.0-or-later | Unknown | | ✗ |
|
||||
| [zlib] | Zlib | v1.3 | ✗ | ✗ |
|
||||
| [zstd] | BSD-3-Clause, GPL-2.0-or-later | 1.5.5 | ✗ | ✗ |
|
||||
|
||||
[Abseil]: https://github.com/abseil/abseil-cpp
|
||||
[Asio C++ Library]: https://github.com/chriskohlhoff/asio
|
||||
|
||||
10
sbom.json
10
sbom.json
@ -1660,7 +1660,7 @@
|
||||
"name": ""
|
||||
},
|
||||
"name": "Snowball Stemming Algorithms",
|
||||
"version": "Unknown",
|
||||
"version": "7b264ffa0f767c579d052fd8142558dc8264d795",
|
||||
"licenses": [
|
||||
{
|
||||
"license": {
|
||||
@ -1676,6 +1676,14 @@
|
||||
{
|
||||
"name": "info_link",
|
||||
"value": "https://github.com/snowballstem/snowball"
|
||||
},
|
||||
{
|
||||
"name": "emits_persisted_data",
|
||||
"value": "true"
|
||||
},
|
||||
{
|
||||
"name": "import_script_path",
|
||||
"value": "src/third_party/libstemmer_c/scripts/import.sh"
|
||||
}
|
||||
],
|
||||
"type": "library",
|
||||
|
||||
2
src/third_party/SConscript
vendored
2
src/third_party/SConscript
vendored
@ -204,7 +204,7 @@ if not use_system_version_of_library('tomcrypt'):
|
||||
|
||||
if not use_system_version_of_library('stemmer'):
|
||||
thirdPartyEnvironmentModifications['stemmer'] = {
|
||||
'CPPPATH': ['#/src/third_party/libstemmer_c/include'],
|
||||
'CPPPATH': ['#/src/third_party/libstemmer_c/dist/include'],
|
||||
}
|
||||
|
||||
# Note that the wiredtiger.h header is generated, so
|
||||
|
||||
82
src/third_party/libstemmer_c/BUILD.bazel
vendored
82
src/third_party/libstemmer_c/BUILD.bazel
vendored
@ -5,48 +5,48 @@ package(default_visibility = ["//visibility:public"])
|
||||
mongo_cc_library(
|
||||
name = "stemmer",
|
||||
srcs = [
|
||||
"libstemmer/libstemmer_utf8.c",
|
||||
"libstemmer/modules.h",
|
||||
"libstemmer/modules_utf8.h",
|
||||
"runtime/api.c",
|
||||
"runtime/api.h",
|
||||
"runtime/header.h",
|
||||
"src_c/stem_UTF_8_danish.c",
|
||||
"src_c/stem_UTF_8_danish.h",
|
||||
"src_c/stem_UTF_8_dutch.c",
|
||||
"src_c/stem_UTF_8_dutch.h",
|
||||
"src_c/stem_UTF_8_english.c",
|
||||
"src_c/stem_UTF_8_english.h",
|
||||
"src_c/stem_UTF_8_finnish.c",
|
||||
"src_c/stem_UTF_8_finnish.h",
|
||||
"src_c/stem_UTF_8_french.c",
|
||||
"src_c/stem_UTF_8_french.h",
|
||||
"src_c/stem_UTF_8_german.c",
|
||||
"src_c/stem_UTF_8_german.h",
|
||||
"src_c/stem_UTF_8_hungarian.c",
|
||||
"src_c/stem_UTF_8_hungarian.h",
|
||||
"src_c/stem_UTF_8_italian.c",
|
||||
"src_c/stem_UTF_8_italian.h",
|
||||
"src_c/stem_UTF_8_norwegian.c",
|
||||
"src_c/stem_UTF_8_norwegian.h",
|
||||
"src_c/stem_UTF_8_porter.c",
|
||||
"src_c/stem_UTF_8_porter.h",
|
||||
"src_c/stem_UTF_8_portuguese.c",
|
||||
"src_c/stem_UTF_8_portuguese.h",
|
||||
"src_c/stem_UTF_8_romanian.c",
|
||||
"src_c/stem_UTF_8_romanian.h",
|
||||
"src_c/stem_UTF_8_russian.c",
|
||||
"src_c/stem_UTF_8_russian.h",
|
||||
"src_c/stem_UTF_8_spanish.c",
|
||||
"src_c/stem_UTF_8_spanish.h",
|
||||
"src_c/stem_UTF_8_swedish.c",
|
||||
"src_c/stem_UTF_8_swedish.h",
|
||||
"src_c/stem_UTF_8_turkish.c",
|
||||
"src_c/stem_UTF_8_turkish.h",
|
||||
"dist/libstemmer/libstemmer_utf8.c",
|
||||
"dist/libstemmer/modules.h",
|
||||
"dist/libstemmer/modules_utf8.h",
|
||||
"dist/runtime/api.c",
|
||||
"dist/runtime/api.h",
|
||||
"dist/runtime/header.h",
|
||||
"dist/src_c/stem_UTF_8_danish.c",
|
||||
"dist/src_c/stem_UTF_8_danish.h",
|
||||
"dist/src_c/stem_UTF_8_dutch.c",
|
||||
"dist/src_c/stem_UTF_8_dutch.h",
|
||||
"dist/src_c/stem_UTF_8_english.c",
|
||||
"dist/src_c/stem_UTF_8_english.h",
|
||||
"dist/src_c/stem_UTF_8_finnish.c",
|
||||
"dist/src_c/stem_UTF_8_finnish.h",
|
||||
"dist/src_c/stem_UTF_8_french.c",
|
||||
"dist/src_c/stem_UTF_8_french.h",
|
||||
"dist/src_c/stem_UTF_8_german.c",
|
||||
"dist/src_c/stem_UTF_8_german.h",
|
||||
"dist/src_c/stem_UTF_8_hungarian.c",
|
||||
"dist/src_c/stem_UTF_8_hungarian.h",
|
||||
"dist/src_c/stem_UTF_8_italian.c",
|
||||
"dist/src_c/stem_UTF_8_italian.h",
|
||||
"dist/src_c/stem_UTF_8_norwegian.c",
|
||||
"dist/src_c/stem_UTF_8_norwegian.h",
|
||||
"dist/src_c/stem_UTF_8_porter.c",
|
||||
"dist/src_c/stem_UTF_8_porter.h",
|
||||
"dist/src_c/stem_UTF_8_portuguese.c",
|
||||
"dist/src_c/stem_UTF_8_portuguese.h",
|
||||
"dist/src_c/stem_UTF_8_romanian.c",
|
||||
"dist/src_c/stem_UTF_8_romanian.h",
|
||||
"dist/src_c/stem_UTF_8_russian.c",
|
||||
"dist/src_c/stem_UTF_8_russian.h",
|
||||
"dist/src_c/stem_UTF_8_spanish.c",
|
||||
"dist/src_c/stem_UTF_8_spanish.h",
|
||||
"dist/src_c/stem_UTF_8_swedish.c",
|
||||
"dist/src_c/stem_UTF_8_swedish.h",
|
||||
"dist/src_c/stem_UTF_8_turkish.c",
|
||||
"dist/src_c/stem_UTF_8_turkish.h",
|
||||
],
|
||||
hdrs = [
|
||||
"include/libstemmer.h",
|
||||
"runtime/utilities.c",
|
||||
"dist/include/libstemmer.h",
|
||||
"dist/runtime/utilities.c",
|
||||
],
|
||||
copts = select({
|
||||
"//bazel/config:gcc_or_clang": [
|
||||
@ -56,6 +56,6 @@ mongo_cc_library(
|
||||
}),
|
||||
includes = [
|
||||
# from https://github.com/10gen/mongo/blob/master/src/third_party/SConscript#L172-L175
|
||||
"include",
|
||||
"dist/include",
|
||||
],
|
||||
)
|
||||
|
||||
29
src/third_party/libstemmer_c/scripts/import.sh
vendored
Executable file
29
src/third_party/libstemmer_c/scripts/import.sh
vendored
Executable file
@ -0,0 +1,29 @@
|
||||
#!/bin/bash
# This script downloads and imports libstemmer_c.
#
# Steps:
#   1. Clone the snowball repository into a temporary directory and check out
#      the pinned VERSION.
#   2. Build the libstemmer_c distribution tarball (`make dist_libstemmer_c`).
#   3. Unpack that tarball, minus its top-level directory, into
#      src/third_party/libstemmer_c/dist of the enclosing git checkout.
#
# Usage: run from anywhere inside the git checkout, after removing any existing
# dist directory. Requires git, make, find, tar, and SSH access to github.com
# (the clone URL below uses the git@github.com SSH form).

set -euo pipefail
IFS=$'\n\t'

set -vx

readonly NAME=libstemmer_c
# Upstream snowball revision that gets checked out and built.
readonly VERSION="7b264ffa0f767c579d052fd8142558dc8264d795"

REPO_ROOT=$(git rev-parse --show-toplevel)
readonly REPO_ROOT
readonly DEST_DIR="${REPO_ROOT}/src/third_party/${NAME}/dist"

# Refuse to extract over an existing tree so stale files from a previous
# import cannot survive into the new one.
if [[ -d "$DEST_DIR" ]]; then
    echo "You must remove '$DEST_DIR' before running $0" >&2
    exit 1
fi

SNOWBALL_GIT_DIR=$(mktemp -d /tmp/import-snowball.XXXXXX)
readonly SNOWBALL_GIT_DIR

# Remove the temporary clone on every exit path. The ':?' guard aborts the rm
# if the variable is ever empty, so this can never degrade to 'rm -rf --'.
cleanup() {
    rm -rf -- "${SNOWBALL_GIT_DIR:?}"
}
trap cleanup EXIT

git clone git@github.com:snowballstem/snowball.git "$SNOWBALL_GIT_DIR"
git -C "$SNOWBALL_GIT_DIR" checkout "$VERSION"
make -C "$SNOWBALL_GIT_DIR" dist_libstemmer_c

# Locate the tarball produced by the build. NUL-delimited output keeps paths
# intact regardless of the characters they contain, and -name tests avoid the
# GNU-only -regextype option.
archives=()
while IFS= read -r -d '' archive; do
    archives+=("$archive")
done < <(find "$SNOWBALL_GIT_DIR" -type f \( -name '*.tgz' -o -name '*.tar.gz' \) -print0)

# Fail with a clear message instead of handing tar zero or several archives.
if [[ ${#archives[@]} -ne 1 ]]; then
    echo "Expected exactly one libstemmer_c archive under '$SNOWBALL_GIT_DIR', found ${#archives[@]}" >&2
    exit 1
fi

# Create the destination only once there is something to unpack; otherwise a
# failed clone/build would leave an empty dist directory that blocks the
# next run via the existence check above.
mkdir -p "$DEST_DIR"
tar --strip-components=1 -xvzf "${archives[0]}" -C "$DEST_DIR"
|
||||
Loading…
Reference in New Issue
Block a user