SERVER-118522 Vendor Fuzztest (#47328)

GitOrigin-RevId: bfe44acc8976e2928a8c12257cf9e977e07fe416
This commit is contained in:
Mike Merrill 2026-02-06 14:16:06 -05:00 committed by MongoDB Bot
parent d5122c7b20
commit 9b770d3df9
327 changed files with 69547 additions and 2 deletions

View File

@ -5,6 +5,7 @@ src/third_party/protobuf/dist
src/third_party/re2/dist
src/third_party/tcmalloc/dist
src/third_party/wiredtiger/dist
src/third_party/fuzztest/dist
# Ignore node_modules due to the following error
# ERROR: in verify_node_modules_ignored:

View File

@ -597,6 +597,9 @@ common --experimental_collect_system_network_usage
common:fission --fission=yes
common:fission --remote_download_regex=.*\.dwo$
# --config=fuzztest
common:fuzztest --@fuzztest//fuzztest:centipede_integration=True
# Avoid failing builds when BES metadata fails to upload.
common --bes_upload_mode=fully_async
@ -643,5 +646,8 @@ try-import %workspace%/.bazelrc.sync
# Engflow auth credentials
try-import %workspace%/.bazelrc.engflow_creds
# Flags for fuzztest
try-import %workspace%/.bazelrc.fuzztest
# Repository root absolute path to set --execution_log_compact_file
#try-import %workspace%/.bazelrc.exec_log_file

85
.bazelrc.fuzztest Normal file
View File

@ -0,0 +1,85 @@
### DO NOT EDIT. Generated file.
#
# To regenerate, run the following from your project's workspace:
#
# bazel run @com_google_fuzztest//bazel:setup_configs > fuzztest.bazelrc
#
# And don't forget to add the following to your project's .bazelrc:
#
# try-import %workspace%/fuzztest.bazelrc
### Common options.
#
# Do not use directly.
# Standard define for \"ifdef-ing\" any fuzz test specific code.
build:fuzztest-common --copt=-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
# In fuzz tests, we want to catch assertion violations even in optimized builds.
build:fuzztest-common --copt=-UNDEBUG
# Enable libc++ assertions.
# See https://libcxx.llvm.org/UsingLibcxx.html#enabling-the-safe-libc-mode
build:fuzztest-common --copt=-D_LIBCPP_ENABLE_ASSERTIONS=1
### ASan (Address Sanitizer) build configuration.
#
# Use with: --config=fuzztest_asan
build:fuzztest_asan --linkopt=-fsanitize=address
build:fuzztest_asan --copt=-fsanitize=address
# We rely on the following flag instead of the compiler provided
# __has_feature(address_sanitizer) to know that we have an ASAN build even in
# the uninstrumented runtime.
build:fuzztest_asan --copt=-DADDRESS_SANITIZER
### FuzzTest build configuration.
#
# Use with: --config=fuzztest
#
# Note that this configuration includes the ASan configuration.
build:fuzztest --config=fuzztest_asan
build:fuzztest --config=fuzztest-common
# Link statically.
build:fuzztest --dynamic_mode=off
# We apply coverage tracking instrumentation to everything but Centipede and the
# FuzzTest framework itself (including GoogleTest and GoogleMock).
build:fuzztest --copt=-fsanitize-coverage=inline-8bit-counters,trace-cmp,pc-table
build:fuzztest --per_file_copt=common/.*,fuzztest/.*,centipede/.*,-centipede/.*fuzz_target,googletest/.*,googlemock/.*@-fsanitize-coverage=0
### Experimental FuzzTest build configuration.
#
# Use with: --config=fuzztest-experimental
#
# Use this instead of --config=fuzztest when building test binaries to run with
# Centipede. Eventually, this will be consolidated with --config=fuzztest.
# Note that this configuration doesn't include the ASan configuration. If you
# want to use both, you can use --config=fuzztest-experimental --config=fuzztest_asan.
build:fuzztest-experimental --config=fuzztest-common
build:fuzztest-experimental --@com_google_fuzztest//fuzztest:centipede_integration
# Generate line tables for debugging.
build:fuzztest-experimental --copt=-gline-tables-only
build:fuzztest-experimental --strip=never
# Prevent memcmp & co from being inlined.
build:fuzztest-experimental --copt=-fno-builtin
# Disable heap checking.
build:fuzztest-experimental --copt=-DHEAPCHECK_DISABLE
# Link statically.
build:fuzztest-experimental --dynamic_mode=off
# We apply coverage tracking instrumentation to everything but Centipede and the
# FuzzTest framework itself (including GoogleTest and GoogleMock).
# TODO(b/374840534): Add -fsanitize-coverage=control-flow once we start building
# with clang 16+.
build:fuzztest-experimental --copt=-fsanitize-coverage=trace-pc-guard,pc-table,trace-loads,trace-cmp
build:fuzztest-experimental --per_file_copt=common/.*,fuzztest/.*,centipede/.*,-centipede/.*fuzz_target,googletest/.*,googlemock/.*@-fsanitize-coverage=0

View File

@ -181,6 +181,14 @@ local_path_override(
path = "src/third_party/zlib",
)
# When updating fuzztest run the following command
# bazel run @fuzztest//bazel:setup_configs > .bazelrc.fuzztest
bazel_dep(name = "fuzztest", version = "20250805.0")
local_path_override(
module_name = "fuzztest",
path = "src/third_party/fuzztest/dist",
)
# This is just here because 1.5.1 has a bug in it and our current version of re2 will pull in 1.5.1
# If re2 is ever upgraded past 2025-08-12 this can be unpinned
bazel_dep(name = "rules_python", version = "1.5.2")

25
MODULE.bazel.lock generated
View File

@ -2,6 +2,8 @@
"lockFileVersion": 13,
"registryFileHashes": {
"https://bcr.bazel.build/bazel_registry.json": "8a28e4aff06ee60aed2a8c281907fb8bcbf3b753c91fb5a5c57da3215d5b3497",
"https://bcr.bazel.build/modules/abseil-py/2.1.0/MODULE.bazel": "5ebe5bf853769c65707e5c28f216798f7a4b1042015e6a36e6d03094d94bec8a",
"https://bcr.bazel.build/modules/abseil-py/2.1.0/source.json": "0e8fc4f088ce07099c1cd6594c20c7ddbb48b4b3c0849b7d94ba94be88ff042b",
"https://bcr.bazel.build/modules/apple_support/1.11.1/MODULE.bazel": "1843d7cd8a58369a444fc6000e7304425fba600ff641592161d9f15b179fb896",
"https://bcr.bazel.build/modules/apple_support/1.15.1/MODULE.bazel": "a0556fefca0b1bb2de8567b8827518f94db6a6e7e7d632b4c48dc5f865bc7c85",
"https://bcr.bazel.build/modules/apple_support/1.17.1/MODULE.bazel": "655c922ab1209978a94ef6ca7d9d43e940cd97d9c172fb55f94d91ac53f8610b",
@ -34,10 +36,14 @@
"https://bcr.bazel.build/modules/bazel_features/1.9.1/MODULE.bazel": "8f679097876a9b609ad1f60249c49d68bfab783dd9be012faf9d82547b14815a",
"https://bcr.bazel.build/modules/bazel_skylib/1.7.1/MODULE.bazel": "3120d80c5861aa616222ec015332e5f8d3171e062e3e804a2a0253e1be26e59b",
"https://bcr.bazel.build/modules/bazel_skylib/1.7.1/source.json": "f121b43eeefc7c29efbd51b83d08631e2347297c95aac9764a701f2a6a2bb953",
"https://bcr.bazel.build/modules/brotli/1.1.0/MODULE.bazel": "3b5b90488995183419c4b5c9b063a164f6c0bc4d0d6b40550a612a5e860cc0fe",
"https://bcr.bazel.build/modules/brotli/1.1.0/source.json": "098a4fd315527166e8dfe1fd1537c96a737a83764be38fc43f4da231d600f3d0",
"https://bcr.bazel.build/modules/buildifier_prebuilt/6.4.0/MODULE.bazel": "37389c6b5a40c59410b4226d3bb54b08637f393d66e2fa57925c6fcf68e64bf4",
"https://bcr.bazel.build/modules/buildifier_prebuilt/6.4.0/source.json": "83eb01b197ed0b392f797860c9da5ed1bf95f4d0ded994d694a3d44731275916",
"https://bcr.bazel.build/modules/buildozer/7.1.2/MODULE.bazel": "2e8dd40ede9c454042645fd8d8d0cd1527966aa5c919de86661e62953cd73d84",
"https://bcr.bazel.build/modules/buildozer/7.1.2/source.json": "c9028a501d2db85793a6996205c8de120944f50a0d570438fcae0457a5f9d1f8",
"https://bcr.bazel.build/modules/bzip2/1.0.8/MODULE.bazel": "83ee443b286b0b91566e5ee77e74ba6445895f3135467893871560f9e4ebc159",
"https://bcr.bazel.build/modules/bzip2/1.0.8/source.json": "b64f3a2f973749cf5f6ee32b3d804af56a35a746228a7845ed5daa31c8cc8af1",
"https://bcr.bazel.build/modules/cel-spec/0.15.0/MODULE.bazel": "e1eed53d233acbdcf024b4b0bc1528116d92c29713251b5154078ab1348cb600",
"https://bcr.bazel.build/modules/cel-spec/0.15.0/source.json": "ab7dccdf21ea2261c0f809b5a5221a4d7f8b580309f285fdf1444baaca75d44a",
"https://bcr.bazel.build/modules/civetweb/1.16/MODULE.bazel": "46a38f9daeb57392e3827fce7d40926be0c802bd23cdd6bfd3a96c804de42fae",
@ -65,10 +71,14 @@
"https://bcr.bazel.build/modules/grpc-java/1.66.0/source.json": "f841b339ff8516c86c3a5272cd053194dd0cb2fdd63157123835e1157a28328d",
"https://bcr.bazel.build/modules/grpc-proto/0.0.0-20240627-ec30f58/MODULE.bazel": "88de79051e668a04726e9ea94a481ec6f1692086735fd6f488ab908b3b909238",
"https://bcr.bazel.build/modules/grpc-proto/0.0.0-20240627-ec30f58/source.json": "5035d379c61042930244ab59e750106d893ec440add92ec0df6a0098ca7f131d",
"https://bcr.bazel.build/modules/highwayhash/0.0.0-20240305-5ad3bf8/MODULE.bazel": "5c7f29d5bd70feff14b0f65b39584957e18e4a8d555e5a29a4c36019afbb44b9",
"https://bcr.bazel.build/modules/highwayhash/0.0.0-20240305-5ad3bf8/source.json": "211c0937ef5f537da6c3c135d12e60927c71b380642e207e4a02b86d29c55e85",
"https://bcr.bazel.build/modules/jsoncpp/1.9.6/MODULE.bazel": "2f8d20d3b7d54143213c4dfc3d98225c42de7d666011528dc8fe91591e2e17b0",
"https://bcr.bazel.build/modules/jsoncpp/1.9.6/source.json": "a04756d367a2126c3541682864ecec52f92cdee80a35735a3cb249ce015ca000",
"https://bcr.bazel.build/modules/libpfm/4.11.0/MODULE.bazel": "45061ff025b301940f1e30d2c16bea596c25b176c8b6b3087e92615adbd52902",
"https://bcr.bazel.build/modules/libpfm/4.11.0/source.json": "caaffb3ac2b59b8aac456917a4ecf3167d40478ee79f15ab7a877ec9273937c9",
"https://bcr.bazel.build/modules/lz4/1.9.4/MODULE.bazel": "e3d307b1d354d70f6c809167eafecf5d622c3f27e3971ab7273410f429c7f83a",
"https://bcr.bazel.build/modules/lz4/1.9.4/source.json": "233f0bdfc21f254e3dda14683ddc487ca68c6a3a83b7d5db904c503f85bd089b",
"https://bcr.bazel.build/modules/mbedtls/3.6.0/MODULE.bazel": "8e380e4698107c5f8766264d4df92e36766248447858db28187151d884995a09",
"https://bcr.bazel.build/modules/mbedtls/3.6.0/source.json": "1dbe7eb5258050afcc3806b9d43050f71c6f539ce0175535c670df606790b30c",
"https://bcr.bazel.build/modules/nlohmann_json/3.11.3/MODULE.bazel": "87023db2f55fc3a9949c7b08dc711fae4d4be339a80a99d04453c4bb3998eefc",
@ -101,6 +111,8 @@
"https://bcr.bazel.build/modules/pybind11_bazel/2.13.6/source.json": "6aa0703de8efb20cc897bbdbeb928582ee7beaf278bcd001ac253e1605bddfae",
"https://bcr.bazel.build/modules/rapidjson/1.1.0.bcr.20241007/MODULE.bazel": "82fbcb2e42f9e0040e76ccc74c06c3e46dfd33c64ca359293f8b84df0e6dff4c",
"https://bcr.bazel.build/modules/rapidjson/1.1.0.bcr.20241007/source.json": "5c42389ad0e21fc06b95ad7c0b730008271624a2fa3292e0eab5f30e15adeee3",
"https://bcr.bazel.build/modules/riegeli/0.0.0-20250706-c4d1f27/MODULE.bazel": "b8b7309fb00c6b545fafcdfc3bf8cba168a61d37d841b9d90bacf7e70ae6627c",
"https://bcr.bazel.build/modules/riegeli/0.0.0-20250706-c4d1f27/source.json": "af3e2998bdf2f0ca3695816695c079f885d1e5b838e1d05ca82450aba4941762",
"https://bcr.bazel.build/modules/rules_android/0.1.1/MODULE.bazel": "48809ab0091b07ad0182defb787c4c5328bd3a278938415c00a7b69b50c4d3a8",
"https://bcr.bazel.build/modules/rules_android/0.1.1/source.json": "e6986b41626ee10bdc864937ffb6d6bf275bb5b9c65120e6137d56e6331f089e",
"https://bcr.bazel.build/modules/rules_apple/3.16.0/MODULE.bazel": "0d1caf0b8375942ce98ea944be754a18874041e4e0459401d925577624d3a54a",
@ -178,14 +190,17 @@
"https://bcr.bazel.build/modules/rules_proto/6.0.0/MODULE.bazel": "b531d7f09f58dce456cd61b4579ce8c86b38544da75184eadaf0a7cb7966453f",
"https://bcr.bazel.build/modules/rules_proto/6.0.2/MODULE.bazel": "ce916b775a62b90b61888052a416ccdda405212b6aaeb39522f7dc53431a5e73",
"https://bcr.bazel.build/modules/rules_proto/7.0.2/MODULE.bazel": "bf81793bd6d2ad89a37a40693e56c61b0ee30f7a7fdbaf3eabbf5f39de47dea2",
"https://bcr.bazel.build/modules/rules_proto/7.0.2/source.json": "1e5e7260ae32ef4f2b52fd1d0de8d03b606a44c91b694d2f1afb1d3b28a48ce1",
"https://bcr.bazel.build/modules/rules_proto/7.1.0/MODULE.bazel": "002d62d9108f75bb807cd56245d45648f38275cb3a99dcd45dfb864c5d74cb96",
"https://bcr.bazel.build/modules/rules_proto/7.1.0/source.json": "39f89066c12c24097854e8f57ab8558929f9c8d474d34b2c00ac04630ad8940e",
"https://bcr.bazel.build/modules/rules_python/0.20.0/MODULE.bazel": "bfe14d17f20e3fe900b9588f526f52c967a6f281e47a1d6b988679bd15082286",
"https://bcr.bazel.build/modules/rules_python/0.22.0/MODULE.bazel": "b8057bafa11a9e0f4b08fc3b7cd7bee0dcbccea209ac6fc9a3ff051cd03e19e9",
"https://bcr.bazel.build/modules/rules_python/0.22.1/MODULE.bazel": "26114f0c0b5e93018c0c066d6673f1a2c3737c7e90af95eff30cfee38d0bbac7",
"https://bcr.bazel.build/modules/rules_python/0.23.1/MODULE.bazel": "49ffccf0511cb8414de28321f5fcf2a31312b47c40cc21577144b7447f2bf300",
"https://bcr.bazel.build/modules/rules_python/0.27.1/MODULE.bazel": "65dc875cc1a06c30d5bbdba7ab021fd9e551a6579e408a3943a61303e2228a53",
"https://bcr.bazel.build/modules/rules_python/0.28.0/MODULE.bazel": "cba2573d870babc976664a912539b320cbaa7114cd3e8f053c720171cde331ed",
"https://bcr.bazel.build/modules/rules_python/0.31.0/MODULE.bazel": "93a43dc47ee570e6ec9f5779b2e64c1476a6ce921c48cc9a1678a91dd5f8fd58",
"https://bcr.bazel.build/modules/rules_python/0.34.0/MODULE.bazel": "1d623d026e075b78c9fde483a889cda7996f5da4f36dffb24c246ab30f06513a",
"https://bcr.bazel.build/modules/rules_python/0.36.0/MODULE.bazel": "a4ce1ccea92b9106c7d16ab9ee51c6183107e78ba4a37aa65055227b80cd480c",
"https://bcr.bazel.build/modules/rules_python/0.37.1/MODULE.bazel": "3faeb2d9fa0a81f8980643ee33f212308f4d93eea4b9ce6f36d0b742e71e9500",
"https://bcr.bazel.build/modules/rules_python/0.4.0/MODULE.bazel": "9208ee05fd48bf09ac60ed269791cf17fb343db56c8226a720fbb1cdf467166c",
"https://bcr.bazel.build/modules/rules_python/0.40.0/MODULE.bazel": "9d1a3cd88ed7d8e39583d9ffe56ae8a244f67783ae89b60caafc9f5cf318ada7",
@ -202,6 +217,8 @@
"https://bcr.bazel.build/modules/rules_swift/1.16.0/MODULE.bazel": "4a09f199545a60d09895e8281362b1ff3bb08bbde69c6fc87aff5b92fcc916ca",
"https://bcr.bazel.build/modules/rules_swift/2.1.1/MODULE.bazel": "494900a80f944fc7aa61500c2073d9729dff0b764f0e89b824eb746959bc1046",
"https://bcr.bazel.build/modules/rules_swift/2.1.1/source.json": "40fc69dfaac64deddbb75bd99cdac55f4427d9ca0afbe408576a65428427a186",
"https://bcr.bazel.build/modules/snappy/1.2.0/MODULE.bazel": "cc7a727b46089c7fdae0ede21b1fd65bdb14d01823da118ef5c48044f40b6b27",
"https://bcr.bazel.build/modules/snappy/1.2.0/source.json": "17f5527e15d30a9d9eebf79ed73b280b56cac44f8c8fea696666d99943f84c33",
"https://bcr.bazel.build/modules/stardoc/0.5.3/MODULE.bazel": "c7f6948dae6999bf0db32c1858ae345f112cacf98f174c7a8bb707e41b974f1c",
"https://bcr.bazel.build/modules/stardoc/0.5.6/MODULE.bazel": "c43dabc564990eeab55e25ed61c07a1aadafe9ece96a4efabb3f8bf9063b71ef",
"https://bcr.bazel.build/modules/stardoc/0.6.2/MODULE.bazel": "7060193196395f5dd668eda046ccbeacebfd98efc77fed418dbe2b82ffaa39fd",
@ -212,7 +229,11 @@
"https://bcr.bazel.build/modules/swift_argument_parser/1.3.1.1/MODULE.bazel": "5e463fbfba7b1701d957555ed45097d7f984211330106ccd1352c6e0af0dcf91",
"https://bcr.bazel.build/modules/swift_argument_parser/1.3.1.1/source.json": "32bd87e5f4d7acc57c5b2ff7c325ae3061d5e242c0c4c214ae87e0f1c13e54cb",
"https://bcr.bazel.build/modules/xds/0.0.0-20240423-555b57e/MODULE.bazel": "cea509976a77e34131411684ef05a1d6ad194dd71a8d5816643bc5b0af16dc0f",
"https://bcr.bazel.build/modules/xds/0.0.0-20240423-555b57e/source.json": "7227e1fcad55f3f3cab1a08691ecd753cb29cc6380a47bc650851be9f9ad6d20"
"https://bcr.bazel.build/modules/xds/0.0.0-20240423-555b57e/source.json": "7227e1fcad55f3f3cab1a08691ecd753cb29cc6380a47bc650851be9f9ad6d20",
"https://bcr.bazel.build/modules/xz/5.4.5.bcr.1/MODULE.bazel": "c037f75fa1b7e1ff15fbd15d807a8ce545e9b02f02df0a9777aa9aa7d8b268bb",
"https://bcr.bazel.build/modules/xz/5.4.5.bcr.1/source.json": "766f28499a16fa9ed8dc94382d50e80ceda0d0ab80b79b7b104a67074ab10e1f",
"https://bcr.bazel.build/modules/zstd/1.5.6/MODULE.bazel": "471ebe7d3cdd8c6469390fcf623eb4779ff55fbee0a87f1dc57a1def468b96d4",
"https://bcr.bazel.build/modules/zstd/1.5.6/source.json": "02010c3333fc89b44fe861db049968decb6e688411f7f9d4f6791d74f9adfb51"
},
"selectedYankedVersions": {},
"moduleExtensions": {

View File

@ -104,3 +104,6 @@ filters:
- ".tmp/*":
approvers:
- 10gen/devprod-build
- ".bazelrc.fuzztest":
approvers:
- 10gen/platsec-server

View File

@ -1876,6 +1876,8 @@ def _impl(ctx):
"-Wno-sign-compare",
"-Wno-implicit-fallthrough",
"-Wno-shorten-64-to-32",
"-Wno-unused-but-set-variable",
"-Wno-nullability-completeness",
])],
),
],

View File

@ -2505,6 +2505,64 @@
]
},
"scope": "required"
},
{
"type": "library",
"bom-ref": "pkg:github/google/fuzztest@v2025.07.28",
"supplier": {
"name": "Google LLC",
"url": [
"https://opensource.google/"
]
},
"author": "Google LLC",
"group": "google.opensource",
"name": "fuzztest",
"version": "2025.07.28",
"description": "FuzzTest",
"licenses": [
{
"license": {
"id": "BSD-3-Clause"
}
},
{
"license": {
"id": "Apache-2.0"
}
}
],
"copyright": "Copyright 2008, Google Inc. All rights reserved.",
"cpe": "cpe:2.3:a:google:fuzztest:2025.07.28:*:*:*:*:*:*:*",
"purl": "pkg:github/google/fuzztest@v2025.07.28",
"externalReferences": [
{
"url": "https://github.com/google/fuzztest.git",
"type": "distribution"
}
],
"properties": [
{
"name": "internal:team_responsible",
"value": "Product Security"
},
{
"name": "emits_persisted_data",
"value": "false"
},
{
"name": "import_script_path",
"value": "src/third_party/fuzztest/scripts/import.sh"
}
],
"evidence": {
"occurrences": [
{
"location": "src/third_party/fuzztest"
}
]
},
"scope": "excluded"
}
],
"dependencies": [
@ -2525,6 +2583,7 @@
"pkg:github/google/re2@2025-08-05",
"pkg:github/google/s2geometry@a25c502bda9d7e0274b9e2b7825fbddf13cc0306",
"pkg:github/google/snappy@1.1.10",
"pkg:github/google/fuzztest@v2025.07.28",
"pkg:github/google/googletest@v1.17.0",
"pkg:github/gperftools/gperftools@2.9.1",
"pkg:github/grpc/grpc@v1.74.1",
@ -2636,6 +2695,10 @@
"ref": "pkg:github/google/benchmark@v1.5.2",
"dependsOn": []
},
{
"ref": "pkg:github/google/fuzztest@v2025.07.28",
"dependsOn": []
},
{
"ref": "pkg:github/google/googletest@v1.17.0",
"dependsOn": []

View File

@ -154,3 +154,6 @@ filters:
- "zstandard":
approvers:
- 10gen/server-networking-and-observability
- "fuzztest":
approvers:
- 10gen/platsec-server

251
src/third_party/fuzztest/dist/LICENSE vendored Normal file
View File

@ -0,0 +1,251 @@
Files: fuzztest/internal/domains/rune.*
The authors of this software are Rob Pike and Ken Thompson.
Copyright (c) 2002 by Lucent Technologies.
Permission to use, copy, modify, and distribute this software for any
purpose without fee is hereby granted, provided that this entire notice
is included in all copies of any software which is or includes a copy
or modification of this software and in all copies of the supporting
documentation for such software.
THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY OF
THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
rune.* have been converted to compile as C++ code in fuzztest::internal
namespace.
---
Files: grammar_codegen/generated_antlr_parser/*
[The "BSD 3-clause license"]
Copyright <YEAR> <COPYRIGHT HOLDER>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
---
Files: *
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@ -0,0 +1,75 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
module(
name = "fuzztest",
version = "head",
# TODO(lszekeres): Remove and use default name.
repo_name = "com_google_fuzztest",
)
bazel_dep(
name = "abseil-cpp",
version = "20250512.0",
)
bazel_dep(
name = "re2",
version = "2024-07-02.bcr.1",
)
bazel_dep(
name = "bazel_skylib",
version = "1.7.1",
)
bazel_dep(
name = "platforms",
version = "0.0.10",
)
# GoogleTest is not a dev dependency, because it's needed when FuzzTest is used
# with GoogleTest integration (e.g., googletest_adaptor). Note that the FuzzTest
# framework can be used without GoogleTest integration as well.
bazel_dep(
name = "googletest",
version = "1.16.0"
)
# TODO(lszekeres): Make this a dev dependency, as the protobuf library is only
# required for testing.
bazel_dep(
name = "protobuf",
version = "31.1",
)
bazel_dep(
name = "rules_proto",
version = "7.1.0",
)
bazel_dep(
name = "riegeli",
version = "0.0.0-20250706-c4d1f27",
repo_name = "com_google_riegeli",
)
# Dev dependencies.
# These dependencies will be ignored if the current module is not the root
# module (https://bazel.build/rules/lib/globals/module#bazel_dep).
bazel_dep(
name = "nlohmann_json",
version = "3.11.3",
dev_dependency = True,
)
bazel_dep(
name = "antlr4-cpp-runtime",
version = "4.12.0",
dev_dependency = True,
repo_name = "antlr_cpp",
)

View File

@ -0,0 +1,25 @@
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Targets in this package are private unless they explicitly widen visibility.
package(default_visibility = ["//visibility:private"])
licenses(["notice"])
# Publicly runnable wrapper around setup_configs.sh. The repository name of this
# package is passed to the script as its only argument.
sh_binary(
name = "setup_configs",
srcs = ["setup_configs.sh"],
# To determine if the script runs from the fuzztest repo or from a client repo.
args = [repository_name()],
visibility = ["//visibility:public"],
)

View File

@ -0,0 +1,42 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
licenses(["notice"])
# C++ library built from every .cpp file under runtime/src. Only
# antlr4-runtime.h is a regular header; all other headers under runtime/src are
# textual headers. Compiled with exceptions enabled and with
# ANTLR4CPP_USING_ABSEIL defined for this target and its dependents.
cc_library(
name = "antlr_cpp",
srcs = glob(["runtime/src/**/*.cpp"]),
hdrs = ["runtime/src/antlr4-runtime.h"],
copts = ["-fexceptions"],
defines = ["ANTLR4CPP_USING_ABSEIL"],
features = ["-use_header_modules"],
includes = ["runtime/src"],
textual_hdrs = glob(
["runtime/src/**/*.h"],
exclude = ["runtime/src/antlr4-runtime.h"],
),
visibility = ["//visibility:public"],
deps = [
"@abseil-cpp//absl/base",
"@abseil-cpp//absl/base:core_headers",
"@abseil-cpp//absl/container:flat_hash_map",
"@abseil-cpp//absl/container:flat_hash_set",
"@abseil-cpp//absl/synchronization",
],
)
# NOTE(review): `actual` names a target called `antlr4-cpp-runtime` in
# @antlr_cpp, i.e. the same name as this alias. If this file is the BUILD file
# of the @antlr_cpp repository, the alias would refer to itself; confirm whether
# `:antlr_cpp` (the library above) was intended.
alias(
name = "antlr4-cpp-runtime",
actual = "@antlr_cpp//:antlr4-cpp-runtime",
)

View File

@ -0,0 +1,220 @@
#!/usr/bin/env bash
# Script for generating fuzztest.bazelrc.
#
# Everything is printed to stdout. The first positional argument is the
# repository name; it selects the path filters used to exclude the framework
# itself from coverage instrumentation.
# -e: exit on error, -u: error on unset variables, -f: disable globbing.
set -euf -o pipefail
# Banner explaining how to regenerate and import the generated file.
cat <<EOF
### DO NOT EDIT. Generated file.
#
# To regenerate, run the following from your project's workspace:
#
# bazel run @com_google_fuzztest//bazel:setup_configs > fuzztest.bazelrc
#
# And don't forget to add the following to your project's .bazelrc:
#
# try-import %workspace%/fuzztest.bazelrc
EOF
# Options shared by the other configurations (config name: fuzztest-common).
cat <<EOF
### Common options.
#
# Do not use directly.
# Standard define for \"ifdef-ing\" any fuzz test specific code.
build:fuzztest-common --copt=-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
# In fuzz tests, we want to catch assertion violations even in optimized builds.
build:fuzztest-common --copt=-UNDEBUG
# Enable libc++ assertions.
# See https://libcxx.llvm.org/UsingLibcxx.html#enabling-the-safe-libc-mode
build:fuzztest-common --copt=-D_LIBCPP_ENABLE_ASSERTIONS=1
EOF
# AddressSanitizer configuration (config name: fuzztest_asan).
cat <<EOF
### ASan (Address Sanitizer) build configuration.
#
# Use with: --config=fuzztest_asan
build:fuzztest_asan --linkopt=-fsanitize=address
build:fuzztest_asan --copt=-fsanitize=address
# We rely on the following flag instead of the compiler provided
# __has_feature(address_sanitizer) to know that we have an ASAN build even in
# the uninstrumented runtime.
build:fuzztest_asan --copt=-DADDRESS_SANITIZER
EOF
# First part of the main configuration (config name: fuzztest); its coverage
# flags are emitted further below, once the filters are known.
cat <<EOF
### FuzzTest build configuration.
#
# Use with: --config=fuzztest
#
# Note that this configuration includes the ASan configuration.
build:fuzztest --config=fuzztest_asan
build:fuzztest --config=fuzztest-common
# Link statically.
build:fuzztest --dynamic_mode=off
EOF
# Pick the --per_file_copt filters: label-style filters when run from the
# fuzztest repo itself ("@"), path regexes when run from a client repo.
REPO_NAME="${1}"
# When used in the fuzztest repo itself.
if [[ ${REPO_NAME} == "@" ]]; then
COMMON_FILTER="//common:"
FUZZTEST_FILTER="//fuzztest:,//fuzztest/internal:,//fuzztest/internal/domains:"
CENTIPEDE_FILTER="//centipede:,-//centipede/.*fuzz_target"
else # When used in a client repo.
COMMON_FILTER="common/.*"
FUZZTEST_FILTER="fuzztest/.*"
CENTIPEDE_FILTER="centipede/.*,-centipede/.*fuzz_target"
fi
# Coverage instrumentation for --config=fuzztest; the filters chosen above are
# expanded into the --per_file_copt line.
cat <<EOF
# We apply coverage tracking instrumentation to everything but Centipede and the
# FuzzTest framework itself (including GoogleTest and GoogleMock).
build:fuzztest --copt=-fsanitize-coverage=inline-8bit-counters,trace-cmp,pc-table
build:fuzztest --per_file_copt=${COMMON_FILTER},${FUZZTEST_FILTER},${CENTIPEDE_FILTER},googletest/.*,googlemock/.*@-fsanitize-coverage=0
EOF
# Experimental configuration (config name: fuzztest-experimental); it uses the
# same filters as above.
cat <<EOF
### Experimental FuzzTest build configuration.
#
# Use with: --config=fuzztest-experimental
#
# Use this instead of --config=fuzztest when building test binaries to run with
# Centipede. Eventually, this will be consolidated with --config=fuzztest.
# Note that this configuration doesn't include the ASan configuration. If you
# want to use both, you can use --config=fuzztest-experimental --config=fuzztest_asan.
build:fuzztest-experimental --config=fuzztest-common
build:fuzztest-experimental --@com_google_fuzztest//fuzztest:centipede_integration
# Generate line tables for debugging.
build:fuzztest-experimental --copt=-gline-tables-only
build:fuzztest-experimental --strip=never
# Prevent memcmp & co from being inlined.
build:fuzztest-experimental --copt=-fno-builtin
# Disable heap checking.
build:fuzztest-experimental --copt=-DHEAPCHECK_DISABLE
# Link statically.
build:fuzztest-experimental --dynamic_mode=off
# We apply coverage tracking instrumentation to everything but Centipede and the
# FuzzTest framework itself (including GoogleTest and GoogleMock).
# TODO(b/374840534): Add -fsanitize-coverage=control-flow once we start building
# with clang 16+.
build:fuzztest-experimental --copt=-fsanitize-coverage=trace-pc-guard,pc-table,trace-loads,trace-cmp
build:fuzztest-experimental --per_file_copt=${COMMON_FILTER},${FUZZTEST_FILTER},${CENTIPEDE_FILTER},googletest/.*,googlemock/.*@-fsanitize-coverage=0
EOF
# Do not use the extra configurations below, unless you know what you're doing.
EXTRA_CONFIGS="${EXTRA_CONFIGS:-none}"
# Emit the libFuzzer compatibility configuration only when EXTRA_CONFIGS
# mentions "libfuzzer".
if [[ ${EXTRA_CONFIGS} == *"libfuzzer"* ]]; then
  # Find llvm-config: try the unversioned name first, then versions 15 down to
  # 12; the result is the empty string when none is on PATH.
  LLVM_CONFIG=$(command -v llvm-config ||
    command -v llvm-config-15 ||
    command -v llvm-config-14 ||
    command -v llvm-config-13 ||
    command -v llvm-config-12 ||
    echo "")
  if [[ -z "${LLVM_CONFIG}" ]]; then
    # stdout is the generated bazelrc (the documented usage redirects it to a
    # file), so diagnostics go to stderr where the user can actually see them.
    {
      echo "ERROR: Couldn't generate config, because cannot find llvm-config."
      echo ""
      echo "Please install clang and llvm, e.g.:"
      echo ""
      echo " sudo apt install clang llvm"
    } >&2
    exit 1
  fi
  cat <<EOF
### libFuzzer compatibility mode.
#
# Use with: --config=libfuzzer
build:libfuzzer --config=fuzztest_asan
build:libfuzzer --config=fuzztest-common
build:libfuzzer --copt=-DFUZZTEST_COMPATIBILITY_MODE
build:libfuzzer --copt=-fsanitize=fuzzer-no-link
build:libfuzzer --linkopt=$(find $(${LLVM_CONFIG} --libdir) -name libclang_rt.fuzzer_no_main-x86_64.a | head -1)
EOF
fi # libFuzzer
# OSS-Fuzz
# Emitted only when both FUZZING_ENGINE and SANITIZER are set and non-empty.
# CC, CXX, CFLAGS and CXXFLAGS are read from the environment as well.
if [[ -n ${FUZZING_ENGINE:-} && -n ${SANITIZER:-} ]]; then
  cat <<EOF
### OSS-Fuzz compatibility mode.
#
# Use with: --config=oss-fuzz
build:oss-fuzz --copt=-DFUZZTEST_COMPATIBILITY_MODE
build:oss-fuzz --dynamic_mode=off
build:oss-fuzz --action_env=CC=${CC}
build:oss-fuzz --action_env=CXX=${CXX}
EOF
  # Prints the "build:oss-fuzz" line(s) that pass one compiler flag to Bazel.
  #   $1: the Bazel option to use (e.g. --conlyopt, --cxxopt, --linkopt).
  #   $2: the compiler flag.
  # Prints nothing for flags containing "no-as-needed".
  ossfuz_flag_to_bazel_config_flag()
  {
    local bazel_flag=$1
    local flag=$2
    # When we have something along -fno-sanitize-recover=bool,array,...,.. we
    # need to split them out and write each assignment without use of commas. Otherwise
    # the per_file_copt option splits the comma string with spaces, which causes the
    # build command to be erroneous.
    if [[ $flag == *","* && $flag == *"="* ]]; then
      # Split at the first occurrence of equals only, so that a value which
      # itself contains '=' is kept intact.
      local lhs=${flag%%=*}
      local rhs=${flag#*=}
      # Word-split the comma-separated values (globbing is off via `set -f`).
      local comma_values=(${rhs//,/ })
      local val
      for val in "${comma_values[@]}"; do
        echo "build:oss-fuzz $bazel_flag=${lhs}=${val}"
      done
    else
      if [[ $flag != *"no-as-needed"* ]]; then
        # Flags captured here include -fsanitize=fuzzer-no-link, -fsanitize=address.
        echo "build:oss-fuzz $bazel_flag=$flag"
      fi
    fi
  }
  # Every C flag is forwarded both to C compilation and to linking.
  for flag in $CFLAGS; do
    echo "$(ossfuz_flag_to_bazel_config_flag "--conlyopt" "$flag")"
    echo "$(ossfuz_flag_to_bazel_config_flag "--linkopt" "$flag")"
  done
  # Every C++ flag is forwarded both to C++ compilation and to linking.
  for flag in $CXXFLAGS; do
    echo "$(ossfuz_flag_to_bazel_config_flag "--cxxopt" "$flag")"
    echo "$(ossfuz_flag_to_bazel_config_flag "--linkopt" "$flag")"
  done
  if [ "$SANITIZER" = "undefined" ]; then
    echo "build:oss-fuzz --linkopt=$(find $(llvm-config --libdir) -name libclang_rt.ubsan_standalone_cxx-x86_64.a | head -1)"
  fi
  if [ "$FUZZING_ENGINE" = "libfuzzer" ]; then
    echo "build:oss-fuzz --linkopt=$(find $(llvm-config --libdir) -name libclang_rt.fuzzer_no_main-x86_64.a | head -1)"
  fi
  # AFL version in oss-fuzz does not support LLVMFuzzerRunDriver. It must be updated first.
  #if [ "$FUZZING_ENGINE" = "afl" ]; then
  # echo "build:oss-fuzz --linkopt=${LIB_FUZZING_ENGINE}"
  #fi
fi # OSS-Fuzz

View File

@ -0,0 +1,5 @@
BasedOnStyle: Google
Language: Cpp
# Force int* foo spacing style:
DerivePointerAlignment: false
PointerAlignment: Left

View File

@ -0,0 +1 @@
We are not accepting external contributions at this time. Stay tuned.

View File

@ -0,0 +1,6 @@
# Ignore the Google-only sources.
/copybara
/google
/production
/puzzles
/testing

View File

@ -0,0 +1,8 @@
# This is the list of Centipede's significant contributors.
#
# This does not necessarily list everyone who has contributed code,
# especially since many employees of one corporation may be contributing.
# To see the full list of contributors, see the revision history in
# source control.
Google LLC
Kostya Serebryany

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,261 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/analyze_corpora.h"
#include <algorithm>
#include <cstddef>
#include <sstream>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include "absl/container/flat_hash_map.h"
#include "absl/container/flat_hash_set.h"
#include "absl/log/check.h"
#include "absl/log/log.h"
#include "./centipede/binary_info.h"
#include "./centipede/control_flow.h"
#include "./centipede/corpus.h"
#include "./centipede/corpus_io.h"
#include "./centipede/coverage.h"
#include "./centipede/feature.h"
#include "./centipede/pc_info.h"
#include "./centipede/workdir.h"
#include "./common/defs.h"
#include "./common/logging.h"
#include "./common/remote_file.h"
namespace fuzztest::internal {
namespace {
// Reads all corpus shards, together with their features shards, from the
// workdir described by `workdir_path`, `binary_name` and `binary_hash`, and
// returns every record found.
//
// CHECK-fails if either glob fails or if the number of corpus shard files
// differs from the number of features shard files.
std::vector<CorpusRecord> ReadCorpora(std::string_view binary_name,
                                      std::string_view binary_hash,
                                      std::string_view workdir_path) {
  WorkDir workdir(std::string(workdir_path), std::string(binary_name),
                  std::string(binary_hash), /*my_shard_index=*/0);
  std::vector<std::string> corpus_paths;
  CHECK_OK(
      RemoteGlobMatch(workdir.CorpusFilePaths().AllShardsGlob(), corpus_paths));
  std::vector<std::string> features_paths;
  CHECK_OK(RemoteGlobMatch(workdir.FeaturesFilePaths().AllShardsGlob(),
                           features_paths));
  CHECK_EQ(corpus_paths.size(), features_paths.size());
  std::vector<CorpusRecord> corpus;
  // Corpus and features files are paired by position.
  // NOTE(review): this presumes both globs list the shards in the same order
  // -- confirm against RemoteGlobMatch.
  // The index is `size_t` so that it matches the type of `size()`.
  for (size_t i = 0; i < corpus_paths.size(); ++i) {
    LOG(INFO) << "Reading corpus at: " << corpus_paths[i];
    LOG(INFO) << "Reading features at: " << features_paths[i];
    ReadShard(corpus_paths[i], features_paths[i],
              [&corpus](ByteArray input, FeatureVec features) {
                corpus.push_back({std::move(input), std::move(features)});
              });
  }
  return corpus;
}
// Loads the `BinaryInfo` stored in the binary-info directory of the workdir
// described by `workdir_path`, `binary_name` and `binary_hash`.
BinaryInfo ReadBinaryInfo(std::string_view binary_name,
                          std::string_view binary_hash,
                          std::string_view workdir_path) {
  WorkDir workdir(std::string(workdir_path), std::string(binary_name),
                  std::string(binary_hash), /*my_shard_index=*/0);
  BinaryInfo binary_info;
  binary_info.Read(workdir.BinaryInfoDirPath());
  return binary_info;
}
// Compares the PC coverage of corpus `a` with that of corpus `b`.
//
// The result holds, each sorted in ascending order: the PCs covered by `a`,
// the PCs covered by `b`, and the PCs covered by only one of the two. For every
// covered PC it also holds one record that covers it (the first such record
// encountered). `binary_info` is not used by this overload; the caller fills
// in `binary_info` of the result.
AnalyzeCorporaResults AnalyzeCorpora(const BinaryInfo &binary_info,
                                     const std::vector<CorpusRecord> &a,
                                     const std::vector<CorpusRecord> &b) {
  // `a_pcs` will contain all PCs covered by `a`.
  absl::flat_hash_set<size_t> a_pcs;
  absl::flat_hash_map<size_t, CorpusRecord> a_pc_to_corpus;
  for (const auto &record : a) {
    for (const auto &feature : record.features) {
      if (!feature_domains::kPCs.Contains(feature)) continue;
      auto pc = ConvertPCFeatureToPcIndex(feature);
      a_pcs.insert(pc);
      // `record` is const, so it can only be copied. `try_emplace` makes that
      // copy only when `pc` has no entry yet, instead of once per feature.
      a_pc_to_corpus.try_emplace(pc, record);
    }
  }
  // `b_only_pcs` will contain PCs covered by `b` but not by `a`.
  // `b_unique_indices` are indices of inputs that have PCs from `b_only_pcs`.
  // `b_shared_indices` are indices of all other inputs from `b`.
  absl::flat_hash_set<size_t> b_only_pcs;
  absl::flat_hash_set<size_t> b_pcs;
  absl::flat_hash_map<size_t, CorpusRecord> b_pc_to_corpus;
  std::vector<size_t> b_shared_indices, b_unique_indices;
  for (size_t i = 0; i < b.size(); ++i) {
    const auto &record = b[i];
    bool has_b_only = false;
    for (const auto &feature : record.features) {
      if (!feature_domains::kPCs.Contains(feature)) continue;
      auto pc = ConvertPCFeatureToPcIndex(feature);
      b_pcs.insert(pc);
      // Same as for `a`: copy the record only for the first occurrence of `pc`.
      b_pc_to_corpus.try_emplace(pc, record);
      if (a_pcs.contains(pc)) continue;
      b_only_pcs.insert(pc);
      has_b_only = true;
    }
    if (has_b_only)
      b_unique_indices.push_back(i);
    else
      b_shared_indices.push_back(i);
  }
  // `a_only_pcs` will contain PCs covered by `a` but not by `b`. This needs a
  // second pass over `a` because `b_pcs` is complete only now.
  absl::flat_hash_set<size_t> a_only_pcs;
  for (const auto &record : a) {
    for (const auto &feature : record.features) {
      if (!feature_domains::kPCs.Contains(feature)) continue;
      auto pc = ConvertPCFeatureToPcIndex(feature);
      if (b_pcs.contains(pc)) continue;
      a_only_pcs.insert(pc);
    }
  }
  LOG(INFO) << VV(a.size()) << VV(b.size()) << VV(a_pcs.size())
            << VV(a_only_pcs.size()) << VV(b_only_pcs.size())
            << VV(b_shared_indices.size()) << VV(b_unique_indices.size());
  // Sort PCs to put them in the canonical order, as in pc_table.
  AnalyzeCorporaResults ret;
  ret.a_pcs = std::vector<size_t>{a_pcs.begin(), a_pcs.end()};
  ret.b_pcs = std::vector<size_t>{b_pcs.begin(), b_pcs.end()};
  ret.a_only_pcs = std::vector<size_t>{a_only_pcs.begin(), a_only_pcs.end()};
  ret.b_only_pcs = std::vector<size_t>{b_only_pcs.begin(), b_only_pcs.end()};
  ret.a_pc_to_corpus_record = std::move(a_pc_to_corpus);
  ret.b_pc_to_corpus_record = std::move(b_pc_to_corpus);
  std::sort(ret.a_pcs.begin(), ret.a_pcs.end());
  std::sort(ret.b_pcs.begin(), ret.b_pcs.end());
  std::sort(ret.a_only_pcs.begin(), ret.a_only_pcs.end());
  std::sort(ret.b_only_pcs.begin(), ret.b_only_pcs.end());
  return ret;
}
} // namespace
// Collects the PC indices of all PC features found in `corpus_records` and
// returns them, sorted in ascending order and without duplicates, together
// with `binary_info`.
CoverageResults GetCoverage(const std::vector<CorpusRecord> &corpus_records,
                            BinaryInfo binary_info) {
  absl::flat_hash_set<size_t> covered_pcs;
  for (const auto &corpus_record : corpus_records) {
    for (const auto &feature : corpus_record.features) {
      // Only features from the PC domain map to a PC index.
      if (feature_domains::kPCs.Contains(feature)) {
        covered_pcs.insert(ConvertPCFeatureToPcIndex(feature));
      }
    }
  }
  CoverageResults results = {
      /*pcs=*/{covered_pcs.begin(), covered_pcs.end()},
      /*binary_info=*/std::move(binary_info),
  };
  // Sort PCs to put them in the canonical order, as in pc_table.
  std::sort(results.pcs.begin(), results.pcs.end());
  return results;
}
// Reads the corpus and then the binary info stored in `workdir` for the given
// binary, and returns the coverage of that corpus.
CoverageResults GetCoverage(std::string_view binary_name,
                            std::string_view binary_hash,
                            std::string_view workdir) {
  // The corpus is read before the binary info.
  const std::vector<CorpusRecord> records =
      ReadCorpora(binary_name, binary_hash, workdir);
  BinaryInfo info = ReadBinaryInfo(binary_name, binary_hash, workdir);
  return GetCoverage(records, std::move(info));
}
// Writes to `coverage_report_path` a symbol table with one entry for every
// covered PC that is a function entry, using the format produced by
// `SymbolTable::WriteToLLVMSymbolizer`.
//
// CHECK-fails if a covered PC is out of range for the PC table or the symbol
// table, or if the report cannot be written.
void DumpCoverageReport(const CoverageResults &coverage_results,
                        std::string_view coverage_report_path) {
  LOG(INFO) << "Dump coverage to file: " << coverage_report_path;
  const fuzztest::internal::PCTable &pc_table =
      coverage_results.binary_info.pc_table;
  const fuzztest::internal::SymbolTable &symbols =
      coverage_results.binary_info.symbols;
  fuzztest::internal::SymbolTable coverage_symbol_table;
  for (const PCIndex pc : coverage_results.pcs) {
    // `pc` indexes both tables below, so it must be strictly less than their
    // sizes; `pc == size()` would already be out of bounds.
    CHECK_LT(pc, pc_table.size());
    CHECK_LT(pc, symbols.size());
    if (!pc_table[pc].has_flag(fuzztest::internal::PCInfo::kFuncEntry))
      continue;
    const SymbolTable::Entry entry = symbols.entry(pc);
    coverage_symbol_table.AddEntry(entry.func, entry.file_line_col());
  }
  std::ostringstream symbol_table_stream;
  coverage_symbol_table.WriteToLLVMSymbolizer(symbol_table_stream);
  CHECK_OK(
      RemoteFileSetContents(coverage_report_path, symbol_table_stream.str()));
}
// Loads the binary info and corpus from both workdirs and compares the two
// corpora. CHECK-fails if the PC tables or the symbol tables of the two
// workdirs differ in size. The returned `binary_info` is the one read from
// `workdir_a`.
AnalyzeCorporaResults AnalyzeCorpora(std::string_view binary_name,
                                     std::string_view binary_hash,
                                     std::string_view workdir_a,
                                     std::string_view workdir_b) {
  BinaryInfo info_a = ReadBinaryInfo(binary_name, binary_hash, workdir_a);
  BinaryInfo info_b = ReadBinaryInfo(binary_name, binary_hash, workdir_b);
  // Only the table sizes of the two binary infos are compared.
  CHECK_EQ(info_a.pc_table.size(), info_b.pc_table.size());
  CHECK_EQ(info_a.symbols.size(), info_b.symbols.size());

  const std::vector<CorpusRecord> corpus_a =
      ReadCorpora(binary_name, binary_hash, workdir_a);
  const std::vector<CorpusRecord> corpus_b =
      ReadCorpora(binary_name, binary_hash, workdir_b);

  AnalyzeCorporaResults results = AnalyzeCorpora(info_a, corpus_a, corpus_b);
  results.binary_info = std::move(info_a);
  return results;
}
void AnalyzeCorporaToLog(std::string_view binary_name,
std::string_view binary_hash,
std::string_view workdir_a,
std::string_view workdir_b) {
AnalyzeCorporaResults results =
AnalyzeCorpora(binary_name, binary_hash, workdir_a, workdir_b);
const auto &pc_table = results.binary_info.pc_table;
const auto &symbols = results.binary_info.symbols;
CoverageLogger coverage_logger(pc_table, symbols);
// TODO(kcc): use frontier_a to show the most interesting b-only PCs.
// TODO(kcc): these cause a CHECK-fail
// CoverageFrontier frontier_a(results.binary_info);
// frontier_a.Compute(a);
// First, print the newly covered functions (including partially covered).
LOG(INFO) << "B-only new functions:";
absl::flat_hash_set<std::string_view> b_only_new_functions;
for (const auto pc : results.b_only_pcs) {
if (!pc_table[pc].has_flag(PCInfo::kFuncEntry)) continue;
auto str = coverage_logger.ObserveAndDescribeIfNew(pc);
if (!str.empty()) LOG(INFO).NoPrefix() << str;
b_only_new_functions.insert(symbols.func(pc));
}
// Now, print newly covered edges in functions that were covered in `a`.
LOG(INFO) << "B-only new edges:";
for (const auto pc : results.b_only_pcs) {
if (b_only_new_functions.contains(symbols.func(pc))) continue;
auto str = coverage_logger.ObserveAndDescribeIfNew(pc);
if (!str.empty()) LOG(INFO).NoPrefix() << str;
}
}
} // namespace fuzztest::internal

View File

@ -0,0 +1,72 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef THIRD_PARTY_CENTIPEDE_ANALYZE_CORPORA_H
#define THIRD_PARTY_CENTIPEDE_ANALYZE_CORPORA_H
#include <cstddef>
#include <string_view>
#include <vector>
#include "absl/container/flat_hash_map.h"
#include "./centipede/binary_info.h"
#include "./centipede/corpus.h"
namespace fuzztest::internal {
// The results of comparing corpus `a` with corpus `b`.
// `AnalyzeCorpora` fills every PC vector with PC indices in ascending order.
struct AnalyzeCorporaResults {
// PCs covered by `a`.
std::vector<size_t> a_pcs;
// PCs covered by `b`.
std::vector<size_t> b_pcs;
// PCs covered by `a` but not by `b`.
std::vector<size_t> a_only_pcs;
// PCs covered by `b` but not by `a`.
std::vector<size_t> b_only_pcs;
// For each PC covered by `a`, one record of `a` that covers it.
absl::flat_hash_map<size_t, CorpusRecord> a_pc_to_corpus_record;
// For each PC covered by `b`, one record of `b` that covers it.
absl::flat_hash_map<size_t, CorpusRecord> b_pc_to_corpus_record;
// Binary info that the PC indices above refer to.
BinaryInfo binary_info;
};
// The result of analyzing a single corpus.
struct CoverageResults {
// Indices of the PCs covered by the corpus; `GetCoverage` returns them in
// ascending order.
std::vector<size_t> pcs;
// Binary info that the PC indices above refer to.
BinaryInfo binary_info;
};
// Returns the PCs covered by `records`, together with `binary_info`.
CoverageResults GetCoverage(const std::vector<CorpusRecord>& records,
BinaryInfo binary_info);
// Returns information on the corpus within `workdir`, which is read for the
// binary identified by `binary_name` and `binary_hash`.
CoverageResults GetCoverage(std::string_view binary_name,
std::string_view binary_hash,
std::string_view workdir);
// Dumps `coverage_results` to `coverage_report_path` in the same format as read
// by `SymbolTable::ReadFromLLVMSymbolizer`. Only covered PCs that are function
// entries are written.
void DumpCoverageReport(const CoverageResults& coverage_results,
std::string_view coverage_report_path);
// Compares the corpus within `workdir_a` with the corpus in `workdir_b`.
// The `binary_info` of the result is the one read from `workdir_a`.
AnalyzeCorporaResults AnalyzeCorpora(std::string_view binary_name,
std::string_view binary_hash,
std::string_view workdir_a,
std::string_view workdir_b);
// Same as above but `LOG`s the results for human consumption.
void AnalyzeCorporaToLog(std::string_view binary_name,
std::string_view binary_hash,
std::string_view workdir_a,
std::string_view workdir_b);
} // namespace fuzztest::internal
#endif // THIRD_PARTY_CENTIPEDE_ANALYZE_CORPORA_H

View File

@ -0,0 +1,124 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/analyze_corpora.h"
#include <cstdlib>
#include <filesystem>
#include <sstream>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/log/check.h"
#include "absl/log/log.h"
#include "./centipede/binary_info.h"
#include "./centipede/environment.h"
#include "./centipede/symbol_table.h"
#include "./centipede/test_coverage_util.h"
#include "./common/remote_file.h"
#include "./common/test_util.h"
namespace fuzztest::internal {
namespace {
using ::testing::AllOf;
using ::testing::Contains;
using ::testing::IsSupersetOf;
using ::testing::Not;
// Returns path to test_fuzz_target, resolved as a data dependency of this
// test.
static std::string GetTargetPath() {
return GetDataDependencyFilepath("centipede/testing/test_fuzz_target");
}
// TODO(ussuri): Implement.
// Placeholder: this test only logs a message and asserts nothing.
TEST(AnalyzeCorpora, AnalyzeCorpora) { LOG(INFO) << "Unimplemented"; }
// Runs the test target on the input "func1" and checks that the resulting
// coverage includes LLVMFuzzerTestOneInput and SingleEdgeFunc but not
// MultiEdgeFunc.
TEST(GetCoverage, SimpleCoverageResults) {
  Environment env;
  env.binary = GetTargetPath();
  auto corpus_records = RunInputsAndCollectCorpusRecords(env, {"func1"});
  // Everything below relies on exactly one record, so stop here otherwise.
  ASSERT_EQ(corpus_records.size(), 1);
  // Get pc_table and symbols.
  BinaryInfo binary_info;
  binary_info.InitializeFromSanCovBinary(
      GetTargetPath(), GetObjDumpPath(), GetLLVMSymbolizerPath(),
      GetTestTempDir(test_info_->name()).string());
  // Check the flag that initialization actually computed, not a local
  // variable that nothing ever writes to.
  EXPECT_FALSE(binary_info.uses_legacy_trace_pc_instrumentation);
  const auto &pc_table = binary_info.pc_table;
  const SymbolTable &symbols = binary_info.symbols;
  // pc_table and symbols should have the same size.
  EXPECT_EQ(pc_table.size(), symbols.size());
  CoverageResults res = GetCoverage(corpus_records, std::move(binary_info));
  // Check that inputs cover LLVMFuzzerTestOneInput and SingleEdgeFunc, but not
  // MultiEdgeFunc.
  size_t llvm_fuzzer_test_one_input_num_edges = 0;
  size_t single_edge_func_num_edges = 0;
  size_t multi_edge_func_num_edges = 0;
  const SymbolTable &res_symbols = res.binary_info.symbols;
  for (size_t pc : res.pcs) {
    // Every covered PC must be a valid index into the symbol table.
    ASSERT_LT(pc, res_symbols.size());
    const auto &func = res_symbols.func(pc);
    single_edge_func_num_edges += func == "SingleEdgeFunc";
    multi_edge_func_num_edges += func == "MultiEdgeFunc";
    llvm_fuzzer_test_one_input_num_edges += func == "LLVMFuzzerTestOneInput";
  }
  EXPECT_GT(llvm_fuzzer_test_one_input_num_edges, 1);
  EXPECT_EQ(single_edge_func_num_edges, 1);
  EXPECT_EQ(multi_edge_func_num_edges, 0);
}
// Dumps the coverage of the input "func1" to a file, reads the file back as a
// symbol table and checks which functions it lists.
TEST(DumpCoverageReport, SimpleCoverageResults) {
  Environment env;
  env.binary = GetTargetPath();
  auto records = RunInputsAndCollectCorpusRecords(env, {"func1"});
  ASSERT_EQ(records.size(), 1);

  const std::string tmp_dir = GetTestTempDir(test_info_->name());
  BinaryInfo info;
  info.InitializeFromSanCovBinary(GetTargetPath(), GetObjDumpPath(),
                                  GetLLVMSymbolizerPath(), tmp_dir);
  CoverageResults coverage = GetCoverage(records, std::move(info));

  // Write the report.
  const std::string report_path =
      std::filesystem::path{tmp_dir} / "covered_symbol_table";
  DumpCoverageReport(coverage, report_path);

  // Read the report back and parse it as a symbol table.
  std::string report_contents;
  ASSERT_OK(RemoteFileGetContents(report_path, report_contents));
  std::istringstream report_stream(report_contents);
  SymbolTable reported_symbols;
  reported_symbols.ReadFromLLVMSymbolizer(report_stream);

  std::vector<std::string_view> reported_functions;
  for (size_t i = 0; i < reported_symbols.size(); ++i) {
    reported_functions.push_back(reported_symbols.func(i));
  }
  // Check that inputs cover LLVMFuzzerTestOneInput and SingleEdgeFunc, but not
  // MultiEdgeFunc.
  EXPECT_THAT(reported_functions,
              AllOf(IsSupersetOf({"LLVMFuzzerTestOneInput", "SingleEdgeFunc"}),
                    Not(Contains("MultiEdgeFunc"))));
}
} // namespace
} // namespace fuzztest::internal

View File

@ -0,0 +1,166 @@
// Copyright 2023 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/binary_info.h"
#include <cstdlib>
#include <filesystem> // NOLINT
#include <sstream>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include "absl/log/check.h"
#include "absl/log/log.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_split.h"
#include "./centipede/command.h"
#include "./centipede/control_flow.h"
#include "./centipede/pc_info.h"
#include "./centipede/util.h"
#include "./common/remote_file.h"
namespace fuzztest::internal {
namespace {
// Names of the files, inside the directory given to `BinaryInfo::Read` and
// `BinaryInfo::Write`, that hold the serialized tables.
constexpr std::string_view kSymbolTableFileName = "symbol-table";
constexpr std::string_view kPCTableFileName = "pc-table";
constexpr std::string_view kCfTableFileName = "cf-table";
} // namespace
// Fills `pc_table`, `cf_table`, `dso_table`, `symbols` and
// `uses_legacy_trace_pc_instrumentation` by running the target binary.
//
// The binary is executed once with CENTIPEDE_RUNNER_FLAGS asking it to dump
// its binary info into temp files under `tmp_dir_path`. If that yields no PC
// table, the PC table is extracted with `objdump_path` instead and the binary
// is treated as using legacy trace-pc instrumentation. Symbols are loaded only
// when the PC table is non-empty.
//
// Does nothing if `binary_path_with_args` is empty. CHECK-fails if
// `tmp_dir_path` is not an existing directory.
void BinaryInfo::InitializeFromSanCovBinary(
std::string_view binary_path_with_args, std::string_view objdump_path,
std::string_view symbolizer_path, std::string_view tmp_dir_path) {
if (binary_path_with_args.empty()) {
// This usually happens in tests.
LOG(INFO) << __func__ << ": binary_path_with_args is empty";
return;
}
// Compute names for temp files.
const std::filesystem::path tmp_dir = tmp_dir_path;
CHECK(std::filesystem::exists(tmp_dir) &&
std::filesystem::is_directory(tmp_dir));
ScopedFile pc_table_path(tmp_dir_path, "pc_table_tmp");
ScopedFile cf_table_path(tmp_dir_path, "cf_table_tmp");
ScopedFile dso_table_path(tmp_dir_path, "dso_table_tmp");
ScopedFile log_path(tmp_dir_path, "binary_info_log_tmp");
LOG(INFO) << __func__ << ": tmp_dir: " << tmp_dir;
// Run the binary with the dump request in its environment; arg1..arg3 are the
// output paths for the PC, CF and DSO tables. stdout goes to `log_path`.
Command::Options cmd_options;
cmd_options.env_add = {absl::StrCat(
"CENTIPEDE_RUNNER_FLAGS=:dump_binary_info:arg1=", pc_table_path.path(),
":arg2=", cf_table_path.path(), ":arg3=", dso_table_path.path(), ":")};
cmd_options.stdout_file = std::string(log_path.path());
Command cmd{binary_path_with_args, std::move(cmd_options)};
int exit_code = cmd.Execute();
// A failing exit code is only logged; an empty PC table below triggers the
// fallback.
if (exit_code != EXIT_SUCCESS) {
LOG(INFO) << __func__ << ": exit_code: " << exit_code;
}
// Load PC Table.
pc_table = ReadPcTableFromFile(pc_table_path.path());
// Load CF Table.
if (std::filesystem::exists(cf_table_path.path()))
cf_table = ReadCfTable(cf_table_path.path());
// Load the DSO Table.
dso_table = ReadDsoTableFromFile(dso_table_path.path());
if (pc_table.empty()) {
CHECK(dso_table.empty());
// Fallback to GetPcTableFromBinaryWithTracePC().
LOG(WARNING)
<< "Failed to dump PC table directly from binary using linked-in "
"runner; see target execution logs above; falling back to legacy PC "
"table extraction using trace-pc and objdump";
pc_table = GetPcTableFromBinaryWithTracePC(
binary_path_with_args, objdump_path, pc_table_path.path());
if (pc_table.empty()) {
LOG(ERROR) << "Failed to extract PC table from binary using objdump; see "
"objdump execution logs above";
}
// For the legacy trace-pc instrumentation, set the dso_table
// to 1-element array consisting of the binary name
// The binary name is the first whitespace-separated token of
// `binary_path_with_args`.
const std::vector<std::string> args =
absl::StrSplit(binary_path_with_args, absl::ByAnyChar{" \t\n"},
absl::SkipWhitespace{});
CHECK(!args.empty());
dso_table.push_back({args[0], pc_table.size()});
uses_legacy_trace_pc_instrumentation = true;
} else {
uses_legacy_trace_pc_instrumentation = false;
}
if (!uses_legacy_trace_pc_instrumentation) {
// The number of instrumented PCs in the DSO table should match pc_table.
size_t num_instrumened_pcs_in_all_dsos = 0;
for (const auto& dso : dso_table) {
num_instrumened_pcs_in_all_dsos += dso.num_instrumented_pcs;
}
CHECK_EQ(num_instrumened_pcs_in_all_dsos, pc_table.size());
}
// Load symbols, if there is a PC table.
if (!pc_table.empty()) {
// NOTE(review): these two ScopedFile objects are not passed to
// GetSymbolsFromBinary, which receives only `tmp_dir_path`; confirm whether
// they are still needed.
ScopedFile sym_tmp1_path(tmp_dir_path, "symbols_tmp1");
ScopedFile sym_tmp2_path(tmp_dir_path, "symbols_tmp2");
symbols.GetSymbolsFromBinary(pc_table, dso_table, symbolizer_path,
tmp_dir_path);
}
}
// Loads `symbols`, `pc_table` and `cf_table` from the files in `dir` that
// `Write` produces. CHECK-fails if the symbol table file or the PC table file
// cannot be read.
void BinaryInfo::Read(std::string_view dir) {
  // TODO(b/295978603): move calculation of paths into WorkDir class.
  const std::filesystem::path root(dir);
  const std::filesystem::path symbol_table_path = root / kSymbolTableFileName;
  const std::filesystem::path pc_table_path = root / kPCTableFileName;
  const std::filesystem::path cf_table_path = root / kCfTableFileName;

  // Symbol table.
  std::string symbol_table_contents;
  CHECK_OK(
      RemoteFileGetContents(symbol_table_path.c_str(), symbol_table_contents));
  std::istringstream symbol_table_stream(symbol_table_contents);
  symbols.ReadFromLLVMSymbolizer(symbol_table_stream);

  // PC table.
  std::string pc_table_contents;
  CHECK_OK(RemoteFileGetContents(pc_table_path.c_str(), pc_table_contents));
  std::istringstream pc_table_stream(pc_table_contents);
  pc_table = ReadPcTable(pc_table_stream);

  // CF table.
  cf_table = ReadCfTable(cf_table_path.c_str());
}
// Serializes `symbols`, `pc_table` and `cf_table` into one file each inside
// `dir`. CHECK-fails if any of the files cannot be written.
void BinaryInfo::Write(std::string_view dir) {
  // TODO(b/295978603): move calculation of paths into WorkDir class.
  const std::filesystem::path root(dir);
  const std::filesystem::path symbol_table_path = root / kSymbolTableFileName;
  const std::filesystem::path pc_table_path = root / kPCTableFileName;
  const std::filesystem::path cf_table_path = root / kCfTableFileName;

  // Symbol table.
  std::ostringstream symbol_table_stream;
  symbols.WriteToLLVMSymbolizer(symbol_table_stream);
  CHECK_OK(RemoteFileSetContents(symbol_table_path.c_str(),
                                 symbol_table_stream.str()));

  // PC table.
  std::ostringstream pc_table_stream;
  WritePcTable(pc_table, pc_table_stream);
  CHECK_OK(RemoteFileSetContents(pc_table_path.c_str(), pc_table_stream.str()));

  // CF table.
  std::ostringstream cf_table_stream;
  WriteCfTable(cf_table, cf_table_stream);
  CHECK_OK(RemoteFileSetContents(cf_table_path.c_str(), cf_table_stream.str()));
}
} // namespace fuzztest::internal

View File

@ -0,0 +1,59 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef THIRD_PARTY_CENTIPEDE_BINARY_INFO_H_
#define THIRD_PARTY_CENTIPEDE_BINARY_INFO_H_
#include <string_view>
#include "./centipede/call_graph.h"
#include "./centipede/control_flow.h"
#include "./centipede/pc_info.h"
#include "./centipede/symbol_table.h"
namespace fuzztest::internal {
// Information about the binary being fuzzed. Created once at program startup
// and doesn't change (other than for lazily initialized fields).
struct BinaryInfo {
// The three tables below are the ones persisted by `Write()` and restored by
// `Read()`.
PCTable pc_table;
SymbolTable symbols;
CFTable cf_table;
// NOTE(review): the remaining fields are not touched by `Write()`; presumably
// they are derived or lazily initialized elsewhere - confirm against the
// users of this struct.
DsoTable dso_table;
ControlFlowGraph control_flow_graph;
CallGraph call_graph;
// Defaults to false; set by `InitializeFromSanCovBinary()` (see below).
bool uses_legacy_trace_pc_instrumentation = false;
// Initializes `pc_table`, `symbols`, `cf_table` and
// `uses_legacy_trace_pc_instrumentation` based on `binary_path_with_args`.
// * `binary_path_with_args` is the path to the instrumented binary,
// possibly with space-separated arguments.
// * `objdump_path` and `symbolizer_path` are paths to respective tools.
// * `tmp_dir_path` is a path to a temp dir, that must exist.
void InitializeFromSanCovBinary(std::string_view binary_path_with_args,
std::string_view objdump_path,
std::string_view symbolizer_path,
std::string_view tmp_dir_path);
// Serialize `this` within the given `dir`. Only `pc_table`, `symbols` and
// `cf_table` are written, each to its own file under `dir`.
void Write(std::string_view dir);
// Initialize `this` with the serialized contents in `dir`. Assumes the same
// format as `Write`.
void Read(std::string_view dir);
};
} // namespace fuzztest::internal
#endif // THIRD_PARTY_CENTIPEDE_BINARY_INFO_H_

View File

@ -0,0 +1,82 @@
// Copyright 2023 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/binary_info.h"
#include <sstream>
#include <string>
#include <utility>
#include "gtest/gtest.h"
#include "./centipede/control_flow.h"
#include "./centipede/pc_info.h"
#include "./centipede/symbol_table.h"
#include "./common/test_util.h"
namespace fuzztest::internal {
namespace {
// Round-trips a populated BinaryInfo through Write() and Read() and verifies
// that the PC table, the symbol table and the CF table come back unchanged.
TEST(BinaryInfoTest, SerializesAndDeserializesBinaryInfoSuccessfully) {
  const std::string temp_dir = GetTestTempDir(test_info_->name());

  // Parse a small symbol table from llvm-symbolizer-style text.
  const std::string symbolizer_text =
      R"(FunctionOne
source/location/one.cc:1:0
FunctionTwo
source/location/two.cc:2:0
)";
  std::istringstream symbolizer_stream(symbolizer_text);
  SymbolTable parsed_symbols;
  parsed_symbols.ReadFromLLVMSymbolizer(symbolizer_stream);

  // Tables to be serialized alongside the symbols.
  const PCTable expected_pcs = {{/*pc=*/0, /*flags=*/1},
                                {/*pc=*/2, /*flags=*/3}};
  const CFTable expected_cfs = {1, 2, 3, 0, 0, 2, 4, 0};

  BinaryInfo input;
  input.pc_table = expected_pcs;
  input.symbols = std::move(parsed_symbols);
  input.cf_table = expected_cfs;
  input.Write(temp_dir);

  BinaryInfo output;
  output.Read(temp_dir);

  EXPECT_EQ(input.pc_table, output.pc_table);
  EXPECT_EQ(input.symbols, output.symbols);
  EXPECT_EQ(input.cf_table, output.cf_table);
}
// Same round trip as above, but with every table empty: Write() followed by
// Read() must yield empty tables that compare equal to the originals.
TEST(BinaryInfoTest, SerializesAndDeserializesEmptyBinaryInfoSuccessfully) {
  const std::string temp_dir = GetTestTempDir(test_info_->name());

  // An empty symbolizer output produces an empty symbol table.
  const std::string symbolizer_text = "";
  std::istringstream symbolizer_stream(symbolizer_text);
  SymbolTable parsed_symbols;
  parsed_symbols.ReadFromLLVMSymbolizer(symbolizer_stream);

  const PCTable expected_pcs = {};
  const CFTable expected_cfs = {};

  BinaryInfo input;
  input.pc_table = expected_pcs;
  input.symbols = std::move(parsed_symbols);
  input.cf_table = expected_cfs;
  input.Write(temp_dir);

  BinaryInfo output;
  output.Read(temp_dir);

  EXPECT_EQ(input.pc_table, output.pc_table);
  EXPECT_EQ(input.symbols, output.symbols);
  EXPECT_EQ(input.cf_table, output.cf_table);
}
} // namespace
} // namespace fuzztest::internal

View File

@ -0,0 +1,150 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cstdint>
#include <cstdlib>
#include <filesystem> // NOLINT
#include <string>
#include "absl/base/nullability.h"
#include "absl/flags/flag.h"
#include "absl/log/check.h"
#include "absl/log/log.h"
#include "absl/status/status.h"
#include "absl/strings/str_format.h"
#include "absl/time/clock.h"
#include "absl/time/time.h"
#include "./centipede/config_init.h"
#include "./centipede/rusage_profiler.h"
#include "./common/blob_file.h"
#include "./common/defs.h"
#include "./common/logging.h"
#include "./common/remote_file.h"
// Command-line flags of the blob-file converter. `main()` requires `--in` and
// `--out` to be non-empty and `--out_format` to be either "legacy" or
// "riegeli" (the default).
ABSL_FLAG(std::string, in, "", "Input path");
ABSL_FLAG(std::string, out, "", "Output path");
ABSL_FLAG(std::string, out_format, "riegeli", "--out format (legacy|riegeli)");
namespace fuzztest::internal {
namespace {
// TODO(ussuri): Pare down excessive rusage profiling after breaking in.
class StatsLogger {
public:
StatsLogger(absl::Duration log_every, RUsageProfiler& rprof)
: log_every_(log_every),
next_log_at_(start_ + log_every),
rprof_(rprof) {}
void UpdateStats(ByteSpan blob) {
++num_blobs_;
num_bytes_ += blob.size();
}
void Log() {
RPROF_THIS_FUNCTION_BY_EXISTING_RPROF(rprof_);
const auto secs = absl::ToDoubleSeconds(absl::Now() - start_);
const std::string stats = absl::StrFormat(
"blobs: %9lld | blobs/s: %5.0f | bytes: %12lld | bytes/s: %8.0f",
num_blobs_, num_blobs_ / secs, num_bytes_, num_bytes_ / secs);
if (ABSL_VLOG_IS_ON(3)) {
const RUsageProfiler::Snapshot& snapshot = RPROF_SNAPSHOT(stats);
LOG(INFO) << stats << " | " << snapshot.memory.ShortStr();
} else {
LOG(INFO) << stats;
}
}
void MaybeLogIfTime() {
const auto now = absl::Now();
if (now >= next_log_at_) {
Log();
next_log_at_ += log_every_;
if (next_log_at_ < now) next_log_at_ = now + log_every_;
}
}
private:
int64_t num_blobs_ = 0;
int64_t num_bytes_ = 0;
const absl::Time start_ = absl::Now();
const absl::Duration log_every_;
absl::Time next_log_at_;
RUsageProfiler& rprof_;
};
// Copies every blob from the blob file `in` to a newly written blob file
// `out`, one blob at a time, logging progress statistics along the way.
//
// * `in` must exist (CHECK-enforced).
// * The parent directory of `out` is created before the writer is opened.
// * `out_format` selects the writer: "riegeli" requests a Riegeli writer from
//   DefaultBlobFileWriterFactory(); any other value requests the non-Riegeli
//   one.
//
// Every failure (missing input, failed open/write/close, unexpected read
// status) terminates the process through CHECK/CHECK_OK.
void Convert( //
const std::string& in, //
const std::string& out, const std::string& out_format) {
// Function-level rusage profiler; the final report is enabled at verbosity 1.
RPROF_THIS_FUNCTION_WITH_REPORT(/*enable=*/ABSL_VLOG_IS_ON(1));
LOG(INFO) << "Converting:\n" << VV(in) << "\n" << VV(out) << VV(out_format);
const bool out_is_riegeli = out_format == "riegeli";
// Verify and prepare source and destination.
CHECK(RemotePathExists(in)) << VV(in);
CHECK_OK(RemoteMkdir(std::filesystem::path{out}.parent_path().c_str()));
// Open blob file reader and writer.
RPROF_START_TIMELAPSE( //
absl::Seconds(20), /*also_log=*/ABSL_VLOG_IS_ON(3), "Opening --in");
const auto in_reader = DefaultBlobFileReaderFactory();
CHECK_OK(in_reader->Open(in)) << VV(in);
RPROF_STOP_TIMELAPSE();
RPROF_SNAPSHOT_AND_LOG("Opened --in; opening --out");
const auto out_writer = DefaultBlobFileWriterFactory(out_is_riegeli);
CHECK_OK(out_writer->Open(out, "w")) << VV(out);
RPROF_SNAPSHOT_AND_LOG("Opened --out");
// Read and write blobs one-by-one.
ByteSpan blob;
absl::Status read_status = absl::OkStatus();
// Periodic stats: every 20 seconds at verbosity 1, every 60 seconds otherwise.
StatsLogger stats_logger{
absl::Seconds(ABSL_VLOG_IS_ON(1) ? 20 : 60),
FUNCTION_LEVEL_RPROF_NAME,
};
while ((read_status = in_reader->Read(blob)).ok()) {
CHECK_OK(out_writer->Write(blob));
stats_logger.UpdateStats(blob);
stats_logger.MaybeLogIfTime();
}
// Always emit the final totals, regardless of the periodic schedule.
stats_logger.Log();
// The loop ends on the first non-OK read status; only OutOfRange is tolerated
// here. NOTE(review): presumably OutOfRange is how the reader signals the end
// of the input - confirm against the blob file reader's contract.
CHECK(read_status.ok() || absl::IsOutOfRange(read_status)) << VV(read_status);
CHECK_OK(out_writer->Close()) << VV(out);
}
} // namespace
} // namespace fuzztest::internal
// Entry point of the blob-file converter: initializes the runtime, validates
// the `--in`, `--out` and `--out_format` flags (QCHECK-failing on an empty
// path or an unknown format) and then runs the conversion.
int main(int argc, char** absl_nonnull argv) {
// The return value of InitRuntime() is deliberately discarded.
(void)fuzztest::internal::InitRuntime(argc, argv);
const std::string in = absl::GetFlag(FLAGS_in);
QCHECK(!in.empty());
const std::string out = absl::GetFlag(FLAGS_out);
QCHECK(!out.empty());
const std::string out_format = absl::GetFlag(FLAGS_out_format);
// Only the two formats advertised in the flag's help text are accepted.
QCHECK(out_format == "legacy" || out_format == "riegeli") << VV(out_format);
fuzztest::internal::Convert(in, out, out_format);
return EXIT_SUCCESS;
}

View File

@ -0,0 +1,351 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/byte_array_mutator.h"
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <utility>
#include <vector>
#include "./centipede/execution_metadata.h"
#include "./centipede/knobs.h"
#include "./centipede/mutation_input.h"
#include "./common/defs.h"
namespace fuzztest::internal {
//============= CmpDictionary ===============
bool CmpDictionary::SetFromMetadata(const ExecutionMetadata &metadata) {
dictionary_.clear();
if (!metadata.ForEachCmpEntry([&](ByteSpan a, ByteSpan b) {
auto size = a.size();
if (size > DictEntry::kMaxEntrySize) return;
if (size < kMinEntrySize) return;
// TODO(kcc): disregard boring CMP pairs, such as e.g. `1 CMP 0`.
dictionary_.emplace_back(a, b);
dictionary_.emplace_back(b, a);
}))
return false;
std::sort(dictionary_.begin(), dictionary_.end());
return true;
}
// Fills `suggestions` with every `B` for which an entry {A, B} exists such that
// `A` is a prefix of `bytes`. Never grows `suggestions` beyond its current
// capacity; a zero-capacity vector is left untouched.
void CmpDictionary::SuggestReplacement(
    ByteSpan bytes, std::vector<ByteSpan> &suggestions) const {
  if (suggestions.capacity() == 0) return;
  suggestions.clear();
  if (bytes.size() < kMinEntrySize) return;

  // Binary-search for the first entry whose key starts with the same
  // kMinEntrySize bytes as `bytes`.
  // This is not super efficient.
  // We need to see the real usage before optimizing.
  // TODO(kcc): investigate using absl/container/btree_map.h instead.
  DictEntry prefix({bytes.begin(), kMinEntrySize});
  const auto key_less = [](const Pair &a, const Pair &b) {
    return a.first < b.first;
  };
  auto entry = std::lower_bound(dictionary_.begin(), dictionary_.end(),
                                Pair{prefix, prefix}, key_less);

  // Walk the run of entries sharing that prefix, stopping early once
  // `suggestions` has reached its capacity.
  for (; entry != dictionary_.end() &&
         suggestions.size() != suggestions.capacity();
       ++entry) {
    const auto &key = entry->first;
    const auto &replacement = entry->second;
    // Past the last entry with the same leading kMinEntrySize bytes.
    if (!std::equal(bytes.begin(), bytes.begin() + kMinEntrySize,
                    key.begin())) {
      break;
    }
    // `key` must fit into `bytes` and match it completely.
    const bool key_is_prefix =
        bytes.size() >= key.size() &&
        std::equal(key.begin(), key.end(), bytes.begin());
    if (key_is_prefix) {
      suggestions.emplace_back(replacement.begin(), replacement.size());
    }
  }
}
//============= ByteArrayMutator ===============
// Returns how many bytes to add to an input of `curr_size` bytes so that the
// new size is a multiple of `size_alignment_`, rounding the requested `to_add`
// up if necessary. The result is capped so that the new size does not exceed
// `max_len_`; returns 0 if `curr_size` is already at or above `max_len_`.
size_t ByteArrayMutator::RoundUpToAdd(size_t curr_size, size_t to_add) {
  if (curr_size >= max_len_) return 0;
  // Bytes by which the requested new size overshoots an alignment boundary.
  const size_t overshoot = (curr_size + to_add) % size_alignment_;
  const size_t aligned_add =
      overshoot == 0 ? to_add : to_add + size_alignment_ - overshoot;
  return curr_size + aligned_add > max_len_ ? max_len_ - curr_size
                                            : aligned_add;
}
// Returns how many bytes to remove from an input of `curr_size` bytes so that
// the remaining size is a multiple of `size_alignment_`, rounding the remaining
// size down if necessary. Never removes everything: at least
// `size_alignment_` bytes are kept, and inputs that are already that small
// yield 0. If the aligned remainder would still exceed `max_len_`, returns the
// amount needed to shrink to exactly `max_len_`.
size_t ByteArrayMutator::RoundDownToRemove(size_t curr_size, size_t to_remove) {
  if (curr_size <= size_alignment_) return 0;
  // Keep one alignment unit when asked to remove everything (or more).
  const size_t keep_one_unit = curr_size - size_alignment_;
  if (to_remove >= curr_size) return keep_one_unit;

  const size_t remaining = curr_size - to_remove;
  const size_t aligned_remaining = remaining - remaining % size_alignment_;
  if (aligned_remaining > max_len_) return curr_size - max_len_;
  // Rounding down reached zero: back off by one alignment unit.
  if (aligned_remaining == 0) return keep_one_unit;
  return curr_size - aligned_remaining;
}
// Knobs weighting the choice between the three mutation families.
// The order of this array must match the order of the candidate lists passed
// alongside it to `Knobs::Choose()` in `ByteArrayMutator::Mutate()`:
// [0] same size, [1] decrease size, [2] increase size.
static const KnobId knob_mutate[3] = {Knobs::NewId("mutate_same_size"),
                                      Knobs::NewId("mutate_decrease_size"),
                                      Knobs::NewId("mutate_increase_size")};

// Applies one randomly chosen mutation to `data`.
//
// The set of candidate mutation families depends on how `data.size()` relates
// to `max_len_`:
//  * above `max_len_`: only size-decreasing mutations;
//  * exactly `max_len_`: same-size or size-decreasing mutations;
//  * below `max_len_`: any of the three families.
//
// Returns true as soon as one mutation succeeds, false if all attempts failed.
bool ByteArrayMutator::Mutate(ByteArray &data) {
  // Individual mutator may fail to mutate and return false.
  // So we iterate a few times and expect one of the mutations will succeed.
  for (int iter = 0; iter < 15; iter++) {
    Fn mutator = nullptr;
    if (data.size() > max_len_) {
      mutator = &ByteArrayMutator::MutateDecreaseSize;
    } else if (data.size() == max_len_) {
      mutator = knobs_.Choose<Fn>({knob_mutate[0], knob_mutate[1]},
                                  {&ByteArrayMutator::MutateSameSize,
                                   &ByteArrayMutator::MutateDecreaseSize},
                                  rng_());
    } else {
      // The candidates are listed in the same order as `knob_mutate` so that
      // each knob weights the mutator it is named after, exactly as the
      // `== max_len_` branch above pairs knob_mutate[1] with
      // MutateDecreaseSize. (Previously the last two candidates were swapped,
      // so "mutate_decrease_size" weighted MutateIncreaseSize and vice versa.)
      mutator = knobs_.Choose<Fn>(knob_mutate,
                                  {&ByteArrayMutator::MutateSameSize,
                                   &ByteArrayMutator::MutateDecreaseSize,
                                   &ByteArrayMutator::MutateIncreaseSize},
                                  rng_());
    }
    if ((this->*mutator)(data)) return true;
  }
  return false;
}
// One knob per same-size mutator, in the order the mutators are listed in
// `ByteArrayMutator::MutateSameSize()`.
static const KnobId knob_mutate_same_size[5] = {
    Knobs::NewId("mutate_same_size_0"), Knobs::NewId("mutate_same_size_1"),
    Knobs::NewId("mutate_same_size_2"), Knobs::NewId("mutate_same_size_3"),
    Knobs::NewId("mutate_same_size_4"),
};

// Picks one of the size-preserving mutators according to the knobs and applies
// it to `data`. Returns whatever the chosen mutator returns.
bool ByteArrayMutator::MutateSameSize(ByteArray &data) {
  const Fn chosen = knobs_.Choose<Fn>(
      knob_mutate_same_size,
      {&ByteArrayMutator::FlipBit, &ByteArrayMutator::SwapBytes,
       &ByteArrayMutator::ChangeByte,
       &ByteArrayMutator::OverwriteFromDictionary,
       &ByteArrayMutator::OverwriteFromCmpDictionary},
      rng_());
  return (this->*chosen)(data);
}
// One knob per size-increasing mutator, in the order the mutators are listed
// in `ByteArrayMutator::MutateIncreaseSize()`.
static const KnobId knob_mutate_increase_size[2] = {
    Knobs::NewId("mutate_increase_size_0"),
    Knobs::NewId("mutate_increase_size_1"),
};

// Picks one of the size-increasing mutators according to the knobs and applies
// it to `data`. Returns whatever the chosen mutator returns.
bool ByteArrayMutator::MutateIncreaseSize(ByteArray &data) {
  const Fn chosen = knobs_.Choose<Fn>(
      knob_mutate_increase_size,
      {&ByteArrayMutator::InsertBytes, &ByteArrayMutator::InsertFromDictionary},
      rng_());
  return (this->*chosen)(data);
}
// Applies a size-decreasing mutation to `data`. EraseBytes() is currently the
// only such mutator, so no knob-based choice is needed.
bool ByteArrayMutator::MutateDecreaseSize(ByteArray &data) {
  return EraseBytes(data);
}
// Flips one randomly selected bit of `data`. Always returns true.
// `data` must be non-empty.
bool ByteArrayMutator::FlipBit(ByteArray &data) {
  const uintptr_t random = rng_();
  // Index of the bit within the whole array; split into byte and bit offsets.
  const size_t global_bit = random % (data.size() * 8);
  const size_t byte_pos = global_bit / 8;
  const size_t bit_in_byte = global_bit % 8;
  data[byte_pos] ^= static_cast<uint8_t>(1 << bit_in_byte);
  return true;
}
// Exchanges the bytes at two randomly selected positions of `data` (which may
// coincide). Always returns true. `data` must be non-empty.
bool ByteArrayMutator::SwapBytes(ByteArray &data) {
  const size_t size = data.size();
  const size_t first_pos = rng_() % size;
  const size_t second_pos = rng_() % size;
  std::swap(data[first_pos], data[second_pos]);
  return true;
}
// Overwrites one randomly selected byte of `data` with a random value.
// Always returns true. `data` must be non-empty.
bool ByteArrayMutator::ChangeByte(ByteArray &data) {
  // The position is drawn before the value.
  const size_t target_pos = rng_() % data.size();
  data[target_pos] = rng_();
  return true;
}
// Inserts between 0 and `kMaxInsertSize` random bytes at a random position of
// `data`, choosing the count so that the resulting size honors
// `size_alignment_` and `max_len_` (see RoundUpToAdd()).
//
// Returns false, leaving `data` untouched, if no aligned insertion of at most
// `kMaxInsertSize` bytes exists; returns true otherwise.
bool ByteArrayMutator::InsertBytes(ByteArray &data) {
  // Don't insert too many bytes at once.
  const size_t kMaxInsertSize = 20;
  size_t num_new_bytes = rng_() % kMaxInsertSize + 1;
  num_new_bytes = RoundUpToAdd(data.size(), num_new_bytes);
  if (num_new_bytes > kMaxInsertSize) {
    // Rounding up overshot the limit, so step back by one alignment unit.
    // When the alignment is larger than the rounded count the subtraction
    // would wrap around and the fill loop below would write far past
    // `new_bytes`; in that case there is nothing valid to insert.
    if (num_new_bytes < size_alignment_) return false;
    num_new_bytes -= size_alignment_;
  }
  // There are N+1 positions to insert something into an array of N.
  size_t pos = rng_() % (data.size() + 1);
  // Fixed array to avoid memory allocation.
  std::array<uint8_t, kMaxInsertSize> new_bytes;
  for (size_t i = 0; i < num_new_bytes; i++) new_bytes[i] = rng_();
  data.insert(data.begin() + pos, new_bytes.begin(),
              new_bytes.begin() + num_new_bytes);
  return true;
}
// Removes a random contiguous chunk from `data`, sized so that the result
// honors `size_alignment_` (see RoundDownToRemove()). Returns false without
// touching `data` if it is too small to shrink or if nothing can be removed.
bool ByteArrayMutator::EraseBytes(ByteArray &data) {
  const size_t old_size = data.size();
  if (old_size <= size_alignment_) return false;
  // Ok to erase a sizable chunk since small inputs are good (if they
  // produce good features).
  const size_t requested = rng_() % (old_size / 2) + 1;
  const size_t erase_len = RoundDownToRemove(old_size, requested);
  if (erase_len == 0) return false;
  const size_t erase_pos = rng_() % (old_size - erase_len + 1);
  const auto erase_begin = data.begin() + erase_pos;
  data.erase(erase_begin, erase_begin + erase_len);
  return true;
}
// Appends every entry of `dict_entries` that fits into a DictEntry (at most
// DictEntry::kMaxEntrySize bytes) to the internal dictionary; longer entries
// are silently dropped.
void ByteArrayMutator::AddToDictionary(
    const std::vector<ByteArray> &dict_entries) {
  for (const ByteArray &candidate : dict_entries) {
    if (candidate.size() <= DictEntry::kMaxEntrySize) {
      dictionary_.emplace_back(candidate);
    }
  }
}
// Overwrites a random window of `data` with a randomly selected dictionary
// entry. Returns false if the dictionary is empty or the selected entry is
// longer than `data`.
bool ByteArrayMutator::OverwriteFromDictionary(ByteArray &data) {
  if (dictionary_.empty()) return false;
  const size_t entry_idx = rng_() % dictionary_.size();
  const auto &entry = dictionary_[entry_idx];
  if (entry.size() > data.size()) return false;
  // Any start position that keeps the entry fully inside `data`.
  const size_t dest_pos = rng_() % (data.size() - entry.size() + 1);
  std::copy(entry.begin(), entry.end(), data.begin() + dest_pos);
  return true;
}
// Overwrites part of `data` with a replacement suggested by the CMP
// dictionary: looks for a position whose following bytes start with some
// observed CMP operand `A` and overwrites them with the matching operand `B`.
//
// Returns true if a replacement was applied. Returns false if the CMP
// dictionary is empty, `data` is shorter than CmpDictionary::kMinEntrySize, or
// no applicable suggestion was found at any position.
bool ByteArrayMutator::OverwriteFromCmpDictionary(ByteArray &data) {
  if (cmp_dictionary_.size() == 0) return false;
  if (data.size() < CmpDictionary::kMinEntrySize) return false;
  // Start with a random position in `data`, search though the entire `data`
  // until some suggestion is found.
  size_t search_start_idx = rng_() % data.size();
  constexpr size_t kMaxNumSuggestions = 100;
  std::vector<ByteSpan> suggestions;
  suggestions.reserve(kMaxNumSuggestions);
  for (size_t i = 0; i < data.size(); i++) {
    size_t idx = (search_start_idx + i) % data.size();
    // Skip positions with fewer than kMinEntrySize bytes left. A tail of
    // exactly kMinEntrySize bytes is still searchable, hence `>` and not `>=`;
    // with `>=` an input of exactly kMinEntrySize bytes could never be
    // mutated even though the size guard above admits it.
    if (idx + CmpDictionary::kMinEntrySize > data.size()) continue;
    ByteSpan tail{&data[idx], data.size() - idx};
    cmp_dictionary_.SuggestReplacement(tail, suggestions);
    if (suggestions.empty()) continue;
    auto suggestion = suggestions[rng_() % suggestions.size()];
    // Apply the suggestion only if it fits into the rest of `data`.
    if (idx + suggestion.size() <= data.size()) {
      std::copy(suggestion.begin(), suggestion.end(), data.begin() + idx);
      return true;
    }
  }
  return false;
}
// Inserts a randomly selected dictionary entry at a random position of `data`.
// Returns false if the dictionary is empty, true otherwise.
bool ByteArrayMutator::InsertFromDictionary(ByteArray &data) {
  if (dictionary_.empty()) return false;
  const size_t entry_idx = rng_() % dictionary_.size();
  const auto &entry = dictionary_[entry_idx];
  // There are N+1 positions to insert something into an array of N.
  const size_t insert_pos = rng_() % (data.size() + 1);
  data.insert(data.begin() + insert_pos, entry.begin(), entry.end());
  return true;
}
// Inserts a random slice `other[first:first+size]` at a random position of
// `data`. The slice length is rounded so that the resulting size honors
// `size_alignment_` and `max_len_`. Does nothing if `other` is too short to
// bring `data` up to the next aligned size.
void ByteArrayMutator::CrossOverInsert(ByteArray &data,
                                       const ByteArray &other) {
  if ((data.size() % size_alignment_) + other.size() < size_alignment_) return;
  // Draw the slice length first, then align it.
  const size_t requested_len = 1 + rng_() % other.size();
  size_t slice_len = RoundUpToAdd(data.size(), requested_len);
  // Rounding up may exceed what `other` can provide; step back one unit.
  if (slice_len > other.size()) slice_len -= size_alignment_;
  const size_t slice_begin = rng_() % (other.size() - slice_len + 1);
  const size_t insert_pos = rng_() % (data.size() + 1);
  const auto source = other.begin() + slice_begin;
  data.insert(data.begin() + insert_pos, source, source + slice_len);
}
// Overwrites `data[pos:pos+size]` with `other[first:first+size]`, where
// `first`, `size` and `pos` are chosen at random. At most half of `data` (but
// at least one byte) is overwritten; the size of `data` does not change.
//
// Both `data` and `other` must be non-empty.
void ByteArrayMutator::CrossOverOverwrite(ByteArray &data,
                                          const ByteArray &other) {
  // Overwrite no more than half of data.
  // The template argument is spelled out because a `1UL` literal only has the
  // same type as `size_t` on platforms where `size_t` is `unsigned long`;
  // elsewhere `std::max(1UL, ...)` fails template argument deduction.
  size_t max_size = std::max<size_t>(1, data.size() / 2);
  size_t first = rng_() % other.size();
  // Don't read past the end of `other`.
  max_size = std::min(max_size, other.size() - first);
  size_t size = 1 + rng_() % max_size;
  size_t max_pos = data.size() - size;
  size_t pos = rng_() % (max_pos + 1);
  std::copy(other.begin() + first, other.begin() + first + size,
            data.begin() + pos);
}
// Knob deciding, for inputs below `max_len_`, whether crossover inserts data
// from the other input or overwrites existing data.
const KnobId knob_cross_over_insert_or_overwrite =
    Knobs::NewId("cross_over_insert_or_overwrite");

// Applies either CrossOverInsert() or CrossOverOverwrite() to `data`.
// Inputs that already reached `max_len_` are only ever overwritten (and no
// random number is consumed for the decision); smaller ones follow the knob.
void ByteArrayMutator::CrossOver(ByteArray &data, const ByteArray &other) {
  const bool insert =
      data.size() < max_len_ &&
      knobs_.GenerateBool(knob_cross_over_insert_or_overwrite, rng_());
  if (insert) {
    CrossOverInsert(data, other);
    return;
  }
  CrossOverOverwrite(data, other);
}
// Controls how much crossover is used during mutations.
// https://en.wikipedia.org/wiki/Crossover_(genetic_algorithm)
// TODO(kcc): add tests with different values of knobs.
const KnobId knob_mutate_or_crossover = Knobs::NewId("mutate_or_crossover");
std::vector<ByteArray> ByteArrayMutator::MutateMany(
const std::vector<MutationInputRef> &inputs, size_t num_mutants) {
if (inputs.empty()) abort();
// TODO(xinhaoyuan): Consider metadata in other inputs instead of always the
// first one.
SetMetadata(inputs[0].metadata != nullptr ? *inputs[0].metadata
: ExecutionMetadata());
size_t num_inputs = inputs.size();
std::vector<ByteArray> mutants;
mutants.reserve(num_mutants);
for (size_t i = 0; i < num_mutants; ++i) {
auto mutant = inputs[rng_() % num_inputs].data;
if (mutant.size() <= max_len_ &&
knobs_.GenerateBool(knob_mutate_or_crossover, rng_())) {
// Do crossover only if the mutant is not over the max_len_.
// Perform crossover with some other input. It may be the same input.
const auto &other_input = inputs[rng_() % num_inputs].data;
CrossOver(mutant, other_input);
} else {
// Perform mutation.
Mutate(mutant);
}
mutants.push_back(std::move(mutant));
}
return mutants;
}
} // namespace fuzztest::internal

View File

@ -0,0 +1,255 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef THIRD_PARTY_CENTIPEDE_BYTE_ARRAY_MUTATOR_H_
#define THIRD_PARTY_CENTIPEDE_BYTE_ARRAY_MUTATOR_H_
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <limits>
#include <utility>
#include <vector>
#include "absl/base/nullability.h"
#include "./centipede/execution_metadata.h"
#include "./centipede/knobs.h"
#include "./centipede/mutation_input.h"
#include "./common/defs.h"
namespace fuzztest::internal {
// A simple class representing an array of up to kMaxEntrySize bytes.
// The bytes are stored inline; unused trailing bytes are always zero, which
// `operator<` relies on.
class DictEntry {
 public:
  static constexpr uint8_t kMaxEntrySize = 16;

  // Copies `bytes` into the entry. Traps if `bytes` is longer than
  // kMaxEntrySize.
  explicit DictEntry(ByteSpan bytes)
      : bytes_{},  // initialize bytes_ to all zeros
        size_(static_cast<uint8_t>(bytes.size())) {
    // Validate the original, full-width size. `size_` is only 8 bits wide, so
    // checking it instead would let a span of 256 or more bytes wrap around
    // to a small value, pass the check, and overflow `bytes_` in the copy.
    if (bytes.size() > kMaxEntrySize) __builtin_trap();
    // An empty span may carry a null data pointer, which must not be handed
    // to memcpy even with a zero length.
    if (!bytes.empty()) memcpy(bytes_, bytes.data(), bytes.size());
  }

  const uint8_t *absl_nonnull begin() const { return bytes_; }
  const uint8_t *end() const { return bytes_ + size_; }
  size_t size() const { return size_; }

  // Orders entries by their zero-padded bytes first and by size second, via a
  // raw comparison of the whole object.
  bool operator<(const DictEntry &other) const {
    return memcmp(this, &other, sizeof(*this)) < 0;
  }

 private:
  // bytes_ must go first so that operator < is lexicographic.
  uint8_t bytes_[kMaxEntrySize];
  uint8_t size_;  // at most kMaxEntrySize (enforced by the constructor).
};
// Dictionary of CMP args.
// Maintains an easy-to-query set of pairs {A,B}, such that
// an instruction `A CMP B` has been observed.
class CmpDictionary {
public:
static constexpr size_t kMinEntrySize = 2; // 1-byte entries won't be added.
CmpDictionary() = default;
// Sets the dictionary from execution `metadata`.
// Any previous contents are discarded. Each accepted pair is stored in both
// directions ({A,B} and {B,A}).
//
// Returns false on bad metadata, true otherwise.
bool SetFromMetadata(const ExecutionMetadata &metadata);
// Clears `suggestions` on entry.
// For every observed `A CMP B` such that `A` is a prefix of `bytes`,
// adds `B` to `suggestions`.
// `suggestions`, is filled up to capacity(), but not more.
// Consequently, a vector with zero capacity receives no suggestions; callers
// are expected to reserve() the maximum number of suggestions they want.
void SuggestReplacement(ByteSpan bytes,
std::vector<ByteSpan> &suggestions) const;
// Returns the number of dictionary entries.
size_t size() const { return dictionary_.size(); }
private:
using Pair = std::pair<DictEntry, DictEntry>;
// Kept sorted by SetFromMetadata() so that SuggestReplacement() can
// binary-search it by the first element of each pair.
std::vector<Pair> dictionary_;
};
// This class allows to mutate a ByteArray in different ways.
// All mutations expect and guarantee that `data` remains non-empty
// since there is only one possible empty input and it's uninteresting.
//
// This class is thread-compatible.
// Typical usage is to have one such object per thread.
class ByteArrayMutator {
 public:
  // CTOR. Initializes the internal RNG with `seed` (`seed` != 0).
  // Keeps a const reference to `knobs` throughout the lifetime.
  ByteArrayMutator(const Knobs &knobs, uintptr_t seed)
      : rng_(seed), knobs_(knobs) {
    if (seed == 0) __builtin_trap();  // We don't include logging.h here.
  }

  // Adds `dict_entries` to an internal dictionary.
  void AddToDictionary(const std::vector<ByteArray> &dict_entries);

  // Populates the internal CmpDictionary using execution `metadata`.
  // Returns false on failure, true otherwise.
  bool SetMetadata(const ExecutionMetadata &metadata) {
    return cmp_dictionary_.SetFromMetadata(metadata);
  }

  // Takes non-empty `inputs` and produces `num_mutants` mutants.
  std::vector<ByteArray> MutateMany(const std::vector<MutationInputRef> &inputs,
                                    size_t num_mutants);

  using CrossOverFn = void (ByteArrayMutator::*)(ByteArray &,
                                                 const ByteArray &);

  // Mutates `data` by inserting a random part from `other`.
  void CrossOverInsert(ByteArray &data, const ByteArray &other);

  // Mutates `data` by overwriting some of it with a random part of `other`.
  void CrossOverOverwrite(ByteArray &data, const ByteArray &other);

  // Applies one of {CrossOverOverwrite, CrossOverInsert}.
  void CrossOver(ByteArray &data, const ByteArray &other);

  // Type for a Mutator member-function.
  // Every mutator function takes a ByteArray& as an input, mutates it in place
  // and returns true if mutation took place. In some cases mutation may fail
  // to happen, e.g. if EraseBytes() is called on a 1-byte input.
  // Fn is test-only public.
  using Fn = bool (ByteArrayMutator::*)(ByteArray &);

  // All public functions below are mutators.
  // They return true iff a mutation took place.

  // Applies some random mutation to data.
  bool Mutate(ByteArray &data);

  // Applies some random mutation that doesn't change size.
  bool MutateSameSize(ByteArray &data);

  // Applies some random mutation that decreases size.
  bool MutateDecreaseSize(ByteArray &data);

  // Applies some random mutation that increases size.
  bool MutateIncreaseSize(ByteArray &data);

  // Flips a random bit.
  bool FlipBit(ByteArray &data);

  // Swaps two bytes.
  bool SwapBytes(ByteArray &data);

  // Changes a random byte to a random value.
  bool ChangeByte(ByteArray &data);

  // Overwrites a random part of `data` with a random dictionary entry.
  bool OverwriteFromDictionary(ByteArray &data);

  // Overwrites a random part of `data` with an entry suggested by the internal
  // CmpDictionary.
  bool OverwriteFromCmpDictionary(ByteArray &data);

  // Inserts random bytes.
  bool InsertBytes(ByteArray &data);

  // Inserts a random dictionary entry at random position.
  bool InsertFromDictionary(ByteArray &data);

  // Erases random bytes.
  bool EraseBytes(ByteArray &data);

  // Set size alignment for mutants with modified sizes. Some mutators do not
  // change input size, but mutators that insert or erase bytes will produce
  // mutants with aligned sizes (if possible).
  //
  // Returns true if new size alignment was accepted. Returns false if
  // `size_alignment` is zero or if max length is not a multiple of the
  // specified size alignment.
  bool set_size_alignment(size_t size_alignment) {
    // A zero alignment is meaningless and would make every later
    // `% size_alignment_` (here and in the rounding helpers) a division by
    // zero, so it is rejected up front.
    if (size_alignment == 0) return false;
    if ((max_len_ != std::numeric_limits<size_t>::max()) &&
        (max_len_ % size_alignment != 0)) {
      return false;
    }
    size_alignment_ = size_alignment;
    return true;
  }

  // Set max length in bytes for mutants with modified sizes.
  //
  // Returns true if new max length was accepted. Returns false if specified max
  // length is not a multiple of size alignment.
  bool set_max_len(size_t max_len) {
    if ((max_len != std::numeric_limits<size_t>::max()) &&
        (max_len % size_alignment_ != 0)) {
      return false;
    }
    max_len_ = max_len;
    return true;
  }

 private:
  FRIEND_TEST(ByteArrayMutator, RoundUpToAddCorrectly);
  FRIEND_TEST(ByteArrayMutator, RoundDownToRemoveCorrectly);

  // Given a current size and a number of bytes to add, returns the number of
  // bytes that should be added for the resulting size to be properly aligned.
  //
  // If the original to_add would result in an unaligned input size, we round up
  // to the next larger aligned size.
  //
  // This function respects `max_len_` and will return 0 if curr_size is already
  // greater than or equal to `max_len_`.
  size_t RoundUpToAdd(size_t curr_size, size_t to_add);

  // Given a current size and a number of bytes to remove, returns the number of
  // bytes that should be removed for the resulting size to be properly aligned.
  //
  // If the original to_remove would result in an unaligned input size, we
  // round down to the next smaller aligned size.
  //
  // However, we never return a number of bytes to remove that would result in a
  // 0 size. In this case, the resulting size will be the smaller of
  // curr_size and size_alignment_.
  //
  // This function respects `max_len_` and may return a larger number necessary
  // to get the mutant's size to below `max_len_`.
  size_t RoundDownToRemove(size_t curr_size, size_t to_remove);

  // Size alignment in bytes to generate mutants. Never zero.
  //
  // For example, if size_alignment_ is 1, generated mutants can have any
  // number of bytes. If size_alignment_ is 4, generated mutants will have sizes
  // that are 4-byte aligned.
  size_t size_alignment_ = 1;

  // Max length of a generated mutant in bytes.
  size_t max_len_ = std::numeric_limits<size_t>::max();

  Rng rng_;
  const Knobs &knobs_;
  std::vector<DictEntry> dictionary_;
  CmpDictionary cmp_dictionary_;
};
// Controls how much crossover is used during mutations.
// https://en.wikipedia.org/wiki/Crossover_(genetic_algorithm)
// TODO(kcc): add tests with different values of knobs.
extern const KnobId knob_mutate_or_crossover;
// Controls how much crossover inserts data from the other input instead of
// overwriting.
extern const KnobId knob_cross_over_insert_or_overwrite;
} // namespace fuzztest::internal
#endif // THIRD_PARTY_CENTIPEDE_BYTE_ARRAY_MUTATOR_H_

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,70 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/call_graph.h"
#include <cstddef>
#include <cstdint>
#include <vector>
#include "absl/log/check.h"
#include "./centipede/control_flow.h"
#include "./centipede/pc_info.h"
namespace fuzztest::internal {
void CallGraph::InitializeCallGraph(const CFTable &cf_table,
const PCTable &pc_table) {
// Find all function entries.
for (auto pc_info : pc_table) {
if (pc_info.has_flag(PCInfo::kFuncEntry))
function_entries_.insert(pc_info.pc);
}
uintptr_t current_function_entry = 0;
for (size_t j = 0; j < cf_table.size();) {
std::vector<uintptr_t> current_callees;
auto current_pc = cf_table[j];
++j;
basic_blocks_.insert(current_pc);
if (IsFunctionEntry(current_pc)) current_function_entry = current_pc;
// Iterate over successors.
while (cf_table[j]) {
++j;
}
++j; // Step over the delimeter.
// Iterate over callees.
while (cf_table[j]) {
current_callees.push_back(cf_table[j]);
++j;
}
++j; // Step over the delimeter.
CHECK_LE(j, cf_table.size());
if (current_callees.empty()) continue;
basic_block_callees_[current_pc] = current_callees;
// Append collected callees to the call graph.
call_graph_[current_function_entry].insert(
call_graph_[current_function_entry].end(), current_callees.begin(),
current_callees.end());
}
// This should stay empty.
CHECK(empty_.empty());
}
} // namespace fuzztest::internal

View File

@ -0,0 +1,72 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef THIRD_PARTY_CENTIPEDE_CALL_GRAPH_H_
#define THIRD_PARTY_CENTIPEDE_CALL_GRAPH_H_
#include <cstdint>
#include <vector>
#include "absl/container/flat_hash_map.h"
#include "absl/container/flat_hash_set.h"
#include "absl/log/check.h"
#include "./centipede/control_flow.h"
#include "./centipede/pc_info.h"
#include "./common/logging.h"
namespace fuzztest::internal {
// Call graph keyed by PC: for every function entry and every basic block it
// can report the PCs of the callees recorded for it.
class CallGraph {
 public:
  // Populates the graph from the control-flow table `cf_table` (originally
  // documented as coming from the __sancov_cfs section) and from `pc_table`.
  // Crashes on error; if the section is not available, the hash maps stay
  // empty.
  void InitializeCallGraph(const CFTable& cf_table, const PCTable& pc_table);

  // Returns the callees of the function whose entry is `pc`, or an empty
  // vector if none were recorded. CHECK-fails if `pc` is not a function entry.
  const std::vector<uintptr_t>& GetFunctionCallees(uintptr_t pc) const {
    CHECK(IsFunctionEntry(pc)) << VV(pc) << " is not a function entry.";
    if (const auto found = call_graph_.find(pc); found != call_graph_.end()) {
      return found->second;
    }
    return empty_;
  }

  // Returns the callees of the basic block at `pc`, or an empty vector if
  // none were recorded. CHECK-fails if `pc` is not a known basic block.
  const std::vector<uintptr_t>& GetBasicBlockCallees(uintptr_t pc) const {
    CHECK(basic_blocks_.contains(pc)) << VV(pc) << " is not a basic block.";
    if (const auto found = basic_block_callees_.find(pc);
        found != basic_block_callees_.end()) {
      return found->second;
    }
    return empty_;
  }

  // All PCs flagged as function entries.
  const absl::flat_hash_set<uintptr_t>& GetFunctionEntries() const {
    return function_entries_;
  }

  // True iff `pc` is a function entry.
  bool IsFunctionEntry(uintptr_t pc) const {
    return function_entries_.find(pc) != function_entries_.end();
  }

 private:
  // Function entry PC -> all callees of that function. Only non-empty callee
  // lists are stored: a function without callees has no entry in this map.
  absl::flat_hash_map<uintptr_t, std::vector<uintptr_t>> call_graph_;
  // Basic block PC -> all callees in that basic block. Only non-empty callee
  // lists are stored: a block without callees has no entry in this map.
  absl::flat_hash_map<uintptr_t, std::vector<uintptr_t>> basic_block_callees_;
  // PCs of all function entries.
  absl::flat_hash_set<uintptr_t> function_entries_;
  // PCs of all basic blocks.
  absl::flat_hash_set<uintptr_t> basic_blocks_;
  // Shared empty result handed out by the getters when a PC has no callees.
  std::vector<uintptr_t> empty_;
};
} // namespace fuzztest::internal
#endif // THIRD_PARTY_CENTIPEDE_CALL_GRAPH_H_

View File

@ -0,0 +1,128 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/call_graph.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/container/flat_hash_set.h"
#include "./centipede/control_flow.h"
#include "./centipede/pc_info.h"
#include "./common/logging.h"
namespace fuzztest::internal {
namespace {
using ::testing::Contains;
// Mock CFTable for the cfg of function 1: pcs in parentheses are callees.
// There are three more CFTables for functions 6, 7, 8.
// Function 99 has no CFTable.
//                1
//              /   \
//             /     \
//         2 (99)   3 (6, -1, 8)
//             \     /
//              \   /
//               4 (7)
// Each row below is one basic-block record:
//   <bb pc>, <successor pcs...>, 0, <callee pcs...>, 0
// A callee of -1 stands for an indirect call.
static const CFTable g_cf_table = {
1, 2, 3, 0, 0, // PC 1 has no callee.
2, 4, 0, 99, 0, // PC 2 calls 99.
3, 4, 0, 6, -1, 8, 0, // PC 3 calls 6, 8, and has one indirect call.
4, 0, 7, 0, // PC 4 calls 7.
5, 0, 0, // PC 5 is not in pc_table.
6, 0, 0, // PC 6 has no callees.
7, 0, 0, // PC 7 has no callees.
8, 0, 7, 0, // PC 8 calls 7.
};
// Mock PCTable for the above cfg.
// Each entry is {pc, flags}. PCs 1, 6, 7 and 8 are function entries; PCs 2, 3
// and 4 are plain basic blocks. PC 5 is deliberately absent.
static const PCTable g_pc_table = {
{1, PCInfo::kFuncEntry},
{2, 0},
{3, 0},
{4, 0},
{6, PCInfo::kFuncEntry},
{7, PCInfo::kFuncEntry},
{8, PCInfo::kFuncEntry},
};
// Querying a PC that is absent from the mock tables must terminate the
// process in both callee getters.
TEST(CallGraphDeathTest, CgNoneExistentPc) {
  constexpr uintptr_t kUnknownPc = 666;
  CallGraph graph;
  graph.InitializeCallGraph(g_cf_table, g_pc_table);
  // `kUnknownPc` is neither a function entry nor a basic block.
  EXPECT_DEATH(graph.GetFunctionCallees(kUnknownPc), "");
  EXPECT_DEATH(graph.GetBasicBlockCallees(kUnknownPc), "");
}
// Builds the call graph from the mock tables and verifies, for every PC listed
// in `g_pc_table`, the function-entry flag and the recorded callees.
//
// PC 5 appears only in `g_cf_table`, so it is never visited by the loop below;
// it is not a function entry either, which means GetFunctionCallees(5) would
// CHECK-fail rather than return an empty list.
TEST(CallGraph, BuildCgFromCfTable) {
  CallGraph call_graph;
  call_graph.InitializeCallGraph(g_cf_table, g_pc_table);

  absl::flat_hash_set<uintptr_t> instrumented_pcs;
  for (const auto &pc_info : g_pc_table) {
    instrumented_pcs.insert(pc_info.pc);
  }

  // Check callees.
  for (const auto &pc_info : g_pc_table) {
    const uintptr_t pc = pc_info.pc;
    // Installed first so that every failure below reports the PC.
    SCOPED_TRACE(testing::Message() << VV(pc));
    if (pc_info.has_flag(PCInfo::kFuncEntry)) {
      EXPECT_TRUE(call_graph.IsFunctionEntry(pc));
    } else {
      EXPECT_FALSE(call_graph.IsFunctionEntry(pc));
    }

    if (pc == 1) {
      // Function 1 owns blocks 1-4, whose callees are 99, 6, -1, 8 and 7.
      EXPECT_THAT(call_graph.GetFunctionCallees(pc).size(), 5);
      EXPECT_THAT(call_graph.GetBasicBlockCallees(pc).size(), 0);
    } else if (pc == 2) {
      EXPECT_THAT(call_graph.GetBasicBlockCallees(pc).size(), 1);
    } else if (pc == 3) {
      // Bind by reference: the getter returns a reference into the graph.
      const auto &callees = call_graph.GetBasicBlockCallees(pc);
      EXPECT_THAT(callees.size(), 3);
      for (const auto &callee_pc : callees) {
        if (callee_pc == -1ULL || !instrumented_pcs.contains(callee_pc))
          continue;  // Indirect call or library function call.
        SCOPED_TRACE(testing::Message() << VV(callee_pc));
        EXPECT_TRUE(call_graph.IsFunctionEntry(callee_pc));
      }
      EXPECT_THAT(callees, Contains(6));
      EXPECT_THAT(callees, Contains(8));
      // Check the number of indirect calls.
      EXPECT_THAT(std::count(callees.begin(), callees.end(), -1ULL), 1);
    } else if (pc == 4) {
      EXPECT_THAT(call_graph.GetBasicBlockCallees(pc).size(), 1);
    } else if (pc == 6 || pc == 7) {
      EXPECT_THAT(call_graph.GetFunctionCallees(pc).size(), 0);
      EXPECT_THAT(call_graph.GetBasicBlockCallees(pc).size(), 0);
    } else if (pc == 8) {
      EXPECT_THAT(call_graph.GetFunctionCallees(pc).size(), 1);
      EXPECT_THAT(call_graph.GetBasicBlockCallees(pc).size(), 1);
    }
  }
}
} // namespace
} // namespace fuzztest::internal

View File

@ -0,0 +1,118 @@
// Copyright 2023 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef THIRD_PARTY_CENTIPEDE_CALLSTACK_H_
#define THIRD_PARTY_CENTIPEDE_CALLSTACK_H_
#include <cstddef>
#include <cstdint>
#include "./centipede/rolling_hash.h"
namespace fuzztest::internal {
// CallStack maintains a function call stack for the current thread.
// It is told when a function is called, via OnFunctionEntry(pc, sp).
// It is not told when a function exits, so every time a new function is called
// it needs to unwind the stack based on the current and recorded sp values.
//
// This does not produce precise call stacks.
//
// For example, at some point the stack is:
// PC: 1, 2, 3
// SP: 10, 9, 8
// Then, functions 2 and 3 exit, and function 4 with a large stack is called:
// PC: 1, 4
// SP: 10, 7
// We will fail to unwind functions 2 and 3 and the stack will look like
// PC: 1, 2, 3, 4
// SP: 10, 9, 8, 7
//
// We currently don't see a reliable way to implement a precise call stack by
// just observing function entries (and not exits).
// But for the purposes of Centipede (capturing call stacks as features) this
// implementation should be good enough.
//
// Alternatives that would allow collecting precise calls stacks are
// * add instrumentation to capture function exits
// (fragile in presence of exceptions and longjmp).
// * unwind stack with frame pointers (expensive and also fragile).
// * Wait for hardware shadow call stacks (CET, etc).
//
// Function calls with depth beyond `kMaxDepth` will be ignored.
// Objects of this class must be created as global or TLS.
// The typical non-test usage is to create on TLS.
// There is no CTOR, the objects are zero-initialized.
// We currently do not use a CTOR with absl::ConstInitType so that the objects
// can be declared as __thread.
//
// This code assumes that the stack grows down.
// Fixed-capacity approximate call stack holding at most `kMaxDepth` frames.
// Each recorded frame stores its PC, its SP and the rolling hash of the stack
// up to and including that frame.
template <size_t kMaxDepth = (1 << 12)>
class CallStack {
public:
// Returns the depth of the call stack.
// May be less than the actual depth if that is greater than kMaxDepth.
size_t Depth() const { return depth_; }
// Returns the PC at `idx`, idx must be less than the current depth.
// An out-of-range `idx` traps via `__builtin_trap()`.
uintptr_t PC(size_t idx) const {
if (idx >= depth_) __builtin_trap();
return pc_[idx];
}
// Returns the hash of the current call stack, or 0 if the stack is empty.
// Only the last `window_size` frames are used to compute the hash.
// `Reset(window_size)` must be called at the initialization time.
uint32_t Hash() const { return depth_ == 0 ? 0 : hashes_[depth_ - 1]; }
// Updates the call stack and its hash on function entry.
// `pc` is the function PC to be recorded.
// `sp` is the current stack pointer value, which grows down.
void OnFunctionEntry(uintptr_t pc, uintptr_t sp) {
// First, unwind until the last record's SP is above `sp`.
// Frames recorded with an SP less than or equal to `sp` are dropped.
while (depth_ && sp_[depth_ - 1] <= sp) {
--depth_;
}
// Ignore this call if we are already too deep.
if (depth_ == kMaxDepth) return;
// Record the frame, compute and remember the hash.
pc_[depth_] = pc;
sp_[depth_] = sp;
// The new hash extends the hash of the frame below (0 for the first frame).
uint32_t previous_hash = depth_ == 0 ? 0 : hashes_[depth_ - 1];
// PC of the frame `window_size_` positions below the new one, or 0 if the
// stack is not that deep yet; it is handed to the rolling hash together with
// the new PC.
uintptr_t previous_pc =
depth_ >= window_size_ ? pc_[depth_ - window_size_] : 0;
hashes_[depth_] = rolling_hash_.Update(previous_hash, pc, previous_pc);
++depth_;
}
// Resets the call stack.
// `window_size` is the number of stack frames used to compute the hash.
void Reset(size_t window_size) {
depth_ = 0;
window_size_ = window_size;
rolling_hash_.Reset(window_size);
}
private:
// All data fields are zero initialized at process or thread startup.
// Number of frames currently recorded.
size_t depth_;
// Per-frame PC, SP and cumulative hash; only indices below `depth_` are live.
uintptr_t pc_[kMaxDepth];
uintptr_t sp_[kMaxDepth];
uint32_t hashes_[kMaxDepth];
RollingHash rolling_hash_;
// Number of frames covered by the hash, as set by Reset().
size_t window_size_;
};
} // namespace fuzztest::internal
#endif // THIRD_PARTY_CENTIPEDE_CALLSTACK_H_

View File

@ -0,0 +1,242 @@
// Copyright 2023 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/callstack.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/nullability.h"
#include "absl/container/flat_hash_set.h"
#include "./common/defs.h"
namespace fuzztest::internal {
namespace {
using ::testing::Pointwise;
// Drives OnFunctionEntry with made-up stack pointers and checks how the stack
// grows and unwinds.
TEST(CallStack, SimpleTest) {
  constexpr uintptr_t kStackTop = 10000;
  constexpr uintptr_t kPc0 = 100;
  constexpr uintptr_t kPc1 = 101;
  constexpr uintptr_t kPc2 = 102;
  constexpr uintptr_t kPc3 = 103;

  static CallStack<> stack;  // CallStack should be global/tls only.
  stack.Reset(10);
  EXPECT_EQ(stack.Depth(), 0);

  // Three nested calls, each one deeper than the previous.
  stack.OnFunctionEntry(kPc0, kStackTop);
  stack.OnFunctionEntry(kPc1, kStackTop - 1);
  stack.OnFunctionEntry(kPc2, kStackTop - 2);
  EXPECT_EQ(stack.Depth(), 3);
  EXPECT_EQ(stack.PC(0), kPc0);
  EXPECT_EQ(stack.PC(1), kPc1);
  EXPECT_EQ(stack.PC(2), kPc2);

  // A call at the same SP as the top frame replaces it.
  stack.OnFunctionEntry(kPc3, kStackTop - 2);
  EXPECT_EQ(stack.Depth(), 3);
  EXPECT_EQ(stack.PC(2), kPc3);

  // Calls at shallower SPs unwind everything at or below them.
  stack.OnFunctionEntry(kPc3, kStackTop - 1);
  EXPECT_EQ(stack.Depth(), 2);
  EXPECT_EQ(stack.PC(1), kPc3);

  stack.OnFunctionEntry(kPc3, kStackTop);
  EXPECT_EQ(stack.Depth(), 1);
  EXPECT_EQ(stack.PC(0), kPc3);
}
static CallStack<> g_real_calls_cs;  // CallStack should be global/tls only.
using TestCallstack = std::vector<uintptr_t>;
static std::vector<TestCallstack> g_test_callstacks;

// Appends a snapshot of the PCs currently held by `g_real_calls_cs`
// (outermost frame first) to `g_test_callstacks`.
static void RecordCallStack() {
  const size_t depth = g_real_calls_cs.Depth();
  TestCallstack &snapshot = g_test_callstacks.emplace_back();
  for (size_t frame = 0; frame != depth; ++frame) {
    snapshot.push_back(g_real_calls_cs.PC(frame));
  }
}
// Call on entry to functions Func[0123], that are helpers to RealCallsTest.
// Records `PC` in `g_real_calls_cs`, using the calling function's frame
// address (`__builtin_frame_address(0)`) as the stack pointer value.
#define ON_ENTRY(PC) \
g_real_calls_cs.OnFunctionEntry( \
PC, reinterpret_cast<uintptr_t>(__builtin_frame_address(0)))
// Don't let the compiler be too smart.
// An empty asm statement that takes `arg` as a register input and declares a
// "memory" clobber; it emits no instructions of its own.
static inline void BreakOptimization(const void *absl_nullable arg) {
__asm__ __volatile__("" : : "r"(arg) : "memory");
}
// Leaf helper for RealCallsTest: registers itself as PC 3, then snapshots the
// current call stack. `noinline` presumably keeps it in its own stack frame.
__attribute__((noinline)) void Func3() {
ON_ENTRY(3);
RecordCallStack();
BreakOptimization(0);
}
// Helper for RealCallsTest: registers itself as PC 2 and calls Func3 twice.
__attribute__((noinline)) void Func2() {
ON_ENTRY(2);
BreakOptimization(0);
Func3();
BreakOptimization(0);
Func3();
BreakOptimization(0);
}
// Helper for RealCallsTest: registers itself as PC 1, then calls Func2 and
// Func3 in that order.
__attribute__((noinline)) void Func1() {
ON_ENTRY(1);
BreakOptimization(0);
Func2();
BreakOptimization(0);
Func3();
BreakOptimization(0);
}
// Root helper for RealCallsTest: registers itself as PC 0, then calls Func1
// and Func2 in that order.
__attribute__((noinline)) void Func0() {
ON_ENTRY(0);
BreakOptimization(0);
Func1();
BreakOptimization(0);
Func2();
BreakOptimization(0);
}
// A 2-tuple matcher conversion of `::testing::IsSupersetOf`.
// `arg` is an (actual, expected) pair, as supplied by `Pointwise`; the match
// succeeds when `actual` is a superset of `expected`.
MATCHER(IsSupersetOf, "") {
auto [actual, expected] = arg;
return ::testing::ExplainMatchResult(::testing::IsSupersetOf(expected),
actual, result_listener);
}
// This test actually creates a function call tree, and calls OnFunctionEntry
// with real sp values (and fake PCs).
TEST(CallStack, RealCallsTest) {
g_test_callstacks.clear();
g_real_calls_cs.Reset(10);
Func0();
Func1();
Func2();
Func3();
// One entry per Func3() invocation, in call order: five from Func0(), three
// from Func1(), two from Func2() and one from the direct Func3() call.
std::vector<TestCallstack> expected_test_callstacks = {
{0, 1, 2, 3}, {0, 1, 2, 3}, {0, 1, 3}, {0, 2, 3}, {0, 2, 3}, {1, 2, 3},
{1, 2, 3}, {1, 3}, {2, 3}, {2, 3}, {3}};
// Each computed callstack should correctly include every function on the
// callstack. It may also contain some additional spurious functions - these
// are ones that have exited but not yet removed.
EXPECT_THAT(g_test_callstacks,
Pointwise(IsSupersetOf(), expected_test_callstacks));
// Check that the additional elements in each computed callstack only
// correspond to previous calls not yet removed.
// std::set_difference below requires sorted ranges, hence the sorting.
for (TestCallstack &cs : g_test_callstacks) {
std::sort(cs.begin(), cs.end());
}
for (TestCallstack &cs : expected_test_callstacks) {
std::sort(cs.begin(), cs.end());
}
// extra_calls[i] = PCs recorded in callstack i that were not expected in it.
std::vector<TestCallstack> extra_calls(g_test_callstacks.size());
for (auto it_1 = g_test_callstacks.begin(),
it_2 = expected_test_callstacks.begin(), it = extra_calls.begin();
it_1 != g_test_callstacks.end(); it_1++, it_2++, it++) {
std::set_difference(it_1->begin(), it_1->end(), it_2->begin(), it_2->end(),
std::inserter(*it, it->begin()));
}
// Pair recorded callstack i with extra_calls[i + 1]: every spurious PC must
// already have been present in the previously recorded callstack.
EXPECT_THAT(std::vector<TestCallstack>(g_test_callstacks.begin(),
g_test_callstacks.end() - 1),
Pointwise(IsSupersetOf(),
std::vector<TestCallstack>(extra_calls.begin() + 1,
extra_calls.end())));
}
// Recursion deeper than the stack capacity: excess frames are dropped, and a
// later shallow call still unwinds correctly.
TEST(CallStack, DeepRecursion) {
  constexpr size_t kCapacity = 100;
  constexpr size_t kLargeDepth = 200;
  constexpr uintptr_t kStackTop = 100000000;

  static CallStack<kCapacity> stack;  // CallStack should be global/tls only.
  stack.Reset(10);

  // Enter deep recursion; only the first `kCapacity` frames are kept.
  for (size_t level = 0; level < kLargeDepth; ++level) {
    stack.OnFunctionEntry(level, kStackTop - level);
  }
  EXPECT_EQ(stack.Depth(), kCapacity);

  // Exit recursion, call not-too-deep.
  stack.OnFunctionEntry(42, kStackTop - 2);
  EXPECT_EQ(stack.Depth(), 3);
  EXPECT_EQ(stack.PC(0), 0);
  EXPECT_EQ(stack.PC(1), 1);
  EXPECT_EQ(stack.PC(2), 42);
}
// Exercises CallStack::Hash(): hashes of deep random stacks are almost all
// distinct, and unwinding back to the first frame restores the first hash.
TEST(CallStack, Hash) {
  constexpr size_t kDepth = 5000;
  constexpr size_t kNumDifferentPCs = 10000;
  constexpr size_t kNumIterations = 1000;
  constexpr uintptr_t kStackTop = 100000000;

  static CallStack<kDepth> stack;  // CallStack should be global/tls only.
  stack.Reset(10);
  fuzztest::internal::Rng rng;

  // Push the first PC on the stack and remember its hash.
  stack.OnFunctionEntry(42, kStackTop);
  const auto baseline_hash = stack.Hash();

  absl::flat_hash_set<uintptr_t> distinct_hashes;
  for (size_t round = 0; round < kNumIterations; ++round) {
    // Push many PCs on the stack, collect their hashes.
    distinct_hashes.clear();
    for (size_t offset = 0; offset < kDepth; ++offset) {
      stack.OnFunctionEntry(rng() % kNumDifferentPCs, kStackTop - offset);
      distinct_hashes.insert(stack.Hash());
    }
    // Check that most hashes are unique. Some collisions are ok.
    EXPECT_GE(distinct_hashes.size(), kDepth - 1);

    // Unwind all the way to the top.
    stack.OnFunctionEntry(42, kStackTop);
    EXPECT_EQ(stack.Depth(), 1);
    EXPECT_EQ(stack.Hash(), baseline_hash);
  }
}
// Checks that Hash() only depends on the last `window` frames by counting the
// distinct hashes produced by a periodic call stack.
TEST(CallStack, WindowSize) {
  constexpr size_t kDepth = 5000;
  constexpr uintptr_t kStackTop = 100000000;

  static CallStack<kDepth> stack;  // CallStack should be global/tls only.
  absl::flat_hash_set<uintptr_t> seen_hashes;

  for (size_t period = 1; period < 100; ++period) {
    for (size_t window = 1; window < 100; ++window) {
      // Simulate a recursive call stack that repeats every `period` frames,
      // i.e. for `period=3`, the call stack is
      // {42, 43, 44, 42, 43, 44, 42 ...}
      // Ensure that the hash() function respects the window size.
      seen_hashes.clear();
      stack.Reset(window);
      stack.OnFunctionEntry(42, kStackTop);
      for (size_t i = 0; i < kDepth; ++i) {
        stack.OnFunctionEntry(42 + (i % period), kStackTop - i);
        seen_hashes.insert(stack.Hash());
      }
      EXPECT_EQ(seen_hashes.size(), window + period - 1);
    }
  }
}
} // namespace
} // namespace fuzztest::internal

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,224 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef THIRD_PARTY_CENTIPEDE_CENTIPEDE_H_
#define THIRD_PARTY_CENTIPEDE_CENTIPEDE_H_
#include <atomic>
#include <cstddef>
#include <string>
#include <string_view>
#include <vector>
#include "absl/base/nullability.h"
#include "absl/status/status.h"
#include "absl/time/time.h"
#include "./centipede/binary_info.h"
#include "./centipede/centipede_callbacks.h"
#include "./centipede/command.h"
#include "./centipede/control_flow.h"
#include "./centipede/corpus.h"
#include "./centipede/coverage.h"
#include "./centipede/environment.h"
#include "./centipede/feature.h"
#include "./centipede/feature_set.h"
#include "./centipede/pc_info.h"
#include "./centipede/runner_result.h"
#include "./centipede/rusage_profiler.h"
#include "./centipede/stats.h"
#include "./centipede/symbol_table.h"
#include "./centipede/workdir.h"
#include "./common/blob_file.h"
#include "./common/defs.h"
namespace fuzztest::internal {
// The main fuzzing class.
// Constructed from references to the environment, user callbacks, binary info,
// coverage logger and stats, all of which it stores as references; it can be
// neither copied nor moved.
class Centipede {
public:
Centipede(const Environment &env, CentipedeCallbacks &user_callbacks,
const BinaryInfo &binary_info, CoverageLogger &coverage_logger,
std::atomic<Stats> &stats);
virtual ~Centipede() = default;
// Non-copyable and non-movable.
Centipede(const Centipede &) = delete;
Centipede(Centipede &&) noexcept = delete;
Centipede &operator=(const Centipede &) = delete;
Centipede &operator=(Centipede &&) noexcept = delete;
// Main loop.
void FuzzingLoop();
// Saves the sharded corpus into `dir`, one file per input.
static void CorpusToFiles(const Environment &env, std::string_view dir);
// Exports the corpus from `dir` (one file per input) into the sharded corpus.
// Reads `dir` recursively.
// Ignores inputs that already exist in the shard they need to be added to.
// Sharding is stable and depends only on env.total_shards and the file name.
static void CorpusFromFiles(const Environment &env, std::string_view dir);
// Saves the sharded crash reproducers and metadata (failure description) into
// `dir`. Each crash with `ID` will be saved with file `ID.data` for the
// reproducer and `ID.metadata` metadata.
static absl::Status CrashesToFiles(const Environment &env,
std::string_view dir);
private:
// Executes inputs from `input_vec`.
// For every input, its pruned features are written to
// `unconditional_features_file`, (if that's non-null).
// For every input that caused new features to be observed:
// * the input is added to the corpus (corpus_ and fs_ are updated).
// * the input is written to `corpus_file` (if that's non-null).
// * its features are written to `features_file` (if that's non-null).
// Returns true if new features were observed.
// Post-condition: `batch_result.results.size()` == `input_vec.size()`.
bool RunBatch(const std::vector<ByteArray> &input_vec,
BlobFileWriter *absl_nullable corpus_file,
BlobFileWriter *absl_nullable features_file,
BlobFileWriter *absl_nullable unconditional_features_file);
// Loads seed inputs from the user callbacks, execute them, and store them
// with the corresponding features into `corpus_file` and `features_file`.
void LoadSeedInputs(BlobFileWriter *absl_nonnull corpus_file,
BlobFileWriter *absl_nonnull features_file);
// Loads a shard `shard_index` from `load_env.workdir`.
// Note: `load_env` may be different from `env_`.
// If `rerun` is true, then also re-runs any inputs
// for which the features are not found in `load_env.workdir`.
void LoadShard(const Environment &load_env, size_t shard_index, bool rerun);
// Loads all the shards from corpus files in `load_env.workdir` in random
// order. If `rerun_my_shard` is true, then also re-runs any inputs found in
// `load_env.my_shard_index`th shard. Note: `load_env` may be different from
// `env_`.
void LoadAllShardsInRandomOrder(const Environment &load_env,
bool rerun_my_shard);
// Runs all inputs from `to_rerun`, adds their features to the features file
// of env_.my_shard_index, adds interesting inputs to the corpus.
void Rerun(std::vector<ByteArray> &to_rerun);
// Prints one logging line with `log_type` in it
// if `min_log_level` is not greater than `env_.log_level`.
void UpdateAndMaybeLogStats(std::string_view log_type, size_t min_log_level);
// For every feature in `f`, translates the feature into code coverage
// (PCIndex), then prints one logging line for every
// FUNC/EDGE observed for the first time.
// If symbolization failed, prints a simpler logging line.
// Uses coverage_logger_.
void LogFeaturesAsSymbols(const FeatureVec &f);
// Generates a coverage report file in workdir.
void GenerateCoverageReport(std::string_view filename_annotation,
std::string_view description);
// Generates a corpus stats file in workdir.
void GenerateCorpusStats(std::string_view filename_annotation,
std::string_view description);
// Generates the clang source-based coverage report in workdir.
void GenerateSourceBasedCoverageReport(std::string_view filename_annotation,
std::string_view description);
// Generates a performance report file in workdir.
void GenerateRUsageReport(std::string_view filename_annotation,
std::string_view description);
// Generates all the report and stats files in workdir if this shard is
// assigned to do that.
void MaybeGenerateTelemetry(std::string_view filename_annotation,
std::string_view description);
// Generates all the report and stats files in workdir if this shard is
// assigned to do that and if `batch_index` satisfies the telemetry frequency
// criteria set via the flags.
void MaybeGenerateTelemetryAfterBatch(std::string_view filename_annotation,
size_t batch_index);
// Returns true if `input` passes env_.input_filter.
bool InputPassesFilter(const ByteArray &input);
// Executes `binary` with `input_vec` and `batch_result` as input/output.
// If the binary crashes, calls ReportCrash().
// Returns true iff there were no crashes.
bool ExecuteAndReportCrash(std::string_view binary,
const std::vector<ByteArray> &input_vec,
BatchResult &batch_result);
// Reports a crash and saves the reproducer to workdir/crashes, if possible.
// `binary` is the binary causing the crash.
// Prints the first `env_.max_num_crash_reports` logs.
// `input_vec` is the batch of inputs that caused a crash.
// `batch_result` contains the features computed for `input_vec`
// (batch_result.results().size() == input_vec.size()). `batch_result` is used
// as a hint when choosing which input to try first.
// Stops early if `EarlyExitRequested()`.
void ReportCrash(std::string_view binary,
const std::vector<ByteArray> &input_vec,
const BatchResult &batch_result);
// Merges shard `shard_index_to_merge` of the corpus in `merge_from_dir`
// into the current corpus.
// Writes added inputs to the current shard.
void MergeFromOtherCorpus(std::string_view merge_from_dir,
size_t shard_index_to_merge);
// Reloads the entire corpus for all the shards from workdir (as if with
// `env_.full_sync`) thus distilling it, and saves it to a single file with a
// shard-hashed name in the workdir.
void ReloadAllShardsAndWriteDistilledCorpus();
// Collects all PCs from `fv`, then adds PC-pair features to `fv`.
// Returns the number of added features.
// See more comments in centipede.cc.
size_t AddPcPairFeatures(FeatureVec &fv);
// The environment this object was constructed with; held by reference.
const Environment &env_;
// Initialized from `env_`, so it must stay declared after `env_`.
const WorkDir wd_{env_};
CentipedeCallbacks &user_callbacks_;
Rng rng_;
// A timestamp set just before the actual fuzzing begins. Used to measure
// the fuzzing performance.
absl::Time fuzz_start_time_ = absl::InfiniteFuture();
FeatureSet fs_;
Corpus corpus_;
CoverageFrontier coverage_frontier_;
size_t num_runs_ = 0; // counts executed inputs
// Binary-related data, initialized at startup, once per process,
// by calling the PopulateBinaryInfo callback.
const BinaryInfo &binary_info_;
const PCTable &pc_table_; // same as binary_info_.pc_table.
const SymbolTable &symbols_; // same as binary_info_.symbols.
// Derived from env_.function_filter. Currently, duplicated by every thread.
// In future, threads may have different filters.
const FunctionFilter function_filter_;
// Ensures every coverage location is reported at most once.
// This object is shared with other threads, it is thread-safe.
CoverageLogger &coverage_logger_;
// Statistics of the current run.
std::atomic<Stats> &stats_;
// Counts the number of crashes reported so far.
int num_crashes_ = 0;
// Scratch object for AddPcPairFeatures.
std::vector<size_t> add_pc_pair_scratch_;
// Path and command for the input_filter.
std::string input_filter_path_;
Command input_filter_cmd_;
// Resource usage stats collection & reporting.
RUsageProfiler rusage_profiler_;
};
} // namespace fuzztest::internal
#endif // THIRD_PARTY_CENTIPEDE_CENTIPEDE_H_

View File

@ -0,0 +1,28 @@
#!/bin/bash
# Copyright 2022 The Centipede Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Sanity check for the `:centipede` build target: the binary it is expected to
# produce must exist in the test source directory and be executable.
set -eu

source "$(dirname "$0")/test_util.sh"

centipede_test_srcdir="$(fuzztest::internal::get_centipede_test_srcdir)"
centipede_binary="${centipede_test_srcdir}/centipede"

# Report through `die` when the binary is missing or not executable.
[[ -x "${centipede_binary}" ]] || die "Build target ':centipede' failed to create expected executable \
${centipede_binary}"

View File

@ -0,0 +1,469 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/centipede_callbacks.h"
#include <algorithm>
#include <cstddef>
#include <cstdlib>
#include <filesystem> // NOLINT
#include <memory>
#include <optional>
#include <string>
#include <string_view>
#include <system_error> // NOLINT
#include <utility>
#include <vector>
#include "absl/log/check.h"
#include "absl/log/log.h"
#include "absl/strings/ascii.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_join.h"
#include "absl/strings/str_split.h"
#include "absl/time/clock.h"
#include "absl/time/time.h"
#include "./centipede/binary_info.h"
#include "./centipede/command.h"
#include "./centipede/control_flow.h"
#include "./centipede/mutation_input.h"
#include "./centipede/runner_request.h"
#include "./centipede/runner_result.h"
#include "./centipede/util.h"
#include "./centipede/workdir.h"
#include "./common/blob_file.h"
#include "./common/defs.h"
#include "./common/hash.h"
#include "./common/logging.h"
namespace fuzztest::internal {
namespace {
// Names of the environment variables that must not leak into a test binary
// run as a subprocess, because they would otherwise be inherited and affect
// the execution of the tests.
//
// See list of environment variables here:
// https://bazel.build/reference/test-encyclopedia#initial-conditions
//
// TODO(fniksic): Add end-to-end tests that make sure we don't observe the
// effects of these variables in the test binary.
std::vector<std::string> EnvironmentVariablesToUnset() {
  static constexpr const char *kVariablesToUnset[] = {
      "TEST_DIAGNOSTICS_OUTPUT_DIR",
      "TEST_INFRASTRUCTURE_FAILURE_FILE",
      "TEST_LOGSPLITTER_OUTPUT_FILE",
      "TEST_PREMATURE_EXIT_FILE",
      "TEST_RANDOM_SEED",
      "TEST_RUN_NUMBER",
      "TEST_SHARD_INDEX",
      "TEST_SHARD_STATUS_FILE",
      "TEST_TOTAL_SHARDS",
      "TEST_UNDECLARED_OUTPUTS_ANNOTATIONS_DIR",
      "TEST_UNDECLARED_OUTPUTS_DIR",
      "TEST_WARNINGS_OUTPUT_FILE",
      "GTEST_OUTPUT",
      "XML_OUTPUT_FILE",
  };
  return std::vector<std::string>(std::begin(kVariablesToUnset),
                                  std::end(kVariablesToUnset));
}
} // namespace
// Fills `binary_info` from the coverage binary named in `env_`.
//
// An empty PC table is fatal when `env_.require_pc_table` is set; otherwise a
// warning is logged and nothing further is initialized. An empty CF table only
// produces a warning. When both tables are present and
// `env_.use_coverage_frontier` is set, the control-flow graph and the call
// graph are built from them.
void CentipedeCallbacks::PopulateBinaryInfo(BinaryInfo &binary_info) {
  binary_info.InitializeFromSanCovBinary(
      env_.coverage_binary, env_.objdump_path, env_.symbolizer_path, temp_dir_);

  // Check the PC table.
  if (binary_info.pc_table.empty()) {
    if (!env_.require_pc_table) {
      LOG(WARNING) << "Could not get PC table; CF table and debug symbols will "
                      "not be used";
      return;
    }
    LOG(ERROR) << "Could not get PC table; exiting (override with "
                  "--require_pc_table=false)";
    exit(EXIT_FAILURE);
  }

  // Check CF table.
  if (binary_info.cf_table.empty()) {
    LOG(WARNING)
        << "Could not get CF table; binary should be built with Clang 16 (or "
           "later) and with -fsanitize-coverage=control-flow flag";
    return;
  }

  // Construct call-graph and cfg using loaded cf_table and pc_table.
  // TODO(b/284044008): These two are currently used only inside
  // `CoverageFrontier`, so we can mask the bug's failure by conditionally
  // initializing them like this.
  if (!env_.use_coverage_frontier) return;
  binary_info.control_flow_graph.InitializeControlFlowGraph(
      binary_info.cf_table, binary_info.pc_table);
  binary_info.call_graph.InitializeCallGraph(binary_info.cf_table,
                                             binary_info.pc_table);
}
// Builds the `CENTIPEDE_RUNNER_FLAGS=...` environment string for the runner.
//
// The result is a ':'-joined list that starts with "CENTIPEDE_RUNNER_FLAGS="
// and ends with a trailing ':'. Limits and timeouts from `env_` are always
// present; the coverage-related flags are left out when `disable_coverage` is
// true. A non-empty `extra_flags` is appended verbatim as the last flag.
std::string CentipedeCallbacks::ConstructRunnerFlags(
    std::string_view extra_flags, bool disable_coverage) {
  std::vector<std::string> parts;
  parts.emplace_back("CENTIPEDE_RUNNER_FLAGS=");
  parts.push_back(absl::StrCat("timeout_per_input=", env_.timeout_per_input));
  parts.push_back(absl::StrCat("timeout_per_batch=", env_.timeout_per_batch));
  parts.push_back(
      absl::StrCat("address_space_limit_mb=", env_.address_space_limit_mb));
  parts.push_back(absl::StrCat("rss_limit_mb=", env_.rss_limit_mb));
  parts.push_back(absl::StrCat("stack_limit_kb=", env_.stack_limit_kb));
  parts.push_back(absl::StrCat("crossover_level=", env_.crossover_level));
  parts.push_back(absl::StrCat("max_len=", env_.max_len));

  if (env_.ignore_timeout_reports) {
    parts.emplace_back("ignore_timeout_reports");
  }

  // Coverage collection flags.
  if (!disable_coverage) {
    parts.push_back(absl::StrCat("path_level=", env_.path_level));
    if (env_.use_pc_features) {
      parts.emplace_back("use_pc_features");
    }
    if (env_.use_counter_features) {
      parts.emplace_back("use_counter_features");
    }
    if (env_.use_cmp_features) {
      parts.emplace_back("use_cmp_features");
    }
    parts.push_back(absl::StrCat("callstack_level=", env_.callstack_level));
    if (env_.use_auto_dictionary) {
      parts.emplace_back("use_auto_dictionary");
    }
    if (env_.use_dataflow_features) {
      parts.emplace_back("use_dataflow_features");
    }
  }

  if (!env_.runner_dl_path_suffix.empty()) {
    parts.push_back(
        absl::StrCat("dl_path_suffix=", env_.runner_dl_path_suffix));
  }
  if (!env_.pcs_file_path.empty()) {
    parts.push_back(absl::StrCat("pcs_file_path=", env_.pcs_file_path));
  }
  if (!extra_flags.empty()) {
    parts.emplace_back(extra_flags);
  }

  // The empty last element makes the joined string end with ':'.
  parts.emplace_back();
  return absl::StrJoin(parts, ":");
}
// Returns the cached `Command` whose path equals `binary`, creating and
// caching a new one on first use.
//
// A new command gets the runner flags (shared-memory blob paths, failure
// description/signature paths, test name) in its environment, plus
// LLVM_PROFILE_FILE when `binary` is the clang coverage binary. Its stdout and
// stderr both go to `execute_log_path_`. If `env_.fork_server` is set, the
// fork server is started before the command is returned.
Command &CentipedeCallbacks::GetOrCreateCommandForBinary(
    std::string_view binary) {
  const auto existing =
      std::find_if(commands_.begin(), commands_.end(),
                   [&](const auto &cmd) { return cmd->path() == binary; });
  if (existing != commands_.end()) return **existing;

  // We don't want to collect coverage for extra binaries. It won't be used.
  const auto extra_binaries_end = env_.extra_binaries.end();
  const bool disable_coverage =
      std::find(env_.extra_binaries.begin(), extra_binaries_end, binary) !=
      extra_binaries_end;

  const std::string extra_runner_flags =
      absl::StrCat(":shmem:test=", env_.test_name, ":arg1=",
                   inputs_blobseq_.path(), ":arg2=", outputs_blobseq_.path(),
                   ":failure_description_path=", failure_description_path_,
                   ":failure_signature_path=", failure_signature_path_, ":");

  std::vector<std::string> env_to_add;
  env_to_add.push_back(
      ConstructRunnerFlags(extra_runner_flags, disable_coverage));
  if (env_.clang_coverage_binary == binary) {
    env_to_add.push_back(
        absl::StrCat("LLVM_PROFILE_FILE=",
                     WorkDir{env_}.SourceBasedCoverageRawProfilePath()));
  }

  Command::Options options;
  options.env_add = std::move(env_to_add);
  options.env_remove = EnvironmentVariablesToUnset();
  options.stdout_file = execute_log_path_;
  options.stderr_file = execute_log_path_;
  options.temp_file_path = temp_input_file_path_;

  commands_.push_back(std::make_unique<Command>(binary, std::move(options)));
  Command &created = *commands_.back();
  if (env_.fork_server) created.StartForkServer(temp_dir_, Hash(binary));
  return created;
}
int CentipedeCallbacks::RunBatchForBinary(std::string_view binary) {
auto &cmd = GetOrCreateCommandForBinary(binary);
const absl::Duration amortized_timeout =
env_.timeout_per_batch == 0
? absl::InfiniteDuration()
: absl::Seconds(env_.timeout_per_batch) + absl::Seconds(5);
const auto deadline = absl::Now() + amortized_timeout;
int exit_code = EXIT_SUCCESS;
const bool should_clean_up = [&] {
if (!cmd.ExecuteAsync()) return true;
const std::optional<int> ret = cmd.Wait(deadline);
if (!ret.has_value()) return true;
exit_code = *ret;
return false;
}();
if (should_clean_up) {
exit_code = [&] {
if (!cmd.is_executing()) return EXIT_FAILURE;
LOG(ERROR) << "Cleaning up the batch execution.";
cmd.RequestStop();
const auto ret = cmd.Wait(absl::Now() + absl::Seconds(60));
if (ret.has_value()) return *ret;
LOG(ERROR) << "Batch execution cleanup failed to end in 60s.";
return EXIT_FAILURE;
}();
commands_.erase(
std::find_if(commands_.begin(), commands_.end(),
[=](const auto &cmd) { return cmd->path() == binary; }));
}
return exit_code;
}
int CentipedeCallbacks::ExecuteCentipedeSancovBinaryWithShmem(
std::string_view binary, const std::vector<ByteArray> &inputs,
BatchResult &batch_result) {
auto start_time = absl::Now();
batch_result.ClearAndResize(inputs.size());
// Reset the blobseqs.
inputs_blobseq_.Reset();
outputs_blobseq_.Reset();
size_t num_inputs_written = 0;
if (env_.has_input_wildcards) {
CHECK_EQ(inputs.size(), 1);
WriteToLocalFile(temp_input_file_path_, inputs[0]);
num_inputs_written = 1;
} else {
// Feed the inputs to inputs_blobseq_.
num_inputs_written = RequestExecution(inputs, inputs_blobseq_);
}
if (num_inputs_written != inputs.size()) {
LOG(INFO) << "Wrote " << num_inputs_written << "/" << inputs.size()
<< " inputs; shmem_size_mb might be too small: "
<< env_.shmem_size_mb;
}
// Run.
const int exit_code = RunBatchForBinary(binary);
inputs_blobseq_.ReleaseSharedMemory(); // Inputs are already consumed.
// Get results.
batch_result.exit_code() = exit_code;
const bool read_success = batch_result.Read(outputs_blobseq_);
LOG_IF(ERROR, !read_success) << "Failed to read batch result!";
outputs_blobseq_.ReleaseSharedMemory(); // Outputs are already consumed.
// We may have fewer feature blobs than inputs if
// * some inputs were not written (i.e. num_inputs_written < inputs.size).
// * Logged above.
// * some outputs were not written because the subprocess died.
// * Will be logged by the caller.
// * some outputs were not written because the outputs_blobseq_ overflown.
// * Logged by the following code.
if (exit_code == 0 && read_success &&
batch_result.num_outputs_read() != num_inputs_written) {
LOG(INFO) << "Read " << batch_result.num_outputs_read() << "/"
<< num_inputs_written
<< " outputs; shmem_size_mb might be too small: "
<< env_.shmem_size_mb;
}
if (env_.print_runner_log) PrintExecutionLog();
if (exit_code != EXIT_SUCCESS) {
ReadFromLocalFile(execute_log_path_, batch_result.log());
ReadFromLocalFile(failure_description_path_,
batch_result.failure_description());
if (std::filesystem::exists(failure_signature_path_)) {
ReadFromLocalFile(failure_signature_path_,
batch_result.failure_signature());
} else {
// TODO(xinhaoyuan): Refactor runner to use dispatcher so this branch can
// be removed.
batch_result.failure_signature() = batch_result.failure_description();
}
// Remove the failure description and signature files here so that they do
// not stay until another failed execution.
std::filesystem::remove(failure_description_path_);
std::filesystem::remove(failure_signature_path_);
}
VLOG(1) << __FUNCTION__ << " took " << (absl::Now() - start_time);
return exit_code;
}
// See also: `DumpSeedsToDir()`.
bool CentipedeCallbacks::GetSeedsViaExternalBinary(
std::string_view binary, size_t &num_avail_seeds,
std::vector<ByteArray> &seeds) {
const auto output_dir = std::filesystem::path{temp_dir_} / "seed_inputs";
std::error_code error;
CHECK(std::filesystem::create_directories(output_dir, error));
CHECK(!error);
std::string centipede_runner_flags = absl::StrCat(
"CENTIPEDE_RUNNER_FLAGS=:dump_seed_inputs:test=", env_.test_name,
":arg1=", output_dir.string(), ":");
if (!env_.runner_dl_path_suffix.empty()) {
absl::StrAppend(&centipede_runner_flags,
"dl_path_suffix=", env_.runner_dl_path_suffix, ":");
}
Command::Options cmd_options;
cmd_options.env_add = {std::move(centipede_runner_flags)};
cmd_options.env_remove = EnvironmentVariablesToUnset();
cmd_options.stdout_file = execute_log_path_;
cmd_options.stderr_file = execute_log_path_;
cmd_options.temp_file_path = temp_input_file_path_;
Command cmd{binary, std::move(cmd_options)};
const int retval = cmd.Execute();
if (env_.print_runner_log) {
LOG(INFO) << "Getting seeds via external binary returns " << retval;
PrintExecutionLog();
}
std::vector<std::string> seed_input_filenames;
for (const auto &dir_ent : std::filesystem::directory_iterator(output_dir)) {
seed_input_filenames.push_back(dir_ent.path().filename());
}
std::sort(seed_input_filenames.begin(), seed_input_filenames.end());
num_avail_seeds = seed_input_filenames.size();
size_t num_seeds_read;
for (num_seeds_read = 0; num_seeds_read < seeds.size() &&
num_seeds_read < seed_input_filenames.size();
++num_seeds_read) {
ReadFromLocalFile(
(output_dir / seed_input_filenames[num_seeds_read]).string(),
seeds[num_seeds_read]);
}
seeds.resize(num_seeds_read);
std::filesystem::remove_all(output_dir, error);
LOG_IF(ERROR, error) << "Failed to remove seed inputs directory: "
<< error.message();
return retval == 0;
}
// See also: `DumpSerializedTargetConfigToFile()`.
//
// Runs `binary` in "dump_configuration" mode, pointing it at
// `<temp_dir_>/configuration`. On a zero exit code, `serialized_config`
// receives the file contents, or becomes empty when the file was not created.
// On a non-zero exit code `serialized_config` is left untouched. The
// configuration file is removed before returning.
//
// Returns true iff the binary exited with code 0.
bool CentipedeCallbacks::GetSerializedTargetConfigViaExternalBinary(
    std::string_view binary, std::string &serialized_config) {
  const std::filesystem::path config_file_path =
      std::filesystem::path{temp_dir_} / "configuration";

  std::string centipede_runner_flags =
      absl::StrCat("CENTIPEDE_RUNNER_FLAGS=:dump_configuration:arg1=",
                   config_file_path.string(), ":");
  if (!env_.runner_dl_path_suffix.empty()) {
    absl::StrAppend(&centipede_runner_flags,
                    "dl_path_suffix=", env_.runner_dl_path_suffix, ":");
  }

  Command::Options cmd_options;
  cmd_options.env_add = {std::move(centipede_runner_flags)};
  cmd_options.env_remove = EnvironmentVariablesToUnset();
  cmd_options.stdout_file = execute_log_path_;
  cmd_options.stderr_file = execute_log_path_;
  cmd_options.temp_file_path = temp_input_file_path_;
  Command cmd{binary, std::move(cmd_options)};

  const int retval = cmd.Execute();
  const bool is_success = retval == 0;
  if (is_success) {
    if (!std::filesystem::exists(config_file_path)) {
      // A successful run without a file means "no configuration".
      serialized_config.clear();
    } else {
      ReadFromLocalFile(config_file_path.string(), serialized_config);
    }
  }
  // A failed run always gets its log printed.
  if (!is_success || env_.print_runner_log) {
    PrintExecutionLog();
  }

  std::error_code error;
  std::filesystem::remove(config_file_path, error);
  CHECK(!error);
  return is_success;
}
// See also: MutateInputsFromShmem().
//
// Sends a mutation request for `inputs` (asking for `num_mutants` mutants) to
// `binary` through the shared-memory blob sequences, runs the binary, and
// returns the result read back from the output sequence. The exit code of the
// run is stored in the returned result. Must not be used with targets that
// take inputs via wildcards.
MutationResult CentipedeCallbacks::MutateViaExternalBinary(
    std::string_view binary, const std::vector<MutationInputRef> &inputs,
    size_t num_mutants) {
  CHECK(!env_.has_input_wildcards)
      << "Standalone binary does not support custom mutator";

  const auto started_at = absl::Now();
  inputs_blobseq_.Reset();
  outputs_blobseq_.Reset();

  const size_t num_inputs_written =
      RequestMutation(num_mutants, inputs, inputs_blobseq_);
  if (num_inputs_written != inputs.size()) {
    LOG(INFO) << VV(num_inputs_written) << VV(inputs.size());
  }

  // Execute.
  const int exit_code = RunBatchForBinary(binary);
  inputs_blobseq_.ReleaseSharedMemory();  // Inputs are already consumed.

  const bool failed = exit_code != EXIT_SUCCESS;
  if (failed) {
    LOG(WARNING) << "Custom mutator failed with exit code: " << exit_code;
  }
  if (failed || env_.print_runner_log) {
    PrintExecutionLog();
  }

  MutationResult result;
  result.exit_code() = exit_code;
  result.Read(num_mutants, outputs_blobseq_);
  outputs_blobseq_.ReleaseSharedMemory();  // Outputs are already consumed.

  VLOG(1) << __FUNCTION__ << " took " << (absl::Now() - started_at);
  return result;
}
// Loads dictionary entries from `dictionary_path` into the mutator selected
// by `env_.use_legacy_default_mutator` and returns how many were loaded.
//
// An empty path loads nothing and returns 0. The file is first tried as a
// plain-text AFL/libFuzzer dictionary; if that yields no entries it is read as
// a blob file. A blob file that cannot be opened or closed, or that yields no
// entries, fails a CHECK.
size_t CentipedeCallbacks::LoadDictionary(std::string_view dictionary_path) {
  if (dictionary_path.empty()) return 0;

  // Attempt 1: AFL/libFuzzer plain-text format. Such dictionaries are plain
  // text, so a Centipede-native dictionary is never mistaken for one.
  std::string text;
  ReadFromLocalFile(dictionary_path, text);
  std::vector<ByteArray> entries;
  const bool parsed_as_afl = ParseAFLDictionary(text, entries);
  if (parsed_as_afl && !entries.empty()) {
    if (env_.use_legacy_default_mutator) {
      byte_array_mutator_.AddToDictionary(entries);
    } else {
      fuzztest_mutator_.AddToDictionary(entries);
    }
    LOG(INFO) << "Loaded " << entries.size()
              << " dictionary entries from AFL/libFuzzer dictionary "
              << dictionary_path;
    return entries.size();
  }

  // Attempt 2: not plain text, so assume the encoded corpus format.
  auto reader = DefaultBlobFileReaderFactory();
  CHECK_OK(reader->Open(dictionary_path))
      << "Error in opening dictionary file: " << dictionary_path;
  std::vector<ByteArray> unpacked_dictionary;
  ByteSpan blob;
  // Keep reading blobs until the reader reports a non-OK status.
  while (reader->Read(blob).ok()) {
    unpacked_dictionary.emplace_back(blob.begin(), blob.end());
  }
  CHECK_OK(reader->Close())
      << "Error in closing dictionary file: " << dictionary_path;
  CHECK(!unpacked_dictionary.empty())
      << "Empty or corrupt dictionary file: " << dictionary_path;

  if (env_.use_legacy_default_mutator) {
    byte_array_mutator_.AddToDictionary(unpacked_dictionary);
  } else {
    fuzztest_mutator_.AddToDictionary(unpacked_dictionary);
  }
  LOG(INFO) << "Loaded " << unpacked_dictionary.size()
            << " dictionary entries from " << dictionary_path;
  return unpacked_dictionary.size();
}
// Logs the contents of `execute_log_path_` line by line, each line prefixed
// with "LOG: ". Leading and trailing whitespace of the whole file is stripped
// first. Emits a warning instead when the log file does not exist.
void CentipedeCallbacks::PrintExecutionLog() const {
  const bool log_exists = std::filesystem::exists(execute_log_path_);
  if (!log_exists) {
    LOG(WARNING) << "Log file for the last executed binary does not exist: "
                 << execute_log_path_;
    return;
  }

  std::string log_text;
  ReadFromLocalFile(execute_log_path_, log_text);
  const auto trimmed = absl::StripAsciiWhitespace(log_text);
  for (const auto &log_line : absl::StrSplit(trimmed, '\n')) {
    LOG(INFO).NoPrefix() << "LOG: " << log_line;
  }
}
} // namespace fuzztest::internal

View File

@ -0,0 +1,229 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef THIRD_PARTY_CENTIPEDE_CENTIPEDE_CALLBACKS_H_
#define THIRD_PARTY_CENTIPEDE_CENTIPEDE_CALLBACKS_H_
#include <cstddef>
#include <filesystem> // NOLINT
#include <string>
#include <string_view>
#include <vector>
#include "absl/base/nullability.h"
#include "absl/log/check.h"
#include "absl/status/statusor.h"
#include "./centipede/binary_info.h"
#include "./centipede/byte_array_mutator.h"
#include "./centipede/command.h"
#include "./centipede/environment.h"
#include "./centipede/fuzztest_mutator.h"
#include "./centipede/mutation_input.h"
#include "./centipede/runner_result.h"
#include "./centipede/shared_memory_blob_sequence.h"
#include "./centipede/util.h"
#include "./common/defs.h"
namespace fuzztest::internal {
// Base class for user-defined fuzzing callbacks. Users derive from it and
// override at least the pure virtual functions.
//
// Derived classes must be thread-compatible.
// Note: the interface is not yet stable and may change w/o a notice.
class CentipedeCallbacks {
 public:
  // `env` is used to pass flags to `this`, it must outlive `this`.
  CentipedeCallbacks(const Environment &env)
      : env_(env),
        byte_array_mutator_(env.knobs, GetRandomSeed(env.seed)),
        fuzztest_mutator_(env.knobs, GetRandomSeed(env.seed)),
        inputs_blobseq_(shmem_name1_.c_str(), env.shmem_size_mb << 20,
                        env.use_posix_shmem),
        outputs_blobseq_(shmem_name2_.c_str(), env.shmem_size_mb << 20,
                         env.use_posix_shmem) {
    // Only the mutator selected by the flag gets its maximum length set.
    if (env.use_legacy_default_mutator) {
      CHECK(byte_array_mutator_.set_max_len(env.max_len));
    } else {
      CHECK(fuzztest_mutator_.set_max_len(env.max_len));
    }
  }

  virtual ~CentipedeCallbacks() = default;

  // Runs `binary` on every element of `inputs` and fills `batch_result`,
  // discarding whatever it held before.
  // Returns true on success, false on failure.
  // Post-condition:
  // `batch_result` has results for every `input`, even on failure.
  virtual bool Execute(std::string_view binary,
                       const std::vector<ByteArray> &inputs,
                       BatchResult &batch_result) = 0;

  // Given non-empty `inputs`, produces at most `num_mutants` mutants. The
  // default implementation delegates to the legacy byte-array mutator or to
  // the FuzzTest mutator, depending on `env_.use_legacy_default_mutator`.
  virtual std::vector<ByteArray> Mutate(
      const std::vector<MutationInputRef> &inputs, size_t num_mutants) {
    if (env_.use_legacy_default_mutator) {
      return byte_array_mutator_.MutateMany(inputs, num_mutants);
    }
    return fuzztest_mutator_.MutateMany(inputs, num_mutants);
  }

  // Populates the BinaryInfo using the `symbolizer_path` and `coverage_binary`
  // in `env_`. The tables may not be populated if the PC table cannot be
  // determined from the `coverage_binary` or if symbolization fails. Exits if
  // PC table was not populated and `env_.require_pc_table` is set.
  virtual void PopulateBinaryInfo(BinaryInfo &binary_info);

  // Retrieves at most `num_seeds` seed inputs. Returns the number of seeds
  // available if `num_seeds` had been large enough.
  // The default implementation offers a single seed consisting of one zero
  // byte; `seeds` is left untouched when `num_seeds` is 0.
  virtual size_t GetSeeds(size_t num_seeds, std::vector<ByteArray> &seeds) {
    if (num_seeds > 0) {
      seeds = {{0}};
    }
    return 1;
  }

  // Returns the configuration from the test target in the serialized form.
  // Returns an empty string if the test target doesn't provide configuration.
  virtual absl::StatusOr<std::string> GetSerializedTargetConfig() { return ""; }

 protected:
  // Helpers that the user-defined class may use if needed.

  // Same as ExecuteCentipedeSancovBinary, but uses shared memory.
  // Much faster for fast targets since it uses fewer system calls.
  int ExecuteCentipedeSancovBinaryWithShmem(
      std::string_view binary, const std::vector<ByteArray> &inputs,
      BatchResult &batch_result);

  // Constructs a string CENTIPEDE_RUNNER_FLAGS=":flag1:flag2:...",
  // where the flags are determined by `env` and also include `extra_flags`.
  // If `disable_coverage`, coverage options are not added.
  std::string ConstructRunnerFlags(std::string_view extra_flags = "",
                                   bool disable_coverage = false);

  // Uses an external binary `binary` to generate seed inputs. The binary should
  // be linked against :centipede_runner and implement the RunnerCallbacks
  // interface as described in runner_interface.h.
  //
  // Retrieves the first `seeds.size()` inputs (if exist) from `binary`,
  // replacing the existing elements of `seeds`, and shrinking `seeds` if
  // needed. Sets `num_avail_seeds` to the number of available seeds, which may
  // be more than `seeds.size()`.
  //
  // Returns true on success.
  bool GetSeedsViaExternalBinary(std::string_view binary,
                                 size_t &num_avail_seeds,
                                 std::vector<ByteArray> &seeds);

  // Uses an external binary `binary` to get the serialized test target
  // configuration. The binary should be linked against :centipede_runner and
  // implement the RunnerCallbacks interface as described in runner_interface.h.
  //
  // If the binary returns with success and doesn't provide the configuration,
  // sets `serialized_config` to empty string.
  //
  // Returns true on success.
  bool GetSerializedTargetConfigViaExternalBinary(
      std::string_view binary, std::string &serialized_config);

  // Uses an external binary `binary` to mutate `inputs`. The binary
  // should be linked against :centipede_runner and implement the
  // RunnerCallbacks interface as described in runner_interface.h,
  // or implement the legacy Structure-Aware Fuzzing interface described here:
  // github.com/google/fuzzing/blob/master/docs/structure-aware-fuzzing.md
  //
  // Returns a `MutationResult` instance where `exit_code` indicates whether
  // the binary was executed successfully, `has_custom_mutator` indicates
  // whether the binary has a custom mutator, and if it does, `mutants` contains
  // at most `num_mutants` non-empty mutants.
  MutationResult MutateViaExternalBinary(
      std::string_view binary, const std::vector<MutationInputRef> &inputs,
      size_t num_mutants);

  // Loads the dictionary from `dictionary_path`,
  // returns the number of dictionary entries loaded.
  size_t LoadDictionary(std::string_view dictionary_path);

 protected:
  const Environment &env_;
  ByteArrayMutator byte_array_mutator_;
  FuzzTestMutator fuzztest_mutator_;

 private:
  // Returns a Command object with matching `binary` from commands_,
  // creates one if needed.
  Command &GetOrCreateCommandForBinary(std::string_view binary);

  // Runs a batch with the command `binary` and returns the exit code.
  int RunBatchForBinary(std::string_view binary);

  // Prints the execution log from the last executed binary.
  void PrintExecutionLog() const;

  // State used by ExecuteCentipedeSancovBinaryWithShmem. It is set up once at
  // construction so the hot loop does no extra work.
  // NOTE: the declaration order below matters: the shared-memory names must
  // precede the blob sequences, which are constructed from them.
  std::string temp_dir_ = TemporaryLocalDirPath();
  std::string temp_input_file_path_ =
      std::filesystem::path(temp_dir_).append("temp_input_file");
  const std::string execute_log_path_ =
      std::filesystem::path(temp_dir_).append("log");
  std::string failure_description_path_ =
      std::filesystem::path(temp_dir_).append("failure_description");
  std::string failure_signature_path_ =
      std::filesystem::path(temp_dir_).append("failure_signature");
  const std::string shmem_name1_ = ProcessAndThreadUniqueID("/ctpd-shm1-");
  const std::string shmem_name2_ = ProcessAndThreadUniqueID("/ctpd-shm2-");
  SharedMemoryBlobSequence inputs_blobseq_;
  SharedMemoryBlobSequence outputs_blobseq_;

  // Need unique_ptr indirection because Command is not movable/copyable.
  std::vector<std::unique_ptr<Command>> commands_;
};
// Interface for objects that hand out and take back CentipedeCallbacks.
// The usual implementation just news/deletes an object of the right type
// (see DefaultCallbacksFactory below); others, e.g. in tests, may return an
// object that lives elsewhere and skip the deletion.
class CentipedeCallbacksFactory {
 public:
  virtual ~CentipedeCallbacksFactory() = default;

  // Returns a callbacks object configured from `env`.
  virtual CentipedeCallbacks *create(const Environment &env) = 0;
  // Takes back an object previously returned by `create`.
  virtual void destroy(CentipedeCallbacks *callbacks) = 0;
};
// The typical CentipedeCallbacksFactory for a concrete callbacks `Type`:
// `create` allocates a `Type` with `new`, `destroy` releases it with `delete`.
template <typename Type>
class DefaultCallbacksFactory : public CentipedeCallbacksFactory {
 public:
  CentipedeCallbacks *create(const Environment &env) override {
    CentipedeCallbacks *created = new Type(env);
    return created;
  }

  void destroy(CentipedeCallbacks *callbacks) override {
    delete callbacks;
  }
};
// RAII wrapper around a CentipedeCallbacksFactory: obtains a
// CentipedeCallbacks object from `factory` in CTOR and returns it to the same
// factory in DTOR. `factory` must outlive this object.
//
// Copying is disabled: a copy would hold the same raw pointer and hand it to
// `destroy` a second time when it goes out of scope.
class ScopedCentipedeCallbacks {
 public:
  ScopedCentipedeCallbacks(CentipedeCallbacksFactory &factory,
                           const Environment &env)
      : factory_(factory), callbacks_(factory_.create(env)) {}
  ScopedCentipedeCallbacks(const ScopedCentipedeCallbacks &) = delete;
  ScopedCentipedeCallbacks &operator=(const ScopedCentipedeCallbacks &) =
      delete;
  ~ScopedCentipedeCallbacks() { factory_.destroy(callbacks_); }

  // Returns the managed callbacks object; ownership stays with `this`.
  CentipedeCallbacks *absl_nonnull callbacks() { return callbacks_; }

 private:
  CentipedeCallbacksFactory &factory_;
  CentipedeCallbacks *callbacks_;
};
} // namespace fuzztest::internal
#endif // THIRD_PARTY_CENTIPEDE_CENTIPEDE_CALLBACKS_H_

View File

@ -0,0 +1,119 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/centipede_default_callbacks.h"
#include <cstddef>
#include <cstdlib>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include "absl/log/check.h"
#include "absl/log/log.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "./centipede/centipede_callbacks.h"
#include "./centipede/environment.h"
#include "./centipede/mutation_input.h"
#include "./centipede/runner_result.h"
#include "./centipede/stop.h"
#include "./common/defs.h"
#include "./common/logging.h" // IWYU pragma: keep
namespace fuzztest::internal {
// Loads every dictionary listed in `env_.dictionary` and, for targets that
// take their input through wildcards, marks the custom mutator as unusable
// up front.
CentipedeDefaultCallbacks::CentipedeDefaultCallbacks(const Environment &env)
    : CentipedeCallbacks(env) {
  for (const auto &path : env_.dictionary) {
    LoadDictionary(path);
  }
  if (!env_.has_input_wildcards) return;
  LOG(INFO) << "Disabling custom mutator for standalone target";
  custom_mutator_is_usable_ = false;
}
// Runs `binary` on `inputs` via the shared-memory execution helper.
// Returns true iff the helper reports exit code 0.
bool CentipedeDefaultCallbacks::Execute(std::string_view binary,
                                        const std::vector<ByteArray> &inputs,
                                        BatchResult &batch_result) {
  const int exit_code =
      ExecuteCentipedeSancovBinaryWithShmem(binary, inputs, batch_result);
  return exit_code == 0;
}
// Asks the target binary for up to `num_seeds` seeds and returns the number
// of seeds it has available. Falls back to the base-class implementation when
// the binary reports failure.
size_t CentipedeDefaultCallbacks::GetSeeds(size_t num_seeds,
                                           std::vector<ByteArray> &seeds) {
  // Make room for the requested number of seeds; the helper shrinks `seeds`
  // to what it actually read.
  seeds.resize(num_seeds);
  // NOTE: `num_seeds` is passed by reference and is overwritten with the
  // number of seeds the binary produced, so the fallback call below sees that
  // value rather than the caller's original request.
  const bool loaded_from_binary =
      GetSeedsViaExternalBinary(env_.binary, num_seeds, seeds);
  if (!loaded_from_binary) {
    return CentipedeCallbacks::GetSeeds(num_seeds, seeds);
  }
  return num_seeds;
}
// Returns the serialized configuration obtained from the target binary, or
// an internal error when the binary could not provide it.
absl::StatusOr<std::string>
CentipedeDefaultCallbacks::GetSerializedTargetConfig() {
  std::string config;
  const bool ok = GetSerializedTargetConfigViaExternalBinary(env_.binary, config);
  if (!ok) {
    return absl::InternalError(
        "Failed to get serialized configuration from the target binary.");
  }
  return config;
}
// Produces at most `num_mutants` mutants of `inputs`.
//
// Unless the custom mutator is known to be unusable, the target binary is
// asked to mutate first. The first successful run decides whether the binary
// has a custom mutator; that decision is cached in
// `custom_mutator_is_usable_`. If the binary fails while no stop was
// requested, an early stop with EXIT_FAILURE is requested and no mutants are
// returned. In all remaining cases the built-in mutator is used.
std::vector<ByteArray> CentipedeDefaultCallbacks::Mutate(
    const std::vector<MutationInputRef> &inputs, size_t num_mutants) {
  if (num_mutants == 0) return {};

  // The custom mutator was already ruled out: use the built-in one.
  if (!custom_mutator_is_usable_.value_or(true)) {
    return CentipedeCallbacks::Mutate(inputs, num_mutants);
  }

  MutationResult result =
      MutateViaExternalBinary(env_.binary, inputs, num_mutants);

  if (result.exit_code() != EXIT_SUCCESS) {
    if (ShouldStop()) {
      LOG(WARNING) << "Custom mutator failed, but ignored since the stop "
                      "condition it met. Possibly what triggered the stop "
                      "condition also interrupted the mutator.";
      // Returning whatever mutants we got before the failure.
      return std::move(result).mutants();
    }
    LOG(ERROR) << "Test binary failed when asked to mutate inputs - exiting.";
    RequestEarlyStop(EXIT_FAILURE);
    return {};
  }

  // First successful run: record whether the binary has a custom mutator.
  if (!custom_mutator_is_usable_.has_value()) {
    custom_mutator_is_usable_ = result.has_custom_mutator();
    if (*custom_mutator_is_usable_) {
      LOG(INFO) << "Custom mutator detected; will use it.";
    } else {
      LOG(INFO) << "Custom mutator not detected; falling back to the "
                   "built-in mutator.";
    }
  }

  if (*custom_mutator_is_usable_) {
    // TODO(b/398261908): Exit with failure instead of crashing.
    CHECK(result.has_custom_mutator())
        << "Test binary no longer has a custom mutator, even though it was "
           "previously detected.";
    if (!result.mutants().empty()) return std::move(result).mutants();
    LOG_FIRST_N(WARNING, 5) << "Custom mutator returned no mutants; will "
                               "generate some using the built-in mutator.";
  }

  // Fall back to the internal mutator.
  return CentipedeCallbacks::Mutate(inputs, num_mutants);
}
} // namespace fuzztest::internal

View File

@ -0,0 +1,54 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
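// Example fuzzer built on top of Centipede. It is capable of fuzzing any binary
// target built with sancov (see build_defs.bzl).
// CentipedeDefaultCallbacks::Mutate first tries the target's custom mutator
// and otherwise falls back to CentipedeCallbacks::Mutate, which uses
// ByteArrayMutator or FuzzTestMutator depending on
// --use_legacy_default_mutator.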
#ifndef THIRD_PARTY_CENTIPEDE_CENTIPEDE_DEFAULT_CALLBACKS_H_
#define THIRD_PARTY_CENTIPEDE_CENTIPEDE_DEFAULT_CALLBACKS_H_
#include <cstddef>
#include <optional>
#include <string>
#include <string_view>
#include <vector>
#include "absl/status/statusor.h"
#include "./centipede/centipede_callbacks.h"
#include "./centipede/environment.h"
#include "./centipede/mutation_input.h"
#include "./centipede/runner_result.h"
#include "./common/defs.h"
namespace fuzztest::internal {
// Example CentipedeCallbacks implementation that delegates seeding,
// configuration retrieval, execution and mutation to the target binary.
class CentipedeDefaultCallbacks : public CentipedeCallbacks {
 public:
  explicit CentipedeDefaultCallbacks(const Environment &env);

  bool Execute(std::string_view binary, const std::vector<ByteArray> &inputs,
               BatchResult &batch_result) override;
  std::vector<ByteArray> Mutate(const std::vector<MutationInputRef> &inputs,
                                size_t num_mutants) override;
  size_t GetSeeds(size_t num_seeds, std::vector<ByteArray> &seeds) override;
  absl::StatusOr<std::string> GetSerializedTargetConfig() override;

 private:
  // Tri-state: empty until it is known whether the target's custom mutator
  // can be used, then true or false.
  std::optional<bool> custom_mutator_is_usable_;
};
} // namespace fuzztest::internal
#endif // THIRD_PARTY_CENTIPEDE_CENTIPEDE_DEFAULT_CALLBACKS_H_

View File

@ -0,0 +1,468 @@
// Copyright 2023 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Definitions of the Centipede flags to be expanded in different contexts.
// Each flag is defined as:
//
// CENTIPEDE_FLAG(type, name, default_value, description)
//
// default_value must be a compile-time constant.
// description must be a string literal.
#ifndef CENTIPEDE_FLAG
#error This file must be used only in Centipede with CENTIPEDE_FLAG defined.
#endif
// TODO(kcc): document usage of standalone binaries and how to use @@ wildcard.
// If the "binary" contains @@, it means the binary can only accept inputs
// from the command line, and only one input per process.
// @@ will be replaced with a path to file with the input.
// @@ is chosen to follow the AFL command line syntax.
// TODO(kcc): rename --binary to --command (same for --extra_binaries),
// while remaining backward compatible.
// ---------------------------------------------------------------------------
// Target binaries, test selection and working directories.
// ---------------------------------------------------------------------------
// Main target; all string flags in this group default to the empty string.
CENTIPEDE_FLAG(std::string, binary, "", "The target binary.")
// Optional separate binary used only as the coverage source.
CENTIPEDE_FLAG(
std::string, coverage_binary, "",
"The actual binary from which coverage is collected - if different "
"from --binary.")
// Overrides the content hash of coverage_binary when non-empty.
CENTIPEDE_FLAG(
std::string, binary_hash, "",
"If not-empty, this hash string is used instead of the hash of the "
"contents of coverage_binary. Use this flag when the coverage_binary "
"is not available nor needed, e.g. when using --distill.")
// Binary built with clang source-based coverage, used for report generation.
CENTIPEDE_FLAG(
std::string, clang_coverage_binary, "",
"A clang source-based code coverage binary used to produce "
"human-readable reports. Do not add this binary to extra_binaries. "
"You must have llvm-cov and llvm-profdata in your path to generate "
"the reports. --workdir in turn must be local in order for this "
"functionality to work. See "
"https://clang.llvm.org/docs/SourceBasedCodeCoverage.html")
// Name of the test inside the binary to operate on.
CENTIPEDE_FLAG(std::string, test_name, "",
"The name of test to pass to the binary to operate on.")
// Additional binaries that receive the same inputs; defaults to an empty list.
CENTIPEDE_FLAG(
std::vector<std::string>, extra_binaries, {},
"A comma-separated list of extra target binaries. These binaries are "
"fed the same inputs as the main binary, but the coverage feedback "
"from them is not collected. Use this e.g. to run the target under "
"sanitizers.")
CENTIPEDE_FLAG(std::string, workdir, "", "The working directory.")
// NOTE(review): the help text below reads "if the add new features";
// presumably "if they add new features" was intended.
CENTIPEDE_FLAG(
std::string, merge_from, "",
"Another working directory to merge the corpus from. Inputs from "
"--merge_from will be added to --workdir if the add new features.")
// ---------------------------------------------------------------------------
// Run length, sharding and threading.
// ---------------------------------------------------------------------------
// Defaults to the largest size_t value.
CENTIPEDE_FLAG(size_t, num_runs, std::numeric_limits<size_t>::max(),
"Number of inputs to run per shard (see --total_shards).")
CENTIPEDE_FLAG(size_t, total_shards, 1, "Number of shards.")
// NOTE(review): the help texts of --num_threads and --j below refer to
// --first_shard_index, which is not declared under that name in the visible
// part of this list; presumably it is an alias of this flag - confirm.
CENTIPEDE_FLAG(size_t, my_shard_index, 0,
"Index of the first shard, [0, --total_shards - --num_threads].")
CENTIPEDE_FLAG(
size_t, num_threads, 1,
"Number of threads to execute in one process. i-th thread, where i "
"is in [0, --num_threads), will work on shard "
"(--first_shard_index + i).")
// Shorthand flag; 0 (the default) leaves the individual flags in effect.
CENTIPEDE_FLAG(size_t, j, 0,
"If not 0, --j=N is a shorthand for "
"--num_threads=N --total_shards=N --first_shard_index=0. "
"Overrides values of these flags if they are also used.")
// ---------------------------------------------------------------------------
// Mutation, batching and corpus management.
// ---------------------------------------------------------------------------
CENTIPEDE_FLAG(size_t, max_len, 4000,
"Max length of mutants. Passed to mutator as a hint.")
CENTIPEDE_FLAG(
size_t, batch_size, 1000,
"The number of inputs given to the target at one time. Batches of "
"more than 1 input are used to amortize the process start-up cost.")
CENTIPEDE_FLAG(size_t, mutate_batch_size, 2,
"Mutate this many inputs to produce batch_size mutants")
// Selects between the legacy ByteArrayMutator and the FuzzTest mutator.
CENTIPEDE_FLAG(
bool, use_legacy_default_mutator, false,
"When set, use the legacy ByteArrayMutator as the default mutator. "
"Otherwise, the FuzzTest domain based mutator will be used.")
// 0 disables loading of other shards.
CENTIPEDE_FLAG(
size_t, load_other_shard_frequency, 10,
"Load a random other shard after processing this many batches. Use 0 "
"to disable loading other shards. For now, choose the value of this "
"flag so that shard loads happen at most once in a few minutes. In "
"future we may be able to find the suitable value automatically.")
// TODO(b/262798184): Remove once the bug is fixed.
// NOTE(review): the help text below contains "more that one shard" and
// "run out or RAM"; presumably "more than one" and "run out of RAM" were
// intended.
CENTIPEDE_FLAG(
bool, serialize_shard_loads, false,
"When this flag is on, shard loading is serialized. "
" Useful to avoid excessive RAM consumption when loading more"
" that one shard at a time. Currently, loading a single large shard"
" may create too many temporary heap allocations. "
" This means, if we load many large shards concurrently,"
" we may run out or RAM.")
// 0 (the default) means a seed is picked for the user.
CENTIPEDE_FLAG(
size_t, seed, 0,
"A seed for the random number generator. If 0, some other random "
"number is used as seed.")
// 0 disables pruning.
CENTIPEDE_FLAG(
size_t, prune_frequency, 100,
"Prune the corpus every time after this many inputs were added. If "
"zero, pruning is disabled. Pruning removes redundant inputs from "
"the corpus, e.g. inputs that have only \"frequent\", i.e. "
"uninteresting features. When the corpus gets larger than "
"--max_corpus_size, some random elements may also be removed.")
// ---------------------------------------------------------------------------
// Resource limits and timeouts for the target. For each flag in this group
// the value 0 means "not set".
// ---------------------------------------------------------------------------
// The default is platform-dependent: 0 on Apple platforms, 8192 elsewhere.
CENTIPEDE_FLAG(
size_t, address_space_limit_mb,
#ifdef __APPLE__
// Address space limit is ignored on MacOS.
// Reference:
// https://bugs.chromium.org/p/chromium/issues/detail?id=853873#c2
0
#else
8192
#endif
,
"If not zero, instructs the target to set setrlimit(RLIMIT_AS) to "
"this number of megabytes. Some targets (e.g. if built with ASAN, "
"which can't run with RLIMIT_AS) may choose to ignore this flag. See "
"also --rss_limit_mb.")
// RSS-based limit; see the help text for how it differs from the
// address-space limit above.
CENTIPEDE_FLAG(
size_t, rss_limit_mb, 4096,
"If not zero, instructs the target to fail if RSS goes over this "
"number of megabytes and report an OOM. See also "
"--address_space_limit_mb. These two flags have somewhat different "
"meaning. --address_space_limit_mb does not allow the process to "
"grow the used address space beyond the limit. --rss_limit_mb runs a "
"background thread that monitors max RSS and also checks max RSS "
"after executing every input, so it may detect OOM late. However "
"--rss_limit_mb allows Centipede to *report* an OOM condition in "
"most cases, while --address_space_limit_mb will cause a crash that "
"may be hard to attribute to OOM.")
// Stack usage limit in KiB; off by default.
CENTIPEDE_FLAG(
size_t, stack_limit_kb, 0,
"If not zero, instructs the target to fail if stack usage goes over "
"this number of KiB.")
// Per-input timeout in seconds.
CENTIPEDE_FLAG(
size_t, timeout_per_input, 60,
"If not zero, the timeout in seconds for a single input. If an input "
"runs longer than this, the runner process will abort. Support may "
"vary depending on the runner.")
// Per-batch timeout in seconds. The declared default is 0; per the help text
// the effective default is derived from --timeout_per_input and --batch_size.
CENTIPEDE_FLAG(
size_t, timeout_per_batch, 0,
"If not zero, the collective timeout budget in seconds for a single "
"batch of inputs. Each input in a batch still has up to "
"--timeout_per_input seconds to finish, but the entire batch must "
"finish within --timeout_per_batch seconds. The default is computed "
"as a function of --timeout_per_input * --batch_size. Support may "
"vary depending on the runner.")
// NOTE(review): declared as `size_t` but defaulted to `false` and described as
// an on/off switch ("If set") — presumably meant to be `bool`; confirm against
// the CENTIPEDE_FLAG consumers before changing the type.
CENTIPEDE_FLAG(size_t, ignore_timeout_reports, false,
"If set, will ignore reporting timeouts as errors.")
CENTIPEDE_FLAG(
absl::Time, stop_at, absl::InfiniteFuture(),
"Stop fuzzing in all shards (--total_shards) at approximately this "
"time in ISO-8601/RFC-3339 format, e.g. 2023-04-06T23:35:02Z. "
"If a given shard is still running at that time, it will gracefully "
"wind down by letting the current batch of inputs to finish and then "
"exiting. A special value 'infinite-future' (the default) is "
"supported. Tip: `date` is useful for conversion of mostly free "
"format human readable date/time strings, e.g. "
"--stop_at=$(date --date='next Monday 6pm' --utc --iso-8601=seconds) "
". Also see --stop_after. These two flags are mutually exclusive.")
CENTIPEDE_FLAG(
bool, fork_server, true,
"If true (default) tries to execute the target(s) via the fork "
"server, if supported by the target(s). Prepend the binary path with "
"'%f' to disable the fork server. --fork_server applies to binaries "
"passed via these flags: --binary, --extra_binaries, "
"--input_filter.")
CENTIPEDE_FLAG(
bool, full_sync, false,
"Perform a full corpus sync on startup. If true, feature sets and "
"corpora are read from all shards before fuzzing. This way fuzzing "
"starts with a full knowledge of the current state and will avoid "
"adding duplicating inputs. This however is very expensive when the "
"number of shards is very large.")
CENTIPEDE_FLAG(
bool, use_corpus_weights, true,
"If true, use weighted distribution when choosing the corpus element "
"to mutate. This flag is mostly for Centipede developers.")
CENTIPEDE_FLAG(
bool, use_coverage_frontier, false,
"If true, use coverage frontier when choosing the corpus element to "
"mutate. This flag is mostly for Centipede developers.")
CENTIPEDE_FLAG(
    size_t, max_corpus_size, 100000,
    "Indicates the number of inputs in the in-memory corpus after which "
    "more aggressive pruning will be applied.")
CENTIPEDE_FLAG(
size_t, crossover_level, 50,
"Defines how much crossover is used during mutations. 0 means no "
"crossover, 100 means the most aggressive crossover. See "
"https://en.wikipedia.org/wiki/Crossover_(genetic_algorithm).")
CENTIPEDE_FLAG(bool, use_pc_features, true,
"When available from instrumentation, use features derived from "
"PCs.")
CENTIPEDE_FLAG(
size_t, path_level, 0, // Not ready for wide usage.
"When available from instrumentation, use features derived from "
"bounded execution paths. Be careful, may cause exponential feature "
"explosion. 0 means no path features. Values between 1 and 100 "
"define how aggressively to use the paths.")
CENTIPEDE_FLAG(bool, use_cmp_features, true,
"When available from instrumentation, use features derived from "
"instrumentation of CMP instructions.")
CENTIPEDE_FLAG(
    size_t, callstack_level, 0,
    "When available from instrumentation, use features derived from "
    "observing the function call stacks. 0 means no callstack features. "
    "Values between 1 and 100 define how aggressively to use the "
    "callstacks. Level N roughly corresponds to N call frames.")
CENTIPEDE_FLAG(bool, use_auto_dictionary, true,
"If true, use automatically-generated dictionary derived from "
"intercepting comparison instructions, memcmp, and similar.")
CENTIPEDE_FLAG(bool, use_dataflow_features, true,
"When available from instrumentation, use features derived from "
"data flows.")
CENTIPEDE_FLAG(
bool, use_counter_features, false,
"When available from instrumentation, use features derived from "
"counting the number of occurrences of a given PC. When enabled, "
"supersedes --use_pc_features.")
CENTIPEDE_FLAG(bool, use_pcpair_features, false,
"If true, PC pairs are used as additional synthetic features. "
"Experimental, use with care - it may explode the corpus.")
CENTIPEDE_FLAG(
uint64_t, user_feature_domain_mask, ~0UL,
"A bitmask indicating which user feature domains should be enabled. "
"A value of zero will disable all user features.")
CENTIPEDE_FLAG(
size_t, feature_frequency_threshold, 100,
"Internal flag. When a given feature is present in the corpus this "
"many times Centipede will stop recording it for future corpus "
"elements. Larger values will use more RAM but may improve corpus "
"weights. Valid values are 2 - 255.")
CENTIPEDE_FLAG(bool, require_pc_table, true,
"If true, Centipede will exit if the --pc_table is not found.")
CENTIPEDE_FLAG(bool, require_seeds, false,
"If true, Centipede will exit if no seed inputs are found.")
CENTIPEDE_FLAG(
int, telemetry_frequency, 0,
"Dumping frequency for intermediate telemetry files, i.e. coverage "
"report (workdir/coverage-report-BINARY.*.txt), corpus stats "
"(workdir/corpus-stats-*.json), etc. Positive value N means dump "
"every N batches. Negative N means start dumping after 2^N processed "
"batches with exponential 2x back-off (e.g. for "
"--telemetry_frequency=-5, dump on batches 32, 64, 128,...). Zero "
"means no telemetry. Note that the before-fuzzing and after-fuzzing "
"telemetry are always dumped.")
CENTIPEDE_FLAG(bool, print_runner_log, false,
"If true, runner logs are printed after every batch. Note that "
"crash logs are always printed regardless of this flag's value.")
// TODO(kcc): --distill and several others had better be dedicated binaries.
CENTIPEDE_FLAG(
    bool, distill, false,
    "Distill (minimize) the --total_shards input shards from --workdir "
    "into --num_threads output shards. The input shards are randomly and "
    "evenly divided between --num_threads concurrent distillation "
    "threads to speed up processing. The threads share and update the "
    "global coverage info as they go, so the output shards will never "
    "have identical input/feature pairs (some inputs can still be "
    "identical if a non-deterministic target produced different features "
    "for identical inputs in the corpus). The features.* files are "
    "looked up in a --workdir subdirectory that corresponds to "
    "--coverage_binary and --binary_hash, if --binary_hash is provided; "
    "if it is not provided, the actual hash of the --coverage_binary "
    "file on disk is computed and used. Therefore, with an explicit "
    "--binary_hash, --coverage_binary can be just the basename of the "
    "actual target binary; without it, it must be the full path. "
    "Each distillation thread writes a distilled corpus shard to "
    "<--workdir>/distilled-<--coverage_binary basename>.<index>.")
CENTIPEDE_FLAG(
size_t, log_features_shards, 0,
"The first --log_features_shards shards will log newly observed "
"features as symbols. In most cases you don't need this to be >= 2.")
CENTIPEDE_FLAG(
std::string, knobs_file, "",
"If not empty, knobs will be read from this (possibly remote) file."
" The feature is experimental, not yet fully functional.")
CENTIPEDE_FLAG(
    std::string, corpus_to_files, "",
    "Save the remote corpus from workdir to the given directory, one "
    "file per input.")
CENTIPEDE_FLAG(
    std::string, crashes_to_files, "",
    "When set to a directory path, save the crashing reproducers and "
    "metadata from the workdir to the given path: Each crash with `ID` "
    "will be saved with file `ID.data` for the reproducer, `ID.desc` the "
    "description, `ID.sig` the signature. If multiple crashes with the same "
    "ID exist, only one crash will be saved.")
CENTIPEDE_FLAG(
std::string, corpus_from_files, "",
"Export a corpus from a local directory with one file per input into "
"the sharded remote corpus in workdir. Not recursive.")
CENTIPEDE_FLAG(
std::vector<std::string>, corpus_dir, {},
"Comma-separated list of paths to local corpus dirs, with one file "
"per input. At startup, the files are exported into the corpus in "
"--workdir. While fuzzing, the new corpus elements are written to "
"the first dir if it is not empty. This makes it more convenient to "
"interop with libFuzzer corpora.")
CENTIPEDE_FLAG(
std::string, symbolizer_path, "llvm-symbolizer",
"Path to the symbolizer tool. By default, we use llvm-symbolizer "
"and assume it is in PATH.")
CENTIPEDE_FLAG(
std::string, objdump_path, "objdump",
"Path to the objdump tool. By default, we use the system objdump "
"and assume it is in PATH.")
CENTIPEDE_FLAG(std::string, runner_dl_path_suffix, "",
               "If non-empty, this flag is passed to the Centipede runner. "
               "It tells the runner that this dynamic library is instrumented "
               "while the main binary is not. "
               "The value could be the full path, like '/path/to/my.so' "
               "or a suffix, like '/my.so' or 'my.so'. "
               "This flag is experimental and may be removed in future.")
CENTIPEDE_FLAG(
std::string, input_filter, "",
"Path to a tool that filters bad inputs. The tool is invoked as "
"`input_filter INPUT_FILE` and should return 0 if the input is good "
"and non-0 otherwise. Ignored if empty. The --input_filter is "
"invoked only for inputs that are considered for addition to the "
"corpus.")
CENTIPEDE_FLAG(
std::vector<std::string>, dictionary, {},
"A comma-separated list of paths to dictionary files. The dictionary "
"file is either in AFL/libFuzzer plain text format or in the binary "
"Centipede corpus file format. The flag is interpreted by "
"CentipedeCallbacks so its meaning may be different in custom "
"implementations of CentipedeCallbacks.")
CENTIPEDE_FLAG(
std::string, function_filter, "",
"A comma-separated list of functions that fuzzing needs to focus on. "
"If this list is non-empty, the fuzzer will mutate only those inputs "
"that trigger code in one of these functions.")
CENTIPEDE_FLAG(
std::string, for_each_blob, "",
"If non-empty, extracts individual blobs from the files given as "
"arguments, copies each blob to a temporary file, and applies this "
"command to that temporary file. %P is replaced with the temporary "
"file's path and %H is replaced with the blob's hash. Example:\n"
"$ centipede --for_each_blob='ls -l %P && echo %H' corpus.000000")
CENTIPEDE_FLAG(
std::string, experiment, "",
"A colon-separated list of values, each of which is a flag followed "
"by = and a comma-separated list of values. Example: "
"'foo=1,2,3:bar=10,20'. When non-empty, this flag is used to run an "
"A/B[/C/D...] experiment: different threads will set different "
"values of 'foo' and 'bar' and will run independent fuzzing "
"sessions. If more than one flag is given, all flag combinations are "
"tested. In example above: '--foo=1 --bar=10' ... "
"'--foo=3 --bar=20'. The number of threads should be multiple of the "
"number of flag combinations.")
CENTIPEDE_FLAG(
bool, analyze, false,
"If set, Centipede will read the corpora from the work dirs provided"
" as argv. If two corpora are provided, then analyze differences"
" between those corpora. If one corpus is provided, then save the"
" coverage report to a file within workdir with prefix"
" 'coverage-report-'.")
CENTIPEDE_FLAG(bool, exit_on_crash, false,
"If true, Centipede will exit on the first crash of the target.")
CENTIPEDE_FLAG(size_t, max_num_crash_reports, 5,
"report this many crashes per shard.")
CENTIPEDE_FLAG(
std::string, minimize_crash_file_path, "",
"If non-empty, a path to an input file that triggers a crash."
" Centipede will run the minimization loop and store smaller crashing"
" inputs in workdir/crashes.NNNNNN/, where NNNNNN is "
"--first_shard_index padded on the left with zeros. "
" --num_runs and --num_threads apply. "
" Assumes local workdir.")
CENTIPEDE_FLAG(
bool, batch_triage_suspect_only, false,
"If set, triage the crash on only the suspected input in a crashing "
"batch. Otherwise, triage on all the executed inputs")
CENTIPEDE_FLAG(
    size_t, shmem_size_mb, 1024,
    "Size of the shared memory regions used to communicate between the "
    "engine and the runner.")
CENTIPEDE_FLAG(
bool, use_posix_shmem,
#ifdef __APPLE__
true
#else
false
#endif
,
"[INTERNAL] When true, uses shm_open/shm_unlink instead of "
"memfd_create to allocate shared memory. You may want this if your "
"target doesn't have access to /proc/<arbitrary_pid> subdirs or the "
"memfd_create syscall is not supported.")
CENTIPEDE_FLAG(
bool, dry_run, false,
"Initializes as much of Centipede as possible without actually "
"running any fuzzing. Useful to validate the rest of the command "
"line, verify existence of all the input directories and files, "
"etc. Also useful in combination with --save_config or "
"--update_config to stop execution immediately after writing the "
"(updated) config file.")
CENTIPEDE_FLAG(bool, save_binary_info, false,
"Save the BinaryInfo from the fuzzing run within the working "
"directory.")
CENTIPEDE_FLAG(
bool, populate_binary_info, true,
"Get binary info from a coverage instrumented binary. This should "
"only be turned off when coverage is not based on instrumenting some "
"binary.")
CENTIPEDE_FLAG(
bool, riegeli,
#ifdef CENTIPEDE_DISABLE_RIEGELI
false
#else
true
#endif
,
"Use Riegeli file format (instead of the legacy bespoke encoding) "
"for storage")
CENTIPEDE_FLAG(bool, first_corpus_dir_output_only, false,
"If set, treat the first entry of `corpus_dir` as output-only. "
"For FuzzTest framework only, do not use from end-users.")
CENTIPEDE_FLAG(bool, load_shards_only, false,
"If set, load/merge shards without fuzzing new inputs. For "
"FuzzTest framework only, do not use from end-users.")
CENTIPEDE_FLAG(
bool, fuzztest_single_test_mode, false,
"If set, operate on the corpus database for a single test specified by "
"FuzzTest instead of all the tests. For FuzzTest framework only, do not "
"use from end-users.")
CENTIPEDE_FLAG(
std::string, fuzztest_configuration, "",
"If set, deserializes the FuzzTest configuration from the value as a "
"base64url string instead of querying the configuration via runner "
"callbacks. For FuzzTest framework only, do not use from end-users.")
CENTIPEDE_FLAG(
    bool, list_crash_ids, false,
    "If set, lists the crash IDs of a single test of the binary to the "
    "`list_crash_ids_file` with each crash ID in a single line. If there is "
    "no crash for the test, the empty content will be written to the file. "
    "For FuzzTest framework only, do not use from end-users.")
CENTIPEDE_FLAG(std::string, list_crash_ids_file, "",
"The path to list the crash IDs for `list_crash_ids`. For "
"FuzzTest framework only, do not use from end-users.")
CENTIPEDE_FLAG(std::string, crash_id, "",
"The crash ID used for `replay_crash` or `export_crash`. For "
"FuzzTest framework only, do not use from end-users.")
CENTIPEDE_FLAG(bool, replay_crash, false,
"If set, replay `crash_id` in the corpus database. For FuzzTest "
"framework only, do not use from end-users.")
CENTIPEDE_FLAG(
bool, export_crash, false,
"If set, export the input contents of `crash_id` from the corpus database. "
"For FuzzTest framework only, do not use from end-users.")
CENTIPEDE_FLAG(
std::string, export_crash_file, "",
"The path to export the input contents of `crash_id` for `export_crash`. "
"For FuzzTest framework only, do not use from end-users.")

View File

@ -0,0 +1,918 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/centipede_interface.h"
#include <unistd.h>
#include <algorithm>
#include <atomic>
#include <csignal>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <filesystem> // NOLINT
#include <iostream>
#include <memory>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include "absl/base/optimization.h"
#include "absl/cleanup/cleanup.h"
#include "absl/container/flat_hash_set.h"
#include "absl/log/check.h"
#include "absl/log/log.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/strings/ascii.h"
#include "absl/strings/escaping.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "absl/strings/str_join.h"
#include "absl/strings/str_replace.h"
#include "absl/time/clock.h"
#include "absl/time/time.h"
#include "absl/types/span.h"
#include "./centipede/analyze_corpora.h"
#include "./centipede/binary_info.h"
#include "./centipede/centipede.h"
#include "./centipede/centipede_callbacks.h"
#include "./centipede/command.h"
#include "./centipede/coverage.h"
#include "./centipede/crash_summary.h"
#include "./centipede/distill.h"
#include "./centipede/environment.h"
#include "./centipede/minimize_crash.h"
#include "./centipede/pc_info.h"
#include "./centipede/periodic_action.h"
#include "./centipede/runner_result.h"
#include "./centipede/seed_corpus_maker_lib.h"
#include "./centipede/stats.h"
#include "./centipede/stop.h"
#include "./centipede/thread_pool.h"
#include "./centipede/util.h"
#include "./centipede/workdir.h"
#include "./common/bazel.h"
#include "./common/blob_file.h"
#include "./common/defs.h"
#include "./common/hash.h"
#include "./common/logging.h" // IWYU pragma: keep
#include "./common/remote_file.h"
#include "./common/status_macros.h"
#include "./fuzztest/internal/configuration.h"
namespace fuzztest::internal {
namespace {
// Installs the SIGINT (Ctrl-C) handler, which logs a message and requests an
// early stop with the EXIT_FAILURE exit code.
// TODO(b/378532202): Replace this with a more generic mechanism that allows
// the caller of `CentipedeMain()` to indicate when to stop.
void SetSignalHandlers() {
  // The handler is registered for SIGINT only (see below), so any other
  // signal number reaching it is treated as unreachable.
  // NOTE(review): LOG() inside a signal handler is presumably not
  // async-signal-safe — confirm this is acceptable here.
  const auto on_signal = [](int signum) {
    if (signum != SIGINT) ABSL_UNREACHABLE();
    LOG(INFO) << "Ctrl-C pressed: winding down";
    RequestEarlyStop(EXIT_FAILURE);
  };
  struct sigaction action = {};
  action.sa_handler = on_signal;
  action.sa_flags = SA_ONSTACK;
  sigaction(SIGINT, &action, nullptr);
}
// Runs env.for_each_blob on every blob extracted from env.args.
// Returns EXIT_SUCCESS on success, EXIT_FAILURE otherwise.
int ForEachBlob(const Environment &env) {
auto tmpdir = TemporaryLocalDirPath();
CreateLocalDirRemovedAtExit(tmpdir);
std::string tmpfile = std::filesystem::path(tmpdir).append("t");
for (const auto &arg : env.args) {
LOG(INFO) << "Running '" << env.for_each_blob << "' on " << arg;
auto blob_reader = DefaultBlobFileReaderFactory();
absl::Status open_status = blob_reader->Open(arg);
if (!open_status.ok()) {
LOG(INFO) << "Failed to open " << arg << ": " << open_status;
return EXIT_FAILURE;
}
ByteSpan blob;
while (blob_reader->Read(blob) == absl::OkStatus()) {
ByteArray bytes;
bytes.insert(bytes.begin(), blob.data(), blob.end());
// TODO(kcc): [impl] add a variant of WriteToLocalFile that accepts Span.
WriteToLocalFile(tmpfile, bytes);
std::string command_line = absl::StrReplaceAll(
env.for_each_blob, {{"%P", tmpfile}, {"%H", Hash(bytes)}});
Command cmd(command_line);
// TODO(kcc): [as-needed] this creates one process per blob.
// If this flag gets active use, we may want to define special cases,
// e.g. if for_each_blob=="cp %P /some/where" we can do it in-process.
cmd.Execute();
if (ShouldStop()) return ExitCode();
}
}
return EXIT_SUCCESS;
}
// Loads corpora from work dirs provided in `env.args`, if there are two args
// provided, analyzes differences. If there is one arg provided, reports the
// function coverage. Returns EXIT_SUCCESS on success, EXIT_FAILURE otherwise.
int Analyze(const Environment &env) {
LOG(INFO) << "Analyze " << absl::StrJoin(env.args, ",");
CHECK(!env.binary.empty()) << "--binary must be used";
if (env.args.size() == 1) {
const CoverageResults coverage_results =
GetCoverage(env.binary_name, env.binary_hash, env.args[0]);
WorkDir workdir{env};
const std::string coverage_report_path =
workdir.CoverageReportPath(/*annotation=*/"");
DumpCoverageReport(coverage_results, coverage_report_path);
} else if (env.args.size() == 2) {
AnalyzeCorporaToLog(env.binary_name, env.binary_hash, env.args[0],
env.args[1]);
} else {
LOG(FATAL) << "for now, --analyze supports only 1 or 2 work dirs; got "
<< env.args.size();
}
return EXIT_SUCCESS;
}
// Writes the byte-span view of `pc_table` to the local file at `file_path`.
void SavePCTableToFile(const PCTable &pc_table, std::string_view file_path) {
  const auto pc_table_bytes = AsByteSpan(pc_table);
  WriteToLocalFile(file_path, pc_table_bytes);
}
// Builds the `BinaryInfo` for the run described by `env`:
//  - populated through the callbacks only if `env.populate_binary_info`;
//  - written to the workdir's binary-info directory if `env.save_binary_info`;
//  - if the binary uses legacy trace-pc instrumentation, the PC table is also
//    saved to a "pcs" file under the temporary local dir and that file's path
//    is stored in `pcs_file_path` (otherwise `pcs_file_path` is untouched).
// Fails a CHECK if `env.use_pcpair_features` is set but the PC table is empty.
BinaryInfo PopulateBinaryInfoAndSavePCsIfNecessary(
    const Environment &env, CentipedeCallbacksFactory &callbacks_factory,
    std::string &pcs_file_path) {
  BinaryInfo binary_info;

  // Targets whose coverage is not based on instrumenting a binary must skip
  // this step, leaving `binary_info` default-constructed.
  if (env.populate_binary_info) {
    ScopedCentipedeCallbacks callbacks_scope(callbacks_factory, env);
    callbacks_scope.callbacks()->PopulateBinaryInfo(binary_info);
  }

  if (env.save_binary_info) {
    const std::string binary_info_dir = WorkDir{env}.BinaryInfoDirPath();
    CHECK_OK(RemoteMkdir(binary_info_dir));
    LOG(INFO) << "Serializing binary info to: " << binary_info_dir;
    binary_info.Write(binary_info_dir);
  }

  if (binary_info.uses_legacy_trace_pc_instrumentation) {
    const std::filesystem::path local_tmp_dir{TemporaryLocalDirPath()};
    pcs_file_path = local_tmp_dir / "pcs";
    SavePCTableToFile(binary_info.pc_table, pcs_file_path);
  }

  if (env.use_pcpair_features) {
    CHECK(!binary_info.pc_table.empty())
        << "--use_pcpair_features requires non-empty pc_table";
  }

  return binary_info;
}
std::vector<Environment> CreateEnvironmentsForThreads(
const Environment &origin_env, std::string_view pcs_file_path) {
std::vector<Environment> envs(origin_env.num_threads, origin_env);
size_t thread_idx = 0;
for (auto &env : envs) {
env.my_shard_index += thread_idx++;
env.UpdateForExperiment();
env.pcs_file_path = pcs_file_path;
}
return envs;
}
// Runs the fuzzing session described by `env` using `env.num_threads` workers
// and returns `ExitCode()`.
//
// Each worker gets its own `Environment` (see `CreateEnvironmentsForThreads`)
// and its own `Stats` slot. Periodic reporters dump the stats to CSV and,
// when an experiment is configured or verbose logging is on, to the log.
// With `env.dry_run` set, workers return before creating a `Centipede`
// instance.
int Fuzz(const Environment &env, const BinaryInfo &binary_info,
         std::string_view pcs_file_path,
         CentipedeCallbacksFactory &callbacks_factory) {
  CoverageLogger coverage_logger(binary_info.pc_table, binary_info.symbols);
  std::vector<Environment> envs =
      CreateEnvironmentsForThreads(env, pcs_file_path);
  std::vector<std::atomic<Stats>> stats_vec(env.num_threads);

  // Start periodic stats dumping and, optionally, logging.
  // The clamp bounds are given as `size_t` explicitly so that template
  // deduction does not depend on `size_t` being `unsigned long`.
  std::vector<PeriodicAction> stats_reporters;
  stats_reporters.emplace_back(
      [csv_appender = StatsCsvFileAppender{stats_vec, envs}]() mutable {
        csv_appender.ReportCurrStats();
      },
      PeriodicAction::Options{
          /*sleep_before_each=*/
          [](size_t iteration) {
            return absl::Minutes(std::clamp<size_t>(iteration, 0, 10));
          },
      });
  if (!envs.front().experiment.empty() || ABSL_VLOG_IS_ON(1)) {
    stats_reporters.emplace_back(
        [logger = StatsLogger{stats_vec, envs}]() mutable {
          logger.ReportCurrStats();
        },
        PeriodicAction::Options{
            /*sleep_before_each=*/
            [](size_t iteration) {
              return absl::Seconds(std::clamp<size_t>(iteration, 5, 600));
            },
        });
  }

  auto fuzzing_worker =
      [&env, &callbacks_factory, &binary_info, &coverage_logger](
          Environment &my_env, std::atomic<Stats> &stats, bool create_tmpdir) {
        if (create_tmpdir) CreateLocalDirRemovedAtExit(TemporaryLocalDirPath());
        // Uses TID, call in this thread.
        my_env.seed = GetRandomSeed(env.seed);

        if (env.dry_run) return;

        ScopedCentipedeCallbacks scoped_callbacks(callbacks_factory, my_env);
        Centipede centipede(my_env, *scoped_callbacks.callbacks(), binary_info,
                            coverage_logger, stats);
        centipede.FuzzingLoop();
      };

  if (env.num_threads == 1) {
    // When fuzzing with one thread, run fuzzing loop in the current
    // thread. This is because FuzzTest/Centipede's single-process
    // fuzzing requires the test body, which is invoked by the fuzzing
    // loop, to run in the main thread.
    //
    // Here, the fuzzing worker should not re-create the tmpdir since the path
    // is thread-local and it has been created in the current function.
    fuzzing_worker(envs[0], stats_vec[0], /*create_tmpdir=*/false);
  } else {
    ThreadPool fuzzing_worker_threads{static_cast<int>(env.num_threads)};
    for (size_t thread_idx = 0; thread_idx < env.num_threads; thread_idx++) {
      Environment &my_env = envs[thread_idx];
      std::atomic<Stats> &my_stats = stats_vec[thread_idx];
      fuzzing_worker_threads.Schedule([&fuzzing_worker, &my_env, &my_stats]() {
        fuzzing_worker(my_env, my_stats, /*create_tmpdir=*/true);
      });
    }  // All `fuzzing_worker_threads` join here.
  }

  for (auto &reporter : stats_reporters) {
    // Nudge one final update and stop the reporting thread.
    reporter.Nudge();
    reporter.Stop();
  }

  if (!env.knobs_file.empty()) PrintRewardValues(stats_vec, std::cerr);

  return ExitCode();
}
// Returns the Bazel test shard for this process and, if the
// TEST_SHARD_STATUS_FILE environment variable is set, clears that file to
// signal that test sharding is supported.
TestShard SetUpTestSharding() {
  TestShard shard = GetBazelTestShard();
  // Updating the status file's modification time would be enough to signal
  // sharding support; clearing its contents is simpler and is also what the
  // GoogleTest framework does.
  const char *const status_file = std::getenv("TEST_SHARD_STATUS_FILE");
  if (status_file != nullptr) {
    ClearLocalFileContents(status_file);
  }
  return shard;
}
// Re-executes every crashing input stored directly in `crashing_dir` against
// `env.binary`. Inputs that no longer crash, that fail during setup, or whose
// crash signature was already seen are deleted. Every remaining input is
// added to `crash_summary` and its file is touched. Returns the crash
// signatures of the remaining inputs.
absl::flat_hash_set<std::string> PruneOldCrashesAndGetRemainingCrashSignatures(
    const std::filesystem::path &crashing_dir, const Environment &env,
    CentipedeCallbacksFactory &callbacks_factory, CrashSummary &crash_summary) {
  // The corpus database layout assumes the crash input files are located
  // directly in the crashing subdirectory, so we don't list recursively.
  const std::vector<std::string> crashing_input_files =
      ValueOrDie(RemoteListFiles(crashing_dir.c_str(), /*recursively=*/false));
  ScopedCentipedeCallbacks scoped_callbacks(callbacks_factory, env);
  BatchResult batch_result;
  absl::flat_hash_set<std::string> kept_signatures;
  for (const std::string &crashing_input_file : crashing_input_files) {
    ByteArray crashing_input;
    CHECK_OK(RemoteFileGetContents(crashing_input_file, crashing_input));
    // A failed execution means the input still reproduces a crash.
    const bool still_crashes = !scoped_callbacks.callbacks()->Execute(
        env.binary, {crashing_input}, batch_result);
    // The signature is recorded only for reproducible, non-setup failures;
    // a failed insertion means an earlier input had the same signature.
    const bool keep =
        still_crashes && !batch_result.IsSetupFailure() &&
        kept_signatures.insert(batch_result.failure_signature()).second;
    if (!keep) {
      CHECK_OK(RemotePathDelete(crashing_input_file, /*recursively=*/false));
      continue;
    }
    crash_summary.AddCrash(
        {std::filesystem::path(crashing_input_file).filename(),
         /*category=*/batch_result.failure_description(),
         batch_result.failure_signature(),
         batch_result.failure_description()});
    CHECK_OK(RemotePathTouchExistingFile(crashing_input_file));
  }
  return kept_signatures;
}
// TODO(b/405382531): Add unit tests once the function is unit-testable.
void DeduplicateAndStoreNewCrashes(
const std::filesystem::path &crashing_dir, const WorkDir &workdir,
size_t total_shards, absl::flat_hash_set<std::string> crash_signatures,
CrashSummary &crash_summary) {
for (size_t shard_idx = 0; shard_idx < total_shards; ++shard_idx) {
const std::vector<std::string> new_crashing_input_files =
// The crash reproducer directory may contain subdirectories with
// input files that don't individually cause a crash. We ignore those
// for now and don't list the files recursively.
ValueOrDie(
RemoteListFiles(workdir.CrashReproducerDirPaths().Shard(shard_idx),
/*recursively=*/false));
const std::filesystem::path crash_metadata_dir =
workdir.CrashMetadataDirPaths().Shard(shard_idx);
CHECK_OK(RemoteMkdir(crashing_dir.c_str()));
for (const std::string &crashing_input_file : new_crashing_input_files) {
const std::string crashing_input_file_name =
std::filesystem::path(crashing_input_file).filename();
const std::string crash_signature_path =
crash_metadata_dir / absl::StrCat(crashing_input_file_name, ".sig");
std::string new_crash_signature;
const absl::Status status =
RemoteFileGetContents(crash_signature_path, new_crash_signature);
if (!status.ok()) {
LOG(WARNING) << "Ignoring crashing input " << crashing_input_file_name
<< " due to failure to read the crash signature: "
<< status;
continue;
}
const bool is_duplicate =
!crash_signatures.insert(new_crash_signature).second;
if (is_duplicate) continue;
const std::string crash_description_path =
crash_metadata_dir / absl::StrCat(crashing_input_file_name, ".desc");
std::string new_crash_description;
const absl::Status description_status =
RemoteFileGetContents(crash_description_path, new_crash_description);
if (!description_status.ok()) {
LOG(WARNING)
<< "Failed to read crash description for "
<< crashing_input_file_name
<< ". Will use the crash signature as the description. Status: "
<< description_status;
new_crash_description = new_crash_signature;
}
crash_summary.AddCrash({crashing_input_file_name,
/*category=*/new_crash_description,
std::move(new_crash_signature),
new_crash_description});
CHECK_OK(
RemoteFileRename(crashing_input_file,
(crashing_dir / crashing_input_file_name).c_str()));
}
}
}
// Builds the seed corpus configuration whose destination is the corpus files
// in `env.workdir` (one shard per thread). The sources are the inputs in
// `regression_dir` (always used) and the previously distilled corpus files
// from `coverage_dir` (used only if `coverage_dir` is non-empty).
SeedCorpusConfig GetSeedCorpusConfig(const Environment &env,
                                     std::string_view regression_dir,
                                     std::string_view coverage_dir) {
  const WorkDir workdir{env};

  // Destination: the current corpus files of the workdir.
  SeedCorpusDestination destination;
  destination.dir_path = env.workdir;
  destination.shard_rel_glob =
      std::filesystem::path{workdir.CorpusFilePaths().AllShardsGlob()}
          .filename();
  destination.shard_index_digits = WorkDir::kDigitsInShardIndex;
  destination.num_shards = static_cast<uint32_t>(env.num_threads);

  // Source #1: every individual input file in the regression directory.
  std::vector<SeedCorpusSource> sources;
  {
    SeedCorpusSource regression;
    regression.dir_glob = std::string(regression_dir);
    regression.num_recent_dirs = 1;
    regression.individual_input_rel_glob = "*";
    regression.sampled_fraction_or_count = 1.0f;
    sources.push_back(std::move(regression));
  }

  // Source #2 (optional): the previously distilled corpus files.
  if (!coverage_dir.empty()) {
    SeedCorpusSource coverage;
    coverage.dir_glob = std::string(coverage_dir);
    coverage.num_recent_dirs = 1;
    coverage.shard_rel_glob =
        std::filesystem::path{
            workdir.DistilledCorpusFilePaths().AllShardsGlob()}
            .filename();
    coverage.individual_input_rel_glob = "*";
    coverage.sampled_fraction_or_count = 1.0f;
    sources.push_back(std::move(coverage));
  }

  return {
      std::move(sources),
      std::move(destination),
  };
}
// Reads the accumulated fuzzing time stored in `fuzzing_time_file`.
//
// Aborts (CHECK) if the file cannot be read. If the contents, with surrounding
// ASCII whitespace stripped, do not parse as an `absl::Duration`, logs a
// warning and returns a zero duration.
absl::Duration ReadFuzzingTime(std::string_view fuzzing_time_file) {
  std::string fuzzing_time_str;
  CHECK_OK(RemoteFileGetContents(fuzzing_time_file, fuzzing_time_str));

  absl::Duration parsed_time;
  const bool parsed_ok = absl::ParseDuration(
      absl::StripAsciiWhitespace(fuzzing_time_str), &parsed_time);
  if (parsed_ok) return parsed_time;

  LOG(WARNING) << "Failed to parse fuzzing time of a resuming fuzz test: '"
               << fuzzing_time_str << "'. Assuming no fuzzing time so far.";
  return absl::ZeroDuration();
}
// Returns a periodic action that writes the time elapsed since `start_time`
// (formatted with `absl::FormatDuration`) to `fuzzing_time_file`, starting
// with zero delay and repeating every 15 seconds. Write failures are logged as
// warnings and otherwise ignored.
//
// The file path is copied into the returned action, so the action stays valid
// even if the storage behind `fuzzing_time_file` is destroyed before the
// action stops running.
PeriodicAction RecordFuzzingTime(std::string_view fuzzing_time_file,
                                 absl::Time start_time) {
  // Capture an owning copy of the path: the closure escapes this function, and
  // capturing the `std::string_view` itself would leave it dangling as soon as
  // the caller's string goes away.
  return {[fuzzing_time_file = std::string(fuzzing_time_file), start_time] {
            const absl::Status status = RemoteFileSetContents(
                fuzzing_time_file,
                absl::FormatDuration(absl::Now() - start_time));
            LOG_IF(WARNING, !status.ok())
                << "Failed to write fuzzing time: " << status;
          },
          PeriodicAction::ZeroDelayConstInterval(absl::Seconds(15))};
}
// TODO(b/368325638): Add tests for this.
//
// Runs (or only replays, when `fuzztest_config.only_replay` is set) the fuzz
// tests selected for this test shard, one after another, and updates the
// corpus database located at `fuzztest_config.corpus_database` /
// `fuzztest_config.binary_identifier`.
//
// For each selected test this function:
//   1. Picks the workdir: `env.workdir` if it was specified (only allowed in
//      single test mode), otherwise a per-shard, per-test directory under the
//      workdir root. When `fuzztest_config.execution_id` is set, the stored
//      execution ID decides whether the test is skipped, resumed or restarted.
//   2. Seeds the workdir corpus from the test's "regression" directory and,
//      if `fuzztest_config.replay_coverage_inputs` is set, its "coverage"
//      directory. Seeding is skipped when resuming.
//   3. Calls `Fuzz()` with the stop time set from the per-test time limit
//      (reduced by the recorded fuzzing time when resuming).
//   4. Moves the fuzzing stats file under `fuzztest_config.stats_root`, if set.
//   5. Unless only replaying, stopped early, or running with a specified
//      workdir: distills the corpus into the "coverage" directory and
//      prunes/deduplicates the inputs in the "crashing" directory.
//
// `env` is taken by value because it is modified here (`workdir`, `binary`,
// `save_binary_info`). The only return value is EXIT_SUCCESS; failures of the
// underlying file operations abort via CHECK/CHECK_OK.
int UpdateCorpusDatabaseForFuzzTests(
Environment env, const fuzztest::internal::Configuration &fuzztest_config,
CentipedeCallbacksFactory &callbacks_factory) {
env.UpdateWithTargetConfig(fuzztest_config);
absl::Time start_time = absl::Now();
LOG(INFO) << "Starting the update of the corpus database for fuzz tests:"
<< "\nBinary: " << env.binary
<< "\nCorpus database: " << fuzztest_config.corpus_database;
// Step 1: Preliminary set up of test sharding, binary info, etc.
const auto [test_shard_index, total_test_shards] = SetUpTestSharding();
const auto corpus_database_path =
std::filesystem::path(fuzztest_config.corpus_database) /
fuzztest_config.binary_identifier;
// Empty when no stats root is configured; checked before the stats are moved.
const auto stats_root_path =
fuzztest_config.stats_root.empty()
? std::filesystem::path()
: std::filesystem::path(fuzztest_config.stats_root) /
fuzztest_config.binary_identifier;
// Falls back to the corpus database directory when no workdir root is
// configured.
const auto workdir_root_path =
fuzztest_config.workdir_root.empty()
? corpus_database_path
: std::filesystem::path(fuzztest_config.workdir_root) /
fuzztest_config.binary_identifier;
// UTC timestamp computed once per call; used to name the saved stats files.
const auto execution_stamp = [] {
std::string stamp =
absl::FormatTime("%Y-%m-%d-%H-%M-%S", absl::Now(), absl::UTCTimeZone());
return stamp;
}();
std::vector<std::string> fuzz_tests_to_run;
if (env.fuzztest_single_test_mode) {
CHECK(fuzztest_config.fuzz_tests_in_current_shard.size() == 1)
<< "Must select exactly one fuzz test when running in the single test "
"mode";
fuzz_tests_to_run = fuzztest_config.fuzz_tests_in_current_shard;
} else {
// Distribute the tests round-robin across the test shards.
for (int i = 0; i < fuzztest_config.fuzz_tests.size(); ++i) {
if (i % total_test_shards == test_shard_index) {
fuzz_tests_to_run.push_back(fuzztest_config.fuzz_tests[i]);
}
}
}
LOG(INFO) << "Fuzz tests to run:" << absl::StrJoin(fuzz_tests_to_run, ", ");
const bool is_workdir_specified = !env.workdir.empty();
// An explicitly specified workdir is only supported in single test mode.
CHECK(!is_workdir_specified || env.fuzztest_single_test_mode);
// When env.workdir is empty, the full workdir paths will be formed by
// appending the fuzz test names to the base workdir path. We use different
// path when only replaying to avoid replaying an unfinished fuzzing sessions.
const auto base_workdir_path =
is_workdir_specified
? std::filesystem::path{} // Will not be used.
: workdir_root_path /
absl::StrFormat("workdir%s.%03d",
fuzztest_config.only_replay ? "-replay" : "",
test_shard_index);
// There's no point in saving the binary info to the workdir, since the
// workdir is deleted at the end.
env.save_binary_info = false;
std::string pcs_file_path;
BinaryInfo binary_info = PopulateBinaryInfoAndSavePCsIfNecessary(
env, callbacks_factory, pcs_file_path);
LOG(INFO) << "Test shard index: " << test_shard_index
<< " Total test shards: " << total_test_shards;
// Step 2: Iterate over the fuzz tests and run them.
// Keep the original binary: `env.binary` is rewritten for every test below.
const std::string binary = env.binary;
for (int i = 0; i < fuzz_tests_to_run.size(); ++i) {
// Clean up previous stop requests. stop_time will be set later.
ClearEarlyStopRequestAndSetStopTime(/*stop_time=*/absl::InfiniteFuture());
if (!env.fuzztest_single_test_mode &&
fuzztest_config.GetTimeLimitPerTest() < absl::InfiniteDuration()) {
const absl::Duration test_time_limit =
fuzztest_config.GetTimeLimitPerTest();
const absl::Status has_enough_time = VerifyBazelHasEnoughTimeToRunTest(
start_time, test_time_limit,
/*executed_tests_in_shard=*/i, fuzztest_config.fuzz_tests.size());
CHECK_OK(has_enough_time)
<< "Not enough time for running the fuzz test "
<< fuzz_tests_to_run[i] << " for " << test_time_limit;
}
if (!is_workdir_specified) {
env.workdir = base_workdir_path / fuzz_tests_to_run[i];
}
// Only read or written below when the workdir was not specified, i.e. when
// `base_workdir_path` is non-empty.
const auto execution_id_path =
(base_workdir_path /
absl::StrCat(fuzz_tests_to_run[i], ".execution_id"))
.string();
bool is_resuming = false;
if (!is_workdir_specified && fuzztest_config.execution_id.has_value()) {
// Use the execution IDs to resume or skip tests.
const bool execution_id_matched = [&] {
if (!RemotePathExists(execution_id_path)) return false;
CHECK(!RemotePathIsDirectory(execution_id_path));
std::string prev_execution_id;
CHECK_OK(RemoteFileGetContents(execution_id_path, prev_execution_id));
return prev_execution_id == *fuzztest_config.execution_id;
}();
if (execution_id_matched) {
// If execution IDs match but the previous coverage is missing, it means
// the test was previously finished, and we skip running for the test.
if (!RemotePathExists(WorkDir{env}.CoverageDirPath())) {
LOG(INFO) << "Skipping running the fuzz test "
<< fuzz_tests_to_run[i];
continue;
}
// If execution IDs match and the previous coverage exists, it means
// the same workflow got interrupted when running the test. So we resume
// the test.
is_resuming = true;
LOG(INFO) << "Resuming running the fuzz test " << fuzz_tests_to_run[i];
} else {
// If the execution IDs mismatch, we start a new run.
is_resuming = false;
LOG(INFO) << "Starting a new run of the fuzz test "
<< fuzz_tests_to_run[i];
}
}
if (RemotePathExists(env.workdir) && !is_resuming) {
// This could be a workdir from a failed run that used a different version
// of the binary. We delete it so that we don't have to deal with
// the assumptions under which it is safe to reuse an old workdir.
CHECK_OK(RemotePathDelete(env.workdir, /*recursively=*/true));
}
const WorkDir workdir{env};
CHECK_OK(RemoteMkdir(
workdir.CoverageDirPath())); // Implicitly creates the workdir
// Updating execution ID must be after creating the coverage dir. Otherwise
// if it fails to create coverage dir after updating execution ID, next
// attempt would skip this test.
if (!is_workdir_specified && fuzztest_config.execution_id.has_value() &&
!is_resuming) {
CHECK_OK(RemoteFileSetContents(execution_id_path,
*fuzztest_config.execution_id));
}
// Runs at the end of this loop iteration (including on the `continue`s
// below): deletes the per-test workdir unless it was specified by the caller
// or an early stop was requested.
absl::Cleanup clean_up_workdir = [is_workdir_specified, &env] {
if (!is_workdir_specified && !EarlyStopRequested()) {
CHECK_OK(RemotePathDelete(env.workdir, /*recursively=*/true));
}
};
const std::filesystem::path fuzztest_db_path =
corpus_database_path / fuzz_tests_to_run[i];
const std::filesystem::path regression_dir =
fuzztest_db_path / "regression";
const std::filesystem::path coverage_dir = fuzztest_db_path / "coverage";
// Seed the fuzzing session with the latest coverage corpus and regression
// inputs from the previous fuzzing session.
if (!is_resuming) {
CHECK_OK(GenerateSeedCorpusFromConfig(
GetSeedCorpusConfig(env, regression_dir.c_str(),
fuzztest_config.replay_coverage_inputs
? coverage_dir.c_str()
: ""),
env.binary_name, env.binary_hash))
<< "while generating the seed corpus";
}
if (!env.fuzztest_single_test_mode) {
// TODO: b/338217594 - Call the FuzzTest binary in a flag-agnostic way.
constexpr std::string_view kFuzzTestFuzzFlag = "--fuzz=";
constexpr std::string_view kFuzzTestReplayCorpusFlag =
"--replay_corpus=";
std::string_view test_selection_flag = fuzztest_config.only_replay
? kFuzzTestReplayCorpusFlag
: kFuzzTestFuzzFlag;
env.binary =
absl::StrCat(binary, " ", test_selection_flag, fuzz_tests_to_run[i]);
}
absl::Duration time_limit = fuzztest_config.GetTimeLimitPerTest();
absl::Duration time_spent = absl::ZeroDuration();
const std::string fuzzing_time_file =
std::filesystem::path(env.workdir) / "fuzzing_time";
// When resuming, only the remaining part of the time limit is used.
if (is_resuming && RemotePathExists(fuzzing_time_file)) {
time_spent = ReadFuzzingTime(fuzzing_time_file);
time_limit = std::max(time_limit - time_spent, absl::ZeroDuration());
}
is_resuming = false;
if (EarlyStopRequested()) {
LOG(INFO) << "Skipping test " << fuzz_tests_to_run[i]
<< " because early stop requested.";
continue;
}
LOG(INFO) << (fuzztest_config.only_replay ? "Replaying " : "Fuzzing ")
<< fuzz_tests_to_run[i] << " for " << time_limit
<< "\n\tTest binary: " << env.binary;
// NOTE: this per-test `start_time` shadows the function-level `start_time`
// declared at the top of the function.
const absl::Time start_time = absl::Now();
ClearEarlyStopRequestAndSetStopTime(/*stop_time=*/start_time + time_limit);
// The recorded fuzzing time includes the time spent before resuming.
PeriodicAction record_fuzzing_time =
RecordFuzzingTime(fuzzing_time_file, start_time - time_spent);
Fuzz(env, binary_info, pcs_file_path, callbacks_factory);
record_fuzzing_time.Nudge();
record_fuzzing_time.Stop();
if (!stats_root_path.empty()) {
const auto stats_dir = stats_root_path / fuzz_tests_to_run[i];
CHECK_OK(RemoteMkdir(stats_dir.c_str()));
CHECK_OK(RemoteFileRename(
workdir.FuzzingStatsPath(),
(stats_dir / absl::StrCat("fuzzing_stats_", execution_stamp))
.c_str()));
}
if (EarlyStopRequested()) {
LOG(INFO) << "Skip updating corpus database due to early stop requested.";
continue;
}
// TODO(xinhaoyuan): Have a separate flag to skip corpus updating instead
// of checking whether workdir is specified or not.
if (fuzztest_config.only_replay || is_workdir_specified) continue;
// Distill and store the coverage corpus.
Distill(env);
if (RemotePathExists(coverage_dir.c_str())) {
// In the future, we will store k latest coverage corpora for some k, but
// for now we only keep the latest one.
CHECK_OK(RemotePathDelete(coverage_dir.c_str(), /*recursively=*/true));
}
CHECK_OK(RemoteMkdir(coverage_dir.c_str()));
std::vector<std::string> distilled_corpus_files;
CHECK_OK(RemoteGlobMatch(workdir.DistilledCorpusFilePaths().AllShardsGlob(),
distilled_corpus_files));
// Move every distilled shard into the database, keeping its file name.
for (const std::string &corpus_file : distilled_corpus_files) {
const std::string file_name =
std::filesystem::path(corpus_file).filename();
CHECK_OK(
RemoteFileRename(corpus_file, (coverage_dir / file_name).c_str()));
}
// Deduplicate and update the crashing inputs.
CrashSummary crash_summary{fuzztest_config.binary_identifier,
fuzz_tests_to_run[i]};
const std::filesystem::path crashing_dir = fuzztest_db_path / "crashing";
absl::flat_hash_set<std::string> crash_signatures =
PruneOldCrashesAndGetRemainingCrashSignatures(
crashing_dir, env, callbacks_factory, crash_summary);
DeduplicateAndStoreNewCrashes(crashing_dir, workdir, env.total_shards,
std::move(crash_signatures), crash_summary);
crash_summary.Report(&std::cerr);
}
return EXIT_SUCCESS;
}
// Writes the IDs of the stored crashes of the single selected fuzz test to
// `env.list_crash_ids_file`, one ID per line.
//
// A crash ID is the file name of an entry listed (non-recursively) in the
// test's "crashing" directory of the corpus database. If that directory does
// not exist, an empty list is written. Requires `env.list_crash_ids_file` to
// be set and exactly one test in `target_config.fuzz_tests_in_current_shard`.
// Returns EXIT_SUCCESS; failures abort via CHECK.
int ListCrashIds(const Environment &env,
                 const fuzztest::internal::Configuration &target_config) {
  CHECK(!env.list_crash_ids_file.empty())
      << "Need list_crash_ids_file to be set for listing crash IDs";
  CHECK_EQ(target_config.fuzz_tests_in_current_shard.size(), 1);

  // TODO: b/406003594 - move the path construction to a library.
  const std::filesystem::path crash_dir =
      std::filesystem::path(target_config.corpus_database) /
      target_config.binary_identifier /
      target_config.fuzz_tests_in_current_shard[0] / "crashing";

  // Stays empty when the crash directory does not exist.
  std::vector<std::string> crash_paths;
  if (RemotePathExists(crash_dir.string())) {
    CHECK(RemotePathIsDirectory(crash_dir.string()))
        << "Crash dir " << crash_dir << " in the corpus database "
        << target_config.corpus_database << " is not a directory";
    crash_paths =
        ValueOrDie(RemoteListFiles(crash_dir.string(), /*recursively=*/false));
  }

  // The crash ID is the last path component of each listed file.
  std::vector<std::string> results;
  results.reserve(crash_paths.size());
  for (const std::string &listed_path : crash_paths) {
    results.push_back(std::filesystem::path{listed_path}.filename().string());
  }

  CHECK_OK(RemoteFileSetContents(env.list_crash_ids_file,
                                 absl::StrJoin(results, "\n")));
  return EXIT_SUCCESS;
}
// Replays one stored crash of the single selected fuzz test.
//
// Seeds the corpus of `env.workdir` (a single shard) with the input named
// `env.crash_id` from the test's "crashing" directory in the corpus database,
// then calls `Fuzz()` on a copy of `env` with `load_shards_only` enabled and
// returns its result. Requires `env.crash_id` to be set and exactly one test
// in `target_config.fuzz_tests_in_current_shard`.
int ReplayCrash(const Environment &env,
                const fuzztest::internal::Configuration &target_config,
                CentipedeCallbacksFactory &callbacks_factory) {
  CHECK(!env.crash_id.empty()) << "Need crash_id to be set for replay a crash";
  CHECK(target_config.fuzz_tests_in_current_shard.size() == 1)
      << "Expecting exactly one test for replay_crash";

  // TODO: b/406003594 - move the path construction to a library.
  const auto crash_dir = std::filesystem::path(target_config.corpus_database) /
                         target_config.binary_identifier /
                         target_config.fuzz_tests_in_current_shard[0] /
                         "crashing";
  const WorkDir workdir{env};

  // Source: only the requested crash input from the crash directory.
  SeedCorpusSource crash_corpus_source;
  crash_corpus_source.dir_glob = crash_dir;
  crash_corpus_source.num_recent_dirs = 1;
  crash_corpus_source.individual_input_rel_glob = env.crash_id;
  crash_corpus_source.sampled_fraction_or_count = 1.0f;

  // Destination: a single corpus shard in the workdir.
  SeedCorpusDestination crash_corpus_destination;
  crash_corpus_destination.dir_path = env.workdir;
  crash_corpus_destination.shard_rel_glob =
      std::filesystem::path{workdir.CorpusFilePaths().AllShardsGlob()}
          .filename();
  crash_corpus_destination.shard_index_digits = WorkDir::kDigitsInShardIndex;
  crash_corpus_destination.num_shards = 1;

  const SeedCorpusConfig crash_corpus_config = {
      /*sources=*/{crash_corpus_source},
      /*destination=*/std::move(crash_corpus_destination)};
  CHECK_OK(GenerateSeedCorpusFromConfig(crash_corpus_config, env.binary_name,
                                        env.binary_hash));

  // Only load and run the seeded shard.
  Environment run_crash_env = env;
  run_crash_env.load_shards_only = true;
  return Fuzz(run_crash_env, {}, "", callbacks_factory);
}
// Copies the contents of the stored crash `env.crash_id` of the single
// selected fuzz test from the corpus database to `env.export_crash_file`.
//
// Requires `env.crash_id` and `env.export_crash_file` to be set and exactly
// one test in `target_config.fuzz_tests_in_current_shard`. Returns
// EXIT_SUCCESS on success; logs an error and returns EXIT_FAILURE if the crash
// cannot be read or the export file cannot be written.
int ExportCrash(const Environment &env,
                const fuzztest::internal::Configuration &target_config) {
  CHECK(!env.crash_id.empty())
      << "Need crash_id to be set for exporting a crash";
  CHECK(!env.export_crash_file.empty())
      << "Need export_crash_file to be set for exporting a crash";
  CHECK(target_config.fuzz_tests_in_current_shard.size() == 1)
      << "Expecting exactly one test for exporting a crash";

  // TODO: b/406003594 - move the path construction to a library.
  const auto crash_dir = std::filesystem::path(target_config.corpus_database) /
                         target_config.binary_identifier /
                         target_config.fuzz_tests_in_current_shard[0] /
                         "crashing";

  std::string crash_contents;
  if (const absl::Status read_status = RemoteFileGetContents(
          (crash_dir / env.crash_id).c_str(), crash_contents);
      !read_status.ok()) {
    LOG(ERROR) << "Failed reading the crash " << env.crash_id << " from "
               << crash_dir.c_str() << ": " << read_status;
    return EXIT_FAILURE;
  }

  if (const absl::Status write_status =
          RemoteFileSetContents(env.export_crash_file, crash_contents);
      !write_status.ok()) {
    LOG(ERROR) << "Failed write the crash " << env.crash_id << " to "
               << env.export_crash_file << ": " << write_status;
    return EXIT_FAILURE;
  }

  return EXIT_SUCCESS;
}
} // namespace
// Entry point of Centipede: selects the mode of operation from `env` and runs
// it, using `callbacks_factory` wherever callbacks are needed.
//
// The modes are checked in this order, and the first one that applies decides
// the return value:
//   1. `env.corpus_to_files`: export the corpus to files; EXIT_SUCCESS.
//   2. `env.crashes_to_files`: export the crashes to files; EXIT_SUCCESS or
//      EXIT_FAILURE.
//   3. `env.for_each_blob`: result of `ForEachBlob()`.
//   4. `env.minimize_crash_file_path`: result of `MinimizeCrash()`.
//   5. `env.corpus_from_files`: import the corpus from files; EXIT_SUCCESS.
//   6. `env.corpus_dir`: import the listed corpus dirs, then continue below.
//   7. `env.distill`: result of `Distill()`.
//   8. If a binary is set, there are no input wildcards, and the target
//      configuration names a corpus database: list/replay/export a crash, or
//      update the corpus database for the fuzz tests.
//   9. Otherwise: `Analyze()` if `env.analyze` is set, else `Fuzz()`.
int CentipedeMain(const Environment &env,
CentipedeCallbacksFactory &callbacks_factory) {
ClearEarlyStopRequestAndSetStopTime(env.stop_at);
SetSignalHandlers();
if (!env.corpus_to_files.empty()) {
Centipede::CorpusToFiles(env, env.corpus_to_files);
return EXIT_SUCCESS;
}
if (!env.crashes_to_files.empty()) {
const auto status = Centipede::CrashesToFiles(env, env.crashes_to_files);
if (status.ok()) return EXIT_SUCCESS;
LOG(ERROR) << "Got error when exporting crashes to files: " << status;
return EXIT_FAILURE;
}
if (!env.for_each_blob.empty()) return ForEachBlob(env);
if (!env.minimize_crash_file_path.empty()) {
ByteArray crashy_input;
ReadFromLocalFile(env.minimize_crash_file_path, crashy_input);
return MinimizeCrash(crashy_input, env, callbacks_factory);
}
// Just export the corpus from a local dir and exit.
if (!env.corpus_from_files.empty()) {
Centipede::CorpusFromFiles(env, env.corpus_from_files);
return EXIT_SUCCESS;
}
// Export the corpus from a local dir and then fuzz.
if (!env.corpus_dir.empty()) {
for (size_t i = 0; i < env.corpus_dir.size(); ++i) {
const auto &corpus_dir = env.corpus_dir[i];
// The first corpus dir is skipped when it is marked as output-only.
if (i > 0 || !env.first_corpus_dir_output_only)
Centipede::CorpusFromFiles(env, corpus_dir);
}
}
if (env.distill) return Distill(env);
// Create the local temporary dir once, before creating any threads. The
// temporary dir must typically exist before `CentipedeCallbacks` can be used.
const auto tmpdir = TemporaryLocalDirPath();
CreateLocalDirRemovedAtExit(tmpdir);
// Enter the update corpus database mode only if we have a binary to invoke
// and a corpus database to update.
// We don't update the corpus database for standalone binaries (i.e., when
// `env.has_input_wildcards` is true).
if (!env.binary.empty() && !env.has_input_wildcards) {
// Prefer the configuration passed in `env.fuzztest_configuration`
// (web-safe base64); otherwise ask the callbacks for it.
const auto serialized_target_config = [&]() -> absl::StatusOr<std::string> {
// TODO: b/410051414 Use Centipede flags to pass necessary information
// instead of passing the entirely serialized Configuration once switched
// to the unified execution model.
if (!env.fuzztest_configuration.empty()) {
std::string result;
CHECK(absl::WebSafeBase64Unescape(env.fuzztest_configuration, &result));
return result;
}
ScopedCentipedeCallbacks scoped_callbacks(callbacks_factory, env);
return scoped_callbacks.callbacks()->GetSerializedTargetConfig();
}();
CHECK_OK(serialized_target_config.status());
// An empty serialized configuration falls through to regular fuzzing below.
if (!serialized_target_config->empty()) {
const auto target_config = fuzztest::internal::Configuration::Deserialize(
*serialized_target_config);
CHECK_OK(target_config.status())
<< "Failed to deserialize target configuration";
if (!target_config->corpus_database.empty()) {
// The three flags are summed to count how many of them are set.
LOG_IF(FATAL,
env.list_crash_ids + env.replay_crash + env.export_crash > 1)
<< "At most one of list_crash_ids/replay_crash/export_crash can "
"be set, but seeing list_crash_ids: "
<< env.list_crash_ids << ", replay_crash: " << env.replay_crash
<< ", export_crash: " << env.export_crash;
if (env.list_crash_ids) {
return ListCrashIds(env, *target_config);
}
if (env.replay_crash) {
return ReplayCrash(env, *target_config, callbacks_factory);
}
if (env.export_crash) {
return ExportCrash(env, *target_config);
}
const auto time_limit_per_test = target_config->GetTimeLimitPerTest();
// Replay-only runs may have an infinite time limit; fuzzing runs may not.
CHECK(target_config->only_replay ||
time_limit_per_test < absl::InfiniteDuration())
<< "Updating corpus database requires specifying time limit per "
"fuzz test.";
CHECK(time_limit_per_test >= absl::Seconds(1))
<< "Time limit per fuzz test must be at least 1 second.";
return UpdateCorpusDatabaseForFuzzTests(env, *target_config,
callbacks_factory);
}
}
}
// Create the remote coverage dirs once, before creating any threads.
const auto coverage_dir = WorkDir{env}.CoverageDirPath();
CHECK_OK(RemoteMkdir(coverage_dir));
LOG(INFO) << "Coverage dir: " << coverage_dir
<< "; temporary dir: " << tmpdir;
std::string pcs_file_path;
BinaryInfo binary_info = PopulateBinaryInfoAndSavePCsIfNecessary(
env, callbacks_factory, pcs_file_path);
if (env.analyze) return Analyze(env);
return Fuzz(env, binary_info, pcs_file_path, callbacks_factory);
}
} // namespace fuzztest::internal

View File

@ -0,0 +1,37 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef THIRD_PARTY_CENTIPEDE_CENTIPEDE_INTERFACE_H_
#define THIRD_PARTY_CENTIPEDE_CENTIPEDE_INTERFACE_H_
#include "./centipede/centipede_callbacks.h"
#include "./centipede/environment.h"
namespace fuzztest::internal {
// Runs Centipede in the mode selected by `env`, creating callbacks through
// `callbacks_factory` as needed. Returns a process exit code (EXIT_SUCCESS,
// EXIT_FAILURE, or the result of the selected mode).
//
// Usage:
//   class MyCentipedeCallbacks : public CentipedeCallbacks { ... }
//
//   int main(int argc, char **argv) {
//     InitGoogle(argv[0], &argc, &argv, /*remove_flags=*/true);
//     fuzztest::internal::Environment env;  // reads FLAGS.
//     fuzztest::internal::DefaultCallbacksFactory<MyCentipedeCallbacks>
//         callbacks_factory;
//     return fuzztest::internal::CentipedeMain(env, callbacks_factory);
//   }
int CentipedeMain(const Environment &env,
CentipedeCallbacksFactory &callbacks_factory);
} // namespace fuzztest::internal
#endif // THIRD_PARTY_CENTIPEDE_CENTIPEDE_INTERFACE_H_

View File

@ -0,0 +1,30 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/base/nullability.h"
#include "./centipede/centipede_callbacks.h"
#include "./centipede/centipede_default_callbacks.h"
#include "./centipede/centipede_interface.h"
#include "./centipede/config_file.h"
#include "./centipede/environment_flags.h"
int main(int argc, char** absl_nonnull argv) {
const auto runtime_state = fuzztest::internal::InitCentipede(argc, argv);
const auto env = fuzztest::internal::CreateEnvironmentFromFlags(
runtime_state->leftover_argv());
fuzztest::internal::DefaultCallbacksFactory<
fuzztest::internal::CentipedeDefaultCallbacks>
callbacks;
return CentipedeMain(env, callbacks);
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,537 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/command.h"
#include <errno.h>
#include <fcntl.h>
#include <spawn.h>
#include <sys/poll.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#ifdef __APPLE__
#include <inttypes.h>
#include <libproc.h>
#endif // __APPLE__
#include <algorithm>
#include <csignal>
#include <cstdlib>
#include <filesystem> // NOLINT
#include <fstream>
#include <optional>
#include <string>
#include <string_view>
#include <system_error> // NOLINT
#include <utility>
#include <vector>
#include "absl/base/const_init.h"
#include "absl/log/check.h"
#include "absl/log/log.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/strings/match.h"
#include "absl/strings/numbers.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "absl/strings/str_join.h"
#include "absl/strings/str_replace.h"
#include "absl/strings/str_split.h"
#include "absl/synchronization/mutex.h"
#include "absl/time/clock.h"
#include "absl/time/time.h"
#include "./centipede/stop.h"
#include "./centipede/util.h"
#include "./common/logging.h"
#if !defined(_MSC_VER)
// Needed to pass the current environment to posix_spawn, which needs an
// explicit envp without an option to inherit implicitly.
extern char **environ;
#endif
namespace fuzztest::internal {
namespace {
// See the definition of --fork_server flag.
// Separator used to join the pieces of a rendered command line in
// `Command::ToString()`: a space, a backslash and a newline.
constexpr std::string_view kCommandLineSeparator(" \\\n");
// A command path that starts with this prefix disables the fork server for
// that command (see `Command::StartForkServer()`); `Command::ToString()`
// strips the prefix from the rendered path.
constexpr std::string_view kNoForkServerRequestPrefix("%f");
// Returns a string identifying when the process `pid` was started, so that a
// recycled PID can be told apart from the original process.
//
// On Apple platforms the stamp is "<seconds>.<microseconds>" taken from the
// process's BSD info. Elsewhere it is the start time field of the first line
// of `/proc/<pid>/stat`. Returns a non-OK status if the information cannot be
// obtained or parsed.
absl::StatusOr<std::string> GetProcessCreationStamp(pid_t pid) {
#ifdef __APPLE__
  struct proc_bsdinfo bsd_info = {};
  const auto bytes_returned =
      proc_pidinfo(pid, PROC_PIDTBSDINFO, 0, &bsd_info, PROC_PIDTBSDINFO_SIZE);
  if (bytes_returned != PROC_PIDTBSDINFO_SIZE) {
    return absl::InternalError(
        absl::StrCat("failed to get proc bsdinfo for ", pid));
  }
  return absl::StrFormat("%" PRIu64 ".%06" PRIu64, bsd_info.pbi_start_tvsec,
                         bsd_info.pbi_start_tvusec);
#else
  constexpr int kFieldIndexOfStartTimeAfterComm = 19;  // From `man procfs`
  const std::string proc_stat_path = absl::StrFormat("/proc/%d/stat", pid);

  // Cannot use `ReadFromLocalFile` on procfs since seek does not work.
  // Reading a single line is enough as long as the command's file name has no
  // newline in it, which should be in our control when the process is ours.
  std::string first_line;
  bool stream_is_bad = false;
  {
    std::ifstream proc_stat_stream(proc_stat_path);
    // NOTE(review): only `bad()` is checked, so a file that cannot be opened
    // (failbit) is reported by the "end of command" check below instead.
    stream_is_bad = std::getline(proc_stat_stream, first_line).bad();
  }
  if (stream_is_bad) {
    return absl::InternalError(absl::StrCat("failed to read ", proc_stat_path));
  }

  // According to the current format of `/proc/[pid]/stat`, only the comm field
  // can contain ')', so the last ')' ends it.
  const size_t comm_end_pos = first_line.rfind(')');
  if (comm_end_pos == std::string::npos) {
    return absl::NotFoundError(
        absl::StrCat("cannot find the end of command in the first line of ",
                     proc_stat_path, ": ", first_line));
  }

  const std::string_view after_comm =
      std::string_view(first_line).substr(comm_end_pos + 1);
  const std::vector<std::string_view> fields =
      absl::StrSplit(after_comm, ' ', absl::SkipEmpty());
  if (fields.size() > kFieldIndexOfStartTimeAfterComm) {
    return std::string(fields[kFieldIndexOfStartTimeAfterComm]);
  }
  return absl::NotFoundError(
      absl::StrCat("not enough fields in the first line of ", proc_stat_path,
                   ": ", first_line));
#endif
}
} // namespace
// TODO(ussuri): Encapsulate as much of the fork server functionality from
// this source as possible in this struct, and make it a class.
//
// State of the fork server attached to a `Command`. The destructor closes the
// opened comms pipes and removes their FIFO files.
struct Command::ForkServerProps {
  // The file paths of the comms pipes.
  std::string fifo_path_[2];
  // The file descriptors of the comms pipes; -1 while not opened.
  int pipe_[2] = {-1, -1};
  // The file path to write the PID of the fork server process to.
  std::string pid_file_path_;
  // The PID of the fork server process. Used to verify that the fork server is
  // running and the pipes are ready for comms.
  pid_t pid_ = -1;
  // The creation stamp of the fork server process. Used to detect that the
  // running process with `pid_` is still the original fork server, not a PID
  // recycled by the OS.
  std::string creation_stamp;

  ~ForkServerProps() {
    for (int i = 0; i < 2; ++i) {
      const std::string &fifo_path = fifo_path_[i];
      // Close the descriptor first, if it was ever opened.
      if (pipe_[i] >= 0) {
        if (close(pipe_[i]) != 0) {
          LOG(ERROR) << "Failed to close fork server pipe for " << fifo_path;
        }
      }
      // Then remove the FIFO file, if a path was assigned.
      if (fifo_path.empty()) continue;
      std::error_code ec;
      const bool removed = std::filesystem::remove(fifo_path, ec);
      if (!removed) {
        LOG(ERROR) << "Failed to remove fork server pipe file " << fifo_path
                   << ": " << ec;
      }
    }
  }
};
// NOTE: `std::unique_ptr<T>` needs T to be a complete type wherever its
// deleter is instantiated, so the special member functions are defined
// out-of-line here, after ForkServerProps has been completed (by-the-book
// PIMPL).
//
// If the command is still executing, logs a warning and requests it to stop
// without waiting for it.
Command::~Command() {
  if (!is_executing()) return;
  const std::string running_process =
      fork_server_ ? absl::StrCat("fork server PID ", fork_server_->pid_)
                   : absl::StrCat("PID ", pid_);
  LOG(WARNING)
      << "Destructing Command object for " << path() << " with "
      << running_process
      << " still running. Requesting it to stop without waiting for it...";
  RequestStop();
}
// Creates a command for `path` with the given `options`. Nothing is executed
// here; the arguments are only stored.
Command::Command(std::string_view path, Options options)
: path_(path), options_(std::move(options)) {}
// Creates a command for `path` with value-initialized options.
Command::Command(std::string_view path) : Command{path, {}} {}
std::string Command::ToString() const {
std::vector<std::string> ss;
ss.reserve(/*env*/ 1 + options_.env_add.size() + options_.env_remove.size() +
/*path*/ 1 + /*args*/ options_.args.size() + /*out/err*/ 2);
// env.
ss.push_back("env");
// Arguments that unset environment variables must appear first.
for (const auto &var : options_.env_remove) {
ss.push_back(absl::StrCat("-u ", var));
}
for (const auto &var : options_.env_add) {
ss.push_back(var);
}
// path.
std::string path = path_;
// Strip the % prefixes, if any.
if (absl::StartsWith(path, kNoForkServerRequestPrefix)) {
path = path.substr(kNoForkServerRequestPrefix.size());
}
// Replace @@ with temp_file_path_.
constexpr std::string_view kTempFileWildCard = "@@";
if (absl::StrContains(path, kTempFileWildCard)) {
CHECK(!options_.temp_file_path.empty());
path = absl::StrReplaceAll(path,
{{kTempFileWildCard, options_.temp_file_path}});
}
ss.push_back(std::move(path));
// args.
for (const auto &arg : options_.args) {
ss.push_back(arg);
}
// out/err.
if (!options_.stdout_file.empty()) {
ss.push_back(absl::StrCat("> ", options_.stdout_file));
}
if (!options_.stderr_file.empty()) {
if (options_.stdout_file != options_.stderr_file) {
ss.push_back(absl::StrCat("2> ", options_.stderr_file));
} else {
ss.push_back("2>&1");
}
}
// Trim trailing space and return.
return absl::StrJoin(ss, kCommandLineSeparator);
}
// Starts a fork server for this command and connects to it.
//
// Creates two FIFOs named `<prefix>_FIFO0` and `<prefix>_FIFO1` in
// `temp_dir_path` (creating the directory if needed), launches
// `command_line_` in the background through `system()` with the FIFO paths
// passed in the CENTIPEDE_FORK_SERVER_FIFO0/1 environment variables, opens
// both FIFOs, and records the PID and creation stamp of the launched process.
//
// Returns true if all of that succeeded. Returns false if the command path
// starts with `kNoForkServerRequestPrefix`, if the launch command failed, if
// the FIFOs could not be opened, or if the creation stamp could not be read.
// Failure to create a FIFO or to parse the PID file aborts via PCHECK/CHECK.
//
// NOTE(review): on the first two failure returns after `fork_server_` is
// allocated, it stays non-null with `pid_` still -1. `ExecuteAsync()` takes
// the fork server branch whenever `fork_server_ != nullptr`, and
// `VerifyForkServerIsHealthy()` CHECK-fails on `pid_ < 0`. Confirm how callers
// handle a `false` return before relying on "will proceed without it".
bool Command::StartForkServer(std::string_view temp_dir_path,
std::string_view prefix) {
if (absl::StartsWith(path_, kNoForkServerRequestPrefix)) {
VLOG(2) << "Fork server disabled for " << path();
return false;
}
VLOG(2) << "Starting fork server for " << path();
fork_server_.reset(new ForkServerProps);
fork_server_->fifo_path_[0] = std::filesystem::path(temp_dir_path)
.append(absl::StrCat(prefix, "_FIFO0"));
fork_server_->fifo_path_[1] = std::filesystem::path(temp_dir_path)
.append(absl::StrCat(prefix, "_FIFO1"));
// NOTE: the PID file name does not include `prefix`, and the path is kept in
// this local rather than in `fork_server_->pid_file_path_`.
const std::string pid_file_path =
std::filesystem::path(temp_dir_path).append("pid");
(void)std::filesystem::create_directory(temp_dir_path); // it may not exist.
for (int i = 0; i < 2; ++i) {
PCHECK(mkfifo(fork_server_->fifo_path_[i].c_str(), 0600) == 0)
<< VV(i) << VV(fork_server_->fifo_path_[i]);
}
// NOTE: A background process does not return its exit status to the subshell,
// so failures will never propagate to the caller of `system()`. Instead, we
// save out the background process's PID to a file and use it later to assert
// that the process has started and is still running.
// The stub's placeholders are, in order: FIFO0 path, FIFO1 path, the command
// line to exec, and the PID file path. `%%s` is a literal `%s` for `printf`.
static constexpr std::string_view kForkServerCommandStub = R"sh(
{
CENTIPEDE_FORK_SERVER_FIFO0="%s" \
CENTIPEDE_FORK_SERVER_FIFO1="%s" \
exec %s
} &
printf "%%s" $! > "%s"
)sh";
const std::string fork_server_command = absl::StrFormat(
kForkServerCommandStub, fork_server_->fifo_path_[0],
fork_server_->fifo_path_[1], command_line_, pid_file_path);
VLOG(2) << "Fork server command:" << fork_server_command;
const int exit_code = system(fork_server_command.c_str());
// Check if `system()` was able to parse and run the command at all.
if (exit_code != EXIT_SUCCESS) {
LogProblemInfo(
"Failed to parse or run command to launch fork server; will proceed "
"without it");
return false;
}
// The fork server is probably running now. However, one failure scenario is
// that it starts and exits early. Try opening the read/write comms pipes with
// it: if that fails, something is wrong.
// We use non-blocking I/O to open the pipes. That is good and safe, because:
// 1) This prevents the `open()` calls from hanging when the fork server fails
// to open the pipes on its side (note the use of O_RDWR, not O_WRONLY, to
// avoid ENXIO).
// 2) In `Command::Execute`, we wait for the return channel pipe with a
// `poll()`, so it should always have data when we attempt to `read()` from
// it.
// See more at
// https://www.gnu.org/software/libc/manual/html_node/Operating-Modes.html.
if ((fork_server_->pipe_[0] = open(fork_server_->fifo_path_[0].c_str(),
O_RDWR | O_NONBLOCK)) < 0 ||
(fork_server_->pipe_[1] = open(fork_server_->fifo_path_[1].c_str(),
O_RDONLY | O_NONBLOCK)) < 0) {
LogProblemInfo(
"Failed to establish communication with fork server; will proceed "
"without it");
return false;
}
// Read back the PID that the launch command wrote to the PID file.
std::string pid_str;
ReadFromLocalFile(pid_file_path, pid_str);
CHECK(absl::SimpleAtoi(pid_str, &fork_server_->pid_)) << VV(pid_str);
// Remember the creation stamp so that `VerifyForkServerIsHealthy()` can
// detect a recycled PID later.
auto creation_stamp = GetProcessCreationStamp(fork_server_->pid_);
if (!creation_stamp.ok()) {
LogProblemInfo(
absl::StrCat("Failed to get the fork server's creation stamp; will "
"proceed without it "
"(failure status: ",
creation_stamp.status(), ")"));
return false;
}
fork_server_->creation_stamp = *std::move(creation_stamp);
return true;
}
// Checks that the fork server started by `StartForkServer()` is still the
// process this command is talking to.
//
// Preconditions (CHECK-enforced): the fork server was started, its PID is
// known, and both comms pipes are open. Returns OK if a process with the fork
// server's PID exists and has the recorded creation stamp; otherwise returns
// a non-OK status describing the problem.
absl::Status Command::VerifyForkServerIsHealthy() {
  CHECK(fork_server_ != nullptr) << "Fork server wasn't started";
  CHECK(fork_server_->pid_ >= 0) << "Fork server process failed to start";
  CHECK(fork_server_->pipe_[0] >= 0 && fork_server_->pipe_[1] >= 0)
      << "Failed to connect to fork server";

  const pid_t server_pid = fork_server_->pid_;

  // Signal 0 only probes for existence: _some_ process has this PID, though
  // it may be an unrelated one that got a recycled PID...
  if (kill(server_pid, 0) != EXIT_SUCCESS) {
    return absl::UnknownError(absl::StrCat(
        "Can't communicate with fork server, PID=", server_pid));
  }

  // ...so also require the same creation stamp, which practically guarantees
  // that it is our original fork server process.
  const auto current_stamp = GetProcessCreationStamp(server_pid);
  if (!current_stamp.ok()) return current_stamp.status();

  const std::string &expected_stamp = fork_server_->creation_stamp;
  if (*current_stamp == expected_stamp) return absl::OkStatus();

  return absl::UnknownError(absl::StrCat(
      "Fork server's creation stamp changed (new process?) - expected ",
      expected_stamp, ", but got ", *current_stamp));
}
// Starts one execution of the command without waiting for it to finish.
//
// With a fork server: verifies that the server is healthy and wakes it up by
// writing a single byte to the request pipe; returns false (after logging) if
// the health check fails. Without a fork server: spawns `/bin/sh -c
// <command line>` as a child process. Returns true once the execution has been
// started. Must not be called while a previous execution is in progress.
bool Command::ExecuteAsync() {
  CHECK(!is_executing());
  VLOG(1) << "Executing command '" << command_line_ << "'...";

  if (fork_server_ == nullptr) {
    // No fork server: run the command line through the shell directly.
    CHECK_EQ(pid_, -1);
    // posix_spawn() takes mutable `char *` arguments, so keep writable copies
    // of every argument alive until the call returns.
    std::string shell = "/bin/sh";
    std::string shell_flag = "-c";
    std::string script = command_line_;
    std::vector<char *> argv = {shell.data(), shell_flag.data(), script.data(),
                                nullptr};
    CHECK_EQ(posix_spawn(&pid_, argv[0], /*file_actions=*/nullptr,
                         /*attrp=*/nullptr, argv.data(), environ),
             0);
  } else {
    VLOG(1) << "Sending execution request to fork server";
    const auto status = VerifyForkServerIsHealthy();
    if (!status.ok()) {
      LogProblemInfo(absl::StrCat("Fork server should be running, but isn't: ",
                                  status.message()));
      return false;
    }
    // Wake up the fork server: a single byte on the request pipe is the
    // execution request.
    char x = ' ';
    CHECK_EQ(1, write(fork_server_->pipe_[0], &x, 1));
  }

  is_executing_ = true;
  return true;
}
// Waits until the execution started by `ExecuteAsync()` finishes or `deadline`
// passes.
//
// Returns the decoded result of the execution: 0 on success, the exit status
// if the command exited with a non-zero status, or the signal number if it was
// killed by a signal (directly, or as reported by a wrapper shell via the
// "128 + signal" convention). Returns std::nullopt if the deadline passed or
// waiting on the fork server failed; in that case the command is still
// considered executing. If the command was killed by SIGINT, also calls
// `RequestEarlyStop(EXIT_FAILURE)`.
std::optional<int> Command::Wait(absl::Time deadline) {
  CHECK(is_executing());
  int exit_code = EXIT_SUCCESS;
  if (fork_server_ != nullptr) {
    // The fork server forks, the child is running. Block until some readable
    // data appears in the pipe (that is, after the fork server writes the
    // execution result to it).
    //
    // `poll()` takes its timeout as an `int` number of milliseconds, so a
    // distant (or infinite) deadline must not be narrowed blindly: wait in
    // slices that are guaranteed to fit, and keep polling until the real
    // deadline has passed.
    const absl::Duration kMaxPollSlice = absl::Hours(24);
    struct pollfd poll_fd = {};
    int poll_ret = -1;
    bool poll_again = false;
    do {
      // NOTE: `poll_fd` has to be reset every time.
      poll_fd = {
          /*fd=*/fork_server_->pipe_[1],  // The file descriptor to wait for.
          /*events=*/POLLIN,  // Wait until `fd` gets readable data.
      };
      // Always wait at least 1ms, even if the deadline has already passed.
      const absl::Duration slice = std::clamp(
          deadline - absl::Now(), absl::Milliseconds(1), kMaxPollSlice);
      const int poll_timeout_ms =
          static_cast<int>(absl::ToInt64Milliseconds(slice));
      poll_ret = poll(&poll_fd, 1, poll_timeout_ms);
      // The `poll()` syscall can get interrupted: it sets errno==EINTR in that
      // case. We should tolerate that. Read `errno` before anything else can
      // overwrite it.
      const bool interrupted = poll_ret < 0 && errno == EINTR;
      // A slice may expire before the actual deadline: that is not a timeout.
      const bool slice_expired = poll_ret == 0 && absl::Now() < deadline;
      poll_again = interrupted || slice_expired;
    } while (poll_again);
    if (poll_ret != 1 || (poll_fd.revents & POLLIN) == 0) {
      // The fork server errored out or timed out, or some other error
      // occurred.
      if (poll_ret == 0) {
        LogProblemInfo(absl::StrCat(
            "Timeout while waiting for fork server: deadline is ", deadline));
      } else {
        LogProblemInfo(absl::StrCat(
            "Error while waiting for fork server: poll() returned ", poll_ret));
      }
      return std::nullopt;
    }
    // The fork server wrote the execution result to the pipe: read it.
    CHECK_EQ(sizeof(exit_code),
             read(fork_server_->pipe_[1], &exit_code, sizeof(exit_code)));
  } else {
    CHECK_NE(pid_, -1);
    while (true) {
      const pid_t r = waitpid(pid_, &exit_code, WNOHANG);
      CHECK_NE(r, -1);
      if (r == pid_ && (WIFEXITED(exit_code) || WIFSIGNALED(exit_code))) break;
      CHECK_EQ(r, 0);
      const auto timeout = deadline - absl::Now();
      if (timeout > absl::ZeroDuration()) {
        // Sleep for at most 100ms between checks. Cap the duration _before_
        // converting it to `useconds_t`: narrowing a large microsecond count
        // first could wrap around and yield a near-zero sleep.
        const absl::Duration nap = std::min(timeout, absl::Milliseconds(100));
        const auto duration =
            static_cast<useconds_t>(absl::ToInt64Microseconds(nap));
        usleep(duration);  // NOLINT: early return on SIGCHLD is desired.
        continue;
      } else {
        LogProblemInfo(absl::StrCat(
            "Timeout while waiting for the command process: deadline is ",
            deadline));
        return std::nullopt;
      }
    }
    pid_ = -1;
  }
  is_executing_ = false;
  // When the command is actually a wrapper shell launching the binary(-es)
  // (e.g. a Docker container), the shell will preserve a normal exit code
  // returned by the binary (the legal range for such codes that can be
  // passed to `exit()` is [0..125]); but the shell will specially encode
  // the exit code returned by the binary when the binary is killed by a
  // signal by adding 128 to the signal number and returning the result as
  // a normal exit code. This encoding is used in `bash` and `dash` but may be
  // different in other shells, e.g., `ksh`.
  //
  // For more details, see https://tldp.org/LDP/abs/html/exitcodes.html.
  //
  // Therefore, to handle this case, we need to first unpack these special
  // pseudo-normal exit codes before analyzing them further. After
  // reassigning `WEXITSTATUS()` to exit_code, the if-else below will take
  // the else-branch and unpack the signal number from the updated value. This
  // has experimentally been observed to work with existing implementations of
  // the `wait` macros but there is no definitive documentation for it.
  if (WIFEXITED(exit_code) && WEXITSTATUS(exit_code) > 128 &&
      WEXITSTATUS(exit_code) < 255) {
    exit_code = WEXITSTATUS(exit_code);
  }
  if (WIFEXITED(exit_code) && WEXITSTATUS(exit_code) != EXIT_SUCCESS) {
    const auto exit_status = WEXITSTATUS(exit_code);
    VlogProblemInfo(
        absl::StrCat("Command errored out: exit status=", exit_status),
        /*vlog_level=*/1);
    exit_code = exit_status;
  } else if (WIFSIGNALED(exit_code)) {
    const auto signal = WTERMSIG(exit_code);
    if (signal == SIGINT) {
      RequestEarlyStop(EXIT_FAILURE);
      // When the user kills Centipede via ^C, they are unlikely to be
      // interested in any of the subprocesses' outputs. Also, ^C terminates all
      // the subprocesses, including all the runners, so all their outputs would
      // get printed simultaneously, flooding the log. Hence log at a high
      // `vlog_level`.
      VlogProblemInfo("Command killed: signal=SIGINT (likely Ctrl-C)",
                      /*vlog_level=*/10);
    } else {
      // The subprocess was killed by something other than ^C: log at a lower
      // `vlog_level` to help diagnose problems.
      VlogProblemInfo(absl::StrCat("Command killed: signal=", signal),
                      /*vlog_level=*/1);
    }
    // TODO(ussuri): Consider changing this to exit_code = EXIT_FAILURE.
    exit_code = signal;
  }
  return exit_code;
}
// Asks the current execution to stop by sending SIGTERM: to the fork server
// process when one is in use, otherwise to the directly spawned command
// process. Does not wait for the process to exit. Must be called only while
// the command is executing.
void Command::RequestStop() {
  CHECK(is_executing());
  if (!fork_server_) {
    CHECK_NE(pid_, -1);
    kill(pid_, SIGTERM);
  } else {
    CHECK_NE(fork_server_->pid_, -1);
    kill(fork_server_->pid_, SIGTERM);
  }
}
// Returns the contents of the file that stdout is redirected to. Returns an
// empty string when stdout is not redirected, and the literal "<EMPTY>" when
// the redirect file yields no contents.
std::string Command::ReadRedirectedStdout() const {
  if (options_.stdout_file.empty()) return std::string{};

  std::string contents;
  ReadFromLocalFile(options_.stdout_file, contents);
  if (contents.empty()) contents = "<EMPTY>";
  return contents;
}
// Returns the contents of the file that stderr is redirected to. Returns an
// empty string when stderr is not redirected, the literal "<DUPED TO STDOUT>"
// when stderr shares the stdout destination, and the literal "<EMPTY>" when
// the redirect file yields no contents.
std::string Command::ReadRedirectedStderr() const {
  const std::string &err_file = options_.stderr_file;
  if (err_file.empty()) return std::string{};

  // stderr goes to the same place as stdout: there is nothing separate to
  // read.
  const bool duped_to_stdout =
      err_file == "2>&1" || err_file == options_.stdout_file;
  if (duped_to_stdout) return "<DUPED TO STDOUT>";

  std::string contents;
  ReadFromLocalFile(err_file, contents);
  if (contents.empty()) contents = "<EMPTY>";
  return contents;
}
void Command::LogProblemInfo(std::string_view message) const {
// Prevent confusing interlaced logs when multiple threads experience failures
// at the same time.
// TODO(ussuri): Non-failure related logs from other threads may still
// interlace with these. Improve further, if possible. Note the printiing
// line-by-line is unavoidable to overcome the single log line length limit.
static absl::Mutex mu{absl::kConstInit};
absl::MutexLock lock(&mu);
LOG(ERROR) << message;
LOG(ERROR).NoPrefix() << "=== COMMAND ===";
LOG(ERROR).NoPrefix() << command_line_;
LOG(ERROR).NoPrefix() << "=== STDOUT ===";
for (const auto &line : absl::StrSplit(ReadRedirectedStdout(), '\n')) {
LOG(ERROR).NoPrefix() << line;
}
LOG(ERROR).NoPrefix() << "=== STDERR ===";
for (const auto &line : absl::StrSplit(ReadRedirectedStderr(), '\n')) {
LOG(ERROR).NoPrefix() << line;
}
}
// Same as `LogProblemInfo()`, but does nothing unless verbose logging is
// enabled at `vlog_level`.
void Command::VlogProblemInfo(std::string_view message, int vlog_level) const {
  if (!ABSL_VLOG_IS_ON(vlog_level)) return;
  LogProblemInfo(message);
}
} // namespace fuzztest::internal

View File

@ -0,0 +1,140 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef THIRD_PARTY_CENTIPEDE_COMMAND_H_
#define THIRD_PARTY_CENTIPEDE_COMMAND_H_
#include <memory>
#include <optional>
#include <string>
#include <string_view>
#include <vector>
#include "absl/status/status.h"
#include "absl/time/time.h"
namespace fuzztest::internal {
// Runs an external command through the shell, either by spawning a new process
// for every execution or, after a successful `StartForkServer()`, by sending
// execution requests to a fork server process.
class Command final {
public:
struct Options {
// Arguments to pass to the executed command. The command is executed by the
// shell, so the arguments need to be shell-escaped.
// TODO(b/381910257): Escape the arguments for passing to the shell.
std::vector<std::string> args;
// Environment variables/values in the form "KEY=VALUE" to set in the
// subprocess executing the command. These are added to the environment
// variables inherited from the parent process.
std::vector<std::string> env_add;
// Environment variables to unset in the subprocess executing the command.
std::vector<std::string> env_remove;
// Redirect stdout to this file. If empty, use parent's STDOUT.
std::string stdout_file;
// Redirect stderr to this file. If empty, use parent's STDERR. If
// `stdout_file` == `stderr_file` and both are non-empty, stdout/stderr are
// combined.
std::string stderr_file;
// "@@" in the command will be replaced with `temp_file_path`.
std::string temp_file_path;
};
// Constructs a command to run the binary at `path` with the given `options`.
// The path can contain "@@" which will be replaced with
// `options.temp_file_path`.
explicit Command(std::string_view path, Options options);
// Constructs a command to run the binary at `path` with default options.
explicit Command(std::string_view path);
// Not movable or copyable to simplify the resource management logic.
Command(const Command& other) = delete;
Command& operator=(const Command& other) = delete;
Command(Command&& other) noexcept = delete;
Command& operator=(Command&& other) noexcept = delete;
// Cleans up the fork server, if that was created.
~Command();
// Returns a string representing the command, e.g. like this
// "env -u ENV1 ENV2=VAL2 path arg1 arg2 > out 2> err"
// (the individual parts are separated by a backslash followed by a newline).
std::string ToString() const;
// Execute the command asynchronously. Returns true if it starts a new
// execution, false otherwise. Must be called only when the command
// is not executing.
bool ExecuteAsync();
// Returns whether the command is currently executing.
bool is_executing() const { return is_executing_; }
// Waits for the command execution and returns the exit status if the
// execution finishes within `deadline`. Returns std::nullopt if the
// execution times out or waiting for it fails. Must be called only when the
// command is executing. If the command was interrupted (killed by SIGINT),
// calls `RequestEarlyStop()` (see stop.h).
std::optional<int> Wait(absl::Time deadline);
// Requests the command execution to stop. Must be called only when the
// command is executing. Note that after calling this, `Wait()` is still
// needed to complete the execution.
void RequestStop();
// Convenient method to execute synchronously. Returns EXIT_FAILURE if the
// execution could not be started or `Wait()` returned no result.
int Execute() {
if (!ExecuteAsync()) return EXIT_FAILURE;
return Wait(absl::InfiniteFuture()).value_or(EXIT_FAILURE);
}
// Attempts to start a fork server, returns true on success.
// Pipe files for the fork server are created in `temp_dir_path`
// with prefix `prefix`.
// See runner_fork_server.cc for details.
bool StartForkServer(std::string_view temp_dir_path, std::string_view prefix);
// Accessors.
const std::string& path() const { return path_; }
private:
// State of the fork server; defined out of line.
struct ForkServerProps;
// PID of the directly spawned command process, or -1 when there is none.
int pid_ = -1;
bool is_executing_ = false;
// Returns the status of the fork server process. Expects that the server was
// previously started using `StartForkServer()`.
absl::Status VerifyForkServerIsHealthy();
// Reads and returns the stdout of the command, if redirected to a file
// ("<EMPTY>" if that file has no contents). If not redirected, returns an
// empty string.
std::string ReadRedirectedStdout() const;
// Reads and returns the stderr of the command, if redirected to a file that
// is also different from the redirected stdout ("<EMPTY>" if that file has no
// contents). If stderr shares the stdout destination, returns
// "<DUPED TO STDOUT>". If not redirected, returns an empty string.
std::string ReadRedirectedStderr() const;
// Logs information about a problem, starting with `message`, followed
// by the command line, followed by the redirected stdout and stderr read
// from the `options_.stdout_file` and `options_.stderr_file` files, if any.
void LogProblemInfo(std::string_view message) const;
// Just as `LogProblemInfo()`, but logging occurs only when the VLOG level
// (set via `--v` or its equivalents) is >= `vlog_level`.
void VlogProblemInfo(std::string_view message, int vlog_level) const;
// NOTE: `command_line_` is initialized from `ToString()`, so it must stay
// declared after `path_` and `options_`.
const std::string path_;
const Options options_;
const std::string command_line_ = ToString();
// Non-null only when a fork server is in use.
std::unique_ptr<ForkServerProps> fork_server_;
};
} // namespace fuzztest::internal
#endif // THIRD_PARTY_CENTIPEDE_COMMAND_H_

View File

@ -0,0 +1,197 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/command.h"
#include <signal.h>
#include <sys/wait.h> // NOLINT(for WTERMSIG)
#include <cstdlib>
#include <filesystem> // NOLINT
#include <optional>
#include <string>
#include <string_view>
#include <utility>
#include "gtest/gtest.h"
#include "absl/strings/substitute.h"
#include "absl/time/clock.h"
#include "absl/time/time.h"
#include "./centipede/stop.h"
#include "./centipede/util.h"
#include "./common/test_util.h"
namespace fuzztest::internal {
namespace {
// Checks the command line rendered by `Command::ToString()` for each kind of
// option: arguments, environment changes, and stdout/stderr redirections.
TEST(CommandTest, ToString) {
  // Renders the command line of `path` run with `options`.
  const auto render = [](std::string_view path, Command::Options options) {
    return Command{path, std::move(options)}.ToString();
  };

  // No options at all.
  EXPECT_EQ(Command{"x"}.ToString(), "env \\\nx");

  // Arguments.
  Command::Options with_args;
  with_args.args = {"arg1", "arg2"};
  EXPECT_EQ(render("path", std::move(with_args)),
            "env \\\npath \\\narg1 \\\narg2");

  // Environment additions and removals.
  Command::Options with_env;
  with_env.env_add = {"K1=V1", "K2=V2"};
  with_env.env_remove = {"K3"};
  EXPECT_EQ(render("x", std::move(with_env)),
            "env \\\n-u K3 \\\nK1=V1 \\\nK2=V2 \\\nx");

  // Only stdout is redirected.
  Command::Options stdout_only;
  stdout_only.stdout_file = "out";
  EXPECT_EQ(render("x", std::move(stdout_only)), "env \\\nx \\\n> out");

  // Only stderr is redirected.
  Command::Options stderr_only;
  stderr_only.stderr_file = "err";
  EXPECT_EQ(render("x", std::move(stderr_only)), "env \\\nx \\\n2> err");

  // stdout and stderr are redirected to different files.
  Command::Options separate_files;
  separate_files.stdout_file = "out";
  separate_files.stderr_file = "err";
  EXPECT_EQ(render("x", std::move(separate_files)),
            "env \\\nx \\\n> out \\\n2> err");

  // stdout and stderr are redirected to the same file.
  Command::Options same_file;
  same_file.stdout_file = "out";
  same_file.stderr_file = "out";
  EXPECT_EQ(render("x", std::move(same_file)),
            "env \\\nx \\\n> out \\\n2>&1");
}
// Checks that `Execute()` reports the exit code of the command and that a
// normally finishing command does not trigger an early stop.
TEST(CommandTest, Execute) {
  // A command that succeeds yields the default exit code.
  Command echo{"echo"};
  const int echo_result = echo.Execute();
  EXPECT_EQ(echo_result, 0);
  EXPECT_FALSE(ShouldStop());

  // A command that exits with status 7 yields 7.
  Command exit7{"bash -c 'exit 7'"};
  const int exit7_result = exit7.Execute();
  EXPECT_EQ(exit7_result, 7);
  EXPECT_FALSE(ShouldStop());
}
// Checks that a command killed by SIGINT makes `ShouldStop()` return true.
TEST(CommandTest, HandlesInterruptedCommand) {
  // The shell sends SIGINT to itself.
  Command interrupted{"bash -c 'kill -SIGINT $$'"};
  interrupted.ExecuteAsync();
  interrupted.Wait(absl::InfiniteFuture());
  EXPECT_TRUE(ShouldStop());
  // Reset the stop state so that it does not leak into other tests.
  ClearEarlyStopRequestAndSetStopTime(absl::InfiniteFuture());
}
// Checks that "@@" in the command path is replaced with
// `Options::temp_file_path`.
TEST(CommandTest, InputFileWildCard) {
  Command::Options options;
  options.temp_file_path = "TEMP_FILE";
  Command wildcard_cmd{"foo bar @@ baz", std::move(options)};
  const std::string rendered = wildcard_cmd.ToString();
  EXPECT_EQ(rendered, "env \\\nfoo bar TEMP_FILE baz");
}
// Runs the test helper binary through the fork server with inputs that make it
// succeed, fail, return a custom exit code, abort, and hang, and checks the
// reported result and the captured output of every run.
TEST(CommandTest, ForkServer) {
  const std::string test_tmpdir = GetTestTempDir(test_info_->name());
  const std::string helper =
      GetDataDependencyFilepath("centipede/command_test_helper");

  // Runs one case: starts a fork server for the helper invoked with `input`
  // (stdout and stderr both redirected to a per-input log file), lets
  // `run_and_check(cmd)` drive the execution and verify its result, and then
  // checks that the helper echoed its input into the log.
  const auto run_case = [&](const std::string &input,
                            const auto &run_and_check) {
    SCOPED_TRACE(input);
    const std::string log = std::filesystem::path{test_tmpdir} / input;
    Command::Options cmd_options;
    cmd_options.args = {input};
    cmd_options.stdout_file = log;
    cmd_options.stderr_file = log;
    Command cmd{helper, std::move(cmd_options)};
    ASSERT_TRUE(cmd.StartForkServer(test_tmpdir, "ForkServer"));
    run_and_check(cmd);
    std::string log_contents;
    ReadFromLocalFile(log, log_contents);
    EXPECT_EQ(log_contents, absl::Substitute("Got input: $0", input));
  };

  run_case("success", [](Command &cmd) {
    EXPECT_EQ(cmd.Execute(), EXIT_SUCCESS);
  });
  run_case("fail", [](Command &cmd) {
    EXPECT_EQ(cmd.Execute(), EXIT_FAILURE);
  });
  run_case("ret42", [](Command &cmd) {  //
    EXPECT_EQ(cmd.Execute(), 42);
  });
  run_case("abort", [](Command &cmd) {
    // WTERMSIG() needs an lvalue on some platforms.
    const int ret = cmd.Execute();
    EXPECT_EQ(WTERMSIG(ret), SIGABRT);
  });
  run_case("hang", [](Command &cmd) {
    // The wait must give up at the deadline, before the helper finishes.
    ASSERT_TRUE(cmd.ExecuteAsync());
    EXPECT_EQ(cmd.Wait(absl::Now() + absl::Seconds(2)), std::nullopt);
  });
  // TODO(kcc): [impl] test what happens if the child is interrupted.
}
} // namespace
} // namespace fuzztest::internal

View File

@ -0,0 +1,36 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <unistd.h>
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include "absl/base/nullability.h"
// A binary linked with the fork server that exits/crashes in different ways.
//
// Takes exactly one argument, echoes it to stdout as "Got input: <arg>", and
// then behaves according to it:
//   "success" - returns EXIT_SUCCESS;
//   "fail"    - returns EXIT_FAILURE;
//   "ret42"   - returns 42;
//   "abort"   - calls abort();
//   "hang"    - sleeps for 5 seconds, then returns 17;
//   otherwise - returns 17.
int main(int argc, char** absl_nonnull argv) {
  // Validate the arguments explicitly instead of with assert(): assert() is
  // compiled out when NDEBUG is defined, which would let `argv[1]` be used
  // unchecked below. Aborting keeps the behavior of a failed assert().
  if (argc != 2) {
    fprintf(stderr, "Expected exactly 1 argument, got %d\n", argc - 1);
    abort();
  }
  const char* const input = argv[1];
  printf("Got input: %s", input);
  // Flush now: abort() below would not flush the stdio buffers.
  fflush(stdout);
  if (!strcmp(input, "success")) return EXIT_SUCCESS;
  if (!strcmp(input, "fail")) return EXIT_FAILURE;
  if (!strcmp(input, "ret42")) return 42;
  if (!strcmp(input, "abort")) abort();
  // Sleep longer than the deadline that the caller waits for in its "hang"
  // scenario.
  if (!strcmp(input, "hang")) sleep(5);
  return 17;
}

View File

@ -0,0 +1,150 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This library defines the concepts "fuzzing feature" and "feature domain".
// It is used by Centipede, and it can be used by fuzz runners to
// define their features in a way most friendly to Centipede.
// Fuzz runners do not have to use this file nor to obey the rules defined here.
// But using this file and following its rules is the simplest way if you want
// Centipede to understand the details about the features generated by the
// runner.
//
// This library must not depend on anything other than libc so that fuzz targets
// using it don't gain redundant coverage. For the same reason this library
// uses raw __builtin_trap instead of CHECKs.
// We make an exception for <algorithm> for std::sort/std::unique,
// since <algorithm> is very lightweight.
// This library is also header-only, with all functions defined as inline.
#ifndef THIRD_PARTY_CENTIPEDE_CONCURRENT_BITSET_H_
#define THIRD_PARTY_CENTIPEDE_CONCURRENT_BITSET_H_
#include <stddef.h>
#include <string.h>
// WARNING!!!: Be very careful with what STL headers or other dependencies you
// add here. This header needs to remain mostly bare-bones so that we can
// include it into runner.
#include <climits>
#include <cstdint>
#include <functional>
#include "absl/base/const_init.h"
#include "./centipede/concurrent_byteset.h"
namespace fuzztest::internal {
// A fixed-size bitset with a lossy concurrent set() function.
// kSizeInBits must be a multiple of 2**16 (checked by the static_assert
// below).
// NOTE(review): `lines_` is a ConcurrentByteSet<kSizeInLines>, and
// ConcurrentByteSet requires its size to be a multiple of 64. With
// CHAR_BIT == 8, kSizeInLines == kSizeInBits / 4096, so the effective
// requirement appears to be that kSizeInBits is a multiple of 2**18 - confirm.
//
// IMPORTANT!!! Objects of this class should only be constructed with static
// storage duration. This is because the class has intentionally uninitialized
// direct and transitive data members that rely on static initialization in the
// compiled process image.
template <size_t kSizeInBits>
class ConcurrentBitSet {
public:
static_assert((kSizeInBits % (1<<16)) == 0);
// Creates a ConcurrentBitSet with static storage duration.
// `words_` is deliberately left without an initializer (see the note on the
// member).
explicit constexpr ConcurrentBitSet(absl::ConstInitType)
: lines_{absl::kConstInit} {}
// Clears the bit set: zeroes all words and all line markers.
void clear() {
memset(words_, 0, sizeof(words_));
lines_.clear();
}
// Sets the bit `idx % kSizeInBits`.
// set() can be called concurrently with another set().
// If several threads race to update adjacent bits,
// the update may be lost (i.e. set() is lossy).
// We could use atomic set-bit instructions to make it non-lossy,
// but it is going to be too expensive.
void set(size_t idx) {
idx %= kSizeInBits;
size_t word_idx = idx / kBitsInWord;
size_t bit_idx = idx % kBitsInWord;
size_t line_idx = word_idx / kWordsInLine;
// Mark the line containing the word as non-empty.
lines_.Set(line_idx, 1);
word_t mask = 1ULL << bit_idx;
// The load and the store below are two separate relaxed operations, not one
// atomic read-modify-write: this is what makes set() lossy.
word_t word = __atomic_load_n(&words_[word_idx], __ATOMIC_RELAXED);
// Store only if the bit is not set yet.
if (!(word & mask)) {
word |= mask;
__atomic_store_n(&words_[word_idx], word, __ATOMIC_RELAXED);
}
}
// Gets the bit at `idx % kSizeInBits`. Returns 1 if the bit is set, 0
// otherwise.
uint8_t get(size_t idx) {
idx %= kSizeInBits;
size_t word_idx = idx / kBitsInWord;
size_t bit_idx = idx % kBitsInWord;
word_t word = __atomic_load_n(&words_[word_idx], __ATOMIC_RELAXED);
word_t mask = 1ULL << bit_idx;
return (word & mask) != 0;
}
// Calls `action(index)` for every index of a non-zero bit in the set,
// then sets all those bits to zero.
// Only the lines reported as non-zero by `lines_` are scanned.
__attribute__((noinline)) void ForEachNonZeroBit(
const std::function<void(size_t idx)> &action) {
// Iterates over all non-empty lines.
lines_.ForEachNonZeroByte([&](size_t idx, uint8_t value) {
// `idx` is the line index; `value` (the line marker) is not used.
size_t word_idx_beg = idx * kWordsInLine;
size_t word_idx_end = word_idx_beg + kWordsInLine;
ForEachNonZeroBit(action, word_idx_beg, word_idx_end);
});
}
private:
// Iterates over the range of words [`word_idx_beg`, `word_idx_end`).
// Calls `action` with the global bit index of every set bit, in increasing
// order within each word, and zeroes every non-zero word it visits.
void ForEachNonZeroBit(const std::function<void(size_t idx)> &action,
size_t word_idx_beg, size_t word_idx_end) {
for (size_t word_idx = word_idx_beg; word_idx < word_idx_end; ++word_idx) {
// Plain (non-atomic) accesses to `words_` here.
if (word_t word = words_[word_idx]) {
// Clear the stored word first, then report the bits from the local copy.
words_[word_idx] = 0;
do {
// Index of the lowest set bit of the remaining bits.
size_t bit_idx = __builtin_ctzll(word);
action(word_idx * kBitsInWord + bit_idx);
word_t mask = 1ULL << bit_idx;
word &= ~mask;
} while (word);
}
}
}
// A word is the largest integer type convenient for bitwise operations.
using word_t = uintptr_t;
static constexpr size_t kBytesInWord = sizeof(word_t);
static constexpr size_t kBitsInWord = CHAR_BIT * kBytesInWord;
static constexpr size_t kSizeInWords = kSizeInBits / kBitsInWord;
// All words are logically split into lines.
// When `set()` is called, we set the corresponding element of `lines_` to 1,
// so that we now know that at least 1 bit in that line is set. Then, in
// `ForEachNonZeroBit()`, we iterate only those lines that have non-zero bits.
static constexpr size_t kBytesInLine = 64 * 8;
static constexpr size_t kWordsInLine = kBytesInLine / kBytesInWord;
static constexpr size_t kSizeInLines = kSizeInWords / kWordsInLine;
// One byte per line: non-zero if the line may contain set bits.
ConcurrentByteSet<kSizeInLines> lines_;
// NOTE: No initializer for performance (`kSizeInWords` can be quite large).
// Relies on static initialization in the process image (see the class
// comment).
word_t words_[kSizeInWords];
};
} // namespace fuzztest::internal
#endif // THIRD_PARTY_CENTIPEDE_CONCURRENT_BITSET_H_

View File

@ -0,0 +1,124 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/concurrent_bitset.h"
#include <cstddef>
#include <cstring>
#include <vector>
#include "gtest/gtest.h"
#include "absl/base/const_init.h"
#include "./centipede/thread_pool.h"
namespace fuzztest::internal {
namespace {
// Checks set(), clear() and ForEachNonZeroBit() on a single thread, including
// that ForEachNonZeroBit() resets the bits it reports.
TEST(ConcurrentBitSetTest, Set) {
  constexpr size_t kSize = 1 << 18;
  // Must be static (see the ConcurrentBitSet class comment).
  static ConcurrentBitSet<kSize> bs(absl::kConstInit);

  // Collects, and thereby resets, all bits that are currently set.
  const auto drain = [] {
    std::vector<size_t> bits;
    bs.ForEachNonZeroBit([&bits](size_t idx) { bits.push_back(idx); });
    return bits;
  };

  // Indices are taken modulo the size of the set.
  const std::vector<size_t> in_bits = {0, 1, 2, 100, 102, 1000000};
  for (const size_t idx : in_bits) bs.set(idx);
  std::vector<size_t> expected_out_bits = {0,   1,   2,
                                           100, 102, 1000000 % kSize};
  EXPECT_EQ(drain(), expected_out_bits);

  // Nothing is reported after clear().
  bs.clear();
  EXPECT_TRUE(drain().empty());

  // A single bit is reported exactly once...
  bs.set(42);
  expected_out_bits = {42};
  EXPECT_EQ(drain(), expected_out_bits);

  // ...because reporting clears it: all bits are now clear.
  EXPECT_TRUE(drain().empty());
}
// Checks that get() reflects set() for indices larger than the set size, and
// that setting one bit does not affect another one.
TEST(ConcurrentBitSetTest, Get) {
  constexpr size_t kSize = 1 << 18;
  // Must be static (see the ConcurrentBitSet class comment).
  static ConcurrentBitSet<kSize> bs(absl::kConstInit);
  // Both indices exceed kSize, so they are taken modulo kSize.
  constexpr size_t kInBit1 = 134217728;
  constexpr size_t kInBit2 = 134217732;

  // Initially neither bit is set.
  ASSERT_EQ(bs.get(kInBit1), 0);
  ASSERT_EQ(bs.get(kInBit2), 0);

  // Setting the first bit leaves the second one untouched.
  bs.set(kInBit1);
  EXPECT_EQ(bs.get(kInBit1), 1);
  EXPECT_EQ(bs.get(kInBit2), 0);
}
// Tests `ConcurrentBitSet` from multiple threads.
TEST(ConcurrentBitSetTest, SetInConcurrentThreads) {
  // Three threads each set one specific bit in a long loop; a fourth thread
  // sets another bit just once. Since set() is lossy, a single call may fail
  // to set its bit, whereas a bit set repeatedly in a long loop is expected to
  // end up set with a probability indistinguishable from one. Hence two
  // outcomes are accepted: with and without the bit of the fourth thread
  // (`possible_bits4` / `possible_bits3`).
  // WARNING: `bs` must be static (see the class comment).
  static ConcurrentBitSet<(1 << 18)> bs(absl::kConstInit);
  // Sets bit `idx` over and over again.
  static auto cb = [](size_t idx) {
    for (size_t i = 0; i < 10000000; i++) {
      bs.set(idx);
    }
  };

  {
    // All scheduled work is expected to be complete once the pool goes out of
    // scope.
    ThreadPool pool{4};
    pool.Schedule([]() { cb(10); });
    pool.Schedule([]() { cb(11); });
    pool.Schedule([]() { cb(14); });
    pool.Schedule([]() { bs.set(15); });
  }

  std::vector<size_t> bits;
  bs.ForEachNonZeroBit([&bits](size_t idx) { bits.push_back(idx); });

  const std::vector<size_t> possible_bits3 = {10, 11, 14};
  const std::vector<size_t> possible_bits4 = {10, 11, 14, 15};
  if (bits.size() != 3) {
    EXPECT_EQ(bits, possible_bits4);
  } else {
    EXPECT_EQ(bits, possible_bits3);
  }
}
// A global ConcurrentBitSet constructed with the absl::kConstInit constructor.
static ConcurrentBitSet<(1 << 20)> large_concurrent_bitset(absl::kConstInit);
// A thread-local ConcurrentBitSet, to test a thread-local object as well.
static thread_local ConcurrentBitSet<(1 << 20)> large_tls_concurrent_bitset(
    absl::kConstInit);

// Repeatedly sets the same bits in a large bit set and checks that exactly
// those bits are reported, in increasing order, on every iteration.
TEST(ConcurrentBitSetTest, Large) {
  const std::vector<size_t> in_bits = {
      0, 1, 2, 100, 102, 800, 10000, 20000, 30000, 500000,
  };
  for (auto *bs : {&large_concurrent_bitset, &large_tls_concurrent_bitset}) {
    for (size_t iter = 0; iter < 100000; ++iter) {
      for (const size_t idx : in_bits) bs->set(idx);
      // Reporting the bits also clears them, so every iteration starts from an
      // empty set.
      std::vector<size_t> out_bits;
      bs->ForEachNonZeroBit(
          [&out_bits](size_t idx) { out_bits.push_back(idx); });
      EXPECT_EQ(out_bits, in_bits);
    }
  }
}
} // namespace
} // namespace fuzztest::internal

View File

@ -0,0 +1,187 @@
// Copyright 2023 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This library defines the concepts "fuzzing feature" and "feature domain".
// It is used by Centipede, and it can be used by fuzz runners to
// define their features in a way most friendly to Centipede.
// Fuzz runners do not have to use this file nor to obey the rules defined here.
// But using this file and following its rules is the simplest way if you want
// Centipede to understand the details about the features generated by the
// runner.
#ifndef THIRD_PARTY_CENTIPEDE_CONCURRENT_BYTESET_H_
#define THIRD_PARTY_CENTIPEDE_CONCURRENT_BYTESET_H_
#include <climits>
#include <cstddef>
#include <cstdint>
#include <functional>
// WARNING!!!: Be very careful with what STL headers or other dependencies you
// add here. This header needs to remain mostly bare-bones so that we can
// include it into runner.
#include "absl/base/const_init.h"
namespace fuzztest::internal {
// TODO(kcc): replace the standalone ForEachNonZeroByte with code from here.
// TODO(kcc): ConcurrentByteSet is an unoptimized single-layer byte set.
// Implement multi-layer byte set(s).
// A fixed-size byte set containing kSize bytes, kSize must be a multiple of 64.
// Set() can be called concurrently with another Set(), other uses should be
// synchronized externally.
// Intended usage is to call ForEachNonZeroByte() from one thread.
//
// IMPORTANT!!! Objects of this class should only be constructed with static
// storage duration. This is because the class has intentionally uninitialized
// direct and transitive data members that rely on static initialization in the
// compiled process image.
template <size_t kSize>
class ConcurrentByteSet {
 public:
  static constexpr size_t kSizeInBytes = kSize;
  // kSize must be multiple of this.
  static constexpr size_t kSizeMultiple = 64;
  static_assert((kSize % kSizeMultiple) == 0);

  // Creates a ConcurrentByteSet with static storage duration.
  explicit constexpr ConcurrentByteSet(absl::ConstInitType) {}

  // Clears the set.
  // Uses the compiler builtin (like ForEachNonZeroByte() does) so that this
  // header does not rely on <cstring>, which it does not include.
  void clear() { __builtin_memset(bytes_, 0, sizeof(bytes_)); }

  // Sets element `idx` to `value`. `idx` must be < kSize; traps otherwise.
  // Can be called concurrently.
  void Set(size_t idx, uint8_t value) {
    if (idx >= kSize) __builtin_trap();
    __atomic_store_n(&bytes_[idx], value, __ATOMIC_RELAXED);
  }

  // Performs a saturated increment of element `idx`: the value stops growing
  // once it reaches 255. `idx` must be < kSize; traps otherwise.
  // NOTE: the load and the store below are two separate relaxed atomic
  // operations, not a single read-modify-write, so increments that race on the
  // same element may be lost.
  void SaturatedIncrement(size_t idx) {
    if (idx >= kSize) __builtin_trap();
    uint8_t counter = __atomic_load_n(&bytes_[idx], __ATOMIC_RELAXED);
    if (counter != 255)
      __atomic_store_n(&bytes_[idx], counter + 1, __ATOMIC_RELAXED);
  }

  // Calls `action(index, value)` for every {index,value} of a non-zero byte in
  // the set, then sets all those bytes to zero.
  // `from` and `to` set the range of elements to iterate, both must be
  // multiples of kSizeMultiple, and `to` must be <= kSize; traps otherwise.
  void ForEachNonZeroByte(const std::function<void(size_t, uint8_t)> &action,
                          size_t from = 0, size_t to = kSize) {
    using word_t = uintptr_t;
    constexpr size_t kWordSize = sizeof(word_t);
    if (from % kSizeMultiple) __builtin_trap();
    if (to % kSizeMultiple) __builtin_trap();
    if (to > kSize) __builtin_trap();
    // Iterate one word at a time.
    for (uint8_t *ptr = &bytes_[from], *end = &bytes_[to]; ptr < end;
         ptr += kWordSize) {
      word_t word;
      __builtin_memcpy(&word, ptr, kWordSize);
      // Fast path: the whole word is zero, nothing to report or reset.
      if (!word) continue;
      // Reset the bytes before reporting them; `word` keeps the old values.
      __builtin_memset(ptr, 0, kWordSize);
      // This loop assumes little-endianness. (Tests will break on big-endian).
      for (size_t pos = 0; pos < kWordSize; pos++) {
        uint8_t value = word >> (pos * CHAR_BIT);  // lowest byte is taken.
        if (value) action(ptr - &bytes_[0] + pos, value);
      }
    }
  }

 private:
  // No initializer for performance (`kSize` can be quite large). Relies on
  // static initialization in the process image (see the class comment).
  uint8_t bytes_[kSize] __attribute__((aligned(64)));
};
// A byte set built from two layers, `Upper` and `Lower`, with the same
// interface and behaviour as ConcurrentByteSet.
// The size of the lower layer is a multiple of the size of the upper layer.
// Every write first stores 1 into the upper-layer element that covers the
// target index and then updates the element of the lower layer. This allows
// ForEachNonZeroByte() to skip sub-regions of the lower layer that were never
// written to.
template <size_t kSize, typename Upper,
          typename Lower = ConcurrentByteSet<kSize>>
class LayeredConcurrentByteSet {
 public:
  static constexpr size_t kSizeInBytes = kSize;
  static constexpr size_t kSizeMultiple =
      Lower::kSizeMultiple * Upper::kSizeMultiple;
  static_assert(kSize == Lower::kSizeInBytes);

  LayeredConcurrentByteSet() = default;

  // Creates a LayeredConcurrentByteSet with static storage duration.
  explicit constexpr LayeredConcurrentByteSet(absl::ConstInitType)
      : upper_layer_(absl::kConstInit), lower_layer_(absl::kConstInit) {}

  // Clears both layers.
  void clear() {
    upper_layer_.clear();
    lower_layer_.clear();
  }

  // Sets element `idx` to `value`. Traps if `idx` >= kSize.
  void Set(size_t idx, uint8_t value) {
    if (idx >= kSize) __builtin_trap();
    MarkUpperLayer(idx);
    lower_layer_.Set(idx, value);
  }

  // Performs a saturated increment of element `idx`. Traps if `idx` >= kSize.
  void SaturatedIncrement(size_t idx) {
    if (idx >= kSize) __builtin_trap();
    MarkUpperLayer(idx);
    lower_layer_.SaturatedIncrement(idx);
  }

  // Calls `action(index, value)` for every non-zero byte of the lower layer
  // in [from, to), visiting only the sub-regions flagged in the upper layer.
  // Traps unless `to` <= kSize and both bounds are multiples of kSizeMultiple.
  void ForEachNonZeroByte(const std::function<void(size_t, uint8_t)> &action,
                          size_t from = 0, size_t to = kSize) {
    if (to > kSize || (from % kSizeMultiple) != 0 ||
        (to % kSizeMultiple) != 0) {
      __builtin_trap();
    }
    // Each flagged upper-layer element selects one kLayerRatio-sized region
    // of the lower layer; the flag's own value is irrelevant.
    const auto scan_lower_region = [&](size_t upper_idx, uint8_t /*flag*/) {
      const size_t region_begin = upper_idx * kLayerRatio;
      lower_layer_.ForEachNonZeroByte(action, region_begin,
                                      region_begin + kLayerRatio);
    };
    upper_layer_.ForEachNonZeroByte(scan_lower_region, from / kLayerRatio,
                                    to / kLayerRatio);
  }

 private:
  // Number of lower-layer elements covered by one upper-layer element.
  static constexpr size_t kLayerRatio =
      Lower::kSizeInBytes / Upper::kSizeInBytes;
  static_assert((Lower::kSizeInBytes % Upper::kSizeInBytes) == 0);

  // Flags the upper-layer element that covers lower-layer index `idx`.
  void MarkUpperLayer(size_t idx) { upper_layer_.Set(idx / kLayerRatio, 1); }

  Upper upper_layer_;
  Lower lower_layer_;
};
// LayeredConcurrentByteSet specialization with two layers, where the upper
// layer is 64x smaller than the lower one.
template <size_t kSize>
class TwoLayerConcurrentByteSet
    : public LayeredConcurrentByteSet<kSize, ConcurrentByteSet<kSize / 64>> {
  using Base = LayeredConcurrentByteSet<kSize, ConcurrentByteSet<kSize / 64>>;

 public:
  // Creates a TwoLayerConcurrentByteSet with static storage duration.
  explicit constexpr TwoLayerConcurrentByteSet(absl::ConstInitType)
      : Base(absl::kConstInit) {}
};
} // namespace fuzztest::internal
#endif // THIRD_PARTY_CENTIPEDE_CONCURRENT_BYTESET_H_

View File

@ -0,0 +1,124 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/concurrent_byteset.h"
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <utility>
#include <vector>
#include "gtest/gtest.h"
#include "absl/base/const_init.h"
#include "./centipede/thread_pool.h"
namespace fuzztest::internal {
namespace {
// Exercises Set(), SaturatedIncrement() and ForEachNonZeroByte() on a
// single-layer byte set.
TEST(ConcurrentByteSetTest, Basic) {
  using Entry = std::pair<size_t, uint8_t>;
  static ConcurrentByteSet<1024> bs(absl::kConstInit);
  const std::vector<Entry> in = {
      {0, 1}, {1, 42}, {2, 33}, {100, 15}, {102, 1}, {800, 66}};

  // Appends every reported {index, value} to `out`.
  std::vector<Entry> out;
  const auto collect = [&out](size_t idx, uint8_t value) {
    out.emplace_back(idx, value);
  };

  for (const auto &[idx, value] : in) {
    bs.Set(idx, value);
  }

  // Test ForEachNonZeroByte.
  bs.ForEachNonZeroByte(collect);
  EXPECT_EQ(out, in);

  // The previous call must have drained bs.
  out.clear();
  bs.ForEachNonZeroByte(collect);
  EXPECT_TRUE(out.empty());

  // Test SaturatedIncrement: incrementing an element `value` times from zero
  // must produce `value`.
  for (const auto &[idx, value] : in) {
    for (int iter = 0; iter < value; ++iter) {
      bs.SaturatedIncrement(idx);
    }
  }
  bs.ForEachNonZeroByte(collect);
  EXPECT_EQ(out, in);
}
// The two-layer test below runs against a thread_local object.
static thread_local TwoLayerConcurrentByteSet<(1 << 17)> two_layer_byte_set(
    absl::kConstInit);

// Same scenario as the single-layer test, but for TwoLayerConcurrentByteSet.
TEST(ConcurrentByteSetTest, TwoLayer) {
  using Entry = std::pair<size_t, uint8_t>;
  auto &bs = two_layer_byte_set;
  const std::vector<Entry> in = {
      {0, 1}, {1, 42}, {2, 33}, {100, 15}, {102, 1}, {800, 66}};

  // Appends every reported {index, value} to `out`.
  std::vector<Entry> out;
  const auto collect = [&out](size_t idx, uint8_t value) {
    out.emplace_back(idx, value);
  };

  for (const auto &[idx, value] : in) {
    bs.Set(idx, value);
  }

  // Test ForEachNonZeroByte.
  bs.ForEachNonZeroByte(collect);
  EXPECT_EQ(out, in);

  // The previous call must have drained bs.
  out.clear();
  bs.ForEachNonZeroByte(collect);
  EXPECT_TRUE(out.empty());

  // Test SaturatedIncrement: incrementing an element `value` times from zero
  // must produce `value`.
  for (const auto &[idx, value] : in) {
    for (int iter = 0; iter < value; ++iter) {
      bs.SaturatedIncrement(idx);
    }
  }
  bs.ForEachNonZeroByte(collect);
  EXPECT_EQ(out, in);
}
// Tests TwoLayerConcurrentByteSet from multiple threads.
TEST(ConcurrentByteSetTest, TwoLayerConcurrentThreads) {
  using Entry = std::pair<size_t, uint8_t>;
  static TwoLayerConcurrentByteSet<(1 << 16)> bs(absl::kConstInit);

  // Increments element `idx` far more than 255 times, so it must saturate.
  // Both `bs` and this lambda are static, so the scheduled tasks below need
  // no captures.
  static auto saturate = [](size_t idx) {
    static constexpr size_t kIncrements = 10000000;
    for (size_t n = 0; n < kIncrements; ++n) {
      bs.SaturatedIncrement(idx);
    }
  };

  // Three threads each saturate their own byte; a fourth thread increments
  // yet another byte exactly once.
  {
    ThreadPool threads{4};
    threads.Schedule([]() { saturate(10); });
    threads.Schedule([]() { saturate(11); });
    threads.Schedule([]() { saturate(14); });
    threads.Schedule([]() { bs.SaturatedIncrement(15); });
  }  // The threads join here.

  const std::vector<Entry> expected = {
      {10, 255}, {11, 255}, {14, 255}, {15, 1}};
  std::vector<Entry> out;
  bs.ForEachNonZeroByte(
      [&out](size_t idx, uint8_t value) { out.emplace_back(idx, value); });
  EXPECT_EQ(out, expected);
}
} // namespace
} // namespace fuzztest::internal

View File

@ -0,0 +1,297 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/config_file.h"
#include <cstdlib>
#include <filesystem> // NOLINT
#include <memory>
#include <optional>
#include <set>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include "absl/base/nullability.h"
#include "absl/flags/declare.h"
#include "absl/flags/flag.h"
#include "absl/flags/parse.h"
#include "absl/flags/reflection.h"
#include "absl/log/check.h"
#include "absl/log/log.h"
#include "absl/strings/match.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_join.h"
#include "absl/strings/str_replace.h"
#include "absl/strings/substitute.h"
#include "./centipede/config_init.h"
#include "./centipede/config_util.h"
#include "./centipede/util.h"
#include "./common/logging.h"
#include "./common/remote_file.h"
// TODO(ussuri): Move these flags next to main() ASAP. They are here
//  only temporarily to simplify the APIs and implementation in V1.

// --config: a (possibly remote) file to read flags from. It is rewritten into
// Abseil's --flagfile before the command line is reparsed.
ABSL_FLAG(std::string, config, "",
          "Read flags from the specified file. The file can be either local or "
          "remote. Relative paths are referenced from the CWD. The format "
          "should be:\n"
          "--flag=value\n"
          "--another_flag=value\n"
          "...\n"
          "Lines that start with '#' or '//' are comments. Note that this "
          "format is compatible with the built-in --flagfile flag (defined by "
          "Abseil Flags library); however, unlike this flag, --flagfile "
          "supports only local files.\n"
          "Nested --config's won't work (but nested --flagfile's will, "
          "provided they point at a local file, e.g. $HOME/.centipede_rc).\n"
          "The flag is position-sensitive: flags read from it override (or "
          "append, in case of std::vector flags) any previous occurrences of "
          "the same flags on the command line, and vice versa.");
// --save_config: where to write the final resolved flags before exiting.
ABSL_FLAG(std::string, save_config, "",
          "Saves Centipede flags to the specified file and exits the program. "
          "The file can be either local or remote. Relative paths are "
          "referenced from the CWD. Both the command-line flags and defaulted "
          "flags are saved (the defaulted flags are commented out). The format "
          "is:\n"
          "# --flag's help string.\n"
          "# --flag's default value.\n"
          "--flag=value\n"
          "...\n"
          "This format can be parsed back by both --config and --flagfile. "
          "Unlike those two flags, this flag is not position-sensitive and "
          "always saves the final resolved config.\n"
          "Special case: if the file's extension is .sh, a runnable shell "
          "script is saved instead.");
// --update_config: like --save_config, but writes back to the --config file.
ABSL_FLAG(bool, update_config, false,
          "Must be used in combination with --config=<file>. Writes the final "
          "resolved config back to the same file.");
// --print_config: log the final resolved flags.
ABSL_FLAG(bool, print_config, false,
          "Print the config to stderr upon starting Centipede.");
// Declare --flagfile defined by the Abseil Flags library. The flag should point
// at a _local_ file, which is always automatically parsed by Abseil Flags.
ABSL_DECLARE_FLAG(std::vector<std::string>, flagfile);
// Expands to the "--"-prefixed name of the flag `name`. The expansion contains
// a bare `<<`, so it is only usable inside an ostream-style `<<` chain.
#define DASHED_FLAG_NAME(name) "--" << FLAGS_##name.Name()
namespace fuzztest::internal {
// Builds the augmented copy of `orig_argv`.
//
// Each argument is processed in two stages:
//   1. If it looks like a flag ("-name", "--name", "-name=..." or
//      "--name=...") and `name` equals the first element of an entry in
//      `flag_replacements`, the name is swapped for the entry's second element.
//      The leading dashes and the "=..." suffix are kept. The first matching
//      entry wins.
//   2. Every substring listed in `replacements` is replaced in the result.
//
// `was_augmented_` is set if any argument ended up different from the
// original. `cleanup` is stored and later invoked by the destructor.
AugmentedArgvWithCleanup::AugmentedArgvWithCleanup(
    const std::vector<std::string>& orig_argv,
    const Replacements& flag_replacements, const Replacements& replacements,
    BackingResourcesCleanup&& cleanup)
    // `cleanup` is a named rvalue reference, i.e. an lvalue here: it has to be
    // moved explicitly, otherwise the callable would be copied.
    : was_augmented_{false}, cleanup_{std::move(cleanup)} {
  argv_.reserve(orig_argv.size());
  for (const auto& old_arg : orig_argv) {
    // Stage 1: whole-name flag replacement; nullopt if nothing matched.
    const auto flag_replaced_arg = [&]() -> std::optional<std::string> {
      if (old_arg.empty() || old_arg[0] != '-') return std::nullopt;
      std::string_view contents = old_arg;
      std::string_view dashes =
          (contents.size() > 1 && contents[1] == '-') ? "--" : "-";
      contents = contents.substr(dashes.size());
      for (const auto& flag_replacement : flag_replacements) {
        // The name must match as a whole: it is followed either by the end of
        // the argument or by '='.
        if (absl::StartsWith(contents, flag_replacement.first) &&
            (contents.size() == flag_replacement.first.size() ||
             contents[flag_replacement.first.size()] == '=')) {
          return absl::StrCat(dashes, flag_replacement.second,
                              contents.substr(flag_replacement.first.size()));
        }
      }
      return std::nullopt;
    }();
    // Stage 2: plain substring replacements.
    const std::string& new_arg = argv_.emplace_back(
        absl::StrReplaceAll(flag_replaced_arg.value_or(old_arg), replacements));
    if (new_arg != old_arg) {
      VLOG(1) << "Augmented argv arg:\n" << VV(old_arg) << "\n" << VV(new_arg);
      was_augmented_ = true;
    }
  }
}
// Move constructor: takes over `rhs`'s argv, augmentation flag and cleanup
// callback, and leaves `rhs` without a cleanup callback.
AugmentedArgvWithCleanup::AugmentedArgvWithCleanup(
    AugmentedArgvWithCleanup&& rhs) noexcept
    : argv_(std::move(rhs.argv_)),
      was_augmented_(rhs.was_augmented_),
      cleanup_(std::move(rhs.cleanup_)) {
  // Prevent rhs from calling the cleanup in dtor (moving an std::function
  // leaves the moved object in a valid, but undefined, state).
  rhs.cleanup_ = {};
}
// Move assignment: takes over `rhs`'s argv, augmentation flag and cleanup
// callback, and leaves `rhs` without a cleanup callback.
AugmentedArgvWithCleanup& AugmentedArgvWithCleanup::operator=(
    AugmentedArgvWithCleanup&& rhs) noexcept {
  // Self-move must be a no-op: otherwise the reset of `rhs.cleanup_` below
  // would drop this object's own cleanup.
  if (this == &rhs) return *this;
  // Release the resources currently backed by this object before its cleanup
  // callback is overwritten; otherwise they would never be cleaned up.
  if (cleanup_) cleanup_();
  argv_ = std::move(rhs.argv_);
  was_augmented_ = rhs.was_augmented_;
  cleanup_ = std::move(rhs.cleanup_);
  // Prevent rhs from calling the cleanup in dtor (moving an std::function
  // leaves the moved object in a valid, but undefined, state).
  rhs.cleanup_ = {};
  return *this;
}
// Runs the cleanup callback, if one is still attached to this object.
AugmentedArgvWithCleanup::~AugmentedArgvWithCleanup() {
  if (cleanup_ != nullptr) {
    cleanup_();
  }
}
// Returns a copy of `argv` in which the --config flag is renamed to
// --flagfile. If the --config path does not exist on the local filesystem, it
// is assumed to be remote: its contents are copied into a temporary local file,
// the path in `argv` is redirected to that copy, and the returned object
// removes the copy on destruction.
//
// NOTE(review): the cleanup uses the throwing overload of
// std::filesystem::remove() and runs from a destructor; consider whether the
// std::error_code overload would be more appropriate.
AugmentedArgvWithCleanup LocalizeConfigFilesInArgv(
    const std::vector<std::string>& argv) {
  const std::filesystem::path path = absl::GetFlag(FLAGS_config);
  const bool config_was_passed = !path.empty();

  if (config_was_passed) {
    CHECK_NE(path, absl::GetFlag(FLAGS_save_config))
        << "To update config in place, use " << DASHED_FLAG_NAME(update_config);
  }

  AugmentedArgvWithCleanup::Replacements replacements;
  AugmentedArgvWithCleanup::BackingResourcesCleanup cleanup;

  // A non-existent local path is assumed to be a remote one: mirror it into a
  // temporary local file.
  if (config_was_passed && !std::filesystem::exists(path)) {
    // Fetch the remote file.
    std::string contents;
    CHECK_OK(RemoteFileGetContents(path.c_str(), contents));

    // Write the temporary local copy.
    const std::filesystem::path tmp_dir = TemporaryLocalDirPath();
    const std::filesystem::path local_path = tmp_dir / path.filename();
    LOG(INFO) << "Localizing remote config: " << VV(path) << VV(local_path);
    // NOTE: Ignore "Remote" in the API names here: the paths are always local.
    CHECK_OK(RemoteMkdir(tmp_dir.c_str()));
    CHECK_OK(RemoteFileSetContents(local_path.c_str(), contents));

    // Point the argv at the local copy and make sure the copy gets removed.
    replacements.emplace_back(path.c_str(), local_path.c_str());
    cleanup = [local_path]() { std::filesystem::remove(local_path); };
  }

  // The flag rename is always needed (--config=<path> can be passed with a
  // local <path>).
  AugmentedArgvWithCleanup::Replacements flag_replacements;
  flag_replacements.emplace_back(std::string{FLAGS_config.Name()},
                                 std::string{FLAGS_flagfile.Name()});

  return AugmentedArgvWithCleanup{argv, flag_replacements, replacements,
                                  std::move(cleanup)};
}
// Writes the current flags to a file if --save_config or --update_config was
// passed, and returns the path written to; returns an empty path otherwise.
//
// The target is the --save_config path or, with --update_config, the --config
// path. CHECK-fails on inconsistent combinations of the three flags. If the
// target's extension is ".sh", a shell script is written instead of a plain
// flag file; `leftover_argv` is joined with spaces and used as the command the
// script runs.
std::filesystem::path MaybeSaveConfigToFile(
    const std::vector<std::string>& leftover_argv) {
  std::filesystem::path path;

  // Initialize `path` if --save_config or --update_config is passed.
  if (!absl::GetFlag(FLAGS_save_config).empty()) {
    path = absl::GetFlag(FLAGS_save_config);
    CHECK_NE(path, absl::GetFlag(FLAGS_config))
        << "To update config in place, use " << DASHED_FLAG_NAME(update_config);
    CHECK(!absl::GetFlag(FLAGS_update_config))
        << DASHED_FLAG_NAME(save_config) << " and "
        << DASHED_FLAG_NAME(update_config) << " are mutually exclusive";
  } else if (absl::GetFlag(FLAGS_update_config)) {
    path = absl::GetFlag(FLAGS_config);
    CHECK(!path.empty()) << DASHED_FLAG_NAME(update_config)
                         << " must be used in combination with "
                         << DASHED_FLAG_NAME(config);
  }

  // Save or update the config file.
  if (!path.empty()) {
    // The config-management flags themselves are not saved.
    const std::set<std::string_view> excluded_flags = {
        FLAGS_config.Name(),
        FLAGS_save_config.Name(),
        FLAGS_update_config.Name(),
        FLAGS_print_config.Name(),
    };
    const FlagInfosPerSource flags =
        GetFlagsPerSource("centipede", excluded_flags);
    const std::string flags_str = FormatFlagfileString(
        flags, DefaultedFlags::kCommentedOut, FlagComments::kHelpAndDefault);
    std::string file_contents;
    if (path.extension() == ".sh") {
      // NOTES: 1) absl::Substitute() placeholders in the stub: $0 is the flags
      //           string, $1 is the workdir, $2 is `leftover_argv` joined.
      //        2) The first element of `leftover_argv` is expected to be the
      //           /path/to/centipede, so the $2 in the stub will run it.
      //        3) absl::Substitute() replaces the escaped $$ with a $.
      constexpr std::string_view kScriptStub =
          R"(#!/bin/bash -eu
declare -ra flags=(
$0)
if [[ -n "$1" ]]; then
wd=$1
else
wd=$$PWD
fi
read -e -p "Clear workdir (which is '$$wd') [y/N]? " yn
# Tip: To default to 'y', change 'yY' to 'nN' below.
if [[ "$${yn}" =~ [yY] ]]; then
rm -rf "$$wd"/corpus* "$$wd"/*report*.txt "$$wd"/*/features*
fi
set -x
$2 "$${flags[@]}"
)";
      // Look the flag up directly: indexing the map returned by
      // absl::GetAllFlags() copies the whole map and yields a null pointer for
      // a flag that is not registered.
      const auto* workdir_flag = absl::FindCommandLineFlag("workdir");
      CHECK(workdir_flag != nullptr) << "--workdir flag is not registered";
      const auto workdir = workdir_flag->CurrentValue();
      const auto argv_str = absl::StrJoin(leftover_argv, " ");
      file_contents =
          absl::Substitute(kScriptStub, flags_str, workdir, argv_str);
    } else {
      file_contents = flags_str;
    }
    CHECK_OK(RemoteFileSetContents(path.c_str(), file_contents));
  }

  return path;
}
// Initializes the runtime via InitRuntime(), applies --config by reparsing the
// command line with --flagfile, optionally logs the resolved flags
// (--print_config), and, if a config file was requested to be saved, saves it
// and exits the process. Otherwise returns the runtime state.
std::unique_ptr<RuntimeState> InitCentipede(  //
    int argc, char** absl_nonnull argv) {
  // InitRuntime() is allowed to remove recognized flags from `argv`, so
  // we need a copy.
  const std::vector<std::string> saved_argv = CastArgv(argc, argv);

  // Among other things, this performs the initial command line parsing.
  std::unique_ptr<RuntimeState> runtime_state = InitRuntime(argc, argv);

  // If --config=<path> was passed, replace it with the Abseil Flags' built-in
  // --flagfile=<localized_path> and reparse the command line. NOTE: It would be
  // incorrect to just parse the contents of <path>, because --config (and
  // --flagfile for that matter) are position-sensitive, i.e. they may override
  // flags that come before on the command line, and vice versa.
  const AugmentedArgvWithCleanup localized_argv =
      LocalizeConfigFilesInArgv(saved_argv);
  if (localized_argv.was_augmented()) {
    LOG(INFO) << "Command line was augmented; reparsing";
    runtime_state->leftover_argv() = CastArgv(absl::ParseCommandLine(
        localized_argv.argc(), CastArgv(localized_argv.argv()).data()));
  }

  // Log the final resolved config.
  if (absl::GetFlag(FLAGS_print_config)) {
    const FlagInfosPerSource flags = GetFlagsPerSource("centipede");
    const std::string flags_str = FormatFlagfileString(
        flags, DefaultedFlags::kCommentedOut, FlagComments::kNone);
    LOG(INFO) << "Final resolved config:\n" << flags_str;
  }

  // If --save_config was passed, save the final resolved flags to the requested
  // file and exit the program. Pass the leftover argv produced by the parsing
  // above: it is what a saved .sh script uses as the command to run.
  const auto path = MaybeSaveConfigToFile(runtime_state->leftover_argv());
  if (!path.empty()) {
    LOG(INFO) << "Config written to file: " << VV(path);
    LOG(INFO) << "Nothing left to do; exiting";
    exit(EXIT_SUCCESS);
  }

  return runtime_state;
}
} // namespace fuzztest::internal

View File

@ -0,0 +1,114 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef THIRD_PARTY_CENTIPEDE_GOOGLE_CONFIG_FILE_H_
#define THIRD_PARTY_CENTIPEDE_GOOGLE_CONFIG_FILE_H_
#include <filesystem> // NOLINT
#include <functional>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "absl/base/nullability.h"
#include "./centipede/config_init.h"
// TODO(ussuri): Move implementation-only functions to .cc.
namespace fuzztest::internal {
// Constructs an augmented copy of `argv` with any substrings appearing in the
// original elements replaced according to a list of replacements.
// TODO(ussuri): Make more robust. What we really want is replace any possible
// form of --flag=value with an equivalent form of --new_flag=new_value.
// TODO(ussuri): Remove and just use the required bits of logic in .cc.
class AugmentedArgvWithCleanup final {
public:
// A list of {old, new} string pairs.
using Replacements = std::vector<std::pair<std::string, std::string>>;
// A callback that releases whatever backs the modified flags.
using BackingResourcesCleanup = std::function<void()>;
// Ctor. Each item in `orig_argv` is first processed with
// `flag_replacements` if the item has the format "-flag", "-flag=...",
// "--flag", or "--flag=...", and the flag name matches as a whole. Then the
// `replacements` list is applied: each entry maps an old substring to a new
// one. Only simple, one-stage string replacement is performed: no regexes,
// placeholders, envvars or recursion.
// The `cleanup` callback should clean up any temporary resources backing the
// modified flags, such as temporary files.
AugmentedArgvWithCleanup(const std::vector<std::string>& orig_argv,
const Replacements& flag_replacements,
const Replacements& replacements,
BackingResourcesCleanup&& cleanup);
// Dtor. Invokes `cleanup_`.
~AugmentedArgvWithCleanup();
// Movable but not copyable to prevent `cleanup_` from running twice.
AugmentedArgvWithCleanup(const AugmentedArgvWithCleanup&) = delete;
AugmentedArgvWithCleanup& operator=(const AugmentedArgvWithCleanup&) = delete;
AugmentedArgvWithCleanup(AugmentedArgvWithCleanup&&) noexcept;
AugmentedArgvWithCleanup& operator=(AugmentedArgvWithCleanup&&) noexcept;
// The new argc. Currently, will always match the original argc.
int argc() const { return static_cast<int>(argv_.size()); }
// The new, possibly augmented argv. Note that all its elements are newly
// allocated std::strings, so their character data will all be different from
// their counterparts in the original argv.
const std::vector<std::string>& argv() const { return argv_; }
// Whether the argv has been augmented from the original, i.e. if any
// of the requested string replacements actually occurred.
bool was_augmented() const { return was_augmented_; }
private:
// The augmented arguments.
std::vector<std::string> argv_;
// True if at least one argument differs from its original.
bool was_augmented_;
// Invoked by the dtor, if set.
BackingResourcesCleanup cleanup_;
};
// Replaces any --config=<config_file> in `argv` (or any alternative form of
// that flag) with a --flagfile=<possibly_localized_config_file>, where
// localization means that a remote <config_file> is copied to a temporary local
// mirror. If <config_file> is already local, it is used as-is.
//
// Nested --config, --save_config and --flagfile in a remote file are currently
// unsupported.
// NOTE(review): an earlier version of this comment said the remote file
// contents are checked for such nested flags; confirm against the
// implementation whether that check is actually performed.
//
// The returned AugmentedArgvWithCleanup deletes the localized files (if any) in
// dtor.
AugmentedArgvWithCleanup LocalizeConfigFilesInArgv(
const std::vector<std::string>& argv);
// If --save_config=<path> was passed on the command line, saves _all_
// Centipede flags (i.e. those specified on the command line AND the defaulted
// ones) to <path> in the format compatible with --config (defined by
// Centipede), as well as --flagfile (defined by Abseil Flags), and returns
// <path>. With --update_config, the path passed via --config is used as <path>
// instead. Otherwise, returns an empty path. If the <path>'s extension is .sh,
// saves a runnable script instead; `leftover_argv` provides the command that
// the script runs.
std::filesystem::path MaybeSaveConfigToFile(
const std::vector<std::string>& leftover_argv);
// Initializes Centipede:
// - Calls `InitRuntime()` at the right time to initialize the runtime
// subsystems and perform the initial flag parsing.
// - Handles config-related flags: loads the config from --config, if any,
// and saves it to --save_config (or --update_config), if any. If a config
// was saved, the process exits instead of returning.
// - Logs the final resolved config if --print_config was passed.
// - Returns the runtime state that the caller should take ownership of and
// keep alive for the duration of the process.
[[nodiscard]] std::unique_ptr<RuntimeState> InitCentipede(
int argc, char** absl_nonnull argv);
} // namespace fuzztest::internal
#endif // THIRD_PARTY_CENTIPEDE_GOOGLE_CONFIG_FILE_H_

View File

@ -0,0 +1,91 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/config_file.h"
#include <string>
#include <vector>
#include "gtest/gtest.h"
namespace fuzztest::internal {
namespace {
// Checks both replacement stages of AugmentedArgvWithCleanup and that the
// cleanup callback runs when the object goes out of scope.
TEST(ConfigFileTest, AugmentedArgv) {
  const std::vector<std::string> kOrigArgv = {
      "--foo=bar", "-baz", "-bazz", "baz", "qux",
  };

  // None of the replacements match: the argv must come out unchanged.
  {
    const AugmentedArgvWithCleanup::Replacements kNoFlagReplacements = {};
    const AugmentedArgvWithCleanup::Replacements kMismatchingReplacements = {
        {"mismatching", "mod_mismatching"},
    };
    const AugmentedArgvWithCleanup augmented_argv{
        kOrigArgv, kNoFlagReplacements, kMismatchingReplacements, nullptr};
    EXPECT_FALSE(augmented_argv.was_augmented());
    for (std::size_t i = 0; i < kOrigArgv.size(); ++i) {
      EXPECT_EQ(augmented_argv.argv()[i], kOrigArgv[i]);
    }
  }

  // The replacements match and the cleanup runs as a result.
  {
    bool cleanup_worked = false;
    {
      const AugmentedArgvWithCleanup::Replacements kFlagReplacements = {
          {"foo", "mod_foo"},
          {"baz", "mod_baz"},
      };
      const AugmentedArgvWithCleanup::Replacements kReplacements = {
          {"bar", "mod_bar"},
          {"qux", "mod_qux"},
      };
      const AugmentedArgvWithCleanup augmented_argv{
          kOrigArgv, kFlagReplacements, kReplacements,
          [&cleanup_worked]() { cleanup_worked = true; }};
      const std::vector<std::string> kExpectedArgv = {
          "--mod_foo=mod_bar",
          "-mod_baz",
          // Flag replacement should skip this item because the flag name
          // does not match as a whole.
          "-bazz",
          // Flag replacement should skip this item because it's not a flag.
          "baz",
          "mod_qux",
      };
      EXPECT_TRUE(augmented_argv.was_augmented());
      for (std::size_t i = 0; i < kExpectedArgv.size(); ++i) {
        EXPECT_EQ(augmented_argv.argv()[i], kExpectedArgv[i]);
      }
    }
    // `augmented_argv` is destroyed by now, which must have run the cleanup.
    EXPECT_TRUE(cleanup_worked);
  }
}
// TODO(ussuri): The rest of the module is tested by calling Centipede with
// the new flags in centipede_main_cns_test.sh. Consider adding proper C++
// tests here too.
} // namespace
} // namespace fuzztest::internal

View File

@ -0,0 +1,58 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/config_init.h"
#include <memory>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include "absl/base/attributes.h"
#include "absl/base/log_severity.h"
#include "absl/flags/parse.h"
#include "absl/flags/usage_config.h"
#include "absl/log/globals.h"
#include "absl/log/initialize.h"
#include "absl/strings/match.h"
#include "./centipede/config_util.h"
namespace fuzztest::internal {
// Takes `leftover_argv` by value and moves it into the object.
RuntimeState::RuntimeState(std::vector<std::string> leftover_argv)
: leftover_argv_(std::move(leftover_argv)) {}
// Sets up logging and --help behavior, parses the command line, and returns a
// RuntimeState holding whatever absl::ParseCommandLine() returned.
// Declared weak (ABSL_ATTRIBUTE_WEAK), i.e. another definition may be linked
// in instead of this one.
ABSL_ATTRIBUTE_WEAK std::unique_ptr<RuntimeState> InitRuntime(int argc,
                                                              char* argv[]) {
  // NB: The invocation order below is very important. Do not change.

  // Make `LOG(INFO)` to go to stderr by default. Note that an explicit
  // `--stderrthreshold=N` on the command line will override this.
  absl::SetStderrThreshold(absl::LogSeverityAtLeast::kInfo);

  // Make --help print any flags defined by any Centipede source.
  absl::FlagsUsageConfig usage_config;
  usage_config.contains_help_flags = [](std::string_view filename) {
    return absl::StrContains(filename, "centipede");
  };
  absl::SetFlagsUsageConfig(usage_config);

  // Parse the known flags from the command line.
  std::vector<std::string> leftover_argv =
      CastArgv(absl::ParseCommandLine(argc, argv));

  // Initialize the logging system using the just-parsed log-related flags.
  absl::InitializeLog();

  // The local is not used past this point: move it instead of copying it.
  return std::make_unique<RuntimeState>(std::move(leftover_argv));
}
} // namespace fuzztest::internal

View File

@ -0,0 +1,56 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef THIRD_PARTY_CENTIPEDE_GOOGLE_CONFIG_INIT_H_
#define THIRD_PARTY_CENTIPEDE_GOOGLE_CONFIG_INIT_H_
#include <memory>
#include <string>
#include <vector>
namespace fuzztest::internal {
// The runtime state returned by `InitRuntime()`. The caller should take over
// the ownership of this and keep it alive for the duration of the process.
class [[nodiscard]] RuntimeState {
public:
// Stores `leftover_argv` in the object.
explicit RuntimeState(std::vector<std::string> leftover_argv);
// Virtual, so that derived states can be destroyed through a base pointer.
virtual ~RuntimeState() = default;
// Not copyable nor movable for simplicity and maximum extensibility.
RuntimeState(const RuntimeState&) = delete;
RuntimeState& operator=(const RuntimeState&) = delete;
RuntimeState(RuntimeState&&) = delete;
RuntimeState& operator=(RuntimeState&&) = delete;
// Returns a copy of the stored argv (the deduced return type is a value).
auto leftover_argv() const { return leftover_argv_; }
// Returns a mutable reference to the stored argv.
auto& leftover_argv() { return leftover_argv_; }
private:
// The argv stored by the constructor; replaceable via the mutable accessor.
std::vector<std::string> leftover_argv_;
};
// * Initializes the relevant runtime subsystems in the correct order.
// * Directs all `LOG(INFO)`s to also go to stderr (by default, only
// `LOG(ERROR)`s and higher go to stderr).
// * Tweaks --help behavior to print any flags defined by any Centipede source
// (by default, --help only prints flags defined in the source named
// <program>.cc or <program_main>.cc).
// * Parses the command line.
// * Returns the runtime state, which the client should keep alive for the
// duration of the process.
[[nodiscard]] std::unique_ptr<RuntimeState> InitRuntime(int argc, char* argv[]);
} // namespace fuzztest::internal
#endif // THIRD_PARTY_CENTIPEDE_GOOGLE_CONFIG_INIT_H_

View File

@ -0,0 +1,109 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/config_util.h"
#include <set>
#include <string>
#include <string_view>
#include <vector>
#include "absl/flags/reflection.h"
#include "absl/strings/match.h"
#include "absl/strings/str_join.h"
#include "absl/strings/str_replace.h"
#include "absl/strings/substitute.h"
namespace fuzztest::internal {
// Produces a main()-style vector of `char*`, one per element of `argv`.
// The pointers alias the strings' own buffers, so `argv` must outlive the
// result. No trailing null pointer is appended.
std::vector<char*> CastArgv(const std::vector<std::string>& argv) {
  std::vector<char*> pointers;
  pointers.reserve(argv.size());
  for (auto it = argv.cbegin(); it != argv.cend(); ++it) {
    // The const is cast away only to match the main()-compatible element type.
    pointers.push_back(const_cast<char*>(it->c_str()));
  }
  return pointers;
}
// Copies every C string in `argv` into an owning std::string, preserving
// order.
std::vector<std::string> CastArgv(const std::vector<char*>& argv) {
  std::vector<std::string> strings(argv.cbegin(), argv.cend());
  return strings;
}
// Copies the first `argc` C strings of a main()-style `argv` array into
// owning std::strings, preserving order.
std::vector<std::string> CastArgv(int argc, char** argv) {
  char** const first = argv;
  char** const last = argv + argc;
  std::vector<std::string> strings(first, last);
  return strings;
}
// Collects every flag reported by absl::GetAllFlags() whose defining filename
// contains `source_fragment` and whose name is not in `exclude_flags`. The
// result groups the flags by defining filename; each entry records the flag's
// name, current value, default value and help text.
FlagInfosPerSource GetFlagsPerSource(
    std::string_view source_fragment,
    const std::set<std::string_view>& exclude_flags) {
  FlagInfosPerSource grouped;
  for (const auto& name_and_flag : absl::GetAllFlags()) {
    const auto& name = name_and_flag.first;
    const auto& flag = name_and_flag.second;
    // Skip flags defined in sources that do not match the fragment.
    if (!absl::StrContains(flag->Filename(), source_fragment)) continue;
    // Skip flags the caller explicitly asked to leave out.
    if (exclude_flags.find(name) != exclude_flags.cend()) continue;
    grouped[flag->Filename()].emplace(FlagInfo{
        name, flag->CurrentValue(), flag->DefaultValue(), flag->Help()});
  }
  return grouped;
}
std::string FormatFlagfileString(const FlagInfosPerSource& flags,
DefaultedFlags defaulted,
FlagComments comments) {
std::vector<std::string> lines;
lines.reserve(flags.size()); // this many files
if (defaulted == DefaultedFlags::kIncluded) {
lines.emplace_back("# NOTE: Explicit and defaulted flags are included");
} else if (defaulted == DefaultedFlags::kExcluded) {
lines.emplace_back("# NOTE: Defaulted flags are excluded");
} else if (defaulted == DefaultedFlags::kCommentedOut) {
lines.emplace_back("# NOTE: Defaulted flags are commented out");
}
lines.emplace_back();
for (const auto& [filename, flag_infos] : flags) {
lines.emplace_back(absl::Substitute("# Flags from $0:", filename));
for (const auto& [name, value, default_value, help] : flag_infos) {
if (defaulted == DefaultedFlags::kExcluded && value == default_value) {
continue;
}
if (comments == FlagComments::kHelpAndDefault) {
const std::string prepped_help =
absl::StrReplaceAll(help, {{"\n", " "}});
lines.emplace_back(absl::Substitute(" # $0", prepped_help));
}
if (comments == FlagComments::kDefault ||
comments == FlagComments::kHelpAndDefault) {
lines.emplace_back(
absl::Substitute(" # default: '$0'", default_value));
}
if (defaulted == DefaultedFlags::kCommentedOut &&
value == default_value) {
lines.emplace_back(absl::Substitute(" # --$0=$1", name, value));
} else {
lines.emplace_back(absl::Substitute(" --$0=$1", name, value));
}
if (comments == FlagComments::kDefault ||
comments == FlagComments::kHelpAndDefault) {
lines.emplace_back();
}
}
if (!lines.back().empty()) lines.emplace_back();
}
return absl::StrJoin(lines, "\n");
}
} // namespace fuzztest::internal

View File

@ -0,0 +1,100 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef THIRD_PARTY_CENTIPEDE_CONFIG_UTIL_H_
#define THIRD_PARTY_CENTIPEDE_CONFIG_UTIL_H_
#include <map>
#include <set>
#include <string>
#include <string_view>
#include <vector>
namespace fuzztest::internal {
// A set of overloads to cast argv between vector<string> and main()-compatible
// vector<char*> or argc/argv pair in both directions. The result can be used
// like this:
// AugmentedArgvWithCleanup new_argv{CastArgv(argc, argv), ...};
// std::vector<std::string> leftover_argv =
// CastArgv(absl::ParseCommandLine(
// new_argv.argc(), CastArgv(new_argv.argv()).data()));
//
// The two overloads below copy the C strings into owning std::strings.
std::vector<std::string> CastArgv(int argc, char** argv);
std::vector<std::string> CastArgv(const std::vector<char*>& argv);
// WARNING: Beware of the lifetimes. The returned vector<char*> references the
// passed `argv`, so `argv` must outlive it. The returned vector has exactly
// one pointer per input string; no trailing null pointer is appended.
std::vector<char*> CastArgv(const std::vector<std::string>& argv);
// Types returned from GetFlagsPerSource().

// A snapshot of one flag: its name, current value, default value and help
// text. Ordering considers only `name`, so a std::set<FlagInfo> holds at most
// one entry per flag name.
struct FlagInfo {
  const std::string_view name;
  const std::string value;
  const std::string default_value;
  const std::string help;

  friend bool operator<(const FlagInfo& x, const FlagInfo& y) {
    return x.name.compare(y.name) < 0;
  }
};

// Flags grouped by the source file that defines them; both the files and the
// flags within each file are kept in sorted order.
using FlagInfosPerSource =
    std::map<std::string /*source_filename*/, std::set<FlagInfo>>;
// Returns a per-source map of all compiled-in flags defined by sources whose
// relative workspace paths contain `source_fragment`. An empty
// `source_fragment` returns flags from all sources. Flag names listed in
// `exclude_flags` are omitted from the result.
FlagInfosPerSource GetFlagsPerSource(
std::string_view source_fragment = "",
const std::set<std::string_view>& exclude_flags = {});
// Controls how FormatFlagfileString() treats flags whose current value equals
// their default value.
enum class DefaultedFlags {
kIncluded = 0, // Include flags with value == default.
kExcluded = 1, // Exclude flags with value == default.
kCommentedOut = 2, // Comment out flags with value == default.
};
// Controls which explanatory comments FormatFlagfileString() emits before each
// flag.
enum class FlagComments {
kNone = 0, // Do not add any comments.
kDefault = 1, // Add a comment with the flag's default.
kHelpAndDefault = 2, // Add a comment with the flag's help and default.
};
// Returns a string with newline-separated --flag=value tokens for all the
// flags in `flags` (typically obtained from GetFlagsPerSource()).
//
// The output starts with a "# NOTE: ..." line that describes the `defaulted`
// mode. The flags are grouped by the source filename, and sorted within each
// group. Depending on `comments`, each flag may be preceded by comment lines
// with its help text and/or its default value.
//
// Illustrative output with `defaulted == DefaultedFlags::kCommentedOut` (the
// values are emitted exactly as stored in FlagInfo::value):
//
// # Flags from centipede/environment.cc:
//
// --binary=unicorn_x86_64_sancov
// # --rss_limit_mb=4096
// --use_pc_features=true
//
// # Flags from third_party/absl/log/flags.cc:
//
// --alsologtostderr=true
// # --log_backtrace_at=
//
// (See config_util_test.cc for more examples of the output).
//
// The returned value is compatible with the standard Abseil's --flagfile flag
// and its remote-enabled Centipede's equivalents --config and --save_config.
std::string FormatFlagfileString(
const FlagInfosPerSource& flags,
DefaultedFlags defaulted = DefaultedFlags::kIncluded,
FlagComments comments = FlagComments::kNone);
} // namespace fuzztest::internal
#endif // THIRD_PARTY_CENTIPEDE_CONFIG_UTIL_H_

View File

@ -0,0 +1,245 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/config_util.h"
#include <string>
#include <string_view>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/flags/flag.h"
#include "./centipede/environment_flags.h"
#include "./common/logging.h"
// Dummy flags for testing. The GetFlagsPerSource test below changes their
// values and expects both to be reported under this file's name.
ABSL_FLAG(std::string, foo, "bar", "foo help");
ABSL_FLAG(bool, qux, false, "qux help");
namespace fuzztest::internal {
// Test-only equality for FlagInfo. The names must match exactly; for every
// other field, "*" in `expected` acts as a wildcard that matches any value in
// `tested`.
// NOTE: Has to be outside the anonymous namespace.
bool operator==(const FlagInfo& tested, const FlagInfo& expected) {
  if (tested.name != expected.name) return false;
  if (expected.value != "*" && tested.value != expected.value) return false;
  if (expected.default_value != "*" &&
      tested.default_value != expected.default_value) {
    return false;
  }
  return expected.help == "*" || tested.help == expected.help;
}
namespace {
using ::testing::ElementsAreArray;
using ::testing::IsSupersetOf;
// Verifies GetFlagsPerSource() against the flags compiled into this test: the
// dummy flags defined in this file and the Centipede flags from
// centipede_flags.inc. Expected FlagInfo fields set to "*" are wildcards (see
// the operator== defined in this file).
TEST(FlagUtilTest, GetFlagsPerSource) {
constexpr const char* kCentipedeRoot = "centipede/";
constexpr const char* kThisCc = "centipede/config_util_test.cc";
constexpr const char* kCentipedeFlagsInc =
"././centipede/centipede_flags.inc";
// Change some flag values to non-defaults.
absl::SetFlag(&FLAGS_foo, "baz");
absl::SetFlag(&FLAGS_qux, true);
// Create a dummy Environment to touch its flags and prevent them from being
// optimized out.
[[maybe_unused]] auto dummy_env = CreateEnvironmentFromFlags();
// All centipede/ modules.
{
const FlagInfosPerSource flags = GetFlagsPerSource(kCentipedeRoot);
// On failure, dump the collected flags to make the mismatch easy to see.
SCOPED_TRACE(FormatFlagfileString(flags));
ASSERT_EQ(flags.count(kThisCc), 1);
ASSERT_EQ(flags.count(kCentipedeFlagsInc), 1);
ASSERT_THAT(flags.at(kThisCc),
ElementsAreArray({
FlagInfo{"foo", "baz", "bar", "foo help"},
FlagInfo{"qux", "true", "false", "qux help"},
}));
ASSERT_THAT(flags.at(kCentipedeFlagsInc),
IsSupersetOf({
FlagInfo{"binary", "*", "*", "*"},
FlagInfo{"workdir", "*", "*", "*"},
}));
}
// Just this file.
{
const FlagInfosPerSource flags = GetFlagsPerSource(kThisCc);
SCOPED_TRACE(FormatFlagfileString(flags));
ASSERT_EQ(flags.count(kThisCc), 1);
ASSERT_EQ(flags.count(kCentipedeFlagsInc), 0);
ASSERT_THAT(flags.at(kThisCc),
ElementsAreArray({
FlagInfo{"foo", "baz", "bar", "foo help"},
FlagInfo{"qux", "true", "false", "qux help"},
}));
}
// Just this file with one flag excluded.
{
const FlagInfosPerSource flags =
GetFlagsPerSource(kThisCc, /*exclude_flags=*/{"qux"});
SCOPED_TRACE(FormatFlagfileString(flags));
ASSERT_EQ(flags.count(kThisCc), 1);
ASSERT_EQ(flags.count(kCentipedeFlagsInc), 0);
ASSERT_THAT(flags.at(kThisCc),
ElementsAreArray({
FlagInfo{"foo", "baz", "bar", "foo help"},
}));
}
}
// Verifies the exact text produced by FormatFlagfileString() for several
// combinations of DefaultedFlags and FlagComments. The expected outputs are
// raw string literals, so their contents (including whitespace) are
// significant.
TEST(FlagUtilTest, FormatFlagfileString) {
// NOTE: Everything is intentionally unsorted: the result is expected to be
// sorted by file, then by flag name.
const FlagInfosPerSource kFlags = {
{"bob.cc",
{
FlagInfo{"bob_x", "bob_x def", "bob_x def", "bob_x help"},
FlagInfo{"bob_y", "bob_y val", "bob_y def", "bob_y help"},
}},
{"alice.cc",
{
FlagInfo{"alice_x", "alice_x val", "alice_x def", "alice_x help"},
FlagInfo{"alice_y", "alice_y val", "alice_y def", "alice_y help"},
FlagInfo{"alice_z", "alice_z def", "alice_z def", "alice_z help"},
}},
};
// One formatting mode together with the flagfile text it must produce.
struct TestCase {
DefaultedFlags defaulted;
FlagComments comments;
std::string_view expected_flagfile_string;
};
TestCase kTestCases[] = {
{DefaultedFlags::kExcluded, FlagComments::kNone,
R"(# NOTE: Defaulted flags are excluded
# Flags from alice.cc:
--alice_x=alice_x val
--alice_y=alice_y val
# Flags from bob.cc:
--bob_y=bob_y val
)"},
{DefaultedFlags::kIncluded, FlagComments::kNone,
R"(# NOTE: Explicit and defaulted flags are included
# Flags from alice.cc:
--alice_x=alice_x val
--alice_y=alice_y val
--alice_z=alice_z def
# Flags from bob.cc:
--bob_x=bob_x def
--bob_y=bob_y val
)"},
{DefaultedFlags::kCommentedOut, FlagComments::kNone,
R"(# NOTE: Defaulted flags are commented out
# Flags from alice.cc:
--alice_x=alice_x val
--alice_y=alice_y val
# --alice_z=alice_z def
# Flags from bob.cc:
# --bob_x=bob_x def
--bob_y=bob_y val
)"},
{DefaultedFlags::kIncluded, FlagComments::kDefault,
R"(# NOTE: Explicit and defaulted flags are included
# Flags from alice.cc:
# default: 'alice_x def'
--alice_x=alice_x val
# default: 'alice_y def'
--alice_y=alice_y val
# default: 'alice_z def'
--alice_z=alice_z def
# Flags from bob.cc:
# default: 'bob_x def'
--bob_x=bob_x def
# default: 'bob_y def'
--bob_y=bob_y val
)"},
{DefaultedFlags::kIncluded, FlagComments::kHelpAndDefault,
R"(# NOTE: Explicit and defaulted flags are included
# Flags from alice.cc:
# alice_x help
# default: 'alice_x def'
--alice_x=alice_x val
# alice_y help
# default: 'alice_y def'
--alice_y=alice_y val
# alice_z help
# default: 'alice_z def'
--alice_z=alice_z def
# Flags from bob.cc:
# bob_x help
# default: 'bob_x def'
--bob_x=bob_x def
# bob_y help
# default: 'bob_y def'
--bob_y=bob_y val
)"},
{DefaultedFlags::kCommentedOut, FlagComments::kHelpAndDefault,
R"(# NOTE: Defaulted flags are commented out
# Flags from alice.cc:
# alice_x help
# default: 'alice_x def'
--alice_x=alice_x val
# alice_y help
# default: 'alice_y def'
--alice_y=alice_y val
# alice_z help
# default: 'alice_z def'
# --alice_z=alice_z def
# Flags from bob.cc:
# bob_x help
# default: 'bob_x def'
# --bob_x=bob_x def
# bob_y help
# default: 'bob_y def'
--bob_y=bob_y val
)"},
};
// Format the same flags in every mode and compare with the expected text; on
// mismatch, print both strings and the numeric mode values.
for (const auto& test_case : kTestCases) {
const std::string flagfile_string =
FormatFlagfileString(kFlags, test_case.defaulted, test_case.comments);
EXPECT_EQ(flagfile_string, test_case.expected_flagfile_string)
<< "\n--------\n"
<< VV(flagfile_string) << "--------\n"
<< VV(test_case.expected_flagfile_string) << "--------\n"
<< VV(static_cast<int>(test_case.defaulted))
<< VV(static_cast<int>(test_case.comments));
}
}
} // namespace
} // namespace fuzztest::internal

View File

@ -0,0 +1,235 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/control_flow.h"
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <filesystem> // NOLINT
#include <fstream>
#include <istream>
#include <iterator>
#include <ostream>
#include <queue>
#include <sstream>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include "absl/container/flat_hash_set.h"
#include "absl/log/check.h"
#include "absl/log/log.h"
#include "absl/strings/match.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_split.h"
#include "./centipede/command.h"
#include "./centipede/pc_info.h"
#include "./centipede/util.h"
#include "./common/defs.h"
#include "./common/logging.h"
#include "./common/remote_file.h"
namespace fuzztest::internal {
// Loads the raw bytes of `file_path` and reinterprets them as an array of
// PCInfo entries. CHECK-fails if the byte count is not a whole multiple of
// sizeof(PCInfo).
PCTable ReadPcTableFromFile(std::string_view file_path) {
  ByteArray pc_infos_as_bytes;
  ReadFromLocalFile(file_path, pc_infos_as_bytes);
  CHECK_EQ(pc_infos_as_bytes.size() % sizeof(PCInfo), 0);

  const size_t pc_table_size = pc_infos_as_bytes.size() / sizeof(PCInfo);
  const auto *const first =
      reinterpret_cast<PCInfo *>(pc_infos_as_bytes.data());
  const auto *const last = first + pc_table_size;

  PCTable pc_table{first, last};
  CHECK_EQ(pc_table.size(), pc_table_size);
  return pc_table;
}
// Reconstructs a PCTable by disassembling `binary_path` with
// `<objdump_path> -d`. The disassembly is written to `tmp_path` and objdump's
// stderr to `<tmp_path>.log`; both files are removed before returning.
// Returns an empty table (after logging objdump's stderr) if objdump exits
// with a non-success code.
PCTable GetPcTableFromBinaryWithTracePC(std::string_view binary_path,
                                        std::string_view objdump_path,
                                        std::string_view tmp_path) {
  const std::string log_path = absl::StrCat(tmp_path, ".log");

  Command::Options objdump_options;
  objdump_options.args = {"-d", std::string(binary_path)};
  objdump_options.stdout_file = std::string(tmp_path);
  objdump_options.stderr_file = log_path;
  Command objdump{objdump_path, std::move(objdump_options)};

  if (const int exit_code = objdump.Execute(); exit_code != EXIT_SUCCESS) {
    std::string objdump_stderr;
    ReadFromLocalFile(log_path, objdump_stderr);
    LOG(ERROR) << "Failed to use objdump to get PC table; stderr is:";
    for (const auto &line : absl::StrSplit(objdump_stderr, '\n')) {
      LOG(ERROR).NoPrefix() << line;
    }
    std::filesystem::remove(tmp_path);
    std::filesystem::remove(log_path);
    return {};
  }
  std::filesystem::remove(log_path);

  PCTable pc_table;
  std::ifstream in(std::string{tmp_path});
  CHECK(in.good()) << VV(tmp_path);

  // Scan the disassembly: a line ending in ">:" opens a new function, and a
  // line ending in a call to __sanitizer_cov_trace_pc marks an instrumented
  // PC. The first such call after a function start is the function entry.
  bool at_function_start = false;
  std::string line;
  while (std::getline(in, line)) {
    if (absl::EndsWith(line, ">:")) {  // new function.
      at_function_start = true;
      continue;
    }
    // On MacOS there is an extra underscore before the symbols, so the symbol
    // is deliberately not anchored with a leading `<`.
    const bool calls_trace_pc =
        absl::EndsWith(line, "__sanitizer_cov_trace_pc>") ||
        absl::EndsWith(line, "__sanitizer_cov_trace_pc@plt>");
    if (!calls_trace_pc) continue;

    // The line starts with the instruction address in hex.
    const uintptr_t pc = std::stoul(line, nullptr, 16);
    const uintptr_t flags = at_function_start ? PCInfo::kFuncEntry : 0;
    at_function_start = false;  // next trace_pc will be in the same function.
    pc_table.push_back({pc, flags});
  }
  std::filesystem::remove(tmp_path);
  return pc_table;
}
// Reads `in` to the end and reinterprets the bytes as CFTable entries.
// CHECK-fails if the byte count is not a whole multiple of the entry size.
CFTable ReadCfTable(std::istream &in) {
  const std::string raw_input(std::istreambuf_iterator<char>(in), {});
  const ByteArray cf_table_as_bytes(raw_input.begin(), raw_input.end());
  CHECK_EQ(cf_table_as_bytes.size() % sizeof(CFTable::value_type), 0);

  const auto *const first =
      reinterpret_cast<const CFTable::value_type *>(cf_table_as_bytes.data());
  const auto *const last =
      first + cf_table_as_bytes.size() / sizeof(CFTable::value_type);
  return CFTable{first, last};
}
// Fetches the contents of `file_path` via RemoteFileGetContents() (wrapped in
// CHECK_OK) and parses them with the stream overload of ReadCfTable().
CFTable ReadCfTable(std::string_view file_path) {
  std::string cf_table_contents;
  CHECK_OK(RemoteFileGetContents(file_path, cf_table_contents));
  std::istringstream contents_stream{cf_table_contents};
  return ReadCfTable(contents_stream);
}
// Writes the in-memory bytes of `cf_table`'s entries to `out`, unmodified.
void WriteCfTable(const CFTable &cf_table, std::ostream &out) {
  const auto *const raw_bytes =
      reinterpret_cast<const char *>(cf_table.data());
  const auto num_bytes = sizeof(CFTable::value_type) * cf_table.size();
  out.write(raw_bytes, num_bytes);
}
// Parses `file_path` into a DsoTable. Every non-empty line must hold exactly
// two space-separated tokens (CHECK-fails otherwise): the first is stored
// verbatim and the second is parsed with std::stoul.
DsoTable ReadDsoTableFromFile(std::string_view file_path) {
  std::string file_contents;
  ReadFromLocalFile(file_path, file_contents);

  DsoTable dso_table;
  for (const auto &line :
       absl::StrSplit(file_contents, '\n', absl::SkipEmpty())) {
    // Use std::string; there is no std::stoul for std::string_view.
    const std::vector<std::string> tokens =
        absl::StrSplit(line, ' ', absl::SkipEmpty());
    CHECK_EQ(tokens.size(), 2) << VV(line);
    dso_table.push_back(DsoInfo{tokens[0], std::stoul(tokens[1])});
  }
  return dso_table;
}
void ControlFlowGraph::InitializeControlFlowGraph(const CFTable &cf_table,
const PCTable &pc_table) {
CHECK(!cf_table.empty());
func_entries_.resize(pc_table.size());
reachability_.resize(pc_table.size());
for (size_t j = 0; j < cf_table.size();) {
std::vector<uintptr_t> successors;
auto curr_pc = cf_table[j];
++j;
// Iterate over successors.
while (cf_table[j]) {
successors.push_back(cf_table[j]);
++j;
}
++j; // Step over the delimiter.
// Record the list of successors
graph_[curr_pc] = std::move(successors);
// TODO(ussuri): Remove after debugging.
VLOG(100) << "Added PC: " << curr_pc;
// Iterate over callees.
while (cf_table[j]) {
++j;
}
++j; // Step over the delimiter.
CHECK_LE(j, cf_table.size());
}
// Calculate cyclomatic complexity for all functions.
for (PCIndex i = 0; i < pc_table.size(); ++i) {
pc_index_map_[pc_table[i].pc] = i;
if (pc_table[i].has_flag(PCInfo::kFuncEntry)) {
func_entries_[i] = true;
uintptr_t func_pc = pc_table[i].pc;
auto func_comp = ComputeFunctionCyclomaticComplexity(func_pc, *this);
function_complexities_[func_pc] = func_comp;
}
}
}
// Returns the successor PCs recorded for `basic_block`. CHECK-fails if
// `basic_block` is not a node of the graph.
const std::vector<uintptr_t> &ControlFlowGraph::GetSuccessors(
    uintptr_t basic_block) const {
  const auto it = graph_.find(basic_block);
  CHECK(it != graph_.end()) << VV(basic_block);
  const std::vector<uintptr_t> &successors = it->second;
  return successors;
}
std::vector<uintptr_t> ControlFlowGraph::ComputeReachabilityForPc(
uintptr_t pc) const {
absl::flat_hash_set<uintptr_t> visited_pcs;
std::queue<uintptr_t> worklist;
worklist.push(pc);
while (!worklist.empty()) {
auto current_pc = worklist.front();
worklist.pop();
if (!visited_pcs.insert(current_pc).second) continue;
for (const auto &successor : graph_.at(current_pc)) {
if (!exists(successor)) continue;
worklist.push(successor);
}
}
return {visited_pcs.begin(), visited_pcs.end()};
}
uint32_t ComputeFunctionCyclomaticComplexity(uintptr_t pc,
const ControlFlowGraph &cfg) {
size_t edge_num = 0, node_num = 0;
absl::flat_hash_set<uintptr_t> visited_pcs;
std::queue<uintptr_t> worklist;
worklist.push(pc);
while (!worklist.empty()) {
auto current_pc = worklist.front();
worklist.pop();
if (!visited_pcs.insert(current_pc).second) continue;
++node_num;
for (auto &successor : cfg.GetSuccessors(current_pc)) {
if (!cfg.exists(successor)) continue;
++edge_num;
worklist.push(successor);
}
}
return edge_num - node_num + 2;
}
} // namespace fuzztest::internal

View File

@ -0,0 +1,166 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef THIRD_PARTY_CENTIPEDE_CONTROL_FLOW_H_
#define THIRD_PARTY_CENTIPEDE_CONTROL_FLOW_H_
#include <cstddef>
#include <cstdint>
#include <istream>
#include <mutex> //NOLINT
#include <ostream>
#include <string_view>
#include <vector>
#include "absl/container/flat_hash_map.h"
#include "absl/container/flat_hash_set.h"
#include "absl/log/check.h"
#include "./centipede/pc_info.h"
#include "./common/defs.h"
#include "./common/logging.h"
namespace fuzztest::internal {
class SymbolTable; // To avoid mutual inclusion with symbol_table.h.
// Reads a PCTable from `file_path`, returns it.
// NOTE(review): the implementation CHECK-fails when the file size is not a
// multiple of sizeof(PCInfo); whether an unreadable file yields an empty table
// depends on ReadFromLocalFile() - confirm.
PCTable ReadPcTableFromFile(std::string_view file_path);
// Helper for GetPcTableFromBinary, for binaries built with
// -fsanitize-coverage=trace-pc. Returns the PCTable reconstructed from
// `binary_path` with `<objdump_path> -d`. May create a file `tmp_path` (and
// `<tmp_path>.log` for objdump's stderr), but will delete them afterwards.
// Returns an empty table if objdump exits with a non-success code.
PCTable GetPcTableFromBinaryWithTracePC(std::string_view binary_path,
std::string_view objdump_path,
std::string_view tmp_path);
// PCIndex: an index into the PCTable.
// We use 32-bit int for compactness since PCTable is never too large.
using PCIndex = uint32_t;
// A set of PCIndex-es, order is not important.
using PCIndexVec = std::vector<PCIndex>;
// Array of elements in __sancov_cfs section.
// CFTable is created by the compiler/linker in the instrumented binary.
// https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-control-flow.
// As consumed by ControlFlowGraph::InitializeControlFlowGraph(), the table is
// a sequence of records: a block PC, its successor PCs, a zero delimiter, its
// callee entries, and another zero delimiter.
using CFTable = std::vector<intptr_t>;
// Reads a CFTable from `file_path`, returns it. The implementation wraps the
// read in CHECK_OK and CHECK-fails if the size of the contents is not a
// multiple of the CFTable entry size.
CFTable ReadCfTable(std::string_view file_path);
// Same as above but reads from a stream (consumed to the end).
CFTable ReadCfTable(std::istream &in);
// Writes the `cf_table` to `out` as the raw bytes of its entries.
void WriteCfTable(const CFTable &cf_table, std::ostream &out);
// Reads a DsoTable from `file_path`, returns it. Each non-empty line must
// consist of exactly two space-separated tokens; the implementation
// CHECK-fails otherwise.
DsoTable ReadDsoTableFromFile(std::string_view file_path);
// A control-flow graph over basic-block PCs, built from a CFTable and the
// matching PCTable. Besides successor lists, it caches per-function
// cyclomatic complexity and lazily computed per-PC reachability.
class ControlFlowGraph {
 public:
  // Reads from __sancov_cfs section (`cf_table`) and indexes `pc_table`.
  // On error it crashes; in particular, the implementation CHECK-fails if
  // `cf_table` is empty.
  void InitializeControlFlowGraph(const CFTable &cf_table,
                                  const PCTable &pc_table);

  // Returns the vector of successor PCs for the given basic block PC.
  const std::vector<uintptr_t> &GetSuccessors(uintptr_t basic_block) const;

  // Returns the number of cfg entries.
  size_t size() const { return graph_.size(); }

  // Checks if basic_block is in cfg.
  bool exists(const uintptr_t basic_block) const {
    return graph_.contains(basic_block);
  }

  // Returns cyclomatic complexity of function PC. CHECK-fails if it is not a
  // valid function PC.
  uint32_t GetCyclomaticComplexity(uintptr_t pc) const {
    auto it = function_complexities_.find(pc);
    CHECK(it != function_complexities_.end());
    return it->second;
  }

  // Returns true if the given basic block is function entry. Out-of-range
  // indices yield false.
  bool BlockIsFunctionEntry(PCIndex pc_index) const {
    // TODO(ussuri): Change the following to use CHECK_LT(pc_index,
    // func_entries_.size()) and have a death test.
    return pc_index < func_entries_.size() ? func_entries_[pc_index] : false;
  }

  // Returns the idx in pc_table associated with the PC, CHECK-fails if the PC
  // is not in the pc_table.
  PCIndex GetPcIndex(uintptr_t pc) const {
    auto it = pc_index_map_.find(pc);
    CHECK(it != pc_index_map_.end()) << VV(pc) << " is not in pc_table.";
    return it->second;
  }

  // Returns true if the PC is in PCTable.
  bool IsInPcTable(uintptr_t pc) const { return pc_index_map_.contains(pc); }

  // Returns a vector& containing all basic blocks (represented by their PCs)
  // reachable from `pc`. The reachability is computed once, lazily.
  // The method is const, under the hood it uses a mutable data member.
  // Thread-safe: can be called concurrently from multiple threads
  const std::vector<uintptr_t> &LazyGetReachabilityForPc(uintptr_t pc) const {
    CHECK_EQ(reachability_.size(), pc_index_map_.size());
    auto pc_index = GetPcIndex(pc);
    std::call_once(*(reachability_[pc_index].once), [this, &pc, &pc_index]() {
      reachability_[pc_index].reach = ComputeReachabilityForPc(pc);
    });
    return reachability_[pc_index].reach;
  }

 private:
  // Map from PC to the idx in pc_table.
  absl::flat_hash_map<uintptr_t, PCIndex> pc_index_map_;

  // A vector of size PCTable. func_entries_[idx] is true iff the PC at idx
  // is a function entry.
  std::vector<bool> func_entries_;

  // A map with PC as the keys and vector of PCs as value.
  absl::flat_hash_map<uintptr_t, std::vector<uintptr_t>> graph_;

  // A map from function PC to its calculated cyclomatic complexity. It is
  // to avoid unnecessary calls to ComputeFunctionCyclomaticComplexity.
  absl::flat_hash_map<uintptr_t, uint32_t> function_complexities_;

  // Returns a vector of PCs reachable from `pc`, not in any particular order.
  // The result always includes `pc`, since any block is reachable from itself.
  std::vector<uintptr_t> ComputeReachabilityForPc(uintptr_t pc) const;
  FRIEND_TEST(ControlFlowGraph, ComputeReachabilityForPc);

  // ReachInfo is a struct to store reachability information for each PC in
  // pc_table. The once flag is used to make sure the reach vector is populated
  // only once lazily in a thread-friendly manner.
  //
  // Each ReachInfo exclusively owns its `once` flag. Copying therefore never
  // shares the pointer (which would lead to a double delete): a copy gets a
  // fresh flag and an empty `reach`, so it recomputes its reachability lazily.
  struct ReachInfo {
    mutable std::once_flag *once;
    mutable std::vector<uintptr_t> reach;

    ReachInfo() : once(new std::once_flag) {}
    ReachInfo(const ReachInfo &) : once(new std::once_flag) {}
    ReachInfo &operator=(const ReachInfo &other) {
      if (this != &other) {
        // Allocate first so that `once` stays valid if the allocation throws.
        std::once_flag *fresh = new std::once_flag;
        delete once;
        once = fresh;
        reach.clear();
      }
      return *this;
    }
    ~ReachInfo() { delete once; }
  };

  // A vector of size PCTable. reachability_[idx] is reachability info for the
  // `idx`th pc. Conceptually it is constant, but we compute it lazily, hence
  // the 'mutable' members of ReachInfo.
  std::vector<ReachInfo> reachability_;
};
// Computes the Cyclomatic Complexity for the given function,
// https://en.wikipedia.org/wiki/Cyclomatic_complexity.
// `pc` is the function's entry block in `cfg`. The implementation counts the
// blocks reachable from `pc` as nodes and their in-graph successor links as
// edges, and returns edges - nodes + 2.
uint32_t ComputeFunctionCyclomaticComplexity(uintptr_t pc,
const ControlFlowGraph &cfg);
} // namespace fuzztest::internal
#endif // THIRD_PARTY_CENTIPEDE_CONTROL_FLOW_H_

View File

@ -0,0 +1,341 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/control_flow.h"
#include <cstddef>
#include <cstdint>
#include <filesystem> // NOLINT
#include <sstream>
#include <string>
#include <string_view>
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/container/flat_hash_map.h"
#include "absl/log/check.h"
#include "absl/log/log.h"
#include "./centipede/binary_info.h"
#include "./centipede/pc_info.h"
#include "./centipede/symbol_table.h"
#include "./centipede/thread_pool.h"
#include "./common/logging.h"
#include "./common/test_util.h"
namespace fuzztest::internal {
// Mock CFTable representing the following cfg:
// 1
// / \
// 2 3
// \ /
// 4
// I.e. the edges are 1 -> {2, 3}, 2 -> {4}, 3 -> {4}, and 4 has no successors.
// Each record in the table is: block PC, successors, 0, callees (none here), 0.
// TODO(ussuri): Change PCs to 100, 200 etc, to avoid confusion with PCIndex.
static const CFTable g_cf_table = {1, 2, 3, 0, 0, 2, 4, 0,
0, 3, 4, 0, 0, 4, 0, 0};
// Matching PCTable: PC 1 is the only function entry.
static const PCTable g_pc_table = {
{1, PCInfo::kFuncEntry}, {2, 0}, {3, 0}, {4, 0}};
// Checks the set of blocks reachable from each node of the mock diamond cfg.
// Lives outside the anonymous namespace because it is a FRIEND_TEST of
// ControlFlowGraph and calls its private ComputeReachabilityForPc().
TEST(ControlFlowGraph, ComputeReachabilityForPc) {
  ControlFlowGraph cfg;
  cfg.InitializeControlFlowGraph(g_cf_table, g_pc_table);
  EXPECT_NE(cfg.size(), 0);

  // The root reaches every block.
  auto reach1 = cfg.ComputeReachabilityForPc(1);
  EXPECT_THAT(reach1, testing::UnorderedElementsAre(1, 2, 3, 4));

  // Each middle block reaches itself and the sink.
  auto reach2 = cfg.ComputeReachabilityForPc(2);
  EXPECT_THAT(reach2, testing::UnorderedElementsAre(2, 4));
  auto reach3 = cfg.ComputeReachabilityForPc(3);
  EXPECT_THAT(reach3, testing::UnorderedElementsAre(3, 4));

  // The sink reaches only itself.
  auto reach4 = cfg.ComputeReachabilityForPc(4);
  EXPECT_THAT(reach4, testing::ElementsAre(4));
}
namespace {
// Checks that a ControlFlowGraph built from the mock tables exposes the
// expected nodes, successor lists, PC indices, function-entry flags and
// cyclomatic complexity.
TEST(CFTable, MakeCfgFromCfTable) {
  ControlFlowGraph cfg;
  cfg.InitializeControlFlowGraph(g_cf_table, g_pc_table);
  EXPECT_NE(cfg.size(), 0);

  for (auto &pc : {1, 2, 3, 4}) {
    SCOPED_TRACE(testing::Message() << VV(pc));
    EXPECT_TRUE(cfg.exists(pc));
    // Check that cfg traversal is possible.
    for (const auto &successor : cfg.GetSuccessors(pc)) {
      EXPECT_TRUE(cfg.exists(successor));
    }
  }

  // The successor counts do not depend on the loop variable above, so they
  // are checked once.
  EXPECT_THAT(cfg.GetSuccessors(1).size(), 2);
  EXPECT_THAT(cfg.GetSuccessors(2).size(), 1);
  EXPECT_THAT(cfg.GetSuccessors(3).size(), 1);
  EXPECT_TRUE(cfg.GetSuccessors(4).empty());

  // Use EXPECT_* rather than CHECK_* so that a mismatch is reported as a test
  // failure instead of aborting the whole test binary.
  EXPECT_EQ(cfg.GetPcIndex(1), 0);
  EXPECT_EQ(cfg.GetPcIndex(2), 1);
  EXPECT_EQ(cfg.GetPcIndex(3), 2);
  EXPECT_EQ(cfg.GetPcIndex(4), 3);

  EXPECT_TRUE(cfg.BlockIsFunctionEntry(0));
  EXPECT_FALSE(cfg.BlockIsFunctionEntry(1));
  EXPECT_FALSE(cfg.BlockIsFunctionEntry(2));
  EXPECT_FALSE(cfg.BlockIsFunctionEntry(3));

  EXPECT_EQ(cfg.GetCyclomaticComplexity(1), 2);
}
// Round-trips the mock CFTable through WriteCfTable() and ReadCfTable() using
// an in-memory stream and expects to get the same table back.
TEST(CFTable, SerializesAndDeserializesCfTable) {
  std::stringstream buffer;
  WriteCfTable(g_cf_table, buffer);

  const CFTable cf_table = ReadCfTable(buffer);

  EXPECT_EQ(cf_table, g_cf_table);
}
// Checks ComputeFunctionCyclomaticComplexity() for the function entered at
// PC 1 on four small graphs. Each graph is built and checked in its own
// section.
TEST(FunctionComplexity, ComputeFuncComplexity) {
  // Graph 1: every block except 4 has two successors; 3 loops back to 1.
  static const CFTable kCfTable1 = {
      1, 2, 3, 0, 0,  // 1 goes to 2 and 3.
      2, 3, 4, 0, 0,  // 2 goes to 3 and 4.
      3, 1, 4, 0, 0,  // 3 goes to 1 and 4.
      4, 0, 0         // 4 goes nowhere.
  };
  ControlFlowGraph cfg1;
  cfg1.InitializeControlFlowGraph(kCfTable1, g_pc_table);
  EXPECT_NE(cfg1.size(), 0);
  EXPECT_EQ(ComputeFunctionCyclomaticComplexity(1, cfg1), 4);

  // Graph 2: a single block.
  static const CFTable kCfTable2 = {
      1, 0, 0,  // 1 goes nowhere.
  };
  ControlFlowGraph cfg2;
  cfg2.InitializeControlFlowGraph(kCfTable2, g_pc_table);
  EXPECT_NE(cfg2.size(), 0);
  EXPECT_EQ(ComputeFunctionCyclomaticComplexity(1, cfg2), 1);

  // Graph 3: a three-block cycle.
  static const CFTable kCfTable3 = {
      1, 2, 0, 0,  // 1 goes to 2.
      2, 3, 0, 0,  // 2 goes to 3.
      3, 1, 0, 0,  // 3 goes to 1.
  };
  ControlFlowGraph cfg3;
  cfg3.InitializeControlFlowGraph(kCfTable3, g_pc_table);
  EXPECT_NE(cfg3.size(), 0);
  EXPECT_EQ(ComputeFunctionCyclomaticComplexity(1, cfg3), 2);

  // Graph 4: two branching blocks, no cycles.
  static const CFTable kCfTable4 = {
      1, 2, 3, 0, 0,  // 1 goes to 2 and 3.
      2, 3, 4, 0, 0,  // 2 goes to 3 and 4.
      3, 0, 0,        // 3 goes nowhere.
      4, 0, 0         // 4 goes nowhere.
  };
  ControlFlowGraph cfg4;
  cfg4.InitializeControlFlowGraph(kCfTable4, g_pc_table);
  EXPECT_NE(cfg4.size(), 0);
  EXPECT_EQ(ComputeFunctionCyclomaticComplexity(1, cfg4), 2);
}
// Exercises LazyGetReachabilityForPc() from several threads at once: every
// thread queries all four PCs repeatedly and then verifies the results.
TEST(ControlFlowGraph, LazyReachability) {
ControlFlowGraph cfg;
cfg.InitializeControlFlowGraph(g_cf_table, g_pc_table);
EXPECT_NE(cfg.size(), 0);
// The work item run by each thread. It captures `cfg` by reference, so `cfg`
// must stay alive until the threads have joined.
auto rt = [&cfg]() {
for (int i = 0; i < 10; ++i) {
cfg.LazyGetReachabilityForPc(1);
cfg.LazyGetReachabilityForPc(2);
cfg.LazyGetReachabilityForPc(3);
cfg.LazyGetReachabilityForPc(4);
}
const auto &reach1 = cfg.LazyGetReachabilityForPc(1);
const auto &reach2 = cfg.LazyGetReachabilityForPc(2);
const auto &reach3 = cfg.LazyGetReachabilityForPc(3);
const auto &reach4 = cfg.LazyGetReachabilityForPc(4);
EXPECT_THAT(reach1, testing::UnorderedElementsAre(1, 2, 3, 4));
EXPECT_THAT(reach2, testing::UnorderedElementsAre(2, 4));
EXPECT_THAT(reach3, testing::UnorderedElementsAre(3, 4));
EXPECT_THAT(reach4, testing::ElementsAre(4));
};
// Schedule the same work item three times on a pool of three threads.
{
ThreadPool threads{3};
threads.Schedule(rt);
threads.Schedule(rt);
threads.Schedule(rt);
} // The threads join here.
}
// Returns the data-dependency path of the `test_fuzz_target` binary.
static std::string GetTargetPath() {
  static constexpr char kTargetRelPath[] = "centipede/testing/test_fuzz_target";
  return GetDataDependencyFilepath(kTargetRelPath);
}
// Returns the data-dependency path of the `test_fuzz_target_trace_pc` binary.
static std::string GetTracePCTargetPath() {
  static constexpr char kTracePcTargetRelPath[] =
      "centipede/testing/test_fuzz_target_trace_pc";
  return GetDataDependencyFilepath(kTracePcTargetRelPath);
}
// Tests GetCfTableFromBinary() on test_fuzz_target.
//
// Loads the binary's control-flow table and walks it record by record. Each
// record is laid out as: PC, successors..., 0, callees..., 0. Positive callee
// entries are counted as direct callees and negative ones as indirect callees.
// For the function-entry PCs of SingleEdgeFunc, MultiEdgeFunc and
// IndirectCallFunc the counts are compared against the expected values.
// The test is skipped when the binary yields an empty cf table, and fails
// (instead of reading out of bounds) when the table ends in the middle of a
// record.
TEST(CFTable, GetCfTable) {
  auto target_path = GetTargetPath();
  std::string tmp_path1 = GetTempFilePath(test_info_->name(), 1);
  // NOTE(review): tmp_path2 is not used below; it is kept because any side
  // effects of GetTempFilePath() are not visible from this test.
  std::string tmp_path2 = GetTempFilePath(test_info_->name(), 2);

  // Load the cf table.
  BinaryInfo binary_info;
  binary_info.InitializeFromSanCovBinary(
      target_path, GetObjDumpPath(), GetLLVMSymbolizerPath(),
      GetTestTempDir(test_info_->name()).string());
  const auto &cf_table = binary_info.cf_table;
  LOG(INFO) << VV(target_path) << VV(tmp_path1) << VV(cf_table.size());
  if (cf_table.empty()) {
    LOG(INFO) << "__sancov_cfs is empty.";
    // TODO(ussuri): This should be removed once OSS clang supports
    //  control-flow.
    GTEST_SKIP();
  }

  ASSERT_FALSE(
      std::filesystem::exists(tmp_path1.c_str()));  // tmp_path1 was deleted.
  LOG(INFO) << VV(cf_table.size());

  const auto &pc_table = binary_info.pc_table;
  EXPECT_FALSE(binary_info.uses_legacy_trace_pc_instrumentation);
  EXPECT_THAT(pc_table.empty(), false);

  const SymbolTable &symbols = binary_info.symbols;

  // Maps a PC to its index in pc_table.
  absl::flat_hash_map<uintptr_t, size_t> pc_table_index;
  for (size_t i = 0; i < pc_table.size(); i++) {
    pc_table_index[pc_table[i].pc] = i;
  }

  // `j` is advanced manually: every iteration consumes one whole record.
  for (size_t j = 0; j < cf_table.size();) {
    auto current_pc = cf_table[j];
    ++j;
    size_t successor_num = 0;
    size_t callee_num = 0;
    size_t icallee_num = 0;

    // Iterate over successors.
    while (j < cf_table.size() && cf_table[j]) {
      ++successor_num;
      ++j;
    }
    ASSERT_LT(j, cf_table.size())
        << "cf_table ends before the successor delimiter";
    ++j;  // Step over the delimiter.

    // Iterate over callees.
    while (j < cf_table.size() && cf_table[j]) {
      if (cf_table[j] > 0) ++callee_num;
      if (cf_table[j] < 0) ++icallee_num;
      ++j;
    }
    ASSERT_LT(j, cf_table.size())
        << "cf_table ends before the callee delimiter";
    ++j;  // Step over the delimiter.

    // Only function-entry PCs that are present in pc_table are checked.
    const auto index_it = pc_table_index.find(current_pc);
    if (index_it == pc_table_index.end()) continue;
    const size_t index = index_it->second;
    if (!pc_table[index].has_flag(PCInfo::kFuncEntry)) continue;

    const std::string_view current_function = symbols.func(index);
    // Check for properties.
    SCOPED_TRACE(testing::Message()
                 << "Checking for " << VV(current_function)
                 << VV(current_pc));
    if (current_function == "SingleEdgeFunc") {
      EXPECT_EQ(successor_num, 0);
      EXPECT_EQ(icallee_num, 0);
      EXPECT_EQ(callee_num, 0);
    } else if (current_function == "MultiEdgeFunc") {
      EXPECT_EQ(successor_num, 2);
      EXPECT_EQ(icallee_num, 0);
      EXPECT_EQ(callee_num, 0);
    } else if (current_function == "IndirectCallFunc") {
      EXPECT_EQ(successor_num, 0);
      EXPECT_EQ(icallee_num, 1);
      EXPECT_EQ(callee_num, 0);
    }
  }
}
static void SymbolizeBinary(std::string_view test_dir,
std::string_view target_path, bool use_trace_pc) {
BinaryInfo binary_info;
binary_info.InitializeFromSanCovBinary(target_path, GetObjDumpPath(),
GetLLVMSymbolizerPath(), test_dir);
// Load the pc table.
const auto &pc_table = binary_info.pc_table;
// Check that it's not empty.
EXPECT_NE(pc_table.size(), 0);
// Check that the first PCInfo corresponds to a kFuncEntry.
EXPECT_TRUE(pc_table[0].has_flag(PCInfo::kFuncEntry));
// Test the symbols.
const SymbolTable &symbols = binary_info.symbols;
ASSERT_EQ(symbols.size(), pc_table.size());
bool has_llvm_fuzzer_test_one_input = false;
size_t single_edge_func_num_edges = 0;
size_t multi_edge_func_num_edges = 0;
// Iterate all symbols, verify that we:
// * Don't have main (coverage instrumentation is disabled for main).
// * Have LLVMFuzzerTestOneInput with the correct location.
// * Have one edge for SingleEdgeFunc.
// * Have several edges for MultiEdgeFunc.
for (size_t i = 0; i < symbols.size(); i++) {
bool is_func_entry = pc_table[i].has_flag(PCInfo::kFuncEntry);
if (is_func_entry) {
LOG(INFO) << symbols.full_description(i);
}
single_edge_func_num_edges += symbols.func(i) == "SingleEdgeFunc";
multi_edge_func_num_edges += symbols.func(i) == "MultiEdgeFunc";
EXPECT_NE(symbols.func(i), "main");
if (is_func_entry && symbols.func(i) == "LLVMFuzzerTestOneInput") {
// This is a function entry block for LLVMFuzzerTestOneInput.
has_llvm_fuzzer_test_one_input = true;
EXPECT_THAT(
symbols.location(i),
testing::HasSubstr("centipede/testing/test_fuzz_target.cc:71"));
}
}
EXPECT_TRUE(has_llvm_fuzzer_test_one_input);
EXPECT_EQ(single_edge_func_num_edges, 1);
EXPECT_GT(multi_edge_func_num_edges, 1);
}
// Runs the SymbolizeBinary() checks (PC table and SymbolTable) against
// test_fuzz_target.
TEST(PCTable, GetPcTableFromBinary_And_SymbolTable_PCTable) {
  const std::string scratch_dir = GetTestTempDir(test_info_->name()).string();
  const std::string binary_path = GetTargetPath();
  EXPECT_NO_FATAL_FAILURE(
      SymbolizeBinary(scratch_dir, binary_path, /*use_trace_pc=*/false));
}
// Runs the SymbolizeBinary() checks (PC table and SymbolTable) against
// test_fuzz_target_trace_pc.
TEST(PCTable, GetPcTableFromBinary_And_SymbolTable_TracePC) {
  const std::string scratch_dir = GetTestTempDir(test_info_->name()).string();
  const std::string binary_path = GetTracePCTargetPath();
  EXPECT_NO_FATAL_FAILURE(
      SymbolizeBinary(scratch_dir, binary_path, /*use_trace_pc=*/true));
}
} // namespace
} // namespace fuzztest::internal

View File

@ -0,0 +1,322 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/corpus.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include "absl/log/check.h"
#include "absl/log/log.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_join.h"
#include "absl/strings/substitute.h"
#include "./centipede/control_flow.h"
#include "./centipede/coverage.h"
#include "./centipede/execution_metadata.h"
#include "./centipede/feature.h"
#include "./centipede/feature_set.h"
#include "./centipede/util.h"
#include "./common/defs.h"
#include "./common/logging.h" // IWYU pragma: keep
#include "./common/remote_file.h"
#include "./common/status_macros.h"
namespace fuzztest::internal {
//------------------------------------------------------------------------------
// Corpus
//------------------------------------------------------------------------------
// Computes the corpus weight of the feature vector `fv`.
//
// The base weight comes from `fs.ComputeWeight(fv)`. When `coverage_frontier`
// was built from a non-empty PC table, the base weight is multiplied by
// (1 + the sum of the frontier weights of all in-range PC features of `fv`
// that lie on the frontier).
static size_t ComputeWeight(const FeatureVec &fv, const FeatureSet &fs,
                            const CoverageFrontier &coverage_frontier) {
  const size_t base_weight = fs.ComputeWeight(fv);
  // Without a PC table no control-flow information is available, so the base
  // weight is all we can use.
  const size_t num_pcs = coverage_frontier.MaxPcIndex();
  if (num_pcs == 0) return base_weight;

  // Starts at 1 so that the base weight is never scaled down to zero.
  size_t multiplier = 1;
  for (const auto feature : fv) {
    const bool is_pc_feature = feature_domains::kPCs.Contains(feature);
    if (!is_pc_feature) continue;
    const auto pc_index = ConvertPCFeatureToPcIndex(feature);
    // Out-of-bounds indices are never looked up in the frontier.
    // TODO(b/299624088): revisit once dlopen is supported.
    if (pc_index < num_pcs && coverage_frontier.PcIndexIsFrontier(pc_index)) {
      multiplier += coverage_frontier.FrontierWeight(pc_index);
    }
  }
  return base_weight * multiplier;
}
std::pair<size_t, size_t> Corpus::MaxAndAvgSize() const {
if (records_.empty()) return {0, 0};
size_t max = 0;
size_t total = 0;
for (const auto &r : records_) {
max = std::max(max, r.data.size());
total += r.data.size();
}
return {max, total / records_.size()};
}
// Re-weights every record against `fs` and `coverage_frontier`, then removes
// a weighted-random subset so that at most
// min(max_corpus_size, max(1, number of non-zero-weight records)) records are
// left. Returns the number of removed records.
//
// `max_corpus_size` must be positive (CHECK-ed). A corpus with fewer than two
// records is left untouched and 0 is returned.
size_t Corpus::Prune(const FeatureSet &fs,
                     const CoverageFrontier &coverage_frontier,
                     size_t max_corpus_size, Rng &rng) {
  // TODO(kcc): use coverage_frontier.
  CHECK(max_corpus_size);
  if (records_.size() < 2) return 0;

  // Recompute the weights.
  size_t num_zero_weights = 0;
  for (size_t i = 0, n = records_.size(); i < n; ++i) {
    fs.PruneFeaturesAndCountUnseen(records_[i].features);
    auto new_weight =
        ComputeWeight(records_[i].features, fs, coverage_frontier);
    weighted_distribution_.ChangeWeight(i, new_weight);
    if (new_weight == 0) ++num_zero_weights;
  }

  // Remove zero weights and the corresponding corpus record.
  // Also remove some random elements, if the corpus is still too big.
  // The corpus must not be empty, hence target_size is at least 1.
  // It should also be <= max_corpus_size.
  // `std::max<size_t>` keeps both arguments the same type on platforms where
  // size_t is not `unsigned long`.
  const size_t num_nonzero_weights = records_.size() - num_zero_weights;
  const size_t target_size =
      std::min(max_corpus_size, std::max<size_t>(1, num_nonzero_weights));
  auto subset_to_remove =
      weighted_distribution_.RemoveRandomWeightedSubset(target_size, rng);
  RemoveSubset(subset_to_remove, records_);
  // The cumulative weights went stale with the ChangeWeight() calls above.
  weighted_distribution_.RecomputeInternalState();
  CHECK(!records_.empty());

  // Features may have shrunk from PruneFeaturesAndCountUnseen.
  // Call shrink_to_fit for the features that survived the pruning.
  for (auto &record : records_) {
    record.features.shrink_to_fit();
  }

  num_pruned_ += subset_to_remove.size();
  return subset_to_remove.size();
}
void Corpus::Add(const ByteArray &data, const FeatureVec &fv,
const ExecutionMetadata &metadata, const FeatureSet &fs,
const CoverageFrontier &coverage_frontier) {
// TODO(kcc): use coverage_frontier.
CHECK(!data.empty())
<< "Got request to add empty element to corpus: ignoring";
CHECK_EQ(records_.size(), weighted_distribution_.size());
records_.push_back({data, fv, metadata});
weighted_distribution_.AddWeight(ComputeWeight(fv, fs, coverage_frontier));
}
// Returns the record whose index the weight distribution selects for the
// random value `random`.
const CorpusRecord &Corpus::WeightedRandom(size_t random) const {
  const size_t chosen_index = weighted_distribution_.RandomIndex(random);
  return records_[chosen_index];
}
// Returns the record at index `random % NumActive()`, i.e. a uniformly chosen
// record when `random` is uniformly distributed.
// The corpus must not be empty (CHECK-ed): the modulo below would otherwise
// divide by zero.
const CorpusRecord &Corpus::UniformRandom(size_t random) const {
  CHECK(!records_.empty());
  return records_[random % records_.size()];
}
// Writes per-input statistics of the corpus to `filepath`.
//
// The output starts with a `# <description>` line, followed by a JSON object
// with a "num_inputs" field and a "corpus_stats" array. The array holds one
// {"size", "frequencies"} entry per record, where "frequencies" lists
// `fs.Frequency()` of each of the record's features.
// CHECK-fails if the file cannot be opened, written or closed.
void Corpus::DumpStatsToFile(const FeatureSet &fs, std::string_view filepath,
std::string_view description) {
auto *file = ValueOrDie(RemoteFileOpen(filepath, "w"));
CHECK(file != nullptr) << "Failed to open file: " << filepath;
// Request a 100 MiB write buffer for the file.
CHECK_OK(RemoteFileSetWriteBufferSize(file, 100UL * 1024 * 1024));
// Header template: $0 = description, $1 = number of records.
static constexpr std::string_view kHeaderStub = R"(# $0
{
"num_inputs": $1,
"corpus_stats": [)";
// Record template: $0 = separator ("" for the first record, "," afterwards),
// $1 = input size, $2 = comma-separated feature frequencies.
static constexpr std::string_view kRecordStub = R"($0
{"size": $1, "frequencies": [$2]})";
// Closes the "corpus_stats" array and the top-level object.
static constexpr std::string_view kFooter = R"(
]
}
)";
const std::string header_str =
absl::Substitute(kHeaderStub, description, records_.size());
CHECK_OK(RemoteFileAppend(file, header_str));
// Empty before the first record, "," before every later one, so that the
// array elements end up comma-separated without a trailing comma.
std::string before_record;
for (const auto &record : records_) {
std::vector<size_t> frequencies;
frequencies.reserve(record.features.size());
for (const auto feature : record.features) {
frequencies.push_back(fs.Frequency(feature));
}
const std::string frequencies_str = absl::StrJoin(frequencies, ", ");
const std::string record_str = absl::Substitute(
kRecordStub, before_record, record.data.size(), frequencies_str);
CHECK_OK(RemoteFileAppend(file, record_str));
before_record = ",";
}
CHECK_OK(RemoteFileAppend(file, std::string{kFooter}));
CHECK_OK(RemoteFileClose(file));
}
std::string Corpus::MemoryUsageString() const {
size_t data_size = 0;
size_t features_size = 0;
for (const auto &record : records_) {
data_size += record.data.capacity() * sizeof(record.data[0]);
features_size += record.features.capacity() * sizeof(record.features[0]);
}
return absl::StrCat("d", data_size >> 20, "/f", features_size >> 20);
}
//------------------------------------------------------------------------------
// WeightedDistribution
//------------------------------------------------------------------------------
// Appends `weight` and extends the running totals by one element
// (previous total + `weight`).
void WeightedDistribution::AddWeight(uint64_t weight) {
  CHECK_EQ(weights_.size(), cumulative_weights_.size());
  // The running total before this weight; zero if this is the first one.
  const uint64_t previous_total =
      cumulative_weights_.empty() ? 0 : cumulative_weights_.back();
  weights_.push_back(weight);
  cumulative_weights_.push_back(previous_total + weight);
}
// Overwrites the weight at `idx` (which must be < size()) with `new_weight`.
// The running totals are not touched here; they are only flagged as stale.
void WeightedDistribution::ChangeWeight(size_t idx, uint64_t new_weight) {
  CHECK_LT(idx, size());
  cumulative_weights_valid_ = false;
  weights_[idx] = new_weight;
}
__attribute__((noinline)) // to see it in profile.
void WeightedDistribution::RecomputeInternalState() {
uint64_t partial_sum = 0;
for (size_t i = 0, n = size(); i < n; i++) {
partial_sum += weights_[i];
cumulative_weights_[i] = partial_sum;
}
cumulative_weights_valid_ = true;
}
__attribute__((noinline))  // to see it in profile.
size_t
WeightedDistribution::RandomIndex(size_t random) const {
  // Maps `random` to an index so that each index is hit proportionally to its
  // weight. Requires at least one weight and up-to-date running totals.
  CHECK(!weights_.empty());
  CHECK(cumulative_weights_valid_);

  const uint64_t total_weight = cumulative_weights_.back();
  // All weights are zero: can't do much else than pick uniformly.
  if (total_weight == 0) return random % size();

  // Reduce `random` into [0, total_weight) and find the first running total
  // that is strictly greater than it.
  const uint64_t target = random % total_weight;
  const auto first = cumulative_weights_.begin();
  auto it = std::upper_bound(first, cumulative_weights_.end(), target);
  CHECK(it != cumulative_weights_.end());
  return it - first;
}
// Drops the last weight together with its running total and returns the
// dropped weight. The distribution must not be empty.
uint64_t WeightedDistribution::PopBack() {
  const uint64_t removed_weight = weights_.back();
  cumulative_weights_.pop_back();
  weights_.pop_back();
  return removed_weight;
}
//------------------------------------------------------------------------------
// CoverageFrontier
//------------------------------------------------------------------------------
size_t CoverageFrontier::Compute(const Corpus &corpus) {
return Compute(corpus.Records());
}
// Recomputes the coverage frontier and the frontier weights from scratch for
// the PC features found in `corpus_records`.
//
// A covered PC becomes a frontier PC when at least one of its successors is
// in the PC table but not covered. Its weight accumulates
// ComputeFrontierWeight() over the callees of every uncovered, in-table basic
// block reachable from such a successor.
//
// Returns the number of functions that are partially covered, i.e. have at
// least one but not all of their PCs covered. The same value is stored in
// num_functions_in_frontier_.
size_t CoverageFrontier::Compute(
const std::vector<CorpusRecord> &corpus_records) {
// Initialize the vectors.
std::fill(frontier_.begin(), frontier_.end(), false);
std::fill(frontier_weight_.begin(), frontier_weight_.end(), 0);
// A vector of covered indices in pc_table. Needed for Coverage object.
PCIndexVec covered_pcs;
for (const auto &record : corpus_records) {
for (auto feature : record.features) {
if (!feature_domains::kPCs.Contains(feature)) continue;
size_t idx = ConvertPCFeatureToPcIndex(feature);
// Ignore PC indices that fall outside the PC table.
if (idx >= binary_info_.pc_table.size()) continue;
covered_pcs.push_back(idx);
// At this stage frontier_ is used as a scratch "is covered" bitmap; it is
// reset function by function below before the real frontier is marked.
frontier_[idx] = true;
}
}
Coverage coverage(binary_info_.pc_table, covered_pcs);
num_functions_in_frontier_ = 0;
// The callback receives the PC index range [beg, end) of one function.
IteratePcTableFunctions(binary_info_.pc_table, [this, &coverage](size_t beg,
size_t end) {
auto frontier_begin = frontier_.begin() + beg;
auto frontier_end = frontier_.begin() + end;
// Number of covered PCs in this function (frontier_ still holds coverage).
size_t cov_size_in_this_func =
std::count(frontier_begin, frontier_end, true);
// Partially covered function: some, but not all, of its PCs are covered.
if (cov_size_in_this_func > 0 && cov_size_in_this_func < end - beg)
++num_functions_in_frontier_;
// Reset the frontier_ entries.
std::fill(frontier_begin, frontier_end, false);
// Iterate over BBs in the function and check the coverage status.
for (size_t i = beg; i < end; ++i) {
// If the current pc is not covered, it cannot be a frontier.
if (!coverage.BlockIsCovered(i)) continue;
auto pc = binary_info_.pc_table[i].pc;
// Current pc is covered, look for a non-covered successor.
for (auto successor : binary_info_.control_flow_graph.GetSuccessors(pc)) {
// Successor pc may not be in PCTable because of pruning.
if (!binary_info_.control_flow_graph.IsInPcTable(successor)) continue;
auto successor_idx =
binary_info_.control_flow_graph.GetPcIndex(successor);
// This successor is covered, skip it.
if (coverage.BlockIsCovered(successor_idx)) continue;
// Now we have a frontier, compute the weight.
frontier_[i] = true;
// Calculate frontier weight.
// Here we use reachability and coverage to identify all reachable and
// non-covered BBs from successor, and then use all functions called
// in those BBs.
for (auto reachable_bb :
binary_info_.control_flow_graph.LazyGetReachabilityForPc(
successor)) {
if (!binary_info_.control_flow_graph.IsInPcTable(reachable_bb) ||
coverage.BlockIsCovered(
binary_info_.control_flow_graph.GetPcIndex(reachable_bb))) {
// This reachable BB is already either processed and added or
// covered via a different path -- not interesting!
continue;
}
// The weight is accumulated across all uncovered successors of `i`.
frontier_weight_[i] += ComputeFrontierWeight(
coverage, binary_info_.control_flow_graph,
binary_info_.call_graph.GetBasicBlockCallees(reachable_bb));
}
}
}
});
return num_functions_in_frontier_;
}
} // namespace fuzztest::internal

View File

@ -0,0 +1,210 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef THIRD_PARTY_CENTIPEDE_CORPUS_H_
#define THIRD_PARTY_CENTIPEDE_CORPUS_H_
#include <cstddef>
#include <cstdint>
#include <ostream>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include "absl/log/check.h"
#include "./centipede/binary_info.h"
#include "./centipede/execution_metadata.h"
#include "./centipede/feature.h"
#include "./centipede/feature_set.h"
#include "./centipede/util.h"
#include "./common/defs.h"
namespace fuzztest::internal {
// WeightedDistribution maintains an array of integer weights.
// It allows to compute a random number in range [0,size()) such that
// the probability of each number is proportional to its weight.
//
// Alongside the weights the class keeps their running totals. ChangeWeight()
// leaves the totals stale; RecomputeInternalState() rebuilds them.
class WeightedDistribution {
 public:
  // Adds one more weight.
  void AddWeight(uint64_t weight);
  // Removes the last weight and returns it.
  // Precondition: size() > 0.
  uint64_t PopBack();
  // Changes the existing idx-th weight to new_weight.
  void ChangeWeight(size_t idx, uint64_t new_weight);
  // Returns a random number in [0,size()), using a random number `random`.
  // For proper randomness, `random` should come from a 64-bit RNG.
  // RandomIndex() must not be called after ChangeWeight() without first
  // calling RecomputeInternalState().
  size_t RandomIndex(size_t random) const;
  // Returns the number of weights.
  size_t size() const { return weights_.size(); }
  // Removes all weights.
  void clear() {
    weights_.clear();
    cumulative_weights_.clear();
    // Two empty arrays are trivially consistent. Without this reset, a
    // ChangeWeight() made before clear() would keep RandomIndex() failing its
    // validity CHECK even after the distribution is refilled via AddWeight().
    cumulative_weights_valid_ = true;
  }
  // Fixes the internal state that could become stale after call(s) to
  // ChangeWeight().
  void RecomputeInternalState();
  // Computes a random weighted subset of elements to remove.
  // Removes this subset from `this`.
  // Returns the subset as a sorted array of indices.
  // NOTE(review): the entries left in cumulative_weights_ are not rebuilt
  // here, so they presumably still include the removed weights — callers
  // should call RecomputeInternalState() before RandomIndex(); confirm.
  std::vector<size_t> RemoveRandomWeightedSubset(size_t target_size, Rng &rng) {
    auto subset_to_remove = RandomWeightedSubset(weights_, target_size, rng);
    RemoveSubset(subset_to_remove, weights_);
    RemoveSubset(subset_to_remove, cumulative_weights_);
    return subset_to_remove;
  }

 private:
  // The array of weights. The probability of choosing the index Idx
  // is weights_[Idx] / SumOfAllWeights.
  std::vector<uint64_t> weights_;
  // i-th element is the sum of weights_[0] through weights_[i], inclusive.
  std::vector<uint64_t> cumulative_weights_;
  // If false, cumulative_weights_ needs to be recomputed.
  bool cumulative_weights_valid_ = true;
};
class CoverageFrontier; // Forward decl, used in Corpus.
// Input data and metadata.
// One corpus element, as stored by Corpus.
struct CorpusRecord {
// The input bytes.
ByteArray data;
// The features associated with this input.
FeatureVec features;
// The execution metadata supplied together with the input.
ExecutionMetadata metadata;
};
// Maintains the corpus of inputs.
// Allows to prune (forget) inputs that become uninteresting.
// Every record has a weight that is kept in a WeightedDistribution and is used
// for weighted random selection.
class Corpus {
public:
Corpus() = default;
Corpus(const Corpus &) = default;
Corpus(Corpus &&) noexcept = default;
Corpus &operator=(const Corpus &) = default;
Corpus &operator=(Corpus &&) noexcept = default;
// Mutators.
// Adds a corpus element, consisting of 'data' (the input bytes, non-empty),
// 'fv' (the features associated with this input), and execution `metadata`.
// `fs` is used to compute weights of `fv`.
// `coverage_frontier` also takes part in the weight computation.
void Add(const ByteArray &data, const FeatureVec &fv,
const ExecutionMetadata &metadata, const FeatureSet &fs,
const CoverageFrontier &coverage_frontier);
// Removes elements that contain only frequent features, according to 'fs'.
// Also, randomly removes elements to reduce the size to <= `max_corpus_size`.
// `max_corpus_size` should be positive.
// Returns the number of removed elements.
size_t Prune(const FeatureSet &fs, const CoverageFrontier &coverage_frontier,
size_t max_corpus_size, Rng &rng);
// Accessors.
// Returns the inputs.
const std::vector<CorpusRecord> &Records() const { return records_; }
// Returns the total number of inputs added.
// Computed as the number of pruned inputs plus the number of active ones.
size_t NumTotal() const { return num_pruned_ + NumActive(); }
// Return the number of currently active inputs, i.e. inputs that we want to
// keep mutating.
size_t NumActive() const { return records_.size(); }
// Returns the max and avg sizes of the inputs.
std::pair<size_t, size_t> MaxAndAvgSize() const;
// Returns a random active corpus record using weighted distribution.
// See WeightedDistribution.
const CorpusRecord &WeightedRandom(size_t random) const;
// Returns a random active corpus record using uniform distribution.
// The corpus must not be empty.
const CorpusRecord &UniformRandom(size_t random) const;
// Returns the element with index 'idx', where `idx` < NumActive().
const ByteArray &Get(size_t idx) const { return records_[idx].data; }
// Returns the execution metadata for the element `idx`, `idx` < NumActive().
const ExecutionMetadata &GetMetadata(size_t idx) const {
return records_[idx].metadata;
}
// Logging.
// Saves the corpus stats in JSON format to the `filepath` file, using `fs`
// for feature frequencies.
void DumpStatsToFile(const FeatureSet &fs, std::string_view filepath,
std::string_view description);
// Returns a string used for logging the corpus memory usage.
std::string MemoryUsageString() const;
private:
// The currently active inputs.
std::vector<CorpusRecord> records_;
// Maintains weights for elements of records_.
WeightedDistribution weighted_distribution_;
// The total number of records removed by Prune() so far.
size_t num_pruned_ = 0;
};
// Coverage frontier is a set of PCs that are themselves covered, but some of
// adjacent PCs in the same function are not.
// This class identifies precise frontiers. Each frontier is assigned a weight.
// Frontier weight is a representation of how much code is behind the
// frontier. Therefore, it should be used to prioritize which frontier to focus
// first.
class CoverageFrontier {
public:
// Stores a reference to `binary_info` (it is not copied), so `binary_info`
// must outlive this object. The per-PC vectors are sized to the PC table.
explicit CoverageFrontier(const BinaryInfo &binary_info)
: binary_info_(binary_info),
frontier_(binary_info.pc_table.size()),
frontier_weight_(binary_info.pc_table.size()) {}
// Computes the coverage frontier of `corpus`.
// Returns the number of functions in the frontier.
size_t Compute(const Corpus &corpus);
// Same as above.
size_t Compute(const std::vector<CorpusRecord> &corpus_records);
// Returns the number of functions in the frontier.
size_t NumFunctionsInFrontier() const { return num_functions_in_frontier_; }
// Returns true iff `idx` belongs to the frontier.
// `idx` must be < MaxPcIndex() (CHECK-ed).
bool PcIndexIsFrontier(size_t idx) const {
CHECK_LT(idx, MaxPcIndex());
return frontier_[idx];
}
// Returns the size of the pc_table used to create `this`.
// Valid PC indices are therefore [0, MaxPcIndex()).
size_t MaxPcIndex() const { return binary_info_.pc_table.size(); }
// Returns the frontier weight of pc at `idx`, weight of a non-frontier is 0.
// `idx` must be < MaxPcIndex() (CHECK-ed).
uint64_t FrontierWeight(size_t idx) const {
CHECK_LT(idx, MaxPcIndex());
return frontier_weight_[idx];
}
private:
// Not owned; see the constructor.
const BinaryInfo &binary_info_;
// frontier_[idx] is true iff pc_table[idx] is part of the coverage frontier.
std::vector<bool> frontier_;
// Stores the weight associated with frontier_[idx].
std::vector<uint64_t> frontier_weight_;
// The number of functions in the frontier.
size_t num_functions_in_frontier_ = 0;
};
} // namespace fuzztest::internal
#endif // THIRD_PARTY_CENTIPEDE_CORPUS_H_

View File

@ -0,0 +1,157 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/corpus_io.h"
#include <cstddef>
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include "absl/log/check.h"
#include "absl/log/log.h"
#include "absl/status/status.h"
#include "absl/time/time.h"
#include "absl/types/span.h"
#include "./centipede/feature.h"
#include "./centipede/rusage_profiler.h"
#include "./centipede/util.h"
#include "./common/blob_file.h"
#include "./common/defs.h"
#include "./common/hash.h"
#include "./common/logging.h"
#include "./common/remote_file.h"
namespace fuzztest::internal {
// Reads all inputs from the blob file `corpus_path` and, if available, their
// features from the blob file `features_path`, and calls `callback` once per
// input.
//
// Inputs that have a matching record in the features file are reported first,
// in the order of the features file; all remaining inputs are reported
// afterwards with an empty FeatureVec. Returns without calling `callback` if
// `corpus_path` is empty or does not exist. CHECK-fails if an existing file
// cannot be opened.
void ReadShard(std::string_view corpus_path, std::string_view features_path,
const std::function<void(ByteArray, FeatureVec)> &callback) {
const bool good_corpus_path =
!corpus_path.empty() && RemotePathExists(corpus_path);
const bool good_features_path =
!features_path.empty() && RemotePathExists(features_path);
if (!good_corpus_path) {
LOG(WARNING) << "Corpus file path empty or not found - returning: "
<< corpus_path;
return;
}
RPROF_THIS_FUNCTION_WITH_TIMELAPSE( //
/*enable=*/ABSL_VLOG_IS_ON(10), //
/*timelapse_interval=*/absl::Seconds(30), //
/*also_log_timelapses=*/false);
// Maps input hashes to inputs.
// NOTE: Using `std::multimap` to prevent auto-deduplication of inputs.
// TODO(ussuri): This is the legacy behavior. At least one test relies on
// it (but doesn't really need it). Investigate and switch to
// `absl::flat_hash_map`.
std::multimap<std::string /*hash*/, ByteArray /*input*/> hash_to_input;
// Read inputs from the corpus file into `hash_to_input`.
auto corpus_reader = DefaultBlobFileReaderFactory();
CHECK_OK(corpus_reader->Open(corpus_path)) << VV(corpus_path);
ByteSpan blob;
// Reading stops at the first Read() that does not return OK.
while (corpus_reader->Read(blob).ok()) {
std::string hash = Hash(blob);
ByteArray input{blob.begin(), blob.end()};
hash_to_input.emplace(std::move(hash), std::move(input));
}
RPROF_SNAPSHOT("Read inputs");
// Input counts of various kinds (for logging).
const size_t num_inputs = hash_to_input.size();
// Starts at the total and is decremented for every input matched below.
size_t num_inputs_missing_features = num_inputs;
size_t num_inputs_empty_features = 0;
size_t num_inputs_non_empty_features = 0;
// If the features file is not passed or doesn't exist, simply ignore it.
if (!good_features_path) {
LOG(WARNING) << "Features file path empty or not found - ignoring: "
<< features_path;
} else {
// Read features from the features file. For each feature, find a matching
// input in `hash_to_input`, call `callback` for the pair, and remove the
// entry from `hash_to_input`. In the end, `hash_to_input` will contain
// only inputs without matching features.
auto features_reader = DefaultBlobFileReaderFactory();
CHECK_OK(features_reader->Open(features_path)) << VV(features_path);
ByteSpan hash_and_features;
while (features_reader->Read(hash_and_features).ok()) {
// Every valid feature record must contain the hash at the end.
// Ignore this record if it is too short.
if (hash_and_features.size() < kHashLen) continue;
FeatureVec features;
std::string hash = UnpackFeaturesAndHash(hash_and_features, &features);
// extract() removes at most one entry per features record, so each record
// is paired with a single input even if several inputs share the hash.
auto input_node = hash_to_input.extract(hash);
if (!input_node.empty()) {
--num_inputs_missing_features;
if (features.empty()) {
// When the features file got created, Centipede did compute features
// for the input, but they came up empty. Indicate to the client that
// there is no need to recompute by passing this special value.
features = {feature_domains::kNoFeature};
++num_inputs_empty_features;
} else {
++num_inputs_non_empty_features;
}
callback(std::move(input_node.mapped()), std::move(features));
}
}
RPROF_SNAPSHOT("Read features & reported input/features pairs");
}
// Finally, call `callback` on the remaining inputs without matching features.
// This also automatically covers the features file not passed or missing.
for (auto &&[hash, input] : hash_to_input) {
// Indicate to the client that it needs to recompute features for this input
// by passing an empty value.
callback(std::move(input), {});
}
RPROF_SNAPSHOT("Reported inputs with no matching features");
VLOG(1) //
<< "Finished shard reading:\n"
<< "Corpus path : " << corpus_path << "\n"
<< "Features path : " << features_path << "\n"
<< "Inputs : " << num_inputs << "\n"
<< "Inputs, non-empty features : " << num_inputs_non_empty_features
<< "\n"
<< "Inputs, empty features : " << num_inputs_empty_features << "\n"
<< "Inputs, missing features : " << num_inputs_missing_features;
}
// Reads every blob from each file in `sharded_file_paths` and writes it to
// `out_dir` via WriteToRemoteHashedFileInDir(). Logs the number of inputs
// exported per file. CHECK-fails if a file cannot be opened.
void ExportCorpus(absl::Span<const std::string> sharded_file_paths,
                  std::string_view out_dir) {
  LOG(INFO) << "Exporting corpus to " << out_dir;
  for (const std::string &file : sharded_file_paths) {
    auto reader = DefaultBlobFileReaderFactory();
    CHECK_OK(reader->Open(file)) << VV(file);

    // Export blobs until the first read that does not succeed.
    size_t num_exported = 0;
    for (ByteSpan input; reader->Read(input).ok(); ++num_exported) {
      WriteToRemoteHashedFileInDir(out_dir, input);
    }
    LOG(INFO) << "Exported " << num_exported << " inputs from " << file;
  }
}
} // namespace fuzztest::internal

View File

@ -0,0 +1,53 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef THIRD_PARTY_CENTIPEDE_SHARD_READER_H_
#define THIRD_PARTY_CENTIPEDE_SHARD_READER_H_
#include <functional>
#include <string>
#include <string_view>
#include "absl/types/span.h"
#include "./centipede/feature.h"
#include "./common/defs.h"
namespace fuzztest::internal {
// Reads the inputs of one shard together with their features (if available)
// and reports them to `callback`, one call per input.
//
// `corpus_path` is a path to a BlobFile with corpus elements (inputs). If the
// path is empty or non-existent, no processing is done.
//
// `features_path` is a path to a BlobFile with {features/hash} pairs created by
// `PackFeaturesAndHash()`. If the path is empty or non-existent, an empty
// `FeatureVec` is passed to every call of `callback`.
//
// For every {features/hash} pair we need to find an input with this hash.
// This function reads `corpus_path` and `features_path` and calls `callback`
// on every pair {input, features}.
//
// If features are not found for a given input, callback's 2nd argument is {}.
//
// If features are found for a given input but are empty,
// then callback's 2nd argument is {feature_domains::kNoFeature}.
//
// Inputs with matching features are reported first, in the order in which
// their records appear in `features_path`; inputs without matching features
// are reported afterwards.
//
// CHECK-fails if an existing `corpus_path` or `features_path` cannot be opened.
void ReadShard(std::string_view corpus_path, std::string_view features_path,
const std::function<void(ByteArray, FeatureVec)> &callback);
// Unpacks the corpus from `sharded_file_paths` and writes each input to an
// individual file in `out_dir`. The file names are the inputs' hashes.
// CHECK-fails if any of the files in `sharded_file_paths` cannot be opened.
void ExportCorpus(absl::Span<const std::string> sharded_file_paths,
std::string_view out_dir);
} // namespace fuzztest::internal
#endif // THIRD_PARTY_CENTIPEDE_SHARD_READER_H_

View File

@ -0,0 +1,128 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/corpus_io.h"
#include <filesystem> // NOLINT
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/log/check.h"
#include "absl/types/span.h"
#include "./centipede/corpus.h"
#include "./centipede/feature.h"
#include "./centipede/util.h"
#include "./centipede/workdir.h"
#include "./common/blob_file.h"
#include "./common/defs.h"
#include "./common/test_util.h"
namespace fuzztest::internal {
namespace {
using ::testing::UnorderedElementsAre;
void WriteBlobsToFile(std::string_view blob_file_path,
absl::Span<const ByteArray> blobs) {
auto writer = DefaultBlobFileWriterFactory();
CHECK_OK(writer->Open(blob_file_path, "w"));
for (const ByteArray& blob : blobs) {
CHECK_OK(writer->Write(blob));
}
CHECK_OK(writer->Close());
}
std::vector<ByteArray> ReadInputsFromFiles(std::string_view dir) {
std::vector<ByteArray> inputs;
for (const auto& file : std::filesystem::directory_iterator(dir)) {
ByteArray input;
ReadFromLocalFile(file.path().c_str(), input);
inputs.push_back(std::move(input));
}
return inputs;
}
// Verifies that `ReadShard()` pairs every corpus input with its features:
//  * an input with non-empty recorded features gets exactly those features;
//  * an input whose recorded features are empty gets {kNoFeature};
//  * an input with no features record at all gets an empty FeatureVec.
TEST(ReadShardTest, ReadsInputsAndFeaturesAndCallsCallbackForEachPair) {
  ByteArray data1 = {1, 2, 3};
  ByteArray data2 = {3, 4, 5, 6};
  ByteArray data3 = {7, 8, 9, 10, 11};
  ByteArray data4 = {12, 13, 14};
  ByteArray data5 = {15, 16};
  FeatureVec fv1 = {100, 200, 300};
  FeatureVec fv2 = {300, 400, 500, 600};
  FeatureVec fv3 = {700, 800, 900, 1000, 1100};
  FeatureVec fv4 = {};  // empty.

  // All five inputs go into the corpus file.
  std::vector<ByteArray> corpus_blobs;
  corpus_blobs.push_back(data1);
  corpus_blobs.push_back(data2);
  corpus_blobs.push_back(data3);
  corpus_blobs.push_back(data4);
  corpus_blobs.push_back(data5);

  // Only the first four inputs get a features record; `data5` deliberately
  // has none.
  std::vector<ByteArray> features_blobs;
  features_blobs.push_back(PackFeaturesAndHash(data1, fv1));
  features_blobs.push_back(PackFeaturesAndHash(data2, fv2));
  features_blobs.push_back(PackFeaturesAndHash(data3, fv3));
  features_blobs.push_back(PackFeaturesAndHash(data4, fv4));

  TempDir tmp_dir{test_info_->name()};
  std::string corpus_path = tmp_dir.GetFilePath("corpus");
  std::string features_path = tmp_dir.GetFilePath("features");
  WriteBlobsToFile(corpus_path, corpus_blobs);
  WriteBlobsToFile(features_path, features_blobs);

  std::vector<CorpusRecord> res;
  ReadShard(corpus_path, features_path,
            [&res](const ByteArray& input, const FeatureVec& features) {
              res.push_back(CorpusRecord{input, features});
            });

  // Fatal assertion: the indexed accesses below would read out of bounds if
  // fewer than five records were delivered, so stop here on a size mismatch.
  ASSERT_EQ(res.size(), 5UL);
  EXPECT_EQ(res[0].data, data1);
  EXPECT_EQ(res[1].data, data2);
  EXPECT_EQ(res[2].data, data3);
  EXPECT_EQ(res[3].data, data4);
  EXPECT_EQ(res[4].data, data5);
  EXPECT_EQ(res[0].features, fv1);
  EXPECT_EQ(res[1].features, fv2);
  EXPECT_EQ(res[2].features, fv3);
  // Features were recorded but empty => the kNoFeature placeholder.
  EXPECT_EQ(res[3].features, FeatureVec{feature_domains::kNoFeature});
  // No features record at all => empty vector.
  EXPECT_EQ(res[4].features, FeatureVec());
}
// Writes two corpus shards (two inputs each) under a fake workdir, exports
// them with `ExportCorpus()`, and checks that reading back every file in the
// output directory yields exactly the four original inputs, in any order.
TEST(ExportCorpusTest, ExportsCorpusToIndividualFiles) {
  const std::filesystem::path workdir_root = GetTestTempDir(test_info_->name());
  const std::filesystem::path out_dir = workdir_root / "out_dir";
  CHECK(std::filesystem::create_directory(out_dir));

  const WorkDir fake_workdir{workdir_root.c_str(), "fake_binary_name",
                             "fake_binary_hash", /*my_shard_index=*/0};
  const auto shard_paths = fake_workdir.CorpusFilePaths();
  const auto shard0_path = shard_paths.Shard(0);
  const auto shard1_path = shard_paths.Shard(1);
  WriteBlobsToFile(shard0_path, {ByteArray{1, 2}, ByteArray{3}});
  WriteBlobsToFile(shard1_path, {ByteArray{4}, ByteArray{5, 6}});

  ExportCorpus({shard0_path, shard1_path}, out_dir.c_str());

  EXPECT_THAT(ReadInputsFromFiles(out_dir.c_str()),
              UnorderedElementsAre(ByteArray{1, 2}, ByteArray{3}, ByteArray{4},
                                   ByteArray{5, 6}));
}
} // namespace
} // namespace fuzztest::internal

View File

@ -0,0 +1,405 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/corpus.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <filesystem> // NOLINT
#include <string>
#include <vector>
#include "gtest/gtest.h"
#include "./centipede/binary_info.h"
#include "./centipede/call_graph.h"
#include "./centipede/control_flow.h"
#include "./centipede/feature.h"
#include "./centipede/feature_set.h"
#include "./centipede/pc_info.h"
#include "./centipede/util.h"
#include "./common/defs.h"
#include "./common/test_util.h"
namespace fuzztest::internal {
namespace {
// Checks that metadata passed to `Corpus::Add()` (here: cmp data) is returned
// unchanged by `Corpus::GetMetadata()`.
TEST(Corpus, GetCmpData) {
  // Placeholder binary description; only needed to build the frontier.
  PCTable pcs(100);
  CFTable cfs(100);
  BinaryInfo binary{pcs, {}, cfs, {}, {}, {}};
  CoverageFrontier frontier(binary);
  FeatureSet feature_set(3, {});
  Corpus corpus;

  ByteArray cmp_data{2, 0, 1, 2, 3};
  FeatureVec input_features = {10, 20, 30};
  feature_set.IncrementFrequencies(input_features);
  corpus.Add({1}, input_features, /*metadata=*/{cmp_data}, feature_set,
             frontier);

  EXPECT_EQ(corpus.NumActive(), 1);
  EXPECT_EQ(corpus.GetMetadata(0).cmp_data, cmp_data);
}
// Verifies the exact text that `Corpus::DumpStatsToFile()` writes for a
// corpus of two inputs.
TEST(Corpus, PrintStats) {
const std::filesystem::path test_tmpdir = GetTestTempDir(test_info_->name());
PCTable pc_table(100);
CFTable cf_table(100);
BinaryInfo bin_info{pc_table, {}, cf_table, {}, {}, {}};
CoverageFrontier coverage_frontier(bin_info);
FeatureSet fs(3, {});
Corpus corpus;
FeatureVec features1 = {10, 20, 30};
FeatureVec features2 = {20, 40};
// Each input's features are counted in `fs` before the input is added.
fs.IncrementFrequencies(features1);
corpus.Add({1, 2, 3}, features1, {}, fs, coverage_frontier);
fs.IncrementFrequencies(features2);
corpus.Add({4, 5}, features2, {}, fs, coverage_frontier);
const std::string stats_filepath = test_tmpdir / "corpus.txt";
corpus.DumpStatsToFile(fs, stats_filepath, "Test corpus");
std::string stats_file_contents;
ReadFromLocalFile(stats_filepath, stats_file_contents);
// Feature 20 occurs in both inputs, hence its frequency of 2; features 10, 30
// and 40 occur once each. "size" is the input's length in bytes.
EXPECT_EQ(stats_file_contents,
R"(# Test corpus
{
"num_inputs": 2,
"corpus_stats": [
{"size": 3, "frequencies": [1, 2, 1]},
{"size": 2, "frequencies": [2, 1]}
]
}
)");
}
// Exercises `Corpus::Prune()`: pruning driven by feature frequency, pruning
// driven by `max_corpus_size`, and the CHECK-failure on a zero size limit.
TEST(Corpus, Prune) {
// Prune will remove an input if all of its features appear at least 3 times.
PCTable pc_table(100);
CFTable cf_table(100);
BinaryInfo bin_info{pc_table, {}, cf_table, {}, {}, {}};
CoverageFrontier coverage_frontier(bin_info);
FeatureSet fs(3, {});
Corpus corpus;
Rng rng(0);
size_t max_corpus_size = 1000;
// Counts the record's features in `fs`, then adds the record to `corpus`.
auto Add = [&](const CorpusRecord &record) {
fs.IncrementFrequencies(record.features);
corpus.Add(record.data, record.features, {}, fs, coverage_frontier);
};
// Compares the currently active inputs with `expected_inputs`, ignoring
// order (both sides are sorted before the comparison).
auto VerifyActiveInputs = [&](std::vector<ByteArray> expected_inputs) {
std::vector<ByteArray> observed_inputs;
for (size_t i = 0, n = corpus.NumActive(); i < n; i++) {
observed_inputs.push_back(corpus.Get(i));
}
std::sort(observed_inputs.begin(), observed_inputs.end());
std::sort(expected_inputs.begin(), expected_inputs.end());
EXPECT_EQ(observed_inputs, expected_inputs);
};
Add({{0}, {20, 40}});
Add({{1}, {20, 30}});
Add({{2}, {30, 40}});
Add({{3}, {40, 50}});
Add({{4}, {10, 20}});
// Prune. Features 20 and 40 are frequent => input {0} will be removed.
EXPECT_EQ(corpus.NumActive(), 5);
EXPECT_EQ(corpus.Prune(fs, coverage_frontier, max_corpus_size, rng), 1);
EXPECT_EQ(corpus.NumActive(), 4);
// Pruning lowers the active count only; the total stays at 5.
EXPECT_EQ(corpus.NumTotal(), 5);
VerifyActiveInputs({{1}, {2}, {3}, {4}});
Add({{5}, {30, 60}});
EXPECT_EQ(corpus.NumTotal(), 6);
// Prune. Feature 30 is now frequent => inputs {1} and {2} will be removed.
EXPECT_EQ(corpus.NumActive(), 5);
EXPECT_EQ(corpus.Prune(fs, coverage_frontier, max_corpus_size, rng), 2);
EXPECT_EQ(corpus.NumActive(), 3);
VerifyActiveInputs({{3}, {4}, {5}});
// Test with smaller max_corpus_size values.
EXPECT_EQ(corpus.Prune(fs, coverage_frontier, 3, rng), 0);
EXPECT_EQ(corpus.NumActive(), 3);
EXPECT_EQ(corpus.Prune(fs, coverage_frontier, 2, rng), 1);
EXPECT_EQ(corpus.NumActive(), 2);
EXPECT_EQ(corpus.Prune(fs, coverage_frontier, 1, rng), 1);
EXPECT_EQ(corpus.NumActive(), 1);
// The death message is matched against the text "max_corpus_size".
EXPECT_DEATH(corpus.Prune(fs, coverage_frontier, 0, rng),
"max_corpus_size"); // CHECK-fail.
EXPECT_EQ(corpus.NumTotal(), 6);
}
// Regression test for a crash in Corpus::Prune(): the test passes as long as
// the Prune() call below returns normally.
TEST(Corpus, PruneRegressionTest1) {
  PCTable pcs(100);
  CFTable cfs(100);
  BinaryInfo binary{pcs, {}, cfs, {}, {}, {}};
  CoverageFrontier frontier(binary);
  FeatureSet feature_set(2, {});
  Corpus corpus;
  Rng rng(0);
  const size_t corpus_size_limit = 1000;

  // Counts the record's features, then stores the record in the corpus.
  const auto add_record = [&](const CorpusRecord &rec) {
    feature_set.IncrementFrequencies(rec.features);
    corpus.Add(rec.data, rec.features, {}, feature_set, frontier);
  };

  add_record({{1}, {10, 20}});
  add_record({{2}, {10}});
  corpus.Prune(feature_set, frontier, corpus_size_limit, rng);
}
// Exercises `WeightedDistribution`: AddWeight(), RandomIndex(), ChangeWeight()
// together with RecomputeInternalState(), PopBack(), and a larger stress run.
TEST(WeightedDistribution, WeightedDistribution) {
std::vector<uint64_t> freq;
WeightedDistribution wd;
const int kNumIter = 10000;
// Replaces the contents of `wd` with `weights`.
auto set_weights = [&](const std::vector<uint64_t> &weights) {
wd.clear();
for (auto weight : weights) {
wd.AddWeight(weight);
}
};
// Fills `freq` so that freq[i] is the number of times RandomIndex() returned
// index i over kNumIter calls.
auto compute_freq = [&]() {
freq.clear();
freq.resize(wd.size());
// We use numbers in [0, kNumIter) instead of random numbers
// for simplicity.
for (int i = 0; i < kNumIter; i++) {
freq[wd.RandomIndex(i)]++;
}
};
set_weights({1, 1});
compute_freq();
EXPECT_EQ(freq[0], kNumIter / 2);
EXPECT_EQ(freq[1], kNumIter / 2);
set_weights({1, 2});
compute_freq();
EXPECT_GT(freq[0], kNumIter / 4);
EXPECT_LT(freq[0], kNumIter / 2);
EXPECT_GT(freq[1], kNumIter / 2);
set_weights({10, 100, 1});
compute_freq();
EXPECT_LT(9 * freq[2], freq[0]);
EXPECT_LT(9 * freq[0], freq[1]);
// An index with zero weight must never be returned.
set_weights({0, 1, 2});
compute_freq();
EXPECT_EQ(freq[0], 0);
EXPECT_GT(freq[2], freq[1]);
set_weights({2, 1, 0});
compute_freq();
EXPECT_EQ(freq[2], 0);
EXPECT_GT(freq[0], freq[1]);
// Test ChangeWeight
set_weights({1, 2, 3, 4, 5});
compute_freq();
EXPECT_GT(freq[4], freq[3]);
EXPECT_GT(freq[3], freq[2]);
EXPECT_GT(freq[2], freq[1]);
EXPECT_GT(freq[1], freq[0]);
wd.ChangeWeight(2, 1);
// Calling RandomIndex() after ChangeWeight() w/o calling
// RecomputeInternalState() should crash.
EXPECT_DEATH(compute_freq(), "");
wd.RecomputeInternalState();
// Weights: {1, 2, 1, 4, 5}
compute_freq();
EXPECT_GT(freq[4], freq[3]);
EXPECT_GT(freq[3], freq[2]);
EXPECT_LT(freq[2], freq[1]);
EXPECT_GT(freq[1], freq[0]);
// Weights: {1, 2, 1, 0, 5}
wd.ChangeWeight(3, 0);
wd.RecomputeInternalState();
compute_freq();
EXPECT_GT(freq[4], freq[1]);
EXPECT_GT(freq[1], freq[0]);
EXPECT_GT(freq[1], freq[2]);
EXPECT_EQ(freq[3], 0);
// Test PopBack().
wd.PopBack();
// Weights: {1, 2, 1, 0} after PopBack().
EXPECT_EQ(wd.size(), 4);
// NOTE(review): `freq` is not recomputed after PopBack(), so the three
// expectations below re-check values left over from the previous
// compute_freq() call — confirm whether a compute_freq() call was intended.
EXPECT_GT(freq[1], freq[0]);
EXPECT_GT(freq[1], freq[2]);
EXPECT_EQ(freq[3], 0);
// Stress test. If the algorithm is too slow, we may be able to catch it as a
// timeout.
wd.clear();
for (int i = 1; i < 100000; i++) {
wd.AddWeight(i);
}
compute_freq();
}
// Builds a small hand-written binary description (PC table plus control-flow
// table), covers a subset of its PCs through a corpus, and checks which PCs
// `CoverageFrontier::Compute()` reports as frontiers and with what weights.
//
// TODO(ussuri): This is becoming difficult to maintain: various bits of the
// input data are stored in independent arrays, other bits are dynamically
// initialized, and the matching expected results are listed in two long chains
// of EXPECT's. I think it should be doable to refactor this to use something
// like a TestCase struct tying all that together, then iterate over test_cases
// once to populate pc_table etc, and a second time to e.g.
// EXPECT_EQ(frontier.PcIndexIsFrontier(i),
// test_cases[i].expected_is_frontier).
TEST(CoverageFrontier, Compute) {
// Function [0, 1): Fully covered.
// Function [1, 2): Not covered.
// Function [2, 4): Partially covered => has one frontier.
// Function [4, 6): Not covered.
// Function [6, 9): Partially covered => has one frontier.
// Function [9, 12): Fully covered.
// Function [12, 19): Partially covered => has two frontiers.
PCTable pc_table{{0, PCInfo::kFuncEntry}, // Covered.
{1, PCInfo::kFuncEntry},
{2, PCInfo::kFuncEntry}, // Covered.
{3, 0},
{4, PCInfo::kFuncEntry},
{5, 0},
{6, PCInfo::kFuncEntry}, // Covered.
{7, 0}, // Covered.
{8, 0},
{9, PCInfo::kFuncEntry}, // Covered.
{10, 0}, // Covered.
{11, 0}, // Covered.
{12, PCInfo::kFuncEntry}, // Covered.
{13, 0}, // Covered.
{14, 0}, // Covered.
{15, 0},
{16, 0}, // Covered.
{17, 0}, // Covered.
{18, 0}};
// NOTE(review): each record below appears to be laid out as
// {pc, successors..., 0, callees..., 0}, with -1 marking an indirect call —
// confirm against the CFTable definition.
CFTable cf_table{
0, 0, 9, 0, // 0 calls 9.
1, 0, 6, 0, // 1 calls 6.
2, 3, 0, 0, // 2 calls 4 in bb 3.
3, 0, 4, 0, // This bb calls 4.
4, 5, 0, 0, // 4 calls 9 in bb 5.
5, 0, 9, 0, // This bb calls 9.
6, 7, 8, 0, 0, // 6 calls 2 and makes indirect call in bb 8.
7, 0, 0, 8, 0, 2, -1, 0, // This bb calls 2 and makes an indirect
// call.
9, 66, 10, 0, 0, // 9 calls no one. 9 has a successor (66) which is not
// in pc_table. This may happen as a result of pruning.
10, 11, 0, 0, 11, 0, 0, 12, 13, 14, 0, 0, // 12 call 9 and 99 in bb
// 15, and calls 4 in
// bb 18.
13, 15, 16, 0, 0, 14, 17, 18, 0, 0, 15, 19, 0, 9, 99,
0, // PC 15 goes to 19 that is not in pc_table. This bb calls 9 and 99.
16, 13, 0, 0, 17, 0, 0, 18, 0, 4, 0, // This bb calls 4.
19, 0, 0};
BinaryInfo bin_info = {pc_table, {}, cf_table, {},
ControlFlowGraph(), CallGraph()};
bin_info.control_flow_graph.InitializeControlFlowGraph(cf_table, pc_table);
bin_info.call_graph.InitializeCallGraph(cf_table, pc_table);
CoverageFrontier frontier(bin_info);
// pcs[i] is the PC-domain feature that corresponds to PC index i.
FeatureVec pcs(pc_table.size());
for (size_t i = 0; i < pc_table.size(); i++) {
pcs[i] = feature_domains::kPCs.ConvertToMe(i);
}
FeatureSet fs(100, {});
Corpus corpus;
// Adds a one-feature input to the corpus.
auto Add = [&](feature_t feature) {
fs.IncrementFrequencies({feature});
corpus.Add({42}, {feature}, {}, fs, frontier);
};
// Add PC-based features.
for (size_t idx : {0, 2, 6, 7, 9, 10, 11, 12, 13, 14, 16, 17}) {
Add(pcs[idx]);
}
// add some non-pc features.
for (size_t x : {1, 2, 3, 4}) {
Add(feature_domains::kUnknown.ConvertToMe(x));
}
// Compute and check the frontier.
EXPECT_EQ(frontier.Compute(corpus), 3);
EXPECT_EQ(frontier.NumFunctionsInFrontier(), 3);
EXPECT_FALSE(frontier.PcIndexIsFrontier(0));
EXPECT_FALSE(frontier.PcIndexIsFrontier(1));
EXPECT_TRUE(frontier.PcIndexIsFrontier(2));
EXPECT_FALSE(frontier.PcIndexIsFrontier(3));
EXPECT_FALSE(frontier.PcIndexIsFrontier(4));
EXPECT_FALSE(frontier.PcIndexIsFrontier(5));
EXPECT_TRUE(frontier.PcIndexIsFrontier(6));
EXPECT_FALSE(frontier.PcIndexIsFrontier(7));
EXPECT_FALSE(frontier.PcIndexIsFrontier(8));
EXPECT_FALSE(frontier.PcIndexIsFrontier(9));
EXPECT_FALSE(frontier.PcIndexIsFrontier(10));
EXPECT_FALSE(frontier.PcIndexIsFrontier(11));
EXPECT_FALSE(frontier.PcIndexIsFrontier(12));
EXPECT_TRUE(frontier.PcIndexIsFrontier(13));
EXPECT_TRUE(frontier.PcIndexIsFrontier(14));
EXPECT_FALSE(frontier.PcIndexIsFrontier(15));
EXPECT_FALSE(frontier.PcIndexIsFrontier(16));
EXPECT_FALSE(frontier.PcIndexIsFrontier(17));
EXPECT_FALSE(frontier.PcIndexIsFrontier(18));
// Check frontier weight.
// Non-frontier PCs are expected to have weight 0.
EXPECT_EQ(frontier.FrontierWeight(0), 0);
EXPECT_EQ(frontier.FrontierWeight(1), 0);
EXPECT_EQ(frontier.FrontierWeight(2), 153);
EXPECT_EQ(frontier.FrontierWeight(3), 0);
EXPECT_EQ(frontier.FrontierWeight(4), 0);
EXPECT_EQ(frontier.FrontierWeight(5), 0);
EXPECT_EQ(frontier.FrontierWeight(6), 230);
EXPECT_EQ(frontier.FrontierWeight(7), 0);
EXPECT_EQ(frontier.FrontierWeight(8), 0);
EXPECT_EQ(frontier.FrontierWeight(9), 0);
EXPECT_EQ(frontier.FrontierWeight(10), 0);
EXPECT_EQ(frontier.FrontierWeight(11), 0);
EXPECT_EQ(frontier.FrontierWeight(12), 0);
EXPECT_EQ(frontier.FrontierWeight(13), 25);
EXPECT_EQ(frontier.FrontierWeight(14), 153);
EXPECT_EQ(frontier.FrontierWeight(15), 0);
EXPECT_EQ(frontier.FrontierWeight(16), 0);
EXPECT_EQ(frontier.FrontierWeight(17), 0);
EXPECT_EQ(frontier.FrontierWeight(18), 0);
}
// Death test: querying a `CoverageFrontier` with a PC index that does not
// exist in the PC table must terminate the process.
TEST(CoverageFrontierDeath, InvalidIndexToFrontier) {
  // One function made of two PCs.
  PCTable pcs = {{0, PCInfo::kFuncEntry}, {1, 0}};
  CFTable cfs = {0, 1, 0, 0, 1, 0, 0};
  BinaryInfo binary = {pcs, {}, cfs, {}, ControlFlowGraph(), CallGraph()};
  binary.control_flow_graph.InitializeControlFlowGraph(cfs, pcs);
  binary.call_graph.InitializeCallGraph(cfs, pcs);

  CoverageFrontier frontier(binary);
  Corpus empty_corpus;
  frontier.Compute(empty_corpus);

  // Check with a non-existent idx.
  EXPECT_DEATH(frontier.PcIndexIsFrontier(666), "");
  EXPECT_DEATH(frontier.FrontierWeight(666), "");
}
} // namespace
} // namespace fuzztest::internal

View File

@ -0,0 +1,228 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/coverage.h"
#include <string.h>
#include <cstdint>
#include <limits>
#include <ostream>
#include <sstream>
#include <string>
#include <string_view>
#include <vector>
#include "absl/container/flat_hash_set.h"
#include "absl/log/check.h"
#include "absl/strings/str_split.h"
#include "absl/synchronization/mutex.h"
#include "./centipede/control_flow.h"
#include "./centipede/feature.h"
#include "./centipede/pc_info.h"
#include "./centipede/symbol_table.h"
#include "./common/remote_file.h"
#include "./common/status_macros.h"
namespace fuzztest::internal {
// Classifies every function of `pc_table` as fully covered, uncovered, or
// partially covered, given the covered PC indices in `pci_vec`.
//
// `pc_table` is walked one function at a time. A function starts at an entry
// flagged `PCInfo::kFuncEntry` and extends up to (not including) the next such
// entry or the end of the table. The first entry of the table must be a
// function entry (CHECK-enforced).
Coverage::Coverage(const PCTable &pc_table, const PCIndexVec &pci_vec)
    : func_entries_(pc_table.size()),
      fully_covered_funcs_vec_(pc_table.size()),
      covered_pcs_vec_(pc_table.size()) {
  CHECK_LT(pc_table.size(), std::numeric_limits<PCIndex>::max());
  // Hash set for constant-time "is this PC index covered?" queries.
  const absl::flat_hash_set<PCIndex> hit_pcs(pci_vec.begin(), pci_vec.end());
  const size_t table_size = pc_table.size();

  size_t this_func = 0;
  while (this_func < table_size) {
    CHECK(pc_table[this_func].has_flag(PCInfo::kFuncEntry));
    func_entries_[this_func] = true;

    // Scan forward to the next function entry (or the table end).
    size_t func_end = this_func + 1;
    for (; func_end < table_size; ++func_end) {
      if (pc_table[func_end].has_flag(PCInfo::kFuncEntry)) break;
    }

    // Split this function's PC indices into covered and uncovered.
    PartiallyCoveredFunction pcf;
    for (size_t pc = this_func; pc < func_end; ++pc) {
      if (!hit_pcs.contains(pc)) {
        pcf.uncovered.push_back(pc);
        continue;
      }
      pcf.covered.push_back(pc);
      covered_pcs_vec_[pc] = true;
    }

    // File the function under exactly one of
    // {fully_covered_funcs, uncovered_funcs, partially_covered_funcs}.
    const size_t num_func_pcs = func_end - this_func;
    const bool all_covered = (pcf.covered.size() == num_func_pcs);
    const bool none_covered = pcf.covered.empty();
    if (all_covered) {
      fully_covered_funcs.push_back(this_func);
      fully_covered_funcs_vec_[this_func] = true;
    } else if (none_covered) {
      uncovered_funcs.push_back(this_func);
    } else {
      CHECK(!pcf.covered.empty());
      CHECK(!pcf.uncovered.empty());
      CHECK_EQ(pcf.covered.size() + pcf.uncovered.size(), num_func_pcs);
      partially_covered_funcs.push_back(pcf);
    }

    // Continue with the next function.
    this_func = func_end;
  }
}
// Writes a human-readable, by-function coverage report to `filepath`.
//
// Layout of the report, in this order:
//  * optional header "# <description>:" followed by a blank line;
//  * one "FULL: <desc>" line per fully covered function;
//  * one "NONE: <desc>" line per uncovered function;
//  * for every partially covered function a "PARTIAL: <desc>" line, followed
//    by one " + <desc>" line per covered PC and one " - <desc>" line per
//    uncovered PC.
// <desc> is `symbols.full_description()` of the PC index. Any failure of the
// file operations is fatal (CHECK / CHECK_OK).
void Coverage::DumpReportToFile(const SymbolTable &symbols,
std::string_view filepath,
std::string_view description) {
auto *file = ValueOrDie(RemoteFileOpen(filepath, "w"));
CHECK(file != nullptr) << "Failed to open file: " << filepath;
// Request a 100 MiB write buffer for the report file.
CHECK_OK(RemoteFileSetWriteBufferSize(file, 100UL * 1024 * 1024));
if (!description.empty()) {
CHECK_OK(RemoteFileAppend(file, "# "));
CHECK_OK(RemoteFileAppend(file, std::string{description}));
CHECK_OK(RemoteFileAppend(file, ":\n\n"));
}
// Print symbolized function names for all covered functions.
for (auto pc_index : fully_covered_funcs) {
CHECK_OK(RemoteFileAppend(file, "FULL: "));
CHECK_OK(RemoteFileAppend(file, symbols.full_description(pc_index)));
CHECK_OK(RemoteFileAppend(file, "\n"));
}
CHECK_OK(RemoteFileFlush(file));
// Same for uncovered functions.
for (auto pc_index : uncovered_funcs) {
CHECK_OK(RemoteFileAppend(file, "NONE: "));
CHECK_OK(RemoteFileAppend(file, symbols.full_description(pc_index)));
CHECK_OK(RemoteFileAppend(file, "\n"));
}
CHECK_OK(RemoteFileFlush(file));
// For every partially covered function, first print its name,
// then print its covered edges, then uncovered edges.
for (auto &pcf : partially_covered_funcs) {
CHECK_OK(RemoteFileAppend(file, "PARTIAL: "));
// The function is described by the first covered PC of the function.
CHECK_OK(RemoteFileAppend(file, symbols.full_description(pcf.covered[0])));
CHECK_OK(RemoteFileAppend(file, "\n"));
for (auto pc_index : pcf.covered) {
CHECK_OK(RemoteFileAppend(file, " + "));
CHECK_OK(RemoteFileAppend(file, symbols.full_description(pc_index)));
CHECK_OK(RemoteFileAppend(file, "\n"));
}
for (auto pc_index : pcf.uncovered) {
CHECK_OK(RemoteFileAppend(file, " - "));
CHECK_OK(RemoteFileAppend(file, symbols.full_description(pc_index)));
CHECK_OK(RemoteFileAppend(file, "\n"));
}
}
CHECK_OK(RemoteFileFlush(file));
CHECK_OK(RemoteFileClose(file));
}
// Records `pc_index` as observed and returns a description of it if this is
// the first observation; otherwise returns an empty string.
//
// Returns an empty string when:
//  * the PC table is empty (symbolization is off);
//  * `pc_index` was observed before;
//  * `pc_index` is within the PC table and its symbolized description was
//    already produced for another index.
// For an index outside the PC table a non-symbolized description is returned.
std::string CoverageLogger::ObserveAndDescribeIfNew(PCIndex pc_index) {
  // Fast path: nothing to describe without a PC table.
  if (pc_table_.empty()) return std::string();

  absl::MutexLock l(&mu_);
  const bool index_is_new = observed_indices_.insert(pc_index).second;
  if (!index_is_new) return std::string();

  std::ostringstream description;
  if (pc_index < pc_table_.size()) {
    const bool is_func_entry = pc_table_[pc_index].has_flag(PCInfo::kFuncEntry);
    description << (is_func_entry ? "FUNC: " : "EDGE: ");
    description << symbols_.full_description(pc_index);
    // Different indices may symbolize to the same text; report it only once.
    const bool text_is_new =
        observed_descriptions_.insert(description.str()).second;
    if (!text_is_new) return std::string();
  } else {
    description << "FUNC/EDGE index: " << pc_index;
  }
  return description.str();
}
// Builds the filter from `functions_to_filter`, a comma-separated list of
// function names. For every index `idx` whose `symbols.func(idx)` equals one
// of the listed names, `pcs_[idx]` is set to 1. `pcs_` is allocated lazily on
// the first match, so it stays empty when no listed function is found in
// `symbols`.
FunctionFilter::FunctionFilter(std::string_view functions_to_filter,
                               const SymbolTable &symbols) {
  const size_t num_symbols = symbols.size();
  for (const auto &wanted_name : absl::StrSplit(functions_to_filter, ',')) {
    for (size_t pc = 0; pc < num_symbols; ++pc) {
      if (wanted_name != symbols.func(pc)) continue;
      // First match overall: size the table to cover every symbol.
      if (pcs_.empty()) pcs_.resize(num_symbols);
      pcs_[pc] = 1;
    }
  }
}
// Returns true if `features` pass the filter:
//  * always, when the filter holds no PCs (`pcs_` is empty);
//  * otherwise, when at least one feature is a PC feature whose PC index is
//    marked in `pcs_`.
// Non-PC features and PC indices outside of `pcs_` are ignored.
bool FunctionFilter::filter(const FeatureVec &features) const {
  if (pcs_.empty()) return true;
  const size_t num_pcs = pcs_.size();
  for (const auto feature : features) {
    if (feature_domains::kPCs.Contains(feature)) {
      const size_t pc_index = ConvertPCFeatureToPcIndex(feature);
      // pc_index should normally be in range; out-of-range ones are skipped.
      if (pc_index < num_pcs && pcs_[pc_index]) return true;
    }
  }
  return false;
}
// Picks the weight multiplier for the callee at `callee_idx` according to its
// coverage kind in `coverage`: `fully_covered_knob` if the function is fully
// covered, `partially_covered_knob` if its entry block is covered but the
// function is not fully covered, and `uncovered_knob` otherwise.
static uint8_t SelectMultiplierByCoverageKind(uint8_t uncovered_knob,
                                              uint8_t partially_covered_knob,
                                              uint8_t fully_covered_knob,
                                              PCIndex callee_idx,
                                              const Coverage &coverage) {
  uint8_t multiplier = uncovered_knob;
  if (coverage.FunctionIsFullyCovered(callee_idx)) {
    multiplier = fully_covered_knob;
  } else if (coverage.BlockIsCovered(callee_idx)) {
    multiplier = partially_covered_knob;
  }
  return multiplier;
}
// Computes the weight of a frontier from the functions called on its
// non-covered side (`callees`).
//
// Each callee contributes as follows:
//  * indirect call (callee is -1 converted to uintptr_t): `uncovered_knob`;
//  * callee not in the PC table of `cfg` (e.g. a library function): nothing;
//  * otherwise: cyclomatic complexity of the callee times the multiplier
//    selected by the callee's coverage kind in `coverage`.
// Returns the sum of all contributions.
uint32_t ComputeFrontierWeight(const Coverage &coverage,
                               const ControlFlowGraph &cfg,
                               const std::vector<uintptr_t> &callees) {
  // Multiplication factors for different coverage types.
  // TODO(ussuri): replace with actual knobs (cl/486229527).
  constexpr uint8_t uncovered_knob = 153;         // ~ (255 * 0.6)
  constexpr uint8_t partially_covered_knob = 77;  // ~ (255 * 0.3)
  constexpr uint8_t fully_covered_knob = 25;      // ~ (255 * 0.1)
  // Marker of an indirect call. Spelled via numeric_limits rather than
  // `-1ULL`, so the comparison stays correct when uintptr_t is narrower than
  // unsigned long long.
  constexpr uintptr_t kIndirectCallee = std::numeric_limits<uintptr_t>::max();

  uint32_t weight = 0;
  for (auto callee : callees) {
    // TODO(ussuri): Figure out a better way for determining the complexity
    // of indirect callee. For now using cyclomatic_comp = 1, and factor of
    // non-covered callee.
    if (callee == kIndirectCallee) {
      weight += uncovered_knob;
      continue;
    }
    // This function's body is not in this DSO, like library functions. For now
    // skipping it as we have no coverage kind (Fully/Partially covered or
    // uncovered) and no complexity for it.
    if (!cfg.IsInPcTable(callee)) continue;
    // Retrieve cyclomatic complexity
    auto cyclomatic_comp = cfg.GetCyclomaticComplexity(callee);
    // Determine knob based on callee coverage kind.
    auto callee_idx = cfg.GetPcIndex(callee);
    CHECK(cfg.BlockIsFunctionEntry(callee_idx));
    auto coverage_multiplier = SelectMultiplierByCoverageKind(
        uncovered_knob, partially_covered_knob, fully_covered_knob, callee_idx,
        coverage);
    weight += coverage_multiplier * cyclomatic_comp;
  }
  return weight;
}
} // namespace fuzztest::internal

View File

@ -0,0 +1,185 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef THIRD_PARTY_CENTIPEDE_COVERAGE_H_
#define THIRD_PARTY_CENTIPEDE_COVERAGE_H_
#include <stddef.h>
#include <algorithm>
#include <cstdint>
#include <ostream>
#include <string>
#include <string_view>
#include <vector>
#include "absl/base/thread_annotations.h"
#include "absl/container/flat_hash_set.h"
#include "absl/log/check.h"
#include "absl/synchronization/mutex.h"
#include "./centipede/control_flow.h"
#include "./centipede/feature.h"
#include "./centipede/pc_info.h"
namespace fuzztest::internal {
class SymbolTable; // To avoid mutual inclusion with symbol_table.h.
// Reads and visualizes the code coverage produced by SanitizerCoverage.
// https://clang.llvm.org/docs/SanitizerCoverage.html
//
// Every function of the binary is classified as fully covered, uncovered, or
// partially covered.
//
// Thread-compatible.
class Coverage {
public:
// PCTable is a property of the binary.
// PCIndexVec is the coverage obtained from specific execution(s).
Coverage(const PCTable &pc_table,
const PCIndexVec &pci_vec);
// Saves the by-function coverage in human-readable form to the `filepath`
// file, using `symbols` to translate PCs to function names.
// A non-empty `description` is written as a header of the report.
void DumpReportToFile(const SymbolTable &symbols, std::string_view filepath,
std::string_view description = "");
// Returns true if the function is fully covered. pc_index is for a function
// entry (CHECK-enforced). pc_index is not range-checked here.
bool FunctionIsFullyCovered(PCIndex pc_index) const {
CHECK(func_entries_[pc_index]);
return fully_covered_funcs_vec_[pc_index];
}
// Returns true if the given basic block is covered. pc_index is for any BB.
// pc_index is not range-checked here.
bool BlockIsCovered(PCIndex pc_index) const {
return covered_pcs_vec_[pc_index];
}
private:
// A vector of size PCTable. func_entries_[idx] is true iff the PC at idx
// is a function entry.
std::vector<bool> func_entries_;
// Vector of fully covered functions i.e. functions with all edges covered.
// A Function is represented by its entry block's PCIndex.
// TODO(kcc): fix private variables' name to match the code style.
PCIndexVec fully_covered_funcs;
// A vector of size PCTable. fully_covered_funcs_vec_[idx] is true iff the PC
// at idx is an entry block of a fully covered function.
std::vector<bool> fully_covered_funcs_vec_;
// A vector of size PCTable. covered_pcs_vec_[idx] is true iff the PC at idx
// is covered.
std::vector<bool> covered_pcs_vec_;
// Same as `fully_covered_funcs`, but for functions with no edges covered.
PCIndexVec uncovered_funcs;
// Partially covered function: function with some, but not all, edges covered.
// Thus we can represent it as two vectors of PCIndex: covered and uncovered.
struct PartiallyCoveredFunction {
PCIndexVec
covered; // Non-empty, covered[0] is function entry.
PCIndexVec uncovered; // Non-empty.
};
// All partially covered functions.
std::vector<PartiallyCoveredFunction> partially_covered_funcs;
};
// Iterates `pc_table`, calls `callback` on every pair {beg, end}, such that
// pc_table[beg] is PCInfo::kFuncEntry, and pc_table[beg + 1 : end] are not.
//
// Entries that precede the first function entry belong to no function and are
// skipped. (Previously such an entry made the loop spin forever, because `beg`
// was only advanced after a function entry was found.)
template <typename Callback>
void IteratePcTableFunctions(const PCTable &pc_table,
                             Callback callback) {
  const size_t n = pc_table.size();
  size_t beg = 0;
  while (beg < n) {
    if (!pc_table[beg].has_flag(PCInfo::kFuncEntry)) {
      // Not the start of a function: step over it to guarantee progress.
      ++beg;
      continue;
    }
    // Find the end of this function: the next entry or the table end.
    size_t end = beg + 1;
    while (end < n && !pc_table[end].has_flag(PCInfo::kFuncEntry)) {
      ++end;
    }
    callback(beg, end);
    beg = end;
  }
}
// CoverageLogger helps to log coverage locations once for each location.
// CoverageLogger is thread-safe.
class CoverageLogger {
public:
// CTOR.
// Lifetimes of `pc_table` and `symbols` should be longer than for `this`:
// both are stored by reference, not copied.
CoverageLogger(const PCTable &pc_table,
const SymbolTable &symbols)
: pc_table_(pc_table), symbols_(symbols) {}
// Checks if `pc_index` or its symbolized description was observed before.
// If yes, returns empty string.
// If this is the first observation, returns a symbolized description.
// If symbolization is not available, returns a non-symbolized description.
std::string ObserveAndDescribeIfNew(PCIndex pc_index);
private:
// Referenced, not owned.
const PCTable &pc_table_;
const SymbolTable &symbols_;
// Guards the two sets below.
absl::Mutex mu_;
// PC indices observed so far.
absl::flat_hash_set<PCIndex> observed_indices_
ABSL_GUARDED_BY(mu_);
// Symbolized descriptions observed so far.
absl::flat_hash_set<std::string> observed_descriptions_ ABSL_GUARDED_BY(mu_);
};
// FunctionFilter turns a set of function names into a set of PC indices and
// answers whether a feature vector touches any of them.
class FunctionFilter {
 public:
  // Builds the filter.
  // `functions_to_filter` is a comma-separated list of function names.
  // The PCs of every listed function that is present in `symbols` become
  // part of the filter.
  FunctionFilter(std::string_view functions_to_filter,
                 const SymbolTable &symbols);

  // Returns true if
  //  * at least one of the `features` is from feature_domains::kPCs and
  //    belongs to a filtered function, or
  //  * the filter is empty, i.e. none of the names in `functions_to_filter`
  //    was found in `symbols`.
  bool filter(const FeatureVec &features) const;

  // Number of PCs that belong to filtered functions. Test-only.
  size_t count() const {
    return static_cast<size_t>(std::count(pcs_.cbegin(), pcs_.cend(), 1));
  }

 private:
  // pcs_[idx] == 1 iff the PC at idx belongs to a filtered function.
  // Deliberately a byte vector: not vector<bool> (for performance) and not a
  // hash set (CPU is more important here than RAM).
  std::vector<uint8_t> pcs_;
};
// Computes the frontier weight. The weight is calculated based on the functions
// called in the non-covered side of the frontier. For each such callee, the
// cyclomatic complexity (CC) of the callee is multiplied by a factor (MF)
// where MF is determined based on the coverage type of callee:
//
// frontier_weight = 0
// for f in callees_of_non_covered_successor_bb:
// frontier_weight += CC(f) * MF(f)
//
// The breakdown for MF based on the coverage type of callee is as follows
// (subject to change):
// - Non-covered: 60%
// - Partially-covered: 30%
// - Fully-covered: 10%
// Non-covered callee gets the highest MF as it is very interesting to
// get it covered. That said, going to a partially or even fully covered callee
// still has some value as it may trigger new state there.
//
// `callees` holds the PCs of the called functions.
uint32_t ComputeFrontierWeight(const Coverage &coverage,
const ControlFlowGraph &cfg,
const std::vector<uintptr_t> &callees);
} // namespace fuzztest::internal
#endif // THIRD_PARTY_CENTIPEDE_COVERAGE_H_

View File

@ -0,0 +1,541 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/coverage.h"
#include <stdio.h>
#include <unistd.h>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <filesystem> // NOLINT
#include <sstream>
#include <string>
#include <string_view>
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/container/flat_hash_set.h"
#include "./centipede/binary_info.h"
#include "./centipede/control_flow.h"
#include "./centipede/environment.h"
#include "./centipede/feature.h"
#include "./centipede/pc_info.h"
#include "./centipede/symbol_table.h"
#include "./centipede/test_coverage_util.h"
#include "./centipede/thread_pool.h"
#include "./centipede/util.h"
#include "./common/test_util.h"
namespace fuzztest::internal {
namespace {
// Canned llvm-symbolizer output for a binary with 3 functions: A, BB, CCC.
// Each entry is three lines: function name, source location, blank line.
// A and BB have one entry each.
// CCC has four entries; the last two report the same source location, which
// lets tests exercise de-duplication of identical descriptions.
const char *symbolizer_output =
"A\n"
"a.cc:1:0\n"
"\n"
"BB\n"
"bb.cc:1:0\n"
"\n"
"CCC\n"
"ccc.cc:1:0\n"
"\n"
"CCC\n"
"ccc.cc:2:0\n"
"\n"
"CCC\n"
"ccc.cc:3:0\n"
"\n"
"CCC\n"
"ccc.cc:3:0\n" // same location as the previous entry
"\n";
// PCTable that corresponds to symbolizer_output above: one row per symbolizer
// entry, in the same order. The first three rows are flagged as function
// entries (A, BB, CCC); the remaining three rows carry no flags and line up
// with the additional CCC entries.
static const PCTable g_pc_table = {
{100, PCInfo::kFuncEntry},
{200, PCInfo::kFuncEntry},
{300, PCInfo::kFuncEntry},
{400, 0},
{500, 0},
{600, 0},
};
// Exercises SymbolTable parsing and Coverage report generation together.
TEST(Coverage, SymbolTable) {
  using ::testing::HasSubstr;
  const std::filesystem::path test_dir = GetTestTempDir(test_info_->name());
  const std::string report_path = (test_dir / "coverage.txt").string();

  // Parse the canned symbolizer output and spot-check individual entries.
  std::istringstream symbolizer_stream(symbolizer_output);
  SymbolTable symbols;
  symbols.ReadFromLLVMSymbolizer(symbolizer_stream);
  EXPECT_EQ(symbols.size(), 6U);
  EXPECT_EQ(symbols.func(1), "BB");
  EXPECT_EQ(symbols.location(2), "ccc.cc:1:0");
  EXPECT_EQ(symbols.full_description(0), "A a.cc:1:0");
  EXPECT_EQ(symbols.full_description(4), "CCC ccc.cc:3:0");

  {
    // Covered PC indices {0, 2}: all of 'A' plus only the entry of 'CCC'.
    Coverage cov(g_pc_table, {0, 2});
    cov.DumpReportToFile(symbols, report_path);
    std::string report;
    ReadFromLocalFile(report_path, report);
    EXPECT_THAT(report, HasSubstr("FULL: A a.cc:1:0"));
    EXPECT_THAT(report, HasSubstr("NONE: BB bb.cc:1:0"));
    EXPECT_THAT(report, HasSubstr("PARTIAL: CCC ccc.cc:1:0"));
    EXPECT_THAT(report, HasSubstr("+ CCC ccc.cc:1:0"));
    EXPECT_THAT(report, HasSubstr("- CCC ccc.cc:2:0"));
    EXPECT_THAT(report, HasSubstr("- CCC ccc.cc:3:0"));
  }

  {
    // Covered PC indices {1, 2, 3}: all of 'BB' plus the first two PCs of
    // 'CCC'. The report file is overwritten in place.
    Coverage cov(g_pc_table, {1, 2, 3});
    cov.DumpReportToFile(symbols, report_path);
    std::string report;
    ReadFromLocalFile(report_path, report);
    EXPECT_THAT(report, HasSubstr("FULL: BB bb.cc:1:0"));
    EXPECT_THAT(report, HasSubstr("NONE: A a.cc:1:0"));
    EXPECT_THAT(report, HasSubstr("PARTIAL: CCC ccc.cc:1:0"));
    EXPECT_THAT(report, HasSubstr("+ CCC ccc.cc:1:0"));
    EXPECT_THAT(report, HasSubstr("+ CCC ccc.cc:2:0"));
    EXPECT_THAT(report, HasSubstr("- CCC ccc.cc:3:0"));
  }

  // Resetting the table leaves only placeholder descriptions.
  symbols.SetAllToUnknown(2);
  EXPECT_EQ(symbols.size(), 2);
  EXPECT_EQ(symbols.full_description(0), "? ?");
  EXPECT_EQ(symbols.full_description(1), "? ?");
}
// Checks per-block and per-function coverage queries for a Coverage object
// built from the covered PC indices {0, 2, 4, 5}.
TEST(Coverage, CoverageLoad) {
  Coverage cov(g_pc_table, {0, 2, 4, 5});

  // Exactly the PC indices passed to the constructor are reported as covered.
  for (const PCIndex covered : {0, 2, 4, 5}) {
    EXPECT_TRUE(cov.BlockIsCovered(covered));
  }
  for (const PCIndex not_covered : {1, 3}) {
    EXPECT_FALSE(cov.BlockIsCovered(not_covered));
  }

  // Only the first function has all of its PCs covered.
  EXPECT_TRUE(cov.FunctionIsFullyCovered(0));
  EXPECT_FALSE(cov.FunctionIsFullyCovered(1));
  EXPECT_FALSE(cov.FunctionIsFullyCovered(2));
}
// Checks that CoverageLogger describes every PC index at most once and that
// it can be used from several threads at the same time.
TEST(Coverage, CoverageLogger) {
  SymbolTable symbols;
  std::istringstream iss(symbolizer_output);
  symbols.ReadFromLLVMSymbolizer(iss);
  CoverageLogger logger(g_pc_table, symbols);
  // First time logging pc_index=0.
  EXPECT_EQ(logger.ObserveAndDescribeIfNew(0), "FUNC: A a.cc:1:0");
  // Second time logging pc_index=0.
  EXPECT_EQ(logger.ObserveAndDescribeIfNew(0), "");
  // First time logging pc_index=4.
  EXPECT_EQ(logger.ObserveAndDescribeIfNew(4), "EDGE: CCC ccc.cc:3:0");
  // First time logging pc_index=5, but it produces the same description as
  // pc_index=4, and so the result is empty.
  EXPECT_EQ(logger.ObserveAndDescribeIfNew(5), "");
  // Logging with pc_index out of bounds. Second time gives empty result.
  EXPECT_EQ(logger.ObserveAndDescribeIfNew(42), "FUNC/EDGE index: 42");
  EXPECT_EQ(logger.ObserveAndDescribeIfNew(42), "");

  // Hammer a fresh logger from two threads. The callback must target
  // `concurrently_used_logger`: the logger above has already observed most PC
  // indices, so reusing it would mostly exercise the "already seen" path.
  CoverageLogger concurrently_used_logger(g_pc_table, symbols);
  auto cb = [&]() {
    for (int i = 0; i < 1000; i++) {
      PCIndex pc_index = i % g_pc_table.size();
      concurrently_used_logger.ObserveAndDescribeIfNew(pc_index);
    }
  };
  {
    ThreadPool threads{2};
    threads.Schedule(cb);
    threads.Schedule(cb);
  }  // The threads join here.
}
// Resolves the data-dependency path of the single-threaded test fuzz target.
static std::string GetTargetPath() {
  static constexpr char kRelativePath[] = "centipede/testing/test_fuzz_target";
  return GetDataDependencyFilepath(kRelativePath);
}
// Resolves the data-dependency path of the multi-threaded test fuzz target.
static std::string GetThreadedTargetPath() {
  static constexpr char kRelativePath[] =
      "centipede/testing/threaded_fuzz_target";
  return GetDataDependencyFilepath(kRelativePath);
}
// Tests coverage collection on test_fuzz_target
// using two inputs that trigger different code paths.
TEST(Coverage, CoverageFeatures) {
  // Prepare the inputs.
  Environment env;
  env.binary = GetTargetPath();
  auto features = RunInputsAndCollectCoverage(env, {"func1", "func2-A"});
  // Fatal: the code below indexes features[0] and features[1].
  ASSERT_EQ(features.size(), 2);
  EXPECT_NE(features[0], features[1]);
  // Get pc_table and symbols.
  BinaryInfo binary_info;
  binary_info.InitializeFromSanCovBinary(
      GetTargetPath(), GetObjDumpPath(), GetLLVMSymbolizerPath(),
      GetTestTempDir(test_info_->name()).string());
  const auto &pc_table = binary_info.pc_table;
  // Check the flag stored in `binary_info` rather than an untouched local, so
  // that the expectation can actually fail.
  EXPECT_FALSE(binary_info.uses_legacy_trace_pc_instrumentation);
  const SymbolTable &symbols = binary_info.symbols;
  // pc_table and symbols should have the same size.
  EXPECT_EQ(pc_table.size(), symbols.size());
  // Check what's covered.
  // Both inputs should cover LLVMFuzzerTestOneInput.
  // Input[0] should cover SingleEdgeFunc and not MultiEdgeFunc.
  // Input[1] - the other way around.
  for (size_t input_idx = 0; input_idx < 2; input_idx++) {
    size_t llvm_fuzzer_test_one_input_num_edges = 0;
    size_t single_edge_func_num_edges = 0;
    size_t multi_edge_func_num_edges = 0;
    for (auto feature : features[input_idx]) {
      // Only PC features map back to a PC index / symbol.
      if (!feature_domains::kPCs.Contains(feature)) continue;
      auto pc_index = ConvertPCFeatureToPcIndex(feature);
      single_edge_func_num_edges += symbols.func(pc_index) == "SingleEdgeFunc";
      multi_edge_func_num_edges += symbols.func(pc_index) == "MultiEdgeFunc";
      llvm_fuzzer_test_one_input_num_edges +=
          symbols.func(pc_index) == "LLVMFuzzerTestOneInput";
    }
    EXPECT_GT(llvm_fuzzer_test_one_input_num_edges, 1);
    if (input_idx == 0) {
      // This input calls SingleEdgeFunc, but not MultiEdgeFunc.
      EXPECT_EQ(single_edge_func_num_edges, 1);
      EXPECT_EQ(multi_edge_func_num_edges, 0);
    } else {
      // This input calls MultiEdgeFunc, but not SingleEdgeFunc.
      EXPECT_EQ(single_edge_func_num_edges, 0);
      EXPECT_GT(multi_edge_func_num_edges, 1);
    }
  }
}
// Returns the features of `features` that belong to `domain`, preserving
// their original order.
static FeatureVec ExtractDomainFeatures(const FeatureVec &features,
                                        const feature_domains::Domain &domain) {
  FeatureVec in_domain;
  for (const auto candidate : features) {
    if (!domain.Contains(candidate)) continue;
    in_domain.push_back(candidate);
  }
  return in_domain;
}
// Verifies data flow instrumentation: inputs that differ only in the data
// they touch must differ in data flow features but not in counter features.
TEST(Coverage, DataFlowFeatures) {
  Environment env;
  env.binary = GetTargetPath();
  const auto features_g = RunInputsAndCollectCoverage(env, {"glob1", "glob2"});
  const auto features_c = RunInputsAndCollectCoverage(env, {"cons1", "cons2"});

  const auto expect_only_data_flow_differs = [](const auto &features) {
    EXPECT_EQ(features.size(), 2);
    // Dataflow features should be different.
    EXPECT_NE(ExtractDomainFeatures(features[0], feature_domains::kDataFlow),
              ExtractDomainFeatures(features[1], feature_domains::kDataFlow));
    // But control flow features should be the same.
    EXPECT_EQ(
        ExtractDomainFeatures(features[0], feature_domains::k8bitCounters),
        ExtractDomainFeatures(features[1], feature_domains::k8bitCounters));
  };
  expect_only_data_flow_differs(features_g);
  expect_only_data_flow_differs(features_c);
}
// Verifies counter feature collection (--use_counter_features): counter
// features distinguish the inputs only while the option is enabled.
TEST(Coverage, CounterFeatures) {
  Environment env;
  env.binary = GetTargetPath();
  // Inputs that generate the same PC coverage but different counters.
  const std::vector<std::string> inputs = {"cnt\x01", "cnt\x02", "cnt\x04",
                                           "cnt\x08", "cnt\x10"};
  const size_t num_inputs = inputs.size();

  // Run with use_counter_features = true.
  env.use_counter_features = true;
  auto features = RunInputsAndCollectCoverage(env, inputs);
  // Counter-domain features of the input at `idx` in the latest run.
  const auto counters_of = [&features](size_t idx) {
    return ExtractDomainFeatures(features[idx], feature_domains::k8bitCounters);
  };
  EXPECT_EQ(features.size(), num_inputs);
  // Counter features should be pairwise different.
  for (size_t lhs = 0; lhs < num_inputs; ++lhs) {
    for (size_t rhs = lhs + 1; rhs < num_inputs; ++rhs) {
      EXPECT_NE(counters_of(lhs), counters_of(rhs));
    }
  }

  // Run with use_counter_features = false.
  env.use_counter_features = false;
  features = RunInputsAndCollectCoverage(env, inputs);
  EXPECT_EQ(features.size(), num_inputs);
  // Counter features should be pairwise the same now.
  for (size_t lhs = 0; lhs < num_inputs; ++lhs) {
    for (size_t rhs = lhs + 1; rhs < num_inputs; ++rhs) {
      EXPECT_EQ(counters_of(lhs), counters_of(rhs));
    }
  }
}
// For each of {ABToCmpModDiff, ABToCmpHamming, ABToCmpDiffLog} verifies that
//  a) it can produce every value in [0,64),
//  b) it never produces a value outside of that range, and
//  c) it differs enough from the other two, i.e. dropping any one of them as
//     a coverage signal may reduce the overall quality of the signal.
TEST(Coverage, CMPFeatures) {
  absl::flat_hash_set<uintptr_t> moddiff, hamming, difflog;
  // Empties all three result sets.
  const auto reset_sets = [&]() {
    moddiff.clear();
    hamming.clear();
    difflog.clear();
  };
  // Expects `value` < 64 and passes it through.
  const auto checked_6bit = [](uintptr_t value) {
    EXPECT_LT(value, 64);
    return value;
  };
  // Feeds the pair (a, b) to all three functions and records the results.
  const auto record = [&](uintptr_t a, uintptr_t b) {
    moddiff.insert(checked_6bit(ABToCmpModDiff(a, b)));
    hamming.insert(checked_6bit(ABToCmpHamming(a, b)));
    difflog.insert(checked_6bit(ABToCmpDiffLog(a, b)));
  };

  // Check moddiff: sweep `a` over [0, 64] around a fixed b, skipping a == b.
  reset_sets();
  constexpr uintptr_t kFixedB = 32;
  for (uintptr_t a = 0; a <= 64; ++a) {
    if (a != kFixedB) record(a, kFixedB);
  }
  EXPECT_EQ(moddiff.size(), 64);
  EXPECT_EQ(hamming.size(), 6);
  EXPECT_EQ(difflog.size(), 6);

  // Check hamming: compare all-ones shifted left by 0..63 bits against 0.
  reset_sets();
  const uintptr_t all_ones = static_cast<uintptr_t>(-1);
  for (uintptr_t shift = 0; shift < 64; ++shift) {
    record(all_ones << shift, 0);
  }
  EXPECT_EQ(moddiff.size(), 6);
  EXPECT_EQ(hamming.size(), 64);
  EXPECT_EQ(difflog.size(), 1);

  // Check difflog: compare every single-bit value against 0.
  reset_sets();
  for (uintptr_t shift = 0; shift < 64; ++shift) {
    const uintptr_t single_bit = 1ULL << shift;
    record(single_bit, 0);
  }
  EXPECT_EQ(moddiff.size(), 7);
  EXPECT_EQ(hamming.size(), 1);
  EXPECT_EQ(difflog.size(), 64);
}
// Verifies CMP tracing: two inputs that differ only in compared bytes must
// differ in every CMP feature domain but not in counter features.
TEST(Coverage, CMPFeaturesExecute) {
  Environment env;
  env.binary = GetTargetPath();
  const auto features =
      RunInputsAndCollectCoverage(env, {"cmpAAAAAAAA", "cmpAAAABBBB"});
  EXPECT_EQ(features.size(), 2);
  // Features of input `input_idx` restricted to `domain`.
  const auto in_domain = [&features](size_t input_idx,
                                     const feature_domains::Domain &domain) {
    return ExtractDomainFeatures(features[input_idx], domain);
  };
  // CMP features should be different.
  EXPECT_NE(in_domain(0, feature_domains::kCMPEq),
            in_domain(1, feature_domains::kCMPEq));
  EXPECT_NE(in_domain(0, feature_domains::kCMPModDiff),
            in_domain(1, feature_domains::kCMPModDiff));
  EXPECT_NE(in_domain(0, feature_domains::kCMPHamming),
            in_domain(1, feature_domains::kCMPHamming));
  EXPECT_NE(in_domain(0, feature_domains::kCMPDiffLog),
            in_domain(1, feature_domains::kCMPDiffLog));
  // But control flow features should be the same.
  EXPECT_EQ(in_domain(0, feature_domains::k8bitCounters),
            in_domain(1, feature_domains::k8bitCounters));
}
// Verifies the memcmp interceptor: two inputs that differ only in the bytes
// handed to memcmp must differ in CMP features but not in counter features.
TEST(Coverage, CMPFeaturesFromMemcmp) {
  Environment env;
  env.binary = GetTargetPath();
  const auto features =
      RunInputsAndCollectCoverage(env, {"mcmpAAAAAAAA", "mcmpAAAABBBB"});
  EXPECT_EQ(features.size(), 2);
  // Features of input `input_idx` restricted to `domain`.
  const auto in_domain = [&features](size_t input_idx,
                                     const feature_domains::Domain &domain) {
    return ExtractDomainFeatures(features[input_idx], domain);
  };
  // CMP features should be different.
  EXPECT_NE(in_domain(0, feature_domains::kCMP),
            in_domain(1, feature_domains::kCMP));
  // But control flow features should be the same.
  EXPECT_EQ(in_domain(0, feature_domains::k8bitCounters),
            in_domain(1, feature_domains::k8bitCounters));
}
// Verifies bounded-path features: inputs "pth123" and "pth321" generate
// different call sequences but exactly the same edge coverage, and the path
// features must capture that difference.
TEST(Coverage, PathFeatures) {
  Environment env;
  env.binary = GetTargetPath();
  env.path_level = 10;
  const auto features = RunInputsAndCollectCoverage(env, {"pth123", "pth321"});
  EXPECT_EQ(features.size(), 2);
  // Features of input `input_idx` restricted to `domain`.
  const auto in_domain = [&features](size_t input_idx,
                                     const feature_domains::Domain &domain) {
    return ExtractDomainFeatures(features[input_idx], domain);
  };
  // Path features should be different.
  EXPECT_NE(in_domain(0, feature_domains::kBoundedPath),
            in_domain(1, feature_domains::kBoundedPath));
  // But control flow features should be the same.
  EXPECT_EQ(in_domain(0, feature_domains::k8bitCounters),
            in_domain(1, feature_domains::k8bitCounters));
}
// Checks FunctionFilter against the real test target: builds filters for
// zero, one and two function names and verifies which inputs pass each one.
TEST(Coverage, FunctionFilter) {
// Initialize coverage data.
BinaryInfo binary_info;
binary_info.InitializeFromSanCovBinary(
GetTargetPath(), GetObjDumpPath(), GetLLVMSymbolizerPath(),
GetTestTempDir(test_info_->name()).string());
const PCTable &pc_table = binary_info.pc_table;
EXPECT_FALSE(binary_info.uses_legacy_trace_pc_instrumentation);
// Treat the target as a single DSO that owns every PC in the table.
const DsoTable dso_table = {{GetTargetPath(), pc_table.size()}};
// Symbolize into a separate table instead of using binary_info.symbols.
SymbolTable symbols;
symbols.GetSymbolsFromBinary(pc_table, dso_table, GetLLVMSymbolizerPath(),
GetTestTempDir(test_info_->name()).string());
// Empty filter.
FunctionFilter empty_filter("", symbols);
EXPECT_EQ(empty_filter.count(), 0);
// Single-function filter. The function has one PC.
FunctionFilter sing_edge_func_filter("SingleEdgeFunc", symbols);
EXPECT_EQ(sing_edge_func_filter.count(), 1);
// Another single-function filter. This function has several PCs.
FunctionFilter multi_edge_func_filter("MultiEdgeFunc", symbols);
EXPECT_GT(multi_edge_func_filter.count(), 1);
// Two-function-filter (comma-separated names); it must count more PCs than
// the MultiEdgeFunc filter alone.
FunctionFilter both_func_filter("MultiEdgeFunc,SingleEdgeFunc", symbols);
EXPECT_GT(both_func_filter.count(), multi_edge_func_filter.count());
// Collect features from the test target by running 3 different inputs.
Environment env;
env.binary = GetTargetPath();
std::vector<FeatureVec> features =
RunInputsAndCollectCoverage(env, {"func1", "func2-A", "other"});
EXPECT_EQ(features.size(), 3);
// Name the per-input feature vectors after the function each input reaches.
auto &single = features[0];
auto &multi = features[1];
auto &other = features[2];
// Check the features against the different filters.
// The empty filter accepts everything.
EXPECT_TRUE(empty_filter.filter(single));
EXPECT_TRUE(empty_filter.filter(multi));
EXPECT_TRUE(empty_filter.filter(other));
// A single-function filter accepts only the input that reaches its function.
EXPECT_TRUE(sing_edge_func_filter.filter(single));
EXPECT_FALSE(sing_edge_func_filter.filter(multi));
EXPECT_FALSE(sing_edge_func_filter.filter(other));
EXPECT_FALSE(multi_edge_func_filter.filter(single));
EXPECT_TRUE(multi_edge_func_filter.filter(multi));
EXPECT_FALSE(multi_edge_func_filter.filter(other));
// The two-function filter accepts inputs that reach either function.
EXPECT_TRUE(both_func_filter.filter(single));
EXPECT_TRUE(both_func_filter.filter(multi));
EXPECT_FALSE(both_func_filter.filter(other));
}
// Runs the multi-threaded target with four inputs and checks that coverage
// collected from its threads distinguishes the inputs.
TEST(Coverage, ThreadedTest) {
  Environment env;
  env.path_level = 10;
  env.binary = GetThreadedTargetPath();
  std::vector<FeatureVec> features =
      RunInputsAndCollectCoverage(env, {"f", "fu", "fuz", "fuzz"});
  // Fatal: the loops below index features[0..3].
  ASSERT_EQ(features.size(), 4);
  // For several pairs of inputs, check that their features in
  // kPCs and kBoundedPath are different.
  for (size_t idx0 = 0; idx0 < 3; ++idx0) {
    for (size_t idx1 = idx0 + 1; idx1 < 4; ++idx1) {
      // Both sides must be restricted to the same domain; comparing kPCs
      // against a different domain would not test what the comment above
      // describes.
      EXPECT_NE(ExtractDomainFeatures(features[idx0], feature_domains::kPCs),
                ExtractDomainFeatures(features[idx1], feature_domains::kPCs));
      EXPECT_NE(
          ExtractDomainFeatures(features[idx0], feature_domains::kBoundedPath),
          ExtractDomainFeatures(features[idx1], feature_domains::kBoundedPath));
    }
  }
}
// Checks ComputeFrontierWeight for callee lists that mix fully covered,
// partially covered and non-covered functions.
TEST(FrontierWeight, ComputeFrontierWeight) {
  // Four functions; the one entered at PC 1 also owns PC 2.
  PCTable pc_table{{0, PCInfo::kFuncEntry},
                   {1, PCInfo::kFuncEntry},
                   {2, 0},
                   {3, PCInfo::kFuncEntry},
                   {4, PCInfo::kFuncEntry}};
  // A simple CF table, to get cyclomatic complexity of 1 for all functions.
  CFTable cf_table{
      0, 0, 0, 1, 0, 0, 2, 0, 0, 3, 0, 0, 4, 0, 0,
  };
  // Only PC indices 0 and 1 are covered.
  Coverage coverage(pc_table, {0, 1});
  ControlFlowGraph cfg;
  cfg.InitializeControlFlowGraph(cf_table, pc_table);

  const std::vector<uintptr_t> all_callees = {0, 1, 3, 4};
  const std::vector<uintptr_t> covered_callees = {0, 1};
  const std::vector<uintptr_t> single_callee = {0};
  // PC 99 should have no effect on computed weight.
  const std::vector<uintptr_t> callees_with_unknown_pc = {1, 3, 99};

  ASSERT_EQ(ComputeFrontierWeight(coverage, cfg, all_callees), 408);
  ASSERT_EQ(ComputeFrontierWeight(coverage, cfg, covered_callees), 102);
  ASSERT_EQ(ComputeFrontierWeight(coverage, cfg, single_callee), 25);
  ASSERT_EQ(ComputeFrontierWeight(coverage, cfg, callees_with_unknown_pc), 230);
}
// Expects ComputeFrontierWeight to terminate the process when the callee list
// contains PCs that are not function entries.
TEST(FrontierWeightDeath, InvalidCallee) {
  // One function spanning three PCs; only PC 0 is a function entry.
  PCTable pc_table{{0, PCInfo::kFuncEntry}, {1, 0}, {2, 0}};
  CFTable cf_table{0, 1, 0, 0, 1, 2, 0, 0, 2, 0, 0};
  Coverage coverage(pc_table, {0, 1});
  ControlFlowGraph cfg;
  cfg.InitializeControlFlowGraph(cf_table, pc_table);

  const std::vector<uintptr_t> entry_and_non_entry = {0, 1};
  const std::vector<uintptr_t> only_non_entries = {1, 2};
  EXPECT_DEATH(ComputeFrontierWeight(coverage, cfg, entry_and_non_entry), "");
  EXPECT_DEATH(ComputeFrontierWeight(coverage, cfg, only_non_entries), "");
}
} // namespace
} // namespace fuzztest::internal

View File

@ -0,0 +1,58 @@
// Copyright 2025 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/crash_summary.h"
#include <utility>
#include "absl/strings/str_format.h"
#include "./centipede/util.h"
#include "./common/defs.h"
namespace fuzztest::internal {
namespace {
// File-local hook that CrashSummary::Report() calls, when non-null, before it
// prints the summary. Installed through SetExternalCrashReporter(); nullptr
// means that no external reporter is installed.
ExternalCrashReporter external_crash_reporter = nullptr;
} // namespace
// Appends `crash` to the stored crashes; the argument is moved into place.
void CrashSummary::AddCrash(Crash crash) {
  crashes_.emplace_back(std::move(crash));
}
// Forwards the summary to the external crash reporter (if one is installed)
// and then writes a human-readable listing of all crashes to `sink`.
void CrashSummary::Report(absl::FormatRawSink sink) const {
  // The external reporter sees the summary before anything is printed.
  if (external_crash_reporter != nullptr) external_crash_reporter(*this);

  const auto all_crashes = crashes();
  absl::Format(sink,
               "=== Summary of detected crashes ===\n\n"
               "Binary ID : %s\n"
               "Fuzz test : %s\n"
               "Total crashes: %d\n\n",
               binary_id(), fuzz_test(), all_crashes.size());

  // Crashes are numbered from 1 in insertion order.
  int crash_number = 0;
  for (const Crash& crash : all_crashes) {
    ++crash_number;
    // The signature may hold arbitrary bytes, so print an escaped form of it.
    absl::Format(sink,
                 "Crash #%d:\n"
                 " Crash ID : %s\n"
                 " Category : %s\n"
                 " Signature : %s\n"
                 " Description: %s\n\n",
                 crash_number, crash.id, crash.category,
                 AsPrintableString(AsByteSpan(crash.signature), 32),
                 crash.description);
  }
  absl::Format(sink, "=== End of summary of detected crashes ===\n\n");
}
// Installs `reporter` as the external crash reporter, replacing any previously
// installed one. Passing nullptr uninstalls it, because CrashSummary::Report()
// only calls a non-null reporter.
void SetExternalCrashReporter(ExternalCrashReporter reporter) {
external_crash_reporter = reporter;
}
} // namespace fuzztest::internal

View File

@ -0,0 +1,84 @@
// Copyright 2025 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef FUZZTEST_CENTIPEDE_CRASH_SUMMARY_H_
#define FUZZTEST_CENTIPEDE_CRASH_SUMMARY_H_
#include <string>
#include <string_view>
#include <vector>
#include "absl/strings/str_format.h"
#include "absl/types/span.h"
namespace fuzztest::internal {
// Collects the crashes detected for a single fuzz test and can report a
// summary of them.
class CrashSummary {
 public:
  // A single detected crash. All fields are free-form strings.
  struct Crash {
    std::string id;
    std::string category;
    std::string signature;
    std::string description;

    // Two crashes are equal iff all four fields are equal.
    friend bool operator==(const Crash& lhs, const Crash& rhs) {
      if (lhs.id != rhs.id) return false;
      if (lhs.category != rhs.category) return false;
      if (lhs.signature != rhs.signature) return false;
      return lhs.description == rhs.description;
    }
  };

  // Creates an empty summary for `fuzz_test` in the binary `binary_id`. Both
  // arguments are copied.
  explicit CrashSummary(std::string_view binary_id, std::string_view fuzz_test)
      : binary_id_(binary_id), fuzz_test_(fuzz_test) {}

  // Copyable and movable.
  CrashSummary(const CrashSummary&) = default;
  CrashSummary& operator=(const CrashSummary&) = default;
  CrashSummary(CrashSummary&&) = default;
  CrashSummary& operator=(CrashSummary&&) = default;

  // Adds a crash to the summary.
  void AddCrash(Crash crash);

  // Reports a summary of the crashes to `sink`.
  // If an external crash reporter has been set with `SetExternalCrashReporter`,
  // calls it with the stored crashes.
  void Report(absl::FormatRawSink sink) const;

  // Accessors. The returned views refer to data owned by this object.
  std::string_view binary_id() const { return binary_id_; }
  std::string_view fuzz_test() const { return fuzz_test_; }
  absl::Span<const Crash> crashes() const { return crashes_; }

  // Two summaries are equal iff their IDs match and they hold equal crashes
  // in the same order.
  friend bool operator==(const CrashSummary& lhs, const CrashSummary& rhs) {
    if (lhs.binary_id_ != rhs.binary_id_) return false;
    if (lhs.fuzz_test_ != rhs.fuzz_test_) return false;
    return lhs.crashes_ == rhs.crashes_;
  }

 private:
  std::string binary_id_;
  std::string fuzz_test_;
  std::vector<Crash> crashes_;
};
// Signature of an external crash reporter: a plain function pointer that
// receives the summary being reported.
using ExternalCrashReporter = void (*)(const CrashSummary&);
// Sets an external crash reporter that will be called whenever a
// `CrashSummary` is reported via `CrashSummary::Report`.
// NOTE(review): presumably passing nullptr removes a previously set reporter
// -- confirm against the definition.
void SetExternalCrashReporter(ExternalCrashReporter reporter);
} // namespace fuzztest::internal
#endif // FUZZTEST_CENTIPEDE_CRASH_SUMMARY_H_

View File

@ -0,0 +1,87 @@
// Copyright 2025 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/crash_summary.h"
#include <string>
#include <string_view>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/log/check.h"
namespace fuzztest::internal {
namespace {
using ::testing::AllOf;
using ::testing::HasSubstr;
using ::testing::Pointee;
// Fixture whose `DumpCrashSummary` can be installed as the external crash
// reporter; it stores a copy of the reported summary in `dumped_summary_` so
// that a test can inspect it afterwards.
class CrashSummaryTest : public testing::Test {
 public:
  ~CrashSummaryTest() override {
    // Uninstall the global reporter so that a test which installed
    // `DumpCrashSummary` does not leak it into tests that run afterwards.
    SetExternalCrashReporter(nullptr);
    delete dumped_summary_;  // Deleting nullptr is a no-op.
    dumped_summary_ = nullptr;
  }

 protected:
  // External crash reporter that records a copy of `summary`. It may be called
  // at most once per test; a second call fails the CHECK.
  static void DumpCrashSummary(const CrashSummary& summary) {
    CHECK(dumped_summary_ == nullptr);
    dumped_summary_ = new CrashSummary{summary};
  }

  // Summary captured by `DumpCrashSummary`, or nullptr if it was not called.
  // Static because the reporter is a plain function pointer and cannot reach
  // instance state.
  static CrashSummary* dumped_summary_;
};
CrashSummary* CrashSummaryTest::dumped_summary_ = nullptr;
// Checks the text that Report() writes for a summary with two crashes, one of
// which has a signature with unprintable bytes that is too long to print in
// full.
TEST_F(CrashSummaryTest, ReportPrintsSummary) {
  const CrashSummary::Crash plain_crash{"id1", "category1", "signature1",
                                        "description1"};
  const CrashSummary::Crash unprintable_crash{
      "id2", "category2", "Unprintable (\xbe\xef) and very long signature",
      "description2"};
  CrashSummary summary("binary_id", "fuzz_test");
  summary.AddCrash(plain_crash);
  summary.AddCrash(unprintable_crash);

  std::string report_text;
  summary.Report(&report_text);

  // Header of the summary.
  EXPECT_THAT(report_text, AllOf(HasSubstr("Binary ID : binary_id"),
                                 HasSubstr("Fuzz test : fuzz_test"),
                                 HasSubstr("Total crashes: 2")));
  // First crash: everything is printed verbatim.
  EXPECT_THAT(report_text, AllOf(HasSubstr("Crash ID : id1"),
                                 HasSubstr("Category : category1"),
                                 HasSubstr("Signature : signature1"),
                                 HasSubstr("Description: description1")));
  // Second crash: the signature is escaped and cut short.
  EXPECT_THAT(
      report_text,
      AllOf(HasSubstr("Crash ID : id2"), HasSubstr("Category : category2"),
            HasSubstr("Signature : Unprintable (\\xBE\\xEF) and very long s"),
            HasSubstr("Description: description2")));
}
// Checks that Report() hands an equal summary to the installed external
// crash reporter.
TEST_F(CrashSummaryTest, ReportCallsExternalCrashReporter) {
  const CrashSummary::Crash first_crash{"id1", "category1", "signature1",
                                        "description1"};
  const CrashSummary::Crash second_crash{"id2", "category2", "signature2",
                                         "description2"};
  CrashSummary summary("binary_id", "fuzz_test");
  summary.AddCrash(first_crash);
  summary.AddCrash(second_crash);

  SetExternalCrashReporter(DumpCrashSummary);
  std::string ignored_output;
  summary.Report(&ignored_output);

  // The reporter stored a copy of what it was given.
  EXPECT_THAT(dumped_summary_, Pointee(summary));
}
} // namespace
} // namespace fuzztest::internal

View File

@ -0,0 +1,601 @@
// Copyright 2025 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/dispatcher.h"
#include <fcntl.h>
#include <unistd.h>
#include <atomic>
#include <cerrno>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <optional>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include "absl/base/nullability.h"
#include "./centipede/execution_metadata.h"
#include "./centipede/runner_request.h"
#include "./centipede/runner_result.h"
#include "./centipede/shared_memory_blob_sequence.h"
#include "./common/defs.h"
namespace fuzztest::internal {
namespace {
// Logging needs to be signal safe: everything below writes directly to
// STDERR_FILENO with write(2) and uses only stack buffers.

// Tag type: makes DispatcherLog() print the description of the current errno.
struct LogErrNo {};
// Tag type: makes DispatcherLog() print a newline and then fsync() stderr.
struct LogLnSync {};

// Normalizes the result of the XSI strerror_r(), which returns 0 on success
// after storing the message in `buf`.
[[maybe_unused]] inline const char* StrErrorRMessage(int rc, const char* buf) {
  return rc == 0 ? buf : "[strerror_r failed]";
}

// Normalizes the result of the GNU strerror_r(), which returns the message
// itself and does not necessarily store it in the caller's buffer.
[[maybe_unused]] inline const char* StrErrorRMessage(const char* message,
                                                     const char* /*buf*/) {
  return message != nullptr ? message : "[strerror_r failed]";
}

// Terminates the recursion of the variadic overload below.
void DispatcherLog() {}

// Writes each argument to stderr, in order. `LogErrNo` and `LogLnSync` are
// handled as described above; every other argument must be convertible to
// `std::string_view` and is written verbatim. Write errors end the output of
// the affected argument and are otherwise ignored.
template <typename T, typename... Rest>
void DispatcherLog(const T& first, const Rest&... rest) {
  if constexpr (std::is_same_v<LogErrNo, T>) {
    const int saved_errno = errno;
    char err_buf[80];
    err_buf[0] = 0;
    // Overload resolution on the return type of strerror_r() picks the right
    // interpretation (XSI: int status, GNU: pointer to the message).
    DispatcherLog(StrErrorRMessage(
        strerror_r(saved_errno, err_buf, sizeof(err_buf)), err_buf));
  } else if constexpr (std::is_same_v<LogLnSync, T>) {
    write(STDERR_FILENO, "\n", 1);
    fsync(STDERR_FILENO);
  } else {
    std::string_view remaining = first;
    while (!remaining.empty()) {
      // write() may be partial; keep going until everything is out.
      const ssize_t written =
          write(STDERR_FILENO, remaining.data(), remaining.size());
      if (written <= 0) break;
      remaining.remove_prefix(static_cast<size_t>(written));
    }
  }
  DispatcherLog(rest...);
}
// Does nothing when `condition` holds. Otherwise logs `error` followed by a
// newline and terminates the process with exit status 1 via std::_Exit().
inline void DispatcherCheck(bool condition, std::string_view error) {
  if (condition) return;
  DispatcherLog(error, LogLnSync{});
  std::_Exit(1);
}
// Returns a private copy of the dispatcher flags taken from the environment,
// or nullptr if the variable is not set. The environment is read only on the
// first call; the copy is never freed.
const char* GetDispatcherFlags() {
  static const char* const cached_flags = []() -> const char* {
    // TODO(xinhaoyuan): Rename the env name to FUZZTEST_DISPATCHER_FLAGS.
    const char* raw_flags = std::getenv("CENTIPEDE_RUNNER_FLAGS");
    if (raw_flags == nullptr) return nullptr;
    const char* flags_copy = strdup(raw_flags);
    DispatcherCheck(flags_copy != nullptr, "Cannot copy the dispatcher flags");
    return flags_copy;
  }();
  return cached_flags;
}
// Looks up `flag_header` (of the form ":flag=") in the dispatcher flags and
// returns the text between it and the next ':'. Returns std::nullopt if the
// flags are unset, the header does not occur, or the value is not terminated
// by ':'. The returned view points into the cached flags string.
std::optional<std::string_view> GetDispatcherFlag(
    const char* absl_nonnull flag_header) {
  const char* const all_flags = GetDispatcherFlags();
  if (all_flags == nullptr) return std::nullopt;
  // Extract "value" from ":flag=value:".
  const char* const header_pos = std::strstr(all_flags, flag_header);
  if (header_pos == nullptr) return std::nullopt;
  const char* const value_begin = header_pos + std::strlen(flag_header);
  const char* const value_end = std::strchr(value_begin, ':');
  if (value_end == nullptr) return std::nullopt;
  const auto value_len = static_cast<size_t>(value_end - value_begin);
  return std::string_view(value_begin, value_len);
}
bool HasDispatcherSwitchFlag(const char* absl_nonnull switch_flag) {
const char* dispatcher_flags = GetDispatcherFlags();
if (dispatcher_flags == nullptr) return false;
return std::strstr(dispatcher_flags, switch_flag) != nullptr;
}
// The action the dispatcher was asked to perform, as derived from the
// dispatcher flags and the first request blob by GetDispatcherAction().
enum class DispatcherAction {
// Selected by the ":dump_binary_id:" flag.
kGetBinaryId,
// Selected by the ":list_tests:" flag.
kListTests,
// Selected by the ":dump_seed_inputs:" flag.
kTestGetSeeds,
// Selected when the first input blob is a mutation request.
kTestMutate,
// Selected when the first input blob is an execution request.
kTestExecute,
};
// Flag headers for GetDispatcherFlag(). Each one has the form ":name=" and is
// followed in the flags string by the value and a terminating ':'.
// Note that the seeds output directory and the inputs blob sequence path
// share the same header, ":arg1=".
constexpr char kDispatcherBinaryIdOutputFlagHeader[] = ":binary_id_output=";
constexpr char kDispatcherTestNameFlagHeader[] = ":test=";
constexpr char kDispatcherTestListingPrefixFlagHeader[] =
":test_listing_prefix=";
constexpr char kDispatcherTestGetSeedsOutputDirFlagHeader[] =
":arg1="; // TODO: Use better flag names when standardizing the protocol.
constexpr char kDispatcherFailureDescriptionPathFlagHeader[] =
":failure_description_path=";
constexpr char kDispatcherFailureSignaturePathFlagHeader[] =
":failure_signature_path=";
constexpr char kDispatcherInputsBlobSequencePathFlagHeader[] =
":arg1="; // TODO: Use better flag names when standardizing the protocol.
constexpr char kDispatcherOutputsBlobSequencePathFlagHeader[] =
":arg2="; // TODO: Use better flag names when standardizing the protocol.
// Returns the blob sequence that carries the inputs, or nullptr if the
// dispatcher flags are unset or do not contain ":shmem:". The sequence is
// created on the first call and never destroyed. Terminates the process if
// ":shmem:" is present but the inputs path flag is missing.
BlobSequence* GetInputsBlobSequence() {
  static auto result = []() -> BlobSequence* {
    // HasDispatcherSwitchFlag() tolerates unset flags; passing a null flags
    // pointer straight to std::strstr() would be undefined behavior.
    if (!HasDispatcherSwitchFlag(":shmem:")) {
      return nullptr;
    }
    auto input_path =
        GetDispatcherFlag(kDispatcherInputsBlobSequencePathFlagHeader);
    DispatcherCheck(input_path.has_value(), "inputs blob sequence is missing");
    // Copy into a std::string to get a NUL-terminated path.
    return new SharedMemoryBlobSequence(std::string(*input_path).c_str());
  }();
  return result;
}
// Returns the blob sequence that carries the outputs, or nullptr if the
// dispatcher flags are unset or do not contain ":shmem:". The sequence is
// created on the first call and never destroyed. Terminates the process if
// ":shmem:" is present but the outputs path flag is missing.
BlobSequence* GetOutputsBlobSequence() {
  static auto result = []() -> BlobSequence* {
    // HasDispatcherSwitchFlag() tolerates unset flags; passing a null flags
    // pointer straight to std::strstr() would be undefined behavior.
    if (!HasDispatcherSwitchFlag(":shmem:")) {
      return nullptr;
    }
    auto output_path =
        GetDispatcherFlag(kDispatcherOutputsBlobSequencePathFlagHeader);
    DispatcherCheck(output_path.has_value(),
                    "outputs blob sequence is missing");
    // Copy into a std::string to get a NUL-terminated path.
    return new SharedMemoryBlobSequence(std::string(*output_path).c_str());
  }();
  return result;
}
// Determines which action the dispatcher should perform. The decision is made
// on the first call and cached. Switch flags are checked first, in a fixed
// order; otherwise the first blob of the inputs blob sequence decides between
// mutation and execution. Terminates the process if no action can be derived.
DispatcherAction GetDispatcherAction() {
  static const DispatcherAction cached_action = []() -> DispatcherAction {
    if (HasDispatcherSwitchFlag(":dump_binary_id:")) {
      return DispatcherAction::kGetBinaryId;
    }
    if (HasDispatcherSwitchFlag(":list_tests:")) {
      return DispatcherAction::kListTests;
    }
    if (HasDispatcherSwitchFlag(":dump_seed_inputs:")) {
      return DispatcherAction::kTestGetSeeds;
    }

    BlobSequence* const inputs = GetInputsBlobSequence();
    DispatcherCheck(inputs != nullptr, "input blob sequence is not found");
    const auto request_type = inputs->Read();
    const bool is_mutation = IsMutationRequest(request_type);
    if (is_mutation || IsExecutionRequest(request_type)) {
      // Rewind so that the handler of the action can read the request again.
      inputs->Reset();
      return is_mutation ? DispatcherAction::kTestMutate
                         : DispatcherAction::kTestExecute;
    }

    DispatcherCheck(false, "unknown dispatcher action from the flags");
    // should not reach here.
    std::abort();
  }();
  return cached_action;
}
// Best-effort replacement of the contents of the file at `path` with the
// concatenation of `contents`, each of which must be convertible to
// `std::string_view`. The file is created if needed (mode 0660) and
// truncated. Failures are logged to stderr and otherwise ignored; after the
// first failed write the remaining pieces are skipped, but the file is still
// synced and closed.
template <typename... C>
void TrySetFileContents(const char* absl_nonnull path, C... contents) {
  // Needs to be signal-safe.
  const int fd = open(path, O_CREAT | O_TRUNC | O_WRONLY, /*mode=*/0660);
  if (fd == -1) {
    DispatcherLog("cannot open path ", path, ": ", LogErrNo{}, LogLnSync{});
    return;
  }
  // Writes all of `remaining` to `fd`, looping over partial writes. Returns
  // false (after logging) as soon as a write fails or makes no progress.
  const auto write_fully = [&](std::string_view remaining) {
    while (!remaining.empty()) {
      const ssize_t written = write(fd, remaining.data(), remaining.size());
      if (written < 0) {
        DispatcherLog("write() failed on ", path, ": ", LogErrNo{},
                      LogLnSync{});
        return false;
      }
      if (written == 0) {
        // Terminate the line like every other message in this function.
        DispatcherLog("write() on ", path,
                      " returns 0 unexpectedly. Stopping writing the file.",
                      LogLnSync{});
        return false;
      }
      remaining.remove_prefix(static_cast<size_t>(written));
    }
    return true;
  };
  // The && fold stops at the first piece that could not be written.
  (write_fully(contents) && ...);
  if (fsync(fd) != 0) {
    DispatcherLog("fsync() failed on ", path, ": ", LogErrNo{}, LogLnSync{});
  }
  if (close(fd) != 0) {
    DispatcherLog("close() failed on ", path, ": ", LogErrNo{}, LogLnSync{});
  }
}
// True while a user-provided test callback is running.
static std::atomic<bool> in_test_callback = false;

// Scoped marker for "a test callback is running". Construction terminates the
// process if another callback is already marked as running; destruction
// clears the mark.
class TestCallbackGuard {
 public:
  TestCallbackGuard() {
    const bool was_active = in_test_callback.exchange(true);
    DispatcherCheck(!was_active, "test callback is already activated");
  }
  ~TestCallbackGuard() { in_test_callback.store(false); }
};
void DispatcherDoGetBinaryId(const FuzzTestDispatcherCallbacks& callbacks) {
const auto binary_id_output_path =
GetDispatcherFlag(kDispatcherBinaryIdOutputFlagHeader);
DispatcherCheck(binary_id_output_path.has_value(),
"binary ID output path is not set");
std::string binary_id;
{
TestCallbackGuard guard;
binary_id = callbacks.get_binary_id ? callbacks.get_binary_id() : "";
}
TrySetFileContents(std::string{*binary_id_output_path}.c_str(), binary_id);
}
// Runs the mandatory `list_tests` callback inside a callback guard.
void DispatcherDoListTests(const FuzzTestDispatcherCallbacks& callbacks) {
  const bool has_list_tests = callbacks.list_tests != nullptr;
  DispatcherCheck(has_list_tests, "list_tests callback must be set");
  TestCallbackGuard guard;
  callbacks.list_tests();
}
// Runs the `get_seeds` callback inside a callback guard. Does nothing when
// the callback is not set.
void DispatcherDoGetSeeds(const FuzzTestDispatcherCallbacks& callbacks) {
  if (callbacks.get_seeds != nullptr) {
    TestCallbackGuard guard;
    callbacks.get_seeds();
  }
}
// Serves a mutation request read from the inputs blob sequence.
//
// First reports to the outputs blob sequence whether a custom `mutate`
// callback exists. If it does, parses the request (mutant count, input count,
// then up to that many metadata/data pairs), copies the inputs out of the
// blob sequence, and invokes the callback on the copies.
//
// Returns EXIT_SUCCESS, or EXIT_FAILURE if the indicator cannot be written or
// the request header is malformed.
int DispatcherDoMutate(const FuzzTestDispatcherCallbacks& callbacks) {
  auto* const request_blobs = GetInputsBlobSequence();
  auto* const response_blobs = GetOutputsBlobSequence();
  DispatcherCheck(request_blobs != nullptr && response_blobs != nullptr,
                  "inputs/outputs blob sequences must be specified");

  bool has_custom_mutator = callbacks.mutate != nullptr;
  if (!MutationResult::WriteHasCustomMutator(has_custom_mutator,
                                             *response_blobs)) {
    std::fprintf(stderr, "Failed to write custom mutator indicator!\n");
    return EXIT_FAILURE;
  }
  if (!has_custom_mutator) return EXIT_SUCCESS;

  // Request header, in order: request type, number of mutants, number of
  // inputs.
  if (!IsMutationRequest(request_blobs->Read())) {
    std::fprintf(stderr, "Not mutation request!\n");
    return EXIT_FAILURE;
  }
  size_t num_mutants = 0;
  if (!IsNumMutants(request_blobs->Read(), num_mutants)) {
    std::fprintf(stderr, "No num mutants\n");
    return EXIT_FAILURE;
  }
  size_t num_inputs = 0;
  if (!IsNumInputs(request_blobs->Read(), num_inputs)) {
    std::fprintf(stderr, "No num inputs\n");
    return EXIT_FAILURE;
  }

  struct OwningMutateInput {
    ByteArray data;
    ExecutionMetadata metadata;
  };
  // Note: unclear if we can continue using std::vector (or other STL)
  // in the runner. But for now use std::vector.
  //
  // The inputs are copied rather than referenced in place so that the user
  // code never touches the shared memory. `owning_inputs` holds the copies;
  // `inputs` holds the pointer/size views handed to the callback.
  std::vector<OwningMutateInput> owning_inputs;
  owning_inputs.reserve(num_inputs);
  std::vector<FuzzTestDispatcherInputForMutate> inputs;
  inputs.reserve(num_inputs);
  // Each iteration either appends exactly one input or stops, so the number
  // of collected inputs doubles as the loop counter.
  while (inputs.size() < num_inputs) {
    // If the inputs blob sequence has overflown in the engine, we still want
    // to handle the first few inputs.
    ExecutionMetadata metadata;
    if (!IsExecutionMetadata(request_blobs->Read(), metadata)) break;
    auto blob = request_blobs->Read();
    if (!IsDataInput(blob)) break;
    OwningMutateInput& owned = owning_inputs.emplace_back(OwningMutateInput{
        /*data=*/ByteArray{blob.data, blob.data + blob.size},
        /*metadata=*/std::move(metadata)});
    inputs.push_back(FuzzTestDispatcherInputForMutate{
        /*input=*/owned.data.data(),
        /*input_size=*/owned.data.size(),
        /*metadata=*/owned.metadata.cmp_data.data(),
        /*metadata_size=*/owned.metadata.cmp_data.size()});
  }

  {
    TestCallbackGuard guard;
    fprintf(stderr, "calling custom mutator\n");
    // We ensure that:
    // * `inputs.data()` points to an array of `inputs.size()`
    //   `FuzzTestDispatcherInputForMutate` objects.
    // * Each object contains a valid `input` pointer to `input_size` bytes,
    //   and a valid `metadata` pointer to `metadata_size` bytes.
    callbacks.mutate(inputs.data(), inputs.size(), num_mutants,
                     /*shrink=*/0);
  }
  return EXIT_SUCCESS;
}
// Serves an execution request read from the inputs blob sequence.
//
// Runs the mandatory `execute` callback on each data input in turn,
// bracketing every run with input-begin/input-end markers in the outputs blob
// sequence.
//
// Returns EXIT_FAILURE if a blob that is not a data input is encountered, and
// EXIT_SUCCESS otherwise, including when the blobs run out early or a marker
// cannot be written.
int DispatcherDoExecute(const FuzzTestDispatcherCallbacks& callbacks) {
  DispatcherCheck(callbacks.execute != nullptr, "execute callback must be set");
  auto* const request_blobs = GetInputsBlobSequence();
  auto* const response_blobs = GetOutputsBlobSequence();
  DispatcherCheck(request_blobs != nullptr && response_blobs != nullptr,
                  "inputs/ouptuts blob sequence must exist");
  DispatcherCheck(IsExecutionRequest(request_blobs->Read()),
                  "not an execution request");
  size_t num_inputs = 0;
  DispatcherCheck(IsNumInputs(request_blobs->Read(), num_inputs),
                  "failed to read num_inputs");

  for (size_t remaining = num_inputs; remaining != 0; --remaining) {
    auto blob = request_blobs->Read();
    if (!blob.IsValid()) return EXIT_SUCCESS;  // no more blobs to read.
    if (!IsDataInput(blob)) return EXIT_FAILURE;
    // Hand the callback a private copy so that the shared memory is not
    // passed any further.
    ByteArray data(blob.data, blob.data + blob.size);
    if (!BatchResult::WriteInputBegin(*response_blobs)) {
      // TODO: This is to follow the previous behavior, but should we abort
      // here?
      return EXIT_SUCCESS;
    }
    {
      TestCallbackGuard guard;
      // We ensure that `input` is a valid pointer to an array of `size` bytes.
      callbacks.execute(data.data(), data.size());
    }
    if (!BatchResult::WriteInputEnd(*response_blobs)) {
      // TODO: This is to follow the previous behavior, but should we abort
      // here?
      return EXIT_SUCCESS;
    }
  }
  return EXIT_SUCCESS;
}
// Records a failure for the engine, at most once per process.
//
// On the first call, writes `prefix` followed by `description` to the file
// named by the failure-description flag (if that flag is set), and the
// `signature_size` bytes at `signature` to the file named by the
// failure-signature flag (if that flag is set). Every later call writes
// nothing and only logs that the failure could not be emitted.
void DispatcherEmitFailure(const char* absl_nonnull prefix,
const char* absl_nonnull description,
const char* signature, size_t signature_size) {
// Set to true only by the call that actually runs the initializer below.
bool success = false;
// The initializer of a function-local static runs only once, so the lambda
// body executes on the first call only. `success` is captured by reference
// so that the first call can observe that it was the one doing the writing;
// the other arguments are captured by copy.
[[maybe_unused]] static bool write_once = [=, &success] {
if (const auto failure_description_path =
GetDispatcherFlag(kDispatcherFailureDescriptionPathFlagHeader);
failure_description_path.has_value()) {
TrySetFileContents(std::string{*failure_description_path}.c_str(), prefix,
description);
}
if (const auto failure_signature_path =
GetDispatcherFlag(kDispatcherFailureSignaturePathFlagHeader);
failure_signature_path.has_value()) {
TrySetFileContents(std::string{*failure_signature_path}.c_str(),
std::string_view{signature, signature_size});
}
// Note: `TrySetFileContents` returns void, so `success` means "this call
// performed the one-time emission", not "the files were written without
// error".
success = true;
return true;
}();
if (!success) {
// Reached on every call after the first one.
DispatcherLog("Failed to emit failure ", prefix, description, LogLnSync{});
}
}
} // namespace
} // namespace fuzztest::internal
using fuzztest::internal::BatchResult;
using fuzztest::internal::DispatcherAction;
using fuzztest::internal::DispatcherCheck;
using fuzztest::internal::DispatcherDoExecute;
using fuzztest::internal::DispatcherDoGetBinaryId;
using fuzztest::internal::DispatcherDoGetSeeds;
using fuzztest::internal::DispatcherDoListTests;
using fuzztest::internal::DispatcherDoMutate;
using fuzztest::internal::DispatcherEmitFailure;
using fuzztest::internal::GetDispatcherAction;
using fuzztest::internal::GetDispatcherFlag;
using fuzztest::internal::GetDispatcherFlags;
using fuzztest::internal::GetOutputsBlobSequence;
using fuzztest::internal::HasDispatcherSwitchFlag;
using fuzztest::internal::in_test_callback;
using fuzztest::internal::kDispatcherTestGetSeedsOutputDirFlagHeader;
using fuzztest::internal::kDispatcherTestListingPrefixFlagHeader;
using fuzztest::internal::kDispatcherTestNameFlagHeader;
using fuzztest::internal::MutationResult;
int FuzzTestDispatcherIsEnabled() {
const char* flags = GetDispatcherFlags();
if (flags == nullptr) return 0;
fprintf(stderr, "Dispatcher is enabled with flags: %s\n", flags);
return 1;
}
// Returns the value of the test-name flag as a null-terminated string, or
// nullptr if the flag is not set. The string is duplicated once on the first
// call and the same pointer is returned by every later call.
const char* FuzzTestDispatcherGetTestName() {
  static const char* const cached_name = []() -> const char* {
    const auto flag_value = GetDispatcherFlag(kDispatcherTestNameFlagHeader);
    if (flag_value.has_value()) {
      return strndup(flag_value->data(), flag_value->size());
    }
    return nullptr;
  }();
  return cached_name;
}
// Performs the action requested through the dispatcher flags using
// `callbacks`, and returns the exit code for the current process.
//
// Returns 0 immediately when the `:dump_configuration:` switch flag is
// present. For mutate and execute requests, the handler's own exit code
// (EXIT_SUCCESS or EXIT_FAILURE) is returned so that a malformed request is
// reported to the caller instead of being silently dropped. All other
// actions return 0.
int FuzzTestDispatcherRun(const FuzzTestDispatcherCallbacks* callbacks) {
  DispatcherCheck(callbacks != nullptr, "callbacks must be set");
  if (HasDispatcherSwitchFlag(":dump_configuration:")) {
    return 0;
  }
  switch (GetDispatcherAction()) {
    case DispatcherAction::kGetBinaryId:
      DispatcherDoGetBinaryId(*callbacks);
      break;
    case DispatcherAction::kListTests:
      DispatcherDoListTests(*callbacks);
      break;
    case DispatcherAction::kTestGetSeeds:
      DispatcherDoGetSeeds(*callbacks);
      break;
    case DispatcherAction::kTestMutate:
      // Propagate the handler's result as the process exit code.
      return DispatcherDoMutate(*callbacks);
    case DispatcherAction::kTestExecute:
      // Propagate the handler's result as the process exit code.
      return DispatcherDoExecute(*callbacks);
    default:
      DispatcherCheck(false, "unknown dispatcher action to take");
  }
  return 0;
}
// Emits a test name by creating an empty file whose path is the test-listing
// prefix flag value followed by `name`.
//
// Must be called from within the `list_tests` callback, with a non-null,
// null-terminated `name`. If the file cannot be created, the error is
// reported with `perror` and the function returns.
void FuzzTestDispatcherEmitTestName(const char* name) {
  DispatcherCheck(
      GetDispatcherAction() == DispatcherAction::kListTests && in_test_callback,
      "must be called inside test callback for listing tests");
  static auto test_listing_prefix =
      GetDispatcherFlag(kDispatcherTestListingPrefixFlagHeader);
  DispatcherCheck(test_listing_prefix.has_value(),
                  "test listing path prefix must be set");
  DispatcherCheck(name != nullptr, "test name must be set");
  auto test_output_path = std::string{*test_listing_prefix};
  test_output_path += name;
  FILE* f = std::fopen(test_output_path.c_str(), "w");
  if (f == nullptr) {
    std::perror("FAILURE: fopen()");
    // Passing a null stream to `fclose` is undefined behavior; nothing was
    // opened, so there is nothing to close.
    return;
  }
  std::fclose(f);
}
// Emits a seed input by writing the `size` bytes at `data` to a new file in
// the seeds output directory given by the dispatcher flags. The file name is
// a zero-padded 9-digit running index.
//
// Must be called from within the `get_seeds` callback, with non-null `data`
// and `size > 0`. Seeds beyond the 9-digit index range are silently dropped.
void FuzzTestDispatcherEmitSeed(const void* data, size_t size) {
  DispatcherCheck(GetDispatcherAction() == DispatcherAction::kTestGetSeeds &&
                      in_test_callback,
                  "must be called inside test callback for getting seeds");
  DispatcherCheck(size > 0 && data != nullptr,
                  "seed must be non-empty with a valid pointer");
  static size_t seed_index = 0;
  // Copied once into a null-terminated string for use with `snprintf`.
  static const char* output_dir = [] {
    const auto flag_value =
        GetDispatcherFlag(kDispatcherTestGetSeedsOutputDirFlagHeader);
    DispatcherCheck(flag_value.has_value(),
                    "seeds output path must be specified");
    const char* result = strndup(flag_value->data(), flag_value->size());
    DispatcherCheck(result != nullptr, "failed to copy the seeds output path");
    return result;
  }();
  // Cap seed index within 9 digits. If this was triggered, the dumping would
  // take forever..
  if (seed_index >= 1000000000) return;
  char seed_path_buf[PATH_MAX];
  // `snprintf` returns an int that is negative on error; `%zu` is the
  // conversion that matches `size_t`.
  const int num_path_chars =
      snprintf(seed_path_buf, PATH_MAX, "%s/%09zu", output_dir, seed_index);
  DispatcherCheck(num_path_chars >= 0 &&
                      static_cast<size_t>(num_path_chars) < PATH_MAX,
                  "seed path reaches PATH_MAX");
  FILE* output_file = fopen(seed_path_buf, "w");
  // Writing to or closing a null stream is undefined behavior, so fail the
  // check before touching the stream.
  DispatcherCheck(output_file != nullptr,
                  "failed to open the seed file for writing");
  const size_t num_bytes_written = fwrite(data, 1, size, output_file);
  DispatcherCheck(num_bytes_written == size,
                  "wrong number of bytes written for seed");
  // Buffered data is flushed on close, so a write error can surface here.
  DispatcherCheck(fclose(output_file) == 0, "failed to close the seed file");
  ++seed_index;
}
void FuzzTestDispatcherEmitMutant(const void* data, size_t size) {
DispatcherCheck(GetDispatcherAction() == DispatcherAction::kTestMutate &&
in_test_callback,
"must be called inside test callback for mutating");
DispatcherCheck(size > 0 && data != nullptr,
"mutant must be non-empty with a valid pointer");
auto* output = GetOutputsBlobSequence();
DispatcherCheck(output != nullptr, "outputs blob sequence must exist");
DispatcherCheck(MutationResult::WriteMutant(
{static_cast<const uint8_t*>(data), size}, *output),
"failed to write mutant");
}
void FuzzTestDispatcherEmitFeedbackAs32BitFeatures(const uint32_t* features,
size_t num_features) {
DispatcherCheck(GetDispatcherAction() == DispatcherAction::kTestExecute &&
in_test_callback,
"must be called inside test callback of executing");
DispatcherCheck(num_features > 0 && features != nullptr,
"feature array must be non-empty with a valid pointer");
auto* output = GetOutputsBlobSequence();
DispatcherCheck(output != nullptr, "outputs blob sequence must exist");
DispatcherCheck(BatchResult::WriteDispatcher32BitFeatures(
features, num_features, *output),
"failed to write feedback");
}
void FuzzTestDispatcherEmitExecutionMetadata(const void* metadata,
size_t size) {
DispatcherCheck(GetDispatcherAction() == DispatcherAction::kTestExecute &&
in_test_callback,
"must be called inside test callback of executing");
DispatcherCheck(size > 0 && metadata != nullptr,
"metadata must be non-empty with a valid pointer");
auto* output = GetOutputsBlobSequence();
DispatcherCheck(output != nullptr, "outputs blob sequence must exist");
DispatcherCheck(BatchResult::WriteMetadata(
{static_cast<const uint8_t*>(metadata), size}, *output),
"failed to write metadata");
}
void FuzzTestDispatcherEmitInputFailure(const char* description,
const void* signature,
size_t signature_size) {
DispatcherCheck(GetDispatcherAction() == DispatcherAction::kTestExecute &&
in_test_callback,
"must be called inside test callback for executing");
DispatcherCheck((signature == nullptr) == (signature_size == 0),
"violated invariant: signature should be nullptr if and only "
"if signature_size is 0");
DispatcherEmitFailure(
"INPUT FAILURE: ", description != nullptr ? description : "",
reinterpret_cast<const char*>(signature), signature_size);
}
// Emits a failure tagged "IGNORED FAILURE" with no signature. A null
// `description` is treated as an empty string.
void FuzzTestDispatcherEmitIgnoredFailure(const char* description) {
  const char* text = "";
  if (description != nullptr) text = description;
  DispatcherEmitFailure("IGNORED FAILURE: ", text,
                        /*signature=*/nullptr, /*signature_size=*/0);
}
// Emits a failure tagged "SETUP FAILURE" with no signature. A null
// `description` is treated as an empty string.
void FuzzTestDispatcherEmitSetupFailure(const char* description) {
  const char* text = "";
  if (description != nullptr) text = description;
  DispatcherEmitFailure("SETUP FAILURE: ", text,
                        /*signature=*/nullptr, /*signature_size=*/0);
}
// Emits a failure tagged "SKIPPED TEST" with no signature. A null
// `description` is treated as an empty string.
void FuzzTestDispatcherEmitSkippedTestFailure(const char* description) {
  const char* text = "";
  if (description != nullptr) text = description;
  DispatcherEmitFailure("SKIPPED TEST: ", text,
                        /*signature=*/nullptr, /*signature_size=*/0);
}

View File

@ -0,0 +1,141 @@
// Copyright 2025 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef THIRD_PARTY_CENTIPEDE_DISPATCHER_H_
#define THIRD_PARTY_CENTIPEDE_DISPATCHER_H_
// Dispatcher interface.
//
// This header needs to be C compatible.
#include <stddef.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
// One input handed to the `mutate` callback: the input bytes plus the
// metadata bytes associated with that input. Both pointers are unowned views.
struct FuzzTestDispatcherInputForMutate {
// Pointer to the `input_size` bytes of the input.
const void* input;
// Number of bytes at `input`.
size_t input_size;
// Pointer to the `metadata_size` bytes of metadata for this input.
const void* metadata;
// Number of bytes at `metadata`.
size_t metadata_size;
};
// Callbacks to be provided by the fuzz testing framework to
// `FuzzTestDispatcherRun`.
struct FuzzTestDispatcherCallbacks {
// Optional callback to return an ID for the current binary. If not
// implemented, the controller will generate a default ID based on the binary
// path.
const char* (*get_binary_id)();
// Callback to emit the list of available tests in the binary using
// `FuzzTestDispatcherEmitTestName`. Must be set when tests are listed.
void (*list_tests)();
// Callback to emit the seed inputs for a test using
// `FuzzTestDispatcherEmitSeed`. May be left null, in which case no seeds are
// emitted.
void (*get_seeds)();
// Optional callback to emit at most `num_mutants` from `inputs` with
// `num_inputs` entries using `FuzzTestDispatcherEmitMutant`. `shrink` != 0
// means to generate smaller mutants than the inputs used for mutation. If not
// implemented, the controller will perform basic string-based mutations.
//
// TODO: xinhaoyuan - Reconsider mutation interface design instead of
// following the existing Centipede/runner protocol.
void (*mutate)(const struct FuzzTestDispatcherInputForMutate* inputs,
size_t num_inputs, size_t num_mutants, int shrink);
// Callback to execute `input` with `size` bytes. The callback should emit
// coverage feedback using `FuzzTestDispatcherEmitFeedback*` functions, and
// any metadata for further mutation using
// `FuzzTestDispatcherEmitExecutionMetadata`. In case the input caused a
// failure, the callback should emit the failure using
// `FuzzTestDispatcherEmitInputFailure`. Must be set when inputs are executed.
void (*execute)(const void* input, size_t size);
};
// Functions provided by the FuzzTest engine.
// Returns 0 if the dispatcher mode is not enabled in the current process; 1 if
// the dispatcher mode is enabled; other values for unexpected errors.
int FuzzTestDispatcherIsEnabled();
// All functions below should be called only after `FuzzTestDispatcherIsEnabled`
// returns 1 in the current process.
// Returns the test name under operation as an unowned, static, and
// null-terminated string. Returns nullptr if the current process is not
// operating on a specific test.
const char* FuzzTestDispatcherGetTestName();
// Give control to the FuzzTest engine to invoke `callbacks`. Returns an exit
// code for the current process desired by the engine.
int FuzzTestDispatcherRun(const struct FuzzTestDispatcherCallbacks* callbacks);
// Emits a test name. Must be called from the `list_tests` callback. `name` must
// be a null-terminated string.
void FuzzTestDispatcherEmitTestName(const char* name);
// Emits a seed input. Must be called from the `get_seeds` callback. `data` must
// not be nullptr and `size > 0` must hold.
void FuzzTestDispatcherEmitSeed(const void* data, size_t size);
// Emits a mutant. Must be called from the `mutate` callback. `data` must not be
// nullptr and `size > 0` must hold.
void FuzzTestDispatcherEmitMutant(const void* data, size_t size);
// Emits coverage feedback for the current input as an array of 32-bit features.
//
// For each 32-bit feature, the bit [31] is ignored; the 4 bits [30-27]
// indicate the feature domain for engine prioritization. The remaining 27 bits
// [26-0] represent the actual 27-bit feature ID in the domain.
//
// Must be called from the `execute` callback. `features` must not be nullptr
// and `num_features > 0` must hold.
void FuzzTestDispatcherEmitFeedbackAs32BitFeatures(const uint32_t* features,
size_t num_features);
// Emits metadata of the current input as raw bytes. Must be called from
// the `execute` callback.
void FuzzTestDispatcherEmitExecutionMetadata(const void* metadata, size_t size);
// Functions for emitting various types of failures. After calling any of these
// functions, later calls of these functions would have no effect, and the
// current process should exit after necessary cleanup.
// Emits a failure caused by executing an input. Must be called within the
// `execute` callback. `description` should be a null-terminated string, or
// nullptr can be passed for an empty string; `signature` should be nullptr if
// and only if `signature_size == 0`.
void FuzzTestDispatcherEmitInputFailure(const char* description,
const void* signature,
size_t signature_size);
// Emits a failure that should be ignored (i.e. not affecting the fuzzing
// workflows). `description` should be a null-terminated string, or nullptr can
// be passed for an empty string.
void FuzzTestDispatcherEmitIgnoredFailure(const char* description);
// Emits a failure caused by the test setup. `description` should be a
// null-terminated string, or nullptr can be passed for an empty string.
void FuzzTestDispatcherEmitSetupFailure(const char* description);
// Emits a failure due to reasons to skip the entire test. `description` should
// be a null-terminated string, or nullptr can be passed for an empty string.
void FuzzTestDispatcherEmitSkippedTestFailure(const char* description);
#ifdef __cplusplus
} // extern "C"
#endif
#endif

View File

@ -0,0 +1,473 @@
// Copyright 2023 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/distill.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <memory>
#include <numeric>
#include <optional>
#include <sstream>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include "absl/base/thread_annotations.h"
#include "absl/container/flat_hash_set.h"
#include "absl/log/check.h"
#include "absl/log/log.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_join.h"
#include "absl/synchronization/mutex.h"
#include "absl/time/time.h"
#include "./centipede/corpus_io.h"
#include "./centipede/environment.h"
#include "./centipede/feature.h"
#include "./centipede/feature_set.h"
#include "./centipede/periodic_action.h"
#include "./centipede/resource_pool.h"
#include "./centipede/rusage_profiler.h"
#include "./centipede/rusage_stats.h"
#include "./centipede/thread_pool.h"
#include "./centipede/util.h"
#include "./centipede/workdir.h"
#include "./common/blob_file.h"
#include "./common/defs.h"
#include "./common/hash.h"
#include "./common/logging.h"
#include "./common/remote_file.h"
#include "./common/status_macros.h"
namespace fuzztest::internal {
namespace {
// A corpus element. Consists of a fuzz test input and its matching features.
struct CorpusElt {
ByteArray input;
FeatureVec features;
CorpusElt(const ByteArray &input, FeatureVec features)
: input(input), features(std::move(features)) {}
// Movable, but not copyable for efficiency.
CorpusElt(const CorpusElt &) = delete;
CorpusElt &operator=(const CorpusElt &) = delete;
CorpusElt(CorpusElt &&) = default;
CorpusElt &operator=(CorpusElt &&) = default;
ByteArray PackedFeatures() const {
return PackFeaturesAndHash(input, features);
}
};
using CorpusEltVec = std::vector<CorpusElt>;
// The maximum number of threads reading input shards concurrently. This is
// mainly to prevent I/O congestion.
inline constexpr size_t kMaxReadingThreads = 50;
// The maximum number of threads writing shards concurrently. These in turn
// launch up to `kMaxReadingThreads` reading threads.
inline constexpr size_t kMaxWritingThreads = 100;
// A global cap on the total number of threads, both writing and reading. Unlike
// the other two limits, this one is purely to prevent too many threads in the
// process.
inline constexpr size_t kMaxTotalThreads = 5000;
// Compile-time guard: the worst case of every writer running a full set of
// readers must stay within the global cap.
static_assert(kMaxReadingThreads * kMaxWritingThreads <= kMaxTotalThreads);
// 2^30, i.e. one gibibyte assuming `MemSize` counts bytes (TODO confirm).
inline constexpr MemSize kGB = 1024L * 1024L * 1024L;
// The total approximate amount of RAM to be shared by the concurrent threads.
// Only the RSS component is limited; the virtual-memory components are 0.
// TODO(ussuri): Replace by a function of free RSS on the system.
inline constexpr RUsageMemory kRamQuota{/*mem_vsize=*/0, /*mem_vpeak=*/0,
/*mem_rss=*/25 * kGB};
// The amount of time that each thread will wait for enough RAM to be freed up
// by its concurrent siblings.
inline constexpr absl::Duration kRamLeaseTimeout = absl::Hours(5);
// Log prefix for messages about a single output shard, identified by
// `env.my_shard_index`.
std::string LogPrefix(const Environment &env) {
  std::string prefix = absl::StrCat("DISTILL[S.", env.my_shard_index, "]: ");
  return prefix;
}

// Log prefix for messages about the distillation as a whole.
std::string LogPrefix() { return std::string{"DISTILL[ALL]: "}; }
// TODO(ussuri): Move the reader/writer classes to shard_reader.cc, rename it
// to corpus_io.cc, and reuse the new APIs where useful in the code base.

// Reads input corpus shards from the workdir described by an `Environment`.
// The object holds only const state after construction.
class InputCorpusShardReader {
 public:
  InputCorpusShardReader(const Environment &env)
      : workdir_{env}, log_prefix_{LogPrefix(env)} {}

  // Returns a conservative estimate of the memory needed to hold shard
  // `shard_idx`, derived from the on-disk sizes of its corpus and features
  // files.
  MemSize EstimateRamFootprint(size_t shard_idx) const {
    // Conservative compression factors for the two file types. These have
    // been observed empirically for the Riegeli blob format. The legacy format
    // is approximately 1:1, but the stricter Riegeli numbers are used because
    // the legacy format should be considered obsolete.
    // TODO(b/322880269): Use the actual in-memory footprint once available.
    constexpr double kMaxCorpusCompressionRatio = 5.0;
    constexpr double kMaxFeaturesCompressionRatio = 10.0;

    const auto corpus_file = workdir_.CorpusFilePaths().Shard(shard_idx);
    const auto features_file = workdir_.FeaturesFilePaths().Shard(shard_idx);
    const MemSize corpus_bytes = ValueOrDie(RemoteFileGetSize(corpus_file));
    const MemSize features_bytes = ValueOrDie(RemoteFileGetSize(features_file));
    return corpus_bytes * kMaxCorpusCompressionRatio +
           features_bytes * kMaxFeaturesCompressionRatio;
  }

  // Reads shard `shard_idx` and returns its elements in file order.
  CorpusEltVec ReadShard(size_t shard_idx) {
    // These two names are passed to `VV()` below, so they are kept as-is in
    // case the macro prints the expression text.
    const auto corpus_path = workdir_.CorpusFilePaths().Shard(shard_idx);
    const auto features_path = workdir_.FeaturesFilePaths().Shard(shard_idx);
    VLOG(1) << log_prefix_ << "reading input shard " << shard_idx << ":\n"
            << VV(corpus_path) << "\n"
            << VV(features_path);
    CorpusEltVec shard_elts;
    // Collect every (input, features) pair reported for the shard.
    const auto collect = [&shard_elts](ByteArray input, FeatureVec features) {
      shard_elts.emplace_back(std::move(input), std::move(features));
    };
    fuzztest::internal::ReadShard(corpus_path, features_path, collect);
    return shard_elts;
  }

 private:
  const WorkDir workdir_;
  const std::string log_prefix_;
};
// Writes corpus elements to this environment's distilled corpus and features
// files. All public methods serialize on an internal mutex.
class CorpusShardWriter {
 public:
  // Counters describing the writing done so far.
  struct Stats {
    size_t num_total_elts = 0;       // Elements offered for writing.
    size_t num_written_elts = 0;     // Elements actually written.
    size_t num_written_batches = 0;  // Completed `WriteBatch` calls.
  };

  // Opens both output files, appending when `append` is true and truncating
  // otherwise.
  CorpusShardWriter(const Environment &env, bool append)
      : workdir_{env},
        log_prefix_{LogPrefix(env)},
        corpus_path_{workdir_.DistilledCorpusFilePaths().MyShard()},
        features_path_{workdir_.DistilledFeaturesFilePaths().MyShard()},
        corpus_writer_{DefaultBlobFileWriterFactory()},
        feature_writer_{DefaultBlobFileWriterFactory()} {
    CHECK_OK(corpus_writer_->Open(corpus_path_, append ? "a" : "w"));
    CHECK_OK(feature_writer_->Open(features_path_, append ? "a" : "w"));
  }

  virtual ~CorpusShardWriter() = default;

  // Offers a single element for writing.
  void WriteElt(CorpusElt elt) {
    absl::MutexLock lock{&mu_};
    WriteEltImpl(std::move(elt));
  }

  // Offers all of `elts` for writing, in order, under a single lock.
  void WriteBatch(CorpusEltVec elts) {
    absl::MutexLock lock{&mu_};
    VLOG(1) << log_prefix_ << "writing " << elts.size()
            << " elements to output shard:\n"
            << VV(corpus_path_) << "\n"
            << VV(features_path_);
    for (CorpusElt &elt : elts) {
      WriteEltImpl(std::move(elt));
    }
    stats_.num_written_batches += 1;
  }

  // Returns a snapshot of the counters.
  Stats GetStats() const {
    absl::MutexLock lock{&mu_};
    return stats_;
  }

 protected:
  // A behavior customization point: a derived class gets an opportunity to
  // analyze and/or preprocess `elt` before it is written. For example, a
  // derived class can trim the element's feature set before it is written, or
  // choose to skip writing it entirely by returning `std::nullopt`. The
  // default implementation passes the element through unchanged.
  virtual std::optional<CorpusElt> PreprocessElt(CorpusElt elt) {
    return std::move(elt);
  }

 private:
  // Counts `elt`, runs it through `PreprocessElt`, and writes the result to
  // both files unless preprocessing rejected it.
  void WriteEltImpl(CorpusElt elt) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_) {
    stats_.num_total_elts += 1;
    const auto preprocessed_elt = PreprocessElt(std::move(elt));
    if (!preprocessed_elt.has_value()) return;
    // Append to the distilled corpus and features files.
    CHECK_OK(corpus_writer_->Write(preprocessed_elt->input));
    CHECK_OK(feature_writer_->Write(preprocessed_elt->PackedFeatures()));
    stats_.num_written_elts += 1;
  }

  // Const state.
  const WorkDir workdir_;
  const std::string log_prefix_;
  const std::string corpus_path_;
  const std::string features_path_;
  // Mutable state.
  mutable absl::Mutex mu_;
  std::unique_ptr<BlobFileWriter> corpus_writer_ ABSL_GUARDED_BY(mu_);
  std::unique_ptr<BlobFileWriter> feature_writer_ ABSL_GUARDED_BY(mu_);
  Stats stats_ ABSL_GUARDED_BY(mu_);
};
// A distilling input filter:
// - Deduplicates byte-identical inputs: only the first one is allowed to pass.
// - Deduplicates feature-equivalent inputs: up to N from each equivalency set
// are allowed to pass.
// - Discards the specified set of "uninteresting" feature domains from the
// feature sets of filtered inputs.
class DistillingInputFilter {
public:
// An extension to the parent class's `Stats`.
struct Stats {
size_t num_total_elts = 0;
size_t num_byte_unique_elts = 0;
size_t num_feature_unique_elts = 0;
// The accumulated features of the distilled corpus so far, represents in
// the same compact textual form that Centipede uses in its fuzzing progress
// log messages, e.g.: "ft: 96331 cov: 81793 usr1: 5045 ...".
std::string coverage_str;
};
// `feature_equiv_redundancy` specifies how many inputs with equivalent
// feature sets are allowed to pass the filter. Any subsequent inputs with the
// equivalent set will be rejected.
// `should_discard_domains` specifies the domains that should be discarded
// from the feature set of a filtered input.
DistillingInputFilter( //
uint8_t feature_frequency_threshold,
const FeatureSet::FeatureDomainSet &domains_to_discard)
: seen_inputs_{},
seen_features_{
/*frequency_threshold=*/feature_frequency_threshold,
/*should_discard_domain=*/domains_to_discard,
} {}
std::optional<CorpusElt> FilterElt(CorpusElt elt) {
absl::MutexLock lock{&mu_};
++stats_.num_total_elts;
// Filter out approximately byte-identical inputs ("approximately" because
// we use hashes).
std::string hash = Hash(elt.input);
const auto [iter, inserted] = seen_inputs_.insert(std::move(hash));
if (!inserted) return std::nullopt;
++stats_.num_byte_unique_elts;
// Filter out feature-equivalent inputs.
seen_features_.PruneDiscardedDomains(elt.features);
if (!seen_features_.HasUnseenFeatures(elt.features)) return std::nullopt;
seen_features_.IncrementFrequencies(elt.features);
++stats_.num_feature_unique_elts;
return std::move(elt);
}
Stats GetStats() {
absl::MutexLock lock{&mu_};
std::stringstream ss;
ss << seen_features_;
stats_.coverage_str = std::move(ss).str();
return stats_;
}
private:
absl::Mutex mu_;
absl::flat_hash_set<std::string /*hash*/> seen_inputs_ ABSL_GUARDED_BY(mu_);
FeatureSet seen_features_ ABSL_GUARDED_BY(mu_);
Stats stats_ ABSL_GUARDED_BY(mu_);
};
// A helper class for writing distilled corpus shards: a `CorpusShardWriter`
// that passes every element through a `DistillingInputFilter` and writes only
// the elements the filter lets through.
//
// Writes go through the base class's `WriteElt()`/`WriteBatch()`, which hold
// the base class's mutex, and filtering goes through `FilterElt()`, which
// holds the filter's own mutex. All writes of one object go to a single pair
// of output files.
class DistilledCorpusShardWriter : public CorpusShardWriter {
public:
// `filter` is held by reference and may be shared with other writers; it
// must outlive this object.
DistilledCorpusShardWriter( //
const Environment &env, bool append, DistillingInputFilter &filter)
: CorpusShardWriter{env, append}, input_filter_{filter} {}
~DistilledCorpusShardWriter() override = default;
protected:
// Delegates the keep/reject decision (and feature pruning) to the filter.
std::optional<CorpusElt> PreprocessElt(CorpusElt elt) override {
return input_filter_.FilterElt(std::move(elt));
}
private:
DistillingInputFilter &input_filter_;
};
} // namespace
// Runs one independent distillation task. Reads shards in the order specified
// by `shard_indices`, distills inputs from them using `input_filter`, and
// writes the result to `WorkDir{env}.DistilledPath()`. Every task gets its own
// `env.my_shard_index`, and so every task creates its own independent distilled
// corpus file. `parallelism` is the maximum number of concurrent
// reading/writing threads. Values > 1 can cause non-determinism in which of the
// same-coverage inputs gets selected to be written to the output shard; set to
// 1 for tests.
void DistillToOneOutputShard( //
const Environment &env, //
const std::vector<size_t> &shard_indices, //
DistillingInputFilter &input_filter, //
ResourcePool<RUsageMemory> &ram_pool, //
int parallelism) {
LOG(INFO) << LogPrefix(env) << "Distilling to output shard "
<< env.my_shard_index << "; input shard indices:\n"
<< absl::StrJoin(shard_indices, ", ");
// Read and write the shards in parallel, but gate reading of each on the
// availability of free RAM to keep the peak RAM usage under control.
const size_t num_shards = shard_indices.size();
InputCorpusShardReader reader{env};
// NOTE: Always overwrite corpus and features files, never append.
DistilledCorpusShardWriter writer{env, /*append=*/false, input_filter};
{
ThreadPool threads{parallelism};
for (size_t shard_idx : shard_indices) {
threads.Schedule([shard_idx, &reader, &writer, &env, num_shards,
&ram_pool] {
const auto ram_lease = ram_pool.AcquireLeaseBlocking({
/*id=*/absl::StrCat("out_", env.my_shard_index, "/in_", shard_idx),
/*amount=*/
{/*mem_vsize=*/0, /*mem_vpeak=*/0,
/*mem_rss=*/reader.EstimateRamFootprint(shard_idx)},
/*timeout=*/kRamLeaseTimeout,
});
CHECK_OK(ram_lease.status());
CorpusEltVec shard_elts = reader.ReadShard(shard_idx);
// Reverse the order of elements. The intuition is as follows:
// * If the shard is the result of fuzzing with Centipede, the inputs
// that are closer to the end are more interesting, so we start there.
// * If the shard resulted from somethening else, the reverse order is
// not any better or worse than any other order.
std::reverse(shard_elts.begin(), shard_elts.end());
writer.WriteBatch(std::move(shard_elts));
const CorpusShardWriter::Stats shard_stats = writer.GetStats();
LOG(INFO) << LogPrefix(env)
<< "batches: " << shard_stats.num_written_batches << "/"
<< num_shards << " inputs: " << shard_stats.num_total_elts
<< " written: " << shard_stats.num_written_elts;
});
}
} // The threads join here.
LOG(INFO) << LogPrefix(env) << "Done distilling to output shard "
<< env.my_shard_index;
}
// Distills the corpus described by `env` into `env.num_threads` output shards
// and returns EXIT_SUCCESS.
//
// Work is split as follows:
// * Output shard writer number `i` gets a copy of `env` whose `my_shard_index`
//   is advanced by `i`.
// * The `env.total_shards` input shard indices are shuffled with an RNG seeded
//   from `env.seed` and dealt round-robin to the writers.
// * All writers share one `DistillingInputFilter` (configured from
//   `opts.feature_frequency_threshold` and `env.MakeDomainDiscardMask()`) and
//   one RAM `ResourcePool`.
int Distill(const Environment &env, const DistillOptions &opts) {
RPROF_THIS_FUNCTION_WITH_TIMELAPSE( //
/*enable=*/ABSL_VLOG_IS_ON(1), //
/*timelapse_interval=*/absl::Seconds(ABSL_VLOG_IS_ON(2) ? 10 : 60), //
/*also_log_timelapses=*/ABSL_VLOG_IS_ON(10));
// Prepare the per-thread envs.
std::vector<Environment> envs_per_thread(env.num_threads, env);
for (size_t thread_idx = 0; thread_idx < env.num_threads; ++thread_idx) {
envs_per_thread[thread_idx].my_shard_index += thread_idx;
}
// Prepare the per-thread input shard indices. This assigns a randomized and
// shuffled subset of the input shards to each output shard writer. The subset
// sizes are roughly equal between the writers.
std::vector<std::vector<size_t>> shard_indices_per_thread(env.num_threads);
std::vector<size_t> all_shard_indices(env.total_shards);
std::iota(all_shard_indices.begin(), all_shard_indices.end(), 0);
Rng rng{GetRandomSeed(env.seed)};
std::shuffle(all_shard_indices.begin(), all_shard_indices.end(), rng);
// Round-robin cursor over the output shard writers.
// NOTE(review): the modulo below assumes `env.num_threads` >= 1 whenever
// there is at least one input shard -- confirm that callers guarantee this.
size_t thread_idx = 0;
for (size_t shard_idx : all_shard_indices) {
shard_indices_per_thread[thread_idx].push_back(shard_idx);
thread_idx = (thread_idx + 1) % env.num_threads;
}
// Run the distillation threads in parallel.
{
// A global input filter shared by all output shard writers. The output
// shards will collectively contain a deduplicated set of byte- and
// feature-unique inputs.
DistillingInputFilter input_filter{
opts.feature_frequency_threshold,
env.MakeDomainDiscardMask(),
};
// A periodic logger of the global distillation progress. Runs on a separate
// thread.
PeriodicAction progress_logger{
[&input_filter]() {
const auto stats = input_filter.GetStats();
LOG(INFO) << LogPrefix() << stats.coverage_str
<< " inputs: " << stats.num_total_elts
<< " unique: " << stats.num_byte_unique_elts
<< " distilled: " << stats.num_feature_unique_elts;
},
// Seeing 0's at the beginning is not interesting, unless debugging.
// Likewise, increase the frequency --v >= 1 to aid debugging.
PeriodicAction::ConstDelayConstInterval(
absl::Seconds(ABSL_VLOG_IS_ON(1) ? 0 : 60),
absl::Seconds(ABSL_VLOG_IS_ON(1) ? 10 : 60)),
};
// The RAM pool shared between all the `DistillToOneOutputShard()` threads.
ResourcePool ram_pool{kRamQuota};
// The pool size is capped at `kMaxWritingThreads`, but one task per output
// shard (`env.num_threads` of them) is still scheduled below.
// `threads` is declared last in this scope, so it is destroyed first, i.e.
// before the filter, the logger and the RAM pool that the scheduled lambdas
// capture by reference.
const size_t num_threads = std::min(env.num_threads, kMaxWritingThreads);
ThreadPool threads{static_cast<int>(num_threads)};
// This loop's `thread_idx` shadows the round-robin cursor declared above.
for (size_t thread_idx = 0; thread_idx < env.num_threads; ++thread_idx) {
threads.Schedule(
[&thread_env = envs_per_thread[thread_idx],
&thread_shard_indices = shard_indices_per_thread[thread_idx],
&input_filter, &progress_logger, &ram_pool]() {
DistillToOneOutputShard( //
thread_env, thread_shard_indices, input_filter, ram_pool,
kMaxReadingThreads);
// In addition to periodic progress reports, also log the progress
// after writing each output shard.
progress_logger.Nudge();
});
}
} // The threads join here.
return EXIT_SUCCESS;
}
void DistillForTests(const Environment &env,
const std::vector<size_t> &shard_indices) {
DistillingInputFilter input_filter{
/*feature_frequency_threshold=*/1,
env.MakeDomainDiscardMask(),
};
// Do not limit the max RAM.
ResourcePool ram_pool{RUsageMemory::Max()};
// Read the input shards sequentially and in order to ensure deterministic
// outputs.
DistillToOneOutputShard( //
env, shard_indices, input_filter, ram_pool, /*parallelism=*/1);
}
} // namespace fuzztest::internal

View File

@ -0,0 +1,51 @@
// Copyright 2023 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef THIRD_PARTY_CENTIPEDE_DISTILL_H_
#define THIRD_PARTY_CENTIPEDE_DISTILL_H_
#include <cstddef>
#include <cstdint>
#include <vector>
#include "./centipede/environment.h"
namespace fuzztest::internal {
// Options for `Distill()`.
struct DistillOptions {
// From each feature-equivalent set of inputs, select up to this many winners.
// `Distill()` passes this value to the `DistillingInputFilter` shared by all
// output shard writers. Defaults to 1.
uint8_t feature_frequency_threshold = 1;
};
// Reads `env.total_shards` input shards from `WorkDir{env}.CorpusFilePaths()`
// and `WorkDir{env}.FeaturesFilePaths()`, distills them, and writes out the
// winning inputs to `env.num_threads` output shards.
//
// All reads and writes are parallelized for higher throughput. A side effect of
// that is that the results are generally non-deterministic (for a given
// feature-equivalent set of inputs, any one can win and make it to the output).
//
// `opts` controls how many winners are kept per feature-equivalent set; see
// `DistillOptions`.
//
// Returns EXIT_SUCCESS.
int Distill(const Environment &env, const DistillOptions &opts = {});
// Same as `Distill()`, but runs distillation without I/O parallelization and
// reads shards in the order specified by `shard_indices` for deterministic
// results. Uses a feature frequency threshold of 1 and produces a single
// output shard for `env`.
void DistillForTests(const Environment &env,
const std::vector<size_t> &shard_indices);
} // namespace fuzztest::internal
#endif // THIRD_PARTY_CENTIPEDE_DISTILL_H_

View File

@ -0,0 +1,193 @@
// Copyright 2023 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/distill.h"
#include <cstddef>
#include <cstdint>
#include <filesystem> // NOLINT
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/flags/reflection.h"
#include "absl/log/check.h"
#include "./centipede/corpus_io.h"
#include "./centipede/environment.h"
#include "./centipede/feature.h"
#include "./centipede/util.h"
#include "./centipede/workdir.h"
#include "./common/blob_file.h"
#include "./common/defs.h"
#include "./common/test_util.h"
namespace fuzztest::internal {
namespace {
using testing::UnorderedElementsAreArray;
// One corpus element as used by these tests: an input together with the
// features recorded for it.
struct TestCorpusRecord {
// The raw input bytes.
ByteArray input;
// The features associated with `input`.
FeatureVec feature_vec;
};
// gMock matcher for `TestCorpusRecord`. A record matches when its `input`
// matches `expected_input` and its `feature_vec` has exactly the elements of
// `expected_features`, in the same order. Mismatch explanations are forwarded
// to the result listener.
MATCHER_P2(EqualsTestCorpusRecord, expected_input, expected_features, "") {
  const bool input_matches = testing::ExplainMatchResult(
      testing::Field(&TestCorpusRecord::input, expected_input), arg,
      result_listener);
  // Only look at the features once the input itself has matched.
  if (!input_matches) return false;
  return testing::ExplainMatchResult(
      testing::Field(&TestCorpusRecord::feature_vec,
                     testing::ElementsAreArray(expected_features)),
      arg, result_listener);
}
// The records of one input shard, in on-disk order.
using Shard = std::vector<TestCorpusRecord>;
// All input shards of a test corpus; the vector index is the shard index.
using ShardVec = std::vector<Shard>;
// A list of raw inputs.
// NOTE(review): appears to be unused in this file -- confirm before removing.
using InputVec = std::vector<ByteArray>;
// Writes `record` to shard `shard_index`.
void WriteToShard(const Environment &env, const TestCorpusRecord &record,
size_t shard_index) {
const WorkDir wd{env};
const auto corpus_path = wd.CorpusFilePaths().Shard(shard_index);
const auto features_path = wd.FeaturesFilePaths().Shard(shard_index);
const auto corpus_appender = DefaultBlobFileWriterFactory(env.riegeli);
const auto features_appender = DefaultBlobFileWriterFactory(env.riegeli);
CHECK_OK(corpus_appender->Open(corpus_path, "a"));
CHECK_OK(features_appender->Open(features_path, "a"));
CHECK_OK(corpus_appender->Write(record.input));
CHECK_OK(features_appender->Write(
PackFeaturesAndHash(record.input, record.feature_vec)));
}
// Reads and returns the distilled corpus record from
// `wd.DistilledCorpusPath()` and `wd.DistilledFeaturesPath()`.
std::vector<TestCorpusRecord> ReadFromDistilled(const WorkDir &wd) {
const auto distilled_corpus_path = wd.DistilledCorpusFilePaths().MyShard();
const auto distilled_features_path =
wd.DistilledFeaturesFilePaths().MyShard();
std::vector<TestCorpusRecord> result;
auto shard_reader_callback = [&result](ByteArray input, FeatureVec features) {
result.push_back({std::move(input), std::move(features)});
};
ReadShard(distilled_corpus_path, distilled_features_path,
shard_reader_callback);
return result;
}
// Distills `shards` in the order specified by `shard_indices`,
// returns the distilled corpus as a vector of inputs.
std::vector<TestCorpusRecord> TestDistill(
const ShardVec &shards, const std::vector<size_t> &shard_indices,
std::string_view test_name, uint64_t user_feature_domain_mask) {
// Set up the environment.
// We need to set at least --binary_hash before `env` is constructed,
// so we do this by overriding the flags.
absl::FlagSaver flag_saver;
std::string dir = GetTestTempDir(test_name);
std::filesystem::remove_all(dir);
std::filesystem::create_directories(dir);
Environment env;
env.workdir = dir;
env.binary = "binary_that_is_not_here";
env.binary_hash = "01234567890";
env.total_shards = shards.size();
env.my_shard_index = 1; // an arbitrary shard index.
env.user_feature_domain_mask = user_feature_domain_mask;
const WorkDir wd{env};
std::filesystem::create_directories(wd.CoverageDirPath());
// Write the shards.
for (size_t shard_index = 0; shard_index < shards.size(); ++shard_index) {
for (const auto &record : shards[shard_index]) {
WriteToShard(env, record, shard_index);
}
}
// Distill.
DistillForTests(env, shard_indices);
// Read the result back.
return ReadFromDistilled(wd);
}
// Distills the same three input shards in several shard orders and with
// several user feature domain masks, and checks which inputs (and which of
// their features) end up in the distilled output.
TEST(Distill, BasicDistill) {
ByteArray in0 = {0};
ByteArray in1 = {1};
ByteArray in2 = {2};
ByteArray in3 = {3};
// Two features that belong to user domains 0 and 1, respectively.
feature_t usr0 = feature_domains::kUserDomains[0].ConvertToMe(100);
feature_t usr1 = feature_domains::kUserDomains[1].ConvertToMe(101);
ShardVec shards = {
// shard 0; note: distillation processes the elements of each shard in
// reverse order, so `in0` is seen before `in3`.
{
{in3, {10}},
{in0, {10, 20}},
},
// shard 1
{
{in1, {20, 30, usr0}},
},
// shard 2
{
{in2, {30, 40, usr1}},
},
};
// Distill these 3 shards in different orders, observe different results.
// Mask 0: both user domains are discarded, so `usr0`/`usr1` never appear in
// the output. `in3` is expected to be dropped in every case below.
EXPECT_THAT(TestDistill(shards, {0, 1, 2}, test_info_->name(), 0),
UnorderedElementsAreArray({
EqualsTestCorpusRecord(in0, FeatureVec{10, 20}),
EqualsTestCorpusRecord(in1, FeatureVec{20, 30}),
EqualsTestCorpusRecord(in2, FeatureVec{30, 40}),
}));
// With shard 2 first, `in1` is expected to be dropped as well.
EXPECT_THAT(TestDistill(shards, {2, 0, 1}, test_info_->name(), 0),
UnorderedElementsAreArray({
EqualsTestCorpusRecord(in2, FeatureVec{30, 40}),
EqualsTestCorpusRecord(in0, FeatureVec{10, 20}),
}));
// Mask 0x1 enables user domain 0 only: `usr0` is kept and `in1` survives.
EXPECT_THAT(TestDistill(shards, {2, 0, 1}, test_info_->name(), 0x1),
UnorderedElementsAreArray({
EqualsTestCorpusRecord(in2, FeatureVec{30, 40}),
EqualsTestCorpusRecord(in0, FeatureVec{10, 20}),
EqualsTestCorpusRecord(in1, FeatureVec{20, 30, usr0}),
}));
// Mask 0x2 enables user domain 1 only: `usr1` is kept on `in2`.
EXPECT_THAT(TestDistill(shards, {2, 0, 1}, test_info_->name(), 0x2),
UnorderedElementsAreArray({
EqualsTestCorpusRecord(in2, FeatureVec{30, 40, usr1}),
EqualsTestCorpusRecord(in0, FeatureVec{10, 20}),
}));
// Mask 0x3 enables both user domains.
EXPECT_THAT(TestDistill(shards, {2, 0, 1}, test_info_->name(), 0x3),
UnorderedElementsAreArray({
EqualsTestCorpusRecord(in2, FeatureVec{30, 40, usr1}),
EqualsTestCorpusRecord(in0, FeatureVec{10, 20}),
EqualsTestCorpusRecord(in1, FeatureVec{20, 30, usr0}),
}));
// Back to mask 0 with yet another shard order.
EXPECT_THAT(TestDistill(shards, {1, 0, 2}, test_info_->name(), 0),
UnorderedElementsAreArray({
EqualsTestCorpusRecord(in1, FeatureVec{20, 30}),
EqualsTestCorpusRecord(in0, FeatureVec{10, 20}),
EqualsTestCorpusRecord(in2, FeatureVec{30, 40}),
}));
}
// TODO(kcc): add more tests once we settle on the testing code above.
} // namespace
} // namespace fuzztest::internal

View File

@ -0,0 +1,351 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/environment.h"
#include <algorithm>
#include <bitset>
#include <charconv>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <string>
#include <system_error> // NOLINT
#include <vector>
#include "absl/base/no_destructor.h"
#include "absl/container/flat_hash_map.h"
#include "absl/flags/marshalling.h"
#include "absl/log/check.h"
#include "absl/log/log.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_split.h"
#include "absl/strings/string_view.h"
#include "absl/time/time.h"
#include "./centipede/feature.h"
#include "./centipede/knobs.h"
#include "./centipede/util.h"
#include "./common/defs.h"
#include "./common/logging.h"
#include "./common/remote_file.h"
#include "./common/status_macros.h"
#include "./fuzztest/internal/configuration.h"
namespace fuzztest::internal {
namespace {
// Derives the per-batch timeout from the per-input timeout and the batch size.
// CHECK-fails if `batch_size` is 0. Returns 0 when `timeout_per_input` is 0.
size_t ComputeTimeoutPerBatch(size_t timeout_per_input, size_t batch_size) {
  CHECK_GT(batch_size, 0);
  if (timeout_per_input != 0) {
    // TODO(ussuri): The formula here is an unscientific heuristic conjured
    //  up for CPU instruction fuzzing. `timeout_per_input` is interpreted as
    //  the long tail of the input runtime distribution of yet-unknown nature.
    //  It might be the exponential, log-normal distribution or similar, and
    //  the distribution of the total time per batch could be modeled by the
    //  gamma distribution. Work out the math later. Right now, this naive
    //  formula gives ~18 min per batch with the input flags' defaults (this
    //  has worked in test runs so far).
    constexpr double kScale = 12;
    const double scaled_timeout = timeout_per_input / kScale;
    // Never assume less than one second per input on average.
    const double estimated_mean_time_per_input = std::max(scaled_timeout, 1.0);
    const double batch_seconds =
        std::log(estimated_mean_time_per_input + 1.0) * batch_size;
    return std::ceil(batch_seconds);
  }
  // NOTE: A zero `timeout_per_input` leaves the per-batch timeout at 0 too:
  // the implementation interprets both as "no limit".
  return 0;
}
} // namespace
const Environment &Environment::Default() {
  // A single default-constructed instance, created on first use and held in an
  // `absl::NoDestructor` wrapper.
  static absl::NoDestructor<Environment> default_instance;
  return *default_instance;
}
bool Environment::DumpCorpusTelemetryInThisShard() const {
  // Corpus stats are global across all shards on all machines, so only one
  // shard overall -- the one with index 0 -- reports them.
  constexpr size_t kReportingShard = 0;
  return my_shard_index == kReportingShard;
}
bool Environment::DumpRUsageTelemetryInThisShard() const {
  // Unlike the corpus stats, rusage stats are measured/dumped for each
  // Centipede process running on a separate machine. That duty goes to the
  // first shard (i.e. thread) on the machine: the one whose index is a
  // multiple of `num_threads`.
  const auto index_within_process = my_shard_index % num_threads;
  return index_within_process == 0;
}
bool Environment::DumpTelemetryForThisBatch(size_t batch_index) const {
  // A zero --telemetry_frequency never dumps.
  if (telemetry_frequency == 0) return false;
  // Otherwise, always dump for batch 0 (i.e. at the beginning of execution).
  if (batch_index == 0) return true;
  if (telemetry_frequency < 0) {
    // Special mode for negative --telemetry_frequency: dump when batch_index
    // is a power-of-two and is >= than 2^abs(--telemetry_frequency).
    // NOTE(review): the shift is performed on the int literal 1, so it is only
    // well-defined for small magnitudes -- confirm the accepted flag range.
    const bool is_power_of_two = ((batch_index - 1) & batch_index) == 0;
    return batch_index >= (1 << -telemetry_frequency) && is_power_of_two;
  }
  // Normal mode: dump when requested number of batches get processed.
  return batch_index % telemetry_frequency == 0;
}
std::bitset<feature_domains::kNumDomains> Environment::MakeDomainDiscardMask()
const {
constexpr size_t kNumUserDomains = std::size(feature_domains::kUserDomains);
std::bitset<kNumUserDomains> user_feature_domain_enabled(
user_feature_domain_mask);
std::bitset<feature_domains::kNumDomains> discard;
for (size_t i = 0; i < kNumUserDomains; ++i) {
if (!user_feature_domain_enabled.test(i)) {
discard.set(feature_domains::kUserDomains[i].domain_id());
}
}
return discard;
}
// Parses a boolean flag value.
// Returns true if `value` is one of "1", "true".
// Returns false if `value` is one of "0", "false".
// CHECK-fails otherwise.
static bool GetBoolFlag(std::string_view value) {
  if (value != "0" && value != "false") {
    // Anything that is not a "false" spelling must be a "true" spelling.
    CHECK(value == "1" || value == "true") << value;
    return true;
  }
  return false;
}
// Returns `value` as a size_t, CHECK-fails on parse error.
//
// The whole of `value` must be a valid number: `std::from_chars()` stops at the
// first character that does not belong to the number and still reports
// success, so the end pointer is checked too. Without that check, an input
// such as "12abc" would be silently accepted as 12.
static size_t GetIntFlag(std::string_view value) {
  size_t result{};
  const char *const end = value.data() + value.size();
  const auto [ptr, ec] = std::from_chars(value.data(), end, result);
  CHECK(ec == std::errc() && ptr == end) << value;
  return result;
}
void Environment::SetFlagForExperiment(std::string_view name,
std::string_view value) {
// TODO(kcc): support more flags, as needed.
// Handle bool flags.
absl::flat_hash_map<std::string, bool *> bool_flags{
{"use_cmp_features", &use_cmp_features},
{"use_auto_dictionary", &use_auto_dictionary},
{"use_dataflow_features", &use_dataflow_features},
{"use_counter_features", &use_counter_features},
{"use_pcpair_features", &use_pcpair_features},
{"use_coverage_frontier", &use_coverage_frontier},
{"use_legacy_default_mutator", &use_legacy_default_mutator},
};
auto bool_iter = bool_flags.find(name);
if (bool_iter != bool_flags.end()) {
*bool_iter->second = GetBoolFlag(value);
return;
}
// Handle int flags.
absl::flat_hash_map<std::string, size_t *> int_flags{
{"path_level", &path_level},
{"callstack_level", &callstack_level},
{"max_corpus_size", &max_corpus_size},
{"max_len", &max_len},
{"crossover_level", &crossover_level},
{"mutate_batch_size", &mutate_batch_size},
{"feature_frequency_threshold", &feature_frequency_threshold},
};
auto int_iter = int_flags.find(name);
if (int_iter != int_flags.end()) {
*int_iter->second = GetIntFlag(value);
return;
}
LOG(FATAL) << "Unknown flag for experiment: " << name << "=" << value;
}
// Applies the `--experiment` flag (stored in `experiment`) to this shard.
// Does nothing if `experiment` is empty. Otherwise selects flag combination
// number `my_shard_index % num_combinations`, sets the corresponding fields via
// `SetFlagForExperiment()`, fills in `experiment_name` and `experiment_flags`,
// and resets `load_other_shard_frequency` to 0.
void Environment::UpdateForExperiment() {
if (experiment.empty()) return;
// Parse the --experiment flag.
struct Experiment {
std::string flag_name;
std::vector<std::string> flag_values;
};
std::vector<Experiment> experiments;
// Entries are separated by ':'; each entry has the form `name=v1,v2,...`.
for (auto flag : absl::StrSplit(this->experiment, ':', absl::SkipEmpty())) {
std::vector<std::string> flag_and_value = absl::StrSplit(flag, '=');
CHECK_EQ(flag_and_value.size(), 2) << flag;
experiments.emplace_back(
Experiment{flag_and_value[0], absl::StrSplit(flag_and_value[1], ',')});
}
// Count the number of flag combinations.
size_t num_combinations = 1;
for (const auto &exp : experiments) {
CHECK_NE(exp.flag_values.size(), 0) << exp.flag_name;
num_combinations *= exp.flag_values.size();
}
CHECK_GT(num_combinations, 0);
// The combinations must divide evenly among the threads.
CHECK_EQ(num_threads % num_combinations, 0)
<< VV(num_threads) << VV(num_combinations);
// Update the flags for the current shard and compute experiment_name.
CHECK_LT(my_shard_index, num_threads);
size_t my_combination_num = my_shard_index % num_combinations;
experiment_name.clear();
experiment_flags.clear();
// Reverse the flags.
// This way, the flag combinations will go in natural order.
// E.g. for --experiment='foo=1,2,3:bar=10,20' the order of combinations is
// foo=1 bar=10
// foo=1 bar=20
// foo=2 bar=10 ...
// Alternative would be to iterate in reverse order with rbegin()/rend().
std::reverse(experiments.begin(), experiments.end());
// Decode `my_combination_num` as a mixed-radix number: the last flag of
// --experiment is the least significant digit. Each step prepends to
// `experiment_name` and `experiment_flags`, so both end up in the original
// flag order.
for (const auto &exp : experiments) {
size_t idx = my_combination_num % exp.flag_values.size();
SetFlagForExperiment(exp.flag_name, exp.flag_values[idx]);
my_combination_num /= exp.flag_values.size();
experiment_name = std::to_string(idx) + experiment_name;
experiment_flags =
exp.flag_name + "=" + exp.flag_values[idx] + ":" + experiment_flags;
}
experiment_name = "E" + experiment_name;
load_other_shard_frequency = 0; // The experiments should be independent.
}
void Environment::ReadKnobsFileIfSpecified() {
const std::string_view knobs_file_path = knobs_file;
if (knobs_file_path.empty()) return;
ByteArray knob_bytes;
auto *f = ValueOrDie(RemoteFileOpen(knobs_file, "r"));
CHECK(f) << "Failed to open remote file " << knobs_file;
CHECK_OK(RemoteFileRead(f, knob_bytes));
CHECK_OK(RemoteFileClose(f));
VLOG(1) << "Knobs: " << knob_bytes.size() << " knobs read from "
<< knobs_file;
knobs.Set(knob_bytes);
knobs.ForEachKnob([](std::string_view name, Knobs::value_type value) {
VLOG(1) << "knob " << name << ": " << static_cast<uint32_t>(value);
});
}
// Merges the settings in `config` (obtained from the target binary) into this
// environment: crash report limit, job count, per-input and per-batch
// timeouts, RSS and stack limits, and replay-only mode. For the job count and
// the limits, CHECK-fails when the current value is neither the default (or,
// for `timeout_per_input`, zero) nor equal to the corresponding `config` value.
void Environment::UpdateWithTargetConfig(
const fuzztest::internal::Configuration &config) {
// Allow more crashes to be reported when running with FuzzTest. This allows
// more unique crashes to be collected after deduplication. But we don't want
// to make the limit too large to stress the filesystem, so this is not a
// perfect solution. Currently we just increase the default to be seemingly
// large enough.
if (max_num_crash_reports == Default().max_num_crash_reports) {
max_num_crash_reports = 20;
LOG(INFO) << "Overriding the default max_num_crash_reports to "
<< max_num_crash_reports << " for FuzzTest.";
}
// A non-zero `config.jobs` determines the sharding: one shard per job, all
// of them run as threads of this process, starting at shard 0.
if (config.jobs != 0) {
CHECK(j == Default().j || j == config.jobs)
<< "Value for --j is inconsistent with the value for jobs in the "
"target binary:"
<< VV(j) << VV(config.jobs);
j = config.jobs;
total_shards = config.jobs;
num_threads = config.jobs;
my_shard_index = 0;
}
// Converts `duration` to whole seconds. An infinite duration maps to 0.
const auto convert_to_seconds =
[&](absl::Duration duration, absl::string_view duration_name) -> size_t {
if (duration == absl::InfiniteDuration()) return 0;
// Centipede's time-related fields are in seconds, so we need at least 1s.
CHECK_GE(duration, absl::Seconds(1))
<< duration_name << " must not be less than one second";
return static_cast<size_t>(absl::ToInt64Seconds(duration));
};
// Update `timeout_per_input` and consequently `timeout_per_batch`.
const size_t time_limit_per_input_sec =
convert_to_seconds(config.time_limit_per_input, "Time limit per input");
CHECK(timeout_per_input == 0 ||
timeout_per_input == Default().timeout_per_input ||
timeout_per_input == time_limit_per_input_sec)
<< "Value for --timeout_per_input is inconsistent with the value for "
"time_limit_per_input in the target binary:"
<< VV(timeout_per_input) << VV(config.time_limit_per_input);
// Compute the auto value from the OLD `timeout_per_input` first: if
// `timeout_per_batch` currently equals it, the call below recomputes
// `timeout_per_batch` from the new `timeout_per_input`.
const size_t autocomputed_timeout_per_batch =
ComputeTimeoutPerBatch(timeout_per_input, batch_size);
timeout_per_input = time_limit_per_input_sec;
UpdateTimeoutPerBatchIfEqualTo(autocomputed_timeout_per_batch);
// Adjust `timeout_per_batch` to never exceed the test time limit.
if (const auto test_time_limit = config.GetTimeLimitPerTest();
test_time_limit < absl::InfiniteDuration()) {
const size_t test_time_limit_seconds =
convert_to_seconds(test_time_limit, "Test time limit");
// A `timeout_per_batch` of 0 is replaced by the test time limit rather than
// being fed to `std::min()`, which would keep it at 0.
timeout_per_batch =
timeout_per_batch == 0
? test_time_limit_seconds
: std::min(timeout_per_batch, test_time_limit_seconds);
}
// Convert bytes to MB by rounding up.
constexpr auto bytes_to_mb = [](size_t bytes) {
return bytes == 0 ? 0 : (bytes - 1) / 1024 / 1024 + 1;
};
CHECK(rss_limit_mb == Default().rss_limit_mb ||
rss_limit_mb == bytes_to_mb(config.rss_limit))
<< "Value for --rss_limit_mb is inconsistent with the value for "
"rss_limit in the target binary:"
<< VV(rss_limit_mb) << VV(config.rss_limit);
rss_limit_mb = bytes_to_mb(config.rss_limit);
// Convert bytes to KB by rounding up.
constexpr auto bytes_to_kb = [](size_t bytes) {
return bytes == 0 ? 0 : (bytes - 1) / 1024 + 1;
};
CHECK(stack_limit_kb == Default().stack_limit_kb ||
stack_limit_kb == bytes_to_kb(config.stack_limit))
<< "Value for --stack_limit_kb is inconsistent with the value for "
"stack_limit in the target binary:"
<< VV(stack_limit_kb) << VV(config.stack_limit);
stack_limit_kb = bytes_to_kb(config.stack_limit);
// Replay-only mode: only load the shards and skip populating binary info.
if (config.only_replay) {
load_shards_only = true;
populate_binary_info = false;
}
}
// Recomputes `timeout_per_batch` from `timeout_per_input` and `batch_size`,
// but only if it currently equals `val`; any other value is left untouched.
void Environment::UpdateTimeoutPerBatchIfEqualTo(size_t val) {
  if (timeout_per_batch == val) {
    timeout_per_batch = ComputeTimeoutPerBatch(timeout_per_input, batch_size);
    VLOG(1) << "--timeout_per_batch auto-computed: " << timeout_per_batch
            << " sec (see --help for details)";
  }
}
// Fills in `binary_hash` with the hash of the contents of `coverage_binary`,
// unless a hash has already been set.
void Environment::UpdateBinaryHashIfEmpty() {
  if (!binary_hash.empty()) return;
  binary_hash = HashOfFileContents(coverage_binary);
}
// Returns command-line arguments of the form `--NAME=VALUE` for every
// CENTIPEDE_FLAG field of this environment whose value differs from the one in
// `Default()`. Values are formatted with `absl::UnparseFlag()`.
std::vector<std::string> Environment::CreateFlags() const {
std::vector<std::string> flags;
// The include below expands this macro once per entry of centipede_flags.inc.
#define CENTIPEDE_FLAG(_TYPE, NAME, _DEFAULT, _DESC) \
if (NAME != Default().NAME) { \
flags.push_back(absl::StrCat("--" #NAME "=", absl::UnparseFlag(NAME))); \
}
#include "./centipede/centipede_flags.inc"
#undef CENTIPEDE_FLAG
return flags;
}
} // namespace fuzztest::internal

View File

@ -0,0 +1,140 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef THIRD_PARTY_CENTIPEDE_ENVIRONMENT_H_
#define THIRD_PARTY_CENTIPEDE_ENVIRONMENT_H_
#include <bitset>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <string>
#include <string_view>
#include <vector>
#include "absl/time/time.h"
#include "./centipede/feature.h"
#include "./centipede/knobs.h"
#include "./fuzztest/internal/configuration.h"
namespace fuzztest::internal {
// Fuzzing environment controlling the behavior of
// CentipedeMain(). Centipede binaries are creating Environment instances using
// the flags defined in environment_flags.cc, while other users can use
// CentipedeMain() as a library function without importing the flags.
struct Environment {
// One data member per CENTIPEDE_FLAG entry of centipede_flags.inc, each
// initialized to that entry's default value.
#define CENTIPEDE_FLAG(TYPE, NAME, DEFAULT, _DESC) TYPE NAME = DEFAULT;
#include "./centipede/centipede_flags.inc"
#undef CENTIPEDE_FLAG
// Command line-related fields -----------------------------------------------
std::string exec_name; // copied from argv[0]
std::vector<std::string> args; // copied from argv[1:].
std::string binary_name; // Name of `coverage_binary`, w/o directories.
bool has_input_wildcards = false; // Set to true iff `binary` contains "@@".
// Experiment-related settings -----------------------------------------------
std::string experiment_name; // Set by `UpdateForExperiment`.
std::string experiment_flags; // Set by `UpdateForExperiment`.
// Other ---------------------------------------------------------------------
Knobs knobs; // Read from a file by `ReadKnobsFileIfSpecified`, see knobs.h.
// Defines internal logging level. Set to zero to reduce logging in tests.
// TODO(ussuri): Retire in favor of VLOGs?
size_t log_level = 1;
// Path to a file with PCs. This file is created and the field is set in
// `CentipedeMain()` once per process if trace_pc instrumentation is detected.
std::string pcs_file_path;
// APIs ----------------------------------------------------------------------
// Returns an instance of the environment with default values.
static const Environment& Default();
// Should certain actions be performed ---------------------------------------
// Returns true if we want to log features as symbols in this shard.
bool LogFeaturesInThisShard() const {
return my_shard_index < log_features_shards;
}
// Returns true if we want to generate the corpus telemetry files (coverage
// report, corpus stats, etc.) in this shard.
bool DumpCorpusTelemetryInThisShard() const;
// Returns true if we want to generate the resource usage report in this
// shard. See the related RUsageTelemetryScope().
bool DumpRUsageTelemetryInThisShard() const;
// Returns true if we want to generate the telemetry files (coverage report,
// the corpus stats, etc.) after processing `batch_index`-th batch.
bool DumpTelemetryForThisBatch(size_t batch_index) const;
// Returns a bitmask indicating which domains Centipede should discard.
std::bitset<feature_domains::kNumDomains> MakeDomainDiscardMask() const;
// Experiment-related functions ----------------------------------------------
// Updates `this` according to the `--experiment` flag.
// The `--experiment` flag, if not empty, has this form:
// foo=1,2,3:bar=10,20
// where foo and bar are some of the flag names supported for experimentation,
// see `SetFlagForExperiment()`.
// `--experiment` defines the flag values to be set differently in different
// shards. E.g. in this case,
// shard 0 will have {foo=1,bar=10},
// shard 1 will have {foo=1,bar=20},
// shard 2 will have {foo=2,bar=10},
// ...
// shard 5 will have {foo=3,bar=20},
// after which the combinations repeat (shard 6 is like shard 0, and so on).
//
// CHECK-fails if the `--experiment` flag is not well-formed,
// or if num_threads is not a multiple of the number of flag combinations
// (which is 6 in this example).
//
// Sets load_other_shard_frequency=0 (experiments should be independent).
//
// Sets this->experiment_name to a string like "E01",
// which means "value #0 is used for foo and value #1 is used for bar".
// Also sets this->experiment_flags to the chosen `name=value:` pairs.
void UpdateForExperiment();
// Sets flag 'name' to `value` for an experiment. CHECK-fails on
// invalid name/value combination. Used in `UpdateForExperiment()`.
void SetFlagForExperiment(std::string_view name, std::string_view value);
// Other ---------------------------------------------------------------------
// Reads `knobs` from `knobs_file`. Does nothing if the `knobs_file` is empty.
void ReadKnobsFileIfSpecified();
// Updates `this` with `config` obtained from the target binary. CHECK-fails
// if the fields are non-default and inconsistent with the corresponding
// values in `config`.
void UpdateWithTargetConfig(const fuzztest::internal::Configuration& config);
// If `timeout_per_batch` is `val`, computes it as a function of
// `timeout_per_input` and `batch_size` and updates it. Otherwise, leaves it
// unchanged.
void UpdateTimeoutPerBatchIfEqualTo(size_t val);
// If `binary_hash` is empty, updates it using the file in `coverage_binary`.
void UpdateBinaryHashIfEmpty();
// Returns `--NAME=VALUE` command-line arguments for every CENTIPEDE_FLAG
// field whose value differs from the one in `Default()`.
std::vector<std::string> CreateFlags() const;
};
} // namespace fuzztest::internal
#endif // THIRD_PARTY_CENTIPEDE_ENVIRONMENT_H_

View File

@ -0,0 +1,139 @@
// Copyright 2023 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/environment_flags.h"
#include <cstdlib>
#include <filesystem> // NOLINT
#include <string>
#include <vector>
#include "absl/flags/flag.h"
#include "absl/log/check.h"
#include "absl/log/log.h"
#include "absl/strings/match.h"
#include "absl/strings/str_split.h"
#include "absl/time/clock.h"
#include "absl/time/time.h"
#include "./centipede/environment.h"
#include "./common/logging.h"
using ::fuzztest::internal::Environment;
// Defines one Abseil command-line flag per CENTIPEDE_FLAG entry of
// centipede_flags.inc, using the entry's type, name, default and description.
#define CENTIPEDE_FLAG(TYPE, NAME, DEFAULT, DESC) \
ABSL_FLAG(TYPE, NAME, DEFAULT, DESC);
#include "./centipede/centipede_flags.inc"
#undef CENTIPEDE_FLAG
// Defines flag --ALIAS_NAME as an alias of --ORIGINAL_NAME: the alias has the
// same type and default as the corresponding `Environment` field, and every
// update of the alias copies its value into the original flag.
#define CENTIPEDE_FLAG_ALIAS(ALIAS_NAME, ORIGINAL_NAME) \
ABSL_FLAG(decltype(Environment::Default().ORIGINAL_NAME), ALIAS_NAME, \
Environment::Default().ORIGINAL_NAME, \
"Alias of --" #ORIGINAL_NAME) \
.OnUpdate([]() { \
absl::SetFlag(&FLAGS_##ORIGINAL_NAME, \
absl::GetFlag(FLAGS_##ALIAS_NAME)); \
});
// The supported aliases, as (alias, original) pairs.
CENTIPEDE_FLAG_ALIAS(first_shard_index, my_shard_index)
CENTIPEDE_FLAG_ALIAS(timeout, timeout_per_input)
CENTIPEDE_FLAG_ALIAS(num_crash_reports, max_num_crash_reports)
CENTIPEDE_FLAG_ALIAS(minimize_crash, minimize_crash_file_path)
#undef CENTIPEDE_FLAG_ALIAS
// A relative alternative to --stop_at. It has no `Environment` field of its
// own: it is folded into `stop_at` when the environment is created from flags.
ABSL_FLAG(absl::Duration, stop_after, absl::InfiniteDuration(),
"Equivalent to setting --stop_at to the current date/time + this "
"duration. These two flags are mutually exclusive.");
// Kept only as a retired flag; see the message for the replacement.
ABSL_RETIRED_FLAG(size_t, distill_shards, 0,
"No longer supported: use --distill instead.");
namespace fuzztest::internal {
namespace {
// Resolves the effective stop time from the values of --stop_at and
// --stop_after. A value counts as "specified" when it differs from its infinite
// default; CHECK-fails if both are specified. Returns `stop_at` if it was
// specified, now + `stop_after` if that was specified, and the infinite future
// otherwise.
absl::Time GetStopAtTime(absl::Time stop_at, absl::Duration stop_after) {
  const bool stop_at_is_non_default = stop_at != absl::InfiniteFuture();
  const bool stop_after_is_non_default = stop_after != absl::InfiniteDuration();
  CHECK_LE(stop_at_is_non_default + stop_after_is_non_default, 1)
      << "At most one of --stop_at and --stop_after should be specified, "
         "including via --config file: "
      << VV(stop_at) << VV(stop_after);
  // An explicit absolute time wins.
  if (stop_at_is_non_default) return stop_at;
  // Neither flag was given: never stop.
  if (!stop_after_is_non_default) return absl::InfiniteFuture();
  // Only the relative duration was given: anchor it to the current time.
  return absl::Now() + stop_after;
}
} // namespace
// Builds an Environment from the current values of the command-line flags and
// then derives or validates the fields that depend on each other.
//
// `argv`: if non-empty, argv[0] becomes `exec_name` and the remaining elements
// are appended to `args`.
Environment CreateEnvironmentFromFlags(const std::vector<std::string> &argv) {
// X-macro: expands to one absl::GetFlag(FLAGS_<name>) initializer per entry of
// centipede_flags.inc, in the order the entries are listed there.
// NOTE(review): this relies on the Environment members being declared in the
// same order as the .inc entries - confirm in environment.h.
Environment env_from_flags = {
#define CENTIPEDE_FLAG(_TYPE, NAME, _DEFAULT, _DESC) \
absl::GetFlag(FLAGS_##NAME),
#include "./centipede/centipede_flags.inc"
#undef CENTIPEDE_FLAG
};
// Fold --stop_after into stop_at (the two are mutually exclusive).
env_from_flags.stop_at =
GetStopAtTime(env_from_flags.stop_at, absl::GetFlag(FLAGS_stop_after));
// Without an explicit coverage binary, use the first space-separated token of
// `binary`.
if (env_from_flags.coverage_binary.empty()) {
env_from_flags.coverage_binary =
*absl::StrSplit(env_from_flags.binary, ' ').begin();
}
// binary_name is the file-name component of the coverage binary path.
env_from_flags.binary_name =
std::filesystem::path(env_from_flags.coverage_binary).filename().string();
env_from_flags.UpdateBinaryHashIfEmpty();
// Only touches timeout_per_batch if it still equals the default value.
env_from_flags.UpdateTimeoutPerBatchIfEqualTo(
Environment::Default().timeout_per_batch);
// A non-zero --j overrides the sharding setup: j shards, j threads, starting
// at shard 0.
if (size_t j = absl::GetFlag(FLAGS_j)) {
env_from_flags.total_shards = j;
env_from_flags.num_threads = j;
env_from_flags.my_shard_index = 0;
}
// Sanity checks: the shard range [my_shard_index, my_shard_index + num_threads)
// must lie within [0, total_shards).
CHECK_GE(env_from_flags.total_shards, 1);
CHECK_GE(env_from_flags.batch_size, 1);
CHECK_GE(env_from_flags.num_threads, 1);
CHECK_LE(env_from_flags.num_threads, env_from_flags.total_shards);
CHECK_LE(env_from_flags.my_shard_index + env_from_flags.num_threads,
env_from_flags.total_shards)
<< VV(env_from_flags.my_shard_index) << VV(env_from_flags.num_threads);
// Record the executable name and the remaining command-line arguments.
if (!argv.empty()) {
env_from_flags.exec_name = argv[0];
for (size_t i = 1; i < argv.size(); ++i) {
env_from_flags.args.emplace_back(argv[i]);
}
}
// The clang coverage binary, if given, is added to the extra binaries.
if (!env_from_flags.clang_coverage_binary.empty())
env_from_flags.extra_binaries.push_back(
env_from_flags.clang_coverage_binary);
// "@@" in the binary command line switches to standalone mode, which forces
// batch_size to 1.
if (absl::StrContains(env_from_flags.binary, "@@")) {
LOG(INFO) << "@@ detected; running in standalone mode with batch_size=1";
env_from_flags.has_input_wildcards = true;
env_from_flags.batch_size = 1;
// TODO(kcc): do we need to check if extra_binaries have @@?
}
env_from_flags.ReadKnobsFileIfSpecified();
return env_from_flags;
}
} // namespace fuzztest::internal

View File

@ -0,0 +1,32 @@
// Copyright 2023 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef THIRD_PARTY_CENTIPEDE_ENVIRONMENT_FLAGS_H_
#define THIRD_PARTY_CENTIPEDE_ENVIRONMENT_FLAGS_H_
#include <string>
#include <vector>
#include "./centipede/environment.h"
namespace fuzztest::internal {
// Creates an Environment object from the command line flags defined in
// environment_flags.cc.
//
// `argv` is optional: when non-empty, its first element is recorded as the
// executable name and the remaining elements as the arguments.
Environment CreateEnvironmentFromFlags(
const std::vector<std::string> &argv = {});
} // namespace fuzztest::internal
#endif // THIRD_PARTY_CENTIPEDE_ENVIRONMENT_FLAGS_H_

View File

@ -0,0 +1,222 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/environment.h"
#include <cstddef>
#include <string_view>
#include "gtest/gtest.h"
#include "absl/log/check.h"
#include "absl/time/time.h"
#include "./fuzztest/internal/configuration.h"
namespace fuzztest::internal {
// Checks that UpdateForExperiment() derives per-shard settings from
// `experiment`. Two values of use_cmp_features times three values of
// path_level give six combinations; the expectations show the 12 shard indices
// cycling through them (shard 6 repeats shard 0, and so on).
TEST(Environment, UpdateForExperiment) {
Environment env;
env.num_threads = 12;
env.experiment = "use_cmp_features=false,true:path_level=10,20,30";
// Selects `shard_index`, re-runs UpdateForExperiment(), and compares the
// resulting flag values, experiment name and experiment flag string.
auto Experiment = [&](size_t shard_index, bool val1, size_t val2,
std::string_view experiment_name,
std::string_view experiment_flags) {
env.my_shard_index = shard_index;
env.UpdateForExperiment();
EXPECT_EQ(env.load_other_shard_frequency, 0);
EXPECT_EQ(env.use_cmp_features, val1);
EXPECT_EQ(env.path_level, val2);
EXPECT_EQ(env.experiment_name, experiment_name);
EXPECT_EQ(env.experiment_flags, experiment_flags);
};
// In the expected names "E<i><j>", i and j are the positions of the chosen
// values in the two value lists above.
Experiment(0, false, 10, "E00", "use_cmp_features=false:path_level=10:");
Experiment(1, false, 20, "E01", "use_cmp_features=false:path_level=20:");
Experiment(2, false, 30, "E02", "use_cmp_features=false:path_level=30:");
Experiment(3, true, 10, "E10", "use_cmp_features=true:path_level=10:");
Experiment(4, true, 20, "E11", "use_cmp_features=true:path_level=20:");
Experiment(5, true, 30, "E12", "use_cmp_features=true:path_level=30:");
Experiment(6, false, 10, "E00", "use_cmp_features=false:path_level=10:");
Experiment(7, false, 20, "E01", "use_cmp_features=false:path_level=20:");
Experiment(8, false, 30, "E02", "use_cmp_features=false:path_level=30:");
Experiment(9, true, 10, "E10", "use_cmp_features=true:path_level=10:");
Experiment(10, true, 20, "E11", "use_cmp_features=true:path_level=20:");
Experiment(11, true, 30, "E12", "use_cmp_features=true:path_level=30:");
}
// Checks that a target config with `jobs` set overrides the sharding fields:
// j, total_shards and num_threads take the jobs value and my_shard_index is
// reset to 0.
TEST(Environment, UpdatesNumberOfShardsAndThreadsFromTargetConfigJobs) {
Environment env;
// Pre-existing values that are expected to be overwritten.
env.total_shards = 20;
env.my_shard_index = 10;
env.num_threads = 5;
fuzztest::internal::Configuration config;
config.jobs = 10;
env.UpdateWithTargetConfig(config);
EXPECT_EQ(env.j, 10);
EXPECT_EQ(env.total_shards, 10);
EXPECT_EQ(env.my_shard_index, 0);
EXPECT_EQ(env.num_threads, 10);
}
// Checks that UpdateWithTargetConfig() terminates the process with the quoted
// message when `j` is already 10 and the target config asks for 20 jobs.
TEST(Environment, DiesOnInconsistentJAndTargetConfigJobs) {
Environment env;
env.j = 10;
fuzztest::internal::Configuration config;
config.jobs = 20;
EXPECT_DEATH(env.UpdateWithTargetConfig(config),
"Value for --j is inconsistent with the value for jobs in the "
"target binary");
}
// Checks UpdateTimeoutPerBatchIfEqualTo(): when timeout_per_batch equals the
// given value (0 here) it is replaced by a positive value, and when it differs
// (123) it is left untouched.
TEST(Environment, UpdatesTimeoutPerBatchFromTimeoutPerInputAndBatchSize) {
Environment env;
env.batch_size = 1000;
env.timeout_per_input = 100;
env.timeout_per_batch = 0;
env.UpdateTimeoutPerBatchIfEqualTo(0);
EXPECT_GT(env.timeout_per_batch, 0);
// A value different from the one passed in must be preserved.
env.timeout_per_batch = 123;
env.UpdateTimeoutPerBatchIfEqualTo(0);
EXPECT_EQ(env.timeout_per_batch, 123);
}
// Checks that, with timeout_per_input at its default, a finite
// time_limit_per_input of 456 seconds in the target config sets
// timeout_per_input to 456.
TEST(Environment,
UpdatesTimeoutPerInputFromFiniteTargetConfigTimeLimitPerInput) {
Environment env;
env.timeout_per_input = Environment::Default().timeout_per_input;
fuzztest::internal::Configuration config;
config.time_limit_per_input = absl::Seconds(456);
env.UpdateWithTargetConfig(config);
EXPECT_EQ(env.timeout_per_input, 456);
}
// Checks that, with timeout_per_input at its default, an infinite
// time_limit_per_input in the target config sets timeout_per_input to 0.
TEST(Environment,
UpdatesTimeoutPerInputFromInfiniteTargetConfigTimeLimitPerInput) {
Environment env;
env.timeout_per_input = Environment::Default().timeout_per_input;
fuzztest::internal::Configuration config;
config.time_limit_per_input = absl::InfiniteDuration();
env.UpdateWithTargetConfig(config);
EXPECT_EQ(env.timeout_per_input, 0);
}
// Checks that UpdateWithTargetConfig() terminates the process with the quoted
// message when timeout_per_input was set to 123 and the target config
// specifies a time_limit_per_input of 456 seconds.
TEST(Environment,
DiesOnInconsistentTimeoutPerInputAndTargetConfigTimeLimitPerInput) {
Environment env;
env.timeout_per_input = 123;
fuzztest::internal::Configuration config;
config.time_limit_per_input = absl::Seconds(456);
EXPECT_DEATH(
env.UpdateWithTargetConfig(config),
"Value for --timeout_per_input is inconsistent with the value for "
"time_limit_per_input in the target binary");
}
// Checks that applying a target config with a finite time_limit_per_input
// changes a timeout_per_batch that had been auto-computed from the default
// settings.
TEST(Environment,
UpdatesTimeoutPerBatchFromFiniteTargetConfigTimeLimitPerInput) {
Environment env;
env.timeout_per_input = Environment::Default().timeout_per_input;
env.UpdateTimeoutPerBatchIfEqualTo(Environment::Default().timeout_per_batch);
// Remember the value computed before the target config is applied.
const size_t autocomputed_timeout_per_batch = env.timeout_per_batch;
fuzztest::internal::Configuration config;
config.time_limit_per_input = absl::Seconds(456);
env.UpdateWithTargetConfig(config);
EXPECT_NE(env.timeout_per_batch, autocomputed_timeout_per_batch);
}
// Checks that applying a target config with an infinite time_limit_per_input
// resets an auto-computed timeout_per_batch to 0.
TEST(Environment,
UpdatesTimeoutPerBatchFromInfiniteTargetConfigTimeLimitPerInput) {
Environment env;
env.timeout_per_input = Environment::Default().timeout_per_input;
env.UpdateTimeoutPerBatchIfEqualTo(Environment::Default().timeout_per_batch);
fuzztest::internal::Configuration config;
config.time_limit_per_input = absl::InfiniteDuration();
env.UpdateWithTargetConfig(config);
EXPECT_EQ(env.timeout_per_batch, 0);
}
// Checks how a per-test time limit of 123 seconds in the target config
// interacts with timeout_per_batch. The limit is adopted when
// timeout_per_batch is unset or larger (456), and ignored when
// timeout_per_batch is already smaller (56). The three steps reuse the same
// `env`, so their order matters.
TEST(Environment, UpdatesTimeoutPerBatchFromTargetConfigTimeLimit) {
Environment env;
fuzztest::internal::Configuration config;
config.time_limit = absl::Seconds(123);
config.time_budget_type = fuzztest::internal::TimeBudgetType::kPerTest;
// Precondition of the test itself, hence CHECK rather than EXPECT.
CHECK(config.GetTimeLimitPerTest() == absl::Seconds(123));
env.UpdateWithTargetConfig(config);
EXPECT_EQ(env.timeout_per_batch, 123)
<< "`timeout_per_batch` should be set to the test time limit when it was "
"previously unset";
env.timeout_per_batch = 456;
env.UpdateWithTargetConfig(config);
EXPECT_EQ(env.timeout_per_batch, 123)
<< "`timeout_per_batch` should be set to test time limit when it is "
"shorter than the previous value";
env.timeout_per_batch = 56;
env.UpdateWithTargetConfig(config);
EXPECT_EQ(env.timeout_per_batch, 56)
<< "`timeout_per_batch` should not be updated with the test time limit "
"when it is longer than the previous value";
}
// Checks that, with rss_limit_mb at its default, a target config rss_limit of
// 5 * 1024^3 results in rss_limit_mb == 5 * 1024.
TEST(Environment, UpdatesRssLimitMbFromTargetConfigRssLimit) {
Environment env;
env.rss_limit_mb = Environment::Default().rss_limit_mb;
fuzztest::internal::Configuration config;
config.rss_limit = 5UL * 1024 * 1024 * 1024;
env.UpdateWithTargetConfig(config);
EXPECT_EQ(env.rss_limit_mb, 5 * 1024);
}
// Checks that UpdateWithTargetConfig() terminates the process with the quoted
// message when rss_limit_mb was set to 123 and the target config specifies an
// rss_limit of 5 * 1024^3.
TEST(Environment, DiesOnInconsistentRssLimitMbAndTargetConfigRssLimit) {
Environment env;
env.rss_limit_mb = 123;
fuzztest::internal::Configuration config;
config.rss_limit = 5UL * 1024 * 1024 * 1024;
EXPECT_DEATH(
env.UpdateWithTargetConfig(config),
"Value for --rss_limit_mb is inconsistent with the value for rss_limit "
"in the target binary");
}
// Checks that, with stack_limit_kb at its default, a target config stack_limit
// of 5 * 1024 results in stack_limit_kb == 5.
TEST(Environment, UpdatesStackLimitKbFromTargetConfigStackLimit) {
Environment env;
env.stack_limit_kb = Environment::Default().stack_limit_kb;
fuzztest::internal::Configuration config;
config.stack_limit = 5UL * 1024;
env.UpdateWithTargetConfig(config);
EXPECT_EQ(env.stack_limit_kb, 5);
}
// Checks that UpdateWithTargetConfig() terminates the process with the quoted
// message when stack_limit_kb was set to 123 and the target config specifies a
// stack_limit of 5 * 1024.
TEST(Environment, DiesOnInconsistentStackLimitKbAndTargetConfigStackLimit) {
Environment env;
env.stack_limit_kb = 123;
fuzztest::internal::Configuration config;
config.stack_limit = 5UL * 1024;
EXPECT_DEATH(env.UpdateWithTargetConfig(config),
"Value for --stack_limit_kb is inconsistent with the value for "
"stack_limit in the target binary");
}
// Checks that a target config with only_replay set turns on load_shards_only
// and turns off populate_binary_info.
TEST(Environment, UpdatesReplayOnlyConfiguration) {
Environment env;
fuzztest::internal::Configuration config;
config.only_replay = true;
env.UpdateWithTargetConfig(config);
EXPECT_TRUE(env.load_shards_only);
EXPECT_FALSE(env.populate_binary_info);
}
} // namespace fuzztest::internal

View File

@ -0,0 +1,58 @@
// Copyright 2023 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/execution_metadata.h"
#include <cstddef>
#include <functional>
#include "./centipede/shared_memory_blob_sequence.h"
#include "./common/defs.h"
namespace fuzztest::internal {
// Appends one CMP entry (size byte, bytes of `a`, bytes of `b`) to `cmp_data`.
// Returns false, leaving `cmp_data` untouched, if the operands differ in
// length or are 256 bytes or longer; returns true otherwise.
bool ExecutionMetadata::AppendCmpEntry(ByteSpan a, ByteSpan b) {
  const size_t operand_size = a.size();
  const bool sizes_match = operand_size == b.size();
  // The operand length is stored in a single byte, so it has to be below 256.
  if (!sizes_match || operand_size >= 256) return false;
  cmp_data.push_back(operand_size);
  cmp_data.insert(cmp_data.end(), a.begin(), a.end());
  cmp_data.insert(cmp_data.end(), b.begin(), b.end());
  return true;
}
// Writes the raw `cmp_data` bytes to `outputs_blobseq` as a single blob
// labelled with `tag`, and returns the result of BlobSequence::Write().
bool ExecutionMetadata::Write(Blob::SizeAndTagT tag,
BlobSequence &outputs_blobseq) const {
// The braced list is {tag, size in bytes, pointer to the bytes}.
return outputs_blobseq.Write({tag, cmp_data.size(), cmp_data.data()});
}
// Replaces `cmp_data` with a copy of the `blob.size` bytes starting at
// `blob.data`. The blob's tag is not inspected here.
void ExecutionMetadata::Read(Blob blob) {
  const auto first = blob.data;
  const auto last = first + blob.size;
  cmp_data.assign(first, last);
}
// Walks `cmp_data` entry by entry and invokes `callback(a, b)` for each one.
// An entry is a size byte followed by two operands of that many bytes each.
// Returns false as soon as an entry would extend past the end of `cmp_data`
// (entries before it have already been reported); returns true once all of
// the data has been consumed.
bool ExecutionMetadata::ForEachCmpEntry(
    std::function<void(ByteSpan, ByteSpan)> callback) const {
  for (size_t cursor = 0; cursor < cmp_data.size();) {
    const size_t operand_size = cmp_data[cursor];
    const size_t entry_size = 1 + 2 * operand_size;
    if (cursor + entry_size > cmp_data.size()) return false;
    const auto *first_operand = cmp_data.data() + cursor + 1;
    const auto *second_operand = first_operand + operand_size;
    cursor += entry_size;
    callback(ByteSpan(first_operand, operand_size),
             ByteSpan(second_operand, operand_size));
  }
  return true;
}
} // namespace fuzztest::internal

View File

@ -0,0 +1,60 @@
// Copyright 2023 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Abstraction of metadata collected from executions that does not
// contribute to coverage but can be useful in mutation.
//
// This library is for both engine and runner.
#ifndef THIRD_PARTY_CENTIPEDE_EXECUTION_METADATA_H_
#define THIRD_PARTY_CENTIPEDE_EXECUTION_METADATA_H_
#include <functional>
#include "./centipede/shared_memory_blob_sequence.h"
#include "./common/defs.h"
namespace fuzztest::internal {
// Holds the CMP (comparison operand) entries collected from an execution,
// packed into one byte array (`cmp_data`).
struct ExecutionMetadata {
// Appends a CMP entry comparing `a` and `b` to the metadata. Returns false
// without modifying the metadata if `a` and `b` differ in size or are 256
// bytes or longer. Returns true otherwise.
bool AppendCmpEntry(ByteSpan a, ByteSpan b);
// Enumerates through all CMP entries in the metadata by calling
// `callback` on each of them, in storage order. Returns false when it reaches
// an entry that extends past the end of the data; `callback` has then already
// been called for the entries before it. Returns true otherwise.
bool ForEachCmpEntry(std::function<void(ByteSpan, ByteSpan)> callback) const;
// Writes the contents to `outputs_blobseq` with header `tag`. Returns true
// iff successful.
bool Write(Blob::SizeAndTagT tag, BlobSequence &outputs_blobseq) const;
// Reads the contents from `blob`, replacing any existing contents.
//
// Note that the method does not check the blob tag, it should be checked by
// the method users.
void Read(Blob blob);
// CMP entries are stored in one large ByteArray to minimize RAM consumption.
// One CMP arg pair is stored as
// * `size` (1-byte value)
// * `value0` (`size` bytes)
// * `value1` (`size` bytes)
ByteArray cmp_data;
};
} // namespace fuzztest::internal
#endif // THIRD_PARTY_CENTIPEDE_EXECUTION_METADATA_H_

View File

@ -0,0 +1,122 @@
// Copyright 2023 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/execution_metadata.h"
#include <cstdint>
#include <utility>
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "./centipede/shared_memory_blob_sequence.h"
#include "./common/defs.h"
namespace fuzztest::internal {
namespace {
using ::testing::IsEmpty;
using ::testing::UnorderedElementsAreArray;
// Checks that ForEachCmpEntry() decodes hand-written raw `cmp_data` holding
// three entries, one of them zero-sized, and reports each operand pair.
TEST(ExecutionMetadata, ForEachCmpEntryEnumeratesEntriesInRawBytes) {
ExecutionMetadata metadata;
metadata.cmp_data = {
2, // size
1, 2, // a
3, 4, // b
0, // zero-sized entry
3, // size
5, 6, 7, // a
8, 9, 10, // b
};
std::vector<std::pair<ByteSpan, ByteSpan>> enumeration_result;
EXPECT_TRUE(metadata.ForEachCmpEntry(
[&](ByteSpan a, ByteSpan b) { enumeration_result.emplace_back(a, b); }));
// The comparison ignores the order in which the entries were reported.
EXPECT_THAT(
enumeration_result,
UnorderedElementsAreArray(std::vector<std::pair<ByteSpan, ByteSpan>>{
{{1, 2}, {3, 4}},
{{}, {}},
{{5, 6, 7}, {8, 9, 10}},
}));
}
// Checks that ForEachCmpEntry() returns true for metadata without any data.
TEST(ExecutionMetadata, ForEachCmpEntryHandlesEmptyCmpData) {
auto noop_callback = [](ByteSpan, ByteSpan) {};
EXPECT_TRUE(ExecutionMetadata{}.ForEachCmpEntry(noop_callback));
}
// Checks that ForEachCmpEntry() returns false when an entry declares more
// operand bytes than remain in `cmp_data`.
TEST(ExecutionMetadata,
ForEachCmpEntryReturnsFalseOnCmpDataWithNotEnoughBytes) {
auto noop_callback = [](ByteSpan, ByteSpan) {};
// Size 3 needs 6 operand bytes; only 3 are present.
auto bad_metadata_1 = ExecutionMetadata{};
bad_metadata_1.cmp_data = {3, 1, 2, 3};
EXPECT_FALSE(bad_metadata_1.ForEachCmpEntry(noop_callback));
// Size 3 needs 6 operand bytes; only 5 are present.
auto bad_metadata_2 = ExecutionMetadata{};
bad_metadata_2.cmp_data = {3, 1, 2, 3, 4, 5};
EXPECT_FALSE(bad_metadata_2.ForEachCmpEntry(noop_callback));
}
// Checks the round trip: an entry added with AppendCmpEntry() is reported
// unchanged by ForEachCmpEntry().
TEST(ExecutionMetadata, ForEachCmpEntryEnumeratesEntriesFromAppendCmpEntry) {
ExecutionMetadata metadata;
ASSERT_TRUE(metadata.AppendCmpEntry({1, 2}, {3, 4}));
std::vector<std::pair<ByteSpan, ByteSpan>> enumeration_result;
EXPECT_TRUE(metadata.ForEachCmpEntry(
[&](ByteSpan a, ByteSpan b) { enumeration_result.emplace_back(a, b); }));
EXPECT_THAT(
enumeration_result,
UnorderedElementsAreArray(std::vector<std::pair<ByteSpan, ByteSpan>>{
{{1, 2}, {3, 4}},
}));
}
// Checks that AppendCmpEntry() rejects operands of different sizes and
// operands of 256 bytes, and that the rejected calls add no entries.
TEST(ExecutionMetadata, AppendCmpEntryReturnsFalseAndSkipsOnBadArgs) {
ExecutionMetadata metadata;
// Sizes don't match.
EXPECT_FALSE(metadata.AppendCmpEntry({}, {1}));
ByteArray long_byte_array;
long_byte_array.resize(256);
// Args too long.
EXPECT_FALSE(metadata.AppendCmpEntry(long_byte_array, long_byte_array));
// Should leave no entries and keep metadata well-formed.
std::vector<std::pair<ByteSpan, ByteSpan>> enumeration_result;
EXPECT_TRUE(metadata.ForEachCmpEntry(
[&](ByteSpan a, ByteSpan b) { enumeration_result.emplace_back(a, b); }));
EXPECT_THAT(enumeration_result, IsEmpty());
}
// Checks that metadata written to a BlobSequence with Write() and loaded back
// with Read() still reports the same CMP entry.
TEST(ExecutionMetadata, ReadAndWriteKeepsCmpEntries) {
ExecutionMetadata metadata_in;
ASSERT_TRUE(metadata_in.AppendCmpEntry({1, 2}, {3, 4}));
// Backing storage for the blob sequence.
std::vector<uint8_t> blob_storage;
blob_storage.resize(1024);
BlobSequence blobseq(blob_storage.data(), blob_storage.size());
EXPECT_TRUE(metadata_in.Write(/*tag=*/1, blobseq));
// Reset before reading back the blob that was just written.
blobseq.Reset();
Blob blob = blobseq.Read();
ExecutionMetadata metadata_out;
metadata_out.Read(blob);
std::vector<std::pair<ByteSpan, ByteSpan>> enumeration_result;
EXPECT_TRUE(metadata_out.ForEachCmpEntry(
[&](ByteSpan a, ByteSpan b) { enumeration_result.emplace_back(a, b); }));
EXPECT_THAT(
enumeration_result,
UnorderedElementsAreArray(std::vector<std::pair<ByteSpan, ByteSpan>>{
{{1, 2}, {3, 4}},
}));
}
} // namespace
} // namespace fuzztest::internal

View File

@ -0,0 +1,15 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// TODO(kcc): remove this file if nothing else gets added here.

View File

@ -0,0 +1,287 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This library defines the concepts "fuzzing feature" and "feature domain".
// It is used by Centipede, and it can be used by fuzz runners to
// define their features in a way most friendly to Centipede.
// Fuzz runners do not have to use this file nor to obey the rules defined here.
// But using this file and following its rules is the simplest way if you want
// Centipede to understand the details about the features generated by the
// runner.
//
// This library must not depend on anything other than libc so that fuzz targets
// using it don't gain redundant coverage. For the same reason this library
// uses raw __builtin_trap instead of CHECKs.
// We make an exception for <algorithm> for std::sort/std::unique,
// since <algorithm> is very lightweight.
// This library is also header-only, with all functions defined as inline.
#ifndef THIRD_PARTY_CENTIPEDE_FEATURE_H_
#define THIRD_PARTY_CENTIPEDE_FEATURE_H_
// WARNING!!!: Be very careful with what STL headers or other dependencies you
// add here. This header needs to remain mostly bare-bones so that we can
// include it into runner.
// <vector> is an exception, because it's too clumsy w/o it, and it introduces
// minimal code footprint.
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>
namespace fuzztest::internal {
// A feature is a 64-bit integer that identifies some unique behaviour
// of the fuzz target exercised by a given input.
// We say that the input has this feature with regard to this fuzz target.
// One example of a feature: a certain control flow edge being executed.
using feature_t = uint64_t;
// A vector of features. It is not expected to be ordered.
// It typically does not contain repetitions, but it's ok to have them.
using FeatureVec = std::vector<feature_t>;
namespace feature_domains {
// Feature domain is a subset of 64-bit integers dedicated to a certain
// kind of fuzzing features.
// All domains are of the same size (kDomainSize). This way, we can compute
// the domain of a given feature by dividing by kDomainSize.
//
// Domain number `domain_id` covers the half-open range
// [kDomainSize * domain_id, kDomainSize * (domain_id + 1)).
// All member functions are constexpr, so they can be used in constant
// expressions such as static_asserts.
class Domain {
 public:
  // kDomainSize is a large enough value to hold all PCs of our largest target.
  // It is also large enough to avoid too many collisions in other domains.
  // At the same time, it is small enough that all domains combined require
  // not too many bits (e.g. 32 bits is a good practical limit).
  // TODO(kcc): consider making feature_t a 32-bit type if we expect to not
  // use more than 32 bits.
  // NOTE: this value may change in future.
  static constexpr size_t kDomainSize = 1ULL << 27;

  // Intentionally implicit: the domain constants are declared with
  // `= {__COUNTER__}`.
  constexpr Domain(size_t domain_id) : domain_id_(domain_id) {}

  // First feature value that belongs to this domain.
  constexpr feature_t begin() const { return kDomainSize * domain_id_; }
  // One past the last feature value that belongs to this domain.
  constexpr feature_t end() const { return begin() + kDomainSize; }
  // Returns true iff `feature` lies in [begin(), end()).
  constexpr bool Contains(feature_t feature) const {
    return feature >= begin() && feature < end();
  }
  constexpr size_t domain_id() const { return domain_id_; }

  // Converts any `number` into a feature in this domain. Numbers that are
  // kDomainSize or larger wrap around (modulo kDomainSize).
  constexpr feature_t ConvertToMe(size_t number) const {
    return begin() + number % kDomainSize;
  }

  // Returns the DomainId of the domain that the feature belongs to.
  static constexpr size_t FeatureToDomainId(feature_t feature) {
    return feature / kDomainSize;
  }

  // Returns the index into the domain of a feature.
  static constexpr size_t FeatureToIndexInDomain(feature_t feature) {
    return feature % kDomainSize;
  }

 private:
  const size_t domain_id_;
};
// Notes on Designing Features and Domains
//
// Abstractly, a "feature" signals that there was something interesting about
// the input that Centipede should keep investigating. After seeing a particular
// feature occur often enough, Centipede will become less interested.
//
// Generally, different types of features should be put in different domains.
// This is useful for two reasons. First, Centipede can display the feature
// count for each domain separately. Second, Centipede calculates features
// weights relative to the size of the domain. If two different types of
// features are squeezed into the same domain, an overabundance of one type of
// feature can cause the other type of feature to be undervalued.
//
// The number of features that can fit inside a particular domain is finite (see
// kDomainSize). A feature outside that range will be mapped inside that range.
// If the space of all possible features is larger than kDomainSize, it is
// recommended that the feature value is hashed as it is calculated. Feature
// spaces typically have some sort of internal structure and mapping a
// structured feature space into kDomainSize via a modulus can create
// predictable aliasing. Hashing the feature value reduces the worst case effect
// of the feature aliasing. If hashing, it is also recommended that the domain
// is defined in such a way so that the number of features actually discovered
// in that domain stays below a fraction of kDomainSize, even if the number of
// possible features is huge. The more feature aliasing that occurs in practice,
// the less effective the domain.
// The domains below take their ids from successive uses of __COUNTER__, so the
// order of these declarations determines the ids: kUnknown is 0 and
// kLastDomain, which must stay last, gives the number of domains.
//
// Catch-all domain for unknown features.
inline constexpr Domain kUnknown = {__COUNTER__};
static_assert(kUnknown.domain_id() == 0); // No one used __COUNTER__ before.
// Represents PCs, i.e. control flow edges.
// Use ConvertPCFeatureToPcIndex() to convert back to a PC index.
inline constexpr Domain kPCs = {__COUNTER__};
static_assert(kPCs.domain_id() != kUnknown.domain_id()); // just in case.
// Features derived from edge counters. See Convert8bitCounterToNumber().
inline constexpr Domain k8bitCounters = {__COUNTER__};
// Features derived from data flow edges.
// A typical data flow edge is a pair of PCs: {store-PC, load-PC}.
// Another variant of a data flow edge is a pair of {global-address, load-PC}.
inline constexpr Domain kDataFlow = {__COUNTER__};
// Features derived from instrumenting CMP instructions. TODO(kcc): remove.
inline constexpr Domain kCMP = {__COUNTER__};
// Features in the following domains are created for comparison instructions
// 'a CMP b'. One component of the feature is the context, i.e. where the
// comparison happened. Another component depends on {a,b}.
//
// a == b.
// The other domains (kCMPModDiff, kCMPHamming, kCMPDiffLog) are for a != b.
inline constexpr Domain kCMPEq = {__COUNTER__};
// (a - b) if |a-b| < 32, see ABToCmpModDiff.
inline constexpr Domain kCMPModDiff = {__COUNTER__};
// hamming_distance(a, b), ABToCmpHamming.
inline constexpr Domain kCMPHamming = {__COUNTER__};
// Magnitude of |a - b|, see ABToCmpDiffLog (which returns the count of leading
// zero bits of |a - b|, i.e. a value that shrinks as log2(|a - b|) grows).
inline constexpr Domain kCMPDiffLog = {__COUNTER__};
// A list of all the CMP domains.
inline constexpr std::array<Domain, 5> kCMPDomains = {{
kCMP,
kCMPEq,
kCMPModDiff,
kCMPHamming,
kCMPDiffLog,
}};
// Features derived from observing function call stacks.
inline constexpr Domain kCallStack = {__COUNTER__};
// Features derived from computing (bounded) control flow paths.
inline constexpr Domain kBoundedPath = {__COUNTER__};
// Features derived from (unordered) pairs of PCs.
inline constexpr Domain kPCPair = {__COUNTER__};
// Features defined by a user via
// __attribute__((section("__centipede_extra_features"))).
// There is no hard guarantee how many user domains are available, feel free to
// add or remove domains as needed.
// Each {__COUNTER__} below creates one user domain with the next free id; the
// number of initializers must match the array size.
inline constexpr std::array<Domain, 16> kUserDomains = {{
{__COUNTER__},
{__COUNTER__},
{__COUNTER__},
{__COUNTER__},
{__COUNTER__},
{__COUNTER__},
{__COUNTER__},
{__COUNTER__},
{__COUNTER__},
{__COUNTER__},
{__COUNTER__},
{__COUNTER__},
{__COUNTER__},
{__COUNTER__},
{__COUNTER__},
{__COUNTER__},
}};
// A fake domain, not actually used, must be last.
inline constexpr Domain kLastDomain = {__COUNTER__};
// For now, check that all domains (except maybe for kLastDomain) fit
// into 32 bits.
static_assert(kLastDomain.begin() <= (1ULL << 32));
// Number of real domains, i.e. all domains declared before kLastDomain.
inline constexpr size_t kNumDomains = kLastDomain.domain_id();
// Special feature used to indicate an absence of features. Typically used where
// a feature array must not be empty, but doesn't have any other features.
inline constexpr feature_t kNoFeature = kUnknown.begin();
} // namespace feature_domains
// Converts an 8-bit coverage counter, i.e. a pair of {`pc_index`,
// `counter_value`}, into a number. `counter_value` must not be zero; a zero
// value traps.
//
// The counter value is reduced to its binary log rounded down, a bucket from
// 0 to 7: 1=>0, 2..3=>1, 4..7=>2, 8..15=>3, ..., 128..255=>7. The result is
// `pc_index * 8 + bucket`.
// This is a heuristic, similar to that of AFL or libFuzzer, that tries to
// encourage inputs with different numbers of repetitions of the same PC.
inline size_t Convert8bitCounterToNumber(size_t pc_index,
                                         uint8_t counter_value) {
  if (counter_value == 0) __builtin_trap();  // Wrong input.
  // __builtin_clz consumes a 32-bit integer, so the position of the highest
  // set bit is 31 minus the number of leading zero bits.
  constexpr uint32_t kHighestBitIndex = sizeof(uint32_t) * 8 - 1;
  const uint32_t bucket = kHighestBitIndex - __builtin_clz(counter_value);
  return pc_index * 8 + bucket;
}
// Given a `feature` from the PC domain, returns the feature's pc_index, i.e.
// the reverse of kPCs.ConvertToMe(), assuming all PCs originally converted to
// features were less than Domain::kDomainSize.
// Traps if `feature` does not belong to the PC domain.
inline size_t ConvertPCFeatureToPcIndex(feature_t feature) {
  const auto &pc_domain = feature_domains::kPCs;
  if (pc_domain.Contains(feature)) return feature - pc_domain.begin();
  __builtin_trap();
}
// Encodes the pair {`pc1`, `pc2`} as the single number `pc1 * max_pc + pc2`.
// Both `pc1` and `pc2` are expected to be in the range [0, `max_pc`).
inline size_t ConvertPcPairToNumber(uintptr_t pc1, uintptr_t pc2,
                                    uintptr_t max_pc) {
  const uintptr_t row_offset = pc1 * max_pc;
  return row_offset + pc2;
}
// Transforms {a,b}, a!=b, into a number in [0,64) based on a-b:
//  * a-b in [0, 32]  => a-b,
//  * b-a in [1, 31]  => 32 + (b-a),
//  * anything else   => 0.
// Both differences are computed with unsigned wrap-around.
inline uintptr_t ABToCmpModDiff(uintptr_t a, uintptr_t b) {
  const uintptr_t forward = a - b;
  if (forward <= 32) return forward;
  const uintptr_t backward = -forward;  // Same as b - a, modulo 2^N.
  if (backward < 32) return 32 + backward;
  return 0;
}
// Transforms {a,b}, a!=b, into a number in [0,64): the hamming distance
// between `a` and `b` (the number of differing bits) minus one.
inline uintptr_t ABToCmpHamming(uintptr_t a, uintptr_t b) {
  const uintptr_t differing_bits = a ^ b;
  const int distance = __builtin_popcountll(differing_bits);
  return distance - 1;
}
// Transforms {a,b}, a!=b, into a number in [0,64) based on the magnitude of
// |a-b|: the result is the number of leading zero bits of |a-b| viewed as an
// `unsigned long long`, so larger differences yield smaller numbers.
inline uintptr_t ABToCmpDiffLog(uintptr_t a, uintptr_t b) {
  uintptr_t abs_diff;
  if (a > b) {
    abs_diff = a - b;
  } else {
    abs_diff = b - a;
  }
  return __builtin_clzll(abs_diff);
}
// A minimal array of features with a compile-time capacity of `kSize`
// elements and a push_back() that ignores elements once the array is full.
// Thread-compatible.
template <size_t kSize>
class FeatureArray {
 public:
  // Constructs an empty feature array.
  FeatureArray() = default;

  // Appends `feature` unless the array already holds `kSize` elements, in
  // which case the call has no effect.
  void push_back(feature_t feature) {
    if (num_features_ >= kSize) return;
    features_[num_features_] = feature;
    ++num_features_;
  }

  // Makes the array empty. The stored values are not overwritten.
  void clear() { num_features_ = 0; }

  // Returns a pointer to the first element of the underlying storage.
  feature_t *data() { return features_; }

  // Returns the number of elements currently in the array.
  size_t size() const { return num_features_; }

 private:
  // NOTE: No initializer needed: only the first `num_features_` elements are
  // considered part of the array.
  feature_t features_[kSize];
  size_t num_features_ = 0;
};
} // namespace fuzztest::internal
#endif // THIRD_PARTY_CENTIPEDE_FEATURE_H_

View File

@ -0,0 +1,145 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/feature_set.h"
#include <cstddef>
#include <cstdint>
#include <ostream>
#include <sstream>
#include <string>
#include <string_view>
#include "absl/log/check.h"
#include "absl/strings/str_cat.h"
#include "./centipede/control_flow.h"
#include "./centipede/feature.h"
#include "./common/logging.h"
namespace fuzztest::internal {
//------------------------------------------------------------------------------
// FeatureSet
//------------------------------------------------------------------------------
// Collects every PC index whose corresponding kPCs feature has a non-zero
// frequency. Walks the whole domain, which is slow, but this is not expected
// to be on a hot path.
PCIndexVec FeatureSet::ToCoveragePCs() const {
  PCIndexVec covered_pcs;
  for (size_t pc_index = 0; pc_index < feature_domains::Domain::kDomainSize;
       ++pc_index) {
    const auto pc_feature = feature_domains::kPCs.ConvertToMe(pc_index);
    if (frequencies_[pc_feature] == 0) continue;
    covered_pcs.push_back(pc_index);
  }
  return covered_pcs;
}
// Returns how many distinct features from `domain` have been recorded.
size_t FeatureSet::CountFeatures(feature_domains::Domain domain) const {
  const auto id = domain.domain_id();
  return features_per_domain_[id];
}
bool FeatureSet::HasUnseenFeatures(const FeatureVec &features) const {
for (auto feature : features) {
if (frequencies_[feature] == 0) return true;
}
return false;
}
// Compacts `features` in place, keeping only features that are not in a
// discarded domain and whose frequency is still below their threshold.
// Returns how many non-discarded features had a frequency of zero.
__attribute__((noinline))  // to see it in profile.
size_t
FeatureSet::PruneFeaturesAndCountUnseen(FeatureVec &features) const {
  size_t unseen_count = 0;
  // `write_pos` never gets ahead of `read_pos`, so overwriting in place is
  // safe.
  size_t write_pos = 0;
  for (size_t read_pos = 0; read_pos < features.size(); ++read_pos) {
    const auto feature = features[read_pos];
    if (ShouldDiscardFeature(feature)) continue;
    const auto frequency = frequencies_[feature];
    if (frequency == 0) ++unseen_count;
    if (frequency < FrequencyThreshold(feature)) {
      features[write_pos] = feature;
      ++write_pos;
    }
  }
  features.resize(write_pos);
  return unseen_count;
}
// Compacts `features` in place, dropping every feature for which
// ShouldDiscardFeature() is true. Relative order of the rest is preserved.
void FeatureSet::PruneDiscardedDomains(FeatureVec &features) const {
  size_t write_pos = 0;
  for (size_t read_pos = 0; read_pos < features.size(); ++read_pos) {
    const auto feature = features[read_pos];
    if (!ShouldDiscardFeature(feature)) {
      features[write_pos] = feature;
      ++write_pos;
    }
  }
  features.resize(write_pos);
}
void FeatureSet::IncrementFrequencies(const FeatureVec &features) {
for (auto f : features) {
auto &freq = frequencies_[f];
if (freq == 0) {
++num_features_;
++features_per_domain_[feature_domains::Domain::FeatureToDomainId(f)];
}
if (freq < FrequencyThreshold(f)) ++freq;
}
}
// Computes the combined weight of `features` as the sum of per-feature
// weights. Each per-feature weight is
//   (num_features_ / features_in_domain) * (256 / frequency),
// using integer division in both factors.
// Every feature must already be present in this set (frequency > 0) and its
// domain must have at least one recorded feature; otherwise a CHECK fails.
__attribute__((noinline)) // to see it in profile.
uint64_t
FeatureSet::ComputeWeight(const FeatureVec &features) const {
uint64_t weight = 0;
for (auto feature : features) {
// The less frequent the feature is, the more valuable it is.
// (frequency == 1) => (weight == 256)
// (frequency == 2) => (weight == 128)
// and so on.
// The fewer features a domain has, the more valuable its features are.
auto domain_id = feature_domains::Domain::FeatureToDomainId(feature);
auto features_in_domain = features_per_domain_[domain_id];
// Guards the division below against a zero divisor.
CHECK(features_in_domain);
auto domain_weight = num_features_ / features_in_domain;
auto feature_frequency = frequencies_[feature];
// Guards `256 / feature_frequency` below; the streamed values are only
// meant as diagnostics for the failure message.
// NOTE(review): local names here are passed to CHECK/VV, which presumably
// embed them in the failure text -- keep them stable.
CHECK_GT(feature_frequency, 0)
<< VV(feature) << VV(domain_id) << VV(features_in_domain)
<< VV(domain_weight) << VV((int)feature_frequency) << DebugString();
weight += domain_weight * (256 / feature_frequency);
}
return weight;
}
// Returns a human-readable description of this set: the frequency threshold,
// the number of unique features, and the per-domain summary produced by
// operator<<(std::ostream&, const FeatureSet&).
std::string FeatureSet::DebugString() const {
  std::ostringstream os;
  os << VV((int)frequency_threshold_);
  os << VV(num_features_);
  // Stream the object, not the pointer: `os << this` would select the
  // `const void*` overload and print only the object's address.
  os << *this;
  return os.str();
}
// Writes a one-line summary of `fs` to `out`: the total number of features
// followed by " <label>: <count>" for every domain group with a non-zero
// count.
std::ostream &operator<<(std::ostream &out, const FeatureSet &fs) {
  // Emits a labelled count, skipping zero counts to keep the line short.
  const auto append_count = [&out](size_t count, std::string_view label) {
    if (count != 0) out << " " << label << ": " << count;
  };

  out << "ft: " << fs.size();
  append_count(fs.CountFeatures(feature_domains::kPCs), "cov");
  append_count(fs.CountFeatures(feature_domains::k8bitCounters), "cnt");
  append_count(fs.CountFeatures(feature_domains::kDataFlow), "df");
  append_count(fs.CountFeatures(feature_domains::kCMPDomains), "cmp");
  append_count(fs.CountFeatures(feature_domains::kCallStack), "stk");
  append_count(fs.CountFeatures(feature_domains::kBoundedPath), "path");
  append_count(fs.CountFeatures(feature_domains::kPCPair), "pair");

  // User-defined domains are labelled "usr<index>".
  const size_t num_user_domains = std::size(feature_domains::kUserDomains);
  for (size_t user_idx = 0; user_idx < num_user_domains; ++user_idx) {
    const size_t user_count =
        fs.CountFeatures(feature_domains::kUserDomains[user_idx]);
    append_count(user_count, absl::StrCat("usr", user_idx));
  }

  append_count(fs.CountFeatures(feature_domains::kUnknown), "unknown");
  return out;
}
} // namespace fuzztest::internal

View File

@ -0,0 +1,144 @@
// Copyright 2023 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef THIRD_PARTY_CENTIPEDE_FEATURE_SET_H_
#define THIRD_PARTY_CENTIPEDE_FEATURE_SET_H_
#include <bitset>
#include <cstddef>
#include <cstdint>
#include <initializer_list>
#include <ostream>
#include <string>
#include "absl/log/log.h"
#include "./centipede/control_flow.h"
#include "./centipede/feature.h"
#include "./centipede/util.h"
namespace fuzztest::internal {
// Set of features with their frequencies.
// Features that have a frequency >= frequency_threshold
// are considered too frequent and thus less interesting for further fuzzing.
// All features must be in [0, feature_domains::kLastDomain.begin()).
class FeatureSet {
public:
using FeatureDomainSet = std::bitset<feature_domains::kNumDomains>;
explicit FeatureSet(uint8_t frequency_threshold,
FeatureDomainSet should_discard_domain)
: frequency_threshold_(frequency_threshold),
should_discard_domain_(should_discard_domain) {}
// Returns true if there are features in `features` not present in `this`.
bool HasUnseenFeatures(const FeatureVec &features) const;
// Removes all features from `features` that are too frequent or are in
// discarded domains.
// Returns the number of unpruned features in `features` that were not
// previously present in `this`.
size_t PruneFeaturesAndCountUnseen(FeatureVec &features) const;
// Prune the features that are in discarded domains.
// Effectively a subset of PruneFeaturesAndCountUnseen.
void PruneDiscardedDomains(FeatureVec &features) const;
// For every feature in `features` increment its frequency.
// If a feature wasn't seen before, it is added to `this`.
void IncrementFrequencies(const FeatureVec &features);
// How many different features are in the set.
size_t size() const { return num_features_; }
// Returns features that originate from CFG counters, converted to PCIndexVec.
PCIndexVec ToCoveragePCs() const;
// Returns the number of features in `this` from the given feature domain.
size_t CountFeatures(feature_domains::Domain domain) const;
// Returns the number of features in `this` from the given feature domains.
template <typename DomainListT>
size_t CountFeatures(const DomainListT &domains) const {
size_t count = 0;
for (auto domain : domains) {
count += features_per_domain_[domain.domain_id()];
}
return count;
}
// The same for an `initializer_list`, to enable usages like
// `CountFeatures({kPCs, kCMP})`.
size_t CountFeatures(
std::initializer_list<feature_domains::Domain> domains) const {
return CountFeatures<>(domains);
}
// Returns the frequency associated with `feature`.
size_t Frequency(feature_t feature) const { return frequencies_[feature]; }
// Computes combined weight of `features`.
// The less frequent the feature is, the bigger its weight.
// The weight of a FeatureVec is a sum of individual feature weights.
uint64_t ComputeWeight(const FeatureVec &features) const;
// Returns a debug string representing the state of *this.
std::string DebugString() const;
private:
// Computes the frequency threshold based on the domain of `feature`.
// For now, just uses 1 for kPCPair and frequency_threshold_ for all others.
// Rationale: the kPCPair features might be too numerous, we don't want to
// store more than one of each such feature in the corpus.
uint8_t FrequencyThreshold(feature_t feature) const {
if (feature_domains::kPCPair.Contains(feature)) return 1;
return frequency_threshold_;
}
// Returns 'true' if we should always filter out this specific feature ID.
// This is a configurable policy that does not depend on the frequency of the
// feature.
bool ShouldDiscardFeature(feature_t feature) const {
size_t domain_id = feature_domains::Domain::FeatureToDomainId(feature);
// TODO(b/385774476): Remove this check once the root cause is fixed.
if (domain_id >= feature_domains::kNumDomains) {
LOG(ERROR) << "Unexpected feature with id: " << feature;
return true;
}
return should_discard_domain_.test(domain_id);
}
const uint8_t frequency_threshold_;
static constexpr size_t kSize = feature_domains::kLastDomain.begin();
// Maps features to their frequencies.
// This array is huge but sparse, and depending on the enabled features
// some parts of it will never be written to or read from.
// Unused parts of MmapNoReserveArray don't actually reserve memory.
MmapNoReserveArray<kSize> frequencies_;
// Counts all unique features added to this.
size_t num_features_ = 0;
// Counts features in each domain.
size_t features_per_domain_[feature_domains::kNumDomains] = {};
FeatureDomainSet should_discard_domain_;
};
// Stream out description and count of features in feature set.
std::ostream &operator<<(std::ostream &out, const FeatureSet &fs);
} // namespace fuzztest::internal
#endif // THIRD_PARTY_CENTIPEDE_FEATURE_SET_H_

View File

@ -0,0 +1,204 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/feature_set.h"
#include <bitset>
#include <cstddef>
#include <cstdint>
#include "gtest/gtest.h"
#include "./centipede/feature.h"
namespace fuzztest::internal {
namespace {
// Checks that ComputeWeight() ranks less frequent features higher and that
// the weight of a vector grows with the weights of its elements.
TEST(FeatureSet, ComputeWeight) {
FeatureSet feature_set(10, {});
// Shorthand for ComputeWeight.
auto W = [&](const FeatureVec &features) -> uint64_t {
return feature_set.ComputeWeight(features);
};
// Features 1, 2 and 3 have each been seen once: equal weights.
feature_set.IncrementFrequencies({1, 2, 3});
EXPECT_EQ(W({1}), W({2}));
EXPECT_EQ(W({1}), W({3}));
// Feature 4 was never added, so computing its weight must abort.
EXPECT_DEATH(W({4}), "");
// Now 1 and 2 have been seen twice, 3 only once: 3 is the most valuable.
feature_set.IncrementFrequencies({1, 2});
EXPECT_GT(W({3}), W({2}));
EXPECT_GT(W({3}), W({1}));
EXPECT_GT(W({3, 1}), W({2, 1}));
EXPECT_GT(W({3, 2}), W({2}));
// Now 1 has been seen three times: weights order as 3 > 2 > 1.
feature_set.IncrementFrequencies({1});
EXPECT_GT(W({3}), W({2}));
EXPECT_GT(W({2}), W({1}));
EXPECT_GT(W({3, 2}), W({3, 1}));
}
// Checks that a feature from a domain with fewer recorded features weighs
// more than an equally frequent feature from a more populated domain.
TEST(FeatureSet, ComputeWeightWithDifferentDomains) {
  FeatureSet feature_set(10, {});

  // First feature of three distinct domains.
  auto f1 = feature_domains::k8bitCounters.begin();
  auto f2 = feature_domains::kCMP.begin();
  auto f3 = feature_domains::kBoundedPath.begin();

  // Populate the set so that domain #1 is the rarest and domain #3 the most
  // frequent.
  const FeatureVec observed = {
      /* one feature from domain #1 */ f1,
      /* two features from domain #2 */ f2, f2 + 1,
      /* three features from domain #3 */ f3, f3 + 1, f3 + 2};
  feature_set.IncrementFrequencies(observed);

  auto weight = [&](const FeatureVec &features) -> uint64_t {
    return feature_set.ComputeWeight(features);
  };

  // Features from a less frequent domain must have more weight.
  EXPECT_GT(weight({f1}), weight({f2}));
  EXPECT_GT(weight({f2}), weight({f3}));
}
// Checks that HasUnseenFeatures() reports a feature as unseen until it has
// been passed to IncrementFrequencies() at least once.
TEST(FeatureSet, HasUnseenFeatures_IncrementFrequencies) {
size_t frequency_threshold = 2;
FeatureSet feature_set(frequency_threshold, {});
FeatureVec features = {10};
// Nothing has been added yet, so {10} is unseen; after adding it, it is not.
EXPECT_TRUE(feature_set.HasUnseenFeatures(features));
feature_set.IncrementFrequencies(features);
EXPECT_FALSE(feature_set.HasUnseenFeatures(features));
// 20 is new even though 10 is already known.
features = {10, 20};
EXPECT_TRUE(feature_set.HasUnseenFeatures(features));
feature_set.IncrementFrequencies(features);
EXPECT_FALSE(feature_set.HasUnseenFeatures(features));
// Adding an unrelated feature does not affect previously seen ones.
features = {50};
EXPECT_TRUE(feature_set.HasUnseenFeatures(features));
feature_set.IncrementFrequencies(features);
features = {10, 20};
EXPECT_FALSE(feature_set.HasUnseenFeatures(features));
}
// Walks a FeatureSet with frequency threshold 3 through a sequence of
// IncrementFrequencies() calls and checks, after each step, what
// PruneFeaturesAndCountUnseen() returns, what it leaves in `features`, and
// that pruning itself never changes the set's size.
TEST(FeatureSet, PruneFeaturesAndCountUnseen_IncrementFrequencies) {
size_t frequency_threshold = 3;
FeatureSet feature_set(frequency_threshold, {});
// Scratch vector that every step re-assigns and then prunes in place.
FeatureVec features;
// Shorthand for PruneFeaturesAndCountUnseen.
auto PruneAndCountUnseen = [&]() -> size_t {
return feature_set.PruneFeaturesAndCountUnseen(features);
};
// Shorthand for IncrementFrequencies.
auto Increment = [&](const FeatureVec &features) {
feature_set.IncrementFrequencies(features);
};
// PruneAndCountUnseen on the empty set.
features = {10, 20};
EXPECT_EQ(PruneAndCountUnseen(), 2);
EXPECT_EQ(feature_set.size(), 0);
EXPECT_EQ(features, FeatureVec({10, 20}));
// Add {10} for the first time.
features = {10, 20};
Increment({10});
EXPECT_EQ(PruneAndCountUnseen(), 1);
EXPECT_EQ(feature_set.size(), 1);
EXPECT_EQ(features, FeatureVec({10, 20}));
// Add {10} for the second time.
features = {10, 20};
Increment({10});
EXPECT_EQ(PruneAndCountUnseen(), 1);
EXPECT_EQ(feature_set.size(), 1);
EXPECT_EQ(features, FeatureVec({10, 20}));
// Add {10} for the third time. {10} becomes "frequent", prune removes it.
features = {10, 20};
Increment({10});
EXPECT_EQ(PruneAndCountUnseen(), 1);
EXPECT_EQ(feature_set.size(), 1);
EXPECT_EQ(features, FeatureVec({20}));
// Add {30} for the first time. {10, 20} still gets pruned to {20}.
features = {10, 20};
Increment({30});
EXPECT_EQ(PruneAndCountUnseen(), 1);
EXPECT_EQ(feature_set.size(), 2);
EXPECT_EQ(features, FeatureVec({20}));
// {10, 20, 30} => {20, 30}; 1 unseen.
features = {10, 20, 30};
EXPECT_EQ(PruneAndCountUnseen(), 1);
EXPECT_EQ(feature_set.size(), 2);
EXPECT_EQ(features, FeatureVec({20, 30}));
// {10, 20, 30} => {20}; 1 unseen.
// Two more increments bring {30} to the threshold as well.
features = {10, 20, 30};
Increment({30});
Increment({30});
EXPECT_EQ(PruneAndCountUnseen(), 1);
EXPECT_EQ(feature_set.size(), 2);
EXPECT_EQ(features, FeatureVec({20}));
// {10, 20, 30} => {20}; 0 unseen.
// {20} is now known (frequency 2) but still below the threshold.
features = {10, 20, 30};
Increment({20});
Increment({20});
EXPECT_EQ(PruneAndCountUnseen(), 0);
EXPECT_EQ(feature_set.size(), 3);
EXPECT_EQ(features, FeatureVec({20}));
// {10, 20, 30} => {}; 0 unseen.
// The third increment makes {20} frequent too, so everything is pruned.
features = {10, 20, 30};
Increment({20});
EXPECT_EQ(PruneAndCountUnseen(), 0);
EXPECT_EQ(feature_set.size(), 3);
EXPECT_EQ(features, FeatureVec({}));
}
// For every domain in turn: discard just that domain, feed in one feature
// from each domain, and check that both pruning entry points drop exactly the
// feature from the discarded domain.
TEST(FeatureSet, PruneDiscardedDomains) {
  for (size_t banned = 0; banned < feature_domains::kNumDomains; ++banned) {
    SCOPED_TRACE(banned);

    // Ban one domain.
    std::bitset<feature_domains::kNumDomains> discarded_domains;
    discarded_domains.set(banned);
    FeatureSet feature_set(10, discarded_domains);

    // `features` gets one feature per domain; `expected` gets the same minus
    // the feature from the banned domain.
    FeatureVec features;
    FeatureVec expected;
    for (size_t domain_id = 0; domain_id < feature_domains::kNumDomains;
         ++domain_id) {
      const feature_t f = feature_domains::Domain(domain_id).ConvertToMe(0);
      features.push_back(f);
      if (domain_id == banned) continue;
      expected.push_back(f);
    }

    FeatureVec f1 = features;
    feature_set.PruneDiscardedDomains(f1);
    EXPECT_EQ(f1.size(), features.size() - 1);
    EXPECT_EQ(f1, expected);

    // PruneFeaturesAndCountUnseen should, at minimum, prune the same domains
    // as PruneDiscardedDomains.
    FeatureVec f2 = features;
    feature_set.PruneFeaturesAndCountUnseen(f2);
    EXPECT_EQ(f2.size(), features.size() - 1);
    EXPECT_EQ(f2, expected);
  }
}
} // namespace
} // namespace fuzztest::internal

View File

@ -0,0 +1,43 @@
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/feature.h"
#include "gtest/gtest.h"
namespace fuzztest::internal {
namespace {
// Exercises FeatureArray with capacity 3: size tracking across push_back()
// and clear(), dropping of pushes beyond capacity, and data() contents.
TEST(Feature, FeatureArray) {
FeatureArray<3> array;
// A fresh array is empty and grows by one per push.
EXPECT_EQ(array.size(), 0);
array.push_back(10);
EXPECT_EQ(array.size(), 1);
array.push_back(20);
EXPECT_EQ(array.size(), 2);
// clear() resets the size to zero.
array.clear();
EXPECT_EQ(array.size(), 0);
// Fill the array to capacity.
array.push_back(10);
array.push_back(20);
array.push_back(30);
EXPECT_EQ(array.size(), 3);
array.push_back(40); // no space left.
// The overflowing push is ignored; the first three values are intact.
EXPECT_EQ(array.size(), 3);
EXPECT_EQ(array.data()[0], 10);
EXPECT_EQ(array.data()[1], 20);
EXPECT_EQ(array.data()[2], 30);
}
} // namespace
} // namespace fuzztest::internal

View File

@ -0,0 +1,70 @@
// Copyright 2023 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef THIRD_PARTY_CENTIPEDE_FOREACH_NONZERO_H_
#define THIRD_PARTY_CENTIPEDE_FOREACH_NONZERO_H_
// WARNING!!!: Be very careful with what STL headers or other dependencies you
// add here. This header needs to remain mostly bare-bones so that we can
// include it into runner.
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <functional>
namespace fuzztest::internal {
// Iterates over [bytes, bytes + num_bytes) and calls action(idx, bytes[idx])
// for every non-zero bytes[idx], in increasing order of idx. Each reported
// byte is cleared to zero.
// Optimized for the case where lots of bytes are zero: after an unaligned
// prefix is handled byte by byte, the bulk is scanned one machine word at a
// time and all-zero words are skipped with a single comparison.
inline void ForEachNonZeroByte(uint8_t *bytes, size_t num_bytes,
                               std::function<void(size_t, uint8_t)> action) {
  // The main loop will read words of this size.
  constexpr uintptr_t kWordSize = sizeof(uintptr_t);
  const uintptr_t initial_alignment =
      reinterpret_cast<uintptr_t>(bytes) % kWordSize;
  size_t idx = 0;
  uintptr_t alignment = initial_alignment;
  // Iterate the first few until we reach alignment by word size.
  for (; idx < num_bytes && alignment != 0;
       idx++, alignment = (alignment + 1) % kWordSize) {
    if (bytes[idx]) {
      action(idx, bytes[idx]);
      bytes[idx] = 0;
    }
  }
  // Iterate one word at a time. If the word is != 0, iterate its bytes.
  for (; idx + kWordSize - 1 < num_bytes; idx += kWordSize) {
    uintptr_t wide_load;
    __builtin_memcpy(&wide_load, bytes + idx, kWordSize);  // force inline.
    if (!wide_load) continue;
    // Keep a snapshot of the word's bytes in memory order. Indexing the
    // snapshot (rather than shifting `wide_load`) reports the bytes at the
    // right positions regardless of the target's endianness.
    uint8_t word_bytes[kWordSize];
    __builtin_memcpy(word_bytes, &wide_load, kWordSize);  // force inline.
    // Clear the whole word before invoking any callback for it.
    __builtin_memset(bytes + idx, 0, kWordSize);  // force inline.
    for (size_t pos = 0; pos < kWordSize; pos++) {
      const uint8_t value = word_bytes[pos];
      if (value) action(idx + pos, value);
    }
  }
  // Iterate the last few.
  for (; idx < num_bytes; idx++) {
    if (bytes[idx]) {
      action(idx, bytes[idx]);
      bytes[idx] = 0;
    }
  }
}
} // namespace fuzztest::internal
#endif // THIRD_PARTY_CENTIPEDE_FOREACH_NONZERO_H_

View File

@ -0,0 +1,89 @@
// Copyright 2023 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/foreach_nonzero.h"
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <functional>
#include <utility>
#include <vector>
#include "gtest/gtest.h"
namespace fuzztest::internal {
namespace {
// Reference implementation used to cross-check ForEachNonZeroByte: visits
// every byte in order, reports each non-zero one via `action` and clears it.
void TrivialForEachNonZeroByte(uint8_t *bytes, size_t num_bytes,
                               std::function<void(size_t, uint8_t)> action) {
  for (size_t pos = 0; pos != num_bytes; ++pos) {
    const uint8_t current = bytes[pos];
    if (current == 0) continue;
    action(pos, current);
    bytes[pos] = 0;
  }
}
// Runs both the trivial and the optimized implementation over every
// (offset, size) sub-array of a sparse test buffer and checks that each one
// (a) clears exactly the bytes inside the sub-array, (b) leaves the rest of
// the buffer untouched, and (c) reports exactly the non-zero bytes of the
// sub-array, in order, with indices relative to the sub-array start.
TEST(ForEachNonZeroByte, ProcessesSubArrays) {
  // Some long data with long spans of zeros and a few non-zeros.
  // We will test all sub-arrays of this array.
  const uint8_t test_data[] = {
      1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  };
  const size_t kTestDataSize = sizeof(test_data);
  uint8_t test_data_copy[kTestDataSize];
  // Verifies the state of `test_data_copy` after processing the sub-array
  // [offset, offset + size), and that `v` holds exactly the (index, value)
  // pairs the callback should have received.
  auto CheckResult = [&](size_t offset, size_t size,
                         const std::vector<std::pair<size_t, uint8_t>> &v) {
    std::vector<std::pair<size_t, uint8_t>> expected;
    for (size_t i = 0; i < kTestDataSize; ++i) {
      if (i >= offset && i < offset + size) {
        // Inside the processed range every byte must have been cleared.
        EXPECT_EQ(test_data_copy[i], 0);
        // Callback indices are relative to the pointer that was passed in.
        if (test_data[i] != 0) expected.emplace_back(i - offset, test_data[i]);
      } else {
        // Outside the processed range nothing may change.
        EXPECT_EQ(test_data_copy[i], test_data[i]);
      }
    }
    EXPECT_EQ(v, expected);
  };
  for (size_t offset = 0; offset <= kTestDataSize; offset++) {
    for (size_t size = 0; offset + size <= kTestDataSize; size++) {
      std::vector<std::pair<size_t, uint8_t>> v1, v2;
      memcpy(test_data_copy, test_data, kTestDataSize);
      TrivialForEachNonZeroByte(
          test_data_copy + offset, size,
          [&](size_t idx, uint8_t value) { v1.emplace_back(idx, value); });
      CheckResult(offset, size, v1);
      memcpy(test_data_copy, test_data, kTestDataSize);
      ForEachNonZeroByte(
          test_data_copy + offset, size,
          [&](size_t idx, uint8_t value) { v2.emplace_back(idx, value); });
      CheckResult(offset, size, v2);
      EXPECT_EQ(v1, v2);
    }
  }
}
} // namespace
} // namespace fuzztest::internal

View File

@ -0,0 +1,151 @@
// Copyright 2023 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/fuzztest_mutator.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <memory>
#include <utility>
#include <vector>
#include "absl/random/random.h"
#include "absl/types/span.h"
#include "./centipede/byte_array_mutator.h"
#include "./centipede/execution_metadata.h"
#include "./centipede/knobs.h"
#include "./centipede/mutation_input.h"
#include "./common/defs.h"
#include "./fuzztest/domain_core.h"
#include "./fuzztest/internal/table_of_recent_compares.h"
namespace fuzztest::internal {
namespace {
// The concrete FuzzTest domain type produced by
// `VectorOf(Arbitrary<uint8_t>())`, i.e. a domain over byte vectors. It is
// spelled with `decltype` so the implementation type need not be named here.
using MutatorDomainBase =
decltype(fuzztest::VectorOf(fuzztest::Arbitrary<uint8_t>()));
} // namespace
// Per-mutator state handed to the domain on every Mutate() call.
// Defined here rather than in the header so that the header does not need to
// include the FuzzTest internals.
struct FuzzTestMutator::MutationMetadata {
// Comparison tables filled by FuzzTestMutator::SetMetadata().
fuzztest::internal::TablesOfRecentCompares cmp_tables;
};
// Named wrapper around the byte-vector domain. Having a real class (instead
// of the `decltype` alias) lets the header forward-declare it.
class FuzzTestMutator::MutatorDomain : public MutatorDomainBase {
 public:
  // Starts from a default byte-vector domain; size limits and dictionaries
  // are configured afterwards by FuzzTestMutator.
  MutatorDomain()
      : MutatorDomainBase(fuzztest::VectorOf(fuzztest::Arbitrary<uint8_t>())) {
  }

  ~MutatorDomain() = default;
};
// Builds a mutator that keeps a reference to `knobs` and seeds its PRNG with
// `seed`. The domain is configured to produce vectors of at least one byte
// and at most `max_len_` bytes (the in-class default unless changed later
// via set_max_len()).
FuzzTestMutator::FuzzTestMutator(const Knobs &knobs, uint64_t seed)
: knobs_(knobs),
prng_(seed),
mutation_metadata_(std::make_unique<MutationMetadata>()),
domain_(std::make_unique<MutatorDomain>()) {
domain_->WithMinSize(1).WithMaxSize(max_len_);
}
// Defined in this file because MutationMetadata and MutatorDomain are only
// forward-declared in the header; the unique_ptr members need the complete
// types to be destroyed.
FuzzTestMutator::~FuzzTestMutator() = default;
// Inserts a random slice other[first:first+size] into `data` at a random
// position, choosing `size` so that `data` does not grow beyond `max_len_`.
// The caller must ensure data.size() < max_len_.
void FuzzTestMutator::CrossOverInsert(ByteArray &data, const ByteArray &other) {
  // The three random draws below happen in a fixed order: slice size, slice
  // start in `other`, insertion position in `data`.
  const size_t room_left = max_len_ - data.size();
  const size_t largest_slice = std::min(room_left, other.size());
  const auto slice_size = absl::Uniform<size_t>(prng_, 1, largest_slice + 1);
  const auto slice_start =
      absl::Uniform<size_t>(prng_, 0, other.size() - slice_size + 1);
  const auto insert_at = absl::Uniform<size_t>(prng_, 0, data.size() + 1);

  const auto slice_begin = other.begin() + slice_start;
  data.insert(data.begin() + insert_at, slice_begin, slice_begin + slice_size);
}
// Overwrites data[pos:pos+size] with other[first:first+size], where `first`,
// `size` and `pos` are chosen at random. At most half of `data` is
// overwritten (but always at least one byte), and the slice never runs past
// the end of `other`. The size of `data` does not change.
void FuzzTestMutator::CrossOverOverwrite(ByteArray &data,
                                         const ByteArray &other) {
  // Overwrite no more than half of data.
  // `std::max<size_t>` (rather than a `1UL` literal) keeps both arguments the
  // same type on platforms where size_t is not `unsigned long`.
  size_t max_size = std::max<size_t>(1, data.size() / 2);
  const auto first = absl::Uniform<size_t>(prng_, 0, other.size());
  // Do not read past the end of `other`.
  max_size = std::min(max_size, other.size() - first);
  const auto size = absl::Uniform<size_t>(prng_, 1, max_size + 1);
  const auto pos = absl::Uniform<size_t>(prng_, 0, data.size() - size + 1);
  std::copy(other.begin() + first, other.begin() + first + size,
            data.begin() + pos);
}
// Crosses `data` over with `other`, either by inserting a slice of `other`
// or by overwriting part of `data` with it. Insertion is only considered
// while `data` is below `max_len_`; in that case a knob-driven coin flip
// picks between the two.
void FuzzTestMutator::CrossOver(ByteArray &data, const ByteArray &other) {
  const bool can_grow = data.size() < max_len_;
  // Short-circuit: the PRNG is consulted only when growth is possible.
  if (can_grow &&
      knobs_.GenerateBool(knob_cross_over_insert_or_overwrite, prng_())) {
    CrossOverInsert(data, other);
    return;
  }
  CrossOverOverwrite(data, other);
}
// Produces `num_mutants` mutants from `inputs`, which must be non-empty
// (the process aborts otherwise). Each mutant starts as a copy of a randomly
// chosen input, truncated to `max_len_`, and is then either crossed over with
// another randomly chosen input or mutated by the FuzzTest domain; a knob
// decides which.
std::vector<ByteArray> FuzzTestMutator::MutateMany(
    const std::vector<MutationInputRef> &inputs, size_t num_mutants) {
  if (inputs.empty()) abort();
  // TODO(xinhaoyuan): Consider metadata in other inputs instead of always the
  // first one.
  SetMetadata(inputs[0].metadata != nullptr ? *inputs[0].metadata
                                            : ExecutionMetadata());
  std::vector<ByteArray> mutants;
  mutants.reserve(num_mutants);
  // The counter has the same type as `num_mutants` to avoid a signed/unsigned
  // comparison and overflow for very large counts.
  for (size_t i = 0; i < num_mutants; ++i) {
    auto mutant = inputs[absl::Uniform<size_t>(prng_, 0, inputs.size())].data;
    if (mutant.size() > max_len_) mutant.resize(max_len_);
    if (knobs_.GenerateBool(knob_mutate_or_crossover, prng_())) {
      // Perform crossover with some other input. It may be the same input.
      const auto &other_input =
          inputs[absl::Uniform<size_t>(prng_, 0, inputs.size())].data;
      CrossOver(mutant, other_input);
    } else {
      domain_->Mutate(mutant, prng_,
                      {/*cmp_tables=*/&mutation_metadata_->cmp_tables},
                      /*only_shrink=*/false);
    }
    mutants.push_back(std::move(mutant));
  }
  return mutants;
}
// Feeds the comparison entries recorded in `metadata` into the mutator's
// comparison tables. Only entries whose size lies within
// [kMinCmpEntrySize, kMaxCmpEntrySize] are used.
void FuzzTestMutator::SetMetadata(const ExecutionMetadata &metadata) {
  metadata.ForEachCmpEntry([this](ByteSpan a, ByteSpan b) {
    const size_t entry_size = a.size();
    const bool usable =
        entry_size >= kMinCmpEntrySize && entry_size <= kMaxCmpEntrySize;
    if (!usable) return;
    // Use the memcmp table to avoid subtlety of the container domain mutation
    // with integer tables. E.g. it won't insert integer comparison data.
    mutation_metadata_->cmp_tables.GetMutable<0>().Insert(a.data(), b.data(),
                                                          entry_size);
  });
}
// Sets the maximum mutant length, in bytes, for both the crossover logic and
// the underlying domain.
// Returns false, leaving the mutator unchanged, if `max_len` is zero: the
// domain is configured with a minimum size of 1, and an empty mutant would
// break the crossover arithmetic. Returns true otherwise.
bool FuzzTestMutator::set_max_len(size_t max_len) {
  if (max_len == 0) return false;
  max_len_ = max_len;
  domain_->WithMaxSize(max_len);
  return true;
}
// Forwards `dict_entries` to the domain's WithDictionary().
// NOTE(review): whether repeated calls accumulate entries or replace the
// previous dictionary depends on the domain implementation -- confirm.
void FuzzTestMutator::AddToDictionary(
const std::vector<ByteArray> &dict_entries) {
domain_->WithDictionary(dict_entries);
}
} // namespace fuzztest::internal

View File

@ -0,0 +1,82 @@
// Copyright 2023 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef THIRD_PARTY_CENTIPEDE_FUZZTEST_MUTATOR_H_
#define THIRD_PARTY_CENTIPEDE_FUZZTEST_MUTATOR_H_
#include <cstddef>
#include <cstdint>
#include <memory>
#include <vector>
#include "./centipede/execution_metadata.h"
#include "./centipede/knobs.h"
#include "./centipede/mutation_input.h"
#include "./common/defs.h"
namespace fuzztest::internal {
// Mutator based on the FuzzTest std::vector domain. It always
// generates non-empty results, with a default limit on the mutant
// size unless changed by `set_max_len`.
//
// The mutator stores a reference to the `Knobs` passed at construction, so
// that object must outlive the mutator.
//
// This class is thread-compatible.
class FuzzTestMutator {
public:
// Initialize the mutator with the given `knobs` and RNG `seed`.
explicit FuzzTestMutator(const Knobs &knobs, uint64_t seed);
// Declared here and defined out of line because `MutationMetadata` and
// `MutatorDomain` are incomplete types in this header.
~FuzzTestMutator();
// Takes non-empty `inputs` and produces `num_mutants` mutants.
std::vector<ByteArray> MutateMany(const std::vector<MutationInputRef> &inputs,
size_t num_mutants);
// Adds `dict_entries` to the internal mutation dictionary.
void AddToDictionary(const std::vector<ByteArray>& dict_entries);
// Sets max length in bytes for mutants with modified sizes.
//
// Returns false on invalid `max_len`, true otherwise.
bool set_max_len(size_t max_len);
// TODO(xinhaoyuan): Support set_alignment().
private:
// Implementation details, defined in the .cc file.
struct MutationMetadata;
class MutatorDomain;
// Propagates the execution `metadata` to the internal mutation dictionary.
void SetMetadata(const ExecutionMetadata& metadata);
// The crossover algorithm based on the legacy ByteArrayMutator.
// TODO(ussuri): Implement and use the domain level crossover.
void CrossOverInsert(ByteArray &data, const ByteArray &other);
void CrossOverOverwrite(ByteArray &data, const ByteArray &other);
void CrossOver(ByteArray &data, const ByteArray &other);
// Size limits on the cmp entries to be used in mutation.
static constexpr uint8_t kMaxCmpEntrySize = 15;
static constexpr uint8_t kMinCmpEntrySize = 2;
// Mutation knobs; held by reference, not owned.
const Knobs &knobs_;
// Random number generator, seeded in the constructor.
Rng prng_;
// Upper bound on mutant size in bytes; see `set_max_len`.
size_t max_len_ = 1000;
// Owned implementation state; heap-allocated because the types are only
// forward-declared above.
std::unique_ptr<MutationMetadata> mutation_metadata_;
std::unique_ptr<MutatorDomain> domain_;
};
} // namespace fuzztest::internal
#endif

Some files were not shown because too many files have changed in this diff Show More