From 9b770d3df99d8746c071419ba5fad4d4b6b9f659 Mon Sep 17 00:00:00 2001 From: Mike Merrill Date: Fri, 6 Feb 2026 14:16:06 -0500 Subject: [PATCH] SERVER-118522 Vendor Fuzztest (#47328) GitOrigin-RevId: bfe44acc8976e2928a8c12257cf9e977e07fe416 --- .bazelignore | 1 + .bazelrc | 6 + .bazelrc.fuzztest | 85 + MODULE.bazel | 8 + MODULE.bazel.lock | 25 +- OWNERS.yml | 3 + .../mongo_linux_cc_toolchain_config.bzl | 2 + sbom.json | 63 + src/third_party/OWNERS.yml | 3 + src/third_party/fuzztest/dist/LICENSE | 251 ++ src/third_party/fuzztest/dist/MODULE.bazel | 75 + src/third_party/fuzztest/dist/bazel/BUILD | 25 + .../dist/bazel/antlr_cpp_runtime.BUILD | 42 + .../fuzztest/dist/bazel/setup_configs.sh | 220 ++ .../fuzztest/dist/centipede/.clang-format | 5 + .../centipede/.github/PULL_REQUEST_TEMPLATE | 1 + .../fuzztest/dist/centipede/.gitignore | 6 + .../fuzztest/dist/centipede/AUTHORS | 8 + src/third_party/fuzztest/dist/centipede/BUILD | 1921 +++++++++++++ .../dist/centipede/analyze_corpora.cc | 261 ++ .../fuzztest/dist/centipede/analyze_corpora.h | 72 + .../dist/centipede/analyze_corpora_test.cc | 124 + .../fuzztest/dist/centipede/binary_info.cc | 166 ++ .../fuzztest/dist/centipede/binary_info.h | 59 + .../dist/centipede/binary_info_test.cc | 82 + .../dist/centipede/blob_file_converter.cc | 150 + .../dist/centipede/byte_array_mutator.cc | 351 +++ .../dist/centipede/byte_array_mutator.h | 255 ++ .../dist/centipede/byte_array_mutator_test.cc | 1020 +++++++ .../fuzztest/dist/centipede/call_graph.cc | 70 + .../fuzztest/dist/centipede/call_graph.h | 72 + .../dist/centipede/call_graph_test.cc | 128 + .../fuzztest/dist/centipede/callstack.h | 118 + .../fuzztest/dist/centipede/callstack_test.cc | 242 ++ .../fuzztest/dist/centipede/centipede.cc | 1037 +++++++ .../fuzztest/dist/centipede/centipede.h | 224 ++ .../dist/centipede/centipede_binary_test.sh | 28 + .../dist/centipede/centipede_callbacks.cc | 469 ++++ .../dist/centipede/centipede_callbacks.h | 229 ++ 
.../centipede/centipede_default_callbacks.cc | 119 + .../centipede/centipede_default_callbacks.h | 54 + .../dist/centipede/centipede_flags.inc | 468 ++++ .../dist/centipede/centipede_interface.cc | 918 +++++++ .../dist/centipede/centipede_interface.h | 37 + .../fuzztest/dist/centipede/centipede_main.cc | 30 + .../fuzztest/dist/centipede/centipede_test.cc | 1231 +++++++++ .../fuzztest/dist/centipede/command.cc | 537 ++++ .../fuzztest/dist/centipede/command.h | 140 + .../fuzztest/dist/centipede/command_test.cc | 197 ++ .../dist/centipede/command_test_helper.cc | 36 + .../dist/centipede/concurrent_bitset.h | 150 + .../dist/centipede/concurrent_bitset_test.cc | 124 + .../dist/centipede/concurrent_byteset.h | 187 ++ .../dist/centipede/concurrent_byteset_test.cc | 124 + .../fuzztest/dist/centipede/config_file.cc | 297 ++ .../fuzztest/dist/centipede/config_file.h | 114 + .../dist/centipede/config_file_test.cc | 91 + .../fuzztest/dist/centipede/config_init.cc | 58 + .../fuzztest/dist/centipede/config_init.h | 56 + .../fuzztest/dist/centipede/config_util.cc | 109 + .../fuzztest/dist/centipede/config_util.h | 100 + .../dist/centipede/config_util_test.cc | 245 ++ .../fuzztest/dist/centipede/control_flow.cc | 235 ++ .../fuzztest/dist/centipede/control_flow.h | 166 ++ .../dist/centipede/control_flow_test.cc | 341 +++ .../fuzztest/dist/centipede/corpus.cc | 322 +++ .../fuzztest/dist/centipede/corpus.h | 210 ++ .../fuzztest/dist/centipede/corpus_io.cc | 157 ++ .../fuzztest/dist/centipede/corpus_io.h | 53 + .../fuzztest/dist/centipede/corpus_io_test.cc | 128 + .../fuzztest/dist/centipede/corpus_test.cc | 405 +++ .../fuzztest/dist/centipede/coverage.cc | 228 ++ .../fuzztest/dist/centipede/coverage.h | 185 ++ .../fuzztest/dist/centipede/coverage_test.cc | 541 ++++ .../fuzztest/dist/centipede/crash_summary.cc | 58 + .../fuzztest/dist/centipede/crash_summary.h | 84 + .../dist/centipede/crash_summary_test.cc | 87 + .../fuzztest/dist/centipede/dispatcher.cc | 601 ++++ 
.../fuzztest/dist/centipede/dispatcher.h | 141 + .../fuzztest/dist/centipede/distill.cc | 473 ++++ .../fuzztest/dist/centipede/distill.h | 51 + .../fuzztest/dist/centipede/distill_test.cc | 193 ++ .../fuzztest/dist/centipede/environment.cc | 351 +++ .../fuzztest/dist/centipede/environment.h | 140 + .../dist/centipede/environment_flags.cc | 139 + .../dist/centipede/environment_flags.h | 32 + .../dist/centipede/environment_test.cc | 222 ++ .../dist/centipede/execution_metadata.cc | 58 + .../dist/centipede/execution_metadata.h | 60 + .../dist/centipede/execution_metadata_test.cc | 122 + .../fuzztest/dist/centipede/feature.cc | 15 + .../fuzztest/dist/centipede/feature.h | 287 ++ .../fuzztest/dist/centipede/feature_set.cc | 145 + .../fuzztest/dist/centipede/feature_set.h | 144 + .../dist/centipede/feature_set_test.cc | 204 ++ .../fuzztest/dist/centipede/feature_test.cc | 43 + .../fuzztest/dist/centipede/foreach_nonzero.h | 70 + .../dist/centipede/foreach_nonzero_test.cc | 89 + .../dist/centipede/fuzztest_mutator.cc | 151 + .../dist/centipede/fuzztest_mutator.h | 82 + .../dist/centipede/fuzztest_mutator_test.cc | 308 +++ .../dist/centipede/hashed_ring_buffer.h | 86 + .../dist/centipede/hashed_ring_buffer_test.cc | 64 + .../centipede/install_dependencies_debian.sh | 61 + .../fuzztest/dist/centipede/instrument.bzl | 86 + .../fuzztest/dist/centipede/int_utils.h | 33 + .../fuzztest/dist/centipede/int_utils_test.cc | 64 + .../fuzztest/dist/centipede/knobs.cc | 33 + .../fuzztest/dist/centipede/knobs.h | 203 ++ .../fuzztest/dist/centipede/knobs_test.cc | 120 + .../fuzztest/dist/centipede/minimize_crash.cc | 169 ++ .../fuzztest/dist/centipede/minimize_crash.h | 36 + .../dist/centipede/minimize_crash_test.cc | 113 + .../fuzztest/dist/centipede/mutation_input.h | 53 + .../dist/centipede/mutation_input_test.cc | 37 + .../fuzztest/dist/centipede/pc_info.cc | 53 + .../fuzztest/dist/centipede/pc_info.h | 96 + .../fuzztest/dist/centipede/pc_info_test.cc | 36 + 
.../dist/centipede/periodic_action.cc | 127 + .../fuzztest/dist/centipede/periodic_action.h | 116 + .../dist/centipede/periodic_action_test.cc | 228 ++ .../fuzztest/dist/centipede/resource_pool.cc | 178 ++ .../fuzztest/dist/centipede/resource_pool.h | 204 ++ .../dist/centipede/resource_pool_test.cc | 155 ++ .../dist/centipede/reverse_pc_table.h | 98 + .../dist/centipede/reverse_pc_table_test.cc | 59 + .../fuzztest/dist/centipede/rolling_hash.h | 75 + .../dist/centipede/rolling_hash_test.cc | 122 + .../dist/centipede/run_test_workflow.sh | 108 + .../run_test_workflow_using_docker.sh | 36 + .../fuzztest/dist/centipede/runner.cc | 1313 +++++++++ .../fuzztest/dist/centipede/runner.h | 369 +++ .../dist/centipede/runner_cmp_trace.h | 140 + .../dist/centipede/runner_cmp_trace_test.cc | 122 + .../fuzztest/dist/centipede/runner_dl_info.cc | 333 +++ .../fuzztest/dist/centipede/runner_dl_info.h | 57 + .../fuzztest/dist/centipede/runner_flags.cc | 85 + .../fuzztest/dist/centipede/runner_flags.h | 93 + .../dist/centipede/runner_flags_test.cc | 81 + .../dist/centipede/runner_fork_server.cc | 351 +++ .../dist/centipede/runner_interceptors.cc | 217 ++ .../dist/centipede/runner_interface.h | 180 ++ .../fuzztest/dist/centipede/runner_main.cc | 20 + .../fuzztest/dist/centipede/runner_request.cc | 118 + .../fuzztest/dist/centipede/runner_request.h | 64 + .../fuzztest/dist/centipede/runner_result.cc | 205 ++ .../fuzztest/dist/centipede/runner_result.h | 237 ++ .../dist/centipede/runner_result_test.cc | 265 ++ .../fuzztest/dist/centipede/runner_sancov.cc | 315 +++ .../dist/centipede/runner_sancov_object.cc | 197 ++ .../dist/centipede/runner_sancov_object.h | 115 + .../fuzztest/dist/centipede/runner_utils.cc | 61 + .../fuzztest/dist/centipede/runner_utils.h | 40 + .../dist/centipede/rusage_profiler.cc | 550 ++++ .../fuzztest/dist/centipede/rusage_profiler.h | 594 ++++ .../dist/centipede/rusage_profiler_test.cc | 244 ++ .../fuzztest/dist/centipede/rusage_stats.cc | 663 +++++ 
.../fuzztest/dist/centipede/rusage_stats.h | 293 ++ .../dist/centipede/rusage_stats_test.cc | 554 ++++ .../dist/centipede/seed_corpus_config.proto | 72 + .../dist/centipede/seed_corpus_maker.cc | 53 + .../dist/centipede/seed_corpus_maker_flags.cc | 42 + .../dist/centipede/seed_corpus_maker_flags.h | 27 + .../dist/centipede/seed_corpus_maker_lib.cc | 544 ++++ .../dist/centipede/seed_corpus_maker_lib.h | 123 + .../centipede/seed_corpus_maker_lib_test.cc | 248 ++ .../centipede/seed_corpus_maker_proto_lib.cc | 178 ++ .../centipede/seed_corpus_maker_proto_lib.h | 42 + .../seed_corpus_maker_proto_lib_test.cc | 153 ++ .../centipede/shared_memory_blob_sequence.cc | 192 ++ .../centipede/shared_memory_blob_sequence.h | 192 ++ .../shared_memory_blob_sequence_test.cc | 256 ++ .../fuzztest/dist/centipede/stats.cc | 305 +++ .../fuzztest/dist/centipede/stats.h | 628 +++++ .../fuzztest/dist/centipede/stats_test.cc | 877 ++++++ .../fuzztest/dist/centipede/stop.cc | 53 + .../fuzztest/dist/centipede/stop.h | 58 + .../fuzztest/dist/centipede/symbol_table.cc | 251 ++ .../fuzztest/dist/centipede/symbol_table.h | 146 + .../dist/centipede/symbol_table_test.cc | 71 + .../dist/centipede/test_coverage_util.cc | 73 + .../dist/centipede/test_coverage_util.h | 61 + .../dist/centipede/test_fuzzing_util.sh | 117 + .../fuzztest/dist/centipede/test_util.sh | 167 ++ .../fuzztest/dist/centipede/thread_pool.h | 100 + .../fuzztest/dist/centipede/util.cc | 375 +++ .../fuzztest/dist/centipede/util.h | 196 ++ .../fuzztest/dist/centipede/util_test.cc | 294 ++ .../dist/centipede/weak_sancov_stubs.cc | 44 + .../fuzztest/dist/centipede/workdir.cc | 256 ++ .../fuzztest/dist/centipede/workdir.h | 174 ++ .../fuzztest/dist/centipede/workdir_test.cc | 140 + src/third_party/fuzztest/dist/common/BUILD | 274 ++ src/third_party/fuzztest/dist/common/bazel.cc | 127 + src/third_party/fuzztest/dist/common/bazel.h | 44 + .../fuzztest/dist/common/blob_file.cc | 507 ++++ .../fuzztest/dist/common/blob_file.h | 130 + 
.../fuzztest/dist/common/blob_file_test.cc | 272 ++ src/third_party/fuzztest/dist/common/defs.h | 62 + src/third_party/fuzztest/dist/common/hash.cc | 48 + src/third_party/fuzztest/dist/common/hash.h | 35 + .../fuzztest/dist/common/hash_test.cc | 31 + .../fuzztest/dist/common/logging.h | 27 + .../fuzztest/dist/common/remote_file.cc | 89 + .../fuzztest/dist/common/remote_file.h | 171 ++ .../fuzztest/dist/common/remote_file_oss.cc | 420 +++ .../fuzztest/dist/common/remote_file_test.cc | 236 ++ src/third_party/fuzztest/dist/common/sha1.cc | 203 ++ src/third_party/fuzztest/dist/common/sha1.h | 34 + .../fuzztest/dist/common/status_macros.h | 65 + .../fuzztest/dist/common/temp_dir.cc | 43 + .../fuzztest/dist/common/temp_dir.h | 28 + .../fuzztest/dist/common/test_util.cc | 86 + .../fuzztest/dist/common/test_util.h | 135 + src/third_party/fuzztest/dist/fuzztest/BUILD | 231 ++ .../fuzztest/dist/fuzztest/domain.h | 85 + .../fuzztest/dist/fuzztest/domain_core.h | 1075 ++++++++ .../fuzztest/dist/fuzztest/fuzzing_bit_gen.cc | 80 + .../fuzztest/dist/fuzztest/fuzzing_bit_gen.h | 119 + .../fuzztest/dist/fuzztest/fuzztest.h | 23 + .../fuzztest/dist/fuzztest/fuzztest_core.h | 23 + .../dist/fuzztest/fuzztest_gtest_main.cc | 26 + .../fuzztest/dist/fuzztest/fuzztest_macros.cc | 169 ++ .../fuzztest/dist/fuzztest/fuzztest_macros.h | 192 ++ .../dist/fuzztest/fuzztest_macros_test.cc | 189 ++ .../fuzztest/googletest_fixture_adapter.h | 143 + .../fuzztest/dist/fuzztest/init_fuzztest.cc | 459 ++++ .../fuzztest/dist/fuzztest/init_fuzztest.h | 82 + .../fuzztest/dist/fuzztest/internal/BUILD | 541 ++++ .../fuzztest/dist/fuzztest/internal/any.h | 189 ++ .../dist/fuzztest/internal/any_test.cc | 94 + .../fuzztest/internal/centipede_adaptor.cc | 1053 +++++++ .../fuzztest/internal/centipede_adaptor.h | 57 + .../fuzztest/internal/compatibility_mode.cc | 168 ++ .../fuzztest/internal/compatibility_mode.h | 107 + .../dist/fuzztest/internal/configuration.cc | 317 +++ 
.../dist/fuzztest/internal/configuration.h | 126 + .../fuzztest/internal/configuration_test.cc | 105 + .../dist/fuzztest/internal/corpus_database.cc | 79 + .../dist/fuzztest/internal/corpus_database.h | 67 + .../dist/fuzztest/internal/coverage.cc | 526 ++++ .../dist/fuzztest/internal/coverage.h | 231 ++ .../dist/fuzztest/internal/domains/BUILD | 216 ++ .../fuzztest/internal/domains/absl_helpers.h | 68 + .../internal/domains/aggregate_of_impl.h | 208 ++ .../internal/domains/arbitrary_impl.h | 598 ++++ .../domains/bit_flag_combination_of_impl.h | 116 + .../fuzztest/internal/domains/bit_gen_ref.h | 183 ++ .../domains/container_mutation_helpers.h | 294 ++ .../internal/domains/container_of_impl.h | 632 +++++ .../dist/fuzztest/internal/domains/domain.h | 375 +++ .../fuzztest/internal/domains/domain_base.h | 260 ++ .../internal/domains/domain_type_erasure.h | 244 ++ .../internal/domains/element_of_impl.h | 121 + .../fuzztest/internal/domains/filter_impl.h | 125 + .../fuzztest/internal/domains/flat_map_impl.h | 206 ++ .../domains/flatbuffers_domain_impl.cc | 297 ++ .../domains/flatbuffers_domain_impl.h | 788 ++++++ .../internal/domains/in_grammar_impl.cc | 67 + .../internal/domains/in_grammar_impl.h | 814 ++++++ .../fuzztest/internal/domains/in_range_impl.h | 229 ++ .../internal/domains/in_regexp_impl.cc | 235 ++ .../internal/domains/in_regexp_impl.h | 76 + .../dist/fuzztest/internal/domains/map_impl.h | 215 ++ .../internal/domains/mutation_metadata.h | 33 + .../fuzztest/internal/domains/one_of_impl.h | 142 + .../internal/domains/optional_of_impl.h | 208 ++ .../internal/domains/overlap_of_impl.h | 256 ++ .../internal/domains/protobuf_domain_impl.h | 2436 +++++++++++++++++ .../fuzztest/internal/domains/regexp_dfa.cc | 444 +++ .../fuzztest/internal/domains/regexp_dfa.h | 127 + .../dist/fuzztest/internal/domains/rune.cc | 172 ++ .../dist/fuzztest/internal/domains/rune.h | 38 + .../internal/domains/serialization_helpers.h | 129 + .../internal/domains/smart_pointer_of_impl.h | 
158 ++ .../internal/domains/special_values.h | 60 + .../unique_elements_container_of_impl.h | 124 + .../dist/fuzztest/internal/domains/utf.cc | 56 + .../dist/fuzztest/internal/domains/utf.h | 33 + .../internal/domains/value_mutation_helpers.h | 170 ++ .../internal/domains/variant_of_impl.h | 119 + .../dist/fuzztest/internal/escaping.cc | 29 + .../dist/fuzztest/internal/escaping.h | 29 + .../dist/fuzztest/internal/fixture_driver.cc | 32 + .../dist/fuzztest/internal/fixture_driver.h | 454 +++ .../fuzztest/internal/fixture_driver_test.cc | 346 +++ .../dist/fuzztest/internal/flag_name.h | 22 + .../fuzztest/internal/googletest_adaptor.cc | 185 ++ .../fuzztest/internal/googletest_adaptor.h | 146 + .../fuzztest/dist/fuzztest/internal/io.cc | 298 ++ .../fuzztest/dist/fuzztest/internal/io.h | 96 + .../dist/fuzztest/internal/io_test.cc | 296 ++ .../dist/fuzztest/internal/logging.cc | 146 + .../fuzztest/dist/fuzztest/internal/logging.h | 62 + .../fuzztest/dist/fuzztest/internal/meta.h | 834 ++++++ .../fuzztest/dist/fuzztest/internal/printer.h | 60 + .../internal/register_fuzzing_mocks.cc | 464 ++++ .../internal/register_fuzzing_mocks.h | 38 + .../dist/fuzztest/internal/registration.h | 349 +++ .../dist/fuzztest/internal/registry.cc | 81 + .../dist/fuzztest/internal/registry.h | 134 + .../dist/fuzztest/internal/runtime.cc | 1332 +++++++++ .../fuzztest/dist/fuzztest/internal/runtime.h | 443 +++ .../dist/fuzztest/internal/runtime_test.cc | 97 + .../dist/fuzztest/internal/seed_seq.cc | 90 + .../dist/fuzztest/internal/seed_seq.h | 58 + .../dist/fuzztest/internal/seed_seq_test.cc | 108 + .../dist/fuzztest/internal/serialization.cc | 347 +++ .../dist/fuzztest/internal/serialization.h | 315 +++ .../fuzztest/internal/serialization_test.cc | 479 ++++ .../fuzztest/dist/fuzztest/internal/status.cc | 25 + .../fuzztest/dist/fuzztest/internal/status.h | 13 + .../dist/fuzztest/internal/subprocess.cc | 371 +++ .../dist/fuzztest/internal/subprocess.h | 128 + 
.../dist/fuzztest/internal/subprocess_test.cc | 114 + .../internal/table_of_recent_compares.h | 552 ++++ .../internal/table_of_recent_compares_test.cc | 166 ++ .../fuzztest/internal/test_protobuf.proto | 317 +++ .../dist/fuzztest/internal/type_support.cc | 124 + .../dist/fuzztest/internal/type_support.h | 676 +++++ .../fuzztest/internal/type_support_test.cc | 589 ++++ .../dist/fuzztest/llvm_fuzzer_main.cc | 26 + .../dist/fuzztest/llvm_fuzzer_wrapper.cc | 212 ++ src/third_party/fuzztest/dist/tools/BUILD | 35 + .../tools/grammar_domain_code_generator.cc | 95 + .../fuzztest/dist/tools/minimizer.sh | 71 + src/third_party/fuzztest/scripts/import.sh | 87 + 327 files changed, 69547 insertions(+), 2 deletions(-) create mode 100644 .bazelrc.fuzztest create mode 100644 src/third_party/fuzztest/dist/LICENSE create mode 100644 src/third_party/fuzztest/dist/MODULE.bazel create mode 100644 src/third_party/fuzztest/dist/bazel/BUILD create mode 100644 src/third_party/fuzztest/dist/bazel/antlr_cpp_runtime.BUILD create mode 100755 src/third_party/fuzztest/dist/bazel/setup_configs.sh create mode 100644 src/third_party/fuzztest/dist/centipede/.clang-format create mode 100644 src/third_party/fuzztest/dist/centipede/.github/PULL_REQUEST_TEMPLATE create mode 100644 src/third_party/fuzztest/dist/centipede/.gitignore create mode 100644 src/third_party/fuzztest/dist/centipede/AUTHORS create mode 100644 src/third_party/fuzztest/dist/centipede/BUILD create mode 100644 src/third_party/fuzztest/dist/centipede/analyze_corpora.cc create mode 100644 src/third_party/fuzztest/dist/centipede/analyze_corpora.h create mode 100644 src/third_party/fuzztest/dist/centipede/analyze_corpora_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/binary_info.cc create mode 100644 src/third_party/fuzztest/dist/centipede/binary_info.h create mode 100644 src/third_party/fuzztest/dist/centipede/binary_info_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/blob_file_converter.cc create 
mode 100644 src/third_party/fuzztest/dist/centipede/byte_array_mutator.cc create mode 100644 src/third_party/fuzztest/dist/centipede/byte_array_mutator.h create mode 100644 src/third_party/fuzztest/dist/centipede/byte_array_mutator_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/call_graph.cc create mode 100644 src/third_party/fuzztest/dist/centipede/call_graph.h create mode 100644 src/third_party/fuzztest/dist/centipede/call_graph_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/callstack.h create mode 100644 src/third_party/fuzztest/dist/centipede/callstack_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/centipede.cc create mode 100644 src/third_party/fuzztest/dist/centipede/centipede.h create mode 100755 src/third_party/fuzztest/dist/centipede/centipede_binary_test.sh create mode 100644 src/third_party/fuzztest/dist/centipede/centipede_callbacks.cc create mode 100644 src/third_party/fuzztest/dist/centipede/centipede_callbacks.h create mode 100644 src/third_party/fuzztest/dist/centipede/centipede_default_callbacks.cc create mode 100644 src/third_party/fuzztest/dist/centipede/centipede_default_callbacks.h create mode 100644 src/third_party/fuzztest/dist/centipede/centipede_flags.inc create mode 100644 src/third_party/fuzztest/dist/centipede/centipede_interface.cc create mode 100644 src/third_party/fuzztest/dist/centipede/centipede_interface.h create mode 100644 src/third_party/fuzztest/dist/centipede/centipede_main.cc create mode 100644 src/third_party/fuzztest/dist/centipede/centipede_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/command.cc create mode 100644 src/third_party/fuzztest/dist/centipede/command.h create mode 100644 src/third_party/fuzztest/dist/centipede/command_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/command_test_helper.cc create mode 100644 src/third_party/fuzztest/dist/centipede/concurrent_bitset.h create mode 100644 
src/third_party/fuzztest/dist/centipede/concurrent_bitset_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/concurrent_byteset.h create mode 100644 src/third_party/fuzztest/dist/centipede/concurrent_byteset_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/config_file.cc create mode 100644 src/third_party/fuzztest/dist/centipede/config_file.h create mode 100644 src/third_party/fuzztest/dist/centipede/config_file_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/config_init.cc create mode 100644 src/third_party/fuzztest/dist/centipede/config_init.h create mode 100644 src/third_party/fuzztest/dist/centipede/config_util.cc create mode 100644 src/third_party/fuzztest/dist/centipede/config_util.h create mode 100644 src/third_party/fuzztest/dist/centipede/config_util_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/control_flow.cc create mode 100644 src/third_party/fuzztest/dist/centipede/control_flow.h create mode 100644 src/third_party/fuzztest/dist/centipede/control_flow_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/corpus.cc create mode 100644 src/third_party/fuzztest/dist/centipede/corpus.h create mode 100644 src/third_party/fuzztest/dist/centipede/corpus_io.cc create mode 100644 src/third_party/fuzztest/dist/centipede/corpus_io.h create mode 100644 src/third_party/fuzztest/dist/centipede/corpus_io_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/corpus_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/coverage.cc create mode 100644 src/third_party/fuzztest/dist/centipede/coverage.h create mode 100644 src/third_party/fuzztest/dist/centipede/coverage_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/crash_summary.cc create mode 100644 src/third_party/fuzztest/dist/centipede/crash_summary.h create mode 100644 src/third_party/fuzztest/dist/centipede/crash_summary_test.cc create mode 100644 
src/third_party/fuzztest/dist/centipede/dispatcher.cc create mode 100644 src/third_party/fuzztest/dist/centipede/dispatcher.h create mode 100644 src/third_party/fuzztest/dist/centipede/distill.cc create mode 100644 src/third_party/fuzztest/dist/centipede/distill.h create mode 100644 src/third_party/fuzztest/dist/centipede/distill_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/environment.cc create mode 100644 src/third_party/fuzztest/dist/centipede/environment.h create mode 100644 src/third_party/fuzztest/dist/centipede/environment_flags.cc create mode 100644 src/third_party/fuzztest/dist/centipede/environment_flags.h create mode 100644 src/third_party/fuzztest/dist/centipede/environment_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/execution_metadata.cc create mode 100644 src/third_party/fuzztest/dist/centipede/execution_metadata.h create mode 100644 src/third_party/fuzztest/dist/centipede/execution_metadata_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/feature.cc create mode 100644 src/third_party/fuzztest/dist/centipede/feature.h create mode 100644 src/third_party/fuzztest/dist/centipede/feature_set.cc create mode 100644 src/third_party/fuzztest/dist/centipede/feature_set.h create mode 100644 src/third_party/fuzztest/dist/centipede/feature_set_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/feature_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/foreach_nonzero.h create mode 100644 src/third_party/fuzztest/dist/centipede/foreach_nonzero_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/fuzztest_mutator.cc create mode 100644 src/third_party/fuzztest/dist/centipede/fuzztest_mutator.h create mode 100644 src/third_party/fuzztest/dist/centipede/fuzztest_mutator_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/hashed_ring_buffer.h create mode 100644 src/third_party/fuzztest/dist/centipede/hashed_ring_buffer_test.cc create mode 100755 
src/third_party/fuzztest/dist/centipede/install_dependencies_debian.sh create mode 100644 src/third_party/fuzztest/dist/centipede/instrument.bzl create mode 100644 src/third_party/fuzztest/dist/centipede/int_utils.h create mode 100644 src/third_party/fuzztest/dist/centipede/int_utils_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/knobs.cc create mode 100644 src/third_party/fuzztest/dist/centipede/knobs.h create mode 100644 src/third_party/fuzztest/dist/centipede/knobs_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/minimize_crash.cc create mode 100644 src/third_party/fuzztest/dist/centipede/minimize_crash.h create mode 100644 src/third_party/fuzztest/dist/centipede/minimize_crash_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/mutation_input.h create mode 100644 src/third_party/fuzztest/dist/centipede/mutation_input_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/pc_info.cc create mode 100644 src/third_party/fuzztest/dist/centipede/pc_info.h create mode 100644 src/third_party/fuzztest/dist/centipede/pc_info_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/periodic_action.cc create mode 100644 src/third_party/fuzztest/dist/centipede/periodic_action.h create mode 100644 src/third_party/fuzztest/dist/centipede/periodic_action_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/resource_pool.cc create mode 100644 src/third_party/fuzztest/dist/centipede/resource_pool.h create mode 100644 src/third_party/fuzztest/dist/centipede/resource_pool_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/reverse_pc_table.h create mode 100644 src/third_party/fuzztest/dist/centipede/reverse_pc_table_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/rolling_hash.h create mode 100644 src/third_party/fuzztest/dist/centipede/rolling_hash_test.cc create mode 100755 src/third_party/fuzztest/dist/centipede/run_test_workflow.sh create mode 100755 
src/third_party/fuzztest/dist/centipede/run_test_workflow_using_docker.sh create mode 100644 src/third_party/fuzztest/dist/centipede/runner.cc create mode 100644 src/third_party/fuzztest/dist/centipede/runner.h create mode 100644 src/third_party/fuzztest/dist/centipede/runner_cmp_trace.h create mode 100644 src/third_party/fuzztest/dist/centipede/runner_cmp_trace_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/runner_dl_info.cc create mode 100644 src/third_party/fuzztest/dist/centipede/runner_dl_info.h create mode 100644 src/third_party/fuzztest/dist/centipede/runner_flags.cc create mode 100644 src/third_party/fuzztest/dist/centipede/runner_flags.h create mode 100644 src/third_party/fuzztest/dist/centipede/runner_flags_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/runner_fork_server.cc create mode 100644 src/third_party/fuzztest/dist/centipede/runner_interceptors.cc create mode 100644 src/third_party/fuzztest/dist/centipede/runner_interface.h create mode 100644 src/third_party/fuzztest/dist/centipede/runner_main.cc create mode 100644 src/third_party/fuzztest/dist/centipede/runner_request.cc create mode 100644 src/third_party/fuzztest/dist/centipede/runner_request.h create mode 100644 src/third_party/fuzztest/dist/centipede/runner_result.cc create mode 100644 src/third_party/fuzztest/dist/centipede/runner_result.h create mode 100644 src/third_party/fuzztest/dist/centipede/runner_result_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/runner_sancov.cc create mode 100644 src/third_party/fuzztest/dist/centipede/runner_sancov_object.cc create mode 100644 src/third_party/fuzztest/dist/centipede/runner_sancov_object.h create mode 100644 src/third_party/fuzztest/dist/centipede/runner_utils.cc create mode 100644 src/third_party/fuzztest/dist/centipede/runner_utils.h create mode 100644 src/third_party/fuzztest/dist/centipede/rusage_profiler.cc create mode 100644 src/third_party/fuzztest/dist/centipede/rusage_profiler.h 
create mode 100644 src/third_party/fuzztest/dist/centipede/rusage_profiler_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/rusage_stats.cc create mode 100644 src/third_party/fuzztest/dist/centipede/rusage_stats.h create mode 100644 src/third_party/fuzztest/dist/centipede/rusage_stats_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/seed_corpus_config.proto create mode 100644 src/third_party/fuzztest/dist/centipede/seed_corpus_maker.cc create mode 100644 src/third_party/fuzztest/dist/centipede/seed_corpus_maker_flags.cc create mode 100644 src/third_party/fuzztest/dist/centipede/seed_corpus_maker_flags.h create mode 100644 src/third_party/fuzztest/dist/centipede/seed_corpus_maker_lib.cc create mode 100644 src/third_party/fuzztest/dist/centipede/seed_corpus_maker_lib.h create mode 100644 src/third_party/fuzztest/dist/centipede/seed_corpus_maker_lib_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/seed_corpus_maker_proto_lib.cc create mode 100644 src/third_party/fuzztest/dist/centipede/seed_corpus_maker_proto_lib.h create mode 100644 src/third_party/fuzztest/dist/centipede/seed_corpus_maker_proto_lib_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/shared_memory_blob_sequence.cc create mode 100644 src/third_party/fuzztest/dist/centipede/shared_memory_blob_sequence.h create mode 100644 src/third_party/fuzztest/dist/centipede/shared_memory_blob_sequence_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/stats.cc create mode 100644 src/third_party/fuzztest/dist/centipede/stats.h create mode 100644 src/third_party/fuzztest/dist/centipede/stats_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/stop.cc create mode 100644 src/third_party/fuzztest/dist/centipede/stop.h create mode 100644 src/third_party/fuzztest/dist/centipede/symbol_table.cc create mode 100644 src/third_party/fuzztest/dist/centipede/symbol_table.h create mode 100644 
src/third_party/fuzztest/dist/centipede/symbol_table_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/test_coverage_util.cc create mode 100644 src/third_party/fuzztest/dist/centipede/test_coverage_util.h create mode 100644 src/third_party/fuzztest/dist/centipede/test_fuzzing_util.sh create mode 100644 src/third_party/fuzztest/dist/centipede/test_util.sh create mode 100644 src/third_party/fuzztest/dist/centipede/thread_pool.h create mode 100644 src/third_party/fuzztest/dist/centipede/util.cc create mode 100644 src/third_party/fuzztest/dist/centipede/util.h create mode 100644 src/third_party/fuzztest/dist/centipede/util_test.cc create mode 100644 src/third_party/fuzztest/dist/centipede/weak_sancov_stubs.cc create mode 100644 src/third_party/fuzztest/dist/centipede/workdir.cc create mode 100644 src/third_party/fuzztest/dist/centipede/workdir.h create mode 100644 src/third_party/fuzztest/dist/centipede/workdir_test.cc create mode 100644 src/third_party/fuzztest/dist/common/BUILD create mode 100644 src/third_party/fuzztest/dist/common/bazel.cc create mode 100644 src/third_party/fuzztest/dist/common/bazel.h create mode 100644 src/third_party/fuzztest/dist/common/blob_file.cc create mode 100644 src/third_party/fuzztest/dist/common/blob_file.h create mode 100644 src/third_party/fuzztest/dist/common/blob_file_test.cc create mode 100644 src/third_party/fuzztest/dist/common/defs.h create mode 100644 src/third_party/fuzztest/dist/common/hash.cc create mode 100644 src/third_party/fuzztest/dist/common/hash.h create mode 100644 src/third_party/fuzztest/dist/common/hash_test.cc create mode 100644 src/third_party/fuzztest/dist/common/logging.h create mode 100644 src/third_party/fuzztest/dist/common/remote_file.cc create mode 100644 src/third_party/fuzztest/dist/common/remote_file.h create mode 100644 src/third_party/fuzztest/dist/common/remote_file_oss.cc create mode 100644 src/third_party/fuzztest/dist/common/remote_file_test.cc create mode 100644 
src/third_party/fuzztest/dist/common/sha1.cc create mode 100644 src/third_party/fuzztest/dist/common/sha1.h create mode 100644 src/third_party/fuzztest/dist/common/status_macros.h create mode 100644 src/third_party/fuzztest/dist/common/temp_dir.cc create mode 100644 src/third_party/fuzztest/dist/common/temp_dir.h create mode 100644 src/third_party/fuzztest/dist/common/test_util.cc create mode 100644 src/third_party/fuzztest/dist/common/test_util.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/BUILD create mode 100644 src/third_party/fuzztest/dist/fuzztest/domain.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/domain_core.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/fuzzing_bit_gen.cc create mode 100644 src/third_party/fuzztest/dist/fuzztest/fuzzing_bit_gen.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/fuzztest.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/fuzztest_core.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/fuzztest_gtest_main.cc create mode 100644 src/third_party/fuzztest/dist/fuzztest/fuzztest_macros.cc create mode 100644 src/third_party/fuzztest/dist/fuzztest/fuzztest_macros.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/fuzztest_macros_test.cc create mode 100644 src/third_party/fuzztest/dist/fuzztest/googletest_fixture_adapter.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/init_fuzztest.cc create mode 100644 src/third_party/fuzztest/dist/fuzztest/init_fuzztest.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/BUILD create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/any.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/any_test.cc create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/centipede_adaptor.cc create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/centipede_adaptor.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/compatibility_mode.cc create 
mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/compatibility_mode.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/configuration.cc create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/configuration.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/configuration_test.cc create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/corpus_database.cc create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/corpus_database.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/coverage.cc create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/coverage.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/domains/BUILD create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/domains/absl_helpers.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/domains/aggregate_of_impl.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/domains/arbitrary_impl.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/domains/bit_flag_combination_of_impl.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/domains/bit_gen_ref.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/domains/container_mutation_helpers.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/domains/container_of_impl.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/domains/domain.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/domains/domain_base.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/domains/domain_type_erasure.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/domains/element_of_impl.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/domains/filter_impl.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/domains/flat_map_impl.h create mode 100644 
src/third_party/fuzztest/dist/fuzztest/internal/domains/flatbuffers_domain_impl.cc create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/domains/flatbuffers_domain_impl.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/domains/in_grammar_impl.cc create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/domains/in_grammar_impl.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/domains/in_range_impl.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/domains/in_regexp_impl.cc create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/domains/in_regexp_impl.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/domains/map_impl.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/domains/mutation_metadata.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/domains/one_of_impl.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/domains/optional_of_impl.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/domains/overlap_of_impl.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/domains/protobuf_domain_impl.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/domains/regexp_dfa.cc create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/domains/regexp_dfa.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/domains/rune.cc create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/domains/rune.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/domains/serialization_helpers.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/domains/smart_pointer_of_impl.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/domains/special_values.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/domains/unique_elements_container_of_impl.h create mode 100644 
src/third_party/fuzztest/dist/fuzztest/internal/domains/utf.cc create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/domains/utf.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/domains/value_mutation_helpers.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/domains/variant_of_impl.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/escaping.cc create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/escaping.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/fixture_driver.cc create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/fixture_driver.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/fixture_driver_test.cc create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/flag_name.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/googletest_adaptor.cc create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/googletest_adaptor.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/io.cc create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/io.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/io_test.cc create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/logging.cc create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/logging.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/meta.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/printer.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/register_fuzzing_mocks.cc create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/register_fuzzing_mocks.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/registration.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/registry.cc create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/registry.h create mode 100644 
src/third_party/fuzztest/dist/fuzztest/internal/runtime.cc create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/runtime.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/runtime_test.cc create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/seed_seq.cc create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/seed_seq.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/seed_seq_test.cc create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/serialization.cc create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/serialization.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/serialization_test.cc create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/status.cc create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/status.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/subprocess.cc create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/subprocess.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/subprocess_test.cc create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/table_of_recent_compares.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/table_of_recent_compares_test.cc create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/test_protobuf.proto create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/type_support.cc create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/type_support.h create mode 100644 src/third_party/fuzztest/dist/fuzztest/internal/type_support_test.cc create mode 100644 src/third_party/fuzztest/dist/fuzztest/llvm_fuzzer_main.cc create mode 100644 src/third_party/fuzztest/dist/fuzztest/llvm_fuzzer_wrapper.cc create mode 100644 src/third_party/fuzztest/dist/tools/BUILD create mode 100644 src/third_party/fuzztest/dist/tools/grammar_domain_code_generator.cc create mode 100755 
src/third_party/fuzztest/dist/tools/minimizer.sh create mode 100755 src/third_party/fuzztest/scripts/import.sh diff --git a/.bazelignore b/.bazelignore index c9763483539..bd80032f359 100644 --- a/.bazelignore +++ b/.bazelignore @@ -5,6 +5,7 @@ src/third_party/protobuf/dist src/third_party/re2/dist src/third_party/tcmalloc/dist src/third_party/wiredtiger/dist +src/third_party/fuzztest/dist # Ignore node_modules due to the following error # ERROR: in verify_node_modules_ignored: diff --git a/.bazelrc b/.bazelrc index 0339d83f81f..64870413d91 100644 --- a/.bazelrc +++ b/.bazelrc @@ -597,6 +597,9 @@ common --experimental_collect_system_network_usage common:fission --fission=yes common:fission --remote_download_regex=.*\.dwo$ +--config=fuzztest +common:fuzztest --@fuzztest//fuzztest:centipede_integration=True + # Avoid failing builds when BES metadata fails to upload. common --bes_upload_mode=fully_async @@ -643,5 +646,8 @@ try-import %workspace%/.bazelrc.sync # Engflow auth credentials try-import %workspace%/.bazelrc.engflow_creds +# Flags for fuzztest +try-import %workspace%/.bazelrc.fuzztest + # Repository root absolute path to set --execution_log_compact_file #try-import %workspace%/.bazelrc.exec_log_file diff --git a/.bazelrc.fuzztest b/.bazelrc.fuzztest new file mode 100644 index 00000000000..b6ea7f547c1 --- /dev/null +++ b/.bazelrc.fuzztest @@ -0,0 +1,85 @@ +### DO NOT EDIT. Generated file. +# +# To regenerate, run the following from your project's workspace: +# +# bazel run @com_google_fuzztest//bazel:setup_configs > fuzztest.bazelrc +# +# And don't forget to add the following to your project's .bazelrc: +# +# try-import %workspace%/fuzztest.bazelrc + +### Common options. +# +# Do not use directly. + +# Standard define for \"ifdef-ing\" any fuzz test specific code. +build:fuzztest-common --copt=-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + +# In fuzz tests, we want to catch assertion violations even in optimized builds. 
+build:fuzztest-common --copt=-UNDEBUG + +# Enable libc++ assertions. +# See https://libcxx.llvm.org/UsingLibcxx.html#enabling-the-safe-libc-mode +build:fuzztest-common --copt=-D_LIBCPP_ENABLE_ASSERTIONS=1 + +### ASan (Address Sanitizer) build configuration. +# +# Use with: --config=fuzztest_asan + +build:fuzztest_asan --linkopt=-fsanitize=address +build:fuzztest_asan --copt=-fsanitize=address + +# We rely on the following flag instead of the compiler provided +# __has_feature(address_sanitizer) to know that we have an ASAN build even in +# the uninstrumented runtime. +build:fuzztest_asan --copt=-DADDRESS_SANITIZER + +### FuzzTest build configuration. +# +# Use with: --config=fuzztest +# +# Note that this configuration includes the ASan configuration. + +build:fuzztest --config=fuzztest_asan +build:fuzztest --config=fuzztest-common + +# Link statically. +build:fuzztest --dynamic_mode=off + +# We apply coverage tracking instrumentation to everything but Centipede and the +# FuzzTest framework itself (including GoogleTest and GoogleMock). +build:fuzztest --copt=-fsanitize-coverage=inline-8bit-counters,trace-cmp,pc-table +build:fuzztest --per_file_copt=common/.*,fuzztest/.*,centipede/.*,-centipede/.*fuzz_target,googletest/.*,googlemock/.*@-fsanitize-coverage=0 + +### Experimental FuzzTest build configuration. +# +# Use with: --config=fuzztest-experimental +# +# Use this instead of --config=fuzztest when building test binaries to run with +# Centipede. Eventually, this will be consolidated with --config=fuzztest. +# Note that this configuration doesn't include the ASan configuration. If you +# want to use both, you can use --config=fuzztest-experimental --config=fuzztest_asan. + +build:fuzztest-experimental --config=fuzztest-common +build:fuzztest-experimental --@com_google_fuzztest//fuzztest:centipede_integration + +# Generate line tables for debugging. 
+build:fuzztest-experimental --copt=-gline-tables-only +build:fuzztest-experimental --strip=never + +# Prevent memcmp & co from being inlined. +build:fuzztest-experimental --copt=-fno-builtin + +# Disable heap checking. +build:fuzztest-experimental --copt=-DHEAPCHECK_DISABLE + +# Link statically. +build:fuzztest-experimental --dynamic_mode=off + +# We apply coverage tracking instrumentation to everything but Centipede and the +# FuzzTest framework itself (including GoogleTest and GoogleMock). +# TODO(b/374840534): Add -fsanitize-coverage=control-flow once we start building +# with clang 16+. +build:fuzztest-experimental --copt=-fsanitize-coverage=trace-pc-guard,pc-table,trace-loads,trace-cmp +build:fuzztest-experimental --per_file_copt=common/.*,fuzztest/.*,centipede/.*,-centipede/.*fuzz_target,googletest/.*,googlemock/.*@-fsanitize-coverage=0 + diff --git a/MODULE.bazel b/MODULE.bazel index daff3e863c9..dd9d932d273 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -181,6 +181,14 @@ local_path_override( path = "src/third_party/zlib", ) +# When updating fuzztest run the following command +# bazel run @fuzztest//bazel:setup_configs > .bazelrc.fuzztest +bazel_dep(name = "fuzztest", version = "20250805.0") +local_path_override( + module_name = "fuzztest", + path = "src/third_party/fuzztest/dist", +) + # This is just here because 1.5.1 has a bug in it and our current version of re2 will pull in 1.5.1 # If re2 is ever upgraded past 2025-08-12 this can be unpinned bazel_dep(name = "rules_python", version = "1.5.2") diff --git a/MODULE.bazel.lock b/MODULE.bazel.lock index 1a519117118..54f781b7872 100644 --- a/MODULE.bazel.lock +++ b/MODULE.bazel.lock @@ -2,6 +2,8 @@ "lockFileVersion": 13, "registryFileHashes": { "https://bcr.bazel.build/bazel_registry.json": "8a28e4aff06ee60aed2a8c281907fb8bcbf3b753c91fb5a5c57da3215d5b3497", + "https://bcr.bazel.build/modules/abseil-py/2.1.0/MODULE.bazel": "5ebe5bf853769c65707e5c28f216798f7a4b1042015e6a36e6d03094d94bec8a", +
"https://bcr.bazel.build/modules/abseil-py/2.1.0/source.json": "0e8fc4f088ce07099c1cd6594c20c7ddbb48b4b3c0849b7d94ba94be88ff042b", "https://bcr.bazel.build/modules/apple_support/1.11.1/MODULE.bazel": "1843d7cd8a58369a444fc6000e7304425fba600ff641592161d9f15b179fb896", "https://bcr.bazel.build/modules/apple_support/1.15.1/MODULE.bazel": "a0556fefca0b1bb2de8567b8827518f94db6a6e7e7d632b4c48dc5f865bc7c85", "https://bcr.bazel.build/modules/apple_support/1.17.1/MODULE.bazel": "655c922ab1209978a94ef6ca7d9d43e940cd97d9c172fb55f94d91ac53f8610b", @@ -34,10 +36,14 @@ "https://bcr.bazel.build/modules/bazel_features/1.9.1/MODULE.bazel": "8f679097876a9b609ad1f60249c49d68bfab783dd9be012faf9d82547b14815a", "https://bcr.bazel.build/modules/bazel_skylib/1.7.1/MODULE.bazel": "3120d80c5861aa616222ec015332e5f8d3171e062e3e804a2a0253e1be26e59b", "https://bcr.bazel.build/modules/bazel_skylib/1.7.1/source.json": "f121b43eeefc7c29efbd51b83d08631e2347297c95aac9764a701f2a6a2bb953", + "https://bcr.bazel.build/modules/brotli/1.1.0/MODULE.bazel": "3b5b90488995183419c4b5c9b063a164f6c0bc4d0d6b40550a612a5e860cc0fe", + "https://bcr.bazel.build/modules/brotli/1.1.0/source.json": "098a4fd315527166e8dfe1fd1537c96a737a83764be38fc43f4da231d600f3d0", "https://bcr.bazel.build/modules/buildifier_prebuilt/6.4.0/MODULE.bazel": "37389c6b5a40c59410b4226d3bb54b08637f393d66e2fa57925c6fcf68e64bf4", "https://bcr.bazel.build/modules/buildifier_prebuilt/6.4.0/source.json": "83eb01b197ed0b392f797860c9da5ed1bf95f4d0ded994d694a3d44731275916", "https://bcr.bazel.build/modules/buildozer/7.1.2/MODULE.bazel": "2e8dd40ede9c454042645fd8d8d0cd1527966aa5c919de86661e62953cd73d84", "https://bcr.bazel.build/modules/buildozer/7.1.2/source.json": "c9028a501d2db85793a6996205c8de120944f50a0d570438fcae0457a5f9d1f8", + "https://bcr.bazel.build/modules/bzip2/1.0.8/MODULE.bazel": "83ee443b286b0b91566e5ee77e74ba6445895f3135467893871560f9e4ebc159", + "https://bcr.bazel.build/modules/bzip2/1.0.8/source.json": 
"b64f3a2f973749cf5f6ee32b3d804af56a35a746228a7845ed5daa31c8cc8af1", "https://bcr.bazel.build/modules/cel-spec/0.15.0/MODULE.bazel": "e1eed53d233acbdcf024b4b0bc1528116d92c29713251b5154078ab1348cb600", "https://bcr.bazel.build/modules/cel-spec/0.15.0/source.json": "ab7dccdf21ea2261c0f809b5a5221a4d7f8b580309f285fdf1444baaca75d44a", "https://bcr.bazel.build/modules/civetweb/1.16/MODULE.bazel": "46a38f9daeb57392e3827fce7d40926be0c802bd23cdd6bfd3a96c804de42fae", @@ -65,10 +71,14 @@ "https://bcr.bazel.build/modules/grpc-java/1.66.0/source.json": "f841b339ff8516c86c3a5272cd053194dd0cb2fdd63157123835e1157a28328d", "https://bcr.bazel.build/modules/grpc-proto/0.0.0-20240627-ec30f58/MODULE.bazel": "88de79051e668a04726e9ea94a481ec6f1692086735fd6f488ab908b3b909238", "https://bcr.bazel.build/modules/grpc-proto/0.0.0-20240627-ec30f58/source.json": "5035d379c61042930244ab59e750106d893ec440add92ec0df6a0098ca7f131d", + "https://bcr.bazel.build/modules/highwayhash/0.0.0-20240305-5ad3bf8/MODULE.bazel": "5c7f29d5bd70feff14b0f65b39584957e18e4a8d555e5a29a4c36019afbb44b9", + "https://bcr.bazel.build/modules/highwayhash/0.0.0-20240305-5ad3bf8/source.json": "211c0937ef5f537da6c3c135d12e60927c71b380642e207e4a02b86d29c55e85", "https://bcr.bazel.build/modules/jsoncpp/1.9.6/MODULE.bazel": "2f8d20d3b7d54143213c4dfc3d98225c42de7d666011528dc8fe91591e2e17b0", "https://bcr.bazel.build/modules/jsoncpp/1.9.6/source.json": "a04756d367a2126c3541682864ecec52f92cdee80a35735a3cb249ce015ca000", "https://bcr.bazel.build/modules/libpfm/4.11.0/MODULE.bazel": "45061ff025b301940f1e30d2c16bea596c25b176c8b6b3087e92615adbd52902", "https://bcr.bazel.build/modules/libpfm/4.11.0/source.json": "caaffb3ac2b59b8aac456917a4ecf3167d40478ee79f15ab7a877ec9273937c9", + "https://bcr.bazel.build/modules/lz4/1.9.4/MODULE.bazel": "e3d307b1d354d70f6c809167eafecf5d622c3f27e3971ab7273410f429c7f83a", + "https://bcr.bazel.build/modules/lz4/1.9.4/source.json": "233f0bdfc21f254e3dda14683ddc487ca68c6a3a83b7d5db904c503f85bd089b", 
"https://bcr.bazel.build/modules/mbedtls/3.6.0/MODULE.bazel": "8e380e4698107c5f8766264d4df92e36766248447858db28187151d884995a09", "https://bcr.bazel.build/modules/mbedtls/3.6.0/source.json": "1dbe7eb5258050afcc3806b9d43050f71c6f539ce0175535c670df606790b30c", "https://bcr.bazel.build/modules/nlohmann_json/3.11.3/MODULE.bazel": "87023db2f55fc3a9949c7b08dc711fae4d4be339a80a99d04453c4bb3998eefc", @@ -101,6 +111,8 @@ "https://bcr.bazel.build/modules/pybind11_bazel/2.13.6/source.json": "6aa0703de8efb20cc897bbdbeb928582ee7beaf278bcd001ac253e1605bddfae", "https://bcr.bazel.build/modules/rapidjson/1.1.0.bcr.20241007/MODULE.bazel": "82fbcb2e42f9e0040e76ccc74c06c3e46dfd33c64ca359293f8b84df0e6dff4c", "https://bcr.bazel.build/modules/rapidjson/1.1.0.bcr.20241007/source.json": "5c42389ad0e21fc06b95ad7c0b730008271624a2fa3292e0eab5f30e15adeee3", + "https://bcr.bazel.build/modules/riegeli/0.0.0-20250706-c4d1f27/MODULE.bazel": "b8b7309fb00c6b545fafcdfc3bf8cba168a61d37d841b9d90bacf7e70ae6627c", + "https://bcr.bazel.build/modules/riegeli/0.0.0-20250706-c4d1f27/source.json": "af3e2998bdf2f0ca3695816695c079f885d1e5b838e1d05ca82450aba4941762", "https://bcr.bazel.build/modules/rules_android/0.1.1/MODULE.bazel": "48809ab0091b07ad0182defb787c4c5328bd3a278938415c00a7b69b50c4d3a8", "https://bcr.bazel.build/modules/rules_android/0.1.1/source.json": "e6986b41626ee10bdc864937ffb6d6bf275bb5b9c65120e6137d56e6331f089e", "https://bcr.bazel.build/modules/rules_apple/3.16.0/MODULE.bazel": "0d1caf0b8375942ce98ea944be754a18874041e4e0459401d925577624d3a54a", @@ -178,14 +190,17 @@ "https://bcr.bazel.build/modules/rules_proto/6.0.0/MODULE.bazel": "b531d7f09f58dce456cd61b4579ce8c86b38544da75184eadaf0a7cb7966453f", "https://bcr.bazel.build/modules/rules_proto/6.0.2/MODULE.bazel": "ce916b775a62b90b61888052a416ccdda405212b6aaeb39522f7dc53431a5e73", "https://bcr.bazel.build/modules/rules_proto/7.0.2/MODULE.bazel": "bf81793bd6d2ad89a37a40693e56c61b0ee30f7a7fdbaf3eabbf5f39de47dea2", - 
"https://bcr.bazel.build/modules/rules_proto/7.0.2/source.json": "1e5e7260ae32ef4f2b52fd1d0de8d03b606a44c91b694d2f1afb1d3b28a48ce1", + "https://bcr.bazel.build/modules/rules_proto/7.1.0/MODULE.bazel": "002d62d9108f75bb807cd56245d45648f38275cb3a99dcd45dfb864c5d74cb96", + "https://bcr.bazel.build/modules/rules_proto/7.1.0/source.json": "39f89066c12c24097854e8f57ab8558929f9c8d474d34b2c00ac04630ad8940e", "https://bcr.bazel.build/modules/rules_python/0.20.0/MODULE.bazel": "bfe14d17f20e3fe900b9588f526f52c967a6f281e47a1d6b988679bd15082286", "https://bcr.bazel.build/modules/rules_python/0.22.0/MODULE.bazel": "b8057bafa11a9e0f4b08fc3b7cd7bee0dcbccea209ac6fc9a3ff051cd03e19e9", "https://bcr.bazel.build/modules/rules_python/0.22.1/MODULE.bazel": "26114f0c0b5e93018c0c066d6673f1a2c3737c7e90af95eff30cfee38d0bbac7", "https://bcr.bazel.build/modules/rules_python/0.23.1/MODULE.bazel": "49ffccf0511cb8414de28321f5fcf2a31312b47c40cc21577144b7447f2bf300", "https://bcr.bazel.build/modules/rules_python/0.27.1/MODULE.bazel": "65dc875cc1a06c30d5bbdba7ab021fd9e551a6579e408a3943a61303e2228a53", + "https://bcr.bazel.build/modules/rules_python/0.28.0/MODULE.bazel": "cba2573d870babc976664a912539b320cbaa7114cd3e8f053c720171cde331ed", "https://bcr.bazel.build/modules/rules_python/0.31.0/MODULE.bazel": "93a43dc47ee570e6ec9f5779b2e64c1476a6ce921c48cc9a1678a91dd5f8fd58", "https://bcr.bazel.build/modules/rules_python/0.34.0/MODULE.bazel": "1d623d026e075b78c9fde483a889cda7996f5da4f36dffb24c246ab30f06513a", + "https://bcr.bazel.build/modules/rules_python/0.36.0/MODULE.bazel": "a4ce1ccea92b9106c7d16ab9ee51c6183107e78ba4a37aa65055227b80cd480c", "https://bcr.bazel.build/modules/rules_python/0.37.1/MODULE.bazel": "3faeb2d9fa0a81f8980643ee33f212308f4d93eea4b9ce6f36d0b742e71e9500", "https://bcr.bazel.build/modules/rules_python/0.4.0/MODULE.bazel": "9208ee05fd48bf09ac60ed269791cf17fb343db56c8226a720fbb1cdf467166c", "https://bcr.bazel.build/modules/rules_python/0.40.0/MODULE.bazel": 
"9d1a3cd88ed7d8e39583d9ffe56ae8a244f67783ae89b60caafc9f5cf318ada7", @@ -202,6 +217,8 @@ "https://bcr.bazel.build/modules/rules_swift/1.16.0/MODULE.bazel": "4a09f199545a60d09895e8281362b1ff3bb08bbde69c6fc87aff5b92fcc916ca", "https://bcr.bazel.build/modules/rules_swift/2.1.1/MODULE.bazel": "494900a80f944fc7aa61500c2073d9729dff0b764f0e89b824eb746959bc1046", "https://bcr.bazel.build/modules/rules_swift/2.1.1/source.json": "40fc69dfaac64deddbb75bd99cdac55f4427d9ca0afbe408576a65428427a186", + "https://bcr.bazel.build/modules/snappy/1.2.0/MODULE.bazel": "cc7a727b46089c7fdae0ede21b1fd65bdb14d01823da118ef5c48044f40b6b27", + "https://bcr.bazel.build/modules/snappy/1.2.0/source.json": "17f5527e15d30a9d9eebf79ed73b280b56cac44f8c8fea696666d99943f84c33", "https://bcr.bazel.build/modules/stardoc/0.5.3/MODULE.bazel": "c7f6948dae6999bf0db32c1858ae345f112cacf98f174c7a8bb707e41b974f1c", "https://bcr.bazel.build/modules/stardoc/0.5.6/MODULE.bazel": "c43dabc564990eeab55e25ed61c07a1aadafe9ece96a4efabb3f8bf9063b71ef", "https://bcr.bazel.build/modules/stardoc/0.6.2/MODULE.bazel": "7060193196395f5dd668eda046ccbeacebfd98efc77fed418dbe2b82ffaa39fd", @@ -212,7 +229,11 @@ "https://bcr.bazel.build/modules/swift_argument_parser/1.3.1.1/MODULE.bazel": "5e463fbfba7b1701d957555ed45097d7f984211330106ccd1352c6e0af0dcf91", "https://bcr.bazel.build/modules/swift_argument_parser/1.3.1.1/source.json": "32bd87e5f4d7acc57c5b2ff7c325ae3061d5e242c0c4c214ae87e0f1c13e54cb", "https://bcr.bazel.build/modules/xds/0.0.0-20240423-555b57e/MODULE.bazel": "cea509976a77e34131411684ef05a1d6ad194dd71a8d5816643bc5b0af16dc0f", - "https://bcr.bazel.build/modules/xds/0.0.0-20240423-555b57e/source.json": "7227e1fcad55f3f3cab1a08691ecd753cb29cc6380a47bc650851be9f9ad6d20" + "https://bcr.bazel.build/modules/xds/0.0.0-20240423-555b57e/source.json": "7227e1fcad55f3f3cab1a08691ecd753cb29cc6380a47bc650851be9f9ad6d20", + "https://bcr.bazel.build/modules/xz/5.4.5.bcr.1/MODULE.bazel": 
"c037f75fa1b7e1ff15fbd15d807a8ce545e9b02f02df0a9777aa9aa7d8b268bb", + "https://bcr.bazel.build/modules/xz/5.4.5.bcr.1/source.json": "766f28499a16fa9ed8dc94382d50e80ceda0d0ab80b79b7b104a67074ab10e1f", + "https://bcr.bazel.build/modules/zstd/1.5.6/MODULE.bazel": "471ebe7d3cdd8c6469390fcf623eb4779ff55fbee0a87f1dc57a1def468b96d4", + "https://bcr.bazel.build/modules/zstd/1.5.6/source.json": "02010c3333fc89b44fe861db049968decb6e688411f7f9d4f6791d74f9adfb51" }, "selectedYankedVersions": {}, "moduleExtensions": { diff --git a/OWNERS.yml b/OWNERS.yml index 14cb11c3565..d74818cccdf 100644 --- a/OWNERS.yml +++ b/OWNERS.yml @@ -104,3 +104,6 @@ filters: - ".tmp/*": approvers: - 10gen/devprod-build + - ".bazelrc.fuzztest": + approvers: + - 10gen/platsec-server diff --git a/bazel/toolchains/cc/mongo_linux/mongo_linux_cc_toolchain_config.bzl b/bazel/toolchains/cc/mongo_linux/mongo_linux_cc_toolchain_config.bzl index 5de32a060fe..846eb22ca65 100644 --- a/bazel/toolchains/cc/mongo_linux/mongo_linux_cc_toolchain_config.bzl +++ b/bazel/toolchains/cc/mongo_linux/mongo_linux_cc_toolchain_config.bzl @@ -1876,6 +1876,8 @@ def _impl(ctx): "-Wno-sign-compare", "-Wno-implicit-fallthrough", "-Wno-shorten-64-to-32", + "-Wno-unused-but-set-variable", + "-Wno-nullability-completeness", ])], ), ], diff --git a/sbom.json b/sbom.json index e377b6fe791..bdf7b273d50 100644 --- a/sbom.json +++ b/sbom.json @@ -2505,6 +2505,64 @@ ] }, "scope": "required" + }, + { + "type": "library", + "bom-ref": "pkg:github/google/fuzztest@v2025.07.28", + "supplier": { + "name": "Google LLC", + "url": [ + "https://opensource.google/" + ] + }, + "author": "The Google Test and Google Mock Communities", + "group": "google.opensource", + "name": "fuzztest", + "version": "2025.07.28", + "description": "FuzzTest", + "licenses": [ + { + "license": { + "id": "BSD-3-Clause" + } + }, + { + "license": { + "id": "Apache-2.0" + } + } + ], + "copyright": "Copyright 2008, Google Inc. 
All rights reserved.", + "cpe": "cpe:2.3:a:google:fuzztest:2025.07.28:*:*:*:*:*:*:*", + "purl": "pkg:github/google/fuzztest@v2025.07.28", + "externalReferences": [ + { + "url": "https://github.com/google/fuzztest.git", + "type": "distribution" + } + ], + "properties": [ + { + "name": "internal:team_responsible", + "value": "Product Security" + }, + { + "name": "emits_persisted_data", + "value": "false" + }, + { + "name": "import_script_path", + "value": "src/third_party/fuzztest/scripts/import.sh" + } + ], + "evidence": { + "occurrences": [ + { + "location": "src/third_party/fuzztest" + } + ] + }, + "scope": "excluded" } ], "dependencies": [ @@ -2525,6 +2583,7 @@ "pkg:github/google/re2@2025-08-05", "pkg:github/google/s2geometry@a25c502bda9d7e0274b9e2b7825fbddf13cc0306", "pkg:github/google/snappy@1.1.10", + "pkg:github/google/fuzztest@v2025.07.28", "pkg:github/google/googletest@v1.17.0", "pkg:github/gperftools/gperftools@2.9.1", "pkg:github/grpc/grpc@v1.74.1", @@ -2636,6 +2695,10 @@ "ref": "pkg:github/google/benchmark@v1.5.2", "dependsOn": [] }, + { + "ref": "pkg:github/google/fuzztest@v2025.07.28", + "dependsOn": [] + }, { "ref": "pkg:github/google/googletest@v1.17.0", "dependsOn": [] diff --git a/src/third_party/OWNERS.yml b/src/third_party/OWNERS.yml index a414124760c..5917bef719f 100644 --- a/src/third_party/OWNERS.yml +++ b/src/third_party/OWNERS.yml @@ -154,3 +154,6 @@ filters: - "zstandard": approvers: - 10gen/server-networking-and-observability + - "fuzztest": + approvers: + - 10gen/platsec-server diff --git a/src/third_party/fuzztest/dist/LICENSE b/src/third_party/fuzztest/dist/LICENSE new file mode 100644 index 00000000000..227b21f7b2b --- /dev/null +++ b/src/third_party/fuzztest/dist/LICENSE @@ -0,0 +1,251 @@ +Files: fuzztest/internal/domains/rune.* +The authors of this software are Rob Pike and Ken Thompson. + Copyright (c) 2002 by Lucent Technologies. 
+Permission to use, copy, modify, and distribute this software for any +purpose without fee is hereby granted, provided that this entire notice +is included in all copies of any software which is or includes a copy +or modification of this software and in all copies of the supporting +documentation for such software. +THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED +WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE +ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY OF +THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + +rune.* have been converted to compile as C++ code in fuzztest::internal +namespace. + +--- + +Files: grammar_codegen/generated_antlr_parser/* + [The "BSD 3-clause license"] + Copyright + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +--- + +Files: * + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/src/third_party/fuzztest/dist/MODULE.bazel b/src/third_party/fuzztest/dist/MODULE.bazel new file mode 100644 index 00000000000..8fbe75ce263 --- /dev/null +++ b/src/third_party/fuzztest/dist/MODULE.bazel @@ -0,0 +1,75 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +module( + name = "fuzztest", + version = "head", +# TODO(lszekeres): Remove and use default name. + repo_name = "com_google_fuzztest", +) + +bazel_dep( + name = "abseil-cpp", + version = "20250512.0", +) +bazel_dep( + name = "re2", + version = "2024-07-02.bcr.1", +) +bazel_dep( + name = "bazel_skylib", + version = "1.7.1", +) +bazel_dep( + name = "platforms", + version = "0.0.10", +) + +# GoogleTest is not a dev dependency, because it's needed when FuzzTest is used +# with GoogleTest integration (e.g., googletest_adaptor). 
Note that the FuzzTest +# framework can be used without GoogleTest integration as well. +bazel_dep( + name = "googletest", + version = "1.16.0" +) +# TODO(lszekeres): Make this a dev dependency, as the protobuf library is only +# required for testing. +bazel_dep( + name = "protobuf", + version = "31.1", +) +bazel_dep( + name = "rules_proto", + version = "7.1.0", +) +bazel_dep( + name = "riegeli", + version = "0.0.0-20250706-c4d1f27", + repo_name = "com_google_riegeli", +) + +# Dev dependencies. +# These dependencies will be ignored if the current module is not the root +# module (https://bazel.build/rules/lib/globals/module#bazel_dep). +bazel_dep( + name = "nlohmann_json", + version = "3.11.3", + dev_dependency = True, +) +bazel_dep( + name = "antlr4-cpp-runtime", + version = "4.12.0", + dev_dependency = True, + repo_name = "antlr_cpp", +) diff --git a/src/third_party/fuzztest/dist/bazel/BUILD b/src/third_party/fuzztest/dist/bazel/BUILD new file mode 100644 index 00000000000..b9125222cf5 --- /dev/null +++ b/src/third_party/fuzztest/dist/bazel/BUILD @@ -0,0 +1,25 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +package(default_visibility = ["//visibility:private"]) + +licenses(["notice"]) + +sh_binary( + name = "setup_configs", + srcs = ["setup_configs.sh"], + # To determine if the script runs from the fuzztest repo or from a client repo. 
+ args = [repository_name()], + visibility = ["//visibility:public"], +) diff --git a/src/third_party/fuzztest/dist/bazel/antlr_cpp_runtime.BUILD b/src/third_party/fuzztest/dist/bazel/antlr_cpp_runtime.BUILD new file mode 100644 index 00000000000..1376fdfbaa2 --- /dev/null +++ b/src/third_party/fuzztest/dist/bazel/antlr_cpp_runtime.BUILD @@ -0,0 +1,42 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +licenses(["notice"]) + +cc_library( + name = "antlr_cpp", + srcs = glob(["runtime/src/**/*.cpp"]), + hdrs = ["runtime/src/antlr4-runtime.h"], + copts = ["-fexceptions"], + defines = ["ANTLR4CPP_USING_ABSEIL"], + features = ["-use_header_modules"], + includes = ["runtime/src"], + textual_hdrs = glob( + ["runtime/src/**/*.h"], + exclude = ["runtime/src/antlr4-runtime.h"], + ), + visibility = ["//visibility:public"], + deps = [ + "@abseil-cpp//absl/base", + "@abseil-cpp//absl/base:core_headers", + "@abseil-cpp//absl/container:flat_hash_map", + "@abseil-cpp//absl/container:flat_hash_set", + "@abseil-cpp//absl/synchronization", + ], +) + +alias( + name = "antlr4-cpp-runtime", + actual = "@antlr_cpp//:antlr4-cpp-runtime", +) \ No newline at end of file diff --git a/src/third_party/fuzztest/dist/bazel/setup_configs.sh b/src/third_party/fuzztest/dist/bazel/setup_configs.sh new file mode 100755 index 00000000000..9c200aa5c74 --- /dev/null +++ b/src/third_party/fuzztest/dist/bazel/setup_configs.sh @@ -0,0 +1,220 @@ +#!/usr/bin/env bash + +# Script 
for generating fuzztest.bazelrc. + +set -euf -o pipefail + +cat < fuzztest.bazelrc +# +# And don't forget to add the following to your project's .bazelrc: +# +# try-import %workspace%/fuzztest.bazelrc + +EOF + +cat < +#include +#include +#include +#include +#include +#include + +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" +#include "absl/log/check.h" +#include "absl/log/log.h" +#include "./centipede/binary_info.h" +#include "./centipede/control_flow.h" +#include "./centipede/corpus.h" +#include "./centipede/corpus_io.h" +#include "./centipede/coverage.h" +#include "./centipede/feature.h" +#include "./centipede/pc_info.h" +#include "./centipede/workdir.h" +#include "./common/defs.h" +#include "./common/logging.h" +#include "./common/remote_file.h" + +namespace fuzztest::internal { + +namespace { + +std::vector ReadCorpora(std::string_view binary_name, + std::string_view binary_hash, + std::string_view workdir_path) { + WorkDir workdir(std::string(workdir_path), std::string(binary_name), + std::string(binary_hash), /*my_shard_index=*/0); + std::vector corpus_paths; + CHECK_OK( + RemoteGlobMatch(workdir.CorpusFilePaths().AllShardsGlob(), corpus_paths)); + std::vector features_paths; + CHECK_OK(RemoteGlobMatch(workdir.FeaturesFilePaths().AllShardsGlob(), + features_paths)); + + CHECK_EQ(corpus_paths.size(), features_paths.size()); + std::vector corpus; + for (int i = 0; i < corpus_paths.size(); ++i) { + LOG(INFO) << "Reading corpus at: " << corpus_paths[i]; + LOG(INFO) << "Reading features at: " << features_paths[i]; + ReadShard(corpus_paths[i], features_paths[i], + [&corpus](ByteArray input, FeatureVec features) { + corpus.push_back({std::move(input), std::move(features)}); + }); + } + return corpus; +} + +BinaryInfo ReadBinaryInfo(std::string_view binary_name, + std::string_view binary_hash, + std::string_view workdir_path) { + WorkDir workdir(std::string(workdir_path), std::string(binary_name), + std::string(binary_hash), 
/*my_shard_index=*/0); + BinaryInfo ret; + ret.Read(workdir.BinaryInfoDirPath()); + return ret; +} + +AnalyzeCorporaResults AnalyzeCorpora(const BinaryInfo &binary_info, + const std::vector &a, + const std::vector &b) { + // `a_pcs` will contain all PCs covered by `a`. + absl::flat_hash_set a_pcs; + absl::flat_hash_map a_pc_to_corpus; + for (const auto &record : a) { + for (const auto &feature : record.features) { + if (!feature_domains::kPCs.Contains(feature)) continue; + auto pc = ConvertPCFeatureToPcIndex(feature); + a_pcs.insert(pc); + a_pc_to_corpus.insert({pc, std::move(record)}); + } + } + + // `b_only_pcs` will contain PCs covered by `b` but not by `a`. + // `b_unique_indices` are indices of inputs that have PCs from `b_only_pcs`. + // `b_shared_indices` are indices of all other inputs from `b`. + absl::flat_hash_set b_only_pcs; + absl::flat_hash_set b_pcs; + absl::flat_hash_map b_pc_to_corpus; + std::vector b_shared_indices, b_unique_indices; + for (size_t i = 0; i < b.size(); ++i) { + const auto &record = b[i]; + bool has_b_only = false; + for (const auto &feature : record.features) { + if (!feature_domains::kPCs.Contains(feature)) continue; + auto pc = ConvertPCFeatureToPcIndex(feature); + b_pcs.insert(pc); + b_pc_to_corpus.insert({pc, std::move(record)}); + if (a_pcs.contains(pc)) continue; + b_only_pcs.insert(pc); + has_b_only = true; + } + if (has_b_only) + b_unique_indices.push_back(i); + else + b_shared_indices.push_back(i); + } + + absl::flat_hash_set a_only_pcs; + for (const auto &record : a) { + for (const auto &feature : record.features) { + if (!feature_domains::kPCs.Contains(feature)) continue; + auto pc = ConvertPCFeatureToPcIndex(feature); + if (b_pcs.contains(pc)) continue; + a_only_pcs.insert(pc); + } + } + LOG(INFO) << VV(a.size()) << VV(b.size()) << VV(a_pcs.size()) + << VV(a_only_pcs.size()) << VV(b_only_pcs.size()) + << VV(b_shared_indices.size()) << VV(b_unique_indices.size()); + + // Sort PCs to put them in the canonical order, as in 
pc_table. + AnalyzeCorporaResults ret; + ret.a_pcs = std::vector{a_pcs.begin(), a_pcs.end()}; + ret.b_pcs = std::vector{b_pcs.begin(), b_pcs.end()}; + ret.a_only_pcs = std::vector{a_only_pcs.begin(), a_only_pcs.end()}; + ret.b_only_pcs = std::vector{b_only_pcs.begin(), b_only_pcs.end()}; + ret.a_pc_to_corpus_record = std::move(a_pc_to_corpus); + ret.b_pc_to_corpus_record = std::move(b_pc_to_corpus); + std::sort(ret.a_pcs.begin(), ret.a_pcs.end()); + std::sort(ret.b_pcs.begin(), ret.b_pcs.end()); + std::sort(ret.a_only_pcs.begin(), ret.a_only_pcs.end()); + std::sort(ret.b_only_pcs.begin(), ret.b_only_pcs.end()); + + return ret; +} + +} // namespace + +CoverageResults GetCoverage(const std::vector &corpus_records, + BinaryInfo binary_info) { + absl::flat_hash_set pcs; + for (const auto &record : corpus_records) { + for (const auto &feature : record.features) { + if (!feature_domains::kPCs.Contains(feature)) continue; + auto pc = ConvertPCFeatureToPcIndex(feature); + pcs.insert(pc); + } + } + CoverageResults ret = { + /*pcs=*/{pcs.begin(), pcs.end()}, + /*binary_info=*/std::move(binary_info), + }; + // Sort PCs to put them in the canonical order, as in pc_table. 
+ std::sort(ret.pcs.begin(), ret.pcs.end()); + return ret; +} + +CoverageResults GetCoverage(std::string_view binary_name, + std::string_view binary_hash, + std::string_view workdir) { + const std::vector corpus_records = + ReadCorpora(binary_name, binary_hash, workdir); + BinaryInfo binary_info = ReadBinaryInfo(binary_name, binary_hash, workdir); + return GetCoverage(corpus_records, std::move(binary_info)); +} + +void DumpCoverageReport(const CoverageResults &coverage_results, + std::string_view coverage_report_path) { + LOG(INFO) << "Dump coverage to file: " << coverage_report_path; + + const fuzztest::internal::PCTable &pc_table = + coverage_results.binary_info.pc_table; + const fuzztest::internal::SymbolTable &symbols = + coverage_results.binary_info.symbols; + + fuzztest::internal::SymbolTable coverage_symbol_table; + for (const PCIndex pc : coverage_results.pcs) { + CHECK_LE(pc, symbols.size()); + if (!pc_table[pc].has_flag(fuzztest::internal::PCInfo::kFuncEntry)) + continue; + const SymbolTable::Entry entry = symbols.entry(pc); + coverage_symbol_table.AddEntry(entry.func, entry.file_line_col()); + } + + std::ostringstream symbol_table_stream; + coverage_symbol_table.WriteToLLVMSymbolizer(symbol_table_stream); + + CHECK_OK( + RemoteFileSetContents(coverage_report_path, symbol_table_stream.str())); +} + +AnalyzeCorporaResults AnalyzeCorpora(std::string_view binary_name, + std::string_view binary_hash, + std::string_view workdir_a, + std::string_view workdir_b) { + BinaryInfo binary_info_a = + ReadBinaryInfo(binary_name, binary_hash, workdir_a); + BinaryInfo binary_info_b = + ReadBinaryInfo(binary_name, binary_hash, workdir_b); + + CHECK_EQ(binary_info_a.pc_table.size(), binary_info_b.pc_table.size()); + CHECK_EQ(binary_info_a.symbols.size(), binary_info_b.symbols.size()); + + const std::vector a = + ReadCorpora(binary_name, binary_hash, workdir_a); + const std::vector b = + ReadCorpora(binary_name, binary_hash, workdir_b); + + AnalyzeCorporaResults ret = 
AnalyzeCorpora(binary_info_a, a, b); + ret.binary_info = std::move(binary_info_a); + return ret; +} + +void AnalyzeCorporaToLog(std::string_view binary_name, + std::string_view binary_hash, + std::string_view workdir_a, + std::string_view workdir_b) { + AnalyzeCorporaResults results = + AnalyzeCorpora(binary_name, binary_hash, workdir_a, workdir_b); + + const auto &pc_table = results.binary_info.pc_table; + const auto &symbols = results.binary_info.symbols; + CoverageLogger coverage_logger(pc_table, symbols); + + // TODO(kcc): use frontier_a to show the most interesting b-only PCs. + // TODO(kcc): these cause a CHECK-fail + // CoverageFrontier frontier_a(results.binary_info); + // frontier_a.Compute(a); + + // First, print the newly covered functions (including partially covered). + LOG(INFO) << "B-only new functions:"; + absl::flat_hash_set b_only_new_functions; + for (const auto pc : results.b_only_pcs) { + if (!pc_table[pc].has_flag(PCInfo::kFuncEntry)) continue; + auto str = coverage_logger.ObserveAndDescribeIfNew(pc); + if (!str.empty()) LOG(INFO).NoPrefix() << str; + b_only_new_functions.insert(symbols.func(pc)); + } + + // Now, print newly covered edges in functions that were covered in `a`. + LOG(INFO) << "B-only new edges:"; + for (const auto pc : results.b_only_pcs) { + if (b_only_new_functions.contains(symbols.func(pc))) continue; + auto str = coverage_logger.ObserveAndDescribeIfNew(pc); + if (!str.empty()) LOG(INFO).NoPrefix() << str; + } +} + +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/analyze_corpora.h b/src/third_party/fuzztest/dist/centipede/analyze_corpora.h new file mode 100644 index 00000000000..3343e05d4fc --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/analyze_corpora.h @@ -0,0 +1,72 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef THIRD_PARTY_CENTIPEDE_ANALYZE_CORPORA_H +#define THIRD_PARTY_CENTIPEDE_ANALYZE_CORPORA_H + +#include +#include +#include + +#include "absl/container/flat_hash_map.h" +#include "./centipede/binary_info.h" +#include "./centipede/corpus.h" + +namespace fuzztest::internal { + +// The results of comparing corpus `a` with corpus `b`. +struct AnalyzeCorporaResults { + std::vector a_pcs; + std::vector b_pcs; + std::vector a_only_pcs; + std::vector b_only_pcs; + absl::flat_hash_map a_pc_to_corpus_record; + absl::flat_hash_map b_pc_to_corpus_record; + BinaryInfo binary_info; +}; + +// The result of analyzing a single corpus. +struct CoverageResults { + std::vector pcs; + BinaryInfo binary_info; +}; + +// Returns information on the corpus within `workdir`. +CoverageResults GetCoverage(const std::vector& records, + BinaryInfo binary_info); + +// Returns information on the corpus within `workdir`. +CoverageResults GetCoverage(std::string_view binary_name, + std::string_view binary_hash, + std::string_view workdir); + +// Dumps `coverage_results` to `coverage_report_path` in the same format as read +// by `SymbolTable::ReadFromLLVMSymbolizer`. +void DumpCoverageReport(const CoverageResults& coverage_results, + std::string_view coverage_report_path); + +// Compares the corpus within `workdir_a` with the corpus in `workdir_b`. +AnalyzeCorporaResults AnalyzeCorpora(std::string_view binary_name, + std::string_view binary_hash, + std::string_view workdir_a, + std::string_view workdir_b); + +// Same as above but `LOG`s the results for human consumption. 
+void AnalyzeCorporaToLog(std::string_view binary_name, + std::string_view binary_hash, + std::string_view workdir_a, + std::string_view workdir_b); +} // namespace fuzztest::internal + +#endif // THIRD_PARTY_CENTIPEDE_ANALYZE_CORPORA_H diff --git a/src/third_party/fuzztest/dist/centipede/analyze_corpora_test.cc b/src/third_party/fuzztest/dist/centipede/analyze_corpora_test.cc new file mode 100644 index 00000000000..f65106db3ad --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/analyze_corpora_test.cc @@ -0,0 +1,124 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "./centipede/analyze_corpora.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/log/check.h" +#include "absl/log/log.h" +#include "./centipede/binary_info.h" +#include "./centipede/environment.h" +#include "./centipede/symbol_table.h" +#include "./centipede/test_coverage_util.h" +#include "./common/remote_file.h" +#include "./common/test_util.h" + +namespace fuzztest::internal { +namespace { + +using ::testing::AllOf; +using ::testing::Contains; +using ::testing::IsSupersetOf; +using ::testing::Not; + +// Returns path to test_fuzz_target. +static std::string GetTargetPath() { + return GetDataDependencyFilepath("centipede/testing/test_fuzz_target"); +} + +// TODO(ussuri): Implement. 
+TEST(AnalyzeCorpora, AnalyzeCorpora) { LOG(INFO) << "Unimplemented"; } + +TEST(GetCoverage, SimpleCoverageResults) { + Environment env; + env.binary = GetTargetPath(); + auto corpus_records = RunInputsAndCollectCorpusRecords(env, {"func1"}); + EXPECT_EQ(corpus_records.size(), 1); + // Get pc_table and symbols. + bool uses_legacy_trace_pc_instrumentation = {}; + BinaryInfo binary_info; + binary_info.InitializeFromSanCovBinary( + GetTargetPath(), GetObjDumpPath(), GetLLVMSymbolizerPath(), + GetTestTempDir(test_info_->name()).string()); + const auto &pc_table = binary_info.pc_table; + EXPECT_FALSE(uses_legacy_trace_pc_instrumentation); + const SymbolTable &symbols = binary_info.symbols; + // pc_table and symbols should have the same size. + EXPECT_EQ(pc_table.size(), symbols.size()); + CoverageResults res = GetCoverage(corpus_records, std::move(binary_info)); + // Check that inputs cover LLVMFuzzerTestOneInput and SingleEdgeFunc, but not + // MultiEdgeFunc. + size_t llvm_fuzzer_test_one_input_num_edges = 0; + size_t single_edge_func_num_edges = 0; + size_t multi_edge_func_num_edges = 0; + for (size_t pc : res.pcs) { + size_t check_pc = pc; + EXPECT_EQ(check_pc, pc); + single_edge_func_num_edges += + res.binary_info.symbols.func(pc) == "SingleEdgeFunc"; + multi_edge_func_num_edges += + res.binary_info.symbols.func(pc) == "MultiEdgeFunc"; + llvm_fuzzer_test_one_input_num_edges += + res.binary_info.symbols.func(pc) == "LLVMFuzzerTestOneInput"; + } + EXPECT_GT(llvm_fuzzer_test_one_input_num_edges, 1); + EXPECT_EQ(single_edge_func_num_edges, 1); + EXPECT_EQ(multi_edge_func_num_edges, 0); +} + +TEST(DumpCoverageReport, SimpleCoverageResults) { + Environment env; + env.binary = GetTargetPath(); + auto corpus_records = RunInputsAndCollectCorpusRecords(env, {"func1"}); + ASSERT_EQ(corpus_records.size(), 1); + + const std::string test_tmpdir = GetTestTempDir(test_info_->name()); + BinaryInfo binary_info; + binary_info.InitializeFromSanCovBinary(GetTargetPath(), 
GetObjDumpPath(), + GetLLVMSymbolizerPath(), test_tmpdir); + CoverageResults coverage_results = + GetCoverage(corpus_records, std::move(binary_info)); + + const std::string coverage_report_path = + std::filesystem::path{test_tmpdir} / "covered_symbol_table"; + DumpCoverageReport(coverage_results, coverage_report_path); + std::string symbol_table_contents; + ASSERT_OK(RemoteFileGetContents(coverage_report_path, symbol_table_contents)); + + std::istringstream symbol_table_stream(symbol_table_contents); + SymbolTable symbols; + symbols.ReadFromLLVMSymbolizer(symbol_table_stream); + + std::vector functions; + for (size_t index = 0; index < symbols.size(); ++index) { + functions.push_back(symbols.func(index)); + } + // Check that inputs cover LLVMFuzzerTestOneInput and SingleEdgeFunc, but not + // MultiEdgeFunc. + EXPECT_THAT(functions, + AllOf(IsSupersetOf({"LLVMFuzzerTestOneInput", "SingleEdgeFunc"}), + Not(Contains("MultiEdgeFunc")))); +} + +} // namespace +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/binary_info.cc b/src/third_party/fuzztest/dist/centipede/binary_info.cc new file mode 100644 index 00000000000..d604be32d94 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/binary_info.cc @@ -0,0 +1,166 @@ +// Copyright 2023 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "./centipede/binary_info.h" + +#include +#include // NOLINT +#include +#include +#include +#include +#include + +#include "absl/log/check.h" +#include "absl/log/log.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_split.h" +#include "./centipede/command.h" +#include "./centipede/control_flow.h" +#include "./centipede/pc_info.h" +#include "./centipede/util.h" +#include "./common/remote_file.h" + +namespace fuzztest::internal { + +namespace { +// Names of the files, relative to the directory passed to Read()/Write(), that +// hold each serialized table. +constexpr std::string_view kSymbolTableFileName = "symbol-table"; +constexpr std::string_view kPCTableFileName = "pc-table"; +constexpr std::string_view kCfTableFileName = "cf-table"; +} // namespace + +// Runs the instrumented binary with the `dump_binary_info` runner flag so that +// it dumps its PC, CF and DSO tables into temp files under `tmp_dir_path`, then +// loads them. If the dumped PC table is empty, falls back to the legacy +// trace-pc + objdump extraction and sets `uses_legacy_trace_pc_instrumentation`. +// Symbols are loaded last, and only when the PC table is non-empty. +// Does nothing (besides logging) when `binary_path_with_args` is empty. +void BinaryInfo::InitializeFromSanCovBinary( + std::string_view binary_path_with_args, std::string_view objdump_path, + std::string_view symbolizer_path, std::string_view tmp_dir_path) { + if (binary_path_with_args.empty()) { + // This usually happens in tests. + LOG(INFO) << __func__ << ": binary_path_with_args is empty"; + return; + } + // Compute names for temp files. + const std::filesystem::path tmp_dir = tmp_dir_path; + CHECK(std::filesystem::exists(tmp_dir) && + std::filesystem::is_directory(tmp_dir)); + ScopedFile pc_table_path(tmp_dir_path, "pc_table_tmp"); + ScopedFile cf_table_path(tmp_dir_path, "cf_table_tmp"); + ScopedFile dso_table_path(tmp_dir_path, "dso_table_tmp"); + ScopedFile log_path(tmp_dir_path, "binary_info_log_tmp"); + LOG(INFO) << __func__ << ": tmp_dir: " << tmp_dir; + + Command::Options cmd_options; + cmd_options.env_add = {absl::StrCat( + "CENTIPEDE_RUNNER_FLAGS=:dump_binary_info:arg1=", pc_table_path.path(), + ":arg2=", cf_table_path.path(), ":arg3=", dso_table_path.path(), ":")}; + cmd_options.stdout_file = std::string(log_path.path()); + Command cmd{binary_path_with_args, std::move(cmd_options)}; + int exit_code = cmd.Execute(); + if (exit_code != EXIT_SUCCESS) { + // Not fatal here: the tables are read below regardless, and an empty PC + // table triggers the legacy fallback. + LOG(INFO) << __func__ << ": exit_code: " << exit_code; + } + + // Load PC Table.
+ pc_table = ReadPcTableFromFile(pc_table_path.path()); + + // Load CF Table. + if (std::filesystem::exists(cf_table_path.path())) + cf_table = ReadCfTable(cf_table_path.path()); + + // Load the DSO Table. + dso_table = ReadDsoTableFromFile(dso_table_path.path()); + + if (pc_table.empty()) { + CHECK(dso_table.empty()); + // Fallback to GetPcTableFromBinaryWithTracePC(). + LOG(WARNING) + << "Failed to dump PC table directly from binary using linked-in " + "runner; see target execution logs above; falling back to legacy PC " + "table extraction using trace-pc and objdump"; + pc_table = GetPcTableFromBinaryWithTracePC( + binary_path_with_args, objdump_path, pc_table_path.path()); + if (pc_table.empty()) { + LOG(ERROR) << "Failed to extract PC table from binary using objdump; see " + "objdump execution logs above"; + } + // For the legacy trace-pc instrumentation, set the dso_table + // to 1-element array consisting of the binary name + const std::vector args = + absl::StrSplit(binary_path_with_args, absl::ByAnyChar{" \t\n"}, + absl::SkipWhitespace{}); + CHECK(!args.empty()); + dso_table.push_back({args[0], pc_table.size()}); + uses_legacy_trace_pc_instrumentation = true; + } else { + uses_legacy_trace_pc_instrumentation = false; + } + + if (!uses_legacy_trace_pc_instrumentation) { + // The number of instrumented PCs in the DSO table should match pc_table. + size_t num_instrumened_pcs_in_all_dsos = 0; + for (const auto& dso : dso_table) { + num_instrumened_pcs_in_all_dsos += dso.num_instrumented_pcs; + } + CHECK_EQ(num_instrumened_pcs_in_all_dsos, pc_table.size()); + } + + // Load symbols, if there is a PC table.
+ if (!pc_table.empty()) { + // NOTE(review): these two ScopedFile objects are not referenced again in + // this function; presumably they exist to clean up temp files that + // GetSymbolsFromBinary() creates under `tmp_dir_path` -- confirm. + ScopedFile sym_tmp1_path(tmp_dir_path, "symbols_tmp1"); + ScopedFile sym_tmp2_path(tmp_dir_path, "symbols_tmp2"); + symbols.GetSymbolsFromBinary(pc_table, dso_table, symbolizer_path, + tmp_dir_path); + } +} + +// Loads `symbols`, `pc_table` and `cf_table` from the "symbol-table", +// "pc-table" and "cf-table" files inside `dir` (the layout produced by +// Write()). CHECK-fails if the symbol table or PC table file cannot be read. +void BinaryInfo::Read(std::string_view dir) { + std::string symbol_table_contents; + // TODO(b/295978603): move calculation of paths into WorkDir class. + CHECK_OK(RemoteFileGetContents( + (std::filesystem::path(dir) / kSymbolTableFileName).c_str(), + symbol_table_contents)); + std::istringstream symbol_table_stream(symbol_table_contents); + symbols.ReadFromLLVMSymbolizer(symbol_table_stream); + + std::string pc_table_contents; + CHECK_OK(RemoteFileGetContents( + (std::filesystem::path(dir) / kPCTableFileName).c_str(), + pc_table_contents)); + std::istringstream pc_table_stream(pc_table_contents); + pc_table = ReadPcTable(pc_table_stream); + + cf_table = + ReadCfTable((std::filesystem::path(dir) / kCfTableFileName).c_str()); +} + +// Serializes `symbols`, `pc_table` and `cf_table` into the "symbol-table", +// "pc-table" and "cf-table" files inside `dir`. CHECK-fails if any of the +// writes fails. +void BinaryInfo::Write(std::string_view dir) { + std::ostringstream symbol_table_stream; + symbols.WriteToLLVMSymbolizer(symbol_table_stream); + // TODO(b/295978603): move calculation of paths into WorkDir class.
+ CHECK_OK(RemoteFileSetContents( + (std::filesystem::path(dir) / kSymbolTableFileName).c_str(), + symbol_table_stream.str())); + + std::ostringstream pc_table_stream; + WritePcTable(pc_table, pc_table_stream); + CHECK_OK(RemoteFileSetContents( + (std::filesystem::path(dir) / kPCTableFileName).c_str(), + pc_table_stream.str())); + + std::ostringstream cf_table_stream; + WriteCfTable(cf_table, cf_table_stream); + CHECK_OK(RemoteFileSetContents( + (std::filesystem::path(dir) / kCfTableFileName).c_str(), + cf_table_stream.str())); +} + +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/binary_info.h b/src/third_party/fuzztest/dist/centipede/binary_info.h new file mode 100644 index 00000000000..9a1e53d5ba0 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/binary_info.h @@ -0,0 +1,59 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef THIRD_PARTY_CENTIPEDE_BINARY_INFO_H_ +#define THIRD_PARTY_CENTIPEDE_BINARY_INFO_H_ + +#include + +#include "./centipede/call_graph.h" +#include "./centipede/control_flow.h" +#include "./centipede/pc_info.h" +#include "./centipede/symbol_table.h" + +namespace fuzztest::internal { + +// Information about the binary being fuzzed. Created once at program startup +// and doesn't change (other than for lazily initialized fields).
+struct BinaryInfo { + PCTable pc_table; + SymbolTable symbols; + CFTable cf_table; + DsoTable dso_table; + ControlFlowGraph control_flow_graph; + CallGraph call_graph; + // Set by InitializeFromSanCovBinary(): true when the PC table had to be + // extracted via the legacy trace-pc + objdump fallback. + bool uses_legacy_trace_pc_instrumentation = false; + + // Initializes `pc_table`, `symbols`, `cf_table`, `dso_table` and + // `uses_legacy_trace_pc_instrumentation` based on `binary_path_with_args`. + // * `binary_path_with_args` is the path to the instrumented binary, + // possibly with space-separated arguments. + // * `objdump_path` and `symbolizer_path` are paths to respective tools. + // * `tmp_dir_path` is a path to a temp dir, that must exist. + void InitializeFromSanCovBinary(std::string_view binary_path_with_args, + std::string_view objdump_path, + std::string_view symbolizer_path, + std::string_view tmp_dir_path); + + // Serialize `this` within the given `dir`. Only `symbols`, `pc_table` and + // `cf_table` are written; the remaining fields are not serialized. + void Write(std::string_view dir); + + // Initialize `this` with the serialized contents in `dir`. Assumes the same + // format as `Write`, so only `symbols`, `pc_table` and `cf_table` are set. + void Read(std::string_view dir); +}; + +} // namespace fuzztest::internal + +#endif // THIRD_PARTY_CENTIPEDE_BINARY_INFO_H_ diff --git a/src/third_party/fuzztest/dist/centipede/binary_info_test.cc b/src/third_party/fuzztest/dist/centipede/binary_info_test.cc new file mode 100644 index 00000000000..2b730f945a0 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/binary_info_test.cc @@ -0,0 +1,82 @@ +// Copyright 2023 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +#include "./centipede/binary_info.h" + +#include +#include +#include + +#include "gtest/gtest.h" +#include "./centipede/control_flow.h" +#include "./centipede/pc_info.h" +#include "./centipede/symbol_table.h" +#include "./common/test_util.h" + +namespace fuzztest::internal { +namespace { + +// A BinaryInfo with non-empty tables must survive a Write()/Read() round trip. +TEST(BinaryInfoTest, SerializesAndDeserializesBinaryInfoSuccessfully) { + const std::string work_dir = GetTestTempDir(test_info_->name()); + + // Parse the symbol table from the text fed to ReadFromLLVMSymbolizer(). + std::string symbolizer_text = + R"(FunctionOne + source/location/one.cc:1:0 + + FunctionTwo + source/location/two.cc:2:0 + +)"; + std::istringstream symbolizer_stream(symbolizer_text); + SymbolTable parsed_symbols; + parsed_symbols.ReadFromLLVMSymbolizer(symbolizer_stream); + + const PCTable pcs = {{/*pc=*/0, /*flags=*/1}, {/*pc=*/2, /*flags=*/3}}; + const CFTable cfs = {1, 2, 3, 0, 0, 2, 4, 0}; + + BinaryInfo written; + written.symbols = std::move(parsed_symbols); + written.pc_table = pcs; + written.cf_table = cfs; + written.Write(work_dir); + + BinaryInfo restored; + restored.Read(work_dir); + + EXPECT_EQ(written.pc_table, restored.pc_table); + EXPECT_EQ(written.symbols, restored.symbols); + EXPECT_EQ(written.cf_table, restored.cf_table); +} + +// The same round trip must also work when every table is empty. +TEST(BinaryInfoTest, SerializesAndDeserializesEmptyBinaryInfoSuccessfully) { + const std::string work_dir = GetTestTempDir(test_info_->name()); + + std::string symbolizer_text = ""; + std::istringstream symbolizer_stream(symbolizer_text); + SymbolTable parsed_symbols; + parsed_symbols.ReadFromLLVMSymbolizer(symbolizer_stream); + + const PCTable pcs = {}; + const CFTable cfs = {}; + + BinaryInfo written; + written.symbols = std::move(parsed_symbols); + written.pc_table = pcs; + written.cf_table = cfs; + written.Write(work_dir); + + BinaryInfo restored; + restored.Read(work_dir); + + EXPECT_EQ(written.pc_table, restored.pc_table); + EXPECT_EQ(written.symbols, restored.symbols); + EXPECT_EQ(written.cf_table, restored.cf_table); +} + +} // namespace +} // namespace fuzztest::internal diff --git
a/src/third_party/fuzztest/dist/centipede/blob_file_converter.cc b/src/third_party/fuzztest/dist/centipede/blob_file_converter.cc new file mode 100644 index 00000000000..de214217690 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/blob_file_converter.cc @@ -0,0 +1,150 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include // NOLINT +#include + +#include "absl/base/nullability.h" +#include "absl/flags/flag.h" +#include "absl/log/check.h" +#include "absl/log/log.h" +#include "absl/status/status.h" +#include "absl/strings/str_format.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "./centipede/config_init.h" +#include "./centipede/rusage_profiler.h" +#include "./common/blob_file.h" +#include "./common/defs.h" +#include "./common/logging.h" +#include "./common/remote_file.h" + +ABSL_FLAG(std::string, in, "", "Input path"); +ABSL_FLAG(std::string, out, "", "Output path"); +ABSL_FLAG(std::string, out_format, "riegeli", "--out format (legacy|riegeli)"); + +namespace fuzztest::internal { +namespace { + +// TODO(ussuri): Pare down excessive rusage profiling after breaking in. 
+ +// Tracks how many blobs and bytes have been converted so far and logs +// throughput figures, either unconditionally via Log() or at most once per +// `log_every` via MaybeLogIfTime(). +class StatsLogger { + public: + StatsLogger(absl::Duration log_every, RUsageProfiler& rprof) + : log_every_(log_every), + next_log_at_(start_ + log_every), + rprof_(rprof) {} + + // Counts one more blob of `blob.size()` bytes. + void UpdateStats(ByteSpan blob) { + ++num_blobs_; + num_bytes_ += blob.size(); + } + + // Logs totals and per-second rates measured since construction; at verbosity + // level 3 a memory snapshot taken through `rprof_` is appended. + void Log() { + RPROF_THIS_FUNCTION_BY_EXISTING_RPROF(rprof_); + const auto secs = absl::ToDoubleSeconds(absl::Now() - start_); + const std::string stats = absl::StrFormat( + "blobs: %9lld | blobs/s: %5.0f | bytes: %12lld | bytes/s: %8.0f", + num_blobs_, num_blobs_ / secs, num_bytes_, num_bytes_ / secs); + if (ABSL_VLOG_IS_ON(3)) { + const RUsageProfiler::Snapshot& snapshot = RPROF_SNAPSHOT(stats); + LOG(INFO) << stats << " | " << snapshot.memory.ShortStr(); + } else { + LOG(INFO) << stats; + } + } + + // Logs if the next scheduled log time has been reached, then schedules the + // following one. + void MaybeLogIfTime() { + const auto now = absl::Now(); + if (now >= next_log_at_) { + Log(); + next_log_at_ += log_every_; + // If we fell behind by more than one period, restart the schedule from now. + if (next_log_at_ < now) next_log_at_ = now + log_every_; + } + } + + private: + int64_t num_blobs_ = 0; + int64_t num_bytes_ = 0; + + // NOTE: `start_` must stay declared before `next_log_at_`, whose constructor + // initializer reads it (members are initialized in declaration order). + const absl::Time start_ = absl::Now(); + const absl::Duration log_every_; + absl::Time next_log_at_; + + RUsageProfiler& rprof_; +}; + +// Copies every blob from the blob file `in` to a new blob file `out`, creating +// the parent directory of `out` first. The writer is created in Riegeli mode +// iff `out_format` is "riegeli". CHECK-fails on any open/write/close error and +// if reading stops with a status other than out-of-range. +void Convert( // + const std::string& in, // + const std::string& out, const std::string& out_format) { + RPROF_THIS_FUNCTION_WITH_REPORT(/*enable=*/ABSL_VLOG_IS_ON(1)); + + LOG(INFO) << "Converting:\n" << VV(in) << "\n" << VV(out) << VV(out_format); + + const bool out_is_riegeli = out_format == "riegeli"; + + // Verify and prepare source and destination. + + CHECK(RemotePathExists(in)) << VV(in); + CHECK_OK(RemoteMkdir(std::filesystem::path{out}.parent_path().c_str())); + + // Open blob file reader and writer.
+ + RPROF_START_TIMELAPSE( // + absl::Seconds(20), /*also_log=*/ABSL_VLOG_IS_ON(3), "Opening --in"); + const auto in_reader = DefaultBlobFileReaderFactory(); + CHECK_OK(in_reader->Open(in)) << VV(in); + RPROF_STOP_TIMELAPSE(); + RPROF_SNAPSHOT_AND_LOG("Opened --in; opening --out"); + const auto out_writer = DefaultBlobFileWriterFactory(out_is_riegeli); + CHECK_OK(out_writer->Open(out, "w")) << VV(out); + RPROF_SNAPSHOT_AND_LOG("Opened --out"); + + // Read and write blobs one-by-one. + + ByteSpan blob; + absl::Status read_status = absl::OkStatus(); + StatsLogger stats_logger{ + absl::Seconds(ABSL_VLOG_IS_ON(1) ? 20 : 60), + FUNCTION_LEVEL_RPROF_NAME, + }; + while ((read_status = in_reader->Read(blob)).ok()) { + CHECK_OK(out_writer->Write(blob)); + stats_logger.UpdateStats(blob); + stats_logger.MaybeLogIfTime(); + } + stats_logger.Log(); + // The loop above only exits on a non-OK status; out-of-range is presumably + // the reader's end-of-file signal (confirm), anything else is fatal. + CHECK(read_status.ok() || absl::IsOutOfRange(read_status)) << VV(read_status); + CHECK_OK(out_writer->Close()) << VV(out); +} + +} // namespace +} // namespace fuzztest::internal + +// Validates --in, --out and --out_format, then runs the conversion. +int main(int argc, char** absl_nonnull argv) { + (void)fuzztest::internal::InitRuntime(argc, argv); + + const std::string in = absl::GetFlag(FLAGS_in); + QCHECK(!in.empty()); + const std::string out = absl::GetFlag(FLAGS_out); + QCHECK(!out.empty()); + const std::string out_format = absl::GetFlag(FLAGS_out_format); + QCHECK(out_format == "legacy" || out_format == "riegeli") << VV(out_format); + + fuzztest::internal::Convert(in, out, out_format); + + return EXIT_SUCCESS; +} diff --git a/src/third_party/fuzztest/dist/centipede/byte_array_mutator.cc b/src/third_party/fuzztest/dist/centipede/byte_array_mutator.cc new file mode 100644 index 00000000000..9a29a432e32 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/byte_array_mutator.cc @@ -0,0 +1,351 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "./centipede/byte_array_mutator.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "./centipede/execution_metadata.h" +#include "./centipede/knobs.h" +#include "./centipede/mutation_input.h" +#include "./common/defs.h" + +namespace fuzztest::internal { + +//============= CmpDictionary =============== +// Rebuilds the dictionary from the CMP entries in `metadata`: each accepted +// pair is stored in both directions and the result is sorted so that +// SuggestReplacement() can binary-search it. +bool CmpDictionary::SetFromMetadata(const ExecutionMetadata &metadata) { + dictionary_.clear(); + if (!metadata.ForEachCmpEntry([&](ByteSpan a, ByteSpan b) { + // NOTE(review): only `a`'s size is validated; `b` is presumably the + // same width for a CMP pair -- confirm. + auto size = a.size(); + if (size > DictEntry::kMaxEntrySize) return; + if (size < kMinEntrySize) return; + // TODO(kcc): disregard boring CMP pairs, such as e.g. `1 CMP 0`. + dictionary_.emplace_back(a, b); + dictionary_.emplace_back(b, a); + })) + return false; + std::sort(dictionary_.begin(), dictionary_.end()); + return true; +} + +void CmpDictionary::SuggestReplacement( + ByteSpan bytes, std::vector &suggestions) const { + if (!suggestions.capacity()) return; + suggestions.clear(); + if (bytes.size() < kMinEntrySize) return; + // Use binary search to find the first entry that starts with the + // same kMinEntrySize bytes as `bytes`. + // This is not super efficient. + // We need to see the real usage before optimizing. + // TODO(kcc): investigate using absl/container/btree_map.h instead.
+ DictEntry prefix({bytes.begin(), kMinEntrySize}); + // Only `first` takes part in the comparison; the probe's `second` is unused. + auto iter = std::lower_bound( + dictionary_.begin(), dictionary_.end(), Pair{prefix, prefix}, + [](const Pair &a, const Pair &b) { return a.first < b.first; }); + // Iterate from the first entry that has the same first bytes as `bytes` + // to the last such entry. + for (; iter != dictionary_.end(); ++iter) { + const auto &a = iter->first; + const auto &b = iter->second; + // Check if `suggestions` is out of capacity. + if (suggestions.size() == suggestions.capacity()) break; + // Check if the first kMinEntrySize bytes are still the same. + if (!std::equal(bytes.begin(), bytes.begin() + kMinEntrySize, a.begin())) + break; + // Check if we have enough bytes to compare with `a`. + if (bytes.size() < a.size()) continue; + // If all bytes are the same as `a`, suggest `b`. + if (std::equal(a.begin(), a.end(), bytes.begin())) + suggestions.emplace_back(b.begin(), b.size()); + } +} + +//============= ByteArrayMutator =============== +size_t ByteArrayMutator::RoundUpToAdd(size_t curr_size, size_t to_add) { + if (curr_size >= max_len_) return 0; + const size_t remainder = (curr_size + to_add) % size_alignment_; + if (remainder != 0) { + to_add = to_add + size_alignment_ - remainder; + } + if (curr_size + to_add > max_len_) return max_len_ - curr_size; + return to_add; +} + +size_t ByteArrayMutator::RoundDownToRemove(size_t curr_size, size_t to_remove) { + if (curr_size <= size_alignment_) return 0; + if (to_remove >= curr_size) return curr_size - size_alignment_; + + size_t result_size = curr_size - to_remove; + result_size -= (result_size % size_alignment_); + to_remove = curr_size - result_size; + if (result_size == 0) { + to_remove -= size_alignment_; + } + if (result_size > max_len_) { + return curr_size - max_len_; + } + return to_remove; +} + +// Knobs weighting the three top-level mutation kinds. The order here (same, +// decrease, increase) must match the order of choices passed to Choose(). +static const KnobId knob_mutate[3] = {Knobs::NewId("mutate_same_size"), + Knobs::NewId("mutate_decrease_size"), + Knobs::NewId("mutate_increase_size")}; + +bool
ByteArrayMutator::Mutate(ByteArray &data) { + // Individual mutator may fail to mutate and return false. + // So we iterate a few times and expect one of the mutations will succeed. + for (int iter = 0; iter < 15; iter++) { + Fn mutator = nullptr; + if (data.size() > max_len_) { + mutator = &ByteArrayMutator::MutateDecreaseSize; + } else if (data.size() == max_len_) { + // Already at the max length: never grow. + mutator = knobs_.Choose({knob_mutate[0], knob_mutate[1]}, + {&ByteArrayMutator::MutateSameSize, + &ByteArrayMutator::MutateDecreaseSize}, + rng_()); + } else { + // Choices are listed in the same order as `knob_mutate` (same, decrease, + // increase) so that each knob weights the mutation it is named after. + mutator = knobs_.Choose(knob_mutate, + {&ByteArrayMutator::MutateSameSize, + &ByteArrayMutator::MutateDecreaseSize, + &ByteArrayMutator::MutateIncreaseSize}, + rng_()); + } + if ((this->*mutator)(data)) return true; + } + return false; +} + +static const KnobId knob_mutate_same_size[5] = { + Knobs::NewId("mutate_same_size_0"), Knobs::NewId("mutate_same_size_1"), + Knobs::NewId("mutate_same_size_2"), Knobs::NewId("mutate_same_size_3"), + Knobs::NewId("mutate_same_size_4"), +}; + +bool ByteArrayMutator::MutateSameSize(ByteArray &data) { + auto mutator = knobs_.Choose( + knob_mutate_same_size, + {&ByteArrayMutator::FlipBit, &ByteArrayMutator::SwapBytes, + &ByteArrayMutator::ChangeByte, + &ByteArrayMutator::OverwriteFromDictionary, + &ByteArrayMutator::OverwriteFromCmpDictionary}, + rng_()); + return (this->*mutator)(data); +} + +static const KnobId knob_mutate_increase_size[2] = { + Knobs::NewId("mutate_increase_size_0"), + Knobs::NewId("mutate_increase_size_1"), +}; + +bool ByteArrayMutator::MutateIncreaseSize(ByteArray &data) { + auto mutator = knobs_.Choose( + knob_mutate_increase_size, + {&ByteArrayMutator::InsertBytes, &ByteArrayMutator::InsertFromDictionary}, + rng_()); + return (this->*mutator)(data); +} + +bool ByteArrayMutator::MutateDecreaseSize(ByteArray &data) { + auto mutator = &ByteArrayMutator::EraseBytes; + return (this->*mutator)(data); +} + +bool ByteArrayMutator::FlipBit(ByteArray &data) { + uintptr_t random = rng_(); +
size_t bit_idx = random % (data.size() * 8); + size_t byte_idx = bit_idx / 8; + bit_idx %= 8; + uint8_t mask = 1 << bit_idx; + data[byte_idx] ^= mask; + return true; +} + +bool ByteArrayMutator::SwapBytes(ByteArray &data) { + size_t idx1 = rng_() % data.size(); + size_t idx2 = rng_() % data.size(); + std::swap(data[idx1], data[idx2]); + return true; +} + +bool ByteArrayMutator::ChangeByte(ByteArray &data) { + size_t idx = rng_() % data.size(); + data[idx] = rng_(); + return true; +} + +bool ByteArrayMutator::InsertBytes(ByteArray &data) { + // Don't insert too many bytes at once. + const size_t kMaxInsertSize = 20; + size_t num_new_bytes = rng_() % kMaxInsertSize + 1; + num_new_bytes = RoundUpToAdd(data.size(), num_new_bytes); + if (num_new_bytes > kMaxInsertSize) { + // Rounding up overshot the fixed buffer below, so step back by one + // alignment unit. When the alignment is larger than kMaxInsertSize no + // aligned insert fits: the subtraction would yield zero or underflow (and + // then overrun `new_bytes`), so report that no mutation took place. + if (num_new_bytes <= size_alignment_) return false; + num_new_bytes -= size_alignment_; + } + // RoundUpToAdd() returns 0 when `data` is already at or above `max_len_`; + // inserting nothing is not a mutation. + if (num_new_bytes == 0) return false; + // There are N+1 positions to insert something into an array of N. + size_t pos = rng_() % (data.size() + 1); + // Fixed array to avoid memory allocation. + std::array new_bytes; + for (size_t i = 0; i < num_new_bytes; i++) new_bytes[i] = rng_(); + data.insert(data.begin() + pos, new_bytes.begin(), + new_bytes.begin() + num_new_bytes); + return true; +} + +bool ByteArrayMutator::EraseBytes(ByteArray &data) { + if (data.size() <= size_alignment_) return false; + // Ok to erase a sizable chunk since small inputs are good (if they + // produce good features).
+ size_t num_bytes_to_erase = rng_() % (data.size() / 2) + 1; + num_bytes_to_erase = RoundDownToRemove(data.size(), num_bytes_to_erase); + if (num_bytes_to_erase == 0) return false; + size_t pos = rng_() % (data.size() - num_bytes_to_erase + 1); + data.erase(data.begin() + pos, data.begin() + pos + num_bytes_to_erase); + return true; +} + +void ByteArrayMutator::AddToDictionary( + const std::vector &dict_entries) { + for (const ByteArray &entry : dict_entries) { + if (entry.size() > DictEntry::kMaxEntrySize) continue; + dictionary_.emplace_back(entry); + } +} + +bool ByteArrayMutator::OverwriteFromDictionary(ByteArray &data) { + if (dictionary_.empty()) return false; + size_t dict_entry_idx = rng_() % dictionary_.size(); + const auto &dic_entry = dictionary_[dict_entry_idx]; + if (dic_entry.size() > data.size()) return false; + size_t overwrite_pos = rng_() % (data.size() - dic_entry.size() + 1); + std::copy(dic_entry.begin(), dic_entry.end(), data.begin() + overwrite_pos); + return true; +} + +bool ByteArrayMutator::OverwriteFromCmpDictionary(ByteArray &data) { + if (cmp_dictionary_.size() == 0) return false; + if (data.size() < CmpDictionary::kMinEntrySize) return false; + // Start with a random position in `data`, search through the entire `data` + // until some suggestion is found.
+ size_t search_start_idx = rng_() % data.size(); + constexpr size_t kMaxNumSuggestions = 100; + std::vector suggestions; + suggestions.reserve(kMaxNumSuggestions); + for (size_t i = 0; i < data.size(); i++) { + size_t idx = (search_start_idx + i) % data.size(); + // Skip positions whose tail is shorter than kMinEntrySize. A tail of + // exactly kMinEntrySize bytes is still usable, so the final window of + // `data` (and inputs of exactly kMinEntrySize bytes) are searched too. + if (idx + CmpDictionary::kMinEntrySize > data.size()) continue; + ByteSpan tail{&data[idx], data.size() - idx}; + cmp_dictionary_.SuggestReplacement(tail, suggestions); + if (suggestions.empty()) continue; + auto suggestion = suggestions[rng_() % suggestions.size()]; + if (idx + suggestion.size() <= data.size()) { + std::copy(suggestion.begin(), suggestion.end(), data.begin() + idx); + return true; + } + } + return false; +} + +bool ByteArrayMutator::InsertFromDictionary(ByteArray &data) { + if (dictionary_.empty()) return false; + size_t dict_entry_idx = rng_() % dictionary_.size(); + const auto &dict_entry = dictionary_[dict_entry_idx]; + // There are N+1 positions to insert something into an array of N. + size_t pos = rng_() % (data.size() + 1); + data.insert(data.begin() + pos, dict_entry.begin(), dict_entry.end()); + return true; +} + +void ByteArrayMutator::CrossOverInsert(ByteArray &data, + const ByteArray &other) { + if ((data.size() % size_alignment_) + other.size() < size_alignment_) return; + // insert other[first:first+size] at data[pos] + size_t size = 1 + rng_() % other.size(); + size = RoundUpToAdd(data.size(), size); + if (size > other.size()) { + size -= size_alignment_; + } + size_t first = rng_() % (other.size() - size + 1); + size_t pos = rng_() % (data.size() + 1); + data.insert(data.begin() + pos, other.begin() + first, + other.begin() + first + size); +} + +void ByteArrayMutator::CrossOverOverwrite(ByteArray &data, + const ByteArray &other) { + // Nothing to do when either side is empty; this also avoids `% 0` and + // size_t underflow below. + if (data.size() == 0 || other.size() == 0) return; + // Overwrite data[pos:pos+size] with other[first:first+size]. + // Overwrite no more than half of data.
+ size_t max_size = std::max(size_t{1}, data.size() / 2); + size_t first = rng_() % other.size(); + max_size = std::min(max_size, other.size() - first); + size_t size = 1 + rng_() % max_size; + size_t max_pos = data.size() - size; + size_t pos = rng_() % (max_pos + 1); + std::copy(other.begin() + first, other.begin() + first + size, + data.begin() + pos); +} + +const KnobId knob_cross_over_insert_or_overwrite = + Knobs::NewId("cross_over_insert_or_overwrite"); + +void ByteArrayMutator::CrossOver(ByteArray &data, const ByteArray &other) { + if (data.size() >= max_len_) { + CrossOverOverwrite(data, other); + } else { + if (knobs_.GenerateBool(knob_cross_over_insert_or_overwrite, rng_())) { + CrossOverInsert(data, other); + } else { + CrossOverOverwrite(data, other); + } + } +} + +// Controls how much crossover is used during mutations. +// https://en.wikipedia.org/wiki/Crossover_(genetic_algorithm) +// TODO(kcc): add tests with different values of knobs. +const KnobId knob_mutate_or_crossover = Knobs::NewId("mutate_or_crossover"); + +std::vector ByteArrayMutator::MutateMany( + const std::vector &inputs, size_t num_mutants) { + if (inputs.empty()) abort(); + // TODO(xinhaoyuan): Consider metadata in other inputs instead of always the + // first one. + SetMetadata(inputs[0].metadata != nullptr ? *inputs[0].metadata + : ExecutionMetadata()); + size_t num_inputs = inputs.size(); + std::vector mutants; + mutants.reserve(num_mutants); + for (size_t i = 0; i < num_mutants; ++i) { + auto mutant = inputs[rng_() % num_inputs].data; + if (mutant.size() <= max_len_ && + knobs_.GenerateBool(knob_mutate_or_crossover, rng_())) { + // Do crossover only if the mutant is not over the max_len_. + // Perform crossover with some other input. It may be the same input. + const auto &other_input = inputs[rng_() % num_inputs].data; + CrossOver(mutant, other_input); + } else { + // Perform mutation.
+ Mutate(mutant); + } + mutants.push_back(std::move(mutant)); + } + return mutants; +} + +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/byte_array_mutator.h b/src/third_party/fuzztest/dist/centipede/byte_array_mutator.h new file mode 100644 index 00000000000..3c6978caad5 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/byte_array_mutator.h @@ -0,0 +1,255 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef THIRD_PARTY_CENTIPEDE_BYTE_ARRAY_MUTATOR_H_ +#define THIRD_PARTY_CENTIPEDE_BYTE_ARRAY_MUTATOR_H_ + +#include +#include +#include +#include +#include +#include + +#include "absl/base/nullability.h" +#include "./centipede/execution_metadata.h" +#include "./centipede/knobs.h" +#include "./centipede/mutation_input.h" +#include "./common/defs.h" + +namespace fuzztest::internal { + +// A simple class representing an array of up to kMaxEntrySize bytes.
+class DictEntry { + public: + static constexpr uint8_t kMaxEntrySize = 16; + + // Copies `bytes` into the entry. Traps if `bytes` is longer than + // kMaxEntrySize. + explicit DictEntry(ByteSpan bytes) + : bytes_{}, // initialize bytes_ to all zeros + size_(bytes.size()) { + // Check the untruncated size: `size_` is a uint8_t, so a span of 256 bytes + // or more would wrap around and slip past a check made on `size_`, letting + // the memcpy below overflow `bytes_`. + if (bytes.size() > kMaxEntrySize) __builtin_trap(); + memcpy(bytes_, bytes.data(), bytes.size()); + } + const uint8_t *absl_nonnull begin() const { return bytes_; } + const uint8_t *end() const { return bytes_ + size_; } + size_t size() const { return size_; } + // Orders entries by their zero-padded bytes first and by size second. + bool operator<(const DictEntry &other) const { + return memcmp(this, &other, sizeof(*this)) < 0; + } + + private: + // bytes_ must go first so that operator < is lexicographic. + uint8_t bytes_[kMaxEntrySize]; + uint8_t size_; // between kMinEntrySize and kMaxEntrySize. +}; + +// Dictionary of CMP args. +// Maintains an easy-to-query set of pairs {A,B}, such that +// an instruction `A CMP B` has been observed. +class CmpDictionary { + public: + static constexpr size_t kMinEntrySize = 2; // 1-byte entries won't be added. + + CmpDictionary() = default; + + // Sets the dictionary from execution `metadata`. + // + // Returns false on bad metadata, true otherwise. + bool SetFromMetadata(const ExecutionMetadata &metadata); + + // Clears `suggestions` on entry. + // For every observed `A CMP B` such that `A` is a prefix of `bytes`, + // adds `B` to `suggestions`. + // `suggestions` is filled up to capacity(), but not more. + void SuggestReplacement(ByteSpan bytes, + std::vector &suggestions) const; + + // Returns the number of dictionary entries. + size_t size() const { return dictionary_.size(); } + + private: + using Pair = std::pair; + std::vector dictionary_; +}; + +// This class allows to mutate a ByteArray in different ways. +// All mutations expect and guarantee that `data` remains non-empty +// since there is only one possible empty input and it's uninteresting. +// +// This class is thread-compatible. +// Typical usage is to have one such object per thread. +class ByteArrayMutator { + public: + // CTOR.
Initializes the internal RNG with `seed` (`seed` != 0). + // Keeps a const reference to `knobs` throughout the lifetime. + ByteArrayMutator(const Knobs &knobs, uintptr_t seed) + : rng_(seed), knobs_(knobs) { + if (seed == 0) __builtin_trap(); // We don't include logging.h here. + } + + // Adds `dict_entries` to an internal dictionary. + void AddToDictionary(const std::vector &dict_entries); + + // Populates the internal CmpDictionary using execution `metadata`. + // Returns false on failure, true otherwise. + bool SetMetadata(const ExecutionMetadata &metadata) { + return cmp_dictionary_.SetFromMetadata(metadata); + } + + // Takes non-empty `inputs` and produces `num_mutants` mutants. + std::vector MutateMany(const std::vector &inputs, + size_t num_mutants); + + using CrossOverFn = void (ByteArrayMutator::*)(ByteArray &, + const ByteArray &); + + // Mutates `data` by inserting a random part from `other`. + void CrossOverInsert(ByteArray &data, const ByteArray &other); + + // Mutates `data` by overwriting some of it with a random part of `other`. + void CrossOverOverwrite(ByteArray &data, const ByteArray &other); + + // Applies one of {CrossOverOverwrite, CrossOverInsert}. + void CrossOver(ByteArray &data, const ByteArray &other); + + // Type for a Mutator member-function. + // Every mutator function takes a ByteArray& as an input, mutates it in place + // and returns true if mutation took place. In some cases mutation may fail + // to happen, e.g. if EraseBytes() is called on a 1-byte input. + // Fn is test-only public. + using Fn = bool (ByteArrayMutator::*)(ByteArray &); + + // All public functions below are mutators. + // They return true iff a mutation took place. + + // Applies some random mutation to data. + bool Mutate(ByteArray &data); + + // Applies some random mutation that doesn't change size. + bool MutateSameSize(ByteArray &data); + + // Applies some random mutation that decreases size.
+ bool MutateDecreaseSize(ByteArray &data); + + // Applies some random mutation that increases size. + bool MutateIncreaseSize(ByteArray &data); + + // Flips a random bit. + bool FlipBit(ByteArray &data); + + // Swaps two bytes. + bool SwapBytes(ByteArray &data); + + // Changes a random byte to a random value. + bool ChangeByte(ByteArray &data); + + // Overwrites a random part of `data` with a random dictionary entry. + bool OverwriteFromDictionary(ByteArray &data); + + // Overwrites a random part of `data` with an entry suggested by the internal + // CmpDictionary. + bool OverwriteFromCmpDictionary(ByteArray &data); + + // Inserts random bytes. + bool InsertBytes(ByteArray &data); + + // Inserts a random dictionary entry at random position. + bool InsertFromDictionary(ByteArray &data); + + // Erases random bytes. + bool EraseBytes(ByteArray &data); + + // Set size alignment for mutants with modified sizes. Some mutators do not + // change input size, but mutators that insert or erase bytes will produce + // mutants with aligned sizes (if possible). + // + // Returns true if new size alignment was accepted. Returns false if the + // alignment is zero or if max length is not a multiple of the specified size + // alignment. + bool set_size_alignment(size_t size_alignment) { + // Zero is never a valid alignment: sizes are reduced modulo the alignment, + // so accepting it would lead to a division by zero. + if (size_alignment == 0) return false; + if ((max_len_ != std::numeric_limits::max()) && + (max_len_ % size_alignment != 0)) { + return false; + } + size_alignment_ = size_alignment; + return true; + } + + // Set max length in bytes for mutants with modified sizes. + // + // Returns true if new max length was accepted. Returns false if specified max + // length is not a multiple of size alignment.
+ bool set_max_len(size_t max_len) { + if ((max_len != std::numeric_limits::max()) && + (max_len % size_alignment_ != 0)) { + return false; + } + max_len_ = max_len; + return true; + } + + private: + FRIEND_TEST(ByteArrayMutator, RoundUpToAddCorrectly); + FRIEND_TEST(ByteArrayMutator, RoundDownToRemoveCorrectly); + + // Given a current size and a number of bytes to add, returns the number of + // bytes that should be added for the resulting size to be properly aligned. + // + // If the original to_add would result in an unaligned input size, we round up + // to the next larger aligned size. + // + // This function respects `max_len_` and will return 0 if curr_size is already + // greater than or equal to `max_len_`. + size_t RoundUpToAdd(size_t curr_size, size_t to_add); + + // Given a current size and a number of bytes to remove, returns the number of + // bytes that should be removed for the resulting size to be properly aligned. + // + // If the original to_remove would result in an unaligned input size, we + // round down to the next smaller aligned size. + // + // However, we never return a number of bytes to remove that would result in a + // 0 size. In this case, the resulting size will be the smaller of + // curr_size and size_alignment_. + // + // This function respects `max_len_` and may return a larger number necessary + // to get the mutant's size to below `max_len_`. + size_t RoundDownToRemove(size_t curr_size, size_t to_remove); + + // Size alignment in bytes to generate mutants. + // + // For example, if size_alignment_ is 1, generated mutants can have any + // number of bytes. If size_alignment_ is 4, generated mutants will have sizes + // that are 4-byte aligned. + size_t size_alignment_ = 1; + + // Max length of a generated mutant in bytes. + size_t max_len_ = std::numeric_limits::max(); + + Rng rng_; + const Knobs &knobs_; + std::vector dictionary_; + CmpDictionary cmp_dictionary_; +}; + +// Controls how much crossover is used during mutations.
+// https://en.wikipedia.org/wiki/Crossover_(genetic_algorithm) +// TODO(kcc): add tests with different values of knobs. +extern const KnobId knob_mutate_or_crossover; +// Controls how much crossover inserts data from the other input instead of +// overwriting. +extern const KnobId knob_cross_over_insert_or_overwrite; + +} // namespace fuzztest::internal + +#endif // THIRD_PARTY_CENTIPEDE_BYTE_ARRAY_MUTATOR_H_ diff --git a/src/third_party/fuzztest/dist/centipede/byte_array_mutator_test.cc b/src/third_party/fuzztest/dist/centipede/byte_array_mutator_test.cc new file mode 100644 index 00000000000..ae35641b585 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/byte_array_mutator_test.cc @@ -0,0 +1,1020 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "./centipede/byte_array_mutator.h" + +#include +#include +#include +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/container/flat_hash_set.h" +#include "./centipede/execution_metadata.h" +#include "./centipede/knobs.h" +#include "./centipede/mutation_input.h" +#include "./centipede/runner_cmp_trace.h" +#include "./common/defs.h" + +namespace fuzztest::internal { + +// Tests that when alignment is not 1 byte, adding bytes to an input will result +// in a size-aligned mutant (even if the input is not size-aligned).
+// +// Note: This test cannot be in an anonymous namespace due to the FRIEND_TEST in +// ByteArrayMutator. +TEST(ByteArrayMutator, RoundUpToAddCorrectly) { + Knobs knobs; + ByteArrayMutator mutator(knobs, /*seed=*/1); + EXPECT_TRUE(mutator.set_size_alignment(4)); + + EXPECT_EQ(mutator.RoundUpToAdd(/*curr_size=*/0, /*to_add=*/0), 0); + EXPECT_EQ(mutator.RoundUpToAdd(/*curr_size=*/4, /*to_add=*/0), 0); + EXPECT_EQ(mutator.RoundUpToAdd(/*curr_size=*/4, /*to_add=*/3), 4); + EXPECT_EQ(mutator.RoundUpToAdd(/*curr_size=*/5, /*to_add=*/0), 3); + EXPECT_EQ(mutator.RoundUpToAdd(/*curr_size=*/5, /*to_add=*/2), 3); + EXPECT_EQ(mutator.RoundUpToAdd(/*curr_size=*/5, /*to_add=*/18), 19); + + // Check that max length is also respected. + EXPECT_TRUE(mutator.set_max_len(12)); + + EXPECT_EQ(mutator.RoundUpToAdd(/*curr_size=*/5, /*to_add=*/0), 3); + EXPECT_EQ(mutator.RoundUpToAdd(/*curr_size=*/5, /*to_add=*/2), 3); + EXPECT_EQ(mutator.RoundUpToAdd(/*curr_size=*/5, /*to_add=*/18), 7); + EXPECT_EQ(mutator.RoundUpToAdd(/*curr_size=*/11, /*to_add=*/5), 1); + EXPECT_EQ(mutator.RoundUpToAdd(/*curr_size=*/12, /*to_add=*/5), 0); + EXPECT_EQ(mutator.RoundUpToAdd(/*curr_size=*/13, /*to_add=*/5), 0); +} + +// Tests that when alignment is not 1 byte, removing bytes from an input will +// result in a size-aligned mutant (even if the input is not size-aligned). +// +// Note: This test cannot be in an anonymous namespace due to the FRIEND_TEST in +// ByteArrayMutator. 
+TEST(ByteArrayMutator, RoundDownToRemoveCorrectly) { + Knobs knobs; + ByteArrayMutator mutator(knobs, /*seed=*/1); + EXPECT_TRUE(mutator.set_size_alignment(4)); + + EXPECT_EQ(mutator.RoundDownToRemove(/*curr_size=*/0, /*to_remove=*/0), 0); + EXPECT_EQ(mutator.RoundDownToRemove(/*curr_size=*/0, /*to_remove=*/1), 0); + EXPECT_EQ(mutator.RoundDownToRemove(/*curr_size=*/1, /*to_remove=*/0), 0); + EXPECT_EQ(mutator.RoundDownToRemove(/*curr_size=*/1, /*to_remove=*/1), 0); + EXPECT_EQ(mutator.RoundDownToRemove(/*curr_size=*/4, /*to_remove=*/0), 0); + EXPECT_EQ(mutator.RoundDownToRemove(/*curr_size=*/4, /*to_remove=*/3), 0); + EXPECT_EQ(mutator.RoundDownToRemove(/*curr_size=*/5, /*to_remove=*/0), 1); + EXPECT_EQ(mutator.RoundDownToRemove(/*curr_size=*/5, /*to_remove=*/2), 1); + EXPECT_EQ(mutator.RoundDownToRemove(/*curr_size=*/7, /*to_remove=*/2), 3); + EXPECT_EQ(mutator.RoundDownToRemove(/*curr_size=*/23, /*to_remove=*/4), 7); + EXPECT_EQ(mutator.RoundDownToRemove(/*curr_size=*/23, /*to_remove=*/20), 19); + EXPECT_EQ(mutator.RoundDownToRemove(/*curr_size=*/23, /*to_remove=*/24), 19); + + // Check that max length is also respected. 
+ EXPECT_TRUE(mutator.set_max_len(12)); + + EXPECT_EQ(mutator.RoundDownToRemove(/*curr_size=*/7, /*to_remove=*/2), 3); + EXPECT_EQ(mutator.RoundDownToRemove(/*curr_size=*/23, /*to_remove=*/4), 11); + EXPECT_EQ(mutator.RoundDownToRemove(/*curr_size=*/23, /*to_remove=*/20), 19); +} + +namespace { + +TEST(DictEntry, DictEntry) { + uint8_t bytes[17] = {0, 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16}; + DictEntry a_0_10({bytes + 0, 10}); + DictEntry a_0_4({bytes + 0, 4}); + DictEntry a_1_8({bytes + 1, 8}); + + EXPECT_LT(a_0_4, a_0_10); + EXPECT_LT(a_0_10, a_1_8); + EXPECT_EQ(memcmp(a_0_10.begin(), bytes, a_0_10.end() - a_0_10.begin()), 0); + + EXPECT_DEATH({ DictEntry a_0_10({bytes, 17}); }, ""); +} + +TEST(CmpDictionary, CmpDictionary) { + CmpDictionary dict; + ExecutionMetadata metadata{/*cmp_data=*/{ + 2, // size + 1, 2, // a + 3, 4, // b + 3, // size + 5, 6, 7, // a + 8, 9, 10, // b + 4, // size + 11, 12, 13, 14, // a + 15, 16, 17, 18, // b + 3, // size + 20, 21, 22, // a + 15, 16, 17, // b + 3, // size + 15, 16, 20, // a + 30, 40, 50, // b + }}; + EXPECT_TRUE(dict.SetFromMetadata(metadata)); + + using S = ByteSpan; + + std::vector suggestions; + suggestions.reserve(5); + + dict.SuggestReplacement({42, 43}, suggestions); + EXPECT_TRUE(suggestions.empty()); + + dict.SuggestReplacement({1, 2, 3}, suggestions); + EXPECT_THAT(suggestions, testing::ElementsAre(S({3, 4}))); + + dict.SuggestReplacement({5, 6, 7, 8}, suggestions); + EXPECT_THAT(suggestions, testing::ElementsAre(S({8, 9, 10}))); + + dict.SuggestReplacement({15, 16, 17, 18, 0, 0}, suggestions); + EXPECT_THAT(suggestions, testing::UnorderedElementsAre(S({11, 12, 13, 14}), + S({20, 21, 22}))); + + dict.SuggestReplacement({15, 16, 20}, suggestions); + EXPECT_THAT(suggestions, testing::UnorderedElementsAre(S({30, 40, 50}))); + + // check that we don't exceed capacity. 
+ std::vector capacity1; + capacity1.reserve(1); + EXPECT_EQ(capacity1.capacity(), 1); + dict.SuggestReplacement({15, 16, 17, 18, 0, 0}, capacity1); + EXPECT_EQ(capacity1.size(), 1); + EXPECT_EQ(capacity1.capacity(), 1); +} + +TEST(CmpDictionary, CmpDictionaryIsCompatibleWithCmpTrace) { + CmpTrace<0, 13> traceN; + traceN.Clear(); + constexpr uint8_t long_array[20] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}; + traceN.Capture(20, long_array, long_array); // will be trimmed to 16. + + ExecutionMetadata metadata; + bool append_failed = false; + int count = 0; + traceN.ForEachNonZero( + [&](uint8_t size, const uint8_t *v0, const uint8_t *v1) { + if (!metadata.AppendCmpEntry({v0, size}, {v1, size})) + append_failed = true; + count++; + }); + EXPECT_FALSE(append_failed); + EXPECT_EQ(1, count); + + CmpDictionary dictionary; + EXPECT_TRUE(dictionary.SetFromMetadata(metadata)); + EXPECT_EQ(2, dictionary.size()); +} + +// Tests that two mutators seeded with different rng seeds produce different +// results. +TEST(ByteArrayMutator, Randomness) { + Knobs knobs; + ByteArrayMutator mutator[2]{{knobs, 1}, {knobs, 2}}; + + std::vector res[2]; + for (size_t i = 0; i < 2; i++) { + ByteArray seed = {0}; + // Just run a few iterations. + for (size_t iter = 0; iter < 100; iter++) { + mutator[i].Mutate(seed); + res[i].push_back(seed); + } + } + EXPECT_NE(res[0], res[1]); +} + +// Tests that max length is always a multiple of size alignment. 
+TEST(ByteArrayMutator, CheckSizeAlignmentWithMaxLength) { + Knobs knobs; + ByteArrayMutator mutator(knobs, /*seed=*/1); + + EXPECT_TRUE(mutator.set_size_alignment(1000)); + EXPECT_TRUE(mutator.set_size_alignment(4)); + EXPECT_TRUE(mutator.set_max_len(4)); + EXPECT_TRUE(mutator.set_max_len(16)); + EXPECT_FALSE(mutator.set_max_len(2)); + EXPECT_FALSE(mutator.set_max_len(10)); + + EXPECT_TRUE(mutator.set_size_alignment(8)); + EXPECT_FALSE(mutator.set_size_alignment(12)); + EXPECT_FALSE(mutator.set_size_alignment(15)); +} + +// Tests a callback `fn`: mutations of `seed` are expected to eventually +// match all of `expected_mutants`, but never any of `unexpected_mutants`. +// Mutators that do a single-step can be tested for `unexpected_mutants`, +// while for more complicated mutators `unexpected_mutants` should be empty. +void TestMutatorFn(ByteArrayMutator::Fn fn, const ByteArray &seed, + const std::vector &expected_mutants, + const std::vector &unexpected_mutants, + size_t size_alignment = 1, + size_t max_len = std::numeric_limits::max(), + const std::vector &dictionary = {}, + ByteSpan cmp_data = {}, size_t num_iterations = 100000000) { + Knobs knobs; + ByteArrayMutator mutator(knobs, 1); + EXPECT_TRUE(mutator.set_size_alignment(size_alignment)); + EXPECT_TRUE(mutator.set_max_len(max_len)); + mutator.AddToDictionary(dictionary); + mutator.SetMetadata({/*cmp_data=*/{cmp_data.begin(), cmp_data.end()}}); + absl::flat_hash_set expected(expected_mutants.begin(), + expected_mutants.end()); + absl::flat_hash_set unexpected(unexpected_mutants.begin(), + unexpected_mutants.end()); + ByteArray mutant; // create outside the loop to avoid malloc in the loop. 
+ for (size_t i = 0; i < num_iterations; i++) { + mutant = seed; + (mutator.*fn)(mutant); + expected.erase(mutant); + if (expected.empty()) break; + EXPECT_FALSE(unexpected.contains(mutant)); + } + EXPECT_TRUE(expected.empty()); +} + +TEST(ByteArrayMutator, ChangeByte) { + TestMutatorFn(&ByteArrayMutator::ChangeByte, {1, 2, 3}, + /*expected_mutants=*/ + { + {1, 2, 4}, + {42, 2, 3}, + {1, 66, 3}, + }, + /*unexpected_mutants=*/ + { + {9, 9, 3}, + {1, 8, 8}, + {7, 2, 7}, + }); +} + +TEST(ByteArrayMutator, FlipBit) { + TestMutatorFn(&ByteArrayMutator::FlipBit, {0, 7, 10}, + /*expected_mutants=*/ + { + {1, 7, 10}, + {0, 6, 10}, + {0, 7, 11}, + }, + /*unexpected_mutants=*/ + { + {1, 6, 10}, + {0, 6, 11}, + }); +} + +TEST(ByteArrayMutator, SwapBytes) { + TestMutatorFn(&ByteArrayMutator::SwapBytes, {0, 1, 2}, + /*expected_mutants=*/ + { + {0, 2, 1}, + {1, 0, 2}, + {2, 1, 0}, + }, + /*unexpected_mutants=*/ + { + {2, 0, 1}, + }); +} + +TEST(ByteArrayMutator, InsertBytes) { + TestMutatorFn(&ByteArrayMutator::InsertBytes, {0, 1, 2}, + /*expected_mutants=*/ + { + {0, 1, 2, 3}, + {0, 3, 1, 2}, + {3, 0, 1, 2}, + {0, 1, 2, 3, 4}, + {0, 3, 4, 1, 2}, + {3, 4, 0, 1, 2}, + }, + /*unexpected_mutants=*/ + { + {0, 1}, + {0, 1, 2}, + {0, 3, 1, 4, 2}, + }); +} + +TEST(ByteArrayMutator, InsertBytesWithAlignment) { + TestMutatorFn(&ByteArrayMutator::InsertBytes, {0, 1, 2}, + /*expected_mutants=*/ + { + {0, 1, 2, 3}, + {0, 3, 1, 2}, + {3, 0, 1, 2}, + }, + /*unexpected_mutants=*/ + { + {0, 1}, + {0, 1, 2}, + {0, 1, 2, 3, 4}, + {0, 3, 1, 4, 2}, + {0, 3, 4, 1, 2}, + {3, 4, 0, 1, 2}, + }, + /*size_alignment=*/4); +} + +TEST(ByteArrayMutator, InsertBytesWithMaxLen) { + TestMutatorFn(&ByteArrayMutator::InsertBytes, {0, 1, 2}, + /*expected_mutants=*/ + { + {0, 1, 2, 3}, + {0, 3, 1, 2}, + {3, 0, 1, 2}, + }, + /*unexpected_mutants=*/ + { + {0, 1, 2, 3, 4}, + {0, 3, 4, 1, 2}, + {3, 4, 0, 1, 2}, + }, + /*size_alignment=*/1, + /*max_len=*/4); +} + +// Currently, same as for InsertBytes. 
Will change in future as we add more +// mutators. +TEST(ByteArrayMutator, MutateIncreaseSize) { + TestMutatorFn(&ByteArrayMutator::MutateIncreaseSize, {0, 1, 2}, + /*expected_mutants=*/ + { + {0, 1, 2, 3}, + {0, 3, 1, 2}, + {3, 0, 1, 2}, + {0, 1, 2, 3, 4}, + {0, 3, 4, 1, 2}, + {3, 4, 0, 1, 2}, + }, + /*unexpected_mutants=*/ + { + {0, 1}, + {0, 3, 1, 4, 2}, + }); +} + +TEST(ByteArrayMutator, MutateIncreaseSizeWithAlignment) { + TestMutatorFn(&ByteArrayMutator::MutateIncreaseSize, {0, 1, 2}, + /*expected_mutants=*/ + { + {0, 1, 2, 3}, + {0, 3, 1, 2}, + {3, 0, 1, 2}, + }, + /*unexpected_mutants=*/ + { + {0, 1}, + {0, 1, 2, 3, 4}, + {0, 3, 1, 4, 2}, + {0, 3, 4, 1, 2}, + {3, 4, 0, 1, 2}, + }, + /*size_alignment=*/4); +} + +TEST(ByteArrayMutator, EraseBytes) { + TestMutatorFn(&ByteArrayMutator::EraseBytes, {0, 1, 2, 3}, + /*expected_mutants=*/ + { + {0, 1, 2}, + {0, 1, 3}, + {0, 2, 3}, + {1, 2, 3}, + {0, 1}, + {0, 3}, + {2, 3}, + }, + /*unexpected_mutants=*/ + { + {0}, + {1}, + {2}, + }); +} + +TEST(ByteArrayMutator, EraseBytesWithAlignment) { + TestMutatorFn(&ByteArrayMutator::EraseBytes, {0, 1, 2, 3}, + /*expected_mutants=*/ + { + {0, 1, 2, 3}, + }, + /*unexpected_mutants=*/ + { + {0, 1, 2}, + {0, 1, 3}, + {0, 2, 3}, + {1, 2, 3}, + {0, 1}, + {0, 3}, + {2, 3}, + {0}, + {1}, + {2}, + }, + /*size_alignment=*/4); + TestMutatorFn(&ByteArrayMutator::EraseBytes, {0, 1, 2, 3, 4}, + /*expected_mutants=*/ + { + {0, 1, 2, 3}, + {1, 2, 3, 4}, + {0, 1, 3, 4}, + }, + /*unexpected_mutants=*/ + { + {0, 1, 2, 3, 4}, + {0, 1, 2}, + {0, 1, 3}, + {0, 2, 3}, + {1, 2, 3}, + {0, 1}, + {0}, + }, + /*size_alignment=*/4); +} + +// Currently, same as EraseBytes. Will change in future as we add more mutators. 
+TEST(ByteArrayMutator, MutateDecreaseSize) { + TestMutatorFn(&ByteArrayMutator::MutateDecreaseSize, {0, 1, 2, 3}, + /*expected_mutants=*/ + { + {0, 1, 2}, + {0, 1, 3}, + {0, 2, 3}, + {1, 2, 3}, + {0, 1}, + {0, 3}, + {2, 3}, + }, + /*unexpected_mutants=*/ + { + {0}, + {1}, + {2}, + }); +} + +TEST(ByteArrayMutator, MutateDecreaseSizeWithAlignment) { + TestMutatorFn(&ByteArrayMutator::MutateDecreaseSize, {0, 1, 2, 3}, + /*expected_mutants=*/ + { + {0, 1, 2, 3}, + }, + /*unexpected_mutants=*/ + { + {0, 1, 2}, + {0, 1, 3}, + {0, 2, 3}, + {1, 2, 3}, + {0, 1}, + {0, 3}, + {2, 3}, + {0}, + {1}, + {2}, + }, + /*size_alignment=*/4); + TestMutatorFn(&ByteArrayMutator::MutateDecreaseSize, {0, 1, 2, 3, 4}, + /*expected_mutants=*/ + { + {0, 1, 2, 3}, + {1, 2, 3, 4}, + {0, 1, 3, 4}, + }, + /*unexpected_mutants=*/ + { + {0, 1, 2, 3, 4}, + {0, 1, 2}, + {0, 1, 3}, + {0, 2, 3}, + {1, 2, 3}, + {0, 1}, + {0}, + }, + /*size_alignment=*/4); +} + +TEST(ByteArrayMutator, MutateDecreaseSizeWithAlignmentAndMaxLen) { + TestMutatorFn(&ByteArrayMutator::MutateDecreaseSize, {0, 1, 2, 3}, + /*expected_mutants=*/ + { + {0, 1}, + {2, 3}, + }, + /*unexpected_mutants=*/ + { + {0}, + {1}, + {2}, + {1, 2}, + {0, 1, 2}, + }, + /*size_alignment=*/2, + /*max_len=*/2); +} + +// Tests that MutateSameSize will eventually produce all possible mutants of +// size 1 and 2. Also tests some of the 3-byte mutants. +TEST(ByteArrayMutator, MutateSameSize) { + Knobs knobs; + ByteArrayMutator mutator(knobs, 1); + for (size_t size = 1; size <= 2; size++) { + ByteArray data(size); + absl::flat_hash_set set; + size_t expected_set_size = 1 << (8 * size); + for (size_t iter = 0; iter < 2000000ULL; iter++) { + mutator.MutateSameSize(data); + EXPECT_EQ(data.size(), size); + set.insert(data); + if (set.size() == expected_set_size) break; + } + EXPECT_EQ(expected_set_size, set.size()); + } + + // One step of MutateSameSize may generate any mutant that can be generated by + // one step of its submutants. 
No mutant of other length may appear. + const std::vector kUnexpectedMutants = { + {1, 2}, + {1, 2, 3, 4}, + }; + TestMutatorFn(&ByteArrayMutator::MutateSameSize, {1, 2, 3}, + /*expected_mutants=*/ + { + {1, 2, 4}, + {42, 2, 3}, + {1, 66, 3}, + }, + kUnexpectedMutants); + TestMutatorFn(&ByteArrayMutator::MutateSameSize, {0, 7, 10}, + /*expected_mutants=*/ + { + {1, 7, 10}, + {0, 6, 10}, + {0, 7, 11}, + }, + kUnexpectedMutants); + TestMutatorFn(&ByteArrayMutator::MutateSameSize, {0, 1, 2}, + /*expected_mutants=*/ + { + {0, 2, 1}, + {1, 0, 2}, + {2, 1, 0}, + }, + kUnexpectedMutants); +} + +TEST(ByteArrayMutator, Mutate) { + TestMutatorFn(&ByteArrayMutator::Mutate, {1, 2, 3}, + /*expected_mutants=*/ + { + {1, 2, 4}, + {1, 2}, + {1, 2, 3, 4}, + }, + /*unexpected_mutants=*/ + { + {}, + }); +} + +TEST(ByteArrayMutator, OverwriteFromDictionary) { + TestMutatorFn(&ByteArrayMutator::OverwriteFromDictionary, {1, 2, 3, 4, 5}, + /*expected_mutants=*/ + { + {1, 2, 7, 8, 9}, + {1, 7, 8, 9, 5}, + {7, 8, 9, 4, 5}, + {1, 2, 3, 0, 6}, + {1, 2, 0, 6, 5}, + {1, 0, 6, 4, 5}, + {0, 6, 3, 4, 5}, + {42, 2, 3, 4, 5}, + {1, 42, 3, 4, 5}, + {1, 2, 42, 4, 5}, + {1, 2, 3, 42, 5}, + {1, 2, 3, 4, 42}, + }, + /*unexpected_mutants=*/ + { + {1, 2, 3, 7, 8}, + {8, 9, 3, 4, 5}, + {6, 2, 3, 4, 5}, + {1, 2, 3, 4, 0}, + {42, 42, 3, 4, 5}, + }, + /*size_alignment=*/1, + /*max_len=*/std::numeric_limits::max(), + /*dictionary=*/ + { + {7, 8, 9}, + {0, 6}, + {42}, + }); +} + +TEST(ByteArrayMutator, OverwriteFromCmpDictionary) { + TestMutatorFn(&ByteArrayMutator::OverwriteFromCmpDictionary, + {1, 2, 40, 50, 60}, + /*expected_mutants=*/ + { + {3, 4, 40, 50, 60}, + {1, 2, 10, 20, 30}, + }, + /*unexpected_mutants=*/ + { + {3, 4, 10, 20, 30}, + }, + /*size_alignment=*/1, + /*max_len=*/std::numeric_limits::max(), + /*dictionary=*/ + {}, + /*cmp_data=*/ + {/*args1*/ 2, 1, 2, 3, 4, /*args2*/ 3, 10, 20, 30, 40, 50, 60}); +} + +TEST(ByteArrayMutator, OverwriteFromCmpDictionaryAndSkipLongEntry) { + TestMutatorFn( + 
&ByteArrayMutator::OverwriteFromCmpDictionary, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}, + /*expected_mutants=*/ + {{100, 101, 102, 103, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}}, + /*unexpected_mutants=*/ + {{100, 101, 102, 103, 104, 105, 106, 107, 108, 109, + 110, 111, 112, 113, 114, 115, 116, 117, 118, 119}}, + /*size_alignment=*/1, + /*max_len=*/std::numeric_limits::max(), + /*dictionary=*/ + {}, + /*cmp_data=*/ + {/*size*/ 20, /*lhs*/ 0, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, 11, 12, + 13, 14, 15, 16, 17, 18, 19, + /*rhs*/ 100, 101, 102, 103, 104, 105, 106, + 107, 108, 109, 110, 111, 112, 113, + 114, 115, 116, 117, 118, 119, + /*size*/ 4, /*lhs*/ 0, 1, 2, 3, /*rhs*/ 100, 101, + 102, 103}); +} + +TEST(ByteArrayMutator, InsertFromDictionary) { + TestMutatorFn(&ByteArrayMutator::InsertFromDictionary, {1, 2, 3}, + /*expected_mutants=*/ + { + {1, 2, 3, 4, 5}, + {1, 2, 4, 5, 3}, + {1, 4, 5, 2, 3}, + {4, 5, 1, 2, 3}, + {1, 2, 3, 6, 7, 8}, + {1, 2, 6, 7, 8, 3}, + {1, 6, 7, 8, 2, 3}, + {6, 7, 8, 1, 2, 3}, + }, + /*unexpected_mutants=*/ + { + {1, 2, 3, 7, 8}, + {7, 8, 1, 2, 3}, + }, + /*size_alignment=*/1, + /*max_len=*/std::numeric_limits::max(), + /*dictionary=*/ + { + {4, 5}, + {6, 7, 8}, + }); +} + +// Tests CrossOver* mutations. +// With CrossOver, no random values are involved, only random offsets, +// and so we can test for all possible expected mutants. +void TestCrossOver(void (ByteArrayMutator::*fn)(ByteArray &, const ByteArray &), + const ByteArray &seed, const ByteArray &other, + const std::vector &all_possible_mutants, + size_t size_alignment = 1) { + Knobs knobs; + ByteArrayMutator mutator(knobs, 1); + EXPECT_TRUE(mutator.set_size_alignment(size_alignment)); + absl::flat_hash_set expected(all_possible_mutants.begin(), + all_possible_mutants.end()); + absl::flat_hash_set found; + const int kNumIter = 10000; + // Run for some number of iterations, make sure we saw all expected mutations + // and nothing else. 
+ for (int i = 0; i < kNumIter; i++) { + ByteArray mutant = seed; + (mutator.*fn)(mutant, other); + EXPECT_EQ(expected.count(mutant), 1); + found.insert(mutant); + } + EXPECT_EQ(expected, found); +} + +TEST(ByteArrayMutator, CrossOverInsert) { + TestCrossOver(&ByteArrayMutator::CrossOverInsert, {1}, {2}, + /*all_possible_mutants=*/ + { + {1, 2}, + {2, 1}, + }); + TestCrossOver(&ByteArrayMutator::CrossOverInsert, {1, 2}, {3}, + /*all_possible_mutants=*/ + { + {1, 2, 3}, + {1, 3, 2}, + {3, 1, 2}, + }); + TestCrossOver(&ByteArrayMutator::CrossOverInsert, {1}, {2, 3}, + /*all_possible_mutants=*/ + { + {1, 2, 3}, + {2, 3, 1}, + {2, 1}, + {1, 2}, + {3, 1}, + {1, 3}, + }); + TestCrossOver(&ByteArrayMutator::CrossOverInsert, {1, 2}, {3, 4}, + /*all_possible_mutants=*/ + { + {1, 2, 3, 4}, + {1, 3, 4, 2}, + {3, 4, 1, 2}, + {1, 2, 3}, + {1, 3, 2}, + {3, 1, 2}, + {1, 2, 4}, + {1, 4, 2}, + {4, 1, 2}, + }); +} + +TEST(ByteArrayMutator, CrossOverInsertWithAlignment) { + TestCrossOver(&ByteArrayMutator::CrossOverInsert, {1}, {2}, + /*all_possible_mutants=*/ + { + {1}, + }, + /*size_alignment=*/4); + TestCrossOver(&ByteArrayMutator::CrossOverInsert, {1, 2}, {3, 4}, + /*all_possible_mutants=*/ + { + {1, 2, 3, 4}, + {1, 3, 4, 2}, + {3, 4, 1, 2}, + }, + /*size_alignment=*/4); + TestCrossOver(&ByteArrayMutator::CrossOverInsert, {1, 2}, {3, 4, 5}, + /*all_possible_mutants=*/ + { + {1, 2, 3, 4}, + {1, 3, 4, 2}, + {3, 4, 1, 2}, + {1, 2, 4, 5}, + {1, 4, 5, 2}, + {4, 5, 1, 2}, + }, + /*size_alignment=*/4); + TestCrossOver(&ByteArrayMutator::CrossOverInsert, {1, 2, 3, 4, 5}, {6}, + /*all_possible_mutants=*/ + { + {1, 2, 3, 4, 5}, + }, + /*size_alignment=*/4); + TestCrossOver(&ByteArrayMutator::CrossOverInsert, {1, 2, 3}, {4, 5, 6, 7}, + /*all_possible_mutants=*/ + { + {4, 1, 2, 3}, + {5, 1, 2, 3}, + {6, 1, 2, 3}, + {7, 1, 2, 3}, + {1, 4, 2, 3}, + {1, 5, 2, 3}, + {1, 6, 2, 3}, + {1, 7, 2, 3}, + {1, 2, 4, 3}, + {1, 2, 5, 3}, + {1, 2, 6, 3}, + {1, 2, 7, 3}, + {1, 2, 3, 4}, + {1, 2, 3, 5}, + {1, 
2, 3, 6}, + {1, 2, 3, 7}, + }, + /*size_alignment=*/4); +} + +TEST(ByteArrayMutator, CrossOverOverwrite) { + TestCrossOver(&ByteArrayMutator::CrossOverOverwrite, {1}, {2}, + /*all_possible_mutants=*/ + { + {2}, + }); + TestCrossOver(&ByteArrayMutator::CrossOverOverwrite, {1, 2}, {3}, + /*all_possible_mutants=*/ + { + {1, 3}, + {3, 2}, + }); + TestCrossOver(&ByteArrayMutator::CrossOverOverwrite, {1}, {2, 3}, + /*all_possible_mutants=*/ + { + {2}, + {3}, + }); + TestCrossOver(&ByteArrayMutator::CrossOverOverwrite, {1, 2}, {3, 4}, + /*all_possible_mutants=*/ + { + {1, 3}, + {3, 2}, + {1, 4}, + {4, 2}, + }); + TestCrossOver(&ByteArrayMutator::CrossOverOverwrite, {1, 2, 3, 4, 5, 6}, + {7, 8}, + /*all_possible_mutants=*/ + { + // overwrite with {7} + {7, 2, 3, 4, 5, 6}, + {1, 7, 3, 4, 5, 6}, + {1, 2, 7, 4, 5, 6}, + {1, 2, 3, 7, 5, 6}, + {1, 2, 3, 4, 7, 6}, + {1, 2, 3, 4, 5, 7}, + // overwrite with {8} + {8, 2, 3, 4, 5, 6}, + {1, 8, 3, 4, 5, 6}, + {1, 2, 8, 4, 5, 6}, + {1, 2, 3, 8, 5, 6}, + {1, 2, 3, 4, 8, 6}, + {1, 2, 3, 4, 5, 8}, + // overwrite with {7, 8} + {7, 8, 3, 4, 5, 6}, + {1, 7, 8, 4, 5, 6}, + {1, 2, 7, 8, 5, 6}, + {1, 2, 3, 7, 8, 6}, + {1, 2, 3, 4, 7, 8}, + }); +} + +TEST(ByteArrayMutator, CrossOver) { + // Most of CrossOver is tested above in CrossOverOverwrite/CrossOverInsert. + // Here just test one set of inputs to ensure CrossOver calls the other two + // functions correctly. 
+ TestCrossOver(&ByteArrayMutator::CrossOver, {1, 2}, {3, 4}, + /*all_possible_mutants=*/ + { + // CrossOverInsert + {1, 2, 3, 4}, + {1, 3, 4, 2}, + {3, 4, 1, 2}, + {1, 2, 3}, + {1, 3, 2}, + {3, 1, 2}, + {1, 2, 4}, + {1, 4, 2}, + {4, 1, 2}, + // CrossOverOverwrite + {1, 3}, + {3, 2}, + {1, 4}, + {4, 2}, + }); +} + +TEST(ByteArrayMutator, FailedMutations) { + const int kNumIter = 1000000; + ByteArray data = {1, 2, 3, 4, 5}; + Knobs knobs; + ByteArrayMutator mutator(knobs, 1); + size_t num_failed_erase = 0; + size_t num_failed_generic = 0; + for (int i = 0; i < kNumIter; i++) { + num_failed_erase += !mutator.EraseBytes(data); + num_failed_generic += !mutator.Mutate(data); + } + // EraseBytes() will fail sometimes, but should not fail too often. + EXPECT_GT(num_failed_erase, 0); + EXPECT_LT(num_failed_erase, kNumIter / 2); + // The generic Mutate() should fail very infrequently. + EXPECT_LT(num_failed_generic, kNumIter / 1000); +} + +TEST(ByteArrayMutator, MutateManyWithAlignedInputs) { + constexpr size_t kSizeAlignment = 4; + Knobs knobs; + ByteArrayMutator mutator(knobs, /*seed=*/1); + EXPECT_TRUE(mutator.set_size_alignment(kSizeAlignment)); + constexpr size_t kNumMutantsToGenerate = 10000; + + // If all inputs are aligned, expect all generated mutants to be aligned. 
+ const std::vector aligned_inputs = { + {0, 1, 2, 3}, + {0, 1, 2, 3, 4, 5, 6, 7}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, + }; + const std::vector mutants = + mutator.MutateMany(GetMutationInputRefsFromDataInputs(aligned_inputs), + kNumMutantsToGenerate); + EXPECT_EQ(mutants.size(), kNumMutantsToGenerate); + for (const ByteArray &mutant : mutants) { + EXPECT_EQ(mutant.size() % kSizeAlignment, 0); + } +} + +TEST(ByteArrayMutator, MutateManyWithUnalignedInputs) { + constexpr size_t kSizeAlignment = 4; + Knobs knobs; + ByteArrayMutator mutator(knobs, /*seed=*/1); + EXPECT_TRUE(mutator.set_size_alignment(kSizeAlignment)); + constexpr size_t kNumMutantsToGenerate = 10000; + + // If there are unaligned inputs, most mutants should be aligned, but the ones + // that are unaligned should be the same size as the unaligned inputs (as they + // resulted from mutators that did not change the size of the inputs). + const std::vector unaligned_inputs = { + {0}, + {0, 1}, + {0, 1, 2}, + {0, 1, 2, 3, 4}, + {0, 1, 2, 3, 4, 5}, + {0, 1, 2, 3, 4, 5, 6}, + {0, 1, 2, 3, 4, 5, 6, 7, 8}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + }; + const std::vector mutants = + mutator.MutateMany(GetMutationInputRefsFromDataInputs(unaligned_inputs), + kNumMutantsToGenerate); + EXPECT_EQ(mutants.size(), kNumMutantsToGenerate); + for (const ByteArray &mutant : mutants) { + if (mutant.size() % kSizeAlignment != 0) { + EXPECT_LE(mutant.size(), 11); + } + } +} + +TEST(ByteArrayMutator, MutateManyWithMaxLen) { + constexpr size_t kMaxLen = 4; + Knobs knobs; + ByteArrayMutator mutator(knobs, /*seed=*/1); + EXPECT_TRUE(mutator.set_max_len(kMaxLen)); + constexpr size_t kNumMutantsToGenerate = 10000; + + const std::vector inputs = { + {0}, + {0, 1}, + {0, 1, 2}, + {0, 1, 2, 3}, + }; + const std::vector mutants = mutator.MutateMany( + GetMutationInputRefsFromDataInputs(inputs), kNumMutantsToGenerate); + EXPECT_EQ(mutants.size(), kNumMutantsToGenerate); + + for (const ByteArray 
&mutant : mutants) { + EXPECT_LE(mutant.size(), kMaxLen); + } +} + +TEST(ByteArrayMutator, MutateManyWithMaxLenWithStartingLargeInput) { + constexpr size_t kMaxLen = 4; + Knobs knobs; + ByteArrayMutator mutator(knobs, /*seed=*/1); + EXPECT_TRUE(mutator.set_max_len(kMaxLen)); + constexpr size_t kNumMutantsToGenerate = 10000; + + const std::vector large_input = { + {0, 1, 2, 3, 4, 5, 6, 7}, {0}, {0, 1}, {0, 1, 2}, {0, 1, 2, 3}, + }; + const std::vector mutants = mutator.MutateMany( + GetMutationInputRefsFromDataInputs(large_input), kNumMutantsToGenerate); + EXPECT_EQ(mutants.size(), kNumMutantsToGenerate); + + for (const ByteArray &mutant : mutants) { + if (mutant.size() > kMaxLen) { + // The only mutant larger than max length should be the same large input + // that mutation originally started with. All other mutants should be + // within the maximum length specified. + EXPECT_EQ(mutant, large_input[0]); + } + } +} + +} // namespace + +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/call_graph.cc b/src/third_party/fuzztest/dist/centipede/call_graph.cc new file mode 100644 index 00000000000..912155791db --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/call_graph.cc @@ -0,0 +1,70 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "./centipede/call_graph.h" + +#include +#include +#include + +#include "absl/log/check.h" +#include "./centipede/control_flow.h" +#include "./centipede/pc_info.h" + +namespace fuzztest::internal { + +void CallGraph::InitializeCallGraph(const CFTable &cf_table, + const PCTable &pc_table) { + // Find all function entries. + for (auto pc_info : pc_table) { + if (pc_info.has_flag(PCInfo::kFuncEntry)) + function_entries_.insert(pc_info.pc); + } + + uintptr_t current_function_entry = 0; + + for (size_t j = 0; j < cf_table.size();) { + std::vector current_callees; + auto current_pc = cf_table[j]; + ++j; + + basic_blocks_.insert(current_pc); + if (IsFunctionEntry(current_pc)) current_function_entry = current_pc; + + // Skip successors; `j` is bounded so a truncated table fails the CHECK. + while (j < cf_table.size() && cf_table[j]) { + ++j; + } + ++j; // Step over the delimiter. + + // Collect callees, with the same bound on `j`. + while (j < cf_table.size() && cf_table[j]) { + current_callees.push_back(cf_table[j]); + ++j; + } + ++j; // Step over the delimiter. + CHECK_LE(j, cf_table.size()); + + if (current_callees.empty()) continue; + basic_block_callees_[current_pc] = current_callees; + // Append collected callees to the call graph. + call_graph_[current_function_entry].insert( + call_graph_[current_function_entry].end(), current_callees.begin(), + current_callees.end()); + } + // This should stay empty. + CHECK(empty_.empty()); +} + +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/call_graph.h b/src/third_party/fuzztest/dist/centipede/call_graph.h new file mode 100644 index 00000000000..3e3c03b9640 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/call_graph.h @@ -0,0 +1,72 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef THIRD_PARTY_CENTIPEDE_CALL_GRAPH_H_ +#define THIRD_PARTY_CENTIPEDE_CALL_GRAPH_H_ + +#include +#include + +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" +#include "absl/log/check.h" +#include "./centipede/control_flow.h" +#include "./centipede/pc_info.h" +#include "./common/logging.h" + +namespace fuzztest::internal { + +class CallGraph { + public: + // Builds the call graph from `cf_table` (the __sancov_cfs contents) and + // `pc_table`. Crashes on error; if `cf_table` is empty, the maps stay empty. + void InitializeCallGraph(const CFTable& cf_table, const PCTable& pc_table); + + const std::vector& GetFunctionCallees(uintptr_t pc) const { + CHECK(IsFunctionEntry(pc)) << VV(pc) << " is not a function entry."; + const auto it = call_graph_.find(pc); + if (it == call_graph_.cend()) return empty_; + return it->second; + } + const std::vector& GetBasicBlockCallees(uintptr_t pc) const { + CHECK(basic_blocks_.contains(pc)) << VV(pc) << " is not a basic block."; + const auto it = basic_block_callees_.find(pc); + if (it == basic_block_callees_.cend()) return empty_; + return it->second; + } + const absl::flat_hash_set& GetFunctionEntries() const { + return function_entries_; + } + + bool IsFunctionEntry(uintptr_t pc) const { + return function_entries_.contains(pc); + } + + private: + // call_graph_: the key is function entry PC and value is all the + // callees of that function. It keeps only non-empty vectors in the map, so + // a function that does not have any callee won't be in this map.
+ absl::flat_hash_map> call_graph_; + // basic_block_callees_: the key is a basic block PC and value is all callees in + // that basic block. It keeps only non-empty vectors in the map, so a + // basic block that does not have any callee won't be in this map. + absl::flat_hash_map> basic_block_callees_; + absl::flat_hash_set function_entries_; + absl::flat_hash_set basic_blocks_; + std::vector empty_; // Returned by the getters when no callees are recorded. +}; + +} // namespace fuzztest::internal + +#endif // THIRD_PARTY_CENTIPEDE_CALL_GRAPH_H_ diff --git a/src/third_party/fuzztest/dist/centipede/call_graph_test.cc b/src/third_party/fuzztest/dist/centipede/call_graph_test.cc new file mode 100644 index 00000000000..3bc2a51a7ee --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/call_graph_test.cc @@ -0,0 +1,128 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "./centipede/call_graph.h" + +#include +#include +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/container/flat_hash_set.h" +#include "./centipede/control_flow.h" +#include "./centipede/pc_info.h" +#include "./common/logging.h" + +namespace fuzztest::internal { +namespace { + +using ::testing::Contains; + +// Mock CFTable for the cfg of function 1: pcs in parentheses are callees. +// There are three more CFTables for functions 6, 7, 8. +// Function 99 has no CFTable.
+//           1
+//         /   \
+//        /     \
+//    2 (99)   3 (6, -1, 8)
+//        \     /
+//         \   /
+//         4 (7)
+//
+// Each row below reads: block PC, its successor PCs, 0, its callee PCs, 0.
+// A callee of -1 stands for an indirect call.
+static const CFTable g_cf_table = {
+    1, 2, 3, 0, 0,         // PC 1 has no callee.
+    2, 4, 0, 99, 0,        // PC 2 calls 99.
+    3, 4, 0, 6, -1, 8, 0,  // PC 3 calls 6, 8, and has one indirect call.
+    4, 0, 7, 0,            // PC 4 calls 7.
+    5, 0, 0,               // PC 5 is not in pc_table.
+    6, 0, 0,               // PC 6 has no callees.
+    7, 0, 0,               // PC 7 has no callees.
+    8, 0, 7, 0,            // PC 8 calls 7.
+};
+
+// Mock PCTable for the above cfg.
+static const PCTable g_pc_table = {
+    {1, PCInfo::kFuncEntry},
+    {2, 0},
+    {3, 0},
+    {4, 0},
+    {6, PCInfo::kFuncEntry},
+    {7, PCInfo::kFuncEntry},
+    {8, PCInfo::kFuncEntry},
+};
+
+TEST(CallGraphDeathTest, CgNoneExistentPc) {
+  CallGraph call_graph;
+  call_graph.InitializeCallGraph(g_cf_table, g_pc_table);
+
+  // Query a PC that is in neither table: both getters are expected to die.
+  EXPECT_DEATH(call_graph.GetFunctionCallees(666), "");
+  EXPECT_DEATH(call_graph.GetBasicBlockCallees(666), "");
+}
+
+TEST(CallGraph, BuildCgFromCfTable) {
+  CallGraph call_graph;
+  call_graph.InitializeCallGraph(g_cf_table, g_pc_table);
+
+  absl::flat_hash_set<uintptr_t> instrumented_pcs;
+  for (auto &pc_info : g_pc_table) {
+    instrumented_pcs.insert(pc_info.pc);
+  }
+
+  // Check callees.
+  // Only PCs listed in g_pc_table are visited, so PC 5 (present in g_cf_table
+  // only) never shows up in this loop.
+  for (size_t i = 0; i < g_pc_table.size(); ++i) {
+    uintptr_t pc = g_pc_table[i].pc;
+    if (g_pc_table[i].has_flag(PCInfo::kFuncEntry))
+      EXPECT_TRUE(call_graph.IsFunctionEntry(pc));
+    else
+      EXPECT_FALSE(call_graph.IsFunctionEntry(pc));
+
+    SCOPED_TRACE(testing::Message() << VV(pc));
+    if (pc == 1) {
+      EXPECT_THAT(call_graph.GetFunctionCallees(pc).size(), 5);
+      EXPECT_THAT(call_graph.GetBasicBlockCallees(pc).size(), 0);
+    } else if (pc == 2) {
+      EXPECT_THAT(call_graph.GetBasicBlockCallees(pc).size(), 1);
+    } else if (pc == 3) {
+      auto callees = call_graph.GetBasicBlockCallees(pc);
+      EXPECT_THAT(callees.size(), 3);
+      for (auto &callee_pc : callees) {
+        if (callee_pc == -1ULL || !instrumented_pcs.contains(callee_pc))
+          continue;  // Indirect call or library function call.
+        SCOPED_TRACE(testing::Message() << VV(callee_pc));
+        EXPECT_TRUE(call_graph.IsFunctionEntry(callee_pc));
+      }
+      EXPECT_THAT(callees, Contains(6));
+      EXPECT_THAT(callees, Contains(8));
+
+      // Check the number of indirect calls.
+      EXPECT_THAT(std::count(callees.begin(), callees.end(), -1ULL), 1);
+    } else if (pc == 4) {
+      EXPECT_THAT(call_graph.GetBasicBlockCallees(pc).size(), 1);
+    } else if (pc == 6 || pc == 7) {
+      EXPECT_THAT(call_graph.GetFunctionCallees(pc).size(), 0);
+      EXPECT_THAT(call_graph.GetBasicBlockCallees(pc).size(), 0);
+    } else if (pc == 8) {
+      EXPECT_THAT(call_graph.GetFunctionCallees(pc).size(), 1);
+      EXPECT_THAT(call_graph.GetBasicBlockCallees(pc).size(), 1);
+    }
+  }
+}
+
+}  // namespace
+
+}  // namespace fuzztest::internal
diff --git a/src/third_party/fuzztest/dist/centipede/callstack.h b/src/third_party/fuzztest/dist/centipede/callstack.h
new file mode 100644
index 00000000000..58dbe25bb25
--- /dev/null
+++ b/src/third_party/fuzztest/dist/centipede/callstack.h
@@ -0,0 +1,118 @@
+// Copyright 2023 The Centipede Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef THIRD_PARTY_CENTIPEDE_CALLSTACK_H_
+#define THIRD_PARTY_CENTIPEDE_CALLSTACK_H_
+
+#include <cstddef>
+#include <cstdint>
+
+#include "./centipede/rolling_hash.h"
+
+namespace fuzztest::internal {
+// CallStack maintains a function call stack for the current thread.
+// It is told when a function is called, via OnFunctionEntry(pc, sp).
+// It is not told when a function exits, so every time a new function is called
+// it needs to unwind the stack based on the current and recorded sp values.
+//
+// This does not produce precise call stacks.
+//
+// For example, at some point the stack is:
+//   PC: 1, 2, 3
+//   SP: 10, 9, 8
+// Then, functions 2 and 3 exit, and function 4 with a large stack is called:
+//   PC: 1, 4
+//   SP: 10, 7
+// We will fail to unwind functions 2 and 3 and the stack will look like
+//   PC: 1, 2, 3, 4
+//   SP: 10, 9, 8, 7
+//
+// We currently don't see a reliable way to implement precise call stack by just
+// observing function entries (and not exits).
+// But for the purposes of Centipede (capturing call stacks as features) this
+// implementation should be good enough.
+//
+// Alternatives that would allow collecting precise call stacks are
+// * add instrumentation to capture function exits
+//   (fragile in presence of exceptions and longjmp).
+// * unwind stack with frame pointers (expensive and also fragile).
+// * Wait for hardware shadow call stacks (CET, etc).
+//
+// Function calls with depth beyond `kMaxDepth` will be ignored.
+// Objects of this class must be created as global or TLS.
+// The typical non-test usage is to create on TLS.
+// There is no CTOR, the objects are zero-initialized.
+// We currently do not use a CTOR with absl::ConstInitType so that the objects
+// can be declared as __thread.
+//
+// This code assumes that the stack grows down.
+//
+// NOTE(review): the template parameter list was lost in this copy of the
+// patch. `size_t kMaxDepth` follows from its use as an array bound and from
+// `CallStack<>` in the tests; the default value of 100 is a reconstruction -
+// confirm against upstream.
+template <size_t kMaxDepth = 100>
+class CallStack {
+ public:
+  // Returns the depth of the call stack.
+  // May be less than the actual depth if that is greater than kMaxDepth.
+  size_t Depth() const { return depth_; }
+
+  // Returns the PC at `idx`, idx must be less than the current depth.
+  // Traps if `idx` is out of range.
+  uintptr_t PC(size_t idx) const {
+    if (idx >= depth_) __builtin_trap();
+    return pc_[idx];
+  }
+
+  // Returns the hash of the current call stack, or 0 if the stack is empty.
+  // Only the last `window_size` frames are used to compute the hash.
+  // `Reset(window_size)` must be called at the initialization time.
+  uint32_t Hash() const { return depth_ == 0 ? 0 : hashes_[depth_ - 1]; }
+
+  // Updates the call stack and its hash on function entry.
+  // `pc` is the function PC to be recorded.
+  // `sp` is the current stack pointer value, which grows down.
+  void OnFunctionEntry(uintptr_t pc, uintptr_t sp) {
+    // First, unwind until the last record's SP is above `sp`.
+    while (depth_ && sp_[depth_ - 1] <= sp) {
+      --depth_;
+    }
+    // Ignore this call if we are already too deep.
+    if (depth_ == kMaxDepth) return;
+    // Record the frame, compute and remember the hash.
+    pc_[depth_] = pc;
+    sp_[depth_] = sp;
+    uint32_t previous_hash = depth_ == 0 ? 0 : hashes_[depth_ - 1];
+    // The frame that slides out of the hash window, or 0 while the stack is
+    // still shallower than the window.
+    uintptr_t previous_pc =
+        depth_ >= window_size_ ? pc_[depth_ - window_size_] : 0;
+    hashes_[depth_] = rolling_hash_.Update(previous_hash, pc, previous_pc);
+    ++depth_;
+  }
+
+  // Resets the call stack.
+  // `window_size` is the number of stack frames used to compute the hash.
+  void Reset(size_t window_size) {
+    depth_ = 0;
+    window_size_ = window_size;
+    rolling_hash_.Reset(window_size);
+  }
+
+ private:
+  // All data fields are zero initialized at process or thread startup.
+  size_t depth_;
+  uintptr_t pc_[kMaxDepth];
+  uintptr_t sp_[kMaxDepth];
+  uint32_t hashes_[kMaxDepth];
+  RollingHash rolling_hash_;
+  size_t window_size_;
+};
+
+}  // namespace fuzztest::internal
+
+#endif  // THIRD_PARTY_CENTIPEDE_CALLSTACK_H_
diff --git a/src/third_party/fuzztest/dist/centipede/callstack_test.cc b/src/third_party/fuzztest/dist/centipede/callstack_test.cc
new file mode 100644
index 00000000000..cdec9b29de6
--- /dev/null
+++ b/src/third_party/fuzztest/dist/centipede/callstack_test.cc
@@ -0,0 +1,242 @@
+// Copyright 2023 The Centipede Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "./centipede/callstack.h"
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/base/nullability.h"
+#include "absl/container/flat_hash_set.h"
+#include "./common/defs.h"
+
+namespace fuzztest::internal {
+namespace {
+
+using ::testing::Pointwise;
+
+// Simple test, calls OnFunctionEntry with fake sp values.
+TEST(CallStack, SimpleTest) {
+  static CallStack<> cs;  // CallStack should be global/tls only.
+  cs.Reset(10);
+  constexpr uintptr_t pc0 = 100;
+  constexpr uintptr_t pc1 = 101;
+  constexpr uintptr_t pc2 = 102;
+  constexpr uintptr_t pc3 = 103;
+  constexpr uintptr_t stack_top = 10000;
+  EXPECT_EQ(cs.Depth(), 0);
+  cs.OnFunctionEntry(pc0, stack_top);
+  cs.OnFunctionEntry(pc1, stack_top - 1);
+  cs.OnFunctionEntry(pc2, stack_top - 2);
+  EXPECT_EQ(cs.Depth(), 3);
+  EXPECT_EQ(cs.PC(0), pc0);
+  EXPECT_EQ(cs.PC(1), pc1);
+  EXPECT_EQ(cs.PC(2), pc2);
+  // An entry at an already-recorded sp replaces that frame and everything
+  // below it.
+  cs.OnFunctionEntry(pc3, stack_top - 2);
+  EXPECT_EQ(cs.Depth(), 3);
+  EXPECT_EQ(cs.PC(2), pc3);
+  cs.OnFunctionEntry(pc3, stack_top - 1);
+  EXPECT_EQ(cs.Depth(), 2);
+  EXPECT_EQ(cs.PC(1), pc3);
+  cs.OnFunctionEntry(pc3, stack_top);
+  EXPECT_EQ(cs.Depth(), 1);
+  EXPECT_EQ(cs.PC(0), pc3);
+}
+
+static CallStack<> g_real_calls_cs;  // CallStack should be global/tls only.
+using TestCallstack = std::vector<uintptr_t>;
+static std::vector<TestCallstack> g_test_callstacks;
+
+// Appends a snapshot of the PCs currently held by `g_real_calls_cs` to
+// `g_test_callstacks`.
+static void RecordCallStack() {
+  TestCallstack test_callstack;
+  for (size_t i = 0, n = g_real_calls_cs.Depth(); i < n; ++i) {
+    test_callstack.push_back(g_real_calls_cs.PC(i));
+  }
+  g_test_callstacks.push_back(test_callstack);
+}
+
+// Call on entry to functions Func[0123], that are helpers to RealCallsTest.
+#define ON_ENTRY(PC)               \
+  g_real_calls_cs.OnFunctionEntry( \
+      PC, reinterpret_cast<uintptr_t>(__builtin_frame_address(0)))
+
+// Don't let the compiler be too smart.
+static inline void BreakOptimization(const void *absl_nullable arg) {
+  __asm__ __volatile__("" : : "r"(arg) : "memory");
+}
+
+__attribute__((noinline)) void Func3() {
+  ON_ENTRY(3);
+  RecordCallStack();
+  BreakOptimization(0);
+}
+
+__attribute__((noinline)) void Func2() {
+  ON_ENTRY(2);
+  BreakOptimization(0);
+  Func3();
+  BreakOptimization(0);
+  Func3();
+  BreakOptimization(0);
+}
+
+__attribute__((noinline)) void Func1() {
+  ON_ENTRY(1);
+  BreakOptimization(0);
+  Func2();
+  BreakOptimization(0);
+  Func3();
+  BreakOptimization(0);
+}
+
+__attribute__((noinline)) void Func0() {
+  ON_ENTRY(0);
+  BreakOptimization(0);
+  Func1();
+  BreakOptimization(0);
+  Func2();
+  BreakOptimization(0);
+}
+
+// A 2-tuple matcher conversion of `::testing::IsSupersetOf`.
+MATCHER(IsSupersetOf, "") {
+  auto [actual, expected] = arg;
+  return ::testing::ExplainMatchResult(::testing::IsSupersetOf(expected),
+                                       actual, result_listener);
+}
+
+// This test actually creates a function call tree, and calls OnFunctionEntry
+// with real sp values (and fake PCs).
+TEST(CallStack, RealCallsTest) {
+  g_test_callstacks.clear();
+  g_real_calls_cs.Reset(10);
+  Func0();
+  Func1();
+  Func2();
+  Func3();
+  // One entry per RecordCallStack() call (i.e. per Func3 invocation), in call
+  // order.
+  std::vector<TestCallstack> expected_test_callstacks = {
+      {0, 1, 2, 3}, {0, 1, 2, 3}, {0, 1, 3}, {0, 2, 3}, {0, 2, 3}, {1, 2, 3},
+      {1, 2, 3},    {1, 3},       {2, 3},    {2, 3},    {3}};
+
+  // Each computed callstack should correctly include every function on the
+  // callstack. It may also contain some additional spurious functions - these
+  // are ones that have exited but not yet removed.
+  EXPECT_THAT(g_test_callstacks,
+              Pointwise(IsSupersetOf(), expected_test_callstacks));
+
+  // The code below walks both lists in lock step and uses `end() - 1`; stop
+  // here if the sizes differ rather than run past the end of either list.
+  ASSERT_EQ(g_test_callstacks.size(), expected_test_callstacks.size());
+
+  // Check that the additional elements in each computed callstack only
+  // correspond to previous calls not yet removed.
+  for (TestCallstack &cs : g_test_callstacks) {
+    std::sort(cs.begin(), cs.end());
+  }
+  for (TestCallstack &cs : expected_test_callstacks) {
+    std::sort(cs.begin(), cs.end());
+  }
+  std::vector<TestCallstack> extra_calls(g_test_callstacks.size());
+  for (auto it_1 = g_test_callstacks.begin(),
+            it_2 = expected_test_callstacks.begin(), it = extra_calls.begin();
+       it_1 != g_test_callstacks.end(); it_1++, it_2++, it++) {
+    std::set_difference(it_1->begin(), it_1->end(), it_2->begin(), it_2->end(),
+                        std::inserter(*it, it->begin()));
+  }
+  // The extras of callstack i+1 must all be present in callstack i.
+  EXPECT_THAT(std::vector<TestCallstack>(g_test_callstacks.begin(),
+                                         g_test_callstacks.end() - 1),
+              Pointwise(IsSupersetOf(),
+                        std::vector<TestCallstack>(extra_calls.begin() + 1,
+                                                   extra_calls.end())));
+}
+
+// Tests deep recursion.
+TEST(CallStack, DeepRecursion) {
+  static CallStack<100> cs;  // CallStack should be global/tls only.
+  cs.Reset(10);
+  constexpr size_t kLargeDepth = 200;
+  constexpr uintptr_t kStackTop = 100000000;
+  // Enter deep recursion.
+  for (size_t i = 0; i < kLargeDepth; ++i) {
+    cs.OnFunctionEntry(i, kStackTop - i);
+  }
+  EXPECT_EQ(cs.Depth(), 100);
+  // Exit recursion, call not-too-deep.
+  cs.OnFunctionEntry(42, kStackTop - 2);
+  EXPECT_EQ(cs.Depth(), 3);
+  EXPECT_EQ(cs.PC(0), 0);
+  EXPECT_EQ(cs.PC(1), 1);
+  EXPECT_EQ(cs.PC(2), 42);
+}
+
+// Tests CallStack::Hash().
+TEST(CallStack, Hash) {
+  constexpr size_t kDepth = 5000;
+  constexpr size_t kNumDifferentPCs = 10000;
+  constexpr size_t kNumIterations = 1000;
+  constexpr uintptr_t kStackTop = 100000000;
+  static CallStack<kDepth> cs;  // CallStack should be global/tls only.
+  cs.Reset(10);
+  fuzztest::internal::Rng rng;
+
+  // Push the first PC on the stack, remember its hash.
+  cs.OnFunctionEntry(42, kStackTop);
+  const auto initial_hash = cs.Hash();
+
+  absl::flat_hash_set<uint32_t> hashes;
+
+  for (size_t iter = 0; iter < kNumIterations; ++iter) {
+    // Push many PCs on the stack, collect their hashes.
+    hashes.clear();
+    for (size_t i = 0; i < kDepth; ++i) {
+      cs.OnFunctionEntry(rng() % kNumDifferentPCs, kStackTop - i);
+      auto hash = cs.Hash();
+      hashes.insert(hash);
+    }
+    // Check that most hashes are unique. Some collisions are ok.
+    EXPECT_GE(hashes.size(), kDepth - 1);
+    // unwind all the way to the top.
+    cs.OnFunctionEntry(42, kStackTop);
+    EXPECT_EQ(cs.Depth(), 1);
+    EXPECT_EQ(cs.Hash(), initial_hash);
+  }
+}
+
+TEST(CallStack, WindowSize) {
+  constexpr size_t kDepth = 5000;
+  constexpr uintptr_t kStackTop = 100000000;
+  static CallStack<kDepth> cs;  // CallStack should be global/tls only.
+  absl::flat_hash_set<uint32_t> hashes;
+  for (size_t num_different_frames = 1; num_different_frames < 100;
+       ++num_different_frames) {
+    for (size_t window_size = 1; window_size < 100; ++window_size) {
+      // Simulate recursive call stack with `num_different_frames` period,
+      // i.e. for `num_different_frames=3`, the call stack is
+      // {42, 43, 44, 42, 43, 44, 42 ...}
+      // Ensure that the hash() function respects the window size.
+      hashes.clear();
+      cs.Reset(window_size);
+      cs.OnFunctionEntry(42, kStackTop);
+      for (size_t i = 0; i < kDepth; ++i) {
+        cs.OnFunctionEntry(42 + (i % num_different_frames), kStackTop - i);
+        hashes.insert(cs.Hash());
+      }
+      EXPECT_EQ(hashes.size(), window_size + num_different_frames - 1);
+    }
+  }
+}
+
+}  // namespace
+}  // namespace fuzztest::internal
diff --git a/src/third_party/fuzztest/dist/centipede/centipede.cc b/src/third_party/fuzztest/dist/centipede/centipede.cc
new file mode 100644
index 00000000000..84490e435aa
--- /dev/null
+++ b/src/third_party/fuzztest/dist/centipede/centipede.cc
@@ -0,0 +1,1037 @@
+// Copyright 2022 The Centipede Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Centipede: an experimental distributed fuzzing engine. +// Very simple / naive so far. +// Main use case: large out-of-process fuzz targets with relatively slow +// execution (< 100 exec/s). +// +// Basic approach (subject to change): +// * All state is stored in a local or remote directory `workdir`. +// * State consists of a corpus (inputs) and feature sets (see feature_t). +// * Feature sets are associated with a binary, so that two binaries +// have independent feature sets stored in different subdirs in `workdir`, +// like binaryA-sha1-of-A and binaryB-sha1-of-B. +// If the binary is recompiled at different revision or with different +// compiler options, it is a different binary and feature sets will need to be +// recomputed for the new binary in its separate dir. +// * The corpus is not tied to the binary. It is stored in `workdir`/. +// * The fuzzer runs in `total_shards` independent processes. +// * Each shard appends data to its own files in `workdir`: corpus and features; +// no other process writes to those files. +// * Each shard may periodically read some other shard's corpus and features. +// Since all files are append-only (no renames, no deletions) we may only +// have partial reads, and the algorithm is expected to tolerate those. +// * Fuzzing can be run locally in multiple processes, with a local `workdir` +// or on a cluster, which supports `workdir` on a remote file system. +// * The intent is to scale to an arbitrary number of shards, +// currently tested with total_shards = 10000. 
+//
+// Differential fuzzing is not yet properly implemented.
+// Currently, one can run target A in a given workdir, then target B, and so
+// on, and the corpus will grow over time benefiting from all targets.
+#include "./centipede/centipede.h"
+
+#include <algorithm>
+#include <atomic>
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <filesystem>  // NOLINT
+#include <functional>
+#include <iostream>
+#include <memory>
+#include <numeric>
+#include <sstream>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "absl/base/attributes.h"
+#include "absl/base/const_init.h"  // NOLINT
+#include "absl/base/nullability.h"
+#include "absl/container/flat_hash_set.h"
+#include "absl/log/check.h"
+#include "absl/log/log.h"
+#include "absl/status/status.h"
+#include "absl/strings/ascii.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_format.h"
+#include "absl/strings/str_split.h"
+#include "absl/synchronization/mutex.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "./centipede/binary_info.h"
+#include "./centipede/centipede_callbacks.h"
+#include "./centipede/command.h"
+#include "./centipede/control_flow.h"
+#include "./centipede/corpus_io.h"
+#include "./centipede/coverage.h"
+#include "./centipede/environment.h"
+#include "./centipede/feature.h"
+#include "./centipede/feature_set.h"
+#include "./centipede/mutation_input.h"
+#include "./centipede/runner_result.h"
+#include "./centipede/rusage_profiler.h"
+#include "./centipede/rusage_stats.h"
+#include "./centipede/stats.h"
+#include "./centipede/stop.h"
+#include "./centipede/util.h"
+#include "./centipede/workdir.h"
+#include "./common/blob_file.h"
+#include "./common/defs.h"
+#include "./common/hash.h"
+#include "./common/logging.h"
+#include "./common/remote_file.h"
+#include "./common/status_macros.h"
+
+namespace fuzztest::internal {
+
+// Wires the engine to its environment, callbacks, binary info, coverage logger
+// and stats sink. CHECK-fails if `env_.seed` is zero. If an input filter is
+// configured and the fork server is enabled, starts the filter's fork server.
+Centipede::Centipede(const Environment &env, CentipedeCallbacks &user_callbacks,
+                     const BinaryInfo &binary_info,
+                     CoverageLogger &coverage_logger, std::atomic<Stats> &stats)
+    : env_(env),
+      user_callbacks_(user_callbacks),
+      rng_(env_.seed),
+      // TODO(kcc): [impl] find a better way to compute frequency_threshold.
+      fs_(env_.feature_frequency_threshold, env_.MakeDomainDiscardMask()),
+      coverage_frontier_(binary_info),
+      binary_info_(binary_info),
+      pc_table_(binary_info_.pc_table),
+      symbols_(binary_info_.symbols),
+      function_filter_(env_.function_filter, symbols_),
+      coverage_logger_(coverage_logger),
+      stats_(stats),
+      input_filter_path_(std::filesystem::path(TemporaryLocalDirPath())
+                             .append("filter-input")),
+      input_filter_cmd_{[&] {
+        Command::Options cmd_options;
+        cmd_options.args = {input_filter_path_};
+        cmd_options.stdout_file = "/dev/null";
+        cmd_options.stderr_file = "/dev/null";
+        return Command{env_.input_filter, std::move(cmd_options)};
+      }()},
+      rusage_profiler_(
+          /*scope=*/RUsageScope::ThisProcess(),
+          /*metrics=*/env.DumpRUsageTelemetryInThisShard()
+              ? RUsageProfiler::kAllMetrics
+              : RUsageProfiler::kMetricsOff,
+          /*raii_actions=*/RUsageProfiler::kRaiiOff,
+          /*location=*/{__FILE__, __LINE__},
+          /*description=*/"Engine") {
+  CHECK(env_.seed) << "env_.seed must not be zero";
+  if (!env_.input_filter.empty() && env_.fork_server)
+    input_filter_cmd_.StartForkServer(TemporaryLocalDirPath(), "input_filter");
+}
+
+// Exports the inputs from all corpus shard files of `env`'s workdir to `dir`.
+void Centipede::CorpusToFiles(const Environment &env, std::string_view dir) {
+  std::vector<std::string> sharded_corpus_files;
+  CHECK_OK(RemoteGlobMatch(WorkDir{env}.CorpusFilePaths().AllShardsGlob(),
+                           sharded_corpus_files));
+  ExportCorpus(sharded_corpus_files, dir);
+}
+
+// Imports the files found (recursively) under `dir` into the corpus shard
+// files of `env`'s workdir. Empty files and inputs whose hash is already
+// present in the target shard are skipped.
+void Centipede::CorpusFromFiles(const Environment &env, std::string_view dir) {
+  // Shard the file paths in the source `dir` based on hashes of filenames.
+  // Such partition is stable: a given file always goes to a specific shard.
+  std::vector<std::vector<std::string>> sharded_paths(env.total_shards);
+  size_t total_paths = 0;
+  const std::vector<std::string> listed_paths =
+      ValueOrDie(RemoteListFiles(dir, /*recursively=*/true));
+  for (const std::string &path : listed_paths) {
+    size_t filename_hash = std::hash<std::string>{}(path);
+    sharded_paths[filename_hash % env.total_shards].push_back(path);
+    ++total_paths;
+  }
+
+  // If the destination `workdir` is specified (note that empty means "use the
+  // current directory"), we might need to create it.
+  if (!env.workdir.empty()) {
+    CHECK_OK(RemoteMkdir(env.workdir));
+  }
+
+  // Iterate over all shards, adding inputs to the current shard.
+  // NOTE: `inputs_added` / `inputs_ignored` are running totals across shards;
+  // `num_shard_bytes` is per shard.
+  size_t inputs_added = 0;
+  size_t inputs_ignored = 0;
+  const auto corpus_file_paths = WorkDir{env}.CorpusFilePaths();
+  for (size_t shard = 0; shard < env.total_shards; shard++) {
+    const std::string corpus_file_path = corpus_file_paths.Shard(shard);
+    size_t num_shard_bytes = 0;
+    // Read the shard (if it exists), collect input hashes from it.
+    absl::flat_hash_set<std::string> existing_hashes;
+    if (RemotePathExists(corpus_file_path)) {
+      auto reader = DefaultBlobFileReaderFactory();
+      // May fail to open if file doesn't exist.
+      reader->Open(corpus_file_path).IgnoreError();
+      ByteSpan blob;
+      while (reader->Read(blob).ok()) {
+        existing_hashes.insert(Hash(blob));
+      }
+    }
+    // Add inputs to the current shard, if the shard doesn't have them already.
+    auto appender = DefaultBlobFileWriterFactory(env.riegeli);
+    CHECK_OK(appender->Open(corpus_file_path, "a"))
+        << "Failed to open corpus file: " << corpus_file_path;
+    for (const auto &path : sharded_paths[shard]) {
+      std::string input;
+      CHECK_OK(RemoteFileGetContents(path, input));
+      if (input.empty() || existing_hashes.contains(Hash(input))) {
+        ++inputs_ignored;
+        continue;
+      }
+      CHECK_OK(appender->Write(ByteArray{input.begin(), input.end()}));
+      // Previously this counter was never updated and always logged as 0.
+      num_shard_bytes += input.size();
+      ++inputs_added;
+    }
+    LOG(INFO) << VV(shard) << VV(inputs_added) << VV(inputs_ignored)
+              << VV(num_shard_bytes);
+  }
+  CHECK_EQ(total_paths, inputs_added + inputs_ignored);
+}
+
+// Copies crash reproducers from all shards of `env`'s workdir into `dir` as
+// `<id>.data`, together with the matching `<id>.desc` and `<id>.sig` metadata
+// files. A reproducer id seen in more than one shard is copied only once.
+// Returns the first non-OK status encountered; a NotFound result from the
+// reproducer-dir glob is not treated as an error.
+absl::Status Centipede::CrashesToFiles(const Environment &env,
+                                       std::string_view dir) {
+  std::vector<std::string> reproducer_dirs;
+  const auto wd = WorkDir{env};
+  auto reproducer_match_status = RemoteGlobMatch(
+      wd.CrashReproducerDirPaths().AllShardsGlob(), reproducer_dirs);
+  if (!reproducer_match_status.ok() &&
+      !absl::IsNotFound(reproducer_match_status)) {
+    return reproducer_match_status;
+  }
+  absl::flat_hash_set<std::string> crash_ids;
+  for (const auto &reproducer_dir : reproducer_dirs) {
+    ASSIGN_OR_RETURN_IF_NOT_OK(
+        std::vector<std::string> reproducer_paths,
+        RemoteListFiles(reproducer_dir, /*recursively=*/false));
+    for (const auto &reproducer_path : reproducer_paths) {
+      std::string id = std::filesystem::path{reproducer_path}.filename();
+      if (auto [_it, inserted] = crash_ids.insert(id); !inserted) {
+        continue;
+      }
+      RETURN_IF_NOT_OK(RemoteFileCopy(
+          reproducer_path,
+          (std::filesystem::path{dir} / absl::StrCat(id, ".data")).string()));
+      const auto shard_index = wd.CrashReproducerDirPaths().GetShardIndex(
+          std::filesystem::path{reproducer_path}.parent_path().string());
+      CHECK(shard_index.has_value());
+      const auto metadata_dir = wd.CrashMetadataDirPaths().Shard(*shard_index);
+      const auto description_filename = absl::StrCat(id, ".desc");
+      const auto signature_filename = absl::StrCat(id, ".sig");
+      RETURN_IF_NOT_OK(RemoteFileCopy(
+          (std::filesystem::path{metadata_dir} / description_filename).string(),
+          (std::filesystem::path{dir} / description_filename).string()));
+      RETURN_IF_NOT_OK(RemoteFileCopy(
+          (std::filesystem::path{metadata_dir} / signature_filename).string(),
+          (std::filesystem::path{dir} / signature_filename).string()));
+    }
+  }
+  return absl::OkStatus();
+}
+
+// Publishes a fresh `Stats` snapshot to `stats_` and, if `env_.log_level` is
+// at least `min_log_level`, logs a one-line summary tagged with `log_type`.
+void Centipede::UpdateAndMaybeLogStats(std::string_view log_type,
+                                       size_t min_log_level) {
+  // `fuzz_start_time_ == absl::InfiniteFuture()` means that fuzzing hasn't
+  // started yet. If so, grab the baseline numbers.
+  const double fuzz_time_secs =
+      fuzz_start_time_ == absl::InfiniteFuture()
+          ? 0
+          : absl::ToDoubleSeconds(absl::Now() - fuzz_start_time_);
+  const double execs_per_sec =
+      fuzz_time_secs == 0 ? 0 : (1.0 * num_runs_ / fuzz_time_secs);
+  const auto [max_corpus_size, avg_corpus_size] = corpus_.MaxAndAvgSize();
+
+  // NOTE: For now, this will double-count rusage in every shard on the same
+  // machine. The stats reporter knows and deals with that.
+  static const auto rusage_scope = RUsageScope::ThisProcess();
+  const auto rusage_timing = RUsageTiming::Snapshot(rusage_scope);
+  const auto rusage_memory = RUsageMemory::Snapshot(rusage_scope);
+
+  namespace fd = feature_domains;
+
+  // NOTE(review): the `static_cast` target types were lost in this copy of the
+  // patch; `uint64_t` is a reconstruction - confirm against the field types
+  // declared in stats.h.
+  stats_.store(Stats{
+      StatsMeta{
+          /*timestamp_unix_micros=*/
+          static_cast<uint64_t>(absl::ToUnixMicros(absl::Now())),
+      },
+      ExecStats{
+          /*fuzz_time_sec=*/static_cast<uint64_t>(std::ceil(fuzz_time_secs)),
+          /*num_executions*/ num_runs_,
+          /*num_target_crashes*/ static_cast<uint64_t>(num_crashes_),
+      },
+      CovStats{
+          /*num_covered_pcs=*/fs_.CountFeatures(fd::kPCs),
+          /*num_8bit_counter_features=*/fs_.CountFeatures(fd::k8bitCounters),
+          /*num_data_flow_features=*/fs_.CountFeatures(fd::kDataFlow),
+          /*num_cmp_features=*/fs_.CountFeatures(fd::kCMPDomains),
+          /*num_call_stack_features=*/fs_.CountFeatures(fd::kCallStack),
+          /*num_bounded_path_features=*/fs_.CountFeatures(fd::kBoundedPath),
+          /*num_pc_pair_features=*/fs_.CountFeatures(fd::kPCPair),
+          /*num_user_features=*/fs_.CountFeatures(fd::kUserDomains),
+          /*num_user0_features=*/fs_.CountFeatures(fd::kUserDomains[0]),
+          /*num_user1_features=*/fs_.CountFeatures(fd::kUserDomains[1]),
+          /*num_user2_features=*/fs_.CountFeatures(fd::kUserDomains[2]),
+          /*num_user3_features=*/fs_.CountFeatures(fd::kUserDomains[3]),
+          /*num_user4_features=*/fs_.CountFeatures(fd::kUserDomains[4]),
+          /*num_user5_features=*/fs_.CountFeatures(fd::kUserDomains[5]),
+          /*num_user6_features=*/fs_.CountFeatures(fd::kUserDomains[6]),
+          /*num_user7_features=*/fs_.CountFeatures(fd::kUserDomains[7]),
+          /*num_user8_features=*/fs_.CountFeatures(fd::kUserDomains[8]),
+          /*num_user9_features=*/fs_.CountFeatures(fd::kUserDomains[9]),
+          /*num_user10_features=*/fs_.CountFeatures(fd::kUserDomains[10]),
+          /*num_user11_features=*/fs_.CountFeatures(fd::kUserDomains[11]),
+          /*num_user12_features=*/fs_.CountFeatures(fd::kUserDomains[12]),
+          /*num_user13_features=*/fs_.CountFeatures(fd::kUserDomains[13]),
+          /*num_user14_features=*/fs_.CountFeatures(fd::kUserDomains[14]),
+          /*num_user15_features=*/fs_.CountFeatures(fd::kUserDomains[15]),
+          /*num_unknown_features=*/fs_.CountFeatures(fd::kUnknown),
+          /*num_funcs_in_frontier=*/coverage_frontier_.NumFunctionsInFrontier(),
+      },
+      CorpusStats{
+          /*active_corpus_size=*/corpus_.NumActive(),
+          /*total_corpus_size=*/corpus_.NumTotal(),
+          /*max_corpus_element_size=*/max_corpus_size,
+          /*avg_corpus_element_size=*/avg_corpus_size,
+      },
+      RusageStats{
+          /*engine_rusage_avg_millicores=*/static_cast<uint64_t>(
+              std::lround(rusage_timing.cpu_hyper_cores * 1000)),
+          /*engine_rusage_cpu_percent=*/
+          static_cast<uint64_t>(
+              std::lround(rusage_timing.cpu_utilization * 100)),
+          /*engine_rusage_rss_mb=*/
+          static_cast<uint64_t>(rusage_memory.mem_rss >> 20),
+          /*engine_rusage_vsize_mb=*/
+          static_cast<uint64_t>(rusage_memory.mem_vsize >> 20),
+      },
+  });
+
+  if (env_.log_level < min_log_level) return;
+
+  std::ostringstream os;
+  auto LogIfNotZero = [&os](size_t value, std::string_view name) {
+    if (!value) return;
+    os << " " << name << ": " << value;
+  };
+  if (!env_.experiment_name.empty()) os << env_.experiment_name << " ";
+  os << "[S" << env_.my_shard_index << "." << num_runs_ << "] " << log_type
+     << ": ";
+  os << fs_;
+  os << " corp: " << corpus_.NumActive() << "/" << corpus_.NumTotal();
+  LogIfNotZero(coverage_frontier_.NumFunctionsInFrontier(), "fr");
+  LogIfNotZero(num_crashes_, "crash");
+  os << " max/avg: " << max_corpus_size << "/" << avg_corpus_size << " "
+     << corpus_.MemoryUsageString();
+  // Rates below 1 exec/s are printed unrounded; everything else is rounded.
+  os << " exec/s: "
+     << (execs_per_sec < 1.0 ? execs_per_sec : std::round(execs_per_sec));
+  os << " mb: " << (rusage_memory.mem_rss >> 20);
+  LOG(INFO) << os.str();
+}
+
+// If feature logging is enabled for this shard, logs a description for each
+// PC feature in `fv` that the coverage logger reports as new.
+void Centipede::LogFeaturesAsSymbols(const FeatureVec &fv) {
+  if (!env_.LogFeaturesInThisShard()) return;
+  for (auto feature : fv) {
+    if (!feature_domains::kPCs.Contains(feature)) continue;
+    PCIndex pc_index = ConvertPCFeatureToPcIndex(feature);
+    auto description = coverage_logger_.ObserveAndDescribeIfNew(pc_index);
+    if (description.empty()) continue;
+    LOG(INFO) << description;
+  }
+}
+
+// Returns true if no input filter is configured, or if the filter command
+// exits with EXIT_SUCCESS when run on `input`. The input is handed to the
+// filter through a temporary file, which is removed afterwards.
+bool Centipede::InputPassesFilter(const ByteArray &input) {
+  if (env_.input_filter.empty()) return true;
+  WriteToLocalFile(input_filter_path_, input);
+  bool result = input_filter_cmd_.Execute() == EXIT_SUCCESS;
+  std::filesystem::remove(input_filter_path_);
+  return result;
+}
+
+// Executes `binary` on `input_vec` via the user callbacks and reports a crash
+// if the execution fails. Returns true if the execution succeeded or the
+// failure is one that `batch_result` marks as ignored.
+bool Centipede::ExecuteAndReportCrash(std::string_view binary,
+                                      const std::vector<ByteArray> &input_vec,
+                                      BatchResult &batch_result) {
+  bool success = user_callbacks_.Execute(binary, input_vec, batch_result);
+  if (!success) ReportCrash(binary, input_vec, batch_result);
+  return success || batch_result.IsIgnoredFailure();
+}
+
+// *** Highly experimental and risky. May not scale well for large targets. ***
+//
+// The idea: an unordered pair of two features {a, b} is by itself a feature.
+// In the worst case, the number of such synthetic features is a square of
+// the number of regular features, which may not scale.
+// For now, we only treat pairs of PCs as features, which is still quadratic
+// by the number of PCs. But in moderate-sized programs this may be tolerable.
+//
+// Rationale: if two different parts of the target are exercised simultaneously,
+// this may create interesting behaviour that is hard to capture with regular
+// control flow (or other) features.
+//
+// Appends the not-yet-seen PC-pair features to `fv` and returns how many were
+// appended.
+size_t Centipede::AddPcPairFeatures(FeatureVec &fv) {
+  // Using a scratch vector to avoid allocations.
+  auto &pcs = add_pc_pair_scratch_;
+  pcs.clear();
+
+  size_t num_pcs = pc_table_.size();
+  size_t num_added_pairs = 0;
+
+  // Collect PCs from fv.
+  for (auto feature : fv) {
+    if (feature_domains::kPCs.Contains(feature))
+      pcs.push_back(ConvertPCFeatureToPcIndex(feature));
+  }
+
+  // The quadratic loop: iterate all PC pairs (!!).
+  for (size_t i = 0, n = pcs.size(); i < n; ++i) {
+    size_t pc1 = pcs[i];
+    for (size_t j = i + 1; j < n; ++j) {
+      size_t pc2 = pcs[j];
+      feature_t f = feature_domains::kPCPair.ConvertToMe(
+          ConvertPcPairToNumber(pc1, pc2, num_pcs));
+      // If we have seen this pair at least once, ignore it.
+      if (fs_.Frequency(f) != 0) continue;
+      fv.push_back(f);
+      ++num_added_pairs;
+    }
+  }
+  return num_added_pairs;
+}
+
+// Runs `input_vec` on the main binary and on every extra binary, then
+// processes the features obtained from the main binary. Inputs that gained
+// new coverage and pass the input filter are added to the corpus (subject to
+// the function filter) and written to `corpus_file` / `features_file` and the
+// first corpus dir, when those are set. Features of every input are written
+// to `unconditional_features_file` when it is set.
+// Returns true iff at least one input gained new coverage; returns false on
+// an early stop or when exiting on a crash.
+bool Centipede::RunBatch(
+    const std::vector<ByteArray> &input_vec,
+    BlobFileWriter *absl_nullable corpus_file,
+    BlobFileWriter *absl_nullable features_file,
+    BlobFileWriter *absl_nullable unconditional_features_file) {
+  BatchResult batch_result;
+  bool success = ExecuteAndReportCrash(env_.binary, input_vec, batch_result);
+  CHECK_EQ(input_vec.size(), batch_result.results().size());
+
+  for (const auto &extra_binary : env_.extra_binaries) {
+    if (ShouldStop()) break;
+    BatchResult extra_batch_result;
+    success =
+        ExecuteAndReportCrash(extra_binary, input_vec, extra_batch_result) &&
+        success;
+  }
+  if (EarlyStopRequested()) return false;
+  if (!success && env_.exit_on_crash) {
+    LOG(INFO) << "--exit_on_crash is enabled; exiting soon";
+    RequestEarlyStop(EXIT_FAILURE);
+    return false;
+  }
+  CHECK_EQ(batch_result.results().size(), input_vec.size());
+  num_runs_ += input_vec.size();
+  bool batch_gained_new_coverage = false;
+  for (size_t i = 0; i < input_vec.size(); i++) {
+    if (ShouldStop()) break;
+    FeatureVec &fv = batch_result.results()[i].mutable_features();
+    bool function_filter_passed = function_filter_.filter(fv);
+    bool input_gained_new_coverage = fs_.PruneFeaturesAndCountUnseen(fv) != 0;
+    if (env_.use_pcpair_features && AddPcPairFeatures(fv) != 0)
+      input_gained_new_coverage = true;
+    if (unconditional_features_file != nullptr) {
+      CHECK_OK(unconditional_features_file->Write(
+          PackFeaturesAndHash(input_vec[i], fv)));
+    }
+    if (input_gained_new_coverage) {
+      // TODO(kcc): [impl] add stats for filtered-out inputs.
+      if (!InputPassesFilter(input_vec[i])) continue;
+      fs_.IncrementFrequencies(fv);
+      LogFeaturesAsSymbols(fv);
+      batch_gained_new_coverage = true;
+      CHECK_GT(fv.size(), 0UL);
+      if (function_filter_passed) {
+        corpus_.Add(input_vec[i], fv, batch_result.results()[i].metadata(), fs_,
+                    coverage_frontier_);
+      }
+      if (corpus_file != nullptr) {
+        CHECK_OK(corpus_file->Write(input_vec[i]));
+      }
+      if (!env_.corpus_dir.empty() && !env_.corpus_dir[0].empty()) {
+        WriteToLocalHashedFileInDir(env_.corpus_dir[0], input_vec[i]);
+      }
+      if (features_file != nullptr) {
+        CHECK_OK(features_file->Write(PackFeaturesAndHash(input_vec[i], fv)));
+      }
+    }
+  }
+  return batch_gained_new_coverage;
+}
+
+// Loads shard `shard_index` of `load_env`'s workdir into the in-memory corpus.
+// Inputs that come with features are added if they contribute unseen
+// features. Inputs without features are re-run afterwards if `rerun` is true,
+// and dropped otherwise.
+// TODO(kcc): [impl] don't reread the same corpus twice.
+void Centipede::LoadShard(const Environment &load_env, size_t shard_index,
+                          bool rerun) {
+  VLOG(1) << "Loading shard " << shard_index
+          << (rerun ? " with rerunning" : " without rerunning");
+  size_t num_added_inputs = 0;
+  size_t num_skipped_inputs = 0;
+  std::vector<ByteArray> inputs_to_rerun;
+  auto input_features_callback = [&](ByteArray input,
+                                     FeatureVec input_features) {
+    if (ShouldStop()) return;
+    if (input_features.empty()) {
+      if (rerun) {
+        inputs_to_rerun.emplace_back(std::move(input));
+      }
+    } else {
+      LogFeaturesAsSymbols(input_features);
+      const auto num_new_features =
+          fs_.PruneFeaturesAndCountUnseen(input_features);
+      if (num_new_features != 0) {
+        VLOG(10) << "Adding input " << Hash(input)
+                 << "; new features: " << num_new_features;
+        fs_.IncrementFrequencies(input_features);
+        // TODO(kcc): cmp_args are currently not saved to disk and not reloaded.
+        corpus_.Add(input, input_features, {}, fs_, coverage_frontier_);
+        ++num_added_inputs;
+      } else {
+        VLOG(10) << "Skipping input: " << Hash(input);
+        ++num_skipped_inputs;
+      }
+    }
+  };
+
+  // See serialize_shard_loads on why we may want to serialize shard loads.
+  // TODO(kcc): remove serialize_shard_loads when LoadShards() uses less RAM.
+  const WorkDir wd{load_env};
+  const std::string corpus_path = wd.CorpusFilePaths().Shard(shard_index);
+  const std::string features_path = wd.FeaturesFilePaths().Shard(shard_index);
+  if (env_.serialize_shard_loads) {
+    ABSL_CONST_INIT static absl::Mutex load_shard_mu{absl::kConstInit};
+    absl::MutexLock lock(&load_shard_mu);
+    ReadShard(corpus_path, features_path, input_features_callback);
+  } else {
+    ReadShard(corpus_path, features_path, input_features_callback);
+  }
+
+  VLOG(1) << "Loaded shard " << shard_index << ": added " << num_added_inputs
+          << " / skipped " << num_skipped_inputs << " inputs";
+
+  if (num_added_inputs > 0) UpdateAndMaybeLogStats("load-shard", 1);
+  if (!inputs_to_rerun.empty()) Rerun(inputs_to_rerun);
+}
+
+void Centipede::LoadAllShardsInRandomOrder(const Environment &load_env,
+                                           bool rerun_my_shard) {
+  // TODO(ussuri): It seems logical to reset `corpus_` before this, but
+  // that broke `ShardsAndDistillTest` in testing/centipede_test.cc.
+  // Investigate.
+ std::vector shard_idxs(env_.total_shards); + std::iota(shard_idxs.begin(), shard_idxs.end(), 0); + std::shuffle(shard_idxs.begin(), shard_idxs.end(), rng_); + size_t num_shards_loaded = 0; + for (size_t shard_idx : shard_idxs) { + const bool rerun = rerun_my_shard && shard_idx == env_.my_shard_index; + LoadShard(load_env, shard_idx, rerun); + LOG_IF(INFO, (++num_shards_loaded % 100) == 0) << VV(num_shards_loaded); + } +} + +void Centipede::Rerun(std::vector &to_rerun) { + if (to_rerun.empty()) return; + auto features_file_path = wd_.FeaturesFilePaths().Shard(env_.my_shard_index); + auto features_file = DefaultBlobFileWriterFactory(env_.riegeli); + CHECK_OK(features_file->Open(features_file_path, "a")); + + LOG(INFO) << to_rerun.size() << " inputs to rerun"; + // Re-run all inputs for which we don't know their features. + // Run in batches of at most env_.batch_size inputs each. + while (!to_rerun.empty()) { + if (ShouldStop()) break; + size_t batch_size = std::min(to_rerun.size(), env_.batch_size); + std::vector batch(to_rerun.end() - batch_size, to_rerun.end()); + to_rerun.resize(to_rerun.size() - batch_size); + if (RunBatch(batch, nullptr, nullptr, features_file.get())) { + UpdateAndMaybeLogStats("rerun-old", 1); + } + } +} + +void Centipede::GenerateCoverageReport(std::string_view filename_annotation, + std::string_view description) { + if (pc_table_.empty()) return; + + auto coverage_path = wd_.CoverageReportPath(filename_annotation); + LOG(INFO) << "Generate coverage report [" << description << "]; " + << VV(coverage_path); + auto pci_vec = fs_.ToCoveragePCs(); + Coverage coverage(pc_table_, pci_vec); + coverage.DumpReportToFile(symbols_, coverage_path, description); +} + +void Centipede::GenerateCorpusStats(std::string_view filename_annotation, + std::string_view description) { + auto stats_path = wd_.CorpusStatsPath(filename_annotation); + LOG(INFO) << "Generate corpus stats [" << description << "]; " + << VV(stats_path); + corpus_.DumpStatsToFile(fs_, 
stats_path, description); +} + +// TODO(nedwill): add integration test once tests are refactored per b/255660879 +void Centipede::GenerateSourceBasedCoverageReport( + std::string_view filename_annotation, std::string_view description) { + if (env_.clang_coverage_binary.empty()) return; + + auto report_path = wd_.SourceBasedCoverageReportPath(filename_annotation); + LOG(INFO) << "Generate source based coverage report [" << description << "]; " + << VV(report_path); + CHECK_OK(RemoteMkdir(report_path)); + + std::vector raw_profiles = wd_.EnumerateRawCoverageProfiles(); + + if (raw_profiles.empty()) { + LOG(ERROR) << "No raw profiles found for coverage report"; + return; + } + + std::string indexed_profile_path = + wd_.SourceBasedCoverageIndexedProfilePath(); + + std::vector merge_arguments = {"merge", "-o", + indexed_profile_path, "-sparse"}; + for (const std::string &raw_profile : raw_profiles) { + merge_arguments.push_back(raw_profile); + } + + Command::Options merge_cmd_options; + merge_cmd_options.args = std::move(merge_arguments); + Command merge_command{"llvm-profdata", std::move(merge_cmd_options)}; + if (merge_command.Execute() != EXIT_SUCCESS) { + LOG(ERROR) << "Failed to run command " << merge_command.ToString(); + return; + } + + Command::Options generate_report_cmd_options; + generate_report_cmd_options.args = { + "show", "-format=html", absl::StrCat("-output-dir=", report_path), + absl::StrCat("-instr-profile=", indexed_profile_path), + env_.clang_coverage_binary}; + Command generate_report_command{"llvm-cov", + std::move(generate_report_cmd_options)}; + if (generate_report_command.Execute() != EXIT_SUCCESS) { + LOG(ERROR) << "Failed to run command " + << generate_report_command.ToString(); + return; + } +} + +void Centipede::GenerateRUsageReport(std::string_view filename_annotation, + std::string_view description) { + class ReportDumper : public RUsageProfiler::ReportSink { + public: + explicit ReportDumper(std::string_view path) + : 
file_{*RemoteFileOpen(path, "w")} { + CHECK(file_ != nullptr) << VV(path); + CHECK_OK(RemoteFileSetWriteBufferSize(file_, 10UL * 1024 * 1024)); + } + + ~ReportDumper() override { CHECK_OK(RemoteFileClose(file_)); } + + ReportDumper &operator<<(std::string_view fragment) override { + CHECK_OK(RemoteFileAppend(file_, + ByteArray{fragment.cbegin(), fragment.cend()})); + return *this; + } + + private: + RemoteFile *file_; + }; + + const auto &snapshot = rusage_profiler_.TakeSnapshot( + {__FILE__, __LINE__}, std::string{description}); + VLOG(1) << "Rusage @ " << description << ": " << snapshot.ShortMetricsStr(); + auto path = wd_.RUsageReportPath(filename_annotation); + LOG(INFO) << "Generate rusage report [" << description << "]; " + << VV(env_.my_shard_index) << VV(path); + ReportDumper dumper{path}; + rusage_profiler_.GenerateReport(&dumper); +} + +void Centipede::MaybeGenerateTelemetry(std::string_view filename_annotation, + std::string_view description) { + if (env_.DumpCorpusTelemetryInThisShard()) { + GenerateCoverageReport(filename_annotation, description); + GenerateCorpusStats(filename_annotation, description); + GenerateSourceBasedCoverageReport(filename_annotation, description); + } + if (env_.DumpRUsageTelemetryInThisShard()) { + GenerateRUsageReport(filename_annotation, description); + } +} + +void Centipede::MaybeGenerateTelemetryAfterBatch( + std::string_view filename_annotation, size_t batch_index) { + if (env_.DumpTelemetryForThisBatch(batch_index)) { + MaybeGenerateTelemetry( // + filename_annotation, absl::StrCat("After batch ", batch_index)); + } +} + +void Centipede::MergeFromOtherCorpus(std::string_view merge_from_dir, + size_t shard_index_to_merge) { + LOG(INFO) << __func__ << ": " << merge_from_dir; + Environment merge_from_env = env_; + merge_from_env.workdir = merge_from_dir; + size_t initial_corpus_size = corpus_.NumActive(); + LoadShard(merge_from_env, shard_index_to_merge, /*rerun=*/true); + size_t new_corpus_size = corpus_.NumActive(); + 
CHECK_GE(new_corpus_size, initial_corpus_size); // Corpus can't shrink here. + if (new_corpus_size > initial_corpus_size) { + auto appender = DefaultBlobFileWriterFactory(env_.riegeli); + CHECK_OK( + appender->Open(wd_.CorpusFilePaths().Shard(env_.my_shard_index), "a")); + for (size_t idx = initial_corpus_size; idx < new_corpus_size; ++idx) { + CHECK_OK(appender->Write(corpus_.Get(idx))); + } + LOG(INFO) << "Merge: " << (new_corpus_size - initial_corpus_size) + << " new inputs added"; + } +} + +void Centipede::ReloadAllShardsAndWriteDistilledCorpus() { + // Reload the shards. This automatically distills the corpus by discarding + // inputs with duplicate feature sets as they are being added. Reloading + // randomly leaves random winners from such sets of duplicates in the + // distilled output: so multiple distilling shards will produce different + // outputs from the same inputs (the property that we want). + LoadAllShardsInRandomOrder(env_, /*rerun_my_shard=*/false); + + // Save the distilled corpus to a file in workdir and possibly to a hashed + // file in the first corpus dir passed in `--corpus_dir`. + const auto distill_to_path = wd_.DistilledCorpusFilePaths().MyShard(); + LOG(INFO) << "Distilling: shard: " << env_.my_shard_index + << " output: " << distill_to_path << " " + << " distilled size: " << corpus_.NumActive(); + const auto appender = DefaultBlobFileWriterFactory(env_.riegeli); + // NOTE: Always overwrite distilled corpus files -- never append, unlike + // "regular", per-shard corpus files. 
+ CHECK_OK(appender->Open(distill_to_path, "w")); + for (size_t i = 0; i < corpus_.NumActive(); ++i) { + const ByteArray &input = corpus_.Get(i); + CHECK_OK(appender->Write(input)); + if (!env_.corpus_dir.empty() && !env_.corpus_dir[0].empty()) { + WriteToLocalHashedFileInDir(env_.corpus_dir[0], input); + } + } +} + +void Centipede::LoadSeedInputs(BlobFileWriter *absl_nonnull corpus_file, + BlobFileWriter *absl_nonnull features_file) { + std::vector seed_inputs; + const size_t num_seeds_available = + user_callbacks_.GetSeeds(env_.batch_size, seed_inputs); + if (num_seeds_available > env_.batch_size) { + LOG(WARNING) << "More seeds available than requested: " + << num_seeds_available << " > " << env_.batch_size; + } + if (seed_inputs.empty()) { + QCHECK(!env_.require_seeds) + << "No seeds returned and --require_seeds=true, exiting early."; + LOG(WARNING) + << "No seeds returned - will use the default seed of single byte {0}"; + seed_inputs.push_back({0}); + } + + RunBatch(seed_inputs, corpus_file, features_file, + /*unconditional_features_file=*/nullptr); + LOG(INFO) << "Number of input seeds available: " << num_seeds_available + << ", number included in corpus: " << corpus_.NumTotal(); + + // Forcibly add all seed inputs to avoid an empty corpus if none of them increased + // coverage and passed the filters. + if (corpus_.NumTotal() == 0) { + for (const auto &seed_input : seed_inputs) + corpus_.Add(seed_input, {}, {}, fs_, coverage_frontier_); + } +} + +void Centipede::FuzzingLoop() { + LOG(INFO) << "Shard: " << env_.my_shard_index << "/" << env_.total_shards + << " " << TemporaryLocalDirPath() << " " + << "seed: " << env_.seed << "\n\n\n"; + + UpdateAndMaybeLogStats("begin-fuzz", 0); + + if (env_.full_sync) { + LoadAllShardsInRandomOrder(env_, /*rerun_my_shard=*/true); + } else { + LoadShard(env_, env_.my_shard_index, /*rerun=*/true); + } + + if (!env_.merge_from.empty()) { + // Merge a shard with the same index from another corpus.
+ MergeFromOtherCorpus(env_.merge_from, env_.my_shard_index); + } + + if (env_.load_shards_only) return; + + auto corpus_path = wd_.CorpusFilePaths().Shard(env_.my_shard_index); + auto corpus_file = DefaultBlobFileWriterFactory(env_.riegeli); + CHECK_OK(corpus_file->Open(corpus_path, "a")); + auto features_path = wd_.FeaturesFilePaths().Shard(env_.my_shard_index); + auto features_file = DefaultBlobFileWriterFactory(env_.riegeli); + CHECK_OK(features_file->Open(features_path, "a")); + + LoadSeedInputs(corpus_file.get(), features_file.get()); + + UpdateAndMaybeLogStats("init-done", 0); + + // If we're going to fuzz, dump the initial telemetry files. For a brand-new + // run, these will be functionally empty, e.g. the coverage report will list + // all target functions as not covered (NONE). For a bootstrapped run (the + // workdir already has data), these may or may not coincide with the final + // "latest" report of the previous run, depending on how the runs are + // configured (the same number of shards, for example). + if (env_.num_runs != 0) MaybeGenerateTelemetry("initial", "Before fuzzing"); + + // Reset fuzz_start_time_ and num_runs_, so that the pre-init work doesn't + // affect them. + fuzz_start_time_ = absl::Now(); + num_runs_ = 0; + + // num_runs / batch_size, rounded up. + size_t number_of_batches = env_.num_runs / env_.batch_size; + if (env_.num_runs % env_.batch_size != 0) ++number_of_batches; + size_t new_runs = 0; + size_t corpus_size_at_last_prune = corpus_.NumActive(); + for (size_t batch_index = 0; batch_index < number_of_batches; batch_index++) { + if (ShouldStop()) break; + CHECK_LT(new_runs, env_.num_runs); + auto remaining_runs = env_.num_runs - new_runs; + auto batch_size = std::min(env_.batch_size, remaining_runs); + std::vector mutation_inputs; + mutation_inputs.reserve(env_.mutate_batch_size); + for (size_t i = 0; i < env_.mutate_batch_size; i++) { + const auto &corpus_record = env_.use_corpus_weights + ? 
corpus_.WeightedRandom(rng_()) + : corpus_.UniformRandom(rng_()); + mutation_inputs.push_back( + MutationInputRef{corpus_record.data, &corpus_record.metadata}); + } + + const std::vector mutants = + user_callbacks_.Mutate(mutation_inputs, batch_size); + if (ShouldStop()) break; + + bool gained_new_coverage = + RunBatch(mutants, corpus_file.get(), features_file.get(), nullptr); + new_runs += mutants.size(); + + if (gained_new_coverage) { + UpdateAndMaybeLogStats("new-feature", 1); + } else if (((batch_index - 1) & batch_index) == 0) { + // Log if batch_index is a power of two. + UpdateAndMaybeLogStats("pulse", 1); + } + + // Dump the intermediate telemetry files. + MaybeGenerateTelemetryAfterBatch("latest", batch_index); + + if (env_.load_other_shard_frequency != 0 && batch_index != 0 && + (batch_index % env_.load_other_shard_frequency) == 0 && + env_.total_shards > 1) { + size_t rand = rng_() % (env_.total_shards - 1); + size_t other_shard_index = + (env_.my_shard_index + 1 + rand) % env_.total_shards; + CHECK_NE(other_shard_index, env_.my_shard_index); + LoadShard(env_, other_shard_index, /*rerun=*/false); + } + + // Prune if we added enough new elements since last prune. + if (env_.prune_frequency != 0 && + corpus_.NumActive() > + corpus_size_at_last_prune + env_.prune_frequency) { + if (env_.use_coverage_frontier) coverage_frontier_.Compute(corpus_); + corpus_.Prune(fs_, coverage_frontier_, env_.max_corpus_size, rng_); + corpus_size_at_last_prune = corpus_.NumActive(); + } + } + + // The tests rely on this stat being logged last. + UpdateAndMaybeLogStats("end-fuzz", 0); + + // If we've fuzzed anything, dump the final telemetry files. 
+ if (env_.num_runs != 0) MaybeGenerateTelemetry("final", "After fuzzing"); +} + +void Centipede::ReportCrash(std::string_view binary, + const std::vector &input_vec, + const BatchResult &batch_result) { + CHECK_EQ(input_vec.size(), batch_result.results().size()); + + const size_t suspect_input_idx = std::clamp( + batch_result.num_outputs_read(), 0, input_vec.size() - 1); + auto log_execution_failure = [&](std::string_view log_prefix) { + LOG(INFO) << log_prefix << "Batch execution failed:" + << "\nBinary : " << binary + << "\nExit code : " << batch_result.exit_code() + << "\nFailure : " + << batch_result.failure_description() + << "\nSignature : " + << AsPrintableString(AsByteSpan(batch_result.failure_signature()), + /*max_len=*/32) + << "\nNumber of inputs : " << input_vec.size() + << "\nNumber of inputs read: " << batch_result.num_outputs_read() + << (batch_result.IsSetupFailure() + ? "" + : absl::StrCat("\nSuspect input index : ", + suspect_input_idx)) + << "\nCrash log :\n\n"; + for (const auto &log_line : + absl::StrSplit(absl::StripAsciiWhitespace(batch_result.log()), '\n')) { + LOG(INFO).NoPrefix() << "CRASH LOG: " << log_line; + } + LOG(INFO).NoPrefix() << "\n"; + }; + + if (batch_result.IsIgnoredFailure()) { + LOG(INFO) << "Skip further processing of " + << batch_result.failure_description(); + return; + } + + if (batch_result.IsSkippedTest()) { + log_execution_failure("Skipped Test: "); + LOG(INFO) << "Requesting early stop due to skipped test."; + RequestEarlyStop(EXIT_SUCCESS); + return; + } + + if (batch_result.IsSetupFailure()) { + log_execution_failure("Test Setup Failure: "); + LOG(INFO) << "Requesting early stop due to setup failure in the test."; + RequestEarlyStop(EXIT_FAILURE); + return; + } + + // Skip reporting only if RequestEarlyStop is called - still reporting if time + // runs out. 
+ if (EarlyStopRequested()) return; + + if (++num_crashes_ > env_.max_num_crash_reports) return; + + const std::string log_prefix = + absl::StrCat("ReportCrash[", num_crashes_, "]: "); + log_execution_failure(log_prefix); + + LOG_IF(INFO, num_crashes_ == env_.max_num_crash_reports) + << log_prefix + << "Reached --max_num_crash_reports: further reports will be suppressed"; + + if (batch_result.failure_description() == kExecutionFailurePerBatchTimeout) { + LOG(INFO) << log_prefix + << "Failure applies to entire batch: not executing inputs " + "one-by-one, trying to find the reproducer"; + return; + } + + // Determine the optimal order of the inputs to try to maximize the chances of + // finding the reproducer fast. + std::vector input_idxs_to_try; + // Prioritize the presumed crasher by inserting it in front of everything + // else. + input_idxs_to_try.push_back(suspect_input_idx); + if (!env_.batch_triage_suspect_only) { + // TODO(b/274705740): When the bug is fixed, set `input_idxs_to_try`'s size + // to `suspect_input_idx + 1`. + input_idxs_to_try.resize(input_vec.size() + 1); + // Keep the suspect at the old location, too, in case the target was + // primed for a crash by the sequence of inputs that preceded the crasher. + std::iota(input_idxs_to_try.begin() + 1, input_idxs_to_try.end(), 0); + } else { + LOG(INFO) + << log_prefix + << "Skip finding the reproducer from the inputs other than the suspect"; + } + + // Try inputs one-by-one in the determined order. 
+ LOG(INFO) << log_prefix + << "Executing inputs one-by-one, trying to find the reproducer"; + for (auto input_idx : input_idxs_to_try) { + if (ShouldStop()) return; + const auto &one_input = input_vec[input_idx]; + BatchResult one_input_batch_result; + if (!user_callbacks_.Execute(binary, {one_input}, one_input_batch_result)) { + auto hash = Hash(one_input); + auto crash_dir = wd_.CrashReproducerDirPaths().MyShard(); + CHECK_OK(RemoteMkdir(crash_dir)); + std::string input_file_path = std::filesystem::path(crash_dir) / hash; + auto crash_metadata_dir = wd_.CrashMetadataDirPaths().MyShard(); + CHECK_OK(RemoteMkdir(crash_metadata_dir)); + std::string crash_metadata_path_prefix = + std::filesystem::path(crash_metadata_dir) / hash; + LOG(INFO) << log_prefix << "Detected crash-reproducing input:" + << "\nInput index : " << input_idx << "\nInput bytes : " + << AsPrintableString(one_input, /*max_len=*/32) + << "\nExit code : " << one_input_batch_result.exit_code() + << "\nFailure : " + << one_input_batch_result.failure_description() + << "\nSignature : " + << AsPrintableString( + AsByteSpan(one_input_batch_result.failure_signature()), + /*max_len=*/32) + << "\nSaving input to: " << input_file_path + << "\nSaving crash" // + << "\nmetadata to : " << crash_metadata_path_prefix << ".*"; + CHECK_OK(RemoteFileSetContents(input_file_path, one_input)); + CHECK_OK(RemoteFileSetContents( + absl::StrCat(crash_metadata_path_prefix, ".desc"), + one_input_batch_result.failure_description())); + CHECK_OK(RemoteFileSetContents( + absl::StrCat(crash_metadata_path_prefix, ".sig"), + one_input_batch_result.failure_signature())); + return; + } + } + + LOG(INFO) << log_prefix + << "Crash was not observed when running inputs one-by-one"; + + // There will be cases when several inputs collectively cause a crash, but no + // single input does. 
Handle this by writing out the inputs from the batch + // between 0 and `suspect_input_idx` (inclusive) as individual files under the + // <--workdir>/crash/crashing_batch- directory. + // TODO(bookholt): Check for repro by re-running the whole batch. + // TODO(ussuri): Consolidate the crash reproduction logic here and above. + // TODO(ussuri): This can create a lot of tiny files. Write to a single + // shard-like corpus file instead. + const auto &suspect_input = input_vec[suspect_input_idx]; + auto suspect_hash = Hash(suspect_input); + auto crash_dir = wd_.CrashReproducerDirPaths().MyShard(); + CHECK_OK(RemoteMkdir(crash_dir)); + std::string crashing_batch_name = + absl::StrCat("crashing_batch-", suspect_hash); + std::string save_dir = std::filesystem::path(crash_dir) / crashing_batch_name; + CHECK_OK(RemoteMkdir(save_dir)); + LOG(INFO) << log_prefix << "Saving used inputs from batch to: " << save_dir; + for (int i = 0; i <= suspect_input_idx; ++i) { + const auto &one_input = input_vec[i]; + auto hash = Hash(one_input); + std::string file_path = std::filesystem::path(save_dir).append( + absl::StrFormat("input-%010d-%s", i, hash)); + CHECK_OK(RemoteFileSetContents(file_path, one_input)); + } + auto crash_metadata_dir = wd_.CrashMetadataDirPaths().MyShard(); + CHECK_OK(RemoteMkdir(crash_metadata_dir)); + std::string crash_metadata_file_path = + std::filesystem::path(crash_metadata_dir) / crashing_batch_name; + LOG(INFO) << log_prefix + << "Saving crash metadata to: " << crash_metadata_file_path; + CHECK_OK(RemoteFileSetContents(crash_metadata_file_path, + batch_result.failure_description())); +} + +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/centipede.h b/src/third_party/fuzztest/dist/centipede/centipede.h new file mode 100644 index 00000000000..24416c66d06 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/centipede.h @@ -0,0 +1,224 @@ +// Copyright 2022 The Centipede Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef THIRD_PARTY_CENTIPEDE_CENTIPEDE_H_ +#define THIRD_PARTY_CENTIPEDE_CENTIPEDE_H_ + +#include +#include +#include +#include +#include + +#include "absl/base/nullability.h" +#include "absl/status/status.h" +#include "absl/time/time.h" +#include "./centipede/binary_info.h" +#include "./centipede/centipede_callbacks.h" +#include "./centipede/command.h" +#include "./centipede/control_flow.h" +#include "./centipede/corpus.h" +#include "./centipede/coverage.h" +#include "./centipede/environment.h" +#include "./centipede/feature.h" +#include "./centipede/feature_set.h" +#include "./centipede/pc_info.h" +#include "./centipede/runner_result.h" +#include "./centipede/rusage_profiler.h" +#include "./centipede/stats.h" +#include "./centipede/symbol_table.h" +#include "./centipede/workdir.h" +#include "./common/blob_file.h" +#include "./common/defs.h" + +namespace fuzztest::internal { + +// The main fuzzing class. +class Centipede { + public: + Centipede(const Environment &env, CentipedeCallbacks &user_callbacks, + const BinaryInfo &binary_info, CoverageLogger &coverage_logger, + std::atomic &stats); + virtual ~Centipede() = default; + + // Non-copyable and non-movable. + Centipede(const Centipede &) = delete; + Centipede(Centipede &&) noexcept = delete; + Centipede &operator=(const Centipede &) = delete; + Centipede &operator=(Centipede &&) noexcept = delete; + + // Main loop. 
+ void FuzzingLoop(); + + // Saves the sharded corpus into `dir`, one file per input. + static void CorpusToFiles(const Environment &env, std::string_view dir); + // Exports the corpus from `dir` (one file per input) into the sharded corpus. + // Reads `dir` recursively. + // Ignores inputs that already exist in the shard they need to be added to. + // Sharding is stable and depends only on env.total_shards and the file name. + static void CorpusFromFiles(const Environment &env, std::string_view dir); + // Saves the sharded crash reproducers and metadata (failure description) into + // `dir`. Each crash with `ID` will be saved with file `ID.data` for the + // reproducer and `ID.metadata` for the metadata. + static absl::Status CrashesToFiles(const Environment &env, + std::string_view dir); + + private: + // Executes inputs from `input_vec`. + // For every input, its pruned features are written to + // `unconditional_features_file` (if that's non-null). + // For every input that caused new features to be observed: + // * the input is added to the corpus (corpus_ and fs_ are updated). + // * the input is written to `corpus_file` (if that's non-null). + // * its features are written to `features_file` (if that's non-null). + // Returns true if new features were observed. + // Post-condition: `batch_result.results().size()` == `input_vec.size()`. + bool RunBatch(const std::vector &input_vec, + BlobFileWriter *absl_nullable corpus_file, + BlobFileWriter *absl_nullable features_file, + BlobFileWriter *absl_nullable unconditional_features_file); + // Loads seed inputs from the user callbacks, executes them, and stores them + // with the corresponding features into `corpus_file` and `features_file`. + void LoadSeedInputs(BlobFileWriter *absl_nonnull corpus_file, + BlobFileWriter *absl_nonnull features_file); + // Loads a shard `shard_index` from `load_env.workdir`. + // Note: `load_env` may be different from `env_`.
+ // If `rerun` is true, then also re-runs any inputs + // for which the features are not found in `load_env.workdir`. + void LoadShard(const Environment &load_env, size_t shard_index, bool rerun); + // Loads all the shards from corpus files in `load_env.workdir` in random + // order. If `rerun_my_shard` is true, then also re-runs any inputs found in + // `env_.my_shard_index`th shard. Note: `load_env` may be different from + // `env_`. + void LoadAllShardsInRandomOrder(const Environment &load_env, + bool rerun_my_shard); + // Runs all inputs from `to_rerun`, adds their features to the features file + // of env_.my_shard_index, adds interesting inputs to the corpus. + void Rerun(std::vector &to_rerun); + + // Prints one logging line with `log_type` in it + // if `min_log_level` is not greater than `env_.log_level`. + void UpdateAndMaybeLogStats(std::string_view log_type, size_t min_log_level); + // For every feature in `fv`, translates the feature into code coverage + // (PCIndex), then prints one logging line for every + // FUNC/EDGE observed for the first time. + // If symbolization failed, prints a simpler logging line. + // Uses coverage_logger_. + void LogFeaturesAsSymbols(const FeatureVec &f); + + // Generates a coverage report file in workdir. + void GenerateCoverageReport(std::string_view filename_annotation, + std::string_view description); + // Generates a corpus stats file in workdir. + void GenerateCorpusStats(std::string_view filename_annotation, + std::string_view description); + // Generates the clang source-based coverage report in workdir. + void GenerateSourceBasedCoverageReport(std::string_view filename_annotation, + std::string_view description); + // Generates a performance report file in workdir. + void GenerateRUsageReport(std::string_view filename_annotation, + std::string_view description); + // Generates all the report and stats files in workdir if this shard is + // assigned to do that.
+ void MaybeGenerateTelemetry(std::string_view filename_annotation, + std::string_view description); + // Generates all the report and stats files in workdir if this shard is + // assigned to do that and if `batch_index` satisfies the telemetry frequency + // criteria set via the flags. + void MaybeGenerateTelemetryAfterBatch(std::string_view filename_annotation, + size_t batch_index); + + // Returns true if `input` passes env_.input_filter. + bool InputPassesFilter(const ByteArray &input); + // Executes `binary` with `input_vec` and `batch_result` as input/output. + // If the binary crashes, calls ReportCrash(). + // Returns true iff there were no crashes or the failure is an ignored one. + bool ExecuteAndReportCrash(std::string_view binary, + const std::vector &input_vec, + BatchResult &batch_result); + // Reports a crash and saves the reproducer to workdir/crashes, if possible. + // `binary` is the binary causing the crash. + // Prints the first `env_.max_num_crash_reports` logs. + // `input_vec` is the batch of inputs that caused a crash. + // `batch_result` contains the features computed for `input_vec` + // (batch_result.results().size() == input_vec.size()). `batch_result` is used + // as a hint when choosing which input to try first. + // Stops early if `EarlyStopRequested()`. + void ReportCrash(std::string_view binary, + const std::vector &input_vec, + const BatchResult &batch_result); + // Merges shard `shard_index_to_merge` of the corpus in `merge_from_dir` + // into the current corpus. + // Writes added inputs to the current shard. + void MergeFromOtherCorpus(std::string_view merge_from_dir, + size_t shard_index_to_merge); + // Reloads the entire corpus for all the shards from workdir (as if with + // `env_.full_sync`) thus distilling it, and saves it to a single file with a + // shard-hashed name in the workdir. + void ReloadAllShardsAndWriteDistilledCorpus(); + + // Collects all PCs from `fv`, then adds PC-pair features to `fv`. + // Returns the number of added features.
+ // See more comments in centipede.cc. + size_t AddPcPairFeatures(FeatureVec &fv); + + const Environment &env_; + const WorkDir wd_{env_}; + + CentipedeCallbacks &user_callbacks_; + Rng rng_; + + // A timestamp set just before the actual fuzzing begins. Used to measure + // the fuzzing performance. + absl::Time fuzz_start_time_ = absl::InfiniteFuture(); + + FeatureSet fs_; + Corpus corpus_; + CoverageFrontier coverage_frontier_; + size_t num_runs_ = 0; // counts executed inputs + + // Binary-related data, initialized at startup, once per process, + // by calling the PopulateBinaryInfo callback. + const BinaryInfo &binary_info_; + const PCTable &pc_table_; // same as binary_info_.pc_table. + const SymbolTable &symbols_; // same as binary_info_.symbols. + + // Derived from env_.function_filter. Currently, duplicated by every thread. + // In future, threads may have different filters. + const FunctionFilter function_filter_; + + // Ensures every coverage location is reported at most once. + // This object is shared with other threads, it is thread-safe. + CoverageLogger &coverage_logger_; + + // Statistics of the current run. + std::atomic &stats_; + + // Counts the number of crashes reported so far. + int num_crashes_ = 0; + + // Scratch object for AddPcPairFeatures. + std::vector add_pc_pair_scratch_; + + // Path and command for the input_filter. + std::string input_filter_path_; + Command input_filter_cmd_; + + // Resource usage stats collection & reporting. + RUsageProfiler rusage_profiler_; +}; + +} // namespace fuzztest::internal + +#endif // THIRD_PARTY_CENTIPEDE_CENTIPEDE_H_ diff --git a/src/third_party/fuzztest/dist/centipede/centipede_binary_test.sh b/src/third_party/fuzztest/dist/centipede/centipede_binary_test.sh new file mode 100755 index 00000000000..7ab2096bc98 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/centipede_binary_test.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +# Copyright 2022 The Centipede Authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Verify that the `:centipede` build target indeed creates the expected binary. + +set -eu + +source "$(dirname "$0")/test_util.sh" + +centipede_test_srcdir="$(fuzztest::internal::get_centipede_test_srcdir)" +centipede_binary="${centipede_test_srcdir}/centipede" +if ! [[ -x "${centipede_binary}" ]]; then + die "Build target ':centipede' failed to create expected executable \ +${centipede_binary}" +fi diff --git a/src/third_party/fuzztest/dist/centipede/centipede_callbacks.cc b/src/third_party/fuzztest/dist/centipede/centipede_callbacks.cc new file mode 100644 index 00000000000..f2c1ff749a6 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/centipede_callbacks.cc @@ -0,0 +1,469 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "./centipede/centipede_callbacks.h" + +#include +#include +#include +#include // NOLINT +#include +#include +#include +#include +#include // NOLINT +#include +#include + +#include "absl/log/check.h" +#include "absl/log/log.h" +#include "absl/strings/ascii.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_join.h" +#include "absl/strings/str_split.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "./centipede/binary_info.h" +#include "./centipede/command.h" +#include "./centipede/control_flow.h" +#include "./centipede/mutation_input.h" +#include "./centipede/runner_request.h" +#include "./centipede/runner_result.h" +#include "./centipede/util.h" +#include "./centipede/workdir.h" +#include "./common/blob_file.h" +#include "./common/defs.h" +#include "./common/hash.h" +#include "./common/logging.h" + +namespace fuzztest::internal { +namespace { + +// When running a test binary in a subprocess, we don't want these environment +// variables to be inherited and affect the execution of the tests. +// +// See list of environment variables here: +// https://bazel.build/reference/test-encyclopedia#initial-conditions +// +// TODO(fniksic): Add end-to-end tests that make sure we don't observe the +// effects of these variables in the test binary. 
+std::vector<std::string> EnvironmentVariablesToUnset() {
+  return {"TEST_DIAGNOSTICS_OUTPUT_DIR",              //
+          "TEST_INFRASTRUCTURE_FAILURE_FILE",         //
+          "TEST_LOGSPLITTER_OUTPUT_FILE",             //
+          "TEST_PREMATURE_EXIT_FILE",                 //
+          "TEST_RANDOM_SEED",                         //
+          "TEST_RUN_NUMBER",                          //
+          "TEST_SHARD_INDEX",                         //
+          "TEST_SHARD_STATUS_FILE",                   //
+          "TEST_TOTAL_SHARDS",                        //
+          "TEST_UNDECLARED_OUTPUTS_ANNOTATIONS_DIR",  //
+          "TEST_UNDECLARED_OUTPUTS_DIR",              //
+          "TEST_WARNINGS_OUTPUT_FILE",                //
+          "GTEST_OUTPUT",                             //
+          "XML_OUTPUT_FILE"};
+}
+
+}  // namespace
+
+void CentipedeCallbacks::PopulateBinaryInfo(BinaryInfo &binary_info) {
+  binary_info.InitializeFromSanCovBinary(
+      env_.coverage_binary, env_.objdump_path, env_.symbolizer_path, temp_dir_);
+  // Check the PC table.
+  if (binary_info.pc_table.empty()) {
+    if (env_.require_pc_table) {
+      LOG(ERROR) << "Could not get PC table; exiting (override with "
+                    "--require_pc_table=false)";
+      exit(EXIT_FAILURE);
+    }
+    LOG(WARNING) << "Could not get PC table; CF table and debug symbols will "
+                    "not be used";
+    return;
+  }
+  // Check CF table.
+  if (binary_info.cf_table.empty()) {
+    LOG(WARNING)
+        << "Could not get CF table; binary should be built with Clang 16 (or "
+           "later) and with -fsanitize-coverage=control-flow flag";
+  } else {
+    // Construct call-graph and cfg using loaded cf_table and pc_table.
+    // TODO(b/284044008): These two are currently used only inside
+    // `CoverageFrontier`, so we can mask the bug's failure by conditionally
+    // initializing them like this.
+    if (env_.use_coverage_frontier) {
+      binary_info.control_flow_graph.InitializeControlFlowGraph(
+          binary_info.cf_table, binary_info.pc_table);
+      binary_info.call_graph.InitializeCallGraph(binary_info.cf_table,
+                                                 binary_info.pc_table);
+    }
+  }
+}
+
+std::string CentipedeCallbacks::ConstructRunnerFlags(
+    std::string_view extra_flags, bool disable_coverage) {
+  std::vector<std::string> flags = {
+      "CENTIPEDE_RUNNER_FLAGS=",
+      absl::StrCat("timeout_per_input=", env_.timeout_per_input),
+      absl::StrCat("timeout_per_batch=", env_.timeout_per_batch),
+      absl::StrCat("address_space_limit_mb=", env_.address_space_limit_mb),
+      absl::StrCat("rss_limit_mb=", env_.rss_limit_mb),
+      absl::StrCat("stack_limit_kb=", env_.stack_limit_kb),
+      absl::StrCat("crossover_level=", env_.crossover_level),
+      absl::StrCat("max_len=", env_.max_len),
+  };
+  if (env_.ignore_timeout_reports) {
+    flags.emplace_back("ignore_timeout_reports");
+  }
+  if (!disable_coverage) {
+    flags.emplace_back(absl::StrCat("path_level=", env_.path_level));
+    if (env_.use_pc_features) flags.emplace_back("use_pc_features");
+    if (env_.use_counter_features) flags.emplace_back("use_counter_features");
+    if (env_.use_cmp_features) flags.emplace_back("use_cmp_features");
+    flags.emplace_back(absl::StrCat("callstack_level=", env_.callstack_level));
+    if (env_.use_auto_dictionary) flags.emplace_back("use_auto_dictionary");
+    if (env_.use_dataflow_features) flags.emplace_back("use_dataflow_features");
+  }
+  if (!env_.runner_dl_path_suffix.empty()) {
+    flags.emplace_back(
+        absl::StrCat("dl_path_suffix=", env_.runner_dl_path_suffix));
+  }
+  if (!env_.pcs_file_path.empty())
+    flags.emplace_back(absl::StrCat("pcs_file_path=", env_.pcs_file_path));
+  if (!extra_flags.empty()) flags.emplace_back(extra_flags);
+  flags.emplace_back("");
+  return absl::StrJoin(flags, ":");
+}
+
+Command &CentipedeCallbacks::GetOrCreateCommandForBinary(
+    std::string_view binary) {
+  for (auto &cmd : commands_) {
+    if (cmd->path() == binary) return *cmd;
+  }
+  // We don't want to collect coverage for extra binaries. It won't be used.
+  bool disable_coverage =
+      std::find(env_.extra_binaries.begin(), env_.extra_binaries.end(),
+                binary) != env_.extra_binaries.end();
+
+  std::vector<std::string> env = {ConstructRunnerFlags(
+      absl::StrCat(":shmem:test=", env_.test_name, ":arg1=",
+                   inputs_blobseq_.path(), ":arg2=", outputs_blobseq_.path(),
+                   ":failure_description_path=", failure_description_path_,
+                   ":failure_signature_path=", failure_signature_path_, ":"),
+      disable_coverage)};
+
+  if (env_.clang_coverage_binary == binary)
+    env.emplace_back(
+        absl::StrCat("LLVM_PROFILE_FILE=",
+                     WorkDir{env_}.SourceBasedCoverageRawProfilePath()));
+
+  Command::Options cmd_options;
+  cmd_options.env_add = std::move(env);
+  cmd_options.env_remove = EnvironmentVariablesToUnset();
+  cmd_options.stdout_file = execute_log_path_;
+  cmd_options.stderr_file = execute_log_path_;
+  cmd_options.temp_file_path = temp_input_file_path_;
+  Command &cmd = *commands_.emplace_back(
+      std::make_unique<Command>(binary, std::move(cmd_options)));
+  if (env_.fork_server) cmd.StartForkServer(temp_dir_, Hash(binary));
+
+  return cmd;
+}
+
+int CentipedeCallbacks::RunBatchForBinary(std::string_view binary) {
+  auto &cmd = GetOrCreateCommandForBinary(binary);
+  const absl::Duration amortized_timeout =
+      env_.timeout_per_batch == 0
+          ? absl::InfiniteDuration()
+          : absl::Seconds(env_.timeout_per_batch) + absl::Seconds(5);
+  const auto deadline = absl::Now() + amortized_timeout;
+  int exit_code = EXIT_SUCCESS;
+  const bool should_clean_up = [&] {
+    if (!cmd.ExecuteAsync()) return true;
+    const std::optional<int> ret = cmd.Wait(deadline);
+    if (!ret.has_value()) return true;
+    exit_code = *ret;
+    return false;
+  }();
+  if (should_clean_up) {
+    exit_code = [&] {
+      if (!cmd.is_executing()) return EXIT_FAILURE;
+      LOG(ERROR) << "Cleaning up the batch execution.";
+      cmd.RequestStop();
+      const auto ret = cmd.Wait(absl::Now() + absl::Seconds(60));
+      if (ret.has_value()) return *ret;
+      LOG(ERROR) << "Batch execution cleanup failed to end in 60s.";
+      return EXIT_FAILURE;
+    }();
+    commands_.erase(
+        std::find_if(commands_.begin(), commands_.end(),
+                     [=](const auto &cmd) { return cmd->path() == binary; }));
+  }
+  return exit_code;
+}
+
+int CentipedeCallbacks::ExecuteCentipedeSancovBinaryWithShmem(
+    std::string_view binary, const std::vector<ByteArray> &inputs,
+    BatchResult &batch_result) {
+  auto start_time = absl::Now();
+  batch_result.ClearAndResize(inputs.size());
+
+  // Reset the blobseqs.
+  inputs_blobseq_.Reset();
+  outputs_blobseq_.Reset();
+
+  size_t num_inputs_written = 0;
+
+  if (env_.has_input_wildcards) {
+    CHECK_EQ(inputs.size(), 1);
+    WriteToLocalFile(temp_input_file_path_, inputs[0]);
+    num_inputs_written = 1;
+  } else {
+    // Feed the inputs to inputs_blobseq_.
+    num_inputs_written = RequestExecution(inputs, inputs_blobseq_);
+  }
+
+  if (num_inputs_written != inputs.size()) {
+    LOG(INFO) << "Wrote " << num_inputs_written << "/" << inputs.size()
+              << " inputs; shmem_size_mb might be too small: "
+              << env_.shmem_size_mb;
+  }
+
+  // Run.
+  const int exit_code = RunBatchForBinary(binary);
+  inputs_blobseq_.ReleaseSharedMemory();  // Inputs are already consumed.
+
+  // Get results.
+  batch_result.exit_code() = exit_code;
+  const bool read_success = batch_result.Read(outputs_blobseq_);
+  LOG_IF(ERROR, !read_success) << "Failed to read batch result!";
+  outputs_blobseq_.ReleaseSharedMemory();  // Outputs are already consumed.
+
+  // We may have fewer feature blobs than inputs if
+  // * some inputs were not written (i.e. num_inputs_written < inputs.size).
+  //   * Logged above.
+  // * some outputs were not written because the subprocess died.
+  //   * Will be logged by the caller.
+  // * some outputs were not written because the outputs_blobseq_ overflowed.
+  //   * Logged by the following code.
+  if (exit_code == 0 && read_success &&
+      batch_result.num_outputs_read() != num_inputs_written) {
+    LOG(INFO) << "Read " << batch_result.num_outputs_read() << "/"
+              << num_inputs_written
+              << " outputs; shmem_size_mb might be too small: "
+              << env_.shmem_size_mb;
+  }
+
+  if (env_.print_runner_log) PrintExecutionLog();
+
+  if (exit_code != EXIT_SUCCESS) {
+    ReadFromLocalFile(execute_log_path_, batch_result.log());
+    ReadFromLocalFile(failure_description_path_,
+                      batch_result.failure_description());
+    if (std::filesystem::exists(failure_signature_path_)) {
+      ReadFromLocalFile(failure_signature_path_,
+                        batch_result.failure_signature());
+    } else {
+      // TODO(xinhaoyuan): Refactor runner to use dispatcher so this branch can
+      // be removed.
+      batch_result.failure_signature() = batch_result.failure_description();
+    }
+    // Remove the failure description and signature files here so that they do
+    // not stay until another failed execution.
+    std::filesystem::remove(failure_description_path_);
+    std::filesystem::remove(failure_signature_path_);
+  }
+  VLOG(1) << __FUNCTION__ << " took " << (absl::Now() - start_time);
+  return exit_code;
+}
+
+// See also: `DumpSeedsToDir()`.
+bool CentipedeCallbacks::GetSeedsViaExternalBinary(
+    std::string_view binary, size_t &num_avail_seeds,
+    std::vector<ByteArray> &seeds) {
+  const auto output_dir = std::filesystem::path{temp_dir_} / "seed_inputs";
+  std::error_code error;
+  CHECK(std::filesystem::create_directories(output_dir, error));
+  CHECK(!error);
+
+  std::string centipede_runner_flags = absl::StrCat(
+      "CENTIPEDE_RUNNER_FLAGS=:dump_seed_inputs:test=", env_.test_name,
+      ":arg1=", output_dir.string(), ":");
+  if (!env_.runner_dl_path_suffix.empty()) {
+    absl::StrAppend(&centipede_runner_flags,
+                    "dl_path_suffix=", env_.runner_dl_path_suffix, ":");
+  }
+  Command::Options cmd_options;
+  cmd_options.env_add = {std::move(centipede_runner_flags)};
+  cmd_options.env_remove = EnvironmentVariablesToUnset();
+  cmd_options.stdout_file = execute_log_path_;
+  cmd_options.stderr_file = execute_log_path_;
+  cmd_options.temp_file_path = temp_input_file_path_;
+  Command cmd{binary, std::move(cmd_options)};
+  const int retval = cmd.Execute();
+
+  if (env_.print_runner_log) {
+    LOG(INFO) << "Getting seeds via external binary returns " << retval;
+    PrintExecutionLog();
+  }
+
+  std::vector<std::string> seed_input_filenames;
+  for (const auto &dir_ent : std::filesystem::directory_iterator(output_dir)) {
+    seed_input_filenames.push_back(dir_ent.path().filename());
+  }
+  std::sort(seed_input_filenames.begin(), seed_input_filenames.end());
+  num_avail_seeds = seed_input_filenames.size();
+
+  size_t num_seeds_read;
+  for (num_seeds_read = 0; num_seeds_read < seeds.size() &&
+                           num_seeds_read < seed_input_filenames.size();
+       ++num_seeds_read) {
+    ReadFromLocalFile(
+        (output_dir / seed_input_filenames[num_seeds_read]).string(),
+        seeds[num_seeds_read]);
+  }
+  seeds.resize(num_seeds_read);
+  std::filesystem::remove_all(output_dir, error);
+  LOG_IF(ERROR, error) << "Failed to remove seed inputs directory: "
+                       << error.message();
+
+  return retval == 0;
+}
+
+// See also: `DumpSerializedTargetConfigToFile()`.
+bool CentipedeCallbacks::GetSerializedTargetConfigViaExternalBinary(
+    std::string_view binary, std::string &serialized_config) {
+  const auto config_file_path =
+      std::filesystem::path{temp_dir_} / "configuration";
+  std::string centipede_runner_flags =
+      absl::StrCat("CENTIPEDE_RUNNER_FLAGS=:dump_configuration:arg1=",
+                   config_file_path.string(), ":");
+  if (!env_.runner_dl_path_suffix.empty()) {
+    absl::StrAppend(&centipede_runner_flags,
+                    "dl_path_suffix=", env_.runner_dl_path_suffix, ":");
+  }
+  Command::Options cmd_options;
+  cmd_options.env_add = {std::move(centipede_runner_flags)};
+  cmd_options.env_remove = EnvironmentVariablesToUnset();
+  cmd_options.stdout_file = execute_log_path_;
+  cmd_options.stderr_file = execute_log_path_;
+  cmd_options.temp_file_path = temp_input_file_path_;
+  Command cmd{binary, std::move(cmd_options)};
+  const bool is_success = cmd.Execute() == 0;
+
+  if (is_success) {
+    if (std::filesystem::exists(config_file_path)) {
+      ReadFromLocalFile(config_file_path.string(), serialized_config);
+    } else {
+      serialized_config = "";
+    }
+  }
+  if (env_.print_runner_log || !is_success) {
+    PrintExecutionLog();
+  }
+  std::error_code error;
+  std::filesystem::remove(config_file_path, error);
+  CHECK(!error);
+
+  return is_success;
+}
+
+// See also: MutateInputsFromShmem().
+MutationResult CentipedeCallbacks::MutateViaExternalBinary(
+    std::string_view binary, const std::vector<MutationInputRef> &inputs,
+    size_t num_mutants) {
+  CHECK(!env_.has_input_wildcards)
+      << "Standalone binary does not support custom mutator";
+
+  auto start_time = absl::Now();
+  inputs_blobseq_.Reset();
+  outputs_blobseq_.Reset();
+
+  size_t num_inputs_written =
+      RequestMutation(num_mutants, inputs, inputs_blobseq_);
+  LOG_IF(INFO, num_inputs_written != inputs.size())
+      << VV(num_inputs_written) << VV(inputs.size());
+
+  // Execute.
+  const int exit_code = RunBatchForBinary(binary);
+  inputs_blobseq_.ReleaseSharedMemory();  // Inputs are already consumed.
+
+  if (exit_code != EXIT_SUCCESS) {
+    LOG(WARNING) << "Custom mutator failed with exit code: " << exit_code;
+  }
+  if (env_.print_runner_log || exit_code != EXIT_SUCCESS) {
+    PrintExecutionLog();
+  }
+
+  MutationResult result;
+  result.exit_code() = exit_code;
+  result.Read(num_mutants, outputs_blobseq_);
+  outputs_blobseq_.ReleaseSharedMemory();  // Outputs are already consumed.
+
+  VLOG(1) << __FUNCTION__ << " took " << (absl::Now() - start_time);
+  return result;
+}
+
+size_t CentipedeCallbacks::LoadDictionary(std::string_view dictionary_path) {
+  if (dictionary_path.empty()) return 0;
+  // First, try to parse the dictionary as an AFL/libFuzzer dictionary.
+  // These dictionaries are in plain text format and thus a Centipede-native
+  // dictionary will never be mistaken for an AFL/libFuzzer dictionary.
+  std::string text;
+  ReadFromLocalFile(dictionary_path, text);
+  std::vector<ByteArray> entries;
+  if (ParseAFLDictionary(text, entries) && !entries.empty()) {
+    env_.use_legacy_default_mutator
+        ? byte_array_mutator_.AddToDictionary(entries)
+        : fuzztest_mutator_.AddToDictionary(entries);
+    LOG(INFO) << "Loaded " << entries.size()
+              << " dictionary entries from AFL/libFuzzer dictionary "
+              << dictionary_path;
+    return entries.size();
+  }
+  // Didn't parse as plain text. Assume encoded corpus format.
+  auto reader = DefaultBlobFileReaderFactory();
+  CHECK_OK(reader->Open(dictionary_path))
+      << "Error in opening dictionary file: " << dictionary_path;
+  std::vector<ByteArray> unpacked_dictionary;
+  ByteSpan blob;
+  while (reader->Read(blob).ok()) {
+    unpacked_dictionary.emplace_back(blob.begin(), blob.end());
+  }
+  CHECK_OK(reader->Close())
+      << "Error in closing dictionary file: " << dictionary_path;
+  CHECK(!unpacked_dictionary.empty())
+      << "Empty or corrupt dictionary file: " << dictionary_path;
+  env_.use_legacy_default_mutator
+      ? byte_array_mutator_.AddToDictionary(unpacked_dictionary)
+      : fuzztest_mutator_.AddToDictionary(unpacked_dictionary);
+  LOG(INFO) << "Loaded " << unpacked_dictionary.size()
+            << " dictionary entries from " << dictionary_path;
+  return unpacked_dictionary.size();
+}
+
+void CentipedeCallbacks::PrintExecutionLog() const {
+  if (!std::filesystem::exists(execute_log_path_)) {
+    LOG(WARNING) << "Log file for the last executed binary does not exist: "
+                 << execute_log_path_;
+    return;
+  }
+  std::string log_text;
+  ReadFromLocalFile(execute_log_path_, log_text);
+  for (const auto &log_line :
+       absl::StrSplit(absl::StripAsciiWhitespace(log_text), '\n')) {
+    LOG(INFO).NoPrefix() << "LOG: " << log_line;
+  }
+}
+
+}  // namespace fuzztest::internal
diff --git a/src/third_party/fuzztest/dist/centipede/centipede_callbacks.h b/src/third_party/fuzztest/dist/centipede/centipede_callbacks.h
new file mode 100644
index 00000000000..9410b5cb930
--- /dev/null
+++ b/src/third_party/fuzztest/dist/centipede/centipede_callbacks.h
@@ -0,0 +1,229 @@
+// Copyright 2022 The Centipede Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef THIRD_PARTY_CENTIPEDE_CENTIPEDE_CALLBACKS_H_
+#define THIRD_PARTY_CENTIPEDE_CENTIPEDE_CALLBACKS_H_
+
+#include
+#include  // NOLINT
+#include
+#include
+#include
+
+#include "absl/base/nullability.h"
+#include "absl/log/check.h"
+#include "absl/status/statusor.h"
+#include "./centipede/binary_info.h"
+#include "./centipede/byte_array_mutator.h"
+#include "./centipede/command.h"
+#include "./centipede/environment.h"
+#include "./centipede/fuzztest_mutator.h"
+#include "./centipede/mutation_input.h"
+#include "./centipede/runner_result.h"
+#include "./centipede/shared_memory_blob_sequence.h"
+#include "./centipede/util.h"
+#include "./common/defs.h"
+
+namespace fuzztest::internal {
+
+// User must inherit from this class and override at least the
+// pure virtual functions.
+//
+// The classes inherited from this one must be thread-compatible.
+// Note: the interface is not yet stable and may change w/o a notice.
+class CentipedeCallbacks {
+ public:
+  // `env` is used to pass flags to `this`, it must outlive `this`.
+  CentipedeCallbacks(const Environment &env)
+      : env_(env),
+        byte_array_mutator_(env.knobs, GetRandomSeed(env.seed)),
+        fuzztest_mutator_(env.knobs, GetRandomSeed(env.seed)),
+        inputs_blobseq_(shmem_name1_.c_str(), env.shmem_size_mb << 20,
+                        env.use_posix_shmem),
+        outputs_blobseq_(shmem_name2_.c_str(), env.shmem_size_mb << 20,
+                         env.use_posix_shmem) {
+    if (env.use_legacy_default_mutator)
+      CHECK(byte_array_mutator_.set_max_len(env.max_len));
+    else
+      CHECK(fuzztest_mutator_.set_max_len(env.max_len));
+  }
+  virtual ~CentipedeCallbacks() {}
+
+  // Feeds `inputs` into the `binary`, for every input populates `batch_result`.
+  // Old contents of `batch_result` are cleared.
+  // Returns true on success, false on failure.
+  // Post-condition:
+  // `batch_result` has results for every `input`, even on failure.
+  virtual bool Execute(std::string_view binary,
+                       const std::vector<ByteArray> &inputs,
+                       BatchResult &batch_result) = 0;
+
+  // Takes non-empty `inputs` and returns at most `num_mutants` mutated inputs.
+  virtual std::vector<ByteArray> Mutate(
+      const std::vector<MutationInputRef> &inputs, size_t num_mutants) {
+    return env_.use_legacy_default_mutator
+               ? byte_array_mutator_.MutateMany(inputs, num_mutants)
+               : fuzztest_mutator_.MutateMany(inputs, num_mutants);
+  }
+
+  // Populates the BinaryInfo using the `symbolizer_path` and `coverage_binary`
+  // in `env_`. The tables may not be populated if the PC table cannot be
+  // determined from the `coverage_binary` or if symbolization fails. Exits if
+  // PC table was not populated and `env_.require_pc_table` is set.
+  virtual void PopulateBinaryInfo(BinaryInfo &binary_info);
+
+  // Retrieves at most `num_seeds` seed inputs. Returns the number of seeds
+  // available if `num_seeds` had been large enough.
+  virtual size_t GetSeeds(size_t num_seeds, std::vector<ByteArray> &seeds) {
+    if (num_seeds > 0) seeds = {{0}};
+    return 1;
+  }
+
+  // Returns the configuration from the test target in the serialized form.
+  // Returns an empty string if the test target doesn't provide configuration.
+  virtual absl::StatusOr<std::string> GetSerializedTargetConfig() { return ""; }
+
+ protected:
+  // Helpers that the user-defined class may use if needed.
+
+  // Same as ExecuteCentipedeSancovBinary, but uses shared memory.
+  // Much faster for fast targets since it uses fewer system calls.
+  int ExecuteCentipedeSancovBinaryWithShmem(
+      std::string_view binary, const std::vector<ByteArray> &inputs,
+      BatchResult &batch_result);
+
+  // Constructs a string CENTIPEDE_RUNNER_FLAGS=":flag1:flag2:...",
+  // where the flags are determined by `env` and also include `extra_flags`.
+  // If `disable_coverage`, coverage options are not added.
+  std::string ConstructRunnerFlags(std::string_view extra_flags = "",
+                                   bool disable_coverage = false);
+
+  // Uses an external binary `binary` to generate seed inputs. The binary should
+  // be linked against :centipede_runner and implement the RunnerCallbacks
+  // interface as described in runner_interface.h.
+  //
+  // Retrieves the first `seeds.size()` inputs (if exist) from `binary`,
+  // replacing the existing elements of `seeds`, and shrinking `seeds` if
+  // needed. Sets `num_avail_seeds` to the number of available seeds, which may
+  // be more than `seeds.size()`.
+  //
+  // Returns true on success.
+  bool GetSeedsViaExternalBinary(std::string_view binary,
+                                 size_t &num_avail_seeds,
+                                 std::vector<ByteArray> &seeds);
+
+  // Uses an external binary `binary` to get the serialized test target
+  // configuration. The binary should be linked against :centipede_runner and
+  // implement the RunnerCallbacks interface as described in runner_interface.h.
+  //
+  // If the binary returns with success and doesn't provide the configuration,
+  // sets `serialized_config` to empty string.
+  //
+  // Returns true on success.
+  bool GetSerializedTargetConfigViaExternalBinary(
+      std::string_view binary, std::string &serialized_config);
+
+  // Uses an external binary `binary` to mutate `inputs`. The binary
+  // should be linked against :centipede_runner and implement the
+  // RunnerCallbacks interface as described in runner_interface.h,
+  // or implement the legacy Structure-Aware Fuzzing interface described here:
+  // github.com/google/fuzzing/blob/master/docs/structure-aware-fuzzing.md
+  //
+  // Returns a `MutationResult` instance where `exit_code` indicates whether
+  // the binary was executed successfully, `has_custom_mutator` indicates
+  // whether the binary has a custom mutator, and if it does, `mutants` contains
+  // at most `num_mutants` non-empty mutants.
+  MutationResult MutateViaExternalBinary(
+      std::string_view binary, const std::vector<MutationInputRef> &inputs,
+      size_t num_mutants);
+
+  // Loads the dictionary from `dictionary_path`,
+  // returns the number of dictionary entries loaded.
+  size_t LoadDictionary(std::string_view dictionary_path);
+
+ protected:
+  const Environment &env_;
+  ByteArrayMutator byte_array_mutator_;
+  FuzzTestMutator fuzztest_mutator_;
+
+ private:
+  // Returns a Command object with matching `binary` from commands_,
+  // creates one if needed.
+  Command &GetOrCreateCommandForBinary(std::string_view binary);
+  // Runs a batch with the command `binary` and returns the exit code.
+  int RunBatchForBinary(std::string_view binary);
+
+  // Prints the execution log from the last executed binary.
+  void PrintExecutionLog() const;
+
+  // Variables required for ExecuteCentipedeSancovBinaryWithShmem.
+  // They are computed in CTOR, to avoid extra computation in the hot loop.
+  std::string temp_dir_ = TemporaryLocalDirPath();
+  std::string temp_input_file_path_ =
+      std::filesystem::path(temp_dir_).append("temp_input_file");
+  const std::string execute_log_path_ =
+      std::filesystem::path(temp_dir_).append("log");
+  std::string failure_description_path_ =
+      std::filesystem::path(temp_dir_).append("failure_description");
+  std::string failure_signature_path_ =
+      std::filesystem::path(temp_dir_).append("failure_signature");
+  const std::string shmem_name1_ = ProcessAndThreadUniqueID("/ctpd-shm1-");
+  const std::string shmem_name2_ = ProcessAndThreadUniqueID("/ctpd-shm2-");
+
+  SharedMemoryBlobSequence inputs_blobseq_;
+  SharedMemoryBlobSequence outputs_blobseq_;
+
+  // Need unique_ptr indirection because Command is not movable/copyable.
+  std::vector<std::unique_ptr<Command>> commands_;
+};
+
+// Abstract class for creating/destroying CentipedeCallbacks objects.
+// A typical implementation would simply new/delete objects of appropriate type,
+// see DefaultCallbacksFactory below.
+// Other implementations (e.g. for tests) may take the object from elsewhere
+// and not actually delete it.
+class CentipedeCallbacksFactory {
+ public:
+  virtual CentipedeCallbacks *create(const Environment &env) = 0;
+  virtual void destroy(CentipedeCallbacks *callbacks) = 0;
+  virtual ~CentipedeCallbacksFactory() {}
+};
+
+// This is the typical way to implement a CentipedeCallbacksFactory for a Type.
+template <typename Type>
+class DefaultCallbacksFactory : public CentipedeCallbacksFactory {
+ public:
+  CentipedeCallbacks *create(const Environment &env) override {
+    return new Type(env);
+  }
+  void destroy(CentipedeCallbacks *callbacks) override { delete callbacks; }
+};
+
+// Creates a CentipedeCallbacks object in CTOR and destroys it in DTOR.
+class ScopedCentipedeCallbacks {
+ public:
+  ScopedCentipedeCallbacks(CentipedeCallbacksFactory &factory,
+                           const Environment &env)
+      : factory_(factory), callbacks_(factory_.create(env)) {}
+  ~ScopedCentipedeCallbacks() { factory_.destroy(callbacks_); }
+  CentipedeCallbacks *absl_nonnull callbacks() { return callbacks_; }
+
+ private:
+  CentipedeCallbacksFactory &factory_;
+  CentipedeCallbacks *callbacks_;
+};
+
+}  // namespace fuzztest::internal
+
+#endif  // THIRD_PARTY_CENTIPEDE_CENTIPEDE_CALLBACKS_H_
diff --git a/src/third_party/fuzztest/dist/centipede/centipede_default_callbacks.cc b/src/third_party/fuzztest/dist/centipede/centipede_default_callbacks.cc
new file mode 100644
index 00000000000..20b2efa2588
--- /dev/null
+++ b/src/third_party/fuzztest/dist/centipede/centipede_default_callbacks.cc
@@ -0,0 +1,119 @@
+// Copyright 2022 The Centipede Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "./centipede/centipede_default_callbacks.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "absl/log/check.h"
+#include "absl/log/log.h"
+#include "absl/status/status.h"
+#include "absl/status/statusor.h"
+#include "./centipede/centipede_callbacks.h"
+#include "./centipede/environment.h"
+#include "./centipede/mutation_input.h"
+#include "./centipede/runner_result.h"
+#include "./centipede/stop.h"
+#include "./common/defs.h"
+#include "./common/logging.h"  // IWYU pragma: keep
+
+namespace fuzztest::internal {
+
+CentipedeDefaultCallbacks::CentipedeDefaultCallbacks(const Environment &env)
+    : CentipedeCallbacks(env) {
+  for (const auto &dictionary_path : env_.dictionary) {
+    LoadDictionary(dictionary_path);
+  }
+
+  if (env_.has_input_wildcards) {
+    LOG(INFO) << "Disabling custom mutator for standalone target";
+    custom_mutator_is_usable_ = false;
+  }
+}
+
+bool CentipedeDefaultCallbacks::Execute(std::string_view binary,
+                                        const std::vector<ByteArray> &inputs,
+                                        BatchResult &batch_result) {
+  return ExecuteCentipedeSancovBinaryWithShmem(binary, inputs, batch_result) ==
+         0;
+}
+
+size_t CentipedeDefaultCallbacks::GetSeeds(size_t num_seeds,
+                                           std::vector<ByteArray> &seeds) {
+  seeds.resize(num_seeds);
+  if (GetSeedsViaExternalBinary(env_.binary, num_seeds, seeds)) {
+    return num_seeds;
+  }
+  return CentipedeCallbacks::GetSeeds(num_seeds, seeds);
+}
+
+absl::StatusOr<std::string>
+CentipedeDefaultCallbacks::GetSerializedTargetConfig() {
+  std::string serialized_target_config;
+  if (GetSerializedTargetConfigViaExternalBinary(env_.binary,
+                                                 serialized_target_config)) {
+    return serialized_target_config;
+  }
+  return absl::InternalError(
+      "Failed to get serialized configuration from the target binary.");
+}
+
+std::vector<ByteArray> CentipedeDefaultCallbacks::Mutate(
+    const std::vector<MutationInputRef> &inputs, size_t num_mutants) {
+  if (num_mutants == 0) return {};
+  // Try to use the custom mutator if it hasn't been disabled.
+  if (custom_mutator_is_usable_.value_or(true)) {
+    MutationResult result =
+        MutateViaExternalBinary(env_.binary, inputs, num_mutants);
+    if (result.exit_code() == EXIT_SUCCESS) {
+      if (!custom_mutator_is_usable_.has_value()) {
+        custom_mutator_is_usable_ = result.has_custom_mutator();
+        if (*custom_mutator_is_usable_) {
+          LOG(INFO) << "Custom mutator detected; will use it.";
+        } else {
+          LOG(INFO) << "Custom mutator not detected; falling back to the "
+                       "built-in mutator.";
+        }
+      }
+      if (*custom_mutator_is_usable_) {
+        // TODO(b/398261908): Exit with failure instead of crashing.
+        CHECK(result.has_custom_mutator())
+            << "Test binary no longer has a custom mutator, even though it was "
+               "previously detected.";
+        if (!result.mutants().empty()) return std::move(result).mutants();
+        LOG_FIRST_N(WARNING, 5) << "Custom mutator returned no mutants; will "
+                                   "generate some using the built-in mutator.";
+      }
+    } else if (ShouldStop()) {
+      LOG(WARNING) << "Custom mutator failed, but ignored since the stop "
+                      "condition it met. Possibly what triggered the stop "
+                      "condition also interrupted the mutator.";
+      // Returning whatever mutants we got before the failure.
+      return std::move(result).mutants();
+    } else {
+      LOG(ERROR) << "Test binary failed when asked to mutate inputs - exiting.";
+      RequestEarlyStop(EXIT_FAILURE);
+      return {};
+    }
+  }
+  // Fall back to the internal mutator.
+  return CentipedeCallbacks::Mutate(inputs, num_mutants);
+}
+
+}  // namespace fuzztest::internal
diff --git a/src/third_party/fuzztest/dist/centipede/centipede_default_callbacks.h b/src/third_party/fuzztest/dist/centipede/centipede_default_callbacks.h
new file mode 100644
index 00000000000..0b785626132
--- /dev/null
+++ b/src/third_party/fuzztest/dist/centipede/centipede_default_callbacks.h
@@ -0,0 +1,54 @@
+// Copyright 2022 The Centipede Authors.
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Example fuzzer built on top of Centipede. It is capable of fuzzing any binary +// target built with sancov (see build_defs.bzl). CentipedeCallbacks::Mutate +// uses ByteArrayMutator. + +#ifndef THIRD_PARTY_CENTIPEDE_CENTIPEDE_DEFAULT_CALLBACKS_H_ +#define THIRD_PARTY_CENTIPEDE_CENTIPEDE_DEFAULT_CALLBACKS_H_ + +#include +#include +#include +#include +#include + +#include "absl/status/statusor.h" +#include "./centipede/centipede_callbacks.h" +#include "./centipede/environment.h" +#include "./centipede/mutation_input.h" +#include "./centipede/runner_result.h" +#include "./common/defs.h" + +namespace fuzztest::internal { + +// Example of customized CentipedeCallbacks. 
+class CentipedeDefaultCallbacks : public CentipedeCallbacks { + public: + explicit CentipedeDefaultCallbacks(const Environment &env); + size_t GetSeeds(size_t num_seeds, std::vector &seeds) override; + absl::StatusOr GetSerializedTargetConfig() override; + bool Execute(std::string_view binary, const std::vector &inputs, + BatchResult &batch_result) override; + std::vector Mutate(const std::vector &inputs, + size_t num_mutants) override; + + private: + std::optional custom_mutator_is_usable_ = std::nullopt; +}; + +} // namespace fuzztest::internal + +#endif // THIRD_PARTY_CENTIPEDE_CENTIPEDE_DEFAULT_CALLBACKS_H_ diff --git a/src/third_party/fuzztest/dist/centipede/centipede_flags.inc b/src/third_party/fuzztest/dist/centipede/centipede_flags.inc new file mode 100644 index 00000000000..fd331ac4d13 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/centipede_flags.inc @@ -0,0 +1,468 @@ +// Copyright 2023 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Definitions of the Centipede flags to be expanded in different contexts. +// Each flag is defined as: +// +// CENTIPEDE_FLAG(type, name, default_value, description) +// +// default_value must be a compile-time constant. +// description must be a string literal. + +#ifndef CENTIPEDE_FLAG +#error This file must be used only in Centipede with CENTIPEDE_FLAG defined. +#endif + +// TODO(kcc): document usage of standalone binaries and how to use @@ wildcard. 
+// If the "binary" contains @@, it means the binary can only accept inputs +// from the command line, and only one input per process. +// @@ will be replaced with a path to file with the input. +// @@ is chosen to follow the AFL command line syntax. +// TODO(kcc): rename --binary to --command (same for --extra_binaries), +// while remaining backward compatible. +CENTIPEDE_FLAG(std::string, binary, "", "The target binary.") +CENTIPEDE_FLAG( + std::string, coverage_binary, "", + "The actual binary from which coverage is collected - if different " + "from --binary.") +CENTIPEDE_FLAG( + std::string, binary_hash, "", + "If not-empty, this hash string is used instead of the hash of the " + "contents of coverage_binary. Use this flag when the coverage_binary " + "is not available nor needed, e.g. when using --distill.") +CENTIPEDE_FLAG( + std::string, clang_coverage_binary, "", + "A clang source-based code coverage binary used to produce " + "human-readable reports. Do not add this binary to extra_binaries. " + "You must have llvm-cov and llvm-profdata in your path to generate " + "the reports. --workdir in turn must be local in order for this " + "functionality to work. See " + "https://clang.llvm.org/docs/SourceBasedCodeCoverage.html") +CENTIPEDE_FLAG(std::string, test_name, "", + "The name of test to pass to the binary to operate on.") +CENTIPEDE_FLAG( + std::vector, extra_binaries, {}, + "A comma-separated list of extra target binaries. These binaries are " + "fed the same inputs as the main binary, but the coverage feedback " + "from them is not collected. Use this e.g. to run the target under " + "sanitizers.") +CENTIPEDE_FLAG(std::string, workdir, "", "The working directory.") +CENTIPEDE_FLAG( + std::string, merge_from, "", + "Another working directory to merge the corpus from. 
Inputs from " + "--merge_from will be added to --workdir if they add new features.") +CENTIPEDE_FLAG(size_t, num_runs, std::numeric_limits::max(), + "Number of inputs to run per shard (see --total_shards).") +CENTIPEDE_FLAG(size_t, total_shards, 1, "Number of shards.") +CENTIPEDE_FLAG(size_t, my_shard_index, 0, + "Index of the first shard, [0, --total_shards - --num_threads].") +CENTIPEDE_FLAG( + size_t, num_threads, 1, + "Number of threads to execute in one process. i-th thread, where i " + "is in [0, --num_threads), will work on shard " + "(--first_shard_index + i).") +CENTIPEDE_FLAG(size_t, j, 0, + "If not 0, --j=N is a shorthand for " + "--num_threads=N --total_shards=N --first_shard_index=0. " + "Overrides values of these flags if they are also used.") +CENTIPEDE_FLAG(size_t, max_len, 4000, + "Max length of mutants. Passed to mutator as a hint.") +CENTIPEDE_FLAG( + size_t, batch_size, 1000, + "The number of inputs given to the target at one time. Batches of " + "more than 1 input are used to amortize the process start-up cost.") +CENTIPEDE_FLAG(size_t, mutate_batch_size, 2, + "Mutate this many inputs to produce batch_size mutants") +CENTIPEDE_FLAG( + bool, use_legacy_default_mutator, false, + "When set, use the legacy ByteArrayMutator as the default mutator. " + "Otherwise, the FuzzTest domain based mutator will be used.") +CENTIPEDE_FLAG( + size_t, load_other_shard_frequency, 10, + "Load a random other shard after processing this many batches. Use 0 " + "to disable loading other shards. For now, choose the value of this " + "flag so that shard loads happen at most once in a few minutes. In " + "future we may be able to find the suitable value automatically.") +// TODO(b/262798184): Remove once the bug is fixed. +CENTIPEDE_FLAG( + bool, serialize_shard_loads, false, + "When this flag is on, shard loading is serialized. " + " Useful to avoid excessive RAM consumption when loading more" + " than one shard at a time.
Currently, loading a single large shard" + " may create too many temporary heap allocations. " + " This means, if we load many large shards concurrently," + " we may run out of RAM.") +CENTIPEDE_FLAG( + size_t, seed, 0, + "A seed for the random number generator. If 0, some other random " + "number is used as seed.") +CENTIPEDE_FLAG( + size_t, prune_frequency, 100, + "Prune the corpus every time after this many inputs were added. If " + "zero, pruning is disabled. Pruning removes redundant inputs from " + "the corpus, e.g. inputs that have only \"frequent\", i.e. " + "uninteresting features. When the corpus gets larger than " + "--max_corpus_size, some random elements may also be removed.") +CENTIPEDE_FLAG( + size_t, address_space_limit_mb, +#ifdef __APPLE__ + // Address space limit is ignored on MacOS. + // Reference: + // https://bugs.chromium.org/p/chromium/issues/detail?id=853873#c2 + 0 +#else + 8192 +#endif + , + "If not zero, instructs the target to set setrlimit(RLIMIT_AS) to " + "this number of megabytes. Some targets (e.g. if built with ASAN, " + "which can't run with RLIMIT_AS) may choose to ignore this flag. See " + "also --rss_limit_mb.") +CENTIPEDE_FLAG( + size_t, rss_limit_mb, 4096, + "If not zero, instructs the target to fail if RSS goes over this " + "number of megabytes and report an OOM. See also " + "--address_space_limit_mb. These two flags have somewhat different " + "meaning. --address_space_limit_mb does not allow the process to " + "grow the used address space beyond the limit. --rss_limit_mb runs a " + "background thread that monitors max RSS and also checks max RSS " + "after executing every input, so it may detect OOM late.
However " + "--rss_limit_mb allows Centipede to *report* an OOM condition in " + "most cases, while --address_space_limit_mb will cause a crash that " + "may be hard to attribute to OOM.") +CENTIPEDE_FLAG( + size_t, stack_limit_kb, 0, + "If not zero, instructs the target to fail if stack usage goes over " + "this number of KiB.") +CENTIPEDE_FLAG( + size_t, timeout_per_input, 60, + "If not zero, the timeout in seconds for a single input. If an input " + "runs longer than this, the runner process will abort. Support may " + "vary depending on the runner.") +CENTIPEDE_FLAG( + size_t, timeout_per_batch, 0, + "If not zero, the collective timeout budget in seconds for a single " + "batch of inputs. Each input in a batch still has up to " + "--timeout_per_input seconds to finish, but the entire batch must " + "finish within --timeout_per_batch seconds. The default is computed " + "as a function of --timeout_per_input * --batch_size. Support may " + "vary depending on the runner.") +CENTIPEDE_FLAG(size_t, ignore_timeout_reports, false, + "If set, will ignore reporting timeouts as errors.") +CENTIPEDE_FLAG( + absl::Time, stop_at, absl::InfiniteFuture(), + "Stop fuzzing in all shards (--total_shards) at approximately this " + "time in ISO-8601/RFC-3339 format, e.g. 2023-04-06T23:35:02Z. " + "If a given shard is still running at that time, it will gracefully " + "wind down by letting the current batch of inputs to finish and then " + "exiting. A special value 'infinite-future' (the default) is " + "supported. Tip: `date` is useful for conversion of mostly free " + "format human readable date/time strings, e.g. " + "--stop_at=$(date --date='next Monday 6pm' --utc --iso-8601=seconds) " + ". Also see --stop_after. These two flags are mutually exclusive.") +CENTIPEDE_FLAG( + bool, fork_server, true, + "If true (default) tries to execute the target(s) via the fork " + "server, if supported by the target(s). Prepend the binary path with " + "'%f' to disable the fork server. 
--fork_server applies to binaries " + "passed via these flags: --binary, --extra_binaries, " + "--input_filter.") +CENTIPEDE_FLAG( + bool, full_sync, false, + "Perform a full corpus sync on startup. If true, feature sets and " + "corpora are read from all shards before fuzzing. This way fuzzing " + "starts with a full knowledge of the current state and will avoid " + "adding duplicating inputs. This however is very expensive when the " + "number of shards is very large.") +CENTIPEDE_FLAG( + bool, use_corpus_weights, true, + "If true, use weighted distribution when choosing the corpus element " + "to mutate. This flag is mostly for Centipede developers.") +CENTIPEDE_FLAG( + bool, use_coverage_frontier, false, + "If true, use coverage frontier when choosing the corpus element to " + "mutate. This flag is mostly for Centipede developers.") +CENTIPEDE_FLAG( + size_t, max_corpus_size, 100000, + "Indicates the number of inputs in the in-memory corpus after which " + "more aggressive pruning will be applied.") +CENTIPEDE_FLAG( + size_t, crossover_level, 50, + "Defines how much crossover is used during mutations. 0 means no " + "crossover, 100 means the most aggressive crossover. See " + "https://en.wikipedia.org/wiki/Crossover_(genetic_algorithm).") +CENTIPEDE_FLAG(bool, use_pc_features, true, + "When available from instrumentation, use features derived from " + "PCs.") +CENTIPEDE_FLAG( + size_t, path_level, 0, // Not ready for wide usage. + "When available from instrumentation, use features derived from " + "bounded execution paths. Be careful, may cause exponential feature " + "explosion. 0 means no path features.
Values between 1 and 100 " + "define how aggressively to use the paths.") +CENTIPEDE_FLAG(bool, use_cmp_features, true, + "When available from instrumentation, use features derived from " + "instrumentation of CMP instructions.") +CENTIPEDE_FLAG( + size_t, callstack_level, 0, + "When available from instrumentation, use features derived from " + "observing the function call stacks. 0 means no callstack features. " + "Values between 1 and 100 define how aggressively to use the " + "callstacks. Level N roughly corresponds to N call frames.") +CENTIPEDE_FLAG(bool, use_auto_dictionary, true, + "If true, use automatically-generated dictionary derived from " + "intercepting comparison instructions, memcmp, and similar.") +CENTIPEDE_FLAG(bool, use_dataflow_features, true, + "When available from instrumentation, use features derived from " + "data flows.") +CENTIPEDE_FLAG( + bool, use_counter_features, false, + "When available from instrumentation, use features derived from " + "counting the number of occurrences of a given PC. When enabled, " + "supersedes --use_pc_features.") +CENTIPEDE_FLAG(bool, use_pcpair_features, false, + "If true, PC pairs are used as additional synthetic features. " + "Experimental, use with care - it may explode the corpus.") +CENTIPEDE_FLAG( + uint64_t, user_feature_domain_mask, ~0UL, + "A bitmask indicating which user feature domains should be enabled. " + "A value of zero will disable all user features.") +CENTIPEDE_FLAG( + size_t, feature_frequency_threshold, 100, + "Internal flag. When a given feature is present in the corpus this " + "many times Centipede will stop recording it for future corpus " + "elements. Larger values will use more RAM but may improve corpus " + "weights.
Valid values are 2 - 255.") +CENTIPEDE_FLAG(bool, require_pc_table, true, + "If true, Centipede will exit if the --pc_table is not found.") +CENTIPEDE_FLAG(bool, require_seeds, false, + "If true, Centipede will exit if no seed inputs are found.") +CENTIPEDE_FLAG( + int, telemetry_frequency, 0, + "Dumping frequency for intermediate telemetry files, i.e. coverage " + "report (workdir/coverage-report-BINARY.*.txt), corpus stats " + "(workdir/corpus-stats-*.json), etc. Positive value N means dump " + "every N batches. Negative N means start dumping after 2^N processed " + "batches with exponential 2x back-off (e.g. for " + "--telemetry_frequency=-5, dump on batches 32, 64, 128,...). Zero " + "means no telemetry. Note that the before-fuzzing and after-fuzzing " + "telemetry are always dumped.") +CENTIPEDE_FLAG(bool, print_runner_log, false, + "If true, runner logs are printed after every batch. Note that " + "crash logs are always printed regardless of this flag's value.") +// TODO(kcc): --distill and several others had better be dedicated binaries. +CENTIPEDE_FLAG( + bool, distill, false, + "Distill (minimize) the --total_shards input shards from --workdir " + "into --num_threads output shards. The input shards are randomly and " + "evenly divided between --num_threads concurrent distillation " + "threads to speed up processing. The threads share and update the " + "global coverage info as they go, so the output shards will never " + "have identical input/feature pairs (some inputs can still be " + "identical if a non-deterministic target produced different features " + "for identical inputs in the corpus). The features.* files are " + "looked up in a --workdir subdirectory that corresponds to " + "--coverage_binary and --binary_hash, if --binary_hash is provided; " + "if it is not provided, the actual hash of the --coverage_binary " + "file on disk is computed and used.
Therefore, with an explicit " + "--binary_hash, --coverage_binary can be just the basename of the " + "actual target binary; without it, it must be the full path. " + "Each distillation thread writes a distilled corpus shard to " + "<--workdir>/distilled-<--coverage_binary basename>..") +CENTIPEDE_FLAG( + size_t, log_features_shards, 0, + "The first --log_features_shards shards will log newly observed " + "features as symbols. In most cases you don't need this to be >= 2.") +CENTIPEDE_FLAG( + std::string, knobs_file, "", + "If not empty, knobs will be read from this (possibly remote) file." + " The feature is experimental, not yet fully functional.") +CENTIPEDE_FLAG( + std::string, corpus_to_files, "", + "Save the remote corpus from working to the given directory, one " + "file per corpus.") +CENTIPEDE_FLAG( + std::string, crashes_to_files, "", + "When set to a directory path, save the crashing reproducers and " + "metadata from the workdir to the given path: Each crash with `ID` " + "will be saved with file `ID.data` for the reproducer, `ID.desc` the " + "description, `ID.sig` the signature. If multiple crashes with the same ID " + "exist, only one crash will be saved.") +CENTIPEDE_FLAG( + std::string, corpus_from_files, "", + "Export a corpus from a local directory with one file per input into " + "the sharded remote corpus in workdir. Not recursive.") +CENTIPEDE_FLAG( + std::vector, corpus_dir, {}, + "Comma-separated list of paths to local corpus dirs, with one file " + "per input. At startup, the files are exported into the corpus in " + "--workdir. While fuzzing, the new corpus elements are written to " + "the first dir if it is not empty. This makes it more convenient to " + "interop with libFuzzer corpora.") +CENTIPEDE_FLAG( + std::string, symbolizer_path, "llvm-symbolizer", + "Path to the symbolizer tool.
By default, we use llvm-symbolizer " + "and assume it is in PATH.") +CENTIPEDE_FLAG( + std::string, objdump_path, "objdump", + "Path to the objdump tool. By default, we use the system objdump " + "and assume it is in PATH.") +CENTIPEDE_FLAG(std::string, runner_dl_path_suffix, "", + "If non-empty, this flag is passed to the Centipede runner. " + "It tells the runner that this dynamic library is instrumented " + "while the main binary is not. " + "The value could be the full path, like '/path/to/my.so' " + "or a suffix, like '/my.so' or 'my.so'. " + "This flag is experimental and may be removed in future") +CENTIPEDE_FLAG( + std::string, input_filter, "", + "Path to a tool that filters bad inputs. The tool is invoked as " + "`input_filter INPUT_FILE` and should return 0 if the input is good " + "and non-0 otherwise. Ignored if empty. The --input_filter is " + "invoked only for inputs that are considered for addition to the " + "corpus.") +CENTIPEDE_FLAG( + std::vector, dictionary, {}, + "A comma-separated list of paths to dictionary files. The dictionary " + "file is either in AFL/libFuzzer plain text format or in the binary " + "Centipede corpus file format. The flag is interpreted by " + "CentipedeCallbacks so its meaning may be different in custom " + "implementations of CentipedeCallbacks.") +CENTIPEDE_FLAG( + std::string, function_filter, "", + "A comma-separated list of functions that fuzzing needs to focus on. " + "If this list is non-empty, the fuzzer will mutate only those inputs " + "that trigger code in one of these functions.") +CENTIPEDE_FLAG( + std::string, for_each_blob, "", + "If non-empty, extracts individual blobs from the files given as " + "arguments, copies each blob to a temporary file, and applies this " + "command to that temporary file. %P is replaced with the temporary " + "file's path and %H is replaced with the blob's hash.
Example:\n" + "$ centipede --for_each_blob='ls -l %P && echo %H' corpus.000000") +CENTIPEDE_FLAG( + std::string, experiment, "", + "A colon-separated list of values, each of which is a flag followed " + "by = and a comma-separated list of values. Example: " + "'foo=1,2,3:bar=10,20'. When non-empty, this flag is used to run an " + "A/B[/C/D...] experiment: different threads will set different " + "values of 'foo' and 'bar' and will run independent fuzzing " + "sessions. If more than one flag is given, all flag combinations are " + "tested. In example above: '--foo=1 --bar=10' ... " + "'--foo=3 --bar=20'. The number of threads should be multiple of the " + "number of flag combinations.") +CENTIPEDE_FLAG( + bool, analyze, false, + "If set, Centipede will read the corpora from the work dirs provided" + " as argv. If two corpora are provided, then analyze differences" + " between those corpora. If one corpus is provided, then save the" + " coverage report to a file within workdir with prefix" + " 'coverage-report-'.") +CENTIPEDE_FLAG(bool, exit_on_crash, false, + "If true, Centipede will exit on the first crash of the target.") +CENTIPEDE_FLAG(size_t, max_num_crash_reports, 5, + "report this many crashes per shard.") +CENTIPEDE_FLAG( + std::string, minimize_crash_file_path, "", + "If non-empty, a path to an input file that triggers a crash." + " Centipede will run the minimization loop and store smaller crashing" + " inputs in workdir/crashes.NNNNNN/, where NNNNNN is " + "--first_shard_index padded on the left with zeros. " + " --num_runs and --num_threads apply. " + " Assumes local workdir.") +CENTIPEDE_FLAG( + bool, batch_triage_suspect_only, false, + "If set, triage the crash on only the suspected input in a crashing " + "batch. 
Otherwise, triage on all the executed inputs") +CENTIPEDE_FLAG( + size_t, shmem_size_mb, 1024, + "Size of the shared memory regions used to communicate between the " + "engine and the runner.") +CENTIPEDE_FLAG( + bool, use_posix_shmem, +#ifdef __APPLE__ + true +#else + false +#endif + , + "[INTERNAL] When true, uses shm_open/shm_unlink instead of " + "memfd_create to allocate shared memory. You may want this if your " + "target doesn't have access to /proc/ subdirs or the " + "memfd_create syscall is not supported.") +CENTIPEDE_FLAG( + bool, dry_run, false, + "Initializes as much of Centipede as possible without actually " + "running any fuzzing. Useful to validate the rest of the command " + "line, verify existence of all the input directories and files, " + "etc. Also useful in combination with --save_config or " + "--update_config to stop execution immediately after writing the " + "(updated) config file.") +CENTIPEDE_FLAG(bool, save_binary_info, false, + "Save the BinaryInfo from the fuzzing run within the working " + "directory.") +CENTIPEDE_FLAG( + bool, populate_binary_info, true, + "Get binary info from a coverage instrumented binary. This should " + "only be turned off when coverage is not based on instrumenting some " + "binary.") +CENTIPEDE_FLAG( + bool, riegeli, +#ifdef CENTIPEDE_DISABLE_RIEGELI + false +#else + true +#endif + , + "Use Riegeli file format (instead of the legacy bespoke encoding) " + "for storage") +CENTIPEDE_FLAG(bool, first_corpus_dir_output_only, false, + "If set, treat the first entry of `corpus_dir` as output-only. " + "For FuzzTest framework only, do not use from end-users.") +CENTIPEDE_FLAG(bool, load_shards_only, false, + "If set, load/merge shards without fuzzing new inputs. For " + "FuzzTest framework only, do not use from end-users.") +CENTIPEDE_FLAG( + bool, fuzztest_single_test_mode, false, + "If set, operate on the corpus database for a single test specified by " + "FuzzTest instead of all the tests.
For FuzzTest framework only, do not " + "use from end-users.") +CENTIPEDE_FLAG( + std::string, fuzztest_configuration, "", + "If set, deserializes the FuzzTest configuration from the value as a " + "base64url string instead of querying the configuration via runner " + "callbacks. For FuzzTest framework only, do not use from end-users.") +CENTIPEDE_FLAG( + bool, list_crash_ids, false, + "If set, lists the crash IDs of a single test of the binary to the " + "`list_crash_ids_file` with each crash ID in a single line. If there is no " + "crash for the test, the empty content will be written to the file. For " + "FuzzTest framework only, do not use from end-users.") +CENTIPEDE_FLAG(std::string, list_crash_ids_file, "", + "The path to list the crash IDs for `list_crash_ids`. For " + "FuzzTest framework only, do not use from end-users.") +CENTIPEDE_FLAG(std::string, crash_id, "", + "The crash ID used for `replay_crash` or `export_crash`. For " + "FuzzTest framework only, do not use from end-users.") +CENTIPEDE_FLAG(bool, replay_crash, false, + "If set, replay `crash_id` in the corpus database. For FuzzTest " + "framework only, do not use from end-users.") +CENTIPEDE_FLAG( + bool, export_crash, false, + "If set, export the input contents of `crash_id` from the corpus database. " + "For FuzzTest framework only, do not use from end-users.") +CENTIPEDE_FLAG( + std::string, export_crash_file, "", + "The path to export the input contents of `crash_id` for `export_crash`. " + "For FuzzTest framework only, do not use from end-users.") diff --git a/src/third_party/fuzztest/dist/centipede/centipede_interface.cc b/src/third_party/fuzztest/dist/centipede/centipede_interface.cc new file mode 100644 index 00000000000..fbf90db97b3 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/centipede_interface.cc @@ -0,0 +1,918 @@ +// Copyright 2022 The Centipede Authors.
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "./centipede/centipede_interface.h" + +#include + +#include +#include +#include +#include +#include +#include +#include // NOLINT +#include +#include +#include +#include +#include +#include + +#include "absl/base/optimization.h" +#include "absl/cleanup/cleanup.h" +#include "absl/container/flat_hash_set.h" +#include "absl/log/check.h" +#include "absl/log/log.h" +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "absl/strings/ascii.h" +#include "absl/strings/escaping.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_format.h" +#include "absl/strings/str_join.h" +#include "absl/strings/str_replace.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "absl/types/span.h" +#include "./centipede/analyze_corpora.h" +#include "./centipede/binary_info.h" +#include "./centipede/centipede.h" +#include "./centipede/centipede_callbacks.h" +#include "./centipede/command.h" +#include "./centipede/coverage.h" +#include "./centipede/crash_summary.h" +#include "./centipede/distill.h" +#include "./centipede/environment.h" +#include "./centipede/minimize_crash.h" +#include "./centipede/pc_info.h" +#include "./centipede/periodic_action.h" +#include "./centipede/runner_result.h" +#include "./centipede/seed_corpus_maker_lib.h" +#include "./centipede/stats.h" +#include "./centipede/stop.h" +#include "./centipede/thread_pool.h" +#include "./centipede/util.h" 
+#include "./centipede/workdir.h" +#include "./common/bazel.h" +#include "./common/blob_file.h" +#include "./common/defs.h" +#include "./common/hash.h" +#include "./common/logging.h" // IWYU pragma: keep +#include "./common/remote_file.h" +#include "./common/status_macros.h" +#include "./fuzztest/internal/configuration.h" + +namespace fuzztest::internal { + +namespace { + +// Sets signal handler for SIGINT. +// TODO(b/378532202): Replace this with a more generic mechanism that allows +// the caller of `CentipedeMain()` to indicate when to stop. +void SetSignalHandlers() { + struct sigaction sigact = {}; + sigact.sa_flags = SA_ONSTACK; + sigact.sa_handler = [](int received_signum) { + if (received_signum == SIGINT) { + LOG(INFO) << "Ctrl-C pressed: winding down"; + RequestEarlyStop(EXIT_FAILURE); + return; + } + ABSL_UNREACHABLE(); + }; + sigaction(SIGINT, &sigact, nullptr); +} + +// Runs env.for_each_blob on every blob extracted from env.args. +// Returns EXIT_SUCCESS on success, EXIT_FAILURE otherwise. +int ForEachBlob(const Environment &env) { + auto tmpdir = TemporaryLocalDirPath(); + CreateLocalDirRemovedAtExit(tmpdir); + std::string tmpfile = std::filesystem::path(tmpdir).append("t"); + + for (const auto &arg : env.args) { + LOG(INFO) << "Running '" << env.for_each_blob << "' on " << arg; + auto blob_reader = DefaultBlobFileReaderFactory(); + absl::Status open_status = blob_reader->Open(arg); + if (!open_status.ok()) { + LOG(INFO) << "Failed to open " << arg << ": " << open_status; + return EXIT_FAILURE; + } + ByteSpan blob; + while (blob_reader->Read(blob) == absl::OkStatus()) { + ByteArray bytes; + bytes.insert(bytes.begin(), blob.data(), blob.end()); + // TODO(kcc): [impl] add a variant of WriteToLocalFile that accepts Span.
+ WriteToLocalFile(tmpfile, bytes); + std::string command_line = absl::StrReplaceAll( + env.for_each_blob, {{"%P", tmpfile}, {"%H", Hash(bytes)}}); + Command cmd(command_line); + // TODO(kcc): [as-needed] this creates one process per blob. + // If this flag gets active use, we may want to define special cases, + // e.g. if for_each_blob=="cp %P /some/where" we can do it in-process. + cmd.Execute(); + if (ShouldStop()) return ExitCode(); + } + } + return EXIT_SUCCESS; +} + +// Loads corpora from work dirs provided in `env.args`, if there are two args +// provided, analyzes differences. If there is one arg provided, reports the +// function coverage. Returns EXIT_SUCCESS on success, EXIT_FAILURE otherwise. +int Analyze(const Environment &env) { + LOG(INFO) << "Analyze " << absl::StrJoin(env.args, ","); + CHECK(!env.binary.empty()) << "--binary must be used"; + if (env.args.size() == 1) { + const CoverageResults coverage_results = + GetCoverage(env.binary_name, env.binary_hash, env.args[0]); + WorkDir workdir{env}; + const std::string coverage_report_path = + workdir.CoverageReportPath(/*annotation=*/""); + DumpCoverageReport(coverage_results, coverage_report_path); + } else if (env.args.size() == 2) { + AnalyzeCorporaToLog(env.binary_name, env.binary_hash, env.args[0], + env.args[1]); + } else { + LOG(FATAL) << "for now, --analyze supports only 1 or 2 work dirs; got " + << env.args.size(); + } + return EXIT_SUCCESS; +} + +void SavePCTableToFile(const PCTable &pc_table, std::string_view file_path) { + WriteToLocalFile(file_path, AsByteSpan(pc_table)); +} + +BinaryInfo PopulateBinaryInfoAndSavePCsIfNecessary( + const Environment &env, CentipedeCallbacksFactory &callbacks_factory, + std::string &pcs_file_path) { + BinaryInfo binary_info; + // Some fuzz targets have coverage not based on instrumenting binaries. + // For those target, we should not populate binary info. 
+ if (env.populate_binary_info) { + ScopedCentipedeCallbacks scoped_callbacks(callbacks_factory, env); + scoped_callbacks.callbacks()->PopulateBinaryInfo(binary_info); + } + if (env.save_binary_info) { + const std::string binary_info_dir = WorkDir{env}.BinaryInfoDirPath(); + CHECK_OK(RemoteMkdir(binary_info_dir)); + LOG(INFO) << "Serializing binary info to: " << binary_info_dir; + binary_info.Write(binary_info_dir); + } + if (binary_info.uses_legacy_trace_pc_instrumentation) { + pcs_file_path = std::filesystem::path(TemporaryLocalDirPath()) / "pcs"; + SavePCTableToFile(binary_info.pc_table, pcs_file_path); + } + if (env.use_pcpair_features) { + CHECK(!binary_info.pc_table.empty()) + << "--use_pcpair_features requires non-empty pc_table"; + } + return binary_info; +} + +std::vector CreateEnvironmentsForThreads( + const Environment &origin_env, std::string_view pcs_file_path) { + std::vector envs(origin_env.num_threads, origin_env); + size_t thread_idx = 0; + for (auto &env : envs) { + env.my_shard_index += thread_idx++; + env.UpdateForExperiment(); + env.pcs_file_path = pcs_file_path; + } + return envs; +} + +int Fuzz(const Environment &env, const BinaryInfo &binary_info, + std::string_view pcs_file_path, + CentipedeCallbacksFactory &callbacks_factory) { + CoverageLogger coverage_logger(binary_info.pc_table, binary_info.symbols); + + std::vector envs = + CreateEnvironmentsForThreads(env, pcs_file_path); + std::vector> stats_vec(env.num_threads); + + // Start periodic stats dumping and, optionally, logging. 
+ std::vector stats_reporters; + stats_reporters.emplace_back( + [csv_appender = StatsCsvFileAppender{stats_vec, envs}]() mutable { + csv_appender.ReportCurrStats(); + }, + PeriodicAction::Options{ + /*sleep_before_each=*/ + [](size_t iteration) { + return absl::Minutes(std::clamp(iteration, 0UL, 10UL)); + }, + }); + if (!envs.front().experiment.empty() || ABSL_VLOG_IS_ON(1)) { + stats_reporters.emplace_back( + [logger = StatsLogger{stats_vec, envs}]() mutable { + logger.ReportCurrStats(); + }, + PeriodicAction::Options{ + /*sleep_before_each=*/ + [](size_t iteration) { + return absl::Seconds(std::clamp(iteration, 5UL, 600UL)); + }, + }); + } + + auto fuzzing_worker = + [&env, &callbacks_factory, &binary_info, &coverage_logger]( + Environment &my_env, std::atomic &stats, bool create_tmpdir) { + if (create_tmpdir) CreateLocalDirRemovedAtExit(TemporaryLocalDirPath()); + // Uses TID, call in this thread. + my_env.seed = GetRandomSeed(env.seed); + + if (env.dry_run) return; + + ScopedCentipedeCallbacks scoped_callbacks(callbacks_factory, my_env); + Centipede centipede(my_env, *scoped_callbacks.callbacks(), binary_info, + coverage_logger, stats); + centipede.FuzzingLoop(); + }; + + if (env.num_threads == 1) { + // When fuzzing with one thread, run fuzzing loop in the current + // thread. This is because FuzzTest/Centipede's single-process + // fuzzing requires the test body, which is invoked by the fuzzing + // loop, to run in the main thread. + // + // Here, the fuzzing worker should not re-create the tmpdir since the path + // is thread-local and it has been created in the current function. 
+ fuzzing_worker(envs[0], stats_vec[0], /*create_tmpdir=*/false); + } else { + ThreadPool fuzzing_worker_threads{static_cast(env.num_threads)}; + for (size_t thread_idx = 0; thread_idx < env.num_threads; thread_idx++) { + Environment &my_env = envs[thread_idx]; + std::atomic &my_stats = stats_vec[thread_idx]; + fuzzing_worker_threads.Schedule([&fuzzing_worker, &my_env, &my_stats]() { + fuzzing_worker(my_env, my_stats, /*create_tmpdir=*/true); + }); + } // All `fuzzing_worker_threads` join here. + } + + for (auto &reporter : stats_reporters) { + // Nudge one final update and stop the reporting thread. + reporter.Nudge(); + reporter.Stop(); + } + + if (!env.knobs_file.empty()) PrintRewardValues(stats_vec, std::cerr); + + return ExitCode(); +} + + +TestShard SetUpTestSharding() { + TestShard test_shard = GetBazelTestShard(); + // Update the shard status file to indicate that we support test sharding. + // It suffices to update the file's modification time, but we clear the + // contents for simplicity. This is also what the GoogleTest framework does. + if (const char *test_shard_status_file = + std::getenv("TEST_SHARD_STATUS_FILE"); + test_shard_status_file != nullptr) { + ClearLocalFileContents(test_shard_status_file); + } + return test_shard; +} + +// Prunes non-reproducible and duplicate crashes and returns the crash +// signatures of the remaining crashes. +absl::flat_hash_set PruneOldCrashesAndGetRemainingCrashSignatures( + const std::filesystem::path &crashing_dir, const Environment &env, + CentipedeCallbacksFactory &callbacks_factory, CrashSummary &crash_summary) { + const std::vector crashing_input_files = + // The corpus database layout assumes the crash input files are located + // directly in the crashing subdirectory, so we don't list recursively. 
+ ValueOrDie(RemoteListFiles(crashing_dir.c_str(), /*recursively=*/false)); + ScopedCentipedeCallbacks scoped_callbacks(callbacks_factory, env); + BatchResult batch_result; + absl::flat_hash_set remaining_crash_signatures; + + for (const std::string &crashing_input_file : crashing_input_files) { + ByteArray crashing_input; + CHECK_OK(RemoteFileGetContents(crashing_input_file, crashing_input)); + const bool is_reproducible = !scoped_callbacks.callbacks()->Execute( + env.binary, {crashing_input}, batch_result); + const bool is_duplicate = + is_reproducible && !batch_result.IsSetupFailure() && + !remaining_crash_signatures.insert(batch_result.failure_signature()) + .second; + if (!is_reproducible || batch_result.IsSetupFailure() || is_duplicate) { + CHECK_OK(RemotePathDelete(crashing_input_file, /*recursively=*/false)); + } else { + crash_summary.AddCrash( + {std::filesystem::path(crashing_input_file).filename(), + /*category=*/batch_result.failure_description(), + batch_result.failure_signature(), + batch_result.failure_description()}); + CHECK_OK(RemotePathTouchExistingFile(crashing_input_file)); + } + } + return remaining_crash_signatures; +} + +// TODO(b/405382531): Add unit tests once the function is unit-testable. +void DeduplicateAndStoreNewCrashes( + const std::filesystem::path &crashing_dir, const WorkDir &workdir, + size_t total_shards, absl::flat_hash_set crash_signatures, + CrashSummary &crash_summary) { + for (size_t shard_idx = 0; shard_idx < total_shards; ++shard_idx) { + const std::vector new_crashing_input_files = + // The crash reproducer directory may contain subdirectories with + // input files that don't individually cause a crash. We ignore those + // for now and don't list the files recursively. 
+ ValueOrDie( + RemoteListFiles(workdir.CrashReproducerDirPaths().Shard(shard_idx), + /*recursively=*/false)); + const std::filesystem::path crash_metadata_dir = + workdir.CrashMetadataDirPaths().Shard(shard_idx); + + CHECK_OK(RemoteMkdir(crashing_dir.c_str())); + for (const std::string &crashing_input_file : new_crashing_input_files) { + const std::string crashing_input_file_name = + std::filesystem::path(crashing_input_file).filename(); + const std::string crash_signature_path = + crash_metadata_dir / absl::StrCat(crashing_input_file_name, ".sig"); + std::string new_crash_signature; + const absl::Status status = + RemoteFileGetContents(crash_signature_path, new_crash_signature); + if (!status.ok()) { + LOG(WARNING) << "Ignoring crashing input " << crashing_input_file_name + << " due to failure to read the crash signature: " + << status; + continue; + } + const bool is_duplicate = + !crash_signatures.insert(new_crash_signature).second; + if (is_duplicate) continue; + + const std::string crash_description_path = + crash_metadata_dir / absl::StrCat(crashing_input_file_name, ".desc"); + std::string new_crash_description; + const absl::Status description_status = + RemoteFileGetContents(crash_description_path, new_crash_description); + if (!description_status.ok()) { + LOG(WARNING) + << "Failed to read crash description for " + << crashing_input_file_name + << ". Will use the crash signature as the description. Status: " + << description_status; + new_crash_description = new_crash_signature; + } + crash_summary.AddCrash({crashing_input_file_name, + /*category=*/new_crash_description, + std::move(new_crash_signature), + new_crash_description}); + CHECK_OK( + RemoteFileRename(crashing_input_file, + (crashing_dir / crashing_input_file_name).c_str())); + } + } +} + +// Seeds the corpus files in `env.workdir` with the inputs in `regression_dir` +// (always used) and the previously distilled corpus files from `coverage_dir` +// (used if non-empty). 
+SeedCorpusConfig GetSeedCorpusConfig(const Environment &env, + std::string_view regression_dir, + std::string_view coverage_dir) { + const WorkDir workdir{env}; + SeedCorpusSource regression; + regression.dir_glob = std::string(regression_dir); + regression.num_recent_dirs = 1; + regression.individual_input_rel_glob = "*"; + regression.sampled_fraction_or_count = 1.0f; + std::vector sources = {std::move(regression)}; + if (!coverage_dir.empty()) { + SeedCorpusSource coverage; + coverage.dir_glob = std::string(coverage_dir); + coverage.num_recent_dirs = 1; + // We're using the previously distilled corpus files as seeds. + coverage.shard_rel_glob = + std::filesystem::path{ + workdir.DistilledCorpusFilePaths().AllShardsGlob()} + .filename(); + coverage.individual_input_rel_glob = "*"; + coverage.sampled_fraction_or_count = 1.0f; + sources.push_back(std::move(coverage)); + } + SeedCorpusDestination destination; + destination.dir_path = env.workdir; + // We're seeding the current corpus files. + destination.shard_rel_glob = + std::filesystem::path{workdir.CorpusFilePaths().AllShardsGlob()} + .filename(); + destination.shard_index_digits = WorkDir::kDigitsInShardIndex; + destination.num_shards = static_cast(env.num_threads); + return { + std::move(sources), + std::move(destination), + }; +} + +absl::Duration ReadFuzzingTime(std::string_view fuzzing_time_file) { + std::string fuzzing_time_str; + CHECK_OK(RemoteFileGetContents(fuzzing_time_file, fuzzing_time_str)); + absl::Duration fuzzing_time; + if (!absl::ParseDuration(absl::StripAsciiWhitespace(fuzzing_time_str), + &fuzzing_time)) { + LOG(WARNING) << "Failed to parse fuzzing time of a resuming fuzz test: '" + << fuzzing_time_str << "'. 
Assuming no fuzzing time so far."; + return absl::ZeroDuration(); + } + return fuzzing_time; +} + +PeriodicAction RecordFuzzingTime(std::string_view fuzzing_time_file, + absl::Time start_time) { + return {[=] { + absl::Status status = RemoteFileSetContents( + fuzzing_time_file, + absl::FormatDuration(absl::Now() - start_time)); + LOG_IF(WARNING, !status.ok()) + << "Failed to write fuzzing time: " << status; + }, + PeriodicAction::ZeroDelayConstInterval(absl::Seconds(15))}; +} + +// TODO(b/368325638): Add tests for this. +int UpdateCorpusDatabaseForFuzzTests( + Environment env, const fuzztest::internal::Configuration &fuzztest_config, + CentipedeCallbacksFactory &callbacks_factory) { + env.UpdateWithTargetConfig(fuzztest_config); + + absl::Time start_time = absl::Now(); + LOG(INFO) << "Starting the update of the corpus database for fuzz tests:" + << "\nBinary: " << env.binary + << "\nCorpus database: " << fuzztest_config.corpus_database; + + // Step 1: Preliminary set up of test sharding, binary info, etc. + const auto [test_shard_index, total_test_shards] = SetUpTestSharding(); + const auto corpus_database_path = + std::filesystem::path(fuzztest_config.corpus_database) / + fuzztest_config.binary_identifier; + const auto stats_root_path = + fuzztest_config.stats_root.empty() + ? std::filesystem::path() + : std::filesystem::path(fuzztest_config.stats_root) / + fuzztest_config.binary_identifier; + const auto workdir_root_path = + fuzztest_config.workdir_root.empty() + ? 
corpus_database_path + : std::filesystem::path(fuzztest_config.workdir_root) / + fuzztest_config.binary_identifier; + const auto execution_stamp = [] { + std::string stamp = + absl::FormatTime("%Y-%m-%d-%H-%M-%S", absl::Now(), absl::UTCTimeZone()); + return stamp; + }(); + std::vector fuzz_tests_to_run; + if (env.fuzztest_single_test_mode) { + CHECK(fuzztest_config.fuzz_tests_in_current_shard.size() == 1) + << "Must select exactly one fuzz test when running in the single test " + "mode"; + fuzz_tests_to_run = fuzztest_config.fuzz_tests_in_current_shard; + } else { + for (int i = 0; i < fuzztest_config.fuzz_tests.size(); ++i) { + if (i % total_test_shards == test_shard_index) { + fuzz_tests_to_run.push_back(fuzztest_config.fuzz_tests[i]); + } + } + } + LOG(INFO) << "Fuzz tests to run:" << absl::StrJoin(fuzz_tests_to_run, ", "); + + const bool is_workdir_specified = !env.workdir.empty(); + CHECK(!is_workdir_specified || env.fuzztest_single_test_mode); + // When env.workdir is empty, the full workdir paths will be formed by + // appending the fuzz test names to the base workdir path. We use different + // path when only replaying to avoid replaying an unfinished fuzzing sessions. + const auto base_workdir_path = + is_workdir_specified + ? std::filesystem::path{} // Will not be used. + : workdir_root_path / + absl::StrFormat("workdir%s.%03d", + fuzztest_config.only_replay ? "-replay" : "", + test_shard_index); + // There's no point in saving the binary info to the workdir, since the + // workdir is deleted at the end. + env.save_binary_info = false; + std::string pcs_file_path; + BinaryInfo binary_info = PopulateBinaryInfoAndSavePCsIfNecessary( + env, callbacks_factory, pcs_file_path); + + LOG(INFO) << "Test shard index: " << test_shard_index + << " Total test shards: " << total_test_shards; + + // Step 2: Iterate over the fuzz tests and run them. 
+ const std::string binary = env.binary; + for (int i = 0; i < fuzz_tests_to_run.size(); ++i) { + // Clean up previous stop requests. stop_time will be set later. + ClearEarlyStopRequestAndSetStopTime(/*stop_time=*/absl::InfiniteFuture()); + if (!env.fuzztest_single_test_mode && + fuzztest_config.GetTimeLimitPerTest() < absl::InfiniteDuration()) { + const absl::Duration test_time_limit = + fuzztest_config.GetTimeLimitPerTest(); + const absl::Status has_enough_time = VerifyBazelHasEnoughTimeToRunTest( + start_time, test_time_limit, + /*executed_tests_in_shard=*/i, fuzztest_config.fuzz_tests.size()); + CHECK_OK(has_enough_time) + << "Not enough time for running the fuzz test " + << fuzz_tests_to_run[i] << " for " << test_time_limit; + } + if (!is_workdir_specified) { + env.workdir = base_workdir_path / fuzz_tests_to_run[i]; + } + const auto execution_id_path = + (base_workdir_path / + absl::StrCat(fuzz_tests_to_run[i], ".execution_id")) + .string(); + + bool is_resuming = false; + if (!is_workdir_specified && fuzztest_config.execution_id.has_value()) { + // Use the execution IDs to resume or skip tests. + const bool execution_id_matched = [&] { + if (!RemotePathExists(execution_id_path)) return false; + CHECK(!RemotePathIsDirectory(execution_id_path)); + std::string prev_execution_id; + CHECK_OK(RemoteFileGetContents(execution_id_path, prev_execution_id)); + return prev_execution_id == *fuzztest_config.execution_id; + }(); + if (execution_id_matched) { + // If execution IDs match but the previous coverage is missing, it means + // the test was previously finished, and we skip running for the test. + if (!RemotePathExists(WorkDir{env}.CoverageDirPath())) { + LOG(INFO) << "Skipping running the fuzz test " + << fuzz_tests_to_run[i]; + continue; + } + // If execution IDs match and the previous coverage exists, it means + // the same workflow got interrupted when running the test. So we resume + // the test. 
+ is_resuming = true; + LOG(INFO) << "Resuming running the fuzz test " << fuzz_tests_to_run[i]; + } else { + // If the execution IDs mismatch, we start a new run. + is_resuming = false; + LOG(INFO) << "Starting a new run of the fuzz test " + << fuzz_tests_to_run[i]; + } + } + if (RemotePathExists(env.workdir) && !is_resuming) { + // This could be a workdir from a failed run that used a different version + // of the binary. We delete it so that we don't have to deal with + // the assumptions under which it is safe to reuse an old workdir. + CHECK_OK(RemotePathDelete(env.workdir, /*recursively=*/true)); + } + const WorkDir workdir{env}; + CHECK_OK(RemoteMkdir( + workdir.CoverageDirPath())); // Implicitly creates the workdir + + // Updating execution ID must be after creating the coverage dir. Otherwise + // if it fails to create coverage dir after updating execution ID, next + // attempt would skip this test. + if (!is_workdir_specified && fuzztest_config.execution_id.has_value() && + !is_resuming) { + CHECK_OK(RemoteFileSetContents(execution_id_path, + *fuzztest_config.execution_id)); + } + + absl::Cleanup clean_up_workdir = [is_workdir_specified, &env] { + if (!is_workdir_specified && !EarlyStopRequested()) { + CHECK_OK(RemotePathDelete(env.workdir, /*recursively=*/true)); + } + }; + + const std::filesystem::path fuzztest_db_path = + corpus_database_path / fuzz_tests_to_run[i]; + const std::filesystem::path regression_dir = + fuzztest_db_path / "regression"; + const std::filesystem::path coverage_dir = fuzztest_db_path / "coverage"; + + // Seed the fuzzing session with the latest coverage corpus and regression + // inputs from the previous fuzzing session. + if (!is_resuming) { + CHECK_OK(GenerateSeedCorpusFromConfig( + GetSeedCorpusConfig(env, regression_dir.c_str(), + fuzztest_config.replay_coverage_inputs + ? 
coverage_dir.c_str() + : ""), + env.binary_name, env.binary_hash)) + << "while generating the seed corpus"; + } + + if (!env.fuzztest_single_test_mode) { + // TODO: b/338217594 - Call the FuzzTest binary in a flag-agnostic way. + constexpr std::string_view kFuzzTestFuzzFlag = "--fuzz="; + constexpr std::string_view kFuzzTestReplayCorpusFlag = + "--replay_corpus="; + std::string_view test_selection_flag = fuzztest_config.only_replay + ? kFuzzTestReplayCorpusFlag + : kFuzzTestFuzzFlag; + env.binary = + absl::StrCat(binary, " ", test_selection_flag, fuzz_tests_to_run[i]); + } + + absl::Duration time_limit = fuzztest_config.GetTimeLimitPerTest(); + absl::Duration time_spent = absl::ZeroDuration(); + const std::string fuzzing_time_file = + std::filesystem::path(env.workdir) / "fuzzing_time"; + if (is_resuming && RemotePathExists(fuzzing_time_file)) { + time_spent = ReadFuzzingTime(fuzzing_time_file); + time_limit = std::max(time_limit - time_spent, absl::ZeroDuration()); + } + is_resuming = false; + + if (EarlyStopRequested()) { + LOG(INFO) << "Skipping test " << fuzz_tests_to_run[i] + << " because early stop requested."; + continue; + } + + LOG(INFO) << (fuzztest_config.only_replay ? 
"Replaying " : "Fuzzing ") + << fuzz_tests_to_run[i] << " for " << time_limit + << "\n\tTest binary: " << env.binary; + + const absl::Time start_time = absl::Now(); + ClearEarlyStopRequestAndSetStopTime(/*stop_time=*/start_time + time_limit); + PeriodicAction record_fuzzing_time = + RecordFuzzingTime(fuzzing_time_file, start_time - time_spent); + Fuzz(env, binary_info, pcs_file_path, callbacks_factory); + record_fuzzing_time.Nudge(); + record_fuzzing_time.Stop(); + + if (!stats_root_path.empty()) { + const auto stats_dir = stats_root_path / fuzz_tests_to_run[i]; + CHECK_OK(RemoteMkdir(stats_dir.c_str())); + CHECK_OK(RemoteFileRename( + workdir.FuzzingStatsPath(), + (stats_dir / absl::StrCat("fuzzing_stats_", execution_stamp)) + .c_str())); + } + + if (EarlyStopRequested()) { + LOG(INFO) << "Skip updating corpus database due to early stop requested."; + continue; + } + + // TODO(xinhaoyuan): Have a separate flag to skip corpus updating instead + // of checking whether workdir is specified or not. + if (fuzztest_config.only_replay || is_workdir_specified) continue; + + // Distill and store the coverage corpus. + Distill(env); + if (RemotePathExists(coverage_dir.c_str())) { + // In the future, we will store k latest coverage corpora for some k, but + // for now we only keep the latest one. + CHECK_OK(RemotePathDelete(coverage_dir.c_str(), /*recursively=*/true)); + } + CHECK_OK(RemoteMkdir(coverage_dir.c_str())); + std::vector distilled_corpus_files; + CHECK_OK(RemoteGlobMatch(workdir.DistilledCorpusFilePaths().AllShardsGlob(), + distilled_corpus_files)); + for (const std::string &corpus_file : distilled_corpus_files) { + const std::string file_name = + std::filesystem::path(corpus_file).filename(); + CHECK_OK( + RemoteFileRename(corpus_file, (coverage_dir / file_name).c_str())); + } + + // Deduplicate and update the crashing inputs. 
+ CrashSummary crash_summary{fuzztest_config.binary_identifier, + fuzz_tests_to_run[i]}; + const std::filesystem::path crashing_dir = fuzztest_db_path / "crashing"; + absl::flat_hash_set crash_signatures = + PruneOldCrashesAndGetRemainingCrashSignatures( + crashing_dir, env, callbacks_factory, crash_summary); + DeduplicateAndStoreNewCrashes(crashing_dir, workdir, env.total_shards, + std::move(crash_signatures), crash_summary); + crash_summary.Report(&std::cerr); + } + + return EXIT_SUCCESS; +} + +int ListCrashIds(const Environment &env, + const fuzztest::internal::Configuration &target_config) { + CHECK(!env.list_crash_ids_file.empty()) + << "Need list_crash_ids_file to be set for listing crash IDs"; + CHECK_EQ(target_config.fuzz_tests_in_current_shard.size(), 1); + std::vector crash_paths; + // TODO: b/406003594 - move the path construction to a library. + const auto crash_dir = std::filesystem::path(target_config.corpus_database) / + target_config.binary_identifier / + target_config.fuzz_tests_in_current_shard[0] / + "crashing"; + if (RemotePathExists(crash_dir.string())) { + CHECK(RemotePathIsDirectory(crash_dir.string())) + << "Crash dir " << crash_dir << " in the corpus database " + << target_config.corpus_database << " is not a directory"; + crash_paths = + ValueOrDie(RemoteListFiles(crash_dir.string(), /*recursively=*/false)); + } + std::vector results; + results.reserve(crash_paths.size()); + for (const auto &crash_path : crash_paths) { + std::string crash_id = std::filesystem::path{crash_path}.filename(); + results.push_back(std::move(crash_id)); + } + CHECK_OK(RemoteFileSetContents(env.list_crash_ids_file, + absl::StrJoin(results, "\n"))); + return EXIT_SUCCESS; +} + +int ReplayCrash(const Environment &env, + const fuzztest::internal::Configuration &target_config, + CentipedeCallbacksFactory &callbacks_factory) { + CHECK(!env.crash_id.empty()) << "Need crash_id to be set for replay a crash"; + CHECK(target_config.fuzz_tests_in_current_shard.size() == 1) + 
<< "Expecting exactly one test for replay_crash"; + // TODO: b/406003594 - move the path construction to a library. + const auto crash_dir = std::filesystem::path(target_config.corpus_database) / + target_config.binary_identifier / + target_config.fuzz_tests_in_current_shard[0] / + "crashing"; + const WorkDir workdir{env}; + SeedCorpusSource crash_corpus_source; + crash_corpus_source.dir_glob = crash_dir; + crash_corpus_source.num_recent_dirs = 1; + crash_corpus_source.individual_input_rel_glob = env.crash_id; + crash_corpus_source.sampled_fraction_or_count = 1.0f; + const SeedCorpusConfig crash_corpus_config = { + /*sources=*/{crash_corpus_source}, + /*destination=*/{ + /*dir_path=*/env.workdir, + /*shard_rel_glob=*/ + std::filesystem::path{workdir.CorpusFilePaths().AllShardsGlob()} + .filename(), + /*shard_index_digits=*/WorkDir::kDigitsInShardIndex, + /*num_shards=*/1}}; + CHECK_OK(GenerateSeedCorpusFromConfig(crash_corpus_config, env.binary_name, + env.binary_hash)); + Environment run_crash_env = env; + run_crash_env.load_shards_only = true; + return Fuzz(run_crash_env, {}, "", callbacks_factory); +} + +int ExportCrash(const Environment &env, + const fuzztest::internal::Configuration &target_config) { + CHECK(!env.crash_id.empty()) + << "Need crash_id to be set for exporting a crash"; + CHECK(!env.export_crash_file.empty()) + << "Need export_crash_file to be set for exporting a crash"; + CHECK(target_config.fuzz_tests_in_current_shard.size() == 1) + << "Expecting exactly one test for exporting a crash"; + // TODO: b/406003594 - move the path construction to a library. 
+ const auto crash_dir = std::filesystem::path(target_config.corpus_database) / + target_config.binary_identifier / + target_config.fuzz_tests_in_current_shard[0] / + "crashing"; + std::string crash_contents; + const auto read_status = + RemoteFileGetContents((crash_dir / env.crash_id).c_str(), crash_contents); + if (!read_status.ok()) { + LOG(ERROR) << "Failed reading the crash " << env.crash_id << " from " + << crash_dir.c_str() << ": " << read_status; + return EXIT_FAILURE; + } + const auto write_status = + RemoteFileSetContents(env.export_crash_file, crash_contents); + if (!write_status.ok()) { + LOG(ERROR) << "Failed write the crash " << env.crash_id << " to " + << env.export_crash_file << ": " << write_status; + return EXIT_FAILURE; + } + return EXIT_SUCCESS; +} + +} // namespace + +int CentipedeMain(const Environment &env, + CentipedeCallbacksFactory &callbacks_factory) { + ClearEarlyStopRequestAndSetStopTime(env.stop_at); + SetSignalHandlers(); + + if (!env.corpus_to_files.empty()) { + Centipede::CorpusToFiles(env, env.corpus_to_files); + return EXIT_SUCCESS; + } + + if (!env.crashes_to_files.empty()) { + const auto status = Centipede::CrashesToFiles(env, env.crashes_to_files); + if (status.ok()) return EXIT_SUCCESS; + LOG(ERROR) << "Got error when exporting crashes to files: " << status; + return EXIT_FAILURE; + } + + if (!env.for_each_blob.empty()) return ForEachBlob(env); + + if (!env.minimize_crash_file_path.empty()) { + ByteArray crashy_input; + ReadFromLocalFile(env.minimize_crash_file_path, crashy_input); + return MinimizeCrash(crashy_input, env, callbacks_factory); + } + + // Just export the corpus from a local dir and exit. + if (!env.corpus_from_files.empty()) { + Centipede::CorpusFromFiles(env, env.corpus_from_files); + return EXIT_SUCCESS; + } + + // Export the corpus from a local dir and then fuzz. 
+ if (!env.corpus_dir.empty()) { + for (size_t i = 0; i < env.corpus_dir.size(); ++i) { + const auto &corpus_dir = env.corpus_dir[i]; + if (i > 0 || !env.first_corpus_dir_output_only) + Centipede::CorpusFromFiles(env, corpus_dir); + } + } + + if (env.distill) return Distill(env); + + // Create the local temporary dir once, before creating any threads. The + // temporary dir must typically exist before `CentipedeCallbacks` can be used. + const auto tmpdir = TemporaryLocalDirPath(); + CreateLocalDirRemovedAtExit(tmpdir); + + // Enter the update corpus database mode only if we have a binary to invoke + // and a corpus database to update. + // We don't update the corpus database for standalone binaries (i.e., when + // `env.has_input_wildcards` is true). + if (!env.binary.empty() && !env.has_input_wildcards) { + const auto serialized_target_config = [&]() -> absl::StatusOr { + // TODO: b/410051414 Use Centipede flags to pass necessary information + // instead of passing the entirely serialized Configuration once switched + // to the unified execution model. 
+ if (!env.fuzztest_configuration.empty()) { + std::string result; + CHECK(absl::WebSafeBase64Unescape(env.fuzztest_configuration, &result)); + return result; + } + ScopedCentipedeCallbacks scoped_callbacks(callbacks_factory, env); + return scoped_callbacks.callbacks()->GetSerializedTargetConfig(); + }(); + CHECK_OK(serialized_target_config.status()); + if (!serialized_target_config->empty()) { + const auto target_config = fuzztest::internal::Configuration::Deserialize( + *serialized_target_config); + CHECK_OK(target_config.status()) + << "Failed to deserialize target configuration"; + if (!target_config->corpus_database.empty()) { + LOG_IF(FATAL, + env.list_crash_ids + env.replay_crash + env.export_crash > 1) + << "At most one of list_crash_ids/replay_crash/export_crash can " + "be set, but seeing list_crash_ids: " + << env.list_crash_ids << ", replay_crash: " << env.replay_crash + << ", export_crash: " << env.export_crash; + if (env.list_crash_ids) { + return ListCrashIds(env, *target_config); + } + if (env.replay_crash) { + return ReplayCrash(env, *target_config, callbacks_factory); + } + if (env.export_crash) { + return ExportCrash(env, *target_config); + } + + const auto time_limit_per_test = target_config->GetTimeLimitPerTest(); + CHECK(target_config->only_replay || + time_limit_per_test < absl::InfiniteDuration()) + << "Updating corpus database requires specifying time limit per " + "fuzz test."; + CHECK(time_limit_per_test >= absl::Seconds(1)) + << "Time limit per fuzz test must be at least 1 second."; + return UpdateCorpusDatabaseForFuzzTests(env, *target_config, + callbacks_factory); + } + } + } + + // Create the remote coverage dirs once, before creating any threads. 
+ const auto coverage_dir = WorkDir{env}.CoverageDirPath(); + CHECK_OK(RemoteMkdir(coverage_dir)); + LOG(INFO) << "Coverage dir: " << coverage_dir + << "; temporary dir: " << tmpdir; + + std::string pcs_file_path; + BinaryInfo binary_info = PopulateBinaryInfoAndSavePCsIfNecessary( + env, callbacks_factory, pcs_file_path); + + if (env.analyze) return Analyze(env); + + return Fuzz(env, binary_info, pcs_file_path, callbacks_factory); +} + +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/centipede_interface.h b/src/third_party/fuzztest/dist/centipede/centipede_interface.h new file mode 100644 index 00000000000..d6c5dfdd938 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/centipede_interface.h @@ -0,0 +1,37 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef THIRD_PARTY_CENTIPEDE_CENTIPEDE_INTERFACE_H_ +#define THIRD_PARTY_CENTIPEDE_CENTIPEDE_INTERFACE_H_ + +#include "./centipede/centipede_callbacks.h" +#include "./centipede/environment.h" + +namespace fuzztest::internal { + +// Usage: +// class MyCentipedeCallbacks: public CentipedeCallbacks { ... } +// int main(int argc, char **argv) { +// InitGoogle(argv[0], &argc, &argv, /*remove_flags=*/true); +// fuzztest::internal::Environment env; // reads FLAGS. 
+// fuzztest::internal::DefaultCallbacksFactory +// callbacks_factory; return fuzztest::internal::CentipedeMain(env, +// callbacks_factory); +// } +int CentipedeMain(const Environment &env, + CentipedeCallbacksFactory &callbacks_factory); + +} // namespace fuzztest::internal + +#endif // THIRD_PARTY_CENTIPEDE_CENTIPEDE_INTERFACE_H_ diff --git a/src/third_party/fuzztest/dist/centipede/centipede_main.cc b/src/third_party/fuzztest/dist/centipede/centipede_main.cc new file mode 100644 index 00000000000..8f6a71564f0 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/centipede_main.cc @@ -0,0 +1,30 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "absl/base/nullability.h" +#include "./centipede/centipede_callbacks.h" +#include "./centipede/centipede_default_callbacks.h" +#include "./centipede/centipede_interface.h" +#include "./centipede/config_file.h" +#include "./centipede/environment_flags.h" + +int main(int argc, char** absl_nonnull argv) { + const auto runtime_state = fuzztest::internal::InitCentipede(argc, argv); + const auto env = fuzztest::internal::CreateEnvironmentFromFlags( + runtime_state->leftover_argv()); + fuzztest::internal::DefaultCallbacksFactory< + fuzztest::internal::CentipedeDefaultCallbacks> + callbacks; + return CentipedeMain(env, callbacks); +} diff --git a/src/third_party/fuzztest/dist/centipede/centipede_test.cc b/src/third_party/fuzztest/dist/centipede/centipede_test.cc new file mode 100644 index 00000000000..c66e7ab221b --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/centipede_test.cc @@ -0,0 +1,1231 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include // NOLINT +#include +#include +#include +#include +#include +#include // NOLINT: For thread::get_id() only. 
+#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/nullability.h" +#include "absl/container/flat_hash_set.h" +#include "absl/log/check.h" +#include "absl/log/log.h" +#include "absl/strings/str_cat.h" +#include "absl/time/time.h" +#include "./centipede/centipede_callbacks.h" +#include "./centipede/centipede_default_callbacks.h" +#include "./centipede/centipede_interface.h" +#include "./centipede/environment.h" +#include "./centipede/feature.h" +#include "./centipede/mutation_input.h" +#include "./centipede/runner_result.h" +#include "./centipede/stop.h" +#include "./centipede/util.h" +#include "./centipede/workdir.h" +#include "./common/defs.h" +#include "./common/hash.h" +#include "./common/logging.h" +#include "./common/test_util.h" + +namespace fuzztest::internal { +namespace { + +using ::testing::AllOf; +using ::testing::Contains; +using ::testing::Each; +using ::testing::HasSubstr; +using ::testing::IsEmpty; +using ::testing::IsSupersetOf; +using ::testing::Le; +using ::testing::Not; +using ::testing::SizeIs; + +// A mock for CentipedeCallbacks. +class CentipedeMock : public CentipedeCallbacks { + public: + CentipedeMock(const Environment &env) : CentipedeCallbacks(env) {} + // Doesn't execute anything + // Sets `batch_result.results()` based on the values of `inputs`: + // Collects various stats about the inputs, to be checked in tests. + bool Execute(std::string_view binary, const std::vector &inputs, + BatchResult &batch_result) override { + batch_result.results().clear(); + // For every input, we create a 256-element array `counters`, where + // i-th element is the number of bytes with the value 'i' in the input. + // `counters` is converted to FeatureVec and added to + // `batch_result.results()`. 
+ for (auto &input : inputs) { + ByteArray counters(256); + for (uint8_t byte : input) { + counters[byte]++; + } + FeatureVec features; + for (size_t i = 0; i < counters.size(); ++i) { + if (counters[i] == 0) continue; + features.push_back(feature_domains::k8bitCounters.ConvertToMe( + Convert8bitCounterToNumber(i, counters[i]))); + } + batch_result.results().emplace_back(ExecutionResult{features}); + if (input.size() == 1) { + observed_1byte_inputs_.insert(input[0]); + } else { + EXPECT_EQ(input.size(), 2); + uint16_t input2bytes = (input[0] << 8) | input[1]; + observed_2byte_inputs_.insert(input2bytes); + } + num_inputs_++; + } + num_executions_++; + max_batch_size_ = std::max(max_batch_size_, inputs.size()); + min_batch_size_ = std::min(min_batch_size_, inputs.size()); + return true; + } + // Makes predictable mutants: + // first 255 mutations are 1-byte sequences {1} ... {255}. + // (the value {0} is produced by the default GetSeeds()). + // Next 65536 mutations are 2-byte sequences {0,0} ... {255, 255}. + // Then repeat 2-byte sequences. + std::vector Mutate(const std::vector &inputs, + size_t num_mutants) override { + std::vector mutants; + mutants.reserve(num_mutants); + for (size_t i = 0; i < num_mutants; ++i) { + num_mutations_++; + if (num_mutations_ < 256) { + mutants.push_back({static_cast(num_mutations_)}); + continue; + } + uint8_t byte0 = (num_mutations_ - 256) / 256; + uint8_t byte1 = (num_mutations_ - 256) % 256; + mutants.push_back({byte0, byte1}); + } + return mutants; + } + + absl::flat_hash_set observed_1byte_inputs_; + absl::flat_hash_set observed_2byte_inputs_; + + size_t num_executions_ = 0; + size_t num_inputs_ = 0; + size_t num_mutations_ = 0; + size_t max_batch_size_ = 0; + size_t min_batch_size_ = -1; +}; + +// Returns the same CentipedeCallbacks object every time, never destroys it. 
+class MockFactory : public CentipedeCallbacksFactory { + public: + explicit MockFactory(CentipedeCallbacks &cb) : cb_(cb) {} + CentipedeCallbacks *absl_nonnull create(const Environment &env) override { + return &cb_; + } + void destroy(CentipedeCallbacks *cb) override { EXPECT_EQ(cb, &cb_); } + + private: + CentipedeCallbacks &cb_; +}; + +TEST(Centipede, MockTest) { + TempCorpusDir tmp_dir{test_info_->name()}; + Environment env; + env.log_level = 0; // Disable most of the logging in the test. + env.workdir = tmp_dir.path(); + env.num_runs = 100000; // Enough to run through all 1- and 2-byte inputs. + env.batch_size = 7; // Just some small number. + env.require_pc_table = false; // No PC table here. + CentipedeMock mock(env); + MockFactory factory(mock); + CentipedeMain(env, factory); // Run fuzzing with num_runs inputs. + EXPECT_EQ(mock.num_inputs_, env.num_runs + 1); // num_runs and one dummy. + EXPECT_EQ(mock.num_mutations_, env.num_runs); + EXPECT_EQ(mock.max_batch_size_, env.batch_size); + EXPECT_EQ(mock.min_batch_size_, 1); // 1 for dummy. + EXPECT_EQ(tmp_dir.CountElementsInCorpusFile(0), 512); + EXPECT_EQ(mock.observed_1byte_inputs_.size(), 256); // all 1-byte seqs. + EXPECT_EQ(mock.observed_2byte_inputs_.size(), 65536); // all 2-byte seqs. +} + +static size_t CountFilesInDir(std::string_view dir_path) { + const std::filesystem::directory_iterator dir_iter{dir_path}; + return std::distance(std::filesystem::begin(dir_iter), + std::filesystem::end(dir_iter)); +} + +TEST(Centipede, ReadFirstCorpusDir) { + TempDir workdir_1{test_info_->name(), "workdir_1"}; + TempDir workdir_2{test_info_->name(), "workdir_2"}; + TempDir corpus_dir{test_info_->name(), "corpus"}; + Environment env; + env.log_level = 0; // Disable most of the logging in the test. + env.workdir = workdir_1.path(); + env.num_runs = 100000; // Enough to run through all 1- and 2-byte inputs. + env.batch_size = 7; // Just some small number. + env.require_pc_table = false; // No PC table here. 
+ env.corpus_dir.push_back(corpus_dir.path()); + + // Need to wrap each CentipedeMain in a scope to make sure the shmem is + // released before the next call. Otherwise it may fail in MacOS. + { + // First, generate corpus files in corpus_dir. + CentipedeMock mock_1(env); + MockFactory factory_1(mock_1); + CentipedeMain(env, factory_1); + ASSERT_EQ(mock_1.observed_1byte_inputs_.size(), 256); // all 1-byte seqs. + ASSERT_EQ(mock_1.observed_2byte_inputs_.size(), 65536); // all 2-byte seqs. + ASSERT_EQ(CountFilesInDir(env.corpus_dir[0]), + 512); // All 1-byte and 2-byte inputs. + } + + { + // Second, run without fuzzing using the same corpus_dir. + env.workdir = workdir_2.path(); + env.num_runs = 0; + CentipedeMock mock_2(env); + MockFactory factory_2(mock_2); + CentipedeMain(env, factory_2); + // Should observe all inputs in corpus_dir, plus the dummy seed input {0}. + EXPECT_EQ(mock_2.num_inputs_, 513); + } +} + +TEST(Centipede, DoesNotReadFirstCorpusDirIfOutputOnly) { + TempDir workdir_1{test_info_->name(), "workdir_1"}; + TempDir workdir_2{test_info_->name(), "workdir_2"}; + TempDir corpus_dir{test_info_->name(), "corpus"}; + Environment env; + env.log_level = 0; // Disable most of the logging in the test. + env.workdir = workdir_1.path(); + env.num_runs = 100000; // Enough to run through all 1- and 2-byte inputs. + env.batch_size = 7; // Just some small number. + env.require_pc_table = false; // No PC table here. + env.corpus_dir.push_back(corpus_dir.path()); + + { + // First, generate corpus files in corpus_dir. + CentipedeMock mock_1(env); + MockFactory factory_1(mock_1); + CentipedeMain(env, factory_1); + ASSERT_EQ(mock_1.observed_1byte_inputs_.size(), 256); // all 1-byte seqs. + ASSERT_EQ(mock_1.observed_2byte_inputs_.size(), 65536); // all 2-byte seqs. + ASSERT_EQ(CountFilesInDir(env.corpus_dir[0]), + 512); // All 1-byte and 2-byte inputs. + } + + { + // Second, run without fuzzing using the same corpus_dir, but as + // output-only. 
+ env.workdir = workdir_2.path(); + env.num_runs = 0; + env.first_corpus_dir_output_only = true; + CentipedeMock mock_2(env); + MockFactory factory_2(mock_2); + CentipedeMain(env, factory_2); + // Should observe no inputs other than the seed input {0}. + EXPECT_EQ(mock_2.num_inputs_, 1); + } +} + +TEST(Centipede, SkipsOutputIfFirstCorpusDirIsEmptyPath) { + TempCorpusDir tmp_dir{test_info_->name()}; + Environment env; + env.log_level = 0; // Disable most of the logging in the test. + env.workdir = tmp_dir.path(); + env.num_runs = 100000; // Enough to run through all 1- and 2-byte inputs. + env.batch_size = 7; // Just some small number. + env.require_pc_table = false; // No PC table here. + // Set the first corpus_dir entry to empty path to skip output. + env.corpus_dir.push_back(""); + env.corpus_dir.push_back(tmp_dir.CreateSubdir("cd")); + + CentipedeMock mock(env); + MockFactory factory(mock); + CentipedeMain(env, factory); // Run fuzzing with num_runs inputs. + EXPECT_EQ(mock.observed_1byte_inputs_.size(), 256); // all 1-byte seqs. + EXPECT_EQ(mock.observed_2byte_inputs_.size(), 65536); // all 2-byte seqs. + // No output should be in other entries of corpus_dir. + EXPECT_EQ(CountFilesInDir(env.corpus_dir[1]), 0); +} + +// Tests fuzzing and distilling in multiple shards. +TEST(Centipede, ShardsAndDistillTest) { + TempCorpusDir tmp_dir{test_info_->name()}; + Environment env; + env.workdir = tmp_dir.path(); + env.log_level = 0; // Disable most of the logging in the test. + size_t combined_num_runs = 100000; // Enough to run through all inputs. + env.total_shards = 20; + env.num_runs = combined_num_runs / env.total_shards; + env.require_pc_table = false; // No PC table here. + + // Create two empty dirs and add them to corpus_dir. + env.corpus_dir.push_back(tmp_dir.CreateSubdir("cd1")); + env.corpus_dir.push_back(tmp_dir.CreateSubdir("cd2")); + + CentipedeMock mock(env); + // First round of runs: do the actual fuzzing, compute the features. 
+ size_t max_shard_size = 0; + for (size_t shard_index = 0; shard_index < env.total_shards; shard_index++) { + env.my_shard_index = shard_index; + MockFactory factory(mock); + CentipedeMain(env, factory); // Run fuzzing in shard `shard_index`. + auto corpus_size = tmp_dir.CountElementsInCorpusFile(shard_index); + // Every byte should be present at least once. + // With 2-byte inputs, we get at least 128 inputs covering 256 features. + EXPECT_GT(corpus_size, 128); + max_shard_size = std::max(max_shard_size, corpus_size); + } + EXPECT_EQ(mock.observed_1byte_inputs_.size(), 256); // all 1-byte seqs. + EXPECT_EQ(mock.observed_2byte_inputs_.size(), 65536); // all 2-byte seqs. + + EXPECT_GT(CountFilesInDir(env.corpus_dir[0]), 128); + EXPECT_EQ(CountFilesInDir(env.corpus_dir[1]), 0); + + // Second round of runs. Don't fuzz, only distill. + // Don't distill in the last one to test the flag behaviour. + env.distill = true; + env.num_threads = env.total_shards - 1; + env.my_shard_index = 0; + // Empty the corpus_dir[0] + std::filesystem::remove_all(env.corpus_dir[0]); + std::filesystem::create_directory(env.corpus_dir[0]); + MockFactory factory(mock); + CentipedeMain(env, factory); // Run distilling in shard `shard_index`. + EXPECT_EQ(CountFilesInDir(env.corpus_dir[0]), 0); + size_t distilled_size = 0; + for (size_t shard_index = 0; shard_index < env.total_shards; shard_index++) { + SCOPED_TRACE(absl::StrCat("Shard ", shard_index)); + const auto shard_distilled_size = + tmp_dir.CountElementsInCorpusFile(shard_index, "distilled-."); + if (shard_index == env.total_shards - 1) { + // Didn't distill in the last shard. + EXPECT_EQ(shard_distilled_size, 0); + } + distilled_size += shard_distilled_size; + } + // Distillation is expected to find more inputs than any individual shard. + EXPECT_GT(distilled_size, max_shard_size); + // And since we are expecting 512 features, with 2-byte inputs, + // we get at least 512/2 corpus elements after distillation. 
+ EXPECT_GT(distilled_size, 256); +} + +// Tests --input_filter. test_input_filter filters out inputs with 'b' in them. +TEST(Centipede, InputFilter) { + TempCorpusDir tmp_dir{test_info_->name()}; + Environment env; + env.workdir = tmp_dir.path(); + env.num_runs = 256; // Enough to run through all 1- byte inputs. + env.log_level = 0; // Disable most of the logging in the test. + env.require_pc_table = false; // No PC table here. + // Add %f so that test_input_filter doesn't need to be linked with forkserver. + env.input_filter = "%f" + std::string{GetDataDependencyFilepath( + "centipede/testing/test_input_filter")}; + CentipedeMock mock(env); + MockFactory factory(mock); + CentipedeMain(env, factory); // Run fuzzing. + auto corpus = tmp_dir.GetCorpus(0); + std::set corpus_set(corpus.begin(), corpus.end()); + EXPECT_FALSE(corpus_set.count({'b'})); + EXPECT_TRUE(corpus_set.count({'a'})); + EXPECT_TRUE(corpus_set.count({'c'})); +} + +// Callbacks for MutateViaExternalBinary test. +class MutateCallbacks : public CentipedeCallbacks { + public: + explicit MutateCallbacks(const Environment &env) : CentipedeCallbacks(env) {} + // Will not be called. + bool Execute(std::string_view binary, const std::vector &inputs, + BatchResult &batch_result) override { + CHECK(false); + return false; + } + + // Will not be called. + std::vector Mutate(const std::vector &inputs, + size_t num_mutants) override { + CHECK(false); + } + + // Redeclare a protected member function as public so the tests can call it. + using CentipedeCallbacks::MutateViaExternalBinary; +}; + +// Maintains `TemporaryLocalDirPath()` during the lifetime. +// +// Some parts of Centipede rely on `TemporaryLocalDirPath()` being set up as a +// global resource. Tests that exercise such parts of Centipede should use this +// fixture. +// +// TODO(b/391433873): Get rid of this once the design of +// `TemporaryLocalDirPath()` is revisited. 
+class CentipedeWithTemporaryLocalDir : public testing::Test { + public: + CentipedeWithTemporaryLocalDir() { + std::filesystem::path tmp_dir = TemporaryLocalDirPath(); + std::filesystem::remove_all(tmp_dir); + std::filesystem::create_directory(tmp_dir); + } + + ~CentipedeWithTemporaryLocalDir() override { + std::filesystem::remove_all(TemporaryLocalDirPath()); + } +}; + +TEST_F(CentipedeWithTemporaryLocalDir, MutateViaExternalBinary) { + // This binary contains a test-friendly custom mutator. + const std::string binary_with_custom_mutator = + GetDataDependencyFilepath("centipede/testing/test_fuzz_target"); + // This binary does not contain a custom mutator. + const std::string binary_without_custom_mutator = + GetDataDependencyFilepath("centipede/testing/abort_fuzz_target"); + // Mutate a couple of different inputs. + std::vector inputs = {{0, 1, 2}, {3, 4}}; + // The custom mutator in the test binary will reverse the order of bytes + // and sometimes add a number in [100, 107) at the end. + // Periodically, the custom mutator will fall back to LLVMFuzzerMutate, + // which in turn will sometimes shrink the inputs. + std::vector some_of_expected_mutants = { + // Reversed inputs, sometimes with an extra byte at the end. + {2, 1, 0}, + {2, 1, 0, 100}, + {2, 1, 0, 101}, + {2, 1, 0, 102}, + {4, 3}, + {4, 3, 103}, + {4, 3, 104}, + {4, 3, 105}, + // Shrunk inputs. + {0, 1}, + {4}}; + + std::vector expected_crossover_mutants = { + // Crossed-over mutants. + {0, 1, 2, 42, 3, 4}, + {3, 4, 42, 0, 1, 2}, + }; + + auto all_expected_mutants = some_of_expected_mutants; + all_expected_mutants.insert(all_expected_mutants.end(), + expected_crossover_mutants.begin(), + expected_crossover_mutants.end()); + + // Test with crossover enabled (default). 
+ { + Environment env; + MutateCallbacks callbacks(env); + { + const MutationResult result = callbacks.MutateViaExternalBinary( + binary_without_custom_mutator, + GetMutationInputRefsFromDataInputs(inputs), 1); + EXPECT_EQ(result.exit_code(), EXIT_SUCCESS); + EXPECT_FALSE(result.has_custom_mutator()); + } + + { + const MutationResult result = callbacks.MutateViaExternalBinary( + binary_with_custom_mutator, + GetMutationInputRefsFromDataInputs(inputs), 10000); + EXPECT_EQ(result.exit_code(), EXIT_SUCCESS); + EXPECT_TRUE(result.has_custom_mutator()); + EXPECT_THAT(result.mutants(), AllOf(IsSupersetOf(all_expected_mutants), + Each(Not(IsEmpty())))); + } + } + + // Test with a max_len of 10 + { + Environment env; + env.max_len = 10; + MutateCallbacks callbacks(env); + const MutationResult result = callbacks.MutateViaExternalBinary( + binary_with_custom_mutator, GetMutationInputRefsFromDataInputs(inputs), + 10000); + EXPECT_EQ(result.exit_code(), EXIT_SUCCESS); + EXPECT_TRUE(result.has_custom_mutator()); + EXPECT_THAT(result.mutants(), AllOf(IsSupersetOf(all_expected_mutants), + Each(Not(IsEmpty())))); + EXPECT_THAT(result.mutants(), + AllOf(IsSupersetOf(all_expected_mutants), Each(Not(IsEmpty())), + // The byte_array_mutator may insert up to 20 bytes to an + // input, which may push the size over the max_len. + Each(SizeIs(Le(30))))); + } + + // Test with crossover disabled. + { + Environment env_no_crossover; + env_no_crossover.crossover_level = 0; + MutateCallbacks callbacks_no_crossover(env_no_crossover); + + const MutationResult result = + callbacks_no_crossover.MutateViaExternalBinary( + binary_with_custom_mutator, + GetMutationInputRefsFromDataInputs(inputs), 10000); + // Must contain normal mutants, but not the ones from crossover. 
+ EXPECT_THAT(result.mutants(), IsSupersetOf(some_of_expected_mutants)); + for (const auto &crossover_mutant : expected_crossover_mutants) { + EXPECT_THAT(result.mutants(), Not(Contains(crossover_mutant))); + } + } +} + +// A mock for MergeFromOtherCorpus test. +class MergeMock : public CentipedeCallbacks { + public: + explicit MergeMock(const Environment &env) : CentipedeCallbacks(env) {} + + // Doesn't execute anything. + // All inputs are 1-byte long. + // For an input {X}, the feature output is {X}. + bool Execute(std::string_view binary, const std::vector &inputs, + BatchResult &batch_result) override { + batch_result.results().resize(inputs.size()); + for (size_t i = 0, n = inputs.size(); i < n; ++i) { + CHECK_EQ(inputs[i].size(), 1); + batch_result.results()[i].mutable_features() = {inputs[i][0]}; + } + return true; + } + + // Every consecutive mutation is {number_of_mutations_} (starting from 1). + std::vector Mutate(const std::vector &inputs, + size_t num_mutants) override { + std::vector mutants{num_mutants}; + for (auto &mutant : mutants) { + mutant.resize(1); + mutant[0] = ++number_of_mutations_; + } + return mutants; + } + + void Reset() { number_of_mutations_ = 0; } + + private: + size_t number_of_mutations_ = 0; +}; + +TEST(Centipede, MergeFromOtherCorpus) { + using Corpus = std::vector; + + // Set up the workdir, create a 2-shard corpus with 3 inputs plus the seed {0} + // each. + TempCorpusDir work_tmp_dir{test_info_->name(), "workdir"}; + Environment env; + env.workdir = work_tmp_dir.path(); + env.num_runs = 3; // Just a few runs. + env.require_pc_table = false; // No PC table here. 
+ MergeMock mock(env); + MockFactory factory(mock); + for (env.my_shard_index = 0; env.my_shard_index < 2; ++env.my_shard_index) { + CentipedeMain(env, factory); + } + CentipedeMain(env, factory); + EXPECT_EQ(work_tmp_dir.GetCorpus(0), Corpus({{0}, {1}, {2}, {3}})); + EXPECT_EQ(work_tmp_dir.GetCorpus(1), Corpus({{0}, {4}, {5}, {6}})); + + // Set up another workdir, create a 2-shard corpus there, with 4 inputs plus + // the seed {0} each. + TempCorpusDir merge_tmp_dir(test_info_->name(), "merge_from"); + Environment merge_env; + merge_env.workdir = merge_tmp_dir.path(); + merge_env.num_runs = 4; + merge_env.require_pc_table = false; // No PC table here. + mock.Reset(); + for (merge_env.my_shard_index = 0; merge_env.my_shard_index < 2; + + ++merge_env.my_shard_index) { + CentipedeMain(merge_env, factory); + } + EXPECT_EQ(merge_tmp_dir.GetCorpus(0), Corpus({{0}, {1}, {2}, {3}, {4}})); + EXPECT_EQ(merge_tmp_dir.GetCorpus(1), Corpus({{0}, {5}, {6}, {7}, {8}})); + + // Merge shards of `merge_env` into shards of `env`. + // Shard 0 will receive one extra input: {4} + // Shard 1 will receive two extra inputs: {7}, {8} + env.merge_from = merge_tmp_dir.path(); + env.num_runs = 0; + for (env.my_shard_index = 0; env.my_shard_index < 2; ++env.my_shard_index) { + CentipedeMain(env, factory); + } + EXPECT_EQ(work_tmp_dir.GetCorpus(0), Corpus({{0}, {1}, {2}, {3}, {4}})); + EXPECT_EQ(work_tmp_dir.GetCorpus(1), Corpus({{0}, {4}, {5}, {6}, {7}, {8}})); +} + +// A mock for FunctionFilter test. 
+class FunctionFilterMock : public CentipedeCallbacks { + public: + explicit FunctionFilterMock(const Environment &env) + : CentipedeCallbacks(env) { + std::vector seed_inputs; + const size_t num_seeds_available = GetSeeds(/*num_seeds=*/1, seed_inputs); + CHECK_EQ(num_seeds_available, 1) << "Default seeds must have size one."; + CHECK_EQ(seed_inputs.size(), 1) << "Default seeds must have size one."; + seed_inputs_.insert(seed_inputs.begin(), seed_inputs.end()); + } + + // Executes the target in the normal way. + bool Execute(std::string_view binary, const std::vector &inputs, + BatchResult &batch_result) override { + return ExecuteCentipedeSancovBinaryWithShmem(env_.binary, inputs, + batch_result) == EXIT_SUCCESS; + } + + // Sets the inputs to one of 3 pre-defined values. + std::vector Mutate(const std::vector &inputs, + size_t num_mutants) override { + for (auto &input : inputs) { + if (!seed_inputs_.contains(input.data)) { + observed_inputs_.insert(input.data); + } + } + std::vector mutants; + mutants.reserve(num_mutants); + for (size_t i = 0; i < num_mutants; ++i) { + mutants.push_back(GetMutant(++number_of_mutations_)); + } + return mutants; + } + + // Returns one of 3 pre-defined values, that trigger different code paths in + // the test target. + static ByteArray GetMutant(size_t idx) { + const char *mutants[3] = {"func1", "func2-A", "foo"}; + const char *mutant = mutants[idx % 3]; + return {mutant, mutant + strlen(mutant)}; + } + + // Seed inputs generated from GetSeeds(). + absl::flat_hash_set seed_inputs_; + // Set of inputs observed by Mutate(), except for seed inputs. + absl::flat_hash_set observed_inputs_; + + private: + size_t number_of_mutations_ = 0; +}; + +// Runs a short fuzzing session with the provided `function_filter`. +// Returns a sorted array of observed inputs. 
+static std::vector RunWithFunctionFilter( + std::string_view function_filter, const TempDir &tmp_dir) { + Environment env; + env.workdir = tmp_dir.path(); + env.seed = 1; // make the runs predictable. + env.num_runs = 100; + env.batch_size = 10; + env.binary = GetDataDependencyFilepath("centipede/testing/test_fuzz_target"); + env.coverage_binary = env.binary; + // Must symbolize in order for the filter to work. + env.symbolizer_path = GetLLVMSymbolizerPath(); + env.objdump_path = GetObjDumpPath(); + env.log_level = 0; + env.function_filter = function_filter; + FunctionFilterMock mock(env); + MockFactory factory(mock); + CentipedeMain(env, factory); + LOG(INFO) << mock.observed_inputs_.size(); + std::vector res(mock.observed_inputs_.begin(), + mock.observed_inputs_.end()); + std::sort(res.begin(), res.end()); + return res; +} + +// Tests --function_filter. +TEST(Centipede, FunctionFilter) { + // Run with empty function filter. + { + TempDir tmp_dir{test_info_->name(), "none"}; + auto observed_empty = RunWithFunctionFilter("", tmp_dir); + ASSERT_EQ(observed_empty.size(), 3); + } + + // Run with a one-function filter + { + TempDir tmp_dir{test_info_->name(), "single"}; + auto observed_single = RunWithFunctionFilter("SingleEdgeFunc", tmp_dir); + ASSERT_EQ(observed_single.size(), 1); + EXPECT_EQ(observed_single[0], FunctionFilterMock::GetMutant(0)); + } + + // Run with a two-function filter. + { + TempDir tmp_dir{test_info_->name(), "single_multi"}; + auto observed_both = + RunWithFunctionFilter("SingleEdgeFunc,MultiEdgeFunc", tmp_dir); + ASSERT_EQ(observed_both.size(), 2); + EXPECT_EQ(observed_both[0], FunctionFilterMock::GetMutant(0)); + EXPECT_EQ(observed_both[1], FunctionFilterMock::GetMutant(1)); + } +} + +struct Crash { + std::string binary; + unsigned char input = 0; + std::string description; + std::string signature; +}; + +// A mock for ExtraBinaries test. 
+class ExtraBinariesMock : public CentipedeCallbacks { + public: + explicit ExtraBinariesMock(const Environment &env, std::vector crashes) + : CentipedeCallbacks(env), crashes_(std::move(crashes)) {} + + // Doesn't execute anything. + // On certain combinations of {binary,input} returns false. + bool Execute(std::string_view binary, const std::vector &inputs, + BatchResult &batch_result) override { + bool res = true; + for (const auto &input : inputs) { + if (input.size() != 1) continue; + for (const Crash &crash : crashes_) { + if (binary == crash.binary && input[0] == crash.input) { + batch_result.failure_description() = crash.description; + batch_result.failure_signature() = crash.signature; + res = false; + } + } + } + batch_result.results().resize(inputs.size()); + return res; + } + + // Sets the mutants to different 1-byte values. + std::vector Mutate(const std::vector &inputs, + size_t num_mutants) override { + std::vector mutants{num_mutants}; + for (auto &mutant : mutants) { + mutant.resize(1); + mutant[0] = ++number_of_mutations_; + } + return mutants; + } + + private: + size_t number_of_mutations_ = 0; + std::vector crashes_; +}; + +struct FileAndContents { + std::string file; + std::string contents; + + bool operator==(const FileAndContents &other) const { + return file == other.file && contents == other.contents; + } + + template + friend void AbslStringify(Sink &sink, const FileAndContents &f) { + absl::Format(&sink, "FileAndContents{%s, \"%s\"}", f.file, f.contents); + } +}; + +MATCHER_P(HasFilesWithContents, expected_files_and_contents, "") { + const std::string &dir_path = arg; + std::vector files_and_contents; + for (const auto &dir_ent : std::filesystem::directory_iterator(dir_path)) { + auto file_and_contents = FileAndContents{dir_ent.path().filename()}; + ReadFromLocalFile(dir_ent.path().c_str(), file_and_contents.contents); + files_and_contents.push_back(std::move(file_and_contents)); + } + return 
ExplainMatchResult(expected_files_and_contents, files_and_contents, + result_listener); +} + +// Tests --extra_binaries. +// Executes one main binary (--binary) and 3 extra ones (--extra_binaries). +// Expects the main binary and two extra ones to generate one crash each. +TEST(Centipede, ExtraBinaries) { + TempDir tmp_dir{test_info_->name()}; + Environment env; + env.workdir = tmp_dir.path(); + env.num_runs = 100; + env.batch_size = 10; + env.log_level = 1; + env.binary = "b1"; + env.extra_binaries = {"b2", "b3", "b4"}; + env.require_pc_table = false; // No PC table here. + ExtraBinariesMock mock(env, {Crash{"b1", 10, "b1-crash", "b1-sig"}, + Crash{"b2", 30, "b2-crash", "b2-sig"}, + Crash{"b3", 50, "b3-crash", "b3-sig"}}); + MockFactory factory(mock); + CentipedeMain(env, factory); + + // Verify that we see the expected crashes. + // The "crashes" dir must contain 3 crashy inputs, one for each binary. + auto crashes_dir_path = WorkDir{env}.CrashReproducerDirPaths().MyShard(); + ASSERT_TRUE(std::filesystem::exists(crashes_dir_path)) + << VV(crashes_dir_path); + EXPECT_THAT(crashes_dir_path, + HasFilesWithContents(testing::UnorderedElementsAre( + FileAndContents{Hash({10}), AsString({10})}, + FileAndContents{Hash({30}), AsString({30})}, + FileAndContents{Hash({50}), AsString({50})}))); + + // Verify that we see the expected crash metadata. + // The "crash-metadata" dir must contain 3 crash metadata files, one for each + // crashy input. 
+ auto crash_metadata_dir_path = WorkDir{env}.CrashMetadataDirPaths().MyShard(); + ASSERT_TRUE(std::filesystem::exists(crash_metadata_dir_path)) + << VV(crash_metadata_dir_path); + EXPECT_THAT( + crash_metadata_dir_path, + HasFilesWithContents(testing::UnorderedElementsAre( + FileAndContents{absl::StrCat(Hash({10}), ".desc"), "b1-crash"}, + FileAndContents{absl::StrCat(Hash({10}), ".sig"), "b1-sig"}, + FileAndContents{absl::StrCat(Hash({30}), ".desc"), "b2-crash"}, + FileAndContents{absl::StrCat(Hash({30}), ".sig"), "b2-sig"}, + FileAndContents{absl::StrCat(Hash({50}), ".desc"), "b3-crash"}, + FileAndContents{absl::StrCat(Hash({50}), ".sig"), "b3-sig"}))); +} + +// A mock for UndetectedCrashingInput test. +class UndetectedCrashingInputMock : public CentipedeCallbacks { + public: + explicit UndetectedCrashingInputMock(const Environment &env, + size_t crashing_input_idx) + : CentipedeCallbacks{env}, crashing_input_idx_{crashing_input_idx} { + CHECK_LE(crashing_input_idx_, std::numeric_limits::max()); + } + + // Doesn't execute anything. + // Crash when 0th char of input to binary b1 equals `crashing_input_idx_`, but + // only on 1st exec. + bool Execute(std::string_view binary, const std::vector &inputs, + BatchResult &batch_result) override { + batch_result.ClearAndResize(inputs.size()); + bool res = true; + if (!first_pass_) { + num_inputs_triaged_ += inputs.size(); + } + for (const auto &input : inputs) { + CHECK_EQ(input.size(), 1); // By construction in `Mutate()`. + // The contents of each mutant is its sequential number. + if (input[0] == crashing_input_idx_) { + if (first_pass_) { + first_pass_ = false; + crashing_input_ = input; + // TODO(b/274705740): `num_outputs_read()` is the number of outputs + // that Centipede engine *expects* to have been read from *the + // current BatchResult* by the *particular* implementation of + // `CentipedeCallbacks` (and `DefaultCentipedeCallbacks` fits the + // bill). 
`fuzztest::internal::ReportCrash()` then uses this value as + // a hint for the crashing input's index, and in our case saves the + // batch's inputs from 0 up to and including the crasher to a subdir. + // See the bug for details. All of this is horribly convoluted and + // misplaced here. Implement a cleaner solution. + batch_result.num_outputs_read() = + crashing_input_idx_ % env_.batch_size; + res = false; + } + } + } + return res; + } + + // Sets the mutants to different 1-byte values. + std::vector Mutate(const std::vector &inputs, + size_t num_mutants) override { + std::vector mutants; + mutants.reserve(num_mutants); + for (size_t i = 0; i < num_mutants; ++i) { + // The contents of each mutant is simply its sequential number. + mutants.push_back({static_cast(curr_input_idx_++)}); + } + return mutants; + } + + // Gets the input that triggered the crash. + ByteArray crashing_input() const { return crashing_input_; } + + size_t num_inputs_triaged() const { return num_inputs_triaged_; } + + private: + const size_t crashing_input_idx_; + size_t curr_input_idx_ = 0; + size_t num_inputs_triaged_ = 0; + ByteArray crashing_input_ = {}; + bool first_pass_ = true; +}; + +// Test for preserving a crashing batch when 1-by-1 exec fails to reproduce. +// Executes one main binary (--binary). +// Expects the binary to crash once and 1-by-1 reproduction to fail. 
+TEST(Centipede, UndetectedCrashingInput) { + constexpr size_t kNumBatches = 7; + constexpr size_t kBatchSize = 11; + constexpr size_t kCrashingInputIdxInBatch = kBatchSize / 2; + constexpr size_t kCrashingInputIdx = + (kNumBatches / 2) * kBatchSize + kCrashingInputIdxInBatch; + + LOG(INFO) << VV(kNumBatches) << VV(kBatchSize) + << VV(kCrashingInputIdxInBatch) VV(kCrashingInputIdx); + + TempDir temp_dir{test_info_->name()}; + Environment env; + env.workdir = temp_dir.path(); + env.num_runs = kBatchSize * kNumBatches; + env.batch_size = kBatchSize; + // No real binary: prevent attempts by Centipede to read a PCtable from it. + env.require_pc_table = false; + env.exit_on_crash = true; + + { + UndetectedCrashingInputMock mock(env, kCrashingInputIdx); + MockFactory factory(mock); + CentipedeMain(env, factory); + + // Verify that we see the expected inputs from the batch. + // The "crashes/crashing_batch-HASH" dir must contain all inputs from + // the batch that were executing during the session. We simply verify the + // number of saved inputs matches the number of executed inputs. + const auto crashing_input_hash = Hash(mock.crashing_input()); + const auto crashes_dir_path = + std::filesystem::path{ + WorkDir{env}.CrashReproducerDirPaths().MyShard()} / + absl::StrCat("crashing_batch-", crashing_input_hash); + EXPECT_TRUE(std::filesystem::exists(crashes_dir_path)) << crashes_dir_path; + std::vector found_crash_file_names; + for (auto const &dir_ent : + std::filesystem::directory_iterator(crashes_dir_path)) { + found_crash_file_names.push_back(dir_ent.path().filename()); + } + // TODO(ussuri): Verify exact names/contents of the files, not just count. + EXPECT_EQ(found_crash_file_names.size(), kCrashingInputIdxInBatch + 1); + // Suspected input first, then every input in the batch (including the + // suspected input again). 
+ EXPECT_EQ(mock.num_inputs_triaged(), kBatchSize + 1); + } + + // Verify that when `env.batch_triage_suspect_only` is set, only triage the + // suspect. + TempDir suspect_only_temp_dir{test_info_->name()}; + env.workdir = suspect_only_temp_dir.path(); + env.batch_triage_suspect_only = true; + UndetectedCrashingInputMock suspect_only_mock(env, kCrashingInputIdx); + MockFactory suspect_only_factory(suspect_only_mock); + CentipedeMain(env, suspect_only_factory); + + EXPECT_EQ(suspect_only_mock.num_inputs_triaged(), 1); +} + +TEST_F(CentipedeWithTemporaryLocalDir, GetsSeedInputs) { + Environment env; + env.binary = + GetDataDependencyFilepath("centipede/testing/seeded_fuzz_target"); + CentipedeDefaultCallbacks callbacks(env); + + std::vector seeds; + EXPECT_EQ(callbacks.GetSeeds(10, seeds), 10); + EXPECT_THAT(seeds, testing::ContainerEq(std::vector{ + {0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}, {9}})); + EXPECT_EQ(callbacks.GetSeeds(5, seeds), 10); + EXPECT_THAT(seeds, testing::ContainerEq( + std::vector{{0}, {1}, {2}, {3}, {4}})); + EXPECT_EQ(callbacks.GetSeeds(100, seeds), 10); + EXPECT_THAT(seeds, testing::ContainerEq(std::vector{ + {0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}, {9}})); +} + +TEST_F(CentipedeWithTemporaryLocalDir, GetsSerializedTargetConfig) { + Environment env; + env.binary = + GetDataDependencyFilepath("centipede/testing/fuzz_target_with_config"); + CentipedeDefaultCallbacks callbacks(env); + + const auto serialized_config = callbacks.GetSerializedTargetConfig(); + ASSERT_TRUE(serialized_config.ok()); + EXPECT_EQ(*serialized_config, "fake serialized config"); +} + +TEST_F(CentipedeWithTemporaryLocalDir, + GetSerializedTargetConfigProducesFailure) { + Environment env; + env.binary = absl::StrCat( + GetDataDependencyFilepath("centipede/testing/fuzz_target_with_config") + .c_str(), + " --simulate_failure"); + CentipedeDefaultCallbacks callbacks(env); + + const auto serialized_config = callbacks.GetSerializedTargetConfig(); + 
EXPECT_FALSE(serialized_config.ok()); +} + +TEST_F(CentipedeWithTemporaryLocalDir, CleansUpMetadataAfterStartup) { + Environment env; + env.binary = GetDataDependencyFilepath( + "centipede/testing/expensive_startup_fuzz_target"); + CentipedeDefaultCallbacks callbacks(env); + + BatchResult batch_result; + const std::vector inputs = {{0}}; + ASSERT_TRUE(callbacks.Execute(env.binary, inputs, batch_result)); + ASSERT_EQ(batch_result.results().size(), 1); + bool found_startup_cmp_entry = false; + batch_result.results()[0].metadata().ForEachCmpEntry( + [&](ByteSpan a, ByteSpan b) { + if (a == ByteArray{'F', 'u', 'z', 'z'}) found_startup_cmp_entry = true; + if (b == ByteArray{'F', 'u', 'z', 'z'}) found_startup_cmp_entry = true; + }); + EXPECT_FALSE(found_startup_cmp_entry); +} + +class FakeCentipedeCallbacksForThreadChecking : public CentipedeCallbacks { + public: + FakeCentipedeCallbacksForThreadChecking(const Environment &env, + std::thread::id execute_thread_id) + : CentipedeCallbacks(env), execute_thread_id_(execute_thread_id) {} + + bool Execute(std::string_view binary, const std::vector &inputs, + BatchResult &batch_result) override { + batch_result.ClearAndResize(inputs.size()); + thread_check_passed_ = thread_check_passed_ && + std::this_thread::get_id() == execute_thread_id_; + return true; + } + + std::vector Mutate(const std::vector &inputs, + size_t num_mutants) override { + return {num_mutants, {0}}; + } + + bool thread_check_passed() { return thread_check_passed_; } + + private: + std::thread::id execute_thread_id_; + bool thread_check_passed_ = true; +}; + +TEST(Centipede, RunsExecuteCallbackInTheCurrentThreadWhenFuzzingWithOneThread) { + TempDir temp_dir{test_info_->name()}; + Environment env; + env.workdir = temp_dir.path(); + env.require_pc_table = false; + ASSERT_EQ(env.num_threads, 1); + FakeCentipedeCallbacksForThreadChecking callbacks(env, + std::this_thread::get_id()); + BatchResult batch_result; + const std::vector inputs = {{0}}; + env.num_runs = 
100; + MockFactory factory(callbacks); + EXPECT_EQ(CentipedeMain(env, factory), EXIT_SUCCESS); + EXPECT_TRUE(callbacks.thread_check_passed()); +} + +TEST_F(CentipedeWithTemporaryLocalDir, DetectsStackOverflow) { + Environment env; + env.binary = GetDataDependencyFilepath("centipede/testing/test_fuzz_target"); + env.stack_limit_kb = 64; + CentipedeDefaultCallbacks callbacks(env); + + BatchResult batch_result; + const std::vector inputs = {ByteArray{'s', 't', 'k'}}; + + ASSERT_FALSE(callbacks.Execute(env.binary, inputs, batch_result)); + EXPECT_THAT(batch_result.log(), HasSubstr("Stack limit exceeded")); + EXPECT_EQ(batch_result.failure_description(), "stack-limit-exceeded"); +} + +class SetupFailureCallbacks : public CentipedeCallbacks { + public: + using CentipedeCallbacks::CentipedeCallbacks; + + bool Execute(std::string_view binary, const std::vector &inputs, + BatchResult &batch_result) override { + ++execute_count_; + batch_result.ClearAndResize(inputs.size()); + batch_result.exit_code() = EXIT_FAILURE; + batch_result.failure_description() = "SETUP FAILURE: something went wrong"; + return false; + } + + std::vector Mutate(const std::vector &inputs, + size_t num_mutants) override { + return {num_mutants, {0}}; + } + + int execute_count() const { return execute_count_; } + + private: + int execute_count_ = 0; +}; + +TEST(Centipede, ReturnsFailureOnSetupFailure) { + TempDir temp_dir{test_info_->name()}; + Environment env; + env.log_level = 0; // Disable most of the logging in the test. + env.workdir = temp_dir.path(); + env.batch_size = 7; // Just some small number. + env.require_pc_table = false; // No PC table here. 
+ SetupFailureCallbacks mock(env); + MockFactory factory(mock); + EXPECT_EQ(CentipedeMain(env, factory), EXIT_FAILURE); + EXPECT_EQ(mock.execute_count(), 1); +} + +class SkippedTestCallbacks : public CentipedeCallbacks { + public: + using CentipedeCallbacks::CentipedeCallbacks; + + bool Execute(std::string_view binary, const std::vector &inputs, + BatchResult &batch_result) override { + ++execute_count_; + batch_result.ClearAndResize(inputs.size()); + batch_result.exit_code() = EXIT_FAILURE; + batch_result.failure_description() = + "SKIPPED TEST: test skipped on purpose"; + return false; + } + + std::vector Mutate(const std::vector &inputs, + size_t num_mutants) override { + return {num_mutants, {0}}; + } + + int execute_count() const { return execute_count_; } + + private: + int execute_count_ = 0; +}; + +TEST(Centipede, ReturnsSuccessOnSkippedTest) { + TempDir temp_dir{test_info_->name()}; + Environment env; + env.log_level = 0; // Disable most of the logging in the test. + env.workdir = temp_dir.path(); + env.batch_size = 7; // Just some small number. + env.require_pc_table = false; // No PC table here. 
+ SkippedTestCallbacks mock(env); + MockFactory factory(mock); + EXPECT_EQ(CentipedeMain(env, factory), EXIT_SUCCESS); + EXPECT_EQ(mock.execute_count(), 1); +} + +class IgnoredFailureCallbacks : public CentipedeCallbacks { + public: + using CentipedeCallbacks::CentipedeCallbacks; + + bool Execute(std::string_view binary, const std::vector &inputs, + BatchResult &batch_result) override { + ++execute_count_; + batch_result.ClearAndResize(inputs.size()); + batch_result.exit_code() = EXIT_FAILURE; + batch_result.failure_description() = + "IGNORED FAILURE: failure ignored on purpose"; + return false; + } + + std::vector Mutate(const std::vector &inputs, + size_t num_mutants) override { + return {num_mutants, {0}}; + } + + int execute_count() const { return execute_count_; } + + private: + int execute_count_ = 0; +}; + +TEST(Centipede, KeepsRunningAndReturnsSuccessWithIgnoredFailures) { + TempDir temp_dir{test_info_->name()}; + Environment env; + env.log_level = 0; // Disable most of the logging in the test. + env.workdir = temp_dir.path(); + env.batch_size = 7; // Just some small number. + env.num_runs = 100; + env.require_pc_table = false; // No PC table here. + env.exit_on_crash = true; + IgnoredFailureCallbacks mock(env); + MockFactory factory(mock); + EXPECT_EQ(CentipedeMain(env, factory), EXIT_SUCCESS); + EXPECT_GE(mock.execute_count(), 2); +} + +TEST_F(CentipedeWithTemporaryLocalDir, UsesProvidedCustomMutator) { + Environment env; + env.binary = GetDataDependencyFilepath( + "centipede/testing/fuzz_target_with_custom_mutator"); + CentipedeDefaultCallbacks callbacks(env); + + const std::vector inputs = {{1}, {2}, {3}, {4}, {5}, {6}}; + const std::vector mutants = callbacks.Mutate( + GetMutationInputRefsFromDataInputs(inputs), inputs.size()); + + // The custom mutator just returns the original inputs as mutants. 
+ EXPECT_EQ(inputs, mutants); +} + +TEST_F(CentipedeWithTemporaryLocalDir, FailsOnMisbehavingCustomMutator) { + Environment env; + env.binary = + absl::StrCat(GetDataDependencyFilepath( + "centipede/testing/fuzz_target_with_custom_mutator") + .c_str(), + " --simulate_failure"); + CentipedeDefaultCallbacks callbacks(env); + + const std::vector inputs = {{1}, {2}, {3}, {4}, {5}, {6}}; + // Previous stop condition could interfere here. + ClearEarlyStopRequestAndSetStopTime(absl::InfiniteFuture()); + EXPECT_THAT(callbacks.Mutate(GetMutationInputRefsFromDataInputs(inputs), + inputs.size()), + IsEmpty()); + EXPECT_TRUE(EarlyStopRequested()); + EXPECT_EQ(ExitCode(), EXIT_FAILURE); +} + +TEST_F(CentipedeWithTemporaryLocalDir, + FallsBackToBuiltInMutatorWhenCustomMutatorNotProvided) { + Environment env; + env.binary = GetDataDependencyFilepath("centipede/testing/abort_fuzz_target"); + CentipedeDefaultCallbacks callbacks(env); + + const std::vector inputs = {{1}, {2}, {3}, {4}, {5}, {6}}; + const std::vector mutants = callbacks.Mutate( + GetMutationInputRefsFromDataInputs(inputs), inputs.size()); + + // The built-in mutator performs non-trivial mutations. + EXPECT_EQ(inputs.size(), mutants.size()); + EXPECT_NE(inputs, mutants); +} + +TEST_F(CentipedeWithTemporaryLocalDir, HangingFuzzTargetExitsAfterTimeout) { + Environment env; + env.binary = + GetDataDependencyFilepath("centipede/testing/hanging_fuzz_target"); + BatchResult batch_result; + const std::vector inputs = {{0}}; + CentipedeDefaultCallbacks callbacks(env); + + env.timeout_per_batch = 1; + env.fork_server = false; + + // Test that the process does not get stuck and exits promptly. 
+ EXPECT_FALSE(callbacks.Execute(env.binary, {{0}}, batch_result)); +} + +} // namespace +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/command.cc b/src/third_party/fuzztest/dist/centipede/command.cc new file mode 100644 index 00000000000..242850c782b --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/command.cc @@ -0,0 +1,537 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "./centipede/command.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef __APPLE__ +#include +#include +#endif // __APPLE__ + +#include +#include +#include +#include // NOLINT +#include +#include +#include +#include +#include // NOLINT +#include +#include + +#include "absl/base/const_init.h" +#include "absl/log/check.h" +#include "absl/log/log.h" +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "absl/strings/match.h" +#include "absl/strings/numbers.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_format.h" +#include "absl/strings/str_join.h" +#include "absl/strings/str_replace.h" +#include "absl/strings/str_split.h" +#include "absl/synchronization/mutex.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "./centipede/stop.h" +#include "./centipede/util.h" +#include "./common/logging.h" + +#if !defined(_MSC_VER) +// Needed to pass the current environment to posix_spawn, which 
needs an +// explicit envp without an option to inherit implicitly. +extern char **environ; +#endif + +namespace fuzztest::internal { +namespace { + +// See the definition of --fork_server flag. +constexpr std::string_view kCommandLineSeparator(" \\\n"); +constexpr std::string_view kNoForkServerRequestPrefix("%f"); + +absl::StatusOr GetProcessCreationStamp(pid_t pid) { +#ifdef __APPLE__ + struct proc_bsdinfo info = {}; + if (proc_pidinfo(pid, PROC_PIDTBSDINFO, 0, &info, PROC_PIDTBSDINFO_SIZE) != + PROC_PIDTBSDINFO_SIZE) { + return absl::InternalError( + absl::StrCat("failed to get proc bsdinfo for ", pid)); + } + return absl::StrFormat("%" PRIu64 ".%06" PRIu64, info.pbi_start_tvsec, + info.pbi_start_tvusec); +#else + constexpr int kFieldIndexOfStartTimeAfterComm = 19; // From `man procfs` + const std::string proc_stat_path = absl::StrFormat("/proc/%d/stat", pid); + std::string proc_stat_line; + // Cannot use `ReadFromLocalFile` on procfs since seek does not work. + // This seems to work assuming the filename of the command does not contain + // newline, which should be in our control when the process is ours. + if (std::getline(std::ifstream(proc_stat_path), proc_stat_line).bad()) { + return absl::InternalError(absl::StrCat("failed to read ", proc_stat_path)); + } + // According to the current format of `/proc/[pid]/stat`, only the comm field + // can contain ')'. 
+ const size_t comm_end_pos = proc_stat_line.find_last_of(')'); + if (comm_end_pos == proc_stat_line.npos) { + return absl::NotFoundError( + absl::StrCat("cannot find the end of command in the first line of ", + proc_stat_path, ": ", proc_stat_line)); + } + std::string_view proc_stat_after_comm = + std::string_view(proc_stat_line).substr(comm_end_pos + 1); + const std::vector fields = + absl::StrSplit(proc_stat_after_comm, ' ', absl::SkipEmpty()); + if (fields.size() <= kFieldIndexOfStartTimeAfterComm) { + return absl::NotFoundError( + absl::StrCat("not enough fields in the first line of ", proc_stat_path, + ": ", proc_stat_line)); + } + return std::string(fields[kFieldIndexOfStartTimeAfterComm]); +#endif +} + +} // namespace + +// TODO(ussuri): Encapsulate as much of the fork server functionality from +// this source as possible in this struct, and make it a class. +struct Command::ForkServerProps { + // The file paths of the comms pipes. + std::string fifo_path_[2]; + // The file descriptors of the comms pipes. + int pipe_[2] = {-1, -1}; + // The file path to write the PID of the fork server process to. + std::string pid_file_path_; + // The PID of the fork server process. Used to verify that the fork server is + // running and the pipes are ready for comms. + pid_t pid_ = -1; + // The creation stamp of the fork server process. Used to detect that the + // running process with `pid_` is still the original fork server, not a PID + // recycled by the OS. 
+ std::string creation_stamp; + + ~ForkServerProps() { + for (int i = 0; i < 2; ++i) { + if (pipe_[i] >= 0 && close(pipe_[i]) != 0) { + LOG(ERROR) << "Failed to close fork server pipe for " << fifo_path_[i]; + } + std::error_code ec; + if (!fifo_path_[i].empty() && + !std::filesystem::remove(fifo_path_[i], ec)) { + LOG(ERROR) << "Failed to remove fork server pipe file " << fifo_path_[i] + << ": " << ec; + } + } + } +}; + +// NOTE: Because std::unique_ptr requires T to be a complete type wherever +// the deleter is instantiated, the special member functions must be defined +// out-of-line here, now that ForkServerProps is complete (that's by-the-book +// PIMPL). +Command::~Command() { + if (is_executing()) { + LOG(WARNING) + << "Destructing Command object for " << path() << " with " + << (fork_server_ ? absl::StrCat("fork server PID ", fork_server_->pid_) + : absl::StrCat("PID ", pid_)) + << " still running. Requesting it to stop without waiting for it..."; + RequestStop(); + } +} + +Command::Command(std::string_view path, Options options) + : path_(path), options_(std::move(options)) {} + +Command::Command(std::string_view path) : Command{path, {}} {} + +std::string Command::ToString() const { + std::vector ss; + ss.reserve(/*env*/ 1 + options_.env_add.size() + options_.env_remove.size() + + /*path*/ 1 + /*args*/ options_.args.size() + /*out/err*/ 2); + // env. + ss.push_back("env"); + // Arguments that unset environment variables must appear first. + for (const auto &var : options_.env_remove) { + ss.push_back(absl::StrCat("-u ", var)); + } + for (const auto &var : options_.env_add) { + ss.push_back(var); + } + // path. + std::string path = path_; + // Strip the % prefixes, if any. + if (absl::StartsWith(path, kNoForkServerRequestPrefix)) { + path = path.substr(kNoForkServerRequestPrefix.size()); + } + // Replace @@ with temp_file_path_. 
+ constexpr std::string_view kTempFileWildCard = "@@"; + if (absl::StrContains(path, kTempFileWildCard)) { + CHECK(!options_.temp_file_path.empty()); + path = absl::StrReplaceAll(path, + {{kTempFileWildCard, options_.temp_file_path}}); + } + ss.push_back(std::move(path)); + // args. + for (const auto &arg : options_.args) { + ss.push_back(arg); + } + // out/err. + if (!options_.stdout_file.empty()) { + ss.push_back(absl::StrCat("> ", options_.stdout_file)); + } + if (!options_.stderr_file.empty()) { + if (options_.stdout_file != options_.stderr_file) { + ss.push_back(absl::StrCat("2> ", options_.stderr_file)); + } else { + ss.push_back("2>&1"); + } + } + // Trim trailing space and return. + return absl::StrJoin(ss, kCommandLineSeparator); +} + +bool Command::StartForkServer(std::string_view temp_dir_path, + std::string_view prefix) { + if (absl::StartsWith(path_, kNoForkServerRequestPrefix)) { + VLOG(2) << "Fork server disabled for " << path(); + return false; + } + VLOG(2) << "Starting fork server for " << path(); + + fork_server_.reset(new ForkServerProps); + fork_server_->fifo_path_[0] = std::filesystem::path(temp_dir_path) + .append(absl::StrCat(prefix, "_FIFO0")); + fork_server_->fifo_path_[1] = std::filesystem::path(temp_dir_path) + .append(absl::StrCat(prefix, "_FIFO1")); + const std::string pid_file_path = + std::filesystem::path(temp_dir_path).append("pid"); + (void)std::filesystem::create_directory(temp_dir_path); // it may not exist. + for (int i = 0; i < 2; ++i) { + PCHECK(mkfifo(fork_server_->fifo_path_[i].c_str(), 0600) == 0) + << VV(i) << VV(fork_server_->fifo_path_[i]); + } + + // NOTE: A background process does not return its exit status to the subshell, + // so failures will never propagate to the caller of `system()`. Instead, we + // save out the background process's PID to a file and use it later to assert + // that the process has started and is still running. 
+ static constexpr std::string_view kForkServerCommandStub = R"sh( + { + CENTIPEDE_FORK_SERVER_FIFO0="%s" \ + CENTIPEDE_FORK_SERVER_FIFO1="%s" \ + exec %s + } & + printf "%%s" $! > "%s" +)sh"; + const std::string fork_server_command = absl::StrFormat( + kForkServerCommandStub, fork_server_->fifo_path_[0], + fork_server_->fifo_path_[1], command_line_, pid_file_path); + VLOG(2) << "Fork server command:" << fork_server_command; + + const int exit_code = system(fork_server_command.c_str()); + + // Check if `system()` was able to parse and run the command at all. + if (exit_code != EXIT_SUCCESS) { + LogProblemInfo( + "Failed to parse or run command to launch fork server; will proceed " + "without it"); + return false; + } + + // The fork server is probably running now. However, one failure scenario is + // that it starts and exits early. Try opening the read/write comms pipes with + // it: if that fails, something is wrong. + // We use non-blocking I/O to open the pipes. That is good and safe, because: + // 1) This prevents the `open()` calls from hanging when the fork server fails + // to open the pipes on its side (note the use of O_RDWR, not O_WRONLY, to + // avoid ENXIO). + // 2) In `Command::Execute`, we wait for the return channel pipe with a + // `poll()`, so it should always have data when we attempt to `read()` from + // it. + // See more at + // https://www.gnu.org/software/libc/manual/html_node/Operating-Modes.html. 
+ if ((fork_server_->pipe_[0] = open(fork_server_->fifo_path_[0].c_str(), + O_RDWR | O_NONBLOCK)) < 0 || + (fork_server_->pipe_[1] = open(fork_server_->fifo_path_[1].c_str(), + O_RDONLY | O_NONBLOCK)) < 0) { + LogProblemInfo( + "Failed to establish communication with fork server; will proceed " + "without it"); + return false; + } + + std::string pid_str; + ReadFromLocalFile(pid_file_path, pid_str); + CHECK(absl::SimpleAtoi(pid_str, &fork_server_->pid_)) << VV(pid_str); + auto creation_stamp = GetProcessCreationStamp(fork_server_->pid_); + if (!creation_stamp.ok()) { + LogProblemInfo( + absl::StrCat("Failed to get the fork server's creation stamp; will " + "proceed without it " + "(failure status: ", + creation_stamp.status(), ")")); + return false; + } + fork_server_->creation_stamp = *std::move(creation_stamp); + return true; +} + +absl::Status Command::VerifyForkServerIsHealthy() { + // Preconditions: the callers (`ExecuteAsync()`) should call us only when the + // fork server is presumed to be running (`fork_server_->pid_` >= 0). If it + // is, the comms pipes are guaranteed to be opened by `StartForkServer()`. + CHECK(fork_server_ != nullptr) << "Fork server wasn't started"; + CHECK(fork_server_->pid_ >= 0) << "Fork server process failed to start"; + CHECK(fork_server_->pipe_[0] >= 0 && fork_server_->pipe_[1] >= 0) + << "Failed to connect to fork server"; + + // A process with the fork server PID exists (_some_ process, possibly with a + // recycled PID)... + if (kill(fork_server_->pid_, 0) != EXIT_SUCCESS) { + return absl::UnknownError(absl::StrCat( + "Can't communicate with fork server, PID=", fork_server_->pid_)); + } + // ...and it is a process that has the same creation stamp, so it's + // practically guaranteed to be our original fork server process. 
+ const auto creation_stamp = GetProcessCreationStamp(fork_server_->pid_); + if (!creation_stamp.ok()) return creation_stamp.status(); + if (*creation_stamp != fork_server_->creation_stamp) { + return absl::UnknownError(absl::StrCat( + "Fork server's creation stamp changed (new process?) - expected ", + fork_server_->creation_stamp, ", but got ", *creation_stamp)); + } + return absl::OkStatus(); +} + +bool Command::ExecuteAsync() { + CHECK(!is_executing()); + VLOG(1) << "Executing command '" << command_line_ << "'..."; + + if (fork_server_ != nullptr) { + VLOG(1) << "Sending execution request to fork server"; + + if (const auto status = VerifyForkServerIsHealthy(); !status.ok()) { + LogProblemInfo(absl::StrCat("Fork server should be running, but isn't: ", + status.message())); + return false; + } + + // Wake up the fork server. + char x = ' '; + CHECK_EQ(1, write(fork_server_->pipe_[0], &x, 1)); + } else { + CHECK_EQ(pid_, -1); + std::vector argv_strs = {"/bin/sh", "-c", command_line_}; + std::vector argv; + argv.reserve(argv_strs.size() + 1); + for (auto &argv_str : argv_strs) { + argv.push_back(argv_str.data()); + } + argv.push_back(nullptr); + CHECK_EQ(posix_spawn(&pid_, argv[0], /*file_actions=*/nullptr, + /*attrp=*/nullptr, argv.data(), environ), + 0); + } + + is_executing_ = true; + return true; +} + +std::optional Command::Wait(absl::Time deadline) { + CHECK(is_executing()); + int exit_code = EXIT_SUCCESS; + + if (fork_server_ != nullptr) { + // The fork server forks, the child is running. Block until some readable + // data appears in the pipe (that is, after the fork server writes the + // execution result to it). + struct pollfd poll_fd = {}; + int poll_ret = -1; + do { + // NOTE: `poll_fd` has to be reset every time. + poll_fd = { + /*fd=*/fork_server_->pipe_[1], // The file descriptor to wait for. + /*events=*/POLLIN, // Wait until `fd` gets readable data. 
+ }; + const int poll_timeout_ms = static_cast(absl::ToInt64Milliseconds( + std::max(deadline - absl::Now(), absl::Milliseconds(1)))); + poll_ret = poll(&poll_fd, 1, poll_timeout_ms); + // The `poll()` syscall can get interrupted: it sets errno==EINTR in that + // case. We should tolerate that. + } while (poll_ret < 0 && errno == EINTR); + if (poll_ret != 1 || (poll_fd.revents & POLLIN) == 0) { + // The fork server errored out or timed out, or some other error occurred, + // e.g. the syscall was interrupted. + if (poll_ret == 0) { + LogProblemInfo(absl::StrCat( + "Timeout while waiting for fork server: deadline is ", deadline)); + } else { + LogProblemInfo(absl::StrCat( + "Error while waiting for fork server: poll() returned ", poll_ret)); + } + return std::nullopt; + } + + // The fork server wrote the execution result to the pipe: read it. + CHECK_EQ(sizeof(exit_code), + read(fork_server_->pipe_[1], &exit_code, sizeof(exit_code))); + } else { + CHECK_NE(pid_, -1); + while (true) { + const pid_t r = waitpid(pid_, &exit_code, WNOHANG); + CHECK_NE(r, -1); + if (r == pid_ && (WIFEXITED(exit_code) || WIFSIGNALED(exit_code))) break; + CHECK_EQ(r, 0); + const auto timeout = deadline - absl::Now(); + if (timeout > absl::ZeroDuration()) { + const auto duration = std::clamp( + absl::ToInt64Microseconds(timeout), 0, 100000); + usleep(duration); // NOLINT: early return on SIGCHLD is desired. + continue; + } else { + LogProblemInfo(absl::StrCat( + "Timeout while waiting for the command process: deadline is ", + deadline)); + return std::nullopt; + } + } + pid_ = -1; + } + is_executing_ = false; + + // When the command is actually a wrapper shell launching the binary(-es) + // (e.g. 
a Docker container), the shell will preserve a normal exit code + // returned by the binary (the legal range for such codes that can be + // passed to `exit()` is [0..125]); but the shell will specially encode + // the exit code returned by the binary when the binary is killed by a + // signal by adding 128 to the signal number and returning the result as + // a normal exit code. This encoding is used in `bash` and `dash` but may be + // different in other shells, e.g., `ksh`. + // + // For more details, see https://tldp.org/LDP/abs/html/exitcodes.html. + // + // Therefore, to handle this case, we need to first unpack these special + // pseudo-normal exit codes before analyzing them further. After + // reassigning `WEXITSTATUS()` to exit_code, the if-else below will take + // the else-branch and unpack the signal number from the updated value. This + // has experimentally been observed to work with existing implementations of + // the `wait` macros but there is no definitive documentation for it. + if (WIFEXITED(exit_code) && WEXITSTATUS(exit_code) > 128 && + WEXITSTATUS(exit_code) < 255) { + exit_code = WEXITSTATUS(exit_code); + } + + if (WIFEXITED(exit_code) && WEXITSTATUS(exit_code) != EXIT_SUCCESS) { + const auto exit_status = WEXITSTATUS(exit_code); + VlogProblemInfo( + absl::StrCat("Command errored out: exit status=", exit_status), + /*vlog_level=*/1); + exit_code = exit_status; + } else if (WIFSIGNALED(exit_code)) { + const auto signal = WTERMSIG(exit_code); + if (signal == SIGINT) { + RequestEarlyStop(EXIT_FAILURE); + // When the user kills Centipede via ^C, they are unlikely to be + // interested in any of the subprocesses' outputs. Also, ^C terminates all + // the subprocesses, including all the runners, so all their outputs would + // get printed simultaneously, flooding the log. Hence log at a high + // `vlog_level`. 
+ VlogProblemInfo("Command killed: signal=SIGINT (likely Ctrl-C)", + /*vlog_level=*/10); + } else { + // The fork server subprocess was killed by something other than ^C: log + // at a lower `vlog_level` to help diagnose problems. + VlogProblemInfo(absl::StrCat("Command killed: signal=", signal), + /*vlog_level=*/1); + } + + // TODO(ussuri): Consider changing this to exit_code = EXIT_FAILURE. + exit_code = signal; + } + + return exit_code; +} + +void Command::RequestStop() { + CHECK(is_executing()); + if (fork_server_) { + CHECK_NE(fork_server_->pid_, -1); + kill(fork_server_->pid_, SIGTERM); + return; + } + CHECK_NE(pid_, -1); + kill(pid_, SIGTERM); +} + +std::string Command::ReadRedirectedStdout() const { + std::string ret; + if (!options_.stdout_file.empty()) { + ReadFromLocalFile(options_.stdout_file, ret); + if (ret.empty()) ret = ""; + } + return ret; +} + +std::string Command::ReadRedirectedStderr() const { + std::string ret; + if (!options_.stderr_file.empty()) { + if (options_.stderr_file == "2>&1" || + options_.stderr_file == options_.stdout_file) { + ret = ""; + } else { + ReadFromLocalFile(options_.stderr_file, ret); + if (ret.empty()) ret = ""; + } + } + return ret; +} + +void Command::LogProblemInfo(std::string_view message) const { + // Prevent confusing interlaced logs when multiple threads experience failures + // at the same time. + // TODO(ussuri): Non-failure related logs from other threads may still + // interlace with these. Improve further, if possible. Note that printing + // line-by-line is unavoidable to overcome the single log line length limit. 
+ static absl::Mutex mu{absl::kConstInit}; + absl::MutexLock lock(&mu); + + LOG(ERROR) << message; + LOG(ERROR).NoPrefix() << "=== COMMAND ==="; + LOG(ERROR).NoPrefix() << command_line_; + LOG(ERROR).NoPrefix() << "=== STDOUT ==="; + for (const auto &line : absl::StrSplit(ReadRedirectedStdout(), '\n')) { + LOG(ERROR).NoPrefix() << line; + } + LOG(ERROR).NoPrefix() << "=== STDERR ==="; + for (const auto &line : absl::StrSplit(ReadRedirectedStderr(), '\n')) { + LOG(ERROR).NoPrefix() << line; + } +} + +void Command::VlogProblemInfo(std::string_view message, int vlog_level) const { + if (ABSL_VLOG_IS_ON(vlog_level)) LogProblemInfo(message); +} + +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/command.h b/src/third_party/fuzztest/dist/centipede/command.h new file mode 100644 index 00000000000..cb8de7a3243 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/command.h @@ -0,0 +1,140 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef THIRD_PARTY_CENTIPEDE_COMMAND_H_ +#define THIRD_PARTY_CENTIPEDE_COMMAND_H_ + +#include +#include +#include +#include +#include + +#include "absl/status/status.h" +#include "absl/time/time.h" + +namespace fuzztest::internal { + +class Command final { + public: + struct Options { + // Arguments to pass to the executed command. The command is executed by the + // shell, so the arguments need to be shell-escaped. 
+ // TODO(b/381910257): Escape the arguments for passing to the shell. + std::vector args; + // Environment variables/values in the form "KEY=VALUE" to set in the + // subprocess executing the command. These are added to the environment + // variables inherited from the parent process. + std::vector env_add; + // Environment variables to unset in the subprocess executing the command. + std::vector env_remove; + // Redirect stdout to this file. If empty, use parent's STDOUT. + std::string stdout_file; + // Redirect stderr to this file. If empty, use parent's STDERR. If `out` == + // `err` and both are non-empty, stdout/stderr are combined. + std::string stderr_file; + // "@@" in the command will be replaced with `temp_file_path`. + std::string temp_file_path; + }; + + // Constructs a command to run the binary at `path` with the given `options`. + // The path can contain "@@" which will be replaced with + // `options.temp_file_path`. + explicit Command(std::string_view path, Options options); + + // Constructs a command to run the binary at `path` with default options. + explicit Command(std::string_view path); + + // Not movable or copyable to simplify the resource management logic. + Command(const Command& other) = delete; + Command& operator=(const Command& other) = delete; + Command(Command&& other) noexcept = delete; + Command& operator=(Command&& other) noexcept = delete; + + // Cleans up the fork server, if that was created. + ~Command(); + + // Returns a string representing the command, e.g. like this + // "env -u ENV1 ENV2=VAL2 path arg1 arg2 > out 2>& err" + std::string ToString() const; + + // Execute the command asynchronously. Returns true if it starts a new + // execution, false otherwise. Must be called only when the command + // is not executing. + bool ExecuteAsync(); + + // Returns whether the command is currently executing. 
+ bool is_executing() const { return is_executing_; } + + // Waits for the command execution and returns the exit status if the + // execution finishes within `deadline`. Must be called only when the command + // is executing. Returns `std::nullopt` on timeout. If interrupted, may + // call `RequestEarlyStop()` (see stop.h). + std::optional Wait(absl::Time deadline); + + // Requests the command execution to stop. Must be called only when the + // command is executing. Note that after calling this, `Wait()` is still + // needed to complete the execution. + void RequestStop(); + + // Convenience method to execute synchronously. + int Execute() { + if (!ExecuteAsync()) return EXIT_FAILURE; + return Wait(absl::InfiniteFuture()).value_or(EXIT_FAILURE); + } + + // Attempts to start a fork server, returns true on success. + // Pipe files for the fork server are created in `temp_dir_path` + // with prefix `prefix`. + // See runner_fork_server.cc for details. + bool StartForkServer(std::string_view temp_dir_path, std::string_view prefix); + + // Accessors. + const std::string& path() const { return path_; } + + private: + struct ForkServerProps; + + int pid_ = -1; + bool is_executing_ = false; + + // Returns the status of the fork server process. Expects that the server was + // previously started using `StartForkServer()`. + absl::Status VerifyForkServerIsHealthy(); + + // Reads and returns the stdout of the command, if redirected to a file. If + // not redirected, returns a placeholder text. + std::string ReadRedirectedStdout() const; + // Reads and returns the stderr of the command, if redirected to a file that + // is also different from the redirected stdout. If not redirected, returns a + // placeholder text. + std::string ReadRedirectedStderr() const; + // Possibly logs information about a crash, starting with `message`, followed + // by the command line, followed by the redirected stdout and stderr read + // from `options_.stdout_file` and `options_.stderr_file` files, if any.
+ void LogProblemInfo(std::string_view message) const; + // Like `LogProblemInfo()`, but logging occurs only when the VLOG level (set + // via `--v` or its equivalents) is >= `vlog_level`. + void VlogProblemInfo(std::string_view message, int vlog_level) const; + + const std::string path_; + const Options options_; + const std::string command_line_ = ToString(); + + std::unique_ptr fork_server_; +}; + +} // namespace fuzztest::internal + +#endif // THIRD_PARTY_CENTIPEDE_COMMAND_H_ diff --git a/src/third_party/fuzztest/dist/centipede/command_test.cc b/src/third_party/fuzztest/dist/centipede/command_test.cc new file mode 100644 index 00000000000..ab6d6f0fe3a --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/command_test.cc @@ -0,0 +1,197 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +#include "./centipede/command.h" + +#include +#include // NOLINT(for WTERMSIG) + +#include +#include // NOLINT +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "absl/strings/substitute.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "./centipede/stop.h" +#include "./centipede/util.h" +#include "./common/test_util.h" + +namespace fuzztest::internal { +namespace { + +TEST(CommandTest, ToString) { + EXPECT_EQ(Command{"x"}.ToString(), "env \\\nx"); + { + Command::Options cmd_options; + cmd_options.args = {"arg1", "arg2"}; + EXPECT_EQ((Command{"path", std::move(cmd_options)}.ToString()), + "env \\\npath \\\narg1 \\\narg2"); + } + { + Command::Options cmd_options; + cmd_options.env_add = {"K1=V1", "K2=V2"}; + cmd_options.env_remove = {"K3"}; + EXPECT_EQ((Command{"x", std::move(cmd_options)}.ToString()), + "env \\\n-u K3 \\\nK1=V1 \\\nK2=V2 \\\nx"); + } + { + Command::Options cmd_options; + cmd_options.stdout_file = "out"; + EXPECT_EQ((Command{"x", std::move(cmd_options)}.ToString()), + "env \\\nx \\\n> out"); + } + { + Command::Options cmd_options; + cmd_options.stderr_file = "err"; + EXPECT_EQ((Command{"x", std::move(cmd_options)}.ToString()), + "env \\\nx \\\n2> err"); + } + { + Command::Options cmd_options; + cmd_options.stdout_file = "out"; + cmd_options.stderr_file = "err"; + EXPECT_EQ((Command{"x", std::move(cmd_options)}.ToString()), + "env \\\nx \\\n> out \\\n2> err"); + } + { + Command::Options cmd_options; + cmd_options.stdout_file = "out"; + cmd_options.stderr_file = "out"; + EXPECT_EQ((Command{"x", std::move(cmd_options)}.ToString()), + "env \\\nx \\\n> out \\\n2>&1"); + } +} + +TEST(CommandTest, Execute) { + // Check for default exit code. + Command echo{"echo"}; + EXPECT_EQ(echo.Execute(), 0); + EXPECT_FALSE(ShouldStop()); + + // Check for exit code 7. 
+ Command exit7{"bash -c 'exit 7'"}; + EXPECT_EQ(exit7.Execute(), 7); + EXPECT_FALSE(ShouldStop()); +} + +TEST(CommandTest, HandlesInterruptedCommand) { + Command self_sigint{"bash -c 'kill -SIGINT $$'"}; + self_sigint.ExecuteAsync(); + self_sigint.Wait(absl::InfiniteFuture()); + EXPECT_TRUE(ShouldStop()); + ClearEarlyStopRequestAndSetStopTime(absl::InfiniteFuture()); +} + +TEST(CommandTest, InputFileWildCard) { + Command::Options cmd_options; + cmd_options.temp_file_path = "TEMP_FILE"; + Command cmd{"foo bar @@ baz", std::move(cmd_options)}; + EXPECT_EQ(cmd.ToString(), "env \\\nfoo bar TEMP_FILE baz"); +} + +TEST(CommandTest, ForkServer) { + const std::string test_tmpdir = GetTestTempDir(test_info_->name()); + const std::string helper = + GetDataDependencyFilepath("centipede/command_test_helper"); + + // TODO(ussuri): Dedupe these testcases. + + { + const std::string input = "success"; + const std::string log = std::filesystem::path{test_tmpdir} / input; + Command::Options cmd_options; + cmd_options.args = {input}; + cmd_options.stdout_file = log; + cmd_options.stderr_file = log; + Command cmd{helper, std::move(cmd_options)}; + EXPECT_TRUE(cmd.StartForkServer(test_tmpdir, "ForkServer")); + EXPECT_EQ(cmd.Execute(), EXIT_SUCCESS); + std::string log_contents; + ReadFromLocalFile(log, log_contents); + EXPECT_EQ(log_contents, absl::Substitute("Got input: $0", input)); + } + + { + const std::string input = "fail"; + const std::string log = std::filesystem::path{test_tmpdir} / input; + Command::Options cmd_options; + cmd_options.args = {input}; + cmd_options.stdout_file = log; + cmd_options.stderr_file = log; + Command cmd{helper, std::move(cmd_options)}; + EXPECT_TRUE(cmd.StartForkServer(test_tmpdir, "ForkServer")); + EXPECT_EQ(cmd.Execute(), EXIT_FAILURE); + std::string log_contents; + ReadFromLocalFile(log, log_contents); + EXPECT_EQ(log_contents, absl::Substitute("Got input: $0", input)); + } + + { + const std::string input = "ret42"; + const std::string log = 
std::filesystem::path{test_tmpdir} / input; + Command::Options cmd_options; + cmd_options.args = {input}; + cmd_options.stdout_file = log; + cmd_options.stderr_file = log; + Command cmd{helper, std::move(cmd_options)}; + EXPECT_TRUE(cmd.StartForkServer(test_tmpdir, "ForkServer")); + EXPECT_EQ(cmd.Execute(), 42); + std::string log_contents; + ReadFromLocalFile(log, log_contents); + EXPECT_EQ(log_contents, absl::Substitute("Got input: $0", input)); + } + + { + const std::string input = "abort"; + const std::string log = std::filesystem::path{test_tmpdir} / input; + Command::Options cmd_options; + cmd_options.args = {input}; + cmd_options.stdout_file = log; + cmd_options.stderr_file = log; + Command cmd{helper, std::move(cmd_options)}; + EXPECT_TRUE(cmd.StartForkServer(test_tmpdir, "ForkServer")); + // WTERMSIG() needs an lvalue on some platforms. + const int ret = cmd.Execute(); + EXPECT_EQ(WTERMSIG(ret), SIGABRT); + std::string log_contents; + ReadFromLocalFile(log, log_contents); + EXPECT_EQ(log_contents, absl::Substitute("Got input: $0", input)); + } + + { + const std::string input = "hang"; + const std::string log = std::filesystem::path{test_tmpdir} / input; + Command::Options cmd_options; + cmd_options.args = {input}; + cmd_options.stdout_file = log; + cmd_options.stderr_file = log; + Command cmd{helper, std::move(cmd_options)}; + ASSERT_TRUE(cmd.StartForkServer(test_tmpdir, "ForkServer")); + ASSERT_TRUE(cmd.ExecuteAsync()); + EXPECT_EQ(cmd.Wait(absl::Now() + absl::Seconds(2)), std::nullopt); + std::string log_contents; + ReadFromLocalFile(log, log_contents); + EXPECT_EQ(log_contents, absl::Substitute("Got input: $0", input)); + } + + // TODO(kcc): [impl] test what happens if the child is interrupted. 
+} + +} // namespace +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/command_test_helper.cc b/src/third_party/fuzztest/dist/centipede/command_test_helper.cc new file mode 100644 index 00000000000..ac5b00f70e1 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/command_test_helper.cc @@ -0,0 +1,36 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include +#include +#include +#include + +#include "absl/base/nullability.h" + +// A binary linked with the fork server that exits/crashes in different ways. +int main(int argc, char** absl_nonnull argv) { + assert(argc == 2); + printf("Got input: %s", argv[1]); + fflush(stdout); + if (!strcmp(argv[1], "success")) return EXIT_SUCCESS; + if (!strcmp(argv[1], "fail")) return EXIT_FAILURE; + if (!strcmp(argv[1], "ret42")) return 42; + if (!strcmp(argv[1], "abort")) abort(); + // Sleep longer than the 2s Wait() deadline in CommandTest.ForkServer. + if (!strcmp(argv[1], "hang")) sleep(5); + return 17; +} diff --git a/src/third_party/fuzztest/dist/centipede/concurrent_bitset.h b/src/third_party/fuzztest/dist/centipede/concurrent_bitset.h new file mode 100644 index 00000000000..09d1aa12985 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/concurrent_bitset.h @@ -0,0 +1,150 @@ +// Copyright 2022 The Centipede Authors.
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This library defines the concepts "fuzzing feature" and "feature domain". +// It is used by Centipede, and it can be used by fuzz runners to +// define their features in a way most friendly to Centipede. +// Fuzz runners do not have to use this file nor to obey the rules defined here. +// But using this file and following its rules is the simplest way if you want +// Centipede to understand the details about the features generated by the +// runner. +// +// This library must not depend on anything other than libc so that fuzz targets +// using it doesn't gain redundant coverage. For the same reason this library +// uses raw __builtin_trap instead of CHECKs. +// We make an exception for for std::sort/std::unique, +// since is very lightweight. +// This library is also header-only, with all functions defined as inline. + +#ifndef THIRD_PARTY_CENTIPEDE_CONCURRENT_BITSET_H_ +#define THIRD_PARTY_CENTIPEDE_CONCURRENT_BITSET_H_ + +#include +#include + +// WARNING!!!: Be very careful with what STL headers or other dependencies you +// add here. This header needs to remain mostly bare-bones so that we can +// include it into runner. +#include +#include +#include + +#include "absl/base/const_init.h" +#include "./centipede/concurrent_byteset.h" + +namespace fuzztest::internal { + +// A fixed-size bitset with a lossy concurrent set() function. +// kSize (in bits) must be a multiple of 2**16. +// +// IMPORTANT!!! 
Objects of this class should only be constructed with static +// storage duration. This is because the class has intentionally uninitialized +// direct and transitive data members that rely on static initialization in the +// compiled process image. +template +class ConcurrentBitSet { + public: + static_assert((kSizeInBits % (1<<16)) == 0); + + // Creates a ConcurrentBitSet with static storage duration. + explicit constexpr ConcurrentBitSet(absl::ConstInitType) + : lines_{absl::kConstInit} {} + + // Clears the bit set. + void clear() { + memset(words_, 0, sizeof(words_)); + lines_.clear(); + } + + // Sets the bit `idx % kSizeInBits`. + // set() can be called concurrently with another set(). + // If several threads race to update adjacent bits, + // the update may be lost (i.e. set() is lossy). + // We could use atomic set-bit instructions to make it non-lossy, + // but it is going to be too expensive. + void set(size_t idx) { + idx %= kSizeInBits; + size_t word_idx = idx / kBitsInWord; + size_t bit_idx = idx % kBitsInWord; + size_t line_idx = word_idx / kWordsInLine; + lines_.Set(line_idx, 1); + word_t mask = 1ULL << bit_idx; + word_t word = __atomic_load_n(&words_[word_idx], __ATOMIC_RELAXED); + if (!(word & mask)) { + word |= mask; + __atomic_store_n(&words_[word_idx], word, __ATOMIC_RELAXED); + } + } + + // Gets the bit at `idx % kSizeInBits`. + uint8_t get(size_t idx) { + idx %= kSizeInBits; + size_t word_idx = idx / kBitsInWord; + size_t bit_idx = idx % kBitsInWord; + word_t word = __atomic_load_n(&words_[word_idx], __ATOMIC_RELAXED); + word_t mask = 1ULL << bit_idx; + return (word & mask) != 0; + } + + // Calls `action(index)` for every index of a non-zero bit in the set, + // then sets all those bits to zero. + __attribute__((noinline)) void ForEachNonZeroBit( + const std::function &action) { + // Iterates over all non-empty lines. 
+ lines_.ForEachNonZeroByte([&](size_t idx, uint8_t value) { + size_t word_idx_beg = idx * kWordsInLine; + size_t word_idx_end = word_idx_beg + kWordsInLine; + ForEachNonZeroBit(action, word_idx_beg, word_idx_end); + }); + } + + private: + // Iterates over the range of words [`word_idx_beg`, `word_idx_end`). + void ForEachNonZeroBit(const std::function &action, + size_t word_idx_beg, size_t word_idx_end) { + for (size_t word_idx = word_idx_beg; word_idx < word_idx_end; ++word_idx) { + if (word_t word = words_[word_idx]) { + words_[word_idx] = 0; + do { + size_t bit_idx = __builtin_ctzll(word); + action(word_idx * kBitsInWord + bit_idx); + word_t mask = 1ULL << bit_idx; + word &= ~mask; + } while (word); + } + } + } + + // A word is the largest integer type convenient for bitwise operations. + using word_t = uintptr_t; + static constexpr size_t kBytesInWord = sizeof(word_t); + static constexpr size_t kBitsInWord = CHAR_BIT * kBytesInWord; + static constexpr size_t kSizeInWords = kSizeInBits / kBitsInWord; + // All words are logically split into lines. + // When `set()` is called, we set the corresponding element of `lines_` to 1, + // so that we now know that at least 1 bit in that line is set. Then, in + // `ForEachNonZeroBit()`, we iterate only those lines that have non-zero bits. + static constexpr size_t kBytesInLine = 64 * 8; + static constexpr size_t kWordsInLine = kBytesInLine / kBytesInWord; + static constexpr size_t kSizeInLines = kSizeInWords / kWordsInLine; + ConcurrentByteSet lines_; + // NOTE: No initializer for performance (`kSizeInWords` can be quite large). + // Relies on static initialization in the process image (see the class + // comment). 
+ word_t words_[kSizeInWords]; +}; + +} // namespace fuzztest::internal + +#endif // THIRD_PARTY_CENTIPEDE_CONCURRENT_BITSET_H_ diff --git a/src/third_party/fuzztest/dist/centipede/concurrent_bitset_test.cc b/src/third_party/fuzztest/dist/centipede/concurrent_bitset_test.cc new file mode 100644 index 00000000000..530eab912aa --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/concurrent_bitset_test.cc @@ -0,0 +1,124 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "./centipede/concurrent_bitset.h" + +#include +#include +#include + +#include "gtest/gtest.h" +#include "absl/base/const_init.h" +#include "./centipede/thread_pool.h" + +namespace fuzztest::internal { +namespace { + +TEST(ConcurrentBitSetTest, Set) { + constexpr size_t kSize = 1 << 18; + static ConcurrentBitSet bs(absl::kConstInit); + std::vector in_bits = {0, 1, 2, 100, 102, 1000000}; + std::vector expected_out_bits = {0, 1, 2, 100, 102, 1000000 % kSize}; + std::vector out_bits; + for (auto idx : in_bits) { + bs.set(idx); + } + bs.ForEachNonZeroBit([&](size_t idx) { out_bits.push_back(idx); }); + EXPECT_EQ(out_bits, expected_out_bits); + + bs.clear(); + out_bits.clear(); + bs.ForEachNonZeroBit([&](size_t idx) { out_bits.push_back(idx); }); + EXPECT_TRUE(out_bits.empty()); + bs.set(42); + bs.ForEachNonZeroBit([&](size_t idx) { out_bits.push_back(idx); }); + expected_out_bits = {42}; + EXPECT_EQ(out_bits, expected_out_bits); + // Check that all bits are now clear. + out_bits.clear(); + bs.ForEachNonZeroBit([&](size_t idx) { out_bits.push_back(idx); }); + EXPECT_TRUE(out_bits.empty()); +} + +TEST(ConcurrentBitSetTest, Get) { + constexpr size_t kSize = 1 << 18; + static ConcurrentBitSet bs(absl::kConstInit); + constexpr size_t kInBit1 = 134217728; + constexpr size_t kInBit2 = 134217732; + ASSERT_EQ(bs.get(kInBit1), 0); + ASSERT_EQ(bs.get(kInBit2), 0); + bs.set(kInBit1); + EXPECT_EQ(bs.get(kInBit1), 1); + EXPECT_EQ(bs.get(kInBit2), 0); +} + +// Tests `ConcurrentBitSet` from multiple threads. +TEST(ConcurrentBitSetTest, SetInConcurrentThreads) { + // 3 threads will each set one specific bit in a long loop. + // 4th thread will set another bit, just once. + // The set() function is lossy, i.e. it may fail to set the bit. + // If the value is set in a long loop, it will be set with a probability + // indistinguishable from one (at least this is my theory :). + // But the 4th thread that sets its bit once, may actually fail to do it. 
+ // So, this test allows two outcomes (possible_bits3/possible_bits4). + // WARNING: `bs` must be static (see the class comment). + static ConcurrentBitSet<(1 << 18)> bs(absl::kConstInit); + static auto cb = [](size_t idx) { + for (size_t i = 0; i < 10000000; i++) { + bs.set(idx); + } + }; + { + ThreadPool pool{4}; + pool.Schedule([]() { cb(10); }); + pool.Schedule([]() { cb(11); }); + pool.Schedule([]() { cb(14); }); + pool.Schedule([]() { bs.set(15); }); + } + std::vector bits; + std::vector possible_bits3 = {10, 11, 14}; + std::vector possible_bits4 = {10, 11, 14, 15}; + bs.ForEachNonZeroBit([&bits](size_t idx) { bits.push_back(idx); }); + if (bits.size() == 3) { + EXPECT_EQ(bits, possible_bits3); + } else { + EXPECT_EQ(bits, possible_bits4); + } +} + +// Global ConcurrentBitSet with a absl::kConstInit CTOR. +static ConcurrentBitSet<(1 << 20)> large_concurrent_bitset(absl::kConstInit); +// Test a thread-local object. +static thread_local ConcurrentBitSet<(1 << 20)> large_tls_concurrent_bitset( + absl::kConstInit); + +TEST(ConcurrentBitSetTest, Large) { + for (auto *bs : {&large_concurrent_bitset, &large_tls_concurrent_bitset}) { + const std::vector in_bits = { + 0, 1, 2, 100, 102, 800, 10000, 20000, 30000, 500000, + }; + + for (size_t iter = 0; iter < 100000; ++iter) { + for (auto idx : in_bits) { + bs->set(idx); + } + std::vector out_bits; + bs->ForEachNonZeroBit([&](size_t idx) { out_bits.push_back(idx); }); + EXPECT_EQ(out_bits, in_bits); + } + } +} + +} // namespace +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/concurrent_byteset.h b/src/third_party/fuzztest/dist/centipede/concurrent_byteset.h new file mode 100644 index 00000000000..19ffe1da3b4 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/concurrent_byteset.h @@ -0,0 +1,187 @@ +// Copyright 2023 The Centipede Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This library defines the concepts "fuzzing feature" and "feature domain". +// It is used by Centipede, and it can be used by fuzz runners to +// define their features in a way most friendly to Centipede. +// Fuzz runners do not have to use this file nor to obey the rules defined here. +// But using this file and following its rules is the simplest way if you want +// Centipede to understand the details about the features generated by the +// runner. + +#ifndef THIRD_PARTY_CENTIPEDE_CONCURRENT_BYTESET_H_ +#define THIRD_PARTY_CENTIPEDE_CONCURRENT_BYTESET_H_ + +#include +#include +#include +#include + +// WARNING!!!: Be very careful with what STL headers or other dependencies you +// add here. This header needs to remain mostly bare-bones so that we can +// include it into runner. + +#include "absl/base/const_init.h" + +namespace fuzztest::internal { + +// TODO(kcc): replace the standalone ForEachNonZeroByte with code from here. +// TODO(kcc): ConcurrentByteSet is an unoptimized single-layer byte set. +// Implement multi-layer byte set(s). + +// A fixed-size byte set containing kSize bytes, kSize must be a multiple of 64. +// Set() can be called concurrently with another Set(), other uses should be +// synchronized externally. +// Intended usage is to call ForEachNonZeroByte() from one thread. +// +// IMPORTANT!!! Objects of this class should only be constructed with static +// storage duration. 
This is because the class has intentionally uninitialized +// direct and transitive data members that rely on static initialization in the +// compiled process image. +template +class ConcurrentByteSet { + public: + static constexpr size_t kSizeInBytes = kSize; + // kSize must be a multiple of this. + static constexpr size_t kSizeMultiple = 64; + static_assert((kSize % kSizeMultiple) == 0); + + // Creates a ConcurrentByteSet with static storage duration. + explicit constexpr ConcurrentByteSet(absl::ConstInitType) {} + + // Clears the set. + void clear() { memset(bytes_, 0, sizeof(bytes_)); } + + // Sets element `idx` to `value`. `idx` must be < kSize (traps otherwise). + // Can be called concurrently. + void Set(size_t idx, uint8_t value) { + if (idx >= kSize) __builtin_trap(); + __atomic_store_n(&bytes_[idx], value, __ATOMIC_RELAXED); + } + + // Performs a saturated increment of element `idx` (caps at 255). + void SaturatedIncrement(size_t idx) { + if (idx >= kSize) __builtin_trap(); + uint8_t counter = __atomic_load_n(&bytes_[idx], __ATOMIC_RELAXED); + if (counter != 255) + __atomic_store_n(&bytes_[idx], counter + 1, __ATOMIC_RELAXED); + } + + // Calls `action(index, value)` for every {index,value} of a non-zero byte in + // the set, then sets all those bytes to zero. + // `from` and `to` set the range of elements to iterate, both must be + // multiples of kSizeMultiple. + void ForEachNonZeroByte(const std::function &action, + size_t from = 0, size_t to = kSize) { + using word_t = uintptr_t; + constexpr size_t kWordSize = sizeof(word_t); + if (from % kSizeMultiple) __builtin_trap(); + if (to % kSizeMultiple) __builtin_trap(); + if (to > kSize) __builtin_trap(); + // Iterate one word at a time. + for (uint8_t *ptr = &bytes_[from], *end = &bytes_[to]; ptr < end; + ptr += kWordSize) { + word_t word; + __builtin_memcpy(&word, ptr, kWordSize); + if (!word) continue; + __builtin_memset(ptr, 0, kWordSize); + // This loop assumes little-endianness. (Tests will break on big-endian).
+ for (size_t pos = 0; pos < kWordSize; pos++) { + uint8_t value = word >> (pos * CHAR_BIT); // lowest byte is taken. + if (value) action(ptr - &bytes_[0] + pos, value); + } + } + } + + private: + // No initializer for performance (`kSize` can be quite large). Relies on + // static initialization in the process image (see the class comment). + uint8_t bytes_[kSize] __attribute__((aligned(64))); +}; + +// Similar to ConcurrentByteSet, but consists of two layers, upper and lower. +// The size of the lower layer is a multiple of the size of the upper layer. +// Set() writes 1 to an element in the upper layer and then writes `value` to an +// element of the lower layer. This allows ForEachNonZeroByte() to +// skip sub-regions of the lower layer that were not written to. Otherwise, the +// interface and the behaviour are equivalent to ConcurrentByteSet. +template > +class LayeredConcurrentByteSet { + public: + static constexpr size_t kSizeInBytes = kSize; + static constexpr size_t kSizeMultiple = + Lower::kSizeMultiple * Upper::kSizeMultiple; + static_assert(kSize == Lower::kSizeInBytes); + + LayeredConcurrentByteSet() = default; + // Creates a LayeredConcurrentByteSet with static storage duration.
+ explicit constexpr LayeredConcurrentByteSet(absl::ConstInitType) + : upper_layer_(absl::kConstInit), lower_layer_(absl::kConstInit) {} + + void clear() { + upper_layer_.clear(); + lower_layer_.clear(); + } + + void Set(size_t idx, uint8_t value) { + if (idx >= kSize) __builtin_trap(); + upper_layer_.Set(idx / kLayerRatio, 1); + lower_layer_.Set(idx, value); + } + + void SaturatedIncrement(size_t idx) { + if (idx >= kSize) __builtin_trap(); + upper_layer_.Set(idx / kLayerRatio, 1); + lower_layer_.SaturatedIncrement(idx); + } + + void ForEachNonZeroByte(const std::function &action, + size_t from = 0, size_t to = kSize) { + if (to > kSize) __builtin_trap(); + if (from % kSizeMultiple) __builtin_trap(); + if (to % kSizeMultiple) __builtin_trap(); + size_t upper_from = from / kLayerRatio; + size_t upper_to = to / kLayerRatio; + upper_layer_.ForEachNonZeroByte( + [&](size_t idx, uint8_t value) { + size_t lower_from = idx * kLayerRatio; + size_t lower_to = lower_from + kLayerRatio; + lower_layer_.ForEachNonZeroByte(action, lower_from, lower_to); + }, + upper_from, upper_to); + } + + private: + Upper upper_layer_; + Lower lower_layer_; + static constexpr size_t kLayerRatio = + Lower::kSizeInBytes / Upper::kSizeInBytes; + static_assert((Lower::kSizeInBytes % Upper::kSizeInBytes) == 0); +}; + +// Two-layer ConcurrentByteSet() with upper layer 64x smaller than the lower. +template +class TwoLayerConcurrentByteSet + : public LayeredConcurrentByteSet> { + public: + // Creates a TwoLayerConcurrentByteSet with static storage duration. 
+ explicit constexpr TwoLayerConcurrentByteSet(absl::ConstInitType) + : LayeredConcurrentByteSet>( + absl::kConstInit) {} +}; + +} // namespace fuzztest::internal + +#endif // THIRD_PARTY_CENTIPEDE_CONCURRENT_BYTESET_H_ diff --git a/src/third_party/fuzztest/dist/centipede/concurrent_byteset_test.cc b/src/third_party/fuzztest/dist/centipede/concurrent_byteset_test.cc new file mode 100644 index 00000000000..3d4277d0213 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/concurrent_byteset_test.cc @@ -0,0 +1,124 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "./centipede/concurrent_byteset.h" + +#include +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "absl/base/const_init.h" +#include "./centipede/thread_pool.h" + +namespace fuzztest::internal { +namespace { + +TEST(ConcurrentByteSetTest, Basic) { + static ConcurrentByteSet<1024> bs(absl::kConstInit); + const std::vector> in = { + {0, 1}, {1, 42}, {2, 33}, {100, 15}, {102, 1}, {800, 66}}; + + for (const auto &idx_value : in) { + bs.Set(idx_value.first, idx_value.second); + } + + // Test ForEachNonZeroByte. + std::vector> out; + bs.ForEachNonZeroByte( + [&](size_t idx, uint8_t value) { out.emplace_back(idx, value); }); + EXPECT_EQ(out, in); + + // Now bs should be empty. 
+ out.clear(); + bs.ForEachNonZeroByte( + [&](size_t idx, uint8_t value) { out.emplace_back(idx, value); }); + EXPECT_TRUE(out.empty()); + + // Test SaturatedIncrement. + for (const auto &idx_value : in) { + for (auto iter = 0; iter < idx_value.second; ++iter) { + bs.SaturatedIncrement(idx_value.first); + } + } + bs.ForEachNonZeroByte( + [&](size_t idx, uint8_t value) { out.emplace_back(idx, value); }); + EXPECT_EQ(out, in); +} + +// Test a thread_local object. +static thread_local TwoLayerConcurrentByteSet<(1 << 17)> two_layer_byte_set( + absl::kConstInit); + +TEST(ConcurrentByteSetTest, TwoLayer) { + auto &bs = two_layer_byte_set; + const std::vector> in = { + {0, 1}, {1, 42}, {2, 33}, {100, 15}, {102, 1}, {800, 66}}; + + for (const auto &idx_value : in) { + bs.Set(idx_value.first, idx_value.second); + } + + // Test ForEachNonZeroByte. + std::vector> out; + bs.ForEachNonZeroByte( + [&](size_t idx, uint8_t value) { out.emplace_back(idx, value); }); + EXPECT_EQ(out, in); + + // Now bs should be empty. + out.clear(); + bs.ForEachNonZeroByte( + [&](size_t idx, uint8_t value) { out.emplace_back(idx, value); }); + EXPECT_TRUE(out.empty()); + + // Test SaturatedIncrement. + for (const auto &idx_value : in) { + for (auto iter = 0; iter < idx_value.second; ++iter) { + bs.SaturatedIncrement(idx_value.first); + } + } + bs.ForEachNonZeroByte( + [&](size_t idx, uint8_t value) { out.emplace_back(idx, value); }); + EXPECT_EQ(out, in); +} + +// Tests TwoLayerConcurrentByteSet from multiple threads. +TEST(ConcurrentByteSetTest, TwoLayerConcurrentThreads) { + static TwoLayerConcurrentByteSet<(1 << 16)> bs(absl::kConstInit); + // 3 threads will each increment one specific byte in a long loop. + // 4th thread will increment another byte, just once. 
+ static auto cb = [](size_t idx) { + for (size_t i = 0; i < 10000000; i++) { + bs.SaturatedIncrement(idx); + } + }; + { + ThreadPool threads{4}; + threads.Schedule([]() { cb(10); }); + threads.Schedule([]() { cb(11); }); + threads.Schedule([]() { cb(14); }); + threads.Schedule([]() { bs.SaturatedIncrement(15); }); + } // The threads join here. + const std::vector> expected = { + {10, 255}, {11, 255}, {14, 255}, {15, 1}}; + std::vector> out; + bs.ForEachNonZeroByte( + [&](size_t idx, uint8_t value) { out.emplace_back(idx, value); }); + EXPECT_EQ(out, expected); +} + +} // namespace +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/config_file.cc b/src/third_party/fuzztest/dist/centipede/config_file.cc new file mode 100644 index 00000000000..2b288f147db --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/config_file.cc @@ -0,0 +1,297 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "./centipede/config_file.h" + +#include +#include // NOLINT +#include +#include +#include +#include +#include +#include +#include + +#include "absl/base/nullability.h" +#include "absl/flags/declare.h" +#include "absl/flags/flag.h" +#include "absl/flags/parse.h" +#include "absl/flags/reflection.h" +#include "absl/log/check.h" +#include "absl/log/log.h" +#include "absl/strings/match.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_join.h" +#include "absl/strings/str_replace.h" +#include "absl/strings/substitute.h" +#include "./centipede/config_init.h" +#include "./centipede/config_util.h" +#include "./centipede/util.h" +#include "./common/logging.h" +#include "./common/remote_file.h" + +// TODO(ussuri): Move these flags next to main() ASAP. They are here +// only temporarily to simplify the APIs and implementation in V1. + +ABSL_FLAG(std::string, config, "", + "Read flags from the specified file. The file can be either local or " + "remote. Relative paths are referenced from the CWD. The format " + "should be:\n" + "--flag=value\n" + "--another_flag=value\n" + "...\n" + "Lines that start with '#' or '//' are comments. Note that this " + "format is compatible with the built-in --flagfile flag (defined by " + "Abseil Flags library); however, unlike this flag, --flagfile " + "supports only local files.\n" + "Nested --load_config's won't work (but nested --flagfile's will," + "provided they point at a local file, e.g. $HOME/.centipede_rc).\n" + "The flag is position-sensitive: flags read from it override (or " + "append, in case of std::vector flags) any previous occurrences of " + "the same flags on the command line, and vice versa."); +ABSL_FLAG(std::string, save_config, "", + "Saves Centipede flags to the specified file and exits the program." + "The file can be either local or remote. Relative paths are " + "referenced from the CWD. Both the command-line flags and defaulted " + "flags are saved (the defaulted flags are commented out). 
The format " + "is:\n" + "# --flag's help string.\n" + "# --flag's default value.\n" + "--flag=value\n" + "...\n" + "This format can be parsed back by both --config and --flagfile. " + "Unlike those two flags, this flag is not position-sensitive and " + "always saves the final resolved config.\n" + "Special case: if the file's extension is .sh, a runnable shell " + "script is saved instead."); +ABSL_FLAG(bool, update_config, false, + "Must be used in combination with --config=. Writes the final " + "resolved config back to the same file."); +ABSL_FLAG(bool, print_config, false, + "Print the config to stderr upon starting Centipede."); + +// Declare --flagfile defined by the Abseil Flags library. The flag should point +// at a _local_ file is always automatically parsed by Abseil Flags. +ABSL_DECLARE_FLAG(std::vector, flagfile); + +#define DASHED_FLAG_NAME(name) "--" << FLAGS_##name.Name() + +namespace fuzztest::internal { + +AugmentedArgvWithCleanup::AugmentedArgvWithCleanup( + const std::vector& orig_argv, + const Replacements& flag_replacements, const Replacements& replacements, + BackingResourcesCleanup&& cleanup) + : was_augmented_{false}, cleanup_{cleanup} { + argv_.reserve(orig_argv.size()); + for (const auto& old_arg : orig_argv) { + const auto flag_replaced_arg = [&]() -> std::optional { + if (old_arg.empty() || old_arg[0] != '-') return std::nullopt; + std::string_view contents = old_arg; + std::string_view dashes = + (contents.size() > 1 && contents[1] == '-') ? 
"--" : "-"; + contents = contents.substr(dashes.size()); + for (const auto& flag_replacement : flag_replacements) { + if (absl::StartsWith(contents, flag_replacement.first) && + (contents.size() == flag_replacement.first.size() || + contents[flag_replacement.first.size()] == '=')) { + return absl::StrCat(dashes, flag_replacement.second, + contents.substr(flag_replacement.first.size())); + } + } + return std::nullopt; + }(); + const std::string& new_arg = argv_.emplace_back( + absl::StrReplaceAll(flag_replaced_arg.value_or(old_arg), replacements)); + if (new_arg != old_arg) { + VLOG(1) << "Augmented argv arg:\n" << VV(old_arg) << "\n" << VV(new_arg); + was_augmented_ = true; + } + } +} + +AugmentedArgvWithCleanup::AugmentedArgvWithCleanup( + AugmentedArgvWithCleanup&& rhs) noexcept { + *this = std::move(rhs); +} + +AugmentedArgvWithCleanup& AugmentedArgvWithCleanup::operator=( + AugmentedArgvWithCleanup&& rhs) noexcept { + argv_ = std::move(rhs.argv_); + was_augmented_ = rhs.was_augmented_; + cleanup_ = std::move(rhs.cleanup_); + // Prevent rhs from calling the cleanup in dtor (moving an std::function + // leaves the moved object in a valid, but undefined, state). + rhs.cleanup_ = {}; + return *this; +} + +AugmentedArgvWithCleanup::~AugmentedArgvWithCleanup() { + if (cleanup_) cleanup_(); +} + +AugmentedArgvWithCleanup LocalizeConfigFilesInArgv( + const std::vector& argv) { + const std::filesystem::path path = absl::GetFlag(FLAGS_config); + + if (!path.empty()) { + CHECK_NE(path, absl::GetFlag(FLAGS_save_config)) + << "To update config in place, use " << DASHED_FLAG_NAME(update_config); + } + + // Always need these (--config= can be passed with a local ). 
+ const AugmentedArgvWithCleanup::Replacements flag_replacements = { + {std::string{FLAGS_config.Name()}, std::string{FLAGS_flagfile.Name()}}, + }; + AugmentedArgvWithCleanup::Replacements replacements; + AugmentedArgvWithCleanup::BackingResourcesCleanup cleanup; + + // Copy the remote config file to a temporary local mirror. + if (!path.empty() && !std::filesystem::exists(path)) { // assume remote + // Read the remote file. + std::string contents; + CHECK_OK(RemoteFileGetContents(path.c_str(), contents)); + + // Save a temporary local copy. + const std::filesystem::path tmp_dir = TemporaryLocalDirPath(); + const std::filesystem::path local_path = tmp_dir / path.filename(); + LOG(INFO) << "Localizing remote config: " << VV(path) << VV(local_path); + // NOTE: Ignore "Remote" in the API names here: the paths are always local. + CHECK_OK(RemoteMkdir(tmp_dir.c_str())); + CHECK_OK(RemoteFileSetContents(local_path.c_str(), contents)); + + // Augment the argv to point at the local copy and ensure it is cleaned up. + replacements.emplace_back(path.c_str(), local_path.c_str()); + cleanup = [local_path]() { std::filesystem::remove(local_path); }; + } + + return AugmentedArgvWithCleanup{argv, flag_replacements, replacements, + std::move(cleanup)}; +} + +std::filesystem::path MaybeSaveConfigToFile( + const std::vector& leftover_argv) { + std::filesystem::path path; + + // Initialize `path` if --save_config or --update_config is passed. 
+ if (!absl::GetFlag(FLAGS_save_config).empty()) { + path = absl::GetFlag(FLAGS_save_config); + CHECK_NE(path, absl::GetFlag(FLAGS_config)) + << "To update config in place, use " << DASHED_FLAG_NAME(update_config); + CHECK(!absl::GetFlag(FLAGS_update_config)) + << DASHED_FLAG_NAME(save_config) << " and " + << DASHED_FLAG_NAME(update_config) << " are mutually exclusive"; + } else if (absl::GetFlag(FLAGS_update_config)) { + path = absl::GetFlag(FLAGS_config); + CHECK(!path.empty()) << DASHED_FLAG_NAME(update_config) + << " must be used in combination with " + << DASHED_FLAG_NAME(config); + } + + // Save or update the config file. + if (!path.empty()) { + const std::set excluded_flags = { + FLAGS_config.Name(), + FLAGS_save_config.Name(), + FLAGS_update_config.Name(), + FLAGS_print_config.Name(), + }; + const FlagInfosPerSource flags = + GetFlagsPerSource("centipede", excluded_flags); + const std::string flags_str = FormatFlagfileString( + flags, DefaultedFlags::kCommentedOut, FlagComments::kHelpAndDefault); + std::string file_contents; + if (path.extension() == ".sh") { + // NOTES: 1) The first element of `leftover_argv` is expected to be the + // /path/to/centipede, so the $1 in the stub will run it. + // 2) absl::Substitute() replaces the escaped $$ with a $. + constexpr std::string_view kScriptStub = + R"(#!/bin/bash -eu + +declare -ra flags=( +$0) + +if [[ -n "$1" ]]; then + wd=$1 +else + wd=$$PWD +fi +read -e -p "Clear workdir (which is '$$wd') [y/N]? " yn +# Tip: To default to 'y', change 'yY' to 'nN' below. 
+if [[ "$${yn}" =~ [yY] ]]; then + rm -rf "$$wd"/corpus* "$$wd"/*report*.txt "$$wd"/*/features* +fi + +set -x +$2 "$${flags[@]}" +)"; + const auto workdir = absl::GetAllFlags()["workdir"]->CurrentValue(); + const auto argv_str = absl::StrJoin(leftover_argv, " "); + file_contents = + absl::Substitute(kScriptStub, flags_str, workdir, argv_str); + } else { + file_contents = flags_str; + } + CHECK_OK(RemoteFileSetContents(path.c_str(), file_contents)); + } + + return path; +} + +std::unique_ptr InitCentipede( // + int argc, char** absl_nonnull argv) { + std::vector leftover_argv; + + // main_runtime_init() is allowed to remove recognized flags from `argv`, so + // we need a copy. + const std::vector saved_argv = CastArgv(argc, argv); + + // Among other things, this performs the initial command line parsing. + std::unique_ptr runtime_state = InitRuntime(argc, argv); + + // If --config= was passed, replace it with the Abseil Flags' built-in + // --flagfile= and reparse the command line. NOTE: It would be + // incorrect to just parse the contents of , because --config (and + // --flagfile for that matter) are position-sensitive, i.e. they may override + // flags that come before on the command line, and vice versa. + const AugmentedArgvWithCleanup localized_argv = + LocalizeConfigFilesInArgv(saved_argv); + if (localized_argv.was_augmented()) { + LOG(INFO) << "Command line was augmented; reparsing"; + runtime_state->leftover_argv() = CastArgv(absl::ParseCommandLine( + localized_argv.argc(), CastArgv(localized_argv.argv()).data())); + } + + // Log the final resolved config. + if (absl::GetFlag(FLAGS_print_config)) { + const FlagInfosPerSource flags = GetFlagsPerSource("centipede"); + const std::string flags_str = FormatFlagfileString( + flags, DefaultedFlags::kCommentedOut, FlagComments::kNone); + LOG(INFO) << "Final resolved config:\n" << flags_str; + } + + // If --save_config was passed, save the final resolved flags to the requested + // file and exit the program. 
+ const auto path = MaybeSaveConfigToFile(leftover_argv); + if (!path.empty()) { + LOG(INFO) << "Config written to file: " << VV(path); + LOG(INFO) << "Nothing left to do; exiting"; + exit(EXIT_SUCCESS); + } + + return runtime_state; +} + +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/config_file.h b/src/third_party/fuzztest/dist/centipede/config_file.h new file mode 100644 index 00000000000..eba095d8b76 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/config_file.h @@ -0,0 +1,114 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef THIRD_PARTY_CENTIPEDE_GOOGLE_CONFIG_FILE_H_ +#define THIRD_PARTY_CENTIPEDE_GOOGLE_CONFIG_FILE_H_ + +#include // NOLINT +#include +#include +#include +#include +#include + +#include "absl/base/nullability.h" +#include "./centipede/config_init.h" + +// TODO(ussuri): Move implementation-only functions to .cc. + +namespace fuzztest::internal { + +// Constructs an augmented copy of `argv` with any substrings appearing in the +// original elements replaced according to a list replacements. +// TODO(ussuri): Make more robust. What we really want is replace any possible +// form of --flag=value with an equivalent form of --new_flag=new_value. +// TODO(ussuri): Remove and just use the required bits of logic in .cc. 
+class AugmentedArgvWithCleanup final { + public: + using Replacements = std::vector>; + using BackingResourcesCleanup = std::function; + + // Ctor. The `orig_argc` and `orig_argv` are compatible with those passed to a + // main(). Each item in `orig_argv` is first processed with + // `flag_replacements` if the item has the format "-flag", "-flag=...", + // "--flag", or "--flag=", and the flag name matches. Then the `replacements` + // map should map an old substring to a new one. Only simple, one-stage string + // replacement is performed: no regexes, placeholders, envvars or recursion. + // The `cleanup` callback should clean up any temporary resources backing the + // modified flags, such as temporary files. + AugmentedArgvWithCleanup(const std::vector& orig_argv, + const Replacements& flag_replacements, + const Replacements& replacements, + BackingResourcesCleanup&& cleanup); + // Dtor. Invokes `cleanup_`. + ~AugmentedArgvWithCleanup(); + + // Movable by not copyable to prevent `cleanup_` from running twice. + AugmentedArgvWithCleanup(const AugmentedArgvWithCleanup&) = delete; + AugmentedArgvWithCleanup& operator=(const AugmentedArgvWithCleanup&) = delete; + AugmentedArgvWithCleanup(AugmentedArgvWithCleanup&&) noexcept; + AugmentedArgvWithCleanup& operator=(AugmentedArgvWithCleanup&&) noexcept; + + // The new argc. Currently, will always match the original argc. + int argc() const { return static_cast(argv_.size()); } + // The new, possibly augmented argv. Note that all its char* elements are + // backed by newly allocated std::strings, so they will all be different from + // their counterparts in the original argv. + const std::vector& argv() const { return argv_; } + // Whether the original argv has been augmented from the original, i.e. if any + // of the requested string replacements actually occurred. 
+ bool was_augmented() const { return was_augmented_; } + + private: + std::vector argv_; + bool was_augmented_; + BackingResourcesCleanup cleanup_; +}; + +// Replaces any --config= in `argv` (or any alternative form of +// that flag) with a --flagfile=, where +// localization means that a remote is copied to a temporary local +// mirror. If is already local, it is used as-is. +// +// The remote file contents is additionally checked for possible nested +// --config, --save_config and --flagfile: such usage is currently unsupported. +// +// The returned AugmentedArgvWithCleanup deletes the localized files (if any) in +// dtor. +AugmentedArgvWithCleanup LocalizeConfigFilesInArgv( + const std::vector& argv); + +// If --save_config= was passed on the command line, saves _all_ +// Centipede flags (i.e. those specified on the command line AND the defaulted +// ones) to in the format compatible with --config (defined by +// Centipede), as well as --flagfile (defined by Abseil Flags), and returns +// . Otherwise, returns an empty string. If the 's extension is .sh, +// saves a runnable script instead. +std::filesystem::path MaybeSaveConfigToFile( + const std::vector& leftover_argv); + +// Initializes Centipede: +// - Calls `InitRuntime()` at the right time to initialize the runtime +// subsystems and perform the initial flag parsing. +// - Handles config-related flags: loads the config from --config, if any, +// and saves it to --save_config (or --update_config), if any. +// - Logs the final resolved config. +// - Returns the runtime state that the caller should take ownership of and +// keep alive the duration of the process. 
+[[nodiscard]] std::unique_ptr InitCentipede( + int argc, char** absl_nonnull argv); + +} // namespace fuzztest::internal + +#endif // THIRD_PARTY_CENTIPEDE_GOOGLE_CONFIG_FILE_H_ diff --git a/src/third_party/fuzztest/dist/centipede/config_file_test.cc b/src/third_party/fuzztest/dist/centipede/config_file_test.cc new file mode 100644 index 00000000000..911a32ec058 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/config_file_test.cc @@ -0,0 +1,91 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "./centipede/config_file.h" + +#include +#include + +#include "gtest/gtest.h" + +namespace fuzztest::internal { +namespace { + +TEST(ConfigFileTest, AugmentedArgv) { + const std::vector kOrigArgv = { + "--foo=bar", "-baz", "-bazz", "baz", "qux", + }; + + // None of the replacements match. + { + const AugmentedArgvWithCleanup augmented_argv{ + kOrigArgv, + /*flag_replacements=*/{}, + /*replacements=*/ + { + {"mismatching", "mod_mismatching"}, + }, + nullptr}; + EXPECT_FALSE(augmented_argv.was_augmented()); + EXPECT_EQ(augmented_argv.argv()[0], kOrigArgv[0]); + EXPECT_EQ(augmented_argv.argv()[1], kOrigArgv[1]); + EXPECT_EQ(augmented_argv.argv()[2], kOrigArgv[2]); + EXPECT_EQ(augmented_argv.argv()[3], kOrigArgv[3]); + EXPECT_EQ(augmented_argv.argv()[4], kOrigArgv[4]); + } + + // The replacements match and the cleanup runs as a result. 
+ { + bool cleanup_worked = false; + { + const AugmentedArgvWithCleanup augmented_argv{ + kOrigArgv, + /*flag_replacements=*/ + { + {"foo", "mod_foo"}, + {"baz", "mod_baz"}, + }, + /*replacements=*/ + { + {"bar", "mod_bar"}, + {"qux", "mod_qux"}, + }, + [&cleanup_worked]() { cleanup_worked = true; }}; + const std::vector kExpectedArgv = { + "--mod_foo=mod_bar", + "-mod_baz", + // Flag replacement should skip this item because the flag name + // does not match as a whole. + "-bazz", + // Flag replacement should skip this item because it's not a flag. + "baz", + "mod_qux", + }; + EXPECT_TRUE(augmented_argv.was_augmented()); + EXPECT_EQ(augmented_argv.argv()[0], kExpectedArgv[0]); + EXPECT_EQ(augmented_argv.argv()[1], kExpectedArgv[1]); + EXPECT_EQ(augmented_argv.argv()[2], kExpectedArgv[2]); + EXPECT_EQ(augmented_argv.argv()[3], kExpectedArgv[3]); + EXPECT_EQ(augmented_argv.argv()[4], kExpectedArgv[4]); + } + EXPECT_TRUE(cleanup_worked); + } +} + +// TODO(ussuri): The rest of the module is tested by calling Centipede with +// the new flags in centipede_main_cns_test.sh. Consider adding proper C++ +// tests here too. + +} // namespace +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/config_init.cc b/src/third_party/fuzztest/dist/centipede/config_init.cc new file mode 100644 index 00000000000..5c5c5554887 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/config_init.cc @@ -0,0 +1,58 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "./centipede/config_init.h" + +#include +#include +#include +#include +#include + +#include "absl/base/attributes.h" +#include "absl/base/log_severity.h" +#include "absl/flags/parse.h" +#include "absl/flags/usage_config.h" +#include "absl/log/globals.h" +#include "absl/log/initialize.h" +#include "absl/strings/match.h" +#include "./centipede/config_util.h" + +namespace fuzztest::internal { + +RuntimeState::RuntimeState(std::vector leftover_argv) + : leftover_argv_(std::move(leftover_argv)) {} + +ABSL_ATTRIBUTE_WEAK std::unique_ptr InitRuntime(int argc, + char* argv[]) { + // NB: The invocation order below is very important. Do not change. + // Make `LOG(INFO)` to go to stderr by default. Note that an explicit + // `--stderrthreshold=N` on the command line will override this. + absl::SetStderrThreshold(absl::LogSeverityAtLeast::kInfo); + // Make --help print any flags defined by any Centipede source. + absl::FlagsUsageConfig usage_config; + usage_config.contains_help_flags = [](std::string_view filename) { + return absl::StrContains(filename, "centipede"); + }; + absl::SetFlagsUsageConfig(usage_config); + // Parse the known flags from the command line. + std::vector leftover_argv = + CastArgv(absl::ParseCommandLine(argc, argv)); + // Initialize the logging system using the just-parsed log-related flags. + absl::InitializeLog(); + + return std::make_unique(leftover_argv); +} + +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/config_init.h b/src/third_party/fuzztest/dist/centipede/config_init.h new file mode 100644 index 00000000000..740d2ccb96d --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/config_init.h @@ -0,0 +1,56 @@ +// Copyright 2022 The Centipede Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef THIRD_PARTY_CENTIPEDE_GOOGLE_CONFIG_INIT_H_ +#define THIRD_PARTY_CENTIPEDE_GOOGLE_CONFIG_INIT_H_ + +#include +#include +#include + +namespace fuzztest::internal { + +// The runtime state returned by `InitRuntime()`. The caller should take over +// the ownership of this and keep it alive for the duration of the process. +class [[nodiscard]] RuntimeState { + public: + explicit RuntimeState(std::vector leftover_argv); + virtual ~RuntimeState() = default; + + // Not copyable nor movable for simplicity and maximum extensibility. + RuntimeState(const RuntimeState&) = delete; + RuntimeState& operator=(const RuntimeState&) = delete; + RuntimeState(RuntimeState&&) = delete; + RuntimeState& operator=(RuntimeState&&) = delete; + + auto leftover_argv() const { return leftover_argv_; } + auto& leftover_argv() { return leftover_argv_; } + + private: + std::vector leftover_argv_; +}; + +// * Initializes the relevant runtime subsystems in the correct order. +// * Directs all `LOG(INFO)`s to also to stderr (by default, only `LOG(ERROR)`s +// and higher go to stderr). +// * Tweaks --help behavior to print any flags defined by any Centipede source +// (by default, --help only prints flags defined in the source named +// .cc or .cc). +// * Returns the runtime state, which the client should keep alive for the +// duration of the process. 
+[[nodiscard]] std::unique_ptr InitRuntime(int argc, char* argv[]); + +} // namespace fuzztest::internal + +#endif // THIRD_PARTY_CENTIPEDE_GOOGLE_CONFIG_INIT_H_ diff --git a/src/third_party/fuzztest/dist/centipede/config_util.cc b/src/third_party/fuzztest/dist/centipede/config_util.cc new file mode 100644 index 00000000000..1773593cfbf --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/config_util.cc @@ -0,0 +1,109 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "./centipede/config_util.h" + +#include +#include +#include +#include + +#include "absl/flags/reflection.h" +#include "absl/strings/match.h" +#include "absl/strings/str_join.h" +#include "absl/strings/str_replace.h" +#include "absl/strings/substitute.h" + +namespace fuzztest::internal { + +std::vector CastArgv(const std::vector& argv) { + std::vector ret_argv; + ret_argv.reserve(argv.size()); + for (const auto& arg : argv) { + ret_argv.push_back(const_cast(arg.c_str())); + } + return ret_argv; +} + +std::vector CastArgv(const std::vector& argv) { + return {argv.cbegin(), argv.cend()}; +} + +std::vector CastArgv(int argc, char** argv) { + return {argv, argv + argc}; +} + +FlagInfosPerSource GetFlagsPerSource( + std::string_view source_fragment, + const std::set& exclude_flags) { + FlagInfosPerSource flags_per_source; + for (const auto& [name, flag] : absl::GetAllFlags()) { + if (absl::StrContains(flag->Filename(), source_fragment) && + exclude_flags.find(name) == exclude_flags.cend()) { + flags_per_source[flag->Filename()].emplace(FlagInfo{ + name, flag->CurrentValue(), flag->DefaultValue(), flag->Help()}); + } + } + return flags_per_source; +} + +std::string FormatFlagfileString(const FlagInfosPerSource& flags, + DefaultedFlags defaulted, + FlagComments comments) { + std::vector lines; + lines.reserve(flags.size()); // this many files + + if (defaulted == DefaultedFlags::kIncluded) { + lines.emplace_back("# NOTE: Explicit and defaulted flags are included"); + } else if (defaulted == DefaultedFlags::kExcluded) { + lines.emplace_back("# NOTE: Defaulted flags are excluded"); + } else if (defaulted == DefaultedFlags::kCommentedOut) { + lines.emplace_back("# NOTE: Defaulted flags are commented out"); + } + lines.emplace_back(); + + for (const auto& [filename, flag_infos] : flags) { + lines.emplace_back(absl::Substitute("# Flags from $0:", filename)); + for (const auto& [name, value, default_value, help] : flag_infos) { + if (defaulted == 
DefaultedFlags::kExcluded && value == default_value) { + continue; + } + if (comments == FlagComments::kHelpAndDefault) { + const std::string prepped_help = + absl::StrReplaceAll(help, {{"\n", " "}}); + lines.emplace_back(absl::Substitute(" # $0", prepped_help)); + } + if (comments == FlagComments::kDefault || + comments == FlagComments::kHelpAndDefault) { + lines.emplace_back( + absl::Substitute(" # default: '$0'", default_value)); + } + if (defaulted == DefaultedFlags::kCommentedOut && + value == default_value) { + lines.emplace_back(absl::Substitute(" # --$0=$1", name, value)); + } else { + lines.emplace_back(absl::Substitute(" --$0=$1", name, value)); + } + if (comments == FlagComments::kDefault || + comments == FlagComments::kHelpAndDefault) { + lines.emplace_back(); + } + } + if (!lines.back().empty()) lines.emplace_back(); + } + + return absl::StrJoin(lines, "\n"); +} + +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/config_util.h b/src/third_party/fuzztest/dist/centipede/config_util.h new file mode 100644 index 00000000000..3e13729aa63 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/config_util.h @@ -0,0 +1,100 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef THIRD_PARTY_CENTIPEDE_CONFIG_UTIL_H_ +#define THIRD_PARTY_CENTIPEDE_CONFIG_UTIL_H_ + +#include +#include +#include +#include +#include + +namespace fuzztest::internal { + +// A set of overloads to cast argv between vector and main()-compatible +// vector or argc/argv pair in both directions. The result can be used +// like this: +// AugmentedArgvWithCleanup new_argv{CastArgv(argc, argv), ...}; +// std::vector leftover_argv = +// CastArgv(absl::ParseCommandLine( +// new_argv.argc(), CastArgv(new_argv.argv()).data()); +std::vector CastArgv(int argc, char** argv); +std::vector CastArgv(const std::vector& argv); +// WARNING: Beware of the lifetimes. The returned vector referenced the +// passed `argv`, so `argv` must outlive it. +std::vector CastArgv(const std::vector& argv); + +// Types returned from GetFlagsPerSource(). +struct FlagInfo { + const std::string_view name; + const std::string value; + const std::string default_value; + const std::string help; + + friend bool operator<(const FlagInfo& x, const FlagInfo& y) { + return x.name < y.name; + } +}; +using FlagInfosPerSource = + std::map>; + +// Returns a per-source map of all compiled-in flags defined by sources whose +// relative workspace paths contain `source_fragment`. An empty +// `source_fragment` returns flags from all sources. +FlagInfosPerSource GetFlagsPerSource( + std::string_view source_fragment = "", + const std::set& exclude_flags = {}); + +// Returns a string with newline-separated --flag=value tokens for all +// compiled-in flags defined by sources whose relative workspace paths start +// with `source_prefix`. An empty `source_prefix` returns flags from all +// sources. Flag names in `exclude_flags` are excluded from the result. +// +// The flags are grouped by the source filename, and sorted within each group. 
+// +// +// # Flags from centipede/environment.cc: +// +// --binary="unicorn_x86_64_sancov" +// # --rss_limit_mb="4096" +// --use_pc_features="true" +// +// # Flags from third_party/absl/log/flags.cc: +// +// --alsologtostderr="true" +// # --log_backtrace_at="" +// +// (See config_util_test.cc for more examples of the output). +// +// The returned value is compatible with the standard Abseil's --flagfile flag +// and its remote-enabled Centipede's equivalents --config and --save_config. +enum class DefaultedFlags { + kIncluded = 0, // Include flags with value == default. + kExcluded = 1, // Exclude flags with value == default. + kCommentedOut = 2, // Comment out flags with value == default. +}; +enum class FlagComments { + kNone = 0, // Do not add any comments. + kDefault = 1, // Add a comment with the flag's default. + kHelpAndDefault = 2, // Add a comment with the flag's help and default. +}; +std::string FormatFlagfileString( + const FlagInfosPerSource& flags, + DefaultedFlags defaulted = DefaultedFlags::kIncluded, + FlagComments comments = FlagComments::kNone); + +} // namespace fuzztest::internal + +#endif // THIRD_PARTY_CENTIPEDE_CONFIG_UTIL_H_ diff --git a/src/third_party/fuzztest/dist/centipede/config_util_test.cc b/src/third_party/fuzztest/dist/centipede/config_util_test.cc new file mode 100644 index 00000000000..19b9f9f4ca3 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/config_util_test.cc @@ -0,0 +1,245 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "./centipede/config_util.h" + +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/flags/flag.h" +#include "./centipede/environment_flags.h" +#include "./common/logging.h" + +// Dummy flags for testing. +ABSL_FLAG(std::string, foo, "bar", "foo help"); +ABSL_FLAG(bool, qux, false, "qux help"); + +namespace fuzztest::internal { + +// NOTE: Has to be outside the anonymous namespace. +bool operator==(const FlagInfo& tested, const FlagInfo& expected) { + return tested.name == expected.name && + (expected.value == "*" || tested.value == expected.value) && + (expected.default_value == "*" || + tested.default_value == expected.default_value) && + (expected.help == "*" || tested.help == expected.help); +} + +namespace { + +using ::testing::ElementsAreArray; +using ::testing::IsSupersetOf; + +TEST(FlagUtilTest, GetFlagsPerSource) { + constexpr const char* kCentipedeRoot = "centipede/"; + constexpr const char* kThisCc = "centipede/config_util_test.cc"; + constexpr const char* kCentipedeFlagsInc = + "././centipede/centipede_flags.inc"; + + // Change some flag values to non-defaults. + absl::SetFlag(&FLAGS_foo, "baz"); + absl::SetFlag(&FLAGS_qux, true); + // Create a dummy Environment to touch its flags and prevent them from being + // optimized out. + [[maybe_unused]] auto dummy_env = CreateEnvironmentFromFlags(); + + // All centipede/ modules. 
+ { + const FlagInfosPerSource flags = GetFlagsPerSource(kCentipedeRoot); + SCOPED_TRACE(FormatFlagfileString(flags)); + ASSERT_EQ(flags.count(kThisCc), 1); + ASSERT_EQ(flags.count(kCentipedeFlagsInc), 1); + ASSERT_THAT(flags.at(kThisCc), + ElementsAreArray({ + FlagInfo{"foo", "baz", "bar", "foo help"}, + FlagInfo{"qux", "true", "false", "qux help"}, + })); + ASSERT_THAT(flags.at(kCentipedeFlagsInc), + IsSupersetOf({ + FlagInfo{"binary", "*", "*", "*"}, + FlagInfo{"workdir", "*", "*", "*"}, + })); + } + // Just this file. + { + const FlagInfosPerSource flags = GetFlagsPerSource(kThisCc); + SCOPED_TRACE(FormatFlagfileString(flags)); + ASSERT_EQ(flags.count(kThisCc), 1); + ASSERT_EQ(flags.count(kCentipedeFlagsInc), 0); + ASSERT_THAT(flags.at(kThisCc), + ElementsAreArray({ + FlagInfo{"foo", "baz", "bar", "foo help"}, + FlagInfo{"qux", "true", "false", "qux help"}, + })); + } + // Just this file with one flag excluded. + { + const FlagInfosPerSource flags = + GetFlagsPerSource(kThisCc, /*exclude_flags=*/{"qux"}); + SCOPED_TRACE(FormatFlagfileString(flags)); + ASSERT_EQ(flags.count(kThisCc), 1); + ASSERT_EQ(flags.count(kCentipedeFlagsInc), 0); + ASSERT_THAT(flags.at(kThisCc), + ElementsAreArray({ + FlagInfo{"foo", "baz", "bar", "foo help"}, + })); + } +} + +TEST(FlagUtilTest, FormatFlagfileString) { + // NOTE: Everything is intentionally unsorted: the result is expected to be + // sorted by file, then by flag name. 
+ const FlagInfosPerSource kFlags = { + {"bob.cc", + { + FlagInfo{"bob_x", "bob_x def", "bob_x def", "bob_x help"}, + FlagInfo{"bob_y", "bob_y val", "bob_y def", "bob_y help"}, + }}, + {"alice.cc", + { + FlagInfo{"alice_x", "alice_x val", "alice_x def", "alice_x help"}, + FlagInfo{"alice_y", "alice_y val", "alice_y def", "alice_y help"}, + FlagInfo{"alice_z", "alice_z def", "alice_z def", "alice_z help"}, + }}, + }; + + struct TestCase { + DefaultedFlags defaulted; + FlagComments comments; + std::string_view expected_flagfile_string; + }; + + TestCase kTestCases[] = { + {DefaultedFlags::kExcluded, FlagComments::kNone, + R"(# NOTE: Defaulted flags are excluded + +# Flags from alice.cc: + --alice_x=alice_x val + --alice_y=alice_y val + +# Flags from bob.cc: + --bob_y=bob_y val +)"}, + {DefaultedFlags::kIncluded, FlagComments::kNone, + R"(# NOTE: Explicit and defaulted flags are included + +# Flags from alice.cc: + --alice_x=alice_x val + --alice_y=alice_y val + --alice_z=alice_z def + +# Flags from bob.cc: + --bob_x=bob_x def + --bob_y=bob_y val +)"}, + {DefaultedFlags::kCommentedOut, FlagComments::kNone, + R"(# NOTE: Defaulted flags are commented out + +# Flags from alice.cc: + --alice_x=alice_x val + --alice_y=alice_y val + # --alice_z=alice_z def + +# Flags from bob.cc: + # --bob_x=bob_x def + --bob_y=bob_y val +)"}, + {DefaultedFlags::kIncluded, FlagComments::kDefault, + R"(# NOTE: Explicit and defaulted flags are included + +# Flags from alice.cc: + # default: 'alice_x def' + --alice_x=alice_x val + + # default: 'alice_y def' + --alice_y=alice_y val + + # default: 'alice_z def' + --alice_z=alice_z def + +# Flags from bob.cc: + # default: 'bob_x def' + --bob_x=bob_x def + + # default: 'bob_y def' + --bob_y=bob_y val +)"}, + {DefaultedFlags::kIncluded, FlagComments::kHelpAndDefault, + R"(# NOTE: Explicit and defaulted flags are included + +# Flags from alice.cc: + # alice_x help + # default: 'alice_x def' + --alice_x=alice_x val + + # alice_y help + # default: 
'alice_y def' + --alice_y=alice_y val + + # alice_z help + # default: 'alice_z def' + --alice_z=alice_z def + +# Flags from bob.cc: + # bob_x help + # default: 'bob_x def' + --bob_x=bob_x def + + # bob_y help + # default: 'bob_y def' + --bob_y=bob_y val +)"}, + {DefaultedFlags::kCommentedOut, FlagComments::kHelpAndDefault, + R"(# NOTE: Defaulted flags are commented out + +# Flags from alice.cc: + # alice_x help + # default: 'alice_x def' + --alice_x=alice_x val + + # alice_y help + # default: 'alice_y def' + --alice_y=alice_y val + + # alice_z help + # default: 'alice_z def' + # --alice_z=alice_z def + +# Flags from bob.cc: + # bob_x help + # default: 'bob_x def' + # --bob_x=bob_x def + + # bob_y help + # default: 'bob_y def' + --bob_y=bob_y val +)"}, + }; + + for (const auto& test_case : kTestCases) { + const std::string flagfile_string = + FormatFlagfileString(kFlags, test_case.defaulted, test_case.comments); + EXPECT_EQ(flagfile_string, test_case.expected_flagfile_string) + << "\n--------\n" + << VV(flagfile_string) << "--------\n" + << VV(test_case.expected_flagfile_string) << "--------\n" + << VV(static_cast(test_case.defaulted)) + << VV(static_cast(test_case.comments)); + } +} + +} // namespace +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/control_flow.cc b/src/third_party/fuzztest/dist/centipede/control_flow.cc new file mode 100644 index 00000000000..68c443ee0cd --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/control_flow.cc @@ -0,0 +1,235 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "./centipede/control_flow.h" + +#include +#include +#include +#include // NOLINT +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "absl/container/flat_hash_set.h" +#include "absl/log/check.h" +#include "absl/log/log.h" +#include "absl/strings/match.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_split.h" +#include "./centipede/command.h" +#include "./centipede/pc_info.h" +#include "./centipede/util.h" +#include "./common/defs.h" +#include "./common/logging.h" +#include "./common/remote_file.h" + +namespace fuzztest::internal { + +PCTable ReadPcTableFromFile(std::string_view file_path) { + ByteArray pc_infos_as_bytes; + ReadFromLocalFile(file_path, pc_infos_as_bytes); + CHECK_EQ(pc_infos_as_bytes.size() % sizeof(PCInfo), 0); + size_t pc_table_size = pc_infos_as_bytes.size() / sizeof(PCInfo); + const auto *pc_infos = reinterpret_cast(pc_infos_as_bytes.data()); + PCTable pc_table{pc_infos, pc_infos + pc_table_size}; + CHECK_EQ(pc_table.size(), pc_table_size); + return pc_table; +} + +PCTable GetPcTableFromBinaryWithTracePC(std::string_view binary_path, + std::string_view objdump_path, + std::string_view tmp_path) { + const std::string stderr_path = absl::StrCat(tmp_path, ".log"); + Command::Options cmd_options; + cmd_options.args = {"-d", std::string(binary_path)}; + cmd_options.stdout_file = std::string(tmp_path); + cmd_options.stderr_file = stderr_path; + Command cmd{objdump_path, std::move(cmd_options)}; + int exit_code = cmd.Execute(); + if (exit_code != 
EXIT_SUCCESS) { + std::string log_text; + ReadFromLocalFile(stderr_path, log_text); + LOG(ERROR) << "Failed to use objdump to get PC table; stderr is:"; + for (const auto &line : absl::StrSplit(log_text, '\n')) { + LOG(ERROR).NoPrefix() << line; + } + std::filesystem::remove(tmp_path); + std::filesystem::remove(stderr_path); + return {}; + } + std::filesystem::remove(stderr_path); + PCTable pc_table; + std::ifstream in(std::string{tmp_path}); + CHECK(in.good()) << VV(tmp_path); + bool saw_new_function = false; + + // Read the objdump output, find lines that start a function + // and lines that have a call to __sanitizer_cov_trace_pc. + // Reconstruct the PCTable from those. + for (std::string line; std::getline(in, line);) { + if (absl::EndsWith(line, ">:")) { // new function. + saw_new_function = true; + continue; + } + // On MacOS there is an extra underscope before the symbols, so not sealing + // the symbol with `<`. + if (!absl::EndsWith(line, "__sanitizer_cov_trace_pc>") && + !absl::EndsWith(line, "__sanitizer_cov_trace_pc@plt>")) + continue; + uintptr_t pc = std::stoul(line, nullptr, 16); + uintptr_t flags = saw_new_function ? PCInfo::kFuncEntry : 0; + saw_new_function = false; // next trace_pc will be in the same function. 
+ pc_table.push_back({pc, flags}); + } + std::filesystem::remove(tmp_path); + return pc_table; +} + +CFTable ReadCfTable(std::istream &in) { + const std::string input_string(std::istreambuf_iterator(in), {}); + const ByteArray cf_table_as_bytes(input_string.begin(), input_string.end()); + CHECK_EQ(cf_table_as_bytes.size() % sizeof(CFTable::value_type), 0); + const size_t cf_table_size = + cf_table_as_bytes.size() / sizeof(CFTable::value_type); + const auto *cf_entries = + reinterpret_cast(cf_table_as_bytes.data()); + return CFTable{cf_entries, cf_entries + cf_table_size}; +} + +CFTable ReadCfTable(std::string_view file_path) { + std::string cf_table_contents; + CHECK_OK(RemoteFileGetContents(file_path, cf_table_contents)); + std::istringstream cf_table_stream(cf_table_contents); + return ReadCfTable(cf_table_stream); +} + +void WriteCfTable(const CFTable &cf_table, std::ostream &out) { + out.write(reinterpret_cast(cf_table.data()), + sizeof(CFTable::value_type) * cf_table.size()); +} + +DsoTable ReadDsoTableFromFile(std::string_view file_path) { + DsoTable result; + std::string data; + ReadFromLocalFile(file_path, data); + for (const auto &line : absl::StrSplit(data, '\n', absl::SkipEmpty())) { + // Use std::string; there is no std::stoul for std::string_view. + const std::vector tokens = + absl::StrSplit(line, ' ', absl::SkipEmpty()); + CHECK_EQ(tokens.size(), 2) << VV(line); + result.push_back(DsoInfo{tokens[0], std::stoul(tokens[1])}); + } + return result; +} + +void ControlFlowGraph::InitializeControlFlowGraph(const CFTable &cf_table, + const PCTable &pc_table) { + CHECK(!cf_table.empty()); + func_entries_.resize(pc_table.size()); + reachability_.resize(pc_table.size()); + + for (size_t j = 0; j < cf_table.size();) { + std::vector successors; + auto curr_pc = cf_table[j]; + ++j; + + // Iterate over successors. + while (cf_table[j]) { + successors.push_back(cf_table[j]); + ++j; + } + ++j; // Step over the delimiter. 
+ + // Record the list of successors + graph_[curr_pc] = std::move(successors); + // TODO(ussuri): Remove after debugging. + VLOG(100) << "Added PC: " << curr_pc; + + // Iterate over callees. + while (cf_table[j]) { + ++j; + } + ++j; // Step over the delimiter. + CHECK_LE(j, cf_table.size()); + } + // Calculate cyclomatic complexity for all functions. + for (PCIndex i = 0; i < pc_table.size(); ++i) { + pc_index_map_[pc_table[i].pc] = i; + if (pc_table[i].has_flag(PCInfo::kFuncEntry)) { + func_entries_[i] = true; + uintptr_t func_pc = pc_table[i].pc; + auto func_comp = ComputeFunctionCyclomaticComplexity(func_pc, *this); + function_complexities_[func_pc] = func_comp; + } + } +} + +const std::vector &ControlFlowGraph::GetSuccessors( + uintptr_t basic_block) const { + auto it = graph_.find(basic_block); + CHECK(it != graph_.end()) << VV(basic_block); + return it->second; +} + +std::vector ControlFlowGraph::ComputeReachabilityForPc( + uintptr_t pc) const { + absl::flat_hash_set visited_pcs; + std::queue worklist; + + worklist.push(pc); + while (!worklist.empty()) { + auto current_pc = worklist.front(); + worklist.pop(); + if (!visited_pcs.insert(current_pc).second) continue; + for (const auto &successor : graph_.at(current_pc)) { + if (!exists(successor)) continue; + worklist.push(successor); + } + } + return {visited_pcs.begin(), visited_pcs.end()}; +} + +uint32_t ComputeFunctionCyclomaticComplexity(uintptr_t pc, + const ControlFlowGraph &cfg) { + size_t edge_num = 0, node_num = 0; + + absl::flat_hash_set visited_pcs; + std::queue worklist; + + worklist.push(pc); + + while (!worklist.empty()) { + auto current_pc = worklist.front(); + worklist.pop(); + if (!visited_pcs.insert(current_pc).second) continue; + ++node_num; + for (auto &successor : cfg.GetSuccessors(current_pc)) { + if (!cfg.exists(successor)) continue; + ++edge_num; + worklist.push(successor); + } + } + + return edge_num - node_num + 2; +} + +} // namespace fuzztest::internal diff --git 
a/src/third_party/fuzztest/dist/centipede/control_flow.h b/src/third_party/fuzztest/dist/centipede/control_flow.h new file mode 100644 index 00000000000..f401d84da61 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/control_flow.h @@ -0,0 +1,166 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef THIRD_PARTY_CENTIPEDE_CONTROL_FLOW_H_ +#define THIRD_PARTY_CENTIPEDE_CONTROL_FLOW_H_ + +#include +#include +#include +#include //NOLINT +#include +#include +#include + +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" +#include "absl/log/check.h" +#include "./centipede/pc_info.h" +#include "./common/defs.h" +#include "./common/logging.h" + +namespace fuzztest::internal { + +class SymbolTable; // To avoid mutual inclusion with symbol_table.h. + +// Reads a PCTable from `file_path`, returns it. Returns empty table on error. +PCTable ReadPcTableFromFile(std::string_view file_path); + +// Helper for GetPcTableFromBinary, for binaries built with +// -fsanitize-coverage=trace-pc. Returns the PCTable reconstructed from +// `binary_path` with ` -d`. May create a file `tmp_path`, but +// will delete it afterwards. +PCTable GetPcTableFromBinaryWithTracePC(std::string_view binary_path, + std::string_view objdump_path, + std::string_view tmp_path); + +// PCIndex: an index into the PCTable. +// We use 32-bit int for compactness since PCTable is never too large. 
+using PCIndex = uint32_t; +// A set of PCIndex-es, order is not important. +using PCIndexVec = std::vector; + +// Array of elements in __sancov_cfs section. +// CFTable is created by the compiler/linker in the instrumented binary. +// https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-control-flow. +using CFTable = std::vector; + +// Reads a CFTable from `file_path`, returns it. Returns empty table on error. +CFTable ReadCfTable(std::string_view file_path); + +// Same as above but reads from a stream. +CFTable ReadCfTable(std::istream &in); + +// Writes the `cf_table` to `out`. +void WriteCfTable(const CFTable &cf_table, std::ostream &out); + +// Reads a DsoTable from `file_path`, returns it. Returns empty table on error. +DsoTable ReadDsoTableFromFile(std::string_view file_path); + +class ControlFlowGraph { + public: + // Reads form __sancov_cfs section. On error it crashes, if the section is not + // there, the graph_ will be empty. + void InitializeControlFlowGraph(const CFTable &cf_table, + const PCTable &pc_table); + + // Returns the vector of successor PCs for the given basic block PC. + const std::vector &GetSuccessors(uintptr_t basic_block) const; + + // Returns the number of cfg entries. + size_t size() const { return graph_.size(); } + + // Checks if basic_block is in cfg. + bool exists(const uintptr_t basic_block) const { + return graph_.contains(basic_block); + } + + // Returns cyclomatic complexity of function PC. CHECK-fails if it is not a + // valid function PC. + uint32_t GetCyclomaticComplexity(uintptr_t pc) const { + auto it = function_complexities_.find(pc); + CHECK(it != function_complexities_.end()); + return it->second; + } + + // Returns true if the given basic block is function entry. + bool BlockIsFunctionEntry(PCIndex pc_index) const { + // TODO(ussuri): Change the following to use CHECK_LE(pc_index, + // func_entries_.size()) and have a death test. + return pc_index < func_entries_.size() ? 
func_entries_[pc_index] : false; + } + + // Returns the idx in pc_table associated with the PC, CHECK-fails if the PC + // is not in the pc_table. + PCIndex GetPcIndex(uintptr_t pc) const { + auto it = pc_index_map_.find(pc); + CHECK(it != pc_index_map_.end()) << VV(pc) << " is not in pc_table."; + return it->second; + } + + // Returns true if the PC is in PCTable. + bool IsInPcTable(uintptr_t pc) const { return pc_index_map_.contains(pc); } + + // Returns a vector& containing all basic blocks (represented by their PCs) + // reachable from `pc`. The reachability is computed once, lazily. + // The method is const, under the hood it uses a mutable data member. + // Thread-safe: can be called concurrently from multiple threads + const std::vector &LazyGetReachabilityForPc(uintptr_t pc) const { + CHECK_EQ(reachability_.size(), pc_index_map_.size()); + auto pc_index = GetPcIndex(pc); + std::call_once(*(reachability_[pc_index].once), [this, &pc, &pc_index]() { + reachability_[pc_index].reach = ComputeReachabilityForPc(pc); + }); + return reachability_[pc_index].reach; + } + + private: + // Map from PC to the idx in pc_table. + absl::flat_hash_map pc_index_map_; + // A vector of size PCTable. func_entries[idx] is true iff means the PC at idx + // is a function entry. + std::vector func_entries_; + // A map with PC as the keys and vector of PCs as value. + absl::flat_hash_map> graph_; + // A map from function PC to its calculated cyclomatic complexity. It is + // to avoid unnecessary calls to ComputeFunctionCyclomaticComplexity. + absl::flat_hash_map function_complexities_; + + // Returns a vector of PCs reachable from `pc`, not in any particular order. + // The result always includes `pc`, since any block is reachable from itself. + std::vector ComputeReachabilityForPc(uintptr_t pc) const; + FRIEND_TEST(ControlFlowGraph, ComputeReachabilityForPc); + + // ReachInfo is a struct to store reachability information for each PC in + // pc_table. 
The once flag is used to make sure the reach vector is populated + // only once lazily in a thread-friendly manner. + struct ReachInfo { + mutable std::once_flag *once; + mutable std::vector reach; + ReachInfo() : once(new std::once_flag) {} + ~ReachInfo() { delete once; } + }; + // A vector of size PCTable. reachability_[idx] is reachability info for the + // `idx`th pc. Conceptually it is constant, but we compute it lazily, hence + // 'mutable' + std::vector reachability_; +}; + +// Computes the Cyclomatic Complexity for the given function, +// https://en.wikipedia.org/wiki/Cyclomatic_complexity. +uint32_t ComputeFunctionCyclomaticComplexity(uintptr_t pc, + const ControlFlowGraph &cfg); + +} // namespace fuzztest::internal +#endif // THIRD_PARTY_CENTIPEDE_CONTROL_FLOW_H_ diff --git a/src/third_party/fuzztest/dist/centipede/control_flow_test.cc b/src/third_party/fuzztest/dist/centipede/control_flow_test.cc new file mode 100644 index 00000000000..4d63e591c10 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/control_flow_test.cc @@ -0,0 +1,341 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "./centipede/control_flow.h" + +#include +#include +#include // NOLINT +#include +#include +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/container/flat_hash_map.h" +#include "absl/log/check.h" +#include "absl/log/log.h" +#include "./centipede/binary_info.h" +#include "./centipede/pc_info.h" +#include "./centipede/symbol_table.h" +#include "./centipede/thread_pool.h" +#include "./common/logging.h" +#include "./common/test_util.h" + +namespace fuzztest::internal { + +// Mock CFTable representing the following cfg: +// 1 +// / \ +// 2 3 +// \ / +// 4 +// TODO(ussuri): Change PCs to 100, 200 etc, to avoid confusion with PCIndex. +static const CFTable g_cf_table = {1, 2, 3, 0, 0, 2, 4, 0, + 0, 3, 4, 0, 0, 4, 0, 0}; +static const PCTable g_pc_table = { + {1, PCInfo::kFuncEntry}, {2, 0}, {3, 0}, {4, 0}}; + +TEST(ControlFlowGraph, ComputeReachabilityForPc) { + ControlFlowGraph cfg; + cfg.InitializeControlFlowGraph(g_cf_table, g_pc_table); + EXPECT_NE(cfg.size(), 0); + + auto reach1 = cfg.ComputeReachabilityForPc(1); + auto reach2 = cfg.ComputeReachabilityForPc(2); + auto reach3 = cfg.ComputeReachabilityForPc(3); + auto reach4 = cfg.ComputeReachabilityForPc(4); + + EXPECT_THAT(reach1, testing::UnorderedElementsAre(1, 2, 3, 4)); + EXPECT_THAT(reach2, testing::UnorderedElementsAre(2, 4)); + EXPECT_THAT(reach3, testing::UnorderedElementsAre(3, 4)); + EXPECT_THAT(reach4, testing::ElementsAre(4)); +} + +namespace { + +TEST(CFTable, MakeCfgFromCfTable) { + ControlFlowGraph cfg; + cfg.InitializeControlFlowGraph(g_cf_table, g_pc_table); + EXPECT_NE(cfg.size(), 0); + + for (auto &pc : {1, 2, 3, 4}) { + SCOPED_TRACE(testing::Message() << VV(pc)); + EXPECT_TRUE(cfg.exists(pc)); + + // Check that cfg traversal is possible. 
+ auto successors = cfg.GetSuccessors(pc); + for (auto &successor : successors) { + EXPECT_TRUE(cfg.exists(successor)); + } + + EXPECT_THAT(cfg.GetSuccessors(1).size(), 2); + EXPECT_THAT(cfg.GetSuccessors(2).size(), 1); + EXPECT_THAT(cfg.GetSuccessors(3).size(), 1); + EXPECT_TRUE(cfg.GetSuccessors(4).empty()); + } + + CHECK_EQ(cfg.GetPcIndex(1), 0); + CHECK_EQ(cfg.GetPcIndex(2), 1); + CHECK_EQ(cfg.GetPcIndex(3), 2); + CHECK_EQ(cfg.GetPcIndex(4), 3); + + EXPECT_TRUE(cfg.BlockIsFunctionEntry(0)); + EXPECT_FALSE(cfg.BlockIsFunctionEntry(1)); + EXPECT_FALSE(cfg.BlockIsFunctionEntry(2)); + EXPECT_FALSE(cfg.BlockIsFunctionEntry(3)); + + CHECK_EQ(cfg.GetCyclomaticComplexity(1), 2); +} + +TEST(CFTable, SerializesAndDeserializesCfTable) { + std::stringstream stream; + WriteCfTable(g_cf_table, stream); + const CFTable cf_table = ReadCfTable(stream); + EXPECT_EQ(cf_table, g_cf_table); +} + +TEST(FunctionComplexity, ComputeFuncComplexity) { + static const CFTable g_cf_table1 = { + 1, 2, 3, 0, 0, // 1 goes to 2 and 3. + 2, 3, 4, 0, 0, // 2 goes to 3 and 4. + 3, 1, 4, 0, 0, // 3 goes to 1 and 4. + 4, 0, 0 // 4 goes nowhere. + }; + static const CFTable g_cf_table2 = { + 1, 0, 0, // 1 goes nowhere. + }; + static const CFTable g_cf_table3 = { + 1, 2, 0, 0, // 1 goes to 2. + 2, 3, 0, 0, // 2 goes to 3. + 3, 1, 0, 0, // 3 goes to 1. + }; + static const CFTable g_cf_table4 = { + 1, 2, 3, 0, 0, // 1 goes to 2 and 3. + 2, 3, 4, 0, 0, // 2 goes to 3 and 4. + 3, 0, 0, // 3 goes nowhere. + 4, 0, 0 // 4 goes nowhere. 
+ }; + + ControlFlowGraph cfg1; + cfg1.InitializeControlFlowGraph(g_cf_table1, g_pc_table); + EXPECT_NE(cfg1.size(), 0); + + ControlFlowGraph cfg2; + cfg2.InitializeControlFlowGraph(g_cf_table2, g_pc_table); + EXPECT_NE(cfg2.size(), 0); + + ControlFlowGraph cfg3; + cfg3.InitializeControlFlowGraph(g_cf_table3, g_pc_table); + EXPECT_NE(cfg3.size(), 0); + + ControlFlowGraph cfg4; + cfg4.InitializeControlFlowGraph(g_cf_table4, g_pc_table); + EXPECT_NE(cfg4.size(), 0); + + EXPECT_EQ(ComputeFunctionCyclomaticComplexity(1, cfg1), 4); + EXPECT_EQ(ComputeFunctionCyclomaticComplexity(1, cfg2), 1); + EXPECT_EQ(ComputeFunctionCyclomaticComplexity(1, cfg3), 2); + EXPECT_EQ(ComputeFunctionCyclomaticComplexity(1, cfg4), 2); +} + +TEST(ControlFlowGraph, LazyReachability) { + ControlFlowGraph cfg; + cfg.InitializeControlFlowGraph(g_cf_table, g_pc_table); + EXPECT_NE(cfg.size(), 0); + + auto rt = [&cfg]() { + for (int i = 0; i < 10; ++i) { + cfg.LazyGetReachabilityForPc(1); + cfg.LazyGetReachabilityForPc(2); + cfg.LazyGetReachabilityForPc(3); + cfg.LazyGetReachabilityForPc(4); + } + const auto &reach1 = cfg.LazyGetReachabilityForPc(1); + const auto &reach2 = cfg.LazyGetReachabilityForPc(2); + const auto &reach3 = cfg.LazyGetReachabilityForPc(3); + const auto &reach4 = cfg.LazyGetReachabilityForPc(4); + + EXPECT_THAT(reach1, testing::UnorderedElementsAre(1, 2, 3, 4)); + EXPECT_THAT(reach2, testing::UnorderedElementsAre(2, 4)); + EXPECT_THAT(reach3, testing::UnorderedElementsAre(3, 4)); + EXPECT_THAT(reach4, testing::ElementsAre(4)); + }; + + { + ThreadPool threads{3}; + threads.Schedule(rt); + threads.Schedule(rt); + threads.Schedule(rt); + } // The threads join here. +} + +// Returns path to test_fuzz_target. +static std::string GetTargetPath() { + return GetDataDependencyFilepath("centipede/testing/test_fuzz_target"); +} + +// Returns path to test_fuzz_target_trace_pc. 
+static std::string GetTracePCTargetPath() { + return GetDataDependencyFilepath( + "centipede/testing/test_fuzz_target_trace_pc"); +} + +// Tests GetCfTableFromBinary() on test_fuzz_target. +TEST(CFTable, GetCfTable) { + auto target_path = GetTargetPath(); + std::string tmp_path1 = GetTempFilePath(test_info_->name(), 1); + std::string tmp_path2 = GetTempFilePath(test_info_->name(), 2); + + // Load the cf table. + BinaryInfo binary_info; + binary_info.InitializeFromSanCovBinary( + target_path, GetObjDumpPath(), GetLLVMSymbolizerPath(), + GetTestTempDir(test_info_->name()).string()); + const auto &cf_table = binary_info.cf_table; + LOG(INFO) << VV(target_path) << VV(tmp_path1) << VV(cf_table.size()); + if (cf_table.empty()) { + LOG(INFO) << "__sancov_cfs is empty."; + // TODO(ussuri): This should be removed once OSS clang supports + // control-flow. + GTEST_SKIP(); + } + + ASSERT_FALSE( + std::filesystem::exists(tmp_path1.c_str())); // tmp_path1 was deleted. + LOG(INFO) << VV(cf_table.size()); + + const auto &pc_table = binary_info.pc_table; + EXPECT_FALSE(binary_info.uses_legacy_trace_pc_instrumentation); + EXPECT_THAT(pc_table.empty(), false); + + const SymbolTable &symbols = binary_info.symbols; + + absl::flat_hash_map pc_table_index; + for (size_t i = 0; i < pc_table.size(); i++) { + pc_table_index[pc_table[i].pc] = i; + } + + for (size_t j = 0; j < cf_table.size();) { + auto current_pc = cf_table[j]; + ++j; + size_t successor_num = 0; + size_t callee_num = 0; + size_t icallee_num = 0; + + // Iterate over successors. + while (cf_table[j]) { + ++successor_num; + ++j; + } + ++j; // Step over the delimiter. + + // Iterate over callees. + while (cf_table[j]) { + if (cf_table[j] > 0) ++callee_num; + if (cf_table[j] < 0) ++icallee_num; + ++j; + } + ++j; // Step over the delimiter. + + // Determine if current_pc is a function entry. 
+ if (pc_table_index.contains(current_pc)) { + size_t index = pc_table_index[current_pc]; + if (pc_table[index].has_flag(PCInfo::kFuncEntry)) { + const std::string_view current_function = symbols.func(index); + // Check for properties. + SCOPED_TRACE(testing::Message() + << "Checking for " << VV(current_function) + << VV(current_pc)); + if (current_function == "SingleEdgeFunc") { + EXPECT_EQ(successor_num, 0); + EXPECT_EQ(icallee_num, 0); + EXPECT_EQ(callee_num, 0); + } else if (current_function == "MultiEdgeFunc") { + EXPECT_EQ(successor_num, 2); + EXPECT_EQ(icallee_num, 0); + EXPECT_EQ(callee_num, 0); + } else if (current_function == "IndirectCallFunc") { + EXPECT_EQ(successor_num, 0); + EXPECT_EQ(icallee_num, 1); + EXPECT_EQ(callee_num, 0); + } + } + } + } +} + +static void SymbolizeBinary(std::string_view test_dir, + std::string_view target_path, bool use_trace_pc) { + BinaryInfo binary_info; + binary_info.InitializeFromSanCovBinary(target_path, GetObjDumpPath(), + GetLLVMSymbolizerPath(), test_dir); + // Load the pc table. + const auto &pc_table = binary_info.pc_table; + // Check that it's not empty. + EXPECT_NE(pc_table.size(), 0); + // Check that the first PCInfo corresponds to a kFuncEntry. + EXPECT_TRUE(pc_table[0].has_flag(PCInfo::kFuncEntry)); + + // Test the symbols. + const SymbolTable &symbols = binary_info.symbols; + ASSERT_EQ(symbols.size(), pc_table.size()); + + bool has_llvm_fuzzer_test_one_input = false; + size_t single_edge_func_num_edges = 0; + size_t multi_edge_func_num_edges = 0; + // Iterate all symbols, verify that we: + // * Don't have main (coverage instrumentation is disabled for main). + // * Have LLVMFuzzerTestOneInput with the correct location. + // * Have one edge for SingleEdgeFunc. + // * Have several edges for MultiEdgeFunc. 
+ for (size_t i = 0; i < symbols.size(); i++) { + bool is_func_entry = pc_table[i].has_flag(PCInfo::kFuncEntry); + if (is_func_entry) { + LOG(INFO) << symbols.full_description(i); + } + single_edge_func_num_edges += symbols.func(i) == "SingleEdgeFunc"; + multi_edge_func_num_edges += symbols.func(i) == "MultiEdgeFunc"; + EXPECT_NE(symbols.func(i), "main"); + if (is_func_entry && symbols.func(i) == "LLVMFuzzerTestOneInput") { + // This is a function entry block for LLVMFuzzerTestOneInput. + has_llvm_fuzzer_test_one_input = true; + EXPECT_THAT( + symbols.location(i), + testing::HasSubstr("centipede/testing/test_fuzz_target.cc:71")); + } + } + EXPECT_TRUE(has_llvm_fuzzer_test_one_input); + EXPECT_EQ(single_edge_func_num_edges, 1); + EXPECT_GT(multi_edge_func_num_edges, 1); +} + +// Tests GetPcTableFromBinary() and SymbolTable on test_fuzz_target. +TEST(PCTable, GetPcTableFromBinary_And_SymbolTable_PCTable) { + EXPECT_NO_FATAL_FAILURE(SymbolizeBinary( + GetTestTempDir(test_info_->name()).string(), GetTargetPath(), + /*use_trace_pc=*/false)); +} + +// Tests GetPcTableFromBinary() and SymbolTable on test_fuzz_target_trace_pc. +TEST(PCTable, GetPcTableFromBinary_And_SymbolTable_TracePC) { + EXPECT_NO_FATAL_FAILURE(SymbolizeBinary( + GetTestTempDir(test_info_->name()).string(), GetTracePCTargetPath(), + /*use_trace_pc=*/true)); +} + +} // namespace + +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/corpus.cc b/src/third_party/fuzztest/dist/centipede/corpus.cc new file mode 100644 index 00000000000..42f3e4bb215 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/corpus.cc @@ -0,0 +1,322 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "./centipede/corpus.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "absl/log/check.h" +#include "absl/log/log.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_join.h" +#include "absl/strings/substitute.h" +#include "./centipede/control_flow.h" +#include "./centipede/coverage.h" +#include "./centipede/execution_metadata.h" +#include "./centipede/feature.h" +#include "./centipede/feature_set.h" +#include "./centipede/util.h" +#include "./common/defs.h" +#include "./common/logging.h" // IWYU pragma: keep +#include "./common/remote_file.h" +#include "./common/status_macros.h" + +namespace fuzztest::internal { + +//------------------------------------------------------------------------------ +// Corpus +//------------------------------------------------------------------------------ + +// Returns the weight of `fv` computed using `fs` and `coverage_frontier`. +static size_t ComputeWeight(const FeatureVec &fv, const FeatureSet &fs, + const CoverageFrontier &coverage_frontier) { + size_t weight = fs.ComputeWeight(fv); + // The following is checking for the cases where PCTable is not present. In + // such cases, we cannot use any ControlFlow related features. + if (coverage_frontier.MaxPcIndex() == 0) return weight; + size_t frontier_weights_sum = 0; + for (const auto feature : fv) { + if (!feature_domains::kPCs.Contains(feature)) continue; + const auto pc_index = ConvertPCFeatureToPcIndex(feature); + // Avoid checking frontier for out-of-bounds indices. 
+ // TODO(b/299624088): revisit once dlopen is supported. + if (pc_index >= coverage_frontier.MaxPcIndex()) continue; + if (coverage_frontier.PcIndexIsFrontier(pc_index)) { + frontier_weights_sum += coverage_frontier.FrontierWeight(pc_index); + } + } + return weight * (frontier_weights_sum + 1); // Multiply by at least 1. +} + +std::pair Corpus::MaxAndAvgSize() const { + if (records_.empty()) return {0, 0}; + size_t max = 0; + size_t total = 0; + for (const auto &r : records_) { + max = std::max(max, r.data.size()); + total += r.data.size(); + } + return {max, total / records_.size()}; +} + +size_t Corpus::Prune(const FeatureSet &fs, + const CoverageFrontier &coverage_frontier, + size_t max_corpus_size, Rng &rng) { + // TODO(kcc): use coverage_frontier. + CHECK(max_corpus_size); + if (records_.size() < 2UL) return 0; + // Recompute the weights. + size_t num_zero_weights = 0; + for (size_t i = 0, n = records_.size(); i < n; ++i) { + fs.PruneFeaturesAndCountUnseen(records_[i].features); + auto new_weight = + ComputeWeight(records_[i].features, fs, coverage_frontier); + weighted_distribution_.ChangeWeight(i, new_weight); + if (new_weight == 0) ++num_zero_weights; + } + + // Remove zero weights and the corresponding corpus record. + // Also remove some random elements, if the corpus is still too big. + // The corpus must not be empty, hence target_size is at least 1. + // It should also be <= max_corpus_size. + size_t target_size = std::min( + max_corpus_size, std::max(1UL, records_.size() - num_zero_weights)); + auto subset_to_remove = + weighted_distribution_.RemoveRandomWeightedSubset(target_size, rng); + RemoveSubset(subset_to_remove, records_); + + weighted_distribution_.RecomputeInternalState(); + CHECK(!records_.empty()); + + // Features may have shrunk from PruneFeaturesAndCountUnseen. + // Call shrink_to_fit for the features that survived the pruning. 
+ for (auto &record : records_) { + record.features.shrink_to_fit(); + } + + num_pruned_ += subset_to_remove.size(); + return subset_to_remove.size(); +} + +void Corpus::Add(const ByteArray &data, const FeatureVec &fv, + const ExecutionMetadata &metadata, const FeatureSet &fs, + const CoverageFrontier &coverage_frontier) { + // TODO(kcc): use coverage_frontier. + CHECK(!data.empty()) + << "Got request to add empty element to corpus: ignoring"; + CHECK_EQ(records_.size(), weighted_distribution_.size()); + records_.push_back({data, fv, metadata}); + weighted_distribution_.AddWeight(ComputeWeight(fv, fs, coverage_frontier)); +} + +const CorpusRecord &Corpus::WeightedRandom(size_t random) const { + return records_[weighted_distribution_.RandomIndex(random)]; +} + +const CorpusRecord &Corpus::UniformRandom(size_t random) const { + return records_[random % records_.size()]; +} + +void Corpus::DumpStatsToFile(const FeatureSet &fs, std::string_view filepath, + std::string_view description) { + auto *file = ValueOrDie(RemoteFileOpen(filepath, "w")); + CHECK(file != nullptr) << "Failed to open file: " << filepath; + CHECK_OK(RemoteFileSetWriteBufferSize(file, 100UL * 1024 * 1024)); + static constexpr std::string_view kHeaderStub = R"(# $0 +{ + "num_inputs": $1, + "corpus_stats": [)"; + static constexpr std::string_view kRecordStub = R"($0 + {"size": $1, "frequencies": [$2]})"; + static constexpr std::string_view kFooter = R"( + ] +} +)"; + const std::string header_str = + absl::Substitute(kHeaderStub, description, records_.size()); + CHECK_OK(RemoteFileAppend(file, header_str)); + std::string before_record; + for (const auto &record : records_) { + std::vector frequencies; + frequencies.reserve(record.features.size()); + for (const auto feature : record.features) { + frequencies.push_back(fs.Frequency(feature)); + } + const std::string frequencies_str = absl::StrJoin(frequencies, ", "); + const std::string record_str = absl::Substitute( + kRecordStub, before_record, 
record.data.size(), frequencies_str); + CHECK_OK(RemoteFileAppend(file, record_str)); + before_record = ","; + } + CHECK_OK(RemoteFileAppend(file, std::string{kFooter})); + CHECK_OK(RemoteFileClose(file)); +} + +std::string Corpus::MemoryUsageString() const { + size_t data_size = 0; + size_t features_size = 0; + for (const auto &record : records_) { + data_size += record.data.capacity() * sizeof(record.data[0]); + features_size += record.features.capacity() * sizeof(record.features[0]); + } + return absl::StrCat("d", data_size >> 20, "/f", features_size >> 20); +} + +//------------------------------------------------------------------------------ +// WeightedDistribution +//------------------------------------------------------------------------------ + +void WeightedDistribution::AddWeight(uint64_t weight) { + CHECK_EQ(weights_.size(), cumulative_weights_.size()); + weights_.push_back(weight); + if (cumulative_weights_.empty()) { + cumulative_weights_.push_back(weight); + } else { + cumulative_weights_.push_back(cumulative_weights_.back() + weight); + } +} + +void WeightedDistribution::ChangeWeight(size_t idx, uint64_t new_weight) { + CHECK_LT(idx, size()); + weights_[idx] = new_weight; + cumulative_weights_valid_ = false; +} + +__attribute__((noinline)) // to see it in profile. +void WeightedDistribution::RecomputeInternalState() { + uint64_t partial_sum = 0; + for (size_t i = 0, n = size(); i < n; i++) { + partial_sum += weights_[i]; + cumulative_weights_[i] = partial_sum; + } + cumulative_weights_valid_ = true; +} + +__attribute__((noinline)) // to see it in profile. +size_t +WeightedDistribution::RandomIndex(size_t random) const { + CHECK(!weights_.empty()); + CHECK(cumulative_weights_valid_); + uint64_t sum_of_all_weights = cumulative_weights_.back(); + if (sum_of_all_weights == 0) + return random % size(); // can't do much else here. 
+ random = random % sum_of_all_weights; + auto it = std::upper_bound(cumulative_weights_.begin(), + cumulative_weights_.end(), random); + CHECK(it != cumulative_weights_.end()); + return it - cumulative_weights_.begin(); +} + +uint64_t WeightedDistribution::PopBack() { + uint64_t result = weights_.back(); + weights_.pop_back(); + cumulative_weights_.pop_back(); + return result; +} + +//------------------------------------------------------------------------------ +// CoverageFrontier +//------------------------------------------------------------------------------ + +size_t CoverageFrontier::Compute(const Corpus &corpus) { + return Compute(corpus.Records()); +} + +size_t CoverageFrontier::Compute( + const std::vector &corpus_records) { + // Initialize the vectors. + std::fill(frontier_.begin(), frontier_.end(), false); + std::fill(frontier_weight_.begin(), frontier_weight_.end(), 0); + + // A vector of covered indices in pc_table. Needed for Coverage object. + PCIndexVec covered_pcs; + for (const auto &record : corpus_records) { + for (auto feature : record.features) { + if (!feature_domains::kPCs.Contains(feature)) continue; + size_t idx = ConvertPCFeatureToPcIndex(feature); + if (idx >= binary_info_.pc_table.size()) continue; + covered_pcs.push_back(idx); + frontier_[idx] = true; + } + } + + Coverage coverage(binary_info_.pc_table, covered_pcs); + + num_functions_in_frontier_ = 0; + IteratePcTableFunctions(binary_info_.pc_table, [this, &coverage](size_t beg, + size_t end) { + auto frontier_begin = frontier_.begin() + beg; + auto frontier_end = frontier_.begin() + end; + size_t cov_size_in_this_func = + std::count(frontier_begin, frontier_end, true); + + if (cov_size_in_this_func > 0 && cov_size_in_this_func < end - beg) + ++num_functions_in_frontier_; + + // Reset the frontier_ entries. + std::fill(frontier_begin, frontier_end, false); + + // Iterate over BBs in the function and check the coverage status. 
+ for (size_t i = beg; i < end; ++i) { + // If the current pc is not covered, it cannot be a frontier. + if (!coverage.BlockIsCovered(i)) continue; + + auto pc = binary_info_.pc_table[i].pc; + + // Current pc is covered, look for a non-covered successor. + for (auto successor : binary_info_.control_flow_graph.GetSuccessors(pc)) { + // Successor pc may not be in PCTable because of pruning. + if (!binary_info_.control_flow_graph.IsInPcTable(successor)) continue; + + auto successor_idx = + binary_info_.control_flow_graph.GetPcIndex(successor); + + // This successor is covered, skip it. + if (coverage.BlockIsCovered(successor_idx)) continue; + + // Now we have a frontier, compute the weight. + frontier_[i] = true; + + // Calculate frontier weight. + // Here we use reachability and coverage to identify all reachable and + // non-covered BBs from successor, and then use all functions called + // in those BBs. + for (auto reachable_bb : + binary_info_.control_flow_graph.LazyGetReachabilityForPc( + successor)) { + if (!binary_info_.control_flow_graph.IsInPcTable(reachable_bb) || + coverage.BlockIsCovered( + binary_info_.control_flow_graph.GetPcIndex(reachable_bb))) { + // This reachable BB is already either processed and added or + // covered via a different path -- not interesting! + continue; + } + frontier_weight_[i] += ComputeFrontierWeight( + coverage, binary_info_.control_flow_graph, + binary_info_.call_graph.GetBasicBlockCallees(reachable_bb)); + } + } + } + }); + + return num_functions_in_frontier_; +} + +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/corpus.h b/src/third_party/fuzztest/dist/centipede/corpus.h new file mode 100644 index 00000000000..e297f06310f --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/corpus.h @@ -0,0 +1,210 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef THIRD_PARTY_CENTIPEDE_CORPUS_H_ +#define THIRD_PARTY_CENTIPEDE_CORPUS_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include "absl/log/check.h" +#include "./centipede/binary_info.h" +#include "./centipede/execution_metadata.h" +#include "./centipede/feature.h" +#include "./centipede/feature_set.h" +#include "./centipede/util.h" +#include "./common/defs.h" + +namespace fuzztest::internal { + +// WeightedDistribution maintains an array of integer weights. +// It allows to compute a random number in range [0,size()) such that +// the probability of each number is proportional to its weight. +class WeightedDistribution { + public: + // Adds one more weight. + void AddWeight(uint64_t weight); + // Removes the last weight and returns it. + // Precondition: size() > 0. + uint64_t PopBack(); + // Changes the existing idx-th weight to new_weight. + void ChangeWeight(size_t idx, uint64_t new_weight); + // Returns a random number in [0,size()), using a random number `random`. + // For proper randomness, `random` should come from a 64-bit RNG. + // RandomIndex() must not be called after ChangeWeight() without first + // calling RecomputeInternalState(). + size_t RandomIndex(size_t random) const; + // Returns the number of weights. + size_t size() const { return weights_.size(); } + // Removes all weights. + void clear() { + weights_.clear(); + cumulative_weights_.clear(); + } + // Fixes the internal state that could become stale after call(s) to + // ChangeWeight(). 
+ void RecomputeInternalState(); + + // Computes a random weighted subset of elements to remove. + // Removes this subset from `this`. + // Returns the subset as a sorted array of indices. + std::vector RemoveRandomWeightedSubset(size_t target_size, Rng &rng) { + auto subset_to_remove = RandomWeightedSubset(weights_, target_size, rng); + RemoveSubset(subset_to_remove, weights_); + RemoveSubset(subset_to_remove, cumulative_weights_); + return subset_to_remove; + } + + private: + // The array of weights. The probability of choosing the index Idx + // is weights_[Idx] / SumOfAllWeights. + std::vector weights_; + // i-th element is the sum of the first i elements of weights_. + std::vector cumulative_weights_; + // If false, cumulative_weights_ needs to be recomputed. + bool cumulative_weights_valid_ = true; +}; + +class CoverageFrontier; // Forward decl, used in Corpus. + +// Input data and metadata. +struct CorpusRecord { + ByteArray data; + FeatureVec features; + ExecutionMetadata metadata; +}; + +// Maintains the corpus of inputs. +// Allows to prune (forget) inputs that become uninteresting. +class Corpus { + public: + Corpus() = default; + + Corpus(const Corpus &) = default; + Corpus(Corpus &&) noexcept = default; + Corpus &operator=(const Corpus &) = default; + Corpus &operator=(Corpus &&) noexcept = default; + + // Mutators. + + // Adds a corpus element, consisting of 'data' (the input bytes, non-empty), + // 'fv' (the features associated with this input), and execution `metadata`. + // `fs` is used to compute weights of `fv`. + void Add(const ByteArray &data, const FeatureVec &fv, + const ExecutionMetadata &metadata, const FeatureSet &fs, + const CoverageFrontier &coverage_frontier); + // Removes elements that contain only frequent features, according to 'fs'. + // Also, randomly removes elements to reduce the size to <= `max_corpus_size`. + // `max_corpus_size` should be positive. + // Returns the number of removed elements. 
+ size_t Prune(const FeatureSet &fs, const CoverageFrontier &coverage_frontier, + size_t max_corpus_size, Rng &rng); + + // Accessors. + + // Returns the inputs. + const std::vector &Records() const { return records_; } + // Returns the total number of inputs added. + size_t NumTotal() const { return num_pruned_ + NumActive(); } + // Return the number of currently active inputs, i.e. inputs that we want to + // keep mutating. + size_t NumActive() const { return records_.size(); } + // Returns the max and avg sizes of the inputs. + std::pair MaxAndAvgSize() const; + // Returns a random active corpus record using weighted distribution. + // See WeightedDistribution. + const CorpusRecord &WeightedRandom(size_t random) const; + // Returns a random active corpus record using uniform distribution. + const CorpusRecord &UniformRandom(size_t random) const; + // Returns the element with index 'idx', where `idx` < NumActive(). + const ByteArray &Get(size_t idx) const { return records_[idx].data; } + // Returns the execution metadata for the element `idx`, `idx` < NumActive(). + const ExecutionMetadata &GetMetadata(size_t idx) const { + return records_[idx].metadata; + } + + // Logging. + + // Saves the corpus stats in JSON format to the `filepath` file, using `fs` + // for feature frequencies. + void DumpStatsToFile(const FeatureSet &fs, std::string_view filepath, + std::string_view description); + // Returns a string used for logging the corpus memory usage. + std::string MemoryUsageString() const; + + private: + std::vector records_; + // Maintains weights for elements of records_. + WeightedDistribution weighted_distribution_; + size_t num_pruned_ = 0; +}; + +// Coverage frontier is a set of PCs that are themselves covered, but some of +// adjacent PCs in the same function are not. +// This class identifies precise frontiers. Each frontier is assigned a weight. +// Frontier weight is a representation of how much code is behind the +// frontier. 
Therefore, it should be used to prioritize which frontier to focus +// first. +class CoverageFrontier { + public: + explicit CoverageFrontier(const BinaryInfo &binary_info) + : binary_info_(binary_info), + frontier_(binary_info.pc_table.size()), + frontier_weight_(binary_info.pc_table.size()) {} + + // Computes the coverage frontier of `corpus`. + // Returns the number of functions in the frontier. + size_t Compute(const Corpus &corpus); + + // Same as above. + size_t Compute(const std::vector &corpus_records); + + // Returns the number of functions in the frontier. + size_t NumFunctionsInFrontier() const { return num_functions_in_frontier_; } + + // Returns true iff `idx` belongs to the frontier. + bool PcIndexIsFrontier(size_t idx) const { + CHECK_LT(idx, MaxPcIndex()); + return frontier_[idx]; + } + + // Returns the size of the pc_table used to create `this`. + size_t MaxPcIndex() const { return binary_info_.pc_table.size(); } + + // Returns the frontier weight of pc at `idx`, weight of a non-frontier is 0. + uint64_t FrontierWeight(size_t idx) const { + CHECK_LT(idx, MaxPcIndex()); + return frontier_weight_[idx]; + } + + private: + const BinaryInfo &binary_info_; + + // frontier_[idx] is true iff binary_info_.pc_table[idx] is part of the coverage frontier. + std::vector frontier_; + // Stores the weight associated with frontier_[idx]. + std::vector frontier_weight_; + + // The number of functions in the frontier. + size_t num_functions_in_frontier_ = 0; +}; + +} // namespace fuzztest::internal + +#endif // THIRD_PARTY_CENTIPEDE_CORPUS_H_ diff --git a/src/third_party/fuzztest/dist/centipede/corpus_io.cc b/src/third_party/fuzztest/dist/centipede/corpus_io.cc new file mode 100644 index 00000000000..23e63c3cd67 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/corpus_io.cc @@ -0,0 +1,157 @@ +// Copyright 2022 The Centipede Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "./centipede/corpus_io.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "absl/log/check.h" +#include "absl/log/log.h" +#include "absl/status/status.h" +#include "absl/time/time.h" +#include "absl/types/span.h" +#include "./centipede/feature.h" +#include "./centipede/rusage_profiler.h" +#include "./centipede/util.h" +#include "./common/blob_file.h" +#include "./common/defs.h" +#include "./common/hash.h" +#include "./common/logging.h" +#include "./common/remote_file.h" + +namespace fuzztest::internal { + +void ReadShard(std::string_view corpus_path, std::string_view features_path, + const std::function &callback) { + const bool good_corpus_path = + !corpus_path.empty() && RemotePathExists(corpus_path); + const bool good_features_path = + !features_path.empty() && RemotePathExists(features_path); + + if (!good_corpus_path) { + LOG(WARNING) << "Corpus file path empty or not found - returning: " + << corpus_path; + return; + } + + RPROF_THIS_FUNCTION_WITH_TIMELAPSE( // + /*enable=*/ABSL_VLOG_IS_ON(10), // + /*timelapse_interval=*/absl::Seconds(30), // + /*also_log_timelapses=*/false); + + // Maps input hashes to inputs. + // NOTE: Using `std::multimap` to prevent auto-deduplication of inputs. + // TODO(ussuri): This is the legacy behavior. At least one test relies on + // it (but doesn't really need it). Investigate and switch to + // `absl::flat_hash_map`. 
+ std::multimap hash_to_input; + // Read inputs from the corpus file into `hash_to_input`. + auto corpus_reader = DefaultBlobFileReaderFactory(); + CHECK_OK(corpus_reader->Open(corpus_path)) << VV(corpus_path); + ByteSpan blob; + while (corpus_reader->Read(blob).ok()) { + std::string hash = Hash(blob); + ByteArray input{blob.begin(), blob.end()}; + hash_to_input.emplace(std::move(hash), std::move(input)); + } + + RPROF_SNAPSHOT("Read inputs"); + + // Input counts of various kinds (for logging). + const size_t num_inputs = hash_to_input.size(); + size_t num_inputs_missing_features = num_inputs; + size_t num_inputs_empty_features = 0; + size_t num_inputs_non_empty_features = 0; + + // If the features file is not passed or doesn't exist, simply ignore it. + if (!good_features_path) { + LOG(WARNING) << "Features file path empty or not found - ignoring: " + << features_path; + } else { + // Read features from the features file. For each feature, find a matching + // input in `hash_to_input`, call `callback` for the pair, and remove the + // entry from `hash_to_input`. In the end, `hash_to_input` will contain + // only inputs without matching features. + auto features_reader = DefaultBlobFileReaderFactory(); + CHECK_OK(features_reader->Open(features_path)) << VV(features_path); + ByteSpan hash_and_features; + while (features_reader->Read(hash_and_features).ok()) { + // Every valid feature record must contain the hash at the end. + // Ignore this record if it is too short. + if (hash_and_features.size() < kHashLen) continue; + FeatureVec features; + std::string hash = UnpackFeaturesAndHash(hash_and_features, &features); + auto input_node = hash_to_input.extract(hash); + if (!input_node.empty()) { + --num_inputs_missing_features; + if (features.empty()) { + // When the features file got created, Centipede did compute features + // for the input, but they came up empty. Indicate to the client that + // there is no need to recompute by passing this special value. 
+ features = {feature_domains::kNoFeature}; + ++num_inputs_empty_features; + } else { + ++num_inputs_non_empty_features; + } + callback(std::move(input_node.mapped()), std::move(features)); + } + } + + RPROF_SNAPSHOT("Read features & reported input/features pairs"); + } + + // Finally, call `callback` on the remaining inputs without matching features. + // This also automatically covers the features file not passed or missing. + for (auto &&[hash, input] : hash_to_input) { + // Indicate to the client that it needs to recompute features for this input + // by passing an empty value. + callback(std::move(input), {}); + } + + RPROF_SNAPSHOT("Reported inputs with no matching features"); + + VLOG(1) // + << "Finished shard reading:\n" + << "Corpus path : " << corpus_path << "\n" + << "Features path : " << features_path << "\n" + << "Inputs : " << num_inputs << "\n" + << "Inputs, non-empty features : " << num_inputs_non_empty_features + << "\n" + << "Inputs, empty features : " << num_inputs_empty_features << "\n" + << "Inputs, missing features : " << num_inputs_missing_features; +} + +void ExportCorpus(absl::Span sharded_file_paths, + std::string_view out_dir) { + LOG(INFO) << "Exporting corpus to " << out_dir; + for (const std::string &file : sharded_file_paths) { + auto reader = DefaultBlobFileReaderFactory(); + CHECK_OK(reader->Open(file)) << VV(file); + ByteSpan blob; + size_t num_read = 0; + while (reader->Read(blob).ok()) { + ++num_read; + WriteToRemoteHashedFileInDir(out_dir, blob); + } + LOG(INFO) << "Exported " << num_read << " inputs from " << file; + } +} + +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/corpus_io.h b/src/third_party/fuzztest/dist/centipede/corpus_io.h new file mode 100644 index 00000000000..0f63e69caf4 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/corpus_io.h @@ -0,0 +1,53 @@ +// Copyright 2022 The Centipede Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef THIRD_PARTY_CENTIPEDE_SHARD_READER_H_ +#define THIRD_PARTY_CENTIPEDE_SHARD_READER_H_ + +#include +#include +#include + +#include "absl/types/span.h" +#include "./centipede/feature.h" +#include "./common/defs.h" + +namespace fuzztest::internal { + +// `corpus_path` is a path to a BlobFile with corpus elements (inputs). If the +// path is empty or non-existent, no processing is done. +// +// `features_path` is a path to a BlobFile with {features/hash} pairs created by +// `PackFeaturesAndHash()`. If the path is empty or non-existent, an empty +// `FeatureVec` is passed to every call of `callback`. +// +// For every {features/hash} pair we need to find an input with this hash. +// This function reads `corpus_path` and `features_path` and calls `callback` +// on every pair {input, features}. +// +// If features are not found for a given input, callback's 2nd argument is {}. +// +// If features are found for a given input but are empty, +// then callback's 2nd argument is {feature_domains::kNoFeature}. +void ReadShard(std::string_view corpus_path, std::string_view features_path, + const std::function &callback); + +// Unpacks the corpus from `sharded_file_paths` and writes each input to an +// individual file in `out_dir`. The file names are the inputs' hashes. 
+void ExportCorpus(absl::Span sharded_file_paths, + std::string_view out_dir); + +} // namespace fuzztest::internal + +#endif // THIRD_PARTY_CENTIPEDE_SHARD_READER_H_ diff --git a/src/third_party/fuzztest/dist/centipede/corpus_io_test.cc b/src/third_party/fuzztest/dist/centipede/corpus_io_test.cc new file mode 100644 index 00000000000..a97572d6f91 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/corpus_io_test.cc @@ -0,0 +1,128 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "./centipede/corpus_io.h" + +#include // NOLINT +#include +#include +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/log/check.h" +#include "absl/types/span.h" +#include "./centipede/corpus.h" +#include "./centipede/feature.h" +#include "./centipede/util.h" +#include "./centipede/workdir.h" +#include "./common/blob_file.h" +#include "./common/defs.h" +#include "./common/test_util.h" + +namespace fuzztest::internal { +namespace { + +using ::testing::UnorderedElementsAre; + +void WriteBlobsToFile(std::string_view blob_file_path, + absl::Span blobs) { + auto writer = DefaultBlobFileWriterFactory(); + CHECK_OK(writer->Open(blob_file_path, "w")); + for (const ByteArray& blob : blobs) { + CHECK_OK(writer->Write(blob)); + } + CHECK_OK(writer->Close()); +} + +std::vector ReadInputsFromFiles(std::string_view dir) { + std::vector inputs; + for (const auto& file : std::filesystem::directory_iterator(dir)) { + ByteArray input; + ReadFromLocalFile(file.path().c_str(), input); + inputs.push_back(std::move(input)); + } + return inputs; +} + +TEST(ReadShardTest, ReadsInputsAndFeaturesAndCallsCallbackForEachPair) { + ByteArray data1 = {1, 2, 3}; + ByteArray data2 = {3, 4, 5, 6}; + ByteArray data3 = {7, 8, 9, 10, 11}; + ByteArray data4 = {12, 13, 14}; + ByteArray data5 = {15, 16}; + FeatureVec fv1 = {100, 200, 300}; + FeatureVec fv2 = {300, 400, 500, 600}; + FeatureVec fv3 = {700, 800, 900, 1000, 1100}; + FeatureVec fv4 = {}; // empty. 
+ + std::vector corpus_blobs; + corpus_blobs.push_back(data1); + corpus_blobs.push_back(data2); + corpus_blobs.push_back(data3); + corpus_blobs.push_back(data4); + corpus_blobs.push_back(data5); + + std::vector features_blobs; + features_blobs.push_back(PackFeaturesAndHash(data1, fv1)); + features_blobs.push_back(PackFeaturesAndHash(data2, fv2)); + features_blobs.push_back(PackFeaturesAndHash(data3, fv3)); + features_blobs.push_back(PackFeaturesAndHash(data4, fv4)); + + TempDir tmp_dir{test_info_->name()}; + std::string corpus_path = tmp_dir.GetFilePath("corpus"); + std::string features_path = tmp_dir.GetFilePath("features"); + WriteBlobsToFile(corpus_path, corpus_blobs); + WriteBlobsToFile(features_path, features_blobs); + + std::vector res; + ReadShard(corpus_path, features_path, + [&res](const ByteArray& input, const FeatureVec& features) { + res.push_back(CorpusRecord{input, features}); + }); + + EXPECT_EQ(res.size(), 5UL); + EXPECT_EQ(res[0].data, data1); + EXPECT_EQ(res[1].data, data2); + EXPECT_EQ(res[2].data, data3); + EXPECT_EQ(res[3].data, data4); + EXPECT_EQ(res[4].data, data5); + EXPECT_EQ(res[0].features, fv1); + EXPECT_EQ(res[1].features, fv2); + EXPECT_EQ(res[2].features, fv3); + EXPECT_EQ(res[3].features, FeatureVec{feature_domains::kNoFeature}); + EXPECT_EQ(res[4].features, FeatureVec()); +} + +TEST(ExportCorpusTest, ExportsCorpusToIndividualFiles) { + const std::filesystem::path temp_dir = GetTestTempDir(test_info_->name()); + const std::filesystem::path out_dir = temp_dir / "out_dir"; + CHECK(std::filesystem::create_directory(out_dir)); + const WorkDir workdir{temp_dir.c_str(), "fake_binary_name", + "fake_binary_hash", /*my_shard_index=*/0}; + const auto corpus_file_paths = workdir.CorpusFilePaths(); + WriteBlobsToFile(corpus_file_paths.Shard(0), {ByteArray{1, 2}, ByteArray{3}}); + WriteBlobsToFile(corpus_file_paths.Shard(1), {ByteArray{4}, ByteArray{5, 6}}); + + ExportCorpus({corpus_file_paths.Shard(0), corpus_file_paths.Shard(1)}, + 
out_dir.c_str()); + + EXPECT_THAT(ReadInputsFromFiles(out_dir.c_str()), + UnorderedElementsAre(ByteArray{1, 2}, ByteArray{3}, ByteArray{4}, + ByteArray{5, 6})); +} + +} // namespace +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/corpus_test.cc b/src/third_party/fuzztest/dist/centipede/corpus_test.cc new file mode 100644 index 00000000000..13b576ca733 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/corpus_test.cc @@ -0,0 +1,405 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "./centipede/corpus.h" + +#include +#include +#include +#include // NOLINT +#include +#include + +#include "gtest/gtest.h" +#include "./centipede/binary_info.h" +#include "./centipede/call_graph.h" +#include "./centipede/control_flow.h" +#include "./centipede/feature.h" +#include "./centipede/feature_set.h" +#include "./centipede/pc_info.h" +#include "./centipede/util.h" +#include "./common/defs.h" +#include "./common/test_util.h" + +namespace fuzztest::internal { +namespace { + +TEST(Corpus, GetCmpData) { + PCTable pc_table(100); + CFTable cf_table(100); + BinaryInfo bin_info{pc_table, {}, cf_table, {}, {}, {}}; + CoverageFrontier coverage_frontier(bin_info); + FeatureSet fs(3, {}); + Corpus corpus; + ByteArray cmp_data{2, 0, 1, 2, 3}; + FeatureVec features1 = {10, 20, 30}; + fs.IncrementFrequencies(features1); + corpus.Add({1}, features1, /*metadata=*/{cmp_data}, fs, coverage_frontier); + EXPECT_EQ(corpus.NumActive(), 1); + EXPECT_EQ(corpus.GetMetadata(0).cmp_data, cmp_data); +} + +TEST(Corpus, PrintStats) { + const std::filesystem::path test_tmpdir = GetTestTempDir(test_info_->name()); + PCTable pc_table(100); + CFTable cf_table(100); + BinaryInfo bin_info{pc_table, {}, cf_table, {}, {}, {}}; + CoverageFrontier coverage_frontier(bin_info); + FeatureSet fs(3, {}); + Corpus corpus; + FeatureVec features1 = {10, 20, 30}; + FeatureVec features2 = {20, 40}; + fs.IncrementFrequencies(features1); + corpus.Add({1, 2, 3}, features1, {}, fs, coverage_frontier); + fs.IncrementFrequencies(features2); + corpus.Add({4, 5}, features2, {}, fs, coverage_frontier); + const std::string stats_filepath = test_tmpdir / "corpus.txt"; + corpus.DumpStatsToFile(fs, stats_filepath, "Test corpus"); + std::string stats_file_contents; + ReadFromLocalFile(stats_filepath, stats_file_contents); + EXPECT_EQ(stats_file_contents, + R"(# Test corpus +{ + "num_inputs": 2, + "corpus_stats": [ + {"size": 3, "frequencies": [1, 2, 1]}, + {"size": 2, "frequencies": [2, 1]} + ] +} +)"); +} + 
+TEST(Corpus, Prune) { + // Prune will remove an input if all of its features appear at least 3 times. + PCTable pc_table(100); + CFTable cf_table(100); + BinaryInfo bin_info{pc_table, {}, cf_table, {}, {}, {}}; + CoverageFrontier coverage_frontier(bin_info); + FeatureSet fs(3, {}); + Corpus corpus; + Rng rng(0); + size_t max_corpus_size = 1000; + + auto Add = [&](const CorpusRecord &record) { + fs.IncrementFrequencies(record.features); + corpus.Add(record.data, record.features, {}, fs, coverage_frontier); + }; + + auto VerifyActiveInputs = [&](std::vector expected_inputs) { + std::vector observed_inputs; + for (size_t i = 0, n = corpus.NumActive(); i < n; i++) { + observed_inputs.push_back(corpus.Get(i)); + } + std::sort(observed_inputs.begin(), observed_inputs.end()); + std::sort(expected_inputs.begin(), expected_inputs.end()); + EXPECT_EQ(observed_inputs, expected_inputs); + }; + + Add({{0}, {20, 40}}); + Add({{1}, {20, 30}}); + Add({{2}, {30, 40}}); + Add({{3}, {40, 50}}); + Add({{4}, {10, 20}}); + + // Prune. Features 20 and 40 are frequent => input {0} will be removed. + EXPECT_EQ(corpus.NumActive(), 5); + EXPECT_EQ(corpus.Prune(fs, coverage_frontier, max_corpus_size, rng), 1); + EXPECT_EQ(corpus.NumActive(), 4); + EXPECT_EQ(corpus.NumTotal(), 5); + VerifyActiveInputs({{1}, {2}, {3}, {4}}); + + Add({{5}, {30, 60}}); + EXPECT_EQ(corpus.NumTotal(), 6); + // Prune. Feature 30 is now frequent => inputs {1} and {2} will be removed. + EXPECT_EQ(corpus.NumActive(), 5); + EXPECT_EQ(corpus.Prune(fs, coverage_frontier, max_corpus_size, rng), 2); + EXPECT_EQ(corpus.NumActive(), 3); + VerifyActiveInputs({{3}, {4}, {5}}); + + // Test with smaller max_corpus_size values. 
+ EXPECT_EQ(corpus.Prune(fs, coverage_frontier, 3, rng), 0); + EXPECT_EQ(corpus.NumActive(), 3); + EXPECT_EQ(corpus.Prune(fs, coverage_frontier, 2, rng), 1); + EXPECT_EQ(corpus.NumActive(), 2); + EXPECT_EQ(corpus.Prune(fs, coverage_frontier, 1, rng), 1); + EXPECT_EQ(corpus.NumActive(), 1); + EXPECT_DEATH(corpus.Prune(fs, coverage_frontier, 0, rng), + "max_corpus_size"); // CHECK-fail. + EXPECT_EQ(corpus.NumTotal(), 6); +} + +// Regression test for a crash in Corpus::Prune(). +TEST(Corpus, PruneRegressionTest1) { + PCTable pc_table(100); + CFTable cf_table(100); + BinaryInfo bin_info{pc_table, {}, cf_table, {}, {}, {}}; + CoverageFrontier coverage_frontier(bin_info); + FeatureSet fs(2, {}); + Corpus corpus; + Rng rng(0); + size_t max_corpus_size = 1000; + + auto Add = [&](const CorpusRecord &record) { + fs.IncrementFrequencies(record.features); + corpus.Add(record.data, record.features, {}, fs, coverage_frontier); + }; + + Add({{1}, {10, 20}}); + Add({{2}, {10}}); + corpus.Prune(fs, coverage_frontier, max_corpus_size, rng); +} + +TEST(WeightedDistribution, WeightedDistribution) { + std::vector freq; + WeightedDistribution wd; + const int kNumIter = 10000; + + auto set_weights = [&](const std::vector &weights) { + wd.clear(); + for (auto weight : weights) { + wd.AddWeight(weight); + } + }; + + auto compute_freq = [&]() { + freq.clear(); + freq.resize(wd.size()); + // We use numbers in [0, kNumIter) instead of random numbers + // for simplicity. 
+ for (int i = 0; i < kNumIter; i++) { + freq[wd.RandomIndex(i)]++; + } + }; + + set_weights({1, 1}); + compute_freq(); + EXPECT_EQ(freq[0], kNumIter / 2); + EXPECT_EQ(freq[1], kNumIter / 2); + + set_weights({1, 2}); + compute_freq(); + EXPECT_GT(freq[0], kNumIter / 4); + EXPECT_LT(freq[0], kNumIter / 2); + EXPECT_GT(freq[1], kNumIter / 2); + + set_weights({10, 100, 1}); + compute_freq(); + EXPECT_LT(9 * freq[2], freq[0]); + EXPECT_LT(9 * freq[0], freq[1]); + + set_weights({0, 1, 2}); + compute_freq(); + EXPECT_EQ(freq[0], 0); + EXPECT_GT(freq[2], freq[1]); + + set_weights({2, 1, 0}); + compute_freq(); + EXPECT_EQ(freq[2], 0); + EXPECT_GT(freq[0], freq[1]); + + // Test ChangeWeight + set_weights({1, 2, 3, 4, 5}); + compute_freq(); + EXPECT_GT(freq[4], freq[3]); + EXPECT_GT(freq[3], freq[2]); + EXPECT_GT(freq[2], freq[1]); + EXPECT_GT(freq[1], freq[0]); + + wd.ChangeWeight(2, 1); + // Calling RandomIndex() after ChangeWeight() w/o calling + // RecomputeInternalState() should crash. + EXPECT_DEATH(compute_freq(), ""); + wd.RecomputeInternalState(); + // Weights: {1, 2, 1, 4, 5} + compute_freq(); + EXPECT_GT(freq[4], freq[3]); + EXPECT_GT(freq[3], freq[2]); + EXPECT_LT(freq[2], freq[1]); + EXPECT_GT(freq[1], freq[0]); + + // Weights: {1, 2, 1, 0, 5} + wd.ChangeWeight(3, 0); + wd.RecomputeInternalState(); + compute_freq(); + EXPECT_GT(freq[4], freq[1]); + EXPECT_GT(freq[1], freq[0]); + EXPECT_GT(freq[1], freq[2]); + EXPECT_EQ(freq[3], 0); + + // Test PopBack(). + wd.PopBack(); + // Weights: {1, 2, 1, 0} after PopBack(). + EXPECT_EQ(wd.size(), 4); + EXPECT_GT(freq[1], freq[0]); + EXPECT_GT(freq[1], freq[2]); + EXPECT_EQ(freq[3], 0); + + // Stress test. If the algorithm is too slow, we may be able to catch it as a + // timeout. 
+ wd.clear(); + for (int i = 1; i < 100000; i++) { + wd.AddWeight(i); + } + compute_freq(); +} + +// TODO(ussuri): This is becoming difficult to maintain: various bits of the +// input data are stored in independent arrays, other bits are dynamically +// initialized, and the matching expected results are listed in two long chains +// of EXPECT's. I think it should be doable to refactor this to use something +// like a TestCase struct tying all that together, then iterate over test_cases +// once to populate pc_table etc, and a second time to e.g. +// EXPECT_EQ(frontier.PcIndexIsFrontier(i), +// test_cases[i].expected_is_frontier). +TEST(CoverageFrontier, Compute) { + // Function [0, 1): Fully covered. + // Function [1, 2): Not covered. + // Function [2, 4): Partially covered => has one frontier. + // Function [4, 6): Not covered. + // Function [6, 9): Partially covered => has one frontier. + // Function [9, 12): Fully covered. + // Function [12, 19): Partially covered => has two frontiers. + PCTable pc_table{{0, PCInfo::kFuncEntry}, // Covered. + {1, PCInfo::kFuncEntry}, + {2, PCInfo::kFuncEntry}, // Covered. + {3, 0}, + {4, PCInfo::kFuncEntry}, + {5, 0}, + {6, PCInfo::kFuncEntry}, // Covered. + {7, 0}, // Covered. + {8, 0}, + {9, PCInfo::kFuncEntry}, // Covered. + {10, 0}, // Covered. + {11, 0}, // Covered. + {12, PCInfo::kFuncEntry}, // Covered. + {13, 0}, // Covered. + {14, 0}, // Covered. + {15, 0}, + {16, 0}, // Covered. + {17, 0}, // Covered. + {18, 0}}; + CFTable cf_table{ + 0, 0, 9, 0, // 0 calls 9. + 1, 0, 6, 0, // 1 calls 6. + 2, 3, 0, 0, // 2 calls 4 in bb 3. + 3, 0, 4, 0, // This bb calls 4. + 4, 5, 0, 0, // 4 calls 9 in bb 5. + 5, 0, 9, 0, // This bb calls 9. + 6, 7, 8, 0, 0, // 6 calls 2 and makes indirect call in bb 8. + 7, 0, 0, 8, 0, 2, -1, 0, // This bb calls 2 and makes an indirect + // call. + 9, 66, 10, 0, 0, // 9 calls no one. 9 has a successor (66) which is not + // in pc_table. This may happen as a result of pruning. 
+ 10, 11, 0, 0, 11, 0, 0, 12, 13, 14, 0, 0, // 12 call 9 and 99 in bb + // 15, and calls 4 in + // bb 18. + 13, 15, 16, 0, 0, 14, 17, 18, 0, 0, 15, 19, 0, 9, 99, + 0, // PC 15 goes to 19 that is not in pc_table. This bb calls 9 and 99. + 16, 13, 0, 0, 17, 0, 0, 18, 0, 4, 0, // This bb calls 4. + 19, 0, 0}; + + BinaryInfo bin_info = {pc_table, {}, cf_table, {}, + ControlFlowGraph(), CallGraph()}; + bin_info.control_flow_graph.InitializeControlFlowGraph(cf_table, pc_table); + bin_info.call_graph.InitializeCallGraph(cf_table, pc_table); + CoverageFrontier frontier(bin_info); + + FeatureVec pcs(pc_table.size()); + for (size_t i = 0; i < pc_table.size(); i++) { + pcs[i] = feature_domains::kPCs.ConvertToMe(i); + } + + FeatureSet fs(100, {}); + Corpus corpus; + + auto Add = [&](feature_t feature) { + fs.IncrementFrequencies({feature}); + corpus.Add({42}, {feature}, {}, fs, frontier); + }; + + // Add PC-based features. + for (size_t idx : {0, 2, 6, 7, 9, 10, 11, 12, 13, 14, 16, 17}) { + Add(pcs[idx]); + } + // add some non-pc features. + for (size_t x : {1, 2, 3, 4}) { + Add(feature_domains::kUnknown.ConvertToMe(x)); + } + + // Compute and check the frontier. 
+ EXPECT_EQ(frontier.Compute(corpus), 3); + EXPECT_EQ(frontier.NumFunctionsInFrontier(), 3); + EXPECT_FALSE(frontier.PcIndexIsFrontier(0)); + EXPECT_FALSE(frontier.PcIndexIsFrontier(1)); + EXPECT_TRUE(frontier.PcIndexIsFrontier(2)); + EXPECT_FALSE(frontier.PcIndexIsFrontier(3)); + EXPECT_FALSE(frontier.PcIndexIsFrontier(4)); + EXPECT_FALSE(frontier.PcIndexIsFrontier(5)); + EXPECT_TRUE(frontier.PcIndexIsFrontier(6)); + EXPECT_FALSE(frontier.PcIndexIsFrontier(7)); + EXPECT_FALSE(frontier.PcIndexIsFrontier(8)); + EXPECT_FALSE(frontier.PcIndexIsFrontier(9)); + EXPECT_FALSE(frontier.PcIndexIsFrontier(10)); + EXPECT_FALSE(frontier.PcIndexIsFrontier(11)); + EXPECT_FALSE(frontier.PcIndexIsFrontier(12)); + EXPECT_TRUE(frontier.PcIndexIsFrontier(13)); + EXPECT_TRUE(frontier.PcIndexIsFrontier(14)); + EXPECT_FALSE(frontier.PcIndexIsFrontier(15)); + EXPECT_FALSE(frontier.PcIndexIsFrontier(16)); + EXPECT_FALSE(frontier.PcIndexIsFrontier(17)); + EXPECT_FALSE(frontier.PcIndexIsFrontier(18)); + + // Check frontier weight. 
+ EXPECT_EQ(frontier.FrontierWeight(0), 0); + EXPECT_EQ(frontier.FrontierWeight(1), 0); + EXPECT_EQ(frontier.FrontierWeight(2), 153); + EXPECT_EQ(frontier.FrontierWeight(3), 0); + EXPECT_EQ(frontier.FrontierWeight(4), 0); + EXPECT_EQ(frontier.FrontierWeight(5), 0); + EXPECT_EQ(frontier.FrontierWeight(6), 230); + EXPECT_EQ(frontier.FrontierWeight(7), 0); + EXPECT_EQ(frontier.FrontierWeight(8), 0); + EXPECT_EQ(frontier.FrontierWeight(9), 0); + EXPECT_EQ(frontier.FrontierWeight(10), 0); + EXPECT_EQ(frontier.FrontierWeight(11), 0); + EXPECT_EQ(frontier.FrontierWeight(12), 0); + EXPECT_EQ(frontier.FrontierWeight(13), 25); + EXPECT_EQ(frontier.FrontierWeight(14), 153); + EXPECT_EQ(frontier.FrontierWeight(15), 0); + EXPECT_EQ(frontier.FrontierWeight(16), 0); + EXPECT_EQ(frontier.FrontierWeight(17), 0); + EXPECT_EQ(frontier.FrontierWeight(18), 0); +} + +TEST(CoverageFrontierDeath, InvalidIndexToFrontier) { + PCTable pc_table = {{0, PCInfo::kFuncEntry}, {1, 0}}; + CFTable cf_table = { + 0, 1, 0, 0, 1, 0, 0, + }; + + BinaryInfo bin_info = {pc_table, {}, cf_table, {}, + ControlFlowGraph(), CallGraph()}; + bin_info.control_flow_graph.InitializeControlFlowGraph(cf_table, pc_table); + bin_info.call_graph.InitializeCallGraph(cf_table, pc_table); + CoverageFrontier frontier(bin_info); + + Corpus corpus; + frontier.Compute(corpus); + // Check with a non-existent idx. + EXPECT_DEATH(frontier.PcIndexIsFrontier(666), ""); + EXPECT_DEATH(frontier.FrontierWeight(666), ""); +} + +} // namespace +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/coverage.cc b/src/third_party/fuzztest/dist/centipede/coverage.cc new file mode 100644 index 00000000000..28373ea93b1 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/coverage.cc @@ -0,0 +1,228 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "./centipede/coverage.h" + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "absl/container/flat_hash_set.h" +#include "absl/log/check.h" +#include "absl/strings/str_split.h" +#include "absl/synchronization/mutex.h" +#include "./centipede/control_flow.h" +#include "./centipede/feature.h" +#include "./centipede/pc_info.h" +#include "./centipede/symbol_table.h" +#include "./common/remote_file.h" +#include "./common/status_macros.h" + +namespace fuzztest::internal { + +Coverage::Coverage(const PCTable &pc_table, const PCIndexVec &pci_vec) + : func_entries_(pc_table.size()), + fully_covered_funcs_vec_(pc_table.size()), + covered_pcs_vec_(pc_table.size()) { + CHECK_LT(pc_table.size(), std::numeric_limits::max()); + absl::flat_hash_set covered_pcs(pci_vec.begin(), pci_vec.end()); + // Iterate though all the pc_table entries. + // The first one is some function's kFuncEntry. + // Then find the next kFuncEntry or the table end. + // Everything in between corresponds to the current function. + // For fully (un)covered functions, add their entry PCIndex + // to fully_covered_funcs or uncovered_funcs correspondingly. + // For all others add them to partially_covered_funcs. + for (size_t this_func = 0; this_func < pc_table.size();) { + CHECK(pc_table[this_func].has_flag(PCInfo::kFuncEntry)); + func_entries_[this_func] = true; + // Find next entry. 
+ size_t next_func = this_func + 1; + while (next_func < pc_table.size() && + !pc_table[next_func].has_flag(PCInfo::kFuncEntry)) { + next_func++; + } + // Collect covered and uncovered indices. + PartiallyCoveredFunction pcf; + for (size_t i = this_func; i < next_func; i++) { + if (covered_pcs.contains(i)) { + pcf.covered.push_back(i); + covered_pcs_vec_[i] = true; + } else { + pcf.uncovered.push_back(i); + } + } + // Put this function into one of + // {fully_covered_funcs, uncovered_funcs, partially_covered_funcs} + size_t num_func_pcs = next_func - this_func; + if (num_func_pcs == pcf.covered.size()) { + fully_covered_funcs.push_back(this_func); + fully_covered_funcs_vec_[this_func] = true; + } else if (pcf.covered.empty()) { + uncovered_funcs.push_back(this_func); + } else { + CHECK(!pcf.covered.empty()); + CHECK(!pcf.uncovered.empty()); + CHECK_EQ(pcf.covered.size() + pcf.uncovered.size(), num_func_pcs); + partially_covered_funcs.push_back(pcf); + } + // Move to the next function. + this_func = next_func; + } +} + +void Coverage::DumpReportToFile(const SymbolTable &symbols, + std::string_view filepath, + std::string_view description) { + auto *file = ValueOrDie(RemoteFileOpen(filepath, "w")); + CHECK(file != nullptr) << "Failed to open file: " << filepath; + CHECK_OK(RemoteFileSetWriteBufferSize(file, 100UL * 1024 * 1024)); + if (!description.empty()) { + CHECK_OK(RemoteFileAppend(file, "# ")); + CHECK_OK(RemoteFileAppend(file, std::string{description})); + CHECK_OK(RemoteFileAppend(file, ":\n\n")); + } + // Print symbolized function names for all covered functions. + for (auto pc_index : fully_covered_funcs) { + CHECK_OK(RemoteFileAppend(file, "FULL: ")); + CHECK_OK(RemoteFileAppend(file, symbols.full_description(pc_index))); + CHECK_OK(RemoteFileAppend(file, "\n")); + } + CHECK_OK(RemoteFileFlush(file)); + // Same for uncovered functions. 
+ for (auto pc_index : uncovered_funcs) { + CHECK_OK(RemoteFileAppend(file, "NONE: ")); + CHECK_OK(RemoteFileAppend(file, symbols.full_description(pc_index))); + CHECK_OK(RemoteFileAppend(file, "\n")); + } + CHECK_OK(RemoteFileFlush(file)); + // For every partially covered function, first print its name, + // then print its covered edges, then uncovered edges. + for (auto &pcf : partially_covered_funcs) { + CHECK_OK(RemoteFileAppend(file, "PARTIAL: ")); + CHECK_OK(RemoteFileAppend(file, symbols.full_description(pcf.covered[0]))); + CHECK_OK(RemoteFileAppend(file, "\n")); + for (auto pc_index : pcf.covered) { + CHECK_OK(RemoteFileAppend(file, " + ")); + CHECK_OK(RemoteFileAppend(file, symbols.full_description(pc_index))); + CHECK_OK(RemoteFileAppend(file, "\n")); + } + for (auto pc_index : pcf.uncovered) { + CHECK_OK(RemoteFileAppend(file, " - ")); + CHECK_OK(RemoteFileAppend(file, symbols.full_description(pc_index))); + CHECK_OK(RemoteFileAppend(file, "\n")); + } + } + CHECK_OK(RemoteFileFlush(file)); + CHECK_OK(RemoteFileClose(file)); +} + +std::string CoverageLogger::ObserveAndDescribeIfNew(PCIndex pc_index) { + if (pc_table_.empty()) return ""; // Fast-path return (symbolization is off). + absl::MutexLock l(&mu_); + if (!observed_indices_.insert(pc_index).second) return ""; + std::ostringstream os; + if (pc_index >= pc_table_.size()) { + os << "FUNC/EDGE index: " << pc_index; + } else { + os << (pc_table_[pc_index].has_flag(PCInfo::kFuncEntry) ? "FUNC: " + : "EDGE: "); + os << symbols_.full_description(pc_index); + if (!observed_descriptions_.insert(os.str()).second) return ""; + } + return os.str(); +} + +FunctionFilter::FunctionFilter(std::string_view functions_to_filter, + const SymbolTable &symbols) { + // set pcs_[idx] to 1, for any idx that belongs to a filtered function. + // keep pcs_ empty, if no filtered functions are found in symbols. 
+ for (auto &func : absl::StrSplit(functions_to_filter, ',')) { + for (size_t idx = 0, n = symbols.size(); idx < n; ++idx) { + if (func == symbols.func(idx)) { + if (pcs_.empty()) { + pcs_.resize(n); + } + pcs_[idx] = 1; + } + } + } +} + +bool FunctionFilter::filter(const FeatureVec &features) const { + if (pcs_.empty()) return true; + for (auto feature : features) { + if (!feature_domains::kPCs.Contains(feature)) continue; + size_t idx = ConvertPCFeatureToPcIndex(feature); + // idx should normally be within the range. Ignore it if it's not. + if (idx >= pcs_.size()) continue; + if (pcs_[idx]) return true; + } + return false; +} + +static uint8_t SelectMultiplierByCoverageKind(uint8_t uncovered_knob, + uint8_t partially_covered_knob, + uint8_t fully_covered_knob, + PCIndex callee_idx, + const Coverage &coverage) { + if (coverage.FunctionIsFullyCovered(callee_idx)) return fully_covered_knob; + if (coverage.BlockIsCovered(callee_idx)) return partially_covered_knob; + return uncovered_knob; +} + +uint32_t ComputeFrontierWeight(const Coverage &coverage, + const ControlFlowGraph &cfg, + const std::vector &callees) { + // Multiplication factors for different coverage types. + // TODO(ussuri): replace with actual knobs (cl/486229527). + uint8_t uncovered_knob = 153; // ~ (255 * 0.6) + uint8_t partially_covered_knob = 77; // ~ (255 * 0.3) + uint8_t fully_covered_knob = 25; // ~ (255 * 0.1) + + uint32_t weight = 0; + for (auto callee : callees) { + // TODO(ussuri): Figure out a better way for determining the complexity + // of indirect callee. For now using cyclomatic_comp = 1, and factor of + // non-covered callee. + if (callee == -1ULL) { + weight += uncovered_knob; + continue; + } + // This function's body is not in this DSO, like library functions. For now + // skipping it as we have no coverage kind (Fully/Partially covered or + // uncovered) and no complexity for it. 
+ if (!cfg.IsInPcTable(callee)) continue; + + // Retrieve cyclomatic complexity + auto cyclomatic_comp = cfg.GetCyclomaticComplexity(callee); + // Determine knob based on callee coverage kind. + auto callee_idx = cfg.GetPcIndex(callee); + CHECK(cfg.BlockIsFunctionEntry(callee_idx)); + auto coverage_multiplier = SelectMultiplierByCoverageKind( + uncovered_knob, partially_covered_knob, fully_covered_knob, callee_idx, + coverage); + + weight += coverage_multiplier * cyclomatic_comp; + } + return weight; +} + +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/coverage.h b/src/third_party/fuzztest/dist/centipede/coverage.h new file mode 100644 index 00000000000..7e681c0c467 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/coverage.h @@ -0,0 +1,185 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef THIRD_PARTY_CENTIPEDE_COVERAGE_H_ +#define THIRD_PARTY_CENTIPEDE_COVERAGE_H_ + +#include + +#include +#include +#include +#include +#include +#include + +#include "absl/base/thread_annotations.h" +#include "absl/container/flat_hash_set.h" +#include "absl/log/check.h" +#include "absl/synchronization/mutex.h" +#include "./centipede/control_flow.h" +#include "./centipede/feature.h" +#include "./centipede/pc_info.h" + +namespace fuzztest::internal { + +class SymbolTable; // To avoid mutual inclusion with symbol_table.h. 
+ +// Reads and visualizes the code coverage produced by SanitizerCoverage. +// https://clang.llvm.org/docs/SanitizerCoverage.html +// +// Thread-compatible. +class Coverage { + public: + // PCTable is a property of the binary. + // PCIndexVec is the coverage obtained from specific execution(s). + Coverage(const PCTable &pc_table, + const PCIndexVec &pci_vec); + + // Saves the by-function coverage in human-readable form to the `filepath` + // file, using `symbols` to translate PCs to function names. + void DumpReportToFile(const SymbolTable &symbols, std::string_view filepath, + std::string_view description = ""); + + // Returns true if the function is fully covered. pc_index is for a function + // entry. + bool FunctionIsFullyCovered(PCIndex pc_index) const { + CHECK(func_entries_[pc_index]); + return fully_covered_funcs_vec_[pc_index]; + } + // Returns true if the given basic block is covered. pc_index is for any BB. + bool BlockIsCovered(PCIndex pc_index) const { + return covered_pcs_vec_[pc_index]; + } + + private: + // A vector of size PCTable. func_entries[idx] is true iff means the PC at idx + // is a function entry. + std::vector func_entries_; + // Vector of fully covered functions i.e. functions with all edges covered. + // A Function is represented by its entry block's PCIndex. + // TODO(kcc): fix private variables' name to match the code style. + PCIndexVec fully_covered_funcs; + // A vector of size PCTable. fully_covered_funcs_vec[idx] is true iff the PC + // at idx is an entry block of a fully covered function. + std::vector fully_covered_funcs_vec_; + // A vector of size PCTable. covered_pcs_vec[idx] is true iff the PC at idx is + // covered. + std::vector covered_pcs_vec_; + // Same as `fully_covered_funcs`, but for functions with no edges covered. + PCIndexVec uncovered_funcs; + // Partially covered function: function with some, but not all, edges covered. + // Thus we can represent it as two vectors of PCIndex: covered and uncovered. 
+ struct PartiallyCoveredFunction { + PCIndexVec + covered; // Non-empty, covered[0] is function entry. + PCIndexVec uncovered; // Non-empty. + }; + std::vector partially_covered_funcs; +}; + +// Iterates `pc_table`, calls `callback` on every pair {beg, end}, such that +// pc_table[beg] is PCInfo::kFuncEntry, and pc_table[beg + 1 : end] are not. +template +void IteratePcTableFunctions(const PCTable &pc_table, + Callback callback) { + for (size_t beg = 0, n = pc_table.size(); beg < n;) { + if (pc_table[beg].has_flag(PCInfo::kFuncEntry)) { + size_t end = beg + 1; + while (end < n && + !pc_table[end].has_flag(PCInfo::kFuncEntry)) { + ++end; + } + callback(beg, end); + beg = end; + } + } +} + +// CoverageLogger helps to log coverage locations once for each location. +// CoverageLogger is thread-safe. +class CoverageLogger { + public: + // CTOR. + // Lifetimes of `pc_table` and `symbols` should be longer than for `this`. + CoverageLogger(const PCTable &pc_table, + const SymbolTable &symbols) + : pc_table_(pc_table), symbols_(symbols) {} + + // Checks if `pc_index` or its symbolized description was observed before. + // If yes, returns empty string. + // If this is the first observation, returns a symbolized description. + // If symbolization is not available, returns a non-symbolized description. + std::string ObserveAndDescribeIfNew(PCIndex pc_index); + + private: + const PCTable &pc_table_; + const SymbolTable &symbols_; + + absl::Mutex mu_; + absl::flat_hash_set observed_indices_ + ABSL_GUARDED_BY(mu_); + absl::flat_hash_set observed_descriptions_ ABSL_GUARDED_BY(mu_); +}; + +// FunctionFilter maps a set of function names to a set of features. +class FunctionFilter { + public: + // Initialize the filter. + // `functions_to_filter` is a comma-separated list of function names. + // If a function name is found in `symbols`, the PCs from that function + // will be filtered. 
+ FunctionFilter(std::string_view functions_to_filter, + const SymbolTable &symbols); + + // Returns true if + // * some of the `features` are from feature_domains::kPC + // and belong to a filtered function. + // * either `functions_to_filter` or `symbols` passed to CTOR was empty. + bool filter(const FeatureVec &features) const; + + // Counts PCs that belong to filtered functions. Test-only. + size_t count() const { return std::count(pcs_.begin(), pcs_.end(), 1); } + + private: + // pcs_[idx]==1 means that the PC at idx belongs to the filtered function. + // We don't use vector for performance. + // We don't use a hash set, because CPU is more important here than RAM. + std::vector pcs_; +}; + +// Computes the frontier weight. The weight is calculated based on the functions +// called in the non-covered side of the frontier. For each such callee, the +// cyclomatic complexity (CC) of the callee is multiplied by a factor (MF) +// where MF is determined based on the coverage type of callee: +// +// frontier_weight = 0 +// for f in callees_of_non_covered_successor_bb: +// frontier_weight += CC(f) * MF(f) +// +// The breakdown for MF based on the coverage type of callee is as follows +// (subject to change): +// - Non-covered: %60 +// - Partially-covered: %30 +// - Fully-covered: %10 +// Non-covered callee gets the highest MF as it is very interesting to +// get it covered. That said, going to partially or even fully covered callee +// still have some value as it may trigger new state there. 
+uint32_t ComputeFrontierWeight(const Coverage &coverage, + const ControlFlowGraph &cfg, + const std::vector &callees); + +} // namespace fuzztest::internal + +#endif // THIRD_PARTY_CENTIPEDE_COVERAGE_H_ diff --git a/src/third_party/fuzztest/dist/centipede/coverage_test.cc b/src/third_party/fuzztest/dist/centipede/coverage_test.cc new file mode 100644 index 00000000000..a7538031f44 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/coverage_test.cc @@ -0,0 +1,541 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "./centipede/coverage.h" + +#include +#include + +#include +#include +#include +#include // NOLINT +#include +#include +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/container/flat_hash_set.h" +#include "./centipede/binary_info.h" +#include "./centipede/control_flow.h" +#include "./centipede/environment.h" +#include "./centipede/feature.h" +#include "./centipede/pc_info.h" +#include "./centipede/symbol_table.h" +#include "./centipede/test_coverage_util.h" +#include "./centipede/thread_pool.h" +#include "./centipede/util.h" +#include "./common/test_util.h" + +namespace fuzztest::internal { +namespace { + +// llvm-symbolizer output for a binary with 3 functions: +// A, BB, CCC. +// A and BB have one control flow edge each. +// CCC has 3 edges. 
+const char *symbolizer_output = + "A\n" + "a.cc:1:0\n" + "\n" + "BB\n" + "bb.cc:1:0\n" + "\n" + "CCC\n" + "ccc.cc:1:0\n" + "\n" + "CCC\n" + "ccc.cc:2:0\n" + "\n" + "CCC\n" + "ccc.cc:3:0\n" + "\n" + "CCC\n" + "ccc.cc:3:0\n" // same as the previous entry + "\n"; + +// PCTable that corresponds to symbolizer_output above. +static const PCTable g_pc_table = { + {100, PCInfo::kFuncEntry}, + {200, PCInfo::kFuncEntry}, + {300, PCInfo::kFuncEntry}, + {400, 0}, + {500, 0}, + {600, 0}, +}; + +// Tests Coverage and SymbolTable together. +TEST(Coverage, SymbolTable) { + const std::filesystem::path test_dir = GetTestTempDir(test_info_->name()); + + // Initialize and test SymbolTable. + SymbolTable symbols; + std::istringstream iss(symbolizer_output); + symbols.ReadFromLLVMSymbolizer(iss); + EXPECT_EQ(symbols.size(), 6U); + EXPECT_EQ(symbols.func(1), "BB"); + EXPECT_EQ(symbols.location(2), "ccc.cc:1:0"); + EXPECT_EQ(symbols.full_description(0), "A a.cc:1:0"); + EXPECT_EQ(symbols.full_description(4), "CCC ccc.cc:3:0"); + + { + // Tests coverage output for PCIndexVec = {0, 2}, + // i.e. the covered edges are 'A' and the entry of 'CCC'. 
+ Coverage cov(g_pc_table, {0, 2}); + cov.DumpReportToFile(symbols, (test_dir / "coverage.txt").string()); + std::string str; + ReadFromLocalFile((test_dir / "coverage.txt").string(), str); + EXPECT_THAT(str, testing::HasSubstr("FULL: A a.cc:1:0")); + EXPECT_THAT(str, testing::HasSubstr("NONE: BB bb.cc:1:0")); + EXPECT_THAT(str, testing::HasSubstr("PARTIAL: CCC ccc.cc:1:0")); + EXPECT_THAT(str, testing::HasSubstr("+ CCC ccc.cc:1:0")); + EXPECT_THAT(str, testing::HasSubstr("- CCC ccc.cc:2:0")); + EXPECT_THAT(str, testing::HasSubstr("- CCC ccc.cc:3:0")); + } + { + // Same as above, but for PCIndexVec = {1, 2, 3}, + Coverage cov(g_pc_table, {1, 2, 3}); + cov.DumpReportToFile(symbols, (test_dir / "coverage.txt").string()); + std::string str; + ReadFromLocalFile((test_dir / "coverage.txt").string(), str); + EXPECT_THAT(str, testing::HasSubstr("FULL: BB bb.cc:1:0")); + EXPECT_THAT(str, testing::HasSubstr("NONE: A a.cc:1:0")); + EXPECT_THAT(str, testing::HasSubstr("PARTIAL: CCC ccc.cc:1:0")); + EXPECT_THAT(str, testing::HasSubstr("+ CCC ccc.cc:1:0")); + EXPECT_THAT(str, testing::HasSubstr("+ CCC ccc.cc:2:0")); + EXPECT_THAT(str, testing::HasSubstr("- CCC ccc.cc:3:0")); + } + + symbols.SetAllToUnknown(2); + EXPECT_EQ(symbols.size(), 2); + EXPECT_EQ(symbols.full_description(0), "? ?"); + EXPECT_EQ(symbols.full_description(1), "? 
?"); +} + +TEST(Coverage, CoverageLoad) { + Coverage cov(g_pc_table, {0, 2, 4, 5}); + + EXPECT_TRUE(cov.BlockIsCovered(0)); + EXPECT_FALSE(cov.BlockIsCovered(1)); + EXPECT_TRUE(cov.BlockIsCovered(2)); + EXPECT_FALSE(cov.BlockIsCovered(3)); + EXPECT_TRUE(cov.BlockIsCovered(4)); + EXPECT_TRUE(cov.BlockIsCovered(5)); + + EXPECT_TRUE(cov.FunctionIsFullyCovered(0)); + EXPECT_FALSE(cov.FunctionIsFullyCovered(1)); + EXPECT_FALSE(cov.FunctionIsFullyCovered(2)); +} + +TEST(Coverage, CoverageLogger) { + SymbolTable symbols; + std::istringstream iss(symbolizer_output); + symbols.ReadFromLLVMSymbolizer(iss); + CoverageLogger logger(g_pc_table, symbols); + // First time logging pc_index=0. + EXPECT_EQ(logger.ObserveAndDescribeIfNew(0), "FUNC: A a.cc:1:0"); + // Second time logger pc_index=0. + EXPECT_EQ(logger.ObserveAndDescribeIfNew(0), ""); + // First time logging pc_index=4. + EXPECT_EQ(logger.ObserveAndDescribeIfNew(4), "EDGE: CCC ccc.cc:3:0"); + // First time logging pc_index=5, but it produces the same description as + // pc_index=4, and so the result is empty. + EXPECT_EQ(logger.ObserveAndDescribeIfNew(5), ""); + + // Logging with pc_index out of bounds. Second time gives empty result. + EXPECT_EQ(logger.ObserveAndDescribeIfNew(42), "FUNC/EDGE index: 42"); + EXPECT_EQ(logger.ObserveAndDescribeIfNew(42), ""); + + CoverageLogger concurrently_used_logger(g_pc_table, symbols); + auto cb = [&]() { + for (int i = 0; i < 1000; i++) { + PCIndex pc_index = i % g_pc_table.size(); + logger.ObserveAndDescribeIfNew(pc_index); + } + }; + { + ThreadPool threads{2}; + threads.Schedule(cb); + threads.Schedule(cb); + } // The threads join here. +} + +// Returns path to test_fuzz_target. +static std::string GetTargetPath() { + return GetDataDependencyFilepath("centipede/testing/test_fuzz_target"); +} + +// Returns path to threaded_fuzz_target. 
+static std::string GetThreadedTargetPath() {
+  return GetDataDependencyFilepath("centipede/testing/threaded_fuzz_target");
+}
+
+// Tests coverage collection on test_fuzz_target
+// using two inputs that trigger different code paths.
+TEST(Coverage, CoverageFeatures) {
+  // Prepare the inputs.
+  Environment env;
+  env.binary = GetTargetPath();
+  auto features = RunInputsAndCollectCoverage(env, {"func1", "func2-A"});
+  EXPECT_EQ(features.size(), 2);
+  EXPECT_NE(features[0], features[1]);
+  // Get pc_table and symbols.
+  BinaryInfo binary_info;
+  binary_info.InitializeFromSanCovBinary(
+      GetTargetPath(), GetObjDumpPath(), GetLLVMSymbolizerPath(),
+      GetTestTempDir(test_info_->name()).string());
+  const auto &pc_table = binary_info.pc_table;
+  // Check the flag that InitializeFromSanCovBinary filled in. Asserting on a
+  // separate, value-initialized local would pass unconditionally.
+  EXPECT_FALSE(binary_info.uses_legacy_trace_pc_instrumentation);
+  const SymbolTable &symbols = binary_info.symbols;
+  // pc_table and symbols should have the same size.
+  EXPECT_EQ(pc_table.size(), symbols.size());
+  // Check what's covered.
+  // Both inputs should cover LLVMFuzzerTestOneInput.
+  // Input[0] should cover SingleEdgeFunc and not MultiEdgeFunc.
+  // Input[1] - the other way around.
+  for (size_t input_idx = 0; input_idx < 2; input_idx++) {
+    size_t llvm_fuzzer_test_one_input_num_edges = 0;
+    size_t single_edge_func_num_edges = 0;
+    size_t multi_edge_func_num_edges = 0;
+    for (auto feature : features[input_idx]) {
+      if (!feature_domains::kPCs.Contains(feature)) continue;
+      auto pc_index = ConvertPCFeatureToPcIndex(feature);
+      single_edge_func_num_edges += symbols.func(pc_index) == "SingleEdgeFunc";
+      multi_edge_func_num_edges += symbols.func(pc_index) == "MultiEdgeFunc";
+      llvm_fuzzer_test_one_input_num_edges +=
+          symbols.func(pc_index) == "LLVMFuzzerTestOneInput";
+    }
+    EXPECT_GT(llvm_fuzzer_test_one_input_num_edges, 1);
+    if (input_idx == 0) {
+      // This input calls SingleEdgeFunc, but not MultiEdgeFunc.
+ EXPECT_EQ(single_edge_func_num_edges, 1); + EXPECT_EQ(multi_edge_func_num_edges, 0); + } else { + // This input calls MultiEdgeFunc, but not SingleEdgeFunc. + EXPECT_EQ(single_edge_func_num_edges, 0); + EXPECT_GT(multi_edge_func_num_edges, 1); + } + } +} + +static FeatureVec ExtractDomainFeatures(const FeatureVec &features, + const feature_domains::Domain &domain) { + FeatureVec result; + for (auto feature : features) { + if (domain.Contains(feature)) { + result.push_back(feature); + } + } + return result; +} + +// Tests data flow instrumentation and feature collection. +TEST(Coverage, DataFlowFeatures) { + Environment env; + env.binary = GetTargetPath(); + auto features_g = RunInputsAndCollectCoverage(env, {"glob1", "glob2"}); + auto features_c = RunInputsAndCollectCoverage(env, {"cons1", "cons2"}); + for (auto &features : {features_g, features_c}) { + EXPECT_EQ(features.size(), 2); + // Dataflow features should be different. + EXPECT_NE(ExtractDomainFeatures(features[0], feature_domains::kDataFlow), + ExtractDomainFeatures(features[1], feature_domains::kDataFlow)); + // But control flow features should be the same. + EXPECT_EQ( + ExtractDomainFeatures(features[0], feature_domains::k8bitCounters), + ExtractDomainFeatures(features[1], feature_domains::k8bitCounters)); + } +} + +// Tests feature collection for counters (--use_counter_features). +TEST(Coverage, CounterFeatures) { + Environment env; + env.binary = GetTargetPath(); + + // Inputs that generate the same PC coverage but different counters. + std::vector inputs = {"cnt\x01", "cnt\x02", "cnt\x04", "cnt\x08", + "cnt\x10"}; + const size_t n = inputs.size(); + + // Run with use_counter_features = true. + env.use_counter_features = true; + auto features = RunInputsAndCollectCoverage(env, inputs); + EXPECT_EQ(features.size(), n); + // Counter features should be different. 
+ for (size_t i = 0; i < n; ++i) { + for (size_t j = i + 1; j < n; ++j) { + EXPECT_NE( + ExtractDomainFeatures(features[i], feature_domains::k8bitCounters), + ExtractDomainFeatures(features[j], feature_domains::k8bitCounters)); + } + } + + // Run with use_counter_features = false. + env.use_counter_features = false; + features = RunInputsAndCollectCoverage(env, inputs); + EXPECT_EQ(features.size(), n); + // Counter features should be the same now. + for (size_t i = 0; i < n; ++i) { + for (size_t j = i + 1; j < n; ++j) { + EXPECT_EQ( + ExtractDomainFeatures(features[i], feature_domains::k8bitCounters), + ExtractDomainFeatures(features[j], feature_domains::k8bitCounters)); + } + } +} + +// For each of {ABToCmpModDiff, ABToCmpHamming, ABToCmpDiffLog} verify that +// a) they create all possible values in [0,64) +// b) they don't create any other values. +// c) they are sufficiently different from each other, i.e. not using one of +// them as coverage signal may reduce the overall quality of signal. +TEST(Coverage, CMPFeatures) { + absl::flat_hash_set moddiff, hamming, difflog; + + // clear all hash sets. + auto clear = [&]() { + moddiff.clear(); + hamming.clear(); + difflog.clear(); + }; + + // verifies `value` < 64 and returns it. + auto must_be_6bit = [](uintptr_t value) { + EXPECT_LT(value, 64); + return value; + }; + + // inserts a value into all hash sets. + auto update = [&](uintptr_t a, uintptr_t b) { + moddiff.insert(must_be_6bit(ABToCmpModDiff(a, b))); + hamming.insert(must_be_6bit(ABToCmpHamming(a, b))); + difflog.insert(must_be_6bit(ABToCmpDiffLog(a, b))); + }; + + // Check moddiff. + clear(); + for (uintptr_t a = 0; a <= 64; ++a) { + uintptr_t b = 32; + if (a == b) continue; + update(a, b); + } + EXPECT_EQ(moddiff.size(), 64); + EXPECT_EQ(hamming.size(), 6); + EXPECT_EQ(difflog.size(), 6); + + // Check hamming. 
+ clear(); + for (uintptr_t bits = 0; bits < 64; ++bits) { + uintptr_t minus_one = -1; + uintptr_t a = minus_one << bits; + update(a, 0); + } + EXPECT_EQ(moddiff.size(), 6); + EXPECT_EQ(hamming.size(), 64); + EXPECT_EQ(difflog.size(), 1); + + // Check difflog. + clear(); + for (uintptr_t bits = 0; bits < 64; ++bits) { + uintptr_t a = 1ULL << bits; + uintptr_t b = 0; + update(a, b); + } + EXPECT_EQ(moddiff.size(), 7); + EXPECT_EQ(hamming.size(), 1); + EXPECT_EQ(difflog.size(), 64); +} + +// Tests CMP tracing and feature collection. +TEST(Coverage, CMPFeaturesExecute) { + Environment env; + env.binary = GetTargetPath(); + auto features = + RunInputsAndCollectCoverage(env, {"cmpAAAAAAAA", "cmpAAAABBBB"}); + EXPECT_EQ(features.size(), 2); + // CMP features should be different. + EXPECT_NE(ExtractDomainFeatures(features[0], feature_domains::kCMPEq), + ExtractDomainFeatures(features[1], feature_domains::kCMPEq)); + EXPECT_NE(ExtractDomainFeatures(features[0], feature_domains::kCMPModDiff), + ExtractDomainFeatures(features[1], feature_domains::kCMPModDiff)); + EXPECT_NE(ExtractDomainFeatures(features[0], feature_domains::kCMPHamming), + ExtractDomainFeatures(features[1], feature_domains::kCMPHamming)); + EXPECT_NE(ExtractDomainFeatures(features[0], feature_domains::kCMPDiffLog), + ExtractDomainFeatures(features[1], feature_domains::kCMPDiffLog)); + + // But control flow features should be the same. + EXPECT_EQ(ExtractDomainFeatures(features[0], feature_domains::k8bitCounters), + ExtractDomainFeatures(features[1], feature_domains::k8bitCounters)); +} + +// Tests memcmp interceptor. +TEST(Coverage, CMPFeaturesFromMemcmp) { + Environment env; + env.binary = GetTargetPath(); + auto features = + RunInputsAndCollectCoverage(env, {"mcmpAAAAAAAA", "mcmpAAAABBBB"}); + EXPECT_EQ(features.size(), 2); + // CMP features should be different. 
+ EXPECT_NE(ExtractDomainFeatures(features[0], feature_domains::kCMP), + ExtractDomainFeatures(features[1], feature_domains::kCMP)); + // But control flow features should be the same. + EXPECT_EQ(ExtractDomainFeatures(features[0], feature_domains::k8bitCounters), + ExtractDomainFeatures(features[1], feature_domains::k8bitCounters)); +} + +TEST(Coverage, PathFeatures) { + Environment env; + env.binary = GetTargetPath(); + env.path_level = 10; + // Inputs "pth123" and "pth321" generate different call sequences but exactly + // the same edge coverage. This test verifies that we can capture this. + auto features = RunInputsAndCollectCoverage(env, {"pth123", "pth321"}); + EXPECT_EQ(features.size(), 2); + // Path features should be different. + EXPECT_NE(ExtractDomainFeatures(features[0], feature_domains::kBoundedPath), + ExtractDomainFeatures(features[1], feature_domains::kBoundedPath)); + // But control flow features should be the same. + EXPECT_EQ(ExtractDomainFeatures(features[0], feature_domains::k8bitCounters), + ExtractDomainFeatures(features[1], feature_domains::k8bitCounters)); +} + +TEST(Coverage, FunctionFilter) { + // Initialize coverage data. + BinaryInfo binary_info; + binary_info.InitializeFromSanCovBinary( + GetTargetPath(), GetObjDumpPath(), GetLLVMSymbolizerPath(), + GetTestTempDir(test_info_->name()).string()); + + const PCTable &pc_table = binary_info.pc_table; + EXPECT_FALSE(binary_info.uses_legacy_trace_pc_instrumentation); + const DsoTable dso_table = {{GetTargetPath(), pc_table.size()}}; + SymbolTable symbols; + symbols.GetSymbolsFromBinary(pc_table, dso_table, GetLLVMSymbolizerPath(), + GetTestTempDir(test_info_->name()).string()); + // Empty filter. + FunctionFilter empty_filter("", symbols); + EXPECT_EQ(empty_filter.count(), 0); + + // Single-function filter. The function has one PC. + FunctionFilter sing_edge_func_filter("SingleEdgeFunc", symbols); + EXPECT_EQ(sing_edge_func_filter.count(), 1); + + // Another single-function filter. 
This function has several PCs. + FunctionFilter multi_edge_func_filter("MultiEdgeFunc", symbols); + EXPECT_GT(multi_edge_func_filter.count(), 1); + + // Two-function-filter. + FunctionFilter both_func_filter("MultiEdgeFunc,SingleEdgeFunc", symbols); + EXPECT_GT(both_func_filter.count(), multi_edge_func_filter.count()); + + // Collect features from the test target by running 3 different inputs. + Environment env; + env.binary = GetTargetPath(); + std::vector features = + RunInputsAndCollectCoverage(env, {"func1", "func2-A", "other"}); + EXPECT_EQ(features.size(), 3); + auto &single = features[0]; + auto &multi = features[1]; + auto &other = features[2]; + + // Check the features against the different filters. + EXPECT_TRUE(empty_filter.filter(single)); + EXPECT_TRUE(empty_filter.filter(multi)); + EXPECT_TRUE(empty_filter.filter(other)); + + EXPECT_TRUE(sing_edge_func_filter.filter(single)); + EXPECT_FALSE(sing_edge_func_filter.filter(multi)); + EXPECT_FALSE(sing_edge_func_filter.filter(other)); + + EXPECT_FALSE(multi_edge_func_filter.filter(single)); + EXPECT_TRUE(multi_edge_func_filter.filter(multi)); + EXPECT_FALSE(multi_edge_func_filter.filter(other)); + + EXPECT_TRUE(both_func_filter.filter(single)); + EXPECT_TRUE(both_func_filter.filter(multi)); + EXPECT_FALSE(both_func_filter.filter(other)); +} + +TEST(Coverage, ThreadedTest) { + Environment env; + env.path_level = 10; + env.binary = GetThreadedTargetPath(); + + std::vector features = + RunInputsAndCollectCoverage(env, {"f", "fu", "fuz", "fuzz"}); + EXPECT_EQ(features.size(), 4); + // For several pairs of inputs, check that their features in + // kPC and kBoundedPath are different. 
+  for (size_t idx0 = 0; idx0 < 3; ++idx0) {
+    for (size_t idx1 = idx0 + 1; idx1 < 4; ++idx1) {
+      // Compare the same domain on both sides. Comparing kPCs against another
+      // domain does not test that the two inputs' PC features differ.
+      EXPECT_NE(ExtractDomainFeatures(features[idx0], feature_domains::kPCs),
+                ExtractDomainFeatures(features[idx1], feature_domains::kPCs));
+      EXPECT_NE(
+          ExtractDomainFeatures(features[idx0], feature_domains::kBoundedPath),
+          ExtractDomainFeatures(features[idx1], feature_domains::kBoundedPath));
+    }
+  }
+}
+
+// Checks ComputeFrontierWeight against expected weights for several callee
+// lists, including one that contains an out-of-range PC.
+TEST(FrontierWeight, ComputeFrontierWeight) {
+  PCTable g_pc_table{{0, PCInfo::kFuncEntry},
+                     {1, PCInfo::kFuncEntry},
+                     {2, 0},
+                     {3, PCInfo::kFuncEntry},
+                     {4, PCInfo::kFuncEntry}};
+  // A simple CF table, to get cyclomatic complexity of 1 for all functions.
+  CFTable g_cf_table{
+      0, 0, 0, 1, 0, 0, 2, 0, 0, 3, 0, 0, 4, 0, 0,
+  };
+
+  Coverage g_coverage(g_pc_table, {0, 1});
+  ControlFlowGraph cfg;
+  cfg.InitializeControlFlowGraph(g_cf_table, g_pc_table);
+
+  std::vector callees1 = {0, 1, 3, 4};
+  std::vector callees2 = {0, 1};
+  std::vector callees3 = {0};
+  // PC 99 should have no effect on computed weight.
+  std::vector callees4 = {1, 3, 99};
+
+  auto weight1 = ComputeFrontierWeight(g_coverage, cfg, callees1);
+  ASSERT_EQ(weight1, 408);
+
+  auto weight2 = ComputeFrontierWeight(g_coverage, cfg, callees2);
+  ASSERT_EQ(weight2, 102);
+
+  auto weight3 = ComputeFrontierWeight(g_coverage, cfg, callees3);
+  ASSERT_EQ(weight3, 25);
+
+  auto weight4 = ComputeFrontierWeight(g_coverage, cfg, callees4);
+  ASSERT_EQ(weight4, 230);
+}
+
+TEST(FrontierWeightDeath, InvalidCallee) {
+  // Makes call to ComputeFrontierWeight with some non-function PCs.
+ PCTable g_pc_table{{0, PCInfo::kFuncEntry}, {1, 0}, {2, 0}}; + CFTable g_cf_table{0, 1, 0, 0, 1, 2, 0, 0, 2, 0, 0}; + Coverage g_coverage(g_pc_table, {0, 1}); + ControlFlowGraph cfg; + cfg.InitializeControlFlowGraph(g_cf_table, g_pc_table); + EXPECT_DEATH(ComputeFrontierWeight(g_coverage, cfg, {0, 1}), ""); + EXPECT_DEATH(ComputeFrontierWeight(g_coverage, cfg, {1, 2}), ""); +} + +} // namespace +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/crash_summary.cc b/src/third_party/fuzztest/dist/centipede/crash_summary.cc new file mode 100644 index 00000000000..6be936833cc --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/crash_summary.cc @@ -0,0 +1,58 @@ +// Copyright 2025 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "./centipede/crash_summary.h" + +#include + +#include "absl/strings/str_format.h" +#include "./centipede/util.h" +#include "./common/defs.h" + +namespace fuzztest::internal { +namespace { + +ExternalCrashReporter external_crash_reporter = nullptr; + +} // namespace + +void CrashSummary::AddCrash(Crash crash) { + crashes_.push_back(std::move(crash)); +} + +void CrashSummary::Report(absl::FormatRawSink sink) const { + if (external_crash_reporter != nullptr) { + external_crash_reporter(*this); + } + absl::Format(sink, "=== Summary of detected crashes ===\n\n"); + absl::Format(sink, "Binary ID : %s\n", binary_id()); + absl::Format(sink, "Fuzz test : %s\n", fuzz_test()); + absl::Format(sink, "Total crashes: %d\n\n", crashes().size()); + int i = 0; + for (const Crash& crash : crashes()) { + absl::Format(sink, "Crash #%d:\n", ++i); + absl::Format(sink, " Crash ID : %s\n", crash.id); + absl::Format(sink, " Category : %s\n", crash.category); + absl::Format(sink, " Signature : %s\n", + AsPrintableString(AsByteSpan(crash.signature), 32)); + absl::Format(sink, " Description: %s\n\n", crash.description); + } + absl::Format(sink, "=== End of summary of detected crashes ===\n\n"); +} + +void SetExternalCrashReporter(ExternalCrashReporter reporter) { + external_crash_reporter = reporter; +} + +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/crash_summary.h b/src/third_party/fuzztest/dist/centipede/crash_summary.h new file mode 100644 index 00000000000..067ed36fb24 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/crash_summary.h @@ -0,0 +1,84 @@ +// Copyright 2025 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef FUZZTEST_CENTIPEDE_CRASH_SUMMARY_H_
+#define FUZZTEST_CENTIPEDE_CRASH_SUMMARY_H_
+
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "absl/strings/str_format.h"
+#include "absl/types/span.h"
+
+namespace fuzztest::internal {
+
+// Accumulates crashes for a single fuzz test and provides a method to report a
+// summary of the crashes.
+class CrashSummary {
+ public:
+  struct Crash {
+    std::string id;
+    std::string category;
+    std::string signature;
+    std::string description;
+
+    friend bool operator==(const Crash& lhs, const Crash& rhs) {
+      return lhs.id == rhs.id && lhs.category == rhs.category &&
+             lhs.signature == rhs.signature &&
+             lhs.description == rhs.description;
+    }
+  };
+
+  explicit CrashSummary(std::string_view binary_id, std::string_view fuzz_test)
+      : binary_id_(std::string(binary_id)),
+        fuzz_test_(std::string(fuzz_test)) {}
+
+  CrashSummary(const CrashSummary&) = default;
+  CrashSummary& operator=(const CrashSummary&) = default;
+  CrashSummary(CrashSummary&&) = default;
+  CrashSummary& operator=(CrashSummary&&) = default;
+
+  // Adds a crash to the summary.
+  void AddCrash(Crash crash);
+
+  // Reports a summary of the crashes to `sink`.
+  // If an external crash reporter has been set with `SetExternalCrashReporter`,
+  // calls it with the stored crashes.
+  void Report(absl::FormatRawSink sink) const;
+
+  std::string_view binary_id() const { return binary_id_; }
+  std::string_view fuzz_test() const { return fuzz_test_; }
+  // Read-only view of the stored crashes, in insertion order.
+  absl::Span<const Crash> crashes() const { return crashes_; }
+
+  friend bool operator==(const CrashSummary& lhs, const CrashSummary& rhs) {
+    return lhs.binary_id_ == rhs.binary_id_ &&
+           lhs.fuzz_test_ == rhs.fuzz_test_ && lhs.crashes_ == rhs.crashes_;
+  }
+
+ private:
+  std::string binary_id_;
+  std::string fuzz_test_;
+  std::vector<Crash> crashes_;
+};
+
+using ExternalCrashReporter = void (*)(const CrashSummary&);
+
+// Sets an external crash reporter that will be called when a `CrashSummary` is
+// reported.
+void SetExternalCrashReporter(ExternalCrashReporter reporter);
+
+}  // namespace fuzztest::internal
+
+#endif  // FUZZTEST_CENTIPEDE_CRASH_SUMMARY_H_
diff --git a/src/third_party/fuzztest/dist/centipede/crash_summary_test.cc b/src/third_party/fuzztest/dist/centipede/crash_summary_test.cc
new file mode 100644
index 00000000000..891bd5c7f1e
--- /dev/null
+++ b/src/third_party/fuzztest/dist/centipede/crash_summary_test.cc
@@ -0,0 +1,87 @@
+// Copyright 2025 The Centipede Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#include "./centipede/crash_summary.h" + +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/log/check.h" + +namespace fuzztest::internal { +namespace { + +using ::testing::AllOf; +using ::testing::HasSubstr; +using ::testing::Pointee; + +class CrashSummaryTest : public testing::Test { + public: + ~CrashSummaryTest() { + if (dumped_summary_ != nullptr) { + delete dumped_summary_; + dumped_summary_ = nullptr; + } + } + + protected: + static void DumpCrashSummary(const CrashSummary& summary) { + CHECK(dumped_summary_ == nullptr); + dumped_summary_ = new CrashSummary{summary}; + }; + + static CrashSummary* dumped_summary_; +}; + +CrashSummary* CrashSummaryTest::dumped_summary_ = nullptr; + +TEST_F(CrashSummaryTest, ReportPrintsSummary) { + CrashSummary summary("binary_id", "fuzz_test"); + summary.AddCrash({"id1", "category1", "signature1", "description1"}); + summary.AddCrash({"id2", "category2", + "Unprintable (\xbe\xef) and very long signature", + "description2"}); + std::string output; + summary.Report(&output); + + EXPECT_THAT( + output, + AllOf(HasSubstr("Binary ID : binary_id"), + HasSubstr("Fuzz test : fuzz_test"), + HasSubstr("Total crashes: 2"), // + HasSubstr("Crash ID : id1"), // + HasSubstr("Category : category1"), + HasSubstr("Signature : signature1"), + HasSubstr("Description: description1"), + HasSubstr("Crash ID : id2"), // + HasSubstr("Category : category2"), + HasSubstr("Signature : Unprintable (\\xBE\\xEF) and very long s"), + HasSubstr("Description: description2"))); +} + +TEST_F(CrashSummaryTest, ReportCallsExternalCrashReporter) { + CrashSummary summary("binary_id", "fuzz_test"); + summary.AddCrash({"id1", "category1", "signature1", "description1"}); + summary.AddCrash({"id2", "category2", "signature2", "description2"}); + SetExternalCrashReporter(DumpCrashSummary); + std::string output; + summary.Report(&output); + + EXPECT_THAT(dumped_summary_, Pointee(summary)); +} + +} // namespace +} // namespace 
fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/dispatcher.cc b/src/third_party/fuzztest/dist/centipede/dispatcher.cc new file mode 100644 index 00000000000..0d3fbd2f892 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/dispatcher.cc @@ -0,0 +1,601 @@ +// Copyright 2025 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "./centipede/dispatcher.h" + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "absl/base/nullability.h" +#include "./centipede/execution_metadata.h" +#include "./centipede/runner_request.h" +#include "./centipede/runner_result.h" +#include "./centipede/shared_memory_blob_sequence.h" +#include "./common/defs.h" + +namespace fuzztest::internal { + +namespace { + +// Logging needs to be signal safe. + +struct LogErrNo {}; +struct LogLnSync {}; + +void DispatcherLog() {} + +template +void DispatcherLog(const T& first, const Rest&... 
rest) {
+  if constexpr (std::is_same_v<T, LogErrNo>) {
+    const int saved_errno = errno;
+    char err_buf[80];
+    // strerror_r has two incompatible signatures. The XSI variant returns an
+    // int status and fills `err_buf`. The GNU variant returns a pointer to the
+    // message, which need not be `err_buf`. Comparing the GNU result with 0
+    // would always look like a failure, so dispatch on the return type.
+    const auto message_from = [&err_buf](auto rc) -> const char* {
+      if constexpr (std::is_same_v<decltype(rc), int>) {
+        return rc == 0 ? err_buf : nullptr;
+      } else {
+        return rc;
+      }
+    };
+    const char* message =
+        message_from(strerror_r(saved_errno, err_buf, sizeof(err_buf)));
+    if (message == nullptr) message = "[strerror_r failed]";
+    DispatcherLog(message);
+  } else if constexpr (std::is_same_v<T, LogLnSync>) {
+    write(STDERR_FILENO, "\n", 1);
+    fsync(STDERR_FILENO);
+  } else {
+    std::string_view sv = first;
+    while (!sv.empty()) {
+      // write() returns ssize_t; keep the full width instead of narrowing.
+      const ssize_t r = write(STDERR_FILENO, sv.data(), sv.size());
+      if (r <= 0) break;
+      sv = sv.substr(static_cast<size_t>(r));
+    }
+  }
+  DispatcherLog(rest...);
+}
+
+// Logs `error` and terminates the process with exit code 1 when `condition`
+// is false.
+inline void DispatcherCheck(bool condition, std::string_view error) {
+  if (!condition) {
+    DispatcherLog(error, LogLnSync{});
+    std::_Exit(1);
+  }
+}
+
+// Returns a copy of the CENTIPEDE_RUNNER_FLAGS environment variable made on
+// first call, or nullptr if the variable is unset.
+const char* GetDispatcherFlags() {
+  static auto dispatcher_flags = []() -> const char* {
+    // TODO(xinhaoyuan): Rename the env name to FUZZTEST_DISPATCHER_FLAGS.
+ const char* env_flags = std::getenv("CENTIPEDE_RUNNER_FLAGS"); + if (env_flags == nullptr) return nullptr; + const char* result = strdup(env_flags); + DispatcherCheck(result != nullptr, "Cannot copy the dispatcher flags"); + return result; + }(); + return dispatcher_flags; +} + +std::optional GetDispatcherFlag( + const char* absl_nonnull flag_header) { + const char* dispatcher_flags = GetDispatcherFlags(); + if (dispatcher_flags == nullptr) return std::nullopt; + // Extract "value" from ":flag=value:" + const char* beg = std::strstr(dispatcher_flags, flag_header); + if (!beg) return std::nullopt; + const char* value_beg = beg + std::strlen(flag_header); + const char* value_end = std::strstr(value_beg, ":"); + if (!value_end) return std::nullopt; + return std::string_view{value_beg, + static_cast(value_end - value_beg)}; +} + +bool HasDispatcherSwitchFlag(const char* absl_nonnull switch_flag) { + const char* dispatcher_flags = GetDispatcherFlags(); + if (dispatcher_flags == nullptr) return false; + return std::strstr(dispatcher_flags, switch_flag) != nullptr; +} + +enum class DispatcherAction { + kGetBinaryId, + kListTests, + kTestGetSeeds, + kTestMutate, + kTestExecute, +}; + +constexpr char kDispatcherBinaryIdOutputFlagHeader[] = ":binary_id_output="; +constexpr char kDispatcherTestNameFlagHeader[] = ":test="; +constexpr char kDispatcherTestListingPrefixFlagHeader[] = + ":test_listing_prefix="; +constexpr char kDispatcherTestGetSeedsOutputDirFlagHeader[] = + ":arg1="; // TODO: Use better flag names when standardizing the protocol. +constexpr char kDispatcherFailureDescriptionPathFlagHeader[] = + ":failure_description_path="; +constexpr char kDispatcherFailureSignaturePathFlagHeader[] = + ":failure_signature_path="; +constexpr char kDispatcherInputsBlobSequencePathFlagHeader[] = + ":arg1="; // TODO: Use better flag names when standardizing the protocol. 
+constexpr char kDispatcherOutputsBlobSequencePathFlagHeader[] =
+    ":arg2=";  // TODO: Use better flag names when standardizing the protocol.
+
+// Returns the inputs blob sequence, created on first call and never freed, or
+// nullptr when the ":shmem:" switch is absent from the dispatcher flags.
+BlobSequence* GetInputsBlobSequence() {
+  static auto result = []() -> BlobSequence* {
+    // HasDispatcherSwitchFlag tolerates unset flags; calling std::strstr on
+    // GetDispatcherFlags() directly would dereference a null pointer.
+    if (!HasDispatcherSwitchFlag(":shmem:")) {
+      return nullptr;
+    }
+    auto input_path =
+        GetDispatcherFlag(kDispatcherInputsBlobSequencePathFlagHeader);
+    DispatcherCheck(input_path.has_value(), "inputs blob sequence is missing");
+    return new SharedMemoryBlobSequence(std::string(*input_path).c_str());
+  }();
+  return result;
+}
+
+// Returns the outputs blob sequence, created on first call and never freed, or
+// nullptr when the ":shmem:" switch is absent from the dispatcher flags.
+BlobSequence* GetOutputsBlobSequence() {
+  static auto result = []() -> BlobSequence* {
+    // See GetInputsBlobSequence: the flags may be unset (nullptr).
+    if (!HasDispatcherSwitchFlag(":shmem:")) {
+      return nullptr;
+    }
+    auto output_path =
+        GetDispatcherFlag(kDispatcherOutputsBlobSequencePathFlagHeader);
+    DispatcherCheck(output_path.has_value(),
+                    "outputs blob sequence is missing");
+    return new SharedMemoryBlobSequence(std::string(*output_path).c_str());
+  }();
+  return result;
+}
+
+// Determines once which action the dispatcher should perform: switch flags are
+// checked first, otherwise the first blob of the inputs sequence decides.
+DispatcherAction GetDispatcherAction() {
+  static DispatcherAction dispatcher_action = [] {
+    if (HasDispatcherSwitchFlag(":dump_binary_id:")) {
+      return DispatcherAction::kGetBinaryId;
+    }
+    if (HasDispatcherSwitchFlag(":list_tests:")) {
+      return DispatcherAction::kListTests;
+    }
+    if (HasDispatcherSwitchFlag(":dump_seed_inputs:")) {
+      return DispatcherAction::kTestGetSeeds;
+    }
+    auto* inputs_blobseq = GetInputsBlobSequence();
+    DispatcherCheck(inputs_blobseq != nullptr,
+                    "input blob sequence is not found");
+    auto request_type_blob = inputs_blobseq->Read();
+    if (IsMutationRequest(request_type_blob)) {
+      inputs_blobseq->Reset();
+      return DispatcherAction::kTestMutate;
+    }
+    if (IsExecutionRequest(request_type_blob)) {
+      inputs_blobseq->Reset();
+      return DispatcherAction::kTestExecute;
+    }
+    DispatcherCheck(false, "unknown dispatcher action from the flags");
+    // should not reach here.
+    std::abort();
+  }();
+  return dispatcher_action;
+}
+
+// Creates or truncates `path` and writes each of `contents` to it in order,
+// stopping at the first failed write. Failures are logged, not returned.
+template <typename... C>
+void TrySetFileContents(const char* absl_nonnull path, C... contents) {
+  // Needs to be signal-safe.
+  int f = open(path, O_CREAT | O_TRUNC | O_WRONLY, /*mode=*/0660);
+  if (f == -1) {
+    DispatcherLog("cannot open path ", path, ": ", LogErrNo{}, LogLnSync{});
+    return;
+  }
+  ([&] {
+    std::string_view sv = contents;
+    while (!sv.empty()) {
+      // write() returns ssize_t; keep the full width instead of narrowing.
+      const ssize_t r = write(f, sv.data(), sv.size());
+      if (r < 0) {
+        DispatcherLog("write() failed on ", path, ": ", LogErrNo{},
+                      LogLnSync{});
+        return false;
+      }
+      if (r == 0) {
+        // Terminate and flush the line like every other log call here.
+        DispatcherLog("write() on ", path,
+                      " returns 0 unexpectedly. Stopping writing the file.",
+                      LogLnSync{});
+        return false;
+      }
+      sv = sv.substr(static_cast<size_t>(r));
+    }
+    return true;
+  }() &&
+   ...);  // NOLINT - stop fighting with auto-formatting.
+  if (fsync(f) != 0) {
+    DispatcherLog("fsync() failed on ", path, ": ", LogErrNo{}, LogLnSync{});
+  }
+  if (close(f) != 0) {
+    DispatcherLog("close() failed on ", path, ": ", LogErrNo{}, LogLnSync{});
+  }
+}
+
+static std::atomic<bool> in_test_callback = false;
+
+// Scope guard marking that a user callback is running. Constructing a second
+// guard while one is active terminates the process via DispatcherCheck.
+class TestCallbackGuard {
+ public:
+  TestCallbackGuard() {
+    DispatcherCheck(!in_test_callback.exchange(true),
+                    "test callback is already activated");
+  }
+
+  ~TestCallbackGuard() { in_test_callback = false; }
+};
+
+void DispatcherDoGetBinaryId(const FuzzTestDispatcherCallbacks& callbacks) {
+  const auto binary_id_output_path =
+      GetDispatcherFlag(kDispatcherBinaryIdOutputFlagHeader);
+  DispatcherCheck(binary_id_output_path.has_value(),
+                  "binary ID output path is not set");
+  std::string binary_id;
+  {
+    TestCallbackGuard guard;
+    binary_id = callbacks.get_binary_id ?
callbacks.get_binary_id() : ""; + } + TrySetFileContents(std::string{*binary_id_output_path}.c_str(), binary_id); +} + +void DispatcherDoListTests(const FuzzTestDispatcherCallbacks& callbacks) { + DispatcherCheck(callbacks.list_tests != nullptr, + "list_tests callback must be set"); + TestCallbackGuard guard; + callbacks.list_tests(); +} + +void DispatcherDoGetSeeds(const FuzzTestDispatcherCallbacks& callbacks) { + if (callbacks.get_seeds == nullptr) { + return; + } + TestCallbackGuard guard; + callbacks.get_seeds(); +} + +int DispatcherDoMutate(const FuzzTestDispatcherCallbacks& callbacks) { + auto* inputs_blobseq = GetInputsBlobSequence(); + auto* outputs_blobseq = GetOutputsBlobSequence(); + DispatcherCheck(inputs_blobseq != nullptr && outputs_blobseq != nullptr, + "inputs/outputs blob sequences must be specified"); + + bool has_mutate = callbacks.mutate != nullptr; + if (!MutationResult::WriteHasCustomMutator(has_mutate, *outputs_blobseq)) { + std::fprintf(stderr, "Failed to write custom mutator indicator!\n"); + return EXIT_FAILURE; + } + if (!has_mutate) { + return EXIT_SUCCESS; + } + + // Read max_num_mutants. + size_t num_mutants = 0; + size_t num_inputs = 0; + if (!IsMutationRequest(inputs_blobseq->Read())) { + std::fprintf(stderr, "Not mutation request!\n"); + return EXIT_FAILURE; + } + if (!IsNumMutants(inputs_blobseq->Read(), num_mutants)) { + std::fprintf(stderr, "No num mutants\n"); + return EXIT_FAILURE; + } + if (!IsNumInputs(inputs_blobseq->Read(), num_inputs)) { + std::fprintf(stderr, "No num inputs\n"); + return EXIT_FAILURE; + } + + struct OwningMutateInput { + ByteArray data; + ExecutionMetadata metadata; + }; + // Note: unclear if we can continue using std::vector (or other STL) + // in the runner. But for now use std::vector. + // + // Collect the inputs into a vector. We copy them instead of using pointers + // into shared memory so that the user code doesn't touch the shared memory. 
+ std::vector owning_inputs; + owning_inputs.reserve(num_inputs); + std::vector inputs; + inputs.reserve(num_inputs); + for (size_t i = 0; i < num_inputs; ++i) { + // If inputs_blobseq have overflown in the engine, we still want to + // handle the first few inputs. + ExecutionMetadata metadata; + if (!IsExecutionMetadata(inputs_blobseq->Read(), metadata)) { + break; + } + auto blob = inputs_blobseq->Read(); + if (!IsDataInput(blob)) break; + owning_inputs.push_back( + OwningMutateInput{/*data=*/ByteArray{blob.data, blob.data + blob.size}, + /*metadata=*/std::move(metadata)}); + inputs.push_back(FuzzTestDispatcherInputForMutate{ + /*input=*/owning_inputs.back().data.data(), + /*input_size=*/owning_inputs.back().data.size(), + /*metadata=*/owning_inputs.back().metadata.cmp_data.data(), + /*metadata_size=*/owning_inputs.back().metadata.cmp_data.size()}); + } + + { + TestCallbackGuard guard; + fprintf(stderr, "calling custom mutator\n"); + // We ensure that: + // * `inputs` is a valid pointer to an array of + // `FuzzTestDispatcherInputForMutate` objects with length `num_inputs`. + // * Each object of the array contains a valid `input` pointer to + // `input_size` bytes, and a valid `metadata` pointer to `metadata_size` + // bytes. 
+ callbacks.mutate(inputs.data(), inputs.size(), num_mutants, + /*shrink=*/0); + } + return EXIT_SUCCESS; +} + +int DispatcherDoExecute(const FuzzTestDispatcherCallbacks& callbacks) { + DispatcherCheck(callbacks.execute != nullptr, "execute callback must be set"); + auto* inputs_blobseq = GetInputsBlobSequence(); + auto* outputs_blobseq = GetOutputsBlobSequence(); + DispatcherCheck(inputs_blobseq != nullptr && outputs_blobseq != nullptr, + "inputs/ouptuts blob sequence must exist"); + + size_t num_inputs = 0; + DispatcherCheck(IsExecutionRequest(inputs_blobseq->Read()), + "not an execution request"); + DispatcherCheck(IsNumInputs(inputs_blobseq->Read(), num_inputs), + "failed to read num_inputs"); + + for (size_t i = 0; i < num_inputs; i++) { + auto blob = inputs_blobseq->Read(); + if (!blob.IsValid()) return EXIT_SUCCESS; // no more blobs to read. + if (!IsDataInput(blob)) return EXIT_FAILURE; + + // Copy from blob to data so that to not pass the shared memory further. + ByteArray data(blob.data, blob.data + blob.size); + + if (!BatchResult::WriteInputBegin(*outputs_blobseq)) { + // TODO: This is to follow the previous behavior, but should we abort + // here? + break; + } + { + TestCallbackGuard guard; + // We ensure that `input` is a valid pointer to an array of `size` bytes. + callbacks.execute(data.data(), data.size()); + } + if (!BatchResult::WriteInputEnd(*outputs_blobseq)) { + // TODO: This is to follow the previous behavior, but should we abort + // here? 
+ break; + } + } + + return EXIT_SUCCESS; +} + +void DispatcherEmitFailure(const char* absl_nonnull prefix, + const char* absl_nonnull description, + const char* signature, size_t signature_size) { + bool success = false; + [[maybe_unused]] static bool write_once = [=, &success] { + if (const auto failure_description_path = + GetDispatcherFlag(kDispatcherFailureDescriptionPathFlagHeader); + failure_description_path.has_value()) { + TrySetFileContents(std::string{*failure_description_path}.c_str(), prefix, + description); + } + if (const auto failure_signature_path = + GetDispatcherFlag(kDispatcherFailureSignaturePathFlagHeader); + failure_signature_path.has_value()) { + TrySetFileContents(std::string{*failure_signature_path}.c_str(), + std::string_view{signature, signature_size}); + } + success = true; + return true; + }(); + if (!success) { + DispatcherLog("Failed to emit failure ", prefix, description, LogLnSync{}); + } +} + +} // namespace + +} // namespace fuzztest::internal + +using fuzztest::internal::BatchResult; +using fuzztest::internal::DispatcherAction; +using fuzztest::internal::DispatcherCheck; +using fuzztest::internal::DispatcherDoExecute; +using fuzztest::internal::DispatcherDoGetBinaryId; +using fuzztest::internal::DispatcherDoGetSeeds; +using fuzztest::internal::DispatcherDoListTests; +using fuzztest::internal::DispatcherDoMutate; +using fuzztest::internal::DispatcherEmitFailure; +using fuzztest::internal::GetDispatcherAction; +using fuzztest::internal::GetDispatcherFlag; +using fuzztest::internal::GetDispatcherFlags; +using fuzztest::internal::GetOutputsBlobSequence; +using fuzztest::internal::HasDispatcherSwitchFlag; +using fuzztest::internal::in_test_callback; +using fuzztest::internal::kDispatcherTestGetSeedsOutputDirFlagHeader; +using fuzztest::internal::kDispatcherTestListingPrefixFlagHeader; +using fuzztest::internal::kDispatcherTestNameFlagHeader; +using fuzztest::internal::MutationResult; + +int FuzzTestDispatcherIsEnabled() { + const 
char* flags = GetDispatcherFlags(); + if (flags == nullptr) return 0; + fprintf(stderr, "Dispatcher is enabled with flags: %s\n", flags); + return 1; +} + +const char* FuzzTestDispatcherGetTestName() { + static auto test_name = []() -> const char* { + const auto test_name = GetDispatcherFlag(kDispatcherTestNameFlagHeader); + if (!test_name.has_value()) return nullptr; + return strndup(test_name->data(), test_name->size()); + }(); + return test_name; +} + +int FuzzTestDispatcherRun(const FuzzTestDispatcherCallbacks* callbacks) { + DispatcherCheck(callbacks != nullptr, "callbacks must be set"); + if (HasDispatcherSwitchFlag(":dump_configuration:")) { + return 0; + } + switch (GetDispatcherAction()) { + case DispatcherAction::kGetBinaryId: + DispatcherDoGetBinaryId(*callbacks); + break; + case DispatcherAction::kListTests: + DispatcherDoListTests(*callbacks); + break; + case DispatcherAction::kTestGetSeeds: + DispatcherDoGetSeeds(*callbacks); + break; + case DispatcherAction::kTestMutate: + DispatcherDoMutate(*callbacks); + break; + case DispatcherAction::kTestExecute: + DispatcherDoExecute(*callbacks); + break; + default: + DispatcherCheck(false, "unknown dispatcher action to take"); + } + return 0; +} + +void FuzzTestDispatcherEmitTestName(const char* name) { + DispatcherCheck( + GetDispatcherAction() == DispatcherAction::kListTests && in_test_callback, + "must be called inside test callback for listing tests"); + static auto test_listing_prefix = + GetDispatcherFlag(kDispatcherTestListingPrefixFlagHeader); + DispatcherCheck(test_listing_prefix.has_value(), + "test listing path prefix must be set"); + DispatcherCheck(name != nullptr, "test name must be set"); + auto test_output_path = std::string{*test_listing_prefix}; + test_output_path += name; + FILE* f = std::fopen(test_output_path.c_str(), "w"); + if (f == nullptr) { + std::perror("FAILURE: fopen()"); + } + std::fclose(f); +} + +void FuzzTestDispatcherEmitSeed(const void* data, size_t size) { + 
DispatcherCheck(GetDispatcherAction() == DispatcherAction::kTestGetSeeds && + in_test_callback, + "must be called inside test callback for getting seeds"); + DispatcherCheck(size > 0 && data != nullptr, + "seed must be non-empty with a valid pointer"); + static size_t seed_index = 0; + static const char* output_dir = [] { + const auto flag_value = + GetDispatcherFlag(kDispatcherTestGetSeedsOutputDirFlagHeader); + DispatcherCheck(flag_value.has_value(), + "seeds output path must be specified"); + const char* result = strndup(flag_value->data(), flag_value->size()); + DispatcherCheck(result != nullptr, "failed to copy the seeds output path"); + return result; + }(); + // Cap seed index within 9 digits. If this was triggered, the dumping would + // take forever.. + if (seed_index >= 1000000000) return; + char seed_path_buf[PATH_MAX]; + const size_t num_path_chars = + snprintf(seed_path_buf, PATH_MAX, "%s/%09lu", output_dir, seed_index); + DispatcherCheck(num_path_chars < PATH_MAX, "seed path reaches PATH_MAX"); + FILE* output_file = fopen(seed_path_buf, "w"); + const size_t num_bytes_written = fwrite(data, 1, size, output_file); + DispatcherCheck(num_bytes_written == size, + "wrong number of bytes written for seed"); + fclose(output_file); + ++seed_index; +} + +void FuzzTestDispatcherEmitMutant(const void* data, size_t size) { + DispatcherCheck(GetDispatcherAction() == DispatcherAction::kTestMutate && + in_test_callback, + "must be called inside test callback for mutating"); + DispatcherCheck(size > 0 && data != nullptr, + "mutant must be non-empty with a valid pointer"); + auto* output = GetOutputsBlobSequence(); + DispatcherCheck(output != nullptr, "outputs blob sequence must exist"); + DispatcherCheck(MutationResult::WriteMutant( + {static_cast(data), size}, *output), + "failed to write mutant"); +} + +void FuzzTestDispatcherEmitFeedbackAs32BitFeatures(const uint32_t* features, + size_t num_features) { + DispatcherCheck(GetDispatcherAction() == 
DispatcherAction::kTestExecute && + in_test_callback, + "must be called inside test callback of executing"); + DispatcherCheck(num_features > 0 && features != nullptr, + "feature array must be non-empty with a valid pointer"); + auto* output = GetOutputsBlobSequence(); + DispatcherCheck(output != nullptr, "outputs blob sequence must exist"); + DispatcherCheck(BatchResult::WriteDispatcher32BitFeatures( + features, num_features, *output), + "failed to write feedback"); +} + +void FuzzTestDispatcherEmitExecutionMetadata(const void* metadata, + size_t size) { + DispatcherCheck(GetDispatcherAction() == DispatcherAction::kTestExecute && + in_test_callback, + "must be called inside test callback of executing"); + DispatcherCheck(size > 0 && metadata != nullptr, + "metadata must be non-empty with a valid pointer"); + auto* output = GetOutputsBlobSequence(); + DispatcherCheck(output != nullptr, "outputs blob sequence must exist"); + DispatcherCheck(BatchResult::WriteMetadata( + {static_cast(metadata), size}, *output), + "failed to write metadata"); +} + +void FuzzTestDispatcherEmitInputFailure(const char* description, + const void* signature, + size_t signature_size) { + DispatcherCheck(GetDispatcherAction() == DispatcherAction::kTestExecute && + in_test_callback, + "must be called inside test callback for executing"); + DispatcherCheck((signature == nullptr) == (signature_size == 0), + "violated invariant: signature should be nullptr if and only " + "if signature_size is 0"); + DispatcherEmitFailure( + "INPUT FAILURE: ", description != nullptr ? description : "", + reinterpret_cast(signature), signature_size); +} + +void FuzzTestDispatcherEmitIgnoredFailure(const char* description) { + DispatcherEmitFailure( + "IGNORED FAILURE: ", description != nullptr ? description : "", + /*signature=*/nullptr, /*signature_size=*/0); +} + +void FuzzTestDispatcherEmitSetupFailure(const char* description) { + DispatcherEmitFailure( + "SETUP FAILURE: ", description != nullptr ? 
description : "", + /*signature=*/nullptr, /*signature_size=*/0); +} + +void FuzzTestDispatcherEmitSkippedTestFailure(const char* description) { + DispatcherEmitFailure( + "SKIPPED TEST: ", description != nullptr ? description : "", + /*signature=*/nullptr, /*signature_size=*/0); +} diff --git a/src/third_party/fuzztest/dist/centipede/dispatcher.h b/src/third_party/fuzztest/dist/centipede/dispatcher.h new file mode 100644 index 00000000000..63f4f089bb5 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/dispatcher.h @@ -0,0 +1,141 @@ +// Copyright 2025 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef THIRD_PARTY_CENTIPEDE_DISPATCHER_H_ +#define THIRD_PARTY_CENTIPEDE_DISPATCHER_H_ + +// Dispatcher interface. +// +// This header needs to be C compatible. + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +// Inputs to perform mutations. +struct FuzzTestDispatcherInputForMutate { + const void* input; + size_t input_size; + const void* metadata; + size_t metadata_size; +}; + +// Callbacks to be provided by the fuzz testing framework to +// `FuzzTestDispatcherRun`. +struct FuzzTestDispatcherCallbacks { + // Optional callback to return an ID for the current binary. If not + // implemented, the controller will generate a default ID based on the binary + // path. + const char* (*get_binary_id)(); + // Callback to emit the list of available tests in the binary using + // `FuzzTestDispatcherEmitTestName`. 
+ void (*list_tests)(); + // Callback to emit the seed inputs for a test using + // `FuzzTestDispatcherEmitSeed`. + void (*get_seeds)(); + // Optional callback to emit at most `num_mutants` from `inputs` with + // `num_inputs` entries using `FuzzTestDispatcherEmitMutant`. `shrink` != 0 + // means to generate smaller mutants than the inputs used for mutation. If not + // implemented, the controller will perform basic string-based mutations. + // + // TODO: xinhaoyuan - Reconsider mutation interface design instead of + // following the existing Centipede/runner protocol. + void (*mutate)(const struct FuzzTestDispatcherInputForMutate* inputs, + size_t num_inputs, size_t num_mutants, int shrink); + // Callback to execute `input` with `size` bytes. The callback should emit + // coverage feedback using `FuzzTestDispatcherEmitFeedback*` functions, and + // any metadata for further mutation using + // `FuzzTestDispatcherEmitExecutionMetadata`. In case the input caused a failure, + // the callback should emit the failure using + // `FuzzTestDispatcherEmitInputFailure`. + void (*execute)(const void* input, size_t size); +}; + +// Functions provided by the FuzzTest engine. + +// Returns 0 if the dispatcher mode is not enabled in the current process; 1 if +// the dispatcher mode is enabled; other values for unexpected errors. +int FuzzTestDispatcherIsEnabled(); + +// All functions below should be called only after `FuzzTestDispatcherIsEnabled` +// returns 1 in the current process. + +// Returns the test name under operation as an unowned, static, and +// null-terminated string. Returns nullptr if the current process is not +// operating on a specific test. +const char* FuzzTestDispatcherGetTestName(); + +// Give control to the FuzzTest engine to invoke `callbacks`. Returns an exit +// code for the current process desired by the engine. +int FuzzTestDispatcherRun(const struct FuzzTestDispatcherCallbacks* callbacks); + +// Emits a test name.
Must be called from the `list_tests` callback. `name` must +// be a null-terminated string. +void FuzzTestDispatcherEmitTestName(const char* name); + +// Emits a seed input. Must be called from the `get_seeds` callback. `data` must +// not be nullptr and `size > 0` must hold. +void FuzzTestDispatcherEmitSeed(const void* data, size_t size); + +// Emits a mutant. Must be called from the `mutate` callback. `data` must not be +// nullptr and `size > 0` must hold. +void FuzzTestDispatcherEmitMutant(const void* data, size_t size); + +// Emits coverage feedback for the current input as an array of 32-bit features. +// +// For each 32-bit feature, the bit [31] is ignored; the 4 bits [30-27] +// indicate the feature domain for engine prioritization. The remaining 27 bits +// [26-0] represent the actual 27-bit feature ID in the domain. +// +// Must be called from the `execute` callback. `features` must not be nullptr +// and `num_features > 0` must hold. +void FuzzTestDispatcherEmitFeedbackAs32BitFeatures(const uint32_t* features, + size_t num_features); +// Emits metadata of the current input as raw bytes. Must be called from +// the `execute` callback. +void FuzzTestDispatcherEmitExecutionMetadata(const void* metadata, size_t size); + +// Functions for emitting various types of failures. After calling any of these +// functions, later calls of these functions would have no effect, and the +// current process should exit after necessary cleanup. + +// Emits a failure caused by executing an input. Must be called within the +// `execute` callback. `description` should be a null-terminated string, or +// nullptr can be passed for an empty string; `signature` should be nullptr if +// and only if `signature_size == 0`. +void FuzzTestDispatcherEmitInputFailure(const char* description, + const void* signature, + size_t signature_size); + +// Emits a failure that should be ignored (i.e. not affecting the fuzzing +// workflows). 
`description` should be a null-terminated string, or nullptr can +// be passed for an empty string. +void FuzzTestDispatcherEmitIgnoredFailure(const char* description); + +// Emits a failure caused by the test setup. `description` should be a +// null-terminated string, or nullptr can be passed for an empty string. +void FuzzTestDispatcherEmitSetupFailure(const char* description); + +// Emits a failure due to reasons to skip the entire test. `description` should +// be a null-terminated string, or nullptr can be passed for an empty string. +void FuzzTestDispatcherEmitSkippedTestFailure(const char* description); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif diff --git a/src/third_party/fuzztest/dist/centipede/distill.cc b/src/third_party/fuzztest/dist/centipede/distill.cc new file mode 100644 index 00000000000..1b16a2b4d58 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/distill.cc @@ -0,0 +1,473 @@ +// Copyright 2023 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "./centipede/distill.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "absl/base/thread_annotations.h" +#include "absl/container/flat_hash_set.h" +#include "absl/log/check.h" +#include "absl/log/log.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_join.h" +#include "absl/synchronization/mutex.h" +#include "absl/time/time.h" +#include "./centipede/corpus_io.h" +#include "./centipede/environment.h" +#include "./centipede/feature.h" +#include "./centipede/feature_set.h" +#include "./centipede/periodic_action.h" +#include "./centipede/resource_pool.h" +#include "./centipede/rusage_profiler.h" +#include "./centipede/rusage_stats.h" +#include "./centipede/thread_pool.h" +#include "./centipede/util.h" +#include "./centipede/workdir.h" +#include "./common/blob_file.h" +#include "./common/defs.h" +#include "./common/hash.h" +#include "./common/logging.h" +#include "./common/remote_file.h" +#include "./common/status_macros.h" + +namespace fuzztest::internal { + +namespace { + +// A corpus element. Consists of a fuzz test input and its matching features. +struct CorpusElt { + ByteArray input; + FeatureVec features; + + CorpusElt(const ByteArray &input, FeatureVec features) + : input(input), features(std::move(features)) {} + + // Movable, but not copyable for efficiency. + CorpusElt(const CorpusElt &) = delete; + CorpusElt &operator=(const CorpusElt &) = delete; + CorpusElt(CorpusElt &&) = default; + CorpusElt &operator=(CorpusElt &&) = default; + + ByteArray PackedFeatures() const { + return PackFeaturesAndHash(input, features); + } +}; + +using CorpusEltVec = std::vector; + +// The maximum number of threads reading input shards concurrently. This is +// mainly to prevent I/O congestion. +inline constexpr size_t kMaxReadingThreads = 50; +// The maximum number of threads writing shards concurrently. 
These in turn +// launch up to `kMaxReadingThreads` reading threads. +inline constexpr size_t kMaxWritingThreads = 100; +// A global cap on the total number of threads, both writing and reading. Unlike +// the other two limits, this one is purely to prevent too many threads in the +// process. +inline constexpr size_t kMaxTotalThreads = 5000; +static_assert(kMaxReadingThreads * kMaxWritingThreads <= kMaxTotalThreads); + +inline constexpr MemSize kGB = 1024L * 1024L * 1024L; +// The total approximate amount of RAM to be shared by the concurrent threads. +// TODO(ussuri): Replace by a function of free RSS on the system. +inline constexpr RUsageMemory kRamQuota{/*mem_vsize=*/0, /*mem_vpeak=*/0, + /*mem_rss=*/25 * kGB}; +// The amount of time that each thread will wait for enough RAM to be freed up +// by its concurrent siblings. +inline constexpr absl::Duration kRamLeaseTimeout = absl::Hours(5); + +std::string LogPrefix(const Environment &env) { + return absl::StrCat("DISTILL[S.", env.my_shard_index, "]: "); +} + +std::string LogPrefix() { return absl::StrCat("DISTILL[ALL]: "); } + +// TODO(ussuri): Move the reader/writer classes to shard_reader.cc, rename it +// to corpus_io.cc, and reuse the new APIs where useful in the code base. + +// A helper class for reading input corpus shards. Thread-safe. +class InputCorpusShardReader { + public: + InputCorpusShardReader(const Environment &env) + : workdir_{env}, log_prefix_{LogPrefix(env)} {} + + MemSize EstimateRamFootprint(size_t shard_idx) const { + const auto corpus_path = workdir_.CorpusFilePaths().Shard(shard_idx); + const auto features_path = workdir_.FeaturesFilePaths().Shard(shard_idx); + const MemSize corpus_file_size = ValueOrDie(RemoteFileGetSize(corpus_path)); + const MemSize features_file_size = + ValueOrDie(RemoteFileGetSize(features_path)); + // Conservative compression factors for the two file types. These have been + // observed empirically for the Riegeli blob format. 
The legacy format is + // approximately 1:1, but use the stricter Riegeli numbers, as the legacy + // should be considered obsolete. + // TODO(b/322880269): Use the actual in-memory footprint once available. + constexpr double kMaxCorpusCompressionRatio = 5.0; + constexpr double kMaxFeaturesCompressionRatio = 10.0; + return corpus_file_size * kMaxCorpusCompressionRatio + + features_file_size * kMaxFeaturesCompressionRatio; + } + + // Reads and returns a single shard's elements. Thread-safe. + CorpusEltVec ReadShard(size_t shard_idx) { + const auto corpus_path = workdir_.CorpusFilePaths().Shard(shard_idx); + const auto features_path = workdir_.FeaturesFilePaths().Shard(shard_idx); + VLOG(1) << log_prefix_ << "reading input shard " << shard_idx << ":\n" + << VV(corpus_path) << "\n" + << VV(features_path); + CorpusEltVec elts; + // Read elements from the current shard. + fuzztest::internal::ReadShard( // + corpus_path, features_path, + [&elts](ByteArray input, FeatureVec features) { + elts.emplace_back(std::move(input), std::move(features)); + }); + return elts; + } + + private: + const WorkDir workdir_; + const std::string log_prefix_; +}; + +// A helper class for writing corpus shards. Thread-safe. +class CorpusShardWriter { + public: + // The writing stats so far. + struct Stats { + size_t num_total_elts = 0; + size_t num_written_elts = 0; + size_t num_written_batches = 0; + }; + + CorpusShardWriter(const Environment &env, bool append) + : workdir_{env}, + log_prefix_{LogPrefix(env)}, + corpus_path_{workdir_.DistilledCorpusFilePaths().MyShard()}, + features_path_{workdir_.DistilledFeaturesFilePaths().MyShard()}, + corpus_writer_{DefaultBlobFileWriterFactory()}, + feature_writer_{DefaultBlobFileWriterFactory()} { + CHECK_OK(corpus_writer_->Open(corpus_path_, append ? "a" : "w")); + CHECK_OK(feature_writer_->Open(features_path_, append ? 
"a" : "w")); + } + + virtual ~CorpusShardWriter() = default; + + void WriteElt(CorpusElt elt) { + absl::MutexLock lock(&mu_); + WriteEltImpl(std::move(elt)); + } + + void WriteBatch(CorpusEltVec elts) { + absl::MutexLock lock(&mu_); + VLOG(1) << log_prefix_ << "writing " << elts.size() + << " elements to output shard:\n" + << VV(corpus_path_) << "\n" + << VV(features_path_); + for (auto &elt : elts) { + WriteEltImpl(std::move(elt)); + } + ++stats_.num_written_batches; + } + + Stats GetStats() const { + absl::MutexLock lock(&mu_); + return stats_; + } + + protected: + // A behavior customization point: a derived class gets an opportunity to + // analyze and/or preprocess `elt` before it is written. For example, a + // derived class can trim the element's feature set before it is written, or + // choose to skip writing it entirely by returning `std::nullopt`. + virtual std::optional PreprocessElt(CorpusElt elt) { + return std::move(elt); + } + + private: + void WriteEltImpl(CorpusElt elt) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_) { + ++stats_.num_total_elts; + const auto preprocessed_elt = PreprocessElt(std::move(elt)); + if (preprocessed_elt.has_value()) { + // Append to the distilled corpus and features files. + CHECK_OK(corpus_writer_->Write(preprocessed_elt->input)); + CHECK_OK(feature_writer_->Write(preprocessed_elt->PackedFeatures())); + ++stats_.num_written_elts; + } + } + + // Const state. + const WorkDir workdir_; + const std::string log_prefix_; + const std::string corpus_path_; + const std::string features_path_; + + // Mutable state. + mutable absl::Mutex mu_; + std::unique_ptr corpus_writer_ ABSL_GUARDED_BY(mu_); + std::unique_ptr feature_writer_ ABSL_GUARDED_BY(mu_); + Stats stats_ ABSL_GUARDED_BY(mu_); +}; + +// A distilling input filter: +// - Deduplicates byte-identical inputs: only the first one is allowed to pass. +// - Deduplicates feature-equivalent inputs: up to N from each equivalency set +// are allowed to pass. 
+// - Discards the specified set of "uninteresting" feature domains from the +// feature sets of filtered inputs. +class DistillingInputFilter { + public: + // The filtering stats so far. + struct Stats { + size_t num_total_elts = 0; + size_t num_byte_unique_elts = 0; + size_t num_feature_unique_elts = 0; + // The accumulated features of the distilled corpus so far, represented in + // the same compact textual form that Centipede uses in its fuzzing progress + // log messages, e.g.: "ft: 96331 cov: 81793 usr1: 5045 ...". + std::string coverage_str; + }; + + // `feature_frequency_threshold` specifies how many inputs with equivalent + // feature sets are allowed to pass the filter. Any subsequent inputs with the + // equivalent set will be rejected. + // `domains_to_discard` specifies the domains that should be discarded + // from the feature set of a filtered input. + DistillingInputFilter( // + uint8_t feature_frequency_threshold, + const FeatureSet::FeatureDomainSet &domains_to_discard) + : seen_inputs_{}, + seen_features_{ + /*frequency_threshold=*/feature_frequency_threshold, + /*should_discard_domain=*/domains_to_discard, + } {} + + std::optional FilterElt(CorpusElt elt) { + absl::MutexLock lock{&mu_}; + + ++stats_.num_total_elts; + + // Filter out approximately byte-identical inputs ("approximately" because + // we use hashes). + std::string hash = Hash(elt.input); + const auto [iter, inserted] = seen_inputs_.insert(std::move(hash)); + if (!inserted) return std::nullopt; + ++stats_.num_byte_unique_elts; + + // Filter out feature-equivalent inputs.
+ seen_features_.PruneDiscardedDomains(elt.features); + if (!seen_features_.HasUnseenFeatures(elt.features)) return std::nullopt; + seen_features_.IncrementFrequencies(elt.features); + ++stats_.num_feature_unique_elts; + + return std::move(elt); + } + + Stats GetStats() { + absl::MutexLock lock{&mu_}; + std::stringstream ss; + ss << seen_features_; + stats_.coverage_str = std::move(ss).str(); + return stats_; + } + + private: + absl::Mutex mu_; + absl::flat_hash_set seen_inputs_ ABSL_GUARDED_BY(mu_); + FeatureSet seen_features_ ABSL_GUARDED_BY(mu_); + Stats stats_ ABSL_GUARDED_BY(mu_); +}; + +// A helper class for writing distilled corpus shards. NOT thread-safe because +// all writes go to a single file. +class DistilledCorpusShardWriter : public CorpusShardWriter { + public: + DistilledCorpusShardWriter( // + const Environment &env, bool append, DistillingInputFilter &filter) + : CorpusShardWriter{env, append}, input_filter_{filter} {} + + ~DistilledCorpusShardWriter() override = default; + + protected: + std::optional PreprocessElt(CorpusElt elt) override { + return input_filter_.FilterElt(std::move(elt)); + } + + private: + DistillingInputFilter &input_filter_; +}; + +} // namespace + +// Runs one independent distillation task. Reads shards in the order specified +// by `shard_indices`, distills inputs from them using `input_filter`, and +// writes the result to `WorkDir{env}.DistilledPath()`. Every task gets its own +// `env.my_shard_index`, and so every task creates its own independent distilled +// corpus file. `parallelism` is the maximum number of concurrent +// reading/writing threads. Values > 1 can cause non-determinism in which of the +// same-coverage inputs gets selected to be written to the output shard; set to +// 1 for tests. 
+void DistillToOneOutputShard( // + const Environment &env, // + const std::vector &shard_indices, // + DistillingInputFilter &input_filter, // + ResourcePool &ram_pool, // + int parallelism) { + LOG(INFO) << LogPrefix(env) << "Distilling to output shard " + << env.my_shard_index << "; input shard indices:\n" + << absl::StrJoin(shard_indices, ", "); + + // Read and write the shards in parallel, but gate reading of each on the + // availability of free RAM to keep the peak RAM usage under control. + const size_t num_shards = shard_indices.size(); + InputCorpusShardReader reader{env}; + // NOTE: Always overwrite corpus and features files, never append. + DistilledCorpusShardWriter writer{env, /*append=*/false, input_filter}; + + { + ThreadPool threads{parallelism}; + for (size_t shard_idx : shard_indices) { + threads.Schedule([shard_idx, &reader, &writer, &env, num_shards, + &ram_pool] { + const auto ram_lease = ram_pool.AcquireLeaseBlocking({ + /*id=*/absl::StrCat("out_", env.my_shard_index, "/in_", shard_idx), + /*amount=*/ + {/*mem_vsize=*/0, /*mem_vpeak=*/0, + /*mem_rss=*/reader.EstimateRamFootprint(shard_idx)}, + /*timeout=*/kRamLeaseTimeout, + }); + CHECK_OK(ram_lease.status()); + + CorpusEltVec shard_elts = reader.ReadShard(shard_idx); + // Reverse the order of elements. The intuition is as follows: + // * If the shard is the result of fuzzing with Centipede, the inputs + // that are closer to the end are more interesting, so we start there. + // * If the shard resulted from something else, the reverse order is + // not any better or worse than any other order. + std::reverse(shard_elts.begin(), shard_elts.end()); + writer.WriteBatch(std::move(shard_elts)); + const CorpusShardWriter::Stats shard_stats = writer.GetStats(); + LOG(INFO) << LogPrefix(env) + << "batches: " << shard_stats.num_written_batches << "/" + << num_shards << " inputs: " << shard_stats.num_total_elts + << " written: " << shard_stats.num_written_elts; + }); + } + } // The threads join here.
+ + LOG(INFO) << LogPrefix(env) << "Done distilling to output shard " + << env.my_shard_index; +} + +int Distill(const Environment &env, const DistillOptions &opts) { + RPROF_THIS_FUNCTION_WITH_TIMELAPSE( // + /*enable=*/ABSL_VLOG_IS_ON(1), // + /*timelapse_interval=*/absl::Seconds(ABSL_VLOG_IS_ON(2) ? 10 : 60), // + /*also_log_timelapses=*/ABSL_VLOG_IS_ON(10)); + + // Prepare the per-thread envs. + std::vector envs_per_thread(env.num_threads, env); + for (size_t thread_idx = 0; thread_idx < env.num_threads; ++thread_idx) { + envs_per_thread[thread_idx].my_shard_index += thread_idx; + } + + // Prepare the per-thread input shard indices. This assigns a randomized and + // shuffled subset of the input shards to each output shard writer. The subset + // sizes are roughly equal between the writers. + std::vector> shard_indices_per_thread(env.num_threads); + std::vector all_shard_indices(env.total_shards); + std::iota(all_shard_indices.begin(), all_shard_indices.end(), 0); + Rng rng{GetRandomSeed(env.seed)}; + std::shuffle(all_shard_indices.begin(), all_shard_indices.end(), rng); + size_t thread_idx = 0; + for (size_t shard_idx : all_shard_indices) { + shard_indices_per_thread[thread_idx].push_back(shard_idx); + thread_idx = (thread_idx + 1) % env.num_threads; + } + + // Run the distillation threads in parallel. + { + // A global input filter shared by all output shard writers. The output + // shards will collectively contain a deduplicated set of byte- and + // feature-unique inputs. + DistillingInputFilter input_filter{ + opts.feature_frequency_threshold, + env.MakeDomainDiscardMask(), + }; + // A periodic logger of the global distillation progress. Runs on a separate + // thread. 
+ PeriodicAction progress_logger{ + [&input_filter]() { + const auto stats = input_filter.GetStats(); + LOG(INFO) << LogPrefix() << stats.coverage_str + << " inputs: " << stats.num_total_elts + << " unique: " << stats.num_byte_unique_elts + << " distilled: " << stats.num_feature_unique_elts; + }, + // Seeing 0's at the beginning is not interesting, unless debugging. + // Likewise, increase the frequency --v >= 1 to aid debugging. + PeriodicAction::ConstDelayConstInterval( + absl::Seconds(ABSL_VLOG_IS_ON(1) ? 0 : 60), + absl::Seconds(ABSL_VLOG_IS_ON(1) ? 10 : 60)), + }; + // The RAM pool shared between all the `DistillToOneOutputShard()` threads. + ResourcePool ram_pool{kRamQuota}; + const size_t num_threads = std::min(env.num_threads, kMaxWritingThreads); + ThreadPool threads{static_cast(num_threads)}; + for (size_t thread_idx = 0; thread_idx < env.num_threads; ++thread_idx) { + threads.Schedule( + [&thread_env = envs_per_thread[thread_idx], + &thread_shard_indices = shard_indices_per_thread[thread_idx], + &input_filter, &progress_logger, &ram_pool]() { + DistillToOneOutputShard( // + thread_env, thread_shard_indices, input_filter, ram_pool, + kMaxReadingThreads); + // In addition to periodic progress reports, also log the progress + // after writing each output shard. + progress_logger.Nudge(); + }); + } + } // The threads join here. + + return EXIT_SUCCESS; +} + +void DistillForTests(const Environment &env, + const std::vector &shard_indices) { + DistillingInputFilter input_filter{ + /*feature_frequency_threshold=*/1, + env.MakeDomainDiscardMask(), + }; + // Do not limit the max RAM. + ResourcePool ram_pool{RUsageMemory::Max()}; + // Read the input shards sequentially and in order to ensure deterministic + // outputs. 
+ DistillToOneOutputShard( // + env, shard_indices, input_filter, ram_pool, /*parallelism=*/1); +} + +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/distill.h b/src/third_party/fuzztest/dist/centipede/distill.h new file mode 100644 index 00000000000..54920c6ee0e --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/distill.h @@ -0,0 +1,51 @@ +// Copyright 2023 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef THIRD_PARTY_CENTIPEDE_DISTILL_H_ +#define THIRD_PARTY_CENTIPEDE_DISTILL_H_ + +#include +#include +#include + +#include "./centipede/environment.h" + +namespace fuzztest::internal { + +// Options for `Distill()`. +struct DistillOptions { + // From each feature-equivalent set of inputs, select up to this many winners. + uint8_t feature_frequency_threshold = 1; +}; + +// Reads `env.total_shards` input shards from `WorkDir{env}.CorpusFiles()` and +// `WorkDir{env}.FeaturesFiles()`, distills them, and writes out the winning +// inputs to `env.num_threads` output shards. +// +// All reads and writes are parallelized for higher throughput. A side effect of +// that is that the results are generally non-deterministic (for a given +// feature-equivalent set of inputs, any one can win and make it to the output). +// +// Returns EXIT_SUCCESS. 
+int Distill(const Environment &env, const DistillOptions &opts = {}); + +// Same as `Distill()`, but runs distillation without I/O parallelization and +// reads shards in the order specified by `shard_indices` for deterministic +// results. +void DistillForTests(const Environment &env, + const std::vector &shard_indices); + +} // namespace fuzztest::internal + +#endif // THIRD_PARTY_CENTIPEDE_DISTILL_H_ diff --git a/src/third_party/fuzztest/dist/centipede/distill_test.cc b/src/third_party/fuzztest/dist/centipede/distill_test.cc new file mode 100644 index 00000000000..2d8801029fb --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/distill_test.cc @@ -0,0 +1,193 @@ +// Copyright 2023 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "./centipede/distill.h" + +#include +#include +#include // NOLINT +#include +#include +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/flags/reflection.h" +#include "absl/log/check.h" +#include "./centipede/corpus_io.h" +#include "./centipede/environment.h" +#include "./centipede/feature.h" +#include "./centipede/util.h" +#include "./centipede/workdir.h" +#include "./common/blob_file.h" +#include "./common/defs.h" +#include "./common/test_util.h" + +namespace fuzztest::internal { +namespace { + +using testing::UnorderedElementsAreArray; + +struct TestCorpusRecord { + ByteArray input; + FeatureVec feature_vec; +}; + +// Custom matcher for TestCorpusRecord. Compares `expected_input` with +// actual TestCorpusRecord::input and compares `expected_features` with +// actual TestCorpusRecord::feature_vec. +MATCHER_P2(EqualsTestCorpusRecord, expected_input, expected_features, "") { + return testing::ExplainMatchResult( + testing::Field(&TestCorpusRecord::input, expected_input), arg, + result_listener) && + testing::ExplainMatchResult( + testing::Field(&TestCorpusRecord::feature_vec, + testing::ElementsAreArray(expected_features)), + arg, result_listener); +} + +using Shard = std::vector; +using ShardVec = std::vector; +using InputVec = std::vector; + +// Writes `record` to shard `shard_index`. 
+void WriteToShard(const Environment &env, const TestCorpusRecord &record, + size_t shard_index) { + const WorkDir wd{env}; + const auto corpus_path = wd.CorpusFilePaths().Shard(shard_index); + const auto features_path = wd.FeaturesFilePaths().Shard(shard_index); + const auto corpus_appender = DefaultBlobFileWriterFactory(env.riegeli); + const auto features_appender = DefaultBlobFileWriterFactory(env.riegeli); + CHECK_OK(corpus_appender->Open(corpus_path, "a")); + CHECK_OK(features_appender->Open(features_path, "a")); + CHECK_OK(corpus_appender->Write(record.input)); + CHECK_OK(features_appender->Write( + PackFeaturesAndHash(record.input, record.feature_vec))); +} + +// Reads and returns the distilled corpus record from +// `wd.DistilledCorpusPath()` and `wd.DistilledFeaturesPath()`. +std::vector ReadFromDistilled(const WorkDir &wd) { + const auto distilled_corpus_path = wd.DistilledCorpusFilePaths().MyShard(); + const auto distilled_features_path = + wd.DistilledFeaturesFilePaths().MyShard(); + + std::vector result; + auto shard_reader_callback = [&result](ByteArray input, FeatureVec features) { + result.push_back({std::move(input), std::move(features)}); + }; + ReadShard(distilled_corpus_path, distilled_features_path, + shard_reader_callback); + return result; +} + +// Distills `shards` in the order specified by `shard_indices`, +// returns the distilled corpus as a vector of inputs. +std::vector TestDistill( + const ShardVec &shards, const std::vector &shard_indices, + std::string_view test_name, uint64_t user_feature_domain_mask) { + // Set up the environment. + // We need to set at least --binary_hash before `env` is constructed, + // so we do this by overriding the flags. 
// Distills the same three shards in several shard orders and with several
// user-feature-domain masks, and checks which (input, features) records end up
// in the distilled output.
//
// NOTE(review): the expectations are consistent with a filter that keeps an
// input only if it carries at least one feature not seen so far, and that
// strips features of user domains whose bit is unset in the mask (the last
// argument of `TestDistill()`, stored in `env.user_feature_domain_mask`) --
// confirm against `DistillingInputFilter`.
TEST(Distill, BasicDistill) {
  ByteArray in0 = {0};
  ByteArray in1 = {1};
  ByteArray in2 = {2};
  ByteArray in3 = {3};
  // One feature in each of the first two user feature domains.
  feature_t usr0 = feature_domains::kUserDomains[0].ConvertToMe(100);
  feature_t usr1 = feature_domains::kUserDomains[1].ConvertToMe(101);

  ShardVec shards = {
      // shard 0; note: distillation iterates the shards backwards.
      // (So `in0` is seen before `in3`; `in3` never appears in the expected
      // outputs below since its only feature, 10, is also in `in0`.)
      {
          {in3, {10}},
          {in0, {10, 20}},
      },
      // shard 1
      {
          {in1, {20, 30, usr0}},
      },
      // shard 2
      {
          {in2, {30, 40, usr1}},
      },
  };
  // Distill these 3 shards in different orders, observe different results.

  // Order 0, 1, 2 with no user domains enabled: every shard adds something new
  // (in0: 10 and 20, in1: 30, in2: 40); the user features are absent from the
  // output.
  EXPECT_THAT(TestDistill(shards, {0, 1, 2}, test_info_->name(), 0),
              UnorderedElementsAreArray({
                  EqualsTestCorpusRecord(in0, FeatureVec{10, 20}),
                  EqualsTestCorpusRecord(in1, FeatureVec{20, 30}),
                  EqualsTestCorpusRecord(in2, FeatureVec{30, 40}),
              }));
  // Order 2, 0, 1 with no user domains enabled: by the time `in1` is seen,
  // 20 and 30 are already covered by `in0` and `in2`, so `in1` is absent.
  EXPECT_THAT(TestDistill(shards, {2, 0, 1}, test_info_->name(), 0),
              UnorderedElementsAreArray({
                  EqualsTestCorpusRecord(in2, FeatureVec{30, 40}),
                  EqualsTestCorpusRecord(in0, FeatureVec{10, 20}),
              }));
  // Mask 0x1 enables user domain 0: `usr0` survives and makes `in1` unique.
  EXPECT_THAT(TestDistill(shards, {2, 0, 1}, test_info_->name(), 0x1),
              UnorderedElementsAreArray({
                  EqualsTestCorpusRecord(in2, FeatureVec{30, 40}),
                  EqualsTestCorpusRecord(in0, FeatureVec{10, 20}),
                  EqualsTestCorpusRecord(in1, FeatureVec{20, 30, usr0}),
              }));
  // Mask 0x2 enables user domain 1 only: `usr1` survives on `in2`, but `in1`
  // loses `usr0` and is again absent.
  EXPECT_THAT(TestDistill(shards, {2, 0, 1}, test_info_->name(), 0x2),
              UnorderedElementsAreArray({
                  EqualsTestCorpusRecord(in2, FeatureVec{30, 40, usr1}),
                  EqualsTestCorpusRecord(in0, FeatureVec{10, 20}),
              }));
  // Mask 0x3 enables both user domains: both user features survive.
  EXPECT_THAT(TestDistill(shards, {2, 0, 1}, test_info_->name(), 0x3),
              UnorderedElementsAreArray({
                  EqualsTestCorpusRecord(in2, FeatureVec{30, 40, usr1}),
                  EqualsTestCorpusRecord(in0, FeatureVec{10, 20}),
                  EqualsTestCorpusRecord(in1, FeatureVec{20, 30, usr0}),
              }));
  // Order 1, 0, 2 with no user domains enabled: `in1` comes first, and both
  // `in0` (10) and `in2` (40) still add a new feature.
  EXPECT_THAT(TestDistill(shards, {1, 0, 2}, test_info_->name(), 0),
              UnorderedElementsAreArray({
                  EqualsTestCorpusRecord(in1, FeatureVec{20, 30}),
                  EqualsTestCorpusRecord(in0, FeatureVec{10, 20}),
                  EqualsTestCorpusRecord(in2, FeatureVec{30, 40}),
              }));
}
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "./centipede/environment.h" + +#include +#include +#include +#include +#include +#include +#include +#include // NOLINT +#include + +#include "absl/base/no_destructor.h" +#include "absl/container/flat_hash_map.h" +#include "absl/flags/marshalling.h" +#include "absl/log/check.h" +#include "absl/log/log.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_split.h" +#include "absl/strings/string_view.h" +#include "absl/time/time.h" +#include "./centipede/feature.h" +#include "./centipede/knobs.h" +#include "./centipede/util.h" +#include "./common/defs.h" +#include "./common/logging.h" +#include "./common/remote_file.h" +#include "./common/status_macros.h" +#include "./fuzztest/internal/configuration.h" + +namespace fuzztest::internal { +namespace { + +size_t ComputeTimeoutPerBatch(size_t timeout_per_input, size_t batch_size) { + CHECK_GT(batch_size, 0); + // NOTE: If `timeout_per_input` == 0, leave `timeout_per_batch` at 0 too: + // the implementation interprets both as "no limit". + if (timeout_per_input == 0) return 0; + // TODO(ussuri): The formula here is an unscientific heuristic conjured + // up for CPU instruction fuzzing. `timeout_per_input` is interpreted as + // the long tail of the input runtime distribution of yet-unknown nature. 
+ // It might be the exponential, log-normal distribution or similar, and + // the distribution of the total time per batch could be modeled by the + // gamma distribution. Work out the math later. Right now, this naive + // formula gives ~18 min per batch with the input flags' defaults (this + // has worked in test runs so far). + constexpr double kScale = 12; + const double estimated_mean_time_per_input = + std::max(timeout_per_input / kScale, 1.0); + return std::ceil(std::log(estimated_mean_time_per_input + 1.0) * batch_size); +} + +} // namespace + +const Environment &Environment::Default() { + static absl::NoDestructor default_env; + return *default_env; +} + +bool Environment::DumpCorpusTelemetryInThisShard() const { + // Corpus stats are global across all shards on all machines. + return my_shard_index == 0; +} + +bool Environment::DumpRUsageTelemetryInThisShard() const { + // Unlike the corpus stats, we want to measure/dump rusage stats for each + // Centipede process running on a separate machine: assign that to the first + // shard (i.e. thread) on the machine. + return my_shard_index % num_threads == 0; +} + +bool Environment::DumpTelemetryForThisBatch(size_t batch_index) const { + // Always dump for batch 0 (i.e. at the beginning of execution). + if (telemetry_frequency != 0 && batch_index == 0) { + return true; + } + // Special mode for negative --telemetry_frequency: dump when batch_index + // is a power-of-two and is >= than 2^abs(--telemetry_frequency). + if (telemetry_frequency < 0 && batch_index >= (1 << -telemetry_frequency) && + ((batch_index - 1) & batch_index) == 0) { + return true; + } + // Normal mode: dump when requested number of batches get processed. 
+ if (((telemetry_frequency > 0) && (batch_index % telemetry_frequency == 0))) { + return true; + } + return false; +} + +std::bitset Environment::MakeDomainDiscardMask() + const { + constexpr size_t kNumUserDomains = std::size(feature_domains::kUserDomains); + std::bitset user_feature_domain_enabled( + user_feature_domain_mask); + std::bitset discard; + for (size_t i = 0; i < kNumUserDomains; ++i) { + if (!user_feature_domain_enabled.test(i)) { + discard.set(feature_domains::kUserDomains[i].domain_id()); + } + } + return discard; +} + +// Returns true if `value` is one of "1", "true". +// Returns true if `value` is one of "0", "false". +// CHECK-fails otherwise. +static bool GetBoolFlag(std::string_view value) { + if (value == "0" || value == "false") return false; + CHECK(value == "1" || value == "true") << value; + return true; +} + +// Returns `value` as a size_t, CHECK-fails on parse error. +static size_t GetIntFlag(std::string_view value) { + size_t result{}; + CHECK(std::from_chars(value.data(), value.data() + value.size(), result).ec == + std::errc()) + << value; + return result; +} + +void Environment::SetFlagForExperiment(std::string_view name, + std::string_view value) { + // TODO(kcc): support more flags, as needed. + + // Handle bool flags. + absl::flat_hash_map bool_flags{ + {"use_cmp_features", &use_cmp_features}, + {"use_auto_dictionary", &use_auto_dictionary}, + {"use_dataflow_features", &use_dataflow_features}, + {"use_counter_features", &use_counter_features}, + {"use_pcpair_features", &use_pcpair_features}, + {"use_coverage_frontier", &use_coverage_frontier}, + {"use_legacy_default_mutator", &use_legacy_default_mutator}, + }; + auto bool_iter = bool_flags.find(name); + if (bool_iter != bool_flags.end()) { + *bool_iter->second = GetBoolFlag(value); + return; + } + + // Handle int flags. 
+ absl::flat_hash_map int_flags{ + {"path_level", &path_level}, + {"callstack_level", &callstack_level}, + {"max_corpus_size", &max_corpus_size}, + {"max_len", &max_len}, + {"crossover_level", &crossover_level}, + {"mutate_batch_size", &mutate_batch_size}, + {"feature_frequency_threshold", &feature_frequency_threshold}, + }; + auto int_iter = int_flags.find(name); + if (int_iter != int_flags.end()) { + *int_iter->second = GetIntFlag(value); + return; + } + + LOG(FATAL) << "Unknown flag for experiment: " << name << "=" << value; +} + +void Environment::UpdateForExperiment() { + if (experiment.empty()) return; + + // Parse the --experiments flag. + struct Experiment { + std::string flag_name; + std::vector flag_values; + }; + std::vector experiments; + for (auto flag : absl::StrSplit(this->experiment, ':', absl::SkipEmpty())) { + std::vector flag_and_value = absl::StrSplit(flag, '='); + CHECK_EQ(flag_and_value.size(), 2) << flag; + experiments.emplace_back( + Experiment{flag_and_value[0], absl::StrSplit(flag_and_value[1], ',')}); + } + + // Count the number of flag combinations. + size_t num_combinations = 1; + for (const auto &exp : experiments) { + CHECK_NE(exp.flag_values.size(), 0) << exp.flag_name; + num_combinations *= exp.flag_values.size(); + } + CHECK_GT(num_combinations, 0); + CHECK_EQ(num_threads % num_combinations, 0) + << VV(num_threads) << VV(num_combinations); + + // Update the flags for the current shard and compute experiment_name. + CHECK_LT(my_shard_index, num_threads); + size_t my_combination_num = my_shard_index % num_combinations; + experiment_name.clear(); + experiment_flags.clear(); + // Reverse the flags. + // This way, the flag combinations will go in natural order. + // E.g. for --experiment='foo=1,2,3:bar=10,20' the order of combinations is + // foo=1 bar=10 + // foo=1 bar=20 + // foo=2 bar=10 ... + // Alternative would be to iterate in reverse order with rbegin()/rend(). 
+ std::reverse(experiments.begin(), experiments.end()); + for (const auto &exp : experiments) { + size_t idx = my_combination_num % exp.flag_values.size(); + SetFlagForExperiment(exp.flag_name, exp.flag_values[idx]); + my_combination_num /= exp.flag_values.size(); + experiment_name = std::to_string(idx) + experiment_name; + experiment_flags = + exp.flag_name + "=" + exp.flag_values[idx] + ":" + experiment_flags; + } + experiment_name = "E" + experiment_name; + load_other_shard_frequency = 0; // The experiments should be independent. +} + +void Environment::ReadKnobsFileIfSpecified() { + const std::string_view knobs_file_path = knobs_file; + if (knobs_file_path.empty()) return; + ByteArray knob_bytes; + auto *f = ValueOrDie(RemoteFileOpen(knobs_file, "r")); + CHECK(f) << "Failed to open remote file " << knobs_file; + CHECK_OK(RemoteFileRead(f, knob_bytes)); + CHECK_OK(RemoteFileClose(f)); + VLOG(1) << "Knobs: " << knob_bytes.size() << " knobs read from " + << knobs_file; + knobs.Set(knob_bytes); + knobs.ForEachKnob([](std::string_view name, Knobs::value_type value) { + VLOG(1) << "knob " << name << ": " << static_cast(value); + }); +} + +void Environment::UpdateWithTargetConfig( + const fuzztest::internal::Configuration &config) { + // Allow more crashes to be reported when running with FuzzTest. This allows + // more unique crashes to collected after deduplication. But we don't want to + // make the limit too large to stress the filesystem, so this is not a perfect + // solution. Currently we just increase the default to be seemingly large + // enough. 
+ if (max_num_crash_reports == Default().max_num_crash_reports) { + max_num_crash_reports = 20; + LOG(INFO) << "Overriding the default max_num_crash_reports to " + << max_num_crash_reports << " for FuzzTest."; + } + if (config.jobs != 0) { + CHECK(j == Default().j || j == config.jobs) + << "Value for --j is inconsistent with the value for jobs in the " + "target binary:" + << VV(j) << VV(config.jobs); + j = config.jobs; + total_shards = config.jobs; + num_threads = config.jobs; + my_shard_index = 0; + } + + const auto convert_to_seconds = + [&](absl::Duration duration, absl::string_view duration_name) -> size_t { + if (duration == absl::InfiniteDuration()) return 0; + // Centipede's time-related fields are in seconds, so we need at least 1s. + CHECK_GE(duration, absl::Seconds(1)) + << duration_name << " must not be less than one second"; + return static_cast(absl::ToInt64Seconds(duration)); + }; + + // Update `timeout_per_input` and consequently `timeout_per_batch`. + const size_t time_limit_per_input_sec = + convert_to_seconds(config.time_limit_per_input, "Time limit per input"); + CHECK(timeout_per_input == 0 || + timeout_per_input == Default().timeout_per_input || + timeout_per_input == time_limit_per_input_sec) + << "Value for --timeout_per_input is inconsistent with the value for " + "time_limit_per_input in the target binary:" + << VV(timeout_per_input) << VV(config.time_limit_per_input); + const size_t autocomputed_timeout_per_batch = + ComputeTimeoutPerBatch(timeout_per_input, batch_size); + timeout_per_input = time_limit_per_input_sec; + UpdateTimeoutPerBatchIfEqualTo(autocomputed_timeout_per_batch); + + // Adjust `timeout_per_batch` to never exceed the test time limit. + if (const auto test_time_limit = config.GetTimeLimitPerTest(); + test_time_limit < absl::InfiniteDuration()) { + const size_t test_time_limit_seconds = + convert_to_seconds(test_time_limit, "Test time limit"); + timeout_per_batch = + timeout_per_batch == 0 + ? 
test_time_limit_seconds + : std::min(timeout_per_batch, test_time_limit_seconds); + } + + // Convert bytes to MB by rounding up. + constexpr auto bytes_to_mb = [](size_t bytes) { + return bytes == 0 ? 0 : (bytes - 1) / 1024 / 1024 + 1; + }; + CHECK(rss_limit_mb == Default().rss_limit_mb || + rss_limit_mb == bytes_to_mb(config.rss_limit)) + << "Value for --rss_limit_mb is inconsistent with the value for " + "rss_limit in the target binary:" + << VV(rss_limit_mb) << VV(config.rss_limit); + rss_limit_mb = bytes_to_mb(config.rss_limit); + + // Convert bytes to KB by rounding up. + constexpr auto bytes_to_kb = [](size_t bytes) { + return bytes == 0 ? 0 : (bytes - 1) / 1024 + 1; + }; + CHECK(stack_limit_kb == Default().stack_limit_kb || + stack_limit_kb == bytes_to_kb(config.stack_limit)) + << "Value for --stack_limit_kb is inconsistent with the value for " + "stack_limit in the target binary:" + << VV(stack_limit_kb) << VV(config.stack_limit); + stack_limit_kb = bytes_to_kb(config.stack_limit); + + if (config.only_replay) { + load_shards_only = true; + populate_binary_info = false; + } +} + +void Environment::UpdateTimeoutPerBatchIfEqualTo(size_t val) { + if (timeout_per_batch != val) return; + timeout_per_batch = ComputeTimeoutPerBatch(timeout_per_input, batch_size); + VLOG(1) << "--timeout_per_batch auto-computed: " << timeout_per_batch + << " sec (see --help for details)"; +} + +void Environment::UpdateBinaryHashIfEmpty() { + if (binary_hash.empty()) { + binary_hash = HashOfFileContents(coverage_binary); + } +} + +std::vector Environment::CreateFlags() const { + std::vector flags; +#define CENTIPEDE_FLAG(_TYPE, NAME, _DEFAULT, _DESC) \ + if (NAME != Default().NAME) { \ + flags.push_back(absl::StrCat("--" #NAME "=", absl::UnparseFlag(NAME))); \ + } +#include "./centipede/centipede_flags.inc" +#undef CENTIPEDE_FLAG + return flags; +} + +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/environment.h 
b/src/third_party/fuzztest/dist/centipede/environment.h new file mode 100644 index 00000000000..aeaeb4431e8 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/environment.h @@ -0,0 +1,140 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef THIRD_PARTY_CENTIPEDE_ENVIRONMENT_H_ +#define THIRD_PARTY_CENTIPEDE_ENVIRONMENT_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include "absl/time/time.h" +#include "./centipede/feature.h" +#include "./centipede/knobs.h" +#include "./fuzztest/internal/configuration.h" + +namespace fuzztest::internal { + +// Fuzzing environment controlling the behavior of +// CentipedeMain(). Centipede binaries are creating Environment instances using +// the flags defined in environment_flags.cc, while other users can use +// CentipedeMain() as a library function without importing the flags. +struct Environment { +#define CENTIPEDE_FLAG(TYPE, NAME, DEFAULT, _DESC) TYPE NAME = DEFAULT; +#include "./centipede/centipede_flags.inc" +#undef CENTIPEDE_FLAG + + // Command line-related fields ----------------------------------------------- + + std::string exec_name; // copied from argv[0] + std::vector args; // copied from argv[1:]. + std::string binary_name; // Name of `coverage_binary`, w/o directories. + bool has_input_wildcards = false; // Set to true iff `binary` contains "@@". 
+ + // Experiment-related settings ----------------------------------------------- + + std::string experiment_name; // Set by `UpdateForExperiment`. + std::string experiment_flags; // Set by `UpdateForExperiment`. + + // Other --------------------------------------------------------------------- + + Knobs knobs; // Read from a file by `ReadKnobsFileIfSpecified`, see knobs.h. + + // Defines internal logging level. Set to zero to reduce logging in tests. + // TODO(ussuri): Retire in favor of VLOGs? + size_t log_level = 1; + + // Path to a file with PCs. This file is created and the field is set in + // `CentipedeMain()` once per process if trace_pc instrumentation is detected. + std::string pcs_file_path; + + // APIs ---------------------------------------------------------------------- + + // Returns an instance of the environment with default values. + static const Environment& Default(); + + // Should certain actions be performed --------------------------------------- + + // Returns true if we want to log features as symbols in this shard. + bool LogFeaturesInThisShard() const { + return my_shard_index < log_features_shards; + } + // Returns true if we want to generate the corpus telemetry files (coverage + // report, corpus stats, etc.) in this shard. + bool DumpCorpusTelemetryInThisShard() const; + // Returns true if we want to generate the resource usage report in this + // shard. See the related RUsageTelemetryScope(). + bool DumpRUsageTelemetryInThisShard() const; + // Returns true if we want to generate the telemetry files (coverage report, + // the corpus stats, etc.) after processing `batch_index`-th batch. + bool DumpTelemetryForThisBatch(size_t batch_index) const; + // Returns a bitmask indicating which domains Centipede should discard. + std::bitset MakeDomainDiscardMask() const; + + // Experiment-related functions ---------------------------------------------- + + // Updates `this` according to the `--experiment` flag. 
+ // The `--experiment` flag, if not empty, has this form: + // foo=1,2,3:bar=10,20 + // where foo and bar are some of the flag names supported for experimentation, + // see `SetFlag()`. + // `--experiment` defines the flag values to be set differently in different + // shards. E.g. in this case, + // shard 0 will have {foo=1,bar=10}, + // shard 1 will have {foo=1,bar=20}, + // ... + // shard 3 will have {foo=2,bar=10}, + // ... + // shard 5 will have {foo=2,bar=30}, + // and so on. + // + // CHECK-fails if the `--experiment` flag is not well-formed, + // or if num_threads is not a multiple of the number of flag combinations + // (which is 6 in this example). + // + // Sets load_other_shard_frequency=0 (experiments should be independent). + // + // Sets this->experiment_name to a string like "E01", + // which means "value #0 is used for foo and value #1 is used for bar". + void UpdateForExperiment(); + + // Sets flag 'name' to `value` for an experiment. CHECK-fails on + // invalid name/value combination. Used in `UpdateForExperiment()`. + void SetFlagForExperiment(std::string_view name, std::string_view value); + + // Other --------------------------------------------------------------------- + + // Reads `knobs` from `knobs_file`. Does nothing if the `knobs_file` is empty. + void ReadKnobsFileIfSpecified(); + // Updates `this` with `config` obtained from the target binary. CHECK-fails + // if the fields are non-default and inconsistent with the corresponding + // values in `config`. + void UpdateWithTargetConfig(const fuzztest::internal::Configuration& config); + // If `timeout_per_batch` is `val`, computes it as a function of + // `timeout_per_input` and `batch_size` and updates it. Otherwise, leaves it + // unchanged. + void UpdateTimeoutPerBatchIfEqualTo(size_t val); + // If `binary_hash` is empty, updates it using the file in `coverage_binary`. 
+ void UpdateBinaryHashIfEmpty(); + + std::vector CreateFlags() const; +}; + +} // namespace fuzztest::internal + +#endif // THIRD_PARTY_CENTIPEDE_ENVIRONMENT_H_ diff --git a/src/third_party/fuzztest/dist/centipede/environment_flags.cc b/src/third_party/fuzztest/dist/centipede/environment_flags.cc new file mode 100644 index 00000000000..72b3d1c0a9a --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/environment_flags.cc @@ -0,0 +1,139 @@ +// Copyright 2023 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "./centipede/environment_flags.h" + +#include +#include // NOLINT +#include +#include + +#include "absl/flags/flag.h" +#include "absl/log/check.h" +#include "absl/log/log.h" +#include "absl/strings/match.h" +#include "absl/strings/str_split.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "./centipede/environment.h" +#include "./common/logging.h" + +using ::fuzztest::internal::Environment; + +#define CENTIPEDE_FLAG(TYPE, NAME, DEFAULT, DESC) \ + ABSL_FLAG(TYPE, NAME, DEFAULT, DESC); +#include "./centipede/centipede_flags.inc" +#undef CENTIPEDE_FLAG + +#define CENTIPEDE_FLAG_ALIAS(ALIAS_NAME, ORIGINAL_NAME) \ + ABSL_FLAG(decltype(Environment::Default().ORIGINAL_NAME), ALIAS_NAME, \ + Environment::Default().ORIGINAL_NAME, \ + "Alias of --" #ORIGINAL_NAME) \ + .OnUpdate([]() { \ + absl::SetFlag(&FLAGS_##ORIGINAL_NAME, \ + absl::GetFlag(FLAGS_##ALIAS_NAME)); \ + }); +CENTIPEDE_FLAG_ALIAS(first_shard_index, my_shard_index) +CENTIPEDE_FLAG_ALIAS(timeout, timeout_per_input) +CENTIPEDE_FLAG_ALIAS(num_crash_reports, max_num_crash_reports) +CENTIPEDE_FLAG_ALIAS(minimize_crash, minimize_crash_file_path) +#undef CENTIPEDE_FLAG_ALIAS + +ABSL_FLAG(absl::Duration, stop_after, absl::InfiniteDuration(), + "Equivalent to setting --stop_at to the current date/time + this " + "duration. These two flags are mutually exclusive."); +ABSL_RETIRED_FLAG(size_t, distill_shards, 0, + "No longer supported: use --distill instead."); + +namespace fuzztest::internal { + +namespace { + +// Computes the final stop-at time based on the possibly user-provided inputs. 
+absl::Time GetStopAtTime(absl::Time stop_at, absl::Duration stop_after) { + const bool stop_at_is_non_default = stop_at != absl::InfiniteFuture(); + const bool stop_after_is_non_default = stop_after != absl::InfiniteDuration(); + CHECK_LE(stop_at_is_non_default + stop_after_is_non_default, 1) + << "At most one of --stop_at and --stop_after should be specified, " + "including via --config file: " + << VV(stop_at) << VV(stop_after); + if (stop_at_is_non_default) { + return stop_at; + } else if (stop_after_is_non_default) { + return absl::Now() + stop_after; + } else { + return absl::InfiniteFuture(); + } +} + +} // namespace + +Environment CreateEnvironmentFromFlags(const std::vector &argv) { + Environment env_from_flags = { +#define CENTIPEDE_FLAG(_TYPE, NAME, _DEFAULT, _DESC) \ + absl::GetFlag(FLAGS_##NAME), +#include "./centipede/centipede_flags.inc" +#undef CENTIPEDE_FLAG + }; + + env_from_flags.stop_at = + GetStopAtTime(env_from_flags.stop_at, absl::GetFlag(FLAGS_stop_after)); + + if (env_from_flags.coverage_binary.empty()) { + env_from_flags.coverage_binary = + *absl::StrSplit(env_from_flags.binary, ' ').begin(); + } + env_from_flags.binary_name = + std::filesystem::path(env_from_flags.coverage_binary).filename().string(); + env_from_flags.UpdateBinaryHashIfEmpty(); + + env_from_flags.UpdateTimeoutPerBatchIfEqualTo( + Environment::Default().timeout_per_batch); + + if (size_t j = absl::GetFlag(FLAGS_j)) { + env_from_flags.total_shards = j; + env_from_flags.num_threads = j; + env_from_flags.my_shard_index = 0; + } + CHECK_GE(env_from_flags.total_shards, 1); + CHECK_GE(env_from_flags.batch_size, 1); + CHECK_GE(env_from_flags.num_threads, 1); + CHECK_LE(env_from_flags.num_threads, env_from_flags.total_shards); + CHECK_LE(env_from_flags.my_shard_index + env_from_flags.num_threads, + env_from_flags.total_shards) + << VV(env_from_flags.my_shard_index) << VV(env_from_flags.num_threads); + + if (!argv.empty()) { + env_from_flags.exec_name = argv[0]; + for (size_t i = 
1; i < argv.size(); ++i) { + env_from_flags.args.emplace_back(argv[i]); + } + } + + if (!env_from_flags.clang_coverage_binary.empty()) + env_from_flags.extra_binaries.push_back( + env_from_flags.clang_coverage_binary); + + if (absl::StrContains(env_from_flags.binary, "@@")) { + LOG(INFO) << "@@ detected; running in standalone mode with batch_size=1"; + env_from_flags.has_input_wildcards = true; + env_from_flags.batch_size = 1; + // TODO(kcc): do we need to check if extra_binaries have @@? + } + + env_from_flags.ReadKnobsFileIfSpecified(); + return env_from_flags; +} + +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/environment_flags.h b/src/third_party/fuzztest/dist/centipede/environment_flags.h new file mode 100644 index 00000000000..5b96087e399 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/environment_flags.h @@ -0,0 +1,32 @@ +// Copyright 2023 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef THIRD_PARTY_CENTIPEDE_ENVIRONMENT_FLAGS_H_ +#define THIRD_PARTY_CENTIPEDE_ENVIRONMENT_FLAGS_H_ + +#include +#include + +#include "./centipede/environment.h" + +namespace fuzztest::internal { + +// Create an Environment object from command line flags defined in +// environment_flags.cc. 
+Environment CreateEnvironmentFromFlags( + const std::vector &argv = {}); + +} // namespace fuzztest::internal + +#endif // THIRD_PARTY_CENTIPEDE_ENVIRONMENT_FLAGS_H_ diff --git a/src/third_party/fuzztest/dist/centipede/environment_test.cc b/src/third_party/fuzztest/dist/centipede/environment_test.cc new file mode 100644 index 00000000000..f78c1be79fa --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/environment_test.cc @@ -0,0 +1,222 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "./centipede/environment.h" + +#include +#include + +#include "gtest/gtest.h" +#include "absl/log/check.h" +#include "absl/time/time.h" +#include "./fuzztest/internal/configuration.h" + +namespace fuzztest::internal { + +TEST(Environment, UpdateForExperiment) { + Environment env; + env.num_threads = 12; + env.experiment = "use_cmp_features=false,true:path_level=10,20,30"; + + auto Experiment = [&](size_t shard_index, bool val1, size_t val2, + std::string_view experiment_name, + std::string_view experiment_flags) { + env.my_shard_index = shard_index; + env.UpdateForExperiment(); + EXPECT_EQ(env.load_other_shard_frequency, 0); + EXPECT_EQ(env.use_cmp_features, val1); + EXPECT_EQ(env.path_level, val2); + EXPECT_EQ(env.experiment_name, experiment_name); + EXPECT_EQ(env.experiment_flags, experiment_flags); + }; + + Experiment(0, false, 10, "E00", "use_cmp_features=false:path_level=10:"); + Experiment(1, false, 20, "E01", "use_cmp_features=false:path_level=20:"); + Experiment(2, false, 30, "E02", "use_cmp_features=false:path_level=30:"); + Experiment(3, true, 10, "E10", "use_cmp_features=true:path_level=10:"); + Experiment(4, true, 20, "E11", "use_cmp_features=true:path_level=20:"); + Experiment(5, true, 30, "E12", "use_cmp_features=true:path_level=30:"); + Experiment(6, false, 10, "E00", "use_cmp_features=false:path_level=10:"); + Experiment(7, false, 20, "E01", "use_cmp_features=false:path_level=20:"); + Experiment(8, false, 30, "E02", "use_cmp_features=false:path_level=30:"); + Experiment(9, true, 10, "E10", "use_cmp_features=true:path_level=10:"); + Experiment(10, true, 20, "E11", "use_cmp_features=true:path_level=20:"); + Experiment(11, true, 30, "E12", "use_cmp_features=true:path_level=30:"); +} + +TEST(Environment, UpdatesNumberOfShardsAndThreadsFromTargetConfigJobs) { + Environment env; + env.total_shards = 20; + env.my_shard_index = 10; + env.num_threads = 5; + fuzztest::internal::Configuration config; + config.jobs = 10; + 
env.UpdateWithTargetConfig(config); + EXPECT_EQ(env.j, 10); + EXPECT_EQ(env.total_shards, 10); + EXPECT_EQ(env.my_shard_index, 0); + EXPECT_EQ(env.num_threads, 10); +} + +TEST(Environment, DiesOnInconsistentJAndTargetConfigJobs) { + Environment env; + env.j = 10; + fuzztest::internal::Configuration config; + config.jobs = 20; + EXPECT_DEATH(env.UpdateWithTargetConfig(config), + "Value for --j is inconsistent with the value for jobs in the " + "target binary"); +} + +TEST(Environment, UpdatesTimeoutPerBatchFromTimeoutPerInputAndBatchSize) { + Environment env; + env.batch_size = 1000; + env.timeout_per_input = 100; + env.timeout_per_batch = 0; + env.UpdateTimeoutPerBatchIfEqualTo(0); + EXPECT_GT(env.timeout_per_batch, 0); + + env.timeout_per_batch = 123; + env.UpdateTimeoutPerBatchIfEqualTo(0); + EXPECT_EQ(env.timeout_per_batch, 123); +} + +TEST(Environment, + UpdatesTimeoutPerInputFromFiniteTargetConfigTimeLimitPerInput) { + Environment env; + env.timeout_per_input = Environment::Default().timeout_per_input; + fuzztest::internal::Configuration config; + config.time_limit_per_input = absl::Seconds(456); + env.UpdateWithTargetConfig(config); + EXPECT_EQ(env.timeout_per_input, 456); +} + +TEST(Environment, + UpdatesTimeoutPerInputFromInfiniteTargetConfigTimeLimitPerInput) { + Environment env; + env.timeout_per_input = Environment::Default().timeout_per_input; + fuzztest::internal::Configuration config; + config.time_limit_per_input = absl::InfiniteDuration(); + env.UpdateWithTargetConfig(config); + EXPECT_EQ(env.timeout_per_input, 0); +} + +TEST(Environment, + DiesOnInconsistentTimeoutPerInputAndTargetConfigTimeLimitPerInput) { + Environment env; + env.timeout_per_input = 123; + fuzztest::internal::Configuration config; + config.time_limit_per_input = absl::Seconds(456); + EXPECT_DEATH( + env.UpdateWithTargetConfig(config), + "Value for --timeout_per_input is inconsistent with the value for " + "time_limit_per_input in the target binary"); +} + +TEST(Environment, + 
UpdatesTimeoutPerBatchFromFiniteTargetConfigTimeLimitPerInput) { + Environment env; + env.timeout_per_input = Environment::Default().timeout_per_input; + env.UpdateTimeoutPerBatchIfEqualTo(Environment::Default().timeout_per_batch); + const size_t autocomputed_timeout_per_batch = env.timeout_per_batch; + fuzztest::internal::Configuration config; + config.time_limit_per_input = absl::Seconds(456); + env.UpdateWithTargetConfig(config); + EXPECT_NE(env.timeout_per_batch, autocomputed_timeout_per_batch); +} + +TEST(Environment, + UpdatesTimeoutPerBatchFromInfiniteTargetConfigTimeLimitPerInput) { + Environment env; + env.timeout_per_input = Environment::Default().timeout_per_input; + env.UpdateTimeoutPerBatchIfEqualTo(Environment::Default().timeout_per_batch); + fuzztest::internal::Configuration config; + config.time_limit_per_input = absl::InfiniteDuration(); + env.UpdateWithTargetConfig(config); + EXPECT_EQ(env.timeout_per_batch, 0); +} + +TEST(Environment, UpdatesTimeoutPerBatchFromTargetConfigTimeLimit) { + Environment env; + fuzztest::internal::Configuration config; + config.time_limit = absl::Seconds(123); + config.time_budget_type = fuzztest::internal::TimeBudgetType::kPerTest; + CHECK(config.GetTimeLimitPerTest() == absl::Seconds(123)); + env.UpdateWithTargetConfig(config); + EXPECT_EQ(env.timeout_per_batch, 123) + << "`timeout_per_batch` should be set to the test time limit when it was " + "previously unset"; + + env.timeout_per_batch = 456; + env.UpdateWithTargetConfig(config); + EXPECT_EQ(env.timeout_per_batch, 123) + << "`timeout_per_batch` should be set to test time limit when it is " + "shorter than the previous value"; + + env.timeout_per_batch = 56; + env.UpdateWithTargetConfig(config); + EXPECT_EQ(env.timeout_per_batch, 56) + << "`timeout_per_batch` should not be updated with the test time limit " + "when it is longer than the previous value"; +} + +TEST(Environment, UpdatesRssLimitMbFromTargetConfigRssLimit) { + Environment env; + env.rss_limit_mb = 
Environment::Default().rss_limit_mb; + fuzztest::internal::Configuration config; + config.rss_limit = 5UL * 1024 * 1024 * 1024; + env.UpdateWithTargetConfig(config); + EXPECT_EQ(env.rss_limit_mb, 5 * 1024); +} + +TEST(Environment, DiesOnInconsistentRssLimitMbAndTargetConfigRssLimit) { + Environment env; + env.rss_limit_mb = 123; + fuzztest::internal::Configuration config; + config.rss_limit = 5UL * 1024 * 1024 * 1024; + EXPECT_DEATH( + env.UpdateWithTargetConfig(config), + "Value for --rss_limit_mb is inconsistent with the value for rss_limit " + "in the target binary"); +} + +TEST(Environment, UpdatesStackLimitKbFromTargetConfigStackLimit) { + Environment env; + env.stack_limit_kb = Environment::Default().stack_limit_kb; + fuzztest::internal::Configuration config; + config.stack_limit = 5UL * 1024; + env.UpdateWithTargetConfig(config); + EXPECT_EQ(env.stack_limit_kb, 5); +} + +TEST(Environment, DiesOnInconsistentStackLimitKbAndTargetConfigStackLimit) { + Environment env; + env.stack_limit_kb = 123; + fuzztest::internal::Configuration config; + config.stack_limit = 5UL * 1024; + EXPECT_DEATH(env.UpdateWithTargetConfig(config), + "Value for --stack_limit_kb is inconsistent with the value for " + "stack_limit in the target binary"); +} + +TEST(Environment, UpdatesReplayOnlyConfiguration) { + Environment env; + fuzztest::internal::Configuration config; + config.only_replay = true; + env.UpdateWithTargetConfig(config); + EXPECT_TRUE(env.load_shards_only); + EXPECT_FALSE(env.populate_binary_info); +} + +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/execution_metadata.cc b/src/third_party/fuzztest/dist/centipede/execution_metadata.cc new file mode 100644 index 00000000000..4b9611671a3 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/execution_metadata.cc @@ -0,0 +1,58 @@ +// Copyright 2023 The Centipede Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "./centipede/execution_metadata.h" + +#include +#include + +#include "./centipede/shared_memory_blob_sequence.h" +#include "./common/defs.h" + +namespace fuzztest::internal { + +bool ExecutionMetadata::AppendCmpEntry(ByteSpan a, ByteSpan b) { + if (a.size() != b.size()) return false; + // Size must fit in a byte. + if (a.size() >= 256) return false; + cmp_data.push_back(a.size()); + cmp_data.insert(cmp_data.end(), a.begin(), a.end()); + cmp_data.insert(cmp_data.end(), b.begin(), b.end()); + return true; +} + +bool ExecutionMetadata::Write(Blob::SizeAndTagT tag, + BlobSequence &outputs_blobseq) const { + return outputs_blobseq.Write({tag, cmp_data.size(), cmp_data.data()}); +} + +void ExecutionMetadata::Read(Blob blob) { + cmp_data.assign(blob.data, blob.data + blob.size); +} + +bool ExecutionMetadata::ForEachCmpEntry( + std::function callback) const { + size_t i = 0; + while (i < cmp_data.size()) { + auto size = cmp_data[i]; + if (i + 2 * size + 1 > cmp_data.size()) return false; + ByteSpan a(cmp_data.data() + i + 1, size); + ByteSpan b(cmp_data.data() + i + size + 1, size); + i += 1 + 2 * size; + callback(a, b); + } + return true; +} + +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/execution_metadata.h b/src/third_party/fuzztest/dist/centipede/execution_metadata.h new file mode 100644 index 00000000000..1e28e60ac79 --- /dev/null +++ 
b/src/third_party/fuzztest/dist/centipede/execution_metadata.h @@ -0,0 +1,60 @@ +// Copyright 2023 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Abstraction of metadata collected from executions that does not +// contribute to coverage but can be useful in mutation. +// +// This library is for both engine and runner. + +#ifndef THIRD_PARTY_CENTIPEDE_EXECUTION_METADATA_H_ +#define THIRD_PARTY_CENTIPEDE_EXECUTION_METADATA_H_ + +#include + +#include "./centipede/shared_memory_blob_sequence.h" +#include "./common/defs.h" + +namespace fuzztest::internal { + +struct ExecutionMetadata { + // Appends a CMP entry comparing `a` and `b` to the metadata. Returns false if + // the entry cannot be appended. Return true otherwise. + bool AppendCmpEntry(ByteSpan a, ByteSpan b); + + // Enumerates through all CMP entries in the metadata by calling + // `callback` on each of them. Returns false if there are invalid + // entries. Returns true otherwise. + bool ForEachCmpEntry(std::function callback) const; + + // Writes the contents to `outputs_blobseq` with header `tag`. Returns true + // iff successful. + bool Write(Blob::SizeAndTagT tag, BlobSequence &outputs_blobseq) const; + + // Reads the contents from `blob`. + // + // Note that the method does not check the blob tag, it should be checked by + // the method users. + void Read(Blob blob); + + // CMP entries are stored in one large ByteArray to minimize RAM consumption. 
+ // One CMP arg pair is stored as + // * `size` (1-byte value) + // * `value0` (`size` bytes) + // * `value1` (`size` bytes) + ByteArray cmp_data; +}; + +} // namespace fuzztest::internal + +#endif // THIRD_PARTY_CENTIPEDE_EXECUTION_METADATA_H_ diff --git a/src/third_party/fuzztest/dist/centipede/execution_metadata_test.cc b/src/third_party/fuzztest/dist/centipede/execution_metadata_test.cc new file mode 100644 index 00000000000..1bd0c42d4b5 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/execution_metadata_test.cc @@ -0,0 +1,122 @@ +// Copyright 2023 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "./centipede/execution_metadata.h" + +#include +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "./centipede/shared_memory_blob_sequence.h" +#include "./common/defs.h" + +namespace fuzztest::internal { +namespace { + +using ::testing::IsEmpty; +using ::testing::UnorderedElementsAreArray; + +TEST(ExecutionMetadata, ForEachCmpEntryEnumeratesEntriesInRawBytes) { + ExecutionMetadata metadata; + metadata.cmp_data = { + 2, // size + 1, 2, // a + 3, 4, // b + 0, // zero-sized entry + 3, // size + 5, 6, 7, // a + 8, 9, 10, // b + }; + std::vector> enumeration_result; + EXPECT_TRUE(metadata.ForEachCmpEntry( + [&](ByteSpan a, ByteSpan b) { enumeration_result.emplace_back(a, b); })); + + EXPECT_THAT( + enumeration_result, + UnorderedElementsAreArray(std::vector>{ + {{1, 2}, {3, 4}}, + {{}, {}}, + {{5, 6, 7}, {8, 9, 10}}, + })); +} + +TEST(ExecutionMetadata, ForEachCmpEntryHandlesEmptyCmpData) { + auto noop_callback = [](ByteSpan, ByteSpan) {}; + EXPECT_TRUE(ExecutionMetadata{}.ForEachCmpEntry(noop_callback)); +} + +TEST(ExecutionMetadata, + ForEachCmpEntryReturnsFalseOnCmpDataWithNotEnoughBytes) { + auto noop_callback = [](ByteSpan, ByteSpan) {}; + auto bad_metadata_1 = ExecutionMetadata{}; + bad_metadata_1.cmp_data = {3, 1, 2, 3}; + EXPECT_FALSE(bad_metadata_1.ForEachCmpEntry(noop_callback)); + auto bad_metadata_2 = ExecutionMetadata{}; + bad_metadata_2.cmp_data = {3, 1, 2, 3, 4, 5}; + EXPECT_FALSE(bad_metadata_2.ForEachCmpEntry(noop_callback)); +} + +TEST(ExecutionMetadata, ForEachCmpEntryEnumeratesEntriesFromAppendCmpEntry) { + ExecutionMetadata metadata; + ASSERT_TRUE(metadata.AppendCmpEntry({1, 2}, {3, 4})); + std::vector> enumeration_result; + EXPECT_TRUE(metadata.ForEachCmpEntry( + [&](ByteSpan a, ByteSpan b) { enumeration_result.emplace_back(a, b); })); + EXPECT_THAT( + enumeration_result, + UnorderedElementsAreArray(std::vector>{ + {{1, 2}, {3, 4}}, + })); +} + +TEST(ExecutionMetadata, 
AppendCmpEntryReturnsFalseAndSkipsOnBadArgs) { + ExecutionMetadata metadata; + // Sizes don't match. + EXPECT_FALSE(metadata.AppendCmpEntry({}, {1})); + ByteArray long_byte_array; + long_byte_array.resize(256); + // Args too long. + EXPECT_FALSE(metadata.AppendCmpEntry(long_byte_array, long_byte_array)); + // Should leave no entries and keep metadata well-formed. + std::vector> enumeration_result; + EXPECT_TRUE(metadata.ForEachCmpEntry( + [&](ByteSpan a, ByteSpan b) { enumeration_result.emplace_back(a, b); })); + EXPECT_THAT(enumeration_result, IsEmpty()); +} + +TEST(ExecutionMetadata, ReadAndWriteKeepsCmpEntries) { + ExecutionMetadata metadata_in; + ASSERT_TRUE(metadata_in.AppendCmpEntry({1, 2}, {3, 4})); + std::vector blob_storage; + blob_storage.resize(1024); + BlobSequence blobseq(blob_storage.data(), blob_storage.size()); + EXPECT_TRUE(metadata_in.Write(/*tag=*/1, blobseq)); + blobseq.Reset(); + Blob blob = blobseq.Read(); + ExecutionMetadata metadata_out; + metadata_out.Read(blob); + std::vector> enumeration_result; + EXPECT_TRUE(metadata_out.ForEachCmpEntry( + [&](ByteSpan a, ByteSpan b) { enumeration_result.emplace_back(a, b); })); + EXPECT_THAT( + enumeration_result, + UnorderedElementsAreArray(std::vector>{ + {{1, 2}, {3, 4}}, + })); +} + +} // namespace +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/feature.cc b/src/third_party/fuzztest/dist/centipede/feature.cc new file mode 100644 index 00000000000..d725c963bbe --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/feature.cc @@ -0,0 +1,15 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// TODO(kcc): remove this file if nothing else gets added here. diff --git a/src/third_party/fuzztest/dist/centipede/feature.h b/src/third_party/fuzztest/dist/centipede/feature.h new file mode 100644 index 00000000000..80cee6b2655 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/feature.h @@ -0,0 +1,287 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This library defines the concepts "fuzzing feature" and "feature domain". +// It is used by Centipede, and it can be used by fuzz runners to +// define their features in a way most friendly to Centipede. +// Fuzz runners do not have to use this file nor to obey the rules defined here. +// But using this file and following its rules is the simplest way if you want +// Centipede to understand the details about the features generated by the +// runner. +// +// This library must not depend on anything other than libc so that fuzz targets +// using it doesn't gain redundant coverage. 
For the same reason this library +// uses raw __builtin_trap instead of CHECKs. +// We make an exception for for std::sort/std::unique, +// since is very lightweight. +// This library is also header-only, with all functions defined as inline. + +#ifndef THIRD_PARTY_CENTIPEDE_FEATURE_H_ +#define THIRD_PARTY_CENTIPEDE_FEATURE_H_ + +// WARNING!!!: Be very careful with what STL headers or other dependencies you +// add here. This header needs to remain mostly bare-bones so that we can +// include it into runner. +// is an exception, because it's too clumsy w/o it, and it introduces +// minimal code footprint. +#include +#include +#include +#include +#include + +namespace fuzztest::internal { + +// Feature is an integer that identifies some unique behaviour +// of the fuzz target exercised by a given input. +// We say, this input has this feature with regard to this fuzz target. +// One example of a feature: a certain control flow edge being executed. +using feature_t = uint64_t; + +// A vector of features. It is not expected to be ordered. +// It typically does not contain repetitions, but it's ok to have them. +using FeatureVec = std::vector; + +namespace feature_domains { + +// Feature domain is a subset of 64-bit integers dedicated to a certain +// kind of fuzzing features. +// All domains are of the same size (kDomainSize), This way, we can compute +// a domain for a given feature by dividing by kDomainSize. +class Domain { + public: + // kDomainSize is a large enough value to hold all PCs of our largest target. + // It is also large enough to avoid too many collisions in other domains. + // At the same time, it is small enough that all domains combined require + // not too many bits (e.g. 32 bits is a good practical limit). + // TODO(kcc): consider making feature_t a 32-bit type if we expect to not + // use more than 32 bits. + // NOTE: this value may change in future. 
+ static constexpr size_t kDomainSize = 1ULL << 27; + + constexpr Domain(size_t domain_id) : domain_id_(domain_id) {} + + constexpr feature_t begin() const { return kDomainSize * domain_id_; } + constexpr feature_t end() const { return begin() + kDomainSize; } + bool Contains(feature_t feature) const { + return feature >= begin() && feature < end(); + } + constexpr size_t domain_id() const { return domain_id_; } + + // Converts any `number` into a feature in this domain. + feature_t ConvertToMe(size_t number) const { + return begin() + number % kDomainSize; + } + + // Returns the DomainId of the domain that the feature belongs to. + static size_t FeatureToDomainId(feature_t feature) { + return feature / kDomainSize; + } + + // Returns the index into the domain of a feature. + static size_t FeatureToIndexInDomain(feature_t feature) { + return feature % kDomainSize; + } + + private: + const size_t domain_id_; +}; + +// Notes on Designing Features and Domains +// +// Abstractly, a "feature" signals that there was something interesting about +// the input that Centipede should keep investigating. After seeing a particular +// feature occur often enough, Centipede will become less interested. +// +// Generally, different types of features should be put in different domains. +// This is useful for two reasons. First, Centipede can display the feature +// count for each domain separately. Second, Centipede calculates features +// weights relative to the size of the domain. If two different types of +// features are squeezed into the same domain, an overabundance of one type of +// feature can cause the other type of feature to be undervalued. +// +// The number of features can fit inside a particular domain is finite (see +// kDomainSize). A feature outside that range will be mapped inside that range. +// If the space of all possible features is larger than kDomainSize, it is +// recommended that the feature value is hashed as it is calculated. 
Feature +// spaces typically have some sort of internal structure and mapping a +// structured feature space into kDomainSize via a modulus can create +// predictable aliasing. Hashing the feature value reduces the worst case effect +// of the feature aliasing. If hashing, it is also recommended that the domain +// is defined in such a way so that the number of features actually discovered +// in that domain stays below a fraction of kDomainSize, even if the number of +// possible features is huge. The more feature aliasing that occurs in practice, +// the less effective the domain. + +// Catch-all domain for unknown features. +inline constexpr Domain kUnknown = {__COUNTER__}; +static_assert(kUnknown.domain_id() == 0); // No one used __COUNTER__ before. +// Represents PCs, i.e. control flow edges. +// Use ConvertPCFeatureToPcIndex() to convert back to a PC index. +inline constexpr Domain kPCs = {__COUNTER__}; +static_assert(kPCs.domain_id() != kUnknown.domain_id()); // just in case. +// Features derived from edge counters. See Convert8bitCounterToNumber(). +inline constexpr Domain k8bitCounters = {__COUNTER__}; +// Features derived from data flow edges. +// A typical data flow edge is a pair of PCs: {store-PC, load-PC}. +// Another variant of a data flow edge is a pair of {global-address, load-PC}. +inline constexpr Domain kDataFlow = {__COUNTER__}; +// Features derived from instrumenting CMP instructions. TODO(kcc): remove. +inline constexpr Domain kCMP = {__COUNTER__}; +// Features in the following domains are created for comparison instructions +// 'a CMP b'. One component of the feature is the context, i.e. where the +// comparison happened. Another component depends on {a,b}. +// +// a == b. +// The other domains (kCMPModDiff, kCMPHamming, kCMPDiffLog) are for a != b. +inline constexpr Domain kCMPEq = {__COUNTER__}; +// (a - b) if |a-b| < 32, see ABToCmpModDiff. +inline constexpr Domain kCMPModDiff = {__COUNTER__}; +// hamming_distance(a, b), ABToCmpHamming. 
+inline constexpr Domain kCMPHamming = {__COUNTER__}; +// log2(a > b ? a - b : b - a), see ABToCmpDiffLog. +inline constexpr Domain kCMPDiffLog = {__COUNTER__}; +// A list of all the CMP domains. +inline constexpr std::array kCMPDomains = {{ + kCMP, + kCMPEq, + kCMPModDiff, + kCMPHamming, + kCMPDiffLog, +}}; +// Features derived from observing function call stacks. +inline constexpr Domain kCallStack = {__COUNTER__}; +// Features derived from computing (bounded) control flow paths. +inline constexpr Domain kBoundedPath = {__COUNTER__}; +// Features derived from (unordered) pairs of PCs. +inline constexpr Domain kPCPair = {__COUNTER__}; +// Features defined by a user via +// __attribute__((section("__centipede_extra_features"))). +// There is no hard guarantee how many user domains are available, feel free to +// add or remove domains as needed. +inline constexpr std::array kUserDomains = {{ + {__COUNTER__}, + {__COUNTER__}, + {__COUNTER__}, + {__COUNTER__}, + {__COUNTER__}, + {__COUNTER__}, + {__COUNTER__}, + {__COUNTER__}, + {__COUNTER__}, + {__COUNTER__}, + {__COUNTER__}, + {__COUNTER__}, + {__COUNTER__}, + {__COUNTER__}, + {__COUNTER__}, + {__COUNTER__}, +}}; +// A fake domain, not actually used, must be last. +inline constexpr Domain kLastDomain = {__COUNTER__}; +// For now, check that all domains (except maybe for kLastDomain) fit +// into 32 bits. +static_assert(kLastDomain.begin() <= (1ULL << 32)); + +inline constexpr size_t kNumDomains = kLastDomain.domain_id(); + +// Special feature used to indicate an absence of features. Typically used where +// a feature array must not be empty, but doesn't have any other features. +inline constexpr feature_t kNoFeature = kUnknown.begin(); + +} // namespace feature_domains + +// Converts an 8-bit coverage counter, i.e. a pair of {`pc_index`, +// `counter_value` must not be zero. +// +// We convert the 8-bit counter value to a number from 0 to 7 +// by computing its binary log, i.e. 1=>0, 2=>1, 4=>2, 8=>3, ..., 128=>7. 
// Flattens the pair {`pc1`, `pc2`} into a single number, row-major:
// `pc1 * max_pc + pc2`. Both PCs are expected to be in [0, `max_pc`).
inline size_t ConvertPcPairToNumber(uintptr_t pc1, uintptr_t pc2,
                                    uintptr_t max_pc) {
  const uintptr_t row_start = pc1 * max_pc;
  return row_start + pc2;
}

// Maps {a,b}, a!=b, to a number in [0,64) based on the (wrapping) difference:
//   * a - b in [1, 32]  => a - b
//   * b - a in [1, 31]  => 32 + (b - a)
//   * anything else     => 0
inline uintptr_t ABToCmpModDiff(uintptr_t a, uintptr_t b) {
  const uintptr_t a_minus_b = a - b;  // Wraps around when a < b.
  if (a_minus_b <= 32) return a_minus_b;
  const uintptr_t b_minus_a = b - a;  // Same value as -a_minus_b.
  if (b_minus_a < 32) return 32 + b_minus_a;
  return 0;
}

// Maps {a,b}, a!=b, to a number in [0,64): the number of bit positions in
// which `a` and `b` differ, minus one.
inline uintptr_t ABToCmpHamming(uintptr_t a, uintptr_t b) {
  const uintptr_t differing_bits = a ^ b;
  return __builtin_popcountll(differing_bits) - 1;
}

// Maps {a,b}, a!=b, to a number in [0,64): the count of leading zero bits (as
// reported by __builtin_clzll) of the absolute difference |a - b|. Larger
// differences therefore produce smaller numbers.
inline uintptr_t ABToCmpDiffLog(uintptr_t a, uintptr_t b) {
  uintptr_t abs_diff;
  if (a > b) {
    abs_diff = a - b;
  } else {
    abs_diff = b - a;
  }
  return __builtin_clzll(abs_diff);
}
+ void push_back(feature_t feature) { + if (num_features_ < kSize) { + features_[num_features_++] = feature; + } + } + + // Makes the array empty. + void clear() { num_features_ = 0; } + + // Returns the array's raw data. + feature_t *data() { return &features_[0]; } + + // Returns the number of elements in the array. + size_t size() const { return num_features_; } + + private: + // NOTE: No initializer needed: object state is captured by `num_features_`. + feature_t features_[kSize]; + size_t num_features_ = 0; +}; + +} // namespace fuzztest::internal + +#endif // THIRD_PARTY_CENTIPEDE_FEATURE_H_ diff --git a/src/third_party/fuzztest/dist/centipede/feature_set.cc b/src/third_party/fuzztest/dist/centipede/feature_set.cc new file mode 100644 index 00000000000..09d936132b9 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/feature_set.cc @@ -0,0 +1,145 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "./centipede/feature_set.h" + +#include +#include +#include +#include +#include +#include + +#include "absl/log/check.h" +#include "absl/strings/str_cat.h" +#include "./centipede/control_flow.h" +#include "./centipede/feature.h" +#include "./common/logging.h" + +namespace fuzztest::internal { + +//------------------------------------------------------------------------------ +// FeatureSet +//------------------------------------------------------------------------------ + +// This implementation is slow (needs to iterate over the entire domain), +// but there is no need for it to be fast. +PCIndexVec FeatureSet::ToCoveragePCs() const { + PCIndexVec pcs; + for (size_t idx = 0; idx < feature_domains::Domain::kDomainSize; ++idx) { + if (frequencies_[feature_domains::kPCs.ConvertToMe(idx)]) + pcs.push_back(idx); + } + return pcs; +} + +size_t FeatureSet::CountFeatures(feature_domains::Domain domain) const { + return features_per_domain_[domain.domain_id()]; +} + +bool FeatureSet::HasUnseenFeatures(const FeatureVec &features) const { + for (auto feature : features) { + if (frequencies_[feature] == 0) return true; + } + return false; +} + +__attribute__((noinline)) // to see it in profile. 
+size_t +FeatureSet::PruneFeaturesAndCountUnseen(FeatureVec &features) const { + size_t number_of_unseen_features = 0; + size_t num_kept = 0; + for (auto feature : features) { + if (ShouldDiscardFeature(feature)) continue; + auto freq = frequencies_[feature]; + if (freq == 0) ++number_of_unseen_features; + if (freq < FrequencyThreshold(feature)) features[num_kept++] = feature; + } + features.resize(num_kept); + return number_of_unseen_features; +} + +void FeatureSet::PruneDiscardedDomains(FeatureVec &features) const { + size_t num_kept = 0; + for (auto feature : features) { + if (ShouldDiscardFeature(feature)) continue; + features[num_kept++] = feature; + } + features.resize(num_kept); +} + +void FeatureSet::IncrementFrequencies(const FeatureVec &features) { + for (auto f : features) { + auto &freq = frequencies_[f]; + if (freq == 0) { + ++num_features_; + ++features_per_domain_[feature_domains::Domain::FeatureToDomainId(f)]; + } + if (freq < FrequencyThreshold(f)) ++freq; + } +} + +__attribute__((noinline)) // to see it in profile. +uint64_t +FeatureSet::ComputeWeight(const FeatureVec &features) const { + uint64_t weight = 0; + for (auto feature : features) { + // The less frequent is the feature, the more valuable it is. + // (frequency == 1) => (weight == 256) + // (frequency == 2) => (weight == 128) + // and so on. + // The less frequent is the domain, the more valuable are its features. 
+ auto domain_id = feature_domains::Domain::FeatureToDomainId(feature); + auto features_in_domain = features_per_domain_[domain_id]; + CHECK(features_in_domain); + auto domain_weight = num_features_ / features_in_domain; + auto feature_frequency = frequencies_[feature]; + CHECK_GT(feature_frequency, 0) + << VV(feature) << VV(domain_id) << VV(features_in_domain) + << VV(domain_weight) << VV((int)feature_frequency) << DebugString(); + weight += domain_weight * (256 / feature_frequency); + } + return weight; +} + +std::string FeatureSet::DebugString() const { + std::ostringstream os; + os << VV((int)frequency_threshold_); + os << VV(num_features_); + os << this; + return os.str(); +} + +std::ostream &operator<<(std::ostream &out, const FeatureSet &fs) { + auto LogIfNotZero = [&out](size_t value, std::string_view name) { + if (!value) return; + out << " " << name << ": " << value; + }; + out << "ft: " << fs.size(); + LogIfNotZero(fs.CountFeatures(feature_domains::kPCs), "cov"); + LogIfNotZero(fs.CountFeatures(feature_domains::k8bitCounters), "cnt"); + LogIfNotZero(fs.CountFeatures(feature_domains::kDataFlow), "df"); + LogIfNotZero(fs.CountFeatures(feature_domains::kCMPDomains), "cmp"); + LogIfNotZero(fs.CountFeatures(feature_domains::kCallStack), "stk"); + LogIfNotZero(fs.CountFeatures(feature_domains::kBoundedPath), "path"); + LogIfNotZero(fs.CountFeatures(feature_domains::kPCPair), "pair"); + for (size_t i = 0; i < std::size(feature_domains::kUserDomains); ++i) { + LogIfNotZero(fs.CountFeatures(feature_domains::kUserDomains[i]), + absl::StrCat("usr", i)); + } + LogIfNotZero(fs.CountFeatures(feature_domains::kUnknown), "unknown"); + return out; +} + +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/feature_set.h b/src/third_party/fuzztest/dist/centipede/feature_set.h new file mode 100644 index 00000000000..ce0571252bd --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/feature_set.h @@ -0,0 +1,144 @@ +// Copyright 2023 The 
Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef THIRD_PARTY_CENTIPEDE_FEATURE_SET_H_ +#define THIRD_PARTY_CENTIPEDE_FEATURE_SET_H_ + +#include +#include +#include +#include +#include +#include + +#include "absl/log/log.h" +#include "./centipede/control_flow.h" +#include "./centipede/feature.h" +#include "./centipede/util.h" + +namespace fuzztest::internal { + +// Set of features with their frequencies. +// Features that have a frequency >= frequency_threshold +// are considered too frequent and thus less interesting for further fuzzing. +// All features must be in [0, feature_domains::kLastDomain.begin()). +class FeatureSet { + public: + using FeatureDomainSet = std::bitset; + + explicit FeatureSet(uint8_t frequency_threshold, + FeatureDomainSet should_discard_domain) + : frequency_threshold_(frequency_threshold), + should_discard_domain_(should_discard_domain) {} + + // Returns true if there are features in `features` not present in `this`. + bool HasUnseenFeatures(const FeatureVec &features) const; + + // Removes all features from `features` that are too frequent or are in + // discarded domains. + // Returns the number of unpruned features in `features` that were not + // previously present in `this`. + size_t PruneFeaturesAndCountUnseen(FeatureVec &features) const; + + // Prune the features that are in discarded domains. + // Effectively a subset of PruneFeaturesAndCountUnseen. 
+ void PruneDiscardedDomains(FeatureVec &features) const; + + // For every feature in `features` increment its frequency. + // If a feature wasn't seen before, it is added to `this`. + void IncrementFrequencies(const FeatureVec &features); + + // How many different features are in the set. + size_t size() const { return num_features_; } + + // Returns features that originate from CFG counters, converted to PCIndexVec. + PCIndexVec ToCoveragePCs() const; + + // Returns the number of features in `this` from the given feature domain. + size_t CountFeatures(feature_domains::Domain domain) const; + // Returns the number of features in `this` from the given feature domains. + template + size_t CountFeatures(const DomainListT &domains) const { + size_t count = 0; + for (auto domain : domains) { + count += features_per_domain_[domain.domain_id()]; + } + return count; + } + // The same for an `initializer_list`, to enable usages like + // `CountFeatures({kPCs, kCMP})`. + size_t CountFeatures( + std::initializer_list domains) const { + return CountFeatures<>(domains); + } + + // Returns the frequency associated with `feature`. + size_t Frequency(feature_t feature) const { return frequencies_[feature]; } + + // Computes combined weight of `features`. + // The less frequent the feature is, the bigger its weight. + // The weight of a FeatureVec is a sum of individual feature weights. + uint64_t ComputeWeight(const FeatureVec &features) const; + + // Returns a debug string representing the state of *this. + std::string DebugString() const; + + private: + // Computes the frequency threshold based on the domain of `feature`. + // For now, just uses 1 for kPCPair and frequency_threshold_ for all others. + // Rationale: the kPCPair features might be too numerous, we don't want to + // store more than one of each such feature in the corpus. 
+ uint8_t FrequencyThreshold(feature_t feature) const { + if (feature_domains::kPCPair.Contains(feature)) return 1; + return frequency_threshold_; + } + + // Returns 'true' if we should always filter out this specific feature ID. + // This is a configurable policy that does not depend on the frequency of the + // feature. + bool ShouldDiscardFeature(feature_t feature) const { + size_t domain_id = feature_domains::Domain::FeatureToDomainId(feature); + // TODO(b/385774476): Remove this check once the root cause is fixed. + if (domain_id >= feature_domains::kNumDomains) { + LOG(ERROR) << "Unexpected feature with id: " << feature; + return true; + } + return should_discard_domain_.test(domain_id); + } + + const uint8_t frequency_threshold_; + + static constexpr size_t kSize = feature_domains::kLastDomain.begin(); + + // Maps features to their frequencies. + // This array is huge but sparse, and depending on the enabled features + // some parts of it will never be written to or read from. + // Unused parts of MmapNoReserveArray don't actually reserve memory. + MmapNoReserveArray frequencies_; + + // Counts all unique features added to this. + size_t num_features_ = 0; + + // Counts features in each domain. + size_t features_per_domain_[feature_domains::kNumDomains] = {}; + + FeatureDomainSet should_discard_domain_; +}; + +// Stream out description and count of features in feature set. +std::ostream &operator<<(std::ostream &out, const FeatureSet &fs); + +} // namespace fuzztest::internal + +#endif // THIRD_PARTY_CENTIPEDE_FEATURE_SET_H_ diff --git a/src/third_party/fuzztest/dist/centipede/feature_set_test.cc b/src/third_party/fuzztest/dist/centipede/feature_set_test.cc new file mode 100644 index 00000000000..7eca2ba0c47 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/feature_set_test.cc @@ -0,0 +1,204 @@ +// Copyright 2022 The Centipede Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "./centipede/feature_set.h" + +#include +#include +#include + +#include "gtest/gtest.h" +#include "./centipede/feature.h" + +namespace fuzztest::internal { +namespace { + +TEST(FeatureSet, ComputeWeight) { + FeatureSet feature_set(10, {}); + + auto W = [&](const FeatureVec &features) -> uint64_t { + return feature_set.ComputeWeight(features); + }; + + feature_set.IncrementFrequencies({1, 2, 3}); + EXPECT_EQ(W({1}), W({2})); + EXPECT_EQ(W({1}), W({3})); + EXPECT_DEATH(W({4}), ""); + + feature_set.IncrementFrequencies({1, 2}); + EXPECT_GT(W({3}), W({2})); + EXPECT_GT(W({3}), W({1})); + EXPECT_GT(W({3, 1}), W({2, 1})); + EXPECT_GT(W({3, 2}), W({2})); + + feature_set.IncrementFrequencies({1}); + EXPECT_GT(W({3}), W({2})); + EXPECT_GT(W({2}), W({1})); + EXPECT_GT(W({3, 2}), W({3, 1})); +} + +TEST(FeatureSet, ComputeWeightWithDifferentDomains) { + FeatureSet feature_set(10, {}); + // Increment the feature frequencies such that the domain #1 is the rarest and + // the domain #3 is the most frequent. 
+ auto f1 = feature_domains::k8bitCounters.begin(); + auto f2 = feature_domains::kCMP.begin(); + auto f3 = feature_domains::kBoundedPath.begin(); + feature_set.IncrementFrequencies( + {/* one feature from domain #1 */ f1, + /* two features from domain #2 */ f2, f2 + 1, + /* three features from domain #3 */ f3, f3 + 1, f3 + 2}); + + auto weight = [&](const FeatureVec &features) -> uint64_t { + return feature_set.ComputeWeight(features); + }; + + // Test that features from a less frequent domain have more weight. + EXPECT_GT(weight({f1}), weight({f2})); + EXPECT_GT(weight({f2}), weight({f3})); +} + +TEST(FeatureSet, HasUnseenFeatures_IncrementFrequencies) { + size_t frequency_threshold = 2; + FeatureSet feature_set(frequency_threshold, {}); + FeatureVec features = {10}; + EXPECT_TRUE(feature_set.HasUnseenFeatures(features)); + + feature_set.IncrementFrequencies(features); + EXPECT_FALSE(feature_set.HasUnseenFeatures(features)); + + features = {10, 20}; + EXPECT_TRUE(feature_set.HasUnseenFeatures(features)); + feature_set.IncrementFrequencies(features); + EXPECT_FALSE(feature_set.HasUnseenFeatures(features)); + + features = {50}; + EXPECT_TRUE(feature_set.HasUnseenFeatures(features)); + feature_set.IncrementFrequencies(features); + + features = {10, 20}; + EXPECT_FALSE(feature_set.HasUnseenFeatures(features)); +} + +TEST(FeatureSet, PruneFeaturesAndCountUnseen_IncrementFrequencies) { + size_t frequency_threshold = 3; + FeatureSet feature_set(frequency_threshold, {}); + FeatureVec features; + // Shorthand for PruneFeaturesAndCountUnseen. + auto PruneAndCountUnseen = [&]() -> size_t { + return feature_set.PruneFeaturesAndCountUnseen(features); + }; + // Shorthand for IncrementFrequencies. + auto Increment = [&](const FeatureVec &features) { + feature_set.IncrementFrequencies(features); + }; + + // PruneAndCountUnseen on the empty set. 
+ features = {10, 20}; + EXPECT_EQ(PruneAndCountUnseen(), 2); + EXPECT_EQ(feature_set.size(), 0); + EXPECT_EQ(features, FeatureVec({10, 20})); + + // Add {10} for the first time. + features = {10, 20}; + Increment({10}); + EXPECT_EQ(PruneAndCountUnseen(), 1); + EXPECT_EQ(feature_set.size(), 1); + EXPECT_EQ(features, FeatureVec({10, 20})); + + // Add {10} for the second time. + features = {10, 20}; + Increment({10}); + EXPECT_EQ(PruneAndCountUnseen(), 1); + EXPECT_EQ(feature_set.size(), 1); + EXPECT_EQ(features, FeatureVec({10, 20})); + + // Add {10} for the third time. {10} becomes "frequent", prune removes it. + features = {10, 20}; + Increment({10}); + EXPECT_EQ(PruneAndCountUnseen(), 1); + EXPECT_EQ(feature_set.size(), 1); + EXPECT_EQ(features, FeatureVec({20})); + + // Add {30} for the first time. {10, 20} still gets pruned to {20}. + features = {10, 20}; + Increment({30}); + EXPECT_EQ(PruneAndCountUnseen(), 1); + EXPECT_EQ(feature_set.size(), 2); + EXPECT_EQ(features, FeatureVec({20})); + + // {10, 20, 30} => {20, 30}; 1 unseen. + features = {10, 20, 30}; + EXPECT_EQ(PruneAndCountUnseen(), 1); + EXPECT_EQ(feature_set.size(), 2); + EXPECT_EQ(features, FeatureVec({20, 30})); + + // {10, 20, 30} => {20}; 1 unseen. + features = {10, 20, 30}; + Increment({30}); + Increment({30}); + EXPECT_EQ(PruneAndCountUnseen(), 1); + EXPECT_EQ(feature_set.size(), 2); + EXPECT_EQ(features, FeatureVec({20})); + + // {10, 20, 30} => {20}; 0 unseen. + features = {10, 20, 30}; + Increment({20}); + Increment({20}); + EXPECT_EQ(PruneAndCountUnseen(), 0); + EXPECT_EQ(feature_set.size(), 3); + EXPECT_EQ(features, FeatureVec({20})); + + // {10, 20, 30} => {}; 0 unseen. + features = {10, 20, 30}; + Increment({20}); + EXPECT_EQ(PruneAndCountUnseen(), 0); + EXPECT_EQ(feature_set.size(), 3); + EXPECT_EQ(features, FeatureVec({})); +} + +TEST(FeatureSet, PruneDiscardedDomains) { + for (size_t i = 0; i < feature_domains::kNumDomains; ++i) { + SCOPED_TRACE(i); + + // Ban one domain. 
+ std::bitset discarded_domains; + discarded_domains.set(i); + FeatureSet feature_set(10, discarded_domains); + + FeatureVec features; + FeatureVec expected; + for (size_t j = 0; j < feature_domains::kNumDomains; ++j) { + feature_t f = feature_domains::Domain(j).ConvertToMe(0); + // Input vector with a feature in every domain. + features.push_back(f); + if (j != i) expected.push_back(f); + } + + FeatureVec f1 = features; + feature_set.PruneDiscardedDomains(f1); + EXPECT_EQ(f1.size(), features.size() - 1); + EXPECT_EQ(f1, expected); + + // PruneFeaturesAndCountUnseen should, at minimum, prune the same domains as + // PruneDiscardedDomains. + FeatureVec f2 = features; + feature_set.PruneFeaturesAndCountUnseen(f2); + EXPECT_EQ(f2.size(), features.size() - 1); + EXPECT_EQ(f2, expected); + } +} + +} // namespace +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/feature_test.cc b/src/third_party/fuzztest/dist/centipede/feature_test.cc new file mode 100644 index 00000000000..be61c36f1c2 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/feature_test.cc @@ -0,0 +1,43 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "./centipede/feature.h" + +#include "gtest/gtest.h" + +namespace fuzztest::internal { +namespace { + +TEST(Feature, FeatureArray) { + FeatureArray<3> array; + EXPECT_EQ(array.size(), 0); + array.push_back(10); + EXPECT_EQ(array.size(), 1); + array.push_back(20); + EXPECT_EQ(array.size(), 2); + array.clear(); + EXPECT_EQ(array.size(), 0); + array.push_back(10); + array.push_back(20); + array.push_back(30); + EXPECT_EQ(array.size(), 3); + array.push_back(40); // no space left. + EXPECT_EQ(array.size(), 3); + EXPECT_EQ(array.data()[0], 10); + EXPECT_EQ(array.data()[1], 20); + EXPECT_EQ(array.data()[2], 30); +} + +} // namespace +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/foreach_nonzero.h b/src/third_party/fuzztest/dist/centipede/foreach_nonzero.h new file mode 100644 index 00000000000..a3de9d6657d --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/foreach_nonzero.h @@ -0,0 +1,70 @@ +// Copyright 2023 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef THIRD_PARTY_CENTIPEDE_FOREACH_NONZERO_H_ +#define THIRD_PARTY_CENTIPEDE_FOREACH_NONZERO_H_ + +// WARNING!!!: Be very careful with what STL headers or other dependencies you +// add here. This header needs to remain mostly bare-bones so that we can +// include it into runner. 
+#include +#include +#include +#include + +namespace fuzztest::internal { + +// Iterates over [bytes, bytes + num_bytes) and calls action(idx, bytes[idx]), +// for every non-zero bytes[idx]. Then clears those non-zero bytes. +// Optimized for the case where lots of bytes are zero. +inline void ForEachNonZeroByte(uint8_t *bytes, size_t num_bytes, + std::function action) { + // The main loop will read words of this size. + constexpr uintptr_t kWordSize = sizeof(uintptr_t); + const uintptr_t initial_alignment = + reinterpret_cast(bytes) % kWordSize; + size_t idx = 0; + uintptr_t alignment = initial_alignment; + // Iterate the first few until we reach alignment by word size. + for (; idx < num_bytes && alignment != 0; + idx++, alignment = (alignment + 1) % kWordSize) { + if (bytes[idx]) { + action(idx, bytes[idx]); + bytes[idx] = 0; + } + } + // Iterate one word at a time. If the word is != 0, iterate its bytes. + for (; idx + kWordSize - 1 < num_bytes; idx += kWordSize) { + uintptr_t wide_load; + __builtin_memcpy(&wide_load, bytes + idx, kWordSize); // force inline. + if (!wide_load) continue; + __builtin_memset(bytes + idx, 0, kWordSize); // // force inline. + // This loop assumes little-endianness. (Tests will break on big-endian). + for (size_t pos = 0; pos < kWordSize; pos++) { + uint8_t value = wide_load >> (pos * 8); // lowest byte is taken. + if (value) action(idx + pos, value); + } + } + // Iterate the last few. + for (; idx < num_bytes; idx++) { + if (bytes[idx]) { + action(idx, bytes[idx]); + bytes[idx] = 0; + } + } +} + +} // namespace fuzztest::internal + +#endif // THIRD_PARTY_CENTIPEDE_FOREACH_NONZERO_H_ diff --git a/src/third_party/fuzztest/dist/centipede/foreach_nonzero_test.cc b/src/third_party/fuzztest/dist/centipede/foreach_nonzero_test.cc new file mode 100644 index 00000000000..48994f34d10 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/foreach_nonzero_test.cc @@ -0,0 +1,89 @@ +// Copyright 2023 The Centipede Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "./centipede/foreach_nonzero.h" + +#include +#include +#include +#include +#include +#include + +#include "gtest/gtest.h" + +namespace fuzztest::internal { +namespace { + +void TrivialForEachNonZeroByte(uint8_t *bytes, size_t num_bytes, + std::function action) { + for (size_t i = 0; i < num_bytes; i++) { + uint8_t value = bytes[i]; + if (value) { + action(i, value); + bytes[i] = 0; + } + } +} + +TEST(ForEachNonZeroByte, ProcessesSubArrays) { + // Some long data with long spans of zeros and a few non-zeros. + // We will test all sub-arrays of this array. 
+ const uint8_t test_data[] = { + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }; + const size_t kTestDataSize = sizeof(test_data); + uint8_t test_data_copy[kTestDataSize]; + + auto CheckResult = [&](size_t offset, size_t size, + const std::vector> &v) { + for (size_t i = 0; i < kTestDataSize; ++i) { + if (i >= offset && i < offset + size) { + EXPECT_EQ(test_data_copy[i], 0); + } else { + EXPECT_EQ(test_data_copy[i], test_data[i]); + } + } + }; + + for (size_t offset = 0; offset <= kTestDataSize; offset++) { + for (size_t size = 0; offset + size <= kTestDataSize; size++) { + std::vector> v1, v2; + memcpy(test_data_copy, test_data, kTestDataSize); + TrivialForEachNonZeroByte( + test_data_copy + offset, size, + [&](size_t idx, uint8_t value) { v1.emplace_back(idx, value); }); + CheckResult(offset, size, v1); + + memcpy(test_data_copy, test_data, kTestDataSize); + ForEachNonZeroByte( + test_data_copy + offset, size, + [&](size_t idx, uint8_t value) { v2.emplace_back(idx, value); }); + CheckResult(offset, size, v2); + + EXPECT_EQ(v1, v2); + } + } +} + +} // namespace +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/fuzztest_mutator.cc b/src/third_party/fuzztest/dist/centipede/fuzztest_mutator.cc new file mode 100644 index 00000000000..a7c44931fbc --- /dev/null +++ 
b/src/third_party/fuzztest/dist/centipede/fuzztest_mutator.cc @@ -0,0 +1,151 @@ +// Copyright 2023 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "./centipede/fuzztest_mutator.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "absl/random/random.h" +#include "absl/types/span.h" +#include "./centipede/byte_array_mutator.h" +#include "./centipede/execution_metadata.h" +#include "./centipede/knobs.h" +#include "./centipede/mutation_input.h" +#include "./common/defs.h" +#include "./fuzztest/domain_core.h" +#include "./fuzztest/internal/table_of_recent_compares.h" + +namespace fuzztest::internal { + +namespace { + +using MutatorDomainBase = + decltype(fuzztest::VectorOf(fuzztest::Arbitrary())); + +} // namespace + +struct FuzzTestMutator::MutationMetadata { + fuzztest::internal::TablesOfRecentCompares cmp_tables; +}; + +class FuzzTestMutator::MutatorDomain : public MutatorDomainBase { + public: + MutatorDomain() + : MutatorDomainBase(fuzztest::VectorOf(fuzztest::Arbitrary())) {} + + ~MutatorDomain() {} +}; + +FuzzTestMutator::FuzzTestMutator(const Knobs &knobs, uint64_t seed) + : knobs_(knobs), + prng_(seed), + mutation_metadata_(std::make_unique()), + domain_(std::make_unique()) { + domain_->WithMinSize(1).WithMaxSize(max_len_); +} + +FuzzTestMutator::~FuzzTestMutator() = default; + +void FuzzTestMutator::CrossOverInsert(ByteArray &data, const ByteArray &other) { + // insert 
other[first:first+size] at data[pos] + const auto size = absl::Uniform( + prng_, 1, std::min(max_len_ - data.size(), other.size()) + 1); + const auto first = absl::Uniform(prng_, 0, other.size() - size + 1); + const auto pos = absl::Uniform(prng_, 0, data.size() + 1); + data.insert(data.begin() + pos, other.begin() + first, + other.begin() + first + size); +} + +void FuzzTestMutator::CrossOverOverwrite(ByteArray &data, + const ByteArray &other) { + // Overwrite data[pos:pos+size] with other[first:first+size]. + // Overwrite no more than half of data. + size_t max_size = std::max(1UL, data.size() / 2); + const auto first = absl::Uniform(prng_, 0, other.size()); + max_size = std::min(max_size, other.size() - first); + const auto size = absl::Uniform(prng_, 1, max_size + 1); + const auto pos = absl::Uniform(prng_, 0, data.size() - size + 1); + std::copy(other.begin() + first, other.begin() + first + size, + data.begin() + pos); +} + +void FuzzTestMutator::CrossOver(ByteArray &data, const ByteArray &other) { + if (data.size() >= max_len_) { + CrossOverOverwrite(data, other); + } else { + if (knobs_.GenerateBool(knob_cross_over_insert_or_overwrite, prng_())) { + CrossOverInsert(data, other); + } else { + CrossOverOverwrite(data, other); + } + } +} + +std::vector FuzzTestMutator::MutateMany( + const std::vector &inputs, size_t num_mutants) { + if (inputs.empty()) abort(); + // TODO(xinhaoyuan): Consider metadata in other inputs instead of always the + // first one. + SetMetadata(inputs[0].metadata != nullptr ? *inputs[0].metadata + : ExecutionMetadata()); + std::vector mutants; + mutants.reserve(num_mutants); + for (int i = 0; i < num_mutants; ++i) { + auto mutant = inputs[absl::Uniform(prng_, 0, inputs.size())].data; + if (mutant.size() > max_len_) mutant.resize(max_len_); + if (knobs_.GenerateBool(knob_mutate_or_crossover, prng_())) { + // Perform crossover with some other input. It may be the same input. 
+ const auto &other_input = + inputs[absl::Uniform(prng_, 0, inputs.size())].data; + CrossOver(mutant, other_input); + } else { + domain_->Mutate(mutant, prng_, + {/*cmp_tables=*/&mutation_metadata_->cmp_tables}, + /*only_shrink=*/false); + } + mutants.push_back(std::move(mutant)); + } + return mutants; +} + +void FuzzTestMutator::SetMetadata(const ExecutionMetadata &metadata) { + metadata.ForEachCmpEntry([this](ByteSpan a, ByteSpan b) { + size_t size = a.size(); + if (size < kMinCmpEntrySize) return; + if (size > kMaxCmpEntrySize) return; + // Use the memcmp table to avoid subtlety of the container domain mutation + // with integer tables. E.g. it won't insert integer comparison data. + mutation_metadata_->cmp_tables.GetMutable<0>().Insert(a.data(), b.data(), + size); + }); +} + +bool FuzzTestMutator::set_max_len(size_t max_len) { + max_len_ = max_len; + domain_->WithMaxSize(max_len); + return true; +} + +void FuzzTestMutator::AddToDictionary( + const std::vector &dict_entries) { + domain_->WithDictionary(dict_entries); +} + +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/fuzztest_mutator.h b/src/third_party/fuzztest/dist/centipede/fuzztest_mutator.h new file mode 100644 index 00000000000..5bd0e2ee7ac --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/fuzztest_mutator.h @@ -0,0 +1,82 @@ +// Copyright 2023 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef THIRD_PARTY_CENTIPEDE_FUZZTEST_MUTATOR_H_ +#define THIRD_PARTY_CENTIPEDE_FUZZTEST_MUTATOR_H_ + +#include +#include +#include +#include + +#include "./centipede/execution_metadata.h" +#include "./centipede/knobs.h" +#include "./centipede/mutation_input.h" +#include "./common/defs.h" + +namespace fuzztest::internal { + +// Mutator based on the FuzzTest std::vector domain. It always +// generates non-empty results, with a default limit on the mutant +// size unless changed by `set_max_len`. +// +// This class is thread-compatible. +class FuzzTestMutator { + public: + // Initialize the mutator with the given `knobs` and RNG `seed`. + explicit FuzzTestMutator(const Knobs &knobs, uint64_t seed); + ~FuzzTestMutator(); + + // Takes non-empty `inputs` and produces `num_mutants` mutants. + std::vector MutateMany(const std::vector &inputs, + size_t num_mutants); + + // Adds `dict_entries` to the internal mutation dictionary. + void AddToDictionary(const std::vector& dict_entries); + + // Sets max length in bytes for mutants with modified sizes. + // + // Returns false on invalid `max_len`, true otherwise. + bool set_max_len(size_t max_len); + + // TODO(xinhaoyuan): Support set_alignment(). + + private: + struct MutationMetadata; + class MutatorDomain; + + // Propagates the execution `metadata` to the internal mutation dictionary. + void SetMetadata(const ExecutionMetadata& metadata); + + // The crossover algorithm based on the legacy ByteArrayMutator. + // TODO(ussuri): Implement and use the domain level crossover. + void CrossOverInsert(ByteArray &data, const ByteArray &other); + void CrossOverOverwrite(ByteArray &data, const ByteArray &other); + void CrossOver(ByteArray &data, const ByteArray &other); + + // Size limits on the cmp entries to be used in mutation. 
+ static constexpr uint8_t kMaxCmpEntrySize = 15; + static constexpr uint8_t kMinCmpEntrySize = 2; + + const Knobs &knobs_; + Rng prng_; + size_t max_len_ = 1000; + + std::unique_ptr mutation_metadata_; + std::unique_ptr domain_; +}; + +} // namespace fuzztest::internal + +#endif diff --git a/src/third_party/fuzztest/dist/centipede/fuzztest_mutator_test.cc b/src/third_party/fuzztest/dist/centipede/fuzztest_mutator_test.cc new file mode 100644 index 00000000000..327bc836b82 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/fuzztest_mutator_test.cc @@ -0,0 +1,308 @@ +// Copyright 2023 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "./centipede/fuzztest_mutator.h" + +#include +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/container/flat_hash_set.h" +#include "absl/strings/str_join.h" +#include "./centipede/execution_metadata.h" +#include "./centipede/knobs.h" +#include "./centipede/mutation_input.h" +#include "./common/defs.h" + +namespace fuzztest::internal { + +namespace { + +using ::testing::AllOf; +using ::testing::Each; +using ::testing::IsSupersetOf; +using ::testing::Le; +using ::testing::SizeIs; +using ::testing::Values; + +TEST(FuzzTestMutator, DifferentRngSeedsLeadToDifferentMutantSequences) { + const Knobs knobs; + FuzzTestMutator mutator[2]{FuzzTestMutator(knobs, /*seed=*/1), + FuzzTestMutator(knobs, /*seed=*/2)}; + + std::vector res[2]; + for (size_t i = 0; i < 2; i++) { + ByteArray data = {0}; + std::vector mutation_inputs = {{data}}; + constexpr size_t kMutantSequenceLength = 100; + for (size_t iter = 0; iter < kMutantSequenceLength; iter++) { + const std::vector mutants = + mutator[i].MutateMany(mutation_inputs, 1); + ASSERT_EQ(mutants.size(), 1); + res[i].push_back(mutants[0]); + } + } + EXPECT_NE(res[0], res[1]); +} + +TEST(FuzzTestMutator, MutateManyWorksWithInputsLargerThanMaxLen) { + constexpr size_t kMaxLen = 4; + const Knobs knobs; + FuzzTestMutator mutator(knobs, /*seed=*/1); + EXPECT_TRUE(mutator.set_max_len(kMaxLen)); + constexpr size_t kNumMutantsToGenerate = 10000; + const std::vector mutants = mutator.MutateMany( + { + {/*data=*/{0, 1, 2, 3, 4, 5, 6, 7}}, + {/*data=*/{0}}, + {/*data=*/{0, 1}}, + {/*data=*/{0, 1, 2}}, + {/*data=*/{0, 1, 2, 3}}, + }, + kNumMutantsToGenerate); + + EXPECT_THAT(mutants, + AllOf(SizeIs(kNumMutantsToGenerate), Each(SizeIs(Le(kMaxLen))))); +} + +TEST(FuzzTestMutator, CrossOverInsertsDataFromOtherInputs) { + const Knobs knobs; + FuzzTestMutator mutator(knobs, /*seed=*/1); + constexpr size_t kNumMutantsToGenerate = 100000; + const std::vector mutants = mutator.MutateMany( + { + 
{/*data=*/{0, 1, 2, 3}}, + {/*data=*/{4, 5, 6, 7}}, + }, + kNumMutantsToGenerate); + + EXPECT_THAT(mutants, IsSupersetOf(std::vector{ + // The entire other input + {4, 5, 6, 7, 0, 1, 2, 3}, + {0, 1, 4, 5, 6, 7, 2, 3}, + {0, 1, 2, 3, 4, 5, 6, 7}, + // The prefix of other input + {4, 5, 6, 0, 1, 2, 3}, + {0, 1, 4, 5, 6, 2, 3}, + {0, 1, 2, 3, 4, 5, 6}, + // The suffix of other input + {5, 6, 7, 0, 1, 2, 3}, + {0, 1, 5, 6, 7, 2, 3}, + {0, 1, 2, 3, 5, 6, 7}, + // The middle of other input + {5, 6, 0, 1, 2, 3}, + {0, 1, 5, 6, 2, 3}, + {0, 1, 2, 3, 5, 6}, + })); +} + +TEST(FuzzTestMutator, CrossOverOverwritesDataFromOtherInputs) { + const Knobs knobs; + FuzzTestMutator mutator(knobs, /*seed=*/1); + constexpr size_t kNumMutantsToGenerate = 100000; + const std::vector mutants = mutator.MutateMany( + { + {/*data=*/{0, 1, 2, 3, 4, 5, 6, 7}}, + {/*data=*/{100, 101, 102, 103}}, + }, + kNumMutantsToGenerate); + + EXPECT_THAT(mutants, IsSupersetOf(std::vector{ + // The entire other input + {100, 101, 102, 103, 4, 5, 6, 7}, + {0, 1, 100, 101, 102, 103, 6, 7}, + {0, 1, 2, 3, 100, 101, 102, 103}, + // The prefix of other input + {100, 101, 102, 3, 4, 5, 6, 7}, + {0, 1, 2, 100, 101, 102, 6, 7}, + {0, 1, 2, 3, 4, 100, 101, 102}, + // The suffix of other input + {101, 102, 103, 3, 4, 5, 6, 7}, + {0, 1, 2, 101, 102, 103, 6, 7}, + {0, 1, 2, 3, 4, 101, 102, 103}, + // The middle of other input + {101, 102, 2, 3, 4, 5, 6, 7}, + {0, 1, 2, 101, 102, 5, 6, 7}, + {0, 1, 2, 3, 4, 5, 101, 102}, + })); +} + +// Test parameter containing the mutation settings and the expectations of a +// single mutation step. +struct MutationStepTestParameter { + // The input to be mutated. + ByteArray seed_input; + // The set of mutants to be expected by mutating `seed_input`. + absl::flat_hash_set expected_mutants; + // The set of mutants not supposed to be seen by mutating `seed_input`. + absl::flat_hash_set unexpected_mutants; + // The max length of the mutants. If unset, will not set the limit. 
+ std::optional max_len; + // The mutation dictionary. + std::vector dictionary; + // The comparison data following the format of ExecutionMetadata::cmp_data. + ByteArray cmp_data; + // The minimum number of iterations regardless of whether all mutants in + // `expected_mutants` are found or not. + size_t min_num_iterations = 1000; + // The maximum number of iterations to try before all mutants in + // `expected_mutants` are found. + size_t max_num_iterations = 100000000; +}; + +class MutationStepTest + : public testing::TestWithParam {}; + +TEST_P(MutationStepTest, GeneratesExpectedMutantsAndAvoidsUnexpectedMutants) { + const Knobs knobs; + FuzzTestMutator mutator(knobs, /*seed=*/1); + ASSERT_LE(GetParam().min_num_iterations, GetParam().max_num_iterations); + if (GetParam().max_len.has_value()) + EXPECT_TRUE(mutator.set_max_len(*GetParam().max_len)); + mutator.AddToDictionary(GetParam().dictionary); + absl::flat_hash_set unmatched_expected_mutants = + GetParam().expected_mutants; + const auto& unexpected_mutants = GetParam().unexpected_mutants; + ExecutionMetadata metadata; + metadata.cmp_data = GetParam().cmp_data; + const std::vector inputs = { + {/*data=*/GetParam().seed_input, /*metadata=*/&metadata}}; + for (size_t i = 0; i < GetParam().max_num_iterations; i++) { + const std::vector mutants = mutator.MutateMany(inputs, 1); + ASSERT_EQ(mutants.size(), 1); + const auto& mutant = mutants[0]; + EXPECT_FALSE(unexpected_mutants.contains(mutant)) + << "Unexpected mutant: {" << absl::StrJoin(mutant, ",") << "}"; + unmatched_expected_mutants.erase(mutant); + if (unmatched_expected_mutants.empty() && + i >= GetParam().min_num_iterations) + break; + } + EXPECT_TRUE(unmatched_expected_mutants.empty()); +} + +INSTANTIATE_TEST_SUITE_P(InsertByteUpToMaxLen, MutationStepTest, Values([] { + MutationStepTestParameter params; + params.seed_input = {0, 1, 2}; + params.expected_mutants = { + {0, 1, 2, 3}, + {0, 3, 1, 2}, + {3, 0, 1, 2}, + }; + params.unexpected_mutants = { + {0, 
1, 2, 3, 4}, + {0, 3, 4, 1, 2}, + {3, 4, 0, 1, 2}, + }; + params.max_len = 4; + return params; + }())); + +INSTANTIATE_TEST_SUITE_P(OverwriteFromDictionary, MutationStepTest, Values([] { + MutationStepTestParameter params; + params.seed_input = {1, 2, 3, 4, 5}; + params.expected_mutants = { + {1, 2, 7, 8, 9}, {1, 7, 8, 9, 5}, + {7, 8, 9, 4, 5}, {1, 2, 3, 0, 6}, + {1, 2, 0, 6, 5}, {1, 0, 6, 4, 5}, + {0, 6, 3, 4, 5}, {42, 2, 3, 4, 5}, + {1, 42, 3, 4, 5}, {1, 2, 42, 4, 5}, + {1, 2, 3, 42, 5}, {1, 2, 3, 4, 42}, + }; + params.dictionary = { + {7, 8, 9}, + {0, 6}, + {42}, + }; + return params; + }())); + +INSTANTIATE_TEST_SUITE_P(OverwriteFromCmpDictionary, MutationStepTest, + Values([] { + MutationStepTestParameter params; + params.seed_input = {1, 2, 40, 50, 60}; + params.expected_mutants = { + {3, 4, 40, 50, 60}, + {1, 2, 10, 20, 30}, + }; + params.cmp_data = {2, // size + 1, 2, // lhs + 3, 4, // rhs + 3, // size + 10, 20, 30, // lhs + 40, 50, 60}; // rhs + return params; + }())); + +INSTANTIATE_TEST_SUITE_P(InsertFromDictionary, MutationStepTest, Values([] { + MutationStepTestParameter params; + params.seed_input = {1, 2, 3}; + params.expected_mutants = { + {1, 2, 3, 4, 5}, {1, 2, 4, 5, 3}, + {1, 4, 5, 2, 3}, {4, 5, 1, 2, 3}, + {1, 2, 3, 6, 7, 8}, {1, 2, 6, 7, 8, 3}, + {1, 6, 7, 8, 2, 3}, {6, 7, 8, 1, 2, 3}, + }; + params.dictionary = { + {4, 5}, + {6, 7, 8}, + }; + return params; + }())); + +INSTANTIATE_TEST_SUITE_P(InsertFromCmpDictionary, MutationStepTest, Values([] { + MutationStepTestParameter params; + params.seed_input = {1, 2, 3}; + params.expected_mutants = { + {1, 2, 3, 4, 5}, {1, 2, 4, 5, 3}, + {1, 4, 5, 2, 3}, {4, 5, 1, 2, 3}, + {1, 2, 3, 6, 7, 8}, {1, 2, 6, 7, 8, 3}, + {1, 6, 7, 8, 2, 3}, {6, 7, 8, 1, 2, 3}, + }; + params.cmp_data = {2, // size + 4, 5, // lhs + 4, 5, // rhs + 3, // size + 6, 7, 8, // lhs + 6, 7, 8}; // rhs + return params; + }())); + +INSTANTIATE_TEST_SUITE_P(SkipsLongCmpEntry, MutationStepTest, Values([] { + MutationStepTestParameter 
params; + params.seed_input = {0}; + params.expected_mutants = { + {0, 1, 2, 3, 4}, + }; + params.unexpected_mutants = { + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, + }; + params.cmp_data = { + 20, // size + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, // lhs + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, // rhs + 4, // size + 1, 2, 3, 4, // lhs + 1, 2, 3, 4}; // rhs + return params; + }())); + +} // namespace + +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/hashed_ring_buffer.h b/src/third_party/fuzztest/dist/centipede/hashed_ring_buffer.h new file mode 100644 index 00000000000..f7bc6461978 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/hashed_ring_buffer.h @@ -0,0 +1,86 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This library defines the concepts "fuzzing feature" and "feature domain". +// It is used by Centipede, and it can be used by fuzz runners to +// define their features in a way most friendly to Centipede. +// Fuzz runners do not have to use this file nor to obey the rules defined here. +// But using this file and following its rules is the simplest way if you want +// Centipede to understand the details about the features generated by the +// runner. 
+// +// This library must not depend on anything other than libc so that fuzz targets +// using it don't gain redundant coverage. For the same reason this library +// uses raw __builtin_trap instead of CHECKs. +// We make an exception for std::sort/std::unique, +// since <algorithm> is very lightweight. +// This library is also header-only, with all functions defined as inline. + +#ifndef THIRD_PARTY_CENTIPEDE_HASHED_RING_BUFFER_H_ +#define THIRD_PARTY_CENTIPEDE_HASHED_RING_BUFFER_H_ + +#include + +// WARNING!!!: Be very careful with what STL headers or other dependencies you +// add here. This header needs to remain mostly bare-bones so that we can +// include it into runner. +#include +#include + +#include "./centipede/rolling_hash.h" + +namespace fuzztest::internal { + +// Fixed-size ring buffer that maintains a 32-bit hash of its elements. +// Create objects of this type as zero-initialized globals or thread-locals. +// In a zero-initialized object all values and the hash are zero. +// `kSize` indicates the maximum possible size for the ring-buffer. +// The actual size is passed to Reset(). +template +class HashedRingBuffer { + public: + // Adds `new_item` and returns the new hash of the entire collection. + // Evicts the item previously stored in the slot that `new_item` overwrites; + // the slot index wraps around to 0 once it reaches the size set by Reset(). + uint32_t push(size_t new_item) { + size_t new_pos = last_added_pos_ + 1; + if (new_pos >= size_) new_pos = 0; + size_t evicted_item = buffer_[new_pos]; + buffer_[new_pos] = new_item; + hash_.Update(new_item, evicted_item); + last_added_pos_ = new_pos; + return hash_.Hash(); + } + + // Returns the current hash. + uint32_t hash() const { return hash_.Hash(); } + + // Zeroes the whole object, then sets the ring buffer size to `size` (traps if `size` > kSize). + void Reset(size_t size) { + memset(this, 0, sizeof(*this)); + if (size > kSize) __builtin_trap(); // can't use CHECK in the runner. + size_ = size; + hash_.Reset(size); + } + + private: + size_t buffer_[kSize]; // All elements.
+ size_t last_added_pos_; // Position of the last added element. + size_t size_; // Real size of the ring buffer, <= kSize. + RollingHash hash_; +}; + +} // namespace fuzztest::internal + +#endif // THIRD_PARTY_CENTIPEDE_HASHED_RING_BUFFER_H_ diff --git a/src/third_party/fuzztest/dist/centipede/hashed_ring_buffer_test.cc b/src/third_party/fuzztest/dist/centipede/hashed_ring_buffer_test.cc new file mode 100644 index 00000000000..2904bf90e39 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/hashed_ring_buffer_test.cc @@ -0,0 +1,64 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "./centipede/hashed_ring_buffer.h" + +#include +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "absl/container/flat_hash_set.h" + +namespace fuzztest::internal { +namespace { + +TEST(Feature, HashedRingBuffer) { + HashedRingBuffer<32> rb16; // used with ring_buffer_size == 16 + HashedRingBuffer<32> rb32; // used with ring_buffer_size == 32 + rb16.Reset(16); + rb32.Reset(32); + absl::flat_hash_set hashes16, hashes32; + size_t kNumIter = 10000000; + // push a large number of different numbers into rb, ensure that most of the + // resulting hashes are different. + for (size_t i = 0; i < kNumIter; i++) { + hashes16.insert(rb16.push(i)); + hashes32.insert(rb32.push(i)); + } + // No collisions. 
+ EXPECT_EQ(hashes16.size(), kNumIter); + EXPECT_EQ(hashes32.size(), kNumIter); + + // Try all permutations of {0, 1, 2, ... 9}, ensure we have at least half + // this many different hashes. + std::vector numbers(10); + std::iota(numbers.begin(), numbers.end(), 0); + hashes32.clear(); + size_t num_permutations = 0; + while (std::next_permutation(numbers.begin(), numbers.end())) { + ++num_permutations; + rb32.Reset(32); + for (const auto number : numbers) { + rb32.push(number); + } + hashes32.insert(rb32.hash()); + } + EXPECT_GT(hashes32.size(), num_permutations / 2); +} + +} // namespace +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/install_dependencies_debian.sh b/src/third_party/fuzztest/dist/centipede/install_dependencies_debian.sh new file mode 100755 index 00000000000..10fc031174c --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/install_dependencies_debian.sh @@ -0,0 +1,61 @@ +#!/bin/bash +# Copyright 2022 The Centipede Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Tested on Debian GNU/Linux 11 (bullseye) +# +# * git: to get the Centipede sources. +# * bazel: to build Centipede. +# * libssl-dev: to link Centipede (it uses SHA1). +# * binutils: Centipede uses objdump. +# * clang: to build Centipede and the targets. +# For most of the functionality clang 11 or newer will work. +# To get all of the functionality you may need to install fresh clang from +# source: https://llvm.org/. 
+# The functionality currently requiring fresh clang from source: +# * -fsanitize-coverage=trace-loads +# (https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-data-flow) + +set -eux -o pipefail +declare MAYBE_SUDO="" +if (( "$EUID" != 0 )); then + MAYBE_SUDO="sudo" +fi + +${MAYBE_SUDO} apt update + +# Add Bazel distribution URI as a package source following: +# https://docs.bazel.build/versions/main/install-ubuntu.html +${MAYBE_SUDO} apt install -y curl gnupg apt-transport-https +curl -fsSL https://bazel.build/bazel-release.pub.gpg \ + | gpg --dearmor | ${MAYBE_SUDO} tee /etc/apt/trusted.gpg.d/bazel.gpg >/dev/null +echo "deb [arch=amd64] https://storage.googleapis.com/bazel-apt stable jdk1.8" \ + | ${MAYBE_SUDO} tee /etc/apt/sources.list.d/bazel.list >/dev/null +${MAYBE_SUDO} apt update + +# Install LLVM, which provides llvm-symbolizer required for running Centipede in +# some modes. +${MAYBE_SUDO} apt install -y llvm + +# Install other dependencies. +${MAYBE_SUDO} apt install -y git bazel binutils libssl-dev + +# Get Clang-14, the earliest version that supports dataflow tracing: +# * Download Clang from Chromium to support old OS (e.g. Ubuntu 16). +# * Alternatively, download the fresh Clang from https://releases.llvm.org/ +declare -r CLANG_URL="https://commondatastorage.googleapis.com/chromium-browser-clang/Linux_x64/clang-llvmorg-14-init-9436-g65120988-1.tgz" +declare -r CLANG_DIR="/tmp/clang" +mkdir "${CLANG_DIR}" +tar zxvf <(curl "${CLANG_URL}") -C "${CLANG_DIR}" +export CLANG_BIN_DIR="${CLANG_DIR}/bin" diff --git a/src/third_party/fuzztest/dist/centipede/instrument.bzl b/src/third_party/fuzztest/dist/centipede/instrument.bzl new file mode 100644 index 00000000000..605ce905ba2 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/instrument.bzl @@ -0,0 +1,86 @@ +# Copyright 2023 The Centipede Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utilities for applying or removing instrumentation to binary targets.""" + +def _strip_instrumentation_transition_impl(settings, _): + return { + "//command_line_option:compilation_mode": "opt", + "//command_line_option:compiler": None, + "//command_line_option:copt": [], + "//command_line_option:custom_malloc": None, + "//command_line_option:dynamic_mode": "default", + "//command_line_option:features": [ + feature + for feature in settings["//command_line_option:features"] + if feature not in ["asan", "tsan", "msan"] + ], + "//command_line_option:linkopt": [], + "//command_line_option:per_file_copt": [], + "//command_line_option:strip": "never", + } + +strip_instrumentation_transition = transition( + implementation = _strip_instrumentation_transition_impl, + inputs = [ + "//command_line_option:features", + ], + outputs = [ + "//command_line_option:compilation_mode", + "//command_line_option:compiler", + "//command_line_option:copt", + "//command_line_option:custom_malloc", + "//command_line_option:dynamic_mode", + "//command_line_option:features", + "//command_line_option:linkopt", + "//command_line_option:per_file_copt", + "//command_line_option:strip", + ], +) + +def _cc_uninstrumented_binary_impl(ctx): + output_file = ctx.actions.declare_file(ctx.label.name) + ctx.actions.symlink( + output = output_file, + target_file = ctx.executable.binary, + is_executable = True, + ) + runfiles = ctx.runfiles() + runfiles = runfiles.merge(ctx.attr.binary[0][DefaultInfo].default_runfiles) + return [ + DefaultInfo( + executable = output_file, + runfiles = 
runfiles, + ), + ] + +cc_uninstrumented_binary = rule( + implementation = _cc_uninstrumented_binary_impl, + doc = """ +Removes all known Centipede instrumentation that might have been applied to a +target cc_binary. +""", + attrs = { + "binary": attr.label( + doc = "A cc_binary target to remove the instrumentation from.", + executable = True, + cfg = strip_instrumentation_transition, + mandatory = True, + ), + "_allowlist_function_transition": attr.label( + default = "@bazel_tools//tools/allowlists/function_transition_allowlist", + ), + }, + executable = True, +) diff --git a/src/third_party/fuzztest/dist/centipede/int_utils.h b/src/third_party/fuzztest/dist/centipede/int_utils.h new file mode 100644 index 00000000000..e8bbcee708d --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/int_utils.h @@ -0,0 +1,33 @@ +// Copyright 2023 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef THIRD_PARTY_CENTIPEDE_INT_UTILS_H_ +#define THIRD_PARTY_CENTIPEDE_INT_UTILS_H_ + +#include + +namespace fuzztest::internal { + +// Computes a hash of `bits`. The purpose is to use the result for XOR-ing with +// some other values, such that all resulting bits look random. +inline uint64_t Hash64Bits(uint64_t bits) { + // This particular prime number seems to mix bits well. + // TODO(kcc): find a more scientific way to mix bits, e.g. switch to Murmur.
+ constexpr uint64_t kPrime = 13441014529ULL; + return bits * kPrime; +} + +} // namespace fuzztest::internal + +#endif // THIRD_PARTY_CENTIPEDE_INT_UTILS_H_ diff --git a/src/third_party/fuzztest/dist/centipede/int_utils_test.cc b/src/third_party/fuzztest/dist/centipede/int_utils_test.cc new file mode 100644 index 00000000000..76a8980c34c --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/int_utils_test.cc @@ -0,0 +1,64 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "./centipede/int_utils.h" + +#include +#include +#include + +#include "gtest/gtest.h" +#include "absl/container/flat_hash_set.h" + +namespace fuzztest::internal { +namespace { + +TEST(IntUtilsTest, Hash64Bits) { + // Run a large sample of small integers and verify that lower X bits + // of Hash64Bits(), for X in 64, 48, 32, and 20, are unique. 
+ absl::flat_hash_set set64; + absl::flat_hash_set set48; + absl::flat_hash_set set32; + absl::flat_hash_set set20; + size_t num_values = 0; + constexpr uint64_t kMaxIntToCheck = 1ULL << 28; + constexpr uint64_t kMask48 = (1ULL << 48) - 1; + constexpr uint64_t kMask32 = (1ULL << 32) - 1; + constexpr uint64_t kMask20 = (1ULL << 20) - 1; + for (uint64_t i = 0; i < kMaxIntToCheck; i += 101, ++num_values) { + set64.insert(Hash64Bits(i)); + set48.insert(Hash64Bits(i) & kMask48); + set32.insert(Hash64Bits(i) & kMask32); + set20.insert(Hash64Bits(i) & kMask20); + } + EXPECT_EQ(set64.size(), num_values); + EXPECT_EQ(set48.size(), num_values); + EXPECT_EQ(set32.size(), num_values); + EXPECT_EQ(set20.size(), 1 << 20); // all possible 20-bit numbers. + + // For a large number of pairs of small integers {i, j} verify that + // values of Hash64Bits(i) ^ (j) are unique. + set64.clear(); + num_values = 0; + for (uint64_t i = 0; i < kMaxIntToCheck; i += 100000) { + for (uint64_t j = 1; j < kMaxIntToCheck; j += 100000) { + set64.insert(Hash64Bits(i) ^ (j)); + ++num_values; + } + } + EXPECT_EQ(set64.size(), num_values); +} + +} // namespace +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/knobs.cc b/src/third_party/fuzztest/dist/centipede/knobs.cc new file mode 100644 index 00000000000..e2025b93226 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/knobs.cc @@ -0,0 +1,33 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "./centipede/knobs.h" + +#include +#include + +namespace fuzztest::internal { +size_t Knobs::next_id_ = 0; +std::string_view Knobs::knob_names_[kNumKnobs]; + +KnobId Knobs::NewId(std::string_view knob_name) { + if (next_id_ >= kNumKnobs) { + // If we've run out of IDs, log using stderr (don't use extra deps). + fprintf(stderr, "Knobs::NewId: no more IDs left, aborting\n"); + __builtin_trap(); + } + knob_names_[next_id_] = knob_name; + return next_id_++; +} +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/knobs.h b/src/third_party/fuzztest/dist/centipede/knobs.h new file mode 100644 index 00000000000..177cd578a34 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/knobs.h @@ -0,0 +1,203 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef THIRD_PARTY_CENTIPEDE_KNOBS_H_ +#define THIRD_PARTY_CENTIPEDE_KNOBS_H_ + +#include +#include +#include +#include +#include + +#include "absl/types/span.h" +#include "./common/defs.h" + +namespace fuzztest::internal { + +// Opaque ID object to be used by Knobs. +// Supported usage: +// * Create a new KnobId global object via Knobs::NewId(). +// * Compare two KnobIds for equality. +// * Pass to Knobs' member functions.
+class KnobId { + public: + bool operator==(const KnobId& other) const { return id_ == other.id_; } + + private: + friend class Knobs; + FRIEND_TEST(Knobs, Choose); + KnobId(size_t id) : id_(id) {} + KnobId() = default; + size_t id() const { return id_; } + + size_t id_ = {}; +}; + +// Knobs (will) control all randomized choices made by the fuzzing engine. +// +// The intent is to find optimal values for knobs using machine learning. +// +// Examples of the choices that the engine can make using knobs: +// * Choosing whether to add a given element to the corpus based on what +// features it has, its size, its resource consumption, etc. +// * Choosing a corpus element to mutate, or an element pair to cross-over. +// E.g. make the choice depending on the features associated with elements, +// their sizes, etc. +// * Choosing how to mutate. +// E.g. whether to insert, overwrite, swap, etc., or whether to cross-over. +// +// `Knobs` is effectively a fixed-size array of bytes with named elements. +// The engine loads this array at startup or uses a default value of zero. +// The engine may also pass Knobs to a custom mutator that supports it. +// +// Each knob has its own interpretation. +// Some knobs are probability weights, with `0` meaning "never" or "rare" +// and 255 meaning "frequently". +// Some knobs have a meaning in combination with other knobs, e.g. +// when choosing one of N strategies, N knobs will be used as weights. +// Some knobs may mean the number of repetitions of a certain process. +// +// A knob value is accessed via a KnobId. +// KnobIds are created by Knobs::NewId() as file-scope globals. +// The allocation of KnobIds is stable between the executions of the engine, +// but will change when the engine changes in some significant way +// (e.g. new knobs are added/removed or linking order changes). +// I.e. the optimal knob values will need to be re-learned after major changes +// in the engine.
+// This way knobs can be created locally in every source file, w/o having a +// centralized knob repository. +// +// A KnobId can be used to access a knob value: Knobs::Value(). +// A set of KnobIds can be used to choose from several choices: Knobs::Choose(). +// One KnobId can be used to choose from two choices: Knobs::GenerateBool(). +// +// TODO(kcc): figure out how to share knobs with other processes/binaries, +// such as custom mutators. +class Knobs { + public: + // Total number of knobs. Keep it small-ish for now. + static constexpr size_t kNumKnobs = 32; + using value_type = uint8_t; + using signed_value_type = int8_t; + + // Creates and returns a new KnobId and associates a `knob_name` with it. + // Must be called at the process startup (assign the result to a global): + // static const KnobId knob_weight_of_foo = Knobs::NewId("weight_of_foo"); + // Traps if it runs out of IDs (at most `kNumKnobs` can be created). + static KnobId NewId(std::string_view knob_name); + + // Returns the name associated with `knob_id`. + static std::string_view Name(KnobId knob_id) { + return knob_names_[knob_id.id()]; + } + + // Sets all knobs to the same value `value`. + void Set(value_type value) { + for (auto& knob : knobs_) { + knob = value; + } + } + + // Sets the knobs to values from `values`. If `values.size() < kNumKnobs`, + // only the first `values.size()` knobs are set; values past kNumKnobs are ignored. + void Set(absl::Span values) { + size_t n = std::min(kNumKnobs, values.size()); + for (size_t i = 0; i < n; ++i) { + knobs_[i] = values[i]; + } + } + + // Returns the value associated with `knob_id`. Traps if the id is out of range. + value_type Value(KnobId knob_id) const { + if (knob_id.id() >= kNumKnobs) __builtin_trap(); + return knobs_[knob_id.id()]; + } + + // Returns Value(knob_id) converted to `signed_value_type`. + signed_value_type SignedValue(KnobId knob_id) const { return Value(knob_id); } + + // Calls `callback(Name, Value)` for every KnobId created by NewId().
+ void ForEachKnob( + const std::function& callback) + const { + for (size_t i = 0; i < next_id_; ++i) { + callback(Name(i), Value(i)); + } + } + + // Returns one of the `choices`. + // `knob_ids` and `choices` must have the same size and be non-empty (traps otherwise). + // Uses knob values associated with knob_ids as probability weights for + // respective choices. + // E.g. if knobs.Value(knobA) == 100 and knobs.Value(knobB) == 10, then + // Choose<...>({knobA, knobB}, {A, B}, rng()) is approximately 10x more likely + // to return A than B. + // + // If all knob values are zero, behaves as if they were all 1. + // + // `random` is a random number derived from an RNG. + // TODO(kcc): consider making this more similar to GenerateBool() and + // requiring 1 knob fewer than choices.size(). + template + T Choose(absl::Span knob_ids, absl::Span choices, + uint64_t random) const { + if (choices.empty()) __builtin_trap(); + if (knob_ids.size() != choices.size()) __builtin_trap(); + size_t sum = 0; + for (auto knob_id : knob_ids) { + sum += Value(knob_id); + } + if (sum == 0) return choices[random % choices.size()]; + random %= sum; + size_t partial_sum = 0; + size_t idx = 0; + for (auto knob_id : knob_ids) { + partial_sum += Value(knob_id); + if (partial_sum > random) return choices[idx]; + ++idx; + } + __builtin_unreachable(); + } + + // Chooses between two strategies, i.e. returns true or false. + // Treats the value of the knob associated with `knob_id` as a signed integer. + // If knob == -128, returns false. If knob == 127 returns true. + // For other values, returns randomly true or false, with higher probability + // of true for higher values of knob. + // If knob == 0, returns true with a ~ 50% chance. + // `random` is a random number used to produce the random choice.
+ bool GenerateBool(KnobId knob_id, uint64_t random) const { + signed_value_type signed_value = SignedValue(knob_id); // in [-128,127] + signed_value_type rand = random % 255 - 127; // in [-127,127] + // signed_value == 127 => always true. + // signed_value == -128 => always false. + // signed_value == 0 => true ~ half the time. + return signed_value >= rand; + } + + // Variant of Choose() where the choices are KnobIds themselves. + // Returns one of the `choices` based on the respective knobs. + KnobId Choose(absl::Span choices, uint64_t random) const { + return Choose(choices, choices, random); + } + + private: + static size_t next_id_; + static std::string_view knob_names_[kNumKnobs]; + value_type knobs_[kNumKnobs] = {}; +}; +} // namespace fuzztest::internal + +#endif // THIRD_PARTY_CENTIPEDE_KNOBS_H_ diff --git a/src/third_party/fuzztest/dist/centipede/knobs_test.cc b/src/third_party/fuzztest/dist/centipede/knobs_test.cc new file mode 100644 index 00000000000..ec37a51a90a --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/knobs_test.cc @@ -0,0 +1,120 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "./centipede/knobs.h" + +#include +#include +#include + +#include "gtest/gtest.h" +#include "absl/container/flat_hash_map.h" +#include "absl/strings/str_cat.h" +#include "absl/types/span.h" + +namespace fuzztest::internal { + +static const KnobId knob0 = Knobs::NewId("kn0"); +static const KnobId knob1 = Knobs::NewId("kn1"); +static const KnobId knob2 = Knobs::NewId("kn2"); +static const KnobId knob3 = Knobs::NewId("kn3"); + +TEST(Knobs, Name) { + EXPECT_EQ(Knobs::Name(knob0), "kn0"); + EXPECT_EQ(Knobs::Name(knob1), "kn1"); + EXPECT_EQ(Knobs::Name(knob2), "kn2"); + EXPECT_EQ(Knobs::Name(knob3), "kn3"); +} + +// Depends on FRIEND_TEST in KnobId - don't rename. +TEST(Knobs, Choose) { + Knobs knobs; + + EXPECT_EQ(knob0.id(), 0); + EXPECT_EQ(knob1.id(), 1); + EXPECT_EQ(knobs.Choose({knob3, knob2, knob1}, 0), knob3); + EXPECT_EQ(knobs.Choose({knob3, knob2, knob1}, 1), knob2); + EXPECT_EQ(knobs.Choose({knob3, knob2, knob1}, 2), knob1); + + constexpr size_t kNumIter = 1000000; + knobs.Set(16); + absl::flat_hash_map id_to_freq; + for (size_t iter = 0; iter < kNumIter; ++iter) { + ++id_to_freq[knobs.Choose({knob3, knob2, knob1}, iter).id()]; + } + EXPECT_EQ(id_to_freq[knob0.id()], 0); + EXPECT_GE(id_to_freq[knob1.id()], kNumIter / 4); + EXPECT_GE(id_to_freq[knob2.id()], kNumIter / 4); + EXPECT_GE(id_to_freq[knob3.id()], kNumIter / 4); + + knobs.Set({100, 0, 10, 1}); + id_to_freq.clear(); + for (size_t iter = 0; iter < kNumIter; ++iter) { + ++id_to_freq[knobs.Choose({knob0, knob1, knob2, knob3}, iter).id()]; + } + EXPECT_EQ(id_to_freq[knob1.id()], 0); + EXPECT_GT(id_to_freq[knob0.id()], 9 * id_to_freq[knob2.id()]); + EXPECT_GT(id_to_freq[knob2.id()], 9 * id_to_freq[knob3.id()]); + EXPECT_GT(id_to_freq[knob3.id()], kNumIter / 200); + + absl::flat_hash_map str_to_freq; + for (size_t iter = 0; iter < kNumIter; ++iter) { + ++str_to_freq[knobs.Choose({knob0, knob2}, {"AAA", "BBB"}, + iter)]; + } + EXPECT_GT(str_to_freq["AAA"], 9 * str_to_freq["BBB"]); + 
EXPECT_GT(str_to_freq["BBB"], kNumIter / 200); +} + +TEST(Knobs, GenerateBool) { + Knobs knobs; + constexpr size_t kNumIter = 255; + // Checks the GenerateBool on kNumIter different (fake) random values, + // verifies the expected number of "true" results. + auto check = [&](Knobs::value_type knob_value, + size_t expected_num_true_results) { + knobs.Set(knob_value); + size_t num_true = 0; + for (size_t fake_random = 0; fake_random < kNumIter; ++fake_random) { + if (knobs.GenerateBool(knob0, fake_random)) ++num_true; + } + EXPECT_EQ(num_true, expected_num_true_results); + }; + + check(0, kNumIter / 2 + 1); // true half the time + check(-128, 0); // Never true + check(127, kNumIter); // Always true. + for (int8_t i = -127; i < 127; i++) { + // The greater the knob value, the more frequently we see true. + check(i, 128 + i); + } +} + +TEST(KnobsDeathTest, NewId) { + auto allocate_too_many_knob_ids = []() { + for (size_t i = 0; i < Knobs::kNumKnobs; ++i) { + Knobs::NewId(absl::StrCat("kn", i)); + } + }; + EXPECT_DEATH(allocate_too_many_knob_ids(), + "Knobs::NewId: no more IDs left, aborting"); +} + +TEST(KnobsDeathTest, Choose) { + Knobs knobs; + EXPECT_DEATH(knobs.Choose({}, {}, 0), ""); + EXPECT_DEATH(knobs.Choose({knob1, knob2}, {1}, 0), ""); +} + +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/minimize_crash.cc b/src/third_party/fuzztest/dist/centipede/minimize_crash.cc new file mode 100644 index 00000000000..a9c081f4c5b --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/minimize_crash.cc @@ -0,0 +1,169 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "./centipede/minimize_crash.h" + +#include +#include +#include +#include // NOLINT +#include +#include +#include + +#include "absl/base/thread_annotations.h" +#include "absl/log/check.h" +#include "absl/log/log.h" +#include "absl/synchronization/mutex.h" +#include "./centipede/centipede_callbacks.h" +#include "./centipede/environment.h" +#include "./centipede/mutation_input.h" +#include "./centipede/runner_result.h" +#include "./centipede/stop.h" +#include "./centipede/thread_pool.h" +#include "./centipede/util.h" +#include "./centipede/workdir.h" +#include "./common/defs.h" +#include "./common/hash.h" +#include "./common/logging.h" // IWYU pragma: keep + +namespace fuzztest::internal { + +// Work queue for the minimizer. +// Thread-safe. +struct MinimizerWorkQueue { + public: + // Creates the queue. + // `crash_dir_path` is the directory path where new crashers are written. + // `crasher` is the initial crashy input. + MinimizerWorkQueue(const std::string_view crash_dir_path, + const ByteArray crasher) + : crash_dir_path_(crash_dir_path), crashers_{ByteArray(crasher)} { + std::filesystem::create_directory(crash_dir_path_); + } + + // Returns up to `max_num_crashers` most recently added crashers. + std::vector GetRecentCrashers(size_t max_num_crashers) { + absl::MutexLock lock(&mutex_); + size_t num_crashers_to_return = + std::min(crashers_.size(), max_num_crashers); + return {crashers_.end() - num_crashers_to_return, crashers_.end()}; + } + + // Adds `crasher` to the queue, writes it to `crash_dir_path_/Hash(crasher)`. 
+ // The crasher must be smaller than the original one. + void AddCrasher(ByteArray crasher) { + absl::MutexLock lock(&mutex_); + CHECK_LT(crasher.size(), crashers_.front().size()); + crashers_.emplace_back(crasher); + // Write the crasher to disk. + auto hash = Hash(crasher); + auto dir = crash_dir_path_; + std::string file_path = dir.append(hash); + WriteToLocalFile(file_path, crasher); + } + + // Returns true if new smaller crashes were found. + bool SmallerCrashesFound() const { + absl::MutexLock lock(&mutex_); + return crashers_.size() > 1; + } + + private: + mutable absl::Mutex mutex_; + const std::filesystem::path crash_dir_path_; + std::vector crashers_ ABSL_GUARDED_BY(mutex_); +}; + +// Performs a minimization loop in one thread. +static void MinimizeCrash(const Environment &env, + CentipedeCallbacksFactory &callbacks_factory, + MinimizerWorkQueue &queue) { + ScopedCentipedeCallbacks scoped_callback(callbacks_factory, env); + auto callbacks = scoped_callback.callbacks(); + BatchResult batch_result; + + size_t num_batches = env.num_runs / env.batch_size; + for (size_t i = 0; i < num_batches; ++i) { + LOG_EVERY_POW_2(INFO) << "[" << i << "] Minimizing... Interrupt to stop"; + if (ShouldStop()) break; + // Get up to kMaxNumCrashersToGet most recent crashers. We don't want just + // the most recent crasher to avoid being stuck in local minimum. + constexpr size_t kMaxNumCrashersToGet = 20; + const auto recent_crashers = queue.GetRecentCrashers(kMaxNumCrashersToGet); + CHECK(!recent_crashers.empty()); + // Compute the minimal known crasher size. + size_t min_known_size = recent_crashers.front().size(); + for (const auto &crasher : recent_crashers) { + min_known_size = std::min(min_known_size, crasher.size()); + } + + // Create several mutants that are smaller than the current smallest one. + // + // Currently, we do this by calling the vanilla mutator and + // discarding all inputs that are too large. 
+ // TODO(kcc): modify the Mutate() interface such that max_len can be passed. + // + const std::vector mutants = callbacks->Mutate( + GetMutationInputRefsFromDataInputs(recent_crashers), env.batch_size); + std::vector smaller_mutants; + for (const auto &m : mutants) { + if (m.size() < min_known_size) smaller_mutants.push_back(m); + } + + // Execute all mutants. If a new crasher is found, add it to `queue`. + if (!callbacks->Execute(env.binary, smaller_mutants, batch_result)) { + size_t crash_inputs_idx = batch_result.num_outputs_read(); + CHECK_LT(crash_inputs_idx, smaller_mutants.size()); + const auto &new_crasher = smaller_mutants[crash_inputs_idx]; + LOG(INFO) << "Crasher: size: " << new_crasher.size() << ": " + << AsPrintableString(new_crasher, /*max_len=*/40); + queue.AddCrasher(new_crasher); + } + } +} + +int MinimizeCrash(ByteSpan crashy_input, const Environment &env, + CentipedeCallbacksFactory &callbacks_factory) { + ScopedCentipedeCallbacks scoped_callback(callbacks_factory, env); + auto callbacks = scoped_callback.callbacks(); + + LOG(INFO) << "MinimizeCrash: trying the original crashy input"; + + BatchResult batch_result; + ByteArray original_crashy_input(crashy_input.begin(), crashy_input.end()); + if (callbacks->Execute(env.binary, {original_crashy_input}, batch_result)) { + LOG(INFO) << "The original crashy input did not crash; exiting"; + return EXIT_FAILURE; + } + + LOG(INFO) << "Starting the crash minimization loop in " << env.num_threads + << " threads"; + + MinimizerWorkQueue queue(WorkDir{env}.CrashReproducerDirPaths().MyShard(), + original_crashy_input); + + { + ThreadPool threads{static_cast(env.num_threads)}; + for (size_t i = 0; i < env.num_threads; ++i) { + threads.Schedule([&env, &callbacks_factory, &queue]() { + MinimizeCrash(env, callbacks_factory, queue); + }); + } + } // The threads join here. + + return queue.SmallerCrashesFound() ? 
EXIT_SUCCESS : EXIT_FAILURE; +} + +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/minimize_crash.h b/src/third_party/fuzztest/dist/centipede/minimize_crash.h new file mode 100644 index 00000000000..5677565d1be --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/minimize_crash.h @@ -0,0 +1,36 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef THIRD_PARTY_CENTIPEDE_MINIMIZE_CRASH_H_ +#define THIRD_PARTY_CENTIPEDE_MINIMIZE_CRASH_H_ + +#include "./centipede/centipede_callbacks.h" +#include "./centipede/environment.h" +#include "./common/defs.h" + +namespace fuzztest::internal { + +// Tries to minimize `crashy_input`. +// Uses `callbacks_factory` to create `env.num_threads` workers. +// Returns EXIT_SUCCESS if at least one smaller crasher was found, +// EXIT_FAILURE otherwise. +// Also returns EXIT_FAILURE if the original input didn't crash. +// Stores the newly found crashy inputs in +// `WorkDir{env}.CrashReproducerDirPaths().MyShard()`. 
+int MinimizeCrash(ByteSpan crashy_input, const Environment &env, + CentipedeCallbacksFactory &callbacks_factory); + +} // namespace fuzztest::internal + +#endif // THIRD_PARTY_CENTIPEDE_MINIMIZE_CRASH_H_ diff --git a/src/third_party/fuzztest/dist/centipede/minimize_crash_test.cc b/src/third_party/fuzztest/dist/centipede/minimize_crash_test.cc new file mode 100644 index 00000000000..30a145a9e9b --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/minimize_crash_test.cc @@ -0,0 +1,113 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "./centipede/minimize_crash.h" + +#include +#include // NOLINT +#include +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/nullability.h" +#include "./centipede/centipede_callbacks.h" +#include "./centipede/environment.h" +#include "./centipede/runner_result.h" +#include "./centipede/util.h" +#include "./centipede/workdir.h" +#include "./common/defs.h" +#include "./common/test_util.h" + +namespace fuzztest::internal { +namespace { + +// A mock for CentipedeCallbacks. +class MinimizerMock : public CentipedeCallbacks { + public: + MinimizerMock(const Environment &env) : CentipedeCallbacks(env) {} + + // Runs FuzzMe() on every input, imitates failure if FuzzMe() returns true. 
+ bool Execute(std::string_view binary, const std::vector &inputs, + BatchResult &batch_result) override { + batch_result.ClearAndResize(inputs.size()); + for (auto &input : inputs) { + if (FuzzMe(input)) { + batch_result.exit_code() = EXIT_FAILURE; + return false; + } + ++batch_result.num_outputs_read(); + } + return true; + } + + private: + // Returns true on inputs that look like 'f???u???z', false otherwise. + // The minimal input on which this function returns true is 'fuz'. + bool FuzzMe(ByteSpan data) { + if (data.empty()) return false; + if (data.front() == 'f' && data[data.size() / 2] == 'u' && + data.back() == 'z') { + return true; + } + return false; + } +}; + +// Factory that creates/destroys MinimizerMock. +class MinimizerMockFactory : public CentipedeCallbacksFactory { + public: + CentipedeCallbacks *absl_nonnull create(const Environment &env) override { + return new MinimizerMock(env); + } + void destroy(CentipedeCallbacks *cb) override { delete cb; } +}; + +TEST(MinimizeTest, MinimizeTest) { + TempDir tmp_dir{test_info_->name()}; + Environment env; + env.workdir = tmp_dir.path(); + env.num_runs = 100000; + const WorkDir wd{env}; + MinimizerMockFactory factory; + + // Test with a non-crashy input. + EXPECT_EQ(MinimizeCrash({1, 2, 3}, env, factory), EXIT_FAILURE); + + ByteArray expected_minimized = {'f', 'u', 'z'}; + + // Test with a crashy input that can't be minimized further. + EXPECT_EQ(MinimizeCrash(expected_minimized, env, factory), EXIT_FAILURE); + + // Test the actual minimization. + ByteArray original_crasher = {'f', '.', '.', '.', '.', '.', '.', '.', + '.', '.', '.', 'u', '.', '.', '.', '.', + '.', '.', '.', '.', '.', '.', 'z'}; + EXPECT_EQ(MinimizeCrash(original_crasher, env, factory), EXIT_SUCCESS); + // Collect the new crashers from the crasher dir. 
+ std::vector crashers; + for (auto const &dir_entry : std::filesystem::directory_iterator{ + wd.CrashReproducerDirPaths().MyShard()}) { + ByteArray crasher; + const std::string &path = dir_entry.path(); + ReadFromLocalFile(path, crasher); + EXPECT_LT(crasher.size(), original_crasher.size()); + crashers.push_back(crasher); + } + EXPECT_THAT(crashers, testing::Contains(expected_minimized)); +} + +} // namespace +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/mutation_input.h b/src/third_party/fuzztest/dist/centipede/mutation_input.h new file mode 100644 index 00000000000..504c75327f9 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/mutation_input.h @@ -0,0 +1,53 @@ +// Copyright 2023 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Data types used for mutation inputs. +// +// This library is for both engine and runner. + +#ifndef THIRD_PARTY_CENTIPEDE_MUTATION_INPUT_H_ +#define THIRD_PARTY_CENTIPEDE_MUTATION_INPUT_H_ + +#include + +#include "./centipede/execution_metadata.h" +#include "./common/defs.h" + +namespace fuzztest::internal { + +// {data (required), metadata (optional)} reference pairs as mutation inputs. 
+struct MutationInputRef { + const ByteArray &data; + const ExecutionMetadata *metadata = nullptr; +}; + +inline std::vector CopyDataFromMutationInputRefs( + const std::vector &inputs) { + std::vector results; + results.reserve(inputs.size()); + for (const auto &input : inputs) results.push_back(input.data); + return results; +} + +inline std::vector GetMutationInputRefsFromDataInputs( + const std::vector &inputs) { + std::vector results; + results.reserve(inputs.size()); + for (const auto &input : inputs) results.push_back({/*data=*/input}); + return results; +} + +} // namespace fuzztest::internal + +#endif // THIRD_PARTY_CENTIPEDE_MUTATION_INPUT_H_ diff --git a/src/third_party/fuzztest/dist/centipede/mutation_input_test.cc b/src/third_party/fuzztest/dist/centipede/mutation_input_test.cc new file mode 100644 index 00000000000..f1bc7414c48 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/mutation_input_test.cc @@ -0,0 +1,37 @@ +// Copyright 2023 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "./centipede/mutation_input.h" + +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "./common/defs.h" + +namespace fuzztest::internal { +namespace { + +TEST(MutationInputTest, ConvertsDataToMutationInputRefsAndBack) { + EXPECT_THAT( + CopyDataFromMutationInputRefs(GetMutationInputRefsFromDataInputs({})), + testing::IsEmpty()); + std::vector data_inputs = {{0}, {1}}; + EXPECT_EQ(CopyDataFromMutationInputRefs( + GetMutationInputRefsFromDataInputs(data_inputs)), + data_inputs); +} + +} // namespace +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/pc_info.cc b/src/third_party/fuzztest/dist/centipede/pc_info.cc new file mode 100644 index 00000000000..9d99cb528c8 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/pc_info.cc @@ -0,0 +1,53 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "./centipede/pc_info.h" + +#include +#include +#include +#include +#include + +#include "absl/log/check.h" +#include "absl/types/span.h" +#include "./common/defs.h" + +namespace fuzztest::internal { + +bool PCInfo::operator==(const PCInfo &rhs) const { + return this->pc == rhs.pc && this->flags == rhs.flags; +} + +PCTable ReadPcTable(std::istream &in) { + std::string input_string(std::istreambuf_iterator(in), {}); + + ByteArray pc_infos_as_bytes(input_string.begin(), input_string.end()); + CHECK_EQ(pc_infos_as_bytes.size() % sizeof(PCInfo), 0); + size_t pc_table_size = pc_infos_as_bytes.size() / sizeof(PCInfo); + const auto *pc_infos = reinterpret_cast(pc_infos_as_bytes.data()); + PCTable pc_table{pc_infos, pc_infos + pc_table_size}; + CHECK_EQ(pc_table.size(), pc_table_size); + + return pc_table; +} + +void WritePcTable(const PCTable &pc_table, std::ostream &out) { + auto pc_infos_as_bytes = + absl::Span(reinterpret_cast(pc_table.data()), + sizeof(PCInfo) * pc_table.size()); + out.write(pc_infos_as_bytes.data(), pc_infos_as_bytes.size()); +} + +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/pc_info.h b/src/third_party/fuzztest/dist/centipede/pc_info.h new file mode 100644 index 00000000000..d41a5523ab7 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/pc_info.h @@ -0,0 +1,96 @@ +// Copyright 2023 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef THIRD_PARTY_CENTIPEDE_PC_INFO_H_ +#define THIRD_PARTY_CENTIPEDE_PC_INFO_H_ + +#include +#include +#include +#include +#include +#include + +namespace fuzztest::internal { + +// PCInfo is a pair {PC, bit mask with PC flags}. +// PCInfo objects are generated by the compiler and can be extracted from the +// binary, see https://clang.llvm.org/docs/SanitizerCoverage.html#pc-table. +// PCInfo objects can also be created by analyzing the binary with objdump. +struct PCInfo { + enum PCFlags : uintptr_t { + kFuncEntry = 1 << 0, // The PC is the function entry block. + }; + + uintptr_t pc{}; + uintptr_t flags{}; + + bool has_flag(PCFlags f) const { return flags & f; } + + bool operator==(const PCInfo &rhs) const; +}; + +// Array of PCInfo-s. +// PCTable is created by the compiler/linker in the instrumented binary. +// The order of elements is significant: each element corresponds +// to the coverage counter with the same index. +// Every PCInfo that is kFuncEntry is followed by PCInfo-s from the same +// function. +using PCTable = std::vector; + +// Reads a PCTable from `in`, returns it. Returns empty table on error. +PCTable ReadPcTable(std::istream &in); + +// Writes the contents of `pc_table` in the format expected by `ReadPcTable`. +void WritePcTable(const PCTable &pc_table, std::ostream &out); + +// PCGuard is used during run-time as a compressed reference to PCInfo. +// The SanitizerCoverage's 'Tracing PCs with guards' +// (https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-pcs-with-guards) +// passes a pointer to a 32-bit object, unique for every instrumented PC. +// At the DSO initialization time, we initialize these unique 32-bit objects +// to represent the information about the PCs. +struct PCGuard { + // True if this is a function's entry PC. + // This information may not be available, and so the code should tolerate + // the situation where no PC is marked as a function entry. + uint32_t is_function_entry : 1; + // The index of the PC. 
+ uint32_t pc_index : 31; + + // pc_index is 31-bit, so we can't have more than this number of PCs. + static constexpr size_t kMaxNumPCs = 1ULL << 31; + + // Invalid value of pc_index. + static constexpr uint32_t kInvalidPcIndex = kMaxNumPCs - 1; + + // Returns true if `*this` is valid. + bool IsValid() const { return pc_index != kInvalidPcIndex; } +}; + +// DsoInfo represents a single SanCov-instrumented DSO (library or main binary). +struct DsoInfo { + // Path to the file on disk, which can be used for symbolization. + std::string path; + + // Number of SanCov-instrumented PCs in this DSO. + size_t num_instrumented_pcs = 0; +}; + +// Array of DsoInfo. +using DsoTable = std::vector; + +} // namespace fuzztest::internal + +#endif // THIRD_PARTY_CENTIPEDE_PC_INFO_H_ diff --git a/src/third_party/fuzztest/dist/centipede/pc_info_test.cc b/src/third_party/fuzztest/dist/centipede/pc_info_test.cc new file mode 100644 index 00000000000..c0d92d1db41 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/pc_info_test.cc @@ -0,0 +1,36 @@ +// Copyright 2023 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "./centipede/pc_info.h" + +#include + +#include "gtest/gtest.h" + +namespace fuzztest::internal { +namespace { + +TEST(PCTableTest, SerializesAndDeserializesPCInfoSuccessfully) { + PCTable input = {{/*pc=*/0, /*flags=*/1}, {/*pc=*/2, /*flags=*/3}}; + + std::stringstream stream; + WritePcTable(input, stream); + + PCTable output = ReadPcTable(stream); + + EXPECT_EQ(input, output); +} + +} // namespace +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/periodic_action.cc b/src/third_party/fuzztest/dist/centipede/periodic_action.cc new file mode 100644 index 00000000000..98354965029 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/periodic_action.cc @@ -0,0 +1,127 @@ +// Copyright 2024 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "./centipede/periodic_action.h" + +#include +#include +#include +#include + +#include "absl/base/thread_annotations.h" +#include "absl/functional/any_invocable.h" +#include "absl/synchronization/mutex.h" +#include "absl/time/time.h" + +namespace fuzztest::internal { + +class PeriodicAction::Impl { + public: + Impl(absl::AnyInvocable action, PeriodicAction::Options options) + : action_{std::move(action)}, + options_{std::move(options)}, + thread_{[this]() { RunLoop(); }} {} + + void Stop() { + StopAsync(); + // The run-loop should exit the next time it checks `stop_`. 
Note that if + // the loop is currently in the middle of an invocation of `action_`, it + // will wait for the invocation to finish, so we might block here for an + // `action_`-dependent amount of time. + if (thread_.joinable()) { + thread_.join(); + } + } + + void StopAsync() { + absl::MutexLock lock{&mu_}; + stop_ = true; + } + + void Nudge() { + absl::MutexLock lock{&mu_}; + nudge_ = true; + } + + private: + void RunLoop() { + uint64_t iteration = 0; + while (true) { + SleepOrWakeEarly(options_.sleep_before_each(iteration)); + const bool schedule = !nudge_ && !stop_; + const bool nudge = nudge_; + const bool stop = stop_; + mu_.Unlock(); + // NOTE: The caller might call `Stop()` immediately after one final + // `Nudge()`: in that case we still should run the action, and only then + // terminate the loop. This is in contrast to waking after sleeping the + // full duration while the caller calls `Stop()` during that time: in that + // case, we should NOT run the action and terminate the loop immediately. + if (schedule || nudge) { + action_(); + } + if (stop) { + return; + } + ++iteration; + } + } + + void SleepOrWakeEarly(absl::Duration duration) + ABSL_EXCLUSIVE_LOCK_FUNCTION(mu_) { + mu_.Lock(); + // NOTE: Reset only `nudge_`, but not `stop_`: nudging is transient and + // can be activated repeatedly, the latter is persistent and can be + // activated only once (repeated calls to `Stop()` are no-ops). + nudge_ = false; + mu_.Unlock(); + const auto wake_early = [this]() { + mu_.AssertReaderHeld(); + return nudge_ || stop_; + }; + mu_.LockWhenWithTimeout(absl::Condition{&wake_early}, duration); + mu_.AssertHeld(); + } + + absl::AnyInvocable action_; + PeriodicAction::Options options_; + + // WARNING!!! The order below is important. 
+ absl::Mutex mu_; + bool nudge_ ABSL_GUARDED_BY(mu_) = false; + bool stop_ ABSL_GUARDED_BY(mu_) = false; + std::thread thread_; +}; + +PeriodicAction::PeriodicAction( // + absl::AnyInvocable action, Options options) + : pimpl_{std::make_unique(std::move(action), std::move(options))} {} + +PeriodicAction::~PeriodicAction() { + // NOTE: `pimpl_` will be null if this object has been moved to another one. + if (pimpl_ != nullptr) pimpl_->Stop(); +} + +void PeriodicAction::Stop() { pimpl_->Stop(); } + +void PeriodicAction::StopAsync() { pimpl_->StopAsync(); } + +void PeriodicAction::Nudge() { pimpl_->Nudge(); } + +// NOTE: Even though these are defaulted, they still must be defined here in the +// .cc, because `Impl` is an incomplete type in the .h. +PeriodicAction::PeriodicAction(PeriodicAction&&) = default; +PeriodicAction& PeriodicAction::operator=(PeriodicAction&&) = default; + +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/periodic_action.h b/src/third_party/fuzztest/dist/centipede/periodic_action.h new file mode 100644 index 00000000000..302d9a07a5a --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/periodic_action.h @@ -0,0 +1,116 @@ +// Copyright 2024 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// `PeriodicAction` runs a separate thread that invokes a user-provided callback +// at the specified interval. 
The user can request an out-of-schedule invocation +// of the callback by "nudging" the action object. +// +// Example: +// MyStats stats = ...; +// PeriodicAction stats_logger{ +// [&stats]() { LOG(INFO) << "Current stats are: " << stats; }, +// {.delay = absl::Minutes(5), .interval = absl::Minutes(1)} +// }; +// while (true) { +// Foo(); +// Bar(); +// if (HaveUpdate()) { +// stats_logger.Nudge(); +// } +// } + +#ifndef FUZZTEST_CENTIPEDE_PERIODIC_ACTION_H_ +#define FUZZTEST_CENTIPEDE_PERIODIC_ACTION_H_ + +#include +#include + +#include "absl/functional/any_invocable.h" +#include "absl/time/time.h" + +namespace fuzztest::internal { + +class PeriodicAction { + public: + struct Options { + // The interval to sleep for before a given iteration. Iteration numbers are + // 0-based. + // + // Thus, the interval before `iteration == 0` is the delay before the first + // invocation of the action, the interval before `iteration == 1` is the + // interval between the first and the second invocation, etc. + // + // This is a functor and not a fixed value to enable dynamic intervals (the + // caller can use static functor state for that). Note that + // `PeriodicAction::Nudge()` calls trigger out-of-schedule invocations and + // count as iterations (therefore incrementing the internal iteration + // counter and resetting the timer). + // + // If `sleep_before_each()` ever returns an `absl::InfiniteDuration()`, then + // periodic action execution will be paused and resumed only by the next + // `Nudge()` call. + absl::AnyInvocable sleep_before_each; + }; + + // Convenience factory methods for common options. + static Options ConstDelayConstInterval( // + absl::Duration delay, absl::Duration interval) { + return { + [delay, interval](uint64_t i) { return i == 0 ? 
delay : interval; }, + }; + } + static Options ZeroDelayZeroInterval() { + return ConstDelayConstInterval(absl::ZeroDuration(), absl::ZeroDuration()); + } + static Options ZeroDelayConstInterval(absl::Duration interval) { + return ConstDelayConstInterval(absl::ZeroDuration(), interval); + } + static Options ConstDelayZeroInterval(absl::Duration delay) { + return ConstDelayConstInterval(delay, absl::ZeroDuration()); + } + + PeriodicAction(absl::AnyInvocable action, Options options); + + // Movable, but not copyable. + PeriodicAction(PeriodicAction&&); + PeriodicAction& operator=(PeriodicAction&&); + + // Stops the periodic action via RAII. May block: waits for any currently + // active invocation of the action to finish first before returning. + ~PeriodicAction(); + + // Stops the periodic action explicitly. May block: waits for any currently + // active invocation of the action to finish first before returning. + void Stop(); + // The same as `Stop()`, but returns immediately without waiting for any + // currently active invocation to finish. + void StopAsync(); + + // Triggers an out-of-schedule invocation of the action and resets the + // timer. If a previously scheduled or nudged invocation of the action is + // currently active, it will be allowed to finish before the nudged one + // starts. However, the `Nudge()` call itself returns immediately without + // waiting for either one to finish. + void Nudge(); + + private: + // Use the "pointer to implementation" idiom to make the class movable and + // move-constructible. 
+ class Impl; + std::unique_ptr pimpl_; +}; + +} // namespace fuzztest::internal + +#endif // FUZZTEST_CENTIPEDE_PERIODIC_ACTION_H_ diff --git a/src/third_party/fuzztest/dist/centipede/periodic_action_test.cc b/src/third_party/fuzztest/dist/centipede/periodic_action_test.cc new file mode 100644 index 00000000000..d858e0d6e7e --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/periodic_action_test.cc @@ -0,0 +1,228 @@ +// Copyright 2024 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "./centipede/periodic_action.h" + +#include +#include +#include +#include // NOLINT: For `std::this_thread::get_id()` only. 
+#include +#include + +#include "gtest/gtest.h" +#include "absl/synchronization/mutex.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "./common/logging.h" + +namespace fuzztest::internal { +namespace { + +TEST(PeriodicActionTest, OnlyPeriodicInvocations) { + constexpr absl::Duration kDuration = absl::Seconds(3); + constexpr absl::Duration kPeriodicInterval = absl::Milliseconds(100); + const int kApproxCount = + std::floor(absl::FDivDuration(kDuration, kPeriodicInterval)); + int count = 0; + PeriodicAction action{ + [&count]() { ++count; }, + PeriodicAction::ZeroDelayConstInterval(kPeriodicInterval), + }; + absl::SleepFor(kDuration); + action.Stop(); + EXPECT_GE(count, kApproxCount * 0.9) << VV(kApproxCount); + EXPECT_LE(count, kApproxCount * 1.1) << VV(kApproxCount); +} + +TEST(PeriodicActionTest, OnlyNudgedInvocations) { + constexpr absl::Duration kDuration = absl::Seconds(3); + constexpr absl::Duration kNudgeInterval = absl::Milliseconds(100); + int count = 0; + PeriodicAction::Options options; + // Effectively disable periodic invocations: only `Nudge()` calls + // below will trigger them. + options.sleep_before_each = [](size_t) { return absl::InfiniteDuration(); }; + PeriodicAction action{ + [&count]() { ++count; }, + std::move(options), + }; + int expected_count = 0; + const absl::Time end_time = absl::Now() + kDuration; + while (absl::Now() < end_time) { + action.Nudge(); + // Sleep after a nudge, not before, to guarantee that the action has time + // to finish and increment `count`. 
+ absl::SleepFor(kNudgeInterval); + ++expected_count; + } + action.Stop(); + EXPECT_GE(count, expected_count * 0.9) << VV(expected_count); + EXPECT_LE(count, expected_count * 1.1) << VV(expected_count); +} + +TEST(PeriodicActionTest, PeriodicAndNudgedInvocations) { + constexpr absl::Duration kDuration = absl::Seconds(3); + constexpr absl::Duration kPeriodicInterval = absl::Milliseconds(100); + // NOTE: Use a nudge interval that is not wholly divisible by the periodic + // interval so the two events never clash. This is to make `count` + // incrementing more deterministic so that tighter bounds on its final value + // can be asserted. A looser version with clashing periodic and nudged + // invocations is implemented in another test case below. + constexpr absl::Duration kNudgeInterval = absl::Milliseconds(345); + const int kApproxPeriodicCount = + std::floor(absl::FDivDuration(kDuration, kPeriodicInterval)); + const int kApproxNudgedCount = + std::floor(absl::FDivDuration(kDuration, kNudgeInterval)); + const int kApproxCount = kApproxPeriodicCount + kApproxNudgedCount; + int count = 0; + PeriodicAction action{ + [&count]() { ++count; }, + PeriodicAction::ZeroDelayConstInterval(kPeriodicInterval), + }; + const absl::Time end_time = absl::Now() + kDuration; + while (absl::Now() < end_time) { + action.Nudge(); + // Sleep after a nudge, not before, to guarantee that the action has time to + // finish and increment `count`. 
+ absl::SleepFor(kNudgeInterval); + } + action.Stop(); + EXPECT_GE(count, kApproxCount * 0.9) + << VV(kApproxCount) << VV(kApproxPeriodicCount) << VV(kApproxNudgedCount); + EXPECT_LE(count, kApproxCount * 1.1) + << VV(kApproxCount) << VV(kApproxPeriodicCount) << VV(kApproxNudgedCount); +} + +TEST(PeriodicActionTest, ClashingPeriodicAndNudgedInvocations) { + constexpr absl::Duration kDuration = absl::Seconds(3); + constexpr absl::Duration kPeriodicInterval = absl::Milliseconds(10); + // NOTE: Use a nudge interval that is wholly divisible by the periodic + // interval so the two events overlap with high probability. + constexpr absl::Duration kNudgeInterval = absl::Milliseconds(2); + const int kMaxPeriodicCount = + std::floor(absl::FDivDuration(kDuration, kPeriodicInterval)); + const int kMaxNudgedCount = + std::floor(absl::FDivDuration(kDuration, kNudgeInterval)); + int count = 0; + PeriodicAction action{ + [&count]() { ++count; }, + PeriodicAction::ZeroDelayConstInterval(kPeriodicInterval), + }; + const absl::Time end_time = absl::Now() + kDuration; + while (absl::Now() < end_time) { + action.Nudge(); + // Sleep after a nudge, not before, to guarantee that the action has time to + // finish and increment `count`. + absl::SleepFor(kNudgeInterval); + } + action.Stop(); + // The frequent nudging should have interrupted the sleeping phase and reset + // the periodic timer a lot, so we can assert only very loose bounds on the + // final value of `count`. + EXPECT_GE(count, std::min(kMaxPeriodicCount, kMaxNudgedCount)) + << VV(kMaxPeriodicCount) << VV(kMaxNudgedCount); + EXPECT_LE(count, kMaxPeriodicCount + kMaxNudgedCount) + << VV(kMaxPeriodicCount) << VV(kMaxNudgedCount); +} + +// Test that a `Nudge()` immediately followed by an explicit `Stop()` still +// runs the action. 
+TEST(PeriodicActionTest, NudgeThenStopStillRunsAction) { + int count = 0; + absl::Mutex count_mu; + PeriodicAction action{ + [&count, &count_mu]() { + absl::MutexLock lock{&count_mu}; + ++count; + }, + PeriodicAction::ZeroDelayConstInterval(absl::InfiniteDuration()), + }; + absl::SleepFor(absl::Seconds(1)); + { + absl::MutexLock lock{&count_mu}; + EXPECT_EQ(count, 1); + } + action.Nudge(); + action.Stop(); + { + absl::MutexLock lock{&count_mu}; + EXPECT_EQ(count, 2); + } +} + +// Test that a `Nudge()` immediately followed by an implicit `Stop()` in +// `~PeriodicAction()` still runs the action. +TEST(PeriodicActionTest, NudgeThenDtorStillRunsAction) { + int count = 0; + absl::Mutex count_mu; + { + PeriodicAction action{ + [&count, &count_mu]() { + absl::MutexLock lock{&count_mu}; + ++count; + }, + PeriodicAction::ZeroDelayConstInterval(absl::InfiniteDuration()), + }; + absl::SleepFor(absl::Seconds(1)); + { + absl::MutexLock lock{&count_mu}; + EXPECT_EQ(count, 1); + } + EXPECT_EQ(count, 1); + action.Nudge(); + } + { + absl::MutexLock lock{&count_mu}; + EXPECT_EQ(count, 2); + } +} + +// The main purpose of this test is to make sure that a `PeriodicAction` object +// can be moved to another such that the original object's dtor doesn't blow up +// when it runs. +TEST(PeriodicActionTest, ActionIsMoveable) { + absl::Mutex mu; + std::vector thread_ids; + { + PeriodicAction moved_from{ + [&mu, &thread_ids]() { + absl::WriterMutexLock lock{&mu}; + thread_ids.push_back(std::this_thread::get_id()); + }, + PeriodicAction::ZeroDelayConstInterval(absl::Milliseconds(10)), + }; + absl::SleepFor(absl::Milliseconds(100)); + // Sanity check that the action is running and is healthy. + moved_from.Nudge(); + absl::SleepFor(absl::Milliseconds(100)); + // Move the action to another object. + PeriodicAction moved_to = std::move(moved_from); + absl::SleepFor(absl::Milliseconds(100)); + // The moved object should now be running the run-loop thread. 
+ moved_to.Nudge(); + absl::SleepFor(absl::Milliseconds(100)); + moved_to.Stop(); + } // The dtors for both moved-from and moved-to objects run here. + // If we reached this point, at least the dtors ran without blowing up. + ASSERT_GT(thread_ids.size(), 1); + // A single instance of the run-loop thread should have been running + // throughout the whole process, including the move: the moved-from object + // should have just handed over the thread to the moved-to object. + std::sort(thread_ids.begin(), thread_ids.end()); + ASSERT_EQ(thread_ids.front(), thread_ids.back()); +} + +} // namespace +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/resource_pool.cc b/src/third_party/fuzztest/dist/centipede/resource_pool.cc new file mode 100644 index 00000000000..28a2aec136c --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/resource_pool.cc @@ -0,0 +1,178 @@ +// Copyright 2024 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "./centipede/resource_pool.h" + +#include +#include +#include // NOLINT: For thread IDs. 
+#include + +#include "absl/log/check.h" +#include "absl/log/log.h" +#include "absl/status/status.h" +#include "absl/strings/str_cat.h" +#include "absl/synchronization/mutex.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "./centipede/rusage_stats.h" + +namespace fuzztest::internal { + +template +ResourcePool::LeaseToken::LeaseToken( // + ResourcePool& leaser, LeaseRequest request) + : leaser_{leaser}, request_{std::move(request)} {} + +template +ResourcePool::LeaseToken::LeaseToken( // + ResourcePool& leaser, LeaseRequest request, absl::Status error) + : leaser_{leaser}, + request_{std::move(request)}, + status_{std::move(error)} {} + +template +ResourcePool::LeaseToken::~LeaseToken() { + CHECK(status_checked_) // + << "status() was never consulted by caller: " << *this; + if (status_.ok()) { + leaser_.ReturnLease(*this); + } +} + +template +const typename ResourcePool::LeaseRequest& +ResourcePool::LeaseToken::request() const { + return request_; +} + +template +const absl::Status& ResourcePool::LeaseToken::status() const { + status_checked_ = true; + return status_; +} + +template +std::string ResourcePool::LeaseToken::id() const { + std::stringstream ss; + ss << thread_id_; + return absl::StrCat("lease_tid_", ss.str(), "_rid_", request_.id); +} + +template +std::thread::id ResourcePool::LeaseToken::thread_id() const { + return thread_id_; +} + +template +absl::Time ResourcePool::LeaseToken::created_at() const { + return created_at_; +} + +template +absl::Duration ResourcePool::LeaseToken::age() const { + return absl::Now() - created_at_; +} + +template +ResourcePool::ResourcePool(const ResourceT& quota) + : quota_{quota}, pool_{quota} { + LOG(INFO) << "Creating pool with quota=[" << quota.ShortStr() << "]"; +} + +template +typename ResourcePool::LeaseToken +ResourcePool::AcquireLeaseBlocking(LeaseRequest&& request) { + if (ABSL_VLOG_IS_ON(1)) { + absl::ReaderMutexLock lock{&pool_mu_}; + VLOG(1) << "Received lease request " << request.id 
// + << "\nrequested: " << request.amount.FormattedStr() // + << "\nquota: " << quota_.FormattedStr() // + << "\navailable: " << pool_.FormattedStr(); + } + + if (request.amount == ResourceT::Zero()) { + absl::Status error = // + absl::InvalidArgumentError(absl::StrCat( // + "Invalid lease request ", request.id, ": amount is zero")); + return LeaseToken{*this, std::move(request), std::move(error)}; + } + // NOTE: Using `amount > quota` would be semantically wrong, because it is + // true only when _all_ components of `amount` are strictly greater than their + // counterparts in `quota_`. + if (!(request.amount <= quota_)) { + absl::Status error = // + absl::ResourceExhaustedError(absl::StrCat( // + "Invalid lease request ", request.id, ": amount exceeds quota: [", + request.amount.ShortStr(), "] vs [", quota_.ShortStr(), "]")); + return LeaseToken{*this, std::move(request), std::move(error)}; + } + + const auto got_enough_free_pool = [this, &request]() { + pool_mu_.AssertReaderHeld(); + const bool got_pool = request.amount <= pool_; + if (!got_pool) { + VLOG(10) // + << "Pending lease '" << request.id << "':" // + << "\nreq age : " << request.age() // + << "\navailable : " << pool_.FormattedStr() // + << "\nrequested : " << (-request.amount).FormattedStr() // + << "\nmissing : " << (pool_ - request.amount).FormattedStr(); + } + return got_pool; + }; + + // Block and wait until enough of the pool becomes available to satisfy + // this request, then acquire the mutex and proceed to the true-branch. If + // the timeout is reached, proceed to the else-branch. 
+ if (pool_mu_.LockWhenWithTimeout( // + absl::Condition{&got_enough_free_pool}, request.timeout)) { + VLOG(1) // + << "Granting lease " << request.id // + << "\nreq age : " << request.age() // + << "\nbefore : " << pool_.FormattedStr() // + << "\nleased : " << (-request.amount).FormattedStr() // + << "\nafter : " << (pool_ - request.amount).FormattedStr(); + pool_ = pool_ - request.amount; + pool_mu_.Unlock(); + return LeaseToken{*this, std::move(request)}; + } else { + absl::Status error = // + absl::DeadlineExceededError(absl::StrCat( // + "Lease request ", request.id, " timed out; timeout: ", + request.timeout, " requested: [", request.amount.ShortStr(), + "] current pool: [", pool_.ShortStr(), "]")); + pool_mu_.Unlock(); + return LeaseToken{*this, std::move(request), std::move(error)}; + } +} + +template +void ResourcePool::ReturnLease(const LeaseToken& lease) { + absl::WriterMutexLock lock{&pool_mu_}; + VLOG(1) // + << "Returning lease " << lease.request().id // + << "\nreq age : " << lease.request().age() // + << "\nlease age : " << lease.age() // + << "\nbefore : " << pool_.FormattedStr() // + << "\nreturned : " << (+lease.request().amount).FormattedStr() // + << "\nafter : " << (pool_ + lease.request().amount).FormattedStr(); + pool_ = pool_ + lease.request().amount; +} + +// Explicit instantiations for the currently supported `ResourceT`s. +template class ResourcePool; +template class ResourcePool; + +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/resource_pool.h b/src/third_party/fuzztest/dist/centipede/resource_pool.h new file mode 100644 index 00000000000..241150f1313 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/resource_pool.h @@ -0,0 +1,204 @@ +// Copyright 2024 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef FUZZTEST_CENTIPEDE_RESOURCE_RESOURCE_POOL_H_ +#define FUZZTEST_CENTIPEDE_RESOURCE_RESOURCE_POOL_H_ + +#include +#include +#include + +#include +#include +#include // NOLINT: for thread IDs. + +#include "absl/base/thread_annotations.h" +#include "absl/status/status.h" +#include "absl/synchronization/mutex.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" + +namespace fuzztest::internal { + +//------------------------------------------------------------------------------ +// ResourcePool +// +// `ResourcePool` is an accounting mechanism to effectively share a limited +// resource between concurrent consumer threads, never exceeding a quota while +// maximizing resource utilization, and thus parallelism. +// +// The quota amount is picked by the client. It can be arbitrary, or it can +// reflect an actual amount of the resource on the system (e.g. the available +// RAM). +// +// Each of the consumer threads determines a conservative estimate of its peak +// resource utilization, and requests that amount from the pool. The request +// blocks until a sufficient amount becomes available. The amount is then +// "leased" to the thread for as long as it holds the lease token, and +// auto-returned back to the pool via RAII. +// +// Notes on using in combination with `ThreadPool`: +// 1. The requested number of concurrent threads in a `ThreadPool` is often an +// attempt to indirectly control the resource usage. 
`ResourcePool` enables a +// more direct way of controlling it, and therefore `ThreadPool`'s thread +// count can be made as high as necessary for other purposes. +// 2. The `ResourcePool` object must is defined before the `ThreadPool` one to +// avoid dangling references to a destructed pool in the threads. +// +// The currently supported (and explicitly instantiated in the .cc) types of +// the `ResourceT` template argument are `RUsageMemory` and `RUsageTiming`. +// +// Example: +// +// { +// constexpr RUsageMemory kRssQuota{.mem_rss = RLimits::FreeRss() * 0.75}; +// ResourcePool rss_pool{kRssQuota}; +// ThreadPool threads{100}; +// for (...) { +// threads.Schedule([&rss_pool]() { +// // The thread blocks here until either the requested amount of RSS +// // becomes available as the peer threads return their leases, or the +// // 10-minute timeout expires. +// const ResourcePool::LeaseToken rss_lease = +// rss_pool.AcquireLeaseBlocking({ +// .id = absl::StrCat("rss_", shard_id), +// .amount = RUsageMemory{.mem_rss = EstimateShardPeakRss()}, +// .timeout = absl::Minutes(10), +// }); +// CHECK_OK(rss_lease.status()); +// ... +// } +// // `rss_lease` dtor returns the leased RSS to `rss_pool` and unblocks +// // other waiting threads. +// ); +// } +// } // `threads` dtor runs and joins the threads; then `rss_pool` dtor runs. +// +// TODO(ussuri): Add monitoring of claimed vs actual use by each leaser and +// a final report of over- and underutilization (possibly via RUsageProfiler). +//------------------------------------------------------------------------------ +template +class ResourcePool { + public: + //---------------------------------------------------------------------------- + // Request + // + // Specifies a projected resource consumption between the time this request is + // submitted and the time the acquired LeaseToken goes out of scope. A + // convenient way to construct Requests is by using designated initializers + // (cf. 
ResourcePool's top-level doc just above). + struct LeaseRequest { + // Optional. Used in the debug logging and always included in returned + // failure statuses. + std::string id = ""; + // Mandatory. Must be > `ResourceT::Zero()`; otherwise, + // `AcquireLeaseBlocking()` immediately returns a failure. + ResourceT amount; + // Optional. `AcquireLeaseBlocking()` waits for up to this long for other + // resource consumers to free up enough of it to satisfy this request. If + // the required amount is still unavailable, `absl::DeadlineExceededError` + // is returned. The default is to acquire or fail immediately. + absl::Duration timeout = absl::ZeroDuration(); + // Should not normally be overridden by clients (but can be). Used for + // logging only. + absl::Time created_at = absl::Now(); + + // The age of this request. + absl::Duration age() const { return absl::Now() - created_at; } + }; + + //---------------------------------------------------------------------------- + // LeaseToken + // + // A RAII-based resource lock, similar to `MutexLock`. Must be held by a + // client that called `AcquireLeaseBlocking()` for as long as it continues to + // use the leased amount of the resource. Returns the resource to the leaser + // `ResourcePool` in the dtor. + class [[nodiscard]] LeaseToken { + public: + // Move-copyable only. + LeaseToken(const LeaseToken&) = delete; + LeaseToken& operator=(const LeaseToken&) = delete; + LeaseToken(LeaseToken&&) noexcept = default; + LeaseToken& operator=(LeaseToken&&) noexcept = delete; + + // Automatically returns itself to the leaser (the issuing ResourcePool). + ~LeaseToken(); + + // The outcome of resource acquisition (ie. of + // `ResourcePool::AcquireLeaseBlocking()`). Must be consulted by the client + // at least once, otherwise the dtor will CHECK. + const absl::Status& status() const; + // The originating request. + const LeaseRequest& request() const; + // A short description that can be used in logs. 
+ std::string id() const; + // The thread ID that submitted the request. + std::thread::id thread_id() const; + // The creation time and the age of the lease. + absl::Time created_at() const; + absl::Duration age() const; + + private: + // Only ResourcePool can create. + friend class ResourcePool; + + // Constructs a token for a successfully acquired resource. + LeaseToken(ResourcePool& leaser, LeaseRequest request); + // Constructs a token for a resource that couldn't be acquired. + LeaseToken(ResourcePool& leaser, LeaseRequest request, absl::Status error); + + friend std::ostream& operator<<(std::ostream& os, const LeaseToken& lt) { + return os << lt.id() << ": " << lt.request().amount.ShortStr(); + } + + ResourcePool& leaser_; + LeaseRequest request_ = {}; + absl::Status status_ = absl::OkStatus(); + mutable bool status_checked_ = false; + std::thread::id thread_id_ = std::this_thread::get_id(); + absl::Time created_at_ = absl::Now(); + }; + + // `quota` is the initially available amount of the resource to be shared + // between all concurrent consumers. + // Example: `ResourcePool pool{RUsageMemory{.mem_rss = ComputeFreeRss()}};`. + explicit ResourcePool(const ResourceT& quota); + + // Blocks the current thread and waits until `request.amount` of the resources + // becomes available in the pool or until `request.timeout` expires, whichever + // comes first. When the returned object goes out of scope, the leased + // resource gets automatically returned to the pool via RAII. + // Example: `const auto lease = pool.AcquireLeaseBlocking({.mem_rss = 100});`. + LeaseToken AcquireLeaseBlocking(LeaseRequest&& request); + + private: + // `LeaseToken`'s dtor calls this to return the leased resource to the pool. + void ReturnLease(const LeaseToken& lease); + + // The total pool capacity. + const ResourceT quota_; + + // The currently available amount. 
+ absl::Mutex pool_mu_; + ResourceT pool_ ABSL_GUARDED_BY(pool_mu_); +}; + +// An explicit deduction guide to allow `ResourcePool pool{RUsageMemory{...}}`. +template +ResourcePool(R r) -> ResourcePool; + +} // namespace fuzztest::internal + +#endif // FUZZTEST_CENTIPEDE_RESOURCE_RESOURCE_POOL_H_ diff --git a/src/third_party/fuzztest/dist/centipede/resource_pool_test.cc b/src/third_party/fuzztest/dist/centipede/resource_pool_test.cc new file mode 100644 index 00000000000..23bf03bbbb6 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/resource_pool_test.cc @@ -0,0 +1,155 @@ +// Copyright 2024 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "./centipede/resource_pool.h" + +#include +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "absl/log/log.h" +#include "absl/status/status.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "./centipede/rusage_stats.h" +#include "./centipede/thread_pool.h" +#include "./common/logging.h" + +namespace fuzztest::internal { +namespace { + +constexpr RUsageMemory MakeMemRss(MemSize mem_rss) { + return RUsageMemory{/*mem_vsize=*/0, /*mem_vpeak=*/0, mem_rss}; +} + +TEST(ResourcePoolTest, InvalidLeaseRequests) { + constexpr RUsageMemory kQuota = MakeMemRss(1000); + constexpr RUsageMemory kZero = MakeMemRss(0); + constexpr RUsageMemory kEpsilon = MakeMemRss(1); + ResourcePool pool{kQuota}; + { + ResourcePool::LeaseRequest request; + request.amount = kZero; + const auto lease = pool.AcquireLeaseBlocking(std::move(request)); + EXPECT_EQ(lease.status().code(), absl::StatusCode::kInvalidArgument) + << VV(lease.status()); + } + { + ResourcePool::LeaseRequest request; + request.amount = kQuota - kEpsilon; + const auto lease = pool.AcquireLeaseBlocking(std::move(request)); + EXPECT_EQ(lease.status().code(), absl::StatusCode::kOk) + << VV(lease.status()); + } + { + ResourcePool::LeaseRequest request; + request.amount = kQuota; + const auto lease = pool.AcquireLeaseBlocking(std::move(request)); + EXPECT_EQ(lease.status().code(), absl::StatusCode::kOk) + << VV(lease.status()); + } + { + ResourcePool::LeaseRequest request; + request.amount = kQuota + kEpsilon; + const auto lease = pool.AcquireLeaseBlocking(std::move(request)); + EXPECT_EQ(lease.status().code(), absl::StatusCode::kResourceExhausted) + << VV(lease.status()); + } +} + +TEST(ResourcePoolTest, Dynamic) { + struct TaskSpec { + std::string_view id; + RUsageMemory ram_chunk; + // The times are relative to time zero, when all the tasks roughly start. 
+ int request_at_secs; + int timeout_at_secs; + int release_at_secs; + absl::StatusCode expected_lease_status; + }; + + constexpr RUsageMemory kRssQuota = MakeMemRss(5); + constexpr int kNumTasks = 9; + constexpr std::array kTaskSpecs = {{ + // Can't request 0 amount. + {"0", /*ram_chunk=*/MakeMemRss(0), 0, 1, 3, + absl::StatusCode::kInvalidArgument}, + // Exceeds the initial pool capacity. + {"1", /*ram_chunk=*/MakeMemRss(10), 0, 1, 3, + absl::StatusCode::kResourceExhausted}, + // "2" gets the resource first. + {"2", /*ram_chunk=*/MakeMemRss(2), 0, 0, 2, absl::StatusCode::kOk}, + // "1" gets the resource immediately after "2" and runs concurrently. + {"3", /*ram_chunk=*/MakeMemRss(2), 0, 0, 4, absl::StatusCode::kOk}, + // "4" can't get the resource right away - 1 sec later than "2" and "3" - + // because they almost exhaust the pool; but it waits long enough for "2" + // to finish (while "3" is still running) and free up enough of the pool; + // then "4" gets the resource and runs fine. + {"4", /*ram_chunk=*/MakeMemRss(1), 1, 3, 4, absl::StatusCode::kOk}, + // "5" starts while "2" and "3", and later on "3" and "4", are still + // running. They all continuously hold enough of the pool to prevent "5" + // from ever getting its resource. Eventually, "5" runs out of time. + {"5", /*ram_chunk=*/MakeMemRss(4), 2, 3, 5, + absl::StatusCode::kDeadlineExceeded}, + // "6" is like "5", but it waits long enough for "3" and "4" to free up + // the pool; then "6" gets the resource and runs fine. + {"6", /*ram_chunk=*/MakeMemRss(4), 2, 5, 6, absl::StatusCode::kOk}, + // "7" is also like "5", but is less greedy, so although it starts 1 sec + // later, it is allowed in front of "5" and "6" and runs fine, partially + // sharing the pool with "3" and "4". + {"7", /*ram_chunk=*/MakeMemRss(1), 3, 3, 5, absl::StatusCode::kOk}, + // "8" starts waiting for the maximum available amount when other + // consumers already use some of the pool. 
It waits long enough for all of + // them to finish, then finally grabs the entire quota and runs. + {"8", /*ram_chunk=*/MakeMemRss(5), 2, 9, 10, absl::StatusCode::kOk}, + }}; + std::array task_lease_statuses; + + { + ResourcePool pool{kRssQuota}; + ThreadPool threads{kNumTasks}; + for (size_t i = 0; i < kNumTasks; ++i) { + const auto& t = kTaskSpecs[i]; + auto& lease_status = task_lease_statuses[i]; + threads.Schedule([&t, &pool, &lease_status]() { + // All the tasks start roughly at the same time (because there are just + // as many threads, and scheduling is fast), so they are on roughly the + // same relative timetable. + absl::SleepFor(absl::Seconds(t.request_at_secs)); + ResourcePool::LeaseRequest request; + request.id = std::string(t.id); + request.amount = t.ram_chunk; + request.timeout = absl::Seconds(t.timeout_at_secs - t.request_at_secs); + const auto lease = pool.AcquireLeaseBlocking(std::move(request)); + lease_status = lease.status(); + if (lease_status.ok()) { + absl::SleepFor(absl::Seconds(t.release_at_secs - t.request_at_secs)); + } + }); + } + } // Threads join here. + + for (size_t i = 0; i < kNumTasks; ++i) { + const auto& task = kTaskSpecs[i]; + auto& lease_status = task_lease_statuses[i]; + EXPECT_EQ(lease_status.code(), task.expected_lease_status) + << VV(task.id) << VV(lease_status); + } +} + +} // namespace +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/reverse_pc_table.h b/src/third_party/fuzztest/dist/centipede/reverse_pc_table.h new file mode 100644 index 00000000000..01399994fd7 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/reverse_pc_table.h @@ -0,0 +1,98 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef THIRD_PARTY_CENTIPEDE_REVERSE_PC_TABLE_H_ +#define THIRD_PARTY_CENTIPEDE_REVERSE_PC_TABLE_H_ + +#include +#include +#include + +#include "./centipede/pc_info.h" + +namespace fuzztest::internal { + +// Maps PCs to PCGuard objects. +class ReversePCTable { + public: + ReversePCTable() = default; + // Non-copyable, but movable (the move operations are defaulted). + ReversePCTable(const ReversePCTable &) = delete; + ReversePCTable &operator=(const ReversePCTable &) = delete; + ReversePCTable(ReversePCTable &&) = default; + ReversePCTable &operator=(ReversePCTable &&) = default; + + // Constructs the reverse PC table from `pc_table`. + // The assumption is that all PCs are relatively small, such that the + // implementation is allowed to create an array indexed by a PC. + void SetFromPCs(const PCTable& pc_table) { + num_pcs_ = pc_table.size(); + if (table_ != nullptr) delete[] table_; + if (num_pcs_ == 0) { + size_ = 0; + table_ = nullptr; + return; + } + // Compute max_pc. + uintptr_t max_pc = 0; + for (const auto& pc_info : pc_table) { + max_pc = std::max(max_pc, pc_info.pc); + } + // Create an array of max_pc + 1 elements such that we can directly + // index this array with any valid PC. + size_ = max_pc + 1; + table_ = new PCGuard[size_]; + std::fill(table_, table_ + size_, kInvalidPCGuard); + // Make sure all PC indices fit into PCGuard::kMaxNumPCs. + if (pc_table.size() >= PCGuard::kMaxNumPCs) + __builtin_trap(); // no logging in runner. TODO(kcc): use RunnerCheck. + // Fill in the table.
+ for (size_t idx = 0; idx < pc_table.size(); ++idx) { + const auto &pc_info = pc_table[idx]; + if (pc_info.pc >= size_) __builtin_trap(); // TODO(kcc): use RunnerCheck. + table_[pc_info.pc] = { + /*is_function_entry=*/pc_info.has_flag(PCInfo::kFuncEntry), + /*pc_index=*/static_cast(idx)}; + } + } + + // Returns PCGuard that corresponds to `pc`. If `pc` was not present in + // `pc_table` passed to SetFromPCs, returns kInvalidPCGuard. This is a hot + // function and needs to be as simple and fast as possible. + PCGuard GetPCGuard(uintptr_t pc) const { + if (pc >= size_) return kInvalidPCGuard; + return table_[pc]; + } + + // Returns the number of PCs that was passed to SetFromPCs(). + size_t NumPcs() const { return num_pcs_; } + + private: + // A PCGuard object, such that IsValid() will return false. + static constexpr PCGuard kInvalidPCGuard = {0, PCGuard::kInvalidPcIndex}; + + // We use size_ and table_ pointer instead of std::vector<> because + // (1) we need ReversePCTable object to be accessible even after the + // destruction (in static storage duration); (2) size_ is cheaper to + // compute inside GetPCGuard(). This would cause leakage if not + // declared as static - one can explicitly call SetFromPCs({}) to + // free the table. + size_t size_ = 0; + size_t num_pcs_ = 0; + PCGuard *table_ = nullptr; +}; + +} // namespace fuzztest::internal + +#endif // THIRD_PARTY_CENTIPEDE_REVERSE_PC_TABLE_H_ diff --git a/src/third_party/fuzztest/dist/centipede/reverse_pc_table_test.cc b/src/third_party/fuzztest/dist/centipede/reverse_pc_table_test.cc new file mode 100644 index 00000000000..83dc7e397c2 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/reverse_pc_table_test.cc @@ -0,0 +1,59 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "./centipede/reverse_pc_table.h" + +#include "gtest/gtest.h" +#include "./centipede/pc_info.h" + +namespace fuzztest::internal { +namespace { + +TEST(ReversePCTable, ReversePCTable) { + static ReversePCTable table; + const PCTable pc_table = {{500, PCInfo::kFuncEntry}, + {400, 0}, + {100, PCInfo::kFuncEntry}, + {200, 0}, + {300, 0}}; + table.SetFromPCs(pc_table); + + EXPECT_EQ(table.NumPcs(), 5); + EXPECT_FALSE(table.GetPCGuard(0).IsValid()); + EXPECT_FALSE(table.GetPCGuard(50).IsValid()); + EXPECT_FALSE(table.GetPCGuard(150).IsValid()); + EXPECT_FALSE(table.GetPCGuard(501).IsValid()); + + EXPECT_EQ(table.GetPCGuard(500).pc_index, 0); + EXPECT_TRUE(table.GetPCGuard(500).is_function_entry); + EXPECT_EQ(table.GetPCGuard(400).pc_index, 1); + EXPECT_FALSE(table.GetPCGuard(400).is_function_entry); + EXPECT_EQ(table.GetPCGuard(100).pc_index, 2); + EXPECT_TRUE(table.GetPCGuard(100).is_function_entry); + EXPECT_EQ(table.GetPCGuard(200).pc_index, 3); + EXPECT_FALSE(table.GetPCGuard(200).is_function_entry); + EXPECT_EQ(table.GetPCGuard(300).pc_index, 4); + EXPECT_FALSE(table.GetPCGuard(300).is_function_entry); + + // Reset the table and try new values. 
+ const PCTable pc_table1 = {{40, 0}, {20, 0}, {30, 0}}; + table.SetFromPCs(pc_table1); + EXPECT_FALSE(table.GetPCGuard(200).IsValid()); + EXPECT_EQ(table.GetPCGuard(40).pc_index, 0); + EXPECT_EQ(table.GetPCGuard(20).pc_index, 1); + EXPECT_EQ(table.GetPCGuard(30).pc_index, 2); +} + +} // namespace +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/rolling_hash.h b/src/third_party/fuzztest/dist/centipede/rolling_hash.h new file mode 100644 index 00000000000..dc2397b2c02 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/rolling_hash.h @@ -0,0 +1,75 @@ +// Copyright 2023 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef THIRD_PARTY_CENTIPEDE_ROLLING_HASH_H_ +#define THIRD_PARTY_CENTIPEDE_ROLLING_HASH_H_ + +#include +#include + +namespace fuzztest::internal { + +// Computes a rolling hash for a fixed-size window in a sequence of 32-bit ints. +// Inspired by https://en.wikipedia.org/wiki/Rolling_hash#Rabin_fingerprint. +// +// Objects of this class must be created as global or TLS. +// The typical non-test usage is to create on TLS. +// Which is why we pass `window_size` via a separate function. +// There is no CTOR, the objects are zero-initialized. +// We currently do not use a CTOR with absl::ConstInitType so that the objects +// can be declared as __thread. 
+// TODO(kcc): reconsider once we can use c++20; the current warning is +// error: constexpr constructor that does not initialize all members is a +// C++20 extension +class RollingHash { + public: + // Resets the object to use the specified window size. + void Reset(size_t window_size) { + hash_ = 0; + multiplier_power_window_size_ = 1; + for (size_t i = 0; i < window_size; ++i) { + multiplier_power_window_size_ *= kMultiplier; + } + } + + // Updates the hash by adding `add` and removing `remove`. + uint32_t Update(uint32_t hash, uint32_t add, uint32_t remove) const { + // Intermediate computations are done in 64-bit. + return hash * kMultiplier - remove * multiplier_power_window_size_ + add; + } + + void Update(uint32_t add, uint32_t remove) { + hash_ = Update(hash_, add, remove); + } + + // Returns the hash as a 32-bit int. + uint32_t Hash() const { return hash_; } + + // Test-only function to use as a slow but simple reference implementation. + // Adds `add` to `hash`. + static uint32_t TestOnlyUpdate(uint32_t hash, uint32_t add) { + return hash * kMultiplier + add; + } + + private: + // A prime number less than 2**32 (https://t5k.org/lists/2small/0bit.html). + static constexpr uint64_t kMultiplier = (1ULL << 32) - 267; + + uint32_t hash_; + uint64_t multiplier_power_window_size_; // kMultiplier ** window_size. +}; + +} // namespace fuzztest::internal + +#endif // THIRD_PARTY_CENTIPEDE_ROLLING_HASH_H_ diff --git a/src/third_party/fuzztest/dist/centipede/rolling_hash_test.cc b/src/third_party/fuzztest/dist/centipede/rolling_hash_test.cc new file mode 100644 index 00000000000..8fc27586a3f --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/rolling_hash_test.cc @@ -0,0 +1,122 @@ +// Copyright 2023 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "./centipede/rolling_hash.h" + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "./centipede/feature.h" + +namespace fuzztest::internal { +namespace { + +// Reference implementation for RollingHash. +// Maintains the entire window of hash-ed ints in memory. +// Otherwise, equivalent to RollingHash. +class TestOnlyRollingHash { + public: + void Reset(size_t window_size) { + window_size_ = window_size; + deq_.clear(); + } + + void Update(uint32_t add, uint32_t remove) { + deq_.push_back(add); + if (deq_.size() > window_size_) deq_.pop_front(); + } + + uint32_t Hash() const { + uint64_t hash = 0; + for (const auto &value : deq_) { + hash = RollingHash::TestOnlyUpdate(hash, value); + } + return hash; + } + + protected: + size_t window_size_ = 0; + std::deque deq_; +}; + +// Tests RollingHashType, compares the results with TestOnlyRollingHashType. +template +void TestRollingHash() { + static RollingHashType hasher; // must be static. + TestOnlyRollingHashType test_hasher; + + // Tests on this many ints. + constexpr size_t kDataSize = 1 << 23; + // We test collisions for the full 32-bit hash, and also + // for the hash masked-off by kMask, to ensure that it remains + // a good hash if we only take a subset of bits. + // Our main use case is using the number of bits required for kDomainSize. + constexpr uint32_t kMask = feature_domains::Domain::kDomainSize - 1; + // Allow this many collisions for the masked hash. 
+ constexpr size_t kMaxNumMaskCollisions = 3; + constexpr size_t kNumWindowsSizes = 20; + + // kDataSize ints: 0, 1, 2, ... + std::vector data(kDataSize); + std::iota(data.begin(), data.end(), 0); + + // Bitset is a bit faster for this test than a hash set. + using BS = std::bitset<(1ULL << 32)>; + // Allocate BS objects on heap, because they are too large for stack. + std::unique_ptr collisions_full(new BS()); + std::unique_ptr collisions_mask(new BS()); + + for (size_t window_size = 1; window_size <= kNumWindowsSizes; ++window_size) { + hasher.Reset(window_size); + test_hasher.Reset(window_size); + // Clear all bitset bits (->reset(), not .reset()). + collisions_full->reset(); + collisions_mask->reset(); + // pipe all ints in `data` through the hasher, maintaining the window + // of `window_size` elements. Count hash collisions. + size_t num_collisions_full = 0, num_collisions_mask = 0; + for (size_t idx = 0; idx < data.size(); ++idx) { + uint32_t remove = idx >= window_size ? data[idx - window_size] : 0; + hasher.Update(data[idx], remove); + uint32_t hash = hasher.Hash(); + num_collisions_full += collisions_full->test(hash); + num_collisions_mask += collisions_mask->test(hash & kMask); + collisions_full->set(hash); + collisions_mask->set(hash & kMask); + if (idx < 100000) { + // test_hasher is much more expensive, test only first few iterations. 
+ test_hasher.Update(data[idx], remove); + EXPECT_EQ(hash, test_hasher.Hash()); + } + } + EXPECT_EQ(num_collisions_full, 0); + EXPECT_LE(num_collisions_mask, kMaxNumMaskCollisions); + } +} + +TEST(RollingHash, RollingHash) { + TestRollingHash(); +} + +} // namespace +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/run_test_workflow.sh b/src/third_party/fuzztest/dist/centipede/run_test_workflow.sh new file mode 100755 index 00000000000..453f589c260 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/run_test_workflow.sh @@ -0,0 +1,108 @@ +#!/bin/bash + +# Copyright 2018 Google LLC +# Copyright 2022 Centipede Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set -eu -o pipefail + +source ./centipede/install_dependencies_debian.sh +# The above script installs a custom version of clang and exports its bin subdir +# in CLANG_BIN_DIR envvar. +export PATH="${CLANG_BIN_DIR}:$PATH" +declare MAYBE_SUDO="" +if (( "$EUID" != 0 )); then + MAYBE_SUDO="sudo" +fi + +${MAYBE_SUDO} apt install -y rename + +######################################## +# LOG ENVIRONMENT DEBUG INFO +######################################## +date --rfc-3339=seconds +echo "Debug Info" +echo "OUTPUT_ARTIFACTS_DIR=${OUTPUT_ARTIFACTS_DIR}" + +mkdir -p "${OUTPUT_ARTIFACTS_DIR}" +find . 
>"${OUTPUT_ARTIFACTS_DIR}/full_file_list.log" +{ + printenv + which bazel + bazel version + bazel info --show_make_env +} >"${OUTPUT_ARTIFACTS_DIR}/build_environment.log" + +######################################## +# RUN TESTS +######################################## +date --rfc-3339=seconds +echo "Building and testing with Bazel" + +declare BAZEL_OUTPUT_DIR +BAZEL_OUTPUT_DIR="$(bazel info output_base)" +readonly BAZEL_OUTPUT_DIR +declare -ra BAZEL_ARGS=("--color=no" "--curses=no" "--noshow_progress") + +set +e +# TODO(b/259298232): As remaining parts of the bug get fixed, do: +# - Remove `--local_test_jobs=1`. +# - When all tests under `testing` pass, remove separate tests for each +# subdirectory and replace `centipede:all` with `centipede/...`. +# - Use a single `bazel test "${BAZEL_ARGS[@]}" ...`. +bazel test "${BAZEL_ARGS[@]}" --local_test_jobs=1 --test_output=streamed \ + centipede:all && +bazel test "${BAZEL_ARGS[@]}" centipede/testing:instrumentation_test centipede/testing:runner_test && +bazel test "${BAZEL_ARGS[@]}" centipede/puzzles:all +bazel test "${BAZEL_ARGS[@]}" --linkopt=-fsanitize=address --copt=-fsanitize=address centipede/puzzles:all +bazel test "${BAZEL_ARGS[@]}" --no//centipede:use_riegeli --local_test_jobs=1 --test_output=streamed centipede:all && +bazel test "${BAZEL_ARGS[@]}" --no//centipede:use_riegeli centipede/testing:instrumentation_test centipede/testing:runner_test && +bazel test "${BAZEL_ARGS[@]}" --no//centipede:use_riegeli centipede/puzzles:all +bazel test "${BAZEL_ARGS[@]}" --no//centipede:use_riegeli --linkopt=-fsanitize=address --copt=-fsanitize=address centipede/puzzles:all + +declare -ri exit_code=$? 
+set -e + +# Capture the build log as a fake test to reduce download spam +declare -r FULL_BUILD_LOG_DIR="bazel_full_build_log" +mkdir -p "${OUTPUT_ARTIFACTS_DIR}/${FULL_BUILD_LOG_DIR}" +cp "${BAZEL_OUTPUT_DIR}/command.log" "${OUTPUT_ARTIFACTS_DIR}/${FULL_BUILD_LOG_DIR}/sponge_log.log" +cat >"${OUTPUT_ARTIFACTS_DIR}/${FULL_BUILD_LOG_DIR}/sponge_log.xml" < + + + +DOC +chmod -R a+w "${OUTPUT_ARTIFACTS_DIR}/${FULL_BUILD_LOG_DIR}" + +######################################## +# REMAP OUTPUT FILES +######################################## +declare -r OUTPUT_BAZEL_LOGS_DIR="${OUTPUT_ARTIFACTS_DIR}/bazel_test_logs" +rm -rf "${OUTPUT_BAZEL_LOGS_DIR}" # For local testing +mkdir -p "${OUTPUT_BAZEL_LOGS_DIR}" + +# Copy test.{log,xml} files to kokoro artifacts directory, then rename them. +find -L bazel-testlogs -name "test.log" -exec cp --parents {} "${OUTPUT_BAZEL_LOGS_DIR}" \; +find -L "${OUTPUT_BAZEL_LOGS_DIR}" -name "test.log" -exec rename 's/test\.log/sponge_log.log/' {} \; +find -L bazel-testlogs -name "test.xml" -exec cp --parents {} "${OUTPUT_BAZEL_LOGS_DIR}" \; +find -L "${OUTPUT_BAZEL_LOGS_DIR}" -name "test.xml" -exec rename 's/test\.xml/sponge_log.xml/' {} \; + +chmod -R a+w "${OUTPUT_BAZEL_LOGS_DIR}" + +date --rfc-3339=seconds +echo "Exiting test workflow" + +exit ${exit_code} diff --git a/src/third_party/fuzztest/dist/centipede/run_test_workflow_using_docker.sh b/src/third_party/fuzztest/dist/centipede/run_test_workflow_using_docker.sh new file mode 100755 index 00000000000..a80104d4beb --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/run_test_workflow_using_docker.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Copyright 2022 Centipede Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -eu + +SCRIPT_DIR="$(cd -L "$(dirname "$0")" && echo "${PWD}")" +readonly SCRIPT_DIR + +declare -r FUZZTEST_DIR="$(cd "${SCRIPT_DIR}/.." && echo "${PWD}")" +declare -r OUTPUT_ARTIFACTS_DIR="${FUZZTEST_DIR}/test-outputs" +# Must run under sudo, or else docker trips over insufficient permissions. +declare -r DOCKER_CMD="sudo docker" +declare -r DOCKER_IMAGE=debian + +echo "Will save test output artifacts to $OUTPUT_ARTIFACTS_DIR" + +${DOCKER_CMD} run \ + -v "${FUZZTEST_DIR}:/app" \ + -v "${OUTPUT_ARTIFACTS_DIR}:/output" \ + --env OUTPUT_ARTIFACTS_DIR=/output \ + -w /app \ + "${DOCKER_IMAGE}" \ + /app/centipede/run_test_workflow.sh diff --git a/src/third_party/fuzztest/dist/centipede/runner.cc b/src/third_party/fuzztest/dist/centipede/runner.cc new file mode 100644 index 00000000000..d998350b510 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/runner.cc @@ -0,0 +1,1313 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Fuzz target runner (engine) for Centipede. 
+// Reads the input files and feeds their contents to +// the fuzz target (RunnerCallbacks::Execute), then dumps the coverage data. +// If the input path is "/path/to/foo", +// the coverage features are dumped to "/path/to/foo-features" +// +// WARNING: please avoid any C++ libraries here, such as Absl and (most of) STL, +// in order to avoid creating new coverage edges in the binary. +#include "./centipede/runner.h" + +#include // NOLINT: use pthread to avoid extra dependencies. +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "absl/base/nullability.h" +#include "./centipede/byte_array_mutator.h" +#include "./centipede/execution_metadata.h" +#include "./centipede/feature.h" +#include "./centipede/int_utils.h" +#include "./centipede/mutation_input.h" +#include "./centipede/pc_info.h" +#include "./centipede/runner_dl_info.h" +#include "./centipede/runner_interface.h" +#include "./centipede/runner_request.h" +#include "./centipede/runner_result.h" +#include "./centipede/runner_utils.h" +#include "./centipede/shared_memory_blob_sequence.h" +#include "./common/defs.h" + +__attribute__((weak)) extern fuzztest::internal::feature_t + __start___centipede_extra_features; +__attribute__((weak)) extern fuzztest::internal::feature_t + __stop___centipede_extra_features; + +namespace fuzztest::internal { +namespace { + +// Returns the length of the common prefix of `s1` and `s2`, but not more +// than 63. I.e. the returned value is in [0, 64). 
+size_t LengthOfCommonPrefix(const void *s1, const void *s2, size_t n) { + const auto *p1 = static_cast(s1); + const auto *p2 = static_cast(s2); + static constexpr size_t kMaxLen = 63; + if (n > kMaxLen) n = kMaxLen; + for (size_t i = 0; i < n; ++i) { + if (p1[i] != p2[i]) return i; + } + return n; +} + +class ThreadTerminationDetector { + public: + // A dummy method to trigger the construction and make sure that the + // destructor will be called on the thread termination. + __attribute__((optnone)) void EnsureAlive() {} + + ~ThreadTerminationDetector() { tls.OnThreadStop(); } +}; + +thread_local ThreadTerminationDetector termination_detector; + +} // namespace + +GlobalRunnerState state __attribute__((init_priority(200))); +// We use __thread instead of thread_local so that the compiler warns if +// the initializer for `tls` is not a constant expression. +// `tls` thus must not have a CTOR. +// This avoids calls to __tls_init() in hot functions that use `tls`. +__thread ThreadLocalRunnerState tls; + +void ThreadLocalRunnerState::TraceMemCmp(uintptr_t caller_pc, const uint8_t *s1, + const uint8_t *s2, size_t n, + bool is_equal) { + if (state.run_time_flags.use_cmp_features) { + const uintptr_t pc_offset = caller_pc - state.main_object.start_address; + const uintptr_t hash = + fuzztest::internal::Hash64Bits(pc_offset) ^ tls.path_ring_buffer.hash(); + const size_t lcp = LengthOfCommonPrefix(s1, s2, n); + // lcp is a 6-bit number. 
+ state.cmp_feature_set.set((hash << 6) | lcp); + } + if (!is_equal && state.run_time_flags.use_auto_dictionary) { + cmp_traceN.Capture(n, s1, s2); + } +} + +void ThreadLocalRunnerState::OnThreadStart() { + termination_detector.EnsureAlive(); + tls.started = true; + tls.lowest_sp = tls.top_frame_sp = + reinterpret_cast(__builtin_frame_address(0)); + tls.stack_region_low = GetCurrentThreadStackRegionLow(); + if (tls.stack_region_low == 0) { + fprintf(stderr, + "Disabling stack limit check due to missing stack region info.\n"); + } + tls.call_stack.Reset(state.run_time_flags.callstack_level); + tls.path_ring_buffer.Reset(state.run_time_flags.path_level); + LockGuard lock(state.tls_list_mu); + // Add myself to state.tls_list. + auto *old_list = state.tls_list; + tls.next = old_list; + state.tls_list = &tls; + if (old_list != nullptr) old_list->prev = &tls; +} + +void ThreadLocalRunnerState::OnThreadStop() { + LockGuard lock(state.tls_list_mu); + // Remove myself from state.tls_list. The list never + // becomes empty because the main thread does not call OnThreadStop(). + if (&tls == state.tls_list) { + state.tls_list = tls.next; + tls.prev = nullptr; + } else { + auto *prev_tls = tls.prev; + auto *next_tls = tls.next; + prev_tls->next = next_tls; + if (next_tls != nullptr) next_tls->prev = prev_tls; + } + tls.next = tls.prev = nullptr; + if (tls.ignore) return; + // Create a detached copy on heap and add it to detached_tls_list to + // collect its coverage later. + // + // TODO(xinhaoyuan): Consider refactoring the list operations into class + // methods instead of duplicating them. 
+ ThreadLocalRunnerState *detached_tls = new ThreadLocalRunnerState(tls); + auto *old_list = state.detached_tls_list; + detached_tls->next = old_list; + state.detached_tls_list = detached_tls; + if (old_list != nullptr) old_list->prev = detached_tls; +} + +static size_t GetPeakRSSMb() { + struct rusage usage = {}; + if (getrusage(RUSAGE_SELF, &usage) != 0) return 0; +#ifdef __APPLE__ + // On MacOS, the unit seems to be byte according to experiment, while some + // documents mentioned KiB. This could depend on OS variants. + return usage.ru_maxrss >> 20; +#else // __APPLE__ + // On Linux, ru_maxrss is in KiB + return usage.ru_maxrss >> 10; +#endif // __APPLE__ +} + +// Returns the current time in microseconds. +static uint64_t TimeInUsec() { + struct timeval tv = {}; + constexpr size_t kUsecInSec = 1000000; + gettimeofday(&tv, nullptr); + return tv.tv_sec * kUsecInSec + tv.tv_usec; +} + +static void CheckWatchdogLimits() { + const uint64_t curr_time = time(nullptr); + struct Resource { + const char *what; + const char *units; + uint64_t value; + uint64_t limit; + bool ignore_report; + const char *failure; + }; + const uint64_t input_start_time = state.input_start_time; + const uint64_t batch_start_time = state.batch_start_time; + if (input_start_time == 0 || batch_start_time == 0) return; + const Resource resources[] = { + {Resource{ + /*what=*/"Per-input timeout", + /*units=*/"sec", + /*value=*/curr_time - input_start_time, + /*limit=*/state.run_time_flags.timeout_per_input, + /*ignore_report=*/state.run_time_flags.ignore_timeout_reports != 0, + /*failure=*/kExecutionFailurePerInputTimeout.data(), + }}, + {Resource{ + /*what=*/"Per-batch timeout", + /*units=*/"sec", + /*value=*/curr_time - batch_start_time, + /*limit=*/state.run_time_flags.timeout_per_batch, + /*ignore_report=*/state.run_time_flags.ignore_timeout_reports != 0, + /*failure=*/kExecutionFailurePerBatchTimeout.data(), + }}, + {Resource{ + /*what=*/"RSS limit", + /*units=*/"MB", + 
/*value=*/GetPeakRSSMb(), + /*limit=*/state.run_time_flags.rss_limit_mb, + /*ignore_report=*/false, + /*failure=*/kExecutionFailureRssLimitExceeded.data(), + }}, + }; + for (const auto &resource : resources) { + if (resource.limit != 0 && resource.value > resource.limit) { + // Allow only one invocation to handle a failure: needed because we call + // this function periodically in `WatchdogThread()`, but also call it in + // `RunOneInput()` after all the work is done. + static std::atomic already_handling_failure = false; + if (!already_handling_failure.exchange(true)) { + if (resource.ignore_report) { + fprintf(stderr, + "========= %s exceeded: %" PRIu64 " > %" PRIu64 + " (%s); exiting without reporting as an error\n", + resource.what, resource.value, resource.limit, + resource.units); + std::_Exit(0); + // should not return here. + } + fprintf(stderr, + "========= %s exceeded: %" PRIu64 " > %" PRIu64 + " (%s); exiting\n", + resource.what, resource.value, resource.limit, resource.units); + fprintf( + stderr, + "==============================================================" + "===\n" + "=== BUG FOUND!\n The %s is set to %" PRIu64 + " (%s), but it exceeded %" PRIu64 + ".\n" + "Find out how to adjust the resource limits at " + "https://github.com/google/fuzztest/tree/main/doc/flags-reference.md" + "\n", + resource.what, resource.limit, resource.units, resource.value); + CentipedeSetFailureDescription(resource.failure); + std::abort(); + } + } + } +} + +// Watchdog thread. Periodically checks if it's time to abort due to a +// timeout/OOM. +[[noreturn]] static void *WatchdogThread(void *unused) { + tls.ignore = true; + state.watchdog_thread_started = true; + while (true) { + sleep(1); + + // No calls to ResetInputTimer() yet: input execution hasn't started. 
+ if (state.input_start_time == 0) continue; + + CheckWatchdogLimits(); + } +} + +__attribute__((noinline)) void CheckStackLimit(uintptr_t sp) { + static std::atomic_flag stack_limit_exceeded = ATOMIC_FLAG_INIT; + const size_t stack_limit = state.run_time_flags.stack_limit_kb.load() << 10; + // Check for the stack limit only if sp is inside the stack region. + if (stack_limit > 0 && tls.stack_region_low && + tls.top_frame_sp - sp > stack_limit) { + const bool test_not_running = state.input_start_time == 0; + if (test_not_running) return; + if (stack_limit_exceeded.test_and_set()) return; + fprintf(stderr, + "========= Stack limit exceeded: %" PRIuPTR + " > %zu" + " (byte); aborting\n", + tls.top_frame_sp - sp, stack_limit); + CentipedeSetFailureDescription( + fuzztest::internal::kExecutionFailureStackLimitExceeded.data()); + std::abort(); + } +} + +void GlobalRunnerState::CleanUpDetachedTls() { + LockGuard lock(tls_list_mu); + ThreadLocalRunnerState *it_next = nullptr; + for (auto *it = detached_tls_list; it; it = it_next) { + it_next = it->next; + delete it; + } + detached_tls_list = nullptr; +} + +void GlobalRunnerState::StartWatchdogThread() { + fprintf(stderr, + "Starting watchdog thread: timeout_per_input: %" PRIu64 + " sec; timeout_per_batch: %" PRIu64 " sec; rss_limit_mb: %" PRIu64 + " MB; stack_limit_kb: %" PRIu64 " KB\n", + state.run_time_flags.timeout_per_input.load(), + state.run_time_flags.timeout_per_batch, + state.run_time_flags.rss_limit_mb.load(), + state.run_time_flags.stack_limit_kb.load()); + pthread_t watchdog_thread; + pthread_create(&watchdog_thread, nullptr, WatchdogThread, nullptr); + pthread_detach(watchdog_thread); + // Wait until the watchdog actually starts and initializes itself. 
+ while (!state.watchdog_thread_started) { + sleep(0); + } +} + +void GlobalRunnerState::ResetTimers() { + const auto curr_time = time(nullptr); + input_start_time = curr_time; + // batch_start_time is set only once -- just before the first input of the + // batch is about to start running. + if (batch_start_time == 0) { + batch_start_time = curr_time; + } +} + +// Byte array mutation fallback for a custom mutator, as defined here: +// https://github.com/google/fuzzing/blob/master/docs/structure-aware-fuzzing.md +extern "C" __attribute__((weak)) size_t +CentipedeLLVMFuzzerMutateCallback(uint8_t *data, size_t size, size_t max_size) { + // TODO(kcc): [as-needed] fix the interface mismatch. + // LLVMFuzzerMutate is an array-based interface (for compatibility reasons) + // while ByteArray has a vector-based interface. + // This incompatibility causes us to do extra allocate/copy per mutation. + // It may not cause big problems in practice though. + if (max_size == 0) return 0; // just in case, not expected to happen. + if (size == 0) { + // Don't mutate empty data, just return a 1-byte result. + data[0] = 0; + return 1; + } + + ByteArray array(data, data + size); + state.byte_array_mutator->set_max_len(max_size); + state.byte_array_mutator->Mutate(array); + if (array.size() > max_size) { + array.resize(max_size); + } + memcpy(data, array.data(), array.size()); + return array.size(); +} + +extern "C" size_t LLVMFuzzerMutate(uint8_t *data, size_t size, + size_t max_size) { + return CentipedeLLVMFuzzerMutateCallback(data, size, max_size); +} + +// An arbitrary large size for input data. +static const size_t kMaxDataSize = 1 << 20; + +static void WriteFeaturesToFile(FILE *file, const feature_t *features, + size_t size) { + if (!size) return; + auto bytes_written = fwrite(features, 1, sizeof(features[0]) * size, file); + PrintErrorAndExitIf(bytes_written != size * sizeof(features[0]), + "wrong number of bytes written for coverage"); +} + +// Clears all coverage data. 
+// All bitsets, counter arrays and such need to be clear before every execution. +// However, clearing them is expensive because they are sparse. +// Instead, we rely on ForEachNonZeroByte() and +// ConcurrentBitSet::ForEachNonZeroBit to clear the bits/bytes after they +// finish iterating. +// We still need to clear all the thread-local data updated during execution. +// If `full_clear==true` clear all coverage anyway - useful to remove the +// coverage accumulated during startup. +__attribute__((noinline)) // so that we see it in profile. +static void +PrepareCoverage(bool full_clear) { + state.CleanUpDetachedTls(); + if (state.run_time_flags.path_level != 0) { + state.ForEachTls([](ThreadLocalRunnerState &tls) { + tls.path_ring_buffer.Reset(state.run_time_flags.path_level); + tls.call_stack.Reset(state.run_time_flags.callstack_level); + tls.lowest_sp = tls.top_frame_sp; + }); + } + { + fuzztest::internal::LockGuard lock(state.execution_result_override_mu); + if (state.execution_result_override != nullptr) { + state.execution_result_override->ClearAndResize(0); + } + } + if (!full_clear) return; + state.ForEachTls([](ThreadLocalRunnerState &tls) { + if (state.run_time_flags.use_auto_dictionary) { + tls.cmp_trace2.Clear(); + tls.cmp_trace4.Clear(); + tls.cmp_trace8.Clear(); + tls.cmp_traceN.Clear(); + } + }); + state.pc_counter_set.ForEachNonZeroByte( + [](size_t idx, uint8_t value) {}, 0, + state.actual_pc_counter_set_size_aligned); + if (state.run_time_flags.use_dataflow_features) + state.data_flow_feature_set.ForEachNonZeroBit([](size_t idx) {}); + if (state.run_time_flags.use_cmp_features) { + state.cmp_feature_set.ForEachNonZeroBit([](size_t idx) {}); + state.cmp_eq_set.ForEachNonZeroBit([](size_t idx) {}); + state.cmp_moddiff_set.ForEachNonZeroBit([](size_t idx) {}); + state.cmp_hamming_set.ForEachNonZeroBit([](size_t idx) {}); + state.cmp_difflog_set.ForEachNonZeroBit([](size_t idx) {}); + } + if (state.run_time_flags.path_level != 0) + 
state.path_feature_set.ForEachNonZeroBit([](size_t idx) {}); + if (state.run_time_flags.callstack_level != 0) + state.callstack_set.ForEachNonZeroBit([](size_t idx) {}); + for (auto *p = state.user_defined_begin; p != state.user_defined_end; ++p) { + *p = 0; + } + state.sancov_objects.ClearInlineCounters(); +} + +static void MaybeAddFeature(feature_t feature) { + if (!state.run_time_flags.skip_seen_features) { + state.g_features.push_back(feature); + } else if (!state.seen_features.get(feature)) { + state.g_features.push_back(feature); + state.seen_features.set(feature); + } +} + +// Adds a kPCs and/or k8bitCounters feature to `g_features` based on arguments. +// `idx` is a pc_index. +// `counter_value` (non-zero) is a counter value associated with that PC. +static void AddPcIndxedAndCounterToFeatures(size_t idx, uint8_t counter_value) { + if (state.run_time_flags.use_pc_features) { + MaybeAddFeature(feature_domains::kPCs.ConvertToMe(idx)); + } + if (state.run_time_flags.use_counter_features) { + MaybeAddFeature(feature_domains::k8bitCounters.ConvertToMe( + Convert8bitCounterToNumber(idx, counter_value))); + } +} + +// Post-processes all coverage data, puts it all into `g_features`. +// `target_return_value` is the value returned by LLVMFuzzerTestOneInput. +// +// If `target_return_value == -1`, sets `g_features` to empty. This way, +// the engine will reject any input that causes the target to return -1. +// LibFuzzer supports this return value as of 2022-07: +// https://llvm.org/docs/LibFuzzer.html#rejecting-unwanted-inputs +__attribute__((noinline)) // so that we see it in profile. +static void +PostProcessCoverage(int target_return_value) { + state.g_features.clear(); + + if (target_return_value == -1) return; + + // Convert counters to features. 
+ state.pc_counter_set.ForEachNonZeroByte( + [](size_t idx, uint8_t value) { + AddPcIndxedAndCounterToFeatures(idx, value); + }, + 0, state.actual_pc_counter_set_size_aligned); + + // Convert data flow bit set to features. + if (state.run_time_flags.use_dataflow_features) { + state.data_flow_feature_set.ForEachNonZeroBit([](size_t idx) { + MaybeAddFeature(feature_domains::kDataFlow.ConvertToMe(idx)); + }); + } + + // Convert cmp bit set to features. + if (state.run_time_flags.use_cmp_features) { + // TODO(kcc): remove cmp_feature_set. + state.cmp_feature_set.ForEachNonZeroBit([](size_t idx) { + MaybeAddFeature(feature_domains::kCMP.ConvertToMe(idx)); + }); + state.cmp_eq_set.ForEachNonZeroBit([](size_t idx) { + MaybeAddFeature(feature_domains::kCMPEq.ConvertToMe(idx)); + }); + state.cmp_moddiff_set.ForEachNonZeroBit([](size_t idx) { + MaybeAddFeature(feature_domains::kCMPModDiff.ConvertToMe(idx)); + }); + state.cmp_hamming_set.ForEachNonZeroBit([](size_t idx) { + MaybeAddFeature(feature_domains::kCMPHamming.ConvertToMe(idx)); + }); + state.cmp_difflog_set.ForEachNonZeroBit([](size_t idx) { + MaybeAddFeature(feature_domains::kCMPDiffLog.ConvertToMe(idx)); + }); + } + + // Convert path bit set to features. + if (state.run_time_flags.path_level != 0) { + state.path_feature_set.ForEachNonZeroBit([](size_t idx) { + MaybeAddFeature(feature_domains::kBoundedPath.ConvertToMe(idx)); + }); + } + + // Iterate all threads and get features from TLS data. 
+ state.ForEachTls([](ThreadLocalRunnerState &tls) { + if (state.run_time_flags.callstack_level != 0) { + RunnerCheck(tls.top_frame_sp >= tls.lowest_sp, + "bad values of tls.top_frame_sp and tls.lowest_sp"); + size_t sp_diff = tls.top_frame_sp - tls.lowest_sp; + MaybeAddFeature(feature_domains::kCallStack.ConvertToMe(sp_diff)); + } + }); + + if (state.run_time_flags.callstack_level != 0) { + state.callstack_set.ForEachNonZeroBit([](size_t idx) { + MaybeAddFeature(feature_domains::kCallStack.ConvertToMe(idx)); + }); + } + + // Copy the features from __centipede_extra_features to g_features. + // Zero features are ignored - we treat them as default (unset) values. + for (auto *p = state.user_defined_begin; p != state.user_defined_end; ++p) { + if (auto user_feature = *p) { + // User domain ID is upper 32 bits + feature_t user_domain_id = user_feature >> 32; + // User feature ID is lower 32 bits. + feature_t user_feature_id = user_feature & ((1ULL << 32) - 1); + // There is no hard guarantee how many user domains are actually + // available. If a user domain ID is out of range, alias it to an existing + // domain. This is kinder than silently dropping the feature. + user_domain_id %= std::size(feature_domains::kUserDomains); + MaybeAddFeature(feature_domains::kUserDomains[user_domain_id].ConvertToMe( + user_feature_id)); + *p = 0; // cleanup for the next iteration. + } + } + + // Iterates all non-zero inline 8-bit counters, if they are present. + // Calls AddPcIndxedAndCounterToFeatures on non-zero counters and zeroes them. 
+ if (state.run_time_flags.use_pc_features || + state.run_time_flags.use_counter_features) { + state.sancov_objects.ForEachNonZeroInlineCounter( + [](size_t idx, uint8_t counter_value) { + AddPcIndxedAndCounterToFeatures(idx, counter_value); + }); + } +} + +void RunnerCallbacks::GetSeeds(std::function seed_callback) { + seed_callback({0}); +} + +std::string RunnerCallbacks::GetSerializedTargetConfig() { return ""; } + +bool RunnerCallbacks::Mutate( + const std::vector & /*inputs*/, size_t /*num_mutants*/, + std::function /*new_mutant_callback*/) { + RunnerCheck(!HasCustomMutator(), + "Class deriving from RunnerCallbacks must implement Mutate() if " + "HasCustomMutator() returns true."); + return true; +} + +class LegacyRunnerCallbacks : public RunnerCallbacks { + public: + LegacyRunnerCallbacks(FuzzerTestOneInputCallback test_one_input_cb, + FuzzerCustomMutatorCallback custom_mutator_cb, + FuzzerCustomCrossOverCallback custom_crossover_cb) + : test_one_input_cb_(test_one_input_cb), + custom_mutator_cb_(custom_mutator_cb), + custom_crossover_cb_(custom_crossover_cb) {} + + bool Execute(ByteSpan input) override { + PrintErrorAndExitIf(test_one_input_cb_ == nullptr, + "missing test_on_input_cb"); + const int retval = test_one_input_cb_(input.data(), input.size()); + PrintErrorAndExitIf( + retval != -1 && retval != 0, + "test_on_input_cb returns invalid value other than -1 and 0"); + return retval == 0; + } + + bool HasCustomMutator() const override { + return custom_mutator_cb_ != nullptr; + } + + bool Mutate(const std::vector &inputs, size_t num_mutants, + std::function new_mutant_callback) override; + + private: + FuzzerTestOneInputCallback test_one_input_cb_; + FuzzerCustomMutatorCallback custom_mutator_cb_; + FuzzerCustomCrossOverCallback custom_crossover_cb_; +}; + +std::unique_ptr CreateLegacyRunnerCallbacks( + FuzzerTestOneInputCallback test_one_input_cb, + FuzzerCustomMutatorCallback custom_mutator_cb, + FuzzerCustomCrossOverCallback custom_crossover_cb) { + 
return std::make_unique( + test_one_input_cb, custom_mutator_cb, custom_crossover_cb); +} + +static void RunOneInput(const uint8_t *data, size_t size, + RunnerCallbacks &callbacks) { + state.stats = {}; + size_t last_time_usec = 0; + auto UsecSinceLast = [&last_time_usec]() { + uint64_t t = TimeInUsec(); + uint64_t ret_val = t - last_time_usec; + last_time_usec = t; + return ret_val; + }; + UsecSinceLast(); + PrepareCoverage(/*full_clear=*/false); + state.stats.prep_time_usec = UsecSinceLast(); + state.ResetTimers(); + int target_return_value = callbacks.Execute({data, size}) ? 0 : -1; + state.stats.exec_time_usec = UsecSinceLast(); + CheckWatchdogLimits(); + if (fuzztest::internal::state.input_start_time.exchange(0) != 0) { + PostProcessCoverage(target_return_value); + } + state.stats.post_time_usec = UsecSinceLast(); + state.stats.peak_rss_mb = GetPeakRSSMb(); +} + +template +static std::vector ReadBytesFromFilePath(const char *input_path) { + FILE *input_file = fopen(input_path, "r"); + RunnerCheck(input_file != nullptr, "can't open the input file"); + struct stat statbuf = {}; + RunnerCheck(fstat(fileno(input_file), &statbuf) == 0, "fstat failed"); + size_t size_in_bytes = statbuf.st_size; + RunnerCheck(size_in_bytes != 0, "empty file"); + RunnerCheck((size_in_bytes % sizeof(Type)) == 0, + "file size is not multiple of the type size"); + std::vector data(size_in_bytes / sizeof(Type)); + auto num_bytes_read = fread(data.data(), 1, size_in_bytes, input_file); + RunnerCheck(num_bytes_read == size_in_bytes, "read failed"); + RunnerCheck(fclose(input_file) == 0, "fclose failed"); + return data; +} + +// Runs one input provided in file `input_path`. +// Produces coverage data in file `input_path`-features. +__attribute__((noinline)) // so that we see it in profile. +static void +ReadOneInputExecuteItAndDumpCoverage(const char *input_path, + RunnerCallbacks &callbacks) { + // Read the input. 
+ auto data = ReadBytesFromFilePath(input_path); + + RunOneInput(data.data(), data.size(), callbacks); + + // Dump features to a file. + char features_file_path[PATH_MAX]; + snprintf(features_file_path, sizeof(features_file_path), "%s-features", + input_path); + FILE *features_file = fopen(features_file_path, "w"); + PrintErrorAndExitIf(features_file == nullptr, "can't open coverage file"); + WriteFeaturesToFile(features_file, state.g_features.data(), + state.g_features.size()); + fclose(features_file); +} + +// Calls ExecutionMetadata::AppendCmpEntry for every CMP arg pair +// found in `cmp_trace`. +// Returns true if all appending succeeded. +// "noinline" so that we see it in a profile, if it becomes hot. +template +__attribute__((noinline)) bool AppendCmpEntries(CmpTrace &cmp_trace, + ExecutionMetadata &metadata) { + bool append_failed = false; + cmp_trace.ForEachNonZero( + [&](uint8_t size, const uint8_t *v0, const uint8_t *v1) { + if (!metadata.AppendCmpEntry({v0, size}, {v1, size})) + append_failed = true; + }); + return !append_failed; +} + +// Starts sending the outputs (coverage, etc.) to `outputs_blobseq`. +// Returns true on success. +static bool StartSendingOutputsToEngine(BlobSequence &outputs_blobseq) { + return BatchResult::WriteInputBegin(outputs_blobseq); +} + +// Copy all the `g_features` to `data` with given `capacity` in bytes. +// Returns the byte size of `g_features`. +static size_t CopyFeatures(uint8_t *data, size_t capacity) { + const size_t features_len_in_bytes = + state.g_features.size() * sizeof(feature_t); + if (features_len_in_bytes > capacity) return 0; + memcpy(data, state.g_features.data(), features_len_in_bytes); + return features_len_in_bytes; +} + +// Finishes sending the outputs (coverage, etc.) to `outputs_blobseq`. +// Returns true on success. 
+static bool FinishSendingOutputsToEngine(BlobSequence &outputs_blobseq) { + { + LockGuard lock(state.execution_result_override_mu); + bool has_overridden_execution_result = false; + if (state.execution_result_override != nullptr) { + RunnerCheck(state.execution_result_override->results().size() <= 1, + "unexpected number of overridden execution results"); + has_overridden_execution_result = + state.execution_result_override->results().size() == 1; + } + if (has_overridden_execution_result) { + const auto &result = state.execution_result_override->results()[0]; + return BatchResult::WriteOneFeatureVec(result.features().data(), + result.features().size(), + outputs_blobseq) && + BatchResult::WriteMetadata(result.metadata(), outputs_blobseq) && + BatchResult::WriteStats(result.stats(), outputs_blobseq) && + BatchResult::WriteInputEnd(outputs_blobseq); + } + } + + // Copy features to shared memory. + if (!BatchResult::WriteOneFeatureVec( + state.g_features.data(), state.g_features.size(), outputs_blobseq)) { + return false; + } + + ExecutionMetadata metadata; + // Copy the CMP traces to shared memory. + if (state.run_time_flags.use_auto_dictionary) { + bool append_failed = false; + state.ForEachTls([&metadata, &append_failed](ThreadLocalRunnerState &tls) { + if (!AppendCmpEntries(tls.cmp_trace2, metadata)) append_failed = true; + if (!AppendCmpEntries(tls.cmp_trace4, metadata)) append_failed = true; + if (!AppendCmpEntries(tls.cmp_trace8, metadata)) append_failed = true; + if (!AppendCmpEntries(tls.cmp_traceN, metadata)) append_failed = true; + }); + if (append_failed) return false; + } + if (!BatchResult::WriteMetadata(metadata, outputs_blobseq)) return false; + + // Write the stats. + if (!BatchResult::WriteStats(state.stats, outputs_blobseq)) return false; + // We are done with this input. + if (!BatchResult::WriteInputEnd(outputs_blobseq)) return false; + return true; +} + +// Handles an ExecutionRequest, see RequestExecution(). 
Reads inputs from +// `inputs_blobseq`, runs them, saves coverage features to `outputs_blobseq`. +// Returns EXIT_SUCCESS on success and EXIT_FAILURE otherwise. +static int ExecuteInputsFromShmem(BlobSequence &inputs_blobseq, + BlobSequence &outputs_blobseq, + RunnerCallbacks &callbacks) { + size_t num_inputs = 0; + if (!IsExecutionRequest(inputs_blobseq.Read())) return EXIT_FAILURE; + if (!IsNumInputs(inputs_blobseq.Read(), num_inputs)) return EXIT_FAILURE; + + CentipedeBeginExecutionBatch(); + + for (size_t i = 0; i < num_inputs; i++) { + auto blob = inputs_blobseq.Read(); + // TODO(kcc): distinguish bad input from end of stream. + if (!blob.IsValid()) return EXIT_SUCCESS; // no more blobs to read. + if (!IsDataInput(blob)) return EXIT_FAILURE; + + // TODO(kcc): [impl] handle sizes larger than kMaxDataSize. + size_t size = std::min(kMaxDataSize, blob.size); + // Copy from blob to data so that to not pass the shared memory further. + std::vector data(blob.data, blob.data + size); + + // Starting execution of one more input. + if (!StartSendingOutputsToEngine(outputs_blobseq)) break; + + RunOneInput(data.data(), data.size(), callbacks); + + if (!FinishSendingOutputsToEngine(outputs_blobseq)) break; + } + + CentipedeEndExecutionBatch(); + + return EXIT_SUCCESS; +} + +// Dumps the pc table to `output_path`. +// Requires that state.main_object is already computed. +static void DumpPcTable(const char *absl_nonnull output_path) { + PrintErrorAndExitIf(!state.main_object.IsSet(), "main_object is not set"); + FILE *output_file = fopen(output_path, "w"); + PrintErrorAndExitIf(output_file == nullptr, "can't open output file"); + std::vector pcs = state.sancov_objects.CreatePCTable(); + // Dump the pc table. 
+ const auto data_size_in_bytes = pcs.size() * sizeof(PCInfo); + auto num_bytes_written = + fwrite(pcs.data(), 1, data_size_in_bytes, output_file); + PrintErrorAndExitIf(num_bytes_written != data_size_in_bytes, + "wrong number of bytes written for pc table"); + fclose(output_file); +} + +// Dumps the control-flow table to `output_path`. +// Requires that state.main_object is already computed. +static void DumpCfTable(const char *absl_nonnull output_path) { + PrintErrorAndExitIf(!state.main_object.IsSet(), "main_object is not set"); + FILE *output_file = fopen(output_path, "w"); + PrintErrorAndExitIf(output_file == nullptr, "can't open output file"); + std::vector data = state.sancov_objects.CreateCfTable(); + size_t data_size_in_bytes = data.size() * sizeof(data[0]); + // Dump the table. + auto num_bytes_written = + fwrite(data.data(), 1, data_size_in_bytes, output_file); + PrintErrorAndExitIf(num_bytes_written != data_size_in_bytes, + "wrong number of bytes written for cf table"); + fclose(output_file); +} + +// Dumps a DsoTable as a text file. Each line contains the file path and the +// number of instrumented PCs. +static void DumpDsoTable(const char *absl_nonnull output_path) { + FILE *output_file = fopen(output_path, "w"); + RunnerCheck(output_file != nullptr, "DumpDsoTable: can't open output file"); + DsoTable dso_table = state.sancov_objects.CreateDsoTable(); + for (const auto &entry : dso_table) { + fprintf(output_file, "%s %zd\n", entry.path.c_str(), + entry.num_instrumented_pcs); + } + fclose(output_file); +} + +// Dumps seed inputs to `output_dir`. Also see `GetSeedsViaExternalBinary()`. +static void DumpSeedsToDir(RunnerCallbacks &callbacks, const char *output_dir) { + size_t seed_index = 0; + callbacks.GetSeeds([&](ByteSpan seed) { + // Cap seed index within 9 digits. If this was triggered, the dumping would + // take forever.. 
+ if (seed_index >= 1000000000) return; + char seed_path_buf[PATH_MAX]; + const size_t num_path_chars = + snprintf(seed_path_buf, PATH_MAX, "%s/%09lu", output_dir, seed_index); + PrintErrorAndExitIf(num_path_chars >= PATH_MAX, + "seed path reaches PATH_MAX"); + FILE *output_file = fopen(seed_path_buf, "w"); + const size_t num_bytes_written = + fwrite(seed.data(), 1, seed.size(), output_file); + PrintErrorAndExitIf(num_bytes_written != seed.size(), + "wrong number of bytes written for cf table"); + fclose(output_file); + ++seed_index; + }); +} + +// Dumps serialized target config to `output_file_path`. Also see +// `GetSerializedTargetConfigViaExternalBinary()`. +static void DumpSerializedTargetConfigToFile(RunnerCallbacks &callbacks, + const char *output_file_path) { + const std::string config = callbacks.GetSerializedTargetConfig(); + FILE *output_file = fopen(output_file_path, "w"); + const size_t num_bytes_written = + fwrite(config.data(), 1, config.size(), output_file); + PrintErrorAndExitIf( + num_bytes_written != config.size(), + "wrong number of bytes written for serialized target configuration"); + fclose(output_file); +} + +// Returns a random seed. No need for a more sophisticated seed. +// TODO(kcc): [as-needed] optionally pass an external seed. +static unsigned GetRandomSeed() { return time(nullptr); } + +// Handles a Mutation Request, see RequestMutation(). +// Mutates inputs read from `inputs_blobseq`, +// writes the mutants to `outputs_blobseq` +// Returns EXIT_SUCCESS on success and EXIT_FAILURE on failure +// so that main() can return its result. +// If both `custom_mutator_cb` and `custom_crossover_cb` are nullptr, +// returns EXIT_FAILURE. +// +// TODO(kcc): [impl] make use of custom_crossover_cb, if available. +static int MutateInputsFromShmem(BlobSequence &inputs_blobseq, + BlobSequence &outputs_blobseq, + RunnerCallbacks &callbacks) { + // Read max_num_mutants. 
+ size_t num_mutants = 0; + size_t num_inputs = 0; + if (!IsMutationRequest(inputs_blobseq.Read())) return EXIT_FAILURE; + if (!IsNumMutants(inputs_blobseq.Read(), num_mutants)) return EXIT_FAILURE; + if (!IsNumInputs(inputs_blobseq.Read(), num_inputs)) return EXIT_FAILURE; + + // Mutation input with ownership. + struct MutationInput { + ByteArray data; + ExecutionMetadata metadata; + }; + // TODO(kcc): unclear if we can continue using std::vector (or other STL) + // in the runner. But for now use std::vector. + // Collect the inputs into a vector. We copy them instead of using pointers + // into shared memory so that the user code doesn't touch the shared memory. + std::vector inputs; + inputs.reserve(num_inputs); + std::vector input_refs; + input_refs.reserve(num_inputs); + for (size_t i = 0; i < num_inputs; ++i) { + // If inputs_blobseq have overflown in the engine, we still want to + // handle the first few inputs. + ExecutionMetadata metadata; + if (!IsExecutionMetadata(inputs_blobseq.Read(), metadata)) { + break; + } + auto blob = inputs_blobseq.Read(); + if (!IsDataInput(blob)) break; + inputs.push_back( + MutationInput{/*data=*/ByteArray{blob.data, blob.data + blob.size}, + /*metadata=*/std::move(metadata)}); + input_refs.push_back( + MutationInputRef{/*data=*/inputs.back().data, + /*metadata=*/&inputs.back().metadata}); + } + + if (!inputs.empty()) { + state.byte_array_mutator->SetMetadata(inputs[0].metadata); + } + + if (!MutationResult::WriteHasCustomMutator(callbacks.HasCustomMutator(), + outputs_blobseq)) { + return EXIT_FAILURE; + } + if (!callbacks.HasCustomMutator()) return EXIT_SUCCESS; + + if (!callbacks.Mutate(input_refs, num_mutants, [&](ByteSpan mutant) { + MutationResult::WriteMutant(mutant, outputs_blobseq); + })) { + return EXIT_FAILURE; + } + return EXIT_SUCCESS; +} + +bool LegacyRunnerCallbacks::Mutate( + const std::vector &inputs, size_t num_mutants, + std::function new_mutant_callback) { + if (custom_mutator_cb_ == nullptr) return false; 
+ unsigned int seed = GetRandomSeed(); + const size_t num_inputs = inputs.size(); + const size_t max_mutant_size = state.run_time_flags.max_len; + constexpr size_t kAverageMutationAttempts = 2; + ByteArray mutant(max_mutant_size); + for (size_t attempt = 0, num_outputs = 0; + attempt < num_mutants * kAverageMutationAttempts && + num_outputs < num_mutants; + ++attempt) { + const auto &input_data = inputs[rand_r(&seed) % num_inputs].data; + + size_t size = std::min(input_data.size(), max_mutant_size); + std::copy(input_data.cbegin(), input_data.cbegin() + size, mutant.begin()); + size_t new_size = 0; + if ((custom_crossover_cb_ != nullptr) && + rand_r(&seed) % 100 < state.run_time_flags.crossover_level) { + // Perform crossover `crossover_level`% of the time. + const auto &other_data = inputs[rand_r(&seed) % num_inputs].data; + new_size = custom_crossover_cb_( + input_data.data(), input_data.size(), other_data.data(), + other_data.size(), mutant.data(), max_mutant_size, rand_r(&seed)); + } else { + new_size = custom_mutator_cb_(mutant.data(), size, max_mutant_size, + rand_r(&seed)); + } + if (new_size == 0) continue; + new_mutant_callback({mutant.data(), new_size}); + ++num_outputs; + } + return true; +} + +// Returns the current process VmSize, in bytes. +static size_t GetVmSizeInBytes() { + FILE *f = fopen("/proc/self/statm", "r"); // man proc + if (!f) return 0; + size_t vm_size = 0; + // NOTE: Ignore any (unlikely) failures to suppress a compiler warning. + (void)fscanf(f, "%zd", &vm_size); + fclose(f); + return vm_size * getpagesize(); // proc gives VmSize in pages. +} + +// Sets RLIMIT_CORE, RLIMIT_AS +static void SetLimits() { + // Disable core dumping. + struct rlimit core_limits; + getrlimit(RLIMIT_CORE, &core_limits); + core_limits.rlim_cur = 0; + core_limits.rlim_max = 0; + setrlimit(RLIMIT_CORE, &core_limits); + + // ASAN/TSAN/MSAN can not be used with RLIMIT_AS. 
+ // We get the current VmSize, if it is greater than 1Tb, we assume we + // are running under one of ASAN/TSAN/MSAN and thus cannot use RLIMIT_AS. + constexpr size_t one_tb = 1ULL << 40; + size_t vm_size_in_bytes = GetVmSizeInBytes(); + // Set the address-space limit (RLIMIT_AS). + // No-op under ASAN/TSAN/MSAN - those may still rely on rss_limit_mb. + if (vm_size_in_bytes < one_tb) { + size_t address_space_limit_mb = + state.HasIntFlag(":address_space_limit_mb=", 0); + if (address_space_limit_mb > 0) { + size_t limit_in_bytes = address_space_limit_mb << 20; + struct rlimit rlimit_as = {limit_in_bytes, limit_in_bytes}; + setrlimit(RLIMIT_AS, &rlimit_as); + } + } else { + fprintf(stderr, + "Not using RLIMIT_AS; " + "VmSize is %zdGb, suspecting ASAN/MSAN/TSAN\n", + vm_size_in_bytes >> 30); + } +} + +static void MaybePopulateReversePcTable() { + const char *pcs_file_path = state.GetStringFlag(":pcs_file_path="); + if (!pcs_file_path) return; + const auto pc_table = ReadBytesFromFilePath(pcs_file_path); + state.reverse_pc_table.SetFromPCs(pc_table); +} + +// Create a fake reference to ForkServerCallMeVeryEarly() here so that the +// fork server module is not dropped during linking. +// Alternatives are +// * Use -Wl,--whole-archive when linking with the runner archive. +// * Use -Wl,-u,ForkServerCallMeVeryEarly when linking with the runner archive. +// (requires ForkServerCallMeVeryEarly to be extern "C"). +// These alternatives require extra flags and are thus more fragile. +// We declare ForkServerCallMeVeryEarly() here instead of doing it in some +// header file, because we want to keep the fork server header-free. +extern void ForkServerCallMeVeryEarly(); +[[maybe_unused]] auto fake_reference_for_fork_server = + &ForkServerCallMeVeryEarly; +// Same for runner_sancov.cc. Avoids the following situation: +// * weak implementations of sancov callbacks are given in the command line +// before centipede.a. +// * linker sees them and decides to drop runner_sancov.o. 
+extern void RunnerSancov(); +[[maybe_unused]] auto fake_reference_for_runner_sancov = &RunnerSancov; +// Same for runner_interceptor.cc. +extern void RunnerInterceptor(); +[[maybe_unused]] auto fake_reference_for_runner_interceptor = + &RunnerInterceptor; + +GlobalRunnerState::GlobalRunnerState() { + // Make sure fork server is started if needed. + ForkServerCallMeVeryEarly(); + + // TODO(kcc): move some code from CentipedeRunnerMain() here so that it works + // even if CentipedeRunnerMain() is not called. + tls.OnThreadStart(); + state.StartWatchdogThread(); + + SetLimits(); + + // Compute main_object. + main_object = GetDlInfo(state.GetStringFlag(":dl_path_suffix=")); + if (!main_object.IsSet()) { + fprintf( + stderr, + "Failed to compute main_object. This may happen" + " e.g. when instrumented code is in a DSO opened later by dlopen()\n"); + } + + // Dump the binary info tables. + if (state.HasFlag(":dump_binary_info:")) { + RunnerCheck(state.arg1 && state.arg2 && state.arg3, + "dump_binary_info requires 3 arguments"); + if (!state.arg1 || !state.arg2 || !state.arg3) _exit(EXIT_FAILURE); + DumpPcTable(state.arg1); + DumpCfTable(state.arg2); + DumpDsoTable(state.arg3); + _exit(EXIT_SUCCESS); + } + + MaybePopulateReversePcTable(); + + // initialize the user defined section. + user_defined_begin = &__start___centipede_extra_features; + user_defined_end = &__stop___centipede_extra_features; + if (user_defined_begin && user_defined_end) { + fprintf( + stderr, + "section(\"__centipede_extra_features\") detected with %zd elements\n", + user_defined_end - user_defined_begin); + } +} + +GlobalRunnerState::~GlobalRunnerState() { + // The process is winding down, but CentipedeRunnerMain did not run. + // This means, the binary is standalone with its own main(), and we need to + // report the coverage now. + if (!state.centipede_runner_main_executed && state.HasFlag(":shmem:")) { + int exit_status = EXIT_SUCCESS; // TODO(kcc): do we know our exit status? 
+ PostProcessCoverage(exit_status); + SharedMemoryBlobSequence outputs_blobseq(state.arg2); + StartSendingOutputsToEngine(outputs_blobseq); + FinishSendingOutputsToEngine(outputs_blobseq); + } + { + LockGuard lock(state.execution_result_override_mu); + if (state.execution_result_override != nullptr) { + delete state.execution_result_override; + state.execution_result_override = nullptr; + } + } + // Always clean up detached TLSs to avoid leakage. + CleanUpDetachedTls(); +} + +// If HasFlag(:shmem:), state.arg1 and state.arg2 are the names +// of in/out shared memory locations. +// Read inputs and write outputs via shared memory. +// +// Default: Execute ReadOneInputExecuteItAndDumpCoverage() for all inputs.// +// +// Note: argc/argv are used for only ReadOneInputExecuteItAndDumpCoverage(). +int RunnerMain(int argc, char **argv, RunnerCallbacks &callbacks) { + state.centipede_runner_main_executed = true; + + fprintf(stderr, "Centipede fuzz target runner; argv[0]: %s flags: %s\n", + argv[0], state.centipede_runner_flags); + + if (state.HasFlag(":dump_configuration:")) { + DumpSerializedTargetConfigToFile(callbacks, + /*output_file_path=*/state.arg1); + return EXIT_SUCCESS; + } + + if (state.HasFlag(":dump_seed_inputs:")) { + // Seed request. + DumpSeedsToDir(callbacks, /*output_dir=*/state.arg1); + return EXIT_SUCCESS; + } + + // Inputs / outputs from shmem. + if (state.HasFlag(":shmem:")) { + if (!state.arg1 || !state.arg2) return EXIT_FAILURE; + SharedMemoryBlobSequence inputs_blobseq(state.arg1); + SharedMemoryBlobSequence outputs_blobseq(state.arg2); + // Read the first blob. It indicates what further actions to take. + auto request_type_blob = inputs_blobseq.Read(); + if (IsMutationRequest(request_type_blob)) { + // Since we are mutating, no need to spend time collecting the coverage. + // We still pay for executing the coverage callbacks, but those will + // return immediately. + // TODO(kcc): do this more consistently, for all coverage types. 
+ state.run_time_flags.use_cmp_features = false; + state.run_time_flags.use_pc_features = false; + state.run_time_flags.use_dataflow_features = false; + state.run_time_flags.use_counter_features = false; + // Mutation request. + inputs_blobseq.Reset(); + state.byte_array_mutator = + new ByteArrayMutator(state.knobs, GetRandomSeed()); + return MutateInputsFromShmem(inputs_blobseq, outputs_blobseq, callbacks); + } + if (IsExecutionRequest(request_type_blob)) { + // Execution request. + inputs_blobseq.Reset(); + return ExecuteInputsFromShmem(inputs_blobseq, outputs_blobseq, callbacks); + } + return EXIT_FAILURE; + } + + // By default, run every input file one-by-one. + for (int i = 1; i < argc; i++) { + ReadOneInputExecuteItAndDumpCoverage(argv[i], callbacks); + } + return EXIT_SUCCESS; +} + +} // namespace fuzztest::internal + +extern "C" int LLVMFuzzerRunDriver( + int *absl_nonnull argc, char ***absl_nonnull argv, + FuzzerTestOneInputCallback test_one_input_cb) { + if (LLVMFuzzerInitialize) LLVMFuzzerInitialize(argc, argv); + return RunnerMain(*argc, *argv, + *fuzztest::internal::CreateLegacyRunnerCallbacks( + test_one_input_cb, LLVMFuzzerCustomMutator, + LLVMFuzzerCustomCrossOver)); +} + +extern "C" __attribute__((used)) void CentipedeIsPresent() {} +extern "C" __attribute__((used)) void __libfuzzer_is_present() {} + +extern "C" void CentipedeSetRssLimit(size_t rss_limit_mb) { + fprintf(stderr, "CentipedeSetRssLimit: changing rss_limit_mb to %zu\n", + rss_limit_mb); + fuzztest::internal::state.run_time_flags.rss_limit_mb = rss_limit_mb; +} + +extern "C" void CentipedeSetStackLimit(size_t stack_limit_kb) { + fprintf(stderr, "CentipedeSetStackLimit: changing stack_limit_kb to %zu\n", + stack_limit_kb); + fuzztest::internal::state.run_time_flags.stack_limit_kb = stack_limit_kb; +} + +extern "C" void CentipedeSetTimeoutPerInput(uint64_t timeout_per_input) { + fprintf(stderr, + "CentipedeSetTimeoutPerInput: changing timeout_per_input to %" PRIu64 + "\n", + 
timeout_per_input); + fuzztest::internal::state.run_time_flags.timeout_per_input = + timeout_per_input; +} + +extern "C" __attribute__((weak)) const char *absl_nullable +CentipedeGetRunnerFlags() { + if (const char *runner_flags_env = getenv("CENTIPEDE_RUNNER_FLAGS")) + return strdup(runner_flags_env); + return nullptr; +} + +static std::atomic in_execution_batch = false; + +extern "C" void CentipedeBeginExecutionBatch() { + if (in_execution_batch) { + fprintf(stderr, + "CentipedeBeginExecutionBatch called twice without calling " + "CentipedeEndExecutionBatch in between\n"); + _exit(EXIT_FAILURE); + } + in_execution_batch = true; + fuzztest::internal::PrepareCoverage(/*full_clear=*/true); +} + +extern "C" void CentipedeEndExecutionBatch() { + if (!in_execution_batch) { + fprintf(stderr, + "CentipedeEndExecutionBatch called without calling " + "CentipedeBeginExecutionBatch before\n"); + _exit(EXIT_FAILURE); + } + in_execution_batch = false; + fuzztest::internal::state.input_start_time = 0; + fuzztest::internal::state.batch_start_time = 0; +} + +extern "C" void CentipedePrepareProcessing() { + fuzztest::internal::PrepareCoverage(/*full_clear=*/!in_execution_batch); + fuzztest::internal::state.ResetTimers(); +} + +extern "C" void CentipedeFinalizeProcessing() { + fuzztest::internal::CheckWatchdogLimits(); + if (fuzztest::internal::state.input_start_time.exchange(0) != 0) { + fuzztest::internal::PostProcessCoverage(/*target_return_value=*/0); + } +} + +extern "C" size_t CentipedeGetExecutionResult(uint8_t *data, size_t capacity) { + fuzztest::internal::BlobSequence outputs_blobseq(data, capacity); + if (!fuzztest::internal::StartSendingOutputsToEngine(outputs_blobseq)) + return 0; + if (!fuzztest::internal::FinishSendingOutputsToEngine(outputs_blobseq)) + return 0; + return outputs_blobseq.offset(); +} + +extern "C" size_t CentipedeGetCoverageData(uint8_t *data, size_t capacity) { + return fuzztest::internal::CopyFeatures(data, capacity); +} + +extern "C" void 
CentipedeSetExecutionResult(const uint8_t *data, size_t size) { + using fuzztest::internal::state; + fuzztest::internal::LockGuard lock(state.execution_result_override_mu); + if (!state.execution_result_override) + state.execution_result_override = new fuzztest::internal::BatchResult(); + state.execution_result_override->ClearAndResize(1); + if (data == nullptr) return; + // Removing const here should be fine as we don't write to `blobseq`. + fuzztest::internal::BlobSequence blobseq(const_cast(data), size); + state.execution_result_override->Read(blobseq); + fuzztest::internal::RunnerCheck( + state.execution_result_override->num_outputs_read() == 1, + "Failed to set execution result from CentipedeSetExecutionResult"); +} + +extern "C" void CentipedeSetFailureDescription(const char *description) { + using fuzztest::internal::state; + if (state.failure_description_path == nullptr) return; + // Make sure that the write is atomic and only happens once. + [[maybe_unused]] static int write_once = [=] { + FILE *f = fopen(state.failure_description_path, "w"); + if (f == nullptr) { + perror("FAILURE: fopen()"); + return 0; + } + const auto len = strlen(description); + if (fwrite(description, 1, len, f) != len) { + perror("FAILURE: fwrite()"); + } + if (fflush(f) != 0) { + perror("FAILURE: fflush()"); + } + if (fclose(f) != 0) { + perror("FAILURE: fclose()"); + } + return 0; + }(); +} diff --git a/src/third_party/fuzztest/dist/centipede/runner.h b/src/third_party/fuzztest/dist/centipede/runner.h new file mode 100644 index 00000000000..8e4ff8eff97 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/runner.h @@ -0,0 +1,369 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef THIRD_PARTY_CENTIPEDE_RUNNER_H_ +#define THIRD_PARTY_CENTIPEDE_RUNNER_H_ + +#include // NOLINT: use pthread to avoid extra dependencies. +#include +#include + +#include +#include +#include +#include +#include + +#include "absl/base/const_init.h" +#include "absl/base/nullability.h" +#include "absl/numeric/bits.h" +#include "./centipede/byte_array_mutator.h" +#include "./centipede/callstack.h" +#include "./centipede/concurrent_bitset.h" +#include "./centipede/concurrent_byteset.h" +#include "./centipede/feature.h" +#include "./centipede/hashed_ring_buffer.h" +#include "./centipede/knobs.h" +#include "./centipede/reverse_pc_table.h" +#include "./centipede/runner_cmp_trace.h" +#include "./centipede/runner_dl_info.h" +#include "./centipede/runner_interface.h" +#include "./centipede/runner_result.h" +#include "./centipede/runner_sancov_object.h" + +namespace fuzztest::internal { + +// Like std::lock_guard, but for pthread_mutex_t. +class LockGuard { + public: + explicit LockGuard(pthread_mutex_t &mu) : mu_(mu) { pthread_mutex_lock(&mu); } + ~LockGuard() { pthread_mutex_unlock(&mu_); } + + private: + pthread_mutex_t &mu_; +}; + +// Flags derived from CENTIPEDE_RUNNER_FLAGS. +// Flags used in instrumentation callbacks are bit-packed for efficiency. 
+struct RunTimeFlags { + uint64_t path_level : 8; + uint64_t use_pc_features : 1; + uint64_t use_dataflow_features : 1; + uint64_t use_cmp_features : 1; + uint64_t callstack_level : 8; + uint64_t use_counter_features : 1; + uint64_t use_auto_dictionary : 1; + std::atomic timeout_per_input; + uint64_t timeout_per_batch; + std::atomic stack_limit_kb; + std::atomic rss_limit_mb; + uint64_t crossover_level; + uint64_t skip_seen_features : 1; + uint64_t ignore_timeout_reports : 1; + uint64_t max_len; +}; + +// One such object is created in runner's TLS. +// There is no CTOR, since we don't want to use the brittle and lazy TLS CTORs. +// All data members are zero-initialized during thread creation. +struct ThreadLocalRunnerState { + // Traces the memory comparison of `n` bytes at `s1` and `s2` called at + // `caller_pc` with `is_equal` indicating whether the two memory regions have + // equal contents. May add cmp features and auto-dictionary entries if + // enabled. + void TraceMemCmp(uintptr_t caller_pc, const uint8_t *s1, const uint8_t *s2, + size_t n, bool is_equal); + + // Intrusive doubly-linked list of TLS objects. + // Guarded by state.tls_list_mu. + ThreadLocalRunnerState *next, *prev; + + // The pthread_create() interceptor calls OnThreadStart() before the thread + // callback. The main thread also calls OnThreadStart(). OnThreadStop() will + // be called when thread termination is detected internally - see runner.cc. + void OnThreadStart(); + void OnThreadStop(); + + // Whether OnThreadStart() is called on this thread. This is used as a proxy + // of the readiness of the lower-level runtime. + bool started; + + // Paths are thread-local, so we maintain the current bounded path here. + // We allow paths of up to 100, controlled at run-time via the "path_level". + static constexpr uint64_t kBoundedPathLength = 100; + HashedRingBuffer path_ring_buffer; + + // Value of SP in the top call frame of the thread, computed in OnThreadStart. 
+ uintptr_t top_frame_sp; + // The lower bound of the stack region of this thread. 0 means unknown. + uintptr_t stack_region_low; + // Lowest observed value of SP. + uintptr_t lowest_sp; + + // The (imprecise) call stack is updated by the PC callback. + CallStack<> call_stack; + + // Cmp traces capture the arguments of CMP instructions, memcmp, etc. + // We have dedicated traces for 2-, 4-, and 8-byte comparison, and + // a catch-all `cmp_traceN` trace for memcmp, etc. + CmpTrace<2, 64> cmp_trace2; + CmpTrace<4, 64> cmp_trace4; + CmpTrace<8, 64> cmp_trace8; + CmpTrace<0, 64> cmp_traceN; + + // Set this to true if the thread needs to be ignored in ForEachTLS. + // It should be always false if the state is in the global detached_tls_list. + bool ignore; +}; + +// One global object of this type is created by the runner at start up. +// All data members will be initialized to zero, unless they have initializers. +// Accesses to the subobjects should be fast, so we are trying to avoid +// extra memory references where possible. +// +// This class has a non-trivial destructor to work with targets that do not use +// the runner or LLVM fuzzer API at all. +// +// TODO(kcc): use a CTOR with absl::kConstInit (will require refactoring). +struct GlobalRunnerState { + // Used by LLVMFuzzerMutate and initialized in main(). + ByteArrayMutator *byte_array_mutator = nullptr; + Knobs knobs; + + GlobalRunnerState(); + ~GlobalRunnerState(); + + // Runner reads flags from CentipedeGetRunnerFlags(). We don't use flags + // passed via argv so that argv flags can be passed directly to + // LLVMFuzzerInitialize, w/o filtering. The flags are separated with + // ':' on both sides, i.e. like this: ":flag1:flag2:flag3=value3". + // We do it this way to make the flag parsing code extremely simple. The + // interface is private between Centipede and the runner and may change. + // + // Note that this field reflects the initial runner flags. 
But some + // flags can change later (if wrapped with std::atomic). + const char *centipede_runner_flags = CentipedeGetRunnerFlags(); + const char *arg1 = GetStringFlag(":arg1="); + const char *arg2 = GetStringFlag(":arg2="); + const char *arg3 = GetStringFlag(":arg3="); + // The path to a file where the runner may write the description of failure. + const char *failure_description_path = + GetStringFlag(":failure_description_path="); + + // Flags. + RunTimeFlags run_time_flags = { + /*path_level=*/std::min(ThreadLocalRunnerState::kBoundedPathLength, + HasIntFlag(":path_level=", 0)), + /*use_pc_features=*/HasFlag(":use_pc_features:"), + /*use_dataflow_features=*/HasFlag(":use_dataflow_features:"), + /*use_cmp_features=*/HasFlag(":use_cmp_features:"), + /*callstack_level=*/HasIntFlag(":callstack_level=", 0), + /*use_counter_features=*/HasFlag(":use_counter_features:"), + /*use_auto_dictionary=*/HasFlag(":use_auto_dictionary:"), + /*timeout_per_input=*/HasIntFlag(":timeout_per_input=", 0), + /*timeout_per_batch=*/HasIntFlag(":timeout_per_batch=", 0), + /*stack_limit_kb=*/HasIntFlag(":stack_limit_kb=", 0), + /*rss_limit_mb=*/HasIntFlag(":rss_limit_mb=", 0), + /*crossover_level=*/HasIntFlag(":crossover_level=", 50), + /*skip_seen_features=*/HasFlag(":skip_seen_features:"), + /*ignore_timeout_reports=*/HasFlag(":ignore_timeout_reports:"), + /*max_len=*/HasIntFlag(":max_len=", 4000), + }; + + // Returns true iff `flag` is present. + // Typical usage: pass ":some_flag:", i.e. the flag name surrounded with ':'. + // TODO(ussuri): Refactor `char *` into a `string_view`. + bool HasFlag(const char *absl_nonnull flag) const { + if (!centipede_runner_flags) return false; + return strstr(centipede_runner_flags, flag) != nullptr; + } + + // If a flag=value pair is present, returns value, + // otherwise returns `default_value`. + // Typical usage: pass ":some_flag=". + // TODO(ussuri): Refactor `char *` into a `string_view`. 
+ uint64_t HasIntFlag(const char *absl_nonnull flag, + uint64_t default_value) const { + if (!centipede_runner_flags) return default_value; + const char *beg = strstr(centipede_runner_flags, flag); + if (!beg) return default_value; + return atoll(beg + strlen(flag)); // NOLINT: can't use strto64, etc. + } + + // If a :flag=value: pair is present returns value, otherwise returns nullptr. + // The result is obtained by calling strndup, so make sure to save + // it in `this` to avoid a leak. + // Typical usage: pass ":some_flag=". + // TODO(ussuri): Refactor `char *` into a `string_view`. + const char *absl_nullable GetStringFlag(const char *absl_nonnull flag) const { + if (!centipede_runner_flags) return nullptr; + // Extract "value" from ":flag=value:" inside centipede_runner_flags. + const char *beg = strstr(centipede_runner_flags, flag); + if (!beg) return nullptr; + const char *value_beg = beg + strlen(flag); + const char *end = strstr(value_beg, ":"); + if (!end) return nullptr; + return strndup(value_beg, end - value_beg); + } + + pthread_mutex_t execution_result_override_mu = PTHREAD_MUTEX_INITIALIZER; + // If not nullptr, it points to a batch result with either zero or one + // execution. When an execution result present, it will be passed as the + // execution result of the current test input. The object is owned and cleaned + // up by the state, protected by execution_result_override_mu, and set by + // `CentipedeSetExecutionResult()`. + BatchResult *execution_result_override; + + // Doubly linked list of TLSs of all live threads. + ThreadLocalRunnerState *tls_list; + // Doubly linked list of detached TLSs. + ThreadLocalRunnerState *detached_tls_list; + // Guards `tls_list` and `detached_tls_list`. + pthread_mutex_t tls_list_mu = PTHREAD_MUTEX_INITIALIZER; + // Iterates all TLS objects under tls_list_mu, except those with `ignore` set. + // Calls `callback()` on every TLS. 
+ template + void ForEachTls(Callback callback) { + LockGuard lock(tls_list_mu); + for (auto *it = tls_list; it; it = it->next) { + if (!it->ignore) callback(*it); + } + for (auto *it = detached_tls_list; it; it = it->next) { + callback(*it); + } + } + + // Reclaims all TLSs in detached_tls_list and cleans up the list. + void CleanUpDetachedTls(); + + // Computed by DlInfo(). + // Usually, the main object is the executable binary containing main() + // and most of the executable code (we assume that the target is + // built in mostly-static mode, i.e. -dynamic_mode=off). + // When the `dl_path_suffix` runner flag is provided, the main_object refers + // to the dynamic library (DSO) pointed to by this flag. + // + // Note: this runner currently does not support more than one instrumented + // DSO in the process, i.e. you either instrument the main binary, or one DSO. + // Supporting more than one DSO will require major changes, + // major added complexity, and potentially cause slowdown. + // There is currently no motivation for such a change. + DlInfo main_object; + + // State for SanitizerCoverage. + // See https://clang.llvm.org/docs/SanitizerCoverage.html. + SanCovObjectArray sancov_objects; + // An arbitrarily large size. + static constexpr size_t kDataFlowFeatureSetSize = 1 << 18; + ConcurrentBitSet data_flow_feature_set{ + absl::kConstInit}; + + // Tracing CMP instructions, capture events from these domains: + // kCMPEq, kCMPModDiff, kCMPHamming, kCMPModDiffLog, kCMPMsbEq. + // See https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-data-flow. + // An arbitrarily large size. + static constexpr size_t kCmpFeatureSetSize = 1 << 18; + // TODO(kcc): remove cmp_feature_set. 
+ ConcurrentBitSet cmp_feature_set{absl::kConstInit}; + ConcurrentBitSet cmp_eq_set{absl::kConstInit}; + ConcurrentBitSet cmp_moddiff_set{absl::kConstInit}; + ConcurrentBitSet cmp_hamming_set{absl::kConstInit}; + ConcurrentBitSet cmp_difflog_set{absl::kConstInit}; + + // We think that call stack produces rich signal, so we give a few bits to it. + static constexpr size_t kCallStackFeatureSetSize = 1 << 24; + ConcurrentBitSet callstack_set{absl::kConstInit}; + + // kMaxNumPcs is the maximum number of instrumented PCs in the binary. + // We can be generous here since the unused memory will not cost anything. + // `pc_counter_set` is a static byte set supporting up to kMaxNumPcs PCs. + static constexpr size_t kMaxNumPcs = 1 << 28; + TwoLayerConcurrentByteSet pc_counter_set{absl::kConstInit}; + // This is the actual number of PCs, aligned up to + // pc_counter_set::kSizeMultiple, computed at startup. + size_t actual_pc_counter_set_size_aligned; + + // Initialized in CTOR from the __centipede_extra_features section. + feature_t *user_defined_begin; + feature_t *user_defined_end; + + // We use edge instrumentation w/ callbacks to implement bounded-path + // coverage. + // * The current PC is converted to an offset (a PC index). + // * The offset is pushed to a HashedRingBuffer, producing a hash. + // * The resulting hash represents N most recent PCs, we use it as a feature. + // + // WARNING: this is highly experimental. + // This is far from perfect and may be not sensitive enough in some cases + // and create exponential number of features in other cases. + // Some areas to experiment with: + // * Handle only function-entry PCs, i.e. use call paths, not branch paths. + // * Play with the length of the path (kBoundedPathLength) + // * Use call stacks instead of paths (via unwinding or other + // instrumentation). + + // An arbitrarily large size. + static constexpr size_t kPathBitSetSize = 1 << 25; + // Observed paths. 
The total number of observed paths for --path_level=N + // can be up to NumPCs**N. + // So, we make the bitset very large, but it may still saturate. + ConcurrentBitSet path_feature_set{absl::kConstInit}; + + // Execution stats for the currently executed input. + ExecutionResult::Stats stats; + + // Used by trace_pc instrumentation. Populated if `pcs_file_path` flag is set. + ReversePCTable reverse_pc_table; + + // CentipedeRunnerMain() sets this to true. + bool centipede_runner_main_executed = false; + + // Timeout-related machinery. + + // Starts the watchdog thread that terminates the runner if any of the + // rss/time limits are exceeded. + void StartWatchdogThread(); + // Resets the per-input timer. Call this before executing every input. + void ResetTimers(); + + // Per-input timer. Initially, zero. ResetTimers() sets it to the current + // time. + std::atomic input_start_time; + // Per-batch timer. Initially, zero. Presumably ResetTimers() (this struct + // declares no ResetInputTimer()) sets it to the current time before the + // first input and never resets it - TODO confirm against runner.cc. + std::atomic batch_start_time; + + // The Watchdog thread sets this to true. + std::atomic watchdog_thread_started; + + // An arbitrarily large size. + static const size_t kMaxFeatures = 1 << 20; + // FeatureArray used to accumulate features from all sources. + FeatureArray g_features; + + // Features that were seen before. + static constexpr size_t kSeenFeatureSetSize = + absl::bit_ceil(feature_domains::kLastDomain.end()); + ConcurrentBitSet seen_features{absl::kConstInit}; +}; + +extern GlobalRunnerState state; +extern __thread ThreadLocalRunnerState tls; + +// Check for stack limit for the stack pointer `sp` in the current thread.
+void CheckStackLimit(uintptr_t sp); + +} // namespace fuzztest::internal + +#endif // THIRD_PARTY_CENTIPEDE_RUNNER_H_ diff --git a/src/third_party/fuzztest/dist/centipede/runner_cmp_trace.h b/src/third_party/fuzztest/dist/centipede/runner_cmp_trace.h new file mode 100644 index 00000000000..a5855a49dd0 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/runner_cmp_trace.h @@ -0,0 +1,140 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef THIRD_PARTY_CENTIPEDE_RUNNER_CMP_TRACE_H_ +#define THIRD_PARTY_CENTIPEDE_RUNNER_CMP_TRACE_H_ + +// Capturing arguments of CMP instructions, memcmp, and similar. +// WARNING: this code needs to have minimal dependencies. + +#include +#include +#include + +namespace fuzztest::internal { + +// Captures up to `kNumItems` different CMP argument pairs. +// Every argument is `kFixedSize` bytes. +// +// If `kFixedSize` == 0, the argument size is variable. +// Only the first `kNumBytesPerValue` bytes of every argument are captured. +// This is used to capture arguments of memcmp() and similar. +// +// Every new captured pair may overwrite a pair stored previously. +// +// Outside of tests, objects of this class will be created in TLS, thus no CTOR. +template +class CmpTrace { + public: + // kMaxNumBytesPerValue does not depend on kFixedSize. + static constexpr size_t kMaxNumBytesPerValue = 16; + static constexpr size_t kNumBytesPerValue = + kFixedSize ? 
kFixedSize : kMaxNumBytesPerValue; + + // No CTOR - objects will be created in TLS. + + // Clears `this`. + void Clear() { memset(this, 0, sizeof(*this)); } + + // Captures one CMP argument pair, as two byte arrays, `size` bytes each. + void Capture(uint8_t size, const uint8_t *value0, const uint8_t *value1) { + if (size > kNumBytesPerValue) size = kNumBytesPerValue; + // We choose a pseudo-random slot each time. + // This way after capturing many pairs we end up with up to `kNumItems` + // pairs which are typically, but not always, the most recent. + rand_seed_ = rand_seed_ * 1103515245 + 12345; + Item &item = items_[rand_seed_ % kNumItems]; + item.size.set(size); + __builtin_memcpy(item.value0, value0, size); + __builtin_memcpy(item.value1, value1, size); + } + + // Captures one CMP argument pair, as two integers of kFixedSize bytes each. + template + void Capture(T value0, T value1) { + // If both values are small, ignore them as not very useful. + if (value0 < 256 && value1 < 256) return; + static_assert(sizeof(T) == kFixedSize); + Capture(sizeof(T), reinterpret_cast(&value0), + reinterpret_cast(&value1)); + } + + // Iterates non-zero CMP pairs. + template + void ForEachNonZero(Callback callback) { + for (const auto &item : items_) { + if (IsZero(item.value0, item.size.get()) && + IsZero(item.value1, item.size.get())) + continue; + callback(item.size.get(), item.value0, item.value1); + } + } + + private: + // SizeField returns kFixedSize as the size, for kFixedSize != 0. + template + class SizeField { + public: + void set(uint8_t size) {} + size_t get() const { return kSize; } + }; + + // SizeField<0> actually stores the size. 
+ template <> + class SizeField<0> { + public: + void set(uint8_t size) { size_ = size; } + uint8_t get() const { return size_; } + + private: + uint8_t size_; + }; + + template + static bool IsZero(const uint8_t *value) { + T x = {}; + __builtin_memcpy(&x, value, sizeof(T)); + return x == T{}; + } + + // Returns true if all value[0:size] are zero. + static bool IsZero(const uint8_t *value, size_t size) { + if constexpr (kFixedSize == 8) return IsZero(value); + if constexpr (kFixedSize == 4) return IsZero(value); + if constexpr (kFixedSize == 2) return IsZero(value); + // The code iterates over bytes, but we expect the compiler to optimize it. + uint64_t ored_bytes = 0; + for (size_t i = 0; i < size; ++i) { + ored_bytes |= value[i]; + } + return ored_bytes == 0; + } + + // One CMP argument pair. + struct Item { + SizeField size; + uint8_t value0[kNumBytesPerValue]; + uint8_t value1[kNumBytesPerValue]; + }; + + // All argument pairs. + Item items_[kNumItems]; + + // Pseudo-random seed. + size_t rand_seed_; +}; + +} // namespace fuzztest::internal + +#endif // THIRD_PARTY_CENTIPEDE_RUNNER_CMP_TRACE_H_ diff --git a/src/third_party/fuzztest/dist/centipede/runner_cmp_trace_test.cc b/src/third_party/fuzztest/dist/centipede/runner_cmp_trace_test.cc new file mode 100644 index 00000000000..492c18f4d28 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/runner_cmp_trace_test.cc @@ -0,0 +1,122 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "./centipede/runner_cmp_trace.h" + +#include +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/nullability.h" + +namespace fuzztest::internal { +namespace { + +template +std::vector IntPairToByteVector(T a, T b) { + std::vector res; + uint8_t buff[sizeof(T)]; + memcpy(buff, &a, sizeof(T)); + res.insert(res.begin(), buff, buff + sizeof(T)); + memcpy(buff, &b, sizeof(T)); + res.insert(res.begin(), buff, buff + sizeof(T)); + return res; +} + +std::vector TwoArraysToByteVector(const uint8_t *absl_nonnull a, + const uint8_t *absl_nonnull b, + size_t size) { + std::vector res; + res.insert(res.begin(), a, a + size); + res.insert(res.begin(), b, b + size); + return res; +} + +TEST(CmpTrace, T1) { + std::vector> observed_pairs; + + auto callback = [&observed_pairs](uint8_t size, const uint8_t *v0, + const uint8_t *v1) { + std::vector cmp_pair; + cmp_pair.insert(cmp_pair.begin(), v0, v0 + size); + cmp_pair.insert(cmp_pair.begin(), v1, v1 + size); + observed_pairs.push_back(cmp_pair); + }; + + CmpTrace<2, 10> trace2; + CmpTrace<4, 11> trace4; + CmpTrace<8, 12> trace8; + CmpTrace<0, 13> traceN; + trace2.Clear(); + trace4.Clear(); + trace8.Clear(); + traceN.Clear(); + + uint16_t small_short_value0 = 10; + uint16_t small_short_value1 = 20; + uint16_t short_value0 = 310; + uint16_t short_value1 = 320; + uint32_t int_value0 = 500; + uint32_t int_value1 = 600; + uint64_t long_value0 = 1000; + uint64_t long_value1 = 2000; + uint64_t long_value2 = 4000; + uint64_t long_value3 = 8000; + + trace2.Capture(small_short_value0, small_short_value1); // will be ignored. 
+ trace2.Capture(short_value0, short_value1); + observed_pairs.clear(); + trace2.ForEachNonZero(callback); + EXPECT_THAT(observed_pairs, testing::UnorderedElementsAre(IntPairToByteVector( + short_value0, short_value1))); + + trace4.Capture(30, 40); // small values, will be ignored. + trace4.Capture(int_value0, int_value1); + observed_pairs.clear(); + trace4.ForEachNonZero(callback); + EXPECT_THAT(observed_pairs, testing::UnorderedElementsAre( + IntPairToByteVector(int_value0, int_value1))); + + trace8.Capture(200LL, 255LL); // small values, will be ignored. + trace8.Capture(long_value0, long_value1); + trace8.Capture(long_value2, long_value3); + observed_pairs.clear(); + trace8.ForEachNonZero(callback); + EXPECT_THAT(observed_pairs, + testing::UnorderedElementsAre( + IntPairToByteVector(long_value0, long_value1), + IntPairToByteVector(long_value2, long_value3))); + + constexpr uint8_t value0[10] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 0}; + constexpr uint8_t value1[10] = {0, 9, 8, 7, 6, 5, 4, 3, 2, 1}; + constexpr uint8_t long_array[20] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}; + traceN.Capture(7, value0, value1); + traceN.Capture(3, value0, value1); + traceN.Capture(10, value0, value1); + traceN.Capture(20, long_array, long_array); // will be trimmed to 16. + observed_pairs.clear(); + traceN.ForEachNonZero(callback); + EXPECT_THAT(observed_pairs, + testing::UnorderedElementsAre( + TwoArraysToByteVector(value0, value1, 10), + TwoArraysToByteVector(value0, value1, 7), + TwoArraysToByteVector(value0, value1, 3), + TwoArraysToByteVector(long_array, long_array, 16))); +} + +} // namespace +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/runner_dl_info.cc b/src/third_party/fuzztest/dist/centipede/runner_dl_info.cc new file mode 100644 index 00000000000..6d06a1b06e4 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/runner_dl_info.cc @@ -0,0 +1,333 @@ +// Copyright 2022 The Centipede Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "./centipede/runner_dl_info.h" + +#ifdef __APPLE__ +#include +#include +#else // __APPLE__ +#include +#include // dl_iterate_phdr +#endif // __APPLE__ +#include + +#include +#include +#include +#include +#include + +#ifdef __APPLE__ +#include +#endif // __APPLE__ + +#include "absl/base/nullability.h" +#include "./centipede/runner_utils.h" + +namespace fuzztest::internal { + +namespace { + +constexpr bool kDlDebug = false; // we may want to make it a runtime flag. + +// Returns true iff `string` ends with `suffix`. An empty `suffix` matches +// every string. The tail of `string` is compared directly: searching with +// strstr() only finds the first occurrence of `suffix`, which gives a wrong +// answer when `suffix` also occurs earlier in `string` (e.g. "abab", "ab"). +bool StringEndsWithSuffix(const char* absl_nonnull string, + const char* absl_nonnull suffix) { + const size_t string_len = std::strlen(string); + const size_t suffix_len = std::strlen(suffix); + if (suffix_len > string_len) return false; + return std::strcmp(string + (string_len - suffix_len), suffix) == 0; +} + +} // namespace + +#ifdef __APPLE__ +// Reference: +// https://opensource.apple.com/source/xnu/xnu-4903.221.2/EXTERNAL_HEADERS/mach-o/loader.h.auto.html + +namespace { + +// Calls `callback` on the segments with the link-time start +// address and size.
+void FindSegment(const mach_header* header, + const std::function& callback) { + const load_command* cmd = nullptr; + if (header->magic == MH_MAGIC) { + cmd = reinterpret_cast( + reinterpret_cast(header) + sizeof(mach_header)); + } else if (header->magic == MH_MAGIC_64) { + cmd = reinterpret_cast( + reinterpret_cast(header) + sizeof(mach_header_64)); + } + RunnerCheck(cmd != nullptr, "bad magic number of mach image header"); + for (size_t cmd_index = 0; cmd_index < header->ncmds; + ++cmd_index, cmd = reinterpret_cast( + reinterpret_cast(cmd) + cmd->cmdsize)) { + if constexpr (kDlDebug) { + fprintf(stderr, "%s command at %p with size 0x%" PRIx32 "\n", __func__, + cmd, cmd->cmdsize); + } + uintptr_t base, size; + const char* name; + if (cmd->cmd == LC_SEGMENT) { + const auto* seg = reinterpret_cast(cmd); + base = seg->vmaddr; + size = seg->vmsize; + name = seg->segname; + } else if (cmd->cmd == LC_SEGMENT_64) { + const auto* seg = reinterpret_cast(cmd); + base = seg->vmaddr; + size = seg->vmsize; + name = seg->segname; + } else { + continue; + } + if constexpr (kDlDebug) { + fprintf(stderr, + "%s segment name %s addr seg 0x%" PRIxPTR " size 0x%" PRIxPTR + "\n", + __func__, name, base, size); + } + if (std::strcmp(name, "__PAGEZERO") == 0) continue; + callback(name, base, size); + } + if constexpr (kDlDebug) { + fprintf(stderr, "%s finished\n", __func__); + } +} + +DlInfo GetDlInfoFromImage( + const std::function& + image_filter) { + DlInfo result; + result.Clear(); + const auto image_count = _dyld_image_count(); + for (uint32_t i = 0; i < image_count; ++i) { + const mach_header* header = _dyld_get_image_header(i); + RunnerCheck(header != nullptr, "failed to get image header"); + const char* name = _dyld_get_image_name(i); + RunnerCheck(name != nullptr, "bad image name"); + if constexpr (kDlDebug) { + fprintf(stderr, "%s image header at %p, name %s\n", __func__, header, + name); + } + if (!image_filter(header, name)) continue; + uintptr_t image_start = 0; + uintptr_t 
image_end = 0; + FindSegment(header, + [&image_start, &image_end](const char* unused_segment_name, + uintptr_t start, uintptr_t size) { + if (image_end == 0) image_start = start; + image_end = std::max(image_end, start + size); + }); + result.link_offset = _dyld_get_image_vmaddr_slide(i); + result.start_address = image_start + result.link_offset; + result.size = image_end - image_start; + RunnerCheck(result.size > 0, "bad image size"); + std::strncpy(result.path, name, sizeof(result.path)); + // strncpy() leaves the buffer unterminated when `name` fills it, so + // terminate explicitly. + result.path[sizeof(result.path) - 1] = 0; + break; + } + if constexpr (kDlDebug) { + fprintf(stderr, "%s succeeded? %d\n", __func__, result.IsSet()); + if (result.IsSet()) { + fprintf(stderr, + " start 0x%" PRIxPTR " size 0x%" PRIxPTR + " link_offset 0x%" PRIxPTR "\n", + result.start_address, result.size, result.link_offset); + } + } + return result; +} + +} // namespace + +DlInfo GetDlInfo(const char* absl_nullable dl_path_suffix) { + if constexpr (kDlDebug) { + fprintf(stderr, "GetDlInfo for path suffix %s\n", + dl_path_suffix ? dl_path_suffix : "(null)"); + } + return GetDlInfoFromImage( + [dl_path_suffix](const mach_header* unused_header, const char* name) { + return dl_path_suffix == nullptr || + StringEndsWithSuffix(name, dl_path_suffix); + }); +} + +DlInfo GetDlInfo(uintptr_t pc) { + if constexpr (kDlDebug) { + fprintf(stderr, "GetDlInfo for pc 0x%" PRIxPTR "\n", pc); + } + return GetDlInfoFromImage([pc](const mach_header* header, + const char* unused_image_name) { + bool matched = false; + FindSegment(header, [pc, header, &matched]( + const char* name, uintptr_t start, uintptr_t size) { + if (std::strcmp(name, "__TEXT") != 0) return; + const uintptr_t runtime_text_start = reinterpret_cast(header); + if (pc >= runtime_text_start && pc < runtime_text_start + size) { + matched = true; + } + }); + return matched; + }); +} + +#else // __APPLE__ + +namespace { + +// Struct to pass to dl_iterate_phdr's callback. +struct DlCallbackParam { + // Full path to the instrumented library or nullptr for the main binary.
+ const char *dl_path_suffix; + // PC to look for in a DL. + uintptr_t pc; + // DlInfo to set on success. + DlInfo &result; +}; + +int g_some_global; // Used in DlIteratePhdrCallback. + +// Returns the size of the DL represented by `info`. +size_t DlSize(struct dl_phdr_info *absl_nonnull info) { + size_t size = 0; + // Iterate program headers. + for (int j = 0; j < info->dlpi_phnum; ++j) { + // We are only interested in "Loadable program segments". + const auto &phdr = info->dlpi_phdr[j]; + if (phdr.p_type != PT_LOAD) continue; + // phdr.p_vaddr represents the offset of the segment from info->dlpi_addr. + // phdr.p_memsz is the segment size in bytes. + // Their sum is the offset of the end of the segment from info->dlpi_addr. + uintptr_t end_offset = phdr.p_vaddr + phdr.p_memsz; + // We compute result.size as the largest such offset. + if (size < end_offset) size = end_offset; + + // phdr.p_flags indicates RWX access rights for the segment, + // e.g. `phdr.p_flags & PF_X` is non-zero if the segment is executable. + if constexpr (kDlDebug) { + char executable_bit = (phdr.p_flags & PF_X) ? 'X' : '-'; + char writable_bit = (phdr.p_flags & PF_W) ? 'W' : '-'; + char readable_bit = (phdr.p_flags & PF_R) ? 'R' : '-'; + fprintf(stderr, + "%s: segment [%d] name: %s addr: %" PRIx64 " size: %" PRIu64 + " flags: %c%c%c\n", + __func__, j, info->dlpi_name, phdr.p_vaddr, phdr.p_memsz, + executable_bit, writable_bit, readable_bit); + } + } + return size; +} + +// See man dl_iterate_phdr. +// `param_voidptr` is cast to a `DlCallbackParam *param`. +// Looks for the dynamic library who's dlpi_name ends with +// `param->dl_path_suffix` or for the main binary if `param->dl_path_suffix == +// nullptr`. The code assumes that the main binary is the first one to be +// iterated on. If the desired library is found, sets result.start_address and +// result.size, otherwise leaves result unchanged. 
+int DlIteratePhdrCallback(struct dl_phdr_info *absl_nonnull info, size_t size,
+                          void *absl_nonnull param_voidptr) {
+  const DlCallbackParam *param = static_cast<DlCallbackParam *>(param_voidptr);
+  DlInfo &result = param->result;
+  RunnerCheck(!result.IsSet(), "result is already set");
+  // Skip uninteresting info.
+  if (param->dl_path_suffix != nullptr &&
+      !StringEndsWithSuffix(info->dlpi_name, param->dl_path_suffix)) {
+    return 0;  // 0 indicates we want to see the other entries.
+  }
+
+  const auto some_code_address =
+      reinterpret_cast<uintptr_t>(DlIteratePhdrCallback);
+  const auto some_global_address = reinterpret_cast<uintptr_t>(&g_some_global);
+
+  result.start_address = info->dlpi_addr;
+  result.size = DlSize(info);
+  result.link_offset = result.start_address;
+  // Copy dlpi_name to result.path; strncpy may not terminate, so force a NUL.
+  std::strncpy(result.path, info->dlpi_name, sizeof(result.path));
+  result.path[sizeof(result.path) - 1] = 0;
+
+  if constexpr (kDlDebug) {
+    fprintf(stderr,
+            "%s: name: %s addr: %" PRIx64 " size: %" PRIu64
+            " addr+size: %" PRIx64 " code: %" PRIx64 " global: %" PRIx64 "\n",
+            __func__, info->dlpi_name, info->dlpi_addr, result.size,
+            info->dlpi_addr + result.size, some_code_address,
+            some_global_address);
+  }
+
+  RunnerCheck(result.size != 0,
+              "DlIteratePhdrCallback failed to compute result.size");
+  if (param->dl_path_suffix == nullptr) {
+    // When the main binary is coverage-instrumented, we currently only support
+    // statically linking this runner. Which means, that the runner itself
+    // is part of the main binary, and we can do additional checks, which we
+    // can't do if the runner is a separate library.
+    RunnerCheck(result.InBounds(some_code_address),
+                "DlIteratePhdrCallback: a sample code address is not in bounds "
+                "of main executable");
+    RunnerCheck(result.InBounds(some_global_address),
+                "DlIteratePhdrCallback: a sample global address is not in "
+                "bounds of main executable");
+  }
+  return result.IsSet();  // return 1 if we found what we were looking for.
+}
+
+// See man dl_iterate_phdr.
+// `param_voidptr` is cast to a `DlCallbackParam *param`.
+// Looks for the dynamic library whose address range contains `param->pc`.
+int DlIteratePhdrPCCallback(struct dl_phdr_info *absl_nonnull info,
+                            size_t unused, void *absl_nonnull param_voidptr) {
+  const DlCallbackParam *param = static_cast<DlCallbackParam *>(param_voidptr);
+  DlInfo &result = param->result;
+  if (param->pc < info->dlpi_addr) return 0;  // wrong DSO.
+  const size_t size = DlSize(info);
+  if (param->pc >= info->dlpi_addr + size) return 0;  // wrong DSO.
+  result.start_address = info->dlpi_addr;
+  result.size = size;
+  result.link_offset = result.start_address;
+  if (std::strlen(info->dlpi_name) != 0) {
+    // copy dlpi_name to result.path.
+    std::strncpy(result.path, info->dlpi_name, sizeof(result.path));
+  } else {
+    // dlpi_name is empty, this is the main binary, get path via /proc/self/exe.
+    int res = readlink("/proc/self/exe", result.path, sizeof(result.path));
+    RunnerCheck(res > 0, "readlink(\"/proc/self/exe\") failed");
+  }
+  result.path[sizeof(result.path) - 1] = 0;
+  return 1;  // Found the DSO: a non-zero return stops dl_iterate_phdr's walk.
+}
+
+}  // namespace
+
+DlInfo GetDlInfo(const char *absl_nullable dl_path_suffix) {
+  DlInfo result;
+  result.Clear();
+  DlCallbackParam callback_param = {dl_path_suffix, /*pc=*/0, result};
+  dl_iterate_phdr(DlIteratePhdrCallback, &callback_param);
+  return result;
+}
+
+DlInfo GetDlInfo(uintptr_t pc) {
+  DlInfo result;
+  result.Clear();
+  DlCallbackParam callback_param = {/*dl_path_suffix=*/nullptr, pc, result};
+  dl_iterate_phdr(DlIteratePhdrPCCallback, &callback_param);
+  return result;
+}
+
+#endif  // __APPLE__
+
+}  // namespace fuzztest::internal
diff --git a/src/third_party/fuzztest/dist/centipede/runner_dl_info.h b/src/third_party/fuzztest/dist/centipede/runner_dl_info.h
new file mode 100644
index 00000000000..f6fe5ccecfd
--- /dev/null
+++ b/src/third_party/fuzztest/dist/centipede/runner_dl_info.h
@@ -0,0 +1,57 @@
+// Copyright 2022 The Centipede Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef THIRD_PARTY_CENTIPEDE_RUNNER_DL_INFO_H_
+#define THIRD_PARTY_CENTIPEDE_RUNNER_DL_INFO_H_
+
+#include <cstdint>
+#include <cstring>
+
+#include "absl/base/nullability.h"
+
+namespace fuzztest::internal {
+
+// Basic information about one dynamic library (or executable).
+// No CTOR - these objects may need to be linker-initialized.
+struct DlInfo {
+  uintptr_t start_address;  // Address in memory where the object is loaded.
+  uintptr_t size;           // Number of bytes in the object.
+  intptr_t link_offset;  // Difference between runtime addresses and link-time
+                         // addresses.
+  char path[4096];       // Pathname from which the object was loaded.
+
+  void Clear() { std::memset(this, 0, sizeof(*this)); }
+
+  // Returns true if this object has been set.
+  bool IsSet() const {
+    // start_address can be zero for a non-PIE binary, but size can't be zero.
+    return size != 0;
+  }
+  // True iff `addr` is in [start_address, start_address + size); overflow-safe.
+  bool InBounds(uintptr_t addr) const {
+    return addr >= start_address && addr - start_address < size;
+  }
+};
+
+// Returns DlInfo for the dynamic library whose path ends with `dl_path_suffix`.
+// If `dl_path_suffix` is `nullptr`, returns DlInfo for the main binary.
+// If the required library is not found, returns empty DlInfo (`!IsSet()`).
+DlInfo GetDlInfo(const char* absl_nullable dl_path_suffix);
+
+// Returns DlInfo for the dynamic library that contains `pc`.
+DlInfo GetDlInfo(uintptr_t pc);
+
+}  // namespace fuzztest::internal
+
+#endif  // THIRD_PARTY_CENTIPEDE_RUNNER_DL_INFO_H_
diff --git a/src/third_party/fuzztest/dist/centipede/runner_flags.cc b/src/third_party/fuzztest/dist/centipede/runner_flags.cc
new file mode 100644
index 00000000000..0f0535f951e
--- /dev/null
+++ b/src/third_party/fuzztest/dist/centipede/runner_flags.cc
@@ -0,0 +1,85 @@
+// Copyright 2023 The Centipede Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "./centipede/runner_flags.h"
+
+#include <cstddef>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace fuzztest::internal {
+
+RunnerFlags::RunnerFlags(const std::string& runner_flags_string) {
+  size_t pos = 0;
+  while (pos != std::string::npos) {
+    // Extract substring from pos up to next ':'.
+    size_t colon_pos = runner_flags_string.find_first_of(':', pos);
+    std::string flag;
+    if (colon_pos == std::string::npos) {
+      flag = runner_flags_string.substr(pos);
+      pos = std::string::npos;
+    } else {
+      flag = runner_flags_string.substr(pos, colon_pos - pos);
+      pos = runner_flags_string.find_first_not_of(':', colon_pos);
+    }
+
+    if (!flag.empty()) {
+      std::string value;
+      // Check to see if flag has a value.
+      size_t assignment_pos = flag.find_first_of('=');
+      if (assignment_pos != std::string::npos) {
+        value = flag.substr(assignment_pos + 1);
+        flag.resize(assignment_pos);
+      }
+
+      // We do not check for duplicate flags in the input. Multiple instances
+      // of a flag are stored in the order in which they appear in the input.
+      if (!flag.empty()) {  // ignore malformed flag "="
+        flags_.emplace_back(std::move(flag), std::move(value));
+      }
+    }
+  }
+}
+
+std::string RunnerFlags::ToString() const {
+  if (flags_.empty()) return "";
+
+  std::vector<std::string> output_fragments;
+  output_fragments.reserve(flags_.size() + 1);
+  size_t output_size = 0;
+  for (const auto& [flag, value] : flags_) {
+    std::string s = ":" + flag;
+    if (!value.empty()) {
+      s += "=" + value;
+    }
+    output_size += s.size();
+    output_fragments.push_back(std::move(s));
+  }
+
+  // Add a trailing ':' so that output starts and ends with ':'s.
+  output_fragments.push_back(":");
+  output_size++;
+
+  // Join fragments to form output. Reserve output size before joining to
+  // avoid a quadratic behavior.
+  std::string output;
+  output.reserve(output_size);
+  for (const auto& fragment : output_fragments) {
+    output.append(fragment);
+  }
+  return output;
+}
+
+}  // namespace fuzztest::internal
diff --git a/src/third_party/fuzztest/dist/centipede/runner_flags.h b/src/third_party/fuzztest/dist/centipede/runner_flags.h
new file mode 100644
index 00000000000..b67b994547a
--- /dev/null
+++ b/src/third_party/fuzztest/dist/centipede/runner_flags.h
@@ -0,0 +1,93 @@
+// Copyright 2023 The Centipede Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef THIRD_PARTY_CENTIPEDE_RUNNER_FLAGS_H_
+#define THIRD_PARTY_CENTIPEDE_RUNNER_FLAGS_H_
+
+#include <cstddef>
+#include <string>
+#include <utility>  // std::pair
+#include <vector>
+
+namespace fuzztest::internal {
+
+class RunnerFlags {
+ public:
+  // Takes a string of colon-separated flags. Flags in the string are either
+  // 1. <flag> or
+  // 2. <flag>=<value>
+  explicit RunnerFlags(const std::string& runner_flags_string);
+
+  ~RunnerFlags() = default;
+  // By default copyable and movable.
+  RunnerFlags(const RunnerFlags&) = default;
+  RunnerFlags& operator=(const RunnerFlags&) = default;
+  RunnerFlags(RunnerFlags&&) = default;
+  RunnerFlags& operator=(RunnerFlags&&) = default;
+
+  // Tells if 'flag' is in this.
+  bool HasFlag(const std::string& flag) const {
+    return IndexOfFlag(flag) != flags_.size();
+  }
+
+  // Gets value of 'flag'. Returns an empty string if 'flag' is not in this
+  // or 'flag' has no value. If a flag is repeated, this returns
+  // value of the last occurrence.
+  std::string GetFlagValue(const std::string& flag) const {
+    const size_t index = IndexOfFlag(flag);
+    return index == flags_.size() ? "" : flags_[index].second;
+  }
+
+  // Sets 'flag' to 'value'. If 'value' is empty, 'flag' is considered present
+  // but without a value. If 'flag' is not in this, it is appended at the end
+  // of flags. If 'flag' is repeated, only the last occurrence is updated.
+  void SetFlagValue(const std::string& flag, const std::string& value) {
+    const size_t index = IndexOfFlag(flag);
+    if (index < flags_.size()) {
+      flags_[index].second = value;
+    } else {
+      flags_.emplace_back(flag, value);
+    }
+  }
+
+  // Returns all flags separated by colons, in insertion order (not sorted).
+  // A non-empty result begins and ends with a colon; no flags yields "".
+  std::string ToString() const;
+
+ private:
+  // Returns index of 'flag' in flags_ or flags_.size() if not found.
+  size_t IndexOfFlag(const std::string& flag) const {
+    const size_t flags_size = flags_.size();
+    size_t i = 0;
+    // Search backward so that we pick the last occurrence of a repeated flag.
+    while (i < flags_size) {
+      size_t pos = flags_size - 1 - i;
+      if (flags_[pos].first == flag) {
+        return pos;
+      }
+      ++i;
+    }
+    return flags_size;
+  }
+
+  // Individual flags with their optional values as a vector of string pairs.
+  // To make this suitable for the runner, we use std::vector<>, which is not
+  // very efficient to look up. This should not be a problem as the number of
+  // flags is quite small.
+  std::vector<std::pair<std::string, std::string>> flags_;
+};
+
+}  // namespace fuzztest::internal
+
+#endif  // THIRD_PARTY_CENTIPEDE_RUNNER_FLAGS_H_
diff --git a/src/third_party/fuzztest/dist/centipede/runner_flags_test.cc b/src/third_party/fuzztest/dist/centipede/runner_flags_test.cc
new file mode 100644
index 00000000000..4943a499c5a
--- /dev/null
+++ b/src/third_party/fuzztest/dist/centipede/runner_flags_test.cc
@@ -0,0 +1,81 @@
+// Copyright 2023 The Centipede Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#include "./centipede/runner_flags.h" + +#include "gtest/gtest.h" + +namespace fuzztest::internal { + +namespace { + +TEST(RunnerFlags, Empty) { + RunnerFlags runner_flags(""); + EXPECT_TRUE(runner_flags.ToString().empty()); +} + +TEST(RunnerFlags, Malformed) { + RunnerFlags runner_flags("a=x:b=:=c"); + EXPECT_EQ(runner_flags.ToString(), ":a=x:b:"); +} + +TEST(RunnerFlags, HasFlag) { + RunnerFlags runner_flags(":a:b=x:"); + + EXPECT_TRUE(runner_flags.HasFlag("a")); + EXPECT_TRUE(runner_flags.HasFlag("b")); + EXPECT_FALSE(runner_flags.HasFlag("c")); +} + +TEST(RunnerFlags, GetFlagValue) { + RunnerFlags runner_flags(":a=x:b:"); + + EXPECT_EQ(runner_flags.GetFlagValue("a"), "x"); + EXPECT_TRUE(runner_flags.GetFlagValue("b").empty()); + EXPECT_TRUE(runner_flags.GetFlagValue("c").empty()); +} + +TEST(RunnerFlags, RepeatedFlag) { + RunnerFlags runner_flags(":a=x:a=y:"); + EXPECT_EQ(runner_flags.GetFlagValue("a"), "y"); +} + +TEST(RunnerFlags, SetFlagValue) { + RunnerFlags runner_flags(":a=x:b:"); + + // Multiple updates. + runner_flags.SetFlagValue("a", "red"); + EXPECT_EQ(runner_flags.GetFlagValue("a"), "red"); + runner_flags.SetFlagValue("a", "green"); + EXPECT_EQ(runner_flags.GetFlagValue("a"), "green"); + + // Changing a flag to valueless. + runner_flags.SetFlagValue("a", ""); + EXPECT_TRUE(runner_flags.GetFlagValue("a").empty()); + + // Adding value to an existing flag without value. + runner_flags.SetFlagValue("b", "yellow"); + EXPECT_EQ(runner_flags.GetFlagValue("b"), "yellow"); + + // Adding a new flag. 
+ runner_flags.SetFlagValue("c", "blue"); + EXPECT_EQ(runner_flags.GetFlagValue("c"), "blue"); +} + +TEST(RunnerFlags, ToString) { + RunnerFlags runner_flags(":b=x:a:c=y:"); + EXPECT_EQ(runner_flags.ToString(), ":b=x:a:c=y:"); +} +} // namespace +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/runner_fork_server.cc b/src/third_party/fuzztest/dist/centipede/runner_fork_server.cc new file mode 100644 index 00000000000..28ece5a082f --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/runner_fork_server.cc @@ -0,0 +1,351 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Fork server, a.k.a. a process Zygote, for the Centipede runner. +// +// Startup: +// * Centipede creates two named FIFO pipes: pipe0 and pipe1. +// * Centipede runs the target in background, and passes the FIFO names to it +// using two environment variables: CENTIPEDE_FORK_SERVER_FIFO[01]. +// * Centipede opens the pipe0 for writing, pipe1 for reading. +// These would block until the same pipes are open in the runner. +// * Runner, early at startup, checks if it is given the pipe names. +// If so, it opens pipe0 for reading, pipe1 for writing, +// and enters the infinite fork-server loop. +// Loop: +// * Centipede writes a byte to pipe0. +// * Runner blocks until it reads a byte from pipe0, then forks and waits. +// This is where the child process executes and does the work. 
+// This works because every execution of the target has the same arguments. +// * Runner receives the child exit status and writes it to pipe1. +// * Centipede blocks until it reads the status from pipe1. +// Exit: +// * Centipede closes the pipes (and then deletes them). +// * Runner (the fork server) fails on the next read from pipe0 and exits. +// +// The fork server code kicks in super-early in the process startup, +// via injecting itself into the `.preinit_array`. +// Ensure that this code is not dropped from linking (alwayslink=1). +// +// The main benefts of the fork server over plain fork/exec or system() are: +// * Dynamic linking happens once at the fork-server startup. +// * fork is cheaper than fork/exec, especially when running multiple threads. +// +// Other than performance, using fork server should be the same as not using it. +// +// Similar ideas: +// * lcamtuf.blogspot.com/2014/10/fuzzing-binaries-without-execve.html +// * Android Zygote. +// +// We try to avoid any high-level code here, even most of libc because this code +// works too early in the process. E.g. getenv() will not work yet. + +#include +#ifdef __APPLE__ +#include +#else // __APPLE__ +#include // ARG_MAX +#endif // __APPLE__ +#include +#include +#include + +#include +#include +#include + +#include "absl/base/nullability.h" + +namespace fuzztest::internal { + +namespace { + +constexpr bool kForkServerDebug = false; +[[maybe_unused]] constexpr bool kForkServerDumpEnvAtStart = false; + +} // namespace + +// Writes a C string to stderr when debugging, no-op otherwise. +void Log(const char *absl_nonnull str) { + if constexpr (kForkServerDebug) { + (void)write(STDERR_FILENO, str, strlen(str)); + fsync(STDERR_FILENO); + } +} + +// Maybe writes the `reason` to stderr; then calls _exit. We use this instead of +// CHECK/RunnerCheck since the fork server runs at the very early stage of the +// process, where the logging functions used there may not work. 
+void Exit(const char *absl_nonnull reason) { + Log(reason); + _exit(0); // The exit code does not matter, it won't be checked anyway. +} + +// Contents of /proc/self/environ. We avoid malloc, so it's a fixed-size global. +// The fork server will fail to initialize if /proc/self/environ is too large. +static char env[ARG_MAX]; +static ssize_t env_size; + +void GetAllEnv() { +#ifdef __APPLE__ + // Reference: + // https://chromium.googlesource.com/crashpad/crashpad/+/360e441c53ab4191a6fd2472cc57c3343a2f6944/util/posix/process_util_mac.cc + char args[ARG_MAX]; + size_t args_size = sizeof(args); + int mib[] = {CTL_KERN, KERN_PROCARGS2, getpid()}; + int rv = + sysctl(mib, sizeof(mib) / sizeof(mib[0]), args, &args_size, nullptr, 0); + if (rv != 0) { + Exit("GetEnv: sysctl({CTK_KERN, KERN_PROCARGS2, ...}) failed"); + } + if (args_size < sizeof(int)) { + Exit("GetEnv: args_size too small"); + } + int argc = 0; + memcpy(&argc, &args[0], sizeof(argc)); + size_t start_pos = sizeof(argc); + // Find the end of the executable path. + while (start_pos < args_size && args[start_pos] != 0) ++start_pos; + if (start_pos == args_size) { + Exit("GetEnv: envp not found"); + } + // Find the beginning of the string area. + while (start_pos < args_size && args[start_pos] == 0) ++start_pos; + if (start_pos == args_size) { + Exit("GetEnv: envp not found"); + } + // Ignore the first argc strings, after which is the envp. 
+ for (int i = 0; i < argc; ++i) { + while (start_pos < args_size && args[start_pos] != 0) ++start_pos; + if (start_pos == args_size) { + Exit("GetEnv: envp not found"); + } + ++start_pos; + } + const size_t end_pos = args_size; + memcpy(env, &args[start_pos], end_pos - start_pos); + env_size = end_pos - start_pos; + if constexpr (kForkServerDumpEnvAtStart) { + size_t pos = start_pos; + while (pos < args_size) { + const size_t len = strnlen(&args[pos], args_size - pos); + (void)write(STDERR_FILENO, &args[pos], len); + (void)write(STDERR_FILENO, "\n", 1); + pos += len + 1; + } + fsync(STDERR_FILENO); + } +#else // __APPLE__ + // Reads /proc/self/environ into env. + int fd = open("/proc/self/environ", O_RDONLY); + if (fd < 0) Exit("GetEnv: can't open /proc/self/environ\n"); + env_size = read(fd, env, sizeof(env)); + if (env_size < 0) Exit("GetEnv: can't read to env\n"); + if (close(fd) != 0) Exit("GetEnv: can't close /proc/self/environ\n"); +#endif // __APPLE__ + env[sizeof(env) - 1] = 0; // Just in case. +} + +// Gets a zero-terminated string matching the environment `key` (ends with '='). +const char *absl_nullable GetOneEnv(const char *absl_nonnull key) { + size_t key_len = strlen(key); + if (env_size < key_len) return nullptr; + bool in_the_beginning_of_key = true; + // env is not a C string. + // It is an array of bytes, with '\0' between individual key=val pairs. + for (size_t idx = 0; idx < env_size - key_len; ++idx) { + if (env[idx] == 0) { + in_the_beginning_of_key = true; + continue; + } + if (in_the_beginning_of_key && 0 == memcmp(env + idx, key, key_len)) + return &env[idx + key_len]; // zero-terminated. + in_the_beginning_of_key = false; + } + return nullptr; +} + +// Starts the fork server if the pipes are given. +// This function is called from `.preinit_array` when linked statically, +// or from the DSO constructor when injected via LD_PRELOAD. 
+// Note: it must run before the GlobalRunnerState constructor because +// GlobalRunnerState may terminate the process early due to an error, +// then we never open the fifos and the corresponding opens in centipede +// hang forever. +// The priority 150 is chosen on the lower end (higher priority) +// of the user-available range (101-999) to allow ordering with other +// constructors and C++ constructors (init_priority). Note: constructors +// without explicitly specified priority run after all constructors with +// explicitly specified priority, thus we still run before most +// "normal" constructors. +__attribute__((constructor(150))) void ForkServerCallMeVeryEarly() { + // Guard against calling twice. + static bool called_already = false; + if (called_already) return; + called_already = true; + // Startup. + GetAllEnv(); + const char *pipe0_name = GetOneEnv("CENTIPEDE_FORK_SERVER_FIFO0="); + const char *pipe1_name = GetOneEnv("CENTIPEDE_FORK_SERVER_FIFO1="); + if (!pipe0_name || !pipe1_name) return; + Log("###Centipede fork server requested\n"); + int pipe0 = open(pipe0_name, O_RDONLY); + if (pipe0 < 0) Exit("###open pipe0 failed\n"); + int pipe1 = open(pipe1_name, O_WRONLY); + if (pipe1 < 0) Exit("###open pipe1 failed\n"); + Log("###Centipede fork server ready\n"); + + struct sigaction old_sigterm_act{}; + struct sigaction sigterm_act{}; + sigterm_act.sa_handler = [](int) {}; + if (sigaction(SIGTERM, &sigterm_act, &old_sigterm_act) != 0) { + Exit("###sigaction failed on SIGTERM for the fork server"); + } + + struct sigaction old_sigchld_act{}; + struct sigaction sigchld_act{}; + sigchld_act.sa_handler = [](int) {}; + if (sigaction(SIGCHLD, &sigchld_act, &old_sigchld_act) != 0) { + Exit("###sigaction failed on SIGCHLD for the fork server"); + } + + sigset_t old_sigset; + sigset_t server_sigset; + if (sigprocmask(SIG_SETMASK, nullptr, &server_sigset) != 0) { + Exit("###sigprocmask() failed to get the existing sigset\n"); + } + if (sigaddset(&server_sigset, 
SIGTERM) != 0) { + Exit("###sigaddset() failed to add SIGTERM\n"); + } + if (sigaddset(&server_sigset, SIGCHLD) != 0) { + Exit("###sigaddset() failed to add SIGCHLD\n"); + } + if (sigprocmask(SIG_SETMASK, &server_sigset, &old_sigset) != 0) { + Exit("###sigprocmask() failed to set the fork server sigset\n"); + } + + sigset_t wait_sigset; + if (sigemptyset(&wait_sigset) != 0) { + Exit("###sigemptyset() failed\n"); + } + if (sigaddset(&wait_sigset, SIGTERM) != 0) { + Exit("###sigaddset() failed to add SIGTERM to the wait sigset\n"); + } + if (sigaddset(&wait_sigset, SIGCHLD) != 0) { + Exit("###sigaddset() failed to add SIGCHLD to the wait sigset\n"); + } + + // Loop. + while (true) { + Log("###Centipede fork server blocking on pipe0\n"); + // This read will fail when Centipede shuts down the pipes. + char ch = 0; + if (read(pipe0, &ch, 1) != 1) Exit("###read from pipe0 failed\n"); + Log("###Centipede starting fork\n"); + auto pid = fork(); + if (pid < 0) { + Exit("###fork failed\n"); + } else if (pid == 0) { + if (sigaction(SIGTERM, &old_sigterm_act, nullptr) != 0) { + Exit("###sigaction failed on SIGTERM for the child"); + } + if (sigaction(SIGCHLD, &old_sigchld_act, nullptr) != 0) { + Exit("###sigaction failed on SIGCHLD for the child"); + } + if (sigprocmask(SIG_SETMASK, &old_sigset, nullptr) != 0) { + Exit("###sigprocmask() failed to restore the previous sigset\n"); + } + // Child process. Reset stdout/stderr and let it run normally. + for (int fd = 1; fd <= 2; fd++) { + lseek(fd, 0, SEEK_SET); + // NOTE: Allow ftruncate() to fail by ignoring its return; that okay to + // happen when the stdout/stderr are not redirected to a file. + (void)ftruncate(fd, 0); + } + return; + } else { + // Parent process. 
+ int status = -1; + while (true) { + int sig = -1; + if (sigwait(&wait_sigset, &sig) != 0) { + Exit("###sigwait() failed\n"); + } + if (sig == SIGCHLD) { + Log("###Got SIGCHLD\n"); + const pid_t ret = waitpid(pid, &status, WNOHANG); + if (ret < 0) { + Exit("###waitpid failed\n"); + } + if (ret == pid && (WIFEXITED(status) || WIFSIGNALED(status))) { + Log("###Got exit status\n"); + break; + } + } else if (sig == SIGTERM) { + Log("###Got SIGTERM\n"); + kill(pid, SIGTERM); + } else { + Exit("###Unknown signal from sigwait\n"); + } + } + if (WIFEXITED(status)) { + if (WEXITSTATUS(status) == EXIT_SUCCESS) + Log("###Centipede fork returned EXIT_SUCCESS\n"); + else if (WEXITSTATUS(status) == EXIT_FAILURE) + Log("###Centipede fork returned EXIT_FAILURE\n"); + else + Log("###Centipede fork returned unknown failure status\n"); + } else { + Log("###Centipede fork crashed\n"); + } + Log("###Centipede fork writing status to pipe1\n"); + if (write(pipe1, &status, sizeof(status)) == -1) { + Exit("###write to pipe1 failed\n"); + } + // Deplete any remaining signals before the next execution. Controller + // won't send more signals after write succeeded. + { + sigset_t pending; + while (true) { + if (sigpending(&pending) != 0) { + Exit("###sigpending() failed\n"); + } + if (sigismember(&pending, SIGTERM) || + sigismember(&pending, SIGCHLD)) { + int unused_sig; + if (sigwait(&wait_sigset, &unused_sig) != 0) { + Exit("###sigwait() failed\n"); + } + } else { + break; + } + } + } + } + } + // The only way out of the loop is via Exit() or return. + __builtin_unreachable(); +} + +// If supported, use .preinit_array to call `ForkServerCallMeVeryEarly` even +// earlier than the `constructor` attribute of the declaration. This helps to +// avoid potential conflicts with higher-priority constructors. +#ifdef __APPLE__ +// .preinit_array is not supported in MacOS. 
+#else // __APPLE__ +__attribute__((section(".preinit_array"))) auto call_very_early = + ForkServerCallMeVeryEarly; +#endif // __APPLE__ + +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/runner_interceptors.cc b/src/third_party/fuzztest/dist/centipede/runner_interceptors.cc new file mode 100644 index 00000000000..886c4508023 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/runner_interceptors.cc @@ -0,0 +1,217 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Function interceptors for Centipede. + +#include // for dlsym() +#include + +#include +#include + +#include "absl/base/nullability.h" +#include "absl/base/optimization.h" +#include "./centipede/runner.h" + +using fuzztest::internal::tls; + +// Used for the interceptors to avoid sanitizing them, as they could be called +// before or during the sanitizer initialization. Instead, we check if the +// current thread is marked as started by the runner as the proxy of sanitizier +// initialization. If not, we skip the interception logic. +#define NO_SANITIZE __attribute__((no_sanitize("all"))) + +namespace { + +// Wrapper for dlsym(). +// Returns the pointer to the real function `function_name`. +// In most cases we need FuncAddr("foo") to be called before the first call to +// foo(), which means we either need to do this very early at startup +// (e.g. pre-init array), or on the first call. 
+// Currently, we do this on the first call via function-scope static. +template +FunctionT FuncAddr(const char *function_name) { + void *addr = dlsym(RTLD_NEXT, function_name); + return reinterpret_cast(addr); +} + +// 3rd and 4th arguments to pthread_create(), packed into a struct. +struct ThreadCreateArgs { + void *(*start_routine)(void *); + void *arg; +}; + +// Wrapper for a `start_routine` argument of pthread_create(). +// Calls the actual start_routine and returns its results. +// Performs custom actions before and after start_routine(). +// `arg` is a `ThreadCreateArgs *` with the actual pthread_create() args. +void *MyThreadStart(void *absl_nonnull arg) { + auto *args_orig_ptr = static_cast(arg); + auto args = *args_orig_ptr; + delete args_orig_ptr; // allocated in the pthread_create wrapper. + tls.OnThreadStart(); + void *retval = args.start_routine(args.arg); + return retval; +} + +// Normalize the *cmp result value to be one of {1, -1, 0}. +// According to the spec, *cmp can return any positive or negative value, +// and in fact it does return various different positive and negative values +// depending on . These values are later passed to our +// CMP instrumentation and are used to produce features. +// If we don't normalize the return value here, our tests may be flaky. +int NormalizeCmpResult(int result) { + if (result < 0) return -1; + if (result > 0) return 1; + return result; +} + +} // namespace + +namespace fuzztest::internal { +void RunnerInterceptor() {} // to be referenced in runner.cc +} // namespace fuzztest::internal + +// A sanitizer-compatible way to intercept functions that are potentially +// intercepted by sanitizers, in which case the symbol __interceptor_X would be +// defined for intercepted function X. So we always forward an intercepted call +// to the sanitizer interceptor if it exists, and fall back to the next +// definition following dlsym. 
+// +// We define the X_orig pointers that are statically initialized to GetOrig_X() +// with the aforementioned logic to fill the pointers early, but they might +// still be too late. So the Centipede interceptors might need to handle the +// nullptr case and/or use REAL(X), which calls GetOrig_X() when needed. Also +// see compiler-rt/lib/interception/interception.h in the llvm-project source +// code. +// +// Note that since LLVM 17 it allows three interceptions (from the original +// binary, an external tool, and a sanitizer) to co-exist under a new scheme, +// while it is still compatible with the old way used here. +#define SANITIZER_INTERCEPTOR_NAME(orig_func_name) \ + __interceptor_##orig_func_name +#define DECLARE_CENTIPEDE_ORIG_FUNC(ret_type, orig_func_name, args) \ + extern "C" __attribute__((weak)) ret_type( \ + SANITIZER_INTERCEPTOR_NAME(orig_func_name)) args; \ + static decltype(&SANITIZER_INTERCEPTOR_NAME( \ + orig_func_name)) GetOrig_##orig_func_name() { \ + if (auto p = &SANITIZER_INTERCEPTOR_NAME(orig_func_name)) return p; \ + return FuncAddr( \ + #orig_func_name); \ + } \ + static ret_type(*orig_func_name##_orig) args; \ + __attribute__((constructor)) void InitializeOrig_##orig_func_name() { \ + orig_func_name##_orig = GetOrig_##orig_func_name(); \ + } +#define REAL(orig_func_name) \ + (orig_func_name##_orig ? orig_func_name##_orig : GetOrig_##orig_func_name()) + +DECLARE_CENTIPEDE_ORIG_FUNC(int, memcmp, + (const void *s1, const void *s2, size_t n)); +DECLARE_CENTIPEDE_ORIG_FUNC(int, strcmp, (const char *s1, const char *s2)); +DECLARE_CENTIPEDE_ORIG_FUNC(int, strncmp, + (const char *s1, const char *s2, size_t n)); +DECLARE_CENTIPEDE_ORIG_FUNC(int, pthread_create, + (pthread_t * thread, const pthread_attr_t *attr, + void *(*start_routine)(void *), void *arg)); + +// Fallback for the case *cmp_orig is null. +// Will be executed several times at process startup, if at all. 
+static NO_SANITIZE int memcmp_fallback(const void *s1, const void *s2, + size_t n) { + const auto *p1 = static_cast(s1); + const auto *p2 = static_cast(s2); + for (size_t i = 0; i < n; ++i) { + int diff = p1[i] - p2[i]; + if (diff) return diff; + } + return 0; +} + +// memcmp interceptor. +// Calls the real memcmp() and possibly modifies state.cmp_feature_set. +extern "C" NO_SANITIZE int memcmp(const void *s1, const void *s2, size_t n) { + const int result = + memcmp_orig ? memcmp_orig(s1, s2, n) : memcmp_fallback(s1, s2, n); + if (ABSL_PREDICT_FALSE(!tls.started)) { + return result; + } + tls.TraceMemCmp(reinterpret_cast(__builtin_return_address(0)), + reinterpret_cast(s1), + reinterpret_cast(s2), n, result == 0); + return NormalizeCmpResult(result); +} + +// TODO(b/341111359): Investigate inefficiencies in the `strcmp`/`strncmp` +// interceptors and `TraceMemCmp`. + +// strcmp interceptor. +// Calls the real strcmp() and possibly modifies state.cmp_feature_set. +extern "C" NO_SANITIZE int strcmp(const char *s1, const char *s2) { + // Find the length of the shorter string, as this determines the actual number + // of bytes that are compared. Note that this is needed even if we call + // `strcmp_orig` because we're passing it to `TraceMemCmp()`. + size_t len = 0; + while (s1[len] && s2[len]) ++len; + const int result = + // Need to include one more byte than the shorter string length + // when falling back to memcmp e.g. "foo" < "foobar". + strcmp_orig ? strcmp_orig(s1, s2) : memcmp_fallback(s1, s2, len + 1); + if (ABSL_PREDICT_FALSE(!tls.started)) { + return result; + } + // Pass `len` here to avoid storing the trailing '\0' in the dictionary. + tls.TraceMemCmp(reinterpret_cast(__builtin_return_address(0)), + reinterpret_cast(s1), + reinterpret_cast(s2), len, result == 0); + return NormalizeCmpResult(result); +} + +// strncmp interceptor. +// Calls the real strncmp() and possibly modifies state.cmp_feature_set. 
+extern "C" NO_SANITIZE int strncmp(const char *s1, const char *s2, size_t n) { + // Find the length of the shorter string, as this determines the actual number + // of bytes that are compared. Note that this is needed even if we call + // `strncmp_orig` because we're passing it to `TraceMemCmp()`. + size_t len = 0; + while (len < n && s1[len] && s2[len]) ++len; + // Need to include '\0' in the comparison if the shorter string is shorter + // than `n`, hence we add 1 to the length. + if (n > len + 1) n = len + 1; + const int result = + strncmp_orig ? strncmp_orig(s1, s2, n) : memcmp_fallback(s1, s2, n); + if (ABSL_PREDICT_FALSE(!tls.started)) { + return result; + } + // Pass `len` here to avoid storing the trailing '\0' in the dictionary. + tls.TraceMemCmp(reinterpret_cast(__builtin_return_address(0)), + reinterpret_cast(s1), + reinterpret_cast(s2), len, result == 0); + return NormalizeCmpResult(result); +} + +// pthread_create interceptor. +// Calls real pthread_create, but wraps the start_routine() in MyThreadStart. +extern "C" int pthread_create(pthread_t *absl_nonnull thread, + const pthread_attr_t *absl_nullable attr, + void *(*start_routine)(void *), + void *absl_nullable arg) { + if (ABSL_PREDICT_FALSE(!tls.started)) { + return REAL(pthread_create)(thread, attr, start_routine, arg); + } + // Wrap the arguments. Will be deleted in MyThreadStart. + auto *wrapped_args = new ThreadCreateArgs{start_routine, arg}; + // Run the actual pthread_create. + return REAL(pthread_create)(thread, attr, MyThreadStart, wrapped_args); +} diff --git a/src/third_party/fuzztest/dist/centipede/runner_interface.h b/src/third_party/fuzztest/dist/centipede/runner_interface.h new file mode 100644 index 00000000000..f12691dd820 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/runner_interface.h @@ -0,0 +1,180 @@ +// Copyright 2022 The Centipede Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// WARNING: this interface is not yet stable and may change at any point. + +#ifndef THIRD_PARTY_CENTIPEDE_RUNNER_INTERFACE_H_ +#define THIRD_PARTY_CENTIPEDE_RUNNER_INTERFACE_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include "absl/base/nullability.h" +#include "./centipede/mutation_input.h" +#include "./common/defs.h" + +// Typedefs for the libFuzzer API, https://llvm.org/docs/LibFuzzer.html +using FuzzerTestOneInputCallback = int (*)(const uint8_t *data, size_t size); +using FuzzerInitializeCallback = int (*)(int *argc, char ***argv); +using FuzzerCustomMutatorCallback = size_t (*)(uint8_t *data, size_t size, + size_t max_size, + unsigned int seed); +using FuzzerCustomCrossOverCallback = size_t (*)( + const uint8_t *data1, size_t size1, const uint8_t *data2, size_t size2, + uint8_t *out, size_t max_out_size, unsigned int seed); + +// This is the header-less interface of libFuzzer, see +// https://llvm.org/docs/LibFuzzer.html. 
+extern "C" { +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size); +__attribute__((weak)) int LLVMFuzzerInitialize(int *absl_nonnull argc, + char ***absl_nonnull argv); +__attribute__((weak)) size_t LLVMFuzzerCustomMutator(uint8_t *data, size_t size, + size_t max_size, + unsigned int seed); +__attribute__((weak)) size_t LLVMFuzzerCustomCrossOver( + const uint8_t *data1, size_t size1, const uint8_t *data2, size_t size2, + uint8_t *out, size_t max_out_size, unsigned int seed); +} // extern "C" + +// https://llvm.org/docs/LibFuzzer.html#using-libfuzzer-as-a-library +extern "C" int LLVMFuzzerRunDriver( + int *absl_nonnull argc, char ***absl_nonnull argv, + FuzzerTestOneInputCallback test_one_input_cb); + +// This interface can be used to detect presence of Centipede in the binary. +// Also pretend we are LibFuzzer for compatibility. +// This API can be used by other pieces of fuzzing infrastructure, +// but should not be used by end-users of fuzz targets +// (consider using FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION macro). +extern "C" __attribute__((weak)) void CentipedeIsPresent(); +extern "C" __attribute__((weak)) void __libfuzzer_is_present(); + +// Reconfigures the RSS limit to `rss_limit_mb` - 0 indicates no limit. +extern "C" void CentipedeSetRssLimit(size_t rss_limit_mb); + +// Reconfigures the stack limit to `stack_limit_kb` - 0 indicates no limit. +extern "C" void CentipedeSetStackLimit(size_t stack_limit_kb); + +// Reconfigures `timeout_per_input` accordingly in seconds - 0 means no timeout. +extern "C" void CentipedeSetTimeoutPerInput(uint64_t timeout_per_input); + +// An overridable function to get the runner flags for configuring the runner +// during the initialization. The default implementation (as a weak function) +// gets the flags from CENTIPEDE_RUNNER_FLAGS env var. +// +// It should return either a nullptr or a constant string that is valid +// throughout the entire process life-time. 
+extern "C" const char *absl_nullable CentipedeGetRunnerFlags(); + +// An overridable function to override `LLVMFuzzerMutate` behavior. +extern "C" size_t CentipedeLLVMFuzzerMutateCallback(uint8_t *data, size_t size, + size_t max_size); + +// Prepares to run a batch of test executions that ends with calling +// `CentipedeEndExecutionBatch`. +// +// `CentipedeBeginExecutionBatch` would abort if it was previously called +// without a matching `CentipedeEndExecutionBatch` call. +extern "C" void CentipedeBeginExecutionBatch(); + +// Finalizes the current batch of test executions. It would abort if no +// `CentipedeBeginExecutionBatch` was called before without a matching +// `CentipedeEndExecutionBatch` call. +extern "C" void CentipedeEndExecutionBatch(); + +// Resets the internal state of the runner to process a new input. +extern "C" void CentipedePrepareProcessing(); + +// Finalizes the processing of an input and stores the state internally. +// +// For tool integration, it can be called inside `RunnerCallbacks::Execute()` to +// finalize the execution early before extra cleanups. +extern "C" void CentipedeFinalizeProcessing(); + +// Retrieves the execution results (including coverage information) after +// processing an input. This function saves the data to the provided buffer and +// returns the size of the saved data. It may be called after +// CentipedeFinalizeProcessing(). +extern "C" size_t CentipedeGetExecutionResult(uint8_t *data, size_t capacity); + +// Retrieves the coverage data collected during the processing of an input. +// This function saves the raw coverage data to the provided buffer and returns +// the size of the saved data. It may be called after +// CentipedeFinalizeProcessing(). +extern "C" size_t CentipedeGetCoverageData(uint8_t *data, size_t capacity); + +// Set the current execution result to the opaque memory `data` with `size`. +// Such data is retrieved using `CentipedeGetExecutionResult`, possibly from +// another process. 
When `data` is `nullptr`, will set the execution result to +// "empty" with no features or metadata. +extern "C" void CentipedeSetExecutionResult(const uint8_t *data, size_t size); + +// Set the failure description for the runner to propagate further. Only the +// description from the first call will be used. +extern "C" void CentipedeSetFailureDescription(const char *description); + +namespace fuzztest::internal { + +// Callbacks interface implemented by the fuzzer and called by the runner. +// +// WARNING: This interface is designed for FuzzTest/Centipede integration - +// no stability is guaranteed for other usages. +class RunnerCallbacks { + public: + // Attempts to execute the test logic using `input`, and returns false if the + // input should be ignored from the corpus, true otherwise. + virtual bool Execute(ByteSpan input) = 0; + // Generates seed inputs by calling `seed_callback` for each input. + // The default implementation generates a single-byte input {0}. + virtual void GetSeeds(std::function seed_callback); + // Returns the serialized configuration from the test target. The default + // implementation returns the empty string. + virtual std::string GetSerializedTargetConfig(); + // Returns true if and only if the test target has a custom mutator. + virtual bool HasCustomMutator() const = 0; + // Generates at most `num_mutants` mutants by calling `new_mutant_callback` + // for each mutant. Returns true on success, false otherwise. + // + // TODO(xinhaoyuan): Consider supporting only_shrink to speed up + // input shrinking. + virtual bool Mutate(const std::vector &inputs, + size_t num_mutants, + std::function new_mutant_callback); + virtual ~RunnerCallbacks() = default; +}; + +// Wraps legacy fuzzer callbacks into a `RunnerCallbacks` instance. 
+std::unique_ptr CreateLegacyRunnerCallbacks( + FuzzerTestOneInputCallback test_one_input_cb, + FuzzerCustomMutatorCallback custom_mutator_cb, + FuzzerCustomCrossOverCallback custom_crossover_cb); + +// The main Centipede Runner function. +// It performs actions prescribed by argc/argv and environment variables +// and returns EXIT_SUCCESS or EXIT_FAILURE. +// Normally, the runner itself calls this function (LLVMFuzzerRunDriver). +// +// As an *experiment* we want to allow user code to call RunnerMain(). +// This is not a guaranteed public interface (yet) and may disappear w/o notice. +int RunnerMain(int argc, char **argv, RunnerCallbacks &callbacks); + +} // namespace fuzztest::internal + +#endif // THIRD_PARTY_CENTIPEDE_RUNNER_INTERFACE_H_ diff --git a/src/third_party/fuzztest/dist/centipede/runner_main.cc b/src/third_party/fuzztest/dist/centipede/runner_main.cc new file mode 100644 index 00000000000..2507feac441 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/runner_main.cc @@ -0,0 +1,20 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "absl/base/nullability.h" +#include "./centipede/runner_interface.h" + +int main(int argc, char** absl_nonnull argv) { + return LLVMFuzzerRunDriver(&argc, &argv, LLVMFuzzerTestOneInput); +} diff --git a/src/third_party/fuzztest/dist/centipede/runner_request.cc b/src/third_party/fuzztest/dist/centipede/runner_request.cc new file mode 100644 index 00000000000..7d286d30df0 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/runner_request.cc @@ -0,0 +1,118 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "./centipede/runner_request.h" + +#include +#include + +#include "./centipede/execution_metadata.h" +#include "./centipede/mutation_input.h" +#include "./centipede/shared_memory_blob_sequence.h" +#include "./common/defs.h" + +namespace fuzztest::internal { + +namespace { + +enum Tags : Blob::SizeAndTagT { + kTagInvalid, // 0 is an invalid tag. + kTagExecution, + kTagMutation, + kTagNumInputs, + kTagNumMutants, + kTagExecutionMetadata, + kTagDataInput, +}; + +// Writes `inputs` to `blobseq`, returns the number of inputs written. 
+static size_t WriteInputs(const std::vector &inputs, + BlobSequence &blobseq) { + size_t num_inputs = inputs.size(); + if (!blobseq.Write(kTagNumInputs, num_inputs)) return 0; + size_t result = 0; + for (const auto &input : inputs) { + if (!blobseq.Write({kTagDataInput, input.size(), input.data()})) + return result; + ++result; + } + return result; +} + +static bool WriteMetadataFromRefOrDefault(const ExecutionMetadata *metadata, + BlobSequence &blobseq) { + if (metadata != nullptr) + return metadata->Write(kTagExecutionMetadata, blobseq); + static const ExecutionMetadata *default_metadata = new ExecutionMetadata(); + return default_metadata->Write(kTagExecutionMetadata, blobseq); +} + +// Similar to above, but for mutation inputs. +static size_t WriteInputs(const std::vector &inputs, + BlobSequence &blobseq) { + size_t num_inputs = inputs.size(); + if (!blobseq.Write(kTagNumInputs, num_inputs)) return 0; + size_t result = 0; + for (const auto &input : inputs) { + if (!WriteMetadataFromRefOrDefault(input.metadata, blobseq)) return result; + if (!blobseq.Write({kTagDataInput, input.data.size(), input.data.data()})) + return result; + ++result; + } + return result; +} + +} // namespace + +size_t RequestExecution(const std::vector &inputs, + BlobSequence &blobseq) { + if (!blobseq.Write({kTagExecution, 0, nullptr})) return 0; + return WriteInputs(inputs, blobseq); +} + +size_t RequestMutation(size_t num_mutants, + const std::vector &inputs, + BlobSequence &blobseq) { + if (!blobseq.Write({kTagMutation, 0, nullptr})) return 0; + if (!blobseq.Write(kTagNumMutants, num_mutants)) return 0; + return WriteInputs(inputs, blobseq); +} + +bool IsExecutionRequest(Blob blob) { return blob.tag == kTagExecution; } + +bool IsMutationRequest(Blob blob) { return blob.tag == kTagMutation; } + +bool IsNumInputs(Blob blob, size_t &num_inputs) { + if (blob.tag != kTagNumInputs) return false; + if (blob.size != sizeof(num_inputs)) return false; + memcpy(&num_inputs, blob.data, 
sizeof(num_inputs)); + return true; +} + +bool IsNumMutants(Blob blob, size_t &num_mutants) { + if (blob.tag != kTagNumMutants) return false; + if (blob.size != sizeof(num_mutants)) return false; + memcpy(&num_mutants, blob.data, sizeof(num_mutants)); + return true; +} + +bool IsExecutionMetadata(Blob blob, ExecutionMetadata &metadata) { + if (blob.tag != kTagExecutionMetadata) return false; + metadata.Read(blob); + return true; +} + +bool IsDataInput(Blob blob) { return blob.tag == kTagDataInput; } + +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/runner_request.h b/src/third_party/fuzztest/dist/centipede/runner_request.h new file mode 100644 index 00000000000..a0055bd6ebb --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/runner_request.h @@ -0,0 +1,64 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Utilities used for Centipede => Runner requests. + +#ifndef THIRD_PARTY_CENTIPEDE_EXECUTION_REQUEST_H_ +#define THIRD_PARTY_CENTIPEDE_EXECUTION_REQUEST_H_ + +#include +#include + +#include "./centipede/execution_metadata.h" +#include "./centipede/mutation_input.h" +#include "./centipede/shared_memory_blob_sequence.h" +#include "./common/defs.h" + +namespace fuzztest::internal { + +// Sends a request (via `blobseq`) to execute `inputs`. +// Returns the number of sent inputs, which would normally be inputs.size(). 
+size_t RequestExecution(const std::vector &inputs, + BlobSequence &blobseq); + +// Sends a request (via `blobseq`) to compute `num_mutants` mutants of `inputs`. +// Returns the number of sent inputs, which would normally be inputs.size(). +size_t RequestMutation(size_t num_mutants, + const std::vector &inputs, + BlobSequence &blobseq); + +// Returns whether `blob` indicates an execution request. +bool IsExecutionRequest(Blob blob); + +// Returns whether `blob` indicates a mutation request. +bool IsMutationRequest(Blob blob); + +// Returns true and sets `num_inputs` +// iff the blob indicates the number of inputs. +bool IsNumInputs(Blob blob, size_t &num_inputs); + +// Returns true and sets `num_mutants` +// iff the blob indicates the number of mutants. +bool IsNumMutants(Blob blob, size_t &num_mutants); + +// Returns true and read blob into `metadata` iff the blob indicates an +// execution metadata. +bool IsExecutionMetadata(Blob blob, ExecutionMetadata &metadata); + +// Returns true iff `blob` indicates a data input. +bool IsDataInput(Blob blob); + +} // namespace fuzztest::internal + +#endif // THIRD_PARTY_CENTIPEDE_EXECUTION_REQUEST_H_ diff --git a/src/third_party/fuzztest/dist/centipede/runner_result.cc b/src/third_party/fuzztest/dist/centipede/runner_result.cc new file mode 100644 index 00000000000..dac2e8390b3 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/runner_result.cc @@ -0,0 +1,205 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "./centipede/runner_result.h" + +#include +#include +#include +#include +#include + +#include "./centipede/execution_metadata.h" +#include "./centipede/feature.h" +#include "./centipede/shared_memory_blob_sequence.h" +#include "./common/defs.h" + +namespace fuzztest::internal { + +namespace { + +// Tags used for both the execution and mutation results. We use the same enum +// to make the sets of tags disjoint. +enum Tags : Blob::SizeAndTagT { + kTagInvalid, // 0 is an invalid tag. + + // Execution result tags. + kTagFeatures, + kTagDispatcher32BitFeatures, + kTagInputBegin, + kTagInputEnd, + kTagStats, + kTagMetadata, + + // Mutation result tags. + kTagHasCustomMutator, + kTagMutant, +}; + +} // namespace + +bool BatchResult::WriteOneFeatureVec(const feature_t *vec, size_t size, + BlobSequence &blobseq) { + return blobseq.Write({kTagFeatures, size * sizeof(vec[0]), + reinterpret_cast(vec)}); +} + +bool BatchResult::WriteDispatcher32BitFeatures(const uint32_t *features, + size_t num_features, + BlobSequence &blobseq) { + return blobseq.Write({kTagDispatcher32BitFeatures, + num_features * sizeof(features[0]), + reinterpret_cast(features)}); +} + +bool BatchResult::WriteInputBegin(BlobSequence &blobseq) { + return blobseq.Write({kTagInputBegin, 0, nullptr}); +} + +bool BatchResult::WriteInputEnd(BlobSequence &blobseq) { + return blobseq.Write({kTagInputEnd, 0, nullptr}); +} + +bool BatchResult::WriteStats(const ExecutionResult::Stats &stats, + BlobSequence &blobseq) { + return blobseq.Write( + {kTagStats, sizeof(stats), reinterpret_cast(&stats)}); +} + +bool BatchResult::WriteMetadata(const ExecutionMetadata &metadata, + BlobSequence &blobseq) { + return metadata.Write(kTagMetadata, blobseq); +} + +bool BatchResult::WriteMetadata(ByteSpan bytes, BlobSequence &blobseq) { + return blobseq.Write({kTagMetadata, bytes.size(), bytes.data()}); +} + +// The 
sequence we expect to receive is +// InputBegin, Features, Stats, InputEnd, InputBegin, ... +// with a total of results().size() tuples (InputBegin ... InputEnd). +// Blobs between InputBegin/InputEnd may go in any order. +// If the execution failed on some input, we will see InputBegin, +// but will not see all or some other blobs. +bool BatchResult::Read(BlobSequence &blobseq) { + size_t num_begins = 0; + size_t num_ends = 0; + const size_t num_expected_tuples = results().size(); + ExecutionResult *current_execution_result = nullptr; + while (true) { + auto blob = blobseq.Read(); + if (!blob.IsValid()) break; + if (blob.tag == kTagInputBegin) { + if (num_begins != num_ends) return false; + ++num_begins; + if (num_begins > num_expected_tuples) return false; + current_execution_result = &results()[num_ends]; + current_execution_result->clear(); + continue; + } + if (blob.tag == kTagInputEnd) { + ++num_ends; + if (num_ends != num_begins) return false; + current_execution_result = nullptr; + continue; + } + if (blob.tag == kTagMetadata) { + if (current_execution_result == nullptr) return false; + current_execution_result->metadata().Read(blob); + continue; + } + if (blob.tag == kTagStats) { + if (current_execution_result == nullptr) return false; + if (blob.size != sizeof(ExecutionResult::Stats)) return false; + memcpy(&current_execution_result->stats(), blob.data, blob.size); + continue; + } + if (blob.tag == kTagFeatures) { + if (current_execution_result == nullptr) return false; + const size_t features_size = blob.size / sizeof(feature_t); + FeatureVec &features = current_execution_result->mutable_features(); + features.resize(features_size); + std::memcpy(features.data(), blob.data, + features_size * sizeof(feature_t)); + } + if (blob.tag == kTagDispatcher32BitFeatures) { + if (current_execution_result == nullptr) return false; + const size_t size = blob.size / sizeof(uint32_t); + std::vector<uint32_t> copied_features; + copied_features.resize(size); +
std::memcpy(copied_features.data(), blob.data, size * sizeof(uint32_t)); + auto &features = current_execution_result->mutable_features(); + features.reserve(features.size() + size); + for (uint32_t feature : copied_features) { + features.push_back((feature & 0x7fffffff) + + feature_domains::kUserDomains[0].begin()); + } + } + } + num_outputs_read_ = num_ends; + return true; +} + +bool BatchResult::IsIgnoredFailure() const { + constexpr std::string_view kIgnoredFailurePrefix = "IGNORED FAILURE:"; + return exit_code_ != EXIT_SUCCESS && + std::string_view(failure_description_) + .substr(0, kIgnoredFailurePrefix.size()) == + kIgnoredFailurePrefix; +} + +bool BatchResult::IsSetupFailure() const { + constexpr std::string_view kSetupFailurePrefix = "SETUP FAILURE:"; + return exit_code_ != EXIT_SUCCESS && + std::string_view(failure_description_) + .substr(0, kSetupFailurePrefix.size()) == kSetupFailurePrefix; +} + +bool BatchResult::IsSkippedTest() const { + constexpr std::string_view kSkippedTestPrefix = "SKIPPED TEST:"; + return exit_code_ != EXIT_SUCCESS && + std::string_view(failure_description_) + .substr(0, kSkippedTestPrefix.size()) == kSkippedTestPrefix; +} + +bool MutationResult::WriteHasCustomMutator(bool has_custom_mutator, + BlobSequence &blobseq) { + return blobseq.Write( + {kTagHasCustomMutator, sizeof(has_custom_mutator), + reinterpret_cast(&has_custom_mutator)}); +} + +bool MutationResult::WriteMutant(ByteSpan mutant, BlobSequence &blobseq) { + return blobseq.Write({kTagMutant, mutant.size(), mutant.data()}); +} + +bool MutationResult::Read(size_t num_mutants, BlobSequence &blobseq) { + const Blob blob = blobseq.Read(); + if (blob.tag != kTagHasCustomMutator) return false; + if (blob.size != sizeof(has_custom_mutator_)) return false; + std::memcpy(&has_custom_mutator_, blob.data, blob.size); + if (!has_custom_mutator_) return true; + + mutants_.clear(); + mutants_.reserve(num_mutants); + for (size_t i = 0; i < num_mutants; ++i) { + const Blob blob = 
blobseq.Read(); + if (blob.tag != kTagMutant) return false; + if (blob.size == 0) break; + mutants_.emplace_back(blob.data, blob.data + blob.size); + } + return true; +} + +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/runner_result.h b/src/third_party/fuzztest/dist/centipede/runner_result.h new file mode 100644 index 00000000000..ef4459adb4b --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/runner_result.h @@ -0,0 +1,237 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef THIRD_PARTY_CENTIPEDE_EXECUTION_RESULT_H_ +#define THIRD_PARTY_CENTIPEDE_EXECUTION_RESULT_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include "./centipede/execution_metadata.h" +#include "./centipede/feature.h" +#include "./centipede/shared_memory_blob_sequence.h" +#include "./common/defs.h" + +namespace fuzztest::internal { + +inline constexpr std::string_view kExecutionFailurePerInputTimeout = + "per-input-timeout-exceeded"; +inline constexpr std::string_view kExecutionFailurePerBatchTimeout = + "per-batch-timeout-exceeded"; +inline constexpr std::string_view kExecutionFailureRssLimitExceeded = + "rss-limit-exceeded"; +inline constexpr std::string_view kExecutionFailureStackLimitExceeded = + "stack-limit-exceeded"; + +// It represents the results of the execution of one input by the runner. 
+class ExecutionResult { + public: + // Movable, not Copyable. + ExecutionResult(ExecutionResult&& other) = default; + ExecutionResult& operator=(ExecutionResult&& other) = default; + + ExecutionResult() = default; + explicit ExecutionResult(FeatureVec features) + : features_(std::move(features)) {} + + // Execution statistics. + struct Stats { + uint64_t prep_time_usec = 0; // Time taken to prepare for execution. + uint64_t exec_time_usec = 0; // Time taken to execute the input. + uint64_t post_time_usec = 0; // Time taken to post-process the coverage. + uint64_t peak_rss_mb = 0; // Peak RSS in Mb after executing the input. + + // For tests. + bool operator==(const Stats& other) const { + return prep_time_usec == other.prep_time_usec && + exec_time_usec == other.exec_time_usec && + post_time_usec == other.post_time_usec && + peak_rss_mb == other.peak_rss_mb; + } + }; + + // Accessors. + const FeatureVec& features() const { return features_; } + FeatureVec& mutable_features() { return features_; } + const Stats& stats() const { return stats_; } + Stats& stats() { return stats_; } + const ExecutionMetadata& metadata() const { return metadata_; } + ExecutionMetadata& metadata() { return metadata_; } + + // Clears the data, but doesn't deallocate the heap storage. + void clear() { + features_.clear(); + metadata_ = {}; + stats_ = {}; + } + + private: + FeatureVec features_; // Features produced by the target on one input. + + ExecutionMetadata metadata_; // Metadata from executing one input. + + Stats stats_; // Stats from executing one input. +}; + +// BatchResult is the communication API between Centipede and its runner. +// It consists of a vector of ExecutionResult objects, one per executed input, +// and optionally some other details about the execution of the input batch. +// +// The runner uses static methods Write*() to write to a blobseq. +// Centipede uses Read() to get all the data from blobseq.
+class BatchResult { + public: + // If BatchResult is used in a hot loop, define it outside the loop and + // use ClearAndResize() on every iteration. + // This will reduce the number of mallocs. + BatchResult() = default; + + // Not movable. + BatchResult(BatchResult&& other) = delete; + BatchResult& operator=(BatchResult&& other) = delete; + + // Clears all data, but usually does not deallocate heap storage. + void ClearAndResize(size_t new_size) { + for (auto& result : results_) result.clear(); + results_.resize(new_size); + log_.clear(); + exit_code_ = EXIT_SUCCESS; + num_outputs_read_ = 0; + } + + // Writes one FeatureVec (from `vec` and `size`) to `blobseq`. + // Returns true iff successful. + // Called by the runner. + // When executing N inputs, the runner will call this at most N times. + static bool WriteOneFeatureVec(const feature_t* vec, size_t size, + BlobSequence& blobseq); + // Writes a buffer of 32-bit `features` to `blobseq`. + // + // This is a temporary API to work with the dispatcher prototype. + // + // For each 32-bit feature, the bit [31] is ignored; the 4 bits [30-27] + // indicate the domain, which are mapped to the Centipede user-defined domain + // 0-15; the remaining 27 bits [26-0] represent the actual 27-bit feature ID + // in the domain. + static bool WriteDispatcher32BitFeatures(const uint32_t* features, + size_t num_features, + BlobSequence& blobseq); + // Writes a special Begin marker before executing an input. + static bool WriteInputBegin(BlobSequence& blobseq); + // Writes a special End marker after executing an input. + static bool WriteInputEnd(BlobSequence& blobseq); + // Writes unit execution stats. + static bool WriteStats(const ExecutionResult::Stats& stats, + BlobSequence& blobseq); + // Writes the execution `metadata` to `blobseq`. + // Returns true iff successful. + static bool WriteMetadata(const ExecutionMetadata& metadata, + BlobSequence& blobseq); + + // Writes the execution `metadata` to `blobseq` as raw bytes. 
+ // Returns true iff successful. + static bool WriteMetadata(ByteSpan bytes, BlobSequence& blobseq); + + // Reads everything written by the runner to `blobseq` into `this`. + // Returns true iff successful. + // When running N inputs, ClearAndResize(N) must be called before Read(). + bool Read(BlobSequence& blobseq); + + // Returns true if the failure should be ignored. + bool IsIgnoredFailure() const; + + // Returns true if the batch execution failed due to a setup failure, and not + // a crash tied to a specific input. + bool IsSetupFailure() const; + + // Returns true if the test is skipped during setup, thus there is no need to + // run any inputs at all. + bool IsSkippedTest() const; + + // Accessors. + std::vector& results() { return results_; } + const std::vector& results() const { return results_; } + std::string& log() { return log_; } + const std::string& log() const { return log_; } + int& exit_code() { return exit_code_; } + int exit_code() const { return exit_code_; } + size_t num_outputs_read() const { return num_outputs_read_; } + size_t& num_outputs_read() { return num_outputs_read_; } + std::string& failure_description() { return failure_description_; } + const std::string& failure_description() const { + return failure_description_; + } + std::string& failure_signature() { return failure_signature_; } + const std::string& failure_signature() const { return failure_signature_; } + + private: + friend class MultiInputMock; + + std::vector results_; + std::string log_; // log_ is populated optionally, e.g. if there was a crash. + int exit_code_ = EXIT_SUCCESS; // Process exit code. + // If the batch execution fails, this may optionally contain a human-readable + // failure description, e.g., the crash type, stack trace... + std::string failure_description_; + // A signature uniquely identifying the failure, which does not need to be + // human-readable. Specially, failures with empty signatures are always + // considered unique. 
+ std::string failure_signature_; + size_t num_outputs_read_ = 0; +}; + +// Represents results of mutating a batch of inputs, which are communicated from +// the runner to Centipede via a blob sequence using the following protocol: +// +// The runner first calls `WriteHasCustomMutator()` to indicate whether the +// target has a custom mutator. If so, it follows up with a sequence of +// `WriteMutant()` calls to write the mutants to the blob sequence. +// +// Centipede calls `Read()` to read whether the target has a custom mutator, +// and if so, reads the mutants from the blob sequence. +class MutationResult { + public: + // Writes a special marker to indicate whether the target has a custom + // mutator. Returns true iff successful. + static bool WriteHasCustomMutator(bool has_custom_mutator, + BlobSequence& blobseq); + + // Writes one mutant to `blobseq`. Returns true iff successful. + static bool WriteMutant(ByteSpan mutant, BlobSequence& blobseq); + + // Reads whether the target has a custom mutator, and if so, reads at most + // `num_mutants` mutants from `blobseq`. Returns true iff successful. + bool Read(size_t num_mutants, BlobSequence& blobseq); + + // Accessors. 
+ int exit_code() const { return exit_code_; } + int& exit_code() { return exit_code_; } + bool has_custom_mutator() const { return has_custom_mutator_; } + const std::vector& mutants() const& { return mutants_; } + std::vector&& mutants() && { return std::move(mutants_); } + + private: + int exit_code_ = EXIT_SUCCESS; + bool has_custom_mutator_ = false; + std::vector mutants_; +}; + +} // namespace fuzztest::internal + +#endif // THIRD_PARTY_CENTIPEDE_EXECUTION_RESULT_H_ diff --git a/src/third_party/fuzztest/dist/centipede/runner_result_test.cc b/src/third_party/fuzztest/dist/centipede/runner_result_test.cc new file mode 100644 index 00000000000..19f50554fed --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/runner_result_test.cc @@ -0,0 +1,265 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "./centipede/runner_result.h" + +#include +#include +#include +#include // NOLINT +#include +#include +#include +#include +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "./centipede/execution_metadata.h" +#include "./centipede/feature.h" +#include "./centipede/shared_memory_blob_sequence.h" +#include "./common/defs.h" +#include "./common/test_util.h" + +namespace fuzztest::internal { +namespace { + +using ::testing::ElementsAre; + +TEST(ExecutionResult, WriteThenRead) { + auto buffer = std::make_unique(1000); + BlobSequence blobseq(buffer.get(), 1000); + BatchResult batch_result; + + // Imitate execution of two inputs. + FeatureVec v1{1, 2, 3}; + FeatureVec v2{5, 6, 7, 8}; + ExecutionMetadata metadata; + metadata.AppendCmpEntry({1, 2, 3}, {4, 5, 6}); + ExecutionResult::Stats stats1; + stats1.peak_rss_mb = 10; + ExecutionResult::Stats stats2; + stats2.peak_rss_mb = 20; + // First input. + EXPECT_TRUE(BatchResult::WriteInputBegin(blobseq)); + EXPECT_TRUE(BatchResult::WriteOneFeatureVec(v1.data(), v1.size(), blobseq)); + // Write stats after features. The order should not matter. + EXPECT_TRUE(BatchResult::WriteStats(stats1, blobseq)); + // Done. + EXPECT_TRUE(BatchResult::WriteInputEnd(blobseq)); + + // Second input. + EXPECT_TRUE(BatchResult::WriteInputBegin(blobseq)); + // Write stats before features. + EXPECT_TRUE(BatchResult::WriteStats(stats2, blobseq)); + EXPECT_TRUE(BatchResult::WriteOneFeatureVec(v2.data(), v2.size(), blobseq)); + // Write CMP traces. + EXPECT_TRUE(BatchResult::WriteMetadata(metadata, blobseq)); + // Done. + EXPECT_TRUE(BatchResult::WriteInputEnd(blobseq)); + + // Ensure we've read them. 
+ blobseq.Reset(); + batch_result.ClearAndResize(2); + EXPECT_TRUE(batch_result.Read(blobseq)); + EXPECT_EQ(batch_result.results().size(), 2); + EXPECT_EQ(batch_result.results()[0].features(), v1); + EXPECT_EQ(batch_result.results()[0].stats(), stats1); + EXPECT_EQ(batch_result.results()[1].features(), v2); + EXPECT_EQ(batch_result.results()[1].stats(), stats2); + EXPECT_THAT(batch_result.results()[1].metadata().cmp_data, + ElementsAre(3, // size + 1, 2, 3, // cmp0 + 4, 5, 6 // cmp1 + )); + + // If there are fewer ExecutionResult-s than expected everything should work. + blobseq.Reset(); + batch_result.ClearAndResize(3); + EXPECT_TRUE(batch_result.Read(blobseq)); + EXPECT_EQ(batch_result.results().size(), 3); + EXPECT_EQ(batch_result.results()[0].features(), v1); + EXPECT_EQ(batch_result.results()[1].features(), v2); + EXPECT_EQ(batch_result.results()[2].features(), FeatureVec{}); + + // If there are too many ExecutionResult-s, Read() should fail. + // This should not happen in normal operation. + blobseq.Reset(); + batch_result.ClearAndResize(1); + EXPECT_FALSE(batch_result.Read(blobseq)); +} + +TEST(ExecutionResult, WriteIntoFileThenRead) { + const std::string temp_file = GetTestTempDir(test_info_->name()) / "tmp.txt"; + std::ofstream output_stream(temp_file, std::ios::out); + ASSERT_TRUE(output_stream.is_open()); + + // Imitate execution of two inputs. + FeatureVec v1{1, 2, 3}; + FeatureVec v2{5, 6, 7, 8}; + ExecutionResult::Stats stats1; + stats1.peak_rss_mb = 10; + ExecutionResult::Stats stats2; + stats2.peak_rss_mb = 20; + ExecutionMetadata metadata; + metadata.AppendCmpEntry({1, 2, 3}, {4, 5, 6}); + + std::vector buffer1(1000); + BlobSequence blobseq1(buffer1.data(), buffer1.size()); + // First input. + ASSERT_TRUE(BatchResult::WriteInputBegin(blobseq1)); + ASSERT_TRUE(BatchResult::WriteOneFeatureVec(v1.data(), v1.size(), blobseq1)); + // Write stats after features. The order should not matter. 
+ ASSERT_TRUE(BatchResult::WriteStats(stats1, blobseq1)); + // Done. + ASSERT_TRUE(BatchResult::WriteInputEnd(blobseq1)); + + output_stream.write(reinterpret_cast(buffer1.data()), + blobseq1.offset()); + + std::vector buffer2(1000); + BlobSequence blobseq2(buffer2.data(), buffer2.size()); + // Second input. + ASSERT_TRUE(BatchResult::WriteInputBegin(blobseq2)); + // Write stats before features. + ASSERT_TRUE(BatchResult::WriteStats(stats2, blobseq2)); + ASSERT_TRUE(BatchResult::WriteOneFeatureVec(v2.data(), v2.size(), blobseq2)); + // Write CMP traces. + EXPECT_TRUE(BatchResult::WriteMetadata(metadata, blobseq2)); + // Done. + ASSERT_TRUE(BatchResult::WriteInputEnd(blobseq2)); + + output_stream.write(reinterpret_cast(buffer2.data()), + blobseq2.offset()); + + output_stream.close(); + + std::ifstream input_stream(temp_file); + std::string content((std::istreambuf_iterator(input_stream)), + (std::istreambuf_iterator())); + BlobSequence blobseq(reinterpret_cast(content.data()), + content.size()); + BatchResult batch_result; + batch_result.ClearAndResize(2); + ASSERT_TRUE(batch_result.Read(blobseq)); + EXPECT_EQ(batch_result.num_outputs_read(), 2); + EXPECT_EQ(batch_result.results()[0].features(), v1); + EXPECT_EQ(batch_result.results()[1].features(), v2); + EXPECT_EQ(batch_result.results()[0].stats(), stats1); + EXPECT_EQ(batch_result.results()[1].stats(), stats2); + EXPECT_THAT(batch_result.results()[1].metadata().cmp_data, + ElementsAre(3, // size + 1, 2, 3, // cmp0 + 4, 5, 6 // cmp1 + )); +} + +TEST(ExecutionResult, IdentifiesSetupFailure) { + BatchResult batch_result; + batch_result.exit_code() = EXIT_FAILURE; + batch_result.failure_description() = "SETUP FAILURE: something went wrong"; + + EXPECT_TRUE(batch_result.IsSetupFailure()); +} + +TEST(MutationResult, WriteThenRead) { + std::array buffer; + BlobSequence blobseq(buffer.data(), buffer.size()); + + // Write a mutation result. 
+ ASSERT_TRUE(MutationResult::WriteHasCustomMutator(true, blobseq)); + ASSERT_TRUE(MutationResult::WriteMutant({1, 2, 3}, blobseq)); + ASSERT_TRUE(MutationResult::WriteMutant({4, 5, 6}, blobseq)); + ASSERT_TRUE(MutationResult::WriteMutant({7, 8, 9}, blobseq)); + blobseq.Reset(); + + MutationResult mutation_result; + ASSERT_TRUE(mutation_result.Read(3, blobseq)); + + EXPECT_TRUE(mutation_result.has_custom_mutator()); + EXPECT_THAT( + mutation_result.mutants(), + ElementsAre(ByteArray{1, 2, 3}, ByteArray{4, 5, 6}, ByteArray{7, 8, 9})); +} + +TEST(ExecutionResult, ReadResultSucceedsOnlyWithInputBegin) { + auto buffer = std::make_unique(1000); + BlobSequence blobseq(buffer.get(), 1000); + BatchResult batch_result; + + EXPECT_TRUE(BatchResult::WriteInputBegin(blobseq)); + EXPECT_TRUE(BatchResult::WriteOneFeatureVec({}, 0, blobseq)); + EXPECT_TRUE(BatchResult::WriteInputEnd(blobseq)); + blobseq.Reset(); + batch_result.ClearAndResize(1); + EXPECT_TRUE(batch_result.Read(blobseq)); + + blobseq.Reset(); + EXPECT_TRUE(BatchResult::WriteOneFeatureVec({}, 0, blobseq)); + EXPECT_TRUE(BatchResult::WriteInputEnd(blobseq)); + + blobseq.Reset(); + batch_result.ClearAndResize(1); + EXPECT_FALSE(batch_result.Read(blobseq)); +} + +TEST(ExecutionResult, ReadDispatcher32BitFeatures) { + auto buffer = std::make_unique(1000); + BlobSequence blobseq(buffer.get(), 1000); + BatchResult batch_result; + + std::vector dispatcher_features = {0, 1, 0x7fffffff, 0xffffffff}; + + EXPECT_TRUE(BatchResult::WriteInputBegin(blobseq)); + EXPECT_TRUE(BatchResult::WriteDispatcher32BitFeatures( + dispatcher_features.data(), dispatcher_features.size(), blobseq)); + EXPECT_TRUE(BatchResult::WriteInputEnd(blobseq)); + blobseq.Reset(); + batch_result.ClearAndResize(1); + EXPECT_TRUE(batch_result.Read(blobseq)); + + ASSERT_EQ(batch_result.num_outputs_read(), 1); + EXPECT_THAT(batch_result.results()[0].features(), + ElementsAre(feature_domains::kUserDomains[0].ConvertToMe(0), + 
feature_domains::kUserDomains[0].ConvertToMe(1), + feature_domains::kUserDomains[15].ConvertToMe( + feature_domains::Domain::kDomainSize - 1), + feature_domains::kUserDomains[15].ConvertToMe( + feature_domains::Domain::kDomainSize - 1))); +} + +TEST(ExecutionResult, KeepArbitraryBytesFromMetadata) { + auto buffer = std::make_unique(1000); + BlobSequence blobseq(buffer.get(), 1000); + BatchResult batch_result; + + ByteArray bytes = {13, 14, 15, 16}; + + EXPECT_TRUE(BatchResult::WriteInputBegin(blobseq)); + EXPECT_TRUE(BatchResult::WriteMetadata(bytes, blobseq)); + EXPECT_TRUE(BatchResult::WriteInputEnd(blobseq)); + blobseq.Reset(); + batch_result.ClearAndResize(1); + EXPECT_TRUE(batch_result.Read(blobseq)); + + ASSERT_EQ(batch_result.num_outputs_read(), 1); + EXPECT_EQ(batch_result.results()[0].metadata().cmp_data, bytes); + // `ForEachEntry()` should fail but not crash. + EXPECT_FALSE(batch_result.results()[0].metadata().ForEachCmpEntry( + [](ByteSpan, ByteSpan) {})); +} + +} // namespace +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/runner_sancov.cc b/src/third_party/fuzztest/dist/centipede/runner_sancov.cc new file mode 100644 index 00000000000..22435f4e85a --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/runner_sancov.cc @@ -0,0 +1,315 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Instrumentation callbacks for SanitizerCoverage (sancov). 
+// https://clang.llvm.org/docs/SanitizerCoverage.html + +#include + +#include +#include +#include + +#include "absl/base/nullability.h" +#include "./centipede/feature.h" +#include "./centipede/int_utils.h" +#include "./centipede/pc_info.h" +#include "./centipede/reverse_pc_table.h" +#include "./centipede/runner.h" +#include "./centipede/runner_dl_info.h" + +namespace fuzztest::internal { +void RunnerSancov() {} // to be referenced in runner.cc +} // namespace fuzztest::internal + +using fuzztest::internal::PCGuard; +using fuzztest::internal::PCInfo; +using fuzztest::internal::state; +using fuzztest::internal::tls; + +// Tracing data flow. +// The instrumentation is provided by +// https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-data-flow. +// For every load we get the address of the load. We can also get the caller PC. +// If the load address in +// [main_object.start_address, main_object.start_address + main_object.size), +// it is likely a global. +// We form a feature from a pair of {caller_pc, address_of_load}. +// The rationale here is that loading from a global address unique for the +// given PC is an interesting enough behavior that it warrants its own feature. +// +// Downsides: +// * The instrumentation is expensive, it can easily add 2x slowdown. +// * This creates plenty of features, easily 10x compared to control flow, +// and bloats the corpus. But this is also what we want to achieve here. + +// NOTE: In addition to `always_inline`, also use `inline`, because some +// compilers require both to actually enforce inlining, e.g. GCC: +// https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html. +#define ENFORCE_INLINE __attribute__((always_inline)) inline + +// Use this attribute for functions that must not be instrumented even if +// the runner is built with sanitizers (asan, etc). +#define NO_SANITIZE __attribute__((no_sanitize("all"))) + +// NOTE: Enforce inlining so that `__builtin_return_address` works. 
+ENFORCE_INLINE static void TraceLoad(void *addr) { + if (!state.run_time_flags.use_dataflow_features) return; + auto caller_pc = reinterpret_cast(__builtin_return_address(0)); + auto load_addr = reinterpret_cast(addr); + auto pc_offset = caller_pc - state.main_object.start_address; + if (pc_offset >= state.main_object.size) return; // PC outside main obj. + auto addr_offset = load_addr - state.main_object.start_address; + if (addr_offset >= state.main_object.size) return; // Not a global address. + state.data_flow_feature_set.set(fuzztest::internal::ConvertPcPairToNumber( + pc_offset, addr_offset, state.main_object.size)); +} + +// NOTE: Enforce inlining so that `__builtin_return_address` works. +ENFORCE_INLINE static void TraceCmp(uint64_t Arg1, uint64_t Arg2) { + if (!state.run_time_flags.use_cmp_features) return; + auto caller_pc = reinterpret_cast(__builtin_return_address(0)); + auto pc_offset = caller_pc - state.main_object.start_address; + uintptr_t hash = + fuzztest::internal::Hash64Bits(pc_offset) ^ tls.path_ring_buffer.hash(); + if (Arg1 == Arg2) { + state.cmp_eq_set.set(hash); + } else { + hash <<= 6; // ABTo* generate 6-bit numbers. + state.cmp_moddiff_set.set(hash | + fuzztest::internal::ABToCmpModDiff(Arg1, Arg2)); + state.cmp_hamming_set.set(hash | + fuzztest::internal::ABToCmpHamming(Arg1, Arg2)); + state.cmp_difflog_set.set(hash | + fuzztest::internal::ABToCmpDiffLog(Arg1, Arg2)); + } +} + +//------------------------------------------------------------------------------ +// Implementations of the external sanitizer coverage hooks. 
//------------------------------------------------------------------------------

extern "C" {
// Load hooks, one per access width (1, 2, 4, 8 and 16 bytes). Each one only
// forwards the loaded address to TraceLoad(). TraceLoad() is force-inlined
// into the hook, so `__builtin_return_address(0)` inside it refers to the
// code that called the hook.
NO_SANITIZE void __sanitizer_cov_load1(uint8_t *addr) { TraceLoad(addr); }
NO_SANITIZE void __sanitizer_cov_load2(uint16_t *addr) { TraceLoad(addr); }
NO_SANITIZE void __sanitizer_cov_load4(uint32_t *addr) { TraceLoad(addr); }
NO_SANITIZE void __sanitizer_cov_load8(uint64_t *addr) { TraceLoad(addr); }
NO_SANITIZE void __sanitizer_cov_load16(__uint128_t *addr) { TraceLoad(addr); }

// Comparison hooks. Every hook forwards its operands to TraceCmp().
// The 2-, 4- and 8-byte hooks additionally capture the operand pair into the
// matching thread-local `tls.cmp_traceN` when the operands differ and
// `use_auto_dictionary` is enabled. The 1-byte hooks capture nothing.
//
// The `const_cmp` variants are the ones SanitizerCoverage emits when one of
// the compared values is a compile-time constant; they are handled exactly
// like the generic `cmp` variants further below.
NO_SANITIZE
void __sanitizer_cov_trace_const_cmp1(uint8_t Arg1, uint8_t Arg2) {
  TraceCmp(Arg1, Arg2);
}
NO_SANITIZE
void __sanitizer_cov_trace_const_cmp2(uint16_t Arg1, uint16_t Arg2) {
  TraceCmp(Arg1, Arg2);
  if (Arg1 != Arg2 && state.run_time_flags.use_auto_dictionary)
    tls.cmp_trace2.Capture(Arg1, Arg2);
}
NO_SANITIZE
void __sanitizer_cov_trace_const_cmp4(uint32_t Arg1, uint32_t Arg2) {
  TraceCmp(Arg1, Arg2);
  if (Arg1 != Arg2 && state.run_time_flags.use_auto_dictionary)
    tls.cmp_trace4.Capture(Arg1, Arg2);
}
NO_SANITIZE
void __sanitizer_cov_trace_const_cmp8(uint64_t Arg1, uint64_t Arg2) {
  TraceCmp(Arg1, Arg2);
  if (Arg1 != Arg2 && state.run_time_flags.use_auto_dictionary)
    tls.cmp_trace8.Capture(Arg1, Arg2);
}
// Generic comparison hooks (neither operand is known to be constant).
NO_SANITIZE
void __sanitizer_cov_trace_cmp1(uint8_t Arg1, uint8_t Arg2) {
  TraceCmp(Arg1, Arg2);
}
NO_SANITIZE
void __sanitizer_cov_trace_cmp2(uint16_t Arg1, uint16_t Arg2) {
  TraceCmp(Arg1, Arg2);
  if (Arg1 != Arg2 && state.run_time_flags.use_auto_dictionary)
    tls.cmp_trace2.Capture(Arg1, Arg2);
}
NO_SANITIZE
void __sanitizer_cov_trace_cmp4(uint32_t Arg1, uint32_t Arg2) {
  TraceCmp(Arg1, Arg2);
  if (Arg1 != Arg2 && state.run_time_flags.use_auto_dictionary)
    tls.cmp_trace4.Capture(Arg1, Arg2);
}
NO_SANITIZE
void __sanitizer_cov_trace_cmp8(uint64_t Arg1, uint64_t Arg2) {
  TraceCmp(Arg1, Arg2);
  if (Arg1 != Arg2 && state.run_time_flags.use_auto_dictionary)
    tls.cmp_trace8.Capture(Arg1, Arg2);
}
// TODO(kcc): [impl] handle switch.
NO_SANITIZE
// Intentionally empty: `Val` and `Cases` are ignored, so switch statements
// contribute no features.
void __sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases) {}

// This function is called at startup when
// -fsanitize-coverage=inline-8bit-counters is used.
// See https://clang.llvm.org/docs/SanitizerCoverage.html#inline-8bit-counters
// Forwards the counter range [beg, end) to `state.sancov_objects`.
void __sanitizer_cov_8bit_counters_init(uint8_t *beg, uint8_t *end) {
  state.sancov_objects.Inline8BitCountersInit(beg, end);
}

// https://clang.llvm.org/docs/SanitizerCoverage.html#pc-table
// This function is called at the DSO init time, potentially several times.
// When called from the same DSO, the arguments will always be the same.
// If a different DSO calls this function, it will have different arguments.
// We currently do not support more than one sancov-instrumented DSO.
// Forwards the PC table range [beg, end) to `state.sancov_objects`.
void __sanitizer_cov_pcs_init(const PCInfo *absl_nonnull beg,
                              const PCInfo *end) {
  state.sancov_objects.PCInfoInit(beg, end);
}

// https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-control-flow
// This function is called at the DSO init time.
// Forwards the control-flow table range [beg, end) to `state.sancov_objects`.
void __sanitizer_cov_cfs_init(const uintptr_t *beg, const uintptr_t *end) {
  state.sancov_objects.CFSInit(beg, end);
}

// Updates the state of the paths, `path_level > 0`.
// Pushes `normalized_pc` into the thread-local path ring buffer and records
// the resulting hash in `state.path_feature_set`.
// Marked noinline so as not to create spills/fills on the fast path
// of __sanitizer_cov_trace_pc_guard.
__attribute__((noinline)) static void HandlePath(uintptr_t normalized_pc) {
  uintptr_t hash = tls.path_ring_buffer.push(normalized_pc);
  state.path_feature_set.set(hash);
}

// Handles one observed PC.
// `normalized_pc` is an integer representation of PC that is stable between
// the executions.
// `is_function_entry` is true if the PC is known to be a function entry.
// With __sanitizer_cov_trace_pc_guard this is an index of PC in the PC table.
// With __sanitizer_cov_trace_pc this is PC itself, normalized by subtracting
// the DSO's dynamic start address.
// NOTE(review): the names above predate the current `PCGuard pc_guard`
// parameter; presumably they correspond to `pc_guard.pc_index` and
// `pc_guard.is_function_entry` - confirm and update.
+static ENFORCE_INLINE void HandleOnePc(PCGuard pc_guard) { + if (!state.run_time_flags.use_pc_features) return; + state.pc_counter_set.SaturatedIncrement(pc_guard.pc_index); + + if (pc_guard.is_function_entry) { + uintptr_t sp = reinterpret_cast(__builtin_frame_address(0)); + // It should be rare for the stack depth to exceed the previous record. + if (__builtin_expect( + sp < tls.lowest_sp && + // And ignore the stack pointer when it is not in the known + // region (e.g. for signal handling with an alternative stack). + (tls.stack_region_low == 0 || sp >= tls.stack_region_low), + 0)) { + tls.lowest_sp = sp; + fuzztest::internal::CheckStackLimit(sp); + } + if (state.run_time_flags.callstack_level != 0) { + tls.call_stack.OnFunctionEntry(pc_guard.pc_index, sp); + state.callstack_set.set(tls.call_stack.Hash()); + } + } + + // path features. + if (state.run_time_flags.path_level != 0) HandlePath(pc_guard.pc_index); +} + +// Caller PC is the PC of the call instruction. +// Return address is the PC where the callee will return upon completion. +// On x86_64, CallerPC == ReturnAddress - 5 +// On AArch64, CallerPC == ReturnAddress - 4 +static uintptr_t ReturnAddressToCallerPc(uintptr_t return_address) { +#ifdef __x86_64__ + return return_address - 5; +#elif defined(__aarch64__) + return return_address - 4; +#else +#error "unsupported architecture" +#endif +} + +// Sets `actual_pc_counter_set_size_aligned` to `size`, properly aligned up. +static void UpdatePcCounterSetSizeAligned(size_t size) { + constexpr size_t kAlignment = state.pc_counter_set.kSizeMultiple; + constexpr size_t kMask = kAlignment - 1; + state.actual_pc_counter_set_size_aligned = (size + kMask) & ~kMask; +} + +// MainObjectLazyInit() and helpers allow us to initialize state.main_object +// lazily and thread-safely on the first call to __sanitizer_cov_trace_pc(). +// +// TODO(kcc): consider removing :dl_path_suffix= since with lazy init +// we can auto-detect the instrumented DSO. 
+// +// TODO(kcc): this lazy init is brittle. +// It assumes that __sanitizer_cov_trace_pc is the only code that touches +// state.main_object concurrently. I.e. we can not blindly reuse this lazy init +// for other instrumentation callbacks that use state.main_object. +// This code is also considered *temporary* because +// a) __sanitizer_cov_trace_pc is obsolete and we hope to not need it in future. +// b) a better option might be to do a non-lazy init by intercepting dlopen. +// +// We do not call MainObjectLazyInit() in +// __sanitizer_cov_trace_pc_guard() because +// a) there is not use case for that currently and +// b) it will slowdown the hot function. +static pthread_once_t main_object_lazy_init_once = PTHREAD_ONCE_INIT; +static void MainObjectLazyInitOnceCallback() { + state.main_object = + fuzztest::internal::GetDlInfo(state.GetStringFlag(":dl_path_suffix=")); + fprintf(stderr, "MainObjectLazyInitOnceCallback %zx\n", + state.main_object.start_address); + UpdatePcCounterSetSizeAligned(state.reverse_pc_table.NumPcs()); +} + +__attribute__((noinline)) static void MainObjectLazyInit() { + pthread_once(&main_object_lazy_init_once, MainObjectLazyInitOnceCallback); +} + +// TODO(kcc): [impl] add proper testing for this callback. +// TODO(kcc): make sure the pc_table in the engine understands the raw PCs. +// TODO(kcc): this implementation is temporary. In order for symbolization to +// work we will need to translate the PC into a PCIndex or make pc_table sparse. +// See https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-pcs. +// This instrumentation is redundant if other instrumentation +// (e.g. trace-pc-guard) is available, but GCC as of 2022-04 only supports +// this variant. +void __sanitizer_cov_trace_pc() { + uintptr_t pc = reinterpret_cast(__builtin_return_address(0)); + if (!state.main_object.start_address || + !state.actual_pc_counter_set_size_aligned) { + // Don't track coverage at all before the PC table is initialized. 
+ if (state.reverse_pc_table.NumPcs() == 0) return; + MainObjectLazyInit(); + } + pc -= state.main_object.start_address; + pc = ReturnAddressToCallerPc(pc); + const auto pc_guard = state.reverse_pc_table.GetPCGuard(pc); + // TODO(kcc): compute is_function_entry for this case. + if (pc_guard.IsValid()) HandleOnePc(pc_guard); +} + +// This function is called at the DSO init time. +void __sanitizer_cov_trace_pc_guard_init(PCGuard *absl_nonnull start, + PCGuard *stop) { + state.sancov_objects.PCGuardInit(start, stop); + UpdatePcCounterSetSizeAligned(state.sancov_objects.NumInstrumentedPCs()); +} + +// This function is called on every instrumented edge. +NO_SANITIZE +void __sanitizer_cov_trace_pc_guard(PCGuard *absl_nonnull guard) { + // This function may be called very early during the DSO initialization, + // before the values of `*guard` are initialized to non-zero. + // But it will immidiately return bacause state.run_time_flags.use_pc_features + // is false. Once state.run_time_flags.use_pc_features becomes true, it is + // already ok to call this function. + HandleOnePc(*guard); +} + +} // extern "C" diff --git a/src/third_party/fuzztest/dist/centipede/runner_sancov_object.cc b/src/third_party/fuzztest/dist/centipede/runner_sancov_object.cc new file mode 100644 index 00000000000..a1070576def --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/runner_sancov_object.cc @@ -0,0 +1,197 @@ +// Copyright 2023 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +// Instrumentation callbacks for SanitizerCoverage (sancov). +// https://clang.llvm.org/docs/SanitizerCoverage.html + +#include "./centipede/runner_sancov_object.h" + +#include +#include +#include +#include +#include +#include + +#include "absl/base/nullability.h" +#include "./centipede/foreach_nonzero.h" +#include "./centipede/pc_info.h" +#include "./centipede/runner_dl_info.h" +#include "./centipede/runner_utils.h" + +namespace fuzztest::internal { + +void SanCovObjectArray::PCGuardInit(PCGuard *absl_nullable start, + PCGuard *stop) { + RunnerCheck((start != nullptr) == (stop != nullptr), + "invalid PC guard table"); + skipping_no_code_dso_ = start == stop; + if (skipping_no_code_dso_) return; + // Ignore repeated calls with the same arguments. + if (size_ != 0 && objects_[size_ - 1].pc_guard_start == start) return; + RunnerCheck(size_ < kMaxSize, "too many sancov objects"); + auto &sancov_object = objects_[size_++]; + sancov_object.pc_guard_start = start; + sancov_object.pc_guard_stop = stop; + for (PCGuard *guard = start; guard != stop; ++guard) { + guard->pc_index = num_instrumented_pcs_; + ++num_instrumented_pcs_; + } +} + +void SanCovObjectArray::Inline8BitCountersInit( + uint8_t *inline_8bit_counters_start, uint8_t *inline_8bit_counters_stop) { + RunnerCheck((inline_8bit_counters_start != nullptr) == + (inline_8bit_counters_stop != nullptr), + "invalid 8-bit counter table"); + skipping_no_code_dso_ = + inline_8bit_counters_start == inline_8bit_counters_stop; + if (skipping_no_code_dso_) return; + // Ignore repeated calls with the same arguments. 
+ if (size_ != 0 && objects_[size_ - 1].inline_8bit_counters_start == + inline_8bit_counters_start) { + return; + } + RunnerCheck(size_ < kMaxSize, "too many sancov objects"); + auto &sancov_object = objects_[size_++]; + sancov_object.inline_8bit_counters_start = inline_8bit_counters_start; + sancov_object.inline_8bit_counters_stop = inline_8bit_counters_stop; +} + +void SanCovObjectArray::PCInfoInit(const PCInfo *absl_nullable pcs_beg, + const PCInfo *pcs_end) { + RunnerCheck((pcs_beg != nullptr) == (pcs_end != nullptr), "invalid PC table"); + if (skipping_no_code_dso_) { + RunnerCheck(pcs_beg == pcs_end, + "unexpected non-empty PC table for no-code DSO"); + return; + } + const char *called_early = + "__sanitizer_cov_pcs_init is called before either of " + "__sanitizer_cov_trace_pc_guard_init or " + "__sanitizer_cov_8bit_counters_init"; + RunnerCheck(size_ != 0, called_early); + // Assumes either __sanitizer_cov_trace_pc_guard_init or + // sanitizer_cov_8bit_counters_init was already called on this object. + auto &sancov_object = objects_[size_ - 1]; + const size_t guard_size = + sancov_object.pc_guard_stop - sancov_object.pc_guard_start; + const size_t counter_size = sancov_object.inline_8bit_counters_stop - + sancov_object.inline_8bit_counters_start; + RunnerCheck(guard_size != 0 || counter_size != 0, called_early); + RunnerCheck(std::max(guard_size, counter_size) == pcs_end - pcs_beg, + "__sanitizer_cov_pcs_init: mismatch between guard/counter size" + " and pc table size"); + sancov_object.pcs_beg = pcs_beg; + sancov_object.pcs_end = pcs_end; + sancov_object.dl_info = GetDlInfo(pcs_beg->pc); + RunnerCheck(sancov_object.dl_info.IsSet(), "failed to compute dl_info"); + if (sancov_object.pc_guard_start != nullptr) { + // Set is_function_entry for all the guards. 
+ for (size_t i = 0, n = pcs_end - pcs_beg; i < n; ++i) { + sancov_object.pc_guard_start[i].is_function_entry = + pcs_beg[i].has_flag(PCInfo::kFuncEntry); + } + } +} + +void SanCovObjectArray::CFSInit(const uintptr_t *cfs_beg, + const uintptr_t *cfs_end) { + RunnerCheck((cfs_beg != nullptr) == (cfs_end != nullptr), + "invalid control-flow table"); + if (skipping_no_code_dso_) { + RunnerCheck(cfs_beg == cfs_end, + "unexpected non-empty control-flow table for no-code DSO"); + return; + } + // Assumes __sanitizer_cov_pcs_init has been called. + const char *called_early = + "__sanitizer_cov_cfs_init is called before __sanitizer_cov_pcs_init"; + RunnerCheck(size_ != 0, called_early); + auto &sancov_object = objects_[size_ - 1]; + RunnerCheck(sancov_object.pcs_beg != nullptr, called_early); + sancov_object.cfs_beg = cfs_beg; + sancov_object.cfs_end = cfs_end; +} + +std::vector SanCovObjectArray::CreatePCTable() const { + // Populate the result. + std::vector result; + for (size_t i = 0; i < size(); ++i) { + const auto &object = objects_[i]; + for (const auto *ptr = object.pcs_beg; ptr != object.pcs_end; ++ptr) { + auto pc_info = *ptr; + // Convert into the link-time address + pc_info.pc -= object.dl_info.link_offset; + result.push_back(pc_info); + } + } + return result; +} + +std::vector SanCovObjectArray::CreateCfTable() const { + // Compute the CF table. + std::vector result; + for (size_t i = 0; i < size(); ++i) { + const auto &object = objects_[i]; + for (const auto *ptr = object.cfs_beg; ptr != object.cfs_end; ++ptr) { + uintptr_t data = *ptr; + // CF table is an array of PCs, except for delimiter (Null) and indirect + // call indicator (-1). Convert into link-time address. 
+ if (data != 0 && data != -1ULL) data -= object.dl_info.link_offset; + result.push_back(data); + } + } + return result; +} + +DsoTable SanCovObjectArray::CreateDsoTable() const { + DsoTable result; + result.reserve(size()); + for (size_t i = 0; i < size(); ++i) { + const auto &object = objects_[i]; + size_t num_instrumented_pcs = object.pcs_end - object.pcs_beg; + result.push_back({object.dl_info.path, num_instrumented_pcs}); + } + return result; +} + +void SanCovObjectArray::ClearInlineCounters() { + for (size_t i = 0; i < size(); ++i) { + const auto &object = objects_[i]; + if (object.inline_8bit_counters_start == nullptr) continue; + const size_t num_counters = + object.inline_8bit_counters_stop - object.inline_8bit_counters_start; + memset(object.inline_8bit_counters_start, 0, num_counters); + } +} + +void SanCovObjectArray::ForEachNonZeroInlineCounter( + const std::function &callback) + const { + size_t process_wide_idx = 0; + for (size_t i = 0; i < size(); ++i) { + const auto &object = objects_[i]; + if (object.inline_8bit_counters_start == nullptr) continue; + const size_t num_counters = + object.inline_8bit_counters_stop - object.inline_8bit_counters_start; + ForEachNonZeroByte(object.inline_8bit_counters_start, num_counters, + [&](size_t idx, uint8_t counter_value) { + callback(idx + process_wide_idx, counter_value); + }); + process_wide_idx += num_counters; + } +} + +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/runner_sancov_object.h b/src/third_party/fuzztest/dist/centipede/runner_sancov_object.h new file mode 100644 index 00000000000..7694f047445 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/runner_sancov_object.h @@ -0,0 +1,115 @@ +// Copyright 2023 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef THIRD_PARTY_CENTIPEDE_RUNNER_SANCOV_OBJECT_H_ +#define THIRD_PARTY_CENTIPEDE_RUNNER_SANCOV_OBJECT_H_ + +#include +#include +#include +#include + +#include "absl/base/nullability.h" +#include "./centipede/pc_info.h" +#include "./centipede/runner_dl_info.h" + +// TODO(kcc): gradually replace the old code in runner_sancov.cc with this code. +// The difference is that the old code allows only one sancov-instrumented DSO, +// while this code allows multiple instrumented DSO. +// TODO(kcc): this code is not a full replacement for the old code yet. + +namespace fuzztest::internal { + +// Information about one sancov-instrumented object (DSO). +// See https://clang.llvm.org/docs/SanitizerCoverage.html. +// These structs are created as globals and are linker-initialized to zero. +struct SanCovObject { + DlInfo dl_info; // Obtained via GetDlInfo. + PCGuard *pc_guard_start; // __sanitizer_cov_trace_pc_guard_init. + PCGuard *pc_guard_stop; // __sanitizer_cov_trace_pc_guard_init. + const PCInfo *pcs_beg; // __sanitizer_cov_pcs_init + const PCInfo *pcs_end; // __sanitizer_cov_pcs_init + const uintptr_t *cfs_beg; // __sanitizer_cov_cfs_init + const uintptr_t *cfs_end; // __sanitizer_cov_cfs_init + uint8_t *inline_8bit_counters_start; // __sanitizer_cov_8bit_counters_init + uint8_t *inline_8bit_counters_stop; // __sanitizer_cov_8bit_counters_init +}; + +// A fixed size array of SanCovObject structs. +// Also linker-initialized to zero. +class SanCovObjectArray { + public: + // To be called in __sanitizer_cov_trace_pc_guard_init. 
+ void PCGuardInit(PCGuard *absl_nullable start, PCGuard *stop); + + // To be called in __sanitizer_cov_pcs_init. + void PCInfoInit(const PCInfo *absl_nullable pcs_beg, const PCInfo *pcs_end); + + // To be called in __sanitizer_cov_cfs_init. + void CFSInit(const uintptr_t *cfs_beg, const uintptr_t *cfs_end); + + // To be called in __sanitizer_cov_8bit_counters_init. + void Inline8BitCountersInit(uint8_t *inline_8bit_counters_start, + uint8_t *inline_8bit_counters_stop); + + // Sets all inline counters to zero. + void ClearInlineCounters(); + + // Calls `callback` for every non-zero inline counter of every object. + // The `idx` passed to `callback` is the zero-based index of the counter + // in the entire process, not just in the object. + // `counter_value` is the non-zero value of the counter. + void ForEachNonZeroInlineCounter( + const std::function &callback) + const; + + // Returns the number of sancov-instrumented objects observed so far. + size_t size() const { return size_; } + + // Returns the number of sancov-instrumented PCs across all DSOs. + size_t NumInstrumentedPCs() const { return num_instrumented_pcs_; } + + // Returns a vector of PCInfo for all instrumented DSOs. + // Every PC in the vector has the object's link offset (dl_info.link_offset) + // subtracted. So, unless there is exactly one instrumented DSO, this vector + // by itself is not sufficient to map PCs to DSOs or symbols. + // This will require additional information. TODO(kcc) implement. + std::vector CreatePCTable() const; + + // Returns a vector of uintptr_t corresponding to a control flow table: + // https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-control-flow. + // Similar to CreatePCTable(), subtracts the link offset from every PC before + // returning. + std::vector CreateCfTable() const; + + // Returns a DsoTable computed from all SanCovObjects. 
+ DsoTable CreateDsoTable() const; + + private: + static constexpr size_t kMaxSize = 64 * 1024; + // Set by `PCGuardInit`/`Inline8BitCountersInit` if the current DSO has an + // empty PC guard/counter table, which should not be tracked in a + // SanCovObject. + // + // TODO(b/326950832): Clean up the SanCov init handling to check assumptions + // (e.g. callback ordering) in a cleaner way. + bool skipping_no_code_dso_; + size_t size_; + SanCovObject objects_[kMaxSize]; + size_t num_instrumented_pcs_; // Total number of instrumented PCs. +}; + +} // namespace fuzztest::internal + +#endif // THIRD_PARTY_CENTIPEDE_RUNNER_SANCOV_OBJECT_H_ diff --git a/src/third_party/fuzztest/dist/centipede/runner_utils.cc b/src/third_party/fuzztest/dist/centipede/runner_utils.cc new file mode 100644 index 00000000000..32164f2c4df --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/runner_utils.cc @@ -0,0 +1,61 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "./centipede/runner_utils.h" + +#include + +#include +#include +#include + +#include "absl/base/nullability.h" + +namespace fuzztest::internal { + +void PrintErrorAndExitIf(bool condition, const char* absl_nonnull error) { + if (!condition) return; + fprintf(stderr, "error: %s\n", error); + exit(1); +} + +uintptr_t GetCurrentThreadStackRegionLow() { +#ifdef __APPLE__ + pthread_t self = pthread_self(); + const auto stack_addr = + reinterpret_cast(pthread_get_stackaddr_np(self)); + const auto stack_size = pthread_get_stacksize_np(self); + return stack_addr - stack_size; +#else // __APPLE__ + pthread_attr_t attr = {}; + if (pthread_getattr_np(pthread_self(), &attr) != 0) { + fprintf(stderr, "Failed to get the pthread attr of the current thread.\n"); + return 0; + } + void *stack_addr = nullptr; + size_t stack_size = 0; + if (pthread_attr_getstack(&attr, &stack_addr, &stack_size) != 0) { + fprintf(stderr, "Failed to get the stack region of the current thread.\n"); + pthread_attr_destroy(&attr); + return 0; + } + pthread_attr_destroy(&attr); + const auto stack_region_low = reinterpret_cast(stack_addr); + RunnerCheck(stack_region_low != 0, + "the current thread stack region starts from 0 - unexpected!"); + return stack_region_low; +#endif // __APPLE__ +} + +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/runner_utils.h b/src/third_party/fuzztest/dist/centipede/runner_utils.h new file mode 100644 index 00000000000..ff3b0443fe5 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/runner_utils.h @@ -0,0 +1,40 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef THIRD_PARTY_CENTIPEDE_RUNNER_UTILS_H_ +#define THIRD_PARTY_CENTIPEDE_RUNNER_UTILS_H_ + +#include + +#include "absl/base/nullability.h" + +namespace fuzztest::internal { + +// If `condition` is true, prints `error` and calls exit(1). +// TODO(kcc): change all uses of PrintErrorAndExitIf() to RunnerCheck() +// as it is a more common pattern. +void PrintErrorAndExitIf(bool condition, const char* absl_nonnull error); + +// A rough equivalent of "CHECK(condition) << error;". +inline void RunnerCheck(bool condition, const char* absl_nonnull error) { + PrintErrorAndExitIf(!condition, error); +} + +// Returns the lower bound of the stack region for the current thread. 0 will be +// returned on failures. +uintptr_t GetCurrentThreadStackRegionLow(); + +} // namespace fuzztest::internal + +#endif // THIRD_PARTY_CENTIPEDE_RUNNER_UTILS_H_ diff --git a/src/third_party/fuzztest/dist/centipede/rusage_profiler.cc b/src/third_party/fuzztest/dist/centipede/rusage_profiler.cc new file mode 100644 index 00000000000..ae994637a61 --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/rusage_profiler.cc @@ -0,0 +1,550 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "./centipede/rusage_profiler.h" + +#include +#include +#include +#include +#include +#include // NOLINT +#include +#include +#include +#include +#include + +#include "absl/base/attributes.h" +#include "absl/base/const_init.h" +#include "absl/base/nullability.h" +#include "absl/log/check.h" +#include "absl/log/log.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_format.h" +#include "absl/synchronization/mutex.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "./centipede/periodic_action.h" +#include "./centipede/rusage_stats.h" + +namespace fuzztest::internal { + +//------------------------------------------------------------------------------ +// RUsageProfiler::Snapshot +//------------------------------------------------------------------------------ + +std::string RUsageProfiler::Snapshot::WhereStr() const { + return absl::StrFormat("%s:%d", location.file, location.line); +} + +std::string RUsageProfiler::Snapshot::ShortWhereStr() const { + return absl::StrFormat( // + "%s:%d", std::filesystem::path(location.file).filename(), location.line); +} + +std::string RUsageProfiler::Snapshot::WhenStr() const { + return absl::FormatTime("%E4Y-%m-%dT%H:%M:%E2S", time, absl::LocalTimeZone()); +} + +std::string RUsageProfiler::Snapshot::ShortWhenStr() const { + return absl::FormatTime("%H:%M:%E2S", time, absl::LocalTimeZone()); +} + +std::string RUsageProfiler::Snapshot::FormattedMetricsStr() const { + std::string s; + absl::StrAppendFormat( // + &s, " [P.%d:S.%d] TIMING | %s |\n", // + profiler_id, id, 
timing.FormattedStr()); + if (delta_timing != RUsageTiming::Zero()) { + absl::StrAppendFormat( // + &s, " [P.%d:S.%d] TIMING Δ | %s |\n", // + profiler_id, id, delta_timing.FormattedStr()); + } + absl::StrAppendFormat( // + &s, " [P.%d:S.%d] MEMORY | %s |\n", // + profiler_id, id, memory.FormattedStr()); + if (delta_memory != RUsageMemory::Zero()) { + absl::StrAppendFormat( // + &s, " [P.%d:S.%d] MEMORY Δ | %s |\n", // + profiler_id, id, delta_memory.FormattedStr()); + } + return s; +} + +std::string RUsageProfiler::Snapshot::ShortMetricsStr() const { + std::string s; + absl::StrAppendFormat( // + &s, "TIMING { %s } ", timing.ShortStr()); + if (delta_timing != RUsageTiming::Zero()) { + absl::StrAppendFormat( // + &s, "TIMING Δ { %s } ", delta_timing.ShortStr()); + } + absl::StrAppendFormat( // + &s, "MEMORY { %s } ", memory.ShortStr()); + if (delta_memory != RUsageMemory::Zero()) { + absl::StrAppendFormat( // + &s, "MEMORY Δ { %s } ", delta_memory.ShortStr()); + } + return s; +} + +const RUsageProfiler::Snapshot& RUsageProfiler::Snapshot::Log() const { + if (id >= 0) { + LOG(INFO).AtLocation(location.file, location.line) + << "PROFILER [P." << profiler_id << (profiler_desc.empty() ? "" : " ") + << profiler_desc << "] SNAPSHOT [S." << id << (title.empty() ? "" : " ") + << title << "]:\n" + << FormattedMetricsStr(); + } + return *this; +} + +std::ostream& operator<<(std::ostream& os, const RUsageProfiler::Snapshot& ss) { + return os << ss.title << ": " << ss.ShortWhereStr() << " @ " + << ss.ShortWhenStr() << ": " << ss.ShortMetricsStr(); +} + +namespace { + +//------------------------------------------------------------------------------ +// ProfileReportGenerator +// +// A helper for RUsageProfiler::GenerateReport(): generates individual +// chronological charts of the tracked metrics and streams them to an ostream. 
+//------------------------------------------------------------------------------ + +class ProfileReportGenerator { + public: + ProfileReportGenerator( // + const std::deque& snapshots, // + RUsageProfiler::ReportSink* absl_nonnull report_sink) + : snapshots_{snapshots}, report_sink_{report_sink} { + for (const auto& snapshot : snapshots_) { + timing_low_ = RUsageTiming::LowWater( // + timing_low_, snapshot.timing); + timing_high_ = RUsageTiming::HighWater( // + timing_high_, snapshot.timing); + delta_timing_low_ = RUsageTiming::LowWater( // + delta_timing_low_, snapshot.delta_timing); + delta_timing_high_ = RUsageTiming::HighWater( // + delta_timing_high_, snapshot.delta_timing); + + memory_low_ = RUsageMemory::LowWater( // + memory_low_, snapshot.memory); + memory_high_ = RUsageMemory::HighWater( // + memory_high_, snapshot.memory); + delta_memory_low_ = RUsageMemory::LowWater( // + delta_memory_low_, snapshot.delta_memory); + delta_memory_high_ = RUsageMemory::HighWater( // + delta_memory_high_, snapshot.delta_memory); + + max_where_len_ = // + std::max(max_where_len_, snapshot.ShortWhereStr().length()); + max_when_len_ = // + std::max(max_when_len_, snapshot.ShortWhenStr().length()); + max_title_len_ = // + std::max(max_title_len_, snapshot.title.length()); + } + } + + // GenChartImpl() wrappers for the 2 available "snap" metrics. + template + void GenChart(const MetricT RUsageTiming::*metric_field) { + GenChartImpl( // + &RUsageProfiler::Snapshot::timing, metric_field, // + timing_low_, timing_high_, /*is_delta=*/false); + } + template + void GenChart(const MetricT RUsageMemory::*metric_field) const { + GenChartImpl( // + &RUsageProfiler::Snapshot::memory, metric_field, // + memory_low_, memory_high_, /*is_delta=*/false); + } + + // GenChartImpl() wrappers for the 2 available delta metrics. 
+ template + void GenDeltaChart(const MetricT RUsageTiming::*metric_field) { + GenChartImpl( // + &RUsageProfiler::Snapshot::delta_timing, metric_field, // + delta_timing_low_, delta_timing_high_, /*is_delta=*/true); + } + template + void GenDeltaChart(const MetricT RUsageMemory::*metric_field) const { + GenChartImpl( // + &RUsageProfiler::Snapshot::delta_memory, metric_field, // + delta_memory_low_, delta_memory_high_, /*is_delta=*/true); + } + + private: + // The actual chart generator. For better understanding of the code: an + // example of `metric_field` is `&RUsageProfiler::Snapshot::delta_timing` + // which has type `RUsageTiming`; an example of a matching `submetric_field` + // for that is `&RUsageTiming::wall_time`. + template + void GenChartImpl( // + const MetricT RUsageProfiler::Snapshot::*metric_field, // + const SubmetricT MetricT::*submetric_field, // + MetricT metric_low_water, // + MetricT metric_high_water, // + bool is_delta) const { + constexpr SubmetricT kZero{}; // works for both ints and absl::Duration + const SubmetricT low_water = metric_low_water.*submetric_field; + const SubmetricT high_water = metric_high_water.*submetric_field; + // SubmetricT can be int64 or Duration: calculate a notch_size that is a + // double or an unrounded Duration, respectively, so the below calculations + // are exact. + const auto notch_size = + (high_water - low_water) / static_cast(kBarNotches); + // The position of the notch indicating 0 (used for delta metrics only). + // clang-format off + const int notch_zero = + notch_size == kZero ? kBarNotches : + low_water >= kZero ? 0 : + std::floor(std::abs(low_water / notch_size)); + // clang-format on + CHECK_GE(kBarNotches, notch_zero); + // Print a zero mark only if a delta metric goes negative. + std::string zero_mark = low_water < kZero ? 
"|" : ""; + + for (const auto& snapshot : snapshots_) { + const SubmetricT current = snapshot.*metric_field.*submetric_field; + + // Generate a bar of #'s as a graphical representation of the current + // value of the metric relative to its full range [low_water, high_water]: + // low_water is no #'s and all -'s, high_water is kBarNotches of #'s. + const std::string metric_str = FormatInOptimalUnits(current, is_delta); + std::string metric_bar; + // clang-format off + const int notches = + notch_size == kZero + ? kBarNotches : std::floor((current - low_water) / notch_size); + // clang-format on + CHECK_GE(kBarNotches, notches); + + if (!is_delta) { + // Non-delta metrics can't go negative, so the bar always looks like + // this: + // ###############-------------------------- + const std::string filled(notches, '#'); + const std::string unfilled(kBarNotches - notches, '-'); + metric_bar = absl::StrCat(filled, unfilled); + } else { + // Delta metrics can go negative, so this become more complicated. In + // general, print a zero mark '|' at the proper fixed position of every + // bar for this metric's history, and grow the #'s away from the zero + // mark, to the left for negative and to the right for positive deltas: + // +Delta: --------|#######--------- + // -Delta: ########|---------------- + std::string pad_minus, minus, plus, pad_plus; + // Notches range from 0 (for low_water) to kBarNotches (for high_water). 
+ if (notches < notch_zero) { + pad_minus = std::string(notches, '-'); + minus = std::string(notch_zero - notches, '#'); + pad_plus = std::string(kBarNotches - notch_zero, '-'); + } else if (notches > notch_zero) { + pad_minus = std::string(notch_zero, '-'); + plus = std::string(notches - notch_zero, '#'); + pad_plus = std::string(kBarNotches - notches, '-'); + } else { + pad_minus = std::string(notch_zero, '-'); + pad_plus = std::string(kBarNotches - notch_zero, '-'); + } + metric_bar = absl::StrCat(pad_minus, minus, zero_mark, plus, pad_plus); + } + + // Finally print a full line for the current snapshot/metric, like one of: + // source.cc:123 @ 21:08:27.61 [P.1:S.1 Snap ] 493.78M [############---] + // source.cc:123 @ 21:08:27.61 [P.1:S.2 +Delta] +138.15M [-----|#####----] + // source.cc:123 @ 21:08:27.61 [P.1:S.3 -Delta] -82.69M [--###|---------] + *report_sink_ << absl::StrFormat( // + " %*s @ %*s [P.%d:S.%-2d %*s] %10s [%s]\n", // '*' is custom width + -max_where_len_, snapshot.ShortWhereStr(), // ...passed here. + -max_when_len_, snapshot.ShortWhenStr(), // '-' left-justifies + snapshot.profiler_id, snapshot.id, // + -max_title_len_, snapshot.title, // + metric_str, metric_bar); + } + } + + static constexpr int kBarNotches = 50; + + const std::deque& snapshots_; + RUsageProfiler::ReportSink* report_sink_; + + RUsageMemory memory_low_ = RUsageMemory::Max(); + RUsageMemory memory_high_ = RUsageMemory::Min(); + RUsageMemory delta_memory_low_ = RUsageMemory::Max(); + RUsageMemory delta_memory_high_ = RUsageMemory::Min(); + RUsageTiming timing_low_ = RUsageTiming::Max(); + RUsageTiming timing_high_ = RUsageTiming::Min(); + RUsageTiming delta_timing_low_ = RUsageTiming::Max(); + RUsageTiming delta_timing_high_ = RUsageTiming::Min(); + + // NOTE: The values are negated, so have to be signed. 
+ int max_where_len_ = 0; + int max_when_len_ = 0; + int max_title_len_ = 0; +}; + +} // namespace + +//------------------------------------------------------------------------------ +// RUsageProfiler +//------------------------------------------------------------------------------ + +std::atomic RUsageProfiler::next_id_; + +RUsageProfiler::RUsageProfiler( // + RUsageScope scope, // + MetricsMask metrics, // + RaiiActionsMask raii_actions, // + SourceLocation location, // + std::string description) + : scope_{std::move(scope)}, + metrics_{metrics}, + raii_actions_{raii_actions}, + ctor_loc_{location}, + description_{std::move(description)}, + id_{next_id_.fetch_add(1, std::memory_order_relaxed)} { + if (metrics_ == kMetricsOff) return; + + if (raii_actions_ & kCtorSnapshot) { + TakeSnapshot(ctor_loc_, "INITIAL").Log(); + } +} + +RUsageProfiler::RUsageProfiler( // + RUsageScope scope, // + MetricsMask metrics, // + absl::Duration timelapse_interval, // + bool also_log_timelapses, // + SourceLocation location, // + std::string description) + : scope_{std::move(scope)}, + metrics_{metrics}, + raii_actions_{kDtorSnapshot | kDtorReport}, + ctor_loc_{location}, + description_{std::move(description)}, + id_{next_id_.fetch_add(1, std::memory_order_relaxed)} { + if (metrics_ == kMetricsOff) return; + + if (timelapse_interval != absl::ZeroDuration() && + timelapse_interval != absl::InfiniteDuration()) { + StartTimelapse( // + ctor_loc_, timelapse_interval, also_log_timelapses, "Timelapse"); + } +} + +RUsageProfiler::~RUsageProfiler() { + if (metrics_ == kMetricsOff) return; + + // In case the caller hasn't done this. + if (timelapse_recorder_) { + StopTimelapse(); + } + if (raii_actions_ & kDtorSnapshot) { + // NOTE: Can't pass the real location from callers, so use next best thing. + TakeSnapshot(ctor_loc_, "FINAL").Log(); + } + // If requested, also print a final report. 
+ if (raii_actions_ & kDtorReport) { + const std::string title = + absl::StrFormat("PROFILER [P.%d %s] FINAL REPORT:", id_, description_); + PrintReport(ctor_loc_, title); + } +} + +const RUsageProfiler::Snapshot& RUsageProfiler::TakeSnapshot( // + SourceLocation loc, std::string title) { + if (metrics_ == kMetricsOff) { + static const Snapshot kEmpty{}; + return kEmpty; + } + + absl::WriterMutexLock lock{&mutex_}; + + RUsageTiming snap_timing = RUsageTiming::Zero(); + RUsageTiming delta_timing = RUsageTiming::Zero(); + RUsageMemory snap_memory = RUsageMemory::Zero(); + RUsageMemory delta_memory = RUsageMemory::Zero(); + + if (metrics_ & kTiming) { + const auto current = RUsageTiming::Snapshot(scope_, timer_); + if (metrics_ & kSnapTiming) { + snap_timing = current; + } + if (metrics_ & kDeltaTiming && !snapshots_.empty()) { + const auto& previous = snapshots_.back().timing; + delta_timing = current - previous; + } + } + + if (metrics_ & kMemory) { + const auto current = RUsageMemory::Snapshot(scope_); + if (metrics_ & kSnapMemory) { + snap_memory = current; + } + if (metrics_ & kDeltaMemory && !snapshots_.empty()) { + const auto& previous = snapshots_.back().memory; + delta_memory = current - previous; + } + } + + Snapshot snapshot{/*id=*/static_cast(snapshots_.size()), + /*title=*/std::move(title), + /*location=*/loc, + /*time=*/absl::Now(), + /*profiler_id=*/id_, + /*profiler_desc=*/description_, + /*timing=*/snap_timing, + /*delta_timing=*/delta_timing, + /*memory=*/snap_memory, + /*delta_memory=*/delta_memory}; + + return snapshots_.emplace_back(std::move(snapshot)); +} + +void RUsageProfiler::StartTimelapse( // + SourceLocation loc, // + absl::Duration interval, // + bool also_log, // + std::string title) { + absl::WriterMutexLock lock{&mutex_}; + CHECK(!timelapse_recorder_) << "StopTimelapse() wasn't called"; + timelapse_recorder_ = std::make_unique( + [this, loc = std::move(loc), title = std::move(title), also_log]() { + const auto& s = TakeSnapshot(loc, 
title); + if (also_log) s.Log(); + }, + PeriodicAction::ZeroDelayConstInterval(interval)); +} + +void RUsageProfiler::StopTimelapse() { + // Detach the recorder while holding `mutex_`, but let it be destroyed only + // after the lock is released. The periodic action calls TakeSnapshot(), + // which acquires `mutex_` itself, so destroying the recorder under the lock + // could block forever if its destructor waits for an in-flight action + // (presumably it does -- TODO confirm against PeriodicAction). + decltype(timelapse_recorder_) recorder; + { + absl::WriterMutexLock lock{&mutex_}; + CHECK(timelapse_recorder_) << "StartTimelapse() wasn't called"; + recorder = std::move(timelapse_recorder_); + } +} + +void RUsageProfiler::PrintReport( // + SourceLocation loc, const std::string& title) { + if (metrics_ == kMetricsOff) return; + + // Logs streamed-in text to LOG(INFO), while dropping the usual log prefix + // (date/time/thread/source). LOG()'s limit on the size of a single message + // applies to one streamed text fragment only (if needed, this can be reduced + // even further to a single line of text in a fragment): this is the main + // purpose of this class, as profiling reports can get very long, especially + // with automatic timelapse snapshotting. + class ReportLogger final : public ReportSink { + public: + ReportLogger(SourceLocation loc) : loc_{loc} {} + + ~ReportLogger() override { + if (!buffer_.empty()) { + LOG(INFO).AtLocation(loc_.file, loc_.line).NoPrefix() << buffer_; + } + } + + ReportLogger& operator<<(std::string_view fragment) override { + const auto last_newline = fragment.rfind('\n'); + if (last_newline == std::string_view::npos) { + // Accumulate no-'\n' fragments: LOG() always wraps around. + buffer_ += fragment; + } else { + // Now we can log, but save the last bit of text + LOG(INFO).AtLocation(loc_.file, loc_.line).NoPrefix() + << buffer_ << fragment.substr(0, last_newline); + buffer_ = fragment.substr(last_newline + 1); + } + return *this; + } + + private: + const SourceLocation loc_; + std::string buffer_; + }; + + LOG(INFO).AtLocation(loc.file, loc.line) << title << "\n"; + ReportLogger report_logger{loc}; + GenerateReport(&report_logger); +} + +void RUsageProfiler::GenerateReport( + ReportSink* absl_nonnull report_sink) const { + absl::ReaderMutexLock lock{&mutex_}; + // Prevent interleaved reports from multiple concurrent RUsageProfilers. 
+ ABSL_CONST_INIT static absl::Mutex report_generation_mutex_{absl::kConstInit}; + absl::WriterMutexLock logging_lock{&report_generation_mutex_}; + + ProfileReportGenerator gen{snapshots_, report_sink}; + + const std::string desc = absl::StrFormat("[P.%d %s]", id_, description_); + *report_sink << "SCOPE: " << scope_ << "\n"; + + if (metrics_ & kSnapTiming) { + *report_sink << "\n=== TIMING " << desc << " ===\n"; + *report_sink << "\nWALL TIME " << desc << ":\n"; + gen.GenChart(&RUsageTiming::wall_time); + *report_sink << "\nUSER TIME " << desc << ":\n"; + gen.GenChart(&RUsageTiming::user_time); + *report_sink << "\nSYSTEM TIME " << desc << ":\n"; + gen.GenChart(&RUsageTiming::sys_time); + *report_sink << "\nCPU UTILIZATION " << desc << ":\n"; + gen.GenChart(&RUsageTiming::cpu_utilization); + *report_sink << "\nAVERAGE CORES " << desc << ":\n"; + gen.GenChart(&RUsageTiming::cpu_hyper_cores); + } + if (metrics_ & kDeltaTiming) { + *report_sink << "\n=== Δ TIMING " << desc << " ===\n"; + *report_sink << "\nΔ WALL TIME " << desc << ":\n"; + gen.GenDeltaChart(&RUsageTiming::wall_time); + *report_sink << "\nΔ USER TIME " << desc << ":\n"; + gen.GenDeltaChart(&RUsageTiming::user_time); + *report_sink << "\nΔ SYSTEM TIME " << desc << ":\n"; + gen.GenDeltaChart(&RUsageTiming::sys_time); + *report_sink << "\nΔ CPU UTILIZATION " << desc << ":\n"; + gen.GenDeltaChart(&RUsageTiming::cpu_utilization); + *report_sink << "\nΔ AVERAGE CORES " << desc << ":\n"; + gen.GenDeltaChart(&RUsageTiming::cpu_hyper_cores); + } + if (metrics_ & kSnapMemory) { + *report_sink << "\n=== MEMORY USAGE " << desc << " ===\n"; + *report_sink << "\nRESIDENT SET SIZE " << desc << ":\n"; + gen.GenChart(&RUsageMemory::mem_rss); + *report_sink << "\nVIRTUAL SIZE " << desc << ":\n"; + gen.GenChart(&RUsageMemory::mem_vsize); + *report_sink << "\nVIRTUAL PEAK " << desc << ":\n"; + gen.GenChart(&RUsageMemory::mem_vpeak); + *report_sink << "\nDATA SEGMENT " << desc << ":\n"; + 
gen.GenChart(&RUsageMemory::mem_data); + *report_sink << "\nSHARED MEMORY " << desc << ":\n"; + gen.GenChart(&RUsageMemory::mem_shared); + } + if (metrics_ & kDeltaMemory) { + *report_sink << "\n=== Δ MEMORY USAGE " << desc << " ===\n"; + *report_sink << "\nΔ RESIDENT SET SIZE " << desc << ":\n"; + gen.GenDeltaChart(&RUsageMemory::mem_rss); + *report_sink << "\nΔ VIRTUAL SIZE " << desc << ":\n"; + gen.GenDeltaChart(&RUsageMemory::mem_vsize); + *report_sink << "\nΔ VIRTUAL PEAK " << desc << ":\n"; + gen.GenDeltaChart(&RUsageMemory::mem_vpeak); + *report_sink << "\nΔ DATA SEGMENT " << desc << ":\n"; + gen.GenDeltaChart(&RUsageMemory::mem_data); + *report_sink << "\nΔ SHARED MEMORY " << desc << ":\n"; + gen.GenDeltaChart(&RUsageMemory::mem_shared); + } +} + +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/rusage_profiler.h b/src/third_party/fuzztest/dist/centipede/rusage_profiler.h new file mode 100644 index 00000000000..7e12b96391d --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/rusage_profiler.h @@ -0,0 +1,594 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// TODO(ussuri): Upgrade to optionally measure the metrics of a given thread, +// not the entire process (available via /proc/self/task/[tid]/). 
+ +//------------------------------------------------------------------------------ +// RUsageProfiler +// +// A profiler for the current process's timing and system memory usage. Unlike +// external sampling profilers that require code instrumentation and slow +// profiling runs, RUsageProfiler's stat collection and reporting are +// permanently compiled into the client's code, consume no additional CPU cycles +// or RAM when idle and very few additional cycles or RAM when active, and can +// be activated at any time, e.g. by simply passing a flag. +// +// Another difference is that RUsageProfiler doesn't just track timing, but +// other system resource usage as well, such as several types of the process's +// memory. +// +// While traditional profilers report performance of functions, the focus of +// RUsageProfiler is performance of higher-level logical units of processing +// that can both be smaller than a single function or span multiple functions, +// classes, and modules. +// +// To achieve that, profiling metrics collection is explicit, intrusive and +// driven entirely by client code: RUsageProfiler profiles only the bits of +// logic it is told to profile, rather than every function call +// indiscriminately. The downside is that client code requires explicit +// profiling statements. The upside is that the client gets a very different +// level of control, as well as differently structured and differently focused +// resource usage stats, unavailable with external sampling profilers: for +// example, RUsageProfiler makes it very easy to measure the overall resource +// usage dynamics of a complicated code path regardless of what function calls +// it makes or which external libraries it uses, or to print a comparison +// diagram of resource usage by different stages of a multi-stage processor +// module. +// +// Resource usage collection is available via one of or a combination of: +// 1) Explicit snapshots at specific execution checkpoints. 
+// 2) An asynchronous timelapse sequence of automatic snapshots taken at +// regular intervals between two checkpoints. +// +// BASIC USAGE +// +// At construction, RUsageProfiler ctor records and logs an initial snapshot of +// the metrics requested by the client; at destruction, the dtor logs the +// current resource usage and the delta from the time of construction. +// +// ADVANCED USAGE +// +// Additional intermediate snapshots can be recorded at a client's request. +// These snapshots can also be immediately logged, with both the absolute and +// delta metrics printed. +// +// A final chronological report of the resource usage can also be generated and +// logged. The report contains each tracked metric's snapshot history over this +// RUsageProfiler object's lifetime. Each snapshot is annotated with the source +// location and time. The metric values are printed in the numeric and +// pseudo-graphical form (as a progress-like bar representing the value relative +// to its overall observed range). +// +// TIMELAPSE PROFILING +// +// RUsageProfiler also supports a limited timelapse mode. In contrast to +// traditional sampling profilers, it simply takes snapshots of resource usage +// at regular intervals, but doesn't collect per-function call usage stats. +// +// This makes it particularly useful for detection of problematic resource usage +// patterns in blackbox or near-blackbox external API calls or modules, +// such as third-party libs, or measuring the effect of differently tuned +// parameter configurations on the performance profile of complicated, +// multi-function, multi-class, or multi-module pieces of logic. +// +// MULTI-THREADING +// +// RUsageProfiler is thread-safe. +// +// Keep in mind that this is a process-scoped profiler, not thread-scoped: it +// records and reports the current _process's_ timing and resource usage, not +// the current _thread's_. 
This means that in a multi-threaded context, the +// numbers recorded by each snapshot will reflect the timing and memory used up +// by _all_ the threads of the process. For example, if a thread pool executes +// the same profiled function in N threads, the stats reported by the +// function-level profiler may be erratic and not very indicative of the +// function's true performance. +// +// EXAMPLE USAGE - DIRECT +// +// void foo() { +// // Logs the initial snapshot: +// RUsageProfiler profiler{kAllMetrics, ABSL_LOC, __func__}; +// ... +// profiler.TakeSnapshot(ABSL_LOC); // Takes another snapshot +// ... +// profiler.TakeSnapshot(ABSL_LOC).Log(); // Records and logs a snapshot +// ... +// VLOG(1) << profiler.TakeSnapshot(ABSL_LOC); // A different way to log +// ... +// } // Dtor logs a final snapshot +// +// EXAMPLE USAGE - MACROS +// +// void foo() { +// RPROF_THIS_FUNCTION(VLOG_IS_ON(2)); // Profile the function @ --v>=2 +// ... +// RPROF_SNAPSHOT_AND_LOG(); // Record and log a function-level snapshot +// for (...) { +// RPROF_THIS_SCOPE(VLOG_IS_ON(3)); // Profile loop iterations @ --v>=3 +// ... 
+// } +// RPROF_SNAPSHOT(); // Record (not log) another function-level snapshot +// } // Dtor logs a final snapshot and a chronological report +// +// EXAMPLE SNAPSHOTS +// +// clang-format off +// I1105 16:52:20.831313 932765 foo.cc:79] PROFILER [P.1 DoSomethingFn()] SNAPSHOT [S.0 INITIAL]: // NOLINT +// [P.1:S.0] TIMING | Wall: 11us | User: 2us | Sys: 4us | CpuUtil: 8.11% | CpuCores: 0.6 | // NOLINT +// [P.1:S.0] MEMORY | RSS: 119.79M | VSize: 2.04G | VPeak: 2.04G | Data: 152.45M | ShMem: 12.83M | // NOLINT +// I1105 16:52:38.130159 932926 foo.cc:119] PROFILER [P.2 Heartbeats] SNAPSHOT [S.1 Timelapse]: // NOLINT +// [P.6:S.4] TIMING | Wall: 3.00s | User: 23ms | Sys: 71ms | CpuUtil: 0.00% | CpuCores: 0.0 | // NOLINT +// [P.6:S.4] MEMORY | RSS: 146.48M | VSize: 2.05G | VPeak: 2.05G | Data: 298.46M | ShMem: 12.85M | // NOLINT +// I1105 16:52:23.880263 932765 foo.cc:82] PROFILER [P.1 DoSomethingFn()] SNAPSHOT [S.1 Scope 1 done]: // NOLINT +// [P.1:S.1] TIMING | Wall: 3.05s | User: 16ms | Sys: 33ms | CpuUtil: 0.00% | CpuCores: 0.0 | // NOLINT +// [P.1:S.1] TIMING Δ | Wall: +3.05s | User: +16ms | Sys: +33ms | CpuUtil: -8.11% | CpuCores: -0.5 | // NOLINT +// [P.1:S.1] MEMORY | RSS: 167.75M | VSize: 2.04G | VPeak: 2.04G | Data: 200.58M | ShMem: 12.83M | // NOLINT +// I1105 16:52:26.913993 932765 foo.cc:89] PROFILER [P.1 DoSomethingFn()] SNAPSHOT [S.3 Loop iteration 0 done]: // NOLINT +// [P.1:S.3] TIMING | Wall: 6.08s | User: 37ms | Sys: 46ms | CpuUtil: 0.00% | CpuCores: 0.0 | // NOLINT +// [P.1:S.3] MEMORY | RSS: 148.27M | VSize: 2.04G | VPeak: 2.04G | Data: 200.70M | ShMem: 12.83M | // NOLINT +// I1105 16:52:43.133988 932926 foo.cc:119] PROFILER [P.2 Heartbeats] SNAPSHOT [S.2 Timelapse]: // NOLINT +// [P.6:S.9] TIMING | Wall: 8.01s | User: 42ms | Sys: 103ms | CpuUtil: 18.95% | CpuCores: 0.0 | // NOLINT +// [P.6:S.9] TIMING Δ | Wall: +1.00s | User: +1ms | Sys: +10ms | CpuUtil: -0.49% | CpuCores: -0.0 | // NOLINT +// [P.6:S.9] MEMORY | RSS: 158.96M | VSize: 2.05G | 
VPeak: 2.05G | Data: 298.71M | ShMem: 12.85M | // NOLINT +// I1105 16:52:28.962669 932765 foo.cc:79] PROFILER [P.1 DoSomethingFn()] SNAPSHOT [S.6 FINAL]: // NOLINT +// [P.1:S.6] TIMING | Wall: 8.13s | User: 54ms | Sys: 75ms | CpuUtil: 22.56% | CpuCores: 0.0 | // NOLINT +// [P.1:S.6] TIMING Δ | Wall: +17ms | User: +9ms | Sys: +6ms | CpuUtil: +22.56% | CpuCores: +0.0 | // NOLINT +// [P.1:S.6] MEMORY | RSS: 145.92M | VSize: 2.04G | VPeak: 2.04G | Data: 211.08M | ShMem: 12.83M | // NOLINT +// ... +// clang-format off +// +// EXAMPLE FINAL REPORT (TRUNCATED) +// +// clang-format off +// +// I1105 16:52:28.963056 932765 foo.cc:79] PROFILER [P.1 WasteTimeAndGobbleBytes()] FINAL REPORT: // NOLINT +// +// === TIMING [P.1 DoSomethingFn()] === +// +// WALL TIME: +// foo.cc:79 @ 16:52:20.83 [P.1:S.0 INITIAL ] 11us [--------------------------------------------------] // NOLINT +// foo.cc:82 @ 16:52:23.88 [P.1:S.1 Scope 1 ] 3.05s [##################--------------------------------] // NOLINT +// foo.cc:86 @ 16:52:25.90 [P.1:S.2 Scope 2 ] 5.07s [###############################-------------------] // NOLINT +// foo.cc:89 @ 16:52:26.91 [P.1:S.3 Loop iteration 0] 6.08s [#####################################-------------] // NOLINT +// foo.cc:89 @ 16:52:27.92 [P.1:S.4 Loop iteration 1] 7.10s [###########################################-------] // NOLINT +// foo.cc:79 @ 16:52:28.96 [P.1:S.6 FINAL ] 8.13s [##################################################] // NOLINT +// ... Same stats for other timing metrics. 
+// +// === Δ TIMING [P.1 DoSomethingFn()] === +// +// Δ WALL TIME: +// foo.cc:79 @ 16:52:20.83 [P.1:S.0 INITIAL ] +0ns [--------------------------------------------------] // NOLINT +// foo.cc:82 @ 16:52:23.88 [P.1:S.1 Scope 1 ] +3.05s [##################################################] // NOLINT +// foo.cc:86 @ 16:52:25.90 [P.1:S.2 Scope 2 ] +2.03s [#################################-----------------] // NOLINT +// foo.cc:89 @ 16:52:26.91 [P.1:S.3 Loop iteration 0] +1.01s [################----------------------------------] // NOLINT +// foo.cc:89 @ 16:52:27.92 [P.1:S.4 Loop iteration 1] +1.01s [################----------------------------------] // NOLINT +// foo.cc:79 @ 16:52:28.96 [P.1:S.6 FINAL ] +17ms [--------------------------------------------------] // NOLINT +// ... Same stats for other timing metrics. +// +// === MEMORY USAGE [P.1 DoSomethingFn()] === +// +// RESIDENT SET SIZE: +// foo.cc:79 @ 16:52:20.83 [P.1:S.0 INITIAL ] 119.79M [--------------------------------------------------] // NOLINT +// foo.cc:82 @ 16:52:23.88 [P.1:S.1 Scope 1 ] 167.75M [##################################################] // NOLINT +// foo.cc:86 @ 16:52:25.90 [P.1:S.2 Scope 2 ] 139.66M [####################------------------------------] // NOLINT +// foo.cc:89 @ 16:52:26.91 [P.1:S.3 Loop iteration 0] 148.27M [#############################---------------------] // NOLINT +// foo.cc:89 @ 16:52:27.92 [P.1:S.4 Loop iteration 1] 156.07M [#####################################-------------] // NOLINT +// foo.cc:79 @ 16:52:28.96 [P.1:S.6 FINAL ] 145.92M [###########################-----------------------] // NOLINT +// ... Same stats for other memory types. 
+// +// === Δ MEMORY USAGE [P.1 DoSomethingFn()] === +// +// Δ RESIDENT SET SIZE: +// foo.cc:79 @ 16:52:20.83 [P.1:S.0 INITIAL ] +0B [------------------|--------------------------------] // NOLINT +// foo.cc:82 @ 16:52:23.88 [P.1:S.1 Scope 1 ] +47.95M [------------------|################################] // NOLINT +// foo.cc:86 @ 16:52:25.90 [P.1:S.2 Scope 2 ] -28.09M [##################|--------------------------------] // NOLINT +// foo.cc:89 @ 16:52:26.91 [P.1:S.3 Loop iteration 0] +8.62M [------------------|######--------------------------] // NOLINT +// foo.cc:89 @ 16:52:27.92 [P.1:S.4 Loop iteration 1] +7.80M [------------------|#####---------------------------] // NOLINT +// foo.cc:79 @ 16:52:28.96 [P.1:S.6 FINAL ] -19.88M [-----#############|--------------------------------] // NOLINT +// ... Same stats for other memory types. +// +// clang-format on +//------------------------------------------------------------------------------ + +#ifndef THIRD_PARTY_CENTIPEDE_RUSAGE_PROFILER_H_ +#define THIRD_PARTY_CENTIPEDE_RUSAGE_PROFILER_H_ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "absl/base/nullability.h" +#include "absl/base/thread_annotations.h" +#include "absl/strings/str_cat.h" // IWYU pragma: keep +#include "absl/synchronization/mutex.h" +#include "absl/time/time.h" +#include "./centipede/periodic_action.h" +#include "./centipede/rusage_stats.h" + +namespace fuzztest::internal { + +// A simple source location wrapper. Typically, construct as +// `SourceLocation{__FILE__, __LINE__}` and pass around by-value. +// TODO(ussuri): Switch to absl::SourceLocation or std::source_location. 
+struct SourceLocation { + explicit SourceLocation() = default; + SourceLocation(const char* absl_nonnull file, int line) + : file{file}, line{line} {} + + const char* const file = ""; + const int line = 0; +}; + +class RUsageProfiler { + public: + //---------------------------------------------------------------------------- + // Types + + // A profiling snapshot. + struct Snapshot { + // Returns this snapshot's source location. + std::string WhereStr() const; + // Same as above, but shortens the file path to the basename. + std::string ShortWhereStr() const; + + // Returns this snapshot's recording date/time in local timezone. + std::string WhenStr() const; + // Same as above, but omits the date. + std::string ShortWhenStr() const; + + // Returns this snapshot's formatted metrics. The formatting is consistent + // across snapshots, so if printed in a loop, these will form a table. + std::string FormattedMetricsStr() const; + // Same as above, but the metrics are printed in one line without the + // table-like formatting. + std::string ShortMetricsStr() const; + + // Logs this snapshot to LOG(INFO). The source location that annotates the + // log message is set to `location` instead of the actual call's location. + // Returns *this so clients can do either of + // Snapshot s = profiler.TakeSnapshot(); + // Snapshot s = profiler.TakeSnapshot().Log(); + const Snapshot& Log() const; + + // Writes a short version of this snapshot to an ostream. + friend std::ostream& operator<<(std::ostream&, const Snapshot&); + + // Metadata. + const int64_t id = -1; + const std::string title; + const SourceLocation location{}; + const absl::Time time; + + // The parent profiler's data. + const int profiler_id = -1; + const std::string profiler_desc; + + // Recorded metrics.
+ const RUsageTiming timing = RUsageTiming::Zero(); + const RUsageTiming delta_timing = RUsageTiming::Zero(); + const RUsageMemory memory = RUsageMemory::Zero(); + const RUsageMemory delta_memory = RUsageMemory::Zero(); + }; + + // An abstract interface used to stream in a profiling report in + // GenerateReport(). Also used inside PrintReport() to overcome the LOG()'s + // limitation on the size of a single printed message. + class ReportSink { + public: + virtual ~ReportSink() = default; + virtual ReportSink& operator<<(std::string_view fragment) = 0; + }; + + //---------------------------------------------------------------------------- + // APIs + + // Which metric categories to track and report. + enum Metrics : unsigned { + kMetricsOff = 0, + kSnapTiming = 1, // Timing at the time of snapshot + kDeltaTiming = 2, // Delta timing from the previous snapshot + kTiming = kSnapTiming | kDeltaTiming, + kSnapMemory = 4, // Memory at the time of snapshot + kDeltaMemory = 8, // Delta memory from the previous snapshot + kMemory = kSnapMemory | kDeltaMemory, + kAllMetrics = kTiming | kMemory, + }; + using MetricsMask = decltype(kMetricsOff | kMetricsOff); + + // Automatic logging enabled via RAII. + enum RaiiActions : unsigned { + kRaiiOff = 0, + kCtorSnapshot = 1, + kDtorSnapshot = 2, + kDtorReport = 4, + kFinalReport = kDtorReport, + kRaiiSnapshots = kCtorSnapshot | kDtorSnapshot, + kAllRaii = kRaiiSnapshots | kFinalReport + }; + using RaiiActionsMask = decltype(kRaiiOff | kRaiiOff); + + // Initializes this profiler and possibly takes an initial snapshot if + // (raii_actions & kCtorSnapshot) != 0. The `location` parameter is + // used to annotate this profiler's log messages with the source location of + // the caller, as if the caller printed them. That makes it easy to attribute + // the logged resource usage to the actual user rather than RUsageProfiler.
+ RUsageProfiler( // + RUsageScope scope, // Which process/thread to monitor + MetricsMask metrics, // Which metrics to track + RaiiActionsMask raii_actions, // Which RAII logs to enable + SourceLocation location, // Pass SourceLocation{__FILE__, __LINE__} + std::string description = ""); // Annotate logs in addition to ID + + // This version turns on all RAII logging and immediately initiates timelapse + // snapshots at the specified interval, unless the interval is + // absl::ZeroDuration or absl::InfiniteDuration. + // + // Dtor will stop taking snapshots and print a chronological report. + // Snapshotting can also be manually stopped at any time using + // StopTimelapse(). + // + // As with manually started timelapse snapshotting (via StartTimelapse()), + // the client can still request explicit snapshots at any time, interleaved + // with timelapse ones. + RUsageProfiler( // + RUsageScope scope, // Which process/thread to monitor + MetricsMask metrics, // Which metrics to track + absl::Duration timelapse_interval, // Take timelapse snapshots this often + bool also_log_timelapses, // Log timelapse snapshots as taken + SourceLocation location, // SourceLocation{__FILE__, __LINE__} + std::string description = ""); // Annotate logs in addition to ID + + // Logs the final report as returned by GenerateReport(). + ~RUsageProfiler(); + + // Records and returns a snapshot of the current metrics. The snapshot's + // source location is set to `loc`, so its Log() will print a log message + // as if it were emitted by the `loc` source line. As such, the rule of + // thumb should be to pass `SourceLocation{__FILE__, __LINE__}`. The returned + // reference remains valid until RUsageProfiler is destroyed. + const Snapshot& TakeSnapshot(SourceLocation loc, std::string title = ""); + + // Starts taking and optionally also logging periodic snapshots at a given + // interval in a separate thread.
+ // + // Convenient for measuring sample-based resource usage of a black-box + // external API (e.g. third-party) or a complex bit of logic spanning multiple + // functions/classes/modules in order to either detect problematic usage + // patterns or the effect of different parameter configurations on the overall + // performance. GenerateReport() or PrintReport() are particularly well-suited + // for viewing the results of timelapse measurements in graphical form. + // + // The client is free to continue taking explicit snapshots at any time, + // interleaved with timelapse ones. + void StartTimelapse( // + SourceLocation loc, // + absl::Duration interval, // + bool also_log = false, // + std::string title = ""); + + // Stops taking timelapse snapshots previously initiated by StartTimelapse(). + void StopTimelapse(); + + // Returns the deque of manual and timelapse snapshots recorded so far. + const std::deque<Snapshot>& GetSnapshots() const { return snapshots_; } + + // Prints to `report_sink` a report consisting of chronological charts for + // each of the tracked metrics recorded since this profiler's construction up + // to this point. + void GenerateReport(ReportSink* absl_nonnull report_sink) const; + + // Logs the report returned by GenerateReport(). The log message's source + // location is set to `loc`: as a rule of thumb, pass + // `SourceLocation{__FILE__, __LINE__}` -- the explanation before + // TakeSnapshot() applies here as well. + void PrintReport(SourceLocation loc, const std::string& title = ""); + + private: + friend class RUsageProfilerTest_ValidateManualSnapshots_Test; + + //---------------------------------------------------------------------------- + // Data + + // Global instance counter. + static std::atomic<int> next_id_; + + // Scope (the current process or the current thread). + const RUsageScope scope_; + // Metrics and report flavors to keep track of and print. + const Metrics metrics_; + // Enabled RAII actions.
+ const RaiiActions raii_actions_; + // The source location where this profiler got created, as recorded by ctor. + const SourceLocation ctor_loc_; + // The descriptive name of this profiler provided by the client. Used to + // annotate verbose log messages. + const std::string description_; + // The sequential ID of this profiler. Used to annotate all log messages. + const int id_; + + // Mutex for the mutable data further below. + mutable absl::Mutex mutex_; + + // Chronological snapshots. Using std::deque gives a better-than-vector + // average insertion speed, keeps element references valid across appends, and + // strikes a balance between vector's and list's additional storage. + std::deque<Snapshot> snapshots_ ABSL_GUARDED_BY(mutex_); + // A temporarily lived periodic action that records and optionally logs + // timelapse snapshots. (Re)created by each new call to StartTimelapse() and + // terminated by StopTimelapse() or the dtor, whichever comes first. + std::unique_ptr<PeriodicAction> timelapse_recorder_ ABSL_GUARDED_BY(mutex_); + + // An auto-starting timer passed to RUsageTiming::Snapshot() in order to track + // this RUsageProfiler object's lifetime stats rather than the process's + // lifetime stats, which is the default. + ProcessTimer timer_; +}; + +} // namespace fuzztest::internal + +//------------------------------------------------------------------------------ +// Convenience macros for easy use of RUsageProfiler +//------------------------------------------------------------------------------ + +// TODO(ussuri): The macros all use RUsageScope::ThisProcess(). Parameterize. + +#define RPROF_NAME(prefix, line) RPROF_NAME_CONCAT(prefix, line) +#define RPROF_NAME_CONCAT(prefix, line) prefix##line +#define FUNCTION_LEVEL_RPROF_NAME RPROF_NAME(rprof_, 0) +#define SCOPE_LEVEL_RPROF_NAME RPROF_NAME(rprof_, __LINE__) + +// Profile the timing and resource usage of the current function, with an option +// to take additional intermediate snapshots via RPROF_SNAPSHOT* later in the +// function.
+// +// The intended canonical place to call this macro is right after the function's +// open brace or precondition checks: with just that, the entire function's +// system timing and resource usage will be logged upon return. Only one such +// macro call is allowed per function. +// clang-format off +#define RPROF_THIS_FUNCTION(enable) \ + fuzztest::internal::RUsageProfiler FUNCTION_LEVEL_RPROF_NAME = { \ + /*scope=*/fuzztest::internal::RUsageScope::ThisProcess(), \ + /*metrics=*/(enable) ? fuzztest::internal::RUsageProfiler::kAllMetrics \ + : fuzztest::internal::RUsageProfiler::kMetricsOff, \ + /*raii_actions=*/fuzztest::internal::RUsageProfiler::kRaiiSnapshots, \ + /*location=*/{__FILE__, __LINE__}, \ + /*description=*/absl::StrCat(__func__, "()") \ + } +// clang-format on + +// Same as RPROF_THIS_FUNCTION, but with a full report printed at return from +// the function. +// clang-format off +#define RPROF_THIS_FUNCTION_WITH_REPORT(enable) \ + fuzztest::internal::RUsageProfiler FUNCTION_LEVEL_RPROF_NAME = { \ + /*scope=*/fuzztest::internal::RUsageScope::ThisProcess(), \ + /*metrics=*/(enable) ? fuzztest::internal::RUsageProfiler::kAllMetrics \ + : fuzztest::internal::RUsageProfiler::kMetricsOff, \ + /*raii_actions=*/fuzztest::internal::RUsageProfiler::kAllRaii, \ + /*location=*/{__FILE__, __LINE__}, \ + /*description=*/absl::StrCat(__func__, "()") \ + } +// clang-format on + +// Same as RPROF_THIS_FUNCTION, but immediately initiates timelapse snapshots +// at the specified `timelapse_interval` and prints a final report for them. Additional +// snapshots can still be taken with RPROF_SNAPSHOT*. +// clang-format off +#define RPROF_THIS_FUNCTION_WITH_TIMELAPSE( \ + enable, timelapse_interval, also_log_timelapses) \ + fuzztest::internal::RUsageProfiler FUNCTION_LEVEL_RPROF_NAME = { \ + /*scope=*/fuzztest::internal::RUsageScope::ThisProcess(), \ + /*metrics=*/(enable) ? 
fuzztest::internal::RUsageProfiler::kAllMetrics \ + : fuzztest::internal::RUsageProfiler::kMetricsOff, \ + /*timelapse_interval=*/timelapse_interval, \ + /*also_log_timelapses=*/also_log_timelapses, \ + /*location=*/{__FILE__, __LINE__}, \ + /*description=*/absl::StrCat(__func__, "()") \ + } +// clang-format on + +// Sets an existing RUsageProfiler as this function's profiler such that it can +// be used with `RPROF_SNAPSHOT` and other similar macros below, which normally +// work with the other `RPROF_THIS_FUNCTION.*` macros. +// clang-format off +#define RPROF_THIS_FUNCTION_BY_EXISTING_RPROF(profiler) \ + ::fuzztest::internal::RUsageProfiler& FUNCTION_LEVEL_RPROF_NAME = profiler; +// clang-format on + +// Records and returns an intermediate snapshot using the profiler defined by an +// earlier RPROF_THIS_FUNCTION in the same function. An optional snapshot +// title can be passed as a macro argument. +// NOTE: Here and below, the '##' in front of __VA_ARGS__ eats up the preceding +// comma in case __VA_ARGS__ is empty, thus avoiding a malformed expression. +// clang-format off +#define RPROF_SNAPSHOT(...) \ + FUNCTION_LEVEL_RPROF_NAME.TakeSnapshot( \ + {__FILE__, __LINE__}, ##__VA_ARGS__) +// clang-format on + +// Records AND logs an intermediate snapshot using the profiler defined by an +// earlier RPROF_THIS_FUNCTION() in the same function. An optional snapshot +// title can be passed as a macro argument. +// clang-format off +#define RPROF_SNAPSHOT_AND_LOG(...) \ + FUNCTION_LEVEL_RPROF_NAME.TakeSnapshot( \ + {__FILE__, __LINE__}, ##__VA_ARGS__).Log() +// clang-format on + +// Starts taking periodic snapshots using the function-level profiler created by +// an earlier RPROF_THIS_FUNCTION*(). `interval` is an absl::Duration. +// `also_log` will also log the snapshots. An optional snapshot title can be +// passed as the last macro argument. +// clang-format off +#define RPROF_START_TIMELAPSE(interval, also_log, ...) 
\ + FUNCTION_LEVEL_RPROF_NAME.StartTimelapse( \ + {__FILE__, __LINE__}, interval, also_log, ##__VA_ARGS__) +// clang-format on + +#define RPROF_STOP_TIMELAPSE() FUNCTION_LEVEL_RPROF_NAME.StopTimelapse() + +// Prints a final report to the log using the profiler defined by an earlier +// RPROF_THIS_FUNCTION in the same function. An optional report title can be +// passed as a macro argument. +// clang-format off +#define RPROF_DUMP_REPORT_TO_LOG(...) \ + FUNCTION_LEVEL_RPROF_NAME.PrintReport({__FILE__, __LINE__}, ##__VA_ARGS__) +// clang-format on + +// Profiles a given scope: a snapshot and a delta of the system timing and +// resource usage are logged at the call site and at the scope exit. +// +// Unlike RPROF_THIS_FUNCTION, RPROF_THIS_SCOPE can be called any number of +// times per scope, provided the calls are on different lines. That includes +// nested scopes. +// +// Also unlike RPROF_THIS_FUNCTION, RPROF_THIS_SCOPE lacks a complementary +// macro for intermediate snapshotting using the same profiler: this macro is +// intended as a simple, fast way to profile a scope; for anything more +// involved, use RPROF_THIS_FUNCTION() or RUsageProfiler directly. +// clang-format off +#define RPROF_THIS_SCOPE(enable, description) \ + fuzztest::internal::RUsageProfiler SCOPE_LEVEL_RPROF_NAME = { \ + /*scope=*/fuzztest::internal::RUsageScope::ThisProcess(), \ + /*metrics=*/(enable) ? fuzztest::internal::RUsageProfiler::kAllMetrics \ + : fuzztest::internal::RUsageProfiler::kMetricsOff, \ + /*raii_actions=*/fuzztest::internal::RUsageProfiler::kRaiiSnapshots, \ + /*location=*/{__FILE__, __LINE__}, \ + /*description=*/description \ + } +// clang-format on + +// clang-format off +#define RPROF_THIS_SCOPE_WITH_TIMELAPSE( \ + enable, timelapse_interval, also_log_timelapses, description) \ + fuzztest::internal::RUsageProfiler SCOPE_LEVEL_RPROF_NAME = { \ + /*scope=*/fuzztest::internal::RUsageScope::ThisProcess(), \ + /*metrics=*/(enable) ? 
fuzztest::internal::RUsageProfiler::kAllMetrics \ + : fuzztest::internal::RUsageProfiler::kMetricsOff, \ + /*timelapse_interval=*/timelapse_interval, \ + /*also_log_timelapses=*/also_log_timelapses, \ + /*location=*/{__FILE__, __LINE__}, \ + /*description=*/description \ + } +// clang-format on + +#endif // THIRD_PARTY_CENTIPEDE_RUSAGE_PROFILER_H_ diff --git a/src/third_party/fuzztest/dist/centipede/rusage_profiler_test.cc b/src/third_party/fuzztest/dist/centipede/rusage_profiler_test.cc new file mode 100644 index 00000000000..3960dd4e1ed --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/rusage_profiler_test.cc @@ -0,0 +1,244 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "./centipede/rusage_profiler.h" + +#include + +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "absl/flags/flag.h" +#include "absl/log/log.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "./centipede/rusage_stats.h" + +ABSL_FLAG(bool, verbose, false, "Print extra info for debugging"); +ABSL_FLAG(bool, enable_system_load_sensitive_tests, false, + "Enable tests that are sensitive to the overall execution " + "environment on the current machine, e.g. 
the wall time accuracy or " + "average CPU load"); + +// clang-format off +#define EXPECT_TIME_NEAR(x, y, e) \ + EXPECT_NEAR(absl::ToDoubleSeconds(x), absl::ToDoubleSeconds(y), e) +#define EXPECT_SYS_TIMING_NEAR(x, y) \ + EXPECT_TIME_NEAR((x).wall_time, (y).wall_time, 0.1); \ + EXPECT_TIME_NEAR((x).user_time, (y).user_time, 0.01); \ + EXPECT_TIME_NEAR((x).sys_time, (y).sys_time, 0.01); \ + EXPECT_NEAR((x).cpu_utilization, (y).cpu_utilization, 0.5); \ + EXPECT_NEAR((x).cpu_hyper_cores, (y).cpu_hyper_cores, 0.5) + +#define EXPECT_MEM_NEAR(x, y, e) \ + EXPECT_NEAR(static_cast<double>(x), static_cast<double>(y), \ + std::fabs(((x) + (y)) / 2.0 * (e))) +#define EXPECT_SYS_MEMORY_NEAR(x, y) \ + EXPECT_MEM_NEAR((x).mem_vsize, (y).mem_vsize, 0.1); \ + EXPECT_MEM_NEAR((x).mem_vpeak, (y).mem_vpeak, 0.1); \ + EXPECT_MEM_NEAR((x).mem_rss, (y).mem_rss, 0.2); \ + EXPECT_MEM_NEAR((x).mem_data, (y).mem_data, 0.2) +// clang-format on + +namespace fuzztest::internal { +namespace { + +struct BigSlowThing { + // NOTE: The order -- first gobble bytes, then waste time -- is important: + // it gives the system memory a chance to settle (finish paging etc.) before + // returning, so the measurement results are more stable. + BigSlowThing(int64_t gobble_bytes, absl::Duration waste_time) { + const absl::Time start = absl::Now(); + + big_mem.resize(gobble_bytes); + // Touch the memory to cause it to actually materialize.
+ for (std::string::size_type i = 0; i < big_mem.size(); i += 1000) { + big_mem[i] = '$'; + } + + double cpu_waster = 1.23; + while (absl::Now() - start < waste_time) { + cpu_waster = std::cos(cpu_waster); + absl::SleepFor(absl::Seconds(1)); + } + } + + std::string big_mem; +}; + +void WasteTimeAndGobbleBytes() { + { + BigSlowThing big_slow_1{50'000'000, absl::Seconds(1)}; + } + { + BigSlowThing big_slow_2{20'000'000, absl::Seconds(1)}; + for (int i = 0; i < 3; ++i) { + BigSlowThing big_slow_3{10'000'000, absl::Seconds(1)}; + } + } +} + +} // namespace + +TEST(RUsageProfilerTest, TimelapseSnapshots) { + RPROF_THIS_FUNCTION_WITH_REPORT(/*enable=*/true); + RPROF_START_TIMELAPSE(absl::Seconds(1), /*also_log=*/true, "Timelapse"); + WasteTimeAndGobbleBytes(); + RPROF_STOP_TIMELAPSE(); + RPROF_DUMP_REPORT_TO_LOG("Report"); +} + +// NOTE: Exclude this test from MSAN: 1) MSAN messes with the system memory +// and skews the test's memory measurements. 2) The test allocates large +// memory blocks to fight small number volatility of the system allocator, but +// MSAN's custom allocator can't cope and intermittently OOMs. +#if !defined(MEMORY_SANITIZER) +// Compare RUsageProfiler's manually taken snapshots against raw RUsageTiming +// and RUsageMemory numbers acquired approximately at the same time. +// "Approximately the same" is still not *the same*, so some discrepancies are +// fully expected. +TEST(RUsageProfilerTest, ValidateManualSnapshots) { + // Allocate A LOT of memory to fight the small numbers volatility, in + // particular in the virtual memory size and peak, which grow in page + // increments. 
+ constexpr int64_t kGobbleBytes = 1'000'000'000; + constexpr absl::Duration kWasteTime = absl::Seconds(2); + const auto rusage_scope = RUsageScope::ThisProcess(); + + RUsageProfiler rprof{rusage_scope, + RUsageProfiler::kAllMetrics, + RUsageProfiler::kRaiiOff, + {__FILE__, __LINE__}}; + + const RUsageProfiler::Snapshot& before_snapshot = + rprof.TakeSnapshot({__FILE__, __LINE__}); + // NOTE: Use RUsageProfiler's internal timer rather than RUsageTiming's + // default global one (which starts when the process starts) to measure the + // times on the same timeline. + const RUsageTiming before_timing = + RUsageTiming::Snapshot(rusage_scope, rprof.timer_); + const RUsageMemory before_memory = RUsageMemory::Snapshot(rusage_scope); + + const BigSlowThing big_slow_thing{kGobbleBytes, kWasteTime}; + + const RUsageProfiler::Snapshot& after_snapshot = + rprof.TakeSnapshot({__FILE__, __LINE__}); + const RUsageTiming after_timing = + RUsageTiming::Snapshot(rusage_scope, rprof.timer_); + const RUsageMemory after_memory = RUsageMemory::Snapshot(rusage_scope); + const RUsageTiming delta_timing = after_timing - before_timing; + const RUsageMemory delta_memory = after_memory - before_memory; + + if (absl::GetFlag(FLAGS_verbose)) { + LOG(INFO) << "before_snapshot:\n" << before_snapshot.FormattedMetricsStr(); + LOG(INFO) << "after_snapshot:\n" << after_snapshot.FormattedMetricsStr(); + LOG(INFO) << ""; + LOG(INFO) << "before_timing: " << before_timing.FormattedStr(); + LOG(INFO) << "after_timing: " << after_timing.FormattedStr(); + LOG(INFO) << "delta_timing: " << delta_timing.FormattedStr(); + LOG(INFO) << ""; + LOG(INFO) << "before_memory: " << before_memory.FormattedStr(); + LOG(INFO) << "after_memory: " << after_memory.FormattedStr(); + LOG(INFO) << "delta_memory: " << delta_memory.FormattedStr(); + } + + EXPECT_EQ(after_snapshot.delta_timing, + after_snapshot.timing - before_snapshot.timing); + EXPECT_EQ(after_snapshot.delta_memory, + after_snapshot.memory - 
before_snapshot.memory); + + // NOTES: 1) The "before" timing numbers are close to 0 and extremely + // volatile, especially for the CPU utilization. Therefore, exclude it, as + // well as the delta timing partially determined by it, from validation. + // 2) All *SANs slow down execution, so skip timing checks under them. + // However, still run RUsageProfiler under them to catch any respective bugs. +#if !defined(MEMORY_SANITIZER) && !defined(ADDRESS_SANITIZER) && \ + !defined(THREAD_SANITIZER) + // EXPECT_SYS_TIMING_NEAR(before_snapshot.timing, before_timing); + EXPECT_SYS_TIMING_NEAR(after_snapshot.timing, after_timing); + // EXPECT_SYS_TIMING_NEAR(after_snapshot.delta_timing, delta_timing); +#else + LOG(WARNING) << "Validation of some test results omitted under *SANs"; +#endif + + if (absl::GetFlag(FLAGS_enable_system_load_sensitive_tests)) { + EXPECT_SYS_MEMORY_NEAR(before_snapshot.memory, before_memory); + EXPECT_SYS_MEMORY_NEAR(after_snapshot.memory, after_memory); + EXPECT_SYS_MEMORY_NEAR(after_snapshot.delta_memory, delta_memory); + } +} +#endif // MSAN is now back on. + +TEST(RUsageProfilerTest, ValidateTimelapseSnapshots) { + constexpr absl::Duration kWasteTime = absl::Seconds(2); + constexpr absl::Duration kInterval = absl::Milliseconds(500); + constexpr int kGobbleBytes = 100'000'000; + const bool kAlsoLog = absl::GetFlag(FLAGS_verbose); + + RUsageProfiler rprof{RUsageScope::ThisProcess(), + RUsageProfiler::kAllMetrics, + kInterval, + kAlsoLog, + {__FILE__, __LINE__}}; + const BigSlowThing big_slow_thing{kGobbleBytes, kWasteTime}; + rprof.StopTimelapse(); + + // NOTE: The sanitizers heavily instrument the code and skew any time + // measurements. 
+#if !defined(ADDRESS_SANITIZER) && !defined(THREAD_SANITIZER) && \ + !defined(MEMORY_SANITIZER) + const auto& snapshots = rprof.GetSnapshots(); + ASSERT_NEAR(snapshots.size(), absl::FDivDuration(kWasteTime, kInterval), 1); + for (size_t i = 1; i < snapshots.size(); ++i) { + EXPECT_TIME_NEAR(snapshots[i].time - snapshots[i - 1].time, kInterval, .05); + } +#else + LOG(WARNING) << "Validation of some test results omitted under *SANs"; +#endif +} + +TEST(RUsageProfilerTest, ValidateReport) { + constexpr int kGobbleBytes = 100'000'000; + constexpr absl::Duration kWasteTime = absl::Seconds(3); + + RUsageProfiler rprof{RUsageScope::ThisProcess(), + RUsageProfiler::kAllMetrics, + RUsageProfiler::kRaiiOff, + {__FILE__, __LINE__}}; + { + rprof.TakeSnapshot({__FILE__, __LINE__}); + const BigSlowThing big_slow_thing_1{kGobbleBytes, kWasteTime}; + rprof.TakeSnapshot({__FILE__, __LINE__}); + const BigSlowThing big_slow_thing_2{kGobbleBytes, kWasteTime}; + rprof.TakeSnapshot({__FILE__, __LINE__}); + } // BigSlowThings release their memory. + rprof.TakeSnapshot({__FILE__, __LINE__}); + + class ReportCapture : public RUsageProfiler::ReportSink { + public: + ~ReportCapture() override = default; + ReportCapture& operator<<(std::string_view fragment) override { + LOG(INFO).NoPrefix() << fragment; + return *this; + } + }; + + ReportCapture report_capture{}; + rprof.GenerateReport(&report_capture); +} + +} // namespace fuzztest::internal diff --git a/src/third_party/fuzztest/dist/centipede/rusage_stats.cc b/src/third_party/fuzztest/dist/centipede/rusage_stats.cc new file mode 100644 index 00000000000..8ca674bcd3c --- /dev/null +++ b/src/third_party/fuzztest/dist/centipede/rusage_stats.cc @@ -0,0 +1,663 @@ +// Copyright 2022 The Centipede Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "./centipede/rusage_stats.h"
+
+// NOTE(review): the header names on the bare #include lines below are missing
+// in this copy; restore them from the upstream file before building.
+#ifdef __APPLE__
+#include
+#include
+#include
+#endif  // __APPLE__
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include  // NOLINT: For hardware_concurrency() only.
+
+#include "absl/base/nullability.h"
+#include "absl/log/check.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_format.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+
+namespace fuzztest::internal {
+
+//------------------------------------------------------------------------------
+//                               ProcessTimer
+//------------------------------------------------------------------------------
+
+// Records the construction-time wall clock and the process's resource usage;
+// both serve as the baseline for later Get() calls.
+ProcessTimer::ProcessTimer() : start_time_{absl::Now()}, start_rusage_{} {
+  getrusage(RUSAGE_SELF, &start_rusage_);
+}
+
+// Reports, in seconds, the user CPU time, system CPU time and wall-clock time
+// that have elapsed since this timer was constructed. The CPU times come from
+// getrusage(RUSAGE_SELF); the wall time comes from absl::Now().
+void ProcessTimer::Get(double& user, double& sys, double& wall) const {
+  struct rusage now_rusage = {};
+  getrusage(RUSAGE_SELF, &now_rusage);
+
+  const absl::Duration user_elapsed =
+      absl::DurationFromTimeval(now_rusage.ru_utime) -
+      absl::DurationFromTimeval(start_rusage_.ru_utime);
+  const absl::Duration sys_elapsed =
+      absl::DurationFromTimeval(now_rusage.ru_stime) -
+      absl::DurationFromTimeval(start_rusage_.ru_stime);
+  // The wall clock is sampled after the rusage snapshot, as before.
+  const absl::Duration wall_elapsed = absl::Now() - start_time_;
+
+  user = absl::ToDoubleSeconds(user_elapsed);
+  sys = absl::ToDoubleSeconds(sys_elapsed);
+  wall = absl::ToDoubleSeconds(wall_elapsed);
+}
+
+//------------------------------------------------------------------------------
+//                               RUsageScope
+//------------------------------------------------------------------------------
+
+#ifdef __APPLE__
+// Platform-specific state of an RUsageScope on Apple platforms: just the PID
+// of the observed process.
+class RUsageScope::PlatformInfo {
+ public:
+  explicit PlatformInfo(pid_t pid) : pid_(pid) {}
+
+  pid_t pid() const { return pid_; }
+
+ private:
+  pid_t pid_;
+};
+#else
+// Platform-specific state of an RUsageScope on non-Apple platforms: the paths
+// of the /proc files of the observed process, formatted once at construction.
+class RUsageScope::PlatformInfo {
+ public:
+  // Indexes into the array of precomputed paths. `kNumDoNotUseDirectly` is
+  // the number of real entries and doubles as the array size.
+  enum ProcFile : size_t {
+    kSched = 0,
+    kStatm = 1,
+    kStatus = 2,
+    kNumDoNotUseDirectly = 3
+  };
+
+  explicit PlatformInfo(pid_t pid)
+      : proc_file_paths_{
+            absl::StrFormat("/proc/%d/sched", pid),
+            absl::StrFormat("/proc/%d/statm", pid),
+            absl::StrFormat("/proc/%d/status", pid),
+        } {}
+
+  // Returns the precomputed path of the requested /proc/<pid>/<file>.
+  // CHECK-fails if `file` is not a valid index (e.g. `kNumDoNotUseDirectly`).
+  [[nodiscard]] const std::string& GetProcFilePath(ProcFile file) const {
+    CHECK_LT(file, proc_file_paths_.size());
+    return proc_file_paths_[file];
+  }
+
+ private:
+  std::array<std::string, kNumDoNotUseDirectly> proc_file_paths_;
+};
+#endif
+
+// Returns a scope that observes the calling process.
+RUsageScope RUsageScope::ThisProcess() {  //
+  return RUsageScope{getpid()};
+}
+
+// Returns a scope that observes the process with the given `pid`.
+RUsageScope RUsageScope::Process(pid_t pid) {  //
+  return RUsageScope{pid};
+}
+
+// Builds the human-readable description ("PID=<pid>") and the shared
+// platform-specific info for the given `pid`.
+RUsageScope::RUsageScope(pid_t pid)
+    : description_{absl::StrFormat("PID=%d", pid)},
+      info_(std::make_shared<PlatformInfo>(pid)) {}
+
+namespace detail {
+namespace {
+
+// A global static is fine: this object depends on getrusage() syscall ONLY, and
+// absolutely no other globals in the program.
+const ProcessTimer global_process_timer;
+
+//------------------------------------------------------------------------------
+//                     Read values from /proc/* files
+//------------------------------------------------------------------------------
+
+// Reads the whole file at `path` and parses it with the scanf-style `format`,
+// storing the parsed values through the variadic pointer arguments.
+//
+// Returns true if the file could be read and vsscanf() did not report EOF.
+// Note that this is also the case when fewer fields than requested (possibly
+// none) were matched, so the caller's variables may be left unmodified.
+// Returns false if the file is missing, unreadable or empty.
+bool ReadProcFileFields(const std::string& path,
+                        const char* absl_nonnull format, ...) {
+  bool success = false;
+  va_list value_list;
+  va_start(value_list, format);
+  std::ifstream file{path};
+  // TODO(b/265461840): Silently ignoring missing /proc/ files. The current
+  // callers ignore the returned status too. Improve.
+  if (file.good()) {
+    std::stringstream contents;
+    contents << file.rdbuf();
+    if (contents.good()) {
+      if (vsscanf(contents.str().c_str(), format, value_list) != EOF) {
+        success = true;
+      }
+    }
+  }
+  va_end(value_list);
+  return success;
+}
+
+// Scans the file at `path` line by line and applies the scanf-style `format`
+// (which must contain exactly one conversion matching `T`) to each line.
+// On the first line where the conversion succeeds, stores the result in
+// `*value` and returns true. Returns false if the file cannot be opened or no
+// line matches.
+//
+// Lines are read into a std::string, so a line of any length is handled and
+// never stops the scan before later lines have been examined.
+template <typename T>
+bool ReadProcFileKeyword(  //
+    const std::string& path, const char* format, T* value) {
+  std::ifstream file{path};
+  // TODO(b/265461840): Silently ignoring missing /proc/ files. The current
+  // callers ignore the returned status too. Improve.
+  std::string line;
+  while (std::getline(file, line)) {
+    if (sscanf(line.c_str(), format, value) == 1) {
+      return true;
+    }
+  }
+  return false;
+}
+
+//------------------------------------------------------------------------------
+//                          Comparison overloads
+//------------------------------------------------------------------------------
+
+// Splits `*value` into a sign prefix and a magnitude: a negative value is
+// negated in place and "-" is returned; otherwise the value is left as is and
+// "+" is returned if `always_signed` is set, or "" if it is not.
+// NOTE(review): for a signed integer `T`, negating the minimum value would
+// overflow; presumably callers never pass it -- confirm.
+template <typename T>
+std::string NormalizeSign(T* value, bool always_signed) {
+  if (*value < T{}) {
+    *value = -(*value);
+    return "-";
+  } else if (always_signed) {
+    return "+";
+  } else {
+    return "";
+  }
+}
+
+template