SERVER-107667 Upgrade re2 (#41882)
GitOrigin-RevId: 7cb9472471a3904fba1b7dc1ebe29e20b26b1d25
This commit is contained in:
parent
45517c14ae
commit
b8f1573315
12
sbom.json
12
sbom.json
@ -772,7 +772,7 @@
|
||||
},
|
||||
{
|
||||
"type": "library",
|
||||
"bom-ref": "pkg:github/google/re2@2023-11-01",
|
||||
"bom-ref": "pkg:github/google/re2@2025-08-12",
|
||||
"supplier": {
|
||||
"name": "Google LLC",
|
||||
"url": [
|
||||
@ -782,7 +782,7 @@
|
||||
"author": "The RE2 Authors",
|
||||
"group": "google.opensource",
|
||||
"name": "re2",
|
||||
"version": "2023-11-01",
|
||||
"version": "2025-08-12",
|
||||
"description": "RE2 is a fast, safe, thread-friendly alternative to backtracking regular expression engines like those used in PCRE, Perl, and Python. It is a C++ library.",
|
||||
"licenses": [
|
||||
{
|
||||
@ -792,8 +792,8 @@
|
||||
}
|
||||
],
|
||||
"copyright": "Copyright (c) 2009 The RE2 Authors. All rights reserved.",
|
||||
"cpe": "cpe:2.3:h:google:re2:2023-11-01:*:*:*:*:*:*:*",
|
||||
"purl": "pkg:github/google/re2@2023-11-01",
|
||||
"cpe": "cpe:2.3:a:google:re2:2025-08-12:*:*:*:*:*:*:*",
|
||||
"purl": "pkg:github/google/re2@2025-08-12",
|
||||
"properties": [
|
||||
{
|
||||
"name": "internal:team_responsible",
|
||||
@ -2966,7 +2966,7 @@
|
||||
"pkg:github/fmtlib/fmt@11.1.3",
|
||||
"pkg:github/facebook/folly@v2025.04.21.00",
|
||||
"pkg:github/google/benchmark@v1.5.2",
|
||||
"pkg:github/google/re2@2023-11-01",
|
||||
"pkg:github/google/re2@2025-08-12",
|
||||
"pkg:github/google/snappy@1.1.10",
|
||||
"pkg:github/google/tcmalloc@093ba93c1bd6dca03b0a8334f06d01b019244291",
|
||||
"pkg:github/google/googletest@v1.17.0",
|
||||
@ -3089,7 +3089,7 @@
|
||||
"dependsOn": []
|
||||
},
|
||||
{
|
||||
"ref": "pkg:github/google/re2@2023-11-01",
|
||||
"ref": "pkg:github/google/re2@2025-08-12",
|
||||
"dependsOn": []
|
||||
},
|
||||
{
|
||||
|
||||
54
src/third_party/re2/BUILD.bazel
vendored
54
src/third_party/re2/BUILD.bazel
vendored
@ -6,6 +6,7 @@ mongo_cc_library(
|
||||
name = "re2",
|
||||
srcs = [
|
||||
"dist/re2/bitmap256.cc",
|
||||
"dist/re2/bitmap256.h",
|
||||
"dist/re2/bitstate.cc",
|
||||
"dist/re2/compile.cc",
|
||||
"dist/re2/dfa.cc",
|
||||
@ -15,53 +16,40 @@ mongo_cc_library(
|
||||
"dist/re2/onepass.cc",
|
||||
"dist/re2/parse.cc",
|
||||
"dist/re2/perl_groups.cc",
|
||||
"dist/re2/pod_array.h",
|
||||
"dist/re2/prefilter.cc",
|
||||
"dist/re2/prefilter.h",
|
||||
"dist/re2/prefilter_tree.cc",
|
||||
"dist/re2/prefilter_tree.h",
|
||||
"dist/re2/prog.cc",
|
||||
"dist/re2/prog.h",
|
||||
"dist/re2/re2.cc",
|
||||
"dist/re2/regexp.cc",
|
||||
"dist/re2/regexp.h",
|
||||
"dist/re2/set.cc",
|
||||
"dist/re2/simplify.cc",
|
||||
"dist/re2/tostring.cc",
|
||||
"dist/re2/unicode_casefold.cc",
|
||||
"dist/re2/unicode_groups.cc",
|
||||
"dist/util/rune.cc",
|
||||
"dist/util/strutil.cc",
|
||||
] + [
|
||||
# Internal headers
|
||||
"dist/re2/bitmap256.h",
|
||||
"dist/re2/filtered_re2.h",
|
||||
"dist/re2/pod_array.h",
|
||||
"dist/re2/prefilter.h",
|
||||
"dist/re2/prefilter_tree.h",
|
||||
"dist/re2/prog.h",
|
||||
"dist/re2/re2.h",
|
||||
"dist/re2/regexp.h",
|
||||
"dist/re2/set.h",
|
||||
"dist/re2/sparse_array.h",
|
||||
"dist/re2/sparse_set.h",
|
||||
"dist/re2/stringpiece.h",
|
||||
"dist/re2/testing/exhaustive_tester.h",
|
||||
"dist/re2/testing/regexp_generator.h",
|
||||
"dist/re2/testing/string_generator.h",
|
||||
"dist/re2/testing/tester.h",
|
||||
"dist/re2/tostring.cc",
|
||||
"dist/re2/unicode_casefold.cc",
|
||||
"dist/re2/unicode_casefold.h",
|
||||
"dist/re2/unicode_groups.cc",
|
||||
"dist/re2/unicode_groups.h",
|
||||
"dist/re2/walker-inl.h",
|
||||
"dist/util/logging.h",
|
||||
"dist/util/malloc_counter.h",
|
||||
"dist/util/pcre.h",
|
||||
"dist/util/rune.cc",
|
||||
"dist/util/strutil.cc",
|
||||
"dist/util/strutil.h",
|
||||
"dist/util/utf.h",
|
||||
],
|
||||
hdrs = [
|
||||
"dist/re2/filtered_re2.h",
|
||||
"dist/re2/re2.h",
|
||||
"dist/re2/set.h",
|
||||
"dist/re2/stringpiece.h",
|
||||
],
|
||||
copts = select({
|
||||
"//bazel/config:gcc_or_clang": [
|
||||
"-pthread",
|
||||
],
|
||||
"//conditions:default": [],
|
||||
"@platforms//os:windows": [],
|
||||
"//conditions:default": ["-pthread"],
|
||||
}) + select({
|
||||
"@platforms//os:windows": [
|
||||
# The POSIX name for this item is deprecated
|
||||
@ -72,6 +60,14 @@ mongo_cc_library(
|
||||
includes = [
|
||||
"dist",
|
||||
],
|
||||
linkopts = select({
|
||||
# macOS doesn't need `-pthread' when linking and it appears that
|
||||
# older versions of Clang will warn about the unused command line
|
||||
# argument, so just don't pass it.
|
||||
"@platforms//os:macos": [],
|
||||
"@platforms//os:windows": [],
|
||||
"//conditions:default": ["-pthread"],
|
||||
}),
|
||||
local_defines = select({
|
||||
"@platforms//os:windows": [
|
||||
"NOMINMAX",
|
||||
@ -84,6 +80,8 @@ mongo_cc_library(
|
||||
}),
|
||||
deps = [
|
||||
"//src/third_party/abseil-cpp:absl_base",
|
||||
"//src/third_party/abseil-cpp:absl_log_internal_check_op",
|
||||
"//src/third_party/abseil-cpp:absl_log_internal_message",
|
||||
"//src/third_party/abseil-cpp:absl_raw_hash_set",
|
||||
"//src/third_party/abseil-cpp:absl_str_format_internal",
|
||||
"//src/third_party/abseil-cpp:absl_strings",
|
||||
|
||||
13
src/third_party/re2/dist/AUTHORS
vendored
13
src/third_party/re2/dist/AUTHORS
vendored
@ -1,13 +0,0 @@
|
||||
# This is the official list of RE2 authors for copyright purposes.
|
||||
# This file is distinct from the CONTRIBUTORS files.
|
||||
# See the latter for an explanation.
|
||||
|
||||
# Names should be added to this file as
|
||||
# Name or Organization <email address>
|
||||
# The email address is not required for organizations.
|
||||
|
||||
# Please keep the list sorted.
|
||||
|
||||
Google Inc.
|
||||
Samsung Electronics
|
||||
Stefano Rivera <stefano.rivera@gmail.com>
|
||||
394
src/third_party/re2/dist/BUILD.bazel
vendored
394
src/third_party/re2/dist/BUILD.bazel
vendored
@ -1,394 +0,0 @@
|
||||
# Copyright 2009 The RE2 Authors. All Rights Reserved.
|
||||
# Use of this source code is governed by a BSD-style
|
||||
# license that can be found in the LICENSE file.
|
||||
|
||||
# Bazel (http://bazel.build/) BUILD file for RE2.
|
||||
|
||||
licenses(["notice"])
|
||||
|
||||
exports_files(["LICENSE"])
|
||||
|
||||
cc_library(
|
||||
name = "re2",
|
||||
srcs = [
|
||||
"re2/bitmap256.cc",
|
||||
"re2/bitmap256.h",
|
||||
"re2/bitstate.cc",
|
||||
"re2/compile.cc",
|
||||
"re2/dfa.cc",
|
||||
"re2/filtered_re2.cc",
|
||||
"re2/mimics_pcre.cc",
|
||||
"re2/nfa.cc",
|
||||
"re2/onepass.cc",
|
||||
"re2/parse.cc",
|
||||
"re2/perl_groups.cc",
|
||||
"re2/pod_array.h",
|
||||
"re2/prefilter.cc",
|
||||
"re2/prefilter.h",
|
||||
"re2/prefilter_tree.cc",
|
||||
"re2/prefilter_tree.h",
|
||||
"re2/prog.cc",
|
||||
"re2/prog.h",
|
||||
"re2/re2.cc",
|
||||
"re2/regexp.cc",
|
||||
"re2/regexp.h",
|
||||
"re2/set.cc",
|
||||
"re2/simplify.cc",
|
||||
"re2/sparse_array.h",
|
||||
"re2/sparse_set.h",
|
||||
"re2/tostring.cc",
|
||||
"re2/unicode_casefold.cc",
|
||||
"re2/unicode_casefold.h",
|
||||
"re2/unicode_groups.cc",
|
||||
"re2/unicode_groups.h",
|
||||
"re2/walker-inl.h",
|
||||
"util/logging.h",
|
||||
"util/rune.cc",
|
||||
"util/strutil.cc",
|
||||
"util/strutil.h",
|
||||
"util/utf.h",
|
||||
],
|
||||
hdrs = [
|
||||
"re2/filtered_re2.h",
|
||||
"re2/re2.h",
|
||||
"re2/set.h",
|
||||
"re2/stringpiece.h",
|
||||
],
|
||||
copts = select({
|
||||
"@platforms//os:wasi": [],
|
||||
"@platforms//os:windows": [],
|
||||
"//conditions:default": ["-pthread"],
|
||||
}),
|
||||
linkopts = select({
|
||||
# macOS doesn't need `-pthread' when linking and it appears that
|
||||
# older versions of Clang will warn about the unused command line
|
||||
# argument, so just don't pass it.
|
||||
"@platforms//os:macos": [],
|
||||
"@platforms//os:wasi": [],
|
||||
"@platforms//os:windows": [],
|
||||
"//conditions:default": ["-pthread"],
|
||||
}),
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"@com_google_absl//absl/base",
|
||||
"@com_google_absl//absl/base:core_headers",
|
||||
"@com_google_absl//absl/container:fixed_array",
|
||||
"@com_google_absl//absl/container:flat_hash_map",
|
||||
"@com_google_absl//absl/container:flat_hash_set",
|
||||
"@com_google_absl//absl/container:inlined_vector",
|
||||
"@com_google_absl//absl/strings",
|
||||
"@com_google_absl//absl/strings:str_format",
|
||||
"@com_google_absl//absl/synchronization",
|
||||
"@com_google_absl//absl/types:optional",
|
||||
"@com_google_absl//absl/types:span",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "testing",
|
||||
testonly = 1,
|
||||
srcs = [
|
||||
"re2/testing/backtrack.cc",
|
||||
"re2/testing/dump.cc",
|
||||
"re2/testing/exhaustive_tester.cc",
|
||||
"re2/testing/null_walker.cc",
|
||||
"re2/testing/regexp_generator.cc",
|
||||
"re2/testing/string_generator.cc",
|
||||
"re2/testing/tester.cc",
|
||||
"util/pcre.cc",
|
||||
],
|
||||
hdrs = [
|
||||
"re2/testing/exhaustive_tester.h",
|
||||
"re2/testing/regexp_generator.h",
|
||||
"re2/testing/string_generator.h",
|
||||
"re2/testing/tester.h",
|
||||
"util/malloc_counter.h",
|
||||
"util/pcre.h",
|
||||
|
||||
# Exposed for testing only.
|
||||
"re2/bitmap256.h",
|
||||
"re2/pod_array.h",
|
||||
"re2/prefilter.h",
|
||||
"re2/prefilter_tree.h",
|
||||
"re2/prog.h",
|
||||
"re2/regexp.h",
|
||||
"re2/sparse_array.h",
|
||||
"re2/sparse_set.h",
|
||||
"re2/unicode_casefold.h",
|
||||
"re2/unicode_groups.h",
|
||||
"re2/walker-inl.h",
|
||||
"util/logging.h",
|
||||
"util/strutil.h",
|
||||
"util/utf.h",
|
||||
],
|
||||
visibility = [":__subpackages__"],
|
||||
deps = [
|
||||
":re2",
|
||||
"@com_google_absl//absl/base",
|
||||
"@com_google_absl//absl/base:core_headers",
|
||||
"@com_google_absl//absl/flags:flag",
|
||||
"@com_google_absl//absl/strings",
|
||||
"@com_google_absl//absl/strings:str_format",
|
||||
"@googletest//:gtest",
|
||||
],
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "charclass_test",
|
||||
size = "small",
|
||||
srcs = ["re2/testing/charclass_test.cc"],
|
||||
deps = [
|
||||
":testing",
|
||||
"@com_google_absl//absl/base:core_headers",
|
||||
"@com_google_absl//absl/strings:str_format",
|
||||
"@googletest//:gtest",
|
||||
"@googletest//:gtest_main",
|
||||
],
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "compile_test",
|
||||
size = "small",
|
||||
srcs = ["re2/testing/compile_test.cc"],
|
||||
deps = [
|
||||
":testing",
|
||||
"@com_google_absl//absl/base:core_headers",
|
||||
"@googletest//:gtest",
|
||||
"@googletest//:gtest_main",
|
||||
],
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "filtered_re2_test",
|
||||
size = "small",
|
||||
srcs = ["re2/testing/filtered_re2_test.cc"],
|
||||
deps = [
|
||||
":re2",
|
||||
":testing",
|
||||
"@com_google_absl//absl/base:core_headers",
|
||||
"@googletest//:gtest",
|
||||
"@googletest//:gtest_main",
|
||||
],
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "mimics_pcre_test",
|
||||
size = "small",
|
||||
srcs = ["re2/testing/mimics_pcre_test.cc"],
|
||||
deps = [
|
||||
":testing",
|
||||
"@com_google_absl//absl/base:core_headers",
|
||||
"@googletest//:gtest",
|
||||
"@googletest//:gtest_main",
|
||||
],
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "parse_test",
|
||||
size = "small",
|
||||
srcs = ["re2/testing/parse_test.cc"],
|
||||
deps = [
|
||||
":testing",
|
||||
"@com_google_absl//absl/base:core_headers",
|
||||
"@googletest//:gtest",
|
||||
"@googletest//:gtest_main",
|
||||
],
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "possible_match_test",
|
||||
size = "small",
|
||||
srcs = ["re2/testing/possible_match_test.cc"],
|
||||
deps = [
|
||||
":re2",
|
||||
":testing",
|
||||
"@com_google_absl//absl/base:core_headers",
|
||||
"@com_google_absl//absl/strings",
|
||||
"@googletest//:gtest",
|
||||
"@googletest//:gtest_main",
|
||||
],
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "re2_arg_test",
|
||||
size = "small",
|
||||
srcs = ["re2/testing/re2_arg_test.cc"],
|
||||
deps = [
|
||||
":re2",
|
||||
":testing",
|
||||
"@com_google_absl//absl/base:core_headers",
|
||||
"@googletest//:gtest",
|
||||
"@googletest//:gtest_main",
|
||||
],
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "re2_test",
|
||||
size = "small",
|
||||
srcs = ["re2/testing/re2_test.cc"],
|
||||
deps = [
|
||||
":re2",
|
||||
":testing",
|
||||
"@com_google_absl//absl/base:core_headers",
|
||||
"@com_google_absl//absl/strings:str_format",
|
||||
"@googletest//:gtest",
|
||||
"@googletest//:gtest_main",
|
||||
],
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "regexp_test",
|
||||
size = "small",
|
||||
srcs = ["re2/testing/regexp_test.cc"],
|
||||
deps = [
|
||||
":testing",
|
||||
"@googletest//:gtest",
|
||||
"@googletest//:gtest_main",
|
||||
],
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "required_prefix_test",
|
||||
size = "small",
|
||||
srcs = ["re2/testing/required_prefix_test.cc"],
|
||||
deps = [
|
||||
":testing",
|
||||
"@com_google_absl//absl/base:core_headers",
|
||||
"@googletest//:gtest",
|
||||
"@googletest//:gtest_main",
|
||||
],
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "search_test",
|
||||
size = "small",
|
||||
srcs = ["re2/testing/search_test.cc"],
|
||||
deps = [
|
||||
":testing",
|
||||
"@com_google_absl//absl/base:core_headers",
|
||||
"@googletest//:gtest",
|
||||
"@googletest//:gtest_main",
|
||||
],
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "set_test",
|
||||
size = "small",
|
||||
srcs = ["re2/testing/set_test.cc"],
|
||||
deps = [
|
||||
":re2",
|
||||
":testing",
|
||||
"@googletest//:gtest",
|
||||
"@googletest//:gtest_main",
|
||||
],
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "simplify_test",
|
||||
size = "small",
|
||||
srcs = ["re2/testing/simplify_test.cc"],
|
||||
deps = [
|
||||
":testing",
|
||||
"@com_google_absl//absl/base:core_headers",
|
||||
"@googletest//:gtest",
|
||||
"@googletest//:gtest_main",
|
||||
],
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "string_generator_test",
|
||||
size = "small",
|
||||
srcs = ["re2/testing/string_generator_test.cc"],
|
||||
deps = [
|
||||
":testing",
|
||||
"@googletest//:gtest",
|
||||
"@googletest//:gtest_main",
|
||||
],
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "dfa_test",
|
||||
size = "large",
|
||||
srcs = ["re2/testing/dfa_test.cc"],
|
||||
deps = [
|
||||
":re2",
|
||||
":testing",
|
||||
"@com_google_absl//absl/base:core_headers",
|
||||
"@com_google_absl//absl/flags:flag",
|
||||
"@com_google_absl//absl/strings:str_format",
|
||||
"@googletest//:gtest",
|
||||
"@googletest//:gtest_main",
|
||||
],
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "exhaustive1_test",
|
||||
size = "large",
|
||||
srcs = ["re2/testing/exhaustive1_test.cc"],
|
||||
deps = [
|
||||
":testing",
|
||||
"@googletest//:gtest",
|
||||
"@googletest//:gtest_main",
|
||||
],
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "exhaustive2_test",
|
||||
size = "large",
|
||||
srcs = ["re2/testing/exhaustive2_test.cc"],
|
||||
deps = [
|
||||
":testing",
|
||||
"@googletest//:gtest",
|
||||
"@googletest//:gtest_main",
|
||||
],
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "exhaustive3_test",
|
||||
size = "large",
|
||||
srcs = ["re2/testing/exhaustive3_test.cc"],
|
||||
deps = [
|
||||
":testing",
|
||||
"@googletest//:gtest",
|
||||
"@googletest//:gtest_main",
|
||||
],
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "exhaustive_test",
|
||||
size = "large",
|
||||
srcs = ["re2/testing/exhaustive_test.cc"],
|
||||
deps = [
|
||||
":testing",
|
||||
"@googletest//:gtest",
|
||||
"@googletest//:gtest_main",
|
||||
],
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "random_test",
|
||||
size = "large",
|
||||
srcs = ["re2/testing/random_test.cc"],
|
||||
deps = [
|
||||
":testing",
|
||||
"@com_google_absl//absl/flags:flag",
|
||||
"@com_google_absl//absl/strings:str_format",
|
||||
"@googletest//:gtest",
|
||||
"@googletest//:gtest_main",
|
||||
],
|
||||
)
|
||||
|
||||
cc_binary(
|
||||
name = "regexp_benchmark",
|
||||
testonly = 1,
|
||||
srcs = ["re2/testing/regexp_benchmark.cc"],
|
||||
deps = [
|
||||
":re2",
|
||||
":testing",
|
||||
"@com_google_absl//absl/container:flat_hash_map",
|
||||
"@com_google_absl//absl/flags:flag",
|
||||
"@com_google_absl//absl/strings:str_format",
|
||||
"@com_google_absl//absl/synchronization",
|
||||
"@google_benchmark//:benchmark_main",
|
||||
],
|
||||
)
|
||||
41
src/third_party/re2/dist/CONTRIBUTORS
vendored
41
src/third_party/re2/dist/CONTRIBUTORS
vendored
@ -1,41 +0,0 @@
|
||||
# This is the official list of people who can contribute
|
||||
# (and typically have contributed) code to the RE2 repository.
|
||||
# The AUTHORS file lists the copyright holders; this file
|
||||
# lists people. For example, Google employees are listed here
|
||||
# but not in AUTHORS, because Google holds the copyright.
|
||||
#
|
||||
# The submission process automatically checks to make sure
|
||||
# that people submitting code are listed in this file (by email address).
|
||||
#
|
||||
# Names should be added to this file only after verifying that
|
||||
# the individual or the individual's organization has agreed to
|
||||
# the appropriate Contributor License Agreement, found here:
|
||||
#
|
||||
# http://code.google.com/legal/individual-cla-v1.0.html
|
||||
# http://code.google.com/legal/corporate-cla-v1.0.html
|
||||
#
|
||||
# The agreement for individuals can be filled out on the web.
|
||||
#
|
||||
# When adding J Random Contributor's name to this file,
|
||||
# either J's name or J's organization's name should be
|
||||
# added to the AUTHORS file, depending on whether the
|
||||
# individual or corporate CLA was used.
|
||||
|
||||
# Names should be added to this file like so:
|
||||
# Name <email address>
|
||||
|
||||
# Please keep the list sorted.
|
||||
|
||||
Dominic Battré <battre@chromium.org>
|
||||
Doug Kwan <dougkwan@google.com>
|
||||
Dmitriy Vyukov <dvyukov@google.com>
|
||||
John Millikin <jmillikin@gmail.com>
|
||||
Mike Nazarewicz <mpn@google.com>
|
||||
Nico Weber <thakis@chromium.org>
|
||||
Pawel Hajdan <phajdan.jr@gmail.com>
|
||||
Rob Pike <r@google.com>
|
||||
Russ Cox <rsc@swtch.com>
|
||||
Sanjay Ghemawat <sanjay@google.com>
|
||||
Stefano Rivera <stefano.rivera@gmail.com>
|
||||
Srinivasan Venkatachary <vsri@google.com>
|
||||
Viatcheslav Ostapenko <sl.ostapenko@samsung.com>
|
||||
27
src/third_party/re2/dist/MODULE.bazel
vendored
27
src/third_party/re2/dist/MODULE.bazel
vendored
@ -1,27 +0,0 @@
|
||||
# Copyright 2009 The RE2 Authors. All Rights Reserved.
|
||||
# Use of this source code is governed by a BSD-style
|
||||
# license that can be found in the LICENSE file.
|
||||
|
||||
# Bazel (http://bazel.build/) MODULE file for RE2.
|
||||
|
||||
module(
|
||||
name = "re2",
|
||||
version = "2023-11-01",
|
||||
compatibility_level = 1,
|
||||
)
|
||||
|
||||
bazel_dep(name = "platforms", version = "0.0.8")
|
||||
bazel_dep(name = "rules_cc", version = "0.0.9")
|
||||
bazel_dep(name = "abseil-cpp", version = "20230802.0", repo_name = "com_google_absl")
|
||||
bazel_dep(name = "rules_python", version = "0.26.0")
|
||||
bazel_dep(name = "pybind11_bazel", version = "2.11.1")
|
||||
|
||||
python_configure = use_extension("@pybind11_bazel//:python_configure.bzl", "extension")
|
||||
python_configure.toolchain(python_version = "3") # ignored when non-root module
|
||||
use_repo(python_configure, "local_config_python", "pybind11")
|
||||
|
||||
# These dependencies will be ignored when the `re2` module is not
|
||||
# the root module (or when `--ignore_dev_dependency` is enabled).
|
||||
bazel_dep(name = "google_benchmark", version = "1.8.3", dev_dependency = True)
|
||||
bazel_dep(name = "googletest", version = "1.14.0.bcr.1", dev_dependency = True)
|
||||
bazel_dep(name = "abseil-py", version = "1.4.0", dev_dependency = True)
|
||||
399
src/third_party/re2/dist/Makefile
vendored
399
src/third_party/re2/dist/Makefile
vendored
@ -1,399 +0,0 @@
|
||||
# Copyright 2009 The RE2 Authors. All Rights Reserved.
|
||||
# Use of this source code is governed by a BSD-style
|
||||
# license that can be found in the LICENSE file.
|
||||
|
||||
# Build against Abseil.
|
||||
ABSL_DEPS=\
|
||||
absl_base\
|
||||
absl_core_headers\
|
||||
absl_fixed_array\
|
||||
absl_flags\
|
||||
absl_flat_hash_map\
|
||||
absl_flat_hash_set\
|
||||
absl_inlined_vector\
|
||||
absl_optional\
|
||||
absl_span\
|
||||
absl_str_format\
|
||||
absl_strings\
|
||||
absl_synchronization\
|
||||
|
||||
PKG_CONFIG?=pkg-config
|
||||
CCABSL=$(shell $(PKG_CONFIG) $(ABSL_DEPS) --cflags)
|
||||
# GCC barfs on `-Wl` whereas Clang doesn't mind, but it's unclear what
|
||||
# causes it to manifest on Ubuntu 22.04 LTS, so filter it out for now.
|
||||
# Similar is needed for `static-testinstall` and `shared-testinstall`.
|
||||
LDABSL=$(shell $(PKG_CONFIG) $(ABSL_DEPS) --libs | sed -e 's/-Wl / /g')
|
||||
|
||||
# To build against ICU for full Unicode properties support,
|
||||
# uncomment the next two lines:
|
||||
# CCICU=$(shell $(PKG_CONFIG) icu-uc --cflags) -DRE2_USE_ICU
|
||||
# LDICU=$(shell $(PKG_CONFIG) icu-uc --libs)
|
||||
|
||||
# To build against PCRE for testing and benchmarking,
|
||||
# uncomment the next two lines:
|
||||
# CCPCRE=-I/usr/local/include -DUSEPCRE
|
||||
# LDPCRE=-L/usr/local/lib -lpcre
|
||||
|
||||
CXX?=g++
|
||||
# can override
|
||||
CXXFLAGS?=-O3 -g
|
||||
LDFLAGS?=
|
||||
# required
|
||||
RE2_CXXFLAGS?=-pthread -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -I. $(CCABSL) $(CCICU) $(CCPCRE)
|
||||
RE2_LDFLAGS?=-pthread $(LDABSL) $(LDICU) $(LDPCRE)
|
||||
AR?=ar
|
||||
ARFLAGS?=rsc
|
||||
NM?=nm
|
||||
NMFLAGS?=-p
|
||||
|
||||
# Variables mandated by GNU, the arbiter of all good taste on the internet.
|
||||
# http://www.gnu.org/prep/standards/standards.html
|
||||
prefix=/usr/local
|
||||
exec_prefix=$(prefix)
|
||||
includedir=$(prefix)/include
|
||||
libdir=$(exec_prefix)/lib
|
||||
INSTALL=install
|
||||
INSTALL_DATA=$(INSTALL) -m 644
|
||||
|
||||
# Work around the weirdness of sed(1) on Darwin. :/
|
||||
ifeq ($(shell uname),Darwin)
|
||||
SED_INPLACE=sed -i ''
|
||||
else ifeq ($(shell uname),SunOS)
|
||||
SED_INPLACE=sed -i
|
||||
else
|
||||
SED_INPLACE=sed -i
|
||||
endif
|
||||
|
||||
# The pkg-config Requires: field.
|
||||
REQUIRES=$(ABSL_DEPS)
|
||||
ifdef LDICU
|
||||
REQUIRES+=icu-uc
|
||||
endif
|
||||
|
||||
# ABI version
|
||||
# http://tldp.org/HOWTO/Program-Library-HOWTO/shared-libraries.html
|
||||
SONAME=11
|
||||
|
||||
# To rebuild the Tables generated by Perl and Python scripts (requires Internet
|
||||
# access for Unicode data), uncomment the following line:
|
||||
# REBUILD_TABLES=1
|
||||
|
||||
# The SunOS linker does not support wildcards. :(
|
||||
ifeq ($(shell uname),Darwin)
|
||||
SOEXT=dylib
|
||||
SOEXTVER=$(SONAME).$(SOEXT)
|
||||
SOEXTVER00=$(SONAME).0.0.$(SOEXT)
|
||||
MAKE_SHARED_LIBRARY=$(CXX) -dynamiclib -Wl,-compatibility_version,$(SONAME),-current_version,$(SONAME).0.0,-install_name,$(libdir)/libre2.$(SOEXTVER),-exported_symbols_list,libre2.symbols.darwin
|
||||
else ifeq ($(shell uname),SunOS)
|
||||
SOEXT=so
|
||||
SOEXTVER=$(SOEXT).$(SONAME)
|
||||
SOEXTVER00=$(SOEXT).$(SONAME).0.0
|
||||
MAKE_SHARED_LIBRARY=$(CXX) -shared -Wl,-soname,libre2.$(SOEXTVER)
|
||||
else
|
||||
SOEXT=so
|
||||
SOEXTVER=$(SOEXT).$(SONAME)
|
||||
SOEXTVER00=$(SOEXT).$(SONAME).0.0
|
||||
MAKE_SHARED_LIBRARY=$(CXX) -shared -Wl,-soname,libre2.$(SOEXTVER),--version-script,libre2.symbols
|
||||
endif
|
||||
|
||||
.PHONY: all
|
||||
all: obj/libre2.a obj/so/libre2.$(SOEXT)
|
||||
|
||||
INSTALL_HFILES=\
|
||||
re2/filtered_re2.h\
|
||||
re2/re2.h\
|
||||
re2/set.h\
|
||||
re2/stringpiece.h\
|
||||
|
||||
HFILES=\
|
||||
util/logging.h\
|
||||
util/malloc_counter.h\
|
||||
util/pcre.h\
|
||||
util/strutil.h\
|
||||
util/utf.h\
|
||||
re2/bitmap256.h\
|
||||
re2/filtered_re2.h\
|
||||
re2/pod_array.h\
|
||||
re2/prefilter.h\
|
||||
re2/prefilter_tree.h\
|
||||
re2/prog.h\
|
||||
re2/re2.h\
|
||||
re2/regexp.h\
|
||||
re2/set.h\
|
||||
re2/sparse_array.h\
|
||||
re2/sparse_set.h\
|
||||
re2/stringpiece.h\
|
||||
re2/testing/exhaustive_tester.h\
|
||||
re2/testing/regexp_generator.h\
|
||||
re2/testing/string_generator.h\
|
||||
re2/testing/tester.h\
|
||||
re2/unicode_casefold.h\
|
||||
re2/unicode_groups.h\
|
||||
re2/walker-inl.h\
|
||||
|
||||
OFILES=\
|
||||
obj/util/rune.o\
|
||||
obj/util/strutil.o\
|
||||
obj/re2/bitmap256.o\
|
||||
obj/re2/bitstate.o\
|
||||
obj/re2/compile.o\
|
||||
obj/re2/dfa.o\
|
||||
obj/re2/filtered_re2.o\
|
||||
obj/re2/mimics_pcre.o\
|
||||
obj/re2/nfa.o\
|
||||
obj/re2/onepass.o\
|
||||
obj/re2/parse.o\
|
||||
obj/re2/perl_groups.o\
|
||||
obj/re2/prefilter.o\
|
||||
obj/re2/prefilter_tree.o\
|
||||
obj/re2/prog.o\
|
||||
obj/re2/re2.o\
|
||||
obj/re2/regexp.o\
|
||||
obj/re2/set.o\
|
||||
obj/re2/simplify.o\
|
||||
obj/re2/tostring.o\
|
||||
obj/re2/unicode_casefold.o\
|
||||
obj/re2/unicode_groups.o\
|
||||
|
||||
TESTOFILES=\
|
||||
obj/util/pcre.o\
|
||||
obj/re2/testing/backtrack.o\
|
||||
obj/re2/testing/dump.o\
|
||||
obj/re2/testing/exhaustive_tester.o\
|
||||
obj/re2/testing/null_walker.o\
|
||||
obj/re2/testing/regexp_generator.o\
|
||||
obj/re2/testing/string_generator.o\
|
||||
obj/re2/testing/tester.o\
|
||||
|
||||
TESTS=\
|
||||
obj/test/charclass_test\
|
||||
obj/test/compile_test\
|
||||
obj/test/filtered_re2_test\
|
||||
obj/test/mimics_pcre_test\
|
||||
obj/test/parse_test\
|
||||
obj/test/possible_match_test\
|
||||
obj/test/re2_test\
|
||||
obj/test/re2_arg_test\
|
||||
obj/test/regexp_test\
|
||||
obj/test/required_prefix_test\
|
||||
obj/test/search_test\
|
||||
obj/test/set_test\
|
||||
obj/test/simplify_test\
|
||||
obj/test/string_generator_test\
|
||||
|
||||
BIGTESTS=\
|
||||
obj/test/dfa_test\
|
||||
obj/test/exhaustive1_test\
|
||||
obj/test/exhaustive2_test\
|
||||
obj/test/exhaustive3_test\
|
||||
obj/test/exhaustive_test\
|
||||
obj/test/random_test\
|
||||
|
||||
SOFILES=$(patsubst obj/%,obj/so/%,$(OFILES))
|
||||
# We use TESTOFILES for testing the shared lib, only it is built differently.
|
||||
STESTS=$(patsubst obj/%,obj/so/%,$(TESTS))
|
||||
SBIGTESTS=$(patsubst obj/%,obj/so/%,$(BIGTESTS))
|
||||
|
||||
DOFILES=$(patsubst obj/%,obj/dbg/%,$(OFILES))
|
||||
DTESTOFILES=$(patsubst obj/%,obj/dbg/%,$(TESTOFILES))
|
||||
DTESTS=$(patsubst obj/%,obj/dbg/%,$(TESTS))
|
||||
DBIGTESTS=$(patsubst obj/%,obj/dbg/%,$(BIGTESTS))
|
||||
|
||||
.PRECIOUS: obj/%.o
|
||||
obj/%.o: %.cc $(HFILES)
|
||||
@mkdir -p $$(dirname $@)
|
||||
$(CXX) -c -o $@ $(CPPFLAGS) $(RE2_CXXFLAGS) $(CXXFLAGS) -DNDEBUG $*.cc
|
||||
|
||||
.PRECIOUS: obj/dbg/%.o
|
||||
obj/dbg/%.o: %.cc $(HFILES)
|
||||
@mkdir -p $$(dirname $@)
|
||||
$(CXX) -c -o $@ $(CPPFLAGS) $(RE2_CXXFLAGS) $(CXXFLAGS) $*.cc
|
||||
|
||||
.PRECIOUS: obj/so/%.o
|
||||
obj/so/%.o: %.cc $(HFILES)
|
||||
@mkdir -p $$(dirname $@)
|
||||
$(CXX) -c -o $@ -fPIC $(CPPFLAGS) $(RE2_CXXFLAGS) $(CXXFLAGS) -DNDEBUG $*.cc
|
||||
|
||||
.PRECIOUS: obj/libre2.a
|
||||
obj/libre2.a: $(OFILES)
|
||||
@mkdir -p obj
|
||||
$(AR) $(ARFLAGS) obj/libre2.a $(OFILES)
|
||||
|
||||
.PRECIOUS: obj/dbg/libre2.a
|
||||
obj/dbg/libre2.a: $(DOFILES)
|
||||
@mkdir -p obj/dbg
|
||||
$(AR) $(ARFLAGS) obj/dbg/libre2.a $(DOFILES)
|
||||
|
||||
.PRECIOUS: obj/so/libre2.$(SOEXT)
|
||||
obj/so/libre2.$(SOEXT): $(SOFILES) libre2.symbols libre2.symbols.darwin
|
||||
@mkdir -p obj/so
|
||||
$(MAKE_SHARED_LIBRARY) -o obj/so/libre2.$(SOEXTVER) $(SOFILES) $(RE2_LDFLAGS) $(LDFLAGS)
|
||||
ln -sf libre2.$(SOEXTVER) $@
|
||||
|
||||
.PRECIOUS: obj/dbg/test/%
|
||||
obj/dbg/test/%: obj/dbg/libre2.a obj/dbg/re2/testing/%.o $(DTESTOFILES)
|
||||
@mkdir -p obj/dbg/test
|
||||
$(CXX) -o $@ obj/dbg/re2/testing/$*.o $(DTESTOFILES) -lgtest -lgtest_main obj/dbg/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
|
||||
|
||||
.PRECIOUS: obj/test/%
|
||||
obj/test/%: obj/libre2.a obj/re2/testing/%.o $(TESTOFILES)
|
||||
@mkdir -p obj/test
|
||||
$(CXX) -o $@ obj/re2/testing/$*.o $(TESTOFILES) -lgtest -lgtest_main obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
|
||||
|
||||
# Test the shared lib, falling back to the static lib for private symbols
|
||||
.PRECIOUS: obj/so/test/%
|
||||
obj/so/test/%: obj/so/libre2.$(SOEXT) obj/libre2.a obj/re2/testing/%.o $(TESTOFILES)
|
||||
@mkdir -p obj/so/test
|
||||
$(CXX) -o $@ obj/re2/testing/$*.o $(TESTOFILES) -lgtest -lgtest_main -Lobj/so -lre2 obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
|
||||
|
||||
obj/test/regexp_benchmark: obj/libre2.a obj/re2/testing/regexp_benchmark.o $(TESTOFILES)
|
||||
@mkdir -p obj/test
|
||||
$(CXX) -o $@ obj/re2/testing/regexp_benchmark.o $(TESTOFILES) -lgtest -lbenchmark -lbenchmark_main obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
|
||||
|
||||
obj/test/re2_fuzzer: obj/libre2.a obj/re2/fuzzing/re2_fuzzer.o
|
||||
@mkdir -p obj/test
|
||||
$(CXX) -o $@ obj/re2/fuzzing/re2_fuzzer.o obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
|
||||
|
||||
ifdef REBUILD_TABLES
|
||||
.PRECIOUS: re2/perl_groups.cc
|
||||
re2/perl_groups.cc: re2/make_perl_groups.pl
|
||||
perl $< > $@
|
||||
|
||||
.PRECIOUS: re2/unicode_%.cc
|
||||
re2/unicode_%.cc: re2/make_unicode_%.py re2/unicode.py
|
||||
python3 $< > $@
|
||||
endif
|
||||
|
||||
.PHONY: distclean
|
||||
distclean: clean
|
||||
rm -f re2/perl_groups.cc re2/unicode_casefold.cc re2/unicode_groups.cc
|
||||
|
||||
.PHONY: clean
|
||||
clean:
|
||||
rm -rf obj
|
||||
rm -f re2/*.pyc
|
||||
|
||||
.PHONY: testofiles
|
||||
testofiles: $(TESTOFILES)
|
||||
|
||||
.PHONY: test
|
||||
test: $(DTESTS) $(TESTS) $(STESTS) debug-test static-test shared-test
|
||||
|
||||
.PHONY: debug-test
|
||||
debug-test: $(DTESTS)
|
||||
@./runtests $(DTESTS)
|
||||
|
||||
.PHONY: static-test
|
||||
static-test: $(TESTS)
|
||||
@./runtests $(TESTS)
|
||||
|
||||
.PHONY: shared-test
|
||||
shared-test: $(STESTS)
|
||||
@./runtests -shared-library-path obj/so $(STESTS)
|
||||
|
||||
.PHONY: debug-bigtest
|
||||
debug-bigtest: $(DTESTS) $(DBIGTESTS)
|
||||
@./runtests $(DTESTS) $(DBIGTESTS)
|
||||
|
||||
.PHONY: static-bigtest
|
||||
static-bigtest: $(TESTS) $(BIGTESTS)
|
||||
@./runtests $(TESTS) $(BIGTESTS)
|
||||
|
||||
.PHONY: shared-bigtest
|
||||
shared-bigtest: $(STESTS) $(SBIGTESTS)
|
||||
@./runtests -shared-library-path obj/so $(STESTS) $(SBIGTESTS)
|
||||
|
||||
.PHONY: benchmark
|
||||
benchmark: obj/test/regexp_benchmark
|
||||
|
||||
.PHONY: fuzz
|
||||
fuzz: obj/test/re2_fuzzer
|
||||
|
||||
.PHONY: install
|
||||
install: static-install shared-install
|
||||
|
||||
.PHONY: static
|
||||
static: obj/libre2.a
|
||||
|
||||
.PHONY: static-install
|
||||
static-install: obj/libre2.a common-install
|
||||
$(INSTALL) obj/libre2.a $(DESTDIR)$(libdir)/libre2.a
|
||||
|
||||
.PHONY: shared
|
||||
shared: obj/so/libre2.$(SOEXT)
|
||||
|
||||
.PHONY: shared-install
|
||||
shared-install: obj/so/libre2.$(SOEXT) common-install
|
||||
$(INSTALL) obj/so/libre2.$(SOEXT) $(DESTDIR)$(libdir)/libre2.$(SOEXTVER00)
|
||||
ln -sf libre2.$(SOEXTVER00) $(DESTDIR)$(libdir)/libre2.$(SOEXTVER)
|
||||
ln -sf libre2.$(SOEXTVER00) $(DESTDIR)$(libdir)/libre2.$(SOEXT)
|
||||
|
||||
.PHONY: common-install
|
||||
common-install:
|
||||
mkdir -p $(DESTDIR)$(includedir)/re2 $(DESTDIR)$(libdir)/pkgconfig
|
||||
$(INSTALL_DATA) $(INSTALL_HFILES) $(DESTDIR)$(includedir)/re2
|
||||
$(INSTALL_DATA) re2.pc.in $(DESTDIR)$(libdir)/pkgconfig/re2.pc
|
||||
$(SED_INPLACE) -e "s#@CMAKE_INSTALL_FULL_INCLUDEDIR@#$(includedir)#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc
|
||||
$(SED_INPLACE) -e "s#@CMAKE_INSTALL_FULL_LIBDIR@#$(libdir)#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc
|
||||
$(SED_INPLACE) -e "s#@REQUIRES@#$(REQUIRES)#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc
|
||||
$(SED_INPLACE) -e "s#@SONAME@#$(SONAME)#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc
|
||||
|
||||
.PHONY: testinstall
|
||||
testinstall: static-testinstall shared-testinstall
|
||||
@echo
|
||||
@echo Install tests passed.
|
||||
@echo
|
||||
|
||||
.PHONY: static-testinstall
|
||||
static-testinstall:
|
||||
ifeq ($(shell uname),Darwin)
|
||||
@echo Skipping test for libre2.a on Darwin.
|
||||
else ifeq ($(shell uname),SunOS)
|
||||
@echo Skipping test for libre2.a on SunOS.
|
||||
else
|
||||
@mkdir -p obj
|
||||
@cp testinstall.cc obj/static-testinstall.cc
|
||||
(cd obj && export PKG_CONFIG_PATH=$(DESTDIR)$(libdir)/pkgconfig; \
|
||||
$(CXX) static-testinstall.cc -o static-testinstall $(CXXFLAGS) $(LDFLAGS) \
|
||||
$$($(PKG_CONFIG) re2 --cflags) \
|
||||
$$($(PKG_CONFIG) re2 --libs | sed -e 's/-Wl / /g' | sed -e 's/-lre2/-l:libre2.a/'))
|
||||
obj/static-testinstall
|
||||
endif
|
||||
|
||||
.PHONY: shared-testinstall
|
||||
shared-testinstall:
|
||||
@mkdir -p obj
|
||||
@cp testinstall.cc obj/shared-testinstall.cc
|
||||
(cd obj && export PKG_CONFIG_PATH=$(DESTDIR)$(libdir)/pkgconfig; \
|
||||
$(CXX) shared-testinstall.cc -o shared-testinstall $(CXXFLAGS) $(LDFLAGS) \
|
||||
$$($(PKG_CONFIG) re2 --cflags) \
|
||||
$$($(PKG_CONFIG) re2 --libs | sed -e 's/-Wl / /g'))
|
||||
ifeq ($(shell uname),Darwin)
|
||||
DYLD_LIBRARY_PATH="$(DESTDIR)$(libdir):$(DYLD_LIBRARY_PATH)" obj/shared-testinstall
|
||||
else
|
||||
LD_LIBRARY_PATH="$(DESTDIR)$(libdir):$(LD_LIBRARY_PATH)" obj/shared-testinstall
|
||||
endif
|
||||
|
||||
.PHONY: benchlog
|
||||
benchlog: obj/test/regexp_benchmark
|
||||
(echo '==BENCHMARK==' `hostname` `date`; \
|
||||
(uname -a; $(CXX) --version; git rev-parse --short HEAD; file obj/test/regexp_benchmark) | sed 's/^/# /'; \
|
||||
echo; \
|
||||
./obj/test/regexp_benchmark 'PCRE|RE2') | tee -a benchlog.$$(hostname | sed 's/\..*//')
|
||||
|
||||
.PHONY: log
|
||||
log:
|
||||
$(MAKE) clean
|
||||
$(MAKE) CXXFLAGS="$(CXXFLAGS) -DLOGGING=1" \
|
||||
$(filter obj/test/exhaustive%_test,$(BIGTESTS))
|
||||
echo '#' RE2 exhaustive tests built by make log >re2-exhaustive.txt
|
||||
echo '#' $$(date) >>re2-exhaustive.txt
|
||||
obj/test/exhaustive_test |grep -v '^PASS$$' >>re2-exhaustive.txt
|
||||
obj/test/exhaustive1_test |grep -v '^PASS$$' >>re2-exhaustive.txt
|
||||
obj/test/exhaustive2_test |grep -v '^PASS$$' >>re2-exhaustive.txt
|
||||
obj/test/exhaustive3_test |grep -v '^PASS$$' >>re2-exhaustive.txt
|
||||
|
||||
$(MAKE) CXXFLAGS="$(CXXFLAGS) -DLOGGING=1" obj/test/search_test
|
||||
echo '#' RE2 basic search tests built by make $@ >re2-search.txt
|
||||
echo '#' $$(date) >>re2-search.txt
|
||||
obj/test/search_test |grep -v '^PASS$$' >>re2-search.txt
|
||||
47
src/third_party/re2/dist/README
vendored
47
src/third_party/re2/dist/README
vendored
@ -1,47 +0,0 @@
|
||||
This is the source code repository for RE2, a regular expression library.
|
||||
|
||||
For documentation about how to install and use RE2,
|
||||
visit https://github.com/google/re2/.
|
||||
|
||||
The short version is:
|
||||
|
||||
make
|
||||
make test
|
||||
make install
|
||||
make testinstall
|
||||
|
||||
Building RE2 requires Abseil (https://github.com/abseil/abseil-cpp)
|
||||
to be installed on your system. Building the testing for RE2 requires
|
||||
GoogleTest (https://github.com/google/googletest) and Benchmark
|
||||
(https://github.com/google/benchmark) to be installed as well.
|
||||
|
||||
There is a fair amount of documentation (including code snippets) in
|
||||
the re2.h header file.
|
||||
|
||||
More information can be found on the wiki:
|
||||
https://github.com/google/re2/wiki
|
||||
|
||||
Issue tracker:
|
||||
https://github.com/google/re2/issues
|
||||
|
||||
Mailing list:
|
||||
https://groups.google.com/group/re2-dev
|
||||
|
||||
Unless otherwise noted, the RE2 source files are distributed
|
||||
under the BSD-style license found in the LICENSE file.
|
||||
|
||||
RE2's native language is C++.
|
||||
|
||||
The Python wrapper is at https://github.com/google/re2/tree/abseil/python
|
||||
and on PyPI (https://pypi.org/project/google-re2/).
|
||||
|
||||
A C wrapper is at https://github.com/marcomaggi/cre2/.
|
||||
A D wrapper is at https://github.com/ShigekiKarita/re2d/ and on DUB (code.dlang.org).
|
||||
An Erlang wrapper is at https://github.com/dukesoferl/re2/ and on Hex (hex.pm).
|
||||
An Inferno wrapper is at https://github.com/powerman/inferno-re2/.
|
||||
A Node.js wrapper is at https://github.com/uhop/node-re2/ and on NPM (npmjs.com).
|
||||
An OCaml wrapper is at https://github.com/janestreet/re2/ and on OPAM (opam.ocaml.org).
|
||||
A Perl wrapper is at https://github.com/dgl/re-engine-RE2/ and on CPAN (cpan.org).
|
||||
An R wrapper is at https://github.com/girishji/re2/ and on CRAN (cran.r-project.org).
|
||||
A Ruby wrapper is at https://github.com/mudge/re2/ and on RubyGems (rubygems.org).
|
||||
A WebAssembly wrapper is at https://github.com/google/re2-wasm/ and on NPM (npmjs.com).
|
||||
259
src/third_party/re2/dist/README.md
vendored
Normal file
259
src/third_party/re2/dist/README.md
vendored
Normal file
@ -0,0 +1,259 @@
|
||||
# RE2, a regular expression library
|
||||
|
||||
RE2 is an efficient, principled regular expression library
|
||||
that has been used in production at Google and many other places
|
||||
since 2006.
|
||||
|
||||
_**Safety is RE2's primary goal.**_
|
||||
|
||||
RE2 was designed and implemented with an explicit goal of being able
|
||||
to handle regular expressions from untrusted users without risk.
|
||||
One of its primary guarantees is that the match time is linear in the
|
||||
length of the input string. It was also written with production concerns in mind:
|
||||
the parser, the compiler and the execution engines limit their memory usage
|
||||
by working within a configurable budget—failing gracefully when exhausted—and
|
||||
they avoid stack overflow by eschewing recursion.
|
||||
|
||||
It is not a goal to be faster than all other engines under all circumstances.
|
||||
Although RE2 guarantees a running time that is asymptotically linear in
|
||||
the length of the input, more complex expressions may incur larger constant factors;
|
||||
longer expressions increase the overhead required to handle those expressions safely.
|
||||
In a sense, RE2 is pessimistic where a backtracking engine is optimistic:
|
||||
A backtracking engine tests each alternative sequentially, making it fast when the first alternative is common.
|
||||
By contrast, RE2 evaluates all alternatives in parallel, avoiding the performance penalty for the last alternative,
|
||||
at the cost of some overhead. This pessimism is what makes RE2 secure.
|
||||
|
||||
It is also not a goal to implement all of the features offered by Perl, PCRE and other engines.
|
||||
As a matter of principle, RE2 does not support constructs for which only backtracking solutions are known to exist.
|
||||
Thus, backreferences and look-around assertions are not supported.
|
||||
|
||||
For more information, please refer to Russ Cox's articles on regular expression theory and practice:
|
||||
|
||||
* [Regular Expression Matching Can Be Simple And Fast](https://swtch.com/~rsc/regexp/regexp1.html)
|
||||
* [Regular Expression Matching: the Virtual Machine Approach](https://swtch.com/~rsc/regexp/regexp2.html)
|
||||
* [Regular Expression Matching in the Wild](https://swtch.com/~rsc/regexp/regexp3.html)
|
||||
|
||||
### Syntax
|
||||
|
||||
In POSIX mode, RE2 accepts standard POSIX (egrep) syntax regular expressions.
|
||||
In Perl mode, RE2 accepts most Perl operators. The only excluded ones are
|
||||
those that require backtracking (and its potential for exponential runtime)
|
||||
to implement. These include backreferences (submatching is still okay)
|
||||
and generalized assertions.
|
||||
The [Syntax wiki page](https://github.com/google/re2/wiki/Syntax)
|
||||
documents the supported Perl-mode syntax in detail.
|
||||
The default is Perl mode.
|
||||
|
||||
### C++ API
|
||||
|
||||
RE2's native language is C++, although there are [ports and wrappers](#ports-and-wrappers) listed below.
|
||||
|
||||
#### Matching Interface
|
||||
|
||||
There are two basic operators:
|
||||
`RE2::FullMatch` requires the regexp to match the entire input text, and
|
||||
`RE2::PartialMatch` looks for a match for a substring of the input text,
|
||||
returning the leftmost-longest match in POSIX mode and the
|
||||
same match that Perl would have chosen in Perl mode.
|
||||
|
||||
Examples:
|
||||
|
||||
```cpp
|
||||
assert(RE2::FullMatch("hello", "h.*o"));
|
||||
assert(!RE2::FullMatch("hello", "e"));
|
||||
|
||||
assert(RE2::PartialMatch("hello", "h.*o"));
|
||||
assert(RE2::PartialMatch("hello", "e"));
|
||||
```
|
||||
|
||||
#### Submatch Extraction
|
||||
|
||||
Both matching functions take additional arguments in which submatches will be stored.
|
||||
Each argument can be a `string*`, a pointer to an integer type, or an `absl::string_view*`.
|
||||
(The `absl::string_view` type is very similar to the `std::string_view` type,
|
||||
but for historical reasons, RE2 uses the former.)
|
||||
A `string_view` is a pointer to the original input text, along with a count.
|
||||
It behaves like a string but doesn't carry its own storage.
|
||||
As with a pointer, when using a `string_view`
|
||||
you must be careful not to use it once the original text has been deleted or gone out of scope.
|
||||
|
||||
Examples:
|
||||
|
||||
```cpp
|
||||
// Successful parsing.
|
||||
int i;
|
||||
string s;
|
||||
assert(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
|
||||
assert(s == "ruby");
|
||||
assert(i == 1234);
|
||||
|
||||
// Fails: "ruby" cannot be parsed as an integer.
|
||||
assert(!RE2::FullMatch("ruby", "(.+)", &i));
|
||||
|
||||
// Success; does not extract the number.
|
||||
assert(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s));
|
||||
|
||||
// Success; skips NULL argument.
|
||||
assert(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", (void*)NULL, &i));
|
||||
|
||||
// Fails: integer overflow keeps value from being stored in i.
|
||||
assert(!RE2::FullMatch("ruby:123456789123", "(\\w+):(\\d+)", &s, &i));
|
||||
```
|
||||
|
||||
#### Pre-Compiled Regular Expressions
|
||||
|
||||
The examples above all recompile the regular expression on each call.
|
||||
Instead, you can compile it once to an RE2 object and reuse that object for each call.
|
||||
|
||||
Example:
|
||||
```cpp
|
||||
RE2 re("(\\w+):(\\d+)");
|
||||
assert(re.ok()); // compiled; if not, see re.error();
|
||||
|
||||
assert(RE2::FullMatch("ruby:1234", re, &s, &i));
|
||||
assert(RE2::FullMatch("ruby:1234", re, &s));
|
||||
assert(RE2::FullMatch("ruby:1234", re, (void*)NULL, &i));
|
||||
assert(!RE2::FullMatch("ruby:123456789123", re, &s, &i));
|
||||
```
|
||||
|
||||
#### Options
|
||||
|
||||
The constructor takes an optional second argument that can
|
||||
be used to change RE2's default options.
|
||||
For example, `RE2::Quiet` silences the error messages that are
|
||||
usually printed when a regular expression fails to parse:
|
||||
|
||||
```cpp
|
||||
RE2 re("(ab", RE2::Quiet); // don't write to stderr for parser failure
|
||||
assert(!re.ok()); // can check re.error() for details
|
||||
```
|
||||
|
||||
Other useful predefined options are `Latin1` (disable UTF-8) and `POSIX`
|
||||
(use POSIX syntax and leftmost longest matching).
|
||||
|
||||
You can also declare your own `RE2::Options` object and then configure it as you like.
|
||||
See the [header](https://github.com/google/re2/blob/main/re2/re2.h) for the full set of options.
|
||||
|
||||
#### Unicode Normalization
|
||||
|
||||
RE2 operates on Unicode code points: it makes no attempt at normalization.
|
||||
For example, the regular expression /ü/ (U+00FC, u with diaeresis)
|
||||
does not match the input "ü" (U+0075 U+0308, u followed by combining diaeresis).
|
||||
Normalization is a long, involved topic.
|
||||
The simplest solution, if you need such matches, is to normalize both the regular expressions
|
||||
and the input in a preprocessing step before using RE2.
|
||||
For more details on the general topic, see <https://www.unicode.org/reports/tr15/>.
|
||||
|
||||
#### Additional Tips and Tricks
|
||||
|
||||
For advanced usage, like constructing your own argument lists,
|
||||
or using RE2 as a lexer, or parsing hex, octal, and C-radix numbers,
|
||||
see [re2.h](https://github.com/google/re2/blob/main/re2/re2.h).
|
||||
|
||||
### Installation
|
||||
|
||||
RE2 can be built and installed using GNU make, CMake, or Bazel.
|
||||
The simplest installation instructions are:
|
||||
|
||||
make
|
||||
make test
|
||||
make benchmark
|
||||
make install
|
||||
make testinstall
|
||||
|
||||
Building RE2 requires a C++17 compiler and the [Abseil](https://github.com/abseil/abseil-cpp) library.
|
||||
Building the tests and benchmarks requires
|
||||
[GoogleTest](https://github.com/google/googletest)
|
||||
and [Benchmark](https://github.com/google/benchmark).
|
||||
To obtain those:
|
||||
|
||||
- Linux: `apt install libabsl-dev libgtest-dev libbenchmark-dev`
|
||||
- macOS: `brew install abseil googletest google-benchmark pkg-config-wrapper`
|
||||
- Windows: `vcpkg install abseil gtest benchmark` \
|
||||
or `vcpkg add port abseil gtest benchmark`
|
||||
|
||||
Once those are installed, the build has to be able to find them.
|
||||
If the standard Makefile has trouble, then switching to CMake can help:
|
||||
|
||||
rm -rf build
|
||||
cmake -DRE2_TEST=ON -DRE2_BENCHMARK=ON -S . -B build
|
||||
cd build
|
||||
make
|
||||
make test
|
||||
make install
|
||||
|
||||
When using CMake, with benchmarks enabled, `make test` builds and runs test binaries
|
||||
and builds a `regexp_benchmark` binary but does not run it.
|
||||
If you don't need the tests or benchmarks at all, you can omit the corresponding `-D` arguments,
|
||||
and then you don't need the GoogleTest or Benchmark dependencies either.
|
||||
|
||||
Another useful option is `-DRE2_USE_ICU=ON`, which adds a dependency on the
|
||||
ICU Unicode library but also extends the list of property names available in the `\p` and `\P` patterns.
|
||||
|
||||
CMake can also be used to generate Visual Studio and Xcode projects, as well as
|
||||
Cygwin, MinGW, and MSYS makefiles.
|
||||
|
||||
- Visual Studio users: You need Visual Studio 2019 or later.
|
||||
- Cygwin users: You must run CMake from the Cygwin command line, not the Windows command line.
|
||||
|
||||
If you are adding RE2 to your own CMake project,
|
||||
CMake has two ways to use a dependency: `add_subdirectory()`,
|
||||
which is when the dependency's **_sources_** are in a subdirectory of your project;
|
||||
and `find_package()`, which is when the dependency's
|
||||
**_binaries_** have been built and installed somewhere on your system.
|
||||
The Abseil documentation walks through the former [here](https://abseil.io/docs/cpp/quickstart-cmake)
|
||||
versus the latter [here](https://abseil.io/docs/cpp/tools/cmake-installs).
|
||||
Once you get Abseil working, getting RE2 working will be a very similar process and,
|
||||
either way, `target_link_libraries(… re2::re2)` should Just Work™.
|
||||
|
||||
If you are using [Bazel](https://bazel.io), it will handle the dependencies for you,
|
||||
although you still need to download Bazel,
|
||||
which you can do with [Bazelisk](https://github.com/bazelbuild/bazelisk).
|
||||
|
||||
go install github.com/bazelbuild/bazelisk@latest
|
||||
# or on mac: brew install bazelisk
|
||||
|
||||
bazelisk build :all
|
||||
bazelisk test :all
|
||||
|
||||
If you are using RE2 from another project, you need to make sure you are
|
||||
using at least C++17.
|
||||
See the RE2 [.bazelrc](https://github.com/google/re2/blob/main/.bazelrc) file for an example.
|
||||
|
||||
### Ports and Wrappers
|
||||
|
||||
RE2 is implemented in C++.
|
||||
|
||||
The official Python wrapper is [in the `python` directory](https://github.com/google/re2/tree/main/python)
|
||||
and [published on PyPI as `google-re2`](https://pypi.org/project/google-re2/).
|
||||
Note that there is also a `re2` package on PyPI, but it is not by the RE2 authors and is unmaintained; use `google-re2` instead.
|
||||
|
||||
There are also other unofficial wrappers:
|
||||
|
||||
- A C wrapper is at <https://github.com/marcomaggi/cre2/>.
|
||||
- A D wrapper is at <https://github.com/ShigekiKarita/re2d/> and [on DUB](https://code.dlang.org/packages/re2d).
|
||||
- An Erlang wrapper is at <https://github.com/dukesoferl/re2/> and [on Hex](https://hex.pm/packages/re2).
|
||||
- An Inferno wrapper is at <https://github.com/powerman/inferno-re2/>.
|
||||
- A Node.js wrapper is at <https://github.com/uhop/node-re2/> and [on NPM](https://www.npmjs.com/package/re2).
|
||||
- An OCaml wrapper is at <https://github.com/janestreet/re2/> and [on OPAM](https://opam.ocaml.org/packages/re2/).
|
||||
- A Perl wrapper is at <https://github.com/dgl/re-engine-RE2/> and [on CPAN](https://metacpan.org/pod/re::engine::RE2).
|
||||
- An R wrapper is at <https://github.com/girishji/re2/> and [on CRAN](https://cran.r-project.org/web/packages/re2/index.html).
|
||||
- A Ruby wrapper is at <https://github.com/mudge/re2/> and [on RubyGems](https://rubygems.org/gems/re2).
|
||||
- A WebAssembly wrapper is at <https://github.com/google/re2-wasm/> and [on NPM](https://www.npmjs.com/package/re2-wasm).
|
||||
|
||||
[RE2J](https://github.com/google/re2j) is a port of the RE2 C++ code to pure Java,
|
||||
and [RE2JS](https://github.com/le0pard/re2js) is a port of RE2J to JavaScript.
|
||||
|
||||
The [Go `regexp` package](https://go.dev/pkg/regexp)
|
||||
and [Rust `regex` crate](https://docs.rs/regex)
|
||||
do not share code with RE2, but they follow the same principles,
|
||||
accept the same syntax, and provide the same efficiency guarantees.
|
||||
|
||||
### Contact
|
||||
|
||||
The [issue tracker](https://github.com/google/re2/issues) is the best place for discussions.
|
||||
|
||||
There is a [mailing list](https://groups.google.com/group/re2-dev) for keeping up with code changes.
|
||||
|
||||
Please read the [contribution guide](https://github.com/google/re2/wiki/Contribute) before sending changes.
|
||||
In particular, note that RE2 does not use GitHub pull requests.
|
||||
7
src/third_party/re2/dist/WORKSPACE.bazel
vendored
7
src/third_party/re2/dist/WORKSPACE.bazel
vendored
@ -1,7 +0,0 @@
|
||||
# Copyright 2009 The RE2 Authors. All Rights Reserved.
|
||||
# Use of this source code is governed by a BSD-style
|
||||
# license that can be found in the LICENSE file.
|
||||
|
||||
# Bazel (http://bazel.build/) WORKSPACE file for RE2.
|
||||
|
||||
workspace(name = "com_googlesource_code_re2")
|
||||
7
src/third_party/re2/dist/WORKSPACE.bzlmod
vendored
7
src/third_party/re2/dist/WORKSPACE.bzlmod
vendored
@ -1,7 +0,0 @@
|
||||
# Copyright 2009 The RE2 Authors. All Rights Reserved.
|
||||
# Use of this source code is governed by a BSD-style
|
||||
# license that can be found in the LICENSE file.
|
||||
|
||||
# Bazel (http://bazel.build/) WORKSPACE file for RE2.
|
||||
|
||||
workspace(name = "com_googlesource_code_re2")
|
||||
16
src/third_party/re2/dist/libre2.symbols
vendored
16
src/third_party/re2/dist/libre2.symbols
vendored
@ -1,16 +0,0 @@
|
||||
{
|
||||
global:
|
||||
# re2::RE2*
|
||||
_ZN3re23RE2*;
|
||||
_ZNK3re23RE2*;
|
||||
# re2::operator<<*
|
||||
_ZN3re2ls*;
|
||||
# re2::FilteredRE2*
|
||||
_ZN3re211FilteredRE2*;
|
||||
_ZNK3re211FilteredRE2*;
|
||||
# re2::re2_internal*
|
||||
_ZN3re212re2_internal*;
|
||||
_ZNK3re212re2_internal*;
|
||||
local:
|
||||
*;
|
||||
};
|
||||
12
src/third_party/re2/dist/libre2.symbols.darwin
vendored
12
src/third_party/re2/dist/libre2.symbols.darwin
vendored
@ -1,12 +0,0 @@
|
||||
# Linker doesn't like these unmangled:
|
||||
# re2::RE2*
|
||||
__ZN3re23RE2*
|
||||
__ZNK3re23RE2*
|
||||
# re2::operator<<*
|
||||
__ZN3re2ls*
|
||||
# re2::FilteredRE2*
|
||||
__ZN3re211FilteredRE2*
|
||||
__ZNK3re211FilteredRE2*
|
||||
# re2::re2_internal*
|
||||
__ZN3re212re2_internal*
|
||||
__ZNK3re212re2_internal*
|
||||
9
src/third_party/re2/dist/re2.pc.in
vendored
9
src/third_party/re2/dist/re2.pc.in
vendored
@ -1,9 +0,0 @@
|
||||
includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@
|
||||
libdir=@CMAKE_INSTALL_FULL_LIBDIR@
|
||||
|
||||
Name: re2
|
||||
Description: RE2 is a fast, safe, thread-friendly regular expression engine.
|
||||
Requires: @REQUIRES@
|
||||
Version: @SONAME@.0.0
|
||||
Cflags: -pthread -I${includedir}
|
||||
Libs: -pthread -L${libdir} -lre2
|
||||
13
src/third_party/re2/dist/re2/bitmap256.cc
vendored
13
src/third_party/re2/dist/re2/bitmap256.cc
vendored
@ -6,14 +6,13 @@
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "absl/base/macros.h"
|
||||
#include "util/logging.h"
|
||||
#include "absl/log/absl_check.h"
|
||||
|
||||
namespace re2 {
|
||||
|
||||
int Bitmap256::FindNextSetBit(int c) const {
|
||||
DCHECK_GE(c, 0);
|
||||
DCHECK_LE(c, 255);
|
||||
ABSL_DCHECK_GE(c, 0);
|
||||
ABSL_DCHECK_LE(c, 255);
|
||||
|
||||
// Check the word that contains the bit. Mask out any lower bits.
|
||||
int i = c / 64;
|
||||
@ -27,15 +26,15 @@ int Bitmap256::FindNextSetBit(int c) const {
|
||||
case 1:
|
||||
if (words_[1] != 0)
|
||||
return (1 * 64) + FindLSBSet(words_[1]);
|
||||
ABSL_FALLTHROUGH_INTENDED;
|
||||
[[fallthrough]];
|
||||
case 2:
|
||||
if (words_[2] != 0)
|
||||
return (2 * 64) + FindLSBSet(words_[2]);
|
||||
ABSL_FALLTHROUGH_INTENDED;
|
||||
[[fallthrough]];
|
||||
case 3:
|
||||
if (words_[3] != 0)
|
||||
return (3 * 64) + FindLSBSet(words_[3]);
|
||||
ABSL_FALLTHROUGH_INTENDED;
|
||||
[[fallthrough]];
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
|
||||
20
src/third_party/re2/dist/re2/bitmap256.h
vendored
20
src/third_party/re2/dist/re2/bitmap256.h
vendored
@ -5,13 +5,15 @@
|
||||
#ifndef RE2_BITMAP256_H_
|
||||
#define RE2_BITMAP256_H_
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "util/logging.h"
|
||||
#include "absl/log/absl_check.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
namespace re2 {
|
||||
|
||||
@ -28,16 +30,16 @@ class Bitmap256 {
|
||||
|
||||
// Tests the bit with index c.
|
||||
bool Test(int c) const {
|
||||
DCHECK_GE(c, 0);
|
||||
DCHECK_LE(c, 255);
|
||||
ABSL_DCHECK_GE(c, 0);
|
||||
ABSL_DCHECK_LE(c, 255);
|
||||
|
||||
return (words_[c / 64] & (uint64_t{1} << (c % 64))) != 0;
|
||||
}
|
||||
|
||||
// Sets the bit with index c.
|
||||
void Set(int c) {
|
||||
DCHECK_GE(c, 0);
|
||||
DCHECK_LE(c, 255);
|
||||
ABSL_DCHECK_GE(c, 0);
|
||||
ABSL_DCHECK_LE(c, 255);
|
||||
|
||||
words_[c / 64] |= (uint64_t{1} << (c % 64));
|
||||
}
|
||||
@ -49,7 +51,7 @@ class Bitmap256 {
|
||||
private:
|
||||
// Finds the least significant non-zero bit in n.
|
||||
static int FindLSBSet(uint64_t n) {
|
||||
DCHECK_NE(n, 0);
|
||||
ABSL_DCHECK_NE(n, uint64_t{0});
|
||||
#if defined(__GNUC__)
|
||||
return __builtin_ctzll(n);
|
||||
#elif defined(_MSC_VER) && defined(_M_X64)
|
||||
|
||||
15
src/third_party/re2/dist/re2/bitstate.cc
vendored
15
src/third_party/re2/dist/re2/bitstate.cc
vendored
@ -20,10 +20,13 @@
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <limits>
|
||||
#include <utility>
|
||||
|
||||
#include "util/logging.h"
|
||||
#include "absl/log/absl_check.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "re2/pod_array.h"
|
||||
#include "re2/prog.h"
|
||||
#include "re2/regexp.h"
|
||||
@ -107,9 +110,9 @@ void BitState::Push(int id, const char* p) {
|
||||
if (njob_ >= job_.size()) {
|
||||
GrowStack();
|
||||
if (njob_ >= job_.size()) {
|
||||
LOG(DFATAL) << "GrowStack() failed: "
|
||||
<< "njob_ = " << njob_ << ", "
|
||||
<< "job_.size() = " << job_.size();
|
||||
ABSL_LOG(DFATAL) << "GrowStack() failed: "
|
||||
<< "njob_ = " << njob_ << ", "
|
||||
<< "job_.size() = " << job_.size();
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -167,7 +170,7 @@ bool BitState::TrySearch(int id0, const char* p0) {
|
||||
Prog::Inst* ip = prog_->inst(id);
|
||||
switch (ip->opcode()) {
|
||||
default:
|
||||
LOG(DFATAL) << "Unexpected opcode: " << ip->opcode();
|
||||
ABSL_LOG(DFATAL) << "Unexpected opcode: " << ip->opcode();
|
||||
return false;
|
||||
|
||||
case kInstFail:
|
||||
@ -233,7 +236,7 @@ bool BitState::TrySearch(int id0, const char* p0) {
|
||||
CheckAndLoop:
|
||||
// Sanity check: id is the head of its list, which must
|
||||
// be the case if id-1 is the last of *its* list. :)
|
||||
DCHECK(id == 0 || prog_->inst(id-1)->last());
|
||||
ABSL_DCHECK(id == 0 || prog_->inst(id-1)->last());
|
||||
if (ShouldVisit(id, p))
|
||||
goto Loop;
|
||||
break;
|
||||
|
||||
25
src/third_party/re2/dist/re2/compile.cc
vendored
25
src/third_party/re2/dist/re2/compile.cc
vendored
@ -10,17 +10,20 @@
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "absl/base/macros.h"
|
||||
#include "absl/container/flat_hash_map.h"
|
||||
#include "util/logging.h"
|
||||
#include "util/utf.h"
|
||||
#include "absl/log/absl_check.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "re2/pod_array.h"
|
||||
#include "re2/prog.h"
|
||||
#include "re2/re2.h"
|
||||
#include "re2/regexp.h"
|
||||
#include "re2/walker-inl.h"
|
||||
#include "util/utf.h"
|
||||
|
||||
namespace re2 {
|
||||
|
||||
@ -522,8 +525,8 @@ void Compiler::AddSuffix(int id) {
|
||||
}
|
||||
|
||||
int Compiler::AddSuffixRecursive(int root, int id) {
|
||||
DCHECK(inst_[root].opcode() == kInstAlt ||
|
||||
inst_[root].opcode() == kInstByteRange);
|
||||
ABSL_DCHECK(inst_[root].opcode() == kInstAlt ||
|
||||
inst_[root].opcode() == kInstByteRange);
|
||||
|
||||
Frag f = FindByteRange(root, id);
|
||||
if (IsNoMatch(f)) {
|
||||
@ -565,7 +568,7 @@ int Compiler::AddSuffixRecursive(int root, int id) {
|
||||
if (!IsCachedRuneByteSuffix(id)) {
|
||||
// The head should be the instruction most recently allocated, so free it
|
||||
// instead of leaving it unreachable.
|
||||
DCHECK_EQ(id, ninst_-1);
|
||||
ABSL_DCHECK_EQ(id, ninst_-1);
|
||||
inst_[id].out_opcode_ = 0;
|
||||
inst_[id].out1_ = 0;
|
||||
ninst_--;
|
||||
@ -613,7 +616,7 @@ Frag Compiler::FindByteRange(int root, int id) {
|
||||
return NoMatch();
|
||||
}
|
||||
|
||||
LOG(DFATAL) << "should never happen";
|
||||
ABSL_LOG(DFATAL) << "should never happen";
|
||||
return NoMatch();
|
||||
}
|
||||
|
||||
@ -738,7 +741,7 @@ void Compiler::AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase) {
|
||||
int n = runetochar(reinterpret_cast<char*>(ulo), &lo);
|
||||
int m = runetochar(reinterpret_cast<char*>(uhi), &hi);
|
||||
(void)m; // USED(m)
|
||||
DCHECK_EQ(n, m);
|
||||
ABSL_DCHECK_EQ(n, m);
|
||||
|
||||
// The logic below encodes this thinking:
|
||||
//
|
||||
@ -791,7 +794,7 @@ void Compiler::AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase) {
|
||||
Frag Compiler::Copy(Frag arg) {
|
||||
// We're using WalkExponential; there should be no copying.
|
||||
failed_ = true;
|
||||
LOG(DFATAL) << "Compiler::Copy called!";
|
||||
ABSL_LOG(DFATAL) << "Compiler::Copy called!";
|
||||
return NoMatch();
|
||||
}
|
||||
|
||||
@ -918,7 +921,7 @@ Frag Compiler::PostVisit(Regexp* re, Frag, Frag, Frag* child_frags,
|
||||
if (cc->empty()) {
|
||||
// This can't happen.
|
||||
failed_ = true;
|
||||
LOG(DFATAL) << "No ranges in char class";
|
||||
ABSL_LOG(DFATAL) << "No ranges in char class";
|
||||
return NoMatch();
|
||||
}
|
||||
|
||||
@ -976,7 +979,7 @@ Frag Compiler::PostVisit(Regexp* re, Frag, Frag, Frag* child_frags,
|
||||
return EmptyWidth(kEmptyNonWordBoundary);
|
||||
}
|
||||
failed_ = true;
|
||||
LOG(DFATAL) << "Missing case in Compiler: " << re->op();
|
||||
ABSL_LOG(DFATAL) << "Missing case in Compiler: " << re->op();
|
||||
return NoMatch();
|
||||
}
|
||||
|
||||
|
||||
55
src/third_party/re2/dist/re2/dfa.cc
vendored
55
src/third_party/re2/dist/re2/dfa.cc
vendored
@ -25,28 +25,31 @@
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <deque>
|
||||
#include <new>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/base/call_once.h"
|
||||
#include "absl/base/macros.h"
|
||||
#include "absl/base/thread_annotations.h"
|
||||
#include "absl/container/flat_hash_map.h"
|
||||
#include "absl/container/flat_hash_set.h"
|
||||
#include "absl/hash/hash.h"
|
||||
#include "absl/log/absl_check.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "absl/strings/str_format.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "absl/synchronization/mutex.h"
|
||||
#include "absl/types/span.h"
|
||||
#include "util/logging.h"
|
||||
#include "util/strutil.h"
|
||||
#include "re2/pod_array.h"
|
||||
#include "re2/prog.h"
|
||||
#include "re2/re2.h"
|
||||
#include "re2/sparse_set.h"
|
||||
#include "util/strutil.h"
|
||||
|
||||
// Silence "zero-sized array in struct/union" warning for DFA::State::next_.
|
||||
#ifdef _MSC_VER
|
||||
@ -149,15 +152,15 @@ class DFA {
|
||||
|
||||
struct StateHash {
|
||||
size_t operator()(const State* a) const {
|
||||
DCHECK(a != NULL);
|
||||
ABSL_DCHECK(a != NULL);
|
||||
return absl::Hash<State>()(*a);
|
||||
}
|
||||
};
|
||||
|
||||
struct StateEqual {
|
||||
bool operator()(const State* a, const State* b) const {
|
||||
DCHECK(a != NULL);
|
||||
DCHECK(b != NULL);
|
||||
ABSL_DCHECK(a != NULL);
|
||||
ABSL_DCHECK(b != NULL);
|
||||
return *a == *b;
|
||||
}
|
||||
};
|
||||
@ -646,7 +649,7 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
|
||||
absl::FPrintF(stderr, " -> FullMatchState\n");
|
||||
return FullMatchState;
|
||||
}
|
||||
ABSL_FALLTHROUGH_INTENDED;
|
||||
[[fallthrough]];
|
||||
default:
|
||||
// Record iff id is the head of its list, which must
|
||||
// be the case if id-1 is the last of *its* list. :)
|
||||
@ -659,7 +662,7 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
DCHECK_LE(n, q->size());
|
||||
ABSL_DCHECK_LE(n, q->size());
|
||||
if (n > 0 && inst[n-1] == Mark)
|
||||
n--;
|
||||
|
||||
@ -847,7 +850,7 @@ void DFA::AddToQueue(Workq* q, int id, uint32_t flag) {
|
||||
|
||||
stk[nstk++] = id;
|
||||
while (nstk > 0) {
|
||||
DCHECK_LE(nstk, stack_.size());
|
||||
ABSL_DCHECK_LE(nstk, stack_.size());
|
||||
id = stk[--nstk];
|
||||
|
||||
Loop:
|
||||
@ -872,7 +875,7 @@ void DFA::AddToQueue(Workq* q, int id, uint32_t flag) {
|
||||
Prog::Inst* ip = prog_->inst(id);
|
||||
switch (ip->opcode()) {
|
||||
default:
|
||||
LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
|
||||
ABSL_LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
|
||||
break;
|
||||
|
||||
case kInstByteRange: // just save these on the queue
|
||||
@ -898,7 +901,7 @@ void DFA::AddToQueue(Workq* q, int id, uint32_t flag) {
|
||||
goto Loop;
|
||||
|
||||
case kInstAltMatch:
|
||||
DCHECK(!ip->last());
|
||||
ABSL_DCHECK(!ip->last());
|
||||
id = id+1;
|
||||
goto Loop;
|
||||
|
||||
@ -961,7 +964,7 @@ void DFA::RunWorkqOnByte(Workq* oldq, Workq* newq,
|
||||
Prog::Inst* ip = prog_->inst(id);
|
||||
switch (ip->opcode()) {
|
||||
default:
|
||||
LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
|
||||
ABSL_LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
|
||||
break;
|
||||
|
||||
case kInstFail: // never succeeds
|
||||
@ -1029,14 +1032,14 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) {
|
||||
return FullMatchState;
|
||||
}
|
||||
if (state == DeadState) {
|
||||
LOG(DFATAL) << "DeadState in RunStateOnByte";
|
||||
ABSL_LOG(DFATAL) << "DeadState in RunStateOnByte";
|
||||
return NULL;
|
||||
}
|
||||
if (state == NULL) {
|
||||
LOG(DFATAL) << "NULL state in RunStateOnByte";
|
||||
ABSL_LOG(DFATAL) << "NULL state in RunStateOnByte";
|
||||
return NULL;
|
||||
}
|
||||
LOG(DFATAL) << "Unexpected special state in RunStateOnByte";
|
||||
ABSL_LOG(DFATAL) << "Unexpected special state in RunStateOnByte";
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -1267,7 +1270,7 @@ DFA::State* DFA::StateSaver::Restore() {
|
||||
absl::MutexLock l(&dfa_->mutex_);
|
||||
State* s = dfa_->CachedState(inst_, ninst_, flag_);
|
||||
if (s == NULL)
|
||||
LOG(DFATAL) << "StateSaver failed to restore state.";
|
||||
ABSL_LOG(DFATAL) << "StateSaver failed to restore state.";
|
||||
return s;
|
||||
}
|
||||
|
||||
@ -1367,7 +1370,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) {
|
||||
lastmatch = p;
|
||||
if (ExtraDebug)
|
||||
absl::FPrintF(stderr, "match @stx! [%s]\n", DumpState(s));
|
||||
if (params->matches != NULL && kind_ == Prog::kManyMatch) {
|
||||
if (params->matches != NULL) {
|
||||
for (int i = s->ninst_ - 1; i >= 0; i--) {
|
||||
int id = s->inst_[i];
|
||||
if (id == MatchSep)
|
||||
@ -1451,13 +1454,13 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) {
|
||||
// Restore start and s so we can continue.
|
||||
if ((start = save_start.Restore()) == NULL ||
|
||||
(s = save_s.Restore()) == NULL) {
|
||||
// Restore already did LOG(DFATAL).
|
||||
// Restore already did ABSL_LOG(DFATAL).
|
||||
params->failed = true;
|
||||
return false;
|
||||
}
|
||||
ns = RunStateOnByteUnlocked(s, c);
|
||||
if (ns == NULL) {
|
||||
LOG(DFATAL) << "RunStateOnByteUnlocked failed after ResetCache";
|
||||
ABSL_LOG(DFATAL) << "RunStateOnByteUnlocked failed after ResetCache";
|
||||
params->failed = true;
|
||||
return false;
|
||||
}
|
||||
@ -1484,7 +1487,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) {
|
||||
lastmatch = p + 1;
|
||||
if (ExtraDebug)
|
||||
absl::FPrintF(stderr, "match @%d! [%s]\n", lastmatch - bp, DumpState(s));
|
||||
if (params->matches != NULL && kind_ == Prog::kManyMatch) {
|
||||
if (params->matches != NULL) {
|
||||
for (int i = s->ninst_ - 1; i >= 0; i--) {
|
||||
int id = s->inst_[i];
|
||||
if (id == MatchSep)
|
||||
@ -1529,7 +1532,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) {
|
||||
}
|
||||
ns = RunStateOnByteUnlocked(s, lastbyte);
|
||||
if (ns == NULL) {
|
||||
LOG(DFATAL) << "RunStateOnByteUnlocked failed after Reset";
|
||||
ABSL_LOG(DFATAL) << "RunStateOnByteUnlocked failed after Reset";
|
||||
params->failed = true;
|
||||
return false;
|
||||
}
|
||||
@ -1551,7 +1554,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) {
|
||||
lastmatch = p;
|
||||
if (ExtraDebug)
|
||||
absl::FPrintF(stderr, "match @etx! [%s]\n", DumpState(s));
|
||||
if (params->matches != NULL && kind_ == Prog::kManyMatch) {
|
||||
if (params->matches != NULL) {
|
||||
for (int i = s->ninst_ - 1; i >= 0; i--) {
|
||||
int id = s->inst_[i];
|
||||
if (id == MatchSep)
|
||||
@ -1646,7 +1649,7 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
|
||||
|
||||
// Sanity check: make sure that text lies within context.
|
||||
if (BeginPtr(text) < BeginPtr(context) || EndPtr(text) > EndPtr(context)) {
|
||||
LOG(DFATAL) << "context does not contain text";
|
||||
ABSL_LOG(DFATAL) << "context does not contain text";
|
||||
params->start = DeadState;
|
||||
return true;
|
||||
}
|
||||
@ -1694,7 +1697,7 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
|
||||
ResetCache(params->cache_lock);
|
||||
if (!AnalyzeSearchHelper(params, info, flags)) {
|
||||
params->failed = true;
|
||||
LOG(DFATAL) << "Failed to analyze start state.";
|
||||
ABSL_LOG(DFATAL) << "Failed to analyze start state.";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -1767,6 +1770,8 @@ bool DFA::Search(absl::string_view text, absl::string_view context,
|
||||
params.anchored = anchored;
|
||||
params.want_earliest_match = want_earliest_match;
|
||||
params.run_forward = run_forward;
|
||||
// matches should be null except when using RE2::Set.
|
||||
ABSL_DCHECK(matches == NULL || kind_ == Prog::kManyMatch);
|
||||
params.matches = matches;
|
||||
|
||||
if (!AnalyzeSearch(&params)) {
|
||||
|
||||
16
src/third_party/re2/dist/re2/filtered_re2.cc
vendored
16
src/third_party/re2/dist/re2/filtered_re2.cc
vendored
@ -5,10 +5,13 @@
|
||||
#include "re2/filtered_re2.h"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "util/logging.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "re2/prefilter.h"
|
||||
#include "re2/prefilter_tree.h"
|
||||
|
||||
@ -52,8 +55,8 @@ RE2::ErrorCode FilteredRE2::Add(absl::string_view pattern,
|
||||
|
||||
if (!re->ok()) {
|
||||
if (options.log_errors()) {
|
||||
LOG(ERROR) << "Couldn't compile regular expression, skipping: "
|
||||
<< pattern << " due to error " << re->error();
|
||||
ABSL_LOG(ERROR) << "Couldn't compile regular expression, skipping: "
|
||||
<< pattern << " due to error " << re->error();
|
||||
}
|
||||
delete re;
|
||||
} else {
|
||||
@ -66,12 +69,13 @@ RE2::ErrorCode FilteredRE2::Add(absl::string_view pattern,
|
||||
|
||||
void FilteredRE2::Compile(std::vector<std::string>* atoms) {
|
||||
if (compiled_) {
|
||||
LOG(ERROR) << "Compile called already.";
|
||||
ABSL_LOG(ERROR) << "Compile called already.";
|
||||
return;
|
||||
}
|
||||
|
||||
// Similarly to PrefilterTree::Compile(), make compiling
|
||||
// a no-op if it's attempted before adding any patterns.
|
||||
if (re2_vec_.empty()) {
|
||||
LOG(ERROR) << "Compile called before Add.";
|
||||
return;
|
||||
}
|
||||
|
||||
@ -94,7 +98,7 @@ int FilteredRE2::SlowFirstMatch(absl::string_view text) const {
|
||||
int FilteredRE2::FirstMatch(absl::string_view text,
|
||||
const std::vector<int>& atoms) const {
|
||||
if (!compiled_) {
|
||||
LOG(DFATAL) << "FirstMatch called before Compile.";
|
||||
ABSL_LOG(DFATAL) << "FirstMatch called before Compile.";
|
||||
return -1;
|
||||
}
|
||||
std::vector<int> regexps;
|
||||
|
||||
@ -5,10 +5,12 @@
|
||||
#include <fuzzer/FuzzedDataProvider.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "re2/filtered_re2.h"
|
||||
#include "re2/re2.h"
|
||||
#include "re2/regexp.h"
|
||||
|
||||
6
src/third_party/re2/dist/re2/mimics_pcre.cc
vendored
6
src/third_party/re2/dist/re2/mimics_pcre.cc
vendored
@ -22,7 +22,7 @@
|
||||
//
|
||||
// Regexp::MimicsPCRE checks for any of these conditions.
|
||||
|
||||
#include "util/logging.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "re2/regexp.h"
|
||||
#include "re2/walker-inl.h"
|
||||
|
||||
@ -44,7 +44,7 @@ class PCREWalker : public Regexp::Walker<bool> {
|
||||
virtual bool ShortVisit(Regexp* re, bool a) {
|
||||
// Should never be called: we use Walk(), not WalkExponential().
|
||||
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||
LOG(DFATAL) << "PCREWalker::ShortVisit called";
|
||||
ABSL_LOG(DFATAL) << "PCREWalker::ShortVisit called";
|
||||
#endif
|
||||
return a;
|
||||
}
|
||||
@ -128,7 +128,7 @@ class EmptyStringWalker : public Regexp::Walker<bool> {
|
||||
virtual bool ShortVisit(Regexp* re, bool a) {
|
||||
// Should never be called: we use Walk(), not WalkExponential().
|
||||
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||
LOG(DFATAL) << "EmptyStringWalker::ShortVisit called";
|
||||
ABSL_LOG(DFATAL) << "EmptyStringWalker::ShortVisit called";
|
||||
#endif
|
||||
return a;
|
||||
}
|
||||
|
||||
38
src/third_party/re2/dist/re2/nfa.cc
vendored
38
src/third_party/re2/dist/re2/nfa.cc
vendored
@ -26,14 +26,16 @@
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <deque>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/log/absl_check.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "absl/strings/str_format.h"
|
||||
#include "util/logging.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "re2/pod_array.h"
|
||||
#include "re2/prog.h"
|
||||
#include "re2/regexp.h"
|
||||
@ -172,17 +174,17 @@ NFA::Thread* NFA::AllocThread() {
|
||||
}
|
||||
|
||||
NFA::Thread* NFA::Incref(Thread* t) {
|
||||
DCHECK(t != NULL);
|
||||
ABSL_DCHECK(t != NULL);
|
||||
t->ref++;
|
||||
return t;
|
||||
}
|
||||
|
||||
void NFA::Decref(Thread* t) {
|
||||
DCHECK(t != NULL);
|
||||
ABSL_DCHECK(t != NULL);
|
||||
t->ref--;
|
||||
if (t->ref > 0)
|
||||
return;
|
||||
DCHECK_EQ(t->ref, 0);
|
||||
ABSL_DCHECK_EQ(t->ref, 0);
|
||||
t->next = freelist_;
|
||||
freelist_ = t;
|
||||
}
|
||||
@ -208,7 +210,7 @@ void NFA::AddToThreadq(Threadq* q, int id0, int c, absl::string_view context,
|
||||
|
||||
stk[nstk++] = {id0, NULL};
|
||||
while (nstk > 0) {
|
||||
DCHECK_LE(nstk, stack_.size());
|
||||
ABSL_DCHECK_LE(nstk, stack_.size());
|
||||
AddState a = stk[--nstk];
|
||||
|
||||
Loop:
|
||||
@ -238,7 +240,7 @@ void NFA::AddToThreadq(Threadq* q, int id0, int c, absl::string_view context,
|
||||
Prog::Inst* ip = prog_->inst(id);
|
||||
switch (ip->opcode()) {
|
||||
default:
|
||||
LOG(DFATAL) << "unhandled " << ip->opcode() << " in AddToThreadq";
|
||||
ABSL_LOG(DFATAL) << "unhandled " << ip->opcode() << " in AddToThreadq";
|
||||
break;
|
||||
|
||||
case kInstFail:
|
||||
@ -249,7 +251,7 @@ void NFA::AddToThreadq(Threadq* q, int id0, int c, absl::string_view context,
|
||||
t = Incref(t0);
|
||||
*tp = t;
|
||||
|
||||
DCHECK(!ip->last());
|
||||
ABSL_DCHECK(!ip->last());
|
||||
a = {id+1, NULL};
|
||||
goto Loop;
|
||||
|
||||
@ -350,7 +352,7 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, absl::string_view context,
|
||||
switch (ip->opcode()) {
|
||||
default:
|
||||
// Should only see the values handled below.
|
||||
LOG(DFATAL) << "Unhandled " << ip->opcode() << " in step";
|
||||
ABSL_LOG(DFATAL) << "Unhandled " << ip->opcode() << " in step";
|
||||
break;
|
||||
|
||||
case kInstByteRange:
|
||||
@ -455,7 +457,7 @@ bool NFA::Search(absl::string_view text, absl::string_view context,
|
||||
|
||||
// Sanity check: make sure that text lies within context.
|
||||
if (BeginPtr(text) < BeginPtr(context) || EndPtr(text) > EndPtr(context)) {
|
||||
LOG(DFATAL) << "context does not contain text";
|
||||
ABSL_LOG(DFATAL) << "context does not contain text";
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -470,7 +472,7 @@ bool NFA::Search(absl::string_view text, absl::string_view context,
|
||||
}
|
||||
|
||||
if (nsubmatch < 0) {
|
||||
LOG(DFATAL) << "Bad args: nsubmatch=" << nsubmatch;
|
||||
ABSL_LOG(DFATAL) << "Bad args: nsubmatch=" << nsubmatch;
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -527,7 +529,7 @@ bool NFA::Search(absl::string_view text, absl::string_view context,
|
||||
|
||||
// This is a no-op the first time around the loop because runq is empty.
|
||||
int id = Step(runq, nextq, p < etext_ ? p[0] & 0xFF : -1, context, p);
|
||||
DCHECK_EQ(runq->size(), 0);
|
||||
ABSL_DCHECK_EQ(runq->size(), 0);
|
||||
using std::swap;
|
||||
swap(nextq, runq);
|
||||
nextq->clear();
|
||||
@ -538,7 +540,8 @@ bool NFA::Search(absl::string_view text, absl::string_view context,
|
||||
Prog::Inst* ip = prog_->inst(id);
|
||||
switch (ip->opcode()) {
|
||||
default:
|
||||
LOG(DFATAL) << "Unexpected opcode in short circuit: " << ip->opcode();
|
||||
ABSL_LOG(DFATAL) << "Unexpected opcode in short circuit: "
|
||||
<< ip->opcode();
|
||||
break;
|
||||
|
||||
case kInstCapture:
|
||||
@ -599,7 +602,7 @@ bool NFA::Search(absl::string_view text, absl::string_view context,
|
||||
// This complements the special case in NFA::Step().
|
||||
if (p == NULL) {
|
||||
(void) Step(runq, nextq, -1, context, p);
|
||||
DCHECK_EQ(runq->size(), 0);
|
||||
ABSL_DCHECK_EQ(runq->size(), 0);
|
||||
using std::swap;
|
||||
swap(nextq, runq);
|
||||
nextq->clear();
|
||||
@ -655,7 +658,7 @@ bool Prog::SearchNFA(absl::string_view text, absl::string_view context,
|
||||
// fanout holds the results and is also the work queue for the outer iteration.
|
||||
// reachable holds the reached nodes for the inner iteration.
|
||||
void Prog::Fanout(SparseArray<int>* fanout) {
|
||||
DCHECK_EQ(fanout->max_size(), size());
|
||||
ABSL_DCHECK_EQ(fanout->max_size(), size());
|
||||
SparseSet reachable(size());
|
||||
fanout->clear();
|
||||
fanout->set_new(start(), 0);
|
||||
@ -668,7 +671,8 @@ void Prog::Fanout(SparseArray<int>* fanout) {
|
||||
Prog::Inst* ip = inst(id);
|
||||
switch (ip->opcode()) {
|
||||
default:
|
||||
LOG(DFATAL) << "unhandled " << ip->opcode() << " in Prog::Fanout()";
|
||||
ABSL_LOG(DFATAL) << "unhandled " << ip->opcode()
|
||||
<< " in Prog::Fanout()";
|
||||
break;
|
||||
|
||||
case kInstByteRange:
|
||||
@ -682,7 +686,7 @@ void Prog::Fanout(SparseArray<int>* fanout) {
|
||||
break;
|
||||
|
||||
case kInstAltMatch:
|
||||
DCHECK(!ip->last());
|
||||
ABSL_DCHECK(!ip->last());
|
||||
reachable.insert(id+1);
|
||||
break;
|
||||
|
||||
|
||||
32
src/third_party/re2/dist/re2/onepass.cc
vendored
32
src/third_party/re2/dist/re2/onepass.cc
vendored
@ -52,19 +52,21 @@
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/container/fixed_array.h"
|
||||
#include "absl/container/inlined_vector.h"
|
||||
#include "absl/log/absl_check.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "absl/strings/str_format.h"
|
||||
#include "util/logging.h"
|
||||
#include "util/utf.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "re2/pod_array.h"
|
||||
#include "re2/prog.h"
|
||||
#include "re2/sparse_set.h"
|
||||
#include "util/utf.h"
|
||||
|
||||
// Silence "zero-sized array in struct/union" warning for OneState::action.
|
||||
#ifdef _MSC_VER
|
||||
@ -215,7 +217,7 @@ bool Prog::SearchOnePass(absl::string_view text, absl::string_view context,
|
||||
Anchor anchor, MatchKind kind,
|
||||
absl::string_view* match, int nmatch) {
|
||||
if (anchor != kAnchored && kind != kFullMatch) {
|
||||
LOG(DFATAL) << "Cannot use SearchOnePass for unanchored matches.";
|
||||
ABSL_LOG(DFATAL) << "Cannot use SearchOnePass for unanchored matches.";
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -442,13 +444,13 @@ bool Prog::IsOnePass() {
|
||||
Prog::Inst* ip = inst(id);
|
||||
switch (ip->opcode()) {
|
||||
default:
|
||||
LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
|
||||
ABSL_LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
|
||||
break;
|
||||
|
||||
case kInstAltMatch:
|
||||
// TODO(rsc): Ignoring kInstAltMatch optimization.
|
||||
// Should implement it in this engine, but it's subtle.
|
||||
DCHECK(!ip->last());
|
||||
ABSL_DCHECK(!ip->last());
|
||||
// If already on work queue, (1) is violated: bail out.
|
||||
if (!AddQ(&workq, id+1))
|
||||
goto fail;
|
||||
@ -460,7 +462,7 @@ bool Prog::IsOnePass() {
|
||||
if (nextindex == -1) {
|
||||
if (nalloc >= maxnodes) {
|
||||
if (ExtraDebug)
|
||||
LOG(ERROR) << absl::StrFormat(
|
||||
ABSL_LOG(ERROR) << absl::StrFormat(
|
||||
"Not OnePass: hit node limit %d >= %d", nalloc, maxnodes);
|
||||
goto fail;
|
||||
}
|
||||
@ -485,7 +487,7 @@ bool Prog::IsOnePass() {
|
||||
node->action[b] = newact;
|
||||
} else if (act != newact) {
|
||||
if (ExtraDebug)
|
||||
LOG(ERROR) << absl::StrFormat(
|
||||
ABSL_LOG(ERROR) << absl::StrFormat(
|
||||
"Not OnePass: conflict on byte %#x at state %d", c, *it);
|
||||
goto fail;
|
||||
}
|
||||
@ -506,7 +508,7 @@ bool Prog::IsOnePass() {
|
||||
node->action[b] = newact;
|
||||
} else if (act != newact) {
|
||||
if (ExtraDebug)
|
||||
LOG(ERROR) << absl::StrFormat(
|
||||
ABSL_LOG(ERROR) << absl::StrFormat(
|
||||
"Not OnePass: conflict on byte %#x at state %d", c, *it);
|
||||
goto fail;
|
||||
}
|
||||
@ -547,7 +549,7 @@ bool Prog::IsOnePass() {
|
||||
// If already on work queue, (1) is violated: bail out.
|
||||
if (!AddQ(&workq, ip->out())) {
|
||||
if (ExtraDebug)
|
||||
LOG(ERROR) << absl::StrFormat(
|
||||
ABSL_LOG(ERROR) << absl::StrFormat(
|
||||
"Not OnePass: multiple paths %d -> %d", *it, ip->out());
|
||||
goto fail;
|
||||
}
|
||||
@ -558,7 +560,7 @@ bool Prog::IsOnePass() {
|
||||
if (matched) {
|
||||
// (3) is violated
|
||||
if (ExtraDebug)
|
||||
LOG(ERROR) << absl::StrFormat(
|
||||
ABSL_LOG(ERROR) << absl::StrFormat(
|
||||
"Not OnePass: multiple matches from %d", *it);
|
||||
goto fail;
|
||||
}
|
||||
@ -579,9 +581,9 @@ bool Prog::IsOnePass() {
|
||||
}
|
||||
}
|
||||
|
||||
if (ExtraDebug) { // For debugging, dump one-pass NFA to LOG(ERROR).
|
||||
LOG(ERROR) << "bytemap:\n" << DumpByteMap();
|
||||
LOG(ERROR) << "prog:\n" << Dump();
|
||||
if (ExtraDebug) { // For debugging, dump one-pass NFA to ABSL_LOG(ERROR).
|
||||
ABSL_LOG(ERROR) << "bytemap:\n" << DumpByteMap();
|
||||
ABSL_LOG(ERROR) << "prog:\n" << Dump();
|
||||
|
||||
std::map<int, int> idmap;
|
||||
for (int i = 0; i < size; i++)
|
||||
@ -606,7 +608,7 @@ bool Prog::IsOnePass() {
|
||||
idmap[node->action[i] >> kIndexShift]);
|
||||
}
|
||||
}
|
||||
LOG(ERROR) << "nodes:\n" << dump;
|
||||
ABSL_LOG(ERROR) << "nodes:\n" << dump;
|
||||
}
|
||||
|
||||
dfa_mem_ -= nalloc*statesize;
|
||||
|
||||
118
src/third_party/re2/dist/re2/parse.cc
vendored
118
src/third_party/re2/dist/re2/parse.cc
vendored
@ -16,24 +16,24 @@
|
||||
// and recognizes the Perl escape sequences \d, \s, \w, \D, \S, and \W.
|
||||
// See regexp.h for rationale.
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/base/macros.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "absl/strings/ascii.h"
|
||||
#include "util/logging.h"
|
||||
#include "util/utf.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "re2/pod_array.h"
|
||||
#include "re2/regexp.h"
|
||||
#include "re2/unicode_casefold.h"
|
||||
#include "re2/unicode_groups.h"
|
||||
#include "re2/walker-inl.h"
|
||||
#include "util/utf.h"
|
||||
|
||||
#if defined(RE2_USE_ICU)
|
||||
#include "unicode/uniset.h"
|
||||
@ -303,7 +303,7 @@ Rune ApplyFold(const CaseFold* f, Rune r) {
|
||||
case EvenOddSkip: // even <-> odd but only applies to every other
|
||||
if ((r - f->lo) % 2)
|
||||
return r;
|
||||
ABSL_FALLTHROUGH_INTENDED;
|
||||
[[fallthrough]];
|
||||
case EvenOdd: // even <-> odd
|
||||
if (r%2 == 0)
|
||||
return r + 1;
|
||||
@ -312,7 +312,7 @@ Rune ApplyFold(const CaseFold* f, Rune r) {
|
||||
case OddEvenSkip: // odd <-> even but only applies to every other
|
||||
if ((r - f->lo) % 2)
|
||||
return r;
|
||||
ABSL_FALLTHROUGH_INTENDED;
|
||||
[[fallthrough]];
|
||||
case OddEven: // odd <-> even
|
||||
if (r%2 == 1)
|
||||
return r + 1;
|
||||
@ -337,6 +337,20 @@ Rune CycleFoldRune(Rune r) {
|
||||
return ApplyFold(f, r);
|
||||
}
|
||||
|
||||
// Add lo-hi to the class, along with their fold-equivalent characters.
|
||||
static void AddFoldedRangeLatin1(CharClassBuilder* cc, Rune lo, Rune hi) {
|
||||
while (lo <= hi) {
|
||||
cc->AddRange(lo, lo);
|
||||
if ('A' <= lo && lo <= 'Z') {
|
||||
cc->AddRange(lo - 'A' + 'a', lo - 'A' + 'a');
|
||||
}
|
||||
if ('a' <= lo && lo <= 'z') {
|
||||
cc->AddRange(lo - 'a' + 'A', lo - 'a' + 'A');
|
||||
}
|
||||
lo++;
|
||||
}
|
||||
}
|
||||
|
||||
// Add lo-hi to the class, along with their fold-equivalent characters.
|
||||
// If lo-hi is already in the class, assume that the fold-equivalent
|
||||
// chars are there too, so there's no work to do.
|
||||
@ -346,7 +360,7 @@ static void AddFoldedRange(CharClassBuilder* cc, Rune lo, Rune hi, int depth) {
|
||||
// current Unicode tables. make_unicode_casefold.py checks that
|
||||
// the cycles are not too long, and we double-check here using depth.
|
||||
if (depth > 10) {
|
||||
LOG(DFATAL) << "AddFoldedRange recurses too much.";
|
||||
ABSL_LOG(DFATAL) << "AddFoldedRange recurses too much.";
|
||||
return;
|
||||
}
|
||||
|
||||
@ -394,17 +408,26 @@ static void AddFoldedRange(CharClassBuilder* cc, Rune lo, Rune hi, int depth) {
|
||||
// Pushes the literal rune r onto the stack.
|
||||
bool Regexp::ParseState::PushLiteral(Rune r) {
|
||||
// Do case folding if needed.
|
||||
if ((flags_ & FoldCase) && CycleFoldRune(r) != r) {
|
||||
Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase);
|
||||
re->ccb_ = new CharClassBuilder;
|
||||
Rune r1 = r;
|
||||
do {
|
||||
if (!(flags_ & NeverNL) || r != '\n') {
|
||||
re->ccb_->AddRange(r, r);
|
||||
}
|
||||
r = CycleFoldRune(r);
|
||||
} while (r != r1);
|
||||
return PushRegexp(re);
|
||||
if (flags_ & FoldCase) {
|
||||
if (flags_ & Latin1 && (('A' <= r && r <= 'Z') ||
|
||||
('a' <= r && r <= 'z'))) {
|
||||
Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase);
|
||||
re->ccb_ = new CharClassBuilder;
|
||||
AddFoldedRangeLatin1(re->ccb_, r, r);
|
||||
return PushRegexp(re);
|
||||
}
|
||||
if (!(flags_ & Latin1) && CycleFoldRune(r) != r) {
|
||||
Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase);
|
||||
re->ccb_ = new CharClassBuilder;
|
||||
Rune r1 = r;
|
||||
do {
|
||||
if (!(flags_ & NeverNL) || r != '\n') {
|
||||
re->ccb_->AddRange(r, r);
|
||||
}
|
||||
r = CycleFoldRune(r);
|
||||
} while (r != r1);
|
||||
return PushRegexp(re);
|
||||
}
|
||||
}
|
||||
|
||||
// Exclude newline if applicable.
|
||||
@ -556,7 +579,7 @@ int RepetitionWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg,
|
||||
int RepetitionWalker::ShortVisit(Regexp* re, int parent_arg) {
|
||||
// Should never be called: we use Walk(), not WalkExponential().
|
||||
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||
LOG(DFATAL) << "RepetitionWalker::ShortVisit called";
|
||||
ABSL_LOG(DFATAL) << "RepetitionWalker::ShortVisit called";
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
@ -776,7 +799,8 @@ Rune* Regexp::LeadingString(Regexp* re, int* nrune,
|
||||
while (re->op() == kRegexpConcat && re->nsub() > 0)
|
||||
re = re->sub()[0];
|
||||
|
||||
*flags = static_cast<Regexp::ParseFlags>(re->parse_flags_ & Regexp::FoldCase);
|
||||
*flags = static_cast<Regexp::ParseFlags>(re->parse_flags_ &
|
||||
(Regexp::FoldCase | Regexp::Latin1));
|
||||
|
||||
if (re->op() == kRegexpLiteral) {
|
||||
*nrune = 1;
|
||||
@ -843,7 +867,7 @@ void Regexp::RemoveLeadingString(Regexp* re, int n) {
|
||||
case 0:
|
||||
case 1:
|
||||
// Impossible.
|
||||
LOG(DFATAL) << "Concat of " << re->nsub();
|
||||
ABSL_LOG(DFATAL) << "Concat of " << re->nsub();
|
||||
re->submany_ = NULL;
|
||||
re->op_ = kRegexpEmptyMatch;
|
||||
break;
|
||||
@ -973,7 +997,7 @@ int Regexp::FactorAlternation(Regexp** sub, int nsub, ParseFlags flags) {
|
||||
i += iter->nsub;
|
||||
break;
|
||||
default:
|
||||
LOG(DFATAL) << "unknown round: " << round;
|
||||
ABSL_LOG(DFATAL) << "unknown round: " << round;
|
||||
break;
|
||||
}
|
||||
// If we are done, copy until the end of sub.
|
||||
@ -1012,7 +1036,7 @@ int Regexp::FactorAlternation(Regexp** sub, int nsub, ParseFlags flags) {
|
||||
continue;
|
||||
}
|
||||
default:
|
||||
LOG(DFATAL) << "unknown round: " << round;
|
||||
ABSL_LOG(DFATAL) << "unknown round: " << round;
|
||||
break;
|
||||
}
|
||||
|
||||
@ -1175,16 +1199,26 @@ void FactorAlternationImpl::Round3(Regexp** sub, int nsub,
|
||||
if (re->op() == kRegexpCharClass) {
|
||||
CharClass* cc = re->cc();
|
||||
for (CharClass::iterator it = cc->begin(); it != cc->end(); ++it)
|
||||
ccb.AddRange(it->lo, it->hi);
|
||||
ccb.AddRangeFlags(it->lo, it->hi, re->parse_flags());
|
||||
} else if (re->op() == kRegexpLiteral) {
|
||||
ccb.AddRangeFlags(re->rune(), re->rune(), re->parse_flags());
|
||||
if (re->parse_flags() & Regexp::FoldCase) {
|
||||
// AddFoldedRange() can terminate prematurely if the character class
|
||||
// already contains the rune. For example, if it contains 'a' and we
|
||||
// want to add folded 'a', it sees 'a' and stops without adding 'A'.
|
||||
// To avoid that, we use an empty character class and then merge it.
|
||||
CharClassBuilder tmp;
|
||||
tmp.AddRangeFlags(re->rune(), re->rune(), re->parse_flags());
|
||||
ccb.AddCharClass(&tmp);
|
||||
} else {
|
||||
ccb.AddRangeFlags(re->rune(), re->rune(), re->parse_flags());
|
||||
}
|
||||
} else {
|
||||
LOG(DFATAL) << "RE2: unexpected op: " << re->op() << " "
|
||||
<< re->ToString();
|
||||
ABSL_LOG(DFATAL) << "RE2: unexpected op: " << re->op() << " "
|
||||
<< re->ToString();
|
||||
}
|
||||
re->Decref();
|
||||
}
|
||||
Regexp* re = Regexp::NewCharClass(ccb.GetCharClass(), flags);
|
||||
Regexp* re = Regexp::NewCharClass(ccb.GetCharClass(), flags & ~Regexp::FoldCase);
|
||||
splices->emplace_back(re, sub + start, i - start);
|
||||
}
|
||||
|
||||
@ -1441,7 +1475,7 @@ static int UnHex(int c) {
|
||||
return c - 'A' + 10;
|
||||
if ('a' <= c && c <= 'f')
|
||||
return c - 'a' + 10;
|
||||
LOG(DFATAL) << "Bad hex digit " << c;
|
||||
ABSL_LOG(DFATAL) << "Bad hex digit " << c;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1490,7 +1524,7 @@ static bool ParseEscape(absl::string_view* s, Rune* rp,
|
||||
// Single non-zero octal digit is a backreference; not supported.
|
||||
if (s->empty() || (*s)[0] < '0' || (*s)[0] > '7')
|
||||
goto BadEscape;
|
||||
ABSL_FALLTHROUGH_INTENDED;
|
||||
[[fallthrough]];
|
||||
case '0':
|
||||
// consume up to three octal digits; already have one.
|
||||
code = c - '0';
|
||||
@ -1612,10 +1646,15 @@ void CharClassBuilder::AddRangeFlags(
|
||||
}
|
||||
|
||||
// If folding case, add fold-equivalent characters too.
|
||||
if (parse_flags & Regexp::FoldCase)
|
||||
AddFoldedRange(this, lo, hi, 0);
|
||||
else
|
||||
if (parse_flags & Regexp::FoldCase) {
|
||||
if (parse_flags & Regexp::Latin1) {
|
||||
AddFoldedRangeLatin1(this, lo, hi);
|
||||
} else {
|
||||
AddFoldedRange(this, lo, hi, 0);
|
||||
}
|
||||
} else {
|
||||
AddRange(lo, hi);
|
||||
}
|
||||
}
|
||||
|
||||
// Look for a group with the given name.
|
||||
@ -2056,7 +2095,18 @@ bool Regexp::ParseState::ParsePerlFlags(absl::string_view* s) {
|
||||
// Caller is supposed to check this.
|
||||
if (!(flags_ & PerlX) || t.size() < 2 || t[0] != '(' || t[1] != '?') {
|
||||
status_->set_code(kRegexpInternalError);
|
||||
LOG(DFATAL) << "Bad call to ParseState::ParsePerlFlags";
|
||||
ABSL_LOG(DFATAL) << "Bad call to ParseState::ParsePerlFlags";
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check for look-around assertions. This is NOT because we support them! ;)
|
||||
// As per https://github.com/google/re2/issues/468, we really want to report
|
||||
// kRegexpBadPerlOp (not kRegexpBadNamedCapture) for look-behind assertions.
|
||||
// Additionally, it would be nice to report not "(?<", but "(?<=" or "(?<!".
|
||||
if ((t.size() > 3 && (t[2] == '=' || t[2] == '!')) ||
|
||||
(t.size() > 4 && t[2] == '<' && (t[3] == '=' || t[3] == '!'))) {
|
||||
status_->set_code(kRegexpBadPerlOp);
|
||||
status_->set_error_arg(absl::string_view(t.data(), t[2] == '<' ? 4 : 3));
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
28
src/third_party/re2/dist/re2/prefilter.cc
vendored
28
src/third_party/re2/dist/re2/prefilter.cc
vendored
@ -5,17 +5,19 @@
|
||||
#include "re2/prefilter.h"
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/log/absl_check.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "absl/strings/str_format.h"
|
||||
#include "util/logging.h"
|
||||
#include "util/utf.h"
|
||||
#include "re2/re2.h"
|
||||
#include "re2/regexp.h"
|
||||
#include "re2/unicode_casefold.h"
|
||||
#include "re2/walker-inl.h"
|
||||
#include "util/utf.h"
|
||||
|
||||
namespace re2 {
|
||||
|
||||
@ -300,8 +302,8 @@ void Prefilter::CrossProduct(const SSet& a, const SSet& b, SSet* dst) {
|
||||
Prefilter::Info* Prefilter::Info::Concat(Info* a, Info* b) {
|
||||
if (a == NULL)
|
||||
return b;
|
||||
DCHECK(a->is_exact_);
|
||||
DCHECK(b && b->is_exact_);
|
||||
ABSL_DCHECK(a->is_exact_);
|
||||
ABSL_DCHECK(b && b->is_exact_);
|
||||
Info *ab = new Info();
|
||||
|
||||
CrossProduct(a->exact_, b->exact_, &ab->exact_);
|
||||
@ -450,9 +452,9 @@ typedef CharClass::iterator CCIter;
|
||||
Prefilter::Info* Prefilter::Info::CClass(CharClass *cc,
|
||||
bool latin1) {
|
||||
if (ExtraDebug) {
|
||||
LOG(ERROR) << "CharClassInfo:";
|
||||
ABSL_LOG(ERROR) << "CharClassInfo:";
|
||||
for (CCIter i = cc->begin(); i != cc->end(); ++i)
|
||||
LOG(ERROR) << " " << i->lo << "-" << i->hi;
|
||||
ABSL_LOG(ERROR) << " " << i->lo << "-" << i->hi;
|
||||
}
|
||||
|
||||
// If the class is too large, it's okay to overestimate.
|
||||
@ -473,7 +475,7 @@ Prefilter::Info* Prefilter::Info::CClass(CharClass *cc,
|
||||
a->is_exact_ = true;
|
||||
|
||||
if (ExtraDebug)
|
||||
LOG(ERROR) << " = " << a->ToString();
|
||||
ABSL_LOG(ERROR) << " = " << a->ToString();
|
||||
|
||||
return a;
|
||||
}
|
||||
@ -501,7 +503,7 @@ class Prefilter::Info::Walker : public Regexp::Walker<Prefilter::Info*> {
|
||||
|
||||
Prefilter::Info* Prefilter::BuildInfo(Regexp* re) {
|
||||
if (ExtraDebug)
|
||||
LOG(ERROR) << "BuildPrefilter::Info: " << re->ToString();
|
||||
ABSL_LOG(ERROR) << "BuildPrefilter::Info: " << re->ToString();
|
||||
|
||||
bool latin1 = (re->parse_flags() & Regexp::Latin1) != 0;
|
||||
Prefilter::Info::Walker w(latin1);
|
||||
@ -531,7 +533,7 @@ Prefilter::Info* Prefilter::Info::Walker::PostVisit(
|
||||
default:
|
||||
case kRegexpRepeat:
|
||||
info = EmptyString();
|
||||
LOG(DFATAL) << "Bad regexp op " << re->op();
|
||||
ABSL_LOG(DFATAL) << "Bad regexp op " << re->op();
|
||||
break;
|
||||
|
||||
case kRegexpNoMatch:
|
||||
@ -634,8 +636,8 @@ Prefilter::Info* Prefilter::Info::Walker::PostVisit(
|
||||
}
|
||||
|
||||
if (ExtraDebug)
|
||||
LOG(ERROR) << "BuildInfo " << re->ToString()
|
||||
<< ": " << (info ? info->ToString() : "");
|
||||
ABSL_LOG(ERROR) << "BuildInfo " << re->ToString()
|
||||
<< ": " << (info ? info->ToString() : "");
|
||||
|
||||
return info;
|
||||
}
|
||||
@ -662,7 +664,7 @@ Prefilter* Prefilter::FromRegexp(Regexp* re) {
|
||||
std::string Prefilter::DebugString() const {
|
||||
switch (op_) {
|
||||
default:
|
||||
LOG(DFATAL) << "Bad op in Prefilter::DebugString: " << op_;
|
||||
ABSL_LOG(DFATAL) << "Bad op in Prefilter::DebugString: " << op_;
|
||||
return absl::StrFormat("op%d", op_);
|
||||
case NONE:
|
||||
return "*no-matches*";
|
||||
|
||||
5
src/third_party/re2/dist/re2/prefilter.h
vendored
5
src/third_party/re2/dist/re2/prefilter.h
vendored
@ -13,7 +13,8 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "util/logging.h"
|
||||
#include "absl/log/absl_check.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
|
||||
namespace re2 {
|
||||
|
||||
@ -42,7 +43,7 @@ class Prefilter {
|
||||
|
||||
// The children of the Prefilter node.
|
||||
std::vector<Prefilter*>* subs() {
|
||||
DCHECK(op_ == AND || op_ == OR);
|
||||
ABSL_DCHECK(op_ == AND || op_ == OR);
|
||||
return subs_;
|
||||
}
|
||||
|
||||
|
||||
42
src/third_party/re2/dist/re2/prefilter_tree.cc
vendored
42
src/third_party/re2/dist/re2/prefilter_tree.cc
vendored
@ -5,17 +5,17 @@
|
||||
#include "re2/prefilter_tree.h"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/log/absl_check.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "absl/strings/str_format.h"
|
||||
#include "util/logging.h"
|
||||
#include "re2/prefilter.h"
|
||||
#include "re2/re2.h"
|
||||
|
||||
namespace re2 {
|
||||
|
||||
@ -38,7 +38,7 @@ PrefilterTree::~PrefilterTree() {
|
||||
|
||||
void PrefilterTree::Add(Prefilter* prefilter) {
|
||||
if (compiled_) {
|
||||
LOG(DFATAL) << "Add called after Compile.";
|
||||
ABSL_LOG(DFATAL) << "Add called after Compile.";
|
||||
return;
|
||||
}
|
||||
if (prefilter != NULL && !KeepNode(prefilter)) {
|
||||
@ -51,14 +51,15 @@ void PrefilterTree::Add(Prefilter* prefilter) {
|
||||
|
||||
void PrefilterTree::Compile(std::vector<std::string>* atom_vec) {
|
||||
if (compiled_) {
|
||||
LOG(DFATAL) << "Compile called already.";
|
||||
ABSL_LOG(DFATAL) << "Compile called already.";
|
||||
return;
|
||||
}
|
||||
|
||||
// Some legacy users of PrefilterTree call Compile() before
|
||||
// adding any regexps and expect Compile() to have no effect.
|
||||
if (prefilter_vec_.empty())
|
||||
if (prefilter_vec_.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
compiled_ = true;
|
||||
|
||||
@ -82,7 +83,7 @@ bool PrefilterTree::KeepNode(Prefilter* node) const {
|
||||
|
||||
switch (node->op()) {
|
||||
default:
|
||||
LOG(DFATAL) << "Unexpected op in KeepNode: " << node->op();
|
||||
ABSL_LOG(DFATAL) << "Unexpected op in KeepNode: " << node->op();
|
||||
return false;
|
||||
|
||||
case Prefilter::ALL:
|
||||
@ -177,7 +178,7 @@ void PrefilterTree::AssignUniqueIds(NodeSet* nodes,
|
||||
int id = prefilter->unique_id();
|
||||
switch (prefilter->op()) {
|
||||
default:
|
||||
LOG(DFATAL) << "Unexpected op: " << prefilter->op();
|
||||
ABSL_LOG(DFATAL) << "Unexpected op: " << prefilter->op();
|
||||
return;
|
||||
|
||||
case Prefilter::ATOM:
|
||||
@ -211,7 +212,7 @@ void PrefilterTree::AssignUniqueIds(NodeSet* nodes,
|
||||
if (prefilter_vec_[i] == NULL)
|
||||
continue;
|
||||
int id = CanonicalNode(nodes, prefilter_vec_[i])->unique_id();
|
||||
DCHECK_LE(0, id);
|
||||
ABSL_DCHECK_LE(0, id);
|
||||
Entry* entry = &entries_[id];
|
||||
entry->regexps.push_back(static_cast<int>(i));
|
||||
}
|
||||
@ -272,10 +273,11 @@ void PrefilterTree::RegexpsGivenStrings(
|
||||
// Some legacy users of PrefilterTree call Compile() before
|
||||
// adding any regexps and expect Compile() to have no effect.
|
||||
// This kludge is a counterpart to that kludge.
|
||||
if (prefilter_vec_.empty())
|
||||
if (prefilter_vec_.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
LOG(ERROR) << "RegexpsGivenStrings called before Compile.";
|
||||
ABSL_LOG(ERROR) << "RegexpsGivenStrings called before Compile.";
|
||||
for (size_t i = 0; i < prefilter_vec_.size(); i++)
|
||||
regexps->push_back(static_cast<int>(i));
|
||||
} else {
|
||||
@ -329,31 +331,31 @@ void PrefilterTree::PropagateMatch(const std::vector<int>& atom_ids,
|
||||
|
||||
// Debugging help.
|
||||
void PrefilterTree::PrintPrefilter(int regexpid) {
|
||||
LOG(ERROR) << DebugNodeString(prefilter_vec_[regexpid]);
|
||||
ABSL_LOG(ERROR) << DebugNodeString(prefilter_vec_[regexpid]);
|
||||
}
|
||||
|
||||
void PrefilterTree::PrintDebugInfo(NodeSet* nodes) {
|
||||
LOG(ERROR) << "#Unique Atoms: " << atom_index_to_id_.size();
|
||||
LOG(ERROR) << "#Unique Nodes: " << entries_.size();
|
||||
ABSL_LOG(ERROR) << "#Unique Atoms: " << atom_index_to_id_.size();
|
||||
ABSL_LOG(ERROR) << "#Unique Nodes: " << entries_.size();
|
||||
|
||||
for (size_t i = 0; i < entries_.size(); i++) {
|
||||
const std::vector<int>& parents = entries_[i].parents;
|
||||
const std::vector<int>& regexps = entries_[i].regexps;
|
||||
LOG(ERROR) << "EntryId: " << i
|
||||
<< " N: " << parents.size() << " R: " << regexps.size();
|
||||
ABSL_LOG(ERROR) << "EntryId: " << i
|
||||
<< " N: " << parents.size() << " R: " << regexps.size();
|
||||
for (int parent : parents)
|
||||
LOG(ERROR) << parent;
|
||||
ABSL_LOG(ERROR) << parent;
|
||||
}
|
||||
LOG(ERROR) << "Set:";
|
||||
ABSL_LOG(ERROR) << "Set:";
|
||||
for (NodeSet::const_iterator iter = nodes->begin();
|
||||
iter != nodes->end(); ++iter)
|
||||
LOG(ERROR) << "NodeId: " << (*iter)->unique_id();
|
||||
ABSL_LOG(ERROR) << "NodeId: " << (*iter)->unique_id();
|
||||
}
|
||||
|
||||
std::string PrefilterTree::DebugNodeString(Prefilter* node) const {
|
||||
std::string node_string = "";
|
||||
if (node->op() == Prefilter::ATOM) {
|
||||
DCHECK(!node->atom().empty());
|
||||
ABSL_DCHECK(!node->atom().empty());
|
||||
node_string += node->atom();
|
||||
} else {
|
||||
// Adding the operation disambiguates AND and OR nodes.
|
||||
|
||||
@ -20,9 +20,10 @@
|
||||
#include <vector>
|
||||
|
||||
#include "absl/container/flat_hash_set.h"
|
||||
#include "absl/log/absl_check.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "re2/prefilter.h"
|
||||
#include "re2/sparse_array.h"
|
||||
#include "util/logging.h"
|
||||
|
||||
namespace re2 {
|
||||
|
||||
@ -62,15 +63,15 @@ class PrefilterTree {
|
||||
|
||||
struct PrefilterHash {
|
||||
size_t operator()(const Prefilter* a) const {
|
||||
DCHECK(a != NULL);
|
||||
ABSL_DCHECK(a != NULL);
|
||||
return absl::Hash<Prefilter>()(*a);
|
||||
}
|
||||
};
|
||||
|
||||
struct PrefilterEqual {
|
||||
bool operator()(const Prefilter* a, const Prefilter* b) const {
|
||||
DCHECK(a != NULL);
|
||||
DCHECK(b != NULL);
|
||||
ABSL_DCHECK(a != NULL);
|
||||
ABSL_DCHECK(b != NULL);
|
||||
return *a == *b;
|
||||
}
|
||||
};
|
||||
|
||||
73
src/third_party/re2/dist/re2/prog.cc
vendored
73
src/third_party/re2/dist/re2/prog.cc
vendored
@ -7,35 +7,42 @@
|
||||
|
||||
#include "re2/prog.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/log/absl_check.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "absl/strings/str_format.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "re2/bitmap256.h"
|
||||
#include "re2/pod_array.h"
|
||||
#include "re2/sparse_array.h"
|
||||
#include "re2/sparse_set.h"
|
||||
|
||||
#if defined(__AVX2__)
|
||||
#include <immintrin.h>
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
#endif
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
#include "absl/base/macros.h"
|
||||
#include "absl/strings/str_format.h"
|
||||
#include "util/logging.h"
|
||||
#include "re2/bitmap256.h"
|
||||
|
||||
namespace re2 {
|
||||
|
||||
// Constructors per Inst opcode
|
||||
|
||||
void Prog::Inst::InitAlt(uint32_t out, uint32_t out1) {
|
||||
DCHECK_EQ(out_opcode_, 0);
|
||||
ABSL_DCHECK_EQ(out_opcode_, uint32_t{0});
|
||||
set_out_opcode(out, kInstAlt);
|
||||
out1_ = out1;
|
||||
}
|
||||
|
||||
void Prog::Inst::InitByteRange(int lo, int hi, int foldcase, uint32_t out) {
|
||||
DCHECK_EQ(out_opcode_, 0);
|
||||
ABSL_DCHECK_EQ(out_opcode_, uint32_t{0});
|
||||
set_out_opcode(out, kInstByteRange);
|
||||
lo_ = lo & 0xFF;
|
||||
hi_ = hi & 0xFF;
|
||||
@ -43,30 +50,30 @@ void Prog::Inst::InitByteRange(int lo, int hi, int foldcase, uint32_t out) {
|
||||
}
|
||||
|
||||
void Prog::Inst::InitCapture(int cap, uint32_t out) {
|
||||
DCHECK_EQ(out_opcode_, 0);
|
||||
ABSL_DCHECK_EQ(out_opcode_, uint32_t{0});
|
||||
set_out_opcode(out, kInstCapture);
|
||||
cap_ = cap;
|
||||
}
|
||||
|
||||
void Prog::Inst::InitEmptyWidth(EmptyOp empty, uint32_t out) {
|
||||
DCHECK_EQ(out_opcode_, 0);
|
||||
ABSL_DCHECK_EQ(out_opcode_, uint32_t{0});
|
||||
set_out_opcode(out, kInstEmptyWidth);
|
||||
empty_ = empty;
|
||||
}
|
||||
|
||||
void Prog::Inst::InitMatch(int32_t id) {
|
||||
DCHECK_EQ(out_opcode_, 0);
|
||||
ABSL_DCHECK_EQ(out_opcode_, uint32_t{0});
|
||||
set_opcode(kInstMatch);
|
||||
match_id_ = id;
|
||||
}
|
||||
|
||||
void Prog::Inst::InitNop(uint32_t out) {
|
||||
DCHECK_EQ(out_opcode_, 0);
|
||||
ABSL_DCHECK_EQ(out_opcode_, uint32_t{0});
|
||||
set_opcode(kInstNop);
|
||||
}
|
||||
|
||||
void Prog::Inst::InitFail() {
|
||||
DCHECK_EQ(out_opcode_, 0);
|
||||
ABSL_DCHECK_EQ(out_opcode_, uint32_t{0});
|
||||
set_opcode(kInstFail);
|
||||
}
|
||||
|
||||
@ -198,7 +205,7 @@ static bool IsMatch(Prog* prog, Prog::Inst* ip) {
|
||||
for (;;) {
|
||||
switch (ip->opcode()) {
|
||||
default:
|
||||
LOG(DFATAL) << "Unexpected opcode in IsMatch: " << ip->opcode();
|
||||
ABSL_LOG(DFATAL) << "Unexpected opcode in IsMatch: " << ip->opcode();
|
||||
return false;
|
||||
|
||||
case kInstAlt:
|
||||
@ -362,11 +369,11 @@ class ByteMapBuilder {
|
||||
};
|
||||
|
||||
void ByteMapBuilder::Mark(int lo, int hi) {
|
||||
DCHECK_GE(lo, 0);
|
||||
DCHECK_GE(hi, 0);
|
||||
DCHECK_LE(lo, 255);
|
||||
DCHECK_LE(hi, 255);
|
||||
DCHECK_LE(lo, hi);
|
||||
ABSL_DCHECK_GE(lo, 0);
|
||||
ABSL_DCHECK_GE(hi, 0);
|
||||
ABSL_DCHECK_LE(lo, 255);
|
||||
ABSL_DCHECK_LE(hi, 255);
|
||||
ABSL_DCHECK_LE(lo, hi);
|
||||
|
||||
// Ignore any [0-255] ranges. They cause us to recolor every range, which
|
||||
// has no effect on the eventual result and is therefore a waste of time.
|
||||
@ -511,7 +518,7 @@ void Prog::ComputeByteMap() {
|
||||
builder.Build(bytemap_, &bytemap_range_);
|
||||
|
||||
if ((0)) { // For debugging, use trivial bytemap.
|
||||
LOG(ERROR) << "Using trivial bytemap.";
|
||||
ABSL_LOG(ERROR) << "Using trivial bytemap.";
|
||||
for (int i = 0; i < 256; i++)
|
||||
bytemap_[i] = static_cast<uint8_t>(i);
|
||||
bytemap_range_ = 256;
|
||||
@ -615,12 +622,12 @@ void Prog::Flatten() {
|
||||
size_t total = 0;
|
||||
for (int i = 0; i < kNumInst; i++)
|
||||
total += inst_count_[i];
|
||||
CHECK_EQ(total, flat.size());
|
||||
ABSL_CHECK_EQ(total, flat.size());
|
||||
#endif
|
||||
|
||||
// Remap start_unanchored and start.
|
||||
if (start_unanchored() == 0) {
|
||||
DCHECK_EQ(start(), 0);
|
||||
ABSL_DCHECK_EQ(start(), 0);
|
||||
} else if (start_unanchored() == start()) {
|
||||
set_start_unanchored(flatmap[1]);
|
||||
set_start(flatmap[1]);
|
||||
@ -677,7 +684,7 @@ void Prog::MarkSuccessors(SparseArray<int>* rootmap,
|
||||
Inst* ip = inst(id);
|
||||
switch (ip->opcode()) {
|
||||
default:
|
||||
LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
|
||||
ABSL_LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
|
||||
break;
|
||||
|
||||
case kInstAltMatch:
|
||||
@ -737,7 +744,7 @@ void Prog::MarkDominator(int root, SparseArray<int>* rootmap,
|
||||
Inst* ip = inst(id);
|
||||
switch (ip->opcode()) {
|
||||
default:
|
||||
LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
|
||||
ABSL_LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
|
||||
break;
|
||||
|
||||
case kInstAltMatch:
|
||||
@ -804,7 +811,7 @@ void Prog::EmitList(int root, SparseArray<int>* rootmap,
|
||||
Inst* ip = inst(id);
|
||||
switch (ip->opcode()) {
|
||||
default:
|
||||
LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
|
||||
ABSL_LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
|
||||
break;
|
||||
|
||||
case kInstAltMatch:
|
||||
@ -812,7 +819,7 @@ void Prog::EmitList(int root, SparseArray<int>* rootmap,
|
||||
flat->back().set_opcode(kInstAltMatch);
|
||||
flat->back().set_out(static_cast<int>(flat->size()));
|
||||
flat->back().out1_ = static_cast<uint32_t>(flat->size())+1;
|
||||
ABSL_FALLTHROUGH_INTENDED;
|
||||
[[fallthrough]];
|
||||
|
||||
case kInstAlt:
|
||||
stk->push_back(ip->out1());
|
||||
@ -1105,7 +1112,7 @@ const void* Prog::PrefixAccel_ShiftDFA(const void* data, size_t size) {
|
||||
#if defined(__AVX2__)
|
||||
// Finds the least significant non-zero bit in n.
|
||||
static int FindLSBSet(uint32_t n) {
|
||||
DCHECK_NE(n, 0);
|
||||
ABSL_DCHECK_NE(n, uint32_t{0});
|
||||
#if defined(__GNUC__)
|
||||
return __builtin_ctz(n);
|
||||
#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
|
||||
@ -1127,7 +1134,7 @@ static int FindLSBSet(uint32_t n) {
|
||||
#endif
|
||||
|
||||
const void* Prog::PrefixAccel_FrontAndBack(const void* data, size_t size) {
|
||||
DCHECK_GE(prefix_size_, 2);
|
||||
ABSL_DCHECK_GE(prefix_size_, size_t{2});
|
||||
if (size < prefix_size_)
|
||||
return NULL;
|
||||
// Don't bother searching the last prefix_size_-1 bytes for prefix_front_.
|
||||
@ -1164,7 +1171,7 @@ const void* Prog::PrefixAccel_FrontAndBack(const void* data, size_t size) {
|
||||
|
||||
const char* p0 = reinterpret_cast<const char*>(data);
|
||||
for (const char* p = p0;; p++) {
|
||||
DCHECK_GE(size, static_cast<size_t>(p-p0));
|
||||
ABSL_DCHECK_GE(size, static_cast<size_t>(p-p0));
|
||||
p = reinterpret_cast<const char*>(memchr(p, prefix_front_, size - (p-p0)));
|
||||
if (p == NULL || p[prefix_size_-1] == prefix_back_)
|
||||
return p;
|
||||
|
||||
59
src/third_party/re2/dist/re2/prog.h
vendored
59
src/third_party/re2/dist/re2/prog.h
vendored
@ -10,14 +10,17 @@
|
||||
// expression symbolically.
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <cstring>
|
||||
#include <functional>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/base/call_once.h"
|
||||
#include "absl/log/absl_check.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "util/logging.h"
|
||||
#include "re2/pod_array.h"
|
||||
#include "re2/re2.h"
|
||||
#include "re2/sparse_array.h"
|
||||
@ -79,20 +82,44 @@ class Prog {
|
||||
|
||||
// Getters
|
||||
int id(Prog* p) { return static_cast<int>(this - p->inst_.data()); }
|
||||
InstOp opcode() { return static_cast<InstOp>(out_opcode_&7); }
|
||||
int last() { return (out_opcode_>>3)&1; }
|
||||
int out() { return out_opcode_>>4; }
|
||||
int out1() { DCHECK(opcode() == kInstAlt || opcode() == kInstAltMatch); return out1_; }
|
||||
int cap() { DCHECK_EQ(opcode(), kInstCapture); return cap_; }
|
||||
int lo() { DCHECK_EQ(opcode(), kInstByteRange); return lo_; }
|
||||
int hi() { DCHECK_EQ(opcode(), kInstByteRange); return hi_; }
|
||||
int foldcase() { DCHECK_EQ(opcode(), kInstByteRange); return hint_foldcase_&1; }
|
||||
int hint() { DCHECK_EQ(opcode(), kInstByteRange); return hint_foldcase_>>1; }
|
||||
int match_id() { DCHECK_EQ(opcode(), kInstMatch); return match_id_; }
|
||||
EmptyOp empty() { DCHECK_EQ(opcode(), kInstEmptyWidth); return empty_; }
|
||||
InstOp opcode() { return static_cast<InstOp>(out_opcode_ & 7); }
|
||||
int last() { return (out_opcode_ >> 3) & 1; }
|
||||
int out() { return out_opcode_ >> 4; }
|
||||
int out1() {
|
||||
ABSL_DCHECK(opcode() == kInstAlt || opcode() == kInstAltMatch);
|
||||
return out1_;
|
||||
}
|
||||
int cap() {
|
||||
ABSL_DCHECK_EQ(opcode(), kInstCapture);
|
||||
return cap_;
|
||||
}
|
||||
int lo() {
|
||||
ABSL_DCHECK_EQ(opcode(), kInstByteRange);
|
||||
return lo_;
|
||||
}
|
||||
int hi() {
|
||||
ABSL_DCHECK_EQ(opcode(), kInstByteRange);
|
||||
return hi_;
|
||||
}
|
||||
int foldcase() {
|
||||
ABSL_DCHECK_EQ(opcode(), kInstByteRange);
|
||||
return hint_foldcase_ & 1;
|
||||
}
|
||||
int hint() {
|
||||
ABSL_DCHECK_EQ(opcode(), kInstByteRange);
|
||||
return hint_foldcase_ >> 1;
|
||||
}
|
||||
int match_id() {
|
||||
ABSL_DCHECK_EQ(opcode(), kInstMatch);
|
||||
return match_id_;
|
||||
}
|
||||
EmptyOp empty() {
|
||||
ABSL_DCHECK_EQ(opcode(), kInstEmptyWidth);
|
||||
return empty_;
|
||||
}
|
||||
|
||||
bool greedy(Prog* p) {
|
||||
DCHECK_EQ(opcode(), kInstAltMatch);
|
||||
ABSL_DCHECK_EQ(opcode(), kInstAltMatch);
|
||||
return p->inst(out())->opcode() == kInstByteRange ||
|
||||
(p->inst(out())->opcode() == kInstNop &&
|
||||
p->inst(p->inst(out())->out())->opcode() == kInstByteRange);
|
||||
@ -100,7 +127,7 @@ class Prog {
|
||||
|
||||
// Does this inst (an kInstByteRange) match c?
|
||||
inline bool Matches(int c) {
|
||||
DCHECK_EQ(opcode(), kInstByteRange);
|
||||
ABSL_DCHECK_EQ(opcode(), kInstByteRange);
|
||||
if (foldcase() && 'A' <= c && c <= 'Z')
|
||||
c += 'a' - 'A';
|
||||
return lo_ <= c && c <= hi_;
|
||||
@ -221,7 +248,7 @@ class Prog {
|
||||
// Accelerates to the first likely occurrence of the prefix.
|
||||
// Returns a pointer to the first byte or NULL if not found.
|
||||
const void* PrefixAccel(const void* data, size_t size) {
|
||||
DCHECK(can_prefix_accel());
|
||||
ABSL_DCHECK(can_prefix_accel());
|
||||
if (prefix_foldcase_) {
|
||||
return PrefixAccel_ShiftDFA(data, size);
|
||||
} else if (prefix_size_ != 1) {
|
||||
|
||||
113
src/third_party/re2/dist/re2/re2.cc
vendored
113
src/third_party/re2/dist/re2/re2.cc
vendored
@ -9,32 +9,36 @@
|
||||
|
||||
#include "re2/re2.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <ctype.h>
|
||||
#include <errno.h>
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <iterator>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/base/call_once.h"
|
||||
#include "absl/base/macros.h"
|
||||
#include "absl/container/fixed_array.h"
|
||||
#include "absl/log/absl_check.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "absl/strings/ascii.h"
|
||||
#include "absl/strings/str_format.h"
|
||||
#include "util/logging.h"
|
||||
#include "util/strutil.h"
|
||||
#include "util/utf.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "re2/prog.h"
|
||||
#include "re2/regexp.h"
|
||||
#include "re2/sparse_array.h"
|
||||
#include "util/strutil.h"
|
||||
#include "util/utf.h"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
namespace re2 {
|
||||
|
||||
@ -139,6 +143,11 @@ static std::string trunc(absl::string_view pattern) {
|
||||
|
||||
|
||||
RE2::RE2(const char* pattern) {
|
||||
// If absl::string_view becomes an alias for std::string_view,
|
||||
// it will stop allowing NULL to be converted.
|
||||
// Handle NULL explicitly to keep callers working no matter what.
|
||||
if (pattern == NULL)
|
||||
pattern = "";
|
||||
Init(pattern, DefaultOptions);
|
||||
}
|
||||
|
||||
@ -159,7 +168,7 @@ int RE2::Options::ParseFlags() const {
|
||||
switch (encoding()) {
|
||||
default:
|
||||
if (log_errors())
|
||||
LOG(ERROR) << "Unknown encoding " << encoding();
|
||||
ABSL_LOG(ERROR) << "Unknown encoding " << encoding();
|
||||
break;
|
||||
case RE2::Options::EncodingUTF8:
|
||||
break;
|
||||
@ -230,8 +239,8 @@ void RE2::Init(absl::string_view pattern, const Options& options) {
|
||||
&status);
|
||||
if (entire_regexp_ == NULL) {
|
||||
if (options_.log_errors()) {
|
||||
LOG(ERROR) << "Error parsing '" << trunc(*pattern_) << "': "
|
||||
<< status.Text();
|
||||
ABSL_LOG(ERROR) << "Error parsing '" << trunc(*pattern_) << "': "
|
||||
<< status.Text();
|
||||
}
|
||||
error_ = new std::string(status.Text());
|
||||
error_code_ = RegexpErrorToRE2(status.code());
|
||||
@ -255,7 +264,7 @@ void RE2::Init(absl::string_view pattern, const Options& options) {
|
||||
prog_ = suffix_regexp_->CompileToProg(options_.max_mem()*2/3);
|
||||
if (prog_ == NULL) {
|
||||
if (options_.log_errors())
|
||||
LOG(ERROR) << "Error compiling '" << trunc(*pattern_) << "'";
|
||||
ABSL_LOG(ERROR) << "Error compiling '" << trunc(*pattern_) << "'";
|
||||
error_ = new std::string("pattern too large - compile failed");
|
||||
error_code_ = RE2::ErrorPatternTooLarge;
|
||||
return;
|
||||
@ -281,8 +290,8 @@ re2::Prog* RE2::ReverseProg() const {
|
||||
re->suffix_regexp_->CompileToReverseProg(re->options_.max_mem() / 3);
|
||||
if (re->rprog_ == NULL) {
|
||||
if (re->options_.log_errors())
|
||||
LOG(ERROR) << "Error reverse compiling '" << trunc(*re->pattern_)
|
||||
<< "'";
|
||||
ABSL_LOG(ERROR) << "Error reverse compiling '" << trunc(*re->pattern_)
|
||||
<< "'";
|
||||
// We no longer touch error_ and error_code_ because failing to compile
|
||||
// the reverse Prog is not a showstopper: falling back to NFA execution
|
||||
// is fine. More importantly, an RE2 object is supposed to be logically
|
||||
@ -328,7 +337,7 @@ int RE2::ReverseProgramSize() const {
|
||||
|
||||
// Finds the most significant non-zero bit in n.
|
||||
static int FindMSBSet(uint32_t n) {
|
||||
DCHECK_NE(n, 0);
|
||||
ABSL_DCHECK_NE(n, uint32_t{0});
|
||||
#if defined(__GNUC__)
|
||||
return 31 ^ __builtin_clz(n);
|
||||
#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
|
||||
@ -454,8 +463,8 @@ bool RE2::Replace(std::string* str,
|
||||
if (!re.Rewrite(&s, rewrite, vec, nvec))
|
||||
return false;
|
||||
|
||||
assert(vec[0].data() >= str->data());
|
||||
assert(vec[0].data() + vec[0].size() <= str->data() + str->size());
|
||||
ABSL_DCHECK_GE(vec[0].data(), str->data());
|
||||
ABSL_DCHECK_LE(vec[0].data() + vec[0].size(), str->data() + str->size());
|
||||
str->replace(vec[0].data() - str->data(), vec[0].size(), s);
|
||||
return true;
|
||||
}
|
||||
@ -654,16 +663,16 @@ bool RE2::Match(absl::string_view text,
|
||||
int nsubmatch) const {
|
||||
if (!ok()) {
|
||||
if (options_.log_errors())
|
||||
LOG(ERROR) << "Invalid RE2: " << *error_;
|
||||
ABSL_LOG(ERROR) << "Invalid RE2: " << *error_;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (startpos > endpos || endpos > text.size()) {
|
||||
if (options_.log_errors())
|
||||
LOG(ERROR) << "RE2: invalid startpos, endpos pair. ["
|
||||
<< "startpos: " << startpos << ", "
|
||||
<< "endpos: " << endpos << ", "
|
||||
<< "text size: " << text.size() << "]";
|
||||
ABSL_LOG(ERROR) << "RE2: invalid startpos, endpos pair. ["
|
||||
<< "startpos: " << startpos << ", "
|
||||
<< "endpos: " << endpos << ", "
|
||||
<< "text size: " << text.size() << "]";
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -733,7 +742,7 @@ bool RE2::Match(absl::string_view text,
|
||||
bool skipped_test = false;
|
||||
switch (re_anchor) {
|
||||
default:
|
||||
LOG(DFATAL) << "Unexpected re_anchor value: " << re_anchor;
|
||||
ABSL_LOG(DFATAL) << "Unexpected re_anchor value: " << re_anchor;
|
||||
return false;
|
||||
|
||||
case UNANCHORED: {
|
||||
@ -751,11 +760,11 @@ bool RE2::Match(absl::string_view text,
|
||||
Prog::kLongestMatch, matchp, &dfa_failed, NULL)) {
|
||||
if (dfa_failed) {
|
||||
if (options_.log_errors())
|
||||
LOG(ERROR) << "DFA out of memory: "
|
||||
<< "pattern length " << pattern_->size() << ", "
|
||||
<< "program size " << prog->size() << ", "
|
||||
<< "list count " << prog->list_count() << ", "
|
||||
<< "bytemap range " << prog->bytemap_range();
|
||||
ABSL_LOG(ERROR) << "DFA out of memory: "
|
||||
<< "pattern length " << pattern_->size() << ", "
|
||||
<< "program size " << prog->size() << ", "
|
||||
<< "list count " << prog->list_count() << ", "
|
||||
<< "bytemap range " << prog->bytemap_range();
|
||||
// Fall back to NFA below.
|
||||
skipped_test = true;
|
||||
break;
|
||||
@ -771,11 +780,11 @@ bool RE2::Match(absl::string_view text,
|
||||
matchp, &dfa_failed, NULL)) {
|
||||
if (dfa_failed) {
|
||||
if (options_.log_errors())
|
||||
LOG(ERROR) << "DFA out of memory: "
|
||||
<< "pattern length " << pattern_->size() << ", "
|
||||
<< "program size " << prog_->size() << ", "
|
||||
<< "list count " << prog_->list_count() << ", "
|
||||
<< "bytemap range " << prog_->bytemap_range();
|
||||
ABSL_LOG(ERROR) << "DFA out of memory: "
|
||||
<< "pattern length " << pattern_->size() << ", "
|
||||
<< "program size " << prog_->size() << ", "
|
||||
<< "list count " << prog_->list_count() << ", "
|
||||
<< "bytemap range " << prog_->bytemap_range();
|
||||
// Fall back to NFA below.
|
||||
skipped_test = true;
|
||||
break;
|
||||
@ -797,17 +806,17 @@ bool RE2::Match(absl::string_view text,
|
||||
Prog::kLongestMatch, &match, &dfa_failed, NULL)) {
|
||||
if (dfa_failed) {
|
||||
if (options_.log_errors())
|
||||
LOG(ERROR) << "DFA out of memory: "
|
||||
<< "pattern length " << pattern_->size() << ", "
|
||||
<< "program size " << prog->size() << ", "
|
||||
<< "list count " << prog->list_count() << ", "
|
||||
<< "bytemap range " << prog->bytemap_range();
|
||||
ABSL_LOG(ERROR) << "DFA out of memory: "
|
||||
<< "pattern length " << pattern_->size() << ", "
|
||||
<< "program size " << prog->size() << ", "
|
||||
<< "list count " << prog->list_count() << ", "
|
||||
<< "bytemap range " << prog->bytemap_range();
|
||||
// Fall back to NFA below.
|
||||
skipped_test = true;
|
||||
break;
|
||||
}
|
||||
if (options_.log_errors())
|
||||
LOG(ERROR) << "SearchDFA inconsistency";
|
||||
ABSL_LOG(ERROR) << "SearchDFA inconsistency";
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
@ -840,11 +849,11 @@ bool RE2::Match(absl::string_view text,
|
||||
&match, &dfa_failed, NULL)) {
|
||||
if (dfa_failed) {
|
||||
if (options_.log_errors())
|
||||
LOG(ERROR) << "DFA out of memory: "
|
||||
<< "pattern length " << pattern_->size() << ", "
|
||||
<< "program size " << prog_->size() << ", "
|
||||
<< "list count " << prog_->list_count() << ", "
|
||||
<< "bytemap range " << prog_->bytemap_range();
|
||||
ABSL_LOG(ERROR) << "DFA out of memory: "
|
||||
<< "pattern length " << pattern_->size() << ", "
|
||||
<< "program size " << prog_->size() << ", "
|
||||
<< "list count " << prog_->list_count() << ", "
|
||||
<< "bytemap range " << prog_->bytemap_range();
|
||||
// Fall back to NFA below.
|
||||
skipped_test = true;
|
||||
break;
|
||||
@ -876,20 +885,20 @@ bool RE2::Match(absl::string_view text,
|
||||
if (can_one_pass && anchor != Prog::kUnanchored) {
|
||||
if (!prog_->SearchOnePass(subtext1, text, anchor, kind, submatch, ncap)) {
|
||||
if (!skipped_test && options_.log_errors())
|
||||
LOG(ERROR) << "SearchOnePass inconsistency";
|
||||
ABSL_LOG(ERROR) << "SearchOnePass inconsistency";
|
||||
return false;
|
||||
}
|
||||
} else if (can_bit_state && subtext1.size() <= bit_state_text_max_size) {
|
||||
if (!prog_->SearchBitState(subtext1, text, anchor,
|
||||
kind, submatch, ncap)) {
|
||||
if (!skipped_test && options_.log_errors())
|
||||
LOG(ERROR) << "SearchBitState inconsistency";
|
||||
ABSL_LOG(ERROR) << "SearchBitState inconsistency";
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
if (!prog_->SearchNFA(subtext1, text, anchor, kind, submatch, ncap)) {
|
||||
if (!skipped_test && options_.log_errors())
|
||||
LOG(ERROR) << "SearchNFA inconsistency";
|
||||
ABSL_LOG(ERROR) << "SearchNFA inconsistency";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -914,7 +923,7 @@ bool RE2::DoMatch(absl::string_view text,
|
||||
int n) const {
|
||||
if (!ok()) {
|
||||
if (options_.log_errors())
|
||||
LOG(ERROR) << "Invalid RE2: " << *error_;
|
||||
ABSL_LOG(ERROR) << "Invalid RE2: " << *error_;
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -1034,8 +1043,8 @@ bool RE2::Rewrite(std::string* out,
|
||||
int n = (c - '0');
|
||||
if (n >= veclen) {
|
||||
if (options_.log_errors()) {
|
||||
LOG(ERROR) << "invalid substitution \\" << n
|
||||
<< " from " << veclen << " groups";
|
||||
ABSL_LOG(ERROR) << "invalid substitution \\" << n
|
||||
<< " from " << veclen << " groups";
|
||||
}
|
||||
return false;
|
||||
}
|
||||
@ -1046,7 +1055,7 @@ bool RE2::Rewrite(std::string* out,
|
||||
out->push_back('\\');
|
||||
} else {
|
||||
if (options_.log_errors())
|
||||
LOG(ERROR) << "invalid rewrite pattern: " << rewrite.data();
|
||||
ABSL_LOG(ERROR) << "invalid rewrite pattern: " << rewrite;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
70
src/third_party/re2/dist/re2/re2.h
vendored
70
src/third_party/re2/dist/re2/re2.h
vendored
@ -50,10 +50,10 @@
|
||||
// supplied pattern exactly.
|
||||
//
|
||||
// Example: successful match
|
||||
// CHECK(RE2::FullMatch("hello", "h.*o"));
|
||||
// ABSL_CHECK(RE2::FullMatch("hello", "h.*o"));
|
||||
//
|
||||
// Example: unsuccessful match (requires full match):
|
||||
// CHECK(!RE2::FullMatch("hello", "e"));
|
||||
// ABSL_CHECK(!RE2::FullMatch("hello", "e"));
|
||||
//
|
||||
// -----------------------------------------------------------------------
|
||||
// UTF-8 AND THE MATCHING INTERFACE:
|
||||
@ -62,8 +62,9 @@
|
||||
// The RE2::Latin1 option causes them to be interpreted as Latin-1.
|
||||
//
|
||||
// Example:
|
||||
// CHECK(RE2::FullMatch(utf8_string, RE2(utf8_pattern)));
|
||||
// CHECK(RE2::FullMatch(latin1_string, RE2(latin1_pattern, RE2::Latin1)));
|
||||
// ABSL_CHECK(RE2::FullMatch(utf8_string, RE2(utf8_pattern)));
|
||||
// ABSL_CHECK(RE2::FullMatch(latin1_string, RE2(latin1_pattern,
|
||||
// RE2::Latin1)));
|
||||
//
|
||||
// -----------------------------------------------------------------------
|
||||
// SUBMATCH EXTRACTION:
|
||||
@ -83,27 +84,27 @@
|
||||
// Example: extracts "ruby" into "s" and 1234 into "i"
|
||||
// int i;
|
||||
// std::string s;
|
||||
// CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
|
||||
// ABSL_CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
|
||||
//
|
||||
// Example: extracts "ruby" into "s" and no value into "i"
|
||||
// absl::optional<int> i;
|
||||
// std::optional<int> i;
|
||||
// std::string s;
|
||||
// CHECK(RE2::FullMatch("ruby", "(\\w+)(?::(\\d+))?", &s, &i));
|
||||
// ABSL_CHECK(RE2::FullMatch("ruby", "(\\w+)(?::(\\d+))?", &s, &i));
|
||||
//
|
||||
// Example: fails because string cannot be stored in integer
|
||||
// CHECK(!RE2::FullMatch("ruby", "(.*)", &i));
|
||||
// ABSL_CHECK(!RE2::FullMatch("ruby", "(.*)", &i));
|
||||
//
|
||||
// Example: fails because there aren't enough sub-patterns
|
||||
// CHECK(!RE2::FullMatch("ruby:1234", "\\w+:\\d+", &s));
|
||||
// ABSL_CHECK(!RE2::FullMatch("ruby:1234", "\\w+:\\d+", &s));
|
||||
//
|
||||
// Example: does not try to extract any extra sub-patterns
|
||||
// CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s));
|
||||
// ABSL_CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s));
|
||||
//
|
||||
// Example: does not try to extract into NULL
|
||||
// CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", NULL, &i));
|
||||
// ABSL_CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", NULL, &i));
|
||||
//
|
||||
// Example: integer overflow causes failure
|
||||
// CHECK(!RE2::FullMatch("ruby:1234567891234", "\\w+:(\\d+)", &i));
|
||||
// ABSL_CHECK(!RE2::FullMatch("ruby:1234567891234", "\\w+:(\\d+)", &i));
|
||||
//
|
||||
// NOTE(rsc): Asking for submatches slows successful matches quite a bit.
|
||||
// This may get a little faster in the future, but right now is slower
|
||||
@ -117,12 +118,12 @@
|
||||
// to match any substring of the text.
|
||||
//
|
||||
// Example: simple search for a string:
|
||||
// CHECK(RE2::PartialMatch("hello", "ell"));
|
||||
// ABSL_CHECK(RE2::PartialMatch("hello", "ell"));
|
||||
//
|
||||
// Example: find first number in a string
|
||||
// int number;
|
||||
// CHECK(RE2::PartialMatch("x*100 + 20", "(\\d+)", &number));
|
||||
// CHECK_EQ(number, 100);
|
||||
// ABSL_CHECK(RE2::PartialMatch("x*100 + 20", "(\\d+)", &number));
|
||||
// ABSL_CHECK_EQ(number, 100);
|
||||
//
|
||||
// -----------------------------------------------------------------------
|
||||
// PRE-COMPILED REGULAR EXPRESSIONS
|
||||
@ -203,27 +204,28 @@
|
||||
//
|
||||
// Example:
|
||||
// int a, b, c, d;
|
||||
// CHECK(RE2::FullMatch("100 40 0100 0x40", "(.*) (.*) (.*) (.*)",
|
||||
// ABSL_CHECK(RE2::FullMatch("100 40 0100 0x40", "(.*) (.*) (.*) (.*)",
|
||||
// RE2::Octal(&a), RE2::Hex(&b), RE2::CRadix(&c), RE2::CRadix(&d));
|
||||
// will leave 64 in a, b, c, and d.
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/base/call_once.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "re2/stringpiece.h"
|
||||
|
||||
#if defined(__APPLE__)
|
||||
#include <TargetConditionals.h>
|
||||
#endif
|
||||
|
||||
#include "absl/base/call_once.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "absl/types/optional.h"
|
||||
#include "re2/stringpiece.h"
|
||||
|
||||
namespace re2 {
|
||||
class Prog;
|
||||
class Regexp;
|
||||
@ -383,7 +385,7 @@ class RE2 {
|
||||
// type, or one of:
|
||||
// std::string (matched piece is copied to string)
|
||||
// absl::string_view (string_view is mutated to point to matched piece)
|
||||
// absl::optional<T> (T is a supported numeric or string type as above)
|
||||
// std::optional<T> (T is a supported numeric or string type as above)
|
||||
// T ("bool T::ParseFrom(const char*, size_t)" must exist)
|
||||
// (void*)NULL (the corresponding matched sub-pattern is not copied)
|
||||
//
|
||||
@ -404,7 +406,7 @@ class RE2 {
|
||||
// int number;
|
||||
// RE2::FullMatch("abc", "[a-z]+(\\d+)?", &number);
|
||||
//
|
||||
// Use absl::optional<int> instead to handle this case correctly.
|
||||
// Use std::optional<int> instead to handle this case correctly.
|
||||
template <typename... A>
|
||||
static bool FullMatch(absl::string_view text, const RE2& re, A&&... a) {
|
||||
return Apply(FullMatchN, text, re, Arg(std::forward<A>(a))...);
|
||||
@ -469,7 +471,7 @@ class RE2 {
|
||||
// text. E.g.,
|
||||
//
|
||||
// std::string s = "yabba dabba doo";
|
||||
// CHECK(RE2::Replace(&s, "b+", "d"));
|
||||
// ABSL_CHECK(RE2::Replace(&s, "b+", "d"));
|
||||
//
|
||||
// will leave "s" containing "yada dabba doo"
|
||||
//
|
||||
@ -483,7 +485,7 @@ class RE2 {
|
||||
// of the pattern in the string with the rewrite. E.g.
|
||||
//
|
||||
// std::string s = "yabba dabba doo";
|
||||
// CHECK(RE2::GlobalReplace(&s, "b+", "d"));
|
||||
// ABSL_CHECK(RE2::GlobalReplace(&s, "b+", "d"));
|
||||
//
|
||||
// will leave "s" containing "yada dada doo"
|
||||
// Replacements are not subject to re-matching.
|
||||
@ -840,12 +842,12 @@ template <> struct Parse4ary<unsigned long long> : public std::true_type {};
|
||||
template <typename T>
|
||||
bool Parse(const char* str, size_t n, T* dest, int radix);
|
||||
|
||||
// Support absl::optional<T> for all T with a stock parser.
|
||||
template <typename T> struct Parse3ary<absl::optional<T>> : public Parse3ary<T> {};
|
||||
template <typename T> struct Parse4ary<absl::optional<T>> : public Parse4ary<T> {};
|
||||
// Support std::optional<T> for all T with a stock parser.
|
||||
template <typename T> struct Parse3ary<std::optional<T>> : public Parse3ary<T> {};
|
||||
template <typename T> struct Parse4ary<std::optional<T>> : public Parse4ary<T> {};
|
||||
|
||||
template <typename T>
|
||||
bool Parse(const char* str, size_t n, absl::optional<T>* dest) {
|
||||
bool Parse(const char* str, size_t n, std::optional<T>* dest) {
|
||||
if (str == NULL) {
|
||||
if (dest != NULL)
|
||||
dest->reset();
|
||||
@ -861,7 +863,7 @@ bool Parse(const char* str, size_t n, absl::optional<T>* dest) {
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool Parse(const char* str, size_t n, absl::optional<T>* dest, int radix) {
|
||||
bool Parse(const char* str, size_t n, std::optional<T>* dest, int radix) {
|
||||
if (str == NULL) {
|
||||
if (dest != NULL)
|
||||
dest->reset();
|
||||
@ -890,14 +892,12 @@ class RE2::Arg {
|
||||
re2_internal::Parse4ary<T>::value,
|
||||
int>::type;
|
||||
|
||||
#if !defined(_MSC_VER)
|
||||
template <typename T>
|
||||
using CanParseFrom = typename std::enable_if<
|
||||
std::is_member_function_pointer<
|
||||
decltype(static_cast<bool (T::*)(const char*, size_t)>(
|
||||
&T::ParseFrom))>::value,
|
||||
int>::type;
|
||||
#endif
|
||||
|
||||
public:
|
||||
Arg() : Arg(nullptr) {}
|
||||
@ -909,10 +909,8 @@ class RE2::Arg {
|
||||
template <typename T, CanParse4ary<T> = 0>
|
||||
Arg(T* ptr) : arg_(ptr), parser_(DoParse4ary<T>) {}
|
||||
|
||||
#if !defined(_MSC_VER)
|
||||
template <typename T, CanParseFrom<T> = 0>
|
||||
Arg(T* ptr) : arg_(ptr), parser_(DoParseFrom<T>) {}
|
||||
#endif
|
||||
|
||||
typedef bool (*Parser)(const char* str, size_t n, void* dest);
|
||||
|
||||
@ -938,13 +936,11 @@ class RE2::Arg {
|
||||
return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest), 10);
|
||||
}
|
||||
|
||||
#if !defined(_MSC_VER)
|
||||
template <typename T>
|
||||
static bool DoParseFrom(const char* str, size_t n, void* dest) {
|
||||
if (dest == NULL) return true;
|
||||
return reinterpret_cast<T*>(dest)->ParseFrom(str, n);
|
||||
}
|
||||
#endif
|
||||
|
||||
void* arg_;
|
||||
Parser parser_;
|
||||
@ -972,7 +968,7 @@ inline RE2::Arg RE2::Octal(T* ptr) {
|
||||
}
|
||||
|
||||
// Silence warnings about missing initializers for members of LazyRE2.
|
||||
#if !defined(__clang__) && defined(__GNUC__)
|
||||
#if defined(__GNUC__)
|
||||
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
|
||||
#endif
|
||||
|
||||
|
||||
24
src/third_party/re2/dist/re2/regexp.cc
vendored
24
src/third_party/re2/dist/re2/regexp.cc
vendored
@ -10,6 +10,7 @@
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <string>
|
||||
@ -18,11 +19,12 @@
|
||||
#include "absl/base/call_once.h"
|
||||
#include "absl/base/macros.h"
|
||||
#include "absl/container/flat_hash_map.h"
|
||||
#include "absl/log/absl_check.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "absl/synchronization/mutex.h"
|
||||
#include "util/logging.h"
|
||||
#include "util/utf.h"
|
||||
#include "re2/pod_array.h"
|
||||
#include "re2/walker-inl.h"
|
||||
#include "util/utf.h"
|
||||
|
||||
namespace re2 {
|
||||
|
||||
@ -45,7 +47,7 @@ Regexp::Regexp(RegexpOp op, ParseFlags parse_flags)
|
||||
// required Decref() to have handled them for us.
|
||||
Regexp::~Regexp() {
|
||||
if (nsub_ > 0)
|
||||
LOG(DFATAL) << "Regexp not destroyed.";
|
||||
ABSL_LOG(DFATAL) << "Regexp not destroyed.";
|
||||
|
||||
switch (op_) {
|
||||
default:
|
||||
@ -154,7 +156,7 @@ void Regexp::Destroy() {
|
||||
Regexp* re = stack;
|
||||
stack = re->down_;
|
||||
if (re->ref_ != 0)
|
||||
LOG(DFATAL) << "Bad reference count " << re->ref_;
|
||||
ABSL_LOG(DFATAL) << "Bad reference count " << re->ref_;
|
||||
if (re->nsub_ > 0) {
|
||||
Regexp** subs = re->sub();
|
||||
for (int i = 0; i < re->nsub_; i++) {
|
||||
@ -179,7 +181,7 @@ void Regexp::Destroy() {
|
||||
}
|
||||
|
||||
void Regexp::AddRuneToString(Rune r) {
|
||||
DCHECK(op_ == kRegexpLiteralString);
|
||||
ABSL_DCHECK(op_ == kRegexpLiteralString);
|
||||
if (nrunes_ == 0) {
|
||||
// start with 8
|
||||
runes_ = new Rune[8];
|
||||
@ -421,7 +423,7 @@ static bool TopEqual(Regexp* a, Regexp* b) {
|
||||
}
|
||||
}
|
||||
|
||||
LOG(DFATAL) << "Unexpected op in Regexp::Equal: " << a->op();
|
||||
ABSL_LOG(DFATAL) << "Unexpected op in Regexp::Equal: " << a->op();
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -496,7 +498,7 @@ bool Regexp::Equal(Regexp* a, Regexp* b) {
|
||||
if (n == 0)
|
||||
break;
|
||||
|
||||
DCHECK_GE(n, 2);
|
||||
ABSL_DCHECK_GE(n, size_t{2});
|
||||
a = stk[n-2];
|
||||
b = stk[n-1];
|
||||
stk.resize(n-2);
|
||||
@ -562,7 +564,7 @@ class NumCapturesWalker : public Regexp::Walker<Ignored> {
|
||||
virtual Ignored ShortVisit(Regexp* re, Ignored ignored) {
|
||||
// Should never be called: we use Walk(), not WalkExponential().
|
||||
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||
LOG(DFATAL) << "NumCapturesWalker::ShortVisit called";
|
||||
ABSL_LOG(DFATAL) << "NumCapturesWalker::ShortVisit called";
|
||||
#endif
|
||||
return ignored;
|
||||
}
|
||||
@ -609,7 +611,7 @@ class NamedCapturesWalker : public Regexp::Walker<Ignored> {
|
||||
virtual Ignored ShortVisit(Regexp* re, Ignored ignored) {
|
||||
// Should never be called: we use Walk(), not WalkExponential().
|
||||
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||
LOG(DFATAL) << "NamedCapturesWalker::ShortVisit called";
|
||||
ABSL_LOG(DFATAL) << "NamedCapturesWalker::ShortVisit called";
|
||||
#endif
|
||||
return ignored;
|
||||
}
|
||||
@ -653,7 +655,7 @@ class CaptureNamesWalker : public Regexp::Walker<Ignored> {
|
||||
virtual Ignored ShortVisit(Regexp* re, Ignored ignored) {
|
||||
// Should never be called: we use Walk(), not WalkExponential().
|
||||
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||
LOG(DFATAL) << "CaptureNamesWalker::ShortVisit called";
|
||||
ABSL_LOG(DFATAL) << "CaptureNamesWalker::ShortVisit called";
|
||||
#endif
|
||||
return ignored;
|
||||
}
|
||||
@ -993,7 +995,7 @@ CharClass* CharClassBuilder::GetCharClass() {
|
||||
for (iterator it = begin(); it != end(); ++it)
|
||||
cc->ranges_[n++] = *it;
|
||||
cc->nranges_ = n;
|
||||
DCHECK_LE(n, static_cast<int>(ranges_.size()));
|
||||
ABSL_DCHECK_LE(n, static_cast<int>(ranges_.size()));
|
||||
cc->nrunes_ = nrunes_;
|
||||
cc->folds_ascii_ = FoldsASCII();
|
||||
return cc;
|
||||
|
||||
51
src/third_party/re2/dist/re2/regexp.h
vendored
51
src/third_party/re2/dist/re2/regexp.h
vendored
@ -88,12 +88,14 @@
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <string>
|
||||
|
||||
#include "absl/log/absl_check.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "util/logging.h"
|
||||
#include "util/utf.h"
|
||||
|
||||
namespace re2 {
|
||||
@ -332,15 +334,42 @@ class Regexp {
|
||||
return submany_;
|
||||
}
|
||||
|
||||
int min() { DCHECK_EQ(op_, kRegexpRepeat); return min_; }
|
||||
int max() { DCHECK_EQ(op_, kRegexpRepeat); return max_; }
|
||||
Rune rune() { DCHECK_EQ(op_, kRegexpLiteral); return rune_; }
|
||||
CharClass* cc() { DCHECK_EQ(op_, kRegexpCharClass); return cc_; }
|
||||
int cap() { DCHECK_EQ(op_, kRegexpCapture); return cap_; }
|
||||
const std::string* name() { DCHECK_EQ(op_, kRegexpCapture); return name_; }
|
||||
Rune* runes() { DCHECK_EQ(op_, kRegexpLiteralString); return runes_; }
|
||||
int nrunes() { DCHECK_EQ(op_, kRegexpLiteralString); return nrunes_; }
|
||||
int match_id() { DCHECK_EQ(op_, kRegexpHaveMatch); return match_id_; }
|
||||
int min() {
|
||||
ABSL_DCHECK_EQ(op_, kRegexpRepeat);
|
||||
return min_;
|
||||
}
|
||||
int max() {
|
||||
ABSL_DCHECK_EQ(op_, kRegexpRepeat);
|
||||
return max_;
|
||||
}
|
||||
Rune rune() {
|
||||
ABSL_DCHECK_EQ(op_, kRegexpLiteral);
|
||||
return rune_;
|
||||
}
|
||||
CharClass* cc() {
|
||||
ABSL_DCHECK_EQ(op_, kRegexpCharClass);
|
||||
return cc_;
|
||||
}
|
||||
int cap() {
|
||||
ABSL_DCHECK_EQ(op_, kRegexpCapture);
|
||||
return cap_;
|
||||
}
|
||||
const std::string* name() {
|
||||
ABSL_DCHECK_EQ(op_, kRegexpCapture);
|
||||
return name_;
|
||||
}
|
||||
Rune* runes() {
|
||||
ABSL_DCHECK_EQ(op_, kRegexpLiteralString);
|
||||
return runes_;
|
||||
}
|
||||
int nrunes() {
|
||||
ABSL_DCHECK_EQ(op_, kRegexpLiteralString);
|
||||
return nrunes_;
|
||||
}
|
||||
int match_id() {
|
||||
ABSL_DCHECK_EQ(op_, kRegexpHaveMatch);
|
||||
return match_id_;
|
||||
}
|
||||
|
||||
// Increments reference count, returns object as convenience.
|
||||
Regexp* Incref();
|
||||
@ -515,7 +544,7 @@ class Regexp {
|
||||
|
||||
// Allocate space for n sub-regexps.
|
||||
void AllocSub(int n) {
|
||||
DCHECK(n >= 0 && static_cast<uint16_t>(n) == n);
|
||||
ABSL_DCHECK(n >= 0 && static_cast<uint16_t>(n) == n);
|
||||
if (n > 1)
|
||||
submany_ = new Regexp*[n];
|
||||
nsub_ = static_cast<uint16_t>(n);
|
||||
|
||||
31
src/third_party/re2/dist/re2/set.cc
vendored
31
src/third_party/re2/dist/re2/set.cc
vendored
@ -5,15 +5,20 @@
|
||||
#include "re2/set.h"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "util/logging.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "re2/pod_array.h"
|
||||
#include "re2/prog.h"
|
||||
#include "re2/re2.h"
|
||||
#include "re2/regexp.h"
|
||||
#include "re2/sparse_set.h"
|
||||
|
||||
namespace re2 {
|
||||
|
||||
@ -50,9 +55,15 @@ RE2::Set& RE2::Set::operator=(Set&& other) {
|
||||
return *this;
|
||||
}
|
||||
|
||||
int RE2::Set::Size() const {
|
||||
if (!compiled_)
|
||||
return static_cast<int>(elem_.size());
|
||||
return size_;
|
||||
}
|
||||
|
||||
int RE2::Set::Add(absl::string_view pattern, std::string* error) {
|
||||
if (compiled_) {
|
||||
LOG(DFATAL) << "RE2::Set::Add() called after compiling";
|
||||
ABSL_LOG(DFATAL) << "RE2::Set::Add() called after compiling";
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -64,7 +75,7 @@ int RE2::Set::Add(absl::string_view pattern, std::string* error) {
|
||||
if (error != NULL)
|
||||
*error = status.Text();
|
||||
if (options_.log_errors())
|
||||
LOG(ERROR) << "Error parsing '" << pattern << "': " << status.Text();
|
||||
ABSL_LOG(ERROR) << "Error parsing '" << pattern << "': " << status.Text();
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -91,7 +102,7 @@ int RE2::Set::Add(absl::string_view pattern, std::string* error) {
|
||||
|
||||
bool RE2::Set::Compile() {
|
||||
if (compiled_) {
|
||||
LOG(DFATAL) << "RE2::Set::Compile() called more than once";
|
||||
ABSL_LOG(DFATAL) << "RE2::Set::Compile() called more than once";
|
||||
return false;
|
||||
}
|
||||
compiled_ = true;
|
||||
@ -128,7 +139,7 @@ bool RE2::Set::Match(absl::string_view text, std::vector<int>* v,
|
||||
if (!compiled_) {
|
||||
if (error_info != NULL)
|
||||
error_info->kind = kNotCompiled;
|
||||
LOG(DFATAL) << "RE2::Set::Match() called before compiling";
|
||||
ABSL_LOG(DFATAL) << "RE2::Set::Match() called before compiling";
|
||||
return false;
|
||||
}
|
||||
#ifdef RE2_HAVE_THREAD_LOCAL
|
||||
@ -144,10 +155,10 @@ bool RE2::Set::Match(absl::string_view text, std::vector<int>* v,
|
||||
NULL, &dfa_failed, matches.get());
|
||||
if (dfa_failed) {
|
||||
if (options_.log_errors())
|
||||
LOG(ERROR) << "DFA out of memory: "
|
||||
<< "program size " << prog_->size() << ", "
|
||||
<< "list count " << prog_->list_count() << ", "
|
||||
<< "bytemap range " << prog_->bytemap_range();
|
||||
ABSL_LOG(ERROR) << "DFA out of memory: "
|
||||
<< "program size " << prog_->size() << ", "
|
||||
<< "list count " << prog_->list_count() << ", "
|
||||
<< "bytemap range " << prog_->bytemap_range();
|
||||
if (error_info != NULL)
|
||||
error_info->kind = kOutOfMemory;
|
||||
return false;
|
||||
@ -161,7 +172,7 @@ bool RE2::Set::Match(absl::string_view text, std::vector<int>* v,
|
||||
if (matches->empty()) {
|
||||
if (error_info != NULL)
|
||||
error_info->kind = kInconsistent;
|
||||
LOG(DFATAL) << "RE2::Set::Match() matched, but no matches returned?!";
|
||||
ABSL_LOG(DFATAL) << "RE2::Set::Match() matched, but no matches returned";
|
||||
return false;
|
||||
}
|
||||
v->assign(matches->begin(), matches->end());
|
||||
|
||||
5
src/third_party/re2/dist/re2/set.h
vendored
5
src/third_party/re2/dist/re2/set.h
vendored
@ -53,6 +53,10 @@ class RE2::Set {
|
||||
// the error message from the parser.
|
||||
int Add(absl::string_view pattern, std::string* error);
|
||||
|
||||
// Returns the number of patterns in the set.
|
||||
// Can be called before or after Compile().
|
||||
int Size() const;
|
||||
|
||||
// Compiles the set in preparation for matching.
|
||||
// Returns false if the compiler runs out of memory.
|
||||
// Add() must not be called again after Compile().
|
||||
@ -62,6 +66,7 @@ class RE2::Set {
|
||||
// Returns true if text matches at least one of the regexps in the set.
|
||||
// Fills v (if not NULL) with the indices of the matching regexps.
|
||||
// Callers must not expect v to be sorted.
|
||||
// The indices are in the half-open interval [0, Size()).
|
||||
bool Match(absl::string_view text, std::vector<int>* v) const;
|
||||
|
||||
// As above, but populates error_info (if not NULL) when none of the regexps
|
||||
|
||||
22
src/third_party/re2/dist/re2/simplify.cc
vendored
22
src/third_party/re2/dist/re2/simplify.cc
vendored
@ -6,14 +6,17 @@
|
||||
// to use simple extended regular expression features.
|
||||
// Also sort and simplify character classes.
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
|
||||
#include "util/logging.h"
|
||||
#include "util/utf.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "re2/pod_array.h"
|
||||
#include "re2/regexp.h"
|
||||
#include "re2/walker-inl.h"
|
||||
#include "util/utf.h"
|
||||
|
||||
namespace re2 {
|
||||
|
||||
@ -94,7 +97,7 @@ bool Regexp::ComputeSimple() {
|
||||
case kRegexpRepeat:
|
||||
return false;
|
||||
}
|
||||
LOG(DFATAL) << "Case not handled in ComputeSimple: " << op_;
|
||||
ABSL_LOG(DFATAL) << "Case not handled in ComputeSimple: " << op_;
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -222,7 +225,7 @@ Regexp* CoalesceWalker::Copy(Regexp* re) {
|
||||
Regexp* CoalesceWalker::ShortVisit(Regexp* re, Regexp* parent_arg) {
|
||||
// Should never be called: we use Walk(), not WalkExponential().
|
||||
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||
LOG(DFATAL) << "CoalesceWalker::ShortVisit called";
|
||||
ABSL_LOG(DFATAL) << "CoalesceWalker::ShortVisit called";
|
||||
#endif
|
||||
return re->Incref();
|
||||
}
|
||||
@ -372,7 +375,7 @@ void CoalesceWalker::DoCoalesce(Regexp** r1ptr, Regexp** r2ptr) {
|
||||
|
||||
default:
|
||||
nre->Decref();
|
||||
LOG(DFATAL) << "DoCoalesce failed: r1->op() is " << r1->op();
|
||||
ABSL_LOG(DFATAL) << "DoCoalesce failed: r1->op() is " << r1->op();
|
||||
return;
|
||||
}
|
||||
|
||||
@ -433,7 +436,7 @@ void CoalesceWalker::DoCoalesce(Regexp** r1ptr, Regexp** r2ptr) {
|
||||
|
||||
default:
|
||||
nre->Decref();
|
||||
LOG(DFATAL) << "DoCoalesce failed: r2->op() is " << r2->op();
|
||||
ABSL_LOG(DFATAL) << "DoCoalesce failed: r2->op() is " << r2->op();
|
||||
return;
|
||||
}
|
||||
|
||||
@ -448,7 +451,7 @@ Regexp* SimplifyWalker::Copy(Regexp* re) {
|
||||
Regexp* SimplifyWalker::ShortVisit(Regexp* re, Regexp* parent_arg) {
|
||||
// Should never be called: we use Walk(), not WalkExponential().
|
||||
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||
LOG(DFATAL) << "SimplifyWalker::ShortVisit called";
|
||||
ABSL_LOG(DFATAL) << "SimplifyWalker::ShortVisit called";
|
||||
#endif
|
||||
return re->Incref();
|
||||
}
|
||||
@ -564,7 +567,7 @@ Regexp* SimplifyWalker::PostVisit(Regexp* re,
|
||||
}
|
||||
}
|
||||
|
||||
LOG(ERROR) << "Simplify case not handled: " << re->op();
|
||||
ABSL_LOG(ERROR) << "Simplify case not handled: " << re->op();
|
||||
return re->Incref();
|
||||
}
|
||||
|
||||
@ -661,7 +664,8 @@ Regexp* SimplifyWalker::SimplifyRepeat(Regexp* re, int min, int max,
|
||||
if (nre == NULL) {
|
||||
// Some degenerate case, like min > max, or min < max < 0.
|
||||
// This shouldn't happen, because the parser rejects such regexps.
|
||||
LOG(DFATAL) << "Malformed repeat " << re->ToString() << " " << min << " " << max;
|
||||
ABSL_LOG(DFATAL) << "Malformed repeat of " << re->ToString()
|
||||
<< " min " << min << " max " << max;
|
||||
return new Regexp(kRegexpNoMatch, f);
|
||||
}
|
||||
|
||||
|
||||
18
src/third_party/re2/dist/re2/sparse_array.h
vendored
18
src/third_party/re2/dist/re2/sparse_array.h
vendored
@ -88,22 +88,24 @@
|
||||
//
|
||||
// A moved-from SparseArray will be empty.
|
||||
|
||||
// Doing this simplifies the logic below.
|
||||
#ifndef __has_feature
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
#if __has_feature(memory_sanitizer)
|
||||
#include <sanitizer/msan_interface.h>
|
||||
#endif
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
#include "re2/pod_array.h"
|
||||
|
||||
// Doing this simplifies the logic below.
|
||||
#ifndef __has_feature
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
|
||||
#if __has_feature(memory_sanitizer)
|
||||
#include <sanitizer/msan_interface.h>
|
||||
#endif
|
||||
|
||||
namespace re2 {
|
||||
|
||||
template<typename Value>
|
||||
|
||||
18
src/third_party/re2/dist/re2/sparse_set.h
vendored
18
src/third_party/re2/dist/re2/sparse_set.h
vendored
@ -47,22 +47,24 @@
|
||||
//
|
||||
// See sparse_array.h for implementation details.
|
||||
|
||||
// Doing this simplifies the logic below.
|
||||
#ifndef __has_feature
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
#if __has_feature(memory_sanitizer)
|
||||
#include <sanitizer/msan_interface.h>
|
||||
#endif
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
#include "re2/pod_array.h"
|
||||
|
||||
// Doing this simplifies the logic below.
|
||||
#ifndef __has_feature
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
|
||||
#if __has_feature(memory_sanitizer)
|
||||
#include <sanitizer/msan_interface.h>
|
||||
#endif
|
||||
|
||||
namespace re2 {
|
||||
|
||||
template<typename Value>
|
||||
|
||||
@ -13,7 +13,7 @@
|
||||
// THIS CODE SHOULD NEVER BE USED IN PRODUCTION:
|
||||
// - It uses a ton of memory.
|
||||
// - It uses a ton of stack.
|
||||
// - It uses CHECK and LOG(FATAL).
|
||||
// - It uses ABSL_CHECK() and ABSL_LOG(FATAL).
|
||||
// - It implements unanchored search by repeated anchored search.
|
||||
//
|
||||
// On the other hand, it is very simple and a good reference
|
||||
@ -28,7 +28,9 @@
|
||||
#include <string.h>
|
||||
|
||||
#include "absl/base/macros.h"
|
||||
#include "util/logging.h"
|
||||
#include "absl/log/absl_check.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "re2/pod_array.h"
|
||||
#include "re2/prog.h"
|
||||
#include "re2/regexp.h"
|
||||
@ -111,7 +113,7 @@ bool Backtracker::Search(absl::string_view text, absl::string_view context,
|
||||
endmatch_ = prog_->anchor_end();
|
||||
submatch_ = submatch;
|
||||
nsubmatch_ = nsubmatch;
|
||||
CHECK_LT(2*nsubmatch_, static_cast<int>(ABSL_ARRAYSIZE(cap_)));
|
||||
ABSL_CHECK_LT(2*nsubmatch_, static_cast<int>(ABSL_ARRAYSIZE(cap_)));
|
||||
memset(cap_, 0, sizeof cap_);
|
||||
|
||||
// We use submatch_[0] for our own bookkeeping,
|
||||
@ -157,10 +159,10 @@ bool Backtracker::Visit(int id, const char* p) {
|
||||
// Check bitmap. If we've already explored from here,
|
||||
// either it didn't match or it did but we're hoping for a better match.
|
||||
// Either way, don't go down that road again.
|
||||
CHECK(p <= text_.data() + text_.size());
|
||||
ABSL_CHECK(p <= text_.data() + text_.size());
|
||||
int n = id * static_cast<int>(text_.size()+1) +
|
||||
static_cast<int>(p-text_.data());
|
||||
CHECK_LT(n/32, visited_.size());
|
||||
ABSL_CHECK_LT(n/32, visited_.size());
|
||||
if (visited_[n/32] & (1 << (n&31)))
|
||||
return false;
|
||||
visited_[n/32] |= 1 << (n&31);
|
||||
@ -188,7 +190,7 @@ bool Backtracker::Try(int id, const char* p) {
|
||||
Prog::Inst* ip = prog_->inst(id);
|
||||
switch (ip->opcode()) {
|
||||
default:
|
||||
LOG(FATAL) << "Unexpected opcode: " << (int)ip->opcode();
|
||||
ABSL_LOG(FATAL) << "Unexpected opcode: " << ip->opcode();
|
||||
return false; // not reached
|
||||
|
||||
case kInstAltMatch:
|
||||
|
||||
@ -9,8 +9,8 @@
|
||||
#include "absl/base/macros.h"
|
||||
#include "absl/strings/str_format.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "util/utf.h"
|
||||
#include "re2/regexp.h"
|
||||
#include "util/utf.h"
|
||||
|
||||
namespace re2 {
|
||||
|
||||
|
||||
@ -4,13 +4,16 @@
|
||||
|
||||
// Test prog.cc, compile.cc
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "absl/base/macros.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "util/logging.h"
|
||||
#include "re2/regexp.h"
|
||||
#include "re2/prog.h"
|
||||
#include "re2/regexp.h"
|
||||
|
||||
namespace re2 {
|
||||
|
||||
@ -132,13 +135,13 @@ TEST(TestRegexpCompileToProg, Simple) {
|
||||
const re2::Test& t = tests[i];
|
||||
Regexp* re = Regexp::Parse(t.regexp, Regexp::PerlX|Regexp::Latin1, NULL);
|
||||
if (re == NULL) {
|
||||
LOG(ERROR) << "Cannot parse: " << t.regexp;
|
||||
ABSL_LOG(ERROR) << "Cannot parse: " << t.regexp;
|
||||
failed++;
|
||||
continue;
|
||||
}
|
||||
Prog* prog = re->CompileToProg(0);
|
||||
if (prog == NULL) {
|
||||
LOG(ERROR) << "Cannot compile: " << t.regexp;
|
||||
ABSL_LOG(ERROR) << "Cannot compile: " << t.regexp;
|
||||
re->Decref();
|
||||
failed++;
|
||||
continue;
|
||||
@ -146,9 +149,9 @@ TEST(TestRegexpCompileToProg, Simple) {
|
||||
ASSERT_TRUE(re->CompileToProg(1) == NULL);
|
||||
std::string s = prog->Dump();
|
||||
if (s != t.code) {
|
||||
LOG(ERROR) << "Incorrect compiled code for: " << t.regexp;
|
||||
LOG(ERROR) << "Want:\n" << t.code;
|
||||
LOG(ERROR) << "Got:\n" << s;
|
||||
ABSL_LOG(ERROR) << "Incorrect compiled code for: " << t.regexp;
|
||||
ABSL_LOG(ERROR) << "Want:\n" << t.code;
|
||||
ABSL_LOG(ERROR) << "Got:\n" << s;
|
||||
failed++;
|
||||
}
|
||||
delete prog;
|
||||
|
||||
27
src/third_party/re2/dist/re2/testing/dfa_test.cc
vendored
27
src/third_party/re2/dist/re2/testing/dfa_test.cc
vendored
@ -2,22 +2,24 @@
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/base/macros.h"
|
||||
#include "absl/flags/flag.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "absl/strings/str_format.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "util/logging.h"
|
||||
#include "util/malloc_counter.h"
|
||||
#include "re2/prog.h"
|
||||
#include "re2/re2.h"
|
||||
#include "re2/regexp.h"
|
||||
#include "re2/testing/regexp_generator.h"
|
||||
#include "re2/testing/string_generator.h"
|
||||
#include "util/malloc_counter.h"
|
||||
|
||||
static const bool UsingMallocCounter = false;
|
||||
|
||||
@ -111,10 +113,10 @@ TEST(SingleThreaded, BuildEntireDFA) {
|
||||
delete prog;
|
||||
}
|
||||
if (UsingMallocCounter) {
|
||||
//LOG(INFO) << "limit " << limit << ", "
|
||||
// << "prog usage " << progusage << ", "
|
||||
// << "DFA budget " << dfamem << ", "
|
||||
// << "total " << usage;
|
||||
//ABSL_LOG(INFO) << "limit " << limit << ", "
|
||||
// << "prog usage " << progusage << ", "
|
||||
// << "DFA budget " << dfamem << ", "
|
||||
// << "total " << usage;
|
||||
// Tolerate +/- 10%.
|
||||
ASSERT_GT(usage, limit*9/10);
|
||||
ASSERT_LT(usage, limit*11/10);
|
||||
@ -189,8 +191,8 @@ TEST(SingleThreaded, SearchDFA) {
|
||||
delete prog;
|
||||
}
|
||||
if (UsingMallocCounter) {
|
||||
//LOG(INFO) << "usage " << usage << ", "
|
||||
// << "peak usage " << peak_usage;
|
||||
//ABSL_LOG(INFO) << "usage " << usage << ", "
|
||||
// << "peak usage " << peak_usage;
|
||||
ASSERT_LT(usage, 1<<n);
|
||||
ASSERT_LT(peak_usage, 1<<n);
|
||||
}
|
||||
@ -297,7 +299,7 @@ TEST(DFA, ReverseMatch) {
|
||||
prog->SearchDFA(t.text, absl::string_view(), Prog::kUnanchored,
|
||||
Prog::kFirstMatch, NULL, &failed, NULL);
|
||||
if (matched != t.match) {
|
||||
LOG(ERROR) << t.regexp << " on " << t.text << ": want " << t.match;
|
||||
ABSL_LOG(ERROR) << t.regexp << " on " << t.text << ": want " << t.match;
|
||||
nfail++;
|
||||
}
|
||||
delete prog;
|
||||
@ -360,8 +362,9 @@ TEST(DFA, Callback) {
|
||||
dump += match ? "]]" : "]";
|
||||
});
|
||||
if (dump != t.dump) {
|
||||
LOG(ERROR) << t.regexp << " bytemap:\n" << prog->DumpByteMap();
|
||||
LOG(ERROR) << t.regexp << " dump:\ngot " << dump << "\nwant " << t.dump;
|
||||
ABSL_LOG(ERROR) << t.regexp << " bytemap:\n" << prog->DumpByteMap();
|
||||
ABSL_LOG(ERROR) << t.regexp << " dump:\n" << "got " << dump << "\n"
|
||||
<< "want " << t.dump;
|
||||
nfail++;
|
||||
}
|
||||
delete prog;
|
||||
|
||||
29
src/third_party/re2/dist/re2/testing/dump.cc
vendored
29
src/third_party/re2/dist/re2/testing/dump.cc
vendored
@ -19,11 +19,12 @@
|
||||
#include <string>
|
||||
|
||||
#include "absl/base/macros.h"
|
||||
#include "absl/log/absl_check.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "absl/strings/str_format.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "util/logging.h"
|
||||
#include "util/utf.h"
|
||||
#include "re2/regexp.h"
|
||||
#include "util/utf.h"
|
||||
|
||||
namespace re2 {
|
||||
|
||||
@ -96,17 +97,25 @@ static void DumpRegexpAppending(Regexp* re, std::string* s) {
|
||||
break;
|
||||
case kRegexpLiteral: {
|
||||
Rune r = re->rune();
|
||||
char buf[UTFmax+1];
|
||||
buf[runetochar(buf, &r)] = 0;
|
||||
s->append(buf);
|
||||
if (re->parse_flags() & Regexp::Latin1) {
|
||||
s->push_back(r);
|
||||
} else {
|
||||
char buf[UTFmax+1];
|
||||
buf[runetochar(buf, &r)] = 0;
|
||||
s->append(buf);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case kRegexpLiteralString:
|
||||
for (int i = 0; i < re->nrunes(); i++) {
|
||||
Rune r = re->runes()[i];
|
||||
char buf[UTFmax+1];
|
||||
buf[runetochar(buf, &r)] = 0;
|
||||
s->append(buf);
|
||||
if (re->parse_flags() & Regexp::Latin1) {
|
||||
s->push_back(r);
|
||||
} else {
|
||||
char buf[UTFmax+1];
|
||||
buf[runetochar(buf, &r)] = 0;
|
||||
s->append(buf);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case kRegexpConcat:
|
||||
@ -121,7 +130,7 @@ static void DumpRegexpAppending(Regexp* re, std::string* s) {
|
||||
break;
|
||||
case kRegexpCapture:
|
||||
if (re->cap() == 0)
|
||||
LOG(DFATAL) << "kRegexpCapture cap() == 0";
|
||||
ABSL_LOG(DFATAL) << "kRegexpCapture cap() == 0";
|
||||
if (re->name()) {
|
||||
s->append(*re->name());
|
||||
s->append(":");
|
||||
@ -153,7 +162,7 @@ static void DumpRegexpAppending(Regexp* re, std::string* s) {
|
||||
std::string Regexp::Dump() {
|
||||
// Make sure that we are being called from a unit test.
|
||||
// Should cause a link error if used outside of testing.
|
||||
CHECK(!::testing::TempDir().empty());
|
||||
ABSL_CHECK(!::testing::TempDir().empty());
|
||||
|
||||
std::string s;
|
||||
DumpRegexpAppending(this, &s);
|
||||
|
||||
@ -9,6 +9,7 @@
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "re2/testing/exhaustive_tester.h"
|
||||
#include "re2/testing/regexp_generator.h"
|
||||
|
||||
namespace re2 {
|
||||
|
||||
|
||||
@ -5,12 +5,13 @@
|
||||
// Exhaustive testing of regular expression matching.
|
||||
|
||||
#include <stddef.h>
|
||||
#include <memory>
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "re2/testing/exhaustive_tester.h"
|
||||
#include "re2/testing/regexp_generator.h"
|
||||
|
||||
namespace re2 {
|
||||
|
||||
@ -69,4 +70,3 @@ TEST(LineEnds, Exhaustive) {
|
||||
// }
|
||||
|
||||
} // namespace re2
|
||||
|
||||
|
||||
@ -5,13 +5,14 @@
|
||||
// Exhaustive testing of regular expression matching.
|
||||
|
||||
#include <stddef.h>
|
||||
#include <memory>
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "util/utf.h"
|
||||
#include "re2/testing/exhaustive_tester.h"
|
||||
#include "re2/testing/regexp_generator.h"
|
||||
#include "util/utf.h"
|
||||
|
||||
namespace re2 {
|
||||
|
||||
@ -97,4 +98,3 @@ TEST(InterestingUTF8, AB) {
|
||||
}
|
||||
|
||||
} // namespace re2
|
||||
|
||||
|
||||
@ -33,4 +33,3 @@ TEST(EgrepLiterals, UTF8) {
|
||||
}
|
||||
|
||||
} // namespace re2
|
||||
|
||||
|
||||
@ -11,14 +11,23 @@
|
||||
// the NFA, DFA, and a trivial backtracking implementation agree about
|
||||
// the location of the match.
|
||||
|
||||
#include "re2/testing/exhaustive_tester.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/base/macros.h"
|
||||
#include "absl/flags/flag.h"
|
||||
#include "absl/log/absl_check.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "absl/strings/str_format.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "util/logging.h"
|
||||
#include "re2/testing/exhaustive_tester.h"
|
||||
#include "re2/prog.h"
|
||||
#include "re2/re2.h"
|
||||
#include "re2/testing/regexp_generator.h"
|
||||
#include "re2/testing/tester.h"
|
||||
|
||||
// For target `log' in the Makefile.
|
||||
@ -40,7 +49,7 @@ static char* escape(absl::string_view sp) {
|
||||
*p++ = '\"';
|
||||
for (size_t i = 0; i < sp.size(); i++) {
|
||||
if(p+5 >= buf+sizeof buf)
|
||||
LOG(FATAL) << "ExhaustiveTester escape: too long";
|
||||
ABSL_LOG(FATAL) << "ExhaustiveTester escape: too long";
|
||||
if(sp[i] == '\\' || sp[i] == '\"') {
|
||||
*p++ = '\\';
|
||||
*p++ = sp[i];
|
||||
@ -82,7 +91,7 @@ void ExhaustiveTester::HandleRegexp(const std::string& const_regexp) {
|
||||
std::string regexp = const_regexp;
|
||||
if (!topwrapper_.empty()) {
|
||||
auto fmt = absl::ParsedFormat<'s'>::New(topwrapper_);
|
||||
CHECK(fmt != nullptr);
|
||||
ABSL_CHECK(fmt != nullptr);
|
||||
regexp = absl::StrFormat(*fmt, regexp);
|
||||
}
|
||||
|
||||
@ -95,7 +104,7 @@ void ExhaustiveTester::HandleRegexp(const std::string& const_regexp) {
|
||||
// Write out test cases and answers for use in testing
|
||||
// other implementations, such as Go's regexp package.
|
||||
if (randomstrings_)
|
||||
LOG(ERROR) << "Cannot log with random strings.";
|
||||
ABSL_LOG(ERROR) << "Cannot log with random strings.";
|
||||
if (regexps_ == 1) { // first
|
||||
absl::PrintF("strings\n");
|
||||
strgen_.Reset();
|
||||
|
||||
@ -6,6 +6,7 @@
|
||||
#define RE2_TESTING_EXHAUSTIVE_TESTER_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
|
||||
@ -2,17 +2,18 @@
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "re2/filtered_re2.h"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/base/macros.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "util/logging.h"
|
||||
#include "re2/filtered_re2.h"
|
||||
#include "re2/re2.h"
|
||||
|
||||
namespace re2 {
|
||||
@ -32,14 +33,14 @@ TEST(FilteredRE2Test, EmptyTest) {
|
||||
FilterTestVars v;
|
||||
|
||||
v.f.Compile(&v.atoms);
|
||||
EXPECT_EQ(0, v.atoms.size());
|
||||
EXPECT_EQ(size_t{0}, v.atoms.size());
|
||||
|
||||
// Compile has no effect at all when called before Add: it will not
|
||||
// record that it has been called and it will not clear the vector.
|
||||
// The second point does not matter here, but the first point means
|
||||
// that an error will be logged during the call to AllMatches.
|
||||
v.f.AllMatches("foo", v.atom_indices, &v.matches);
|
||||
EXPECT_EQ(0, v.matches.size());
|
||||
EXPECT_EQ(size_t{0}, v.matches.size());
|
||||
}
|
||||
|
||||
TEST(FilteredRE2Test, SmallOrTest) {
|
||||
@ -48,10 +49,10 @@ TEST(FilteredRE2Test, SmallOrTest) {
|
||||
v.f.Add("(foo|bar)", v.opts, &id);
|
||||
|
||||
v.f.Compile(&v.atoms);
|
||||
EXPECT_EQ(0, v.atoms.size());
|
||||
EXPECT_EQ(size_t{0}, v.atoms.size());
|
||||
|
||||
v.f.AllMatches("lemurs bar", v.atom_indices, &v.matches);
|
||||
EXPECT_EQ(1, v.matches.size());
|
||||
EXPECT_EQ(size_t{1}, v.matches.size());
|
||||
EXPECT_EQ(id, v.matches[0]);
|
||||
}
|
||||
|
||||
@ -62,12 +63,12 @@ TEST(FilteredRE2Test, SmallLatinTest) {
|
||||
v.opts.set_encoding(RE2::Options::EncodingLatin1);
|
||||
v.f.Add("\xde\xadQ\xbe\xef", v.opts, &id);
|
||||
v.f.Compile(&v.atoms);
|
||||
EXPECT_EQ(1, v.atoms.size());
|
||||
EXPECT_EQ(size_t{1}, v.atoms.size());
|
||||
EXPECT_EQ(v.atoms[0], "\xde\xadq\xbe\xef");
|
||||
|
||||
v.atom_indices.push_back(0);
|
||||
v.f.AllMatches("foo\xde\xadQ\xbe\xeflemur", v.atom_indices, &v.matches);
|
||||
EXPECT_EQ(1, v.matches.size());
|
||||
EXPECT_EQ(size_t{1}, v.matches.size());
|
||||
EXPECT_EQ(id, v.matches[0]);
|
||||
}
|
||||
|
||||
@ -172,13 +173,13 @@ bool CheckExpectedAtoms(const char* atoms[],
|
||||
pass = pass && expected[i] == v->atoms[i];
|
||||
|
||||
if (!pass) {
|
||||
LOG(ERROR) << "Failed " << testname;
|
||||
LOG(ERROR) << "Expected #atoms = " << expected.size();
|
||||
ABSL_LOG(ERROR) << "Failed " << testname;
|
||||
ABSL_LOG(ERROR) << "Expected #atoms = " << expected.size();
|
||||
for (size_t i = 0; i < expected.size(); i++)
|
||||
LOG(ERROR) << expected[i];
|
||||
LOG(ERROR) << "Found #atoms = " << v->atoms.size();
|
||||
ABSL_LOG(ERROR) << expected[i];
|
||||
ABSL_LOG(ERROR) << "Found #atoms = " << v->atoms.size();
|
||||
for (size_t i = 0; i < v->atoms.size(); i++)
|
||||
LOG(ERROR) << v->atoms[i];
|
||||
ABSL_LOG(ERROR) << v->atoms[i];
|
||||
}
|
||||
|
||||
return pass;
|
||||
@ -255,7 +256,7 @@ TEST(FilteredRE2Test, MatchTests) {
|
||||
FindAtomIndices(v.atoms, atoms, &atom_ids);
|
||||
std::vector<int> matching_regexps;
|
||||
v.f.AllMatches(text, atom_ids, &matching_regexps);
|
||||
EXPECT_EQ(1, matching_regexps.size());
|
||||
EXPECT_EQ(size_t{1}, matching_regexps.size());
|
||||
|
||||
text = "abc12312yyyzzz";
|
||||
atoms.clear();
|
||||
@ -264,7 +265,7 @@ TEST(FilteredRE2Test, MatchTests) {
|
||||
atoms.push_back("yyyzzz");
|
||||
FindAtomIndices(v.atoms, atoms, &atom_ids);
|
||||
v.f.AllMatches(text, atom_ids, &matching_regexps);
|
||||
EXPECT_EQ(1, matching_regexps.size());
|
||||
EXPECT_EQ(size_t{1}, matching_regexps.size());
|
||||
|
||||
text = "abcd12yyy32yyyzzz";
|
||||
atoms.clear();
|
||||
@ -273,11 +274,11 @@ TEST(FilteredRE2Test, MatchTests) {
|
||||
atoms.push_back("yyy");
|
||||
atoms.push_back("yyyzzz");
|
||||
FindAtomIndices(v.atoms, atoms, &atom_ids);
|
||||
LOG(INFO) << "S: " << atom_ids.size();
|
||||
ABSL_LOG(INFO) << "S: " << atom_ids.size();
|
||||
for (size_t i = 0; i < atom_ids.size(); i++)
|
||||
LOG(INFO) << "i: " << i << " : " << atom_ids[i];
|
||||
ABSL_LOG(INFO) << "i: " << i << " : " << atom_ids[i];
|
||||
v.f.AllMatches(text, atom_ids, &matching_regexps);
|
||||
EXPECT_EQ(2, matching_regexps.size());
|
||||
EXPECT_EQ(size_t{2}, matching_regexps.size());
|
||||
}
|
||||
|
||||
TEST(FilteredRE2Test, EmptyStringInStringSetBug) {
|
||||
@ -300,43 +301,43 @@ TEST(FilteredRE2Test, MoveSemantics) {
|
||||
v1.f.Add("foo\\d+", v1.opts, &id);
|
||||
EXPECT_EQ(0, id);
|
||||
v1.f.Compile(&v1.atoms);
|
||||
EXPECT_EQ(1, v1.atoms.size());
|
||||
EXPECT_EQ(size_t{1}, v1.atoms.size());
|
||||
EXPECT_EQ("foo", v1.atoms[0]);
|
||||
v1.f.AllMatches("abc foo1 xyz", {0}, &v1.matches);
|
||||
EXPECT_EQ(1, v1.matches.size());
|
||||
EXPECT_EQ(size_t{1}, v1.matches.size());
|
||||
EXPECT_EQ(0, v1.matches[0]);
|
||||
v1.f.AllMatches("abc bar2 xyz", {0}, &v1.matches);
|
||||
EXPECT_EQ(0, v1.matches.size());
|
||||
EXPECT_EQ(size_t{0}, v1.matches.size());
|
||||
|
||||
// The moved-to object should do what the moved-from object did.
|
||||
FilterTestVars v2;
|
||||
v2.f = std::move(v1.f);
|
||||
v2.f.AllMatches("abc foo1 xyz", {0}, &v2.matches);
|
||||
EXPECT_EQ(1, v2.matches.size());
|
||||
EXPECT_EQ(size_t{1}, v2.matches.size());
|
||||
EXPECT_EQ(0, v2.matches[0]);
|
||||
v2.f.AllMatches("abc bar2 xyz", {0}, &v2.matches);
|
||||
EXPECT_EQ(0, v2.matches.size());
|
||||
EXPECT_EQ(size_t{0}, v2.matches.size());
|
||||
|
||||
// The moved-from object should have been reset and be reusable.
|
||||
v1.f.Add("bar\\d+", v1.opts, &id);
|
||||
EXPECT_EQ(0, id);
|
||||
v1.f.Compile(&v1.atoms);
|
||||
EXPECT_EQ(1, v1.atoms.size());
|
||||
EXPECT_EQ(size_t{1}, v1.atoms.size());
|
||||
EXPECT_EQ("bar", v1.atoms[0]);
|
||||
v1.f.AllMatches("abc foo1 xyz", {0}, &v1.matches);
|
||||
EXPECT_EQ(0, v1.matches.size());
|
||||
EXPECT_EQ(size_t{0}, v1.matches.size());
|
||||
v1.f.AllMatches("abc bar2 xyz", {0}, &v1.matches);
|
||||
EXPECT_EQ(1, v1.matches.size());
|
||||
EXPECT_EQ(size_t{1}, v1.matches.size());
|
||||
EXPECT_EQ(0, v1.matches[0]);
|
||||
|
||||
// Verify that "overwriting" works and also doesn't leak memory.
|
||||
// (The latter will need a leak detector such as LeakSanitizer.)
|
||||
v1.f = std::move(v2.f);
|
||||
v1.f.AllMatches("abc foo1 xyz", {0}, &v1.matches);
|
||||
EXPECT_EQ(1, v1.matches.size());
|
||||
EXPECT_EQ(size_t{1}, v1.matches.size());
|
||||
EXPECT_EQ(0, v1.matches[0]);
|
||||
v1.f.AllMatches("abc bar2 xyz", {0}, &v1.matches);
|
||||
EXPECT_EQ(0, v1.matches.size());
|
||||
EXPECT_EQ(size_t{0}, v1.matches.size());
|
||||
}
|
||||
|
||||
} // namespace re2
|
||||
|
||||
@ -2,9 +2,10 @@
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "absl/base/macros.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "util/logging.h"
|
||||
#include "re2/prog.h"
|
||||
#include "re2/regexp.h"
|
||||
|
||||
|
||||
@ -2,8 +2,7 @@
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "util/logging.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "re2/regexp.h"
|
||||
#include "re2/walker-inl.h"
|
||||
|
||||
@ -21,7 +20,7 @@ class NullWalker : public Regexp::Walker<bool> {
|
||||
virtual bool ShortVisit(Regexp* re, bool a) {
|
||||
// Should never be called: we use Walk(), not WalkExponential().
|
||||
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||
LOG(DFATAL) << "NullWalker::ShortVisit called";
|
||||
ABSL_LOG(DFATAL) << "NullWalker::ShortVisit called";
|
||||
#endif
|
||||
return a;
|
||||
}
|
||||
|
||||
@ -4,11 +4,13 @@
|
||||
|
||||
// Test parse.cc, dump.cc, and tostring.cc.
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "absl/base/macros.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "util/logging.h"
|
||||
#include "re2/regexp.h"
|
||||
|
||||
namespace re2 {
|
||||
@ -225,6 +227,29 @@ static Test tests[] = {
|
||||
// Bug in Regexp::ToString() that emitted [^], which
|
||||
// would (obviously) fail to parse when fed back in.
|
||||
{ "[\\s\\S]", "cc{0-0x10ffff}" },
|
||||
|
||||
// As per https://github.com/google/re2/issues/477,
|
||||
// there were long-standing bugs involving Latin-1.
|
||||
// Here, we exercise it WITHOUT case folding...
|
||||
{ "\xa5\x64\xd1", "str{\xa5""d\xd1}", Regexp::Latin1 },
|
||||
{ "\xa5\xd1\x64", "str{\xa5\xd1""d}", Regexp::Latin1 },
|
||||
{ "\xa5\x64[\xd1\xd2]", "cat{str{\xa5""d}cc{0xd1-0xd2}}", Regexp::Latin1 },
|
||||
{ "\xa5[\xd1\xd2]\x64", "cat{lit{\xa5}cc{0xd1-0xd2}lit{d}}", Regexp::Latin1 },
|
||||
{ "\xa5\x64|\xa5\xd1", "cat{lit{\xa5}cc{0x64 0xd1}}", Regexp::Latin1 },
|
||||
{ "\xa5\xd1|\xa5\x64", "cat{lit{\xa5}cc{0x64 0xd1}}", Regexp::Latin1 },
|
||||
{ "\xa5\x64|\xa5[\xd1\xd2]", "cat{lit{\xa5}cc{0x64 0xd1-0xd2}}", Regexp::Latin1 },
|
||||
{ "\xa5[\xd1\xd2]|\xa5\x64", "cat{lit{\xa5}cc{0x64 0xd1-0xd2}}", Regexp::Latin1 },
|
||||
// Here, we exercise it WITH case folding...
|
||||
// 0x64 should fold to 0x44, but neither 0xD1 nor 0xD2
|
||||
// should fold to 0xF1 and 0xF2, respectively.
|
||||
{ "\xa5\x64\xd1", "strfold{\xa5""d\xd1}", Regexp::Latin1 | Regexp::FoldCase },
|
||||
{ "\xa5\xd1\x64", "strfold{\xa5\xd1""d}", Regexp::Latin1 | Regexp::FoldCase },
|
||||
{ "\xa5\x64[\xd1\xd2]", "cat{strfold{\xa5""d}cc{0xd1-0xd2}}", Regexp::Latin1 | Regexp::FoldCase },
|
||||
{ "\xa5[\xd1\xd2]\x64", "cat{lit{\xa5}cc{0xd1-0xd2}litfold{d}}", Regexp::Latin1 | Regexp::FoldCase },
|
||||
{ "\xa5\x64|\xa5\xd1", "cat{lit{\xa5}cc{0x44 0x64 0xd1}}", Regexp::Latin1 | Regexp::FoldCase },
|
||||
{ "\xa5\xd1|\xa5\x64", "cat{lit{\xa5}cc{0x44 0x64 0xd1}}", Regexp::Latin1 | Regexp::FoldCase },
|
||||
{ "\xa5\x64|\xa5[\xd1\xd2]", "cat{lit{\xa5}cc{0x44 0x64 0xd1-0xd2}}", Regexp::Latin1 | Regexp::FoldCase },
|
||||
{ "\xa5[\xd1\xd2]|\xa5\x64", "cat{lit{\xa5}cc{0x44 0x64 0xd1-0xd2}}", Regexp::Latin1 | Regexp::FoldCase },
|
||||
};
|
||||
|
||||
bool RegexpEqualTestingOnly(Regexp* a, Regexp* b) {
|
||||
@ -356,6 +381,13 @@ Test prefix_tests[] = {
|
||||
"cat{lit{a}alt{emp{}cat{str{ardvark}alt{emp{}lit{s}}}"
|
||||
"cat{str{ba}alt{cat{lit{c}alt{cc{0x69 0x6b}cat{str{us}alt{emp{}str{es}}}}}"
|
||||
"str{ft}cat{str{lone}alt{emp{}lit{s}}}}}}}" },
|
||||
// As per https://github.com/google/re2/issues/467,
|
||||
// these should factor identically, but they didn't
|
||||
// because AddFoldedRange() terminated prematurely.
|
||||
{ "0A|0[aA]", "cat{lit{0}cc{0x41 0x61}}" },
|
||||
{ "0a|0[aA]", "cat{lit{0}cc{0x41 0x61}}" },
|
||||
{ "0[aA]|0A", "cat{lit{0}cc{0x41 0x61}}" },
|
||||
{ "0[aA]|0a", "cat{lit{0}cc{0x41 0x61}}" },
|
||||
};
|
||||
|
||||
// Test that prefix factoring works.
|
||||
@ -485,12 +517,12 @@ TEST(TestToString, EquivalentParse) {
|
||||
// << " t=" << t << " regexp=" << tests[i].regexp;
|
||||
|
||||
// Test that if we parse the new regexp we get the same structure.
|
||||
Regexp* nre = Regexp::Parse(t, Regexp::MatchNL | Regexp::PerlX, &status);
|
||||
Regexp* nre = Regexp::Parse(t, f, &status);
|
||||
ASSERT_TRUE(nre != NULL) << " reparse " << t << " " << status.Text();
|
||||
std::string ss = nre->Dump();
|
||||
std::string tt = nre->ToString();
|
||||
if (s != ss || t != tt)
|
||||
LOG(INFO) << "ToString(" << tests[i].regexp << ") = " << t;
|
||||
ABSL_LOG(INFO) << "ToString(" << tests[i].regexp << ") = " << t;
|
||||
EXPECT_EQ(s, ss);
|
||||
EXPECT_EQ(t, tt);
|
||||
nre->Decref();
|
||||
@ -525,4 +557,30 @@ TEST(NamedCaptures, ErrorArgs) {
|
||||
EXPECT_EQ(status.error_arg(), "(?<space bar>");
|
||||
}
|
||||
|
||||
// Test that look-around error args are correct.
|
||||
TEST(LookAround, ErrorArgs) {
|
||||
RegexpStatus status;
|
||||
Regexp* re;
|
||||
|
||||
re = Regexp::Parse("(?=foo).*", Regexp::LikePerl, &status);
|
||||
EXPECT_TRUE(re == NULL);
|
||||
EXPECT_EQ(status.code(), kRegexpBadPerlOp);
|
||||
EXPECT_EQ(status.error_arg(), "(?=");
|
||||
|
||||
re = Regexp::Parse("(?!foo).*", Regexp::LikePerl, &status);
|
||||
EXPECT_TRUE(re == NULL);
|
||||
EXPECT_EQ(status.code(), kRegexpBadPerlOp);
|
||||
EXPECT_EQ(status.error_arg(), "(?!");
|
||||
|
||||
re = Regexp::Parse("(?<=foo).*", Regexp::LikePerl, &status);
|
||||
EXPECT_TRUE(re == NULL);
|
||||
EXPECT_EQ(status.code(), kRegexpBadPerlOp);
|
||||
EXPECT_EQ(status.error_arg(), "(?<=");
|
||||
|
||||
re = Regexp::Parse("(?<!foo).*", Regexp::LikePerl, &status);
|
||||
EXPECT_TRUE(re == NULL);
|
||||
EXPECT_EQ(status.code(), kRegexpBadPerlOp);
|
||||
EXPECT_EQ(status.error_arg(), "(?<!");
|
||||
}
|
||||
|
||||
} // namespace re2
|
||||
|
||||
@ -3,13 +3,15 @@
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/base/macros.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "absl/strings/escaping.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "util/logging.h"
|
||||
#include "re2/prog.h"
|
||||
#include "re2/re2.h"
|
||||
#include "re2/regexp.h"
|
||||
@ -113,7 +115,7 @@ TEST(PossibleMatchRange, HandWritten) {
|
||||
const PrefixTest& t = tests[i];
|
||||
std::string min, max;
|
||||
if (j == 0) {
|
||||
LOG(INFO) << "Checking regexp=" << absl::CEscape(t.regexp);
|
||||
ABSL_LOG(INFO) << "Checking regexp=" << absl::CEscape(t.regexp);
|
||||
Regexp* re = Regexp::Parse(t.regexp, Regexp::LikePerl, NULL);
|
||||
ASSERT_TRUE(re != NULL);
|
||||
Prog* prog = re->CompileToProg(0);
|
||||
@ -202,7 +204,7 @@ class PossibleMatchTester : public RegexpGenerator {
|
||||
void PossibleMatchTester::HandleRegexp(const std::string& regexp) {
|
||||
regexps_++;
|
||||
|
||||
VLOG(3) << absl::CEscape(regexp);
|
||||
ABSL_VLOG(3) << absl::CEscape(regexp);
|
||||
|
||||
RE2 re(regexp, RE2::Latin1);
|
||||
ASSERT_EQ(re.error(), "");
|
||||
@ -214,7 +216,8 @@ void PossibleMatchTester::HandleRegexp(const std::string& regexp) {
|
||||
// complicated expressions.
|
||||
if(strstr(regexp.c_str(), "\\C*"))
|
||||
return;
|
||||
LOG(QFATAL) << "PossibleMatchRange failed on: " << absl::CEscape(regexp);
|
||||
ABSL_LOG(QFATAL) << "PossibleMatchRange failed on: "
|
||||
<< absl::CEscape(regexp);
|
||||
}
|
||||
|
||||
strgen_.Reset();
|
||||
@ -241,8 +244,8 @@ TEST(PossibleMatchRange, Exhaustive) {
|
||||
RegexpGenerator::EgrepOps(),
|
||||
stringlen, Explode("ab4"));
|
||||
t.Generate();
|
||||
LOG(INFO) << t.regexps() << " regexps, "
|
||||
<< t.tests() << " tests";
|
||||
ABSL_LOG(INFO) << t.regexps() << " regexps, "
|
||||
<< t.tests() << " tests";
|
||||
}
|
||||
|
||||
} // namespace re2
|
||||
|
||||
@ -4,7 +4,6 @@
|
||||
|
||||
// Random testing of regular expression matching.
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
@ -12,6 +11,7 @@
|
||||
#include "absl/strings/str_format.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "re2/testing/exhaustive_tester.h"
|
||||
#include "re2/testing/regexp_generator.h"
|
||||
|
||||
ABSL_FLAG(int, regexpseed, 404, "Random regexp seed.");
|
||||
ABSL_FLAG(int, regexpcount, 100, "How many random regexps to generate.");
|
||||
|
||||
@ -9,10 +9,11 @@
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <optional>
|
||||
|
||||
#include "absl/base/macros.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "util/logging.h"
|
||||
#include "re2/re2.h"
|
||||
|
||||
namespace re2 {
|
||||
@ -135,10 +136,9 @@ TEST(RE2ArgTest, Uint64Test) {
|
||||
}
|
||||
|
||||
TEST(RE2ArgTest, ParseFromTest) {
|
||||
#if !defined(_MSC_VER)
|
||||
struct {
|
||||
bool ParseFrom(const char* str, size_t n) {
|
||||
LOG(INFO) << "str = " << str << ", n = " << n;
|
||||
ABSL_LOG(INFO) << "str = " << str << ", n = " << n;
|
||||
return true;
|
||||
}
|
||||
} obj1;
|
||||
@ -147,7 +147,7 @@ TEST(RE2ArgTest, ParseFromTest) {
|
||||
|
||||
struct {
|
||||
bool ParseFrom(const char* str, size_t n) {
|
||||
LOG(INFO) << "str = " << str << ", n = " << n;
|
||||
ABSL_LOG(INFO) << "str = " << str << ", n = " << n;
|
||||
return false;
|
||||
}
|
||||
// Ensure that RE2::Arg works even with overloaded ParseFrom().
|
||||
@ -155,11 +155,10 @@ TEST(RE2ArgTest, ParseFromTest) {
|
||||
} obj2;
|
||||
RE2::Arg arg2(&obj2);
|
||||
EXPECT_FALSE(arg2.Parse("two", 3));
|
||||
#endif
|
||||
}
|
||||
|
||||
TEST(RE2ArgTest, OptionalDoubleTest) {
|
||||
absl::optional<double> opt;
|
||||
std::optional<double> opt;
|
||||
RE2::Arg arg(&opt);
|
||||
EXPECT_TRUE(arg.Parse(NULL, 0));
|
||||
EXPECT_FALSE(opt.has_value());
|
||||
@ -170,7 +169,7 @@ TEST(RE2ArgTest, OptionalDoubleTest) {
|
||||
}
|
||||
|
||||
TEST(RE2ArgTest, OptionalIntWithCRadixTest) {
|
||||
absl::optional<int> opt;
|
||||
std::optional<int> opt;
|
||||
RE2::Arg arg = RE2::CRadix(&opt);
|
||||
EXPECT_TRUE(arg.Parse(NULL, 0));
|
||||
EXPECT_FALSE(opt.has_value());
|
||||
|
||||
74
src/third_party/re2/dist/re2/testing/re2_test.cc
vendored
74
src/third_party/re2/dist/re2/testing/re2_test.cc
vendored
@ -5,26 +5,30 @@
|
||||
|
||||
// TODO: Test extractions for PartialMatch/Consume
|
||||
|
||||
#include "re2/re2.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#if !defined(_MSC_VER) && !defined(__CYGWIN__) && !defined(__MINGW32__)
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h> /* for sysconf */
|
||||
#endif
|
||||
|
||||
#include "absl/base/macros.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "absl/strings/str_format.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "util/logging.h"
|
||||
#include "re2/re2.h"
|
||||
#include "re2/regexp.h"
|
||||
|
||||
#if !defined(_MSC_VER) && !defined(__CYGWIN__) && !defined(__MINGW32__)
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
namespace re2 {
|
||||
|
||||
TEST(RE2, HexTests) {
|
||||
@ -554,14 +558,14 @@ TEST(Capture, NamedGroups) {
|
||||
RE2 re("(hello world)");
|
||||
ASSERT_EQ(re.NumberOfCapturingGroups(), 1);
|
||||
const std::map<std::string, int>& m = re.NamedCapturingGroups();
|
||||
ASSERT_EQ(m.size(), 0);
|
||||
ASSERT_EQ(m.size(), size_t{0});
|
||||
}
|
||||
|
||||
{
|
||||
RE2 re("(?P<A>expr(?P<B>expr)(?P<C>expr))((expr)(?P<D>expr))");
|
||||
ASSERT_EQ(re.NumberOfCapturingGroups(), 6);
|
||||
const std::map<std::string, int>& m = re.NamedCapturingGroups();
|
||||
ASSERT_EQ(m.size(), 4);
|
||||
ASSERT_EQ(m.size(), size_t{4});
|
||||
ASSERT_EQ(m.find("A")->second, 1);
|
||||
ASSERT_EQ(m.find("B")->second, 2);
|
||||
ASSERT_EQ(m.find("C")->second, 3);
|
||||
@ -683,7 +687,7 @@ TEST(RE2, FullMatchStringViewArg) {
|
||||
absl::string_view sp;
|
||||
// string_view-arg
|
||||
ASSERT_TRUE(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &sp, &i));
|
||||
ASSERT_EQ(sp.size(), 4);
|
||||
ASSERT_EQ(sp.size(), size_t{4});
|
||||
ASSERT_TRUE(memcmp(sp.data(), "ruby", 4) == 0);
|
||||
ASSERT_EQ(i, 1234);
|
||||
}
|
||||
@ -773,7 +777,7 @@ TEST(RE2, NULTerminated) {
|
||||
v = static_cast<char*>(mmap(NULL, 2*pagesize, PROT_READ|PROT_WRITE,
|
||||
MAP_ANONYMOUS|MAP_PRIVATE, -1, 0));
|
||||
ASSERT_TRUE(v != reinterpret_cast<char*>(-1));
|
||||
LOG(INFO) << "Memory at " << (void*)v;
|
||||
ABSL_LOG(INFO) << "Memory at " << reinterpret_cast<void*>(v);
|
||||
ASSERT_EQ(munmap(v + pagesize, pagesize), 0) << " error " << errno;
|
||||
v[pagesize - 1] = '1';
|
||||
|
||||
@ -792,6 +796,11 @@ TEST(RE2, FullMatchTypeTests) {
|
||||
ASSERT_TRUE(RE2::FullMatch("Hello", "(H)ello", &c));
|
||||
ASSERT_EQ(c, 'H');
|
||||
}
|
||||
{
|
||||
signed char c;
|
||||
ASSERT_TRUE(RE2::FullMatch("Hello", "(H)ello", &c));
|
||||
ASSERT_EQ(c, static_cast<signed char>('H'));
|
||||
}
|
||||
{
|
||||
unsigned char c;
|
||||
ASSERT_TRUE(RE2::FullMatch("Hello", "(H)ello", &c));
|
||||
@ -837,7 +846,7 @@ TEST(RE2, FullMatchTypeTests) {
|
||||
{
|
||||
uint32_t v;
|
||||
static const uint32_t max = UINT32_C(0xffffffff);
|
||||
ASSERT_TRUE(RE2::FullMatch("100", "(\\d+)", &v)); ASSERT_EQ(v, 100);
|
||||
ASSERT_TRUE(RE2::FullMatch("100", "(\\d+)", &v)); ASSERT_EQ(v, uint32_t{100});
|
||||
ASSERT_TRUE(RE2::FullMatch("4294967295", "(\\d+)", &v)); ASSERT_EQ(v, max);
|
||||
ASSERT_FALSE(RE2::FullMatch("4294967296", "(\\d+)", &v));
|
||||
ASSERT_FALSE(RE2::FullMatch("-1", "(\\d+)", &v));
|
||||
@ -875,7 +884,7 @@ TEST(RE2, FullMatchTypeTests) {
|
||||
static const uint64_t max = UINT64_C(0xffffffffffffffff);
|
||||
std::string str;
|
||||
|
||||
ASSERT_TRUE(RE2::FullMatch("100", "(-?\\d+)", &v)); ASSERT_EQ(v, 100);
|
||||
ASSERT_TRUE(RE2::FullMatch("100", "(-?\\d+)", &v)); ASSERT_EQ(v, uint64_t{100});
|
||||
ASSERT_TRUE(RE2::FullMatch("-100", "(-?\\d+)", &v2)); ASSERT_EQ(v2, -100);
|
||||
|
||||
str = std::to_string(max);
|
||||
@ -893,11 +902,11 @@ TEST(RE2, FloatingPointFullMatchTypes) {
|
||||
float v;
|
||||
ASSERT_TRUE(RE2::FullMatch("100", "(.*)", &v)); ASSERT_EQ(v, 100);
|
||||
ASSERT_TRUE(RE2::FullMatch("-100.", "(.*)", &v)); ASSERT_EQ(v, -100);
|
||||
ASSERT_TRUE(RE2::FullMatch("1e23", "(.*)", &v)); ASSERT_EQ(v, float(1e23));
|
||||
ASSERT_TRUE(RE2::FullMatch("1e23", "(.*)", &v)); ASSERT_EQ(v, float{1e23});
|
||||
ASSERT_TRUE(RE2::FullMatch(" 100", "(.*)", &v)); ASSERT_EQ(v, 100);
|
||||
|
||||
ASSERT_TRUE(RE2::FullMatch(zeros + "1e23", "(.*)", &v));
|
||||
ASSERT_EQ(v, float(1e23));
|
||||
ASSERT_EQ(v, float{1e23});
|
||||
|
||||
// 6700000000081920.1 is an edge case.
|
||||
// 6700000000081920 is exactly halfway between
|
||||
@ -926,9 +935,11 @@ TEST(RE2, FloatingPointFullMatchTypes) {
|
||||
double v;
|
||||
ASSERT_TRUE(RE2::FullMatch("100", "(.*)", &v)); ASSERT_EQ(v, 100);
|
||||
ASSERT_TRUE(RE2::FullMatch("-100.", "(.*)", &v)); ASSERT_EQ(v, -100);
|
||||
ASSERT_TRUE(RE2::FullMatch("1e23", "(.*)", &v)); ASSERT_EQ(v, 1e23);
|
||||
ASSERT_TRUE(RE2::FullMatch("1e23", "(.*)", &v)); ASSERT_EQ(v, double{1e23});
|
||||
ASSERT_TRUE(RE2::FullMatch(" 100", "(.*)", &v)); ASSERT_EQ(v, 100);
|
||||
|
||||
ASSERT_TRUE(RE2::FullMatch(zeros + "1e23", "(.*)", &v));
|
||||
ASSERT_EQ(v, double(1e23));
|
||||
ASSERT_EQ(v, double{1e23});
|
||||
|
||||
ASSERT_TRUE(RE2::FullMatch("0.1", "(.*)", &v));
|
||||
ASSERT_EQ(v, 0.1) << absl::StrFormat("%.17g != %.17g", v, 0.1);
|
||||
@ -1562,7 +1573,7 @@ TEST(RE2, Bug18391750) {
|
||||
|
||||
TEST(RE2, Bug18458852) {
|
||||
// Bug in parser accepting invalid (too large) rune,
|
||||
// causing compiler to fail in DCHECK in UTF-8
|
||||
// causing compiler to fail in ABSL_DCHECK() in UTF-8
|
||||
// character class code.
|
||||
const char b[] = {
|
||||
(char)0x28, (char)0x05, (char)0x05, (char)0x41, (char)0x41, (char)0x28,
|
||||
@ -1598,7 +1609,7 @@ TEST(RE2, Bug18523943) {
|
||||
|
||||
TEST(RE2, Bug21371806) {
|
||||
// Bug in parser accepting Unicode groups in Latin-1 mode,
|
||||
// causing compiler to fail in DCHECK in prog.cc.
|
||||
// causing compiler to fail in ABSL_DCHECK() in prog.cc.
|
||||
|
||||
RE2::Options opt;
|
||||
opt.set_encoding(RE2::Options::EncodingLatin1);
|
||||
@ -1658,4 +1669,31 @@ TEST(RE2, Issue310) {
|
||||
ASSERT_EQ(m, "") << " got m='" << m << "', want ''";
|
||||
}
|
||||
|
||||
TEST(RE2, Issue477) {
|
||||
// Regexp::LeadingString didn't output Latin1 into flags.
|
||||
// In the given pattern, 0xA5 should be factored out, but
|
||||
// shouldn't lose its Latin1-ness in the process. Because
|
||||
// that was happening, the prefix for accel was 0xC2 0xA5
|
||||
// instead of 0xA5. Note that the former doesn't occur in
|
||||
// the given input and so replacements weren't occurring.
|
||||
|
||||
const char bytes[] = {
|
||||
(char)0xa5, (char)0xd1, (char)0xa5, (char)0xd1,
|
||||
(char)0x61, (char)0x63, (char)0xa5, (char)0x64,
|
||||
};
|
||||
std::string s(bytes, ABSL_ARRAYSIZE(bytes));
|
||||
RE2 re("\xa5\xd1|\xa5\x64", RE2::Latin1);
|
||||
int n = RE2::GlobalReplace(&s, re, "");
|
||||
ASSERT_EQ(n, 3);
|
||||
ASSERT_EQ(s, "\x61\x63");
|
||||
}
|
||||
|
||||
TEST(RE2, InitNULL) {
|
||||
// RE2::RE2 accepts NULL. Make sure it keeps doing that.
|
||||
RE2 re(NULL);
|
||||
ASSERT_TRUE(re.ok());
|
||||
ASSERT_TRUE(RE2::FullMatch("", re));
|
||||
ASSERT_TRUE(RE2::FullMatch("", NULL));
|
||||
}
|
||||
|
||||
} // namespace re2
|
||||
|
||||
@ -7,20 +7,22 @@
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <utility>
|
||||
|
||||
#include "absl/container/flat_hash_map.h"
|
||||
#include "absl/flags/flag.h"
|
||||
#include "absl/log/absl_check.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "absl/strings/str_format.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "absl/synchronization/mutex.h"
|
||||
#include "benchmark/benchmark.h"
|
||||
#include "util/logging.h"
|
||||
#include "util/malloc_counter.h"
|
||||
#include "re2/prog.h"
|
||||
#include "re2/re2.h"
|
||||
#include "re2/regexp.h"
|
||||
#include "util/malloc_counter.h"
|
||||
#include "util/pcre.h"
|
||||
|
||||
namespace re2 {
|
||||
@ -34,21 +36,22 @@ namespace re2 {
|
||||
|
||||
void Test() {
|
||||
Regexp* re = Regexp::Parse("(\\d+)-(\\d+)-(\\d+)", Regexp::LikePerl, NULL);
|
||||
CHECK(re);
|
||||
ABSL_CHECK(re);
|
||||
Prog* prog = re->CompileToProg(0);
|
||||
CHECK(prog);
|
||||
CHECK(prog->IsOnePass());
|
||||
CHECK(prog->CanBitState());
|
||||
ABSL_CHECK(prog);
|
||||
ABSL_CHECK(prog->IsOnePass());
|
||||
ABSL_CHECK(prog->CanBitState());
|
||||
const char* text = "650-253-0001";
|
||||
absl::string_view sp[4];
|
||||
CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
|
||||
CHECK_EQ(sp[0], "650-253-0001");
|
||||
CHECK_EQ(sp[1], "650");
|
||||
CHECK_EQ(sp[2], "253");
|
||||
CHECK_EQ(sp[3], "0001");
|
||||
ABSL_CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch,
|
||||
sp, 4));
|
||||
ABSL_CHECK_EQ(sp[0], "650-253-0001");
|
||||
ABSL_CHECK_EQ(sp[1], "650");
|
||||
ABSL_CHECK_EQ(sp[2], "253");
|
||||
ABSL_CHECK_EQ(sp[3], "0001");
|
||||
delete prog;
|
||||
re->Decref();
|
||||
LOG(INFO) << "test passed\n";
|
||||
ABSL_LOG(INFO) << "test passed\n";
|
||||
}
|
||||
|
||||
void MemoryUsage() {
|
||||
@ -57,23 +60,25 @@ void MemoryUsage() {
|
||||
{
|
||||
MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
|
||||
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
|
||||
CHECK(re);
|
||||
// Can't pass mc.HeapGrowth() and mc.PeakHeapGrowth() to LOG(INFO) directly,
|
||||
// because LOG(INFO) might do a big allocation before they get evaluated.
|
||||
ABSL_CHECK(re);
|
||||
// Can't pass mc.HeapGrowth() and mc.PeakHeapGrowth() to ABSL_LOG(INFO)
|
||||
// directly because ABSL_LOG(INFO) might do a big allocation before they
|
||||
// get evaluated.
|
||||
absl::FPrintF(stderr, "Regexp: %7d bytes (peak=%d)\n",
|
||||
mc.HeapGrowth(), mc.PeakHeapGrowth());
|
||||
mc.Reset();
|
||||
|
||||
Prog* prog = re->CompileToProg(0);
|
||||
CHECK(prog);
|
||||
CHECK(prog->IsOnePass());
|
||||
CHECK(prog->CanBitState());
|
||||
ABSL_CHECK(prog);
|
||||
ABSL_CHECK(prog->IsOnePass());
|
||||
ABSL_CHECK(prog->CanBitState());
|
||||
absl::FPrintF(stderr, "Prog: %7d bytes (peak=%d)\n",
|
||||
mc.HeapGrowth(), mc.PeakHeapGrowth());
|
||||
mc.Reset();
|
||||
|
||||
absl::string_view sp[4];
|
||||
CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
|
||||
ABSL_CHECK(prog->SearchOnePass(text, text, Prog::kAnchored,
|
||||
Prog::kFullMatch, sp, 4));
|
||||
absl::FPrintF(stderr, "Search: %7d bytes (peak=%d)\n",
|
||||
mc.HeapGrowth(), mc.PeakHeapGrowth());
|
||||
delete prog;
|
||||
@ -168,7 +173,7 @@ std::string RandomText(int64_t nbytes) {
|
||||
}
|
||||
return text;
|
||||
}();
|
||||
CHECK_LE(nbytes, 16<<20);
|
||||
ABSL_CHECK_LE(nbytes, 16<<20);
|
||||
return text->substr(0, nbytes);
|
||||
}
|
||||
|
||||
@ -319,8 +324,8 @@ void FindAndConsume(benchmark::State& state) {
|
||||
for (auto _ : state) {
|
||||
absl::string_view t = s;
|
||||
absl::string_view u;
|
||||
CHECK(RE2::FindAndConsume(&t, re, &u));
|
||||
CHECK_EQ(u, "Hello World");
|
||||
ABSL_CHECK(RE2::FindAndConsume(&t, re, &u));
|
||||
ABSL_CHECK_EQ(u, "Hello World");
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * state.range(0));
|
||||
}
|
||||
@ -660,7 +665,7 @@ BENCHMARK(Parse_CachedSplitBig2_RE2)->ThreadRange(1, NumCPUs());
|
||||
void ParseRegexp(benchmark::State& state, const std::string& regexp) {
|
||||
for (auto _ : state) {
|
||||
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
|
||||
CHECK(re);
|
||||
ABSL_CHECK(re);
|
||||
re->Decref();
|
||||
}
|
||||
}
|
||||
@ -668,9 +673,9 @@ void ParseRegexp(benchmark::State& state, const std::string& regexp) {
|
||||
void SimplifyRegexp(benchmark::State& state, const std::string& regexp) {
|
||||
for (auto _ : state) {
|
||||
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
|
||||
CHECK(re);
|
||||
ABSL_CHECK(re);
|
||||
Regexp* sre = re->Simplify();
|
||||
CHECK(sre);
|
||||
ABSL_CHECK(sre);
|
||||
sre->Decref();
|
||||
re->Decref();
|
||||
}
|
||||
@ -678,7 +683,7 @@ void SimplifyRegexp(benchmark::State& state, const std::string& regexp) {
|
||||
|
||||
void NullWalkRegexp(benchmark::State& state, const std::string& regexp) {
|
||||
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
|
||||
CHECK(re);
|
||||
ABSL_CHECK(re);
|
||||
for (auto _ : state) {
|
||||
re->NullWalk();
|
||||
}
|
||||
@ -688,11 +693,11 @@ void NullWalkRegexp(benchmark::State& state, const std::string& regexp) {
|
||||
void SimplifyCompileRegexp(benchmark::State& state, const std::string& regexp) {
|
||||
for (auto _ : state) {
|
||||
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
|
||||
CHECK(re);
|
||||
ABSL_CHECK(re);
|
||||
Regexp* sre = re->Simplify();
|
||||
CHECK(sre);
|
||||
ABSL_CHECK(sre);
|
||||
Prog* prog = sre->CompileToProg(0);
|
||||
CHECK(prog);
|
||||
ABSL_CHECK(prog);
|
||||
delete prog;
|
||||
sre->Decref();
|
||||
re->Decref();
|
||||
@ -702,9 +707,9 @@ void SimplifyCompileRegexp(benchmark::State& state, const std::string& regexp) {
|
||||
void CompileRegexp(benchmark::State& state, const std::string& regexp) {
|
||||
for (auto _ : state) {
|
||||
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
|
||||
CHECK(re);
|
||||
ABSL_CHECK(re);
|
||||
Prog* prog = re->CompileToProg(0);
|
||||
CHECK(prog);
|
||||
ABSL_CHECK(prog);
|
||||
delete prog;
|
||||
re->Decref();
|
||||
}
|
||||
@ -712,10 +717,10 @@ void CompileRegexp(benchmark::State& state, const std::string& regexp) {
|
||||
|
||||
void CompileToProg(benchmark::State& state, const std::string& regexp) {
|
||||
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
|
||||
CHECK(re);
|
||||
ABSL_CHECK(re);
|
||||
for (auto _ : state) {
|
||||
Prog* prog = re->CompileToProg(0);
|
||||
CHECK(prog);
|
||||
ABSL_CHECK(prog);
|
||||
delete prog;
|
||||
}
|
||||
re->Decref();
|
||||
@ -723,9 +728,9 @@ void CompileToProg(benchmark::State& state, const std::string& regexp) {
|
||||
|
||||
void CompileByteMap(benchmark::State& state, const std::string& regexp) {
|
||||
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
|
||||
CHECK(re);
|
||||
ABSL_CHECK(re);
|
||||
Prog* prog = re->CompileToProg(0);
|
||||
CHECK(prog);
|
||||
ABSL_CHECK(prog);
|
||||
for (auto _ : state) {
|
||||
prog->ComputeByteMap();
|
||||
}
|
||||
@ -736,14 +741,14 @@ void CompileByteMap(benchmark::State& state, const std::string& regexp) {
|
||||
void CompilePCRE(benchmark::State& state, const std::string& regexp) {
|
||||
for (auto _ : state) {
|
||||
PCRE re(regexp, PCRE::UTF8);
|
||||
CHECK_EQ(re.error(), "");
|
||||
ABSL_CHECK_EQ(re.error(), "");
|
||||
}
|
||||
}
|
||||
|
||||
void CompileRE2(benchmark::State& state, const std::string& regexp) {
|
||||
for (auto _ : state) {
|
||||
RE2 re(regexp);
|
||||
CHECK_EQ(re.error(), "");
|
||||
ABSL_CHECK_EQ(re.error(), "");
|
||||
}
|
||||
}
|
||||
|
||||
@ -862,14 +867,14 @@ void SearchDFA(benchmark::State& state, const char* regexp,
|
||||
bool expect_match) {
|
||||
for (auto _ : state) {
|
||||
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
|
||||
CHECK(re);
|
||||
ABSL_CHECK(re);
|
||||
Prog* prog = re->CompileToProg(0);
|
||||
CHECK(prog);
|
||||
ABSL_CHECK(prog);
|
||||
bool failed = false;
|
||||
CHECK_EQ(prog->SearchDFA(text, absl::string_view(), anchor,
|
||||
Prog::kFirstMatch, NULL, &failed, NULL),
|
||||
expect_match);
|
||||
CHECK(!failed);
|
||||
ABSL_CHECK_EQ(prog->SearchDFA(text, absl::string_view(), anchor,
|
||||
Prog::kFirstMatch, NULL, &failed, NULL),
|
||||
expect_match);
|
||||
ABSL_CHECK(!failed);
|
||||
delete prog;
|
||||
re->Decref();
|
||||
}
|
||||
@ -880,12 +885,12 @@ void SearchNFA(benchmark::State& state, const char* regexp,
|
||||
bool expect_match) {
|
||||
for (auto _ : state) {
|
||||
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
|
||||
CHECK(re);
|
||||
ABSL_CHECK(re);
|
||||
Prog* prog = re->CompileToProg(0);
|
||||
CHECK(prog);
|
||||
CHECK_EQ(prog->SearchNFA(text, absl::string_view(), anchor,
|
||||
Prog::kFirstMatch, NULL, 0),
|
||||
expect_match);
|
||||
ABSL_CHECK(prog);
|
||||
ABSL_CHECK_EQ(prog->SearchNFA(text, absl::string_view(), anchor,
|
||||
Prog::kFirstMatch, NULL, 0),
|
||||
expect_match);
|
||||
delete prog;
|
||||
re->Decref();
|
||||
}
|
||||
@ -896,12 +901,13 @@ void SearchOnePass(benchmark::State& state, const char* regexp,
|
||||
bool expect_match) {
|
||||
for (auto _ : state) {
|
||||
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
|
||||
CHECK(re);
|
||||
ABSL_CHECK(re);
|
||||
Prog* prog = re->CompileToProg(0);
|
||||
CHECK(prog);
|
||||
CHECK(prog->IsOnePass());
|
||||
CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0),
|
||||
expect_match);
|
||||
ABSL_CHECK(prog);
|
||||
ABSL_CHECK(prog->IsOnePass());
|
||||
ABSL_CHECK_EQ(
|
||||
prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0),
|
||||
expect_match);
|
||||
delete prog;
|
||||
re->Decref();
|
||||
}
|
||||
@ -912,12 +918,13 @@ void SearchBitState(benchmark::State& state, const char* regexp,
|
||||
bool expect_match) {
|
||||
for (auto _ : state) {
|
||||
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
|
||||
CHECK(re);
|
||||
ABSL_CHECK(re);
|
||||
Prog* prog = re->CompileToProg(0);
|
||||
CHECK(prog);
|
||||
CHECK(prog->CanBitState());
|
||||
CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
|
||||
expect_match);
|
||||
ABSL_CHECK(prog);
|
||||
ABSL_CHECK(prog->CanBitState());
|
||||
ABSL_CHECK_EQ(
|
||||
prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
|
||||
expect_match);
|
||||
delete prog;
|
||||
re->Decref();
|
||||
}
|
||||
@ -928,11 +935,12 @@ void SearchPCRE(benchmark::State& state, const char* regexp,
|
||||
bool expect_match) {
|
||||
for (auto _ : state) {
|
||||
PCRE re(regexp, PCRE::UTF8);
|
||||
CHECK_EQ(re.error(), "");
|
||||
if (anchor == Prog::kAnchored)
|
||||
CHECK_EQ(PCRE::FullMatch(text, re), expect_match);
|
||||
else
|
||||
CHECK_EQ(PCRE::PartialMatch(text, re), expect_match);
|
||||
ABSL_CHECK_EQ(re.error(), "");
|
||||
if (anchor == Prog::kAnchored) {
|
||||
ABSL_CHECK_EQ(PCRE::FullMatch(text, re), expect_match);
|
||||
} else {
|
||||
ABSL_CHECK_EQ(PCRE::PartialMatch(text, re), expect_match);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -941,11 +949,12 @@ void SearchRE2(benchmark::State& state, const char* regexp,
|
||||
bool expect_match) {
|
||||
for (auto _ : state) {
|
||||
RE2 re(regexp);
|
||||
CHECK_EQ(re.error(), "");
|
||||
if (anchor == Prog::kAnchored)
|
||||
CHECK_EQ(RE2::FullMatch(text, re), expect_match);
|
||||
else
|
||||
CHECK_EQ(RE2::PartialMatch(text, re), expect_match);
|
||||
ABSL_CHECK_EQ(re.error(), "");
|
||||
if (anchor == Prog::kAnchored) {
|
||||
ABSL_CHECK_EQ(RE2::FullMatch(text, re), expect_match);
|
||||
} else {
|
||||
ABSL_CHECK_EQ(RE2::PartialMatch(text, re), expect_match);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -960,9 +969,9 @@ Prog* GetCachedProg(const char* regexp) {
|
||||
Prog* prog = cache[regexp];
|
||||
if (prog == NULL) {
|
||||
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
|
||||
CHECK(re);
|
||||
ABSL_CHECK(re);
|
||||
prog = re->CompileToProg(int64_t{1}<<31); // mostly for the DFA
|
||||
CHECK(prog);
|
||||
ABSL_CHECK(prog);
|
||||
cache[regexp] = prog;
|
||||
re->Decref();
|
||||
// We must call this here - while we have exclusive access.
|
||||
@ -978,7 +987,7 @@ PCRE* GetCachedPCRE(const char* regexp) {
|
||||
PCRE* re = cache[regexp];
|
||||
if (re == NULL) {
|
||||
re = new PCRE(regexp, PCRE::UTF8);
|
||||
CHECK_EQ(re->error(), "");
|
||||
ABSL_CHECK_EQ(re->error(), "");
|
||||
cache[regexp] = re;
|
||||
}
|
||||
return re;
|
||||
@ -991,7 +1000,7 @@ RE2* GetCachedRE2(const char* regexp) {
|
||||
RE2* re = cache[regexp];
|
||||
if (re == NULL) {
|
||||
re = new RE2(regexp);
|
||||
CHECK_EQ(re->error(), "");
|
||||
ABSL_CHECK_EQ(re->error(), "");
|
||||
cache[regexp] = re;
|
||||
}
|
||||
return re;
|
||||
@ -1003,10 +1012,10 @@ void SearchCachedDFA(benchmark::State& state, const char* regexp,
|
||||
Prog* prog = GetCachedProg(regexp);
|
||||
for (auto _ : state) {
|
||||
bool failed = false;
|
||||
CHECK_EQ(prog->SearchDFA(text, absl::string_view(), anchor,
|
||||
ABSL_CHECK_EQ(prog->SearchDFA(text, absl::string_view(), anchor,
|
||||
Prog::kFirstMatch, NULL, &failed, NULL),
|
||||
expect_match);
|
||||
CHECK(!failed);
|
||||
ABSL_CHECK(!failed);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1015,7 +1024,7 @@ void SearchCachedNFA(benchmark::State& state, const char* regexp,
|
||||
bool expect_match) {
|
||||
Prog* prog = GetCachedProg(regexp);
|
||||
for (auto _ : state) {
|
||||
CHECK_EQ(prog->SearchNFA(text, absl::string_view(), anchor,
|
||||
ABSL_CHECK_EQ(prog->SearchNFA(text, absl::string_view(), anchor,
|
||||
Prog::kFirstMatch, NULL, 0),
|
||||
expect_match);
|
||||
}
|
||||
@ -1025,10 +1034,11 @@ void SearchCachedOnePass(benchmark::State& state, const char* regexp,
|
||||
absl::string_view text, Prog::Anchor anchor,
|
||||
bool expect_match) {
|
||||
Prog* prog = GetCachedProg(regexp);
|
||||
CHECK(prog->IsOnePass());
|
||||
ABSL_CHECK(prog->IsOnePass());
|
||||
for (auto _ : state) {
|
||||
CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0),
|
||||
expect_match);
|
||||
ABSL_CHECK_EQ(
|
||||
prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0),
|
||||
expect_match);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1036,10 +1046,11 @@ void SearchCachedBitState(benchmark::State& state, const char* regexp,
|
||||
absl::string_view text, Prog::Anchor anchor,
|
||||
bool expect_match) {
|
||||
Prog* prog = GetCachedProg(regexp);
|
||||
CHECK(prog->CanBitState());
|
||||
ABSL_CHECK(prog->CanBitState());
|
||||
for (auto _ : state) {
|
||||
CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
|
||||
expect_match);
|
||||
ABSL_CHECK_EQ(
|
||||
prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
|
||||
expect_match);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1048,10 +1059,11 @@ void SearchCachedPCRE(benchmark::State& state, const char* regexp,
|
||||
bool expect_match) {
|
||||
PCRE& re = *GetCachedPCRE(regexp);
|
||||
for (auto _ : state) {
|
||||
if (anchor == Prog::kAnchored)
|
||||
CHECK_EQ(PCRE::FullMatch(text, re), expect_match);
|
||||
else
|
||||
CHECK_EQ(PCRE::PartialMatch(text, re), expect_match);
|
||||
if (anchor == Prog::kAnchored) {
|
||||
ABSL_CHECK_EQ(PCRE::FullMatch(text, re), expect_match);
|
||||
} else {
|
||||
ABSL_CHECK_EQ(PCRE::PartialMatch(text, re), expect_match);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1060,10 +1072,11 @@ void SearchCachedRE2(benchmark::State& state, const char* regexp,
|
||||
bool expect_match) {
|
||||
RE2& re = *GetCachedRE2(regexp);
|
||||
for (auto _ : state) {
|
||||
if (anchor == Prog::kAnchored)
|
||||
CHECK_EQ(RE2::FullMatch(text, re), expect_match);
|
||||
else
|
||||
CHECK_EQ(RE2::PartialMatch(text, re), expect_match);
|
||||
if (anchor == Prog::kAnchored) {
|
||||
ABSL_CHECK_EQ(RE2::FullMatch(text, re), expect_match);
|
||||
} else {
|
||||
ABSL_CHECK_EQ(RE2::PartialMatch(text, re), expect_match);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1074,11 +1087,11 @@ void Parse3NFA(benchmark::State& state, const char* regexp,
|
||||
absl::string_view text) {
|
||||
for (auto _ : state) {
|
||||
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
|
||||
CHECK(re);
|
||||
ABSL_CHECK(re);
|
||||
Prog* prog = re->CompileToProg(0);
|
||||
CHECK(prog);
|
||||
ABSL_CHECK(prog);
|
||||
absl::string_view sp[4]; // 4 because sp[0] is whole match.
|
||||
CHECK(prog->SearchNFA(text, absl::string_view(), Prog::kAnchored,
|
||||
ABSL_CHECK(prog->SearchNFA(text, absl::string_view(), Prog::kAnchored,
|
||||
Prog::kFullMatch, sp, 4));
|
||||
delete prog;
|
||||
re->Decref();
|
||||
@ -1089,12 +1102,13 @@ void Parse3OnePass(benchmark::State& state, const char* regexp,
|
||||
absl::string_view text) {
|
||||
for (auto _ : state) {
|
||||
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
|
||||
CHECK(re);
|
||||
ABSL_CHECK(re);
|
||||
Prog* prog = re->CompileToProg(0);
|
||||
CHECK(prog);
|
||||
CHECK(prog->IsOnePass());
|
||||
ABSL_CHECK(prog);
|
||||
ABSL_CHECK(prog->IsOnePass());
|
||||
absl::string_view sp[4]; // 4 because sp[0] is whole match.
|
||||
CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
|
||||
ABSL_CHECK(prog->SearchOnePass(text, text, Prog::kAnchored,
|
||||
Prog::kFullMatch, sp, 4));
|
||||
delete prog;
|
||||
re->Decref();
|
||||
}
|
||||
@ -1104,12 +1118,13 @@ void Parse3BitState(benchmark::State& state, const char* regexp,
|
||||
absl::string_view text) {
|
||||
for (auto _ : state) {
|
||||
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
|
||||
CHECK(re);
|
||||
ABSL_CHECK(re);
|
||||
Prog* prog = re->CompileToProg(0);
|
||||
CHECK(prog);
|
||||
CHECK(prog->CanBitState());
|
||||
ABSL_CHECK(prog);
|
||||
ABSL_CHECK(prog->CanBitState());
|
||||
absl::string_view sp[4]; // 4 because sp[0] is whole match.
|
||||
CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
|
||||
ABSL_CHECK(prog->SearchBitState(text, text, Prog::kAnchored,
|
||||
Prog::kFullMatch, sp, 4));
|
||||
delete prog;
|
||||
re->Decref();
|
||||
}
|
||||
@ -1119,11 +1134,12 @@ void Parse3Backtrack(benchmark::State& state, const char* regexp,
|
||||
absl::string_view text) {
|
||||
for (auto _ : state) {
|
||||
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
|
||||
CHECK(re);
|
||||
ABSL_CHECK(re);
|
||||
Prog* prog = re->CompileToProg(0);
|
||||
CHECK(prog);
|
||||
ABSL_CHECK(prog);
|
||||
absl::string_view sp[4]; // 4 because sp[0] is whole match.
|
||||
CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
|
||||
ABSL_CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored,
|
||||
Prog::kFullMatch, sp, 4));
|
||||
delete prog;
|
||||
re->Decref();
|
||||
}
|
||||
@ -1133,9 +1149,9 @@ void Parse3PCRE(benchmark::State& state, const char* regexp,
|
||||
absl::string_view text) {
|
||||
for (auto _ : state) {
|
||||
PCRE re(regexp, PCRE::UTF8);
|
||||
CHECK_EQ(re.error(), "");
|
||||
ABSL_CHECK_EQ(re.error(), "");
|
||||
absl::string_view sp1, sp2, sp3;
|
||||
CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
|
||||
ABSL_CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
|
||||
}
|
||||
}
|
||||
|
||||
@ -1143,9 +1159,9 @@ void Parse3RE2(benchmark::State& state, const char* regexp,
|
||||
absl::string_view text) {
|
||||
for (auto _ : state) {
|
||||
RE2 re(regexp);
|
||||
CHECK_EQ(re.error(), "");
|
||||
ABSL_CHECK_EQ(re.error(), "");
|
||||
absl::string_view sp1, sp2, sp3;
|
||||
CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
|
||||
ABSL_CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
|
||||
}
|
||||
}
|
||||
|
||||
@ -1154,7 +1170,7 @@ void Parse3CachedNFA(benchmark::State& state, const char* regexp,
|
||||
Prog* prog = GetCachedProg(regexp);
|
||||
absl::string_view sp[4]; // 4 because sp[0] is whole match.
|
||||
for (auto _ : state) {
|
||||
CHECK(prog->SearchNFA(text, absl::string_view(), Prog::kAnchored,
|
||||
ABSL_CHECK(prog->SearchNFA(text, absl::string_view(), Prog::kAnchored,
|
||||
Prog::kFullMatch, sp, 4));
|
||||
}
|
||||
}
|
||||
@ -1162,20 +1178,22 @@ void Parse3CachedNFA(benchmark::State& state, const char* regexp,
|
||||
void Parse3CachedOnePass(benchmark::State& state, const char* regexp,
|
||||
absl::string_view text) {
|
||||
Prog* prog = GetCachedProg(regexp);
|
||||
CHECK(prog->IsOnePass());
|
||||
ABSL_CHECK(prog->IsOnePass());
|
||||
absl::string_view sp[4]; // 4 because sp[0] is whole match.
|
||||
for (auto _ : state) {
|
||||
CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
|
||||
ABSL_CHECK(prog->SearchOnePass(text, text, Prog::kAnchored,
|
||||
Prog::kFullMatch, sp, 4));
|
||||
}
|
||||
}
|
||||
|
||||
void Parse3CachedBitState(benchmark::State& state, const char* regexp,
|
||||
absl::string_view text) {
|
||||
Prog* prog = GetCachedProg(regexp);
|
||||
CHECK(prog->CanBitState());
|
||||
ABSL_CHECK(prog->CanBitState());
|
||||
absl::string_view sp[4]; // 4 because sp[0] is whole match.
|
||||
for (auto _ : state) {
|
||||
CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
|
||||
ABSL_CHECK(prog->SearchBitState(text, text, Prog::kAnchored,
|
||||
Prog::kFullMatch, sp, 4));
|
||||
}
|
||||
}
|
||||
|
||||
@ -1184,7 +1202,8 @@ void Parse3CachedBacktrack(benchmark::State& state, const char* regexp,
|
||||
Prog* prog = GetCachedProg(regexp);
|
||||
absl::string_view sp[4]; // 4 because sp[0] is whole match.
|
||||
for (auto _ : state) {
|
||||
CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
|
||||
ABSL_CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored,
|
||||
Prog::kFullMatch, sp, 4));
|
||||
}
|
||||
}
|
||||
|
||||
@ -1193,7 +1212,7 @@ void Parse3CachedPCRE(benchmark::State& state, const char* regexp,
|
||||
PCRE& re = *GetCachedPCRE(regexp);
|
||||
absl::string_view sp1, sp2, sp3;
|
||||
for (auto _ : state) {
|
||||
CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
|
||||
ABSL_CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
|
||||
}
|
||||
}
|
||||
|
||||
@ -1202,7 +1221,7 @@ void Parse3CachedRE2(benchmark::State& state, const char* regexp,
|
||||
RE2& re = *GetCachedRE2(regexp);
|
||||
absl::string_view sp1, sp2, sp3;
|
||||
for (auto _ : state) {
|
||||
CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
|
||||
ABSL_CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
|
||||
}
|
||||
}
|
||||
|
||||
@ -1213,12 +1232,12 @@ void Parse1NFA(benchmark::State& state, const char* regexp,
|
||||
absl::string_view text) {
|
||||
for (auto _ : state) {
|
||||
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
|
||||
CHECK(re);
|
||||
ABSL_CHECK(re);
|
||||
Prog* prog = re->CompileToProg(0);
|
||||
CHECK(prog);
|
||||
ABSL_CHECK(prog);
|
||||
absl::string_view sp[2]; // 2 because sp[0] is whole match.
|
||||
CHECK(prog->SearchNFA(text, absl::string_view(), Prog::kAnchored,
|
||||
Prog::kFullMatch, sp, 2));
|
||||
ABSL_CHECK(prog->SearchNFA(text, absl::string_view(), Prog::kAnchored,
|
||||
Prog::kFullMatch, sp, 2));
|
||||
delete prog;
|
||||
re->Decref();
|
||||
}
|
||||
@ -1228,12 +1247,13 @@ void Parse1OnePass(benchmark::State& state, const char* regexp,
|
||||
absl::string_view text) {
|
||||
for (auto _ : state) {
|
||||
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
|
||||
CHECK(re);
|
||||
ABSL_CHECK(re);
|
||||
Prog* prog = re->CompileToProg(0);
|
||||
CHECK(prog);
|
||||
CHECK(prog->IsOnePass());
|
||||
ABSL_CHECK(prog);
|
||||
ABSL_CHECK(prog->IsOnePass());
|
||||
absl::string_view sp[2]; // 2 because sp[0] is whole match.
|
||||
CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
|
||||
ABSL_CHECK(prog->SearchOnePass(text, text, Prog::kAnchored,
|
||||
Prog::kFullMatch, sp, 2));
|
||||
delete prog;
|
||||
re->Decref();
|
||||
}
|
||||
@ -1243,12 +1263,13 @@ void Parse1BitState(benchmark::State& state, const char* regexp,
|
||||
absl::string_view text) {
|
||||
for (auto _ : state) {
|
||||
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
|
||||
CHECK(re);
|
||||
ABSL_CHECK(re);
|
||||
Prog* prog = re->CompileToProg(0);
|
||||
CHECK(prog);
|
||||
CHECK(prog->CanBitState());
|
||||
ABSL_CHECK(prog);
|
||||
ABSL_CHECK(prog->CanBitState());
|
||||
absl::string_view sp[2]; // 2 because sp[0] is whole match.
|
||||
CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
|
||||
ABSL_CHECK(prog->SearchBitState(text, text, Prog::kAnchored,
|
||||
Prog::kFullMatch, sp, 2));
|
||||
delete prog;
|
||||
re->Decref();
|
||||
}
|
||||
@ -1258,9 +1279,9 @@ void Parse1PCRE(benchmark::State& state, const char* regexp,
|
||||
absl::string_view text) {
|
||||
for (auto _ : state) {
|
||||
PCRE re(regexp, PCRE::UTF8);
|
||||
CHECK_EQ(re.error(), "");
|
||||
ABSL_CHECK_EQ(re.error(), "");
|
||||
absl::string_view sp1;
|
||||
CHECK(PCRE::FullMatch(text, re, &sp1));
|
||||
ABSL_CHECK(PCRE::FullMatch(text, re, &sp1));
|
||||
}
|
||||
}
|
||||
|
||||
@ -1268,9 +1289,9 @@ void Parse1RE2(benchmark::State& state, const char* regexp,
|
||||
absl::string_view text) {
|
||||
for (auto _ : state) {
|
||||
RE2 re(regexp);
|
||||
CHECK_EQ(re.error(), "");
|
||||
ABSL_CHECK_EQ(re.error(), "");
|
||||
absl::string_view sp1;
|
||||
CHECK(RE2::FullMatch(text, re, &sp1));
|
||||
ABSL_CHECK(RE2::FullMatch(text, re, &sp1));
|
||||
}
|
||||
}
|
||||
|
||||
@ -1279,7 +1300,7 @@ void Parse1CachedNFA(benchmark::State& state, const char* regexp,
|
||||
Prog* prog = GetCachedProg(regexp);
|
||||
absl::string_view sp[2]; // 2 because sp[0] is whole match.
|
||||
for (auto _ : state) {
|
||||
CHECK(prog->SearchNFA(text, absl::string_view(), Prog::kAnchored,
|
||||
ABSL_CHECK(prog->SearchNFA(text, absl::string_view(), Prog::kAnchored,
|
||||
Prog::kFullMatch, sp, 2));
|
||||
}
|
||||
}
|
||||
@ -1287,20 +1308,22 @@ void Parse1CachedNFA(benchmark::State& state, const char* regexp,
|
||||
void Parse1CachedOnePass(benchmark::State& state, const char* regexp,
|
||||
absl::string_view text) {
|
||||
Prog* prog = GetCachedProg(regexp);
|
||||
CHECK(prog->IsOnePass());
|
||||
ABSL_CHECK(prog->IsOnePass());
|
||||
absl::string_view sp[2]; // 2 because sp[0] is whole match.
|
||||
for (auto _ : state) {
|
||||
CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
|
||||
ABSL_CHECK(prog->SearchOnePass(text, text, Prog::kAnchored,
|
||||
Prog::kFullMatch, sp, 2));
|
||||
}
|
||||
}
|
||||
|
||||
void Parse1CachedBitState(benchmark::State& state, const char* regexp,
|
||||
absl::string_view text) {
|
||||
Prog* prog = GetCachedProg(regexp);
|
||||
CHECK(prog->CanBitState());
|
||||
ABSL_CHECK(prog->CanBitState());
|
||||
absl::string_view sp[2]; // 2 because sp[0] is whole match.
|
||||
for (auto _ : state) {
|
||||
CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
|
||||
ABSL_CHECK(prog->SearchBitState(text, text, Prog::kAnchored,
|
||||
Prog::kFullMatch, sp, 2));
|
||||
}
|
||||
}
|
||||
|
||||
@ -1309,7 +1332,8 @@ void Parse1CachedBacktrack(benchmark::State& state, const char* regexp,
|
||||
Prog* prog = GetCachedProg(regexp);
|
||||
absl::string_view sp[2]; // 2 because sp[0] is whole match.
|
||||
for (auto _ : state) {
|
||||
CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
|
||||
ABSL_CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored,
|
||||
Prog::kFullMatch, sp, 2));
|
||||
}
|
||||
}
|
||||
|
||||
@ -1318,7 +1342,7 @@ void Parse1CachedPCRE(benchmark::State& state, const char* regexp,
|
||||
PCRE& re = *GetCachedPCRE(regexp);
|
||||
absl::string_view sp1;
|
||||
for (auto _ : state) {
|
||||
CHECK(PCRE::FullMatch(text, re, &sp1));
|
||||
ABSL_CHECK(PCRE::FullMatch(text, re, &sp1));
|
||||
}
|
||||
}
|
||||
|
||||
@ -1327,7 +1351,7 @@ void Parse1CachedRE2(benchmark::State& state, const char* regexp,
|
||||
RE2& re = *GetCachedRE2(regexp);
|
||||
absl::string_view sp1;
|
||||
for (auto _ : state) {
|
||||
CHECK(RE2::FullMatch(text, re, &sp1));
|
||||
ABSL_CHECK(RE2::FullMatch(text, re, &sp1));
|
||||
}
|
||||
}
|
||||
|
||||
@ -1336,7 +1360,7 @@ void SearchParse2CachedPCRE(benchmark::State& state, const char* regexp,
|
||||
PCRE& re = *GetCachedPCRE(regexp);
|
||||
for (auto _ : state) {
|
||||
absl::string_view sp1, sp2;
|
||||
CHECK(PCRE::PartialMatch(text, re, &sp1, &sp2));
|
||||
ABSL_CHECK(PCRE::PartialMatch(text, re, &sp1, &sp2));
|
||||
}
|
||||
}
|
||||
|
||||
@ -1345,7 +1369,7 @@ void SearchParse2CachedRE2(benchmark::State& state, const char* regexp,
|
||||
RE2& re = *GetCachedRE2(regexp);
|
||||
for (auto _ : state) {
|
||||
absl::string_view sp1, sp2;
|
||||
CHECK(RE2::PartialMatch(text, re, &sp1, &sp2));
|
||||
ABSL_CHECK(RE2::PartialMatch(text, re, &sp1, &sp2));
|
||||
}
|
||||
}
|
||||
|
||||
@ -1354,7 +1378,7 @@ void SearchParse1CachedPCRE(benchmark::State& state, const char* regexp,
|
||||
PCRE& re = *GetCachedPCRE(regexp);
|
||||
for (auto _ : state) {
|
||||
absl::string_view sp1;
|
||||
CHECK(PCRE::PartialMatch(text, re, &sp1));
|
||||
ABSL_CHECK(PCRE::PartialMatch(text, re, &sp1));
|
||||
}
|
||||
}
|
||||
|
||||
@ -1363,7 +1387,7 @@ void SearchParse1CachedRE2(benchmark::State& state, const char* regexp,
|
||||
RE2& re = *GetCachedRE2(regexp);
|
||||
for (auto _ : state) {
|
||||
absl::string_view sp1;
|
||||
CHECK(RE2::PartialMatch(text, re, &sp1));
|
||||
ABSL_CHECK(RE2::PartialMatch(text, re, &sp1));
|
||||
}
|
||||
}
|
||||
|
||||
@ -1499,7 +1523,7 @@ void FullMatchPCRE(benchmark::State& state, const char *regexp) {
|
||||
s += "ABCDEFGHIJ";
|
||||
PCRE re(regexp);
|
||||
for (auto _ : state) {
|
||||
CHECK(PCRE::FullMatch(s, re));
|
||||
ABSL_CHECK(PCRE::FullMatch(s, re));
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * state.range(0));
|
||||
}
|
||||
@ -1509,19 +1533,31 @@ void FullMatchRE2(benchmark::State& state, const char *regexp) {
|
||||
s += "ABCDEFGHIJ";
|
||||
RE2 re(regexp, RE2::Latin1);
|
||||
for (auto _ : state) {
|
||||
CHECK(RE2::FullMatch(s, re));
|
||||
ABSL_CHECK(RE2::FullMatch(s, re));
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * state.range(0));
|
||||
}
|
||||
|
||||
void FullMatch_DotStar_CachedPCRE(benchmark::State& state) { FullMatchPCRE(state, "(?s).*"); }
|
||||
void FullMatch_DotStar_CachedRE2(benchmark::State& state) { FullMatchRE2(state, "(?s).*"); }
|
||||
void FullMatch_DotStar_CachedPCRE(benchmark::State& state) {
|
||||
FullMatchPCRE(state, "(?s).*");
|
||||
}
|
||||
void FullMatch_DotStar_CachedRE2(benchmark::State& state) {
|
||||
FullMatchRE2(state, "(?s).*");
|
||||
}
|
||||
|
||||
void FullMatch_DotStarDollar_CachedPCRE(benchmark::State& state) { FullMatchPCRE(state, "(?s).*$"); }
|
||||
void FullMatch_DotStarDollar_CachedRE2(benchmark::State& state) { FullMatchRE2(state, "(?s).*$"); }
|
||||
void FullMatch_DotStarDollar_CachedPCRE(benchmark::State& state) {
|
||||
FullMatchPCRE(state, "(?s).*$");
|
||||
}
|
||||
void FullMatch_DotStarDollar_CachedRE2(benchmark::State& state) {
|
||||
FullMatchRE2(state, "(?s).*$");
|
||||
}
|
||||
|
||||
void FullMatch_DotStarCapture_CachedPCRE(benchmark::State& state) { FullMatchPCRE(state, "(?s)((.*)()()($))"); }
|
||||
void FullMatch_DotStarCapture_CachedRE2(benchmark::State& state) { FullMatchRE2(state, "(?s)((.*)()()($))"); }
|
||||
void FullMatch_DotStarCapture_CachedPCRE(benchmark::State& state) {
|
||||
FullMatchPCRE(state, "(?s)((.*)()()($))");
|
||||
}
|
||||
void FullMatch_DotStarCapture_CachedRE2(benchmark::State& state) {
|
||||
FullMatchRE2(state, "(?s)((.*)()()($))");
|
||||
}
|
||||
|
||||
#ifdef USEPCRE
|
||||
BENCHMARK_RANGE(FullMatch_DotStar_CachedPCRE, 8, 2<<20);
|
||||
@ -1544,7 +1580,7 @@ void PossibleMatchRangeCommon(benchmark::State& state, const char* regexp) {
|
||||
std::string max;
|
||||
const int kMaxLen = 16;
|
||||
for (auto _ : state) {
|
||||
CHECK(re.PossibleMatchRange(&min, &max, kMaxLen));
|
||||
ABSL_CHECK(re.PossibleMatchRange(&min, &max, kMaxLen));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -20,22 +20,26 @@
|
||||
// Then RunPostfix turns each sequence into a regular expression
|
||||
// and passes the regexp to HandleRegexp.
|
||||
|
||||
#include "re2/testing/regexp_generator.h"
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <memory>
|
||||
#include <random>
|
||||
#include <stack>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/base/macros.h"
|
||||
#include "absl/log/absl_check.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "absl/strings/escaping.h"
|
||||
#include "absl/strings/str_format.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "util/logging.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "util/utf.h"
|
||||
#include "re2/testing/regexp_generator.h"
|
||||
|
||||
namespace re2 {
|
||||
|
||||
@ -196,13 +200,13 @@ void RegexpGenerator::RunPostfix(const std::vector<std::string>& post) {
|
||||
for (size_t i = 0; i < post.size(); i++) {
|
||||
switch (CountArgs(post[i])) {
|
||||
default:
|
||||
LOG(FATAL) << "Bad operator: " << post[i];
|
||||
ABSL_LOG(FATAL) << "Bad operator: " << post[i];
|
||||
case 0:
|
||||
regexps.push(post[i]);
|
||||
break;
|
||||
case 1: {
|
||||
auto fmt = absl::ParsedFormat<'s'>::New(post[i]);
|
||||
CHECK(fmt != nullptr);
|
||||
ABSL_CHECK(fmt != nullptr);
|
||||
std::string a = regexps.top();
|
||||
regexps.pop();
|
||||
regexps.push("(?:" + absl::StrFormat(*fmt, a) + ")");
|
||||
@ -210,7 +214,7 @@ void RegexpGenerator::RunPostfix(const std::vector<std::string>& post) {
|
||||
}
|
||||
case 2: {
|
||||
auto fmt = absl::ParsedFormat<'s', 's'>::New(post[i]);
|
||||
CHECK(fmt != nullptr);
|
||||
ABSL_CHECK(fmt != nullptr);
|
||||
std::string b = regexps.top();
|
||||
regexps.pop();
|
||||
std::string a = regexps.top();
|
||||
@ -232,7 +236,7 @@ void RegexpGenerator::RunPostfix(const std::vector<std::string>& post) {
|
||||
absl::PrintF(" %s\n", absl::CEscape(regexps.top()));
|
||||
regexps.pop();
|
||||
}
|
||||
LOG(FATAL) << "Bad regexp program.";
|
||||
ABSL_LOG(FATAL) << "Bad regexp program.";
|
||||
}
|
||||
|
||||
HandleRegexp(regexps.top());
|
||||
|
||||
@ -9,6 +9,7 @@
|
||||
// regular expressions within given parameters (see below for details).
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <random>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
@ -4,14 +4,15 @@
|
||||
|
||||
// Test parse.cc, dump.cc, and tostring.cc.
|
||||
|
||||
#include "re2/regexp.h"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "util/logging.h"
|
||||
#include "re2/regexp.h"
|
||||
|
||||
namespace re2 {
|
||||
|
||||
@ -53,8 +54,8 @@ TEST(Regexp, NamedCaptures) {
|
||||
EXPECT_EQ(4, x->NumCaptures());
|
||||
const std::map<std::string, int>* have = x->NamedCaptures();
|
||||
EXPECT_TRUE(have != NULL);
|
||||
EXPECT_EQ(2, have->size()); // there are only two named groups in
|
||||
// the regexp: 'g1' and 'g2'.
|
||||
// there are only two named groups in the regexp: 'g1' and 'g2'.
|
||||
EXPECT_EQ(size_t{2}, have->size());
|
||||
std::map<std::string, int> want;
|
||||
want["g1"] = 1;
|
||||
want["g2"] = 3;
|
||||
@ -72,7 +73,7 @@ TEST(Regexp, CaptureNames) {
|
||||
EXPECT_EQ(4, x->NumCaptures());
|
||||
const std::map<int, std::string>* have = x->CaptureNames();
|
||||
EXPECT_TRUE(have != NULL);
|
||||
EXPECT_EQ(3, have->size());
|
||||
EXPECT_EQ(size_t{3}, have->size());
|
||||
std::map<int, std::string> want;
|
||||
want[1] = "g1";
|
||||
want[3] = "g2";
|
||||
|
||||
@ -2,11 +2,12 @@
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "absl/base/macros.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "util/logging.h"
|
||||
#include "re2/prog.h"
|
||||
#include "re2/regexp.h"
|
||||
|
||||
|
||||
@ -2,12 +2,15 @@
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/base/macros.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "re2/prog.h"
|
||||
#include "re2/regexp.h"
|
||||
#include "re2/testing/tester.h"
|
||||
#include "re2/testing/exhaustive_tester.h"
|
||||
#include "re2/testing/tester.h"
|
||||
|
||||
// For target `log' in the Makefile.
|
||||
#ifndef LOGGING
|
||||
|
||||
61
src/third_party/re2/dist/re2/testing/set_test.cc
vendored
61
src/third_party/re2/dist/re2/testing/set_test.cc
vendored
@ -2,25 +2,30 @@
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "re2/set.h"
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "util/logging.h"
|
||||
#include "re2/re2.h"
|
||||
#include "re2/set.h"
|
||||
|
||||
namespace re2 {
|
||||
|
||||
TEST(Set, Unanchored) {
|
||||
RE2::Set s(RE2::DefaultOptions, RE2::UNANCHORED);
|
||||
|
||||
ASSERT_EQ(s.Size(), 0);
|
||||
ASSERT_EQ(s.Add("foo", NULL), 0);
|
||||
ASSERT_EQ(s.Size(), 1);
|
||||
ASSERT_EQ(s.Add("(", NULL), -1);
|
||||
ASSERT_EQ(s.Size(), 1);
|
||||
ASSERT_EQ(s.Add("bar", NULL), 1);
|
||||
ASSERT_EQ(s.Size(), 2);
|
||||
ASSERT_EQ(s.Compile(), true);
|
||||
ASSERT_EQ(s.Size(), 2);
|
||||
|
||||
ASSERT_EQ(s.Match("foobar", NULL), true);
|
||||
ASSERT_EQ(s.Match("fooba", NULL), true);
|
||||
@ -28,16 +33,16 @@ TEST(Set, Unanchored) {
|
||||
|
||||
std::vector<int> v;
|
||||
ASSERT_EQ(s.Match("foobar", &v), true);
|
||||
ASSERT_EQ(v.size(), 2);
|
||||
ASSERT_EQ(v.size(), size_t{2});
|
||||
ASSERT_EQ(v[0], 0);
|
||||
ASSERT_EQ(v[1], 1);
|
||||
|
||||
ASSERT_EQ(s.Match("fooba", &v), true);
|
||||
ASSERT_EQ(v.size(), 1);
|
||||
ASSERT_EQ(v.size(), size_t{1});
|
||||
ASSERT_EQ(v[0], 0);
|
||||
|
||||
ASSERT_EQ(s.Match("oobar", &v), true);
|
||||
ASSERT_EQ(v.size(), 1);
|
||||
ASSERT_EQ(v.size(), size_t{1});
|
||||
ASSERT_EQ(v[0], 1);
|
||||
}
|
||||
|
||||
@ -56,21 +61,21 @@ TEST(Set, UnanchoredFactored) {
|
||||
|
||||
std::vector<int> v;
|
||||
ASSERT_EQ(s.Match("foobar", &v), true);
|
||||
ASSERT_EQ(v.size(), 2);
|
||||
ASSERT_EQ(v.size(), size_t{2});
|
||||
ASSERT_EQ(v[0], 0);
|
||||
ASSERT_EQ(v[1], 1);
|
||||
|
||||
ASSERT_EQ(s.Match("obarfoobaroo", &v), true);
|
||||
ASSERT_EQ(v.size(), 2);
|
||||
ASSERT_EQ(v.size(), size_t{2});
|
||||
ASSERT_EQ(v[0], 0);
|
||||
ASSERT_EQ(v[1], 1);
|
||||
|
||||
ASSERT_EQ(s.Match("fooba", &v), true);
|
||||
ASSERT_EQ(v.size(), 1);
|
||||
ASSERT_EQ(v.size(), size_t{1});
|
||||
ASSERT_EQ(v[0], 0);
|
||||
|
||||
ASSERT_EQ(s.Match("oobar", &v), false);
|
||||
ASSERT_EQ(v.size(), 0);
|
||||
ASSERT_EQ(v.size(), size_t{0});
|
||||
}
|
||||
|
||||
TEST(Set, UnanchoredDollar) {
|
||||
@ -84,11 +89,11 @@ TEST(Set, UnanchoredDollar) {
|
||||
|
||||
std::vector<int> v;
|
||||
ASSERT_EQ(s.Match("foo", &v), true);
|
||||
ASSERT_EQ(v.size(), 1);
|
||||
ASSERT_EQ(v.size(), size_t{1});
|
||||
ASSERT_EQ(v[0], 0);
|
||||
|
||||
ASSERT_EQ(s.Match("foobar", &v), false);
|
||||
ASSERT_EQ(v.size(), 0);
|
||||
ASSERT_EQ(v.size(), size_t{0});
|
||||
}
|
||||
|
||||
TEST(Set, UnanchoredWordBoundary) {
|
||||
@ -103,14 +108,14 @@ TEST(Set, UnanchoredWordBoundary) {
|
||||
|
||||
std::vector<int> v;
|
||||
ASSERT_EQ(s.Match("foo", &v), true);
|
||||
ASSERT_EQ(v.size(), 1);
|
||||
ASSERT_EQ(v.size(), size_t{1});
|
||||
ASSERT_EQ(v[0], 0);
|
||||
|
||||
ASSERT_EQ(s.Match("foobar", &v), false);
|
||||
ASSERT_EQ(v.size(), 0);
|
||||
ASSERT_EQ(v.size(), size_t{0});
|
||||
|
||||
ASSERT_EQ(s.Match("foo bar", &v), true);
|
||||
ASSERT_EQ(v.size(), 1);
|
||||
ASSERT_EQ(v.size(), size_t{1});
|
||||
ASSERT_EQ(v[0], 0);
|
||||
}
|
||||
|
||||
@ -130,20 +135,20 @@ TEST(Set, Anchored) {
|
||||
|
||||
std::vector<int> v;
|
||||
ASSERT_EQ(s.Match("foobar", &v), false);
|
||||
ASSERT_EQ(v.size(), 0);
|
||||
ASSERT_EQ(v.size(), size_t{0});
|
||||
|
||||
ASSERT_EQ(s.Match("fooba", &v), false);
|
||||
ASSERT_EQ(v.size(), 0);
|
||||
ASSERT_EQ(v.size(), size_t{0});
|
||||
|
||||
ASSERT_EQ(s.Match("oobar", &v), false);
|
||||
ASSERT_EQ(v.size(), 0);
|
||||
ASSERT_EQ(v.size(), size_t{0});
|
||||
|
||||
ASSERT_EQ(s.Match("foo", &v), true);
|
||||
ASSERT_EQ(v.size(), 1);
|
||||
ASSERT_EQ(v.size(), size_t{1});
|
||||
ASSERT_EQ(v[0], 0);
|
||||
|
||||
ASSERT_EQ(s.Match("bar", &v), true);
|
||||
ASSERT_EQ(v.size(), 1);
|
||||
ASSERT_EQ(v.size(), size_t{1});
|
||||
ASSERT_EQ(v[0], 1);
|
||||
}
|
||||
|
||||
@ -157,10 +162,10 @@ TEST(Set, EmptyUnanchored) {
|
||||
|
||||
std::vector<int> v;
|
||||
ASSERT_EQ(s.Match("", &v), false);
|
||||
ASSERT_EQ(v.size(), 0);
|
||||
ASSERT_EQ(v.size(), size_t{0});
|
||||
|
||||
ASSERT_EQ(s.Match("foobar", &v), false);
|
||||
ASSERT_EQ(v.size(), 0);
|
||||
ASSERT_EQ(v.size(), size_t{0});
|
||||
}
|
||||
|
||||
TEST(Set, EmptyAnchored) {
|
||||
@ -173,10 +178,10 @@ TEST(Set, EmptyAnchored) {
|
||||
|
||||
std::vector<int> v;
|
||||
ASSERT_EQ(s.Match("", &v), false);
|
||||
ASSERT_EQ(v.size(), 0);
|
||||
ASSERT_EQ(v.size(), size_t{0});
|
||||
|
||||
ASSERT_EQ(s.Match("foobar", &v), false);
|
||||
ASSERT_EQ(v.size(), 0);
|
||||
ASSERT_EQ(v.size(), size_t{0});
|
||||
}
|
||||
|
||||
TEST(Set, Prefix) {
|
||||
@ -191,14 +196,14 @@ TEST(Set, Prefix) {
|
||||
|
||||
std::vector<int> v;
|
||||
ASSERT_EQ(s.Match("/prefix", &v), false);
|
||||
ASSERT_EQ(v.size(), 0);
|
||||
ASSERT_EQ(v.size(), size_t{0});
|
||||
|
||||
ASSERT_EQ(s.Match("/prefix/", &v), true);
|
||||
ASSERT_EQ(v.size(), 1);
|
||||
ASSERT_EQ(v.size(), size_t{1});
|
||||
ASSERT_EQ(v[0], 0);
|
||||
|
||||
ASSERT_EQ(s.Match("/prefix/42", &v), true);
|
||||
ASSERT_EQ(v.size(), 1);
|
||||
ASSERT_EQ(v.size(), size_t{1});
|
||||
ASSERT_EQ(v[0], 0);
|
||||
}
|
||||
|
||||
|
||||
@ -5,11 +5,10 @@
|
||||
// Test simplify.cc.
|
||||
|
||||
#include <string.h>
|
||||
#include <string>
|
||||
|
||||
#include "absl/base/macros.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "util/logging.h"
|
||||
#include "re2/regexp.h"
|
||||
|
||||
namespace re2 {
|
||||
@ -264,7 +263,7 @@ static Test tests[] = {
|
||||
TEST(TestSimplify, SimpleRegexps) {
|
||||
for (size_t i = 0; i < ABSL_ARRAYSIZE(tests); i++) {
|
||||
RegexpStatus status;
|
||||
VLOG(1) << "Testing " << tests[i].regexp;
|
||||
ABSL_VLOG(1) << "Testing " << tests[i].regexp;
|
||||
Regexp* re = Regexp::Parse(tests[i].regexp,
|
||||
Regexp::MatchNL | (Regexp::LikePerl &
|
||||
~Regexp::OneLine),
|
||||
|
||||
@ -6,14 +6,17 @@
|
||||
// maxlen letters using the set of letters in alpha.
|
||||
// Fetch strings using a Java-like Next()/HasNext() interface.
|
||||
|
||||
#include "re2/testing/string_generator.h"
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <random>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "util/logging.h"
|
||||
#include "re2/testing/string_generator.h"
|
||||
#include "absl/log/absl_check.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
|
||||
namespace re2 {
|
||||
|
||||
@ -82,7 +85,7 @@ bool StringGenerator::RandomDigits() {
|
||||
// after computing the string, so that it knows the answer
|
||||
// for subsequent HasNext() calls.
|
||||
absl::string_view StringGenerator::Next() {
|
||||
CHECK(hasnext_);
|
||||
ABSL_CHECK(hasnext_);
|
||||
if (generate_null_) {
|
||||
generate_null_ = false;
|
||||
sp_ = absl::string_view();
|
||||
@ -112,8 +115,8 @@ void StringGenerator::GenerateNULL() {
|
||||
}
|
||||
|
||||
std::string DeBruijnString(int n) {
|
||||
CHECK_GE(n, 1);
|
||||
CHECK_LE(n, 29);
|
||||
ABSL_CHECK_GE(n, 1);
|
||||
ABSL_CHECK_LE(n, 29);
|
||||
const size_t size = size_t{1} << static_cast<size_t>(n);
|
||||
const size_t mask = size - 1;
|
||||
std::vector<bool> did(size, false);
|
||||
@ -131,10 +134,10 @@ std::string DeBruijnString(int n) {
|
||||
} else {
|
||||
s += '0';
|
||||
}
|
||||
CHECK(!did[bits]);
|
||||
ABSL_CHECK(!did[bits]);
|
||||
did[bits] = true;
|
||||
}
|
||||
CHECK_EQ(s.size(), static_cast<size_t>(n - 1) + size);
|
||||
ABSL_CHECK_EQ(s.size(), static_cast<size_t>(n - 1) + size);
|
||||
return s;
|
||||
}
|
||||
|
||||
|
||||
@ -10,6 +10,7 @@
|
||||
// Fetch strings using a Java-like Next()/HasNext() interface.
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <random>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
@ -4,13 +4,17 @@
|
||||
|
||||
// Test StringGenerator.
|
||||
|
||||
#include "re2/testing/string_generator.h"
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "util/utf.h"
|
||||
#include "re2/testing/string_generator.h"
|
||||
#include "re2/testing/regexp_generator.h"
|
||||
#include "util/utf.h"
|
||||
|
||||
namespace re2 {
|
||||
|
||||
@ -43,7 +47,7 @@ static void RunTest(int len, const std::string& alphabet, bool donull) {
|
||||
EXPECT_TRUE(g.HasNext());
|
||||
absl::string_view sp = g.Next();
|
||||
EXPECT_EQ(sp.data(), static_cast<const char*>(NULL));
|
||||
EXPECT_EQ(sp.size(), 0);
|
||||
EXPECT_EQ(sp.size(), size_t{0});
|
||||
}
|
||||
|
||||
while (g.HasNext()) {
|
||||
|
||||
86
src/third_party/re2/dist/re2/testing/tester.cc
vendored
86
src/third_party/re2/dist/re2/testing/tester.cc
vendored
@ -4,20 +4,25 @@
|
||||
|
||||
// Regular expression engine tester -- test all the implementations against each other.
|
||||
|
||||
#include "re2/testing/tester.h"
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "absl/base/macros.h"
|
||||
#include "absl/flags/flag.h"
|
||||
#include "absl/log/absl_check.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "absl/strings/escaping.h"
|
||||
#include "absl/strings/str_format.h"
|
||||
#include "util/logging.h"
|
||||
#include "re2/testing/tester.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "re2/prog.h"
|
||||
#include "re2/re2.h"
|
||||
#include "re2/regexp.h"
|
||||
#include "util/pcre.h"
|
||||
|
||||
ABSL_FLAG(bool, dump_prog, false, "dump regexp program");
|
||||
ABSL_FLAG(bool, log_okay, false, "log successful runs");
|
||||
@ -50,9 +55,9 @@ const char* engine_names[kEngineMax] = {
|
||||
|
||||
// Returns the name of the engine.
|
||||
static const char* EngineName(Engine e) {
|
||||
CHECK_GE(e, 0);
|
||||
CHECK_LT(e, ABSL_ARRAYSIZE(engine_names));
|
||||
CHECK(engine_names[e] != NULL);
|
||||
ABSL_CHECK_GE(e, 0);
|
||||
ABSL_CHECK_LT(e, ABSL_ARRAYSIZE(engine_names));
|
||||
ABSL_CHECK(engine_names[e] != NULL);
|
||||
return engine_names[e];
|
||||
}
|
||||
|
||||
@ -73,12 +78,12 @@ static uint32_t Engines() {
|
||||
}
|
||||
|
||||
if (cached_engines == 0)
|
||||
LOG(INFO) << "Warning: no engines enabled.";
|
||||
ABSL_LOG(INFO) << "Warning: no engines enabled.";
|
||||
if (!UsingPCRE)
|
||||
cached_engines &= ~(1<<kEnginePCRE);
|
||||
for (Engine i = static_cast<Engine>(0); i < kEngineMax; i++) {
|
||||
if (cached_engines & (1<<i))
|
||||
LOG(INFO) << EngineName(i) << " enabled";
|
||||
ABSL_LOG(INFO) << EngineName(i) << " enabled";
|
||||
}
|
||||
|
||||
did_parse = true;
|
||||
@ -196,45 +201,46 @@ TestInstance::TestInstance(absl::string_view regexp_str, Prog::MatchKind kind,
|
||||
re_(NULL),
|
||||
re2_(NULL) {
|
||||
|
||||
VLOG(1) << absl::CEscape(regexp_str);
|
||||
ABSL_VLOG(1) << absl::CEscape(regexp_str);
|
||||
|
||||
// Compile regexp to prog.
|
||||
// Always required - needed for backtracking (reference implementation).
|
||||
RegexpStatus status;
|
||||
regexp_ = Regexp::Parse(regexp_str, flags, &status);
|
||||
if (regexp_ == NULL) {
|
||||
LOG(INFO) << "Cannot parse: " << absl::CEscape(regexp_str_)
|
||||
<< " mode: " << FormatMode(flags);
|
||||
ABSL_LOG(INFO) << "Cannot parse: " << absl::CEscape(regexp_str_)
|
||||
<< " mode: " << FormatMode(flags);
|
||||
error_ = true;
|
||||
return;
|
||||
}
|
||||
num_captures_ = regexp_->NumCaptures();
|
||||
prog_ = regexp_->CompileToProg(0);
|
||||
if (prog_ == NULL) {
|
||||
LOG(INFO) << "Cannot compile: " << absl::CEscape(regexp_str_);
|
||||
ABSL_LOG(INFO) << "Cannot compile: " << absl::CEscape(regexp_str_);
|
||||
error_ = true;
|
||||
return;
|
||||
}
|
||||
if (absl::GetFlag(FLAGS_dump_prog)) {
|
||||
LOG(INFO) << "Prog for "
|
||||
<< " regexp "
|
||||
<< absl::CEscape(regexp_str_)
|
||||
<< " (" << FormatKind(kind_)
|
||||
<< ", " << FormatMode(flags_)
|
||||
<< ")\n"
|
||||
<< prog_->Dump();
|
||||
ABSL_LOG(INFO) << "Prog for "
|
||||
<< " regexp "
|
||||
<< absl::CEscape(regexp_str_)
|
||||
<< " (" << FormatKind(kind_)
|
||||
<< ", " << FormatMode(flags_)
|
||||
<< ")\n"
|
||||
<< prog_->Dump();
|
||||
}
|
||||
|
||||
// Compile regexp to reversed prog. Only needed for DFA engines.
|
||||
if (Engines() & ((1<<kEngineDFA)|(1<<kEngineDFA1))) {
|
||||
rprog_ = regexp_->CompileToReverseProg(0);
|
||||
if (rprog_ == NULL) {
|
||||
LOG(INFO) << "Cannot reverse compile: " << absl::CEscape(regexp_str_);
|
||||
ABSL_LOG(INFO) << "Cannot reverse compile: "
|
||||
<< absl::CEscape(regexp_str_);
|
||||
error_ = true;
|
||||
return;
|
||||
}
|
||||
if (absl::GetFlag(FLAGS_dump_rprog))
|
||||
LOG(INFO) << rprog_->Dump();
|
||||
ABSL_LOG(INFO) << rprog_->Dump();
|
||||
}
|
||||
|
||||
// Create re string that will be used for RE and RE2.
|
||||
@ -257,7 +263,7 @@ TestInstance::TestInstance(absl::string_view regexp_str, Prog::MatchKind kind,
|
||||
options.set_longest_match(true);
|
||||
re2_ = new RE2(re, options);
|
||||
if (!re2_->error().empty()) {
|
||||
LOG(INFO) << "Cannot RE2: " << absl::CEscape(re);
|
||||
ABSL_LOG(INFO) << "Cannot RE2: " << absl::CEscape(re);
|
||||
error_ = true;
|
||||
return;
|
||||
}
|
||||
@ -283,7 +289,7 @@ TestInstance::TestInstance(absl::string_view regexp_str, Prog::MatchKind kind,
|
||||
// add one more layer of parens.
|
||||
re_ = new PCRE("("+re+")", o);
|
||||
if (!re_->error().empty()) {
|
||||
LOG(INFO) << "Cannot PCRE: " << absl::CEscape(re);
|
||||
ABSL_LOG(INFO) << "Cannot PCRE: " << absl::CEscape(re);
|
||||
error_ = true;
|
||||
return;
|
||||
}
|
||||
@ -318,7 +324,7 @@ void TestInstance::RunSearch(Engine type, absl::string_view orig_text,
|
||||
|
||||
switch (type) {
|
||||
default:
|
||||
LOG(FATAL) << "Bad RunSearch type: " << (int)type;
|
||||
ABSL_LOG(FATAL) << "Bad RunSearch type: " << (int)type;
|
||||
|
||||
case kEngineBacktrack:
|
||||
if (prog_ == NULL) {
|
||||
@ -366,9 +372,9 @@ void TestInstance::RunSearch(Engine type, absl::string_view orig_text,
|
||||
Prog::kAnchored, Prog::kLongestMatch,
|
||||
result->submatch,
|
||||
&result->skipped, NULL)) {
|
||||
LOG(ERROR) << "Reverse DFA inconsistency: "
|
||||
<< absl::CEscape(regexp_str_)
|
||||
<< " on " << absl::CEscape(text);
|
||||
ABSL_LOG(ERROR) << "Reverse DFA inconsistency: "
|
||||
<< absl::CEscape(regexp_str_)
|
||||
<< " on " << absl::CEscape(text);
|
||||
result->matched = false;
|
||||
}
|
||||
}
|
||||
@ -520,16 +526,16 @@ bool TestInstance::RunCase(absl::string_view text, absl::string_view context,
|
||||
if (correct.skipped) {
|
||||
if (regexp_ == NULL)
|
||||
return true;
|
||||
LOG(ERROR) << "Skipped backtracking! " << absl::CEscape(regexp_str_)
|
||||
<< " " << FormatMode(flags_);
|
||||
ABSL_LOG(ERROR) << "Skipped backtracking! " << absl::CEscape(regexp_str_)
|
||||
<< " " << FormatMode(flags_);
|
||||
return false;
|
||||
}
|
||||
VLOG(1) << "Try: regexp " << absl::CEscape(regexp_str_)
|
||||
<< " text " << absl::CEscape(text)
|
||||
<< " (" << FormatKind(kind_)
|
||||
<< ", " << FormatAnchor(anchor)
|
||||
<< ", " << FormatMode(flags_)
|
||||
<< ")";
|
||||
ABSL_VLOG(1) << "Try: regexp " << absl::CEscape(regexp_str_)
|
||||
<< " text " << absl::CEscape(text)
|
||||
<< " (" << FormatKind(kind_)
|
||||
<< ", " << FormatAnchor(anchor)
|
||||
<< ", " << FormatMode(flags_)
|
||||
<< ")";
|
||||
|
||||
// Compare the others.
|
||||
bool all_okay = true;
|
||||
@ -560,22 +566,22 @@ bool TestInstance::RunCase(absl::string_view text, absl::string_view context,
|
||||
context, anchor);
|
||||
if (r.matched != correct.matched) {
|
||||
if (r.matched) {
|
||||
LOG(INFO) << " Should not match (but does).";
|
||||
ABSL_LOG(INFO) << " Should not match (but does).";
|
||||
} else {
|
||||
LOG(INFO) << " Should match (but does not).";
|
||||
ABSL_LOG(INFO) << " Should match (but does not).";
|
||||
continue;
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < 1+num_captures_; i++) {
|
||||
if (r.submatch[i].data() != correct.submatch[i].data() ||
|
||||
r.submatch[i].size() != correct.submatch[i].size()) {
|
||||
LOG(INFO) <<
|
||||
ABSL_LOG(INFO) <<
|
||||
absl::StrFormat(" $%d: should be %s is %s",
|
||||
i,
|
||||
FormatCapture(text, correct.submatch[i]),
|
||||
FormatCapture(text, r.submatch[i]));
|
||||
} else {
|
||||
LOG(INFO) <<
|
||||
ABSL_LOG(INFO) <<
|
||||
absl::StrFormat(" $%d: %s ok", i,
|
||||
FormatCapture(text, r.submatch[i]));
|
||||
}
|
||||
@ -587,7 +593,7 @@ bool TestInstance::RunCase(absl::string_view text, absl::string_view context,
|
||||
// and that is desirable because we want to enforce a global limit.
|
||||
static int max_regexp_failures = absl::GetFlag(FLAGS_max_regexp_failures);
|
||||
if (max_regexp_failures > 0 && --max_regexp_failures == 0)
|
||||
LOG(QFATAL) << "Too many regexp failures.";
|
||||
ABSL_LOG(QFATAL) << "Too many regexp failures.";
|
||||
}
|
||||
|
||||
return all_okay;
|
||||
@ -596,7 +602,7 @@ bool TestInstance::RunCase(absl::string_view text, absl::string_view context,
|
||||
void TestInstance::LogMatch(const char* prefix, Engine e,
|
||||
absl::string_view text, absl::string_view context,
|
||||
Prog::Anchor anchor) {
|
||||
LOG(INFO) << prefix
|
||||
ABSL_LOG(INFO) << prefix
|
||||
<< EngineName(e)
|
||||
<< " regexp "
|
||||
<< absl::CEscape(regexp_str_)
|
||||
|
||||
@ -12,8 +12,8 @@
|
||||
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "re2/prog.h"
|
||||
#include "re2/regexp.h"
|
||||
#include "re2/re2.h"
|
||||
#include "re2/regexp.h"
|
||||
#include "util/pcre.h"
|
||||
|
||||
namespace re2 {
|
||||
|
||||
9
src/third_party/re2/dist/re2/tostring.cc
vendored
9
src/third_party/re2/dist/re2/tostring.cc
vendored
@ -6,13 +6,14 @@
|
||||
// Tested by parse_test.cc
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "absl/strings/str_format.h"
|
||||
#include "util/logging.h"
|
||||
#include "util/utf.h"
|
||||
#include "re2/regexp.h"
|
||||
#include "re2/walker-inl.h"
|
||||
#include "util/utf.h"
|
||||
|
||||
namespace re2 {
|
||||
|
||||
@ -101,7 +102,7 @@ int ToStringWalker::PreVisit(Regexp* re, int parent_arg, bool* stop) {
|
||||
case kRegexpCapture:
|
||||
t_->append("(");
|
||||
if (re->cap() == 0)
|
||||
LOG(DFATAL) << "kRegexpCapture cap() == 0";
|
||||
ABSL_LOG(DFATAL) << "kRegexpCapture cap() == 0";
|
||||
if (re->name()) {
|
||||
t_->append("?P<");
|
||||
t_->append(*re->name());
|
||||
@ -184,7 +185,7 @@ int ToStringWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg,
|
||||
if ((*t_)[t_->size()-1] == '|')
|
||||
t_->erase(t_->size()-1);
|
||||
else
|
||||
LOG(DFATAL) << "Bad final char: " << t_;
|
||||
ABSL_LOG(DFATAL) << "Bad final char: " << t_;
|
||||
if (prec < PrecAlternate)
|
||||
t_->append(")");
|
||||
break;
|
||||
|
||||
19
src/third_party/re2/dist/re2/unicode.py
vendored
19
src/third_party/re2/dist/re2/unicode.py
vendored
@ -75,25 +75,6 @@ def _URange(s):
|
||||
raise InputError("invalid Unicode range %s" % (s,))
|
||||
|
||||
|
||||
def _UStr(v):
|
||||
"""Converts Unicode code point to hex string.
|
||||
|
||||
0x263a => '0x263A'.
|
||||
|
||||
Args:
|
||||
v: code point to convert
|
||||
|
||||
Returns:
|
||||
Unicode string
|
||||
|
||||
Raises:
|
||||
InputError: the argument is not a valid Unicode value.
|
||||
"""
|
||||
if v < 0 or v > _RUNE_MAX:
|
||||
raise InputError("invalid Unicode value %s" % (v,))
|
||||
return "0x%04X" % (v,)
|
||||
|
||||
|
||||
def _ParseContinue(s):
|
||||
"""Parses a Unicode continuation field.
|
||||
|
||||
|
||||
9
src/third_party/re2/dist/re2/walker-inl.h
vendored
9
src/third_party/re2/dist/re2/walker-inl.h
vendored
@ -16,7 +16,8 @@
|
||||
#include <stack>
|
||||
|
||||
#include "absl/base/macros.h"
|
||||
#include "util/logging.h"
|
||||
#include "absl/log/absl_check.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "re2/regexp.h"
|
||||
|
||||
namespace re2 {
|
||||
@ -147,7 +148,7 @@ template<typename T> Regexp::Walker<T>::~Walker() {
|
||||
// Logs DFATAL if stack is not already clear.
|
||||
template<typename T> void Regexp::Walker<T>::Reset() {
|
||||
if (!stack_.empty()) {
|
||||
LOG(DFATAL) << "Stack not empty.";
|
||||
ABSL_LOG(DFATAL) << "Stack not empty.";
|
||||
while (!stack_.empty()) {
|
||||
if (stack_.top().re->nsub_ > 1)
|
||||
delete[] stack_.top().child_args;
|
||||
@ -161,7 +162,7 @@ template<typename T> T Regexp::Walker<T>::WalkInternal(Regexp* re, T top_arg,
|
||||
Reset();
|
||||
|
||||
if (re == NULL) {
|
||||
LOG(DFATAL) << "Walk NULL";
|
||||
ABSL_LOG(DFATAL) << "Walk NULL";
|
||||
return top_arg;
|
||||
}
|
||||
|
||||
@ -191,7 +192,7 @@ template<typename T> T Regexp::Walker<T>::WalkInternal(Regexp* re, T top_arg,
|
||||
s->child_args = &s->child_arg;
|
||||
else if (re->nsub_ > 1)
|
||||
s->child_args = new T[re->nsub_];
|
||||
ABSL_FALLTHROUGH_INTENDED;
|
||||
[[fallthrough]];
|
||||
}
|
||||
default: {
|
||||
if (re->nsub_ > 0) {
|
||||
|
||||
28
src/third_party/re2/dist/re2Config.cmake.in
vendored
28
src/third_party/re2/dist/re2Config.cmake.in
vendored
@ -1,28 +0,0 @@
|
||||
# Copyright 2022 The RE2 Authors. All Rights Reserved.
|
||||
# Use of this source code is governed by a BSD-style
|
||||
# license that can be found in the LICENSE file.
|
||||
|
||||
@PACKAGE_INIT@
|
||||
|
||||
include(CMakeFindDependencyMacro)
|
||||
|
||||
set_and_check(re2_INCLUDE_DIR ${PACKAGE_PREFIX_DIR}/@CMAKE_INSTALL_INCLUDEDIR@)
|
||||
|
||||
if(UNIX)
|
||||
set(THREADS_PREFER_PTHREAD_FLAG ON)
|
||||
find_dependency(Threads REQUIRED)
|
||||
endif()
|
||||
|
||||
find_dependency(absl REQUIRED)
|
||||
|
||||
if(@RE2_USE_ICU@)
|
||||
find_dependency(ICU REQUIRED COMPONENTS uc)
|
||||
endif()
|
||||
|
||||
check_required_components(re2)
|
||||
|
||||
if(TARGET re2::re2)
|
||||
return()
|
||||
endif()
|
||||
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/re2Targets.cmake)
|
||||
33
src/third_party/re2/dist/runtests
vendored
33
src/third_party/re2/dist/runtests
vendored
@ -1,33 +0,0 @@
|
||||
#!/usr/bin/env sh
|
||||
|
||||
# System Integrity Protection on Darwin complicated these matters somewhat.
|
||||
# See https://github.com/google/re2/issues/175 for details.
|
||||
if [ "x$1" = "x-shared-library-path" ]; then
|
||||
if [ "x$(uname)" = "xDarwin" ]; then
|
||||
DYLD_LIBRARY_PATH="$2:$DYLD_LIBRARY_PATH"
|
||||
export DYLD_LIBRARY_PATH
|
||||
else
|
||||
LD_LIBRARY_PATH="$2:$LD_LIBRARY_PATH"
|
||||
export LD_LIBRARY_PATH
|
||||
fi
|
||||
shift 2
|
||||
fi
|
||||
|
||||
success=true
|
||||
for i; do
|
||||
printf "%-40s" $i
|
||||
if $($i >$i.log 2>&1) 2>/dev/null; then
|
||||
echo PASS
|
||||
else
|
||||
echo FAIL';' output in $i.log
|
||||
success=false
|
||||
fi
|
||||
done
|
||||
|
||||
if $success; then
|
||||
echo 'ALL TESTS PASSED.'
|
||||
exit 0
|
||||
else
|
||||
echo 'TESTS FAILED.'
|
||||
exit 1
|
||||
fi
|
||||
27
src/third_party/re2/dist/testinstall.cc
vendored
27
src/third_party/re2/dist/testinstall.cc
vendored
@ -1,27 +0,0 @@
|
||||
// Copyright 2008 The RE2 Authors. All Rights Reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include <stdio.h>
|
||||
#include <re2/filtered_re2.h>
|
||||
#include <re2/re2.h>
|
||||
|
||||
int main() {
|
||||
re2::FilteredRE2 f;
|
||||
int id;
|
||||
f.Add("a.*b.*c", RE2::DefaultOptions, &id);
|
||||
std::vector<std::string> v;
|
||||
f.Compile(&v);
|
||||
std::vector<int> ids;
|
||||
f.FirstMatch("abbccc", ids);
|
||||
|
||||
int n;
|
||||
if (RE2::FullMatch("axbyc", "a.*b.*c") &&
|
||||
RE2::PartialMatch("foo123bar", "(\\d+)", &n) && n == 123) {
|
||||
printf("PASS\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
printf("FAIL\n");
|
||||
return 2;
|
||||
}
|
||||
567
src/third_party/re2/dist/ucs2.diff
vendored
567
src/third_party/re2/dist/ucs2.diff
vendored
@ -1,567 +0,0 @@
|
||||
This is a dump from Google's source control system of the change
|
||||
that removed UCS-2 support from RE2. As the explanation below
|
||||
says, UCS-2 mode is fundamentally at odds with things like ^ and $,
|
||||
so it never really worked very well. But if you are interested in using
|
||||
it without those operators, it did work for that. It assumed that the
|
||||
UCS-2 data was in the native host byte order.
|
||||
|
||||
If you are interested in adding UCS-2 mode back, this patch might
|
||||
be a good starting point.
|
||||
|
||||
|
||||
Change 12780686 by rsc@rsc-re2 on 2009/09/16 15:30:15
|
||||
|
||||
Retire UCS-2 mode.
|
||||
|
||||
I added it as an experiment for V8, but it
|
||||
requires 2-byte lookahead to do completely,
|
||||
and RE2 has 1-byte lookahead (enough for UTF-8)
|
||||
as a fairly deep fundamental assumption,
|
||||
so it did not support ^ or $.
|
||||
|
||||
==== re2/bitstate.cc#2 - re2/bitstate.cc#3 ====
|
||||
re2/bitstate.cc#2:314,321 - re2/bitstate.cc#3:314,319
|
||||
cap_[0] = p;
|
||||
if (TrySearch(prog_->start(), p)) // Match must be leftmost; done.
|
||||
return true;
|
||||
- if (prog_->flags() & Regexp::UCS2)
|
||||
- p++;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
==== re2/compile.cc#17 - re2/compile.cc#18 ====
|
||||
re2/compile.cc#17:95,101 - re2/compile.cc#18:95,100
|
||||
// Input encodings.
|
||||
enum Encoding {
|
||||
kEncodingUTF8 = 1, // UTF-8 (0-10FFFF)
|
||||
- kEncodingUCS2, // UCS-2 (0-FFFF), native byte order
|
||||
kEncodingLatin1, // Latin1 (0-FF)
|
||||
};
|
||||
|
||||
re2/compile.cc#17:168,176 - re2/compile.cc#18:167,172
|
||||
void AddRuneRangeLatin1(Rune lo, Rune hi, bool foldcase);
|
||||
void AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase);
|
||||
void Add_80_10ffff();
|
||||
- void AddRuneRangeUCS2(Rune lo, Rune hi, bool foldcase);
|
||||
- void AddUCS2Pair(uint8 lo1, uint8 hi1, bool fold1,
|
||||
- uint8 lo2, uint8 hi2, bool fold2);
|
||||
|
||||
// New suffix that matches the byte range lo-hi, then goes to next.
|
||||
Inst* RuneByteSuffix(uint8 lo, uint8 hi, bool foldcase, Inst* next);
|
||||
re2/compile.cc#17:475,481 - re2/compile.cc#18:471,477
|
||||
|
||||
// Converts rune range lo-hi into a fragment that recognizes
|
||||
// the bytes that would make up those runes in the current
|
||||
- // encoding (Latin 1, UTF-8, or UCS-2).
|
||||
+ // encoding (Latin 1 or UTF-8).
|
||||
// This lets the machine work byte-by-byte even when
|
||||
// using multibyte encodings.
|
||||
|
||||
re2/compile.cc#17:488,496 - re2/compile.cc#18:484,489
|
||||
case kEncodingLatin1:
|
||||
AddRuneRangeLatin1(lo, hi, foldcase);
|
||||
break;
|
||||
- case kEncodingUCS2:
|
||||
- AddRuneRangeUCS2(lo, hi, foldcase);
|
||||
- break;
|
||||
}
|
||||
}
|
||||
|
||||
re2/compile.cc#17:503,581 - re2/compile.cc#18:496,501
|
||||
AddSuffix(RuneByteSuffix(lo, hi, foldcase, NULL));
|
||||
}
|
||||
|
||||
- // Test whether 16-bit values are big or little endian.
|
||||
- static bool BigEndian() {
|
||||
- union {
|
||||
- char byte[2];
|
||||
- int16 endian;
|
||||
- } u;
|
||||
-
|
||||
- u.byte[0] = 1;
|
||||
- u.byte[1] = 2;
|
||||
- return u.endian == 0x0102;
|
||||
- }
|
||||
-
|
||||
- void Compiler::AddUCS2Pair(uint8 lo1, uint8 hi1, bool fold1,
|
||||
- uint8 lo2, uint8 hi2, bool fold2) {
|
||||
- Inst* ip;
|
||||
- if (reversed_) {
|
||||
- ip = RuneByteSuffix(lo1, hi1, fold1, NULL);
|
||||
- ip = RuneByteSuffix(lo2, hi2, fold2, ip);
|
||||
- } else {
|
||||
- ip = RuneByteSuffix(lo2, hi2, fold2, NULL);
|
||||
- ip = RuneByteSuffix(lo1, hi1, fold1, ip);
|
||||
- }
|
||||
- AddSuffix(ip);
|
||||
- }
|
||||
-
|
||||
- void Compiler::AddRuneRangeUCS2(Rune lo, Rune hi, bool foldcase) {
|
||||
- if (lo > hi || lo > 0xFFFF)
|
||||
- return;
|
||||
- if (hi > 0xFFFF)
|
||||
- hi = 0xFFFF;
|
||||
-
|
||||
- // We'll assemble a pattern assuming big endian.
|
||||
- // If the machine isn't, tell Cat to reverse its arguments.
|
||||
- bool oldreversed = reversed_;
|
||||
- if (!BigEndian()) {
|
||||
- reversed_ = !oldreversed;
|
||||
- }
|
||||
-
|
||||
- // Split into bytes.
|
||||
- int lo1 = lo >> 8;
|
||||
- int lo2 = lo & 0xFF;
|
||||
- int hi1 = hi >> 8;
|
||||
- int hi2 = hi & 0xFF;
|
||||
-
|
||||
- if (lo1 == hi1) {
|
||||
- // Easy case: high bits are same in both.
|
||||
- // Only do ASCII case folding on the second byte if the top byte is 00.
|
||||
- AddUCS2Pair(lo1, lo1, false, lo2, hi2, lo1==0 && foldcase);
|
||||
- } else {
|
||||
- // Harder case: different second byte ranges depending on first byte.
|
||||
-
|
||||
- // Initial fragment.
|
||||
- if (lo2 > 0) {
|
||||
- AddUCS2Pair(lo1, lo1, false, lo2, 0xFF, lo1==0 && foldcase);
|
||||
- lo1++;
|
||||
- }
|
||||
-
|
||||
- // Trailing fragment.
|
||||
- if (hi2 < 0xFF) {
|
||||
- AddUCS2Pair(hi1, hi1, false, 0, hi2, false);
|
||||
- hi1--;
|
||||
- }
|
||||
-
|
||||
- // Inner ranges.
|
||||
- if (lo1 <= hi1) {
|
||||
- AddUCS2Pair(lo1, hi1, false, 0, 0xFF, false);
|
||||
- }
|
||||
- }
|
||||
-
|
||||
- // Restore reverse setting.
|
||||
- reversed_ = oldreversed;
|
||||
- }
|
||||
-
|
||||
// Table describing how to make a UTF-8 matching machine
|
||||
// for the rune range 80-10FFFF (Runeself-Runemax).
|
||||
// This range happens frequently enough (for example /./ and /[^a-z]/)
|
||||
re2/compile.cc#17:707,716 - re2/compile.cc#18:627,634
|
||||
|
||||
Frag Compiler::Literal(Rune r, bool foldcase) {
|
||||
switch (encoding_) {
|
||||
- default: // UCS-2 or something new
|
||||
- BeginRange();
|
||||
- AddRuneRange(r, r, foldcase);
|
||||
- return EndRange();
|
||||
+ default:
|
||||
+ return kNullFrag;
|
||||
|
||||
case kEncodingLatin1:
|
||||
return ByteRange(r, r, foldcase);
|
||||
re2/compile.cc#17:927,934 - re2/compile.cc#18:845,850
|
||||
|
||||
if (re->parse_flags() & Regexp::Latin1)
|
||||
c.encoding_ = kEncodingLatin1;
|
||||
- else if (re->parse_flags() & Regexp::UCS2)
|
||||
- c.encoding_ = kEncodingUCS2;
|
||||
c.reversed_ = reversed;
|
||||
if (max_mem <= 0) {
|
||||
c.max_inst_ = 100000; // more than enough
|
||||
re2/compile.cc#17:983,993 - re2/compile.cc#18:899,905
|
||||
c.prog_->set_start_unanchored(c.prog_->start());
|
||||
} else {
|
||||
Frag dot;
|
||||
- if (c.encoding_ == kEncodingUCS2) {
|
||||
- dot = c.Cat(c.ByteRange(0x00, 0xFF, false), c.ByteRange(0x00, 0xFF, false));
|
||||
- } else {
|
||||
- dot = c.ByteRange(0x00, 0xFF, false);
|
||||
- }
|
||||
+ dot = c.ByteRange(0x00, 0xFF, false);
|
||||
Frag dotloop = c.Star(dot, true);
|
||||
Frag unanchored = c.Cat(dotloop, all);
|
||||
c.prog_->set_start_unanchored(unanchored.begin);
|
||||
==== re2/nfa.cc#8 - re2/nfa.cc#9 ====
|
||||
re2/nfa.cc#8:426,432 - re2/nfa.cc#9:426,431
|
||||
const char* bp = context.begin();
|
||||
int c = -1;
|
||||
int wasword = 0;
|
||||
- bool ucs2 = prog_->flags() & Regexp::UCS2;
|
||||
|
||||
if (text.begin() > context.begin()) {
|
||||
c = text.begin()[-1] & 0xFF;
|
||||
re2/nfa.cc#8:492,498 - re2/nfa.cc#9:491,497
|
||||
// If there's a required first byte for an unanchored search
|
||||
// and we're not in the middle of any possible matches,
|
||||
// use memchr to search for the byte quickly.
|
||||
- if (!ucs2 && !anchored && first_byte_ >= 0 && runq->size() == 0 &&
|
||||
+ if (!anchored && first_byte_ >= 0 && runq->size() == 0 &&
|
||||
p < text.end() && (p[0] & 0xFF) != first_byte_) {
|
||||
p = reinterpret_cast<const char*>(memchr(p, first_byte_,
|
||||
text.end() - p));
|
||||
re2/nfa.cc#8:505,526 - re2/nfa.cc#9:504,514
|
||||
flag = Prog::EmptyFlags(context, p);
|
||||
}
|
||||
|
||||
- // In UCS-2 mode, if we need to start a new thread,
|
||||
- // make sure to do it on an even boundary.
|
||||
- if(ucs2 && runq->size() == 0 &&
|
||||
- (p - context.begin()) % 2 && p < text.end()) {
|
||||
- p++;
|
||||
- flag = Prog::EmptyFlags(context, p);
|
||||
- }
|
||||
-
|
||||
// Steal match storage (cleared but unused as of yet)
|
||||
// temporarily to hold match boundaries for new thread.
|
||||
- // In UCS-2 mode, only start the thread on a 2-byte boundary.
|
||||
- if(!ucs2 || (p - context.begin()) % 2 == 0) {
|
||||
- match_[0] = p;
|
||||
- AddToThreadq(runq, start_, flag, p, match_);
|
||||
- match_[0] = NULL;
|
||||
- }
|
||||
+ match_[0] = p;
|
||||
+ AddToThreadq(runq, start_, flag, p, match_);
|
||||
+ match_[0] = NULL;
|
||||
}
|
||||
|
||||
// If all the threads have died, stop early.
|
||||
==== re2/parse.cc#22 - re2/parse.cc#23 ====
|
||||
re2/parse.cc#22:160,167 - re2/parse.cc#23:160,165
|
||||
status_(status), stacktop_(NULL), ncap_(0) {
|
||||
if (flags_ & Latin1)
|
||||
rune_max_ = 0xFF;
|
||||
- else if (flags & UCS2)
|
||||
- rune_max_ = 0xFFFF;
|
||||
else
|
||||
rune_max_ = Runemax;
|
||||
}
|
||||
re2/parse.cc#22:365,387 - re2/parse.cc#23:363,374
|
||||
bool Regexp::ParseState::PushCarat() {
|
||||
if (flags_ & OneLine) {
|
||||
return PushSimpleOp(kRegexpBeginText);
|
||||
- } else {
|
||||
- if (flags_ & UCS2) {
|
||||
- status_->set_code(kRegexpUnsupported);
|
||||
- status_->set_error_arg("multiline ^ in UCS-2 mode");
|
||||
- return false;
|
||||
- }
|
||||
- return PushSimpleOp(kRegexpBeginLine);
|
||||
}
|
||||
+ return PushSimpleOp(kRegexpBeginLine);
|
||||
}
|
||||
|
||||
// Pushes a \b or \B onto the stack.
|
||||
bool Regexp::ParseState::PushWordBoundary(bool word) {
|
||||
- if (flags_ & UCS2) {
|
||||
- status_->set_code(kRegexpUnsupported);
|
||||
- status_->set_error_arg("\\b or \\B in UCS-2 mode");
|
||||
- return false;
|
||||
- }
|
||||
if (word)
|
||||
return PushSimpleOp(kRegexpWordBoundary);
|
||||
return PushSimpleOp(kRegexpNoWordBoundary);
|
||||
re2/parse.cc#22:397,407 - re2/parse.cc#23:384,389
|
||||
bool ret = PushSimpleOp(kRegexpEndText);
|
||||
flags_ = oflags;
|
||||
return ret;
|
||||
- }
|
||||
- if (flags_ & UCS2) {
|
||||
- status_->set_code(kRegexpUnsupported);
|
||||
- status_->set_error_arg("multiline $ in UCS-2 mode");
|
||||
- return false;
|
||||
}
|
||||
return PushSimpleOp(kRegexpEndLine);
|
||||
}
|
||||
==== re2/re2.cc#34 - re2/re2.cc#35 ====
|
||||
re2/re2.cc#34:79,86 - re2/re2.cc#35:79,84
|
||||
return RE2::ErrorBadUTF8;
|
||||
case re2::kRegexpBadNamedCapture:
|
||||
return RE2::ErrorBadNamedCapture;
|
||||
- case re2::kRegexpUnsupported:
|
||||
- return RE2::ErrorUnsupported;
|
||||
}
|
||||
return RE2::ErrorInternal;
|
||||
}
|
||||
re2/re2.cc#34:122,130 - re2/re2.cc#35:120,125
|
||||
break;
|
||||
case RE2::Options::EncodingLatin1:
|
||||
flags |= Regexp::Latin1;
|
||||
- break;
|
||||
- case RE2::Options::EncodingUCS2:
|
||||
- flags |= Regexp::UCS2;
|
||||
break;
|
||||
}
|
||||
|
||||
==== re2/re2.h#36 - re2/re2.h#37 ====
|
||||
re2/re2.h#36:246,252 - re2/re2.h#37:246,251
|
||||
ErrorBadUTF8, // invalid UTF-8 in regexp
|
||||
ErrorBadNamedCapture, // bad named capture group
|
||||
ErrorPatternTooLarge, // pattern too large (compile failed)
|
||||
- ErrorUnsupported, // unsupported feature (in UCS-2 mode)
|
||||
};
|
||||
|
||||
// Predefined common options.
|
||||
re2/re2.h#36:570,576 - re2/re2.h#37:569,574
|
||||
|
||||
enum Encoding {
|
||||
EncodingUTF8 = 1,
|
||||
- EncodingUCS2, // 16-bit Unicode 0-FFFF only
|
||||
EncodingLatin1
|
||||
};
|
||||
|
||||
==== re2/regexp.cc#15 - re2/regexp.cc#16 ====
|
||||
re2/regexp.cc#15:324,333 - re2/regexp.cc#16:324,329
|
||||
// the regexp that remains after the prefix. The prefix might
|
||||
// be ASCII case-insensitive.
|
||||
bool Regexp::RequiredPrefix(string *prefix, bool *foldcase, Regexp** suffix) {
|
||||
- // Don't even bother for UCS-2; it's time to throw that code away.
|
||||
- if (parse_flags_ & UCS2)
|
||||
- return false;
|
||||
-
|
||||
// No need for a walker: the regexp must be of the form
|
||||
// 1. some number of ^ anchors
|
||||
// 2. a literal char or string
|
||||
==== re2/regexp.h#20 - re2/regexp.h#21 ====
|
||||
re2/regexp.h#20:187,193 - re2/regexp.h#21:187,192
|
||||
kRegexpBadPerlOp, // bad perl operator
|
||||
kRegexpBadUTF8, // invalid UTF-8 in regexp
|
||||
kRegexpBadNamedCapture, // bad named capture
|
||||
- kRegexpUnsupported, // unsupported operator
|
||||
};
|
||||
|
||||
// Error status for certain operations.
|
||||
re2/regexp.h#20:307,316 - re2/regexp.h#21:306,314
|
||||
// \Q and \E to disable/enable metacharacters
|
||||
// (?P<name>expr) for named captures
|
||||
// \C to match any single byte
|
||||
- UCS2 = 1<<10, // Text is in UCS-2, regexp is in UTF-8.
|
||||
- UnicodeGroups = 1<<11, // Allow \p{Han} for Unicode Han group
|
||||
+ UnicodeGroups = 1<<10, // Allow \p{Han} for Unicode Han group
|
||||
// and \P{Han} for its negation.
|
||||
- NeverNL = 1<<12, // Never match NL, even if the regexp mentions
|
||||
+ NeverNL = 1<<11, // Never match NL, even if the regexp mentions
|
||||
// it explicitly.
|
||||
|
||||
// As close to Perl as we can get.
|
||||
==== re2/testing/backtrack.cc#4 - re2/testing/backtrack.cc#5 ====
|
||||
re2/testing/backtrack.cc#4:134,141 - re2/testing/backtrack.cc#5:134,139
|
||||
cap_[0] = p;
|
||||
if (Visit(prog_->start(), p)) // Match must be leftmost; done.
|
||||
return true;
|
||||
- if (prog_->flags() & Regexp::UCS2)
|
||||
- p++;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
==== re2/testing/tester.cc#12 - re2/testing/tester.cc#13 ====
|
||||
re2/testing/tester.cc#12:144,154 - re2/testing/tester.cc#13:144,152
|
||||
static ParseMode parse_modes[] = {
|
||||
{ single_line, "single-line" },
|
||||
{ single_line|Regexp::Latin1, "single-line, latin1" },
|
||||
- { single_line|Regexp::UCS2, "single-line, ucs2" },
|
||||
{ multi_line, "multiline" },
|
||||
{ multi_line|Regexp::NonGreedy, "multiline, nongreedy" },
|
||||
{ multi_line|Regexp::Latin1, "multiline, latin1" },
|
||||
- { multi_line|Regexp::UCS2, "multiline, ucs2" },
|
||||
};
|
||||
|
||||
static string FormatMode(Regexp::ParseFlags flags) {
|
||||
re2/testing/tester.cc#12:179,189 - re2/testing/tester.cc#13:177,185
|
||||
RegexpStatus status;
|
||||
regexp_ = Regexp::Parse(regexp_str, flags, &status);
|
||||
if (regexp_ == NULL) {
|
||||
- if (status.code() != kRegexpUnsupported) {
|
||||
- LOG(INFO) << "Cannot parse: " << CEscape(regexp_str_)
|
||||
- << " mode: " << FormatMode(flags);
|
||||
- error_ = true;
|
||||
- }
|
||||
+ LOG(INFO) << "Cannot parse: " << CEscape(regexp_str_)
|
||||
+ << " mode: " << FormatMode(flags);
|
||||
+ error_ = true;
|
||||
return;
|
||||
}
|
||||
prog_ = regexp_->CompileToProg(0);
|
||||
re2/testing/tester.cc#12:230,237 - re2/testing/tester.cc#13:226,231
|
||||
RE2::Options options;
|
||||
if (flags & Regexp::Latin1)
|
||||
options.set_encoding(RE2::Options::EncodingLatin1);
|
||||
- else if (flags & Regexp::UCS2)
|
||||
- options.set_encoding(RE2::Options::EncodingUCS2);
|
||||
if (kind_ == Prog::kLongestMatch)
|
||||
options.set_longest_match(true);
|
||||
re2_ = new RE2(re, options);
|
||||
re2/testing/tester.cc#12:281,379 - re2/testing/tester.cc#13:275,280
|
||||
delete re2_;
|
||||
}
|
||||
|
||||
- // Converts UTF-8 string in text into UCS-2 string in new_text.
|
||||
- static bool ConvertUTF8ToUCS2(const StringPiece& text, StringPiece* new_text) {
|
||||
- const char* p = text.begin();
|
||||
- const char* ep = text.end();
|
||||
- uint16* q = new uint16[ep - p];
|
||||
- uint16* q0 = q;
|
||||
-
|
||||
- int n;
|
||||
- Rune r;
|
||||
- for (; p < ep; p += n) {
|
||||
- if (!fullrune(p, ep - p)) {
|
||||
- delete[] q0;
|
||||
- return false;
|
||||
- }
|
||||
- n = chartorune(&r, p);
|
||||
- if (r > 0xFFFF) {
|
||||
- delete[] q0;
|
||||
- return false;
|
||||
- }
|
||||
- *q++ = r;
|
||||
- }
|
||||
- *new_text = StringPiece(reinterpret_cast<char*>(q0), 2*(q - q0));
|
||||
- return true;
|
||||
- }
|
||||
-
|
||||
- // Rewrites *sp from being a pointer into text8 (UTF-8)
|
||||
- // to being a pointer into text16 (equivalent text but in UCS-2).
|
||||
- static void AdjustUTF8ToUCS2(const StringPiece& text8, const StringPiece& text16,
|
||||
- StringPiece *sp) {
|
||||
- if (sp->begin() == NULL && text8.begin() != NULL)
|
||||
- return;
|
||||
-
|
||||
- int nrune = 0;
|
||||
- int n;
|
||||
- Rune r;
|
||||
- const char* p = text8.begin();
|
||||
- const char* ep = text8.end();
|
||||
- const char* spbegin = NULL;
|
||||
- const char* spend = NULL;
|
||||
- for (;;) {
|
||||
- if (p == sp->begin())
|
||||
- spbegin = text16.begin() + sizeof(uint16)*nrune;
|
||||
- if (p == sp->end())
|
||||
- spend = text16.begin() + sizeof(uint16)*nrune;
|
||||
- if (p >= ep)
|
||||
- break;
|
||||
- n = chartorune(&r, p);
|
||||
- p += n;
|
||||
- nrune++;
|
||||
- }
|
||||
- if (spbegin == NULL || spend == NULL) {
|
||||
- LOG(FATAL) << "Error in AdjustUTF8ToUCS2 "
|
||||
- << CEscape(text8) << " "
|
||||
- << (int)(sp->begin() - text8.begin()) << " "
|
||||
- << (int)(sp->end() - text8.begin());
|
||||
- }
|
||||
- *sp = StringPiece(spbegin, spend - spbegin);
|
||||
- }
|
||||
-
|
||||
- // Rewrites *sp from begin a pointer into text16 (UCS-2)
|
||||
- // to being a pointer into text8 (equivalent text but in UTF-8).
|
||||
- static void AdjustUCS2ToUTF8(const StringPiece& text16, const StringPiece& text8,
|
||||
- StringPiece* sp) {
|
||||
- if (sp->begin() == NULL)
|
||||
- return;
|
||||
-
|
||||
- int nrune = 0;
|
||||
- int n;
|
||||
- Rune r;
|
||||
- const char* p = text8.begin();
|
||||
- const char* ep = text8.end();
|
||||
- const char* spbegin = NULL;
|
||||
- const char* spend = NULL;
|
||||
- for (;;) {
|
||||
- if (nrune == (sp->begin() - text16.begin())/2)
|
||||
- spbegin = p;
|
||||
- if (nrune == (sp->end() - text16.begin())/2)
|
||||
- spend = p;
|
||||
- if (p >= ep)
|
||||
- break;
|
||||
- n = chartorune(&r, p);
|
||||
- p += n;
|
||||
- nrune++;
|
||||
- }
|
||||
- if (text8.begin() != NULL && (spbegin == NULL || spend == NULL)) {
|
||||
- LOG(FATAL) << "Error in AdjustUCS2ToUTF8 "
|
||||
- << CEscape(text16) << " "
|
||||
- << (int)(sp->begin() - text16.begin()) << " "
|
||||
- << (int)(sp->end() - text16.begin());
|
||||
- }
|
||||
- *sp = StringPiece(spbegin, spend - spbegin);
|
||||
- }
|
||||
-
|
||||
// Runs a single search using the named engine type.
|
||||
// This interface hides all the irregularities of the various
|
||||
// engine interfaces from the rest of this file.
|
||||
re2/testing/tester.cc#12:393,411 - re2/testing/tester.cc#13:294,300
|
||||
|
||||
StringPiece text = orig_text;
|
||||
StringPiece context = orig_context;
|
||||
- bool ucs2 = false;
|
||||
|
||||
- if ((flags() & Regexp::UCS2) && type != kEnginePCRE) {
|
||||
- if (!ConvertUTF8ToUCS2(orig_context, &context)) {
|
||||
- result->skipped = true;
|
||||
- return;
|
||||
- }
|
||||
-
|
||||
- // Rewrite context to refer to new text.
|
||||
- AdjustUTF8ToUCS2(orig_context, context, &text);
|
||||
- ucs2 = true;
|
||||
- }
|
||||
-
|
||||
switch (type) {
|
||||
default:
|
||||
LOG(FATAL) << "Bad RunSearch type: " << (int)type;
|
||||
re2/testing/tester.cc#12:557,577 - re2/testing/tester.cc#13:446,451
|
||||
}
|
||||
}
|
||||
|
||||
- // If we did UCS-2 matching, rewrite the matches to refer
|
||||
- // to the original UTF-8 text.
|
||||
- if (ucs2) {
|
||||
- if (result->matched) {
|
||||
- if (result->have_submatch0) {
|
||||
- AdjustUCS2ToUTF8(context, orig_context, &result->submatch[0]);
|
||||
- } else if (result->have_submatch) {
|
||||
- for (int i = 0; i < nsubmatch; i++) {
|
||||
- AdjustUCS2ToUTF8(context, orig_context, &result->submatch[i]);
|
||||
- }
|
||||
- }
|
||||
- }
|
||||
- delete[] context.begin();
|
||||
- }
|
||||
-
|
||||
if (!result->matched)
|
||||
memset(result->submatch, 0, sizeof result->submatch);
|
||||
}
|
||||
re2/testing/tester.cc#12:596,617 - re2/testing/tester.cc#13:470,475
|
||||
return true;
|
||||
}
|
||||
|
||||
- // Check whether text uses only Unicode points <= 0xFFFF
|
||||
- // (in the BMP).
|
||||
- static bool IsBMP(const StringPiece& text) {
|
||||
- const char* p = text.begin();
|
||||
- const char* ep = text.end();
|
||||
- while (p < ep) {
|
||||
- if (!fullrune(p, ep - p))
|
||||
- return false;
|
||||
- Rune r;
|
||||
- p += chartorune(&r, p);
|
||||
- if (r > 0xFFFF)
|
||||
- return false;
|
||||
- }
|
||||
- return true;
|
||||
- }
|
||||
-
|
||||
// Runs a single test.
|
||||
bool TestInstance::RunCase(const StringPiece& text, const StringPiece& context,
|
||||
Prog::Anchor anchor) {
|
||||
re2/testing/tester.cc#12:619,625 - re2/testing/tester.cc#13:477,483
|
||||
Result correct;
|
||||
RunSearch(kEngineBacktrack, text, context, anchor, &correct);
|
||||
if (correct.skipped) {
|
||||
- if (regexp_ == NULL || !IsBMP(context)) // okay to skip in UCS-2 mode
|
||||
+ if (regexp_ == NULL)
|
||||
return true;
|
||||
LOG(ERROR) << "Skipped backtracking! " << CEscape(regexp_str_)
|
||||
<< " " << FormatMode(flags_);
|
||||
109
src/third_party/re2/dist/util/logging.h
vendored
109
src/third_party/re2/dist/util/logging.h
vendored
@ -1,109 +0,0 @@
|
||||
// Copyright 2009 The RE2 Authors. All Rights Reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#ifndef UTIL_LOGGING_H_
|
||||
#define UTIL_LOGGING_H_
|
||||
|
||||
// Simplified version of Google's logging.
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <ostream>
|
||||
#include <sstream>
|
||||
|
||||
#include "absl/base/attributes.h"
|
||||
|
||||
// Debug-only checking.
|
||||
#define DCHECK(condition) assert(condition)
|
||||
#define DCHECK_EQ(val1, val2) assert((val1) == (val2))
|
||||
#define DCHECK_NE(val1, val2) assert((val1) != (val2))
|
||||
#define DCHECK_LE(val1, val2) assert((val1) <= (val2))
|
||||
#define DCHECK_LT(val1, val2) assert((val1) < (val2))
|
||||
#define DCHECK_GE(val1, val2) assert((val1) >= (val2))
|
||||
#define DCHECK_GT(val1, val2) assert((val1) > (val2))
|
||||
|
||||
// Always-on checking
|
||||
#define CHECK(x) if(x){}else LogMessageFatal(__FILE__, __LINE__).stream() << "Check failed: " #x
|
||||
#define CHECK_LT(x, y) CHECK((x) < (y))
|
||||
#define CHECK_GT(x, y) CHECK((x) > (y))
|
||||
#define CHECK_LE(x, y) CHECK((x) <= (y))
|
||||
#define CHECK_GE(x, y) CHECK((x) >= (y))
|
||||
#define CHECK_EQ(x, y) CHECK((x) == (y))
|
||||
#define CHECK_NE(x, y) CHECK((x) != (y))
|
||||
|
||||
#define LOG_INFO LogMessage(__FILE__, __LINE__)
|
||||
#define LOG_WARNING LogMessage(__FILE__, __LINE__)
|
||||
#define LOG_ERROR LogMessage(__FILE__, __LINE__)
|
||||
#define LOG_FATAL LogMessageFatal(__FILE__, __LINE__)
|
||||
#define LOG_QFATAL LOG_FATAL
|
||||
|
||||
// It seems that one of the Windows header files defines ERROR as 0.
|
||||
#ifdef _WIN32
|
||||
#define LOG_0 LOG_INFO
|
||||
#endif
|
||||
|
||||
#ifdef NDEBUG
|
||||
#define LOG_DFATAL LOG_ERROR
|
||||
#else
|
||||
#define LOG_DFATAL LOG_FATAL
|
||||
#endif
|
||||
|
||||
#define LOG(severity) LOG_ ## severity.stream()
|
||||
|
||||
#define VLOG(x) if((x)>0){}else LOG_INFO.stream()
|
||||
|
||||
class LogMessage {
|
||||
public:
|
||||
LogMessage(const char* file, int line)
|
||||
: flushed_(false) {
|
||||
stream() << file << ":" << line << ": ";
|
||||
}
|
||||
void Flush() {
|
||||
stream() << "\n";
|
||||
std::string s = str_.str();
|
||||
size_t n = s.size();
|
||||
if (fwrite(s.data(), 1, n, stderr) < n) {} // shut up gcc
|
||||
flushed_ = true;
|
||||
}
|
||||
~LogMessage() {
|
||||
if (!flushed_) {
|
||||
Flush();
|
||||
}
|
||||
}
|
||||
std::ostream& stream() { return str_; }
|
||||
|
||||
private:
|
||||
bool flushed_;
|
||||
std::ostringstream str_;
|
||||
|
||||
LogMessage(const LogMessage&) = delete;
|
||||
LogMessage& operator=(const LogMessage&) = delete;
|
||||
};
|
||||
|
||||
// Silence "destructor never returns" warning for ~LogMessageFatal().
|
||||
// Since this is a header file, push and then pop to limit the scope.
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable: 4722)
|
||||
#endif
|
||||
|
||||
class LogMessageFatal : public LogMessage {
|
||||
public:
|
||||
LogMessageFatal(const char* file, int line)
|
||||
: LogMessage(file, line) {}
|
||||
ABSL_ATTRIBUTE_NORETURN ~LogMessageFatal() {
|
||||
Flush();
|
||||
abort();
|
||||
}
|
||||
private:
|
||||
LogMessageFatal(const LogMessageFatal&) = delete;
|
||||
LogMessageFatal& operator=(const LogMessageFatal&) = delete;
|
||||
};
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(pop)
|
||||
#endif
|
||||
|
||||
#endif // UTIL_LOGGING_H_
|
||||
7
src/third_party/re2/dist/util/pcre.cc
vendored
7
src/third_party/re2/dist/util/pcre.cc
vendored
@ -16,16 +16,17 @@
|
||||
#include <utility>
|
||||
|
||||
#include "absl/flags/flag.h"
|
||||
#include "absl/log/absl_check.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "absl/strings/str_format.h"
|
||||
#include "util/logging.h"
|
||||
#include "util/pcre.h"
|
||||
|
||||
// Silence warnings about the wacky formatting in the operator() functions.
|
||||
#if !defined(__clang__) && defined(__GNUC__)
|
||||
#if defined(__GNUC__)
|
||||
#pragma GCC diagnostic ignored "-Wmisleading-indentation"
|
||||
#endif
|
||||
|
||||
#define PCREPORT(level) LOG(level)
|
||||
#define PCREPORT(level) ABSL_LOG(level)
|
||||
|
||||
// Default PCRE limits.
|
||||
// Defaults chosen to allow a plausible amount of CPU and
|
||||
|
||||
32
src/third_party/re2/dist/util/pcre.h
vendored
32
src/third_party/re2/dist/util/pcre.h
vendored
@ -39,10 +39,10 @@
|
||||
// supplied pattern exactly.
|
||||
//
|
||||
// Example: successful match
|
||||
// CHECK(PCRE::FullMatch("hello", "h.*o"));
|
||||
// ABSL_CHECK(PCRE::FullMatch("hello", "h.*o"));
|
||||
//
|
||||
// Example: unsuccessful match (requires full match):
|
||||
// CHECK(!PCRE::FullMatch("hello", "e"));
|
||||
// ABSL_CHECK(!PCRE::FullMatch("hello", "e"));
|
||||
//
|
||||
// -----------------------------------------------------------------------
|
||||
// UTF-8 AND THE MATCHING INTERFACE:
|
||||
@ -58,7 +58,7 @@
|
||||
//
|
||||
// Example:
|
||||
// PCRE re(utf8_pattern, PCRE::UTF8);
|
||||
// CHECK(PCRE::FullMatch(utf8_string, re));
|
||||
// ABSL_CHECK(PCRE::FullMatch(utf8_string, re));
|
||||
//
|
||||
// -----------------------------------------------------------------------
|
||||
// MATCHING WITH SUBSTRING EXTRACTION:
|
||||
@ -68,22 +68,22 @@
|
||||
// Example: extracts "ruby" into "s" and 1234 into "i"
|
||||
// int i;
|
||||
// std::string s;
|
||||
// CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
|
||||
// ABSL_CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
|
||||
//
|
||||
// Example: fails because string cannot be stored in integer
|
||||
// CHECK(!PCRE::FullMatch("ruby", "(.*)", &i));
|
||||
// ABSL_CHECK(!PCRE::FullMatch("ruby", "(.*)", &i));
|
||||
//
|
||||
// Example: fails because there aren't enough sub-patterns:
|
||||
// CHECK(!PCRE::FullMatch("ruby:1234", "\\w+:\\d+", &s));
|
||||
// ABSL_CHECK(!PCRE::FullMatch("ruby:1234", "\\w+:\\d+", &s));
|
||||
//
|
||||
// Example: does not try to extract any extra sub-patterns
|
||||
// CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s));
|
||||
// ABSL_CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s));
|
||||
//
|
||||
// Example: does not try to extract into NULL
|
||||
// CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", NULL, &i));
|
||||
// ABSL_CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", NULL, &i));
|
||||
//
|
||||
// Example: integer overflow causes failure
|
||||
// CHECK(!PCRE::FullMatch("ruby:1234567891234", "\\w+:(\\d+)", &i));
|
||||
// ABSL_CHECK(!PCRE::FullMatch("ruby:1234567891234", "\\w+:(\\d+)", &i));
|
||||
//
|
||||
// -----------------------------------------------------------------------
|
||||
// PARTIAL MATCHES
|
||||
@ -92,12 +92,12 @@
|
||||
// to match any substring of the text.
|
||||
//
|
||||
// Example: simple search for a string:
|
||||
// CHECK(PCRE::PartialMatch("hello", "ell"));
|
||||
// ABSL_CHECK(PCRE::PartialMatch("hello", "ell"));
|
||||
//
|
||||
// Example: find first number in a string
|
||||
// int number;
|
||||
// CHECK(PCRE::PartialMatch("x*100 + 20", "(\\d+)", &number));
|
||||
// CHECK_EQ(number, 100);
|
||||
// ABSL_CHECK(PCRE::PartialMatch("x*100 + 20", "(\\d+)", &number));
|
||||
// ABSL_CHECK_EQ(number, 100);
|
||||
//
|
||||
// -----------------------------------------------------------------------
|
||||
// PPCRE-COMPILED PCREGULAR EXPPCRESSIONS
|
||||
@ -157,7 +157,7 @@
|
||||
//
|
||||
// Example:
|
||||
// int a, b, c, d;
|
||||
// CHECK(PCRE::FullMatch("100 40 0100 0x40", "(.*) (.*) (.*) (.*)",
|
||||
// ABSL_CHECK(PCRE::FullMatch("100 40 0100 0x40", "(.*) (.*) (.*) (.*)",
|
||||
// Octal(&a), Hex(&b), CRadix(&c), CRadix(&d));
|
||||
// will leave 64 in a, b, c, and d.
|
||||
|
||||
@ -379,7 +379,7 @@ class PCRE {
|
||||
// text. E.g.,
|
||||
//
|
||||
// std::string s = "yabba dabba doo";
|
||||
// CHECK(PCRE::Replace(&s, "b+", "d"));
|
||||
// ABSL_CHECK(PCRE::Replace(&s, "b+", "d"));
|
||||
//
|
||||
// will leave "s" containing "yada dabba doo"
|
||||
//
|
||||
@ -393,7 +393,7 @@ class PCRE {
|
||||
// re-matching. E.g.,
|
||||
//
|
||||
// std::string s = "yabba dabba doo";
|
||||
// CHECK(PCRE::GlobalReplace(&s, "b+", "d"));
|
||||
// ABSL_CHECK(PCRE::GlobalReplace(&s, "b+", "d"));
|
||||
//
|
||||
// will leave "s" containing "yada dada doo"
|
||||
//
|
||||
@ -417,7 +417,7 @@ class PCRE {
|
||||
// * The @p rewrite string doesn't have any syntax errors
|
||||
// ('\' followed by anything besides [0-9] and '\').
|
||||
// Making this test will guarantee that "replace" and "extract"
|
||||
// operations won't LOG(ERROR) or fail because of a bad rewrite
|
||||
// operations won't ABSL_LOG(ERROR) or fail because of a bad rewrite
|
||||
// string.
|
||||
// @param rewrite The proposed rewrite string.
|
||||
// @param error An error message is recorded here, iff we return false.
|
||||
|
||||
15
src/third_party/re2/scripts/import.sh
vendored
15
src/third_party/re2/scripts/import.sh
vendored
@ -7,8 +7,8 @@ IFS=$'\n\t'
|
||||
set -vx
|
||||
|
||||
NAME=re2
|
||||
REVISION="2023-11-01"
|
||||
VERSION="2023-11-01"
|
||||
REVISION="2025-08-12-mongo"
|
||||
VERSION="2025-08-12"
|
||||
|
||||
DEST_DIR=$(git rev-parse --show-toplevel)/src/third_party/re2
|
||||
if [[ -d $DEST_DIR/dist ]]; then
|
||||
@ -16,12 +16,11 @@ if [[ -d $DEST_DIR/dist ]]; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
git clone --branch 2023-11-01 git@github.com:mongodb-forks/re2.git $DEST_DIR/dist
|
||||
mkdir -p $DEST_DIR/dist
|
||||
|
||||
git clone --branch $REVISION git@github.com:mongodb-forks/re2.git $DEST_DIR/dist
|
||||
pushd $DEST_DIR/dist
|
||||
find . -mindepth 1 -maxdepth 1 -name ".*" -exec rm -rf {} \;
|
||||
rm -rf app
|
||||
rm -rf benchlog
|
||||
rm -rf doc
|
||||
rm -rf lib
|
||||
rm -rf python
|
||||
rm -rf app benchlog doc lib python
|
||||
find . -maxdepth 1 -type f -not -regex ".*\(CONTRIBUTING.md\|LICENSE\|README.md\|SECURITY.md\)$" -delete
|
||||
popd
|
||||
|
||||
Loading…
Reference in New Issue
Block a user