SERVER-107667 Upgrade re2 (#41882)

GitOrigin-RevId: 7cb9472471a3904fba1b7dc1ebe29e20b26b1d25
This commit is contained in:
Alex Li 2025-10-01 15:42:17 -04:00 committed by MongoDB Bot
parent 45517c14ae
commit b8f1573315
83 changed files with 1427 additions and 2559 deletions

View File

@ -772,7 +772,7 @@
},
{
"type": "library",
"bom-ref": "pkg:github/google/re2@2023-11-01",
"bom-ref": "pkg:github/google/re2@2025-08-12",
"supplier": {
"name": "Google LLC",
"url": [
@ -782,7 +782,7 @@
"author": "The RE2 Authors",
"group": "google.opensource",
"name": "re2",
"version": "2023-11-01",
"version": "2025-08-12",
"description": "RE2 is a fast, safe, thread-friendly alternative to backtracking regular expression engines like those used in PCRE, Perl, and Python. It is a C++ library.",
"licenses": [
{
@ -792,8 +792,8 @@
}
],
"copyright": "Copyright (c) 2009 The RE2 Authors. All rights reserved.",
"cpe": "cpe:2.3:h:google:re2:2023-11-01:*:*:*:*:*:*:*",
"purl": "pkg:github/google/re2@2023-11-01",
"cpe": "cpe:2.3:a:google:re2:2025-08-12:*:*:*:*:*:*:*",
"purl": "pkg:github/google/re2@2025-08-12",
"properties": [
{
"name": "internal:team_responsible",
@ -2966,7 +2966,7 @@
"pkg:github/fmtlib/fmt@11.1.3",
"pkg:github/facebook/folly@v2025.04.21.00",
"pkg:github/google/benchmark@v1.5.2",
"pkg:github/google/re2@2023-11-01",
"pkg:github/google/re2@2025-08-12",
"pkg:github/google/snappy@1.1.10",
"pkg:github/google/tcmalloc@093ba93c1bd6dca03b0a8334f06d01b019244291",
"pkg:github/google/googletest@v1.17.0",
@ -3089,7 +3089,7 @@
"dependsOn": []
},
{
"ref": "pkg:github/google/re2@2023-11-01",
"ref": "pkg:github/google/re2@2025-08-12",
"dependsOn": []
},
{

View File

@ -6,6 +6,7 @@ mongo_cc_library(
name = "re2",
srcs = [
"dist/re2/bitmap256.cc",
"dist/re2/bitmap256.h",
"dist/re2/bitstate.cc",
"dist/re2/compile.cc",
"dist/re2/dfa.cc",
@ -15,53 +16,40 @@ mongo_cc_library(
"dist/re2/onepass.cc",
"dist/re2/parse.cc",
"dist/re2/perl_groups.cc",
"dist/re2/pod_array.h",
"dist/re2/prefilter.cc",
"dist/re2/prefilter.h",
"dist/re2/prefilter_tree.cc",
"dist/re2/prefilter_tree.h",
"dist/re2/prog.cc",
"dist/re2/prog.h",
"dist/re2/re2.cc",
"dist/re2/regexp.cc",
"dist/re2/regexp.h",
"dist/re2/set.cc",
"dist/re2/simplify.cc",
"dist/re2/tostring.cc",
"dist/re2/unicode_casefold.cc",
"dist/re2/unicode_groups.cc",
"dist/util/rune.cc",
"dist/util/strutil.cc",
] + [
# Internal headers
"dist/re2/bitmap256.h",
"dist/re2/filtered_re2.h",
"dist/re2/pod_array.h",
"dist/re2/prefilter.h",
"dist/re2/prefilter_tree.h",
"dist/re2/prog.h",
"dist/re2/re2.h",
"dist/re2/regexp.h",
"dist/re2/set.h",
"dist/re2/sparse_array.h",
"dist/re2/sparse_set.h",
"dist/re2/stringpiece.h",
"dist/re2/testing/exhaustive_tester.h",
"dist/re2/testing/regexp_generator.h",
"dist/re2/testing/string_generator.h",
"dist/re2/testing/tester.h",
"dist/re2/tostring.cc",
"dist/re2/unicode_casefold.cc",
"dist/re2/unicode_casefold.h",
"dist/re2/unicode_groups.cc",
"dist/re2/unicode_groups.h",
"dist/re2/walker-inl.h",
"dist/util/logging.h",
"dist/util/malloc_counter.h",
"dist/util/pcre.h",
"dist/util/rune.cc",
"dist/util/strutil.cc",
"dist/util/strutil.h",
"dist/util/utf.h",
],
hdrs = [
"dist/re2/filtered_re2.h",
"dist/re2/re2.h",
"dist/re2/set.h",
"dist/re2/stringpiece.h",
],
copts = select({
"//bazel/config:gcc_or_clang": [
"-pthread",
],
"//conditions:default": [],
"@platforms//os:windows": [],
"//conditions:default": ["-pthread"],
}) + select({
"@platforms//os:windows": [
# The POSIX name for this item is deprecated
@ -72,6 +60,14 @@ mongo_cc_library(
includes = [
"dist",
],
linkopts = select({
# macOS doesn't need `-pthread' when linking and it appears that
# older versions of Clang will warn about the unused command line
# argument, so just don't pass it.
"@platforms//os:macos": [],
"@platforms//os:windows": [],
"//conditions:default": ["-pthread"],
}),
local_defines = select({
"@platforms//os:windows": [
"NOMINMAX",
@ -84,6 +80,8 @@ mongo_cc_library(
}),
deps = [
"//src/third_party/abseil-cpp:absl_base",
"//src/third_party/abseil-cpp:absl_log_internal_check_op",
"//src/third_party/abseil-cpp:absl_log_internal_message",
"//src/third_party/abseil-cpp:absl_raw_hash_set",
"//src/third_party/abseil-cpp:absl_str_format_internal",
"//src/third_party/abseil-cpp:absl_strings",

View File

@ -1,13 +0,0 @@
# This is the official list of RE2 authors for copyright purposes.
# This file is distinct from the CONTRIBUTORS files.
# See the latter for an explanation.
# Names should be added to this file as
# Name or Organization <email address>
# The email address is not required for organizations.
# Please keep the list sorted.
Google Inc.
Samsung Electronics
Stefano Rivera <stefano.rivera@gmail.com>

View File

@ -1,394 +0,0 @@
# Copyright 2009 The RE2 Authors. All Rights Reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
# Bazel (http://bazel.build/) BUILD file for RE2.
licenses(["notice"])
exports_files(["LICENSE"])
cc_library(
name = "re2",
srcs = [
"re2/bitmap256.cc",
"re2/bitmap256.h",
"re2/bitstate.cc",
"re2/compile.cc",
"re2/dfa.cc",
"re2/filtered_re2.cc",
"re2/mimics_pcre.cc",
"re2/nfa.cc",
"re2/onepass.cc",
"re2/parse.cc",
"re2/perl_groups.cc",
"re2/pod_array.h",
"re2/prefilter.cc",
"re2/prefilter.h",
"re2/prefilter_tree.cc",
"re2/prefilter_tree.h",
"re2/prog.cc",
"re2/prog.h",
"re2/re2.cc",
"re2/regexp.cc",
"re2/regexp.h",
"re2/set.cc",
"re2/simplify.cc",
"re2/sparse_array.h",
"re2/sparse_set.h",
"re2/tostring.cc",
"re2/unicode_casefold.cc",
"re2/unicode_casefold.h",
"re2/unicode_groups.cc",
"re2/unicode_groups.h",
"re2/walker-inl.h",
"util/logging.h",
"util/rune.cc",
"util/strutil.cc",
"util/strutil.h",
"util/utf.h",
],
hdrs = [
"re2/filtered_re2.h",
"re2/re2.h",
"re2/set.h",
"re2/stringpiece.h",
],
copts = select({
"@platforms//os:wasi": [],
"@platforms//os:windows": [],
"//conditions:default": ["-pthread"],
}),
linkopts = select({
# macOS doesn't need `-pthread' when linking and it appears that
# older versions of Clang will warn about the unused command line
# argument, so just don't pass it.
"@platforms//os:macos": [],
"@platforms//os:wasi": [],
"@platforms//os:windows": [],
"//conditions:default": ["-pthread"],
}),
visibility = ["//visibility:public"],
deps = [
"@com_google_absl//absl/base",
"@com_google_absl//absl/base:core_headers",
"@com_google_absl//absl/container:fixed_array",
"@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/container:flat_hash_set",
"@com_google_absl//absl/container:inlined_vector",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/strings:str_format",
"@com_google_absl//absl/synchronization",
"@com_google_absl//absl/types:optional",
"@com_google_absl//absl/types:span",
],
)
cc_library(
name = "testing",
testonly = 1,
srcs = [
"re2/testing/backtrack.cc",
"re2/testing/dump.cc",
"re2/testing/exhaustive_tester.cc",
"re2/testing/null_walker.cc",
"re2/testing/regexp_generator.cc",
"re2/testing/string_generator.cc",
"re2/testing/tester.cc",
"util/pcre.cc",
],
hdrs = [
"re2/testing/exhaustive_tester.h",
"re2/testing/regexp_generator.h",
"re2/testing/string_generator.h",
"re2/testing/tester.h",
"util/malloc_counter.h",
"util/pcre.h",
# Exposed for testing only.
"re2/bitmap256.h",
"re2/pod_array.h",
"re2/prefilter.h",
"re2/prefilter_tree.h",
"re2/prog.h",
"re2/regexp.h",
"re2/sparse_array.h",
"re2/sparse_set.h",
"re2/unicode_casefold.h",
"re2/unicode_groups.h",
"re2/walker-inl.h",
"util/logging.h",
"util/strutil.h",
"util/utf.h",
],
visibility = [":__subpackages__"],
deps = [
":re2",
"@com_google_absl//absl/base",
"@com_google_absl//absl/base:core_headers",
"@com_google_absl//absl/flags:flag",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/strings:str_format",
"@googletest//:gtest",
],
)
cc_test(
name = "charclass_test",
size = "small",
srcs = ["re2/testing/charclass_test.cc"],
deps = [
":testing",
"@com_google_absl//absl/base:core_headers",
"@com_google_absl//absl/strings:str_format",
"@googletest//:gtest",
"@googletest//:gtest_main",
],
)
cc_test(
name = "compile_test",
size = "small",
srcs = ["re2/testing/compile_test.cc"],
deps = [
":testing",
"@com_google_absl//absl/base:core_headers",
"@googletest//:gtest",
"@googletest//:gtest_main",
],
)
cc_test(
name = "filtered_re2_test",
size = "small",
srcs = ["re2/testing/filtered_re2_test.cc"],
deps = [
":re2",
":testing",
"@com_google_absl//absl/base:core_headers",
"@googletest//:gtest",
"@googletest//:gtest_main",
],
)
cc_test(
name = "mimics_pcre_test",
size = "small",
srcs = ["re2/testing/mimics_pcre_test.cc"],
deps = [
":testing",
"@com_google_absl//absl/base:core_headers",
"@googletest//:gtest",
"@googletest//:gtest_main",
],
)
cc_test(
name = "parse_test",
size = "small",
srcs = ["re2/testing/parse_test.cc"],
deps = [
":testing",
"@com_google_absl//absl/base:core_headers",
"@googletest//:gtest",
"@googletest//:gtest_main",
],
)
cc_test(
name = "possible_match_test",
size = "small",
srcs = ["re2/testing/possible_match_test.cc"],
deps = [
":re2",
":testing",
"@com_google_absl//absl/base:core_headers",
"@com_google_absl//absl/strings",
"@googletest//:gtest",
"@googletest//:gtest_main",
],
)
cc_test(
name = "re2_arg_test",
size = "small",
srcs = ["re2/testing/re2_arg_test.cc"],
deps = [
":re2",
":testing",
"@com_google_absl//absl/base:core_headers",
"@googletest//:gtest",
"@googletest//:gtest_main",
],
)
cc_test(
name = "re2_test",
size = "small",
srcs = ["re2/testing/re2_test.cc"],
deps = [
":re2",
":testing",
"@com_google_absl//absl/base:core_headers",
"@com_google_absl//absl/strings:str_format",
"@googletest//:gtest",
"@googletest//:gtest_main",
],
)
cc_test(
name = "regexp_test",
size = "small",
srcs = ["re2/testing/regexp_test.cc"],
deps = [
":testing",
"@googletest//:gtest",
"@googletest//:gtest_main",
],
)
cc_test(
name = "required_prefix_test",
size = "small",
srcs = ["re2/testing/required_prefix_test.cc"],
deps = [
":testing",
"@com_google_absl//absl/base:core_headers",
"@googletest//:gtest",
"@googletest//:gtest_main",
],
)
cc_test(
name = "search_test",
size = "small",
srcs = ["re2/testing/search_test.cc"],
deps = [
":testing",
"@com_google_absl//absl/base:core_headers",
"@googletest//:gtest",
"@googletest//:gtest_main",
],
)
cc_test(
name = "set_test",
size = "small",
srcs = ["re2/testing/set_test.cc"],
deps = [
":re2",
":testing",
"@googletest//:gtest",
"@googletest//:gtest_main",
],
)
cc_test(
name = "simplify_test",
size = "small",
srcs = ["re2/testing/simplify_test.cc"],
deps = [
":testing",
"@com_google_absl//absl/base:core_headers",
"@googletest//:gtest",
"@googletest//:gtest_main",
],
)
cc_test(
name = "string_generator_test",
size = "small",
srcs = ["re2/testing/string_generator_test.cc"],
deps = [
":testing",
"@googletest//:gtest",
"@googletest//:gtest_main",
],
)
cc_test(
name = "dfa_test",
size = "large",
srcs = ["re2/testing/dfa_test.cc"],
deps = [
":re2",
":testing",
"@com_google_absl//absl/base:core_headers",
"@com_google_absl//absl/flags:flag",
"@com_google_absl//absl/strings:str_format",
"@googletest//:gtest",
"@googletest//:gtest_main",
],
)
cc_test(
name = "exhaustive1_test",
size = "large",
srcs = ["re2/testing/exhaustive1_test.cc"],
deps = [
":testing",
"@googletest//:gtest",
"@googletest//:gtest_main",
],
)
cc_test(
name = "exhaustive2_test",
size = "large",
srcs = ["re2/testing/exhaustive2_test.cc"],
deps = [
":testing",
"@googletest//:gtest",
"@googletest//:gtest_main",
],
)
cc_test(
name = "exhaustive3_test",
size = "large",
srcs = ["re2/testing/exhaustive3_test.cc"],
deps = [
":testing",
"@googletest//:gtest",
"@googletest//:gtest_main",
],
)
cc_test(
name = "exhaustive_test",
size = "large",
srcs = ["re2/testing/exhaustive_test.cc"],
deps = [
":testing",
"@googletest//:gtest",
"@googletest//:gtest_main",
],
)
cc_test(
name = "random_test",
size = "large",
srcs = ["re2/testing/random_test.cc"],
deps = [
":testing",
"@com_google_absl//absl/flags:flag",
"@com_google_absl//absl/strings:str_format",
"@googletest//:gtest",
"@googletest//:gtest_main",
],
)
cc_binary(
name = "regexp_benchmark",
testonly = 1,
srcs = ["re2/testing/regexp_benchmark.cc"],
deps = [
":re2",
":testing",
"@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/flags:flag",
"@com_google_absl//absl/strings:str_format",
"@com_google_absl//absl/synchronization",
"@google_benchmark//:benchmark_main",
],
)

View File

@ -1,41 +0,0 @@
# This is the official list of people who can contribute
# (and typically have contributed) code to the RE2 repository.
# The AUTHORS file lists the copyright holders; this file
# lists people. For example, Google employees are listed here
# but not in AUTHORS, because Google holds the copyright.
#
# The submission process automatically checks to make sure
# that people submitting code are listed in this file (by email address).
#
# Names should be added to this file only after verifying that
# the individual or the individual's organization has agreed to
# the appropriate Contributor License Agreement, found here:
#
# http://code.google.com/legal/individual-cla-v1.0.html
# http://code.google.com/legal/corporate-cla-v1.0.html
#
# The agreement for individuals can be filled out on the web.
#
# When adding J Random Contributor's name to this file,
# either J's name or J's organization's name should be
# added to the AUTHORS file, depending on whether the
# individual or corporate CLA was used.
# Names should be added to this file like so:
# Name <email address>
# Please keep the list sorted.
Dominic Battré <battre@chromium.org>
Doug Kwan <dougkwan@google.com>
Dmitriy Vyukov <dvyukov@google.com>
John Millikin <jmillikin@gmail.com>
Mike Nazarewicz <mpn@google.com>
Nico Weber <thakis@chromium.org>
Pawel Hajdan <phajdan.jr@gmail.com>
Rob Pike <r@google.com>
Russ Cox <rsc@swtch.com>
Sanjay Ghemawat <sanjay@google.com>
Stefano Rivera <stefano.rivera@gmail.com>
Srinivasan Venkatachary <vsri@google.com>
Viatcheslav Ostapenko <sl.ostapenko@samsung.com>

View File

@ -1,27 +0,0 @@
# Copyright 2009 The RE2 Authors. All Rights Reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
# Bazel (http://bazel.build/) MODULE file for RE2.
module(
name = "re2",
version = "2023-11-01",
compatibility_level = 1,
)
bazel_dep(name = "platforms", version = "0.0.8")
bazel_dep(name = "rules_cc", version = "0.0.9")
bazel_dep(name = "abseil-cpp", version = "20230802.0", repo_name = "com_google_absl")
bazel_dep(name = "rules_python", version = "0.26.0")
bazel_dep(name = "pybind11_bazel", version = "2.11.1")
python_configure = use_extension("@pybind11_bazel//:python_configure.bzl", "extension")
python_configure.toolchain(python_version = "3") # ignored when non-root module
use_repo(python_configure, "local_config_python", "pybind11")
# These dependencies will be ignored when the `re2` module is not
# the root module (or when `--ignore_dev_dependency` is enabled).
bazel_dep(name = "google_benchmark", version = "1.8.3", dev_dependency = True)
bazel_dep(name = "googletest", version = "1.14.0.bcr.1", dev_dependency = True)
bazel_dep(name = "abseil-py", version = "1.4.0", dev_dependency = True)

View File

@ -1,399 +0,0 @@
# Copyright 2009 The RE2 Authors. All Rights Reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
# Build against Abseil.
ABSL_DEPS=\
absl_base\
absl_core_headers\
absl_fixed_array\
absl_flags\
absl_flat_hash_map\
absl_flat_hash_set\
absl_inlined_vector\
absl_optional\
absl_span\
absl_str_format\
absl_strings\
absl_synchronization\
PKG_CONFIG?=pkg-config
CCABSL=$(shell $(PKG_CONFIG) $(ABSL_DEPS) --cflags)
# GCC barfs on `-Wl` whereas Clang doesn't mind, but it's unclear what
# causes it to manifest on Ubuntu 22.04 LTS, so filter it out for now.
# Similar is needed for `static-testinstall` and `shared-testinstall`.
LDABSL=$(shell $(PKG_CONFIG) $(ABSL_DEPS) --libs | sed -e 's/-Wl / /g')
# To build against ICU for full Unicode properties support,
# uncomment the next two lines:
# CCICU=$(shell $(PKG_CONFIG) icu-uc --cflags) -DRE2_USE_ICU
# LDICU=$(shell $(PKG_CONFIG) icu-uc --libs)
# To build against PCRE for testing and benchmarking,
# uncomment the next two lines:
# CCPCRE=-I/usr/local/include -DUSEPCRE
# LDPCRE=-L/usr/local/lib -lpcre
CXX?=g++
# can override
CXXFLAGS?=-O3 -g
LDFLAGS?=
# required
RE2_CXXFLAGS?=-pthread -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -I. $(CCABSL) $(CCICU) $(CCPCRE)
RE2_LDFLAGS?=-pthread $(LDABSL) $(LDICU) $(LDPCRE)
AR?=ar
ARFLAGS?=rsc
NM?=nm
NMFLAGS?=-p
# Variables mandated by GNU, the arbiter of all good taste on the internet.
# http://www.gnu.org/prep/standards/standards.html
prefix=/usr/local
exec_prefix=$(prefix)
includedir=$(prefix)/include
libdir=$(exec_prefix)/lib
INSTALL=install
INSTALL_DATA=$(INSTALL) -m 644
# Work around the weirdness of sed(1) on Darwin. :/
ifeq ($(shell uname),Darwin)
SED_INPLACE=sed -i ''
else ifeq ($(shell uname),SunOS)
SED_INPLACE=sed -i
else
SED_INPLACE=sed -i
endif
# The pkg-config Requires: field.
REQUIRES=$(ABSL_DEPS)
ifdef LDICU
REQUIRES+=icu-uc
endif
# ABI version
# http://tldp.org/HOWTO/Program-Library-HOWTO/shared-libraries.html
SONAME=11
# To rebuild the Tables generated by Perl and Python scripts (requires Internet
# access for Unicode data), uncomment the following line:
# REBUILD_TABLES=1
# The SunOS linker does not support wildcards. :(
ifeq ($(shell uname),Darwin)
SOEXT=dylib
SOEXTVER=$(SONAME).$(SOEXT)
SOEXTVER00=$(SONAME).0.0.$(SOEXT)
MAKE_SHARED_LIBRARY=$(CXX) -dynamiclib -Wl,-compatibility_version,$(SONAME),-current_version,$(SONAME).0.0,-install_name,$(libdir)/libre2.$(SOEXTVER),-exported_symbols_list,libre2.symbols.darwin
else ifeq ($(shell uname),SunOS)
SOEXT=so
SOEXTVER=$(SOEXT).$(SONAME)
SOEXTVER00=$(SOEXT).$(SONAME).0.0
MAKE_SHARED_LIBRARY=$(CXX) -shared -Wl,-soname,libre2.$(SOEXTVER)
else
SOEXT=so
SOEXTVER=$(SOEXT).$(SONAME)
SOEXTVER00=$(SOEXT).$(SONAME).0.0
MAKE_SHARED_LIBRARY=$(CXX) -shared -Wl,-soname,libre2.$(SOEXTVER),--version-script,libre2.symbols
endif
.PHONY: all
all: obj/libre2.a obj/so/libre2.$(SOEXT)
INSTALL_HFILES=\
re2/filtered_re2.h\
re2/re2.h\
re2/set.h\
re2/stringpiece.h\
HFILES=\
util/logging.h\
util/malloc_counter.h\
util/pcre.h\
util/strutil.h\
util/utf.h\
re2/bitmap256.h\
re2/filtered_re2.h\
re2/pod_array.h\
re2/prefilter.h\
re2/prefilter_tree.h\
re2/prog.h\
re2/re2.h\
re2/regexp.h\
re2/set.h\
re2/sparse_array.h\
re2/sparse_set.h\
re2/stringpiece.h\
re2/testing/exhaustive_tester.h\
re2/testing/regexp_generator.h\
re2/testing/string_generator.h\
re2/testing/tester.h\
re2/unicode_casefold.h\
re2/unicode_groups.h\
re2/walker-inl.h\
OFILES=\
obj/util/rune.o\
obj/util/strutil.o\
obj/re2/bitmap256.o\
obj/re2/bitstate.o\
obj/re2/compile.o\
obj/re2/dfa.o\
obj/re2/filtered_re2.o\
obj/re2/mimics_pcre.o\
obj/re2/nfa.o\
obj/re2/onepass.o\
obj/re2/parse.o\
obj/re2/perl_groups.o\
obj/re2/prefilter.o\
obj/re2/prefilter_tree.o\
obj/re2/prog.o\
obj/re2/re2.o\
obj/re2/regexp.o\
obj/re2/set.o\
obj/re2/simplify.o\
obj/re2/tostring.o\
obj/re2/unicode_casefold.o\
obj/re2/unicode_groups.o\
TESTOFILES=\
obj/util/pcre.o\
obj/re2/testing/backtrack.o\
obj/re2/testing/dump.o\
obj/re2/testing/exhaustive_tester.o\
obj/re2/testing/null_walker.o\
obj/re2/testing/regexp_generator.o\
obj/re2/testing/string_generator.o\
obj/re2/testing/tester.o\
TESTS=\
obj/test/charclass_test\
obj/test/compile_test\
obj/test/filtered_re2_test\
obj/test/mimics_pcre_test\
obj/test/parse_test\
obj/test/possible_match_test\
obj/test/re2_test\
obj/test/re2_arg_test\
obj/test/regexp_test\
obj/test/required_prefix_test\
obj/test/search_test\
obj/test/set_test\
obj/test/simplify_test\
obj/test/string_generator_test\
BIGTESTS=\
obj/test/dfa_test\
obj/test/exhaustive1_test\
obj/test/exhaustive2_test\
obj/test/exhaustive3_test\
obj/test/exhaustive_test\
obj/test/random_test\
SOFILES=$(patsubst obj/%,obj/so/%,$(OFILES))
# We use TESTOFILES for testing the shared lib, only it is built differently.
STESTS=$(patsubst obj/%,obj/so/%,$(TESTS))
SBIGTESTS=$(patsubst obj/%,obj/so/%,$(BIGTESTS))
DOFILES=$(patsubst obj/%,obj/dbg/%,$(OFILES))
DTESTOFILES=$(patsubst obj/%,obj/dbg/%,$(TESTOFILES))
DTESTS=$(patsubst obj/%,obj/dbg/%,$(TESTS))
DBIGTESTS=$(patsubst obj/%,obj/dbg/%,$(BIGTESTS))
.PRECIOUS: obj/%.o
obj/%.o: %.cc $(HFILES)
@mkdir -p $$(dirname $@)
$(CXX) -c -o $@ $(CPPFLAGS) $(RE2_CXXFLAGS) $(CXXFLAGS) -DNDEBUG $*.cc
.PRECIOUS: obj/dbg/%.o
obj/dbg/%.o: %.cc $(HFILES)
@mkdir -p $$(dirname $@)
$(CXX) -c -o $@ $(CPPFLAGS) $(RE2_CXXFLAGS) $(CXXFLAGS) $*.cc
.PRECIOUS: obj/so/%.o
obj/so/%.o: %.cc $(HFILES)
@mkdir -p $$(dirname $@)
$(CXX) -c -o $@ -fPIC $(CPPFLAGS) $(RE2_CXXFLAGS) $(CXXFLAGS) -DNDEBUG $*.cc
.PRECIOUS: obj/libre2.a
obj/libre2.a: $(OFILES)
@mkdir -p obj
$(AR) $(ARFLAGS) obj/libre2.a $(OFILES)
.PRECIOUS: obj/dbg/libre2.a
obj/dbg/libre2.a: $(DOFILES)
@mkdir -p obj/dbg
$(AR) $(ARFLAGS) obj/dbg/libre2.a $(DOFILES)
.PRECIOUS: obj/so/libre2.$(SOEXT)
obj/so/libre2.$(SOEXT): $(SOFILES) libre2.symbols libre2.symbols.darwin
@mkdir -p obj/so
$(MAKE_SHARED_LIBRARY) -o obj/so/libre2.$(SOEXTVER) $(SOFILES) $(RE2_LDFLAGS) $(LDFLAGS)
ln -sf libre2.$(SOEXTVER) $@
.PRECIOUS: obj/dbg/test/%
obj/dbg/test/%: obj/dbg/libre2.a obj/dbg/re2/testing/%.o $(DTESTOFILES)
@mkdir -p obj/dbg/test
$(CXX) -o $@ obj/dbg/re2/testing/$*.o $(DTESTOFILES) -lgtest -lgtest_main obj/dbg/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
.PRECIOUS: obj/test/%
obj/test/%: obj/libre2.a obj/re2/testing/%.o $(TESTOFILES)
@mkdir -p obj/test
$(CXX) -o $@ obj/re2/testing/$*.o $(TESTOFILES) -lgtest -lgtest_main obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
# Test the shared lib, falling back to the static lib for private symbols
.PRECIOUS: obj/so/test/%
obj/so/test/%: obj/so/libre2.$(SOEXT) obj/libre2.a obj/re2/testing/%.o $(TESTOFILES)
@mkdir -p obj/so/test
$(CXX) -o $@ obj/re2/testing/$*.o $(TESTOFILES) -lgtest -lgtest_main -Lobj/so -lre2 obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
obj/test/regexp_benchmark: obj/libre2.a obj/re2/testing/regexp_benchmark.o $(TESTOFILES)
@mkdir -p obj/test
$(CXX) -o $@ obj/re2/testing/regexp_benchmark.o $(TESTOFILES) -lgtest -lbenchmark -lbenchmark_main obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
obj/test/re2_fuzzer: obj/libre2.a obj/re2/fuzzing/re2_fuzzer.o
@mkdir -p obj/test
$(CXX) -o $@ obj/re2/fuzzing/re2_fuzzer.o obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
ifdef REBUILD_TABLES
.PRECIOUS: re2/perl_groups.cc
re2/perl_groups.cc: re2/make_perl_groups.pl
perl $< > $@
.PRECIOUS: re2/unicode_%.cc
re2/unicode_%.cc: re2/make_unicode_%.py re2/unicode.py
python3 $< > $@
endif
.PHONY: distclean
distclean: clean
rm -f re2/perl_groups.cc re2/unicode_casefold.cc re2/unicode_groups.cc
.PHONY: clean
clean:
rm -rf obj
rm -f re2/*.pyc
.PHONY: testofiles
testofiles: $(TESTOFILES)
.PHONY: test
test: $(DTESTS) $(TESTS) $(STESTS) debug-test static-test shared-test
.PHONY: debug-test
debug-test: $(DTESTS)
@./runtests $(DTESTS)
.PHONY: static-test
static-test: $(TESTS)
@./runtests $(TESTS)
.PHONY: shared-test
shared-test: $(STESTS)
@./runtests -shared-library-path obj/so $(STESTS)
.PHONY: debug-bigtest
debug-bigtest: $(DTESTS) $(DBIGTESTS)
@./runtests $(DTESTS) $(DBIGTESTS)
.PHONY: static-bigtest
static-bigtest: $(TESTS) $(BIGTESTS)
@./runtests $(TESTS) $(BIGTESTS)
.PHONY: shared-bigtest
shared-bigtest: $(STESTS) $(SBIGTESTS)
@./runtests -shared-library-path obj/so $(STESTS) $(SBIGTESTS)
.PHONY: benchmark
benchmark: obj/test/regexp_benchmark
.PHONY: fuzz
fuzz: obj/test/re2_fuzzer
.PHONY: install
install: static-install shared-install
.PHONY: static
static: obj/libre2.a
.PHONY: static-install
static-install: obj/libre2.a common-install
$(INSTALL) obj/libre2.a $(DESTDIR)$(libdir)/libre2.a
.PHONY: shared
shared: obj/so/libre2.$(SOEXT)
.PHONY: shared-install
shared-install: obj/so/libre2.$(SOEXT) common-install
$(INSTALL) obj/so/libre2.$(SOEXT) $(DESTDIR)$(libdir)/libre2.$(SOEXTVER00)
ln -sf libre2.$(SOEXTVER00) $(DESTDIR)$(libdir)/libre2.$(SOEXTVER)
ln -sf libre2.$(SOEXTVER00) $(DESTDIR)$(libdir)/libre2.$(SOEXT)
.PHONY: common-install
common-install:
mkdir -p $(DESTDIR)$(includedir)/re2 $(DESTDIR)$(libdir)/pkgconfig
$(INSTALL_DATA) $(INSTALL_HFILES) $(DESTDIR)$(includedir)/re2
$(INSTALL_DATA) re2.pc.in $(DESTDIR)$(libdir)/pkgconfig/re2.pc
$(SED_INPLACE) -e "s#@CMAKE_INSTALL_FULL_INCLUDEDIR@#$(includedir)#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc
$(SED_INPLACE) -e "s#@CMAKE_INSTALL_FULL_LIBDIR@#$(libdir)#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc
$(SED_INPLACE) -e "s#@REQUIRES@#$(REQUIRES)#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc
$(SED_INPLACE) -e "s#@SONAME@#$(SONAME)#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc
.PHONY: testinstall
testinstall: static-testinstall shared-testinstall
@echo
@echo Install tests passed.
@echo
.PHONY: static-testinstall
static-testinstall:
ifeq ($(shell uname),Darwin)
@echo Skipping test for libre2.a on Darwin.
else ifeq ($(shell uname),SunOS)
@echo Skipping test for libre2.a on SunOS.
else
@mkdir -p obj
@cp testinstall.cc obj/static-testinstall.cc
(cd obj && export PKG_CONFIG_PATH=$(DESTDIR)$(libdir)/pkgconfig; \
$(CXX) static-testinstall.cc -o static-testinstall $(CXXFLAGS) $(LDFLAGS) \
$$($(PKG_CONFIG) re2 --cflags) \
$$($(PKG_CONFIG) re2 --libs | sed -e 's/-Wl / /g' | sed -e 's/-lre2/-l:libre2.a/'))
obj/static-testinstall
endif
.PHONY: shared-testinstall
shared-testinstall:
@mkdir -p obj
@cp testinstall.cc obj/shared-testinstall.cc
(cd obj && export PKG_CONFIG_PATH=$(DESTDIR)$(libdir)/pkgconfig; \
$(CXX) shared-testinstall.cc -o shared-testinstall $(CXXFLAGS) $(LDFLAGS) \
$$($(PKG_CONFIG) re2 --cflags) \
$$($(PKG_CONFIG) re2 --libs | sed -e 's/-Wl / /g'))
ifeq ($(shell uname),Darwin)
DYLD_LIBRARY_PATH="$(DESTDIR)$(libdir):$(DYLD_LIBRARY_PATH)" obj/shared-testinstall
else
LD_LIBRARY_PATH="$(DESTDIR)$(libdir):$(LD_LIBRARY_PATH)" obj/shared-testinstall
endif
.PHONY: benchlog
benchlog: obj/test/regexp_benchmark
(echo '==BENCHMARK==' `hostname` `date`; \
(uname -a; $(CXX) --version; git rev-parse --short HEAD; file obj/test/regexp_benchmark) | sed 's/^/# /'; \
echo; \
./obj/test/regexp_benchmark 'PCRE|RE2') | tee -a benchlog.$$(hostname | sed 's/\..*//')
.PHONY: log
log:
$(MAKE) clean
$(MAKE) CXXFLAGS="$(CXXFLAGS) -DLOGGING=1" \
$(filter obj/test/exhaustive%_test,$(BIGTESTS))
echo '#' RE2 exhaustive tests built by make log >re2-exhaustive.txt
echo '#' $$(date) >>re2-exhaustive.txt
obj/test/exhaustive_test |grep -v '^PASS$$' >>re2-exhaustive.txt
obj/test/exhaustive1_test |grep -v '^PASS$$' >>re2-exhaustive.txt
obj/test/exhaustive2_test |grep -v '^PASS$$' >>re2-exhaustive.txt
obj/test/exhaustive3_test |grep -v '^PASS$$' >>re2-exhaustive.txt
$(MAKE) CXXFLAGS="$(CXXFLAGS) -DLOGGING=1" obj/test/search_test
echo '#' RE2 basic search tests built by make $@ >re2-search.txt
echo '#' $$(date) >>re2-search.txt
obj/test/search_test |grep -v '^PASS$$' >>re2-search.txt

View File

@ -1,47 +0,0 @@
This is the source code repository for RE2, a regular expression library.
For documentation about how to install and use RE2,
visit https://github.com/google/re2/.
The short version is:
make
make test
make install
make testinstall
Building RE2 requires Abseil (https://github.com/abseil/abseil-cpp)
to be installed on your system. Building the testing for RE2 requires
GoogleTest (https://github.com/google/googletest) and Benchmark
(https://github.com/google/benchmark) to be installed as well.
There is a fair amount of documentation (including code snippets) in
the re2.h header file.
More information can be found on the wiki:
https://github.com/google/re2/wiki
Issue tracker:
https://github.com/google/re2/issues
Mailing list:
https://groups.google.com/group/re2-dev
Unless otherwise noted, the RE2 source files are distributed
under the BSD-style license found in the LICENSE file.
RE2's native language is C++.
The Python wrapper is at https://github.com/google/re2/tree/abseil/python
and on PyPI (https://pypi.org/project/google-re2/).
A C wrapper is at https://github.com/marcomaggi/cre2/.
A D wrapper is at https://github.com/ShigekiKarita/re2d/ and on DUB (code.dlang.org).
An Erlang wrapper is at https://github.com/dukesoferl/re2/ and on Hex (hex.pm).
An Inferno wrapper is at https://github.com/powerman/inferno-re2/.
A Node.js wrapper is at https://github.com/uhop/node-re2/ and on NPM (npmjs.com).
An OCaml wrapper is at https://github.com/janestreet/re2/ and on OPAM (opam.ocaml.org).
A Perl wrapper is at https://github.com/dgl/re-engine-RE2/ and on CPAN (cpan.org).
An R wrapper is at https://github.com/girishji/re2/ and on CRAN (cran.r-project.org).
A Ruby wrapper is at https://github.com/mudge/re2/ and on RubyGems (rubygems.org).
A WebAssembly wrapper is at https://github.com/google/re2-wasm/ and on NPM (npmjs.com).

259
src/third_party/re2/dist/README.md vendored Normal file
View File

@ -0,0 +1,259 @@
# RE2, a regular expression library
RE2 is an efficient, principled regular expression library
that has been used in production at Google and many other places
since 2006.
_**Safety is RE2's primary goal.**_
RE2 was designed and implemented with an explicit goal of being able
to handle regular expressions from untrusted users without risk.
One of its primary guarantees is that the match time is linear in the
length of the input string. It was also written with production concerns in mind:
the parser, the compiler and the execution engines limit their memory usage
by working within a configurable budget—failing gracefully when exhausted—and
they avoid stack overflow by eschewing recursion.
It is not a goal to be faster than all other engines under all circumstances.
Although RE2 guarantees a running time that is asymptotically linear in
the length of the input, more complex expressions may incur larger constant factors;
longer expressions increase the overhead required to handle those expressions safely.
In a sense, RE2 is pessimistic where a backtracking engine is optimistic:
A backtracking engine tests each alternative sequentially, making it fast when the first alternative is common.
By contrast RE2 evaluates all alternatives in parallel, avoiding the performance penalty for the last alternative,
at the cost of some overhead. This pessimism is what makes RE2 secure.
It is also not a goal to implement all of the features offered by Perl, PCRE and other engines.
As a matter of principle, RE2 does not support constructs for which only backtracking solutions are known to exist.
Thus, backreferences and look-around assertions are not supported.
For more information, please refer to Russ Cox's articles on regular expression theory and practice:
* [Regular Expression Matching Can Be Simple And Fast](https://swtch.com/~rsc/regexp/regexp1.html)
* [Regular Expression Matching: the Virtual Machine Approach](https://swtch.com/~rsc/regexp/regexp2.html)
* [Regular Expression Matching in the Wild](https://swtch.com/~rsc/regexp/regexp3.html)
### Syntax
In POSIX mode, RE2 accepts standard POSIX (egrep) syntax regular expressions.
In Perl mode, RE2 accepts most Perl operators. The only excluded ones are
those that require backtracking (and its potential for exponential runtime)
to implement. These include backreferences (submatching is still okay)
and generalized assertions.
The [Syntax wiki page](https://github.com/google/re2/wiki/Syntax)
documents the supported Perl-mode syntax in detail.
The default is Perl mode.
### C++ API
RE2's native language is C++, although there are [ports and wrappers](#ports-and-wrappers) listed below.
#### Matching Interface
There are two basic operators:
`RE2::FullMatch` requires the regexp to match the entire input text, and
`RE2::PartialMatch` looks for a match for a substring of the input text,
returning the leftmost-longest match in POSIX mode and the
same match that Perl would have chosen in Perl mode.
Examples:
```cpp
assert(RE2::FullMatch("hello", "h.*o"));
assert(!RE2::FullMatch("hello", "e"));
assert(RE2::PartialMatch("hello", "h.*o"));
assert(RE2::PartialMatch("hello", "e"));
```
#### Submatch Extraction
Both matching functions take additional arguments in which submatches will be stored.
Each argument can be a `std::string*`, a pointer to an integer type, or an `absl::string_view*`.
(The `absl::string_view` type is very similar to the `std::string_view` type,
but for historical reasons, RE2 uses the former.)
A `string_view` is a pointer to the original input text, along with a count.
It behaves like a string but doesn't carry its own storage.
As with a raw pointer, you must be careful not to use a `string_view`
once the original text has been deleted or has gone out of scope.
Examples:
```cpp
// Successful parsing.
int i;
std::string s;
assert(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
assert(s == "ruby");
assert(i == 1234);
// Fails: "ruby" cannot be parsed as an integer.
assert(!RE2::FullMatch("ruby", "(.+)", &i));
// Success; does not extract the number.
assert(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s));
// Success; skips NULL argument.
assert(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", (void*)NULL, &i));
// Fails: integer overflow keeps value from being stored in i.
assert(!RE2::FullMatch("ruby:123456789123", "(\\w+):(\\d+)", &s, &i));
```
#### Pre-Compiled Regular Expressions
The examples above all recompile the regular expression on each call.
Instead, you can compile it once to an RE2 object and reuse that object for each call.
Example:
```cpp
RE2 re("(\\w+):(\\d+)");
assert(re.ok()); // compiled; if not, see re.error();
assert(RE2::FullMatch("ruby:1234", re, &s, &i));
assert(RE2::FullMatch("ruby:1234", re, &s));
assert(RE2::FullMatch("ruby:1234", re, (void*)NULL, &i));
assert(!RE2::FullMatch("ruby:123456789123", re, &s, &i));
```
#### Options
The constructor takes an optional second argument that can
be used to change RE2's default options.
For example, `RE2::Quiet` silences the error messages that are
usually printed when a regular expression fails to parse:
```cpp
RE2 re("(ab", RE2::Quiet); // don't write to stderr for parser failure
assert(!re.ok()); // can check re.error() for details
```
Other useful predefined options are `Latin1` (disable UTF-8) and `POSIX`
(use POSIX syntax and leftmost longest matching).
You can also declare your own `RE2::Options` object and then configure it as you like.
See the [header](https://github.com/google/re2/blob/main/re2/re2.h) for the full set of options.
#### Unicode Normalization
RE2 operates on Unicode code points: it makes no attempt at normalization.
For example, the regular expression /ü/ (U+00FC, u with diaeresis)
does not match the input "ü" (U+0075 U+0308, u followed by combining diaeresis).
Normalization is a long, involved topic.
The simplest solution, if you need such matches, is to normalize both the regular expressions
and the input in a preprocessing step before using RE2.
For more details on the general topic, see <https://www.unicode.org/reports/tr15/>.
#### Additional Tips and Tricks
For advanced usage, like constructing your own argument lists,
or using RE2 as a lexer, or parsing hex, octal, and C-radix numbers,
see [re2.h](https://github.com/google/re2/blob/main/re2/re2.h).
### Installation
RE2 can be built and installed using GNU make, CMake, or Bazel.
The simplest installation instructions are:
make
make test
make benchmark
make install
make testinstall
Building RE2 requires a C++17 compiler and the [Abseil](https://github.com/abseil/abseil-cpp) library.
Building the tests and benchmarks requires
[GoogleTest](https://github.com/google/googletest)
and [Benchmark](https://github.com/google/benchmark).
To obtain those:
- Linux: `apt install libabsl-dev libgtest-dev libbenchmark-dev`
- macOS: `brew install abseil googletest google-benchmark pkg-config-wrapper`
- Windows: `vcpkg install abseil gtest benchmark` \
or `vcpkg add port abseil gtest benchmark`
Once those are installed, the build has to be able to find them.
If the standard Makefile has trouble, then switching to CMake can help:
rm -rf build
cmake -DRE2_TEST=ON -DRE2_BENCHMARK=ON -S . -B build
cd build
make
make test
make install
When using CMake, with benchmarks enabled, `make test` builds and runs test binaries
and builds a `regexp_benchmark` binary but does not run it.
If you don't need the tests or benchmarks at all, you can omit the corresponding `-D` arguments,
and then you don't need the GoogleTest or Benchmark dependencies either.
Another useful option is `-DRE2_USE_ICU=ON`, which adds a dependency on the
ICU Unicode library but also extends the list of property names available in the `\p` and `\P` patterns.
CMake can also be used to generate Visual Studio and Xcode projects, as well as
Cygwin, MinGW, and MSYS makefiles.
- Visual Studio users: You need Visual Studio 2019 or later.
- Cygwin users: You must run CMake from the Cygwin command line, not the Windows command line.
If you are adding RE2 to your own CMake project,
CMake has two ways to use a dependency: `add_subdirectory()`,
which is when the dependency's **_sources_** are in a subdirectory of your project;
and `find_package()`, which is when the dependency's
**_binaries_** have been built and installed somewhere on your system.
The Abseil documentation walks through the former [here](https://abseil.io/docs/cpp/quickstart-cmake)
versus the latter [here](https://abseil.io/docs/cpp/tools/cmake-installs).
Once you get Abseil working, getting RE2 working will be a very similar process and,
either way, `target_link_libraries(… re2::re2)` should Just Work™.
If you are using [Bazel](https://bazel.io), it will handle the dependencies for you,
although you still need to download Bazel,
which you can do with [Bazelisk](https://github.com/bazelbuild/bazelisk).
go install github.com/bazelbuild/bazelisk@latest
# or on mac: brew install bazelisk
bazelisk build :all
bazelisk test :all
If you are using RE2 from another project, you need to make sure you are
using at least C++17.
See the RE2 [.bazelrc](https://github.com/google/re2/blob/main/.bazelrc) file for an example.
### Ports and Wrappers
RE2 is implemented in C++.
The official Python wrapper is [in the `python` directory](https://github.com/google/re2/tree/main/python)
and [published on PyPI as `google-re2`](https://pypi.org/project/google-re2/).
Note that there is also a PyPI `re2` but it is not by the RE2 authors and is unmaintained. Use `google-re2`.
There are also other unofficial wrappers:
- A C wrapper is at <https://github.com/marcomaggi/cre2/>.
- A D wrapper is at <https://github.com/ShigekiKarita/re2d/> and [on DUB](https://code.dlang.org/packages/re2d).
- An Erlang wrapper is at <https://github.com/dukesoferl/re2/> and [on Hex](https://hex.pm/packages/re2).
- An Inferno wrapper is at <https://github.com/powerman/inferno-re2/>.
- A Node.js wrapper is at <https://github.com/uhop/node-re2/> and [on NPM](https://www.npmjs.com/package/re2).
- An OCaml wrapper is at <https://github.com/janestreet/re2/> and [on OPAM](https://opam.ocaml.org/packages/re2/).
- A Perl wrapper is at <https://github.com/dgl/re-engine-RE2/> and [on CPAN](https://metacpan.org/pod/re::engine::RE2).
- An R wrapper is at <https://github.com/girishji/re2/> and [on CRAN](https://cran.r-project.org/web/packages/re2/index.html).
- A Ruby wrapper is at <https://github.com/mudge/re2/> and [on RubyGems](https://rubygems.org/gems/re2).
- A WebAssembly wrapper is at <https://github.com/google/re2-wasm/> and [on NPM](https://www.npmjs.com/package/re2-wasm).
[RE2J](https://github.com/google/re2j) is a port of the RE2 C++ code to pure Java,
and [RE2JS](https://github.com/le0pard/re2js) is a port of RE2J to JavaScript.
The [Go `regexp` package](https://go.dev/pkg/regexp)
and [Rust `regex` crate](https://docs.rs/regex)
do not share code with RE2, but they follow the same principles,
accept the same syntax, and provide the same efficiency guarantees.
### Contact
The [issue tracker](https://github.com/google/re2/issues) is the best place for discussions.
There is a [mailing list](https://groups.google.com/group/re2-dev) for keeping up with code changes.
Please read the [contribution guide](https://github.com/google/re2/wiki/Contribute) before sending changes.
In particular, note that RE2 does not use GitHub pull requests.

View File

@ -1,7 +0,0 @@
# Copyright 2009 The RE2 Authors. All Rights Reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
# Bazel (http://bazel.build/) WORKSPACE file for RE2.
workspace(name = "com_googlesource_code_re2")

View File

@ -1,7 +0,0 @@
# Copyright 2009 The RE2 Authors. All Rights Reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
# Bazel (http://bazel.build/) WORKSPACE file for RE2.
workspace(name = "com_googlesource_code_re2")

View File

@ -1,16 +0,0 @@
{
global:
# re2::RE2*
_ZN3re23RE2*;
_ZNK3re23RE2*;
# re2::operator<<*
_ZN3re2ls*;
# re2::FilteredRE2*
_ZN3re211FilteredRE2*;
_ZNK3re211FilteredRE2*;
# re2::re2_internal*
_ZN3re212re2_internal*;
_ZNK3re212re2_internal*;
local:
*;
};

View File

@ -1,12 +0,0 @@
# Linker doesn't like these unmangled:
# re2::RE2*
__ZN3re23RE2*
__ZNK3re23RE2*
# re2::operator<<*
__ZN3re2ls*
# re2::FilteredRE2*
__ZN3re211FilteredRE2*
__ZNK3re211FilteredRE2*
# re2::re2_internal*
__ZN3re212re2_internal*
__ZNK3re212re2_internal*

View File

@ -1,9 +0,0 @@
includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@
libdir=@CMAKE_INSTALL_FULL_LIBDIR@
Name: re2
Description: RE2 is a fast, safe, thread-friendly regular expression engine.
Requires: @REQUIRES@
Version: @SONAME@.0.0
Cflags: -pthread -I${includedir}
Libs: -pthread -L${libdir} -lre2

View File

@ -6,14 +6,13 @@
#include <stdint.h>
#include "absl/base/macros.h"
#include "util/logging.h"
#include "absl/log/absl_check.h"
namespace re2 {
int Bitmap256::FindNextSetBit(int c) const {
DCHECK_GE(c, 0);
DCHECK_LE(c, 255);
ABSL_DCHECK_GE(c, 0);
ABSL_DCHECK_LE(c, 255);
// Check the word that contains the bit. Mask out any lower bits.
int i = c / 64;
@ -27,15 +26,15 @@ int Bitmap256::FindNextSetBit(int c) const {
case 1:
if (words_[1] != 0)
return (1 * 64) + FindLSBSet(words_[1]);
ABSL_FALLTHROUGH_INTENDED;
[[fallthrough]];
case 2:
if (words_[2] != 0)
return (2 * 64) + FindLSBSet(words_[2]);
ABSL_FALLTHROUGH_INTENDED;
[[fallthrough]];
case 3:
if (words_[3] != 0)
return (3 * 64) + FindLSBSet(words_[3]);
ABSL_FALLTHROUGH_INTENDED;
[[fallthrough]];
default:
return -1;
}

View File

@ -5,13 +5,15 @@
#ifndef RE2_BITMAP256_H_
#define RE2_BITMAP256_H_
#ifdef _MSC_VER
#include <intrin.h>
#endif
#include <stdint.h>
#include <string.h>
#include "util/logging.h"
#include "absl/log/absl_check.h"
#include "absl/log/absl_log.h"
#ifdef _MSC_VER
#include <intrin.h>
#endif
namespace re2 {
@ -28,16 +30,16 @@ class Bitmap256 {
// Tests the bit with index c.
bool Test(int c) const {
DCHECK_GE(c, 0);
DCHECK_LE(c, 255);
ABSL_DCHECK_GE(c, 0);
ABSL_DCHECK_LE(c, 255);
return (words_[c / 64] & (uint64_t{1} << (c % 64))) != 0;
}
// Sets the bit with index c.
void Set(int c) {
DCHECK_GE(c, 0);
DCHECK_LE(c, 255);
ABSL_DCHECK_GE(c, 0);
ABSL_DCHECK_LE(c, 255);
words_[c / 64] |= (uint64_t{1} << (c % 64));
}
@ -49,7 +51,7 @@ class Bitmap256 {
private:
// Finds the least significant non-zero bit in n.
static int FindLSBSet(uint64_t n) {
DCHECK_NE(n, 0);
ABSL_DCHECK_NE(n, uint64_t{0});
#if defined(__GNUC__)
return __builtin_ctzll(n);
#elif defined(_MSC_VER) && defined(_M_X64)

View File

@ -20,10 +20,13 @@
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <limits>
#include <utility>
#include "util/logging.h"
#include "absl/log/absl_check.h"
#include "absl/log/absl_log.h"
#include "absl/strings/string_view.h"
#include "re2/pod_array.h"
#include "re2/prog.h"
#include "re2/regexp.h"
@ -107,9 +110,9 @@ void BitState::Push(int id, const char* p) {
if (njob_ >= job_.size()) {
GrowStack();
if (njob_ >= job_.size()) {
LOG(DFATAL) << "GrowStack() failed: "
<< "njob_ = " << njob_ << ", "
<< "job_.size() = " << job_.size();
ABSL_LOG(DFATAL) << "GrowStack() failed: "
<< "njob_ = " << njob_ << ", "
<< "job_.size() = " << job_.size();
return;
}
}
@ -167,7 +170,7 @@ bool BitState::TrySearch(int id0, const char* p0) {
Prog::Inst* ip = prog_->inst(id);
switch (ip->opcode()) {
default:
LOG(DFATAL) << "Unexpected opcode: " << ip->opcode();
ABSL_LOG(DFATAL) << "Unexpected opcode: " << ip->opcode();
return false;
case kInstFail:
@ -233,7 +236,7 @@ bool BitState::TrySearch(int id0, const char* p0) {
CheckAndLoop:
// Sanity check: id is the head of its list, which must
// be the case if id-1 is the last of *its* list. :)
DCHECK(id == 0 || prog_->inst(id-1)->last());
ABSL_DCHECK(id == 0 || prog_->inst(id-1)->last());
if (ShouldVisit(id, p))
goto Loop;
break;

View File

@ -10,17 +10,20 @@
#include <stdint.h>
#include <string.h>
#include <string>
#include <utility>
#include "absl/base/macros.h"
#include "absl/container/flat_hash_map.h"
#include "util/logging.h"
#include "util/utf.h"
#include "absl/log/absl_check.h"
#include "absl/log/absl_log.h"
#include "absl/strings/string_view.h"
#include "re2/pod_array.h"
#include "re2/prog.h"
#include "re2/re2.h"
#include "re2/regexp.h"
#include "re2/walker-inl.h"
#include "util/utf.h"
namespace re2 {
@ -522,8 +525,8 @@ void Compiler::AddSuffix(int id) {
}
int Compiler::AddSuffixRecursive(int root, int id) {
DCHECK(inst_[root].opcode() == kInstAlt ||
inst_[root].opcode() == kInstByteRange);
ABSL_DCHECK(inst_[root].opcode() == kInstAlt ||
inst_[root].opcode() == kInstByteRange);
Frag f = FindByteRange(root, id);
if (IsNoMatch(f)) {
@ -565,7 +568,7 @@ int Compiler::AddSuffixRecursive(int root, int id) {
if (!IsCachedRuneByteSuffix(id)) {
// The head should be the instruction most recently allocated, so free it
// instead of leaving it unreachable.
DCHECK_EQ(id, ninst_-1);
ABSL_DCHECK_EQ(id, ninst_-1);
inst_[id].out_opcode_ = 0;
inst_[id].out1_ = 0;
ninst_--;
@ -613,7 +616,7 @@ Frag Compiler::FindByteRange(int root, int id) {
return NoMatch();
}
LOG(DFATAL) << "should never happen";
ABSL_LOG(DFATAL) << "should never happen";
return NoMatch();
}
@ -738,7 +741,7 @@ void Compiler::AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase) {
int n = runetochar(reinterpret_cast<char*>(ulo), &lo);
int m = runetochar(reinterpret_cast<char*>(uhi), &hi);
(void)m; // USED(m)
DCHECK_EQ(n, m);
ABSL_DCHECK_EQ(n, m);
// The logic below encodes this thinking:
//
@ -791,7 +794,7 @@ void Compiler::AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase) {
Frag Compiler::Copy(Frag arg) {
// We're using WalkExponential; there should be no copying.
failed_ = true;
LOG(DFATAL) << "Compiler::Copy called!";
ABSL_LOG(DFATAL) << "Compiler::Copy called!";
return NoMatch();
}
@ -918,7 +921,7 @@ Frag Compiler::PostVisit(Regexp* re, Frag, Frag, Frag* child_frags,
if (cc->empty()) {
// This can't happen.
failed_ = true;
LOG(DFATAL) << "No ranges in char class";
ABSL_LOG(DFATAL) << "No ranges in char class";
return NoMatch();
}
@ -976,7 +979,7 @@ Frag Compiler::PostVisit(Regexp* re, Frag, Frag, Frag* child_frags,
return EmptyWidth(kEmptyNonWordBoundary);
}
failed_ = true;
LOG(DFATAL) << "Missing case in Compiler: " << re->op();
ABSL_LOG(DFATAL) << "Missing case in Compiler: " << re->op();
return NoMatch();
}

View File

@ -25,28 +25,31 @@
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <algorithm>
#include <atomic>
#include <deque>
#include <new>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "absl/base/call_once.h"
#include "absl/base/macros.h"
#include "absl/base/thread_annotations.h"
#include "absl/container/flat_hash_map.h"
#include "absl/container/flat_hash_set.h"
#include "absl/hash/hash.h"
#include "absl/log/absl_check.h"
#include "absl/log/absl_log.h"
#include "absl/strings/str_format.h"
#include "absl/strings/string_view.h"
#include "absl/synchronization/mutex.h"
#include "absl/types/span.h"
#include "util/logging.h"
#include "util/strutil.h"
#include "re2/pod_array.h"
#include "re2/prog.h"
#include "re2/re2.h"
#include "re2/sparse_set.h"
#include "util/strutil.h"
// Silence "zero-sized array in struct/union" warning for DFA::State::next_.
#ifdef _MSC_VER
@ -149,15 +152,15 @@ class DFA {
struct StateHash {
size_t operator()(const State* a) const {
DCHECK(a != NULL);
ABSL_DCHECK(a != NULL);
return absl::Hash<State>()(*a);
}
};
struct StateEqual {
bool operator()(const State* a, const State* b) const {
DCHECK(a != NULL);
DCHECK(b != NULL);
ABSL_DCHECK(a != NULL);
ABSL_DCHECK(b != NULL);
return *a == *b;
}
};
@ -646,7 +649,7 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
absl::FPrintF(stderr, " -> FullMatchState\n");
return FullMatchState;
}
ABSL_FALLTHROUGH_INTENDED;
[[fallthrough]];
default:
// Record iff id is the head of its list, which must
// be the case if id-1 is the last of *its* list. :)
@ -659,7 +662,7 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
break;
}
}
DCHECK_LE(n, q->size());
ABSL_DCHECK_LE(n, q->size());
if (n > 0 && inst[n-1] == Mark)
n--;
@ -847,7 +850,7 @@ void DFA::AddToQueue(Workq* q, int id, uint32_t flag) {
stk[nstk++] = id;
while (nstk > 0) {
DCHECK_LE(nstk, stack_.size());
ABSL_DCHECK_LE(nstk, stack_.size());
id = stk[--nstk];
Loop:
@ -872,7 +875,7 @@ void DFA::AddToQueue(Workq* q, int id, uint32_t flag) {
Prog::Inst* ip = prog_->inst(id);
switch (ip->opcode()) {
default:
LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
ABSL_LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
break;
case kInstByteRange: // just save these on the queue
@ -898,7 +901,7 @@ void DFA::AddToQueue(Workq* q, int id, uint32_t flag) {
goto Loop;
case kInstAltMatch:
DCHECK(!ip->last());
ABSL_DCHECK(!ip->last());
id = id+1;
goto Loop;
@ -961,7 +964,7 @@ void DFA::RunWorkqOnByte(Workq* oldq, Workq* newq,
Prog::Inst* ip = prog_->inst(id);
switch (ip->opcode()) {
default:
LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
ABSL_LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
break;
case kInstFail: // never succeeds
@ -1029,14 +1032,14 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) {
return FullMatchState;
}
if (state == DeadState) {
LOG(DFATAL) << "DeadState in RunStateOnByte";
ABSL_LOG(DFATAL) << "DeadState in RunStateOnByte";
return NULL;
}
if (state == NULL) {
LOG(DFATAL) << "NULL state in RunStateOnByte";
ABSL_LOG(DFATAL) << "NULL state in RunStateOnByte";
return NULL;
}
LOG(DFATAL) << "Unexpected special state in RunStateOnByte";
ABSL_LOG(DFATAL) << "Unexpected special state in RunStateOnByte";
return NULL;
}
@ -1267,7 +1270,7 @@ DFA::State* DFA::StateSaver::Restore() {
absl::MutexLock l(&dfa_->mutex_);
State* s = dfa_->CachedState(inst_, ninst_, flag_);
if (s == NULL)
LOG(DFATAL) << "StateSaver failed to restore state.";
ABSL_LOG(DFATAL) << "StateSaver failed to restore state.";
return s;
}
@ -1367,7 +1370,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) {
lastmatch = p;
if (ExtraDebug)
absl::FPrintF(stderr, "match @stx! [%s]\n", DumpState(s));
if (params->matches != NULL && kind_ == Prog::kManyMatch) {
if (params->matches != NULL) {
for (int i = s->ninst_ - 1; i >= 0; i--) {
int id = s->inst_[i];
if (id == MatchSep)
@ -1451,13 +1454,13 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) {
// Restore start and s so we can continue.
if ((start = save_start.Restore()) == NULL ||
(s = save_s.Restore()) == NULL) {
// Restore already did LOG(DFATAL).
// Restore already did ABSL_LOG(DFATAL).
params->failed = true;
return false;
}
ns = RunStateOnByteUnlocked(s, c);
if (ns == NULL) {
LOG(DFATAL) << "RunStateOnByteUnlocked failed after ResetCache";
ABSL_LOG(DFATAL) << "RunStateOnByteUnlocked failed after ResetCache";
params->failed = true;
return false;
}
@ -1484,7 +1487,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) {
lastmatch = p + 1;
if (ExtraDebug)
absl::FPrintF(stderr, "match @%d! [%s]\n", lastmatch - bp, DumpState(s));
if (params->matches != NULL && kind_ == Prog::kManyMatch) {
if (params->matches != NULL) {
for (int i = s->ninst_ - 1; i >= 0; i--) {
int id = s->inst_[i];
if (id == MatchSep)
@ -1529,7 +1532,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) {
}
ns = RunStateOnByteUnlocked(s, lastbyte);
if (ns == NULL) {
LOG(DFATAL) << "RunStateOnByteUnlocked failed after Reset";
ABSL_LOG(DFATAL) << "RunStateOnByteUnlocked failed after Reset";
params->failed = true;
return false;
}
@ -1551,7 +1554,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) {
lastmatch = p;
if (ExtraDebug)
absl::FPrintF(stderr, "match @etx! [%s]\n", DumpState(s));
if (params->matches != NULL && kind_ == Prog::kManyMatch) {
if (params->matches != NULL) {
for (int i = s->ninst_ - 1; i >= 0; i--) {
int id = s->inst_[i];
if (id == MatchSep)
@ -1646,7 +1649,7 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
// Sanity check: make sure that text lies within context.
if (BeginPtr(text) < BeginPtr(context) || EndPtr(text) > EndPtr(context)) {
LOG(DFATAL) << "context does not contain text";
ABSL_LOG(DFATAL) << "context does not contain text";
params->start = DeadState;
return true;
}
@ -1694,7 +1697,7 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
ResetCache(params->cache_lock);
if (!AnalyzeSearchHelper(params, info, flags)) {
params->failed = true;
LOG(DFATAL) << "Failed to analyze start state.";
ABSL_LOG(DFATAL) << "Failed to analyze start state.";
return false;
}
}
@ -1767,6 +1770,8 @@ bool DFA::Search(absl::string_view text, absl::string_view context,
params.anchored = anchored;
params.want_earliest_match = want_earliest_match;
params.run_forward = run_forward;
// matches should be null except when using RE2::Set.
ABSL_DCHECK(matches == NULL || kind_ == Prog::kManyMatch);
params.matches = matches;
if (!AnalyzeSearch(&params)) {

View File

@ -5,10 +5,13 @@
#include "re2/filtered_re2.h"
#include <stddef.h>
#include <string>
#include <utility>
#include <vector>
#include "util/logging.h"
#include "absl/log/absl_log.h"
#include "absl/strings/string_view.h"
#include "re2/prefilter.h"
#include "re2/prefilter_tree.h"
@ -52,8 +55,8 @@ RE2::ErrorCode FilteredRE2::Add(absl::string_view pattern,
if (!re->ok()) {
if (options.log_errors()) {
LOG(ERROR) << "Couldn't compile regular expression, skipping: "
<< pattern << " due to error " << re->error();
ABSL_LOG(ERROR) << "Couldn't compile regular expression, skipping: "
<< pattern << " due to error " << re->error();
}
delete re;
} else {
@ -66,12 +69,13 @@ RE2::ErrorCode FilteredRE2::Add(absl::string_view pattern,
void FilteredRE2::Compile(std::vector<std::string>* atoms) {
if (compiled_) {
LOG(ERROR) << "Compile called already.";
ABSL_LOG(ERROR) << "Compile called already.";
return;
}
// Similarly to PrefilterTree::Compile(), make compiling
// a no-op if it's attempted before adding any patterns.
if (re2_vec_.empty()) {
LOG(ERROR) << "Compile called before Add.";
return;
}
@ -94,7 +98,7 @@ int FilteredRE2::SlowFirstMatch(absl::string_view text) const {
int FilteredRE2::FirstMatch(absl::string_view text,
const std::vector<int>& atoms) const {
if (!compiled_) {
LOG(DFATAL) << "FirstMatch called before Compile.";
ABSL_LOG(DFATAL) << "FirstMatch called before Compile.";
return -1;
}
std::vector<int> regexps;

View File

@ -5,10 +5,12 @@
#include <fuzzer/FuzzedDataProvider.h>
#include <stddef.h>
#include <stdint.h>
#include <algorithm>
#include <string>
#include <vector>
#include "absl/strings/string_view.h"
#include "re2/filtered_re2.h"
#include "re2/re2.h"
#include "re2/regexp.h"

View File

@ -22,7 +22,7 @@
//
// Regexp::MimicsPCRE checks for any of these conditions.
#include "util/logging.h"
#include "absl/log/absl_log.h"
#include "re2/regexp.h"
#include "re2/walker-inl.h"
@ -44,7 +44,7 @@ class PCREWalker : public Regexp::Walker<bool> {
virtual bool ShortVisit(Regexp* re, bool a) {
// Should never be called: we use Walk(), not WalkExponential().
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
LOG(DFATAL) << "PCREWalker::ShortVisit called";
ABSL_LOG(DFATAL) << "PCREWalker::ShortVisit called";
#endif
return a;
}
@ -128,7 +128,7 @@ class EmptyStringWalker : public Regexp::Walker<bool> {
virtual bool ShortVisit(Regexp* re, bool a) {
// Should never be called: we use Walk(), not WalkExponential().
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
LOG(DFATAL) << "EmptyStringWalker::ShortVisit called";
ABSL_LOG(DFATAL) << "EmptyStringWalker::ShortVisit called";
#endif
return a;
}

View File

@ -26,14 +26,16 @@
#include <stdio.h>
#include <string.h>
#include <algorithm>
#include <deque>
#include <string>
#include <utility>
#include <vector>
#include "absl/log/absl_check.h"
#include "absl/log/absl_log.h"
#include "absl/strings/str_format.h"
#include "util/logging.h"
#include "absl/strings/string_view.h"
#include "re2/pod_array.h"
#include "re2/prog.h"
#include "re2/regexp.h"
@ -172,17 +174,17 @@ NFA::Thread* NFA::AllocThread() {
}
NFA::Thread* NFA::Incref(Thread* t) {
DCHECK(t != NULL);
ABSL_DCHECK(t != NULL);
t->ref++;
return t;
}
void NFA::Decref(Thread* t) {
DCHECK(t != NULL);
ABSL_DCHECK(t != NULL);
t->ref--;
if (t->ref > 0)
return;
DCHECK_EQ(t->ref, 0);
ABSL_DCHECK_EQ(t->ref, 0);
t->next = freelist_;
freelist_ = t;
}
@ -208,7 +210,7 @@ void NFA::AddToThreadq(Threadq* q, int id0, int c, absl::string_view context,
stk[nstk++] = {id0, NULL};
while (nstk > 0) {
DCHECK_LE(nstk, stack_.size());
ABSL_DCHECK_LE(nstk, stack_.size());
AddState a = stk[--nstk];
Loop:
@ -238,7 +240,7 @@ void NFA::AddToThreadq(Threadq* q, int id0, int c, absl::string_view context,
Prog::Inst* ip = prog_->inst(id);
switch (ip->opcode()) {
default:
LOG(DFATAL) << "unhandled " << ip->opcode() << " in AddToThreadq";
ABSL_LOG(DFATAL) << "unhandled " << ip->opcode() << " in AddToThreadq";
break;
case kInstFail:
@ -249,7 +251,7 @@ void NFA::AddToThreadq(Threadq* q, int id0, int c, absl::string_view context,
t = Incref(t0);
*tp = t;
DCHECK(!ip->last());
ABSL_DCHECK(!ip->last());
a = {id+1, NULL};
goto Loop;
@ -350,7 +352,7 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, absl::string_view context,
switch (ip->opcode()) {
default:
// Should only see the values handled below.
LOG(DFATAL) << "Unhandled " << ip->opcode() << " in step";
ABSL_LOG(DFATAL) << "Unhandled " << ip->opcode() << " in step";
break;
case kInstByteRange:
@ -455,7 +457,7 @@ bool NFA::Search(absl::string_view text, absl::string_view context,
// Sanity check: make sure that text lies within context.
if (BeginPtr(text) < BeginPtr(context) || EndPtr(text) > EndPtr(context)) {
LOG(DFATAL) << "context does not contain text";
ABSL_LOG(DFATAL) << "context does not contain text";
return false;
}
@ -470,7 +472,7 @@ bool NFA::Search(absl::string_view text, absl::string_view context,
}
if (nsubmatch < 0) {
LOG(DFATAL) << "Bad args: nsubmatch=" << nsubmatch;
ABSL_LOG(DFATAL) << "Bad args: nsubmatch=" << nsubmatch;
return false;
}
@ -527,7 +529,7 @@ bool NFA::Search(absl::string_view text, absl::string_view context,
// This is a no-op the first time around the loop because runq is empty.
int id = Step(runq, nextq, p < etext_ ? p[0] & 0xFF : -1, context, p);
DCHECK_EQ(runq->size(), 0);
ABSL_DCHECK_EQ(runq->size(), 0);
using std::swap;
swap(nextq, runq);
nextq->clear();
@ -538,7 +540,8 @@ bool NFA::Search(absl::string_view text, absl::string_view context,
Prog::Inst* ip = prog_->inst(id);
switch (ip->opcode()) {
default:
LOG(DFATAL) << "Unexpected opcode in short circuit: " << ip->opcode();
ABSL_LOG(DFATAL) << "Unexpected opcode in short circuit: "
<< ip->opcode();
break;
case kInstCapture:
@ -599,7 +602,7 @@ bool NFA::Search(absl::string_view text, absl::string_view context,
// This complements the special case in NFA::Step().
if (p == NULL) {
(void) Step(runq, nextq, -1, context, p);
DCHECK_EQ(runq->size(), 0);
ABSL_DCHECK_EQ(runq->size(), 0);
using std::swap;
swap(nextq, runq);
nextq->clear();
@ -655,7 +658,7 @@ bool Prog::SearchNFA(absl::string_view text, absl::string_view context,
// fanout holds the results and is also the work queue for the outer iteration.
// reachable holds the reached nodes for the inner iteration.
void Prog::Fanout(SparseArray<int>* fanout) {
DCHECK_EQ(fanout->max_size(), size());
ABSL_DCHECK_EQ(fanout->max_size(), size());
SparseSet reachable(size());
fanout->clear();
fanout->set_new(start(), 0);
@ -668,7 +671,8 @@ void Prog::Fanout(SparseArray<int>* fanout) {
Prog::Inst* ip = inst(id);
switch (ip->opcode()) {
default:
LOG(DFATAL) << "unhandled " << ip->opcode() << " in Prog::Fanout()";
ABSL_LOG(DFATAL) << "unhandled " << ip->opcode()
<< " in Prog::Fanout()";
break;
case kInstByteRange:
@ -682,7 +686,7 @@ void Prog::Fanout(SparseArray<int>* fanout) {
break;
case kInstAltMatch:
DCHECK(!ip->last());
ABSL_DCHECK(!ip->last());
reachable.insert(id+1);
break;

View File

@ -52,19 +52,21 @@
#include <stdint.h>
#include <string.h>
#include <algorithm>
#include <map>
#include <string>
#include <vector>
#include "absl/container/fixed_array.h"
#include "absl/container/inlined_vector.h"
#include "absl/log/absl_check.h"
#include "absl/log/absl_log.h"
#include "absl/strings/str_format.h"
#include "util/logging.h"
#include "util/utf.h"
#include "absl/strings/string_view.h"
#include "re2/pod_array.h"
#include "re2/prog.h"
#include "re2/sparse_set.h"
#include "util/utf.h"
// Silence "zero-sized array in struct/union" warning for OneState::action.
#ifdef _MSC_VER
@ -215,7 +217,7 @@ bool Prog::SearchOnePass(absl::string_view text, absl::string_view context,
Anchor anchor, MatchKind kind,
absl::string_view* match, int nmatch) {
if (anchor != kAnchored && kind != kFullMatch) {
LOG(DFATAL) << "Cannot use SearchOnePass for unanchored matches.";
ABSL_LOG(DFATAL) << "Cannot use SearchOnePass for unanchored matches.";
return false;
}
@ -442,13 +444,13 @@ bool Prog::IsOnePass() {
Prog::Inst* ip = inst(id);
switch (ip->opcode()) {
default:
LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
ABSL_LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
break;
case kInstAltMatch:
// TODO(rsc): Ignoring kInstAltMatch optimization.
// Should implement it in this engine, but it's subtle.
DCHECK(!ip->last());
ABSL_DCHECK(!ip->last());
// If already on work queue, (1) is violated: bail out.
if (!AddQ(&workq, id+1))
goto fail;
@ -460,7 +462,7 @@ bool Prog::IsOnePass() {
if (nextindex == -1) {
if (nalloc >= maxnodes) {
if (ExtraDebug)
LOG(ERROR) << absl::StrFormat(
ABSL_LOG(ERROR) << absl::StrFormat(
"Not OnePass: hit node limit %d >= %d", nalloc, maxnodes);
goto fail;
}
@ -485,7 +487,7 @@ bool Prog::IsOnePass() {
node->action[b] = newact;
} else if (act != newact) {
if (ExtraDebug)
LOG(ERROR) << absl::StrFormat(
ABSL_LOG(ERROR) << absl::StrFormat(
"Not OnePass: conflict on byte %#x at state %d", c, *it);
goto fail;
}
@ -506,7 +508,7 @@ bool Prog::IsOnePass() {
node->action[b] = newact;
} else if (act != newact) {
if (ExtraDebug)
LOG(ERROR) << absl::StrFormat(
ABSL_LOG(ERROR) << absl::StrFormat(
"Not OnePass: conflict on byte %#x at state %d", c, *it);
goto fail;
}
@ -547,7 +549,7 @@ bool Prog::IsOnePass() {
// If already on work queue, (1) is violated: bail out.
if (!AddQ(&workq, ip->out())) {
if (ExtraDebug)
LOG(ERROR) << absl::StrFormat(
ABSL_LOG(ERROR) << absl::StrFormat(
"Not OnePass: multiple paths %d -> %d", *it, ip->out());
goto fail;
}
@ -558,7 +560,7 @@ bool Prog::IsOnePass() {
if (matched) {
// (3) is violated
if (ExtraDebug)
LOG(ERROR) << absl::StrFormat(
ABSL_LOG(ERROR) << absl::StrFormat(
"Not OnePass: multiple matches from %d", *it);
goto fail;
}
@ -579,9 +581,9 @@ bool Prog::IsOnePass() {
}
}
if (ExtraDebug) { // For debugging, dump one-pass NFA to LOG(ERROR).
LOG(ERROR) << "bytemap:\n" << DumpByteMap();
LOG(ERROR) << "prog:\n" << Dump();
if (ExtraDebug) { // For debugging, dump one-pass NFA to ABSL_LOG(ERROR).
ABSL_LOG(ERROR) << "bytemap:\n" << DumpByteMap();
ABSL_LOG(ERROR) << "prog:\n" << Dump();
std::map<int, int> idmap;
for (int i = 0; i < size; i++)
@ -606,7 +608,7 @@ bool Prog::IsOnePass() {
idmap[node->action[i] >> kIndexShift]);
}
}
LOG(ERROR) << "nodes:\n" << dump;
ABSL_LOG(ERROR) << "nodes:\n" << dump;
}
dfa_mem_ -= nalloc*statesize;

View File

@ -16,24 +16,24 @@
// and recognizes the Perl escape sequences \d, \s, \w, \D, \S, and \W.
// See regexp.h for rationale.
#include <ctype.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <algorithm>
#include <map>
#include <string>
#include <vector>
#include "absl/base/macros.h"
#include "absl/log/absl_log.h"
#include "absl/strings/ascii.h"
#include "util/logging.h"
#include "util/utf.h"
#include "absl/strings/string_view.h"
#include "re2/pod_array.h"
#include "re2/regexp.h"
#include "re2/unicode_casefold.h"
#include "re2/unicode_groups.h"
#include "re2/walker-inl.h"
#include "util/utf.h"
#if defined(RE2_USE_ICU)
#include "unicode/uniset.h"
@ -303,7 +303,7 @@ Rune ApplyFold(const CaseFold* f, Rune r) {
case EvenOddSkip: // even <-> odd but only applies to every other
if ((r - f->lo) % 2)
return r;
ABSL_FALLTHROUGH_INTENDED;
[[fallthrough]];
case EvenOdd: // even <-> odd
if (r%2 == 0)
return r + 1;
@ -312,7 +312,7 @@ Rune ApplyFold(const CaseFold* f, Rune r) {
case OddEvenSkip: // odd <-> even but only applies to every other
if ((r - f->lo) % 2)
return r;
ABSL_FALLTHROUGH_INTENDED;
[[fallthrough]];
case OddEven: // odd <-> even
if (r%2 == 1)
return r + 1;
@ -337,6 +337,20 @@ Rune CycleFoldRune(Rune r) {
return ApplyFold(f, r);
}
// Adds every rune in [lo, hi] to cc, one rune at a time. Whenever the rune
// is an ASCII letter, the same letter in the opposite case is added as well;
// no other rune gets a fold partner from this function.
static void AddFoldedRangeLatin1(CharClassBuilder* cc, Rune lo, Rune hi) {
  // Distance between an upper-case ASCII letter and its lower-case form.
  const Rune kCaseOffset = 'a' - 'A';
  for (Rune r = lo; r <= hi; r++) {
    cc->AddRange(r, r);
    if ('A' <= r && r <= 'Z') {
      // Upper-case letter: also add the lower-case partner.
      cc->AddRange(r + kCaseOffset, r + kCaseOffset);
    } else if ('a' <= r && r <= 'z') {
      // Lower-case letter: also add the upper-case partner.
      cc->AddRange(r - kCaseOffset, r - kCaseOffset);
    }
  }
}
// Add lo-hi to the class, along with their fold-equivalent characters.
// If lo-hi is already in the class, assume that the fold-equivalent
// chars are there too, so there's no work to do.
@ -346,7 +360,7 @@ static void AddFoldedRange(CharClassBuilder* cc, Rune lo, Rune hi, int depth) {
// current Unicode tables. make_unicode_casefold.py checks that
// the cycles are not too long, and we double-check here using depth.
if (depth > 10) {
LOG(DFATAL) << "AddFoldedRange recurses too much.";
ABSL_LOG(DFATAL) << "AddFoldedRange recurses too much.";
return;
}
@ -394,17 +408,26 @@ static void AddFoldedRange(CharClassBuilder* cc, Rune lo, Rune hi, int depth) {
// Pushes the literal rune r onto the stack.
bool Regexp::ParseState::PushLiteral(Rune r) {
// Do case folding if needed.
if ((flags_ & FoldCase) && CycleFoldRune(r) != r) {
Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase);
re->ccb_ = new CharClassBuilder;
Rune r1 = r;
do {
if (!(flags_ & NeverNL) || r != '\n') {
re->ccb_->AddRange(r, r);
}
r = CycleFoldRune(r);
} while (r != r1);
return PushRegexp(re);
if (flags_ & FoldCase) {
if (flags_ & Latin1 && (('A' <= r && r <= 'Z') ||
('a' <= r && r <= 'z'))) {
Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase);
re->ccb_ = new CharClassBuilder;
AddFoldedRangeLatin1(re->ccb_, r, r);
return PushRegexp(re);
}
if (!(flags_ & Latin1) && CycleFoldRune(r) != r) {
Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase);
re->ccb_ = new CharClassBuilder;
Rune r1 = r;
do {
if (!(flags_ & NeverNL) || r != '\n') {
re->ccb_->AddRange(r, r);
}
r = CycleFoldRune(r);
} while (r != r1);
return PushRegexp(re);
}
}
// Exclude newline if applicable.
@ -556,7 +579,7 @@ int RepetitionWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg,
// Visitor fallback that is not expected to run. Unless
// FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION is defined, it logs a DFATAL
// message naming this function; in either case it returns 0.
// re and parent_arg are not read.
int RepetitionWalker::ShortVisit(Regexp* re, int parent_arg) {
// Should never be called: we use Walk(), not WalkExponential().
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
LOG(DFATAL) << "RepetitionWalker::ShortVisit called";
ABSL_LOG(DFATAL) << "RepetitionWalker::ShortVisit called";
#endif
return 0;
}
@ -776,7 +799,8 @@ Rune* Regexp::LeadingString(Regexp* re, int* nrune,
while (re->op() == kRegexpConcat && re->nsub() > 0)
re = re->sub()[0];
*flags = static_cast<Regexp::ParseFlags>(re->parse_flags_ & Regexp::FoldCase);
*flags = static_cast<Regexp::ParseFlags>(re->parse_flags_ &
(Regexp::FoldCase | Regexp::Latin1));
if (re->op() == kRegexpLiteral) {
*nrune = 1;
@ -843,7 +867,7 @@ void Regexp::RemoveLeadingString(Regexp* re, int n) {
case 0:
case 1:
// Impossible.
LOG(DFATAL) << "Concat of " << re->nsub();
ABSL_LOG(DFATAL) << "Concat of " << re->nsub();
re->submany_ = NULL;
re->op_ = kRegexpEmptyMatch;
break;
@ -973,7 +997,7 @@ int Regexp::FactorAlternation(Regexp** sub, int nsub, ParseFlags flags) {
i += iter->nsub;
break;
default:
LOG(DFATAL) << "unknown round: " << round;
ABSL_LOG(DFATAL) << "unknown round: " << round;
break;
}
// If we are done, copy until the end of sub.
@ -1012,7 +1036,7 @@ int Regexp::FactorAlternation(Regexp** sub, int nsub, ParseFlags flags) {
continue;
}
default:
LOG(DFATAL) << "unknown round: " << round;
ABSL_LOG(DFATAL) << "unknown round: " << round;
break;
}
@ -1175,16 +1199,26 @@ void FactorAlternationImpl::Round3(Regexp** sub, int nsub,
if (re->op() == kRegexpCharClass) {
CharClass* cc = re->cc();
for (CharClass::iterator it = cc->begin(); it != cc->end(); ++it)
ccb.AddRange(it->lo, it->hi);
ccb.AddRangeFlags(it->lo, it->hi, re->parse_flags());
} else if (re->op() == kRegexpLiteral) {
ccb.AddRangeFlags(re->rune(), re->rune(), re->parse_flags());
if (re->parse_flags() & Regexp::FoldCase) {
// AddFoldedRange() can terminate prematurely if the character class
// already contains the rune. For example, if it contains 'a' and we
// want to add folded 'a', it sees 'a' and stops without adding 'A'.
// To avoid that, we use an empty character class and then merge it.
CharClassBuilder tmp;
tmp.AddRangeFlags(re->rune(), re->rune(), re->parse_flags());
ccb.AddCharClass(&tmp);
} else {
ccb.AddRangeFlags(re->rune(), re->rune(), re->parse_flags());
}
} else {
LOG(DFATAL) << "RE2: unexpected op: " << re->op() << " "
<< re->ToString();
ABSL_LOG(DFATAL) << "RE2: unexpected op: " << re->op() << " "
<< re->ToString();
}
re->Decref();
}
Regexp* re = Regexp::NewCharClass(ccb.GetCharClass(), flags);
Regexp* re = Regexp::NewCharClass(ccb.GetCharClass(), flags & ~Regexp::FoldCase);
splices->emplace_back(re, sub + start, i - start);
}
@ -1441,7 +1475,7 @@ static int UnHex(int c) {
return c - 'A' + 10;
if ('a' <= c && c <= 'f')
return c - 'a' + 10;
LOG(DFATAL) << "Bad hex digit " << c;
ABSL_LOG(DFATAL) << "Bad hex digit " << c;
return 0;
}
@ -1490,7 +1524,7 @@ static bool ParseEscape(absl::string_view* s, Rune* rp,
// Single non-zero octal digit is a backreference; not supported.
if (s->empty() || (*s)[0] < '0' || (*s)[0] > '7')
goto BadEscape;
ABSL_FALLTHROUGH_INTENDED;
[[fallthrough]];
case '0':
// consume up to three octal digits; already have one.
code = c - '0';
@ -1612,10 +1646,15 @@ void CharClassBuilder::AddRangeFlags(
}
// If folding case, add fold-equivalent characters too.
if (parse_flags & Regexp::FoldCase)
AddFoldedRange(this, lo, hi, 0);
else
if (parse_flags & Regexp::FoldCase) {
if (parse_flags & Regexp::Latin1) {
AddFoldedRangeLatin1(this, lo, hi);
} else {
AddFoldedRange(this, lo, hi, 0);
}
} else {
AddRange(lo, hi);
}
}
// Look for a group with the given name.
@ -2056,7 +2095,18 @@ bool Regexp::ParseState::ParsePerlFlags(absl::string_view* s) {
// Caller is supposed to check this.
if (!(flags_ & PerlX) || t.size() < 2 || t[0] != '(' || t[1] != '?') {
status_->set_code(kRegexpInternalError);
LOG(DFATAL) << "Bad call to ParseState::ParsePerlFlags";
ABSL_LOG(DFATAL) << "Bad call to ParseState::ParsePerlFlags";
return false;
}
// Check for look-around assertions. This is NOT because we support them! ;)
// As per https://github.com/google/re2/issues/468, we really want to report
// kRegexpBadPerlOp (not kRegexpBadNamedCapture) for look-behind assertions.
// Additionally, it would be nice to report not "(?<", but "(?<=" or "(?<!".
if ((t.size() > 3 && (t[2] == '=' || t[2] == '!')) ||
(t.size() > 4 && t[2] == '<' && (t[3] == '=' || t[3] == '!'))) {
status_->set_code(kRegexpBadPerlOp);
status_->set_error_arg(absl::string_view(t.data(), t[2] == '<' ? 4 : 3));
return false;
}

View File

@ -5,17 +5,19 @@
#include "re2/prefilter.h"
#include <stddef.h>
#include <stdint.h>
#include <string>
#include <utility>
#include <vector>
#include "absl/log/absl_check.h"
#include "absl/log/absl_log.h"
#include "absl/strings/str_format.h"
#include "util/logging.h"
#include "util/utf.h"
#include "re2/re2.h"
#include "re2/regexp.h"
#include "re2/unicode_casefold.h"
#include "re2/walker-inl.h"
#include "util/utf.h"
namespace re2 {
@ -300,8 +302,8 @@ void Prefilter::CrossProduct(const SSet& a, const SSet& b, SSet* dst) {
Prefilter::Info* Prefilter::Info::Concat(Info* a, Info* b) {
if (a == NULL)
return b;
DCHECK(a->is_exact_);
DCHECK(b && b->is_exact_);
ABSL_DCHECK(a->is_exact_);
ABSL_DCHECK(b && b->is_exact_);
Info *ab = new Info();
CrossProduct(a->exact_, b->exact_, &ab->exact_);
@ -450,9 +452,9 @@ typedef CharClass::iterator CCIter;
Prefilter::Info* Prefilter::Info::CClass(CharClass *cc,
bool latin1) {
if (ExtraDebug) {
LOG(ERROR) << "CharClassInfo:";
ABSL_LOG(ERROR) << "CharClassInfo:";
for (CCIter i = cc->begin(); i != cc->end(); ++i)
LOG(ERROR) << " " << i->lo << "-" << i->hi;
ABSL_LOG(ERROR) << " " << i->lo << "-" << i->hi;
}
// If the class is too large, it's okay to overestimate.
@ -473,7 +475,7 @@ Prefilter::Info* Prefilter::Info::CClass(CharClass *cc,
a->is_exact_ = true;
if (ExtraDebug)
LOG(ERROR) << " = " << a->ToString();
ABSL_LOG(ERROR) << " = " << a->ToString();
return a;
}
@ -501,7 +503,7 @@ class Prefilter::Info::Walker : public Regexp::Walker<Prefilter::Info*> {
Prefilter::Info* Prefilter::BuildInfo(Regexp* re) {
if (ExtraDebug)
LOG(ERROR) << "BuildPrefilter::Info: " << re->ToString();
ABSL_LOG(ERROR) << "BuildPrefilter::Info: " << re->ToString();
bool latin1 = (re->parse_flags() & Regexp::Latin1) != 0;
Prefilter::Info::Walker w(latin1);
@ -531,7 +533,7 @@ Prefilter::Info* Prefilter::Info::Walker::PostVisit(
default:
case kRegexpRepeat:
info = EmptyString();
LOG(DFATAL) << "Bad regexp op " << re->op();
ABSL_LOG(DFATAL) << "Bad regexp op " << re->op();
break;
case kRegexpNoMatch:
@ -634,8 +636,8 @@ Prefilter::Info* Prefilter::Info::Walker::PostVisit(
}
if (ExtraDebug)
LOG(ERROR) << "BuildInfo " << re->ToString()
<< ": " << (info ? info->ToString() : "");
ABSL_LOG(ERROR) << "BuildInfo " << re->ToString()
<< ": " << (info ? info->ToString() : "");
return info;
}
@ -662,7 +664,7 @@ Prefilter* Prefilter::FromRegexp(Regexp* re) {
std::string Prefilter::DebugString() const {
switch (op_) {
default:
LOG(DFATAL) << "Bad op in Prefilter::DebugString: " << op_;
ABSL_LOG(DFATAL) << "Bad op in Prefilter::DebugString: " << op_;
return absl::StrFormat("op%d", op_);
case NONE:
return "*no-matches*";

View File

@ -13,7 +13,8 @@
#include <string>
#include <vector>
#include "util/logging.h"
#include "absl/log/absl_check.h"
#include "absl/log/absl_log.h"
namespace re2 {
@ -42,7 +43,7 @@ class Prefilter {
// The children of the Prefilter node.
// Returns the subs_ pointer as stored. A debug check asserts that op_ is
// AND or OR, so the accessor is only meant to be called on those node kinds.
std::vector<Prefilter*>* subs() {
DCHECK(op_ == AND || op_ == OR);
ABSL_DCHECK(op_ == AND || op_ == OR);
return subs_;
}

View File

@ -5,17 +5,17 @@
#include "re2/prefilter_tree.h"
#include <stddef.h>
#include <algorithm>
#include <cmath>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "absl/log/absl_check.h"
#include "absl/log/absl_log.h"
#include "absl/strings/str_format.h"
#include "util/logging.h"
#include "re2/prefilter.h"
#include "re2/re2.h"
namespace re2 {
@ -38,7 +38,7 @@ PrefilterTree::~PrefilterTree() {
void PrefilterTree::Add(Prefilter* prefilter) {
if (compiled_) {
LOG(DFATAL) << "Add called after Compile.";
ABSL_LOG(DFATAL) << "Add called after Compile.";
return;
}
if (prefilter != NULL && !KeepNode(prefilter)) {
@ -51,14 +51,15 @@ void PrefilterTree::Add(Prefilter* prefilter) {
void PrefilterTree::Compile(std::vector<std::string>* atom_vec) {
if (compiled_) {
LOG(DFATAL) << "Compile called already.";
ABSL_LOG(DFATAL) << "Compile called already.";
return;
}
// Some legacy users of PrefilterTree call Compile() before
// adding any regexps and expect Compile() to have no effect.
if (prefilter_vec_.empty())
if (prefilter_vec_.empty()) {
return;
}
compiled_ = true;
@ -82,7 +83,7 @@ bool PrefilterTree::KeepNode(Prefilter* node) const {
switch (node->op()) {
default:
LOG(DFATAL) << "Unexpected op in KeepNode: " << node->op();
ABSL_LOG(DFATAL) << "Unexpected op in KeepNode: " << node->op();
return false;
case Prefilter::ALL:
@ -177,7 +178,7 @@ void PrefilterTree::AssignUniqueIds(NodeSet* nodes,
int id = prefilter->unique_id();
switch (prefilter->op()) {
default:
LOG(DFATAL) << "Unexpected op: " << prefilter->op();
ABSL_LOG(DFATAL) << "Unexpected op: " << prefilter->op();
return;
case Prefilter::ATOM:
@ -211,7 +212,7 @@ void PrefilterTree::AssignUniqueIds(NodeSet* nodes,
if (prefilter_vec_[i] == NULL)
continue;
int id = CanonicalNode(nodes, prefilter_vec_[i])->unique_id();
DCHECK_LE(0, id);
ABSL_DCHECK_LE(0, id);
Entry* entry = &entries_[id];
entry->regexps.push_back(static_cast<int>(i));
}
@ -272,10 +273,11 @@ void PrefilterTree::RegexpsGivenStrings(
// Some legacy users of PrefilterTree call Compile() before
// adding any regexps and expect Compile() to have no effect.
// This kludge is a counterpart to that kludge.
if (prefilter_vec_.empty())
if (prefilter_vec_.empty()) {
return;
}
LOG(ERROR) << "RegexpsGivenStrings called before Compile.";
ABSL_LOG(ERROR) << "RegexpsGivenStrings called before Compile.";
for (size_t i = 0; i < prefilter_vec_.size(); i++)
regexps->push_back(static_cast<int>(i));
} else {
@ -329,31 +331,31 @@ void PrefilterTree::PropagateMatch(const std::vector<int>& atom_ids,
// Debugging help.
// Logs, at ERROR severity, the DebugNodeString() of the prefilter stored at
// prefilter_vec_[regexpid]. regexpid is used as an index without any bounds
// check in this function.
void PrefilterTree::PrintPrefilter(int regexpid) {
LOG(ERROR) << DebugNodeString(prefilter_vec_[regexpid]);
ABSL_LOG(ERROR) << DebugNodeString(prefilter_vec_[regexpid]);
}
// Dumps the tree's bookkeeping to the ERROR log:
//   - the sizes of atom_index_to_id_ and entries_;
//   - for every entry, its index, the number of parents and regexps it
//     records, and then each parent id on its own log line;
//   - the unique_id() of every node in *nodes.
// Nothing is modified; nodes is only iterated.
void PrefilterTree::PrintDebugInfo(NodeSet* nodes) {
LOG(ERROR) << "#Unique Atoms: " << atom_index_to_id_.size();
LOG(ERROR) << "#Unique Nodes: " << entries_.size();
ABSL_LOG(ERROR) << "#Unique Atoms: " << atom_index_to_id_.size();
ABSL_LOG(ERROR) << "#Unique Nodes: " << entries_.size();
// Per-entry summary followed by the entry's parent ids.
for (size_t i = 0; i < entries_.size(); i++) {
const std::vector<int>& parents = entries_[i].parents;
const std::vector<int>& regexps = entries_[i].regexps;
LOG(ERROR) << "EntryId: " << i
<< " N: " << parents.size() << " R: " << regexps.size();
ABSL_LOG(ERROR) << "EntryId: " << i
<< " N: " << parents.size() << " R: " << regexps.size();
for (int parent : parents)
LOG(ERROR) << parent;
ABSL_LOG(ERROR) << parent;
}
// Ids of the nodes in the caller-supplied set.
LOG(ERROR) << "Set:";
ABSL_LOG(ERROR) << "Set:";
for (NodeSet::const_iterator iter = nodes->begin();
iter != nodes->end(); ++iter)
LOG(ERROR) << "NodeId: " << (*iter)->unique_id();
ABSL_LOG(ERROR) << "NodeId: " << (*iter)->unique_id();
}
std::string PrefilterTree::DebugNodeString(Prefilter* node) const {
std::string node_string = "";
if (node->op() == Prefilter::ATOM) {
DCHECK(!node->atom().empty());
ABSL_DCHECK(!node->atom().empty());
node_string += node->atom();
} else {
// Adding the operation disambiguates AND and OR nodes.

View File

@ -20,9 +20,10 @@
#include <vector>
#include "absl/container/flat_hash_set.h"
#include "absl/log/absl_check.h"
#include "absl/log/absl_log.h"
#include "re2/prefilter.h"
#include "re2/sparse_array.h"
#include "util/logging.h"
namespace re2 {
@ -62,15 +63,15 @@ class PrefilterTree {
// Hash functor over Prefilter pointers: hashes the pointed-to Prefilter
// (via absl::Hash<Prefilter>) rather than the pointer value. The pointer is
// debug-checked to be non-null before it is dereferenced.
struct PrefilterHash {
size_t operator()(const Prefilter* a) const {
DCHECK(a != NULL);
ABSL_DCHECK(a != NULL);
return absl::Hash<Prefilter>()(*a);
}
};
// Equality functor over Prefilter pointers: compares the pointed-to
// Prefilter objects with operator==, not the pointer values. Both pointers
// are debug-checked to be non-null before they are dereferenced.
struct PrefilterEqual {
bool operator()(const Prefilter* a, const Prefilter* b) const {
DCHECK(a != NULL);
DCHECK(b != NULL);
ABSL_DCHECK(a != NULL);
ABSL_DCHECK(b != NULL);
return *a == *b;
}
};

View File

@ -7,35 +7,42 @@
#include "re2/prog.h"
#include <stdint.h>
#include <string.h>
#include <algorithm>
#include <string>
#include <utility>
#include <vector>
#include "absl/log/absl_check.h"
#include "absl/log/absl_log.h"
#include "absl/strings/str_format.h"
#include "absl/strings/string_view.h"
#include "re2/bitmap256.h"
#include "re2/pod_array.h"
#include "re2/sparse_array.h"
#include "re2/sparse_set.h"
#if defined(__AVX2__)
#include <immintrin.h>
#ifdef _MSC_VER
#include <intrin.h>
#endif
#endif
#include <stdint.h>
#include <string.h>
#include <algorithm>
#include <memory>
#include <utility>
#include "absl/base/macros.h"
#include "absl/strings/str_format.h"
#include "util/logging.h"
#include "re2/bitmap256.h"
namespace re2 {
// Constructors per Inst opcode
// Sets this instruction up as a kInstAlt: `out` is recorded together with
// the opcode through set_out_opcode(), and `out1` is stored in out1_.
// Debug-checks that out_opcode_ is still zero on entry.
void Prog::Inst::InitAlt(uint32_t out, uint32_t out1) {
DCHECK_EQ(out_opcode_, 0);
ABSL_DCHECK_EQ(out_opcode_, uint32_t{0});
set_out_opcode(out, kInstAlt);
out1_ = out1;
}
void Prog::Inst::InitByteRange(int lo, int hi, int foldcase, uint32_t out) {
DCHECK_EQ(out_opcode_, 0);
ABSL_DCHECK_EQ(out_opcode_, uint32_t{0});
set_out_opcode(out, kInstByteRange);
lo_ = lo & 0xFF;
hi_ = hi & 0xFF;
@ -43,30 +50,30 @@ void Prog::Inst::InitByteRange(int lo, int hi, int foldcase, uint32_t out) {
}
// Sets this instruction up as a kInstCapture: `out` is recorded together
// with the opcode through set_out_opcode(), and `cap` is stored in cap_.
// Debug-checks that out_opcode_ is still zero on entry.
void Prog::Inst::InitCapture(int cap, uint32_t out) {
DCHECK_EQ(out_opcode_, 0);
ABSL_DCHECK_EQ(out_opcode_, uint32_t{0});
set_out_opcode(out, kInstCapture);
cap_ = cap;
}
// Sets this instruction up as a kInstEmptyWidth: `out` is recorded together
// with the opcode through set_out_opcode(), and `empty` is stored in empty_.
// Debug-checks that out_opcode_ is still zero on entry.
void Prog::Inst::InitEmptyWidth(EmptyOp empty, uint32_t out) {
DCHECK_EQ(out_opcode_, 0);
ABSL_DCHECK_EQ(out_opcode_, uint32_t{0});
set_out_opcode(out, kInstEmptyWidth);
empty_ = empty;
}
// Sets this instruction up as a kInstMatch and stores `id` in match_id_.
// Only the opcode is set (set_opcode); no successor is recorded.
// Debug-checks that out_opcode_ is still zero on entry.
void Prog::Inst::InitMatch(int32_t id) {
DCHECK_EQ(out_opcode_, 0);
ABSL_DCHECK_EQ(out_opcode_, uint32_t{0});
set_opcode(kInstMatch);
match_id_ = id;
}
// Sets this instruction's opcode to kInstNop.
// Debug-checks that out_opcode_ is still zero on entry.
// NOTE(review): `out` is not read in this body, so no successor is stored
// here -- presumably callers patch it in afterwards; confirm against callers.
void Prog::Inst::InitNop(uint32_t out) {
DCHECK_EQ(out_opcode_, 0);
ABSL_DCHECK_EQ(out_opcode_, uint32_t{0});
set_opcode(kInstNop);
}
// Sets this instruction's opcode to kInstFail.
// Debug-checks that out_opcode_ is still zero on entry.
void Prog::Inst::InitFail() {
DCHECK_EQ(out_opcode_, 0);
ABSL_DCHECK_EQ(out_opcode_, uint32_t{0});
set_opcode(kInstFail);
}
@ -198,7 +205,7 @@ static bool IsMatch(Prog* prog, Prog::Inst* ip) {
for (;;) {
switch (ip->opcode()) {
default:
LOG(DFATAL) << "Unexpected opcode in IsMatch: " << ip->opcode();
ABSL_LOG(DFATAL) << "Unexpected opcode in IsMatch: " << ip->opcode();
return false;
case kInstAlt:
@ -362,11 +369,11 @@ class ByteMapBuilder {
};
void ByteMapBuilder::Mark(int lo, int hi) {
DCHECK_GE(lo, 0);
DCHECK_GE(hi, 0);
DCHECK_LE(lo, 255);
DCHECK_LE(hi, 255);
DCHECK_LE(lo, hi);
ABSL_DCHECK_GE(lo, 0);
ABSL_DCHECK_GE(hi, 0);
ABSL_DCHECK_LE(lo, 255);
ABSL_DCHECK_LE(hi, 255);
ABSL_DCHECK_LE(lo, hi);
// Ignore any [0-255] ranges. They cause us to recolor every range, which
// has no effect on the eventual result and is therefore a waste of time.
@ -511,7 +518,7 @@ void Prog::ComputeByteMap() {
builder.Build(bytemap_, &bytemap_range_);
if ((0)) { // For debugging, use trivial bytemap.
LOG(ERROR) << "Using trivial bytemap.";
ABSL_LOG(ERROR) << "Using trivial bytemap.";
for (int i = 0; i < 256; i++)
bytemap_[i] = static_cast<uint8_t>(i);
bytemap_range_ = 256;
@ -615,12 +622,12 @@ void Prog::Flatten() {
size_t total = 0;
for (int i = 0; i < kNumInst; i++)
total += inst_count_[i];
CHECK_EQ(total, flat.size());
ABSL_CHECK_EQ(total, flat.size());
#endif
// Remap start_unanchored and start.
if (start_unanchored() == 0) {
DCHECK_EQ(start(), 0);
ABSL_DCHECK_EQ(start(), 0);
} else if (start_unanchored() == start()) {
set_start_unanchored(flatmap[1]);
set_start(flatmap[1]);
@ -677,7 +684,7 @@ void Prog::MarkSuccessors(SparseArray<int>* rootmap,
Inst* ip = inst(id);
switch (ip->opcode()) {
default:
LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
ABSL_LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
break;
case kInstAltMatch:
@ -737,7 +744,7 @@ void Prog::MarkDominator(int root, SparseArray<int>* rootmap,
Inst* ip = inst(id);
switch (ip->opcode()) {
default:
LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
ABSL_LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
break;
case kInstAltMatch:
@ -804,7 +811,7 @@ void Prog::EmitList(int root, SparseArray<int>* rootmap,
Inst* ip = inst(id);
switch (ip->opcode()) {
default:
LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
ABSL_LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
break;
case kInstAltMatch:
@ -812,7 +819,7 @@ void Prog::EmitList(int root, SparseArray<int>* rootmap,
flat->back().set_opcode(kInstAltMatch);
flat->back().set_out(static_cast<int>(flat->size()));
flat->back().out1_ = static_cast<uint32_t>(flat->size())+1;
ABSL_FALLTHROUGH_INTENDED;
[[fallthrough]];
case kInstAlt:
stk->push_back(ip->out1());
@ -1105,7 +1112,7 @@ const void* Prog::PrefixAccel_ShiftDFA(const void* data, size_t size) {
#if defined(__AVX2__)
// Finds the least significant non-zero bit in n.
static int FindLSBSet(uint32_t n) {
DCHECK_NE(n, 0);
ABSL_DCHECK_NE(n, uint32_t{0});
#if defined(__GNUC__)
return __builtin_ctz(n);
#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
@ -1127,7 +1134,7 @@ static int FindLSBSet(uint32_t n) {
#endif
const void* Prog::PrefixAccel_FrontAndBack(const void* data, size_t size) {
DCHECK_GE(prefix_size_, 2);
ABSL_DCHECK_GE(prefix_size_, size_t{2});
if (size < prefix_size_)
return NULL;
// Don't bother searching the last prefix_size_-1 bytes for prefix_front_.
@ -1164,7 +1171,7 @@ const void* Prog::PrefixAccel_FrontAndBack(const void* data, size_t size) {
const char* p0 = reinterpret_cast<const char*>(data);
for (const char* p = p0;; p++) {
DCHECK_GE(size, static_cast<size_t>(p-p0));
ABSL_DCHECK_GE(size, static_cast<size_t>(p-p0));
p = reinterpret_cast<const char*>(memchr(p, prefix_front_, size - (p-p0)));
if (p == NULL || p[prefix_size_-1] == prefix_back_)
return p;

View File

@ -10,14 +10,17 @@
// expression symbolically.
#include <stdint.h>
#include <cstring>
#include <functional>
#include <string>
#include <vector>
#include <type_traits>
#include <vector>
#include "absl/base/call_once.h"
#include "absl/log/absl_check.h"
#include "absl/log/absl_log.h"
#include "absl/strings/string_view.h"
#include "util/logging.h"
#include "re2/pod_array.h"
#include "re2/re2.h"
#include "re2/sparse_array.h"
@ -79,20 +82,44 @@ class Prog {
// Getters
//
// Decoding of out_opcode_, as read by the accessors below:
//   bits 0-2 : opcode()  (out_opcode_ & 7)
//   bit  3   : last()    ((out_opcode_ >> 3) & 1)
//   bits 4+  : out()     (out_opcode_ >> 4)
// hint_foldcase_ is split the same way: bit 0 is foldcase(), the remaining
// bits are hint().
// Every accessor for an opcode-specific field debug-checks that opcode()
// is the kind that field belongs to before returning it.

// Index of this instruction within p's instruction array (pointer
// difference from p->inst_.data()).
int id(Prog* p) { return static_cast<int>(this - p->inst_.data()); }
InstOp opcode() { return static_cast<InstOp>(out_opcode_&7); }
int last() { return (out_opcode_>>3)&1; }
int out() { return out_opcode_>>4; }
int out1() { DCHECK(opcode() == kInstAlt || opcode() == kInstAltMatch); return out1_; }
int cap() { DCHECK_EQ(opcode(), kInstCapture); return cap_; }
int lo() { DCHECK_EQ(opcode(), kInstByteRange); return lo_; }
int hi() { DCHECK_EQ(opcode(), kInstByteRange); return hi_; }
int foldcase() { DCHECK_EQ(opcode(), kInstByteRange); return hint_foldcase_&1; }
int hint() { DCHECK_EQ(opcode(), kInstByteRange); return hint_foldcase_>>1; }
int match_id() { DCHECK_EQ(opcode(), kInstMatch); return match_id_; }
EmptyOp empty() { DCHECK_EQ(opcode(), kInstEmptyWidth); return empty_; }
InstOp opcode() { return static_cast<InstOp>(out_opcode_ & 7); }
int last() { return (out_opcode_ >> 3) & 1; }
int out() { return out_opcode_ >> 4; }
// Second successor; valid only for kInstAlt and kInstAltMatch.
int out1() {
ABSL_DCHECK(opcode() == kInstAlt || opcode() == kInstAltMatch);
return out1_;
}
// Value stored in cap_; valid only for kInstCapture.
int cap() {
ABSL_DCHECK_EQ(opcode(), kInstCapture);
return cap_;
}
// Lower bound of the byte range; valid only for kInstByteRange.
int lo() {
ABSL_DCHECK_EQ(opcode(), kInstByteRange);
return lo_;
}
// Upper bound of the byte range; valid only for kInstByteRange.
int hi() {
ABSL_DCHECK_EQ(opcode(), kInstByteRange);
return hi_;
}
// Low bit of hint_foldcase_; valid only for kInstByteRange.
int foldcase() {
ABSL_DCHECK_EQ(opcode(), kInstByteRange);
return hint_foldcase_ & 1;
}
// hint_foldcase_ with the foldcase bit shifted out; valid only for
// kInstByteRange.
int hint() {
ABSL_DCHECK_EQ(opcode(), kInstByteRange);
return hint_foldcase_ >> 1;
}
// Value stored in match_id_; valid only for kInstMatch.
int match_id() {
ABSL_DCHECK_EQ(opcode(), kInstMatch);
return match_id_;
}
// Value stored in empty_; valid only for kInstEmptyWidth.
EmptyOp empty() {
ABSL_DCHECK_EQ(opcode(), kInstEmptyWidth);
return empty_;
}
bool greedy(Prog* p) {
DCHECK_EQ(opcode(), kInstAltMatch);
ABSL_DCHECK_EQ(opcode(), kInstAltMatch);
return p->inst(out())->opcode() == kInstByteRange ||
(p->inst(out())->opcode() == kInstNop &&
p->inst(p->inst(out())->out())->opcode() == kInstByteRange);
@ -100,7 +127,7 @@ class Prog {
// Does this inst (an kInstByteRange) match c?
inline bool Matches(int c) {
DCHECK_EQ(opcode(), kInstByteRange);
ABSL_DCHECK_EQ(opcode(), kInstByteRange);
if (foldcase() && 'A' <= c && c <= 'Z')
c += 'a' - 'A';
return lo_ <= c && c <= hi_;
@ -221,7 +248,7 @@ class Prog {
// Accelerates to the first likely occurrence of the prefix.
// Returns a pointer to the first byte or NULL if not found.
const void* PrefixAccel(const void* data, size_t size) {
DCHECK(can_prefix_accel());
ABSL_DCHECK(can_prefix_accel());
if (prefix_foldcase_) {
return PrefixAccel_ShiftDFA(data, size);
} else if (prefix_size_ != 1) {

View File

@ -9,32 +9,36 @@
#include "re2/re2.h"
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#ifdef _MSC_VER
#include <intrin.h>
#endif
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <algorithm>
#include <atomic>
#include <iterator>
#include <map>
#include <string>
#include <utility>
#include <vector>
#include "absl/base/call_once.h"
#include "absl/base/macros.h"
#include "absl/container/fixed_array.h"
#include "absl/log/absl_check.h"
#include "absl/log/absl_log.h"
#include "absl/strings/ascii.h"
#include "absl/strings/str_format.h"
#include "util/logging.h"
#include "util/strutil.h"
#include "util/utf.h"
#include "absl/strings/string_view.h"
#include "re2/prog.h"
#include "re2/regexp.h"
#include "re2/sparse_array.h"
#include "util/strutil.h"
#include "util/utf.h"
#ifdef _MSC_VER
#include <intrin.h>
#endif
namespace re2 {
@ -139,6 +143,11 @@ static std::string trunc(absl::string_view pattern) {
// Builds an RE2 from a C string using DefaultOptions.
RE2::RE2(const char* pattern) {
  // A null pattern is treated as the empty pattern. The substitution is made
  // before the pointer is converted to a string view, because that
  // conversion stops accepting NULL if absl::string_view becomes an alias
  // for std::string_view; doing it here keeps callers working either way.
  Init(pattern != NULL ? pattern : "", DefaultOptions);
}
@ -159,7 +168,7 @@ int RE2::Options::ParseFlags() const {
switch (encoding()) {
default:
if (log_errors())
LOG(ERROR) << "Unknown encoding " << encoding();
ABSL_LOG(ERROR) << "Unknown encoding " << encoding();
break;
case RE2::Options::EncodingUTF8:
break;
@ -230,8 +239,8 @@ void RE2::Init(absl::string_view pattern, const Options& options) {
&status);
if (entire_regexp_ == NULL) {
if (options_.log_errors()) {
LOG(ERROR) << "Error parsing '" << trunc(*pattern_) << "': "
<< status.Text();
ABSL_LOG(ERROR) << "Error parsing '" << trunc(*pattern_) << "': "
<< status.Text();
}
error_ = new std::string(status.Text());
error_code_ = RegexpErrorToRE2(status.code());
@ -255,7 +264,7 @@ void RE2::Init(absl::string_view pattern, const Options& options) {
prog_ = suffix_regexp_->CompileToProg(options_.max_mem()*2/3);
if (prog_ == NULL) {
if (options_.log_errors())
LOG(ERROR) << "Error compiling '" << trunc(*pattern_) << "'";
ABSL_LOG(ERROR) << "Error compiling '" << trunc(*pattern_) << "'";
error_ = new std::string("pattern too large - compile failed");
error_code_ = RE2::ErrorPatternTooLarge;
return;
@ -281,8 +290,8 @@ re2::Prog* RE2::ReverseProg() const {
re->suffix_regexp_->CompileToReverseProg(re->options_.max_mem() / 3);
if (re->rprog_ == NULL) {
if (re->options_.log_errors())
LOG(ERROR) << "Error reverse compiling '" << trunc(*re->pattern_)
<< "'";
ABSL_LOG(ERROR) << "Error reverse compiling '" << trunc(*re->pattern_)
<< "'";
// We no longer touch error_ and error_code_ because failing to compile
// the reverse Prog is not a showstopper: falling back to NFA execution
// is fine. More importantly, an RE2 object is supposed to be logically
@ -328,7 +337,7 @@ int RE2::ReverseProgramSize() const {
// Finds the most significant non-zero bit in n.
static int FindMSBSet(uint32_t n) {
DCHECK_NE(n, 0);
ABSL_DCHECK_NE(n, uint32_t{0});
#if defined(__GNUC__)
return 31 ^ __builtin_clz(n);
#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
@ -454,8 +463,8 @@ bool RE2::Replace(std::string* str,
if (!re.Rewrite(&s, rewrite, vec, nvec))
return false;
assert(vec[0].data() >= str->data());
assert(vec[0].data() + vec[0].size() <= str->data() + str->size());
ABSL_DCHECK_GE(vec[0].data(), str->data());
ABSL_DCHECK_LE(vec[0].data() + vec[0].size(), str->data() + str->size());
str->replace(vec[0].data() - str->data(), vec[0].size(), s);
return true;
}
@ -654,16 +663,16 @@ bool RE2::Match(absl::string_view text,
int nsubmatch) const {
if (!ok()) {
if (options_.log_errors())
LOG(ERROR) << "Invalid RE2: " << *error_;
ABSL_LOG(ERROR) << "Invalid RE2: " << *error_;
return false;
}
if (startpos > endpos || endpos > text.size()) {
if (options_.log_errors())
LOG(ERROR) << "RE2: invalid startpos, endpos pair. ["
<< "startpos: " << startpos << ", "
<< "endpos: " << endpos << ", "
<< "text size: " << text.size() << "]";
ABSL_LOG(ERROR) << "RE2: invalid startpos, endpos pair. ["
<< "startpos: " << startpos << ", "
<< "endpos: " << endpos << ", "
<< "text size: " << text.size() << "]";
return false;
}
@ -733,7 +742,7 @@ bool RE2::Match(absl::string_view text,
bool skipped_test = false;
switch (re_anchor) {
default:
LOG(DFATAL) << "Unexpected re_anchor value: " << re_anchor;
ABSL_LOG(DFATAL) << "Unexpected re_anchor value: " << re_anchor;
return false;
case UNANCHORED: {
@ -751,11 +760,11 @@ bool RE2::Match(absl::string_view text,
Prog::kLongestMatch, matchp, &dfa_failed, NULL)) {
if (dfa_failed) {
if (options_.log_errors())
LOG(ERROR) << "DFA out of memory: "
<< "pattern length " << pattern_->size() << ", "
<< "program size " << prog->size() << ", "
<< "list count " << prog->list_count() << ", "
<< "bytemap range " << prog->bytemap_range();
ABSL_LOG(ERROR) << "DFA out of memory: "
<< "pattern length " << pattern_->size() << ", "
<< "program size " << prog->size() << ", "
<< "list count " << prog->list_count() << ", "
<< "bytemap range " << prog->bytemap_range();
// Fall back to NFA below.
skipped_test = true;
break;
@ -771,11 +780,11 @@ bool RE2::Match(absl::string_view text,
matchp, &dfa_failed, NULL)) {
if (dfa_failed) {
if (options_.log_errors())
LOG(ERROR) << "DFA out of memory: "
<< "pattern length " << pattern_->size() << ", "
<< "program size " << prog_->size() << ", "
<< "list count " << prog_->list_count() << ", "
<< "bytemap range " << prog_->bytemap_range();
ABSL_LOG(ERROR) << "DFA out of memory: "
<< "pattern length " << pattern_->size() << ", "
<< "program size " << prog_->size() << ", "
<< "list count " << prog_->list_count() << ", "
<< "bytemap range " << prog_->bytemap_range();
// Fall back to NFA below.
skipped_test = true;
break;
@ -797,17 +806,17 @@ bool RE2::Match(absl::string_view text,
Prog::kLongestMatch, &match, &dfa_failed, NULL)) {
if (dfa_failed) {
if (options_.log_errors())
LOG(ERROR) << "DFA out of memory: "
<< "pattern length " << pattern_->size() << ", "
<< "program size " << prog->size() << ", "
<< "list count " << prog->list_count() << ", "
<< "bytemap range " << prog->bytemap_range();
ABSL_LOG(ERROR) << "DFA out of memory: "
<< "pattern length " << pattern_->size() << ", "
<< "program size " << prog->size() << ", "
<< "list count " << prog->list_count() << ", "
<< "bytemap range " << prog->bytemap_range();
// Fall back to NFA below.
skipped_test = true;
break;
}
if (options_.log_errors())
LOG(ERROR) << "SearchDFA inconsistency";
ABSL_LOG(ERROR) << "SearchDFA inconsistency";
return false;
}
break;
@ -840,11 +849,11 @@ bool RE2::Match(absl::string_view text,
&match, &dfa_failed, NULL)) {
if (dfa_failed) {
if (options_.log_errors())
LOG(ERROR) << "DFA out of memory: "
<< "pattern length " << pattern_->size() << ", "
<< "program size " << prog_->size() << ", "
<< "list count " << prog_->list_count() << ", "
<< "bytemap range " << prog_->bytemap_range();
ABSL_LOG(ERROR) << "DFA out of memory: "
<< "pattern length " << pattern_->size() << ", "
<< "program size " << prog_->size() << ", "
<< "list count " << prog_->list_count() << ", "
<< "bytemap range " << prog_->bytemap_range();
// Fall back to NFA below.
skipped_test = true;
break;
@ -876,20 +885,20 @@ bool RE2::Match(absl::string_view text,
if (can_one_pass && anchor != Prog::kUnanchored) {
if (!prog_->SearchOnePass(subtext1, text, anchor, kind, submatch, ncap)) {
if (!skipped_test && options_.log_errors())
LOG(ERROR) << "SearchOnePass inconsistency";
ABSL_LOG(ERROR) << "SearchOnePass inconsistency";
return false;
}
} else if (can_bit_state && subtext1.size() <= bit_state_text_max_size) {
if (!prog_->SearchBitState(subtext1, text, anchor,
kind, submatch, ncap)) {
if (!skipped_test && options_.log_errors())
LOG(ERROR) << "SearchBitState inconsistency";
ABSL_LOG(ERROR) << "SearchBitState inconsistency";
return false;
}
} else {
if (!prog_->SearchNFA(subtext1, text, anchor, kind, submatch, ncap)) {
if (!skipped_test && options_.log_errors())
LOG(ERROR) << "SearchNFA inconsistency";
ABSL_LOG(ERROR) << "SearchNFA inconsistency";
return false;
}
}
@ -914,7 +923,7 @@ bool RE2::DoMatch(absl::string_view text,
int n) const {
if (!ok()) {
if (options_.log_errors())
LOG(ERROR) << "Invalid RE2: " << *error_;
ABSL_LOG(ERROR) << "Invalid RE2: " << *error_;
return false;
}
@ -1034,8 +1043,8 @@ bool RE2::Rewrite(std::string* out,
int n = (c - '0');
if (n >= veclen) {
if (options_.log_errors()) {
LOG(ERROR) << "invalid substitution \\" << n
<< " from " << veclen << " groups";
ABSL_LOG(ERROR) << "invalid substitution \\" << n
<< " from " << veclen << " groups";
}
return false;
}
@ -1046,7 +1055,7 @@ bool RE2::Rewrite(std::string* out,
out->push_back('\\');
} else {
if (options_.log_errors())
LOG(ERROR) << "invalid rewrite pattern: " << rewrite.data();
ABSL_LOG(ERROR) << "invalid rewrite pattern: " << rewrite;
return false;
}
}

View File

@ -50,10 +50,10 @@
// supplied pattern exactly.
//
// Example: successful match
// CHECK(RE2::FullMatch("hello", "h.*o"));
// ABSL_CHECK(RE2::FullMatch("hello", "h.*o"));
//
// Example: unsuccessful match (requires full match):
// CHECK(!RE2::FullMatch("hello", "e"));
// ABSL_CHECK(!RE2::FullMatch("hello", "e"));
//
// -----------------------------------------------------------------------
// UTF-8 AND THE MATCHING INTERFACE:
@ -62,8 +62,9 @@
// The RE2::Latin1 option causes them to be interpreted as Latin-1.
//
// Example:
// CHECK(RE2::FullMatch(utf8_string, RE2(utf8_pattern)));
// CHECK(RE2::FullMatch(latin1_string, RE2(latin1_pattern, RE2::Latin1)));
// ABSL_CHECK(RE2::FullMatch(utf8_string, RE2(utf8_pattern)));
// ABSL_CHECK(RE2::FullMatch(latin1_string, RE2(latin1_pattern,
// RE2::Latin1)));
//
// -----------------------------------------------------------------------
// SUBMATCH EXTRACTION:
@ -83,27 +84,27 @@
// Example: extracts "ruby" into "s" and 1234 into "i"
// int i;
// std::string s;
// CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
// ABSL_CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
//
// Example: extracts "ruby" into "s" and no value into "i"
// absl::optional<int> i;
// std::optional<int> i;
// std::string s;
// CHECK(RE2::FullMatch("ruby", "(\\w+)(?::(\\d+))?", &s, &i));
// ABSL_CHECK(RE2::FullMatch("ruby", "(\\w+)(?::(\\d+))?", &s, &i));
//
// Example: fails because string cannot be stored in integer
// CHECK(!RE2::FullMatch("ruby", "(.*)", &i));
// ABSL_CHECK(!RE2::FullMatch("ruby", "(.*)", &i));
//
// Example: fails because there aren't enough sub-patterns
// CHECK(!RE2::FullMatch("ruby:1234", "\\w+:\\d+", &s));
// ABSL_CHECK(!RE2::FullMatch("ruby:1234", "\\w+:\\d+", &s));
//
// Example: does not try to extract any extra sub-patterns
// CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s));
// ABSL_CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s));
//
// Example: does not try to extract into NULL
// CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", NULL, &i));
// ABSL_CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", NULL, &i));
//
// Example: integer overflow causes failure
// CHECK(!RE2::FullMatch("ruby:1234567891234", "\\w+:(\\d+)", &i));
// ABSL_CHECK(!RE2::FullMatch("ruby:1234567891234", "\\w+:(\\d+)", &i));
//
// NOTE(rsc): Asking for submatches slows successful matches quite a bit.
// This may get a little faster in the future, but right now is slower
@ -117,12 +118,12 @@
// to match any substring of the text.
//
// Example: simple search for a string:
// CHECK(RE2::PartialMatch("hello", "ell"));
// ABSL_CHECK(RE2::PartialMatch("hello", "ell"));
//
// Example: find first number in a string
// int number;
// CHECK(RE2::PartialMatch("x*100 + 20", "(\\d+)", &number));
// CHECK_EQ(number, 100);
// ABSL_CHECK(RE2::PartialMatch("x*100 + 20", "(\\d+)", &number));
// ABSL_CHECK_EQ(number, 100);
//
// -----------------------------------------------------------------------
// PRE-COMPILED REGULAR EXPRESSIONS
@ -203,27 +204,28 @@
//
// Example:
// int a, b, c, d;
// CHECK(RE2::FullMatch("100 40 0100 0x40", "(.*) (.*) (.*) (.*)",
// ABSL_CHECK(RE2::FullMatch("100 40 0100 0x40", "(.*) (.*) (.*) (.*)",
// RE2::Octal(&a), RE2::Hex(&b), RE2::CRadix(&c), RE2::CRadix(&d)));
// will leave 64 in a, b, c, and d.
#include <stddef.h>
#include <stdint.h>
#include <algorithm>
#include <map>
#include <optional>
#include <string>
#include <type_traits>
#include <vector>
#include "absl/base/call_once.h"
#include "absl/strings/string_view.h"
#include "re2/stringpiece.h"
#if defined(__APPLE__)
#include <TargetConditionals.h>
#endif
#include "absl/base/call_once.h"
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "re2/stringpiece.h"
namespace re2 {
class Prog;
class Regexp;
@ -383,7 +385,7 @@ class RE2 {
// type, or one of:
// std::string (matched piece is copied to string)
// absl::string_view (string_view is mutated to point to matched piece)
// absl::optional<T> (T is a supported numeric or string type as above)
// std::optional<T> (T is a supported numeric or string type as above)
// T ("bool T::ParseFrom(const char*, size_t)" must exist)
// (void*)NULL (the corresponding matched sub-pattern is not copied)
//
@ -404,7 +406,7 @@ class RE2 {
// int number;
// RE2::FullMatch("abc", "[a-z]+(\\d+)?", &number);
//
// Use absl::optional<int> instead to handle this case correctly.
// Use std::optional<int> instead to handle this case correctly.
template <typename... A>
static bool FullMatch(absl::string_view text, const RE2& re, A&&... a) {
return Apply(FullMatchN, text, re, Arg(std::forward<A>(a))...);
@ -469,7 +471,7 @@ class RE2 {
// text. E.g.,
//
// std::string s = "yabba dabba doo";
// CHECK(RE2::Replace(&s, "b+", "d"));
// ABSL_CHECK(RE2::Replace(&s, "b+", "d"));
//
// will leave "s" containing "yada dabba doo"
//
@ -483,7 +485,7 @@ class RE2 {
// of the pattern in the string with the rewrite. E.g.
//
// std::string s = "yabba dabba doo";
// CHECK(RE2::GlobalReplace(&s, "b+", "d"));
// ABSL_CHECK(RE2::GlobalReplace(&s, "b+", "d"));
//
// will leave "s" containing "yada dada doo"
// Replacements are not subject to re-matching.
@ -840,12 +842,12 @@ template <> struct Parse4ary<unsigned long long> : public std::true_type {};
template <typename T>
bool Parse(const char* str, size_t n, T* dest, int radix);
// Support absl::optional<T> for all T with a stock parser.
template <typename T> struct Parse3ary<absl::optional<T>> : public Parse3ary<T> {};
template <typename T> struct Parse4ary<absl::optional<T>> : public Parse4ary<T> {};
// Support std::optional<T> for all T with a stock parser.
template <typename T> struct Parse3ary<std::optional<T>> : public Parse3ary<T> {};
template <typename T> struct Parse4ary<std::optional<T>> : public Parse4ary<T> {};
template <typename T>
bool Parse(const char* str, size_t n, absl::optional<T>* dest) {
bool Parse(const char* str, size_t n, std::optional<T>* dest) {
if (str == NULL) {
if (dest != NULL)
dest->reset();
@ -861,7 +863,7 @@ bool Parse(const char* str, size_t n, absl::optional<T>* dest) {
}
template <typename T>
bool Parse(const char* str, size_t n, absl::optional<T>* dest, int radix) {
bool Parse(const char* str, size_t n, std::optional<T>* dest, int radix) {
if (str == NULL) {
if (dest != NULL)
dest->reset();
@ -890,14 +892,12 @@ class RE2::Arg {
re2_internal::Parse4ary<T>::value,
int>::type;
#if !defined(_MSC_VER)
// SFINAE helper: names the type int only when T has a non-static member
// function ParseFrom that can be selected with the exact signature
// bool (T::*)(const char*, size_t). For any other T the static_cast is
// ill-formed, so a template that uses CanParseFrom<T> as a defaulted
// template parameter drops out of overload resolution.
template <typename T>
using CanParseFrom = typename std::enable_if<
std::is_member_function_pointer<
decltype(static_cast<bool (T::*)(const char*, size_t)>(
&T::ParseFrom))>::value,
int>::type;
#endif
public:
Arg() : Arg(nullptr) {}
@ -909,10 +909,8 @@ class RE2::Arg {
// Participates in overload resolution only when CanParse4ary<T> is
// well-formed. Stores the destination pointer and selects DoParse4ary<T>
// as the parser callback.
template <typename T, CanParse4ary<T> = 0>
Arg(T* ptr) : arg_(ptr), parser_(DoParse4ary<T>) {}
#if !defined(_MSC_VER)
// Participates in overload resolution only when CanParseFrom<T> is
// well-formed. Stores the destination pointer and selects DoParseFrom<T>
// as the parser callback.
template <typename T, CanParseFrom<T> = 0>
Arg(T* ptr) : arg_(ptr), parser_(DoParseFrom<T>) {}
#endif
typedef bool (*Parser)(const char* str, size_t n, void* dest);
@ -938,13 +936,11 @@ class RE2::Arg {
return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest), 10);
}
#if !defined(_MSC_VER)
// Parser callback for destination types that supply their own
// "bool ParseFrom(const char*, size_t)" member: hands the matched bytes to
// that member and reports its result. A null destination counts as success
// and nothing is parsed.
template <typename T>
static bool DoParseFrom(const char* str, size_t n, void* dest) {
  T* const target = reinterpret_cast<T*>(dest);
  return target == NULL || target->ParseFrom(str, n);
}
#endif
void* arg_;
Parser parser_;
@ -972,7 +968,7 @@ inline RE2::Arg RE2::Octal(T* ptr) {
}
// Silence warnings about missing initializers for members of LazyRE2.
#if !defined(__clang__) && defined(__GNUC__)
#if defined(__GNUC__)
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
#endif

View File

@ -10,6 +10,7 @@
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <algorithm>
#include <map>
#include <string>
@ -18,11 +19,12 @@
#include "absl/base/call_once.h"
#include "absl/base/macros.h"
#include "absl/container/flat_hash_map.h"
#include "absl/log/absl_check.h"
#include "absl/log/absl_log.h"
#include "absl/synchronization/mutex.h"
#include "util/logging.h"
#include "util/utf.h"
#include "re2/pod_array.h"
#include "re2/walker-inl.h"
#include "util/utf.h"
namespace re2 {
@ -45,7 +47,7 @@ Regexp::Regexp(RegexpOp op, ParseFlags parse_flags)
// required Decref() to have handled them for us.
Regexp::~Regexp() {
if (nsub_ > 0)
LOG(DFATAL) << "Regexp not destroyed.";
ABSL_LOG(DFATAL) << "Regexp not destroyed.";
switch (op_) {
default:
@ -154,7 +156,7 @@ void Regexp::Destroy() {
Regexp* re = stack;
stack = re->down_;
if (re->ref_ != 0)
LOG(DFATAL) << "Bad reference count " << re->ref_;
ABSL_LOG(DFATAL) << "Bad reference count " << re->ref_;
if (re->nsub_ > 0) {
Regexp** subs = re->sub();
for (int i = 0; i < re->nsub_; i++) {
@ -179,7 +181,7 @@ void Regexp::Destroy() {
}
void Regexp::AddRuneToString(Rune r) {
DCHECK(op_ == kRegexpLiteralString);
ABSL_DCHECK(op_ == kRegexpLiteralString);
if (nrunes_ == 0) {
// start with 8
runes_ = new Rune[8];
@ -421,7 +423,7 @@ static bool TopEqual(Regexp* a, Regexp* b) {
}
}
LOG(DFATAL) << "Unexpected op in Regexp::Equal: " << a->op();
ABSL_LOG(DFATAL) << "Unexpected op in Regexp::Equal: " << a->op();
return 0;
}
@ -496,7 +498,7 @@ bool Regexp::Equal(Regexp* a, Regexp* b) {
if (n == 0)
break;
DCHECK_GE(n, 2);
ABSL_DCHECK_GE(n, size_t{2});
a = stk[n-2];
b = stk[n-1];
stk.resize(n-2);
@ -562,7 +564,7 @@ class NumCapturesWalker : public Regexp::Walker<Ignored> {
virtual Ignored ShortVisit(Regexp* re, Ignored ignored) {
// Should never be called: we use Walk(), not WalkExponential().
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
LOG(DFATAL) << "NumCapturesWalker::ShortVisit called";
ABSL_LOG(DFATAL) << "NumCapturesWalker::ShortVisit called";
#endif
return ignored;
}
@ -609,7 +611,7 @@ class NamedCapturesWalker : public Regexp::Walker<Ignored> {
virtual Ignored ShortVisit(Regexp* re, Ignored ignored) {
// Should never be called: we use Walk(), not WalkExponential().
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
LOG(DFATAL) << "NamedCapturesWalker::ShortVisit called";
ABSL_LOG(DFATAL) << "NamedCapturesWalker::ShortVisit called";
#endif
return ignored;
}
@ -653,7 +655,7 @@ class CaptureNamesWalker : public Regexp::Walker<Ignored> {
virtual Ignored ShortVisit(Regexp* re, Ignored ignored) {
// Should never be called: we use Walk(), not WalkExponential().
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
LOG(DFATAL) << "CaptureNamesWalker::ShortVisit called";
ABSL_LOG(DFATAL) << "CaptureNamesWalker::ShortVisit called";
#endif
return ignored;
}
@ -993,7 +995,7 @@ CharClass* CharClassBuilder::GetCharClass() {
for (iterator it = begin(); it != end(); ++it)
cc->ranges_[n++] = *it;
cc->nranges_ = n;
DCHECK_LE(n, static_cast<int>(ranges_.size()));
ABSL_DCHECK_LE(n, static_cast<int>(ranges_.size()));
cc->nrunes_ = nrunes_;
cc->folds_ascii_ = FoldsASCII();
return cc;

View File

@ -88,12 +88,14 @@
#include <stddef.h>
#include <stdint.h>
#include <map>
#include <set>
#include <string>
#include "absl/log/absl_check.h"
#include "absl/log/absl_log.h"
#include "absl/strings/string_view.h"
#include "util/logging.h"
#include "util/utf.h"
namespace re2 {
@ -332,15 +334,42 @@ class Regexp {
return submany_;
}
int min() { DCHECK_EQ(op_, kRegexpRepeat); return min_; }
int max() { DCHECK_EQ(op_, kRegexpRepeat); return max_; }
Rune rune() { DCHECK_EQ(op_, kRegexpLiteral); return rune_; }
CharClass* cc() { DCHECK_EQ(op_, kRegexpCharClass); return cc_; }
int cap() { DCHECK_EQ(op_, kRegexpCapture); return cap_; }
const std::string* name() { DCHECK_EQ(op_, kRegexpCapture); return name_; }
Rune* runes() { DCHECK_EQ(op_, kRegexpLiteralString); return runes_; }
int nrunes() { DCHECK_EQ(op_, kRegexpLiteralString); return nrunes_; }
int match_id() { DCHECK_EQ(op_, kRegexpHaveMatch); return match_id_; }
// Returns min_. Only valid on kRegexpRepeat nodes; the op is verified by a
// debug-only check.
int min() {
ABSL_DCHECK_EQ(op_, kRegexpRepeat);
return min_;
}
// Returns max_. Only valid on kRegexpRepeat nodes; the op is verified by a
// debug-only check.
int max() {
ABSL_DCHECK_EQ(op_, kRegexpRepeat);
return max_;
}
// Returns rune_. Only valid on kRegexpLiteral nodes; the op is verified by a
// debug-only check.
Rune rune() {
ABSL_DCHECK_EQ(op_, kRegexpLiteral);
return rune_;
}
// Returns cc_. Only valid on kRegexpCharClass nodes; the op is verified by a
// debug-only check.
CharClass* cc() {
ABSL_DCHECK_EQ(op_, kRegexpCharClass);
return cc_;
}
// Returns cap_. Only valid on kRegexpCapture nodes; the op is verified by a
// debug-only check.
int cap() {
ABSL_DCHECK_EQ(op_, kRegexpCapture);
return cap_;
}
// Returns name_. Only valid on kRegexpCapture nodes; the op is verified by a
// debug-only check.
// NOTE(review): callers elsewhere test the result for null before
// dereferencing, so the pointer is presumably null for unnamed captures --
// confirm against the parser.
const std::string* name() {
ABSL_DCHECK_EQ(op_, kRegexpCapture);
return name_;
}
// Returns runes_. Only valid on kRegexpLiteralString nodes; the op is
// verified by a debug-only check.
Rune* runes() {
ABSL_DCHECK_EQ(op_, kRegexpLiteralString);
return runes_;
}
// Returns nrunes_. Only valid on kRegexpLiteralString nodes; the op is
// verified by a debug-only check.
int nrunes() {
ABSL_DCHECK_EQ(op_, kRegexpLiteralString);
return nrunes_;
}
// Returns match_id_. Only valid on kRegexpHaveMatch nodes; the op is
// verified by a debug-only check.
int match_id() {
ABSL_DCHECK_EQ(op_, kRegexpHaveMatch);
return match_id_;
}
// Increments reference count, returns object as convenience.
Regexp* Incref();
@ -515,7 +544,7 @@ class Regexp {
// Allocate space for n sub-regexps.
void AllocSub(int n) {
DCHECK(n >= 0 && static_cast<uint16_t>(n) == n);
ABSL_DCHECK(n >= 0 && static_cast<uint16_t>(n) == n);
if (n > 1)
submany_ = new Regexp*[n];
nsub_ = static_cast<uint16_t>(n);

View File

@ -5,15 +5,20 @@
#include "re2/set.h"
#include <stddef.h>
#include <algorithm>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "util/logging.h"
#include "absl/log/absl_log.h"
#include "absl/strings/string_view.h"
#include "re2/pod_array.h"
#include "re2/prog.h"
#include "re2/re2.h"
#include "re2/regexp.h"
#include "re2/sparse_set.h"
namespace re2 {
@ -50,9 +55,15 @@ RE2::Set& RE2::Set::operator=(Set&& other) {
return *this;
}
// Reports how many patterns the set holds. Until the set is compiled the
// count is taken from elem_; once compiled_ is set, the stored size_ is
// returned instead.
int RE2::Set::Size() const {
  return compiled_ ? size_ : static_cast<int>(elem_.size());
}
int RE2::Set::Add(absl::string_view pattern, std::string* error) {
if (compiled_) {
LOG(DFATAL) << "RE2::Set::Add() called after compiling";
ABSL_LOG(DFATAL) << "RE2::Set::Add() called after compiling";
return -1;
}
@ -64,7 +75,7 @@ int RE2::Set::Add(absl::string_view pattern, std::string* error) {
if (error != NULL)
*error = status.Text();
if (options_.log_errors())
LOG(ERROR) << "Error parsing '" << pattern << "': " << status.Text();
ABSL_LOG(ERROR) << "Error parsing '" << pattern << "': " << status.Text();
return -1;
}
@ -91,7 +102,7 @@ int RE2::Set::Add(absl::string_view pattern, std::string* error) {
bool RE2::Set::Compile() {
if (compiled_) {
LOG(DFATAL) << "RE2::Set::Compile() called more than once";
ABSL_LOG(DFATAL) << "RE2::Set::Compile() called more than once";
return false;
}
compiled_ = true;
@ -128,7 +139,7 @@ bool RE2::Set::Match(absl::string_view text, std::vector<int>* v,
if (!compiled_) {
if (error_info != NULL)
error_info->kind = kNotCompiled;
LOG(DFATAL) << "RE2::Set::Match() called before compiling";
ABSL_LOG(DFATAL) << "RE2::Set::Match() called before compiling";
return false;
}
#ifdef RE2_HAVE_THREAD_LOCAL
@ -144,10 +155,10 @@ bool RE2::Set::Match(absl::string_view text, std::vector<int>* v,
NULL, &dfa_failed, matches.get());
if (dfa_failed) {
if (options_.log_errors())
LOG(ERROR) << "DFA out of memory: "
<< "program size " << prog_->size() << ", "
<< "list count " << prog_->list_count() << ", "
<< "bytemap range " << prog_->bytemap_range();
ABSL_LOG(ERROR) << "DFA out of memory: "
<< "program size " << prog_->size() << ", "
<< "list count " << prog_->list_count() << ", "
<< "bytemap range " << prog_->bytemap_range();
if (error_info != NULL)
error_info->kind = kOutOfMemory;
return false;
@ -161,7 +172,7 @@ bool RE2::Set::Match(absl::string_view text, std::vector<int>* v,
if (matches->empty()) {
if (error_info != NULL)
error_info->kind = kInconsistent;
LOG(DFATAL) << "RE2::Set::Match() matched, but no matches returned?!";
ABSL_LOG(DFATAL) << "RE2::Set::Match() matched, but no matches returned";
return false;
}
v->assign(matches->begin(), matches->end());

View File

@ -53,6 +53,10 @@ class RE2::Set {
// the error message from the parser.
int Add(absl::string_view pattern, std::string* error);
// Returns the number of patterns in the set.
// Can be called before or after Compile().
int Size() const;
// Compiles the set in preparation for matching.
// Returns false if the compiler runs out of memory.
// Add() must not be called again after Compile().
@ -62,6 +66,7 @@ class RE2::Set {
// Returns true if text matches at least one of the regexps in the set.
// Fills v (if not NULL) with the indices of the matching regexps.
// Callers must not expect v to be sorted.
// The indices are in the half-open interval [0, Size()).
bool Match(absl::string_view text, std::vector<int>* v) const;
// As above, but populates error_info (if not NULL) when none of the regexps

View File

@ -6,14 +6,17 @@
// to use simple extended regular expression features.
// Also sort and simplify character classes.
#include <stddef.h>
#include <algorithm>
#include <string>
#include "util/logging.h"
#include "util/utf.h"
#include "absl/log/absl_log.h"
#include "absl/strings/string_view.h"
#include "re2/pod_array.h"
#include "re2/regexp.h"
#include "re2/walker-inl.h"
#include "util/utf.h"
namespace re2 {
@ -94,7 +97,7 @@ bool Regexp::ComputeSimple() {
case kRegexpRepeat:
return false;
}
LOG(DFATAL) << "Case not handled in ComputeSimple: " << op_;
ABSL_LOG(DFATAL) << "Case not handled in ComputeSimple: " << op_;
return false;
}
@ -222,7 +225,7 @@ Regexp* CoalesceWalker::Copy(Regexp* re) {
Regexp* CoalesceWalker::ShortVisit(Regexp* re, Regexp* parent_arg) {
// Should never be called: we use Walk(), not WalkExponential().
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
LOG(DFATAL) << "CoalesceWalker::ShortVisit called";
ABSL_LOG(DFATAL) << "CoalesceWalker::ShortVisit called";
#endif
return re->Incref();
}
@ -372,7 +375,7 @@ void CoalesceWalker::DoCoalesce(Regexp** r1ptr, Regexp** r2ptr) {
default:
nre->Decref();
LOG(DFATAL) << "DoCoalesce failed: r1->op() is " << r1->op();
ABSL_LOG(DFATAL) << "DoCoalesce failed: r1->op() is " << r1->op();
return;
}
@ -433,7 +436,7 @@ void CoalesceWalker::DoCoalesce(Regexp** r1ptr, Regexp** r2ptr) {
default:
nre->Decref();
LOG(DFATAL) << "DoCoalesce failed: r2->op() is " << r2->op();
ABSL_LOG(DFATAL) << "DoCoalesce failed: r2->op() is " << r2->op();
return;
}
@ -448,7 +451,7 @@ Regexp* SimplifyWalker::Copy(Regexp* re) {
Regexp* SimplifyWalker::ShortVisit(Regexp* re, Regexp* parent_arg) {
// Should never be called: we use Walk(), not WalkExponential().
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
LOG(DFATAL) << "SimplifyWalker::ShortVisit called";
ABSL_LOG(DFATAL) << "SimplifyWalker::ShortVisit called";
#endif
return re->Incref();
}
@ -564,7 +567,7 @@ Regexp* SimplifyWalker::PostVisit(Regexp* re,
}
}
LOG(ERROR) << "Simplify case not handled: " << re->op();
ABSL_LOG(ERROR) << "Simplify case not handled: " << re->op();
return re->Incref();
}
@ -661,7 +664,8 @@ Regexp* SimplifyWalker::SimplifyRepeat(Regexp* re, int min, int max,
if (nre == NULL) {
// Some degenerate case, like min > max, or min < max < 0.
// This shouldn't happen, because the parser rejects such regexps.
LOG(DFATAL) << "Malformed repeat " << re->ToString() << " " << min << " " << max;
ABSL_LOG(DFATAL) << "Malformed repeat of " << re->ToString()
<< " min " << min << " max " << max;
return new Regexp(kRegexpNoMatch, f);
}

View File

@ -88,22 +88,24 @@
//
// A moved-from SparseArray will be empty.
// Doing this simplifies the logic below.
#ifndef __has_feature
#define __has_feature(x) 0
#endif
#include <assert.h>
#include <stdint.h>
#if __has_feature(memory_sanitizer)
#include <sanitizer/msan_interface.h>
#endif
#include <algorithm>
#include <memory>
#include <utility>
#include "re2/pod_array.h"
// Doing this simplifies the logic below.
#ifndef __has_feature
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer)
#include <sanitizer/msan_interface.h>
#endif
namespace re2 {
template<typename Value>

View File

@ -47,22 +47,24 @@
//
// See sparse_array.h for implementation details.
// Doing this simplifies the logic below.
#ifndef __has_feature
#define __has_feature(x) 0
#endif
#include <assert.h>
#include <stdint.h>
#if __has_feature(memory_sanitizer)
#include <sanitizer/msan_interface.h>
#endif
#include <algorithm>
#include <memory>
#include <utility>
#include "re2/pod_array.h"
// Doing this simplifies the logic below.
#ifndef __has_feature
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer)
#include <sanitizer/msan_interface.h>
#endif
namespace re2 {
template<typename Value>

View File

@ -13,7 +13,7 @@
// THIS CODE SHOULD NEVER BE USED IN PRODUCTION:
// - It uses a ton of memory.
// - It uses a ton of stack.
// - It uses CHECK and LOG(FATAL).
// - It uses ABSL_CHECK() and ABSL_LOG(FATAL).
// - It implements unanchored search by repeated anchored search.
//
// On the other hand, it is very simple and a good reference
@ -28,7 +28,9 @@
#include <string.h>
#include "absl/base/macros.h"
#include "util/logging.h"
#include "absl/log/absl_check.h"
#include "absl/log/absl_log.h"
#include "absl/strings/string_view.h"
#include "re2/pod_array.h"
#include "re2/prog.h"
#include "re2/regexp.h"
@ -111,7 +113,7 @@ bool Backtracker::Search(absl::string_view text, absl::string_view context,
endmatch_ = prog_->anchor_end();
submatch_ = submatch;
nsubmatch_ = nsubmatch;
CHECK_LT(2*nsubmatch_, static_cast<int>(ABSL_ARRAYSIZE(cap_)));
ABSL_CHECK_LT(2*nsubmatch_, static_cast<int>(ABSL_ARRAYSIZE(cap_)));
memset(cap_, 0, sizeof cap_);
// We use submatch_[0] for our own bookkeeping,
@ -157,10 +159,10 @@ bool Backtracker::Visit(int id, const char* p) {
// Check bitmap. If we've already explored from here,
// either it didn't match or it did but we're hoping for a better match.
// Either way, don't go down that road again.
CHECK(p <= text_.data() + text_.size());
ABSL_CHECK(p <= text_.data() + text_.size());
int n = id * static_cast<int>(text_.size()+1) +
static_cast<int>(p-text_.data());
CHECK_LT(n/32, visited_.size());
ABSL_CHECK_LT(n/32, visited_.size());
if (visited_[n/32] & (1 << (n&31)))
return false;
visited_[n/32] |= 1 << (n&31);
@ -188,7 +190,7 @@ bool Backtracker::Try(int id, const char* p) {
Prog::Inst* ip = prog_->inst(id);
switch (ip->opcode()) {
default:
LOG(FATAL) << "Unexpected opcode: " << (int)ip->opcode();
ABSL_LOG(FATAL) << "Unexpected opcode: " << ip->opcode();
return false; // not reached
case kInstAltMatch:

View File

@ -9,8 +9,8 @@
#include "absl/base/macros.h"
#include "absl/strings/str_format.h"
#include "gtest/gtest.h"
#include "util/utf.h"
#include "re2/regexp.h"
#include "util/utf.h"
namespace re2 {

View File

@ -4,13 +4,16 @@
// Test prog.cc, compile.cc
#include <stddef.h>
#include <string>
#include "absl/base/macros.h"
#include "absl/log/absl_log.h"
#include "absl/strings/string_view.h"
#include "gtest/gtest.h"
#include "util/logging.h"
#include "re2/regexp.h"
#include "re2/prog.h"
#include "re2/regexp.h"
namespace re2 {
@ -132,13 +135,13 @@ TEST(TestRegexpCompileToProg, Simple) {
const re2::Test& t = tests[i];
Regexp* re = Regexp::Parse(t.regexp, Regexp::PerlX|Regexp::Latin1, NULL);
if (re == NULL) {
LOG(ERROR) << "Cannot parse: " << t.regexp;
ABSL_LOG(ERROR) << "Cannot parse: " << t.regexp;
failed++;
continue;
}
Prog* prog = re->CompileToProg(0);
if (prog == NULL) {
LOG(ERROR) << "Cannot compile: " << t.regexp;
ABSL_LOG(ERROR) << "Cannot compile: " << t.regexp;
re->Decref();
failed++;
continue;
@ -146,9 +149,9 @@ TEST(TestRegexpCompileToProg, Simple) {
ASSERT_TRUE(re->CompileToProg(1) == NULL);
std::string s = prog->Dump();
if (s != t.code) {
LOG(ERROR) << "Incorrect compiled code for: " << t.regexp;
LOG(ERROR) << "Want:\n" << t.code;
LOG(ERROR) << "Got:\n" << s;
ABSL_LOG(ERROR) << "Incorrect compiled code for: " << t.regexp;
ABSL_LOG(ERROR) << "Want:\n" << t.code;
ABSL_LOG(ERROR) << "Got:\n" << s;
failed++;
}
delete prog;

View File

@ -2,22 +2,24 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include <stddef.h>
#include <stdint.h>
#include <string>
#include <thread>
#include <vector>
#include "absl/base/macros.h"
#include "absl/flags/flag.h"
#include "absl/log/absl_log.h"
#include "absl/strings/str_format.h"
#include "absl/strings/string_view.h"
#include "gtest/gtest.h"
#include "util/logging.h"
#include "util/malloc_counter.h"
#include "re2/prog.h"
#include "re2/re2.h"
#include "re2/regexp.h"
#include "re2/testing/regexp_generator.h"
#include "re2/testing/string_generator.h"
#include "util/malloc_counter.h"
static const bool UsingMallocCounter = false;
@ -111,10 +113,10 @@ TEST(SingleThreaded, BuildEntireDFA) {
delete prog;
}
if (UsingMallocCounter) {
//LOG(INFO) << "limit " << limit << ", "
// << "prog usage " << progusage << ", "
// << "DFA budget " << dfamem << ", "
// << "total " << usage;
//ABSL_LOG(INFO) << "limit " << limit << ", "
// << "prog usage " << progusage << ", "
// << "DFA budget " << dfamem << ", "
// << "total " << usage;
// Tolerate +/- 10%.
ASSERT_GT(usage, limit*9/10);
ASSERT_LT(usage, limit*11/10);
@ -189,8 +191,8 @@ TEST(SingleThreaded, SearchDFA) {
delete prog;
}
if (UsingMallocCounter) {
//LOG(INFO) << "usage " << usage << ", "
// << "peak usage " << peak_usage;
//ABSL_LOG(INFO) << "usage " << usage << ", "
// << "peak usage " << peak_usage;
ASSERT_LT(usage, 1<<n);
ASSERT_LT(peak_usage, 1<<n);
}
@ -297,7 +299,7 @@ TEST(DFA, ReverseMatch) {
prog->SearchDFA(t.text, absl::string_view(), Prog::kUnanchored,
Prog::kFirstMatch, NULL, &failed, NULL);
if (matched != t.match) {
LOG(ERROR) << t.regexp << " on " << t.text << ": want " << t.match;
ABSL_LOG(ERROR) << t.regexp << " on " << t.text << ": want " << t.match;
nfail++;
}
delete prog;
@ -360,8 +362,9 @@ TEST(DFA, Callback) {
dump += match ? "]]" : "]";
});
if (dump != t.dump) {
LOG(ERROR) << t.regexp << " bytemap:\n" << prog->DumpByteMap();
LOG(ERROR) << t.regexp << " dump:\ngot " << dump << "\nwant " << t.dump;
ABSL_LOG(ERROR) << t.regexp << " bytemap:\n" << prog->DumpByteMap();
ABSL_LOG(ERROR) << t.regexp << " dump:\n" << "got " << dump << "\n"
<< "want " << t.dump;
nfail++;
}
delete prog;

View File

@ -19,11 +19,12 @@
#include <string>
#include "absl/base/macros.h"
#include "absl/log/absl_check.h"
#include "absl/log/absl_log.h"
#include "absl/strings/str_format.h"
#include "gtest/gtest.h"
#include "util/logging.h"
#include "util/utf.h"
#include "re2/regexp.h"
#include "util/utf.h"
namespace re2 {
@ -96,17 +97,25 @@ static void DumpRegexpAppending(Regexp* re, std::string* s) {
break;
case kRegexpLiteral: {
Rune r = re->rune();
char buf[UTFmax+1];
buf[runetochar(buf, &r)] = 0;
s->append(buf);
if (re->parse_flags() & Regexp::Latin1) {
s->push_back(r);
} else {
char buf[UTFmax+1];
buf[runetochar(buf, &r)] = 0;
s->append(buf);
}
break;
}
case kRegexpLiteralString:
for (int i = 0; i < re->nrunes(); i++) {
Rune r = re->runes()[i];
char buf[UTFmax+1];
buf[runetochar(buf, &r)] = 0;
s->append(buf);
if (re->parse_flags() & Regexp::Latin1) {
s->push_back(r);
} else {
char buf[UTFmax+1];
buf[runetochar(buf, &r)] = 0;
s->append(buf);
}
}
break;
case kRegexpConcat:
@ -121,7 +130,7 @@ static void DumpRegexpAppending(Regexp* re, std::string* s) {
break;
case kRegexpCapture:
if (re->cap() == 0)
LOG(DFATAL) << "kRegexpCapture cap() == 0";
ABSL_LOG(DFATAL) << "kRegexpCapture cap() == 0";
if (re->name()) {
s->append(*re->name());
s->append(":");
@ -153,7 +162,7 @@ static void DumpRegexpAppending(Regexp* re, std::string* s) {
std::string Regexp::Dump() {
// Make sure that we are being called from a unit test.
// Should cause a link error if used outside of testing.
CHECK(!::testing::TempDir().empty());
ABSL_CHECK(!::testing::TempDir().empty());
std::string s;
DumpRegexpAppending(this, &s);

View File

@ -9,6 +9,7 @@
#include "gtest/gtest.h"
#include "re2/testing/exhaustive_tester.h"
#include "re2/testing/regexp_generator.h"
namespace re2 {

View File

@ -5,12 +5,13 @@
// Exhaustive testing of regular expression matching.
#include <stddef.h>
#include <memory>
#include <string>
#include <vector>
#include "gtest/gtest.h"
#include "re2/testing/exhaustive_tester.h"
#include "re2/testing/regexp_generator.h"
namespace re2 {
@ -69,4 +70,3 @@ TEST(LineEnds, Exhaustive) {
// }
} // namespace re2

View File

@ -5,13 +5,14 @@
// Exhaustive testing of regular expression matching.
#include <stddef.h>
#include <memory>
#include <string>
#include <vector>
#include "gtest/gtest.h"
#include "util/utf.h"
#include "re2/testing/exhaustive_tester.h"
#include "re2/testing/regexp_generator.h"
#include "util/utf.h"
namespace re2 {
@ -97,4 +98,3 @@ TEST(InterestingUTF8, AB) {
}
} // namespace re2

View File

@ -33,4 +33,3 @@ TEST(EgrepLiterals, UTF8) {
}
} // namespace re2

View File

@ -11,14 +11,23 @@
// the NFA, DFA, and a trivial backtracking implementation agree about
// the location of the match.
#include "re2/testing/exhaustive_tester.h"
#include <stdio.h>
#include <string>
#include <vector>
#include "absl/base/macros.h"
#include "absl/flags/flag.h"
#include "absl/log/absl_check.h"
#include "absl/log/absl_log.h"
#include "absl/strings/str_format.h"
#include "absl/strings/string_view.h"
#include "gtest/gtest.h"
#include "util/logging.h"
#include "re2/testing/exhaustive_tester.h"
#include "re2/prog.h"
#include "re2/re2.h"
#include "re2/testing/regexp_generator.h"
#include "re2/testing/tester.h"
// For target `log' in the Makefile.
@ -40,7 +49,7 @@ static char* escape(absl::string_view sp) {
*p++ = '\"';
for (size_t i = 0; i < sp.size(); i++) {
if(p+5 >= buf+sizeof buf)
LOG(FATAL) << "ExhaustiveTester escape: too long";
ABSL_LOG(FATAL) << "ExhaustiveTester escape: too long";
if(sp[i] == '\\' || sp[i] == '\"') {
*p++ = '\\';
*p++ = sp[i];
@ -82,7 +91,7 @@ void ExhaustiveTester::HandleRegexp(const std::string& const_regexp) {
std::string regexp = const_regexp;
if (!topwrapper_.empty()) {
auto fmt = absl::ParsedFormat<'s'>::New(topwrapper_);
CHECK(fmt != nullptr);
ABSL_CHECK(fmt != nullptr);
regexp = absl::StrFormat(*fmt, regexp);
}
@ -95,7 +104,7 @@ void ExhaustiveTester::HandleRegexp(const std::string& const_regexp) {
// Write out test cases and answers for use in testing
// other implementations, such as Go's regexp package.
if (randomstrings_)
LOG(ERROR) << "Cannot log with random strings.";
ABSL_LOG(ERROR) << "Cannot log with random strings.";
if (regexps_ == 1) { // first
absl::PrintF("strings\n");
strgen_.Reset();

View File

@ -6,6 +6,7 @@
#define RE2_TESTING_EXHAUSTIVE_TESTER_H_
#include <stdint.h>
#include <string>
#include <vector>

View File

@ -2,17 +2,18 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "re2/filtered_re2.h"
#include <stddef.h>
#include <algorithm>
#include <memory>
#include <string>
#include <vector>
#include <utility>
#include <vector>
#include "absl/base/macros.h"
#include "absl/log/absl_log.h"
#include "gtest/gtest.h"
#include "util/logging.h"
#include "re2/filtered_re2.h"
#include "re2/re2.h"
namespace re2 {
@ -32,14 +33,14 @@ TEST(FilteredRE2Test, EmptyTest) {
FilterTestVars v;
v.f.Compile(&v.atoms);
EXPECT_EQ(0, v.atoms.size());
EXPECT_EQ(size_t{0}, v.atoms.size());
// Compile has no effect at all when called before Add: it will not
// record that it has been called and it will not clear the vector.
// The second point does not matter here, but the first point means
// that an error will be logged during the call to AllMatches.
v.f.AllMatches("foo", v.atom_indices, &v.matches);
EXPECT_EQ(0, v.matches.size());
EXPECT_EQ(size_t{0}, v.matches.size());
}
TEST(FilteredRE2Test, SmallOrTest) {
@ -48,10 +49,10 @@ TEST(FilteredRE2Test, SmallOrTest) {
v.f.Add("(foo|bar)", v.opts, &id);
v.f.Compile(&v.atoms);
EXPECT_EQ(0, v.atoms.size());
EXPECT_EQ(size_t{0}, v.atoms.size());
v.f.AllMatches("lemurs bar", v.atom_indices, &v.matches);
EXPECT_EQ(1, v.matches.size());
EXPECT_EQ(size_t{1}, v.matches.size());
EXPECT_EQ(id, v.matches[0]);
}
@ -62,12 +63,12 @@ TEST(FilteredRE2Test, SmallLatinTest) {
v.opts.set_encoding(RE2::Options::EncodingLatin1);
v.f.Add("\xde\xadQ\xbe\xef", v.opts, &id);
v.f.Compile(&v.atoms);
EXPECT_EQ(1, v.atoms.size());
EXPECT_EQ(size_t{1}, v.atoms.size());
EXPECT_EQ(v.atoms[0], "\xde\xadq\xbe\xef");
v.atom_indices.push_back(0);
v.f.AllMatches("foo\xde\xadQ\xbe\xeflemur", v.atom_indices, &v.matches);
EXPECT_EQ(1, v.matches.size());
EXPECT_EQ(size_t{1}, v.matches.size());
EXPECT_EQ(id, v.matches[0]);
}
@ -172,13 +173,13 @@ bool CheckExpectedAtoms(const char* atoms[],
pass = pass && expected[i] == v->atoms[i];
if (!pass) {
LOG(ERROR) << "Failed " << testname;
LOG(ERROR) << "Expected #atoms = " << expected.size();
ABSL_LOG(ERROR) << "Failed " << testname;
ABSL_LOG(ERROR) << "Expected #atoms = " << expected.size();
for (size_t i = 0; i < expected.size(); i++)
LOG(ERROR) << expected[i];
LOG(ERROR) << "Found #atoms = " << v->atoms.size();
ABSL_LOG(ERROR) << expected[i];
ABSL_LOG(ERROR) << "Found #atoms = " << v->atoms.size();
for (size_t i = 0; i < v->atoms.size(); i++)
LOG(ERROR) << v->atoms[i];
ABSL_LOG(ERROR) << v->atoms[i];
}
return pass;
@ -255,7 +256,7 @@ TEST(FilteredRE2Test, MatchTests) {
FindAtomIndices(v.atoms, atoms, &atom_ids);
std::vector<int> matching_regexps;
v.f.AllMatches(text, atom_ids, &matching_regexps);
EXPECT_EQ(1, matching_regexps.size());
EXPECT_EQ(size_t{1}, matching_regexps.size());
text = "abc12312yyyzzz";
atoms.clear();
@ -264,7 +265,7 @@ TEST(FilteredRE2Test, MatchTests) {
atoms.push_back("yyyzzz");
FindAtomIndices(v.atoms, atoms, &atom_ids);
v.f.AllMatches(text, atom_ids, &matching_regexps);
EXPECT_EQ(1, matching_regexps.size());
EXPECT_EQ(size_t{1}, matching_regexps.size());
text = "abcd12yyy32yyyzzz";
atoms.clear();
@ -273,11 +274,11 @@ TEST(FilteredRE2Test, MatchTests) {
atoms.push_back("yyy");
atoms.push_back("yyyzzz");
FindAtomIndices(v.atoms, atoms, &atom_ids);
LOG(INFO) << "S: " << atom_ids.size();
ABSL_LOG(INFO) << "S: " << atom_ids.size();
for (size_t i = 0; i < atom_ids.size(); i++)
LOG(INFO) << "i: " << i << " : " << atom_ids[i];
ABSL_LOG(INFO) << "i: " << i << " : " << atom_ids[i];
v.f.AllMatches(text, atom_ids, &matching_regexps);
EXPECT_EQ(2, matching_regexps.size());
EXPECT_EQ(size_t{2}, matching_regexps.size());
}
TEST(FilteredRE2Test, EmptyStringInStringSetBug) {
@ -300,43 +301,43 @@ TEST(FilteredRE2Test, MoveSemantics) {
v1.f.Add("foo\\d+", v1.opts, &id);
EXPECT_EQ(0, id);
v1.f.Compile(&v1.atoms);
EXPECT_EQ(1, v1.atoms.size());
EXPECT_EQ(size_t{1}, v1.atoms.size());
EXPECT_EQ("foo", v1.atoms[0]);
v1.f.AllMatches("abc foo1 xyz", {0}, &v1.matches);
EXPECT_EQ(1, v1.matches.size());
EXPECT_EQ(size_t{1}, v1.matches.size());
EXPECT_EQ(0, v1.matches[0]);
v1.f.AllMatches("abc bar2 xyz", {0}, &v1.matches);
EXPECT_EQ(0, v1.matches.size());
EXPECT_EQ(size_t{0}, v1.matches.size());
// The moved-to object should do what the moved-from object did.
FilterTestVars v2;
v2.f = std::move(v1.f);
v2.f.AllMatches("abc foo1 xyz", {0}, &v2.matches);
EXPECT_EQ(1, v2.matches.size());
EXPECT_EQ(size_t{1}, v2.matches.size());
EXPECT_EQ(0, v2.matches[0]);
v2.f.AllMatches("abc bar2 xyz", {0}, &v2.matches);
EXPECT_EQ(0, v2.matches.size());
EXPECT_EQ(size_t{0}, v2.matches.size());
// The moved-from object should have been reset and be reusable.
v1.f.Add("bar\\d+", v1.opts, &id);
EXPECT_EQ(0, id);
v1.f.Compile(&v1.atoms);
EXPECT_EQ(1, v1.atoms.size());
EXPECT_EQ(size_t{1}, v1.atoms.size());
EXPECT_EQ("bar", v1.atoms[0]);
v1.f.AllMatches("abc foo1 xyz", {0}, &v1.matches);
EXPECT_EQ(0, v1.matches.size());
EXPECT_EQ(size_t{0}, v1.matches.size());
v1.f.AllMatches("abc bar2 xyz", {0}, &v1.matches);
EXPECT_EQ(1, v1.matches.size());
EXPECT_EQ(size_t{1}, v1.matches.size());
EXPECT_EQ(0, v1.matches[0]);
// Verify that "overwriting" works and also doesn't leak memory.
// (The latter will need a leak detector such as LeakSanitizer.)
v1.f = std::move(v2.f);
v1.f.AllMatches("abc foo1 xyz", {0}, &v1.matches);
EXPECT_EQ(1, v1.matches.size());
EXPECT_EQ(size_t{1}, v1.matches.size());
EXPECT_EQ(0, v1.matches[0]);
v1.f.AllMatches("abc bar2 xyz", {0}, &v1.matches);
EXPECT_EQ(0, v1.matches.size());
EXPECT_EQ(size_t{0}, v1.matches.size());
}
} // namespace re2

View File

@ -2,9 +2,10 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include <stddef.h>
#include "absl/base/macros.h"
#include "gtest/gtest.h"
#include "util/logging.h"
#include "re2/prog.h"
#include "re2/regexp.h"

View File

@ -2,8 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "gtest/gtest.h"
#include "util/logging.h"
#include "absl/log/absl_log.h"
#include "re2/regexp.h"
#include "re2/walker-inl.h"
@ -21,7 +20,7 @@ class NullWalker : public Regexp::Walker<bool> {
virtual bool ShortVisit(Regexp* re, bool a) {
// Should never be called: we use Walk(), not WalkExponential().
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
LOG(DFATAL) << "NullWalker::ShortVisit called";
ABSL_LOG(DFATAL) << "NullWalker::ShortVisit called";
#endif
return a;
}

View File

@ -4,11 +4,13 @@
// Test parse.cc, dump.cc, and tostring.cc.
#include <stddef.h>
#include <string>
#include "absl/base/macros.h"
#include "absl/log/absl_log.h"
#include "gtest/gtest.h"
#include "util/logging.h"
#include "re2/regexp.h"
namespace re2 {
@ -225,6 +227,29 @@ static Test tests[] = {
// Bug in Regexp::ToString() that emitted [^], which
// would (obviously) fail to parse when fed back in.
{ "[\\s\\S]", "cc{0-0x10ffff}" },
// As per https://github.com/google/re2/issues/477,
// there were long-standing bugs involving Latin-1.
// Here, we exercise it WITHOUT case folding...
{ "\xa5\x64\xd1", "str{\xa5""d\xd1}", Regexp::Latin1 },
{ "\xa5\xd1\x64", "str{\xa5\xd1""d}", Regexp::Latin1 },
{ "\xa5\x64[\xd1\xd2]", "cat{str{\xa5""d}cc{0xd1-0xd2}}", Regexp::Latin1 },
{ "\xa5[\xd1\xd2]\x64", "cat{lit{\xa5}cc{0xd1-0xd2}lit{d}}", Regexp::Latin1 },
{ "\xa5\x64|\xa5\xd1", "cat{lit{\xa5}cc{0x64 0xd1}}", Regexp::Latin1 },
{ "\xa5\xd1|\xa5\x64", "cat{lit{\xa5}cc{0x64 0xd1}}", Regexp::Latin1 },
{ "\xa5\x64|\xa5[\xd1\xd2]", "cat{lit{\xa5}cc{0x64 0xd1-0xd2}}", Regexp::Latin1 },
{ "\xa5[\xd1\xd2]|\xa5\x64", "cat{lit{\xa5}cc{0x64 0xd1-0xd2}}", Regexp::Latin1 },
// Here, we exercise it WITH case folding...
// 0x64 should fold to 0x44, but neither 0xD1 nor 0xD2
// should fold to 0xF1 and 0xF2, respectively.
{ "\xa5\x64\xd1", "strfold{\xa5""d\xd1}", Regexp::Latin1 | Regexp::FoldCase },
{ "\xa5\xd1\x64", "strfold{\xa5\xd1""d}", Regexp::Latin1 | Regexp::FoldCase },
{ "\xa5\x64[\xd1\xd2]", "cat{strfold{\xa5""d}cc{0xd1-0xd2}}", Regexp::Latin1 | Regexp::FoldCase },
{ "\xa5[\xd1\xd2]\x64", "cat{lit{\xa5}cc{0xd1-0xd2}litfold{d}}", Regexp::Latin1 | Regexp::FoldCase },
{ "\xa5\x64|\xa5\xd1", "cat{lit{\xa5}cc{0x44 0x64 0xd1}}", Regexp::Latin1 | Regexp::FoldCase },
{ "\xa5\xd1|\xa5\x64", "cat{lit{\xa5}cc{0x44 0x64 0xd1}}", Regexp::Latin1 | Regexp::FoldCase },
{ "\xa5\x64|\xa5[\xd1\xd2]", "cat{lit{\xa5}cc{0x44 0x64 0xd1-0xd2}}", Regexp::Latin1 | Regexp::FoldCase },
{ "\xa5[\xd1\xd2]|\xa5\x64", "cat{lit{\xa5}cc{0x44 0x64 0xd1-0xd2}}", Regexp::Latin1 | Regexp::FoldCase },
};
bool RegexpEqualTestingOnly(Regexp* a, Regexp* b) {
@ -356,6 +381,13 @@ Test prefix_tests[] = {
"cat{lit{a}alt{emp{}cat{str{ardvark}alt{emp{}lit{s}}}"
"cat{str{ba}alt{cat{lit{c}alt{cc{0x69 0x6b}cat{str{us}alt{emp{}str{es}}}}}"
"str{ft}cat{str{lone}alt{emp{}lit{s}}}}}}}" },
// As per https://github.com/google/re2/issues/467,
// these should factor identically, but they didn't
// because AddFoldedRange() terminated prematurely.
{ "0A|0[aA]", "cat{lit{0}cc{0x41 0x61}}" },
{ "0a|0[aA]", "cat{lit{0}cc{0x41 0x61}}" },
{ "0[aA]|0A", "cat{lit{0}cc{0x41 0x61}}" },
{ "0[aA]|0a", "cat{lit{0}cc{0x41 0x61}}" },
};
// Test that prefix factoring works.
@ -485,12 +517,12 @@ TEST(TestToString, EquivalentParse) {
// << " t=" << t << " regexp=" << tests[i].regexp;
// Test that if we parse the new regexp we get the same structure.
Regexp* nre = Regexp::Parse(t, Regexp::MatchNL | Regexp::PerlX, &status);
Regexp* nre = Regexp::Parse(t, f, &status);
ASSERT_TRUE(nre != NULL) << " reparse " << t << " " << status.Text();
std::string ss = nre->Dump();
std::string tt = nre->ToString();
if (s != ss || t != tt)
LOG(INFO) << "ToString(" << tests[i].regexp << ") = " << t;
ABSL_LOG(INFO) << "ToString(" << tests[i].regexp << ") = " << t;
EXPECT_EQ(s, ss);
EXPECT_EQ(t, tt);
nre->Decref();
@ -525,4 +557,30 @@ TEST(NamedCaptures, ErrorArgs) {
EXPECT_EQ(status.error_arg(), "(?<space bar>");
}
// Test that look-around error args are correct.
TEST(LookAround, ErrorArgs) {
RegexpStatus status;
Regexp* re;
re = Regexp::Parse("(?=foo).*", Regexp::LikePerl, &status);
EXPECT_TRUE(re == NULL);
EXPECT_EQ(status.code(), kRegexpBadPerlOp);
EXPECT_EQ(status.error_arg(), "(?=");
re = Regexp::Parse("(?!foo).*", Regexp::LikePerl, &status);
EXPECT_TRUE(re == NULL);
EXPECT_EQ(status.code(), kRegexpBadPerlOp);
EXPECT_EQ(status.error_arg(), "(?!");
re = Regexp::Parse("(?<=foo).*", Regexp::LikePerl, &status);
EXPECT_TRUE(re == NULL);
EXPECT_EQ(status.code(), kRegexpBadPerlOp);
EXPECT_EQ(status.error_arg(), "(?<=");
re = Regexp::Parse("(?<!foo).*", Regexp::LikePerl, &status);
EXPECT_TRUE(re == NULL);
EXPECT_EQ(status.code(), kRegexpBadPerlOp);
EXPECT_EQ(status.error_arg(), "(?<!");
}
} // namespace re2

View File

@ -3,13 +3,15 @@
// license that can be found in the LICENSE file.
#include <string.h>
#include <string>
#include <vector>
#include "absl/base/macros.h"
#include "absl/log/absl_log.h"
#include "absl/strings/escaping.h"
#include "absl/strings/string_view.h"
#include "gtest/gtest.h"
#include "util/logging.h"
#include "re2/prog.h"
#include "re2/re2.h"
#include "re2/regexp.h"
@ -113,7 +115,7 @@ TEST(PossibleMatchRange, HandWritten) {
const PrefixTest& t = tests[i];
std::string min, max;
if (j == 0) {
LOG(INFO) << "Checking regexp=" << absl::CEscape(t.regexp);
ABSL_LOG(INFO) << "Checking regexp=" << absl::CEscape(t.regexp);
Regexp* re = Regexp::Parse(t.regexp, Regexp::LikePerl, NULL);
ASSERT_TRUE(re != NULL);
Prog* prog = re->CompileToProg(0);
@ -202,7 +204,7 @@ class PossibleMatchTester : public RegexpGenerator {
void PossibleMatchTester::HandleRegexp(const std::string& regexp) {
regexps_++;
VLOG(3) << absl::CEscape(regexp);
ABSL_VLOG(3) << absl::CEscape(regexp);
RE2 re(regexp, RE2::Latin1);
ASSERT_EQ(re.error(), "");
@ -214,7 +216,8 @@ void PossibleMatchTester::HandleRegexp(const std::string& regexp) {
// complicated expressions.
if(strstr(regexp.c_str(), "\\C*"))
return;
LOG(QFATAL) << "PossibleMatchRange failed on: " << absl::CEscape(regexp);
ABSL_LOG(QFATAL) << "PossibleMatchRange failed on: "
<< absl::CEscape(regexp);
}
strgen_.Reset();
@ -241,8 +244,8 @@ TEST(PossibleMatchRange, Exhaustive) {
RegexpGenerator::EgrepOps(),
stringlen, Explode("ab4"));
t.Generate();
LOG(INFO) << t.regexps() << " regexps, "
<< t.tests() << " tests";
ABSL_LOG(INFO) << t.regexps() << " regexps, "
<< t.tests() << " tests";
}
} // namespace re2

View File

@ -4,7 +4,6 @@
// Random testing of regular expression matching.
#include <stdio.h>
#include <string>
#include <vector>
@ -12,6 +11,7 @@
#include "absl/strings/str_format.h"
#include "gtest/gtest.h"
#include "re2/testing/exhaustive_tester.h"
#include "re2/testing/regexp_generator.h"
ABSL_FLAG(int, regexpseed, 404, "Random regexp seed.");
ABSL_FLAG(int, regexpcount, 100, "How many random regexps to generate.");

View File

@ -9,10 +9,11 @@
#include <stdint.h>
#include <string.h>
#include <optional>
#include "absl/base/macros.h"
#include "absl/log/absl_log.h"
#include "gtest/gtest.h"
#include "util/logging.h"
#include "re2/re2.h"
namespace re2 {
@ -135,10 +136,9 @@ TEST(RE2ArgTest, Uint64Test) {
}
TEST(RE2ArgTest, ParseFromTest) {
#if !defined(_MSC_VER)
struct {
bool ParseFrom(const char* str, size_t n) {
LOG(INFO) << "str = " << str << ", n = " << n;
ABSL_LOG(INFO) << "str = " << str << ", n = " << n;
return true;
}
} obj1;
@ -147,7 +147,7 @@ TEST(RE2ArgTest, ParseFromTest) {
struct {
bool ParseFrom(const char* str, size_t n) {
LOG(INFO) << "str = " << str << ", n = " << n;
ABSL_LOG(INFO) << "str = " << str << ", n = " << n;
return false;
}
// Ensure that RE2::Arg works even with overloaded ParseFrom().
@ -155,11 +155,10 @@ TEST(RE2ArgTest, ParseFromTest) {
} obj2;
RE2::Arg arg2(&obj2);
EXPECT_FALSE(arg2.Parse("two", 3));
#endif
}
TEST(RE2ArgTest, OptionalDoubleTest) {
absl::optional<double> opt;
std::optional<double> opt;
RE2::Arg arg(&opt);
EXPECT_TRUE(arg.Parse(NULL, 0));
EXPECT_FALSE(opt.has_value());
@ -170,7 +169,7 @@ TEST(RE2ArgTest, OptionalDoubleTest) {
}
TEST(RE2ArgTest, OptionalIntWithCRadixTest) {
absl::optional<int> opt;
std::optional<int> opt;
RE2::Arg arg = RE2::CRadix(&opt);
EXPECT_TRUE(arg.Parse(NULL, 0));
EXPECT_FALSE(opt.has_value());

View File

@ -5,26 +5,30 @@
// TODO: Test extractions for PartialMatch/Consume
#include "re2/re2.h"
#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <map>
#include <string>
#include <utility>
#include <vector>
#if !defined(_MSC_VER) && !defined(__CYGWIN__) && !defined(__MINGW32__)
#include <sys/mman.h>
#include <unistd.h> /* for sysconf */
#endif
#include "absl/base/macros.h"
#include "absl/log/absl_log.h"
#include "absl/strings/str_format.h"
#include "absl/strings/string_view.h"
#include "gtest/gtest.h"
#include "util/logging.h"
#include "re2/re2.h"
#include "re2/regexp.h"
#if !defined(_MSC_VER) && !defined(__CYGWIN__) && !defined(__MINGW32__)
#include <sys/mman.h>
#include <unistd.h>
#endif
namespace re2 {
TEST(RE2, HexTests) {
@ -554,14 +558,14 @@ TEST(Capture, NamedGroups) {
RE2 re("(hello world)");
ASSERT_EQ(re.NumberOfCapturingGroups(), 1);
const std::map<std::string, int>& m = re.NamedCapturingGroups();
ASSERT_EQ(m.size(), 0);
ASSERT_EQ(m.size(), size_t{0});
}
{
RE2 re("(?P<A>expr(?P<B>expr)(?P<C>expr))((expr)(?P<D>expr))");
ASSERT_EQ(re.NumberOfCapturingGroups(), 6);
const std::map<std::string, int>& m = re.NamedCapturingGroups();
ASSERT_EQ(m.size(), 4);
ASSERT_EQ(m.size(), size_t{4});
ASSERT_EQ(m.find("A")->second, 1);
ASSERT_EQ(m.find("B")->second, 2);
ASSERT_EQ(m.find("C")->second, 3);
@ -683,7 +687,7 @@ TEST(RE2, FullMatchStringViewArg) {
absl::string_view sp;
// string_view-arg
ASSERT_TRUE(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &sp, &i));
ASSERT_EQ(sp.size(), 4);
ASSERT_EQ(sp.size(), size_t{4});
ASSERT_TRUE(memcmp(sp.data(), "ruby", 4) == 0);
ASSERT_EQ(i, 1234);
}
@ -773,7 +777,7 @@ TEST(RE2, NULTerminated) {
v = static_cast<char*>(mmap(NULL, 2*pagesize, PROT_READ|PROT_WRITE,
MAP_ANONYMOUS|MAP_PRIVATE, -1, 0));
ASSERT_TRUE(v != reinterpret_cast<char*>(-1));
LOG(INFO) << "Memory at " << (void*)v;
ABSL_LOG(INFO) << "Memory at " << reinterpret_cast<void*>(v);
ASSERT_EQ(munmap(v + pagesize, pagesize), 0) << " error " << errno;
v[pagesize - 1] = '1';
@ -792,6 +796,11 @@ TEST(RE2, FullMatchTypeTests) {
ASSERT_TRUE(RE2::FullMatch("Hello", "(H)ello", &c));
ASSERT_EQ(c, 'H');
}
{
signed char c;
ASSERT_TRUE(RE2::FullMatch("Hello", "(H)ello", &c));
ASSERT_EQ(c, static_cast<signed char>('H'));
}
{
unsigned char c;
ASSERT_TRUE(RE2::FullMatch("Hello", "(H)ello", &c));
@ -837,7 +846,7 @@ TEST(RE2, FullMatchTypeTests) {
{
uint32_t v;
static const uint32_t max = UINT32_C(0xffffffff);
ASSERT_TRUE(RE2::FullMatch("100", "(\\d+)", &v)); ASSERT_EQ(v, 100);
ASSERT_TRUE(RE2::FullMatch("100", "(\\d+)", &v)); ASSERT_EQ(v, uint32_t{100});
ASSERT_TRUE(RE2::FullMatch("4294967295", "(\\d+)", &v)); ASSERT_EQ(v, max);
ASSERT_FALSE(RE2::FullMatch("4294967296", "(\\d+)", &v));
ASSERT_FALSE(RE2::FullMatch("-1", "(\\d+)", &v));
@ -875,7 +884,7 @@ TEST(RE2, FullMatchTypeTests) {
static const uint64_t max = UINT64_C(0xffffffffffffffff);
std::string str;
ASSERT_TRUE(RE2::FullMatch("100", "(-?\\d+)", &v)); ASSERT_EQ(v, 100);
ASSERT_TRUE(RE2::FullMatch("100", "(-?\\d+)", &v)); ASSERT_EQ(v, uint64_t{100});
ASSERT_TRUE(RE2::FullMatch("-100", "(-?\\d+)", &v2)); ASSERT_EQ(v2, -100);
str = std::to_string(max);
@ -893,11 +902,11 @@ TEST(RE2, FloatingPointFullMatchTypes) {
float v;
ASSERT_TRUE(RE2::FullMatch("100", "(.*)", &v)); ASSERT_EQ(v, 100);
ASSERT_TRUE(RE2::FullMatch("-100.", "(.*)", &v)); ASSERT_EQ(v, -100);
ASSERT_TRUE(RE2::FullMatch("1e23", "(.*)", &v)); ASSERT_EQ(v, float(1e23));
ASSERT_TRUE(RE2::FullMatch("1e23", "(.*)", &v)); ASSERT_EQ(v, float{1e23});
ASSERT_TRUE(RE2::FullMatch(" 100", "(.*)", &v)); ASSERT_EQ(v, 100);
ASSERT_TRUE(RE2::FullMatch(zeros + "1e23", "(.*)", &v));
ASSERT_EQ(v, float(1e23));
ASSERT_EQ(v, float{1e23});
// 6700000000081920.1 is an edge case.
// 6700000000081920 is exactly halfway between
@ -926,9 +935,11 @@ TEST(RE2, FloatingPointFullMatchTypes) {
double v;
ASSERT_TRUE(RE2::FullMatch("100", "(.*)", &v)); ASSERT_EQ(v, 100);
ASSERT_TRUE(RE2::FullMatch("-100.", "(.*)", &v)); ASSERT_EQ(v, -100);
ASSERT_TRUE(RE2::FullMatch("1e23", "(.*)", &v)); ASSERT_EQ(v, 1e23);
ASSERT_TRUE(RE2::FullMatch("1e23", "(.*)", &v)); ASSERT_EQ(v, double{1e23});
ASSERT_TRUE(RE2::FullMatch(" 100", "(.*)", &v)); ASSERT_EQ(v, 100);
ASSERT_TRUE(RE2::FullMatch(zeros + "1e23", "(.*)", &v));
ASSERT_EQ(v, double(1e23));
ASSERT_EQ(v, double{1e23});
ASSERT_TRUE(RE2::FullMatch("0.1", "(.*)", &v));
ASSERT_EQ(v, 0.1) << absl::StrFormat("%.17g != %.17g", v, 0.1);
@ -1562,7 +1573,7 @@ TEST(RE2, Bug18391750) {
TEST(RE2, Bug18458852) {
// Bug in parser accepting invalid (too large) rune,
// causing compiler to fail in DCHECK in UTF-8
// causing compiler to fail in ABSL_DCHECK() in UTF-8
// character class code.
const char b[] = {
(char)0x28, (char)0x05, (char)0x05, (char)0x41, (char)0x41, (char)0x28,
@ -1598,7 +1609,7 @@ TEST(RE2, Bug18523943) {
TEST(RE2, Bug21371806) {
// Bug in parser accepting Unicode groups in Latin-1 mode,
// causing compiler to fail in DCHECK in prog.cc.
// causing compiler to fail in ABSL_DCHECK() in prog.cc.
RE2::Options opt;
opt.set_encoding(RE2::Options::EncodingLatin1);
@ -1658,4 +1669,31 @@ TEST(RE2, Issue310) {
ASSERT_EQ(m, "") << " got m='" << m << "', want ''";
}
TEST(RE2, Issue477) {
// Regexp::LeadingString didn't output Latin1 into flags.
// In the given pattern, 0xA5 should be factored out, but
// shouldn't lose its Latin1-ness in the process. Because
// that was happening, the prefix for accel was 0xC2 0xA5
// instead of 0xA5. Note that the former doesn't occur in
// the given input and so replacements weren't occurring.
const char bytes[] = {
(char)0xa5, (char)0xd1, (char)0xa5, (char)0xd1,
(char)0x61, (char)0x63, (char)0xa5, (char)0x64,
};
std::string s(bytes, ABSL_ARRAYSIZE(bytes));
RE2 re("\xa5\xd1|\xa5\x64", RE2::Latin1);
int n = RE2::GlobalReplace(&s, re, "");
ASSERT_EQ(n, 3);
ASSERT_EQ(s, "\x61\x63");
}
TEST(RE2, InitNULL) {
// RE2::RE2 accepts NULL. Make sure it keeps doing that.
RE2 re(NULL);
ASSERT_TRUE(re.ok());
ASSERT_TRUE(RE2::FullMatch("", re));
ASSERT_TRUE(RE2::FullMatch("", NULL));
}
} // namespace re2

View File

@ -7,20 +7,22 @@
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <thread>
#include <utility>
#include "absl/container/flat_hash_map.h"
#include "absl/flags/flag.h"
#include "absl/log/absl_check.h"
#include "absl/log/absl_log.h"
#include "absl/strings/str_format.h"
#include "absl/strings/string_view.h"
#include "absl/synchronization/mutex.h"
#include "benchmark/benchmark.h"
#include "util/logging.h"
#include "util/malloc_counter.h"
#include "re2/prog.h"
#include "re2/re2.h"
#include "re2/regexp.h"
#include "util/malloc_counter.h"
#include "util/pcre.h"
namespace re2 {
@ -34,21 +36,22 @@ namespace re2 {
void Test() {
Regexp* re = Regexp::Parse("(\\d+)-(\\d+)-(\\d+)", Regexp::LikePerl, NULL);
CHECK(re);
ABSL_CHECK(re);
Prog* prog = re->CompileToProg(0);
CHECK(prog);
CHECK(prog->IsOnePass());
CHECK(prog->CanBitState());
ABSL_CHECK(prog);
ABSL_CHECK(prog->IsOnePass());
ABSL_CHECK(prog->CanBitState());
const char* text = "650-253-0001";
absl::string_view sp[4];
CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
CHECK_EQ(sp[0], "650-253-0001");
CHECK_EQ(sp[1], "650");
CHECK_EQ(sp[2], "253");
CHECK_EQ(sp[3], "0001");
ABSL_CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch,
sp, 4));
ABSL_CHECK_EQ(sp[0], "650-253-0001");
ABSL_CHECK_EQ(sp[1], "650");
ABSL_CHECK_EQ(sp[2], "253");
ABSL_CHECK_EQ(sp[3], "0001");
delete prog;
re->Decref();
LOG(INFO) << "test passed\n";
ABSL_LOG(INFO) << "test passed\n";
}
void MemoryUsage() {
@ -57,23 +60,25 @@ void MemoryUsage() {
{
MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
CHECK(re);
// Can't pass mc.HeapGrowth() and mc.PeakHeapGrowth() to LOG(INFO) directly,
// because LOG(INFO) might do a big allocation before they get evaluated.
ABSL_CHECK(re);
// Can't pass mc.HeapGrowth() and mc.PeakHeapGrowth() to ABSL_LOG(INFO)
// directly because ABSL_LOG(INFO) might do a big allocation before they
// get evaluated.
absl::FPrintF(stderr, "Regexp: %7d bytes (peak=%d)\n",
mc.HeapGrowth(), mc.PeakHeapGrowth());
mc.Reset();
Prog* prog = re->CompileToProg(0);
CHECK(prog);
CHECK(prog->IsOnePass());
CHECK(prog->CanBitState());
ABSL_CHECK(prog);
ABSL_CHECK(prog->IsOnePass());
ABSL_CHECK(prog->CanBitState());
absl::FPrintF(stderr, "Prog: %7d bytes (peak=%d)\n",
mc.HeapGrowth(), mc.PeakHeapGrowth());
mc.Reset();
absl::string_view sp[4];
CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
ABSL_CHECK(prog->SearchOnePass(text, text, Prog::kAnchored,
Prog::kFullMatch, sp, 4));
absl::FPrintF(stderr, "Search: %7d bytes (peak=%d)\n",
mc.HeapGrowth(), mc.PeakHeapGrowth());
delete prog;
@ -168,7 +173,7 @@ std::string RandomText(int64_t nbytes) {
}
return text;
}();
CHECK_LE(nbytes, 16<<20);
ABSL_CHECK_LE(nbytes, 16<<20);
return text->substr(0, nbytes);
}
@ -319,8 +324,8 @@ void FindAndConsume(benchmark::State& state) {
for (auto _ : state) {
absl::string_view t = s;
absl::string_view u;
CHECK(RE2::FindAndConsume(&t, re, &u));
CHECK_EQ(u, "Hello World");
ABSL_CHECK(RE2::FindAndConsume(&t, re, &u));
ABSL_CHECK_EQ(u, "Hello World");
}
state.SetBytesProcessed(state.iterations() * state.range(0));
}
@ -660,7 +665,7 @@ BENCHMARK(Parse_CachedSplitBig2_RE2)->ThreadRange(1, NumCPUs());
void ParseRegexp(benchmark::State& state, const std::string& regexp) {
for (auto _ : state) {
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
CHECK(re);
ABSL_CHECK(re);
re->Decref();
}
}
@ -668,9 +673,9 @@ void ParseRegexp(benchmark::State& state, const std::string& regexp) {
void SimplifyRegexp(benchmark::State& state, const std::string& regexp) {
for (auto _ : state) {
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
CHECK(re);
ABSL_CHECK(re);
Regexp* sre = re->Simplify();
CHECK(sre);
ABSL_CHECK(sre);
sre->Decref();
re->Decref();
}
@ -678,7 +683,7 @@ void SimplifyRegexp(benchmark::State& state, const std::string& regexp) {
void NullWalkRegexp(benchmark::State& state, const std::string& regexp) {
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
CHECK(re);
ABSL_CHECK(re);
for (auto _ : state) {
re->NullWalk();
}
@ -688,11 +693,11 @@ void NullWalkRegexp(benchmark::State& state, const std::string& regexp) {
void SimplifyCompileRegexp(benchmark::State& state, const std::string& regexp) {
for (auto _ : state) {
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
CHECK(re);
ABSL_CHECK(re);
Regexp* sre = re->Simplify();
CHECK(sre);
ABSL_CHECK(sre);
Prog* prog = sre->CompileToProg(0);
CHECK(prog);
ABSL_CHECK(prog);
delete prog;
sre->Decref();
re->Decref();
@ -702,9 +707,9 @@ void SimplifyCompileRegexp(benchmark::State& state, const std::string& regexp) {
void CompileRegexp(benchmark::State& state, const std::string& regexp) {
for (auto _ : state) {
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
CHECK(re);
ABSL_CHECK(re);
Prog* prog = re->CompileToProg(0);
CHECK(prog);
ABSL_CHECK(prog);
delete prog;
re->Decref();
}
@ -712,10 +717,10 @@ void CompileRegexp(benchmark::State& state, const std::string& regexp) {
void CompileToProg(benchmark::State& state, const std::string& regexp) {
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
CHECK(re);
ABSL_CHECK(re);
for (auto _ : state) {
Prog* prog = re->CompileToProg(0);
CHECK(prog);
ABSL_CHECK(prog);
delete prog;
}
re->Decref();
@ -723,9 +728,9 @@ void CompileToProg(benchmark::State& state, const std::string& regexp) {
void CompileByteMap(benchmark::State& state, const std::string& regexp) {
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
CHECK(re);
ABSL_CHECK(re);
Prog* prog = re->CompileToProg(0);
CHECK(prog);
ABSL_CHECK(prog);
for (auto _ : state) {
prog->ComputeByteMap();
}
@ -736,14 +741,14 @@ void CompileByteMap(benchmark::State& state, const std::string& regexp) {
void CompilePCRE(benchmark::State& state, const std::string& regexp) {
for (auto _ : state) {
PCRE re(regexp, PCRE::UTF8);
CHECK_EQ(re.error(), "");
ABSL_CHECK_EQ(re.error(), "");
}
}
void CompileRE2(benchmark::State& state, const std::string& regexp) {
for (auto _ : state) {
RE2 re(regexp);
CHECK_EQ(re.error(), "");
ABSL_CHECK_EQ(re.error(), "");
}
}
@ -862,14 +867,14 @@ void SearchDFA(benchmark::State& state, const char* regexp,
bool expect_match) {
for (auto _ : state) {
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
CHECK(re);
ABSL_CHECK(re);
Prog* prog = re->CompileToProg(0);
CHECK(prog);
ABSL_CHECK(prog);
bool failed = false;
CHECK_EQ(prog->SearchDFA(text, absl::string_view(), anchor,
Prog::kFirstMatch, NULL, &failed, NULL),
expect_match);
CHECK(!failed);
ABSL_CHECK_EQ(prog->SearchDFA(text, absl::string_view(), anchor,
Prog::kFirstMatch, NULL, &failed, NULL),
expect_match);
ABSL_CHECK(!failed);
delete prog;
re->Decref();
}
@ -880,12 +885,12 @@ void SearchNFA(benchmark::State& state, const char* regexp,
bool expect_match) {
for (auto _ : state) {
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
CHECK(re);
ABSL_CHECK(re);
Prog* prog = re->CompileToProg(0);
CHECK(prog);
CHECK_EQ(prog->SearchNFA(text, absl::string_view(), anchor,
Prog::kFirstMatch, NULL, 0),
expect_match);
ABSL_CHECK(prog);
ABSL_CHECK_EQ(prog->SearchNFA(text, absl::string_view(), anchor,
Prog::kFirstMatch, NULL, 0),
expect_match);
delete prog;
re->Decref();
}
@ -896,12 +901,13 @@ void SearchOnePass(benchmark::State& state, const char* regexp,
bool expect_match) {
for (auto _ : state) {
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
CHECK(re);
ABSL_CHECK(re);
Prog* prog = re->CompileToProg(0);
CHECK(prog);
CHECK(prog->IsOnePass());
CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0),
expect_match);
ABSL_CHECK(prog);
ABSL_CHECK(prog->IsOnePass());
ABSL_CHECK_EQ(
prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0),
expect_match);
delete prog;
re->Decref();
}
@ -912,12 +918,13 @@ void SearchBitState(benchmark::State& state, const char* regexp,
bool expect_match) {
for (auto _ : state) {
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
CHECK(re);
ABSL_CHECK(re);
Prog* prog = re->CompileToProg(0);
CHECK(prog);
CHECK(prog->CanBitState());
CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
expect_match);
ABSL_CHECK(prog);
ABSL_CHECK(prog->CanBitState());
ABSL_CHECK_EQ(
prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
expect_match);
delete prog;
re->Decref();
}
@ -928,11 +935,12 @@ void SearchPCRE(benchmark::State& state, const char* regexp,
bool expect_match) {
for (auto _ : state) {
PCRE re(regexp, PCRE::UTF8);
CHECK_EQ(re.error(), "");
if (anchor == Prog::kAnchored)
CHECK_EQ(PCRE::FullMatch(text, re), expect_match);
else
CHECK_EQ(PCRE::PartialMatch(text, re), expect_match);
ABSL_CHECK_EQ(re.error(), "");
if (anchor == Prog::kAnchored) {
ABSL_CHECK_EQ(PCRE::FullMatch(text, re), expect_match);
} else {
ABSL_CHECK_EQ(PCRE::PartialMatch(text, re), expect_match);
}
}
}
@ -941,11 +949,12 @@ void SearchRE2(benchmark::State& state, const char* regexp,
bool expect_match) {
for (auto _ : state) {
RE2 re(regexp);
CHECK_EQ(re.error(), "");
if (anchor == Prog::kAnchored)
CHECK_EQ(RE2::FullMatch(text, re), expect_match);
else
CHECK_EQ(RE2::PartialMatch(text, re), expect_match);
ABSL_CHECK_EQ(re.error(), "");
if (anchor == Prog::kAnchored) {
ABSL_CHECK_EQ(RE2::FullMatch(text, re), expect_match);
} else {
ABSL_CHECK_EQ(RE2::PartialMatch(text, re), expect_match);
}
}
}
@ -960,9 +969,9 @@ Prog* GetCachedProg(const char* regexp) {
Prog* prog = cache[regexp];
if (prog == NULL) {
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
CHECK(re);
ABSL_CHECK(re);
prog = re->CompileToProg(int64_t{1}<<31); // mostly for the DFA
CHECK(prog);
ABSL_CHECK(prog);
cache[regexp] = prog;
re->Decref();
// We must call this here - while we have exclusive access.
@ -978,7 +987,7 @@ PCRE* GetCachedPCRE(const char* regexp) {
PCRE* re = cache[regexp];
if (re == NULL) {
re = new PCRE(regexp, PCRE::UTF8);
CHECK_EQ(re->error(), "");
ABSL_CHECK_EQ(re->error(), "");
cache[regexp] = re;
}
return re;
@ -991,7 +1000,7 @@ RE2* GetCachedRE2(const char* regexp) {
RE2* re = cache[regexp];
if (re == NULL) {
re = new RE2(regexp);
CHECK_EQ(re->error(), "");
ABSL_CHECK_EQ(re->error(), "");
cache[regexp] = re;
}
return re;
@ -1003,10 +1012,10 @@ void SearchCachedDFA(benchmark::State& state, const char* regexp,
Prog* prog = GetCachedProg(regexp);
for (auto _ : state) {
bool failed = false;
CHECK_EQ(prog->SearchDFA(text, absl::string_view(), anchor,
ABSL_CHECK_EQ(prog->SearchDFA(text, absl::string_view(), anchor,
Prog::kFirstMatch, NULL, &failed, NULL),
expect_match);
CHECK(!failed);
ABSL_CHECK(!failed);
}
}
@ -1015,7 +1024,7 @@ void SearchCachedNFA(benchmark::State& state, const char* regexp,
bool expect_match) {
Prog* prog = GetCachedProg(regexp);
for (auto _ : state) {
CHECK_EQ(prog->SearchNFA(text, absl::string_view(), anchor,
ABSL_CHECK_EQ(prog->SearchNFA(text, absl::string_view(), anchor,
Prog::kFirstMatch, NULL, 0),
expect_match);
}
@ -1025,10 +1034,11 @@ void SearchCachedOnePass(benchmark::State& state, const char* regexp,
absl::string_view text, Prog::Anchor anchor,
bool expect_match) {
Prog* prog = GetCachedProg(regexp);
CHECK(prog->IsOnePass());
ABSL_CHECK(prog->IsOnePass());
for (auto _ : state) {
CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0),
expect_match);
ABSL_CHECK_EQ(
prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0),
expect_match);
}
}
@ -1036,10 +1046,11 @@ void SearchCachedBitState(benchmark::State& state, const char* regexp,
absl::string_view text, Prog::Anchor anchor,
bool expect_match) {
Prog* prog = GetCachedProg(regexp);
CHECK(prog->CanBitState());
ABSL_CHECK(prog->CanBitState());
for (auto _ : state) {
CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
expect_match);
ABSL_CHECK_EQ(
prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
expect_match);
}
}
@ -1048,10 +1059,11 @@ void SearchCachedPCRE(benchmark::State& state, const char* regexp,
bool expect_match) {
PCRE& re = *GetCachedPCRE(regexp);
for (auto _ : state) {
if (anchor == Prog::kAnchored)
CHECK_EQ(PCRE::FullMatch(text, re), expect_match);
else
CHECK_EQ(PCRE::PartialMatch(text, re), expect_match);
if (anchor == Prog::kAnchored) {
ABSL_CHECK_EQ(PCRE::FullMatch(text, re), expect_match);
} else {
ABSL_CHECK_EQ(PCRE::PartialMatch(text, re), expect_match);
}
}
}
@ -1060,10 +1072,11 @@ void SearchCachedRE2(benchmark::State& state, const char* regexp,
bool expect_match) {
RE2& re = *GetCachedRE2(regexp);
for (auto _ : state) {
if (anchor == Prog::kAnchored)
CHECK_EQ(RE2::FullMatch(text, re), expect_match);
else
CHECK_EQ(RE2::PartialMatch(text, re), expect_match);
if (anchor == Prog::kAnchored) {
ABSL_CHECK_EQ(RE2::FullMatch(text, re), expect_match);
} else {
ABSL_CHECK_EQ(RE2::PartialMatch(text, re), expect_match);
}
}
}
@ -1074,11 +1087,11 @@ void Parse3NFA(benchmark::State& state, const char* regexp,
absl::string_view text) {
for (auto _ : state) {
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
CHECK(re);
ABSL_CHECK(re);
Prog* prog = re->CompileToProg(0);
CHECK(prog);
ABSL_CHECK(prog);
absl::string_view sp[4]; // 4 because sp[0] is whole match.
CHECK(prog->SearchNFA(text, absl::string_view(), Prog::kAnchored,
ABSL_CHECK(prog->SearchNFA(text, absl::string_view(), Prog::kAnchored,
Prog::kFullMatch, sp, 4));
delete prog;
re->Decref();
@ -1089,12 +1102,13 @@ void Parse3OnePass(benchmark::State& state, const char* regexp,
absl::string_view text) {
for (auto _ : state) {
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
CHECK(re);
ABSL_CHECK(re);
Prog* prog = re->CompileToProg(0);
CHECK(prog);
CHECK(prog->IsOnePass());
ABSL_CHECK(prog);
ABSL_CHECK(prog->IsOnePass());
absl::string_view sp[4]; // 4 because sp[0] is whole match.
CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
ABSL_CHECK(prog->SearchOnePass(text, text, Prog::kAnchored,
Prog::kFullMatch, sp, 4));
delete prog;
re->Decref();
}
@ -1104,12 +1118,13 @@ void Parse3BitState(benchmark::State& state, const char* regexp,
absl::string_view text) {
for (auto _ : state) {
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
CHECK(re);
ABSL_CHECK(re);
Prog* prog = re->CompileToProg(0);
CHECK(prog);
CHECK(prog->CanBitState());
ABSL_CHECK(prog);
ABSL_CHECK(prog->CanBitState());
absl::string_view sp[4]; // 4 because sp[0] is whole match.
CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
ABSL_CHECK(prog->SearchBitState(text, text, Prog::kAnchored,
Prog::kFullMatch, sp, 4));
delete prog;
re->Decref();
}
@ -1119,11 +1134,12 @@ void Parse3Backtrack(benchmark::State& state, const char* regexp,
absl::string_view text) {
for (auto _ : state) {
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
CHECK(re);
ABSL_CHECK(re);
Prog* prog = re->CompileToProg(0);
CHECK(prog);
ABSL_CHECK(prog);
absl::string_view sp[4]; // 4 because sp[0] is whole match.
CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
ABSL_CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored,
Prog::kFullMatch, sp, 4));
delete prog;
re->Decref();
}
@ -1133,9 +1149,9 @@ void Parse3PCRE(benchmark::State& state, const char* regexp,
absl::string_view text) {
for (auto _ : state) {
PCRE re(regexp, PCRE::UTF8);
CHECK_EQ(re.error(), "");
ABSL_CHECK_EQ(re.error(), "");
absl::string_view sp1, sp2, sp3;
CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
ABSL_CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
}
}
@ -1143,9 +1159,9 @@ void Parse3RE2(benchmark::State& state, const char* regexp,
absl::string_view text) {
for (auto _ : state) {
RE2 re(regexp);
CHECK_EQ(re.error(), "");
ABSL_CHECK_EQ(re.error(), "");
absl::string_view sp1, sp2, sp3;
CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
ABSL_CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
}
}
@ -1154,7 +1170,7 @@ void Parse3CachedNFA(benchmark::State& state, const char* regexp,
Prog* prog = GetCachedProg(regexp);
absl::string_view sp[4]; // 4 because sp[0] is whole match.
for (auto _ : state) {
CHECK(prog->SearchNFA(text, absl::string_view(), Prog::kAnchored,
ABSL_CHECK(prog->SearchNFA(text, absl::string_view(), Prog::kAnchored,
Prog::kFullMatch, sp, 4));
}
}
@ -1162,20 +1178,22 @@ void Parse3CachedNFA(benchmark::State& state, const char* regexp,
void Parse3CachedOnePass(benchmark::State& state, const char* regexp,
absl::string_view text) {
Prog* prog = GetCachedProg(regexp);
CHECK(prog->IsOnePass());
ABSL_CHECK(prog->IsOnePass());
absl::string_view sp[4]; // 4 because sp[0] is whole match.
for (auto _ : state) {
CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
ABSL_CHECK(prog->SearchOnePass(text, text, Prog::kAnchored,
Prog::kFullMatch, sp, 4));
}
}
void Parse3CachedBitState(benchmark::State& state, const char* regexp,
absl::string_view text) {
Prog* prog = GetCachedProg(regexp);
CHECK(prog->CanBitState());
ABSL_CHECK(prog->CanBitState());
absl::string_view sp[4]; // 4 because sp[0] is whole match.
for (auto _ : state) {
CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
ABSL_CHECK(prog->SearchBitState(text, text, Prog::kAnchored,
Prog::kFullMatch, sp, 4));
}
}
@ -1184,7 +1202,8 @@ void Parse3CachedBacktrack(benchmark::State& state, const char* regexp,
Prog* prog = GetCachedProg(regexp);
absl::string_view sp[4]; // 4 because sp[0] is whole match.
for (auto _ : state) {
CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
ABSL_CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored,
Prog::kFullMatch, sp, 4));
}
}
@ -1193,7 +1212,7 @@ void Parse3CachedPCRE(benchmark::State& state, const char* regexp,
PCRE& re = *GetCachedPCRE(regexp);
absl::string_view sp1, sp2, sp3;
for (auto _ : state) {
CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
ABSL_CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
}
}
@ -1202,7 +1221,7 @@ void Parse3CachedRE2(benchmark::State& state, const char* regexp,
RE2& re = *GetCachedRE2(regexp);
absl::string_view sp1, sp2, sp3;
for (auto _ : state) {
CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
ABSL_CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
}
}
@ -1213,12 +1232,12 @@ void Parse1NFA(benchmark::State& state, const char* regexp,
absl::string_view text) {
for (auto _ : state) {
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
CHECK(re);
ABSL_CHECK(re);
Prog* prog = re->CompileToProg(0);
CHECK(prog);
ABSL_CHECK(prog);
absl::string_view sp[2]; // 2 because sp[0] is whole match.
CHECK(prog->SearchNFA(text, absl::string_view(), Prog::kAnchored,
Prog::kFullMatch, sp, 2));
ABSL_CHECK(prog->SearchNFA(text, absl::string_view(), Prog::kAnchored,
Prog::kFullMatch, sp, 2));
delete prog;
re->Decref();
}
@ -1228,12 +1247,13 @@ void Parse1OnePass(benchmark::State& state, const char* regexp,
absl::string_view text) {
for (auto _ : state) {
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
CHECK(re);
ABSL_CHECK(re);
Prog* prog = re->CompileToProg(0);
CHECK(prog);
CHECK(prog->IsOnePass());
ABSL_CHECK(prog);
ABSL_CHECK(prog->IsOnePass());
absl::string_view sp[2]; // 2 because sp[0] is whole match.
CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
ABSL_CHECK(prog->SearchOnePass(text, text, Prog::kAnchored,
Prog::kFullMatch, sp, 2));
delete prog;
re->Decref();
}
@ -1243,12 +1263,13 @@ void Parse1BitState(benchmark::State& state, const char* regexp,
absl::string_view text) {
for (auto _ : state) {
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
CHECK(re);
ABSL_CHECK(re);
Prog* prog = re->CompileToProg(0);
CHECK(prog);
CHECK(prog->CanBitState());
ABSL_CHECK(prog);
ABSL_CHECK(prog->CanBitState());
absl::string_view sp[2]; // 2 because sp[0] is whole match.
CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
ABSL_CHECK(prog->SearchBitState(text, text, Prog::kAnchored,
Prog::kFullMatch, sp, 2));
delete prog;
re->Decref();
}
@ -1258,9 +1279,9 @@ void Parse1PCRE(benchmark::State& state, const char* regexp,
absl::string_view text) {
for (auto _ : state) {
PCRE re(regexp, PCRE::UTF8);
CHECK_EQ(re.error(), "");
ABSL_CHECK_EQ(re.error(), "");
absl::string_view sp1;
CHECK(PCRE::FullMatch(text, re, &sp1));
ABSL_CHECK(PCRE::FullMatch(text, re, &sp1));
}
}
@ -1268,9 +1289,9 @@ void Parse1RE2(benchmark::State& state, const char* regexp,
absl::string_view text) {
for (auto _ : state) {
RE2 re(regexp);
CHECK_EQ(re.error(), "");
ABSL_CHECK_EQ(re.error(), "");
absl::string_view sp1;
CHECK(RE2::FullMatch(text, re, &sp1));
ABSL_CHECK(RE2::FullMatch(text, re, &sp1));
}
}
@ -1279,7 +1300,7 @@ void Parse1CachedNFA(benchmark::State& state, const char* regexp,
Prog* prog = GetCachedProg(regexp);
absl::string_view sp[2]; // 2 because sp[0] is whole match.
for (auto _ : state) {
CHECK(prog->SearchNFA(text, absl::string_view(), Prog::kAnchored,
ABSL_CHECK(prog->SearchNFA(text, absl::string_view(), Prog::kAnchored,
Prog::kFullMatch, sp, 2));
}
}
@ -1287,20 +1308,22 @@ void Parse1CachedNFA(benchmark::State& state, const char* regexp,
void Parse1CachedOnePass(benchmark::State& state, const char* regexp,
absl::string_view text) {
Prog* prog = GetCachedProg(regexp);
CHECK(prog->IsOnePass());
ABSL_CHECK(prog->IsOnePass());
absl::string_view sp[2]; // 2 because sp[0] is whole match.
for (auto _ : state) {
CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
ABSL_CHECK(prog->SearchOnePass(text, text, Prog::kAnchored,
Prog::kFullMatch, sp, 2));
}
}
void Parse1CachedBitState(benchmark::State& state, const char* regexp,
absl::string_view text) {
Prog* prog = GetCachedProg(regexp);
CHECK(prog->CanBitState());
ABSL_CHECK(prog->CanBitState());
absl::string_view sp[2]; // 2 because sp[0] is whole match.
for (auto _ : state) {
CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
ABSL_CHECK(prog->SearchBitState(text, text, Prog::kAnchored,
Prog::kFullMatch, sp, 2));
}
}
@ -1309,7 +1332,8 @@ void Parse1CachedBacktrack(benchmark::State& state, const char* regexp,
Prog* prog = GetCachedProg(regexp);
absl::string_view sp[2]; // 2 because sp[0] is whole match.
for (auto _ : state) {
CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
ABSL_CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored,
Prog::kFullMatch, sp, 2));
}
}
@ -1318,7 +1342,7 @@ void Parse1CachedPCRE(benchmark::State& state, const char* regexp,
PCRE& re = *GetCachedPCRE(regexp);
absl::string_view sp1;
for (auto _ : state) {
CHECK(PCRE::FullMatch(text, re, &sp1));
ABSL_CHECK(PCRE::FullMatch(text, re, &sp1));
}
}
@ -1327,7 +1351,7 @@ void Parse1CachedRE2(benchmark::State& state, const char* regexp,
RE2& re = *GetCachedRE2(regexp);
absl::string_view sp1;
for (auto _ : state) {
CHECK(RE2::FullMatch(text, re, &sp1));
ABSL_CHECK(RE2::FullMatch(text, re, &sp1));
}
}
@ -1336,7 +1360,7 @@ void SearchParse2CachedPCRE(benchmark::State& state, const char* regexp,
PCRE& re = *GetCachedPCRE(regexp);
for (auto _ : state) {
absl::string_view sp1, sp2;
CHECK(PCRE::PartialMatch(text, re, &sp1, &sp2));
ABSL_CHECK(PCRE::PartialMatch(text, re, &sp1, &sp2));
}
}
@ -1345,7 +1369,7 @@ void SearchParse2CachedRE2(benchmark::State& state, const char* regexp,
RE2& re = *GetCachedRE2(regexp);
for (auto _ : state) {
absl::string_view sp1, sp2;
CHECK(RE2::PartialMatch(text, re, &sp1, &sp2));
ABSL_CHECK(RE2::PartialMatch(text, re, &sp1, &sp2));
}
}
@ -1354,7 +1378,7 @@ void SearchParse1CachedPCRE(benchmark::State& state, const char* regexp,
PCRE& re = *GetCachedPCRE(regexp);
for (auto _ : state) {
absl::string_view sp1;
CHECK(PCRE::PartialMatch(text, re, &sp1));
ABSL_CHECK(PCRE::PartialMatch(text, re, &sp1));
}
}
@ -1363,7 +1387,7 @@ void SearchParse1CachedRE2(benchmark::State& state, const char* regexp,
RE2& re = *GetCachedRE2(regexp);
for (auto _ : state) {
absl::string_view sp1;
CHECK(RE2::PartialMatch(text, re, &sp1));
ABSL_CHECK(RE2::PartialMatch(text, re, &sp1));
}
}
@ -1499,7 +1523,7 @@ void FullMatchPCRE(benchmark::State& state, const char *regexp) {
s += "ABCDEFGHIJ";
PCRE re(regexp);
for (auto _ : state) {
CHECK(PCRE::FullMatch(s, re));
ABSL_CHECK(PCRE::FullMatch(s, re));
}
state.SetBytesProcessed(state.iterations() * state.range(0));
}
@ -1509,19 +1533,31 @@ void FullMatchRE2(benchmark::State& state, const char *regexp) {
s += "ABCDEFGHIJ";
RE2 re(regexp, RE2::Latin1);
for (auto _ : state) {
CHECK(RE2::FullMatch(s, re));
ABSL_CHECK(RE2::FullMatch(s, re));
}
state.SetBytesProcessed(state.iterations() * state.range(0));
}
void FullMatch_DotStar_CachedPCRE(benchmark::State& state) { FullMatchPCRE(state, "(?s).*"); }
void FullMatch_DotStar_CachedRE2(benchmark::State& state) { FullMatchRE2(state, "(?s).*"); }
void FullMatch_DotStar_CachedPCRE(benchmark::State& state) {
FullMatchPCRE(state, "(?s).*");
}
void FullMatch_DotStar_CachedRE2(benchmark::State& state) {
FullMatchRE2(state, "(?s).*");
}
void FullMatch_DotStarDollar_CachedPCRE(benchmark::State& state) { FullMatchPCRE(state, "(?s).*$"); }
void FullMatch_DotStarDollar_CachedRE2(benchmark::State& state) { FullMatchRE2(state, "(?s).*$"); }
void FullMatch_DotStarDollar_CachedPCRE(benchmark::State& state) {
FullMatchPCRE(state, "(?s).*$");
}
void FullMatch_DotStarDollar_CachedRE2(benchmark::State& state) {
FullMatchRE2(state, "(?s).*$");
}
void FullMatch_DotStarCapture_CachedPCRE(benchmark::State& state) { FullMatchPCRE(state, "(?s)((.*)()()($))"); }
void FullMatch_DotStarCapture_CachedRE2(benchmark::State& state) { FullMatchRE2(state, "(?s)((.*)()()($))"); }
void FullMatch_DotStarCapture_CachedPCRE(benchmark::State& state) {
FullMatchPCRE(state, "(?s)((.*)()()($))");
}
void FullMatch_DotStarCapture_CachedRE2(benchmark::State& state) {
FullMatchRE2(state, "(?s)((.*)()()($))");
}
#ifdef USEPCRE
BENCHMARK_RANGE(FullMatch_DotStar_CachedPCRE, 8, 2<<20);
@ -1544,7 +1580,7 @@ void PossibleMatchRangeCommon(benchmark::State& state, const char* regexp) {
std::string max;
const int kMaxLen = 16;
for (auto _ : state) {
CHECK(re.PossibleMatchRange(&min, &max, kMaxLen));
ABSL_CHECK(re.PossibleMatchRange(&min, &max, kMaxLen));
}
}

View File

@ -20,22 +20,26 @@
// Then RunPostfix turns each sequence into a regular expression
// and passes the regexp to HandleRegexp.
#include "re2/testing/regexp_generator.h"
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <memory>
#include <random>
#include <stack>
#include <string>
#include <vector>
#include "absl/base/macros.h"
#include "absl/log/absl_check.h"
#include "absl/log/absl_log.h"
#include "absl/strings/escaping.h"
#include "absl/strings/str_format.h"
#include "gtest/gtest.h"
#include "util/logging.h"
#include "absl/strings/string_view.h"
#include "util/utf.h"
#include "re2/testing/regexp_generator.h"
namespace re2 {
@ -196,13 +200,13 @@ void RegexpGenerator::RunPostfix(const std::vector<std::string>& post) {
for (size_t i = 0; i < post.size(); i++) {
switch (CountArgs(post[i])) {
default:
LOG(FATAL) << "Bad operator: " << post[i];
ABSL_LOG(FATAL) << "Bad operator: " << post[i];
case 0:
regexps.push(post[i]);
break;
case 1: {
auto fmt = absl::ParsedFormat<'s'>::New(post[i]);
CHECK(fmt != nullptr);
ABSL_CHECK(fmt != nullptr);
std::string a = regexps.top();
regexps.pop();
regexps.push("(?:" + absl::StrFormat(*fmt, a) + ")");
@ -210,7 +214,7 @@ void RegexpGenerator::RunPostfix(const std::vector<std::string>& post) {
}
case 2: {
auto fmt = absl::ParsedFormat<'s', 's'>::New(post[i]);
CHECK(fmt != nullptr);
ABSL_CHECK(fmt != nullptr);
std::string b = regexps.top();
regexps.pop();
std::string a = regexps.top();
@ -232,7 +236,7 @@ void RegexpGenerator::RunPostfix(const std::vector<std::string>& post) {
absl::PrintF(" %s\n", absl::CEscape(regexps.top()));
regexps.pop();
}
LOG(FATAL) << "Bad regexp program.";
ABSL_LOG(FATAL) << "Bad regexp program.";
}
HandleRegexp(regexps.top());

View File

@ -9,6 +9,7 @@
// regular expressions within given parameters (see below for details).
#include <stdint.h>
#include <random>
#include <string>
#include <vector>

View File

@ -4,14 +4,15 @@
// Test parse.cc, dump.cc, and tostring.cc.
#include "re2/regexp.h"
#include <stddef.h>
#include <map>
#include <string>
#include <vector>
#include "gtest/gtest.h"
#include "util/logging.h"
#include "re2/regexp.h"
namespace re2 {
@ -53,8 +54,8 @@ TEST(Regexp, NamedCaptures) {
EXPECT_EQ(4, x->NumCaptures());
const std::map<std::string, int>* have = x->NamedCaptures();
EXPECT_TRUE(have != NULL);
EXPECT_EQ(2, have->size()); // there are only two named groups in
// the regexp: 'g1' and 'g2'.
// there are only two named groups in the regexp: 'g1' and 'g2'.
EXPECT_EQ(size_t{2}, have->size());
std::map<std::string, int> want;
want["g1"] = 1;
want["g2"] = 3;
@ -72,7 +73,7 @@ TEST(Regexp, CaptureNames) {
EXPECT_EQ(4, x->NumCaptures());
const std::map<int, std::string>* have = x->CaptureNames();
EXPECT_TRUE(have != NULL);
EXPECT_EQ(3, have->size());
EXPECT_EQ(size_t{3}, have->size());
std::map<int, std::string> want;
want[1] = "g1";
want[3] = "g2";

View File

@ -2,11 +2,12 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include <stddef.h>
#include <string>
#include "absl/base/macros.h"
#include "gtest/gtest.h"
#include "util/logging.h"
#include "re2/prog.h"
#include "re2/regexp.h"

View File

@ -2,12 +2,15 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include <stddef.h>
#include <string>
#include <vector>
#include "absl/base/macros.h"
#include "gtest/gtest.h"
#include "re2/prog.h"
#include "re2/regexp.h"
#include "re2/testing/tester.h"
#include "re2/testing/exhaustive_tester.h"
#include "re2/testing/tester.h"
// For target `log' in the Makefile.
#ifndef LOGGING

View File

@ -2,25 +2,30 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "re2/set.h"
#include <stddef.h>
#include <string>
#include <vector>
#include <utility>
#include <vector>
#include "gtest/gtest.h"
#include "util/logging.h"
#include "re2/re2.h"
#include "re2/set.h"
namespace re2 {
TEST(Set, Unanchored) {
RE2::Set s(RE2::DefaultOptions, RE2::UNANCHORED);
ASSERT_EQ(s.Size(), 0);
ASSERT_EQ(s.Add("foo", NULL), 0);
ASSERT_EQ(s.Size(), 1);
ASSERT_EQ(s.Add("(", NULL), -1);
ASSERT_EQ(s.Size(), 1);
ASSERT_EQ(s.Add("bar", NULL), 1);
ASSERT_EQ(s.Size(), 2);
ASSERT_EQ(s.Compile(), true);
ASSERT_EQ(s.Size(), 2);
ASSERT_EQ(s.Match("foobar", NULL), true);
ASSERT_EQ(s.Match("fooba", NULL), true);
@ -28,16 +33,16 @@ TEST(Set, Unanchored) {
std::vector<int> v;
ASSERT_EQ(s.Match("foobar", &v), true);
ASSERT_EQ(v.size(), 2);
ASSERT_EQ(v.size(), size_t{2});
ASSERT_EQ(v[0], 0);
ASSERT_EQ(v[1], 1);
ASSERT_EQ(s.Match("fooba", &v), true);
ASSERT_EQ(v.size(), 1);
ASSERT_EQ(v.size(), size_t{1});
ASSERT_EQ(v[0], 0);
ASSERT_EQ(s.Match("oobar", &v), true);
ASSERT_EQ(v.size(), 1);
ASSERT_EQ(v.size(), size_t{1});
ASSERT_EQ(v[0], 1);
}
@ -56,21 +61,21 @@ TEST(Set, UnanchoredFactored) {
std::vector<int> v;
ASSERT_EQ(s.Match("foobar", &v), true);
ASSERT_EQ(v.size(), 2);
ASSERT_EQ(v.size(), size_t{2});
ASSERT_EQ(v[0], 0);
ASSERT_EQ(v[1], 1);
ASSERT_EQ(s.Match("obarfoobaroo", &v), true);
ASSERT_EQ(v.size(), 2);
ASSERT_EQ(v.size(), size_t{2});
ASSERT_EQ(v[0], 0);
ASSERT_EQ(v[1], 1);
ASSERT_EQ(s.Match("fooba", &v), true);
ASSERT_EQ(v.size(), 1);
ASSERT_EQ(v.size(), size_t{1});
ASSERT_EQ(v[0], 0);
ASSERT_EQ(s.Match("oobar", &v), false);
ASSERT_EQ(v.size(), 0);
ASSERT_EQ(v.size(), size_t{0});
}
TEST(Set, UnanchoredDollar) {
@ -84,11 +89,11 @@ TEST(Set, UnanchoredDollar) {
std::vector<int> v;
ASSERT_EQ(s.Match("foo", &v), true);
ASSERT_EQ(v.size(), 1);
ASSERT_EQ(v.size(), size_t{1});
ASSERT_EQ(v[0], 0);
ASSERT_EQ(s.Match("foobar", &v), false);
ASSERT_EQ(v.size(), 0);
ASSERT_EQ(v.size(), size_t{0});
}
TEST(Set, UnanchoredWordBoundary) {
@ -103,14 +108,14 @@ TEST(Set, UnanchoredWordBoundary) {
std::vector<int> v;
ASSERT_EQ(s.Match("foo", &v), true);
ASSERT_EQ(v.size(), 1);
ASSERT_EQ(v.size(), size_t{1});
ASSERT_EQ(v[0], 0);
ASSERT_EQ(s.Match("foobar", &v), false);
ASSERT_EQ(v.size(), 0);
ASSERT_EQ(v.size(), size_t{0});
ASSERT_EQ(s.Match("foo bar", &v), true);
ASSERT_EQ(v.size(), 1);
ASSERT_EQ(v.size(), size_t{1});
ASSERT_EQ(v[0], 0);
}
@ -130,20 +135,20 @@ TEST(Set, Anchored) {
std::vector<int> v;
ASSERT_EQ(s.Match("foobar", &v), false);
ASSERT_EQ(v.size(), 0);
ASSERT_EQ(v.size(), size_t{0});
ASSERT_EQ(s.Match("fooba", &v), false);
ASSERT_EQ(v.size(), 0);
ASSERT_EQ(v.size(), size_t{0});
ASSERT_EQ(s.Match("oobar", &v), false);
ASSERT_EQ(v.size(), 0);
ASSERT_EQ(v.size(), size_t{0});
ASSERT_EQ(s.Match("foo", &v), true);
ASSERT_EQ(v.size(), 1);
ASSERT_EQ(v.size(), size_t{1});
ASSERT_EQ(v[0], 0);
ASSERT_EQ(s.Match("bar", &v), true);
ASSERT_EQ(v.size(), 1);
ASSERT_EQ(v.size(), size_t{1});
ASSERT_EQ(v[0], 1);
}
@ -157,10 +162,10 @@ TEST(Set, EmptyUnanchored) {
std::vector<int> v;
ASSERT_EQ(s.Match("", &v), false);
ASSERT_EQ(v.size(), 0);
ASSERT_EQ(v.size(), size_t{0});
ASSERT_EQ(s.Match("foobar", &v), false);
ASSERT_EQ(v.size(), 0);
ASSERT_EQ(v.size(), size_t{0});
}
TEST(Set, EmptyAnchored) {
@ -173,10 +178,10 @@ TEST(Set, EmptyAnchored) {
std::vector<int> v;
ASSERT_EQ(s.Match("", &v), false);
ASSERT_EQ(v.size(), 0);
ASSERT_EQ(v.size(), size_t{0});
ASSERT_EQ(s.Match("foobar", &v), false);
ASSERT_EQ(v.size(), 0);
ASSERT_EQ(v.size(), size_t{0});
}
TEST(Set, Prefix) {
@ -191,14 +196,14 @@ TEST(Set, Prefix) {
std::vector<int> v;
ASSERT_EQ(s.Match("/prefix", &v), false);
ASSERT_EQ(v.size(), 0);
ASSERT_EQ(v.size(), size_t{0});
ASSERT_EQ(s.Match("/prefix/", &v), true);
ASSERT_EQ(v.size(), 1);
ASSERT_EQ(v.size(), size_t{1});
ASSERT_EQ(v[0], 0);
ASSERT_EQ(s.Match("/prefix/42", &v), true);
ASSERT_EQ(v.size(), 1);
ASSERT_EQ(v.size(), size_t{1});
ASSERT_EQ(v[0], 0);
}

View File

@ -5,11 +5,10 @@
// Test simplify.cc.
#include <string.h>
#include <string>
#include "absl/base/macros.h"
#include "absl/log/absl_log.h"
#include "gtest/gtest.h"
#include "util/logging.h"
#include "re2/regexp.h"
namespace re2 {
@ -264,7 +263,7 @@ static Test tests[] = {
TEST(TestSimplify, SimpleRegexps) {
for (size_t i = 0; i < ABSL_ARRAYSIZE(tests); i++) {
RegexpStatus status;
VLOG(1) << "Testing " << tests[i].regexp;
ABSL_VLOG(1) << "Testing " << tests[i].regexp;
Regexp* re = Regexp::Parse(tests[i].regexp,
Regexp::MatchNL | (Regexp::LikePerl &
~Regexp::OneLine),

View File

@ -6,14 +6,17 @@
// maxlen letters using the set of letters in alpha.
// Fetch strings using a Java-like Next()/HasNext() interface.
#include "re2/testing/string_generator.h"
#include <stddef.h>
#include <stdint.h>
#include <random>
#include <string>
#include <vector>
#include "gtest/gtest.h"
#include "util/logging.h"
#include "re2/testing/string_generator.h"
#include "absl/log/absl_check.h"
#include "absl/strings/string_view.h"
namespace re2 {
@ -82,7 +85,7 @@ bool StringGenerator::RandomDigits() {
// after computing the string, so that it knows the answer
// for subsequent HasNext() calls.
absl::string_view StringGenerator::Next() {
CHECK(hasnext_);
ABSL_CHECK(hasnext_);
if (generate_null_) {
generate_null_ = false;
sp_ = absl::string_view();
@ -112,8 +115,8 @@ void StringGenerator::GenerateNULL() {
}
std::string DeBruijnString(int n) {
CHECK_GE(n, 1);
CHECK_LE(n, 29);
ABSL_CHECK_GE(n, 1);
ABSL_CHECK_LE(n, 29);
const size_t size = size_t{1} << static_cast<size_t>(n);
const size_t mask = size - 1;
std::vector<bool> did(size, false);
@ -131,10 +134,10 @@ std::string DeBruijnString(int n) {
} else {
s += '0';
}
CHECK(!did[bits]);
ABSL_CHECK(!did[bits]);
did[bits] = true;
}
CHECK_EQ(s.size(), static_cast<size_t>(n - 1) + size);
ABSL_CHECK_EQ(s.size(), static_cast<size_t>(n - 1) + size);
return s;
}

View File

@ -10,6 +10,7 @@
// Fetch strings using a Java-like Next()/HasNext() interface.
#include <stdint.h>
#include <random>
#include <string>
#include <vector>

View File

@ -4,13 +4,17 @@
// Test StringGenerator.
#include "re2/testing/string_generator.h"
#include <stddef.h>
#include <stdint.h>
#include <string>
#include "absl/strings/string_view.h"
#include "gtest/gtest.h"
#include "util/utf.h"
#include "re2/testing/string_generator.h"
#include "re2/testing/regexp_generator.h"
#include "util/utf.h"
namespace re2 {
@ -43,7 +47,7 @@ static void RunTest(int len, const std::string& alphabet, bool donull) {
EXPECT_TRUE(g.HasNext());
absl::string_view sp = g.Next();
EXPECT_EQ(sp.data(), static_cast<const char*>(NULL));
EXPECT_EQ(sp.size(), 0);
EXPECT_EQ(sp.size(), size_t{0});
}
while (g.HasNext()) {

View File

@ -4,20 +4,25 @@
// Regular expression engine tester -- test all the implementations against each other.
#include "re2/testing/tester.h"
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <string>
#include "absl/base/macros.h"
#include "absl/flags/flag.h"
#include "absl/log/absl_check.h"
#include "absl/log/absl_log.h"
#include "absl/strings/escaping.h"
#include "absl/strings/str_format.h"
#include "util/logging.h"
#include "re2/testing/tester.h"
#include "absl/strings/string_view.h"
#include "re2/prog.h"
#include "re2/re2.h"
#include "re2/regexp.h"
#include "util/pcre.h"
ABSL_FLAG(bool, dump_prog, false, "dump regexp program");
ABSL_FLAG(bool, log_okay, false, "log successful runs");
@ -50,9 +55,9 @@ const char* engine_names[kEngineMax] = {
// Returns the name of the engine.
static const char* EngineName(Engine e) {
CHECK_GE(e, 0);
CHECK_LT(e, ABSL_ARRAYSIZE(engine_names));
CHECK(engine_names[e] != NULL);
ABSL_CHECK_GE(e, 0);
ABSL_CHECK_LT(e, ABSL_ARRAYSIZE(engine_names));
ABSL_CHECK(engine_names[e] != NULL);
return engine_names[e];
}
@ -73,12 +78,12 @@ static uint32_t Engines() {
}
if (cached_engines == 0)
LOG(INFO) << "Warning: no engines enabled.";
ABSL_LOG(INFO) << "Warning: no engines enabled.";
if (!UsingPCRE)
cached_engines &= ~(1<<kEnginePCRE);
for (Engine i = static_cast<Engine>(0); i < kEngineMax; i++) {
if (cached_engines & (1<<i))
LOG(INFO) << EngineName(i) << " enabled";
ABSL_LOG(INFO) << EngineName(i) << " enabled";
}
did_parse = true;
@ -196,45 +201,46 @@ TestInstance::TestInstance(absl::string_view regexp_str, Prog::MatchKind kind,
re_(NULL),
re2_(NULL) {
VLOG(1) << absl::CEscape(regexp_str);
ABSL_VLOG(1) << absl::CEscape(regexp_str);
// Compile regexp to prog.
// Always required - needed for backtracking (reference implementation).
RegexpStatus status;
regexp_ = Regexp::Parse(regexp_str, flags, &status);
if (regexp_ == NULL) {
LOG(INFO) << "Cannot parse: " << absl::CEscape(regexp_str_)
<< " mode: " << FormatMode(flags);
ABSL_LOG(INFO) << "Cannot parse: " << absl::CEscape(regexp_str_)
<< " mode: " << FormatMode(flags);
error_ = true;
return;
}
num_captures_ = regexp_->NumCaptures();
prog_ = regexp_->CompileToProg(0);
if (prog_ == NULL) {
LOG(INFO) << "Cannot compile: " << absl::CEscape(regexp_str_);
ABSL_LOG(INFO) << "Cannot compile: " << absl::CEscape(regexp_str_);
error_ = true;
return;
}
if (absl::GetFlag(FLAGS_dump_prog)) {
LOG(INFO) << "Prog for "
<< " regexp "
<< absl::CEscape(regexp_str_)
<< " (" << FormatKind(kind_)
<< ", " << FormatMode(flags_)
<< ")\n"
<< prog_->Dump();
ABSL_LOG(INFO) << "Prog for "
<< " regexp "
<< absl::CEscape(regexp_str_)
<< " (" << FormatKind(kind_)
<< ", " << FormatMode(flags_)
<< ")\n"
<< prog_->Dump();
}
// Compile regexp to reversed prog. Only needed for DFA engines.
if (Engines() & ((1<<kEngineDFA)|(1<<kEngineDFA1))) {
rprog_ = regexp_->CompileToReverseProg(0);
if (rprog_ == NULL) {
LOG(INFO) << "Cannot reverse compile: " << absl::CEscape(regexp_str_);
ABSL_LOG(INFO) << "Cannot reverse compile: "
<< absl::CEscape(regexp_str_);
error_ = true;
return;
}
if (absl::GetFlag(FLAGS_dump_rprog))
LOG(INFO) << rprog_->Dump();
ABSL_LOG(INFO) << rprog_->Dump();
}
// Create re string that will be used for RE and RE2.
@ -257,7 +263,7 @@ TestInstance::TestInstance(absl::string_view regexp_str, Prog::MatchKind kind,
options.set_longest_match(true);
re2_ = new RE2(re, options);
if (!re2_->error().empty()) {
LOG(INFO) << "Cannot RE2: " << absl::CEscape(re);
ABSL_LOG(INFO) << "Cannot RE2: " << absl::CEscape(re);
error_ = true;
return;
}
@ -283,7 +289,7 @@ TestInstance::TestInstance(absl::string_view regexp_str, Prog::MatchKind kind,
// add one more layer of parens.
re_ = new PCRE("("+re+")", o);
if (!re_->error().empty()) {
LOG(INFO) << "Cannot PCRE: " << absl::CEscape(re);
ABSL_LOG(INFO) << "Cannot PCRE: " << absl::CEscape(re);
error_ = true;
return;
}
@ -318,7 +324,7 @@ void TestInstance::RunSearch(Engine type, absl::string_view orig_text,
switch (type) {
default:
LOG(FATAL) << "Bad RunSearch type: " << (int)type;
ABSL_LOG(FATAL) << "Bad RunSearch type: " << (int)type;
case kEngineBacktrack:
if (prog_ == NULL) {
@ -366,9 +372,9 @@ void TestInstance::RunSearch(Engine type, absl::string_view orig_text,
Prog::kAnchored, Prog::kLongestMatch,
result->submatch,
&result->skipped, NULL)) {
LOG(ERROR) << "Reverse DFA inconsistency: "
<< absl::CEscape(regexp_str_)
<< " on " << absl::CEscape(text);
ABSL_LOG(ERROR) << "Reverse DFA inconsistency: "
<< absl::CEscape(regexp_str_)
<< " on " << absl::CEscape(text);
result->matched = false;
}
}
@ -520,16 +526,16 @@ bool TestInstance::RunCase(absl::string_view text, absl::string_view context,
if (correct.skipped) {
if (regexp_ == NULL)
return true;
LOG(ERROR) << "Skipped backtracking! " << absl::CEscape(regexp_str_)
<< " " << FormatMode(flags_);
ABSL_LOG(ERROR) << "Skipped backtracking! " << absl::CEscape(regexp_str_)
<< " " << FormatMode(flags_);
return false;
}
VLOG(1) << "Try: regexp " << absl::CEscape(regexp_str_)
<< " text " << absl::CEscape(text)
<< " (" << FormatKind(kind_)
<< ", " << FormatAnchor(anchor)
<< ", " << FormatMode(flags_)
<< ")";
ABSL_VLOG(1) << "Try: regexp " << absl::CEscape(regexp_str_)
<< " text " << absl::CEscape(text)
<< " (" << FormatKind(kind_)
<< ", " << FormatAnchor(anchor)
<< ", " << FormatMode(flags_)
<< ")";
// Compare the others.
bool all_okay = true;
@ -560,22 +566,22 @@ bool TestInstance::RunCase(absl::string_view text, absl::string_view context,
context, anchor);
if (r.matched != correct.matched) {
if (r.matched) {
LOG(INFO) << " Should not match (but does).";
ABSL_LOG(INFO) << " Should not match (but does).";
} else {
LOG(INFO) << " Should match (but does not).";
ABSL_LOG(INFO) << " Should match (but does not).";
continue;
}
}
for (int i = 0; i < 1+num_captures_; i++) {
if (r.submatch[i].data() != correct.submatch[i].data() ||
r.submatch[i].size() != correct.submatch[i].size()) {
LOG(INFO) <<
ABSL_LOG(INFO) <<
absl::StrFormat(" $%d: should be %s is %s",
i,
FormatCapture(text, correct.submatch[i]),
FormatCapture(text, r.submatch[i]));
} else {
LOG(INFO) <<
ABSL_LOG(INFO) <<
absl::StrFormat(" $%d: %s ok", i,
FormatCapture(text, r.submatch[i]));
}
@ -587,7 +593,7 @@ bool TestInstance::RunCase(absl::string_view text, absl::string_view context,
// and that is desirable because we want to enforce a global limit.
static int max_regexp_failures = absl::GetFlag(FLAGS_max_regexp_failures);
if (max_regexp_failures > 0 && --max_regexp_failures == 0)
LOG(QFATAL) << "Too many regexp failures.";
ABSL_LOG(QFATAL) << "Too many regexp failures.";
}
return all_okay;
@ -596,7 +602,7 @@ bool TestInstance::RunCase(absl::string_view text, absl::string_view context,
void TestInstance::LogMatch(const char* prefix, Engine e,
absl::string_view text, absl::string_view context,
Prog::Anchor anchor) {
LOG(INFO) << prefix
ABSL_LOG(INFO) << prefix
<< EngineName(e)
<< " regexp "
<< absl::CEscape(regexp_str_)

View File

@ -12,8 +12,8 @@
#include "absl/strings/string_view.h"
#include "re2/prog.h"
#include "re2/regexp.h"
#include "re2/re2.h"
#include "re2/regexp.h"
#include "util/pcre.h"
namespace re2 {

View File

@ -6,13 +6,14 @@
// Tested by parse_test.cc
#include <string.h>
#include <string>
#include "absl/log/absl_log.h"
#include "absl/strings/str_format.h"
#include "util/logging.h"
#include "util/utf.h"
#include "re2/regexp.h"
#include "re2/walker-inl.h"
#include "util/utf.h"
namespace re2 {
@ -101,7 +102,7 @@ int ToStringWalker::PreVisit(Regexp* re, int parent_arg, bool* stop) {
case kRegexpCapture:
t_->append("(");
if (re->cap() == 0)
LOG(DFATAL) << "kRegexpCapture cap() == 0";
ABSL_LOG(DFATAL) << "kRegexpCapture cap() == 0";
if (re->name()) {
t_->append("?P<");
t_->append(*re->name());
@ -184,7 +185,7 @@ int ToStringWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg,
if ((*t_)[t_->size()-1] == '|')
t_->erase(t_->size()-1);
else
LOG(DFATAL) << "Bad final char: " << t_;
ABSL_LOG(DFATAL) << "Bad final char: " << t_;
if (prec < PrecAlternate)
t_->append(")");
break;

View File

@ -75,25 +75,6 @@ def _URange(s):
raise InputError("invalid Unicode range %s" % (s,))
def _UStr(v):
"""Converts Unicode code point to hex string.
0x263a => '0x263A'.
Args:
v: code point to convert
Returns:
Unicode string
Raises:
InputError: the argument is not a valid Unicode value.
"""
if v < 0 or v > _RUNE_MAX:
raise InputError("invalid Unicode value %s" % (v,))
return "0x%04X" % (v,)
def _ParseContinue(s):
"""Parses a Unicode continuation field.

View File

@ -16,7 +16,8 @@
#include <stack>
#include "absl/base/macros.h"
#include "util/logging.h"
#include "absl/log/absl_check.h"
#include "absl/log/absl_log.h"
#include "re2/regexp.h"
namespace re2 {
@ -147,7 +148,7 @@ template<typename T> Regexp::Walker<T>::~Walker() {
// Logs DFATAL if stack is not already clear.
template<typename T> void Regexp::Walker<T>::Reset() {
if (!stack_.empty()) {
LOG(DFATAL) << "Stack not empty.";
ABSL_LOG(DFATAL) << "Stack not empty.";
while (!stack_.empty()) {
if (stack_.top().re->nsub_ > 1)
delete[] stack_.top().child_args;
@ -161,7 +162,7 @@ template<typename T> T Regexp::Walker<T>::WalkInternal(Regexp* re, T top_arg,
Reset();
if (re == NULL) {
LOG(DFATAL) << "Walk NULL";
ABSL_LOG(DFATAL) << "Walk NULL";
return top_arg;
}
@ -191,7 +192,7 @@ template<typename T> T Regexp::Walker<T>::WalkInternal(Regexp* re, T top_arg,
s->child_args = &s->child_arg;
else if (re->nsub_ > 1)
s->child_args = new T[re->nsub_];
ABSL_FALLTHROUGH_INTENDED;
[[fallthrough]];
}
default: {
if (re->nsub_ > 0) {

View File

@ -1,28 +0,0 @@
# Copyright 2022 The RE2 Authors. All Rights Reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
# Package configuration template for re2: every @...@ placeholder below is
# substituted when this file is generated.
# NOTE(review): @PACKAGE_INIT@, set_and_check() and
# check_required_components() look like the CMakePackageConfigHelpers
# facilities -- confirm against the CMakeLists.txt that configures this file.
@PACKAGE_INIT@
include(CMakeFindDependencyMacro)
# Records the include directory relative to the package prefix.
set_and_check(re2_INCLUDE_DIR ${PACKAGE_PREFIX_DIR}/@CMAKE_INSTALL_INCLUDEDIR@)
# Threads is only looked up on UNIX, with THREADS_PREFER_PTHREAD_FLAG enabled.
if(UNIX)
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_dependency(Threads REQUIRED)
endif()
find_dependency(absl REQUIRED)
# ICU (component "uc") is a dependency only when the @RE2_USE_ICU@
# placeholder is substituted with a true value.
if(@RE2_USE_ICU@)
find_dependency(ICU REQUIRED COMPONENTS uc)
endif()
check_required_components(re2)
# Stop here if the re2::re2 target already exists, so the targets file is
# not included a second time.
if(TARGET re2::re2)
return()
endif()
include(${CMAKE_CURRENT_LIST_DIR}/re2Targets.cmake)

View File

@ -1,33 +0,0 @@
#!/usr/bin/env sh
# Runs every test binary named on the command line, stores each binary's
# combined stdout/stderr in "<binary>.log" and prints one PASS/FAIL line per
# binary.
#
# Usage: runtests [-shared-library-path DIR] TEST...
# Exit status: 0 when every test exited successfully, 1 otherwise.

# System Integrity Protection on Darwin complicated these matters somewhat.
# See https://github.com/google/re2/issues/175 for details.
if [ "x$1" = "x-shared-library-path" ]; then
  # Prepend DIR to the loader search path. The ${VAR:+:$VAR} form only adds
  # the ':' separator when the variable already has a value, so no empty
  # path entry (which ld.so treats as the current directory) is created.
  if [ "x$(uname)" = "xDarwin" ]; then
    DYLD_LIBRARY_PATH="$2${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"
    export DYLD_LIBRARY_PATH
  else
    LD_LIBRARY_PATH="$2${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
    export LD_LIBRARY_PATH
  fi
  shift 2
fi

success=true
for i; do
  # Quote "$i" everywhere so paths containing spaces or glob characters are
  # treated as a single word.
  printf "%-40s" "$i"
  # The test runs inside a command substitution whose output all goes to the
  # log, so the substitution expands to nothing and the `if` sees the test's
  # exit status.
  # NOTE(review): the outer 2>/dev/null presumably hides diagnostics printed
  # by the shell itself (e.g. crash notices) -- confirm.
  if $("$i" >"$i.log" 2>&1) 2>/dev/null; then
    echo PASS
  else
    echo "FAIL; output in $i.log"
    success=false
  fi
done

if $success; then
  echo 'ALL TESTS PASSED.'
  exit 0
else
  echo 'TESTS FAILED.'
  exit 1
fi

View File

@ -1,27 +0,0 @@
// Copyright 2008 The RE2 Authors. All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include <stdio.h>
#include <re2/filtered_re2.h>
#include <re2/re2.h>
int main() {
re2::FilteredRE2 f;
int id;
f.Add("a.*b.*c", RE2::DefaultOptions, &id);
std::vector<std::string> v;
f.Compile(&v);
std::vector<int> ids;
f.FirstMatch("abbccc", ids);
int n;
if (RE2::FullMatch("axbyc", "a.*b.*c") &&
RE2::PartialMatch("foo123bar", "(\\d+)", &n) && n == 123) {
printf("PASS\n");
return 0;
}
printf("FAIL\n");
return 2;
}

View File

@ -1,567 +0,0 @@
This is a dump from Google's source control system of the change
that removed UCS-2 support from RE2. As the explanation below
says, UCS-2 mode is fundamentally at odds with things like ^ and $,
so it never really worked very well. But if you are interested in using
it without those operators, it did work for that. It assumed that the
UCS-2 data was in the native host byte order.
If you are interested in adding UCS-2 mode back, this patch might
be a good starting point.
Change 12780686 by rsc@rsc-re2 on 2009/09/16 15:30:15
Retire UCS-2 mode.
I added it as an experiment for V8, but it
requires 2-byte lookahead to do completely,
and RE2 has 1-byte lookahead (enough for UTF-8)
as a fairly deep fundamental assumption,
so it did not support ^ or $.
==== re2/bitstate.cc#2 - re2/bitstate.cc#3 ====
re2/bitstate.cc#2:314,321 - re2/bitstate.cc#3:314,319
cap_[0] = p;
if (TrySearch(prog_->start(), p)) // Match must be leftmost; done.
return true;
- if (prog_->flags() & Regexp::UCS2)
- p++;
}
return false;
}
==== re2/compile.cc#17 - re2/compile.cc#18 ====
re2/compile.cc#17:95,101 - re2/compile.cc#18:95,100
// Input encodings.
enum Encoding {
kEncodingUTF8 = 1, // UTF-8 (0-10FFFF)
- kEncodingUCS2, // UCS-2 (0-FFFF), native byte order
kEncodingLatin1, // Latin1 (0-FF)
};
re2/compile.cc#17:168,176 - re2/compile.cc#18:167,172
void AddRuneRangeLatin1(Rune lo, Rune hi, bool foldcase);
void AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase);
void Add_80_10ffff();
- void AddRuneRangeUCS2(Rune lo, Rune hi, bool foldcase);
- void AddUCS2Pair(uint8 lo1, uint8 hi1, bool fold1,
- uint8 lo2, uint8 hi2, bool fold2);
// New suffix that matches the byte range lo-hi, then goes to next.
Inst* RuneByteSuffix(uint8 lo, uint8 hi, bool foldcase, Inst* next);
re2/compile.cc#17:475,481 - re2/compile.cc#18:471,477
// Converts rune range lo-hi into a fragment that recognizes
// the bytes that would make up those runes in the current
- // encoding (Latin 1, UTF-8, or UCS-2).
+ // encoding (Latin 1 or UTF-8).
// This lets the machine work byte-by-byte even when
// using multibyte encodings.
re2/compile.cc#17:488,496 - re2/compile.cc#18:484,489
case kEncodingLatin1:
AddRuneRangeLatin1(lo, hi, foldcase);
break;
- case kEncodingUCS2:
- AddRuneRangeUCS2(lo, hi, foldcase);
- break;
}
}
re2/compile.cc#17:503,581 - re2/compile.cc#18:496,501
AddSuffix(RuneByteSuffix(lo, hi, foldcase, NULL));
}
- // Test whether 16-bit values are big or little endian.
- static bool BigEndian() {
- union {
- char byte[2];
- int16 endian;
- } u;
-
- u.byte[0] = 1;
- u.byte[1] = 2;
- return u.endian == 0x0102;
- }
-
- void Compiler::AddUCS2Pair(uint8 lo1, uint8 hi1, bool fold1,
- uint8 lo2, uint8 hi2, bool fold2) {
- Inst* ip;
- if (reversed_) {
- ip = RuneByteSuffix(lo1, hi1, fold1, NULL);
- ip = RuneByteSuffix(lo2, hi2, fold2, ip);
- } else {
- ip = RuneByteSuffix(lo2, hi2, fold2, NULL);
- ip = RuneByteSuffix(lo1, hi1, fold1, ip);
- }
- AddSuffix(ip);
- }
-
- void Compiler::AddRuneRangeUCS2(Rune lo, Rune hi, bool foldcase) {
- if (lo > hi || lo > 0xFFFF)
- return;
- if (hi > 0xFFFF)
- hi = 0xFFFF;
-
- // We'll assemble a pattern assuming big endian.
- // If the machine isn't, tell Cat to reverse its arguments.
- bool oldreversed = reversed_;
- if (!BigEndian()) {
- reversed_ = !oldreversed;
- }
-
- // Split into bytes.
- int lo1 = lo >> 8;
- int lo2 = lo & 0xFF;
- int hi1 = hi >> 8;
- int hi2 = hi & 0xFF;
-
- if (lo1 == hi1) {
- // Easy case: high bits are same in both.
- // Only do ASCII case folding on the second byte if the top byte is 00.
- AddUCS2Pair(lo1, lo1, false, lo2, hi2, lo1==0 && foldcase);
- } else {
- // Harder case: different second byte ranges depending on first byte.
-
- // Initial fragment.
- if (lo2 > 0) {
- AddUCS2Pair(lo1, lo1, false, lo2, 0xFF, lo1==0 && foldcase);
- lo1++;
- }
-
- // Trailing fragment.
- if (hi2 < 0xFF) {
- AddUCS2Pair(hi1, hi1, false, 0, hi2, false);
- hi1--;
- }
-
- // Inner ranges.
- if (lo1 <= hi1) {
- AddUCS2Pair(lo1, hi1, false, 0, 0xFF, false);
- }
- }
-
- // Restore reverse setting.
- reversed_ = oldreversed;
- }
-
// Table describing how to make a UTF-8 matching machine
// for the rune range 80-10FFFF (Runeself-Runemax).
// This range happens frequently enough (for example /./ and /[^a-z]/)
re2/compile.cc#17:707,716 - re2/compile.cc#18:627,634
Frag Compiler::Literal(Rune r, bool foldcase) {
switch (encoding_) {
- default: // UCS-2 or something new
- BeginRange();
- AddRuneRange(r, r, foldcase);
- return EndRange();
+ default:
+ return kNullFrag;
case kEncodingLatin1:
return ByteRange(r, r, foldcase);
re2/compile.cc#17:927,934 - re2/compile.cc#18:845,850
if (re->parse_flags() & Regexp::Latin1)
c.encoding_ = kEncodingLatin1;
- else if (re->parse_flags() & Regexp::UCS2)
- c.encoding_ = kEncodingUCS2;
c.reversed_ = reversed;
if (max_mem <= 0) {
c.max_inst_ = 100000; // more than enough
re2/compile.cc#17:983,993 - re2/compile.cc#18:899,905
c.prog_->set_start_unanchored(c.prog_->start());
} else {
Frag dot;
- if (c.encoding_ == kEncodingUCS2) {
- dot = c.Cat(c.ByteRange(0x00, 0xFF, false), c.ByteRange(0x00, 0xFF, false));
- } else {
- dot = c.ByteRange(0x00, 0xFF, false);
- }
+ dot = c.ByteRange(0x00, 0xFF, false);
Frag dotloop = c.Star(dot, true);
Frag unanchored = c.Cat(dotloop, all);
c.prog_->set_start_unanchored(unanchored.begin);
==== re2/nfa.cc#8 - re2/nfa.cc#9 ====
re2/nfa.cc#8:426,432 - re2/nfa.cc#9:426,431
const char* bp = context.begin();
int c = -1;
int wasword = 0;
- bool ucs2 = prog_->flags() & Regexp::UCS2;
if (text.begin() > context.begin()) {
c = text.begin()[-1] & 0xFF;
re2/nfa.cc#8:492,498 - re2/nfa.cc#9:491,497
// If there's a required first byte for an unanchored search
// and we're not in the middle of any possible matches,
// use memchr to search for the byte quickly.
- if (!ucs2 && !anchored && first_byte_ >= 0 && runq->size() == 0 &&
+ if (!anchored && first_byte_ >= 0 && runq->size() == 0 &&
p < text.end() && (p[0] & 0xFF) != first_byte_) {
p = reinterpret_cast<const char*>(memchr(p, first_byte_,
text.end() - p));
re2/nfa.cc#8:505,526 - re2/nfa.cc#9:504,514
flag = Prog::EmptyFlags(context, p);
}
- // In UCS-2 mode, if we need to start a new thread,
- // make sure to do it on an even boundary.
- if(ucs2 && runq->size() == 0 &&
- (p - context.begin()) % 2 && p < text.end()) {
- p++;
- flag = Prog::EmptyFlags(context, p);
- }
-
// Steal match storage (cleared but unused as of yet)
// temporarily to hold match boundaries for new thread.
- // In UCS-2 mode, only start the thread on a 2-byte boundary.
- if(!ucs2 || (p - context.begin()) % 2 == 0) {
- match_[0] = p;
- AddToThreadq(runq, start_, flag, p, match_);
- match_[0] = NULL;
- }
+ match_[0] = p;
+ AddToThreadq(runq, start_, flag, p, match_);
+ match_[0] = NULL;
}
// If all the threads have died, stop early.
==== re2/parse.cc#22 - re2/parse.cc#23 ====
re2/parse.cc#22:160,167 - re2/parse.cc#23:160,165
status_(status), stacktop_(NULL), ncap_(0) {
if (flags_ & Latin1)
rune_max_ = 0xFF;
- else if (flags & UCS2)
- rune_max_ = 0xFFFF;
else
rune_max_ = Runemax;
}
re2/parse.cc#22:365,387 - re2/parse.cc#23:363,374
bool Regexp::ParseState::PushCarat() {
if (flags_ & OneLine) {
return PushSimpleOp(kRegexpBeginText);
- } else {
- if (flags_ & UCS2) {
- status_->set_code(kRegexpUnsupported);
- status_->set_error_arg("multiline ^ in UCS-2 mode");
- return false;
- }
- return PushSimpleOp(kRegexpBeginLine);
}
+ return PushSimpleOp(kRegexpBeginLine);
}
// Pushes a \b or \B onto the stack.
bool Regexp::ParseState::PushWordBoundary(bool word) {
- if (flags_ & UCS2) {
- status_->set_code(kRegexpUnsupported);
- status_->set_error_arg("\\b or \\B in UCS-2 mode");
- return false;
- }
if (word)
return PushSimpleOp(kRegexpWordBoundary);
return PushSimpleOp(kRegexpNoWordBoundary);
re2/parse.cc#22:397,407 - re2/parse.cc#23:384,389
bool ret = PushSimpleOp(kRegexpEndText);
flags_ = oflags;
return ret;
- }
- if (flags_ & UCS2) {
- status_->set_code(kRegexpUnsupported);
- status_->set_error_arg("multiline $ in UCS-2 mode");
- return false;
}
return PushSimpleOp(kRegexpEndLine);
}
==== re2/re2.cc#34 - re2/re2.cc#35 ====
re2/re2.cc#34:79,86 - re2/re2.cc#35:79,84
return RE2::ErrorBadUTF8;
case re2::kRegexpBadNamedCapture:
return RE2::ErrorBadNamedCapture;
- case re2::kRegexpUnsupported:
- return RE2::ErrorUnsupported;
}
return RE2::ErrorInternal;
}
re2/re2.cc#34:122,130 - re2/re2.cc#35:120,125
break;
case RE2::Options::EncodingLatin1:
flags |= Regexp::Latin1;
- break;
- case RE2::Options::EncodingUCS2:
- flags |= Regexp::UCS2;
break;
}
==== re2/re2.h#36 - re2/re2.h#37 ====
re2/re2.h#36:246,252 - re2/re2.h#37:246,251
ErrorBadUTF8, // invalid UTF-8 in regexp
ErrorBadNamedCapture, // bad named capture group
ErrorPatternTooLarge, // pattern too large (compile failed)
- ErrorUnsupported, // unsupported feature (in UCS-2 mode)
};
// Predefined common options.
re2/re2.h#36:570,576 - re2/re2.h#37:569,574
enum Encoding {
EncodingUTF8 = 1,
- EncodingUCS2, // 16-bit Unicode 0-FFFF only
EncodingLatin1
};
==== re2/regexp.cc#15 - re2/regexp.cc#16 ====
re2/regexp.cc#15:324,333 - re2/regexp.cc#16:324,329
// the regexp that remains after the prefix. The prefix might
// be ASCII case-insensitive.
bool Regexp::RequiredPrefix(string *prefix, bool *foldcase, Regexp** suffix) {
- // Don't even bother for UCS-2; it's time to throw that code away.
- if (parse_flags_ & UCS2)
- return false;
-
// No need for a walker: the regexp must be of the form
// 1. some number of ^ anchors
// 2. a literal char or string
==== re2/regexp.h#20 - re2/regexp.h#21 ====
re2/regexp.h#20:187,193 - re2/regexp.h#21:187,192
kRegexpBadPerlOp, // bad perl operator
kRegexpBadUTF8, // invalid UTF-8 in regexp
kRegexpBadNamedCapture, // bad named capture
- kRegexpUnsupported, // unsupported operator
};
// Error status for certain operations.
re2/regexp.h#20:307,316 - re2/regexp.h#21:306,314
// \Q and \E to disable/enable metacharacters
// (?P<name>expr) for named captures
// \C to match any single byte
- UCS2 = 1<<10, // Text is in UCS-2, regexp is in UTF-8.
- UnicodeGroups = 1<<11, // Allow \p{Han} for Unicode Han group
+ UnicodeGroups = 1<<10, // Allow \p{Han} for Unicode Han group
// and \P{Han} for its negation.
- NeverNL = 1<<12, // Never match NL, even if the regexp mentions
+ NeverNL = 1<<11, // Never match NL, even if the regexp mentions
// it explicitly.
// As close to Perl as we can get.
==== re2/testing/backtrack.cc#4 - re2/testing/backtrack.cc#5 ====
re2/testing/backtrack.cc#4:134,141 - re2/testing/backtrack.cc#5:134,139
cap_[0] = p;
if (Visit(prog_->start(), p)) // Match must be leftmost; done.
return true;
- if (prog_->flags() & Regexp::UCS2)
- p++;
}
return false;
}
==== re2/testing/tester.cc#12 - re2/testing/tester.cc#13 ====
re2/testing/tester.cc#12:144,154 - re2/testing/tester.cc#13:144,152
static ParseMode parse_modes[] = {
{ single_line, "single-line" },
{ single_line|Regexp::Latin1, "single-line, latin1" },
- { single_line|Regexp::UCS2, "single-line, ucs2" },
{ multi_line, "multiline" },
{ multi_line|Regexp::NonGreedy, "multiline, nongreedy" },
{ multi_line|Regexp::Latin1, "multiline, latin1" },
- { multi_line|Regexp::UCS2, "multiline, ucs2" },
};
static string FormatMode(Regexp::ParseFlags flags) {
re2/testing/tester.cc#12:179,189 - re2/testing/tester.cc#13:177,185
RegexpStatus status;
regexp_ = Regexp::Parse(regexp_str, flags, &status);
if (regexp_ == NULL) {
- if (status.code() != kRegexpUnsupported) {
- LOG(INFO) << "Cannot parse: " << CEscape(regexp_str_)
- << " mode: " << FormatMode(flags);
- error_ = true;
- }
+ LOG(INFO) << "Cannot parse: " << CEscape(regexp_str_)
+ << " mode: " << FormatMode(flags);
+ error_ = true;
return;
}
prog_ = regexp_->CompileToProg(0);
re2/testing/tester.cc#12:230,237 - re2/testing/tester.cc#13:226,231
RE2::Options options;
if (flags & Regexp::Latin1)
options.set_encoding(RE2::Options::EncodingLatin1);
- else if (flags & Regexp::UCS2)
- options.set_encoding(RE2::Options::EncodingUCS2);
if (kind_ == Prog::kLongestMatch)
options.set_longest_match(true);
re2_ = new RE2(re, options);
re2/testing/tester.cc#12:281,379 - re2/testing/tester.cc#13:275,280
delete re2_;
}
- // Converts UTF-8 string in text into UCS-2 string in new_text.
- static bool ConvertUTF8ToUCS2(const StringPiece& text, StringPiece* new_text) {
- const char* p = text.begin();
- const char* ep = text.end();
- uint16* q = new uint16[ep - p];
- uint16* q0 = q;
-
- int n;
- Rune r;
- for (; p < ep; p += n) {
- if (!fullrune(p, ep - p)) {
- delete[] q0;
- return false;
- }
- n = chartorune(&r, p);
- if (r > 0xFFFF) {
- delete[] q0;
- return false;
- }
- *q++ = r;
- }
- *new_text = StringPiece(reinterpret_cast<char*>(q0), 2*(q - q0));
- return true;
- }
-
- // Rewrites *sp from being a pointer into text8 (UTF-8)
- // to being a pointer into text16 (equivalent text but in UCS-2).
- static void AdjustUTF8ToUCS2(const StringPiece& text8, const StringPiece& text16,
- StringPiece *sp) {
- if (sp->begin() == NULL && text8.begin() != NULL)
- return;
-
- int nrune = 0;
- int n;
- Rune r;
- const char* p = text8.begin();
- const char* ep = text8.end();
- const char* spbegin = NULL;
- const char* spend = NULL;
- for (;;) {
- if (p == sp->begin())
- spbegin = text16.begin() + sizeof(uint16)*nrune;
- if (p == sp->end())
- spend = text16.begin() + sizeof(uint16)*nrune;
- if (p >= ep)
- break;
- n = chartorune(&r, p);
- p += n;
- nrune++;
- }
- if (spbegin == NULL || spend == NULL) {
- LOG(FATAL) << "Error in AdjustUTF8ToUCS2 "
- << CEscape(text8) << " "
- << (int)(sp->begin() - text8.begin()) << " "
- << (int)(sp->end() - text8.begin());
- }
- *sp = StringPiece(spbegin, spend - spbegin);
- }
-
- // Rewrites *sp from begin a pointer into text16 (UCS-2)
- // to being a pointer into text8 (equivalent text but in UTF-8).
- static void AdjustUCS2ToUTF8(const StringPiece& text16, const StringPiece& text8,
- StringPiece* sp) {
- if (sp->begin() == NULL)
- return;
-
- int nrune = 0;
- int n;
- Rune r;
- const char* p = text8.begin();
- const char* ep = text8.end();
- const char* spbegin = NULL;
- const char* spend = NULL;
- for (;;) {
- if (nrune == (sp->begin() - text16.begin())/2)
- spbegin = p;
- if (nrune == (sp->end() - text16.begin())/2)
- spend = p;
- if (p >= ep)
- break;
- n = chartorune(&r, p);
- p += n;
- nrune++;
- }
- if (text8.begin() != NULL && (spbegin == NULL || spend == NULL)) {
- LOG(FATAL) << "Error in AdjustUCS2ToUTF8 "
- << CEscape(text16) << " "
- << (int)(sp->begin() - text16.begin()) << " "
- << (int)(sp->end() - text16.begin());
- }
- *sp = StringPiece(spbegin, spend - spbegin);
- }
-
// Runs a single search using the named engine type.
// This interface hides all the irregularities of the various
// engine interfaces from the rest of this file.
re2/testing/tester.cc#12:393,411 - re2/testing/tester.cc#13:294,300
StringPiece text = orig_text;
StringPiece context = orig_context;
- bool ucs2 = false;
- if ((flags() & Regexp::UCS2) && type != kEnginePCRE) {
- if (!ConvertUTF8ToUCS2(orig_context, &context)) {
- result->skipped = true;
- return;
- }
-
- // Rewrite context to refer to new text.
- AdjustUTF8ToUCS2(orig_context, context, &text);
- ucs2 = true;
- }
-
switch (type) {
default:
LOG(FATAL) << "Bad RunSearch type: " << (int)type;
re2/testing/tester.cc#12:557,577 - re2/testing/tester.cc#13:446,451
}
}
- // If we did UCS-2 matching, rewrite the matches to refer
- // to the original UTF-8 text.
- if (ucs2) {
- if (result->matched) {
- if (result->have_submatch0) {
- AdjustUCS2ToUTF8(context, orig_context, &result->submatch[0]);
- } else if (result->have_submatch) {
- for (int i = 0; i < nsubmatch; i++) {
- AdjustUCS2ToUTF8(context, orig_context, &result->submatch[i]);
- }
- }
- }
- delete[] context.begin();
- }
-
if (!result->matched)
memset(result->submatch, 0, sizeof result->submatch);
}
re2/testing/tester.cc#12:596,617 - re2/testing/tester.cc#13:470,475
return true;
}
- // Check whether text uses only Unicode points <= 0xFFFF
- // (in the BMP).
- static bool IsBMP(const StringPiece& text) {
- const char* p = text.begin();
- const char* ep = text.end();
- while (p < ep) {
- if (!fullrune(p, ep - p))
- return false;
- Rune r;
- p += chartorune(&r, p);
- if (r > 0xFFFF)
- return false;
- }
- return true;
- }
-
// Runs a single test.
bool TestInstance::RunCase(const StringPiece& text, const StringPiece& context,
Prog::Anchor anchor) {
re2/testing/tester.cc#12:619,625 - re2/testing/tester.cc#13:477,483
Result correct;
RunSearch(kEngineBacktrack, text, context, anchor, &correct);
if (correct.skipped) {
- if (regexp_ == NULL || !IsBMP(context)) // okay to skip in UCS-2 mode
+ if (regexp_ == NULL)
return true;
LOG(ERROR) << "Skipped backtracking! " << CEscape(regexp_str_)
<< " " << FormatMode(flags_);

View File

@ -1,109 +0,0 @@
// Copyright 2009 The RE2 Authors. All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#ifndef UTIL_LOGGING_H_
#define UTIL_LOGGING_H_
// Simplified version of Google's logging.
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <ostream>
#include <sstream>
#include "absl/base/attributes.h"
// Debug-only checking.
// Every DCHECK* macro forwards to assert(), so these checks follow
// assert()'s behavior (including being compiled out under NDEBUG).
#define DCHECK(condition) assert(condition)
#define DCHECK_EQ(val1, val2) assert((val1) == (val2))
#define DCHECK_NE(val1, val2) assert((val1) != (val2))
#define DCHECK_LE(val1, val2) assert((val1) <= (val2))
#define DCHECK_LT(val1, val2) assert((val1) < (val2))
#define DCHECK_GE(val1, val2) assert((val1) >= (val2))
#define DCHECK_GT(val1, val2) assert((val1) > (val2))
// Always-on checking
// CHECK(x) does nothing when x is true (the empty `if` branch); otherwise it
// creates a LogMessageFatal and streams "Check failed: <text of x>" into it.
// Callers may append more context with <<. The comparison variants below
// simply wrap CHECK.
#define CHECK(x) if(x){}else LogMessageFatal(__FILE__, __LINE__).stream() << "Check failed: " #x
#define CHECK_LT(x, y) CHECK((x) < (y))
#define CHECK_GT(x, y) CHECK((x) > (y))
#define CHECK_LE(x, y) CHECK((x) <= (y))
#define CHECK_GE(x, y) CHECK((x) >= (y))
#define CHECK_EQ(x, y) CHECK((x) == (y))
#define CHECK_NE(x, y) CHECK((x) != (y))
// Severity-to-class mapping used by LOG(). INFO, WARNING and ERROR all
// create the same LogMessage; FATAL and QFATAL create a LogMessageFatal.
#define LOG_INFO LogMessage(__FILE__, __LINE__)
#define LOG_WARNING LogMessage(__FILE__, __LINE__)
#define LOG_ERROR LogMessage(__FILE__, __LINE__)
#define LOG_FATAL LogMessageFatal(__FILE__, __LINE__)
#define LOG_QFATAL LOG_FATAL
// It seems that one of the Windows header files defines ERROR as 0.
// With that definition LOG(ERROR) pastes to LOG_0, which is mapped here.
#ifdef _WIN32
#define LOG_0 LOG_INFO
#endif
// DFATAL is fatal only in debug builds: with NDEBUG it is the same as ERROR.
#ifdef NDEBUG
#define LOG_DFATAL LOG_ERROR
#else
#define LOG_DFATAL LOG_FATAL
#endif
// LOG(severity) token-pastes the severity onto LOG_ and yields the message's
// stream, so callers write LOG(INFO) << "text".
#define LOG(severity) LOG_ ## severity.stream()
// VLOG(x) streams to LOG_INFO only when x <= 0; for any positive x the
// statement takes the empty `if` branch and nothing is logged.
#define VLOG(x) if((x)>0){}else LOG_INFO.stream()
// Collects a single log line in memory and writes it to stderr when it is
// flushed explicitly or, failing that, when the object is destroyed.
class LogMessage {
 public:
  // Begins the line with a "<file>:<line>: " prefix.
  LogMessage(const char* file, int line) {
    str_ << file << ":" << line << ": ";
  }

  // Emits the message on destruction unless Flush() has already run.
  ~LogMessage() {
    if (flushed_) return;
    Flush();
  }

  // Appends a newline, writes everything accumulated so far to stderr and
  // marks the message as flushed.
  void Flush() {
    str_ << "\n";
    const std::string text = str_.str();
    // A short write is deliberately ignored; the result is captured only so
    // that compilers do not warn about an unused return value.
    const size_t written = fwrite(text.data(), 1, text.size(), stderr);
    (void)written;
    flushed_ = true;
  }

  // Stream that callers append the message text to.
  std::ostream& stream() { return str_; }

 private:
  // Not copyable.
  LogMessage(const LogMessage&) = delete;
  LogMessage& operator=(const LogMessage&) = delete;

  bool flushed_ = false;
  std::ostringstream str_;
};
// Silence "destructor never returns" warning for ~LogMessageFatal().
// Since this is a header file, push and then pop to limit the scope.
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable: 4722)
#endif
// LogMessage variant for fatal errors: destroying it writes the message out
// and then terminates the process with abort().
class LogMessageFatal : public LogMessage {
 public:
  // Not copyable.
  LogMessageFatal(const LogMessageFatal&) = delete;
  LogMessageFatal& operator=(const LogMessageFatal&) = delete;

  LogMessageFatal(const char* file, int line) : LogMessage(file, line) {}

  // Flushes unconditionally and never returns.
  ABSL_ATTRIBUTE_NORETURN ~LogMessageFatal() {
    Flush();
    abort();
  }
};
#ifdef _MSC_VER
#pragma warning(pop)
#endif
#endif // UTIL_LOGGING_H_

View File

@ -16,16 +16,17 @@
#include <utility>
#include "absl/flags/flag.h"
#include "absl/log/absl_check.h"
#include "absl/log/absl_log.h"
#include "absl/strings/str_format.h"
#include "util/logging.h"
#include "util/pcre.h"
// Silence warnings about the wacky formatting in the operator() functions.
#if !defined(__clang__) && defined(__GNUC__)
#if defined(__GNUC__)
#pragma GCC diagnostic ignored "-Wmisleading-indentation"
#endif
#define PCREPORT(level) LOG(level)
#define PCREPORT(level) ABSL_LOG(level)
// Default PCRE limits.
// Defaults chosen to allow a plausible amount of CPU and

View File

@ -39,10 +39,10 @@
// supplied pattern exactly.
//
// Example: successful match
// CHECK(PCRE::FullMatch("hello", "h.*o"));
// ABSL_CHECK(PCRE::FullMatch("hello", "h.*o"));
//
// Example: unsuccessful match (requires full match):
// CHECK(!PCRE::FullMatch("hello", "e"));
// ABSL_CHECK(!PCRE::FullMatch("hello", "e"));
//
// -----------------------------------------------------------------------
// UTF-8 AND THE MATCHING INTERFACE:
@ -58,7 +58,7 @@
//
// Example:
// PCRE re(utf8_pattern, PCRE::UTF8);
// CHECK(PCRE::FullMatch(utf8_string, re));
// ABSL_CHECK(PCRE::FullMatch(utf8_string, re));
//
// -----------------------------------------------------------------------
// MATCHING WITH SUBSTRING EXTRACTION:
@ -68,22 +68,22 @@
// Example: extracts "ruby" into "s" and 1234 into "i"
// int i;
// std::string s;
// CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
// ABSL_CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
//
// Example: fails because string cannot be stored in integer
// CHECK(!PCRE::FullMatch("ruby", "(.*)", &i));
// ABSL_CHECK(!PCRE::FullMatch("ruby", "(.*)", &i));
//
// Example: fails because there aren't enough sub-patterns:
// CHECK(!PCRE::FullMatch("ruby:1234", "\\w+:\\d+", &s));
// ABSL_CHECK(!PCRE::FullMatch("ruby:1234", "\\w+:\\d+", &s));
//
// Example: does not try to extract any extra sub-patterns
// CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s));
// ABSL_CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s));
//
// Example: does not try to extract into NULL
// CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", NULL, &i));
// ABSL_CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", NULL, &i));
//
// Example: integer overflow causes failure
// CHECK(!PCRE::FullMatch("ruby:1234567891234", "\\w+:(\\d+)", &i));
// ABSL_CHECK(!PCRE::FullMatch("ruby:1234567891234", "\\w+:(\\d+)", &i));
//
// -----------------------------------------------------------------------
// PARTIAL MATCHES
@ -92,12 +92,12 @@
// to match any substring of the text.
//
// Example: simple search for a string:
// CHECK(PCRE::PartialMatch("hello", "ell"));
// ABSL_CHECK(PCRE::PartialMatch("hello", "ell"));
//
// Example: find first number in a string
// int number;
// CHECK(PCRE::PartialMatch("x*100 + 20", "(\\d+)", &number));
// CHECK_EQ(number, 100);
// ABSL_CHECK(PCRE::PartialMatch("x*100 + 20", "(\\d+)", &number));
// ABSL_CHECK_EQ(number, 100);
//
// -----------------------------------------------------------------------
// PRE-COMPILED REGULAR EXPRESSIONS
@ -157,7 +157,7 @@
//
// Example:
// int a, b, c, d;
// CHECK(PCRE::FullMatch("100 40 0100 0x40", "(.*) (.*) (.*) (.*)",
// ABSL_CHECK(PCRE::FullMatch("100 40 0100 0x40", "(.*) (.*) (.*) (.*)",
// Octal(&a), Hex(&b), CRadix(&c), CRadix(&d));
// will leave 64 in a, b, c, and d.
@ -379,7 +379,7 @@ class PCRE {
// text. E.g.,
//
// std::string s = "yabba dabba doo";
// CHECK(PCRE::Replace(&s, "b+", "d"));
// ABSL_CHECK(PCRE::Replace(&s, "b+", "d"));
//
// will leave "s" containing "yada dabba doo"
//
@ -393,7 +393,7 @@ class PCRE {
// re-matching. E.g.,
//
// std::string s = "yabba dabba doo";
// CHECK(PCRE::GlobalReplace(&s, "b+", "d"));
// ABSL_CHECK(PCRE::GlobalReplace(&s, "b+", "d"));
//
// will leave "s" containing "yada dada doo"
//
@ -417,7 +417,7 @@ class PCRE {
// * The @p rewrite string doesn't have any syntax errors
// ('\' followed by anything besides [0-9] and '\').
// Making this test will guarantee that "replace" and "extract"
// operations won't LOG(ERROR) or fail because of a bad rewrite
// operations won't ABSL_LOG(ERROR) or fail because of a bad rewrite
// string.
// @param rewrite The proposed rewrite string.
// @param error An error message is recorded here, iff we return false.

View File

@ -7,8 +7,8 @@ IFS=$'\n\t'
set -vx
NAME=re2
REVISION="2023-11-01"
VERSION="2023-11-01"
REVISION="2025-08-12-mongo"
VERSION="2025-08-12"
DEST_DIR=$(git rev-parse --show-toplevel)/src/third_party/re2
if [[ -d $DEST_DIR/dist ]]; then
@ -16,12 +16,11 @@ if [[ -d $DEST_DIR/dist ]]; then
exit 1
fi
git clone --branch 2023-11-01 git@github.com:mongodb-forks/re2.git $DEST_DIR/dist
mkdir -p $DEST_DIR/dist
git clone --branch $REVISION git@github.com:mongodb-forks/re2.git $DEST_DIR/dist
pushd $DEST_DIR/dist
find . -mindepth 1 -maxdepth 1 -name ".*" -exec rm -rf {} \;
rm -rf app
rm -rf benchlog
rm -rf doc
rm -rf lib
rm -rf python
rm -rf app benchlog doc lib python
find . -maxdepth 1 -type f -not -regex ".*\(CONTRIBUTING.md\|LICENSE\|README.md\|SECURITY.md\)$" -delete
popd