This commit is contained in:
Harikrishna KP 2026-02-26 05:23:44 +00:00 committed by GitHub
commit 6e05a4f5bf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 18 additions and 2 deletions

View File

@ -51,12 +51,15 @@ QUERY_SAFE = "".join(
)
# Characters that may appear unescaped in a URL path.
#
# The path percent-encode set is the query percent-encode set plus
# U+003F (?), U+0060 (`), U+007B ({), U+007C (|), and U+007D (}).
# U+007C (|) is included in the encode set to align with RFC 3986 and
# Python's stdlib: | is not a valid pchar, and leaving it unencoded can
# cause servers to issue redirects or reject requests.
PATH_SAFE = "".join(
    chr(code)
    for code in range(0x20, 0x7F)
    if code
    not in {
        0x20, 0x22, 0x23, 0x3C, 0x3E,  # query percent-encode set
        0x3F, 0x60, 0x7B, 0x7C, 0x7D,  # path-only additions: ? ` { | }
    }
)

View File

@ -140,6 +140,13 @@ def test_path_query_fragment(url, raw_path, path, query, fragment):
assert url.fragment == fragment
def test_url_pipe_encoding_in_path():
    """A literal "|" in a URL path is percent-encoded, per RFC 3986."""
    parsed = httpx.URL("https://example.com/path|segment")
    # The raw (wire) form carries the escaped pipe...
    assert parsed.raw_path == b"/path%7Csegment"
    # ...while the decoded path still exposes the original character.
    assert parsed.path == "/path|segment"
def test_url_query_encoding():
url = httpx.URL("https://www.example.com/?a=b c&d=e/f")
assert url.raw_path == b"/?a=b%20c&d=e/f"

View File

@ -27,6 +27,12 @@ def test_urlparse(test_case):
# Anyone know what's going on here?
return
# We percent-encode "|" in paths (unlike WHATWG), to align with RFC 3986
# and Python's stdlib. The pipe character is not a valid pchar and some
# servers reject or redirect URLs containing an unencoded "|".
if "|" in test_case.get("pathname", ""):
return
p = urlparse(test_case["href"])
# Test cases include the protocol with the trailing ":"