Percent-encode the pipe character in URL paths
The | character was not being percent-encoded in path segments, even though it is not a valid pchar per RFC 3986. This caused issues with servers that reject or redirect URLs containing unencoded pipes. Now | is encoded as %7C in paths, matching the behavior of Python's urllib and the requests library.
This commit is contained in:
parent
ae1b9f6623
commit
1bdeaf65c3
@ -51,12 +51,15 @@ QUERY_SAFE = "".join(
|
||||
)
|
||||
|
||||
# The path percent-encode set is the query percent-encode set
|
||||
# and U+003F (?), U+0060 (`), U+007B ({), and U+007D (}).
|
||||
# and U+003F (?), U+0060 (`), U+007B ({), U+007C (|), and U+007D (}).
|
||||
# We include U+007C (|) in the encode set to align with RFC 3986 and
|
||||
# Python's stdlib, since | is not a valid pchar and can cause servers
|
||||
# to issue redirects or reject requests when left unencoded.
|
||||
PATH_SAFE = "".join(
|
||||
[
|
||||
chr(i)
|
||||
for i in range(0x20, 0x7F)
|
||||
if i not in (0x20, 0x22, 0x23, 0x3C, 0x3E) + (0x3F, 0x60, 0x7B, 0x7D)
|
||||
if i not in (0x20, 0x22, 0x23, 0x3C, 0x3E) + (0x3F, 0x60, 0x7B, 0x7C, 0x7D)
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
@ -140,6 +140,13 @@ def test_path_query_fragment(url, raw_path, path, query, fragment):
|
||||
assert url.fragment == fragment
|
||||
|
||||
|
||||
def test_url_pipe_encoding_in_path():
|
||||
# The pipe character should be percent-encoded in paths per RFC 3986.
|
||||
url = httpx.URL("https://example.com/path|segment")
|
||||
assert url.raw_path == b"/path%7Csegment"
|
||||
assert url.path == "/path|segment"
|
||||
|
||||
|
||||
def test_url_query_encoding():
|
||||
url = httpx.URL("https://www.example.com/?a=b c&d=e/f")
|
||||
assert url.raw_path == b"/?a=b%20c&d=e/f"
|
||||
|
||||
@ -27,6 +27,12 @@ def test_urlparse(test_case):
|
||||
# Anyone know what's going on here?
|
||||
return
|
||||
|
||||
# We percent-encode "|" in paths (unlike WHATWG), to align with RFC 3986
|
||||
# and Python's stdlib. The pipe character is not a valid pchar and some
|
||||
# servers reject or redirect URLs containing an unencoded "|".
|
||||
if "|" in test_case.get("pathname", ""):
|
||||
return
|
||||
|
||||
p = urlparse(test_case["href"])
|
||||
|
||||
# Test cases include the protocol with the trailing ":"
|
||||
|
||||
Loading…
Reference in New Issue
Block a user