Always encode forward slashes as %2F in query parameters (#2723)

* Always encode forward slashes as `%2F` in query parameters

* Revert inclusion of "%"

This is expected to fail tests due to double escaping

* Update `urlencode`

---------

Co-authored-by: Tom Christie <tom@tomchristie.com>
This commit is contained in:
Zanie Adkins 2023-06-09 04:06:56 -05:00 committed by GitHub
parent 301b8fb03a
commit 920333ea98
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 15 additions and 13 deletions

View File

@ -260,8 +260,10 @@ def urlparse(url: str = "", **kwargs: typing.Optional[str]) -> ParseResult:
# For 'path' we need to drop ? and # from the GEN_DELIMS set.
parsed_path: str = quote(path, safe=SUB_DELIMS + ":/[]@")
# For 'query' we need to drop '#' from the GEN_DELIMS set.
# We also exclude '/' because it is more robust to replace it with a percent
# encoding despite it not being a requirement of the spec.
parsed_query: typing.Optional[str] = (
None if query is None else quote(query, safe=SUB_DELIMS + ":/?[]@")
None if query is None else quote(query, safe=SUB_DELIMS + ":?[]@")
)
# For 'fragment' we can include all of the GEN_DELIMS set.
parsed_fragment: typing.Optional[str] = (
@ -452,11 +454,11 @@ def urlencode(items: typing.List[typing.Tuple[str, str]]) -> str:
#
# https://github.com/python/cpython/blob/b2f7b2ef0b5421e01efb8c7bee2ef95d3bab77eb/Lib/urllib/parse.py#L926
#
# Note that we use '%20' encoding for spaces, and treat '/' as a safe
# character. This means our query params have the same escaping as other
# characters in the URL path. This is slightly different to `requests`,
# but is the behaviour that browsers use.
# Note that we use '%20' encoding for spaces, and '%2F' for '/'.
# This is slightly different than `requests`, but is the behaviour that browsers use.
#
# See https://github.com/encode/httpx/issues/2536 and
# https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlencode
return "&".join([quote(k) + "=" + quote(v) for k, v in items])
# See
# - https://github.com/encode/httpx/issues/2536
# - https://github.com/encode/httpx/issues/2721
# - https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlencode
return "&".join([quote(k, safe="") + "=" + quote(v, safe="") for k, v in items])

View File

@ -360,10 +360,10 @@ def test_url_query_encoding():
and https://github.com/encode/httpx/discussions/2460
"""
url = httpx.URL("https://www.example.com/?a=b c&d=e/f")
assert url.raw_path == b"/?a=b%20c&d=e/f"
assert url.raw_path == b"/?a=b%20c&d=e%2Ff"
url = httpx.URL("https://www.example.com/", params={"a": "b c", "d": "e/f"})
assert url.raw_path == b"/?a=b%20c&d=e/f"
assert url.raw_path == b"/?a=b%20c&d=e%2Ff"
def test_url_with_url_encoded_path():

View File

@ -141,7 +141,7 @@ def test_param_does_not_require_encoding():
def test_param_with_existing_escape_requires_encoding():
url = httpx.URL("http://webservice", params={"u": "http://example.com?q=foo%2Fa"})
assert str(url) == "http://webservice?u=http%3A//example.com%3Fq%3Dfoo%252Fa"
assert str(url) == "http://webservice?u=http%3A%2F%2Fexample.com%3Fq%3Dfoo%252Fa"
# Tests for invalid URLs
@ -264,9 +264,9 @@ def test_path_percent_encoding():
def test_query_percent_encoding():
# Test percent encoding for SUB_DELIMS ALPHA NUM and allowable GEN_DELIMS
url = httpx.URL("https://example.com/?!$&'()*+,;= abc ABC 123 :/[]@" + "?")
assert url.raw_path == b"/?!$&'()*+,;=%20abc%20ABC%20123%20:/[]@?"
assert url.raw_path == b"/?!$&'()*+,;=%20abc%20ABC%20123%20:%2F[]@?"
assert url.path == "/"
assert url.query == b"!$&'()*+,;=%20abc%20ABC%20123%20:/[]@?"
assert url.query == b"!$&'()*+,;=%20abc%20ABC%20123%20:%2F[]@?"
assert url.fragment == ""