Differentiate between 'url.host' and 'url.raw_host' (#1590)
* Differentiate between 'url.host' and 'url.raw_host'
This commit is contained in:
parent
d98e9e7ae7
commit
39d8ee619e
117
httpx/_models.py
117
httpx/_models.py
@ -8,6 +8,7 @@ from collections.abc import MutableMapping
|
||||
from http.cookiejar import Cookie, CookieJar
|
||||
from urllib.parse import parse_qsl, quote, unquote, urlencode
|
||||
|
||||
import idna
|
||||
import rfc3986
|
||||
import rfc3986.exceptions
|
||||
|
||||
@ -60,15 +61,16 @@ from ._utils import (
|
||||
|
||||
class URL:
|
||||
"""
|
||||
url = httpx.URL("HTTPS://jo%40email.com:a%20secret@example.com:1234/pa%20th?search=ab#anchorlink")
|
||||
url = httpx.URL("HTTPS://jo%40email.com:a%20secret@müller.de:1234/pa%20th?search=ab#anchorlink")
|
||||
|
||||
assert url.scheme == "https"
|
||||
assert url.username == "jo@email.com"
|
||||
assert url.password == "a secret"
|
||||
assert url.userinfo == b"jo%40email.com:a%20secret"
|
||||
assert url.host == "example.com"
|
||||
assert url.host == "müller.de"
|
||||
assert url.raw_host == b"xn--mller-kva.de"
|
||||
assert url.port == 1234
|
||||
assert url.netloc == "example.com:1234"
|
||||
assert url.netloc == b"xn--mller-kva.de:1234"
|
||||
assert url.path == "/pa th"
|
||||
assert url.query == b"?search=ab"
|
||||
assert url.raw_path == b"/pa%20th?search=ab"
|
||||
@ -76,17 +78,28 @@ class URL:
|
||||
|
||||
The components of a URL are broken down like this:
|
||||
|
||||
https://jo%40email.com:a%20secret@example.com:1234/pa%20th?search=ab#anchorlink
|
||||
[scheme][ username ] [password] [ host ][port][ path ] [ query ] [fragment]
|
||||
[ userinfo ] [ netloc ][ raw_path ]
|
||||
https://jo%40email.com:a%20secret@müller.de:1234/pa%20th?search=ab#anchorlink
|
||||
[scheme] [ username ] [password] [ host ][port][ path ] [ query ] [fragment]
|
||||
[ userinfo ] [ netloc ][ raw_path ]
|
||||
|
||||
Note that:
|
||||
|
||||
* `url.scheme` is normalized to always be lowercased.
|
||||
|
||||
* `url.host` is normalized to always be lowercased, and is IDNA encoded. For instance:
|
||||
url = httpx.URL("http://中国.icom.museum")
|
||||
assert url.host == "xn--fiqs8s.icom.museum"
|
||||
* `url.host` is normalized to always be lowercased. Internationalized domain
|
||||
names are represented in unicode, without IDNA encoding applied. For instance:
|
||||
|
||||
url = httpx.URL("http://中国.icom.museum")
|
||||
assert url.host == "中国.icom.museum"
|
||||
url = httpx.URL("http://xn--fiqs8s.icom.museum")
|
||||
assert url.host == "中国.icom.museum"
|
||||
|
||||
* `url.raw_host` is normalized to always be lowercased, and is IDNA encoded.
|
||||
|
||||
url = httpx.URL("http://中国.icom.museum")
|
||||
assert url.raw_host == b"xn--fiqs8s.icom.museum"
|
||||
url = httpx.URL("http://xn--fiqs8s.icom.museum")
|
||||
assert url.raw_host == b"xn--fiqs8s.icom.museum"
|
||||
|
||||
* `url.userinfo` is raw bytes, with URL escaping left intact (not decoded). Usually you'll want to work with
|
||||
`url.username` and `url.password` instead, which handle the URL escaping.
|
||||
@ -150,6 +163,14 @@ class URL:
|
||||
"""
|
||||
return self._uri_reference.scheme or ""
|
||||
|
||||
@property
|
||||
def raw_scheme(self) -> bytes:
|
||||
"""
|
||||
The raw bytes representation of the URL scheme, such as b"http", b"https".
|
||||
Always normalized to lowercase.
|
||||
"""
|
||||
return self.scheme.encode("ascii")
|
||||
|
||||
@property
|
||||
def userinfo(self) -> bytes:
|
||||
"""
|
||||
@ -181,7 +202,7 @@ class URL:
|
||||
def host(self) -> str:
|
||||
"""
|
||||
The URL host as a string.
|
||||
Always normlized to lowercase, and IDNA encoded.
|
||||
Always normalized to lowercase, with IDNA hosts decoded into unicode.
|
||||
|
||||
Examples:
|
||||
|
||||
@ -189,18 +210,52 @@ class URL:
|
||||
assert url.host == "www.example.org"
|
||||
|
||||
url = httpx.URL("http://中国.icom.museum")
|
||||
assert url.host == "xn--fiqs8s.icom.museum"
|
||||
assert url.host == "中国.icom.museum"
|
||||
|
||||
url = httpx.URL("http://xn--fiqs8s.icom.museum")
|
||||
assert url.host == "中国.icom.museum"
|
||||
|
||||
url = httpx.URL("https://[::ffff:192.168.0.1]")
|
||||
assert url.host == "::ffff:192.168.0.1"
|
||||
"""
|
||||
host: str = self._uri_reference.host
|
||||
host: str = self._uri_reference.host or ""
|
||||
|
||||
if host and ":" in host and host[0] == "[":
|
||||
# it's an IPv6 address
|
||||
host = host.lstrip("[").rstrip("]")
|
||||
|
||||
return host or ""
|
||||
if host.startswith("xn--"):
|
||||
host = idna.decode(host)
|
||||
|
||||
return host
|
||||
|
||||
@property
|
||||
def raw_host(self) -> bytes:
|
||||
"""
|
||||
The raw bytes representation of the URL host.
|
||||
Always normalized to lowercase, and IDNA encoded.
|
||||
|
||||
Examples:
|
||||
|
||||
url = httpx.URL("http://www.EXAMPLE.org")
|
||||
assert url.raw_host == b"www.example.org"
|
||||
|
||||
url = httpx.URL("http://中国.icom.museum")
|
||||
assert url.raw_host == b"xn--fiqs8s.icom.museum"
|
||||
|
||||
url = httpx.URL("http://xn--fiqs8s.icom.museum")
|
||||
assert url.raw_host == b"xn--fiqs8s.icom.museum"
|
||||
|
||||
url = httpx.URL("https://[::ffff:192.168.0.1]")
|
||||
assert url.raw_host == b"::ffff:192.168.0.1"
|
||||
"""
|
||||
host: str = self._uri_reference.host or ""
|
||||
|
||||
if host and ":" in host and host[0] == "[":
|
||||
# it's an IPv6 address
|
||||
host = host.lstrip("[").rstrip("]")
|
||||
|
||||
return host.encode("ascii")
|
||||
|
||||
@property
|
||||
def port(self) -> typing.Optional[int]:
|
||||
@ -211,14 +266,17 @@ class URL:
|
||||
return int(port) if port else None
|
||||
|
||||
@property
|
||||
def netloc(self) -> str:
|
||||
def netloc(self) -> bytes:
|
||||
"""
|
||||
Either `<host>` or `<host>:<port>` as a string.
|
||||
Always normlized to lowercase, and IDNA encoded.
|
||||
Either `<host>` or `<host>:<port>` as bytes.
|
||||
Always normalized to lowercase, and IDNA encoded.
|
||||
"""
|
||||
host = self._uri_reference.host or ""
|
||||
port = self._uri_reference.port
|
||||
return host if port is None else f"{host}:{port}"
|
||||
netloc = host.encode("ascii")
|
||||
if port:
|
||||
netloc = netloc + b":" + str(port).encode("ascii")
|
||||
return netloc
|
||||
|
||||
@property
|
||||
def path(self) -> str:
|
||||
@ -277,8 +335,8 @@ class URL:
|
||||
Provides the (scheme, host, port, target) for the outgoing request.
|
||||
"""
|
||||
return (
|
||||
self.scheme.encode("ascii"),
|
||||
self.host.encode("ascii"),
|
||||
self.raw_scheme,
|
||||
self.raw_host,
|
||||
self.port,
|
||||
self.raw_path,
|
||||
)
|
||||
@ -293,7 +351,7 @@ class URL:
|
||||
# URLs with a fragment portion as not absolute.
|
||||
# What we actually care about is if the URL provides
|
||||
# a scheme and hostname to which connections should be made.
|
||||
return bool(self.scheme and self.host)
|
||||
return bool(self._uri_reference.scheme and self._uri_reference.host)
|
||||
|
||||
@property
|
||||
def is_relative_url(self) -> bool:
|
||||
@ -321,7 +379,7 @@ class URL:
|
||||
"userinfo": bytes,
|
||||
"host": str,
|
||||
"port": int,
|
||||
"netloc": str,
|
||||
"netloc": bytes,
|
||||
"path": str,
|
||||
"query": bytes,
|
||||
"raw_path": bytes,
|
||||
@ -354,12 +412,16 @@ class URL:
|
||||
# it's an IPv6 address, so it should be hidden under bracket
|
||||
host = f"[{host}]"
|
||||
|
||||
kwargs["netloc"] = f"{host}:{port}" if port is not None else host
|
||||
kwargs["netloc"] = (
|
||||
f"{host}:{port}".encode("ascii")
|
||||
if port is not None
|
||||
else host.encode("ascii")
|
||||
)
|
||||
|
||||
if "userinfo" in kwargs or "netloc" in kwargs:
|
||||
# Consolidate userinfo and netloc into authority.
|
||||
userinfo = (kwargs.pop("userinfo", self.userinfo) or b"").decode("ascii")
|
||||
netloc = kwargs.pop("netloc", self.netloc) or ""
|
||||
netloc = (kwargs.pop("netloc", self.netloc) or b"").decode("ascii")
|
||||
authority = f"{userinfo}@{netloc}" if userinfo else netloc
|
||||
kwargs["authority"] = authority
|
||||
|
||||
@ -848,11 +910,10 @@ class Request:
|
||||
)
|
||||
|
||||
if not has_host and self.url.host:
|
||||
default_port = {"http": 80, "https": 443}.get(self.url.scheme)
|
||||
if self.url.port is None or self.url.port == default_port:
|
||||
host_header = self.url.host.encode("ascii")
|
||||
else:
|
||||
host_header = self.url.netloc.encode("ascii")
|
||||
default_port = {"http": b":80", "https": b":443"}.get(self.url.scheme, b"")
|
||||
host_header = self.url.netloc
|
||||
if host_header.endswith(default_port):
|
||||
host_header = host_header[: -len(default_port)]
|
||||
auto_headers.append((b"Host", host_header))
|
||||
if not has_content_length and self.method in ("POST", "PUT", "PATCH"):
|
||||
auto_headers.append((b"Content-Length", b"0"))
|
||||
|
||||
@ -4,41 +4,53 @@ import httpx
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"given,idna,host,scheme,port",
|
||||
"given,idna,host,raw_host,scheme,port",
|
||||
[
|
||||
(
|
||||
"http://中国.icom.museum:80/",
|
||||
"http://xn--fiqs8s.icom.museum:80/",
|
||||
"xn--fiqs8s.icom.museum",
|
||||
"中国.icom.museum",
|
||||
b"xn--fiqs8s.icom.museum",
|
||||
"http",
|
||||
80,
|
||||
),
|
||||
(
|
||||
"http://Königsgäßchen.de",
|
||||
"http://xn--knigsgchen-b4a3dun.de",
|
||||
"xn--knigsgchen-b4a3dun.de",
|
||||
"königsgäßchen.de",
|
||||
b"xn--knigsgchen-b4a3dun.de",
|
||||
"http",
|
||||
None,
|
||||
),
|
||||
("https://faß.de", "https://xn--fa-hia.de", "xn--fa-hia.de", "https", None),
|
||||
(
|
||||
"https://faß.de",
|
||||
"https://xn--fa-hia.de",
|
||||
"faß.de",
|
||||
b"xn--fa-hia.de",
|
||||
"https",
|
||||
None,
|
||||
),
|
||||
(
|
||||
"https://βόλος.com:443",
|
||||
"https://xn--nxasmm1c.com:443",
|
||||
"xn--nxasmm1c.com",
|
||||
"βόλος.com",
|
||||
b"xn--nxasmm1c.com",
|
||||
"https",
|
||||
443,
|
||||
),
|
||||
(
|
||||
"http://ශ්රී.com:444",
|
||||
"http://xn--10cl1a0b660p.com:444",
|
||||
"xn--10cl1a0b660p.com",
|
||||
"ශ්රී.com",
|
||||
b"xn--10cl1a0b660p.com",
|
||||
"http",
|
||||
444,
|
||||
),
|
||||
(
|
||||
"https://نامهای.com:4433",
|
||||
"https://xn--mgba3gch31f060k.com:4433",
|
||||
"xn--mgba3gch31f060k.com",
|
||||
"نامهای.com",
|
||||
b"xn--mgba3gch31f060k.com",
|
||||
"https",
|
||||
4433,
|
||||
),
|
||||
@ -52,10 +64,11 @@ import httpx
|
||||
"https_with_custom_port",
|
||||
],
|
||||
)
|
||||
def test_idna_url(given, idna, host, scheme, port):
|
||||
def test_idna_url(given, idna, host, raw_host, scheme, port):
|
||||
url = httpx.URL(given)
|
||||
assert url == httpx.URL(idna)
|
||||
assert url.host == host
|
||||
assert url.raw_host == raw_host
|
||||
assert url.scheme == scheme
|
||||
assert url.port == port
|
||||
|
||||
@ -197,7 +210,7 @@ def test_url_copywith_authority_subcomponents():
|
||||
|
||||
def test_url_copywith_netloc():
|
||||
copy_with_kwargs = {
|
||||
"netloc": "example.net:444",
|
||||
"netloc": b"example.net:444",
|
||||
}
|
||||
url = httpx.URL("https://example.org")
|
||||
new = url.copy_with(**copy_with_kwargs)
|
||||
@ -301,7 +314,7 @@ def test_ipv6_url():
|
||||
url = httpx.URL("http://[::ffff:192.168.0.1]:5678/")
|
||||
|
||||
assert url.host == "::ffff:192.168.0.1"
|
||||
assert url.netloc == "[::ffff:192.168.0.1]:5678"
|
||||
assert url.netloc == b"[::ffff:192.168.0.1]:5678"
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
@ -317,7 +330,7 @@ def test_ipv6_url_copy_with_host(url_str, new_host):
|
||||
url = httpx.URL(url_str).copy_with(host=new_host)
|
||||
|
||||
assert url.host == "::ffff:192.168.0.1"
|
||||
assert url.netloc == "[::ffff:192.168.0.1]:1234"
|
||||
assert url.netloc == b"[::ffff:192.168.0.1]:1234"
|
||||
assert str(url) == "http://[::ffff:192.168.0.1]:1234"
|
||||
|
||||
|
||||
@ -327,5 +340,5 @@ def test_ipv6_url_from_raw_url(host):
|
||||
url = httpx.URL(raw_url)
|
||||
|
||||
assert url.host == "::ffff:192.168.0.1"
|
||||
assert url.netloc == "[::ffff:192.168.0.1]:443"
|
||||
assert url.netloc == b"[::ffff:192.168.0.1]:443"
|
||||
assert str(url) == "https://[::ffff:192.168.0.1]:443/"
|
||||
|
||||
Loading…
Reference in New Issue
Block a user