httpx/tests/test_urlparse.py
Tom Christie 57daabf673
Drop rfc3986 requirement. (#2252)
* Drop RawURL

* First pass at adding urlparse

* Update urlparse

* Add urlparse

* Add urlparse

* Unicode non-printables can be valid in IDNA hostnames

* Update _urlparse.py docstring

* Linting

* Trim away unused codepaths

* Tweaks for path validation depending on scheme and authority presence

* Minor cleanups

* Minor cleanups

* full_path -> raw_path, for internal consistency

* Linting fixes

* Drop rfc3986 dependency

* Add test for #1833

* Linting

* Drop 'rfc3986' dependency from README and docs homepage

Co-authored-by: Thomas Grainger <tagrain@gmail.com>
2023-01-10 10:36:15 +00:00

233 lines
6.2 KiB
Python

import pytest
import httpx
from httpx._urlparse import urlparse
def test_urlparse():
    """Parse a simple absolute URL and check each component and its string form."""
    parsed = urlparse("https://www.example.com/")

    # Scheme and authority-related components.
    assert parsed.scheme == "https"
    assert parsed.userinfo == ""
    assert parsed.netloc == "www.example.com"
    assert parsed.host == "www.example.com"
    assert parsed.port is None

    # Path, query and fragment.
    assert parsed.path == "/"
    assert parsed.query is None
    assert parsed.fragment is None

    # Rendering the result gives back the input URL.
    assert str(parsed) == "https://www.example.com/"
def test_urlparse_no_scheme():
    """A URL starting with '://' has an empty scheme but still yields a host."""
    parsed = urlparse("://example.com")

    assert parsed.scheme == ""
    assert parsed.host == "example.com"
    assert parsed.path == ""
def test_urlparse_no_authority():
    """A URL with a scheme and nothing after '//' has an empty host and path."""
    parsed = urlparse("http://")

    assert parsed.scheme == "http"
    assert parsed.host == ""
    assert parsed.path == ""
# Tests for different host types
def test_urlparse_valid_host():
    """A plain lowercase hostname is exposed unchanged as `host`."""
    parsed = urlparse("https://example.com/")
    assert parsed.host == "example.com"
def test_urlparse_normalized_host():
    """An uppercase hostname is exposed in lowercase as `host`."""
    parsed = urlparse("https://EXAMPLE.com/")
    assert parsed.host == "example.com"
def test_urlparse_valid_ipv4():
    """A dotted-quad IPv4 address is accepted as the host."""
    parsed = urlparse("https://1.2.3.4/")
    assert parsed.host == "1.2.3.4"
def test_urlparse_invalid_ipv4():
    """An out-of-range dotted-quad host raises `InvalidURL`."""
    with pytest.raises(httpx.InvalidURL) as exc_info:
        urlparse("https://999.999.999.999/")

    # Checked after the `with` block, once the exception has been captured.
    message = str(exc_info.value)
    assert message == "Invalid IPv4 address"
def test_urlparse_valid_ipv6():
    """A bracketed IPv6 literal is accepted; `host` omits the brackets."""
    parsed = urlparse("https://[2001:db8::ff00:42:8329]/")
    assert parsed.host == "2001:db8::ff00:42:8329"
def test_urlparse_invalid_ipv6():
    """A bracketed host that is not a valid IPv6 address raises `InvalidURL`."""
    with pytest.raises(httpx.InvalidURL) as exc_info:
        urlparse("https://[2001]/")

    # Checked after the `with` block, once the exception has been captured.
    message = str(exc_info.value)
    assert message == "Invalid IPv6 address"
def test_urlparse_unescaped_idna_host():
    """A non-ASCII hostname is exposed in its ASCII 'xn--' encoded form."""
    parsed = urlparse("https://中国.icom.museum/")
    assert parsed.host == "xn--fiqs8s.icom.museum"
def test_urlparse_escaped_idna_host():
    """A hostname already in 'xn--' encoded form is exposed unchanged."""
    parsed = urlparse("https://xn--fiqs8s.icom.museum/")
    assert parsed.host == "xn--fiqs8s.icom.museum"
def test_urlparse_invalid_idna_host():
    """A hostname that cannot be IDNA-encoded raises `InvalidURL`."""
    with pytest.raises(httpx.InvalidURL) as exc_info:
        urlparse("https://☃.com/")

    # Checked after the `with` block, once the exception has been captured.
    message = str(exc_info.value)
    assert message == "Invalid IDNA hostname"
# Tests for different port types
def test_urlparse_valid_port():
    """An explicit numeric port is exposed as an integer."""
    parsed = urlparse("https://example.com:123/")
    assert parsed.port == 123
def test_urlparse_normalized_port():
    """A port equal to the scheme's default is normalized to `None`."""
    # 443 is given explicitly on an https URL here.
    parsed = urlparse("https://example.com:443/")
    assert parsed.port is None
def test_urlparse_invalid_port():
    """A non-numeric port raises `InvalidURL`."""
    with pytest.raises(httpx.InvalidURL) as exc_info:
        urlparse("https://example.com:abc/")

    # Checked after the `with` block, once the exception has been captured.
    message = str(exc_info.value)
    assert message == "Invalid port"
# Tests for path handling
def test_urlparse_normalized_path():
    """'.' and '..' segments inside an absolute URL's path are resolved."""
    parsed = urlparse("https://example.com/abc/def/../ghi/./jkl")
    assert parsed.path == "/abc/ghi/jkl"
def test_urlparse_escaped_path():
    """A space and a non-ASCII character in the path are percent-encoded."""
    parsed = urlparse("https://example.com/ /🌟/")
    assert parsed.path == "/%20/%F0%9F%8C%9F/"
def test_urlparse_leading_dot_prefix_on_absolute_url():
    """A leading '..' segment on an absolute URL's path is dropped."""
    parsed = urlparse("https://example.com/../abc")
    assert parsed.path == "/abc"
def test_urlparse_leading_dot_prefix_on_relative_url():
    """A leading '..' segment on a relative URL's path is kept as-is."""
    parsed = urlparse("../abc")
    assert parsed.path == "../abc"
# Tests for invalid URLs
def test_urlparse_excessively_long_url():
    """A URL string with a 100,000-character tail is rejected as too long."""
    oversized_url = "https://www.example.com/" + "x" * 100_000

    with pytest.raises(httpx.InvalidURL) as exc_info:
        urlparse(oversized_url)

    message = str(exc_info.value)
    assert message == "URL too long"
def test_urlparse_excessively_long_component():
    """An oversized `path` keyword component is rejected, naming the component."""
    oversized_path = "/" + "x" * 100_000

    with pytest.raises(httpx.InvalidURL) as exc_info:
        urlparse("https://www.example.com", path=oversized_path)

    message = str(exc_info.value)
    assert message == "URL component 'path' too long"
def test_urlparse_non_printing_character_in_url():
    """A newline inside the URL string raises `InvalidURL`."""
    with pytest.raises(httpx.InvalidURL) as exc_info:
        urlparse("https://www.example.com/\n")

    message = str(exc_info.value)
    assert message == "Invalid non-printable ASCII character in URL"
def test_urlparse_non_printing_character_in_component():
    """A newline inside the `path` keyword component raises `InvalidURL`."""
    with pytest.raises(httpx.InvalidURL) as exc_info:
        urlparse("https://www.example.com", path="/\n")

    # The message names the offending component.
    message = str(exc_info.value)
    expected = "Invalid non-printable ASCII character in URL component 'path'"
    assert message == expected
# Test for urlparse components
def test_urlparse_with_components():
    """Build a URL purely from keyword components and check the parsed result."""
    parsed = urlparse(scheme="https", host="www.example.com", path="/")

    # Components that were passed in explicitly.
    assert parsed.scheme == "https"
    assert parsed.userinfo == ""
    assert parsed.host == "www.example.com"

    # Components that were not supplied.
    assert parsed.port is None
    assert parsed.path == "/"
    assert parsed.query is None
    assert parsed.fragment is None

    # The components render as a complete URL.
    assert str(parsed) == "https://www.example.com/"
def test_urlparse_with_invalid_component():
    """An unrecognised keyword argument raises `TypeError`."""
    with pytest.raises(TypeError) as exc_info:
        urlparse(scheme="https", host="www.example.com", incorrect="/")

    message = str(exc_info.value)
    expected = "'incorrect' is an invalid keyword argument for urlparse()"
    assert message == expected
def test_urlparse_with_invalid_scheme():
    """A `scheme` component of '~' raises `InvalidURL`."""
    with pytest.raises(httpx.InvalidURL) as exc_info:
        urlparse(scheme="~", host="www.example.com", path="/")

    message = str(exc_info.value)
    assert message == "Invalid URL component 'scheme'"
def test_urlparse_with_invalid_path():
    """Three invalid `path` combinations each raise `InvalidURL` with their own message."""
    # Case 1: scheme and host given, but the path does not start with '/'.
    with pytest.raises(httpx.InvalidURL) as exc_info:
        urlparse(scheme="https", host="www.example.com", path="abc")
    message = str(exc_info.value)
    assert message == "For absolute URLs, path must be empty or begin with '/'"

    # Case 2: no authority given, but the path starts with '//'.
    with pytest.raises(httpx.InvalidURL) as exc_info:
        urlparse(path="//abc")
    message = str(exc_info.value)
    expected = "URLs with no authority component cannot have a path starting with '//'"
    assert message == expected

    # Case 3: no scheme given, but the path starts with ':'.
    with pytest.raises(httpx.InvalidURL) as exc_info:
        urlparse(path=":abc")
    message = str(exc_info.value)
    expected = "URLs with no scheme component cannot have a path starting with ':'"
    assert message == expected
def test_urlparse_with_relative_path():
    """A path without a leading '/' is accepted when no scheme or host is given."""
    # The same path would be rejected for an absolute URL; as a relative
    # URL it is valid.
    parsed = urlparse(path="abc")
    assert parsed.path == "abc"
# Tests for accessing and modifying `urlparse` results.
def test_copy_with():
    """Apply successive `copy_with()` calls and check the rendered URL after each."""
    base = urlparse("https://www.example.com/")
    assert str(base) == "https://www.example.com/"

    # No arguments: the copy renders identically.
    unchanged = base.copy_with()
    assert str(unchanged) == "https://www.example.com/"

    # Replace the scheme only.
    with_scheme = unchanged.copy_with(scheme="http")
    assert str(with_scheme) == "http://www.example.com/"

    # Replace the netloc, keeping the new scheme.
    with_netloc = with_scheme.copy_with(netloc="example.com")
    assert str(with_netloc) == "http://example.com/"

    # Replace the path, keeping the earlier changes.
    with_path = with_netloc.copy_with(path="/abc")
    assert str(with_path) == "http://example.com/abc"