diff --git a/python/httpx/_httpx/__init__.pyi b/python/httpx/_httpx/__init__.pyi index 87e90705..77ae401c 100644 --- a/python/httpx/_httpx/__init__.pyi +++ b/python/httpx/_httpx/__init__.pyi @@ -175,7 +175,28 @@ def quote(string: str, safe: str) -> str: def unquote(value: str) -> str: ... def find_ascii_non_printable(s: str) -> typing.Optional[int]: ... -def validate_path(path: str, has_scheme: bool, has_authority: bool) -> None: ... +def validate_path(path: str, has_scheme: bool, has_authority: bool) -> None: + """ + Path validation rules that depend on whether the URL contains + a scheme or authority component. + + See https://datatracker.ietf.org/doc/html/rfc3986.html#section-3.3 + + --- + + If a URI contains an authority component, then the path component + must either be empty or begin with a slash ("/") character. + + --- + + If a URI does not contain an authority component, then the path cannot begin + with two slash characters ("//"). + + --- + + In addition, a URI reference (Section 4.1) may be a relative-path reference, + in which case the first path segment cannot contain a colon (":") character. + """ class InvalidURL(Exception): def __init__(self, message: str) -> None: ... diff --git a/python/httpx/_urlparse.py b/python/httpx/_urlparse.py index b7886c7d..90508ac6 100644 --- a/python/httpx/_urlparse.py +++ b/python/httpx/_urlparse.py @@ -25,7 +25,7 @@ import typing import idna from ._exceptions import InvalidURL -from ._httpx import find_ascii_non_printable, normalize_path, quote +from ._httpx import find_ascii_non_printable, normalize_path, quote, validate_path MAX_URL_LENGTH = 65536 @@ -413,28 +413,3 @@ def normalize_port(port: str | int | None, scheme: str) -> int | None: if port_as_int == default_port: return None return port_as_int - - -def validate_path(path: str, has_scheme: bool, has_authority: bool) -> None: - """ - Path validation rules that depend on if the URL contains - a scheme or authority component. 
- - See https://datatracker.ietf.org/doc/html/rfc3986.html#section-3.3 - """ - if has_authority: - # If a URI contains an authority component, then the path component - # must either be empty or begin with a slash ("/") character." - if path and not path.startswith("/"): - raise InvalidURL("For absolute URLs, path must be empty or begin with '/'") - - if not has_scheme and not has_authority: - # If a URI does not contain an authority component, then the path cannot begin - # with two slash characters ("//"). - if path.startswith("//"): - raise InvalidURL("Relative URLs cannot have a path starting with '//'") - - # In addition, a URI reference (Section 4.1) may be a relative-path reference, - # in which case the first path segment cannot contain a colon (":") character. - if path.startswith(":"): - raise InvalidURL("Relative URLs cannot have a path starting with ':'") diff --git a/src/lib.rs b/src/lib.rs index fc8d4647..a88bd149 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,3 +2,4 @@ mod py_module; mod urls; mod urlparse; mod models; +mod err; diff --git a/src/py_module.rs b/src/py_module.rs index ec99fc7c..891a0f8a 100644 --- a/src/py_module.rs +++ b/src/py_module.rs @@ -4,8 +4,9 @@ use pyo3::prelude::*; mod _httpx { #[pymodule_export] use crate::{ + err::{CookieConflict, InvalidUrl}, models::utils::unquote, - urlparse::{find_ascii_non_printable, normalize_path, quote}, + urlparse::{find_ascii_non_printable, normalize_path, quote, validate_path}, urls::QueryParams, }; } diff --git a/src/urlparse.rs b/src/urlparse.rs index 96720870..3af1191a 100644 --- a/src/urlparse.rs +++ b/src/urlparse.rs @@ -1,5 +1,7 @@ use pyo3::prelude::*; +use crate::err::InvalidUrl; + #[pyfunction] pub fn normalize_path(path: &str) -> String { if !path.contains(".") { @@ -92,3 +94,21 @@ impl PercentEncoded for &str { quote(self, safe) } } + +#[pyfunction] +pub fn validate_path(path: &str, has_scheme: bool, has_authority: bool) -> PyResult<()> { + if has_authority && !path.is_empty() && 
!path.starts_with('/') { + return Err(InvalidUrl::new("For absolute URLs, path must be empty or begin with '/'").into()); + } + + if !has_scheme && !has_authority { + if path.starts_with("//") { + return Err(InvalidUrl::new("Relative URLs cannot have a path starting with '//'").into()); + } + if path.starts_with(':') { + return Err(InvalidUrl::new("Relative URLs cannot have a path starting with ':'").into()); + } + } + + Ok(()) +}