feat: implement validate_path function for URL path validation

This commit is contained in:
Bakyt Niiazaliev 2025-07-07 20:25:51 +07:00
parent 3fcb66f44e
commit 43a7852f6e
5 changed files with 46 additions and 28 deletions

View File

@ -175,7 +175,28 @@ def quote(string: str, safe: str) -> str:
def unquote(value: str) -> str: ...
def find_ascii_non_printable(s: str) -> typing.Optional[int]: ...
def validate_path(path: str, has_scheme: bool, has_authority: bool) -> None: ...
def validate_path(path: str, has_scheme: bool, has_authority: bool) -> None:
"""
Path validation rules that depend on whether the URL contains
a scheme or authority component.
See https://datatracker.ietf.org/doc/html/rfc3986.html#section-3.3
---
If a URI contains an authority component, then the path component
must either be empty or begin with a slash ("/") character.
---
If a URI does not contain an authority component, then the path cannot begin
with two slash characters ("//").
---
In addition, a URI reference (Section 4.1) may be a relative-path reference,
in which case the first path segment cannot contain a colon (":") character.
"""
class InvalidURL(Exception):
def __init__(self, message: str) -> None: ...

View File

@ -25,7 +25,7 @@ import typing
import idna
from ._exceptions import InvalidURL
from ._httpx import find_ascii_non_printable, normalize_path, quote
from ._httpx import find_ascii_non_printable, normalize_path, quote, validate_path
MAX_URL_LENGTH = 65536
@ -413,28 +413,3 @@ def normalize_port(port: str | int | None, scheme: str) -> int | None:
if port_as_int == default_port:
return None
return port_as_int
def validate_path(path: str, has_scheme: bool, has_authority: bool) -> None:
    """
    Enforce the path rules of RFC 3986, section 3.3, which vary with the
    presence of a scheme and/or an authority component in the URL.
    See https://datatracker.ietf.org/doc/html/rfc3986.html#section-3.3

    Returns None when the path is acceptable and raises InvalidURL otherwise.
    """
    if has_authority:
        # With an authority component, the path is either empty or rooted
        # at a slash ("/"); nothing else is checked in this case.
        if path and not path.startswith("/"):
            raise InvalidURL("For absolute URLs, path must be empty or begin with '/'")
        return

    if has_scheme:
        # The remaining rules only apply when the scheme is absent as well.
        return

    # Neither scheme nor authority: the path may not open with "//" (it would
    # read as an authority), nor with ":" as the start of its first segment.
    forbidden_openings = (
        ("//", "Relative URLs cannot have a path starting with '//'"),
        (":", "Relative URLs cannot have a path starting with ':'"),
    )
    for opening, message in forbidden_openings:
        if path.startswith(opening):
            raise InvalidURL(message)

View File

@ -2,3 +2,4 @@ mod py_module;
mod urls;
mod urlparse;
mod models;
mod err;

View File

@ -4,8 +4,9 @@ use pyo3::prelude::*;
mod _httpx {
#[pymodule_export]
use crate::{
err::{CookieConflict, InvalidUrl},
models::utils::unquote,
urlparse::{find_ascii_non_printable, normalize_path, quote},
urlparse::{find_ascii_non_printable, normalize_path, quote, validate_path},
urls::QueryParams,
};
}

View File

@ -1,5 +1,7 @@
use pyo3::prelude::*;
use crate::err::InvalidUrl;
#[pyfunction]
pub fn normalize_path(path: &str) -> String {
if !path.contains(".") {
@ -92,3 +94,21 @@ impl PercentEncoded for &str {
quote(self, safe)
}
}
/// Validates a URL path against rules that depend on whether the URL has a
/// scheme and/or an authority component (RFC 3986, section 3.3).
///
/// * With an authority, the path must be empty or begin with `/`.
/// * With neither a scheme nor an authority, the path must not begin with
///   `//` or with `:`.
///
/// # Errors
///
/// Returns an error built from [`InvalidUrl`] carrying a message that names
/// the violated rule.
#[pyfunction]
pub fn validate_path(path: &str, has_scheme: bool, has_authority: bool) -> PyResult<()> {
    // Pick the message for the first violated rule, if any. Arms are tried in
    // order, so the "//" check takes precedence over the ":" check.
    let complaint = match (has_authority, has_scheme) {
        (true, _) if !(path.is_empty() || path.starts_with('/')) => {
            Some("For absolute URLs, path must be empty or begin with '/'")
        }
        (false, false) if path.starts_with("//") => {
            Some("Relative URLs cannot have a path starting with '//'")
        }
        (false, false) if path.starts_with(':') => {
            Some("Relative URLs cannot have a path starting with ':'")
        }
        _ => None,
    };

    match complaint {
        Some(message) => Err(InvalidUrl::new(message).into()),
        None => Ok(()),
    }
}