feat: implement validate_path function for URL path validation
This commit is contained in:
parent
3fcb66f44e
commit
43a7852f6e
@ -175,7 +175,28 @@ def quote(string: str, safe: str) -> str:
|
||||
|
||||
def unquote(value: str) -> str: ...
|
||||
def find_ascii_non_printable(s: str) -> typing.Optional[int]: ...
|
||||
def validate_path(path: str, has_scheme: bool, has_authority: bool) -> None: ...
|
||||
def validate_path(path: str, has_scheme: bool, has_authority: bool) -> None:
|
||||
"""
|
||||
Path validation rules that depend on whether the URL contains
|
||||
a scheme or authority component.
|
||||
|
||||
See https://datatracker.ietf.org/doc/html/rfc3986.html#section-3.3
|
||||
|
||||
---
|
||||
|
||||
If a URI contains an authority component, then the path component
|
||||
must either be empty or begin with a slash ("/") character.
|
||||
|
||||
---
|
||||
|
||||
If a URI does not contain an authority component, then the path cannot begin
|
||||
with two slash characters ("//").
|
||||
|
||||
---
|
||||
|
||||
In addition, a URI reference (Section 4.1) may be a relative-path reference,
|
||||
in which case the first path segment cannot contain a colon (":") character.
|
||||
"""
|
||||
|
||||
class InvalidURL(Exception):
|
||||
def __init__(self, message: str) -> None: ...
|
||||
|
||||
@ -25,7 +25,7 @@ import typing
|
||||
import idna
|
||||
|
||||
from ._exceptions import InvalidURL
|
||||
from ._httpx import find_ascii_non_printable, normalize_path, quote
|
||||
from ._httpx import find_ascii_non_printable, normalize_path, quote, validate_path
|
||||
|
||||
MAX_URL_LENGTH = 65536
|
||||
|
||||
@ -413,28 +413,3 @@ def normalize_port(port: str | int | None, scheme: str) -> int | None:
|
||||
if port_as_int == default_port:
|
||||
return None
|
||||
return port_as_int
|
||||
|
||||
|
||||
def validate_path(path: str, has_scheme: bool, has_authority: bool) -> None:
    """
    Reject a path that is not allowed for the given combination of
    URL components.

    The rules are taken from RFC 3986, section 3.3:
    https://datatracker.ietf.org/doc/html/rfc3986.html#section-3.3

    Raises `InvalidURL` when:

    * an authority is present and the path is non-empty but does not
      begin with "/";
    * neither a scheme nor an authority is present and the path begins
      with "//" or with ":".

    Returns `None` in every other case.
    """
    if has_authority:
        # With an authority, the only requirement is that the path is
        # empty or begins with "/". No other rule is checked in this case.
        if path and not path.startswith("/"):
            raise InvalidURL("For absolute URLs, path must be empty or begin with '/'")
        return

    if has_scheme:
        # A scheme without an authority is not subject to the checks below.
        return

    # Neither scheme nor authority: a leading "//" is not allowed here.
    if path.startswith("//"):
        raise InvalidURL("Relative URLs cannot have a path starting with '//'")

    # RFC 3986 forbids a colon in the first segment of a relative-path
    # reference; this check rejects only a path that begins with ":".
    if path.startswith(":"):
        raise InvalidURL("Relative URLs cannot have a path starting with ':'")
|
||||
|
||||
@ -2,3 +2,4 @@ mod py_module;
|
||||
mod urls;
|
||||
mod urlparse;
|
||||
mod models;
|
||||
mod err;
|
||||
|
||||
@ -4,8 +4,9 @@ use pyo3::prelude::*;
|
||||
mod _httpx {
|
||||
#[pymodule_export]
|
||||
use crate::{
|
||||
err::{CookieConflict, InvalidUrl},
|
||||
models::utils::unquote,
|
||||
urlparse::{find_ascii_non_printable, normalize_path, quote},
|
||||
urlparse::{find_ascii_non_printable, normalize_path, quote, validate_path},
|
||||
urls::QueryParams,
|
||||
};
|
||||
}
|
||||
|
||||
@ -1,5 +1,7 @@
|
||||
use pyo3::prelude::*;
|
||||
|
||||
use crate::err::InvalidUrl;
|
||||
|
||||
#[pyfunction]
|
||||
pub fn normalize_path(path: &str) -> String {
|
||||
if !path.contains(".") {
|
||||
@ -92,3 +94,21 @@ impl PercentEncoded for &str {
|
||||
quote(self, safe)
|
||||
}
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn validate_path(path: &str, has_scheme: bool, has_authority: bool) -> PyResult<()> {
|
||||
if has_authority && !path.is_empty() && !path.starts_with('/') {
|
||||
return Err(InvalidUrl::new("For absolute URLs, path must be empty or begin with '/'").into());
|
||||
}
|
||||
|
||||
if !has_scheme && !has_authority {
|
||||
if path.starts_with("//") {
|
||||
return Err(InvalidUrl::new("Relative URLs cannot have a path starting with '//'").into());
|
||||
}
|
||||
if path.starts_with(':') {
|
||||
return Err(InvalidUrl::new("Relative URLs cannot have a path starting with ':'").into());
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user