From 6e38ea942667a02a7c2219884e420b7932246764 Mon Sep 17 00:00:00 2001 From: Bakyt Niiazaliev Date: Fri, 13 Jun 2025 18:52:07 +0700 Subject: [PATCH] feat: implement quote function for percent-encoding strings and update imports --- python/httpx/_httpx/__init__.pyi | 11 +++++++++- python/httpx/_urlparse.py | 35 +------------------------------- src/py_module.rs | 2 +- src/urlparse.rs | 32 ++++++++++++++++++++++++++++- 4 files changed, 43 insertions(+), 37 deletions(-) diff --git a/python/httpx/_httpx/__init__.pyi b/python/httpx/_httpx/__init__.pyi index 7bb65339..e0a0a866 100644 --- a/python/httpx/_httpx/__init__.pyi +++ b/python/httpx/_httpx/__init__.pyi @@ -162,5 +162,14 @@ def normalize_path(path: str) -> str: normalize_path("/path/./to/somewhere/..") == "/path/to" """ +def quote(string: str, safe: str) -> str: + """ + Use percent-encoding to quote a string, omitting existing '%xx' escape sequences. -def percent_encoded(string: str, safe: str) -> str:... + See: https://www.rfc-editor.org/rfc/rfc3986#section-2.1 + + * `string`: The string to be percent-escaped. + * `safe`: A string containing characters that may be treated as safe, and do not + need to be escaped. Unreserved characters are always treated as safe. + See: https://www.rfc-editor.org/rfc/rfc3986#section-2.3 + """ diff --git a/python/httpx/_urlparse.py b/python/httpx/_urlparse.py index 21d9483b..8d0f6429 100644 --- a/python/httpx/_urlparse.py +++ b/python/httpx/_urlparse.py @@ -25,7 +25,7 @@ import typing import idna from ._exceptions import InvalidURL -from ._httpx import normalize_path, percent_encoded +from ._httpx import normalize_path, quote MAX_URL_LENGTH = 65536 @@ -443,36 +443,3 @@ def validate_path(path: str, has_scheme: bool, has_authority: bool) -> None: # in which case the first path segment cannot contain a colon (":") character. 
if path.startswith(":"): raise InvalidURL("Relative URLs cannot have a path starting with ':'") - - -def quote(string: str, safe: str) -> str: - """ - Use percent-encoding to quote a string, omitting existing '%xx' escape sequences. - - See: https://www.rfc-editor.org/rfc/rfc3986#section-2.1 - - * `string`: The string to be percent-escaped. - * `safe`: A string containing characters that may be treated as safe, and do not - need to be escaped. Unreserved characters are always treated as safe. - See: https://www.rfc-editor.org/rfc/rfc3986#section-2.3 - """ - parts = [] - current_position = 0 - for match in re.finditer(PERCENT_ENCODED_REGEX, string): - start_position, end_position = match.start(), match.end() - matched_text = match.group(0) - # Add any text up to the '%xx' escape sequence. - if start_position != current_position: - leading_text = string[current_position:start_position] - parts.append(percent_encoded(leading_text, safe=safe)) - - # Add the '%xx' escape sequence. - parts.append(matched_text) - current_position = end_position - - # Add any text after the final '%xx' escape sequence. 
- if current_position != len(string): - trailing_text = string[current_position:] - parts.append(percent_encoded(trailing_text, safe=safe)) - - return "".join(parts) diff --git a/src/py_module.rs b/src/py_module.rs index dabc46fa..a4c8ec26 100644 --- a/src/py_module.rs +++ b/src/py_module.rs @@ -4,7 +4,7 @@ use pyo3::prelude::*; mod _httpx { #[pymodule_export] use crate::{ - urlparse::{percent_encoded, normalize_path}, + urlparse::{normalize_path, quote}, urls::QueryParams, }; } diff --git a/src/urlparse.rs b/src/urlparse.rs index 621fb6e0..9bb46c56 100644 --- a/src/urlparse.rs +++ b/src/urlparse.rs @@ -27,7 +27,6 @@ pub fn normalize_path(path: &str) -> String { const UNRESERVED_CHARS: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~"; -#[pyfunction] pub fn percent_encoded(string: &str, safe: &str) -> String { let safe = safe.as_bytes(); string @@ -41,3 +40,34 @@ pub fn percent_encoded(string: &str, safe: &str) -> String { }) .collect::<String>() } + +fn is_percent_encoded(s: &[u8]) -> bool { + s.len() == 3 && s[0] == b'%' && s[1].is_ascii_hexdigit() && s[2].is_ascii_hexdigit() +} + +#[pyfunction] +pub fn quote(string: &str, safe: &str) -> String { + let s = string.as_bytes(); + let mut result = String::with_capacity(s.len()); + + let mut start = 0; + let mut i = 0; + while i < s.len() { + if i + 2 < s.len() && s[i] == b'%' && is_percent_encoded(&s[i..i + 3]) { + if start < i { + result.push_str(&percent_encoded(&string[start..i], safe)); + } + result.push_str(&string[i..i + 3]); + i += 3; + start = i; + } else { + i += 1; + } + } + + if start < s.len() { + result.push_str(&percent_encoded(&string[start..], safe)); + } + + result +}