feat: implement quote function for percent-encoding strings and update imports

This commit is contained in:
Bakyt Niiazaliev 2025-06-13 18:52:07 +07:00
parent 6e4e9d429f
commit 6e38ea9426
4 changed files with 43 additions and 37 deletions

View File

@ -162,5 +162,14 @@ def normalize_path(path: str) -> str:
normalize_path("/path/./to/somewhere/..") == "/path/to"
"""
def quote(string: str, safe: str) -> str:
"""
Use percent-encoding to quote a string, omitting existing '%xx' escape sequences.
def percent_encoded(string: str, safe: str) -> str:...
See: https://www.rfc-editor.org/rfc/rfc3986#section-2.1
* `string`: The string to be percent-escaped.
* `safe`: A string containing characters that may be treated as safe, and do not
need to be escaped. Unreserved characters are always treated as safe.
See: https://www.rfc-editor.org/rfc/rfc3986#section-2.3
"""

View File

@ -25,7 +25,7 @@ import typing
import idna
from ._exceptions import InvalidURL
from ._httpx import normalize_path, percent_encoded
from ._httpx import normalize_path, quote
MAX_URL_LENGTH = 65536
@ -443,36 +443,3 @@ def validate_path(path: str, has_scheme: bool, has_authority: bool) -> None:
# in which case the first path segment cannot contain a colon (":") character.
if path.startswith(":"):
raise InvalidURL("Relative URLs cannot have a path starting with ':'")
def quote(string: str, safe: str) -> str:
    """
    Percent-encode `string`, leaving any '%xx' escape sequences that it
    already contains exactly as they are.

    See: https://www.rfc-editor.org/rfc/rfc3986#section-2.1

    * `string`: The text to percent-escape.
    * `safe`: Characters that are allowed through unescaped, in addition to the
      unreserved characters, which are always treated as safe.
      See: https://www.rfc-editor.org/rfc/rfc3986#section-2.3
    """
    pieces = []
    cursor = 0
    for escape in re.finditer(PERCENT_ENCODED_REGEX, string):
        # Encode the not-yet-escaped text sitting in front of this '%xx'.
        if cursor != escape.start():
            unescaped = string[cursor : escape.start()]
            pieces.append(percent_encoded(unescaped, safe=safe))

        # Copy the existing '%xx' sequence through verbatim.
        pieces.append(escape.group(0))
        cursor = escape.end()

    # Encode whatever remains after the last '%xx' sequence.
    remainder = string[cursor:]
    if remainder:
        pieces.append(percent_encoded(remainder, safe=safe))

    return "".join(pieces)

View File

@ -4,7 +4,7 @@ use pyo3::prelude::*;
mod _httpx {
#[pymodule_export]
use crate::{
urlparse::{percent_encoded, normalize_path},
urlparse::{normalize_path, quote},
urls::QueryParams,
};
}

View File

@ -27,7 +27,6 @@ pub fn normalize_path(path: &str) -> String {
const UNRESERVED_CHARS: &[u8] =
b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~";
#[pyfunction]
pub fn percent_encoded(string: &str, safe: &str) -> String {
let safe = safe.as_bytes();
string
@ -41,3 +40,34 @@ pub fn percent_encoded(string: &str, safe: &str) -> String {
})
.collect::<String>()
}
/// Returns `true` when `s` is exactly one `%xx` escape sequence: a `%` byte
/// followed by two ASCII hexadecimal digits (either case).
fn is_percent_encoded(s: &[u8]) -> bool {
    // The slice pattern enforces the three-byte length and the leading '%'
    // in one step; the guard checks the two hex digits.
    matches!(
        s,
        [b'%', high, low] if high.is_ascii_hexdigit() && low.is_ascii_hexdigit()
    )
}
/// Percent-encodes `string`, leaving any `%xx` escape sequences that are
/// already present untouched.
///
/// * `string`: the text to percent-escape.
/// * `safe`: forwarded unchanged to `percent_encoded` for every run of text
///   that is not already an escape sequence.
///
/// See: https://www.rfc-editor.org/rfc/rfc3986#section-2.1
#[pyfunction]
pub fn quote(string: &str, safe: &str) -> String {
    let bytes = string.as_bytes();
    let mut quoted = String::with_capacity(bytes.len());
    // Byte offset where the current run of not-yet-encoded text begins.
    let mut pending = 0;
    let mut pos = 0;

    while pos < bytes.len() {
        // `get` yields `None` when fewer than three bytes remain, so a
        // truncated "%x" at the end of the input is never taken as an escape.
        let at_escape =
            matches!(bytes.get(pos..pos + 3), Some(window) if is_percent_encoded(window));
        if !at_escape {
            pos += 1;
            continue;
        }

        // Encode the run in front of the escape. `pos` sits on an ASCII '%'
        // and `pending` just past an ASCII escape, so both are valid `str`
        // slice boundaries.
        if pending < pos {
            quoted.push_str(&percent_encoded(&string[pending..pos], safe));
        }
        // Copy the existing escape sequence through verbatim.
        quoted.push_str(&string[pos..pos + 3]);
        pos += 3;
        pending = pos;
    }

    // Encode whatever follows the final escape sequence.
    if pending < bytes.len() {
        quoted.push_str(&percent_encoded(&string[pending..], safe));
    }
    quoted
}