Code comments for URL model (#1602)
This commit is contained in:
parent
e67b0dd15b
commit
c927f3e965
@ -262,12 +262,20 @@ class URL:
|
||||
"""
|
||||
Either `<host>` or `<host>:<port>` as bytes.
|
||||
Always normalized to lowercase, and IDNA encoded.
|
||||
|
||||
The port component is not included if it is the default for an
|
||||
"http://" or "https://" URL.
|
||||
|
||||
This property may be used for generating the value of a request
|
||||
"Host" header.
|
||||
|
||||
See: https://tools.ietf.org/html/rfc3986#section-3.2.3
|
||||
"""
|
||||
host = self._uri_reference.host or ""
|
||||
port = self._uri_reference.port
|
||||
netloc = host.encode("ascii")
|
||||
if port:
|
||||
netloc = netloc + b":" + str(port).encode("ascii")
|
||||
netloc = netloc + b":" + port.encode("ascii")
|
||||
return netloc
|
||||
|
||||
@property
|
||||
@ -331,13 +339,13 @@ class URL:
|
||||
The URL fragments, as used in HTML anchors.
|
||||
As a string, without the leading '#'.
|
||||
"""
|
||||
return self._uri_reference.fragment or ""
|
||||
return unquote(self._uri_reference.fragment or "")
|
||||
|
||||
@property
|
||||
def raw(self) -> RawURL:
|
||||
"""
|
||||
The URL in the raw representation used by the low level
|
||||
transport API. For example, see `httpcore`.
|
||||
transport API. See `BaseTransport.handle_request`.
|
||||
|
||||
Provides the (scheme, host, port, target) for the outgoing request.
|
||||
"""
|
||||
@ -393,6 +401,11 @@ class URL:
|
||||
"fragment": str,
|
||||
"params": object,
|
||||
}
|
||||
|
||||
# Step 1
|
||||
# ======
|
||||
#
|
||||
# Perform type checking for all supported keyword arguments.
|
||||
for key, value in kwargs.items():
|
||||
if key not in allowed:
|
||||
message = f"{key!r} is an invalid keyword argument for copy_with()"
|
||||
@ -403,21 +416,25 @@ class URL:
|
||||
message = f"Argument {key!r} must be {expected} but got {seen}"
|
||||
raise TypeError(message)
|
||||
|
||||
# Replace username, password, userinfo, host, port, netloc with "authority" for rfc3986
|
||||
# Step 2
|
||||
# ======
|
||||
#
|
||||
# Consolidate "username", "password", "userinfo", "host", "port" and "netloc"
|
||||
# into a single "authority" keyword, for `rfc3986`.
|
||||
if "username" in kwargs or "password" in kwargs:
|
||||
# Consolidate username and password into userinfo.
|
||||
# Consolidate "username" and "password" into "userinfo".
|
||||
username = quote(kwargs.pop("username", self.username) or "")
|
||||
password = quote(kwargs.pop("password", self.password) or "")
|
||||
userinfo = f"{username}:{password}" if password else username
|
||||
kwargs["userinfo"] = userinfo.encode("ascii")
|
||||
|
||||
if "host" in kwargs or "port" in kwargs:
|
||||
# Consolidate host and port into netloc.
|
||||
# Consolidate "host" and "port" into "netloc".
|
||||
host = kwargs.pop("host", self.host) or ""
|
||||
port = kwargs.pop("port", self.port)
|
||||
|
||||
if host and ":" in host and host[0] != "[":
|
||||
# it's an IPv6 address, so it should be hidden under bracket
|
||||
# IPv6 addresses need to be escaped within sqaure brackets.
|
||||
host = f"[{host}]"
|
||||
|
||||
kwargs["netloc"] = (
|
||||
@ -427,31 +444,65 @@ class URL:
|
||||
)
|
||||
|
||||
if "userinfo" in kwargs or "netloc" in kwargs:
|
||||
# Consolidate userinfo and netloc into authority.
|
||||
# Consolidate "userinfo" and "netloc" into authority.
|
||||
userinfo = (kwargs.pop("userinfo", self.userinfo) or b"").decode("ascii")
|
||||
netloc = (kwargs.pop("netloc", self.netloc) or b"").decode("ascii")
|
||||
authority = f"{userinfo}@{netloc}" if userinfo else netloc
|
||||
kwargs["authority"] = authority
|
||||
|
||||
# Step 3
|
||||
# ======
|
||||
#
|
||||
# Wrangle any "path", "query", "raw_path" and "params" keywords into
|
||||
# "query" and "path" keywords for `rfc3986`.
|
||||
if "raw_path" in kwargs:
|
||||
# If "raw_path" is included, then split it into "path" and "query" components.
|
||||
raw_path = kwargs.pop("raw_path") or b""
|
||||
path, has_query, query = raw_path.decode("ascii").partition("?")
|
||||
kwargs["path"] = path
|
||||
kwargs["query"] = query if has_query else None
|
||||
|
||||
else:
|
||||
# Ensure path=<url quoted str> for rfc3986
|
||||
if kwargs.get("path") is not None:
|
||||
# Ensure `kwargs["path"] = <url quoted str>` for `rfc3986`.
|
||||
kwargs["path"] = quote(kwargs["path"])
|
||||
|
||||
if kwargs.get("query") is not None:
|
||||
# Ensure query=<str> for rfc3986
|
||||
# Ensure `kwargs["query"] = <str>` for `rfc3986`.
|
||||
#
|
||||
# Note that `.copy_with(query=None)` and `.copy_with(query=b"")`
|
||||
# are subtly different. The `None` style will not include an empty
|
||||
# trailing "?" character.
|
||||
kwargs["query"] = kwargs["query"].decode("ascii")
|
||||
|
||||
if "params" in kwargs:
|
||||
# Replace any "params" keyword with the raw "query" instead.
|
||||
#
|
||||
# Ensure that empty params use `kwargs["query"] = None` rather
|
||||
# than `kwargs["query"] = ""`, so that generated URLs do not
|
||||
# include an empty trailing "?".
|
||||
params = kwargs.pop("params")
|
||||
kwargs["query"] = None if not params else str(QueryParams(params))
|
||||
|
||||
# Step 4
|
||||
# ======
|
||||
#
|
||||
# Ensure any fragment component is quoted.
|
||||
if kwargs.get("fragment") is not None:
|
||||
kwargs["fragment"] = quote(kwargs["fragment"])
|
||||
|
||||
# Step 5
|
||||
# ======
|
||||
#
|
||||
# At this point kwargs may include keys for "scheme", "authority", "path",
|
||||
# "query" and "fragment". Together these constitute the entire URL.
|
||||
#
|
||||
# See https://tools.ietf.org/html/rfc3986#section-3
|
||||
#
|
||||
# foo://example.com:8042/over/there?name=ferret#nose
|
||||
# \_/ \______________/\_________/ \_________/ \__/
|
||||
# | | | | |
|
||||
# scheme authority path query fragment
|
||||
return URL(self._uri_reference.copy_with(**kwargs).unsplit())
|
||||
|
||||
def copy_set_param(self, key: str, value: typing.Any = None) -> "URL":
|
||||
@ -505,6 +556,8 @@ class URL:
|
||||
class_name = self.__class__.__name__
|
||||
url_str = str(self)
|
||||
if self._uri_reference.userinfo:
|
||||
# Mask any password component in the URL representation, to lower the
|
||||
# risk of unintended leakage, such as in debug information and logging.
|
||||
username = quote(self.username)
|
||||
url_str = (
|
||||
rfc3986.urlparse(url_str)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user