Code comments for URL model (#1602)

This commit is contained in:
Tom Christie 2021-04-27 11:23:52 +01:00 committed by GitHub
parent e67b0dd15b
commit c927f3e965
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -262,12 +262,20 @@ class URL:
"""
Either `<host>` or `<host>:<port>` as bytes.
Always normalized to lowercase, and IDNA encoded.
The port component is not included if it is the default for an
"http://" or "https://" URL.
This property may be used for generating the value of a request
"Host" header.
See: https://tools.ietf.org/html/rfc3986#section-3.2.3
"""
host = self._uri_reference.host or ""
port = self._uri_reference.port
netloc = host.encode("ascii")
if port:
netloc = netloc + b":" + str(port).encode("ascii")
netloc = netloc + b":" + port.encode("ascii")
return netloc
@property
@ -331,13 +339,13 @@ class URL:
The URL fragments, as used in HTML anchors.
As a string, without the leading '#'.
"""
return self._uri_reference.fragment or ""
return unquote(self._uri_reference.fragment or "")
@property
def raw(self) -> RawURL:
"""
The URL in the raw representation used by the low level
transport API. For example, see `httpcore`.
transport API. See `BaseTransport.handle_request`.
Provides the (scheme, host, port, target) for the outgoing request.
"""
@ -393,6 +401,11 @@ class URL:
"fragment": str,
"params": object,
}
# Step 1
# ======
#
# Perform type checking for all supported keyword arguments.
for key, value in kwargs.items():
if key not in allowed:
message = f"{key!r} is an invalid keyword argument for copy_with()"
@ -403,21 +416,25 @@ class URL:
message = f"Argument {key!r} must be {expected} but got {seen}"
raise TypeError(message)
# Replace username, password, userinfo, host, port, netloc with "authority" for rfc3986
# Step 2
# ======
#
# Consolidate "username", "password", "userinfo", "host", "port" and "netloc"
# into a single "authority" keyword, for `rfc3986`.
if "username" in kwargs or "password" in kwargs:
# Consolidate username and password into userinfo.
# Consolidate "username" and "password" into "userinfo".
username = quote(kwargs.pop("username", self.username) or "")
password = quote(kwargs.pop("password", self.password) or "")
userinfo = f"{username}:{password}" if password else username
kwargs["userinfo"] = userinfo.encode("ascii")
if "host" in kwargs or "port" in kwargs:
# Consolidate host and port into netloc.
# Consolidate "host" and "port" into "netloc".
host = kwargs.pop("host", self.host) or ""
port = kwargs.pop("port", self.port)
if host and ":" in host and host[0] != "[":
# it's an IPv6 address, so it should be hidden under bracket
# IPv6 addresses need to be escaped within sqaure brackets.
host = f"[{host}]"
kwargs["netloc"] = (
@ -427,31 +444,65 @@ class URL:
)
if "userinfo" in kwargs or "netloc" in kwargs:
# Consolidate userinfo and netloc into authority.
# Consolidate "userinfo" and "netloc" into authority.
userinfo = (kwargs.pop("userinfo", self.userinfo) or b"").decode("ascii")
netloc = (kwargs.pop("netloc", self.netloc) or b"").decode("ascii")
authority = f"{userinfo}@{netloc}" if userinfo else netloc
kwargs["authority"] = authority
# Step 3
# ======
#
# Wrangle any "path", "query", "raw_path" and "params" keywords into
# "query" and "path" keywords for `rfc3986`.
if "raw_path" in kwargs:
# If "raw_path" is included, then split it into "path" and "query" components.
raw_path = kwargs.pop("raw_path") or b""
path, has_query, query = raw_path.decode("ascii").partition("?")
kwargs["path"] = path
kwargs["query"] = query if has_query else None
else:
# Ensure path=<url quoted str> for rfc3986
if kwargs.get("path") is not None:
# Ensure `kwargs["path"] = <url quoted str>` for `rfc3986`.
kwargs["path"] = quote(kwargs["path"])
if kwargs.get("query") is not None:
# Ensure query=<str> for rfc3986
# Ensure `kwargs["query"] = <str>` for `rfc3986`.
#
# Note that `.copy_with(query=None)` and `.copy_with(query=b"")`
# are subtly different. The `None` style will not include an empty
# trailing "?" character.
kwargs["query"] = kwargs["query"].decode("ascii")
if "params" in kwargs:
# Replace any "params" keyword with the raw "query" instead.
#
# Ensure that empty params use `kwargs["query"] = None` rather
# than `kwargs["query"] = ""`, so that generated URLs do not
# include an empty trailing "?".
params = kwargs.pop("params")
kwargs["query"] = None if not params else str(QueryParams(params))
# Step 4
# ======
#
# Ensure any fragment component is quoted.
if kwargs.get("fragment") is not None:
kwargs["fragment"] = quote(kwargs["fragment"])
# Step 5
# ======
#
# At this point kwargs may include keys for "scheme", "authority", "path",
# "query" and "fragment". Together these constitute the entire URL.
#
# See https://tools.ietf.org/html/rfc3986#section-3
#
# foo://example.com:8042/over/there?name=ferret#nose
# \_/ \______________/\_________/ \_________/ \__/
# | | | | |
# scheme authority path query fragment
return URL(self._uri_reference.copy_with(**kwargs).unsplit())
def copy_set_param(self, key: str, value: typing.Any = None) -> "URL":
@ -505,6 +556,8 @@ class URL:
class_name = self.__class__.__name__
url_str = str(self)
if self._uri_reference.userinfo:
# Mask any password component in the URL representation, to lower the
# risk of unintended leakage, such as in debug information and logging.
username = quote(self.username)
url_str = (
rfc3986.urlparse(url_str)