# httpx/httpcore/models.py

import cgi
import typing

import chardet
import rfc3986

from .config import SSLConfig, TimeoutConfig
from .decoders import (
    ACCEPT_ENCODING,
    SUPPORTED_DECODERS,
    Decoder,
    IdentityDecoder,
    MultiDecoder,
)
from .exceptions import InvalidURL, ResponseClosed, ResponseNotRead, StreamConsumed
from .status_codes import codes
from .utils import (
    get_reason_phrase,
    is_known_encoding,
    normalize_header_key,
    normalize_header_value,
)

# A URL may be supplied either as a `URL` instance or as a plain string.
URLTypes = typing.Union["URL", str]

# Headers may be supplied as a `Headers` instance, as a dict, or as a list of
# two-item tuples. `typing.AnyStr` permits `str` or `bytes` keys and values.
HeaderTypes = typing.Union[
    "Headers",
    typing.Dict[typing.AnyStr, typing.AnyStr],
    typing.List[typing.Tuple[typing.AnyStr, typing.AnyStr]],
]

# Body content: either a complete byte string, or an async iterator that
# yields the body as byte chunks.
ByteOrByteStream = typing.Union[bytes, typing.AsyncIterator[bytes]]


class URL:
    """
    A parsed URL, wrapping an `rfc3986` URI reference held in `components`.

    Unless `allow_relative` is set, the URL must carry an "http" or "https"
    scheme and a hostname.
    """

    def __init__(self, url: URLTypes, allow_relative: bool = False) -> None:
        """
        Build a URL from a string, an `rfc3986` URI reference, or another URL.

        Strings are parsed and normalized, URI references are used as given,
        and any other object is expected to expose a `components` attribute.

        Raises `InvalidURL` if `allow_relative` is false and the URL has no
        scheme, a scheme other than "http"/"https", or no hostname.
        """
        if isinstance(url, str):
            self.components = rfc3986.api.uri_reference(url).normalize()
        elif isinstance(url, rfc3986.uri.URIReference):
            self.components = url
        else:
            self.components = url.components

        if not allow_relative:
            if not self.scheme:
                raise InvalidURL("No scheme included in URL.")
            if self.scheme not in ("http", "https"):
                raise InvalidURL('URL scheme must be "http" or "https".')
            if not self.host:
                raise InvalidURL("No hostname included in URL.")

    @property
    def scheme(self) -> str:
        """The URL scheme, or an empty string if there is none."""
        return self.components.scheme or ""

    @property
    def authority(self) -> str:
        """The authority component, or an empty string if there is none."""
        return self.components.authority or ""

    @property
    def path(self) -> str:
        """The path component, defaulting to "/" when it is empty."""
        return self.components.path or "/"

    @property
    def query(self) -> str:
        """The query string (without "?"), or an empty string."""
        return self.components.query or ""

    @property
    def fragment(self) -> str:
        """The fragment (without "#"), or an empty string."""
        return self.components.fragment or ""

    @property
    def host(self) -> str:
        """The hostname, or an empty string if there is none."""
        return self.components.host or ""

    @property
    def port(self) -> int:
        """
        The port number as an integer.

        Falls back to 443 for "https" and 80 for "http" when the URL has no
        explicit port. Any other scheme without an explicit port raises
        `KeyError`.
        """
        port = self.components.port
        if port is None:
            return {"https": 443, "http": 80}[self.scheme]
        # The parsed components carry the port as the text extracted from the
        # authority, so coerce it. Otherwise "80" and the default 80 would
        # compare (and hash) differently, e.g. in `Origin`.
        return int(port)

    @property
    def full_path(self) -> str:
        """The path plus, when present, "?" and the query string."""
        # `self.path` already substitutes "/" for an empty path.
        path = self.path
        query = self.query
        if query:
            return path + "?" + query
        return path

    @property
    def is_secure(self) -> bool:
        """True if the scheme is "https"."""
        return self.components.scheme == "https"

    @property
    def is_absolute(self) -> bool:
        """Whether the underlying URI reference reports itself as absolute."""
        return self.components.is_absolute()

    @property
    def origin(self) -> "Origin":
        """The `Origin` built from this URL."""
        return Origin(self)

    def copy_with(self, **kwargs: typing.Any) -> "URL":
        """
        Return a new URL with the given components replaced.

        Keyword arguments are passed to the underlying reference's
        `copy_with()`. The result is validated as a non-relative URL.
        """
        return URL(self.components.copy_with(**kwargs))

    def resolve_with(self, base_url: URLTypes) -> "URL":
        """
        Return a new URL made by resolving this one against `base_url`.

        The result is validated as a non-relative URL.
        """
        if isinstance(base_url, URL):
            base_url = base_url.components
        return URL(self.components.resolve_with(base_url))

    def __hash__(self) -> int:
        return hash(str(self))

    def __eq__(self, other: typing.Any) -> bool:
        # Only another URL with the same string form is considered equal.
        return isinstance(other, URL) and str(self) == str(other)

    def __str__(self) -> str:
        return self.components.unsplit()

    def __repr__(self) -> str:
        class_name = self.__class__.__name__
        url_str = str(self)
        return f"{class_name}({url_str!r})"


class Origin:
    """
    The (is_ssl, host, port) triple taken from a URL.

    Instances compare equal, and hash equally, when all three parts match.
    """

    _FIELDS = ("is_ssl", "host", "port")

    def __init__(self, url: URLTypes) -> None:
        # Accept either a ready-made URL or anything URL() can parse.
        parsed = url if isinstance(url, URL) else URL(url)
        self.is_ssl = parsed.scheme == "https"
        self.host = parsed.host
        self.port = parsed.port

    def __eq__(self, other: typing.Any) -> bool:
        if not isinstance(other, self.__class__):
            return False
        return all(
            getattr(self, field) == getattr(other, field) for field in self._FIELDS
        )

    def __hash__(self) -> int:
        identity = (self.is_ssl, self.host, self.port)
        return hash(identity)


class Headers(typing.MutableMapping[str, str]):
    """
    A case-insensitive multidict.

    Items are kept as a list of `(key, value)` byte pairs and are decoded
    with `encoding` whenever they are read back as strings.
    """

    def __init__(self, headers: HeaderTypes = None, encoding: str = None) -> None:
        if headers is None:
            pairs = []  # type: typing.List[typing.Tuple[bytes, bytes]]
        elif isinstance(headers, Headers):
            # Copy the list so the two instances do not share storage.
            pairs = list(headers.raw)
        else:
            source = headers.items() if isinstance(headers, dict) else headers
            pairs = [
                (
                    normalize_header_key(name, encoding),
                    normalize_header_value(content, encoding),
                )
                for name, content in source
            ]
        self._list = pairs
        self._encoding = encoding

    @property
    def encoding(self) -> str:
        """
        Header encoding is mandated as ascii, but utf-8 or iso-8859-1 may be
        seen in the wild.

        When no encoding was set explicitly, the first of "ascii" and "utf-8"
        that decodes every raw key and value is chosen and remembered,
        falling back to "iso-8859-1".
        """
        if self._encoding is None:
            # ISO-8859-1 maps every byte to a code point, so it can always
            # decode and serves as the last resort.
            detected = "iso-8859-1"
            for candidate in ("ascii", "utf-8"):
                try:
                    for raw_key, raw_value in self.raw:
                        raw_key.decode(candidate)
                        raw_value.decode(candidate)
                except UnicodeDecodeError:
                    continue
                detected = candidate
                break
            self._encoding = detected
        return self._encoding

    @encoding.setter
    def encoding(self, value: str) -> None:
        self._encoding = value

    @property
    def raw(self) -> typing.List[typing.Tuple[bytes, bytes]]:
        """
        The underlying list of raw header items, as byte pairs.
        The list is returned directly, so it may be mutated in-place.
        """
        return self._list

    def keys(self) -> typing.List[str]:  # type: ignore
        decoded = []
        for raw_key, _ in self._list:
            decoded.append(raw_key.decode(self.encoding))
        return decoded

    def values(self) -> typing.List[str]:  # type: ignore
        decoded = []
        for _, raw_value in self._list:
            decoded.append(raw_value.decode(self.encoding))
        return decoded

    def items(self) -> typing.List[typing.Tuple[str, str]]:  # type: ignore
        decoded = []
        for raw_key, raw_value in self._list:
            pair = (raw_key.decode(self.encoding), raw_value.decode(self.encoding))
            decoded.append(pair)
        return decoded

    def get(self, key: str, default: typing.Any = None) -> typing.Any:
        try:
            found = self[key]
        except KeyError:
            return default
        return found

    def getlist(self, key: str, split_commas: bool = False) -> typing.List[str]:
        """
        Return every value stored for `key`, one list entry per header line.

        With `split_commas`, each value is further split on "," and the
        pieces are stripped of surrounding whitespace.
        """
        wanted = key.lower().encode(self.encoding)
        matches = [
            raw_value.decode(self.encoding)
            for raw_key, raw_value in self._list
            if raw_key == wanted
        ]
        if split_commas:
            return [piece.strip() for entry in matches for piece in entry.split(",")]
        return matches

    def __getitem__(self, key: str) -> str:
        """
        Return a single header value.

        Multiple headers sharing the key are joined with ", ".
        See: https://tools.ietf.org/html/rfc7230#section-3.2.2

        Raises `KeyError` if no header matches.
        """
        wanted = key.lower().encode(self.encoding)
        matches = [
            raw_value.decode(self.encoding)
            for raw_key, raw_value in self._list
            if raw_key == wanted
        ]
        if not matches:
            raise KeyError(key)
        return ", ".join(matches)

    def __setitem__(self, key: str, value: str) -> None:
        """
        Set the header `key` to `value`, removing any duplicate entries.
        The first existing entry keeps its position; a new key is appended.
        """
        set_key = key.lower().encode(self.encoding)
        set_value = value.encode(self.encoding)

        rebuilt = []
        replaced = False
        for entry in self._list:
            item_key, _ = entry
            if item_key != set_key:
                rebuilt.append(entry)
            elif not replaced:
                rebuilt.append((set_key, set_value))
                replaced = True
            # Any further entry with the same key is dropped.
        if not replaced:
            rebuilt.append((set_key, set_value))

        # Assign through a slice so that the list object handed out by `raw`
        # stays the same.
        self._list[:] = rebuilt

    def __delitem__(self, key: str) -> None:
        """
        Remove every header stored under `key`. A missing key is not an error.
        """
        del_key = key.lower().encode(self.encoding)

        kept = []
        for entry in self._list:
            item_key, _ = entry
            if item_key != del_key:
                kept.append(entry)
        self._list[:] = kept

    def __contains__(self, key: typing.Any) -> bool:
        wanted = key.lower().encode(self.encoding)
        return any(raw_key == wanted for raw_key, _ in self._list)

    def __iter__(self) -> typing.Iterator[typing.Any]:
        return iter(self.keys())

    def __len__(self) -> int:
        return len(self._list)

    def __eq__(self, other: typing.Any) -> bool:
        # Equality ignores item order but not duplicates.
        if isinstance(other, Headers):
            return sorted(self._list) == sorted(other._list)
        return False

    def __repr__(self) -> str:
        name = self.__class__.__name__
        suffix = "" if self.encoding == "ascii" else f", encoding={self.encoding!r}"

        pairs = self.items()
        mapping = dict(pairs)
        # Show the dict form only if it loses nothing, i.e. no repeated keys.
        payload = mapping if len(mapping) == len(self) else pairs
        return f"{name}({payload!r}{suffix})"


class Request:
    """
    An outgoing HTTP request: method, URL, headers and body.

    The body is either a complete byte string, or an async iterator of byte
    chunks, in which case the request is flagged as streaming.
    """

    def __init__(
        self,
        method: str,
        url: typing.Union[str, URL],
        *,
        headers: HeaderTypes = None,
        content: ByteOrByteStream = b"",
    ):
        self.method = method.upper()
        self.url = URL(url) if isinstance(url, str) else url
        if isinstance(content, bytes):
            self.is_streaming = False
            self.content = content
        else:
            # `content` is only set for a streaming request once `read()`
            # has buffered the whole body.
            self.is_streaming = True
            self.content_aiter = content
        self.headers = Headers(headers)

    async def read(self) -> bytes:
        """
        Read and return the request content.

        A streaming body is consumed once and the joined bytes are kept on
        `self.content`, so later calls return the same value.
        """
        if not hasattr(self, "content"):
            self.content = b"".join([part async for part in self.stream()])
        return self.content

    async def stream(self) -> typing.AsyncIterator[bytes]:
        """
        A byte-iterator over the request content.

        If the body is already available as bytes (given up front, or
        buffered by `read()`), it is yielded as a single chunk, or not at all
        when empty. Otherwise the chunks of the underlying iterator are
        passed through.
        """
        if hasattr(self, "content"):
            # Replay buffered content rather than re-iterating a source that
            # `read()` may already have exhausted.
            if self.content:
                yield self.content
        else:
            async for part in self.content_aiter:
                yield part

    def prepare(self) -> None:
        """
        Adds in any default headers. When using the `Client`, this will
        end up being called into by the `prepare_request()` stage.

        You can omit this behavior by calling `Client.send()` with an
        explicitly built `Request` instance.

        The defaults are "host", then "content-length" or
        "transfer-encoding", then "accept-encoding". Each is added only if
        not already present, and they are placed ahead of the existing
        headers.
        """
        auto_headers = []  # type: typing.List[typing.Tuple[bytes, bytes]]

        has_host = "host" in self.headers
        has_content_length = (
            "content-length" in self.headers or "transfer-encoding" in self.headers
        )
        has_accept_encoding = "accept-encoding" in self.headers

        if not has_host and self.url.authority:
            auto_headers.append((b"host", self.url.authority.encode("ascii")))
        if not has_content_length:
            if self.is_streaming:
                auto_headers.append((b"transfer-encoding", b"chunked"))
            elif self.content:
                content_length = str(len(self.content)).encode()
                auto_headers.append((b"content-length", content_length))
        if not has_accept_encoding:
            auto_headers.append((b"accept-encoding", ACCEPT_ENCODING.encode()))

        # Put the automatic headers at the front, preserving their order.
        self.headers.raw[0:0] = auto_headers


class Response:
    """
    An HTTP response: status, headers and body.

    The body is either given up front as bytes, or as an async iterator of
    raw byte chunks that is consumed via `read()`, `stream()` or `raw()`.
    """

    def __init__(
        self,
        status_code: int,
        *,
        reason_phrase: str = None,
        protocol: str = None,
        headers: HeaderTypes = None,
        content: ByteOrByteStream = b"",
        on_close: typing.Callable = None,
        request: Request = None,
        history: typing.List["Response"] = None,
    ):
        self.status_code = status_code
        self.reason_phrase = reason_phrase or get_reason_phrase(status_code)
        self.protocol = protocol
        self.headers = Headers(headers)

        if isinstance(content, bytes):
            # The whole raw body is already in memory, so there is nothing
            # left to stream or to close.
            self.is_closed = True
            self.is_stream_consumed = True
            self._raw_content = content
        else:
            self.is_closed = False
            self.is_stream_consumed = False
            self._raw_stream = content

        self.on_close = on_close
        self.request = request
        # Copy the caller's list so later mutation does not leak either way.
        self.history = [] if history is None else list(history)
        self.next = None  # type: typing.Optional[typing.Callable]

    @property
    def url(self) -> typing.Optional[URL]:
        """
        Returns the URL for which the request was made.

        Requires that `request` was provided when instantiating the response.
        """
        return None if self.request is None else self.request.url

    @property
    def content(self) -> bytes:
        """
        The decoded response body.

        Raises `ResponseNotRead` for a streamed response whose body has not
        yet been loaded with `read()`.
        """
        if not hasattr(self, "_content"):
            if hasattr(self, "_raw_content"):
                content = self.decoder.decode(self._raw_content)
                content += self.decoder.flush()
                self._content = content
            else:
                raise ResponseNotRead()
        return self._content

    @property
    def text(self) -> str:
        """
        The body decoded to a string using `encoding`.

        Undecodable bytes are replaced rather than raising. An empty body
        yields an empty string without consulting the encoding.
        """
        if not hasattr(self, "_text"):
            content = self.content
            if not content:
                self._text = ""
            else:
                encoding = self.encoding
                self._text = content.decode(encoding, errors="replace")
        return self._text

    @property
    def encoding(self) -> str:
        """
        The text encoding used by `text`.

        Unless set explicitly, this is the Content-Type charset if it is a
        known encoding, else the autodetected encoding if that is known,
        else "utf-8".
        """
        if not hasattr(self, "_encoding"):
            encoding = self.charset_encoding
            if encoding is None or not is_known_encoding(encoding):
                encoding = self.apparent_encoding
                if encoding is None or not is_known_encoding(encoding):
                    encoding = "utf-8"
            self._encoding = encoding
        return self._encoding

    @encoding.setter
    def encoding(self, value: str) -> None:
        self._encoding = value

    @property
    def charset_encoding(self) -> typing.Optional[str]:
        """
        Return the encoding, as specified by the Content-Type header.
        """
        content_type = self.headers.get("Content-Type")
        if content_type is None:
            return None

        # The last element of the parse result holds the header parameters.
        parsed = cgi.parse_header(content_type)[-1]
        return parsed.get("charset")

    @property
    def apparent_encoding(self) -> typing.Optional[str]:
        """
        Return the encoding, as it appears to autodetection.
        """
        return chardet.detect(self.content)["encoding"]

    @property
    def decoder(self) -> Decoder:
        """
        Returns a decoder instance which can be used to decode the raw byte
        content, depending on the Content-Encoding used in the response.

        Content codings that are not in `SUPPORTED_DECODERS` are skipped, so
        the corresponding step leaves the bytes untouched. The decoder is
        created once and then reused.
        """
        if not hasattr(self, "_decoder"):
            decoders = []  # type: typing.List[Decoder]
            values = self.headers.getlist("content-encoding", split_commas=True)
            for value in values:
                value = value.strip().lower()
                try:
                    decoder_cls = SUPPORTED_DECODERS[value]
                except KeyError:
                    # Unknown (or empty) coding: do not let a header we
                    # cannot interpret make the body unreadable.
                    continue
                decoders.append(decoder_cls())

            if len(decoders) == 1:
                self._decoder = decoders[0]
            elif len(decoders) > 1:
                self._decoder = MultiDecoder(decoders)
            else:
                self._decoder = IdentityDecoder()

        return self._decoder

    async def read(self) -> bytes:
        """
        Read and return the response content.

        The decoded body is cached, so later calls return the same bytes.
        """
        if not hasattr(self, "_content"):
            self._content = b"".join([part async for part in self.stream()])
        return self._content

    async def stream(self) -> typing.AsyncIterator[bytes]:
        """
        A byte-iterator over the decoded response content.
        This allows us to handle gzip, deflate, and brotli encoded responses.
        """
        if hasattr(self, "_content"):
            yield self._content
        else:
            async for chunk in self.raw():
                yield self.decoder.decode(chunk)
            # Emit whatever the decoder was still buffering.
            yield self.decoder.flush()

    async def raw(self) -> typing.AsyncIterator[bytes]:
        """
        A byte-iterator over the raw response content.

        Raises `StreamConsumed` if a streamed body was already iterated, and
        `ResponseClosed` if the response was closed before being iterated.
        The response is closed once the stream has been read to completion.
        """
        if hasattr(self, "_raw_content"):
            yield self._raw_content
        else:
            if self.is_stream_consumed:
                raise StreamConsumed()
            if self.is_closed:
                raise ResponseClosed()

            self.is_stream_consumed = True
            async for part in self._raw_stream:
                yield part
            await self.close()

    async def close(self) -> None:
        """
        Close the response and release the connection.
        Automatically called if the response body is read to completion.
        """
        if not self.is_closed:
            self.is_closed = True
            if self.on_close is not None:
                await self.on_close()

    @property
    def is_redirect(self) -> bool:
        """
        True for a 301, 302, 303, 307 or 308 style status (as named in
        `codes`) that also carries a "location" header.
        """
        return (
            self.status_code
            in (
                codes.moved_permanently,
                codes.found,
                codes.see_other,
                codes.temporary_redirect,
                codes.permanent_redirect,
            )
            and "location" in self.headers
        )