From 11e6f9860a86b11a80cd0fac35ed3943a81ff520 Mon Sep 17 00:00:00 2001 From: Julius Park Date: Wed, 20 Oct 2021 13:39:32 -0700 Subject: [PATCH] PYTHON-1579 Update URI parser to adhere to new connection string spec (#755) --- .evergreen/config.yml | 4 +- doc/changelog.rst | 13 ++++- doc/examples/authentication.rst | 6 +-- doc/migrate-to-pymongo4.rst | 19 ++++++- pymongo/auth.py | 3 -- pymongo/mongo_client.py | 2 +- pymongo/uri_parser.py | 37 ++++++++++--- test/connection_string/test/invalid-uris.json | 54 +++++++++++++++---- test/connection_string/test/valid-auth.json | 21 ++++++++ .../test/valid-host_identifiers.json | 6 +-- .../test/valid-warnings.json | 30 +++++++++++ test/test_ssl.py | 8 +-- test/test_uri_parser.py | 2 +- test/test_uri_spec.py | 3 +- 14 files changed, 173 insertions(+), 35 deletions(-) diff --git a/.evergreen/config.yml b/.evergreen/config.yml index f10652818..363b15ff5 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -523,7 +523,7 @@ functions: silent: true script: | cat <<'EOF' > "${PROJECT_DIRECTORY}/prepare_mongodb_aws.sh" - alias urlencode='${python3_binary} -c "import sys, urllib.parse as ulp; sys.stdout.write(ulp.quote_plus(sys.argv[1]))"' + alias urlencode='${python3_binary} -c "import sys, urllib.parse as ulp; sys.stdout.write(ulp.quote(sys.argv[1]))"' USER=$(urlencode ${iam_auth_ecs_account}) PASS=$(urlencode ${iam_auth_ecs_secret_access_key}) MONGODB_URI="mongodb://$USER:$PASS@localhost" @@ -554,7 +554,7 @@ functions: script: | # DO NOT ECHO WITH XTRACE (which PREPARE_SHELL does) cat <<'EOF' > "${PROJECT_DIRECTORY}/prepare_mongodb_aws.sh" - alias urlencode='${python3_binary} -c "import sys, urllib.parse as ulp; sys.stdout.write(ulp.quote_plus(sys.argv[1]))"' + alias urlencode='${python3_binary} -c "import sys, urllib.parse as ulp; sys.stdout.write(ulp.quote(sys.argv[1]))"' alias jsonkey='${python3_binary} -c "import json,sys;sys.stdout.write(json.load(sys.stdin)[sys.argv[1]])" < ${DRIVERS_TOOLS}/.evergreen/auth_aws/creds.json' USER=$(jsonkey AccessKeyId) USER=$(urlencode $USER) diff --git a/doc/changelog.rst b/doc/changelog.rst index 8dfe4f750..1213634ce 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -160,8 +160,19 @@ Breaking Changes in 4.0 are passed to the server as-is rather than the previous behavior which substituted in a projection of ``{"_id": 1}``. This means that an empty projection will now return the entire document, not just the ``"_id"`` field. -- ``MongoClient()`` now raises a :exc:`~pymongo.errors.ConfigurationError` +- :class:`~pymongo.mongo_client.MongoClient` now raises a :exc:`~pymongo.errors.ConfigurationError` when more than one URI is passed into the ``hosts`` argument. +- :class:`~pymongo.mongo_client.MongoClient`` now raises an + :exc:`~pymongo.errors.InvalidURI` exception + when it encounters unescaped percent signs in username and password when + parsing MongoDB URIs. +- :class:`~pymongo.mongo_client.MongoClient` now uses + :py::func:`urllib.parse.unquote` rather than + :py:func:`urllib.parse.unquote_plus`, + meaning that plus signs ("+") are no longer converted to spaces (" "). This + means that if you were previously quoting your login information using + quote_plus, you must now switch to quote. Additionally, be aware that this + change only occurs when parsing login information from the URI. Notable improvements .................... diff --git a/doc/examples/authentication.rst b/doc/examples/authentication.rst index d0a1fba15..bd6036e46 100644 --- a/doc/examples/authentication.rst +++ b/doc/examples/authentication.rst @@ -11,14 +11,14 @@ Percent-Escaping Username and Password -------------------------------------- Username and password must be percent-escaped with -:meth:`urllib.parse.quote_plus`, to be used in a MongoDB URI. For example:: +:py:func:`urllib.parse.quote`, to be used in a MongoDB URI. For example:: >>> from pymongo import MongoClient >>> import urllib.parse - >>> username = urllib.parse.quote_plus('user') + >>> username = urllib.parse.quote('user') >>> username 'user' - >>> password = urllib.parse.quote_plus('pass/word') + >>> password = urllib.parse.quote('pass/word') >>> password 'pass%2Fword' >>> MongoClient('mongodb://%s:%s@127.0.0.1' % (username, password)) diff --git a/doc/migrate-to-pymongo4.rst b/doc/migrate-to-pymongo4.rst index 7430ea50a..994f8450a 100644 --- a/doc/migrate-to-pymongo4.rst +++ b/doc/migrate-to-pymongo4.rst @@ -190,9 +190,26 @@ now you must create a new instance. MongoClient raises exception when given more than one URI ......................................................... -``MongoClient()`` now raises a :exc:`~pymongo.errors.ConfigurationError` +:class:`~pymongo.mongo_client.MongoClient` now raises a :exc:`~pymongo.errors.ConfigurationError` when more than one URI is passed into the ``hosts`` argument. +MongoClient raises exception when given unescaped percent sign in login info +............................................................................ + +:class:`~pymongo.mongo_client.MongoClient` now raises an +:exc:`~pymongo.errors.InvalidURI` exception +when it encounters unescaped percent signs in username and password. + +MongoClient uses `unquote` rather than `unquote_plus` for login info +.................................................................... + +:class:`~pymongo.mongo_client.MongoClient` now uses +:py:func:`urllib.parse.unquote` rather than +:py:func:`urllib.parse.unquote_plus`, meaning that space characters are no +longer converted to plus signs. This means that if you were previously +quoting your login information using :py:func:`urllib.parse.quote_plus`, you +must now switch to :py:func:`urllib.parse.quote`. + Database -------- diff --git a/pymongo/auth.py b/pymongo/auth.py index b94698086..0fec2c775 100644 --- a/pymongo/auth.py +++ b/pymongo/auth.py @@ -319,9 +319,6 @@ def _authenticate_gssapi(credentials, sock_info): if password is not None: if _USE_PRINCIPAL: - # Note that, though we use unquote_plus for unquoting URI - # options, we use quote here. Microsoft's UrlUnescape (used - # by WinKerberos) doesn't support +. principal = ":".join((quote(username), quote(password))) result, ctx = kerberos.authGSSClientInit( service, principal, gssflags=kerberos.GSS_C_MUTUAL_FLAG) diff --git a/pymongo/mongo_client.py b/pymongo/mongo_client.py index 279eae558..57f623889 100644 --- a/pymongo/mongo_client.py +++ b/pymongo/mongo_client.py @@ -329,7 +329,7 @@ class MongoClient(common.BaseObject): a Unicode-related error occurs during BSON decoding that would otherwise raise :exc:`UnicodeDecodeError`. Valid options include 'strict', 'replace', and 'ignore'. Defaults to 'strict'. - - ``srvServiceName`: (string) The SRV service name to use for + - `srvServiceName`: (string) The SRV service name to use for "mongodb+srv://" URIs. Defaults to "mongodb". Use it like so:: MongoClient("mongodb+srv://example.com/?srvServiceName=customname") diff --git a/pymongo/uri_parser.py b/pymongo/uri_parser.py index 79642c711..46c5eecd9 100644 --- a/pymongo/uri_parser.py +++ b/pymongo/uri_parser.py @@ -18,7 +18,7 @@ import re import warnings import sys -from urllib.parse import unquote_plus +from urllib.parse import unquote, unquote_plus from pymongo.common import ( SRV_SERVICE_NAME, @@ -35,10 +35,26 @@ SRV_SCHEME_LEN = len(SRV_SCHEME) DEFAULT_PORT = 27017 +def _unquoted_percent(s): + """Check for unescaped percent signs. + + :Paramaters: + - `s`: A string. `s` can have things like '%25', '%2525', + and '%E2%85%A8' but cannot have unquoted percent like '%foo'. + """ + for i in range(len(s)): + if s[i] == '%': + sub = s[i:i+3] + # If unquoting yields the same string this means there was an + # unquoted %. + if unquote(sub) == sub: + return True + return False + def parse_userinfo(userinfo): """Validates the format of user information in a MongoDB URI. - Reserved characters like ':', '/', '+' and '@' must be escaped - following RFC 3986. + Reserved characters that are gen-delimiters (":", "/", "?", "#", "[", + "]", "@") as per RFC 3986 must be escaped. Returns a 2-tuple containing the unescaped username followed by the unescaped password. @@ -46,14 +62,17 @@ def parse_userinfo(userinfo): :Paramaters: - `userinfo`: A string of the form : """ - if '@' in userinfo or userinfo.count(':') > 1: + if ('@' in userinfo or userinfo.count(':') > 1 or + _unquoted_percent(userinfo)): raise InvalidURI("Username and password must be escaped according to " - "RFC 3986, use urllib.parse.quote_plus") + "RFC 3986, use urllib.parse.quote") + user, _, passwd = userinfo.partition(":") # No password is expected with GSSAPI authentication. if not user: raise InvalidURI("The empty string is not valid username.") - return unquote_plus(user), unquote_plus(passwd) + + return unquote(user), unquote(passwd) def parse_ipv6_literal_host(entity, default_port): @@ -408,6 +427,12 @@ def parse_uri(uri, default_port=DEFAULT_PORT, validate=True, warn=False, wait for a response from the DNS server. - 'srv_service_name` (optional): A custom SRV service name + .. versionchanged:: 4.0 + To better follow RFC 3986, unquoted percent signs ("%") are no longer + supported and plus signs ("+") are no longer decoded into spaces (" ") + when decoding username and password. To avoid these issues, use + :py:func:`urllib.parse.quote` when building the URI. + .. versionchanged:: 3.9 Added the ``normalize`` parameter. diff --git a/test/connection_string/test/invalid-uris.json b/test/connection_string/test/invalid-uris.json index 677cb5384..e04da2b23 100644 --- a/test/connection_string/test/invalid-uris.json +++ b/test/connection_string/test/invalid-uris.json @@ -189,15 +189,6 @@ "auth": null, "options": null }, - { - "description": "Username with password containing an unescaped colon", - "uri": "mongodb://alice:foo:bar@127.0.0.1", - "valid": false, - "warning": null, - "hosts": null, - "auth": null, - "options": null - }, { "description": "Username containing an unescaped at-sign", "uri": "mongodb://alice@@127.0.0.1", @@ -251,6 +242,51 @@ "hosts": null, "auth": null, "options": null + }, + { + "description": "mongodb+srv with multiple service names", + "uri": "mongodb+srv://test5.test.mongodb.com,test6.test.mongodb.com", + "valid": false, + "warning": null, + "hosts": null, + "auth": null, + "options": null + }, + { + "description": "mongodb+srv with port number", + "uri": "mongodb+srv://test7.test.mongodb.com:27018", + "valid": false, + "warning": null, + "hosts": null, + "auth": null, + "options": null + }, + { + "description": "Username with password containing an unescaped percent sign", + "uri": "mongodb://alice%foo:bar@127.0.0.1", + "valid": false, + "warning": null, + "hosts": null, + "auth": null, + "options": null + }, + { + "description": "Username with password containing an unescaped percent sign and an escaped one", + "uri": "mongodb://user%20%:password@localhost", + "valid": false, + "warning": null, + "hosts": null, + "auth": null, + "options": null + }, + { + "description": "Username with password containing an unescaped percent sign (non hex digit)", + "uri": "mongodb://user%w:password@localhost", + "valid": false, + "warning": null, + "hosts": null, + "auth": null, + "options": null } ] } diff --git a/test/connection_string/test/valid-auth.json b/test/connection_string/test/valid-auth.json index 672777ff8..4f684ff18 100644 --- a/test/connection_string/test/valid-auth.json +++ b/test/connection_string/test/valid-auth.json @@ -240,6 +240,27 @@ "authmechanism": "MONGODB-CR" } }, + { + "description": "Subdelimiters in user/pass don't need escaping (MONGODB-CR)", + "uri": "mongodb://!$&'()*+,;=:!$&'()*+,;=@127.0.0.1/admin?authMechanism=MONGODB-CR", + "valid": true, + "warning": false, + "hosts": [ + { + "type": "ipv4", + "host": "127.0.0.1", + "port": null + } + ], + "auth": { + "username": "!$&'()*+,;=", + "password": "!$&'()*+,;=", + "db": "admin" + }, + "options": { + "authmechanism": "MONGODB-CR" + } + }, { "description": "Escaped username (MONGODB-X509)", "uri": "mongodb://CN%3DmyName%2COU%3DmyOrgUnit%2CO%3DmyOrg%2CL%3DmyLocality%2CST%3DmyState%2CC%3DmyCountry@localhost/?authMechanism=MONGODB-X509", diff --git a/test/connection_string/test/valid-host_identifiers.json b/test/connection_string/test/valid-host_identifiers.json index f33358725..e8833b4af 100644 --- a/test/connection_string/test/valid-host_identifiers.json +++ b/test/connection_string/test/valid-host_identifiers.json @@ -132,18 +132,18 @@ }, { "description": "UTF-8 hosts", - "uri": "mongodb://b\u00fccher.example.com,uml\u00e4ut.example.com/", + "uri": "mongodb://bücher.example.com,umläut.example.com/", "valid": true, "warning": false, "hosts": [ { "type": "hostname", - "host": "b\u00fccher.example.com", + "host": "bücher.example.com", "port": null }, { "type": "hostname", - "host": "uml\u00e4ut.example.com", + "host": "umläut.example.com", "port": null } ], diff --git a/test/connection_string/test/valid-warnings.json b/test/connection_string/test/valid-warnings.json index 87f7248f2..1eacbf8fc 100644 --- a/test/connection_string/test/valid-warnings.json +++ b/test/connection_string/test/valid-warnings.json @@ -63,6 +63,36 @@ "options": { "wtimeoutms": 10 } + }, + { + "description": "Empty integer option values are ignored", + "uri": "mongodb://localhost/?maxIdleTimeMS=", + "valid": true, + "warning": true, + "hosts": [ + { + "type": "hostname", + "host": "localhost", + "port": null + } + ], + "auth": null, + "options": null + }, + { + "description": "Empty boolean option value are ignored", + "uri": "mongodb://localhost/?journal=", + "valid": true, + "warning": true, + "hosts": [ + { + "type": "hostname", + "host": "localhost", + "port": null + } + ], + "auth": null, + "options": null } ] } diff --git a/test/test_ssl.py b/test/test_ssl.py index 0162eb3a0..df44c31dc 100644 --- a/test/test_ssl.py +++ b/test/test_ssl.py @@ -20,7 +20,7 @@ import sys sys.path[0:0] = [""] -from urllib.parse import quote_plus +from urllib.parse import quote from pymongo import MongoClient, ssl_support from pymongo.errors import (ConfigurationError, @@ -526,7 +526,7 @@ class TestSSL(IntegrationTest): uri = ('mongodb://%s@%s:%d/?authMechanism=' 'MONGODB-X509' % ( - quote_plus(MONGODB_X509_USERNAME), host, port)) + quote(MONGODB_X509_USERNAME), host, port)) client = MongoClient(uri, ssl=True, tlsAllowInvalidCertificates=True, @@ -546,7 +546,7 @@ class TestSSL(IntegrationTest): # Auth should fail if username and certificate do not match uri = ('mongodb://%s@%s:%d/?authMechanism=' 'MONGODB-X509' % ( - quote_plus("not the username"), host, port)) + quote("not the username"), host, port)) bad_client = MongoClient( uri, ssl=True, tlsAllowInvalidCertificates=True, @@ -571,7 +571,7 @@ class TestSSL(IntegrationTest): # Invalid certificate (using CA certificate as client certificate) uri = ('mongodb://%s@%s:%d/?authMechanism=' 'MONGODB-X509' % ( - quote_plus(MONGODB_X509_USERNAME), host, port)) + quote(MONGODB_X509_USERNAME), host, port)) try: connected(MongoClient(uri, ssl=True, diff --git a/test/test_uri_parser.py b/test/test_uri_parser.py index f044fba5b..33c727604 100644 --- a/test/test_uri_parser.py +++ b/test/test_uri_parser.py @@ -43,7 +43,7 @@ class TestURI(unittest.TestCase): self.assertTrue(parse_userinfo('user:password')) self.assertEqual(('us:r', 'p@ssword'), parse_userinfo('us%3Ar:p%40ssword')) - self.assertEqual(('us er', 'p ssword'), + self.assertEqual(('us+er', 'p+ssword'), parse_userinfo('us+er:p+ssword')) self.assertEqual(('us er', 'p ssword'), parse_userinfo('us%20er:p%20ssword')) diff --git a/test/test_uri_spec.py b/test/test_uri_spec.py index 2646f02e4..1bb5fd2f5 100644 --- a/test/test_uri_spec.py +++ b/test/test_uri_spec.py @@ -179,7 +179,8 @@ def create_tests(test_path): if not filename.endswith('.json'): # skip everything that is not a test specification continue - with open(os.path.join(dirpath, filename)) as scenario_stream: + json_path = os.path.join(dirpath, filename) + with open(json_path, encoding="utf-8") as scenario_stream: scenario_def = json.load(scenario_stream) for testcase in scenario_def['tests']: