PYTHON-1579 Update URI parser to adhere to new connection string spec (#755)

This commit is contained in:
Julius Park 2021-10-20 13:39:32 -07:00 committed by GitHub
parent d77c20497b
commit 11e6f9860a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 173 additions and 35 deletions

View File

@ -523,7 +523,7 @@ functions:
silent: true
script: |
cat <<'EOF' > "${PROJECT_DIRECTORY}/prepare_mongodb_aws.sh"
alias urlencode='${python3_binary} -c "import sys, urllib.parse as ulp; sys.stdout.write(ulp.quote_plus(sys.argv[1]))"'
alias urlencode='${python3_binary} -c "import sys, urllib.parse as ulp; sys.stdout.write(ulp.quote(sys.argv[1]))"'
USER=$(urlencode ${iam_auth_ecs_account})
PASS=$(urlencode ${iam_auth_ecs_secret_access_key})
MONGODB_URI="mongodb://$USER:$PASS@localhost"
@ -554,7 +554,7 @@ functions:
script: |
# DO NOT ECHO WITH XTRACE (which PREPARE_SHELL does)
cat <<'EOF' > "${PROJECT_DIRECTORY}/prepare_mongodb_aws.sh"
alias urlencode='${python3_binary} -c "import sys, urllib.parse as ulp; sys.stdout.write(ulp.quote_plus(sys.argv[1]))"'
alias urlencode='${python3_binary} -c "import sys, urllib.parse as ulp; sys.stdout.write(ulp.quote(sys.argv[1]))"'
alias jsonkey='${python3_binary} -c "import json,sys;sys.stdout.write(json.load(sys.stdin)[sys.argv[1]])" < ${DRIVERS_TOOLS}/.evergreen/auth_aws/creds.json'
USER=$(jsonkey AccessKeyId)
USER=$(urlencode $USER)

View File

@ -160,8 +160,19 @@ Breaking Changes in 4.0
are passed to the server as-is rather than the previous behavior which
substituted in a projection of ``{"_id": 1}``. This means that an empty
projection will now return the entire document, not just the ``"_id"`` field.
- ``MongoClient()`` now raises a :exc:`~pymongo.errors.ConfigurationError`
- :class:`~pymongo.mongo_client.MongoClient` now raises a :exc:`~pymongo.errors.ConfigurationError`
when more than one URI is passed into the ``hosts`` argument.
- :class:`~pymongo.mongo_client.MongoClient` now raises an
:exc:`~pymongo.errors.InvalidURI` exception
when it encounters unescaped percent signs in username and password when
parsing MongoDB URIs.
- :class:`~pymongo.mongo_client.MongoClient` now uses
:py:func:`urllib.parse.unquote` rather than
:py:func:`urllib.parse.unquote_plus`,
meaning that plus signs ("+") are no longer converted to spaces (" "). This
means that if you were previously quoting your login information using
quote_plus, you must now switch to quote. Additionally, be aware that this
change only occurs when parsing login information from the URI.
Notable improvements
....................

View File

@ -11,14 +11,14 @@ Percent-Escaping Username and Password
--------------------------------------
Username and password must be percent-escaped with
:meth:`urllib.parse.quote_plus`, to be used in a MongoDB URI. For example::
:py:func:`urllib.parse.quote`, to be used in a MongoDB URI. For example::
>>> from pymongo import MongoClient
>>> import urllib.parse
>>> username = urllib.parse.quote_plus('user')
>>> username = urllib.parse.quote('user')
>>> username
'user'
>>> password = urllib.parse.quote_plus('pass/word')
>>> password = urllib.parse.quote('pass/word')
>>> password
'pass%2Fword'
>>> MongoClient('mongodb://%s:%s@127.0.0.1' % (username, password))

View File

@ -190,9 +190,26 @@ now you must create a new instance.
MongoClient raises exception when given more than one URI
.........................................................
``MongoClient()`` now raises a :exc:`~pymongo.errors.ConfigurationError`
:class:`~pymongo.mongo_client.MongoClient` now raises a :exc:`~pymongo.errors.ConfigurationError`
when more than one URI is passed into the ``hosts`` argument.
MongoClient raises exception when given unescaped percent sign in login info
............................................................................
:class:`~pymongo.mongo_client.MongoClient` now raises an
:exc:`~pymongo.errors.InvalidURI` exception
when it encounters unescaped percent signs in username and password.
MongoClient uses `unquote` rather than `unquote_plus` for login info
....................................................................
:class:`~pymongo.mongo_client.MongoClient` now uses
:py:func:`urllib.parse.unquote` rather than
:py:func:`urllib.parse.unquote_plus`, meaning that plus signs ("+") are no
longer converted to spaces (" "). This means that if you were previously
quoting your login information using :py:func:`urllib.parse.quote_plus`, you
must now switch to :py:func:`urllib.parse.quote`.
Database
--------

View File

@ -319,9 +319,6 @@ def _authenticate_gssapi(credentials, sock_info):
if password is not None:
if _USE_PRINCIPAL:
# Note that, though we use unquote_plus for unquoting URI
# options, we use quote here. Microsoft's UrlUnescape (used
# by WinKerberos) doesn't support +.
principal = ":".join((quote(username), quote(password)))
result, ctx = kerberos.authGSSClientInit(
service, principal, gssflags=kerberos.GSS_C_MUTUAL_FLAG)

View File

@ -329,7 +329,7 @@ class MongoClient(common.BaseObject):
a Unicode-related error occurs during BSON decoding that would
otherwise raise :exc:`UnicodeDecodeError`. Valid options include
'strict', 'replace', and 'ignore'. Defaults to 'strict'.
- ``srvServiceName`: (string) The SRV service name to use for
- `srvServiceName`: (string) The SRV service name to use for
"mongodb+srv://" URIs. Defaults to "mongodb". Use it like so::
MongoClient("mongodb+srv://example.com/?srvServiceName=customname")

View File

@ -18,7 +18,7 @@ import re
import warnings
import sys
from urllib.parse import unquote_plus
from urllib.parse import unquote, unquote_plus
from pymongo.common import (
SRV_SERVICE_NAME,
@ -35,10 +35,26 @@ SRV_SCHEME_LEN = len(SRV_SCHEME)
DEFAULT_PORT = 27017
def _unquoted_percent(s):
"""Check for unescaped percent signs.
:Paramaters:
- `s`: A string. `s` can have things like '%25', '%2525',
and '%E2%85%A8' but cannot have unquoted percent like '%foo'.
"""
for i in range(len(s)):
if s[i] == '%':
sub = s[i:i+3]
# If unquoting yields the same string this means there was an
# unquoted %.
if unquote(sub) == sub:
return True
return False
def parse_userinfo(userinfo):
"""Validates the format of user information in a MongoDB URI.
Reserved characters like ':', '/', '+' and '@' must be escaped
following RFC 3986.
Reserved characters that are gen-delimiters (":", "/", "?", "#", "[",
"]", "@") as per RFC 3986 must be escaped.
Returns a 2-tuple containing the unescaped username followed
by the unescaped password.
@ -46,14 +62,17 @@ def parse_userinfo(userinfo):
:Parameters:
- `userinfo`: A string of the form <username>:<password>
"""
if '@' in userinfo or userinfo.count(':') > 1:
if ('@' in userinfo or userinfo.count(':') > 1 or
_unquoted_percent(userinfo)):
raise InvalidURI("Username and password must be escaped according to "
"RFC 3986, use urllib.parse.quote_plus")
"RFC 3986, use urllib.parse.quote")
user, _, passwd = userinfo.partition(":")
# No password is expected with GSSAPI authentication.
if not user:
raise InvalidURI("The empty string is not valid username.")
return unquote_plus(user), unquote_plus(passwd)
return unquote(user), unquote(passwd)
def parse_ipv6_literal_host(entity, default_port):
@ -408,6 +427,12 @@ def parse_uri(uri, default_port=DEFAULT_PORT, validate=True, warn=False,
wait for a response from the DNS server.
- `srv_service_name` (optional): A custom SRV service name
.. versionchanged:: 4.0
To better follow RFC 3986, unquoted percent signs ("%") are no longer
supported and plus signs ("+") are no longer decoded into spaces (" ")
when decoding username and password. To avoid these issues, use
:py:func:`urllib.parse.quote` when building the URI.
.. versionchanged:: 3.9
Added the ``normalize`` parameter.

View File

@ -189,15 +189,6 @@
"auth": null,
"options": null
},
{
"description": "Username with password containing an unescaped colon",
"uri": "mongodb://alice:foo:bar@127.0.0.1",
"valid": false,
"warning": null,
"hosts": null,
"auth": null,
"options": null
},
{
"description": "Username containing an unescaped at-sign",
"uri": "mongodb://alice@@127.0.0.1",
@ -251,6 +242,51 @@
"hosts": null,
"auth": null,
"options": null
},
{
"description": "mongodb+srv with multiple service names",
"uri": "mongodb+srv://test5.test.mongodb.com,test6.test.mongodb.com",
"valid": false,
"warning": null,
"hosts": null,
"auth": null,
"options": null
},
{
"description": "mongodb+srv with port number",
"uri": "mongodb+srv://test7.test.mongodb.com:27018",
"valid": false,
"warning": null,
"hosts": null,
"auth": null,
"options": null
},
{
"description": "Username with password containing an unescaped percent sign",
"uri": "mongodb://alice%foo:bar@127.0.0.1",
"valid": false,
"warning": null,
"hosts": null,
"auth": null,
"options": null
},
{
"description": "Username with password containing an unescaped percent sign and an escaped one",
"uri": "mongodb://user%20%:password@localhost",
"valid": false,
"warning": null,
"hosts": null,
"auth": null,
"options": null
},
{
"description": "Username with password containing an unescaped percent sign (non hex digit)",
"uri": "mongodb://user%w:password@localhost",
"valid": false,
"warning": null,
"hosts": null,
"auth": null,
"options": null
}
]
}

View File

@ -240,6 +240,27 @@
"authmechanism": "MONGODB-CR"
}
},
{
"description": "Subdelimiters in user/pass don't need escaping (MONGODB-CR)",
"uri": "mongodb://!$&'()*+,;=:!$&'()*+,;=@127.0.0.1/admin?authMechanism=MONGODB-CR",
"valid": true,
"warning": false,
"hosts": [
{
"type": "ipv4",
"host": "127.0.0.1",
"port": null
}
],
"auth": {
"username": "!$&'()*+,;=",
"password": "!$&'()*+,;=",
"db": "admin"
},
"options": {
"authmechanism": "MONGODB-CR"
}
},
{
"description": "Escaped username (MONGODB-X509)",
"uri": "mongodb://CN%3DmyName%2COU%3DmyOrgUnit%2CO%3DmyOrg%2CL%3DmyLocality%2CST%3DmyState%2CC%3DmyCountry@localhost/?authMechanism=MONGODB-X509",

View File

@ -132,18 +132,18 @@
},
{
"description": "UTF-8 hosts",
"uri": "mongodb://b\u00fccher.example.com,uml\u00e4ut.example.com/",
"uri": "mongodb://bücher.example.com,umläut.example.com/",
"valid": true,
"warning": false,
"hosts": [
{
"type": "hostname",
"host": "b\u00fccher.example.com",
"host": "bücher.example.com",
"port": null
},
{
"type": "hostname",
"host": "uml\u00e4ut.example.com",
"host": "umläut.example.com",
"port": null
}
],

View File

@ -63,6 +63,36 @@
"options": {
"wtimeoutms": 10
}
},
{
"description": "Empty integer option values are ignored",
"uri": "mongodb://localhost/?maxIdleTimeMS=",
"valid": true,
"warning": true,
"hosts": [
{
"type": "hostname",
"host": "localhost",
"port": null
}
],
"auth": null,
"options": null
},
{
"description": "Empty boolean option value are ignored",
"uri": "mongodb://localhost/?journal=",
"valid": true,
"warning": true,
"hosts": [
{
"type": "hostname",
"host": "localhost",
"port": null
}
],
"auth": null,
"options": null
}
]
}

View File

@ -20,7 +20,7 @@ import sys
sys.path[0:0] = [""]
from urllib.parse import quote_plus
from urllib.parse import quote
from pymongo import MongoClient, ssl_support
from pymongo.errors import (ConfigurationError,
@ -526,7 +526,7 @@ class TestSSL(IntegrationTest):
uri = ('mongodb://%s@%s:%d/?authMechanism='
'MONGODB-X509' % (
quote_plus(MONGODB_X509_USERNAME), host, port))
quote(MONGODB_X509_USERNAME), host, port))
client = MongoClient(uri,
ssl=True,
tlsAllowInvalidCertificates=True,
@ -546,7 +546,7 @@ class TestSSL(IntegrationTest):
# Auth should fail if username and certificate do not match
uri = ('mongodb://%s@%s:%d/?authMechanism='
'MONGODB-X509' % (
quote_plus("not the username"), host, port))
quote("not the username"), host, port))
bad_client = MongoClient(
uri, ssl=True, tlsAllowInvalidCertificates=True,
@ -571,7 +571,7 @@ class TestSSL(IntegrationTest):
# Invalid certificate (using CA certificate as client certificate)
uri = ('mongodb://%s@%s:%d/?authMechanism='
'MONGODB-X509' % (
quote_plus(MONGODB_X509_USERNAME), host, port))
quote(MONGODB_X509_USERNAME), host, port))
try:
connected(MongoClient(uri,
ssl=True,

View File

@ -43,7 +43,7 @@ class TestURI(unittest.TestCase):
self.assertTrue(parse_userinfo('user:password'))
self.assertEqual(('us:r', 'p@ssword'),
parse_userinfo('us%3Ar:p%40ssword'))
self.assertEqual(('us er', 'p ssword'),
self.assertEqual(('us+er', 'p+ssword'),
parse_userinfo('us+er:p+ssword'))
self.assertEqual(('us er', 'p ssword'),
parse_userinfo('us%20er:p%20ssword'))

View File

@ -179,7 +179,8 @@ def create_tests(test_path):
if not filename.endswith('.json'):
# skip everything that is not a test specification
continue
with open(os.path.join(dirpath, filename)) as scenario_stream:
json_path = os.path.join(dirpath, filename)
with open(json_path, encoding="utf-8") as scenario_stream:
scenario_def = json.load(scenario_stream)
for testcase in scenario_def['tests']: