Handle user:pass in URLs

https://github.com/ytdl-org/youtube-dl/pull/28801 (@hhirtz)
Fixes "nonnumeric port" errors when youtube-dl is given URLs with
usernames and passwords such as:

    http://username:password@example.com/myvideo.mp4

    Refs:
    - https://en.wikipedia.org/wiki/Basic_access_authentication
    - https://tools.ietf.org/html/rfc1738#section-3.1
    - https://docs.python.org/3.8/library/urllib.parse.html#urllib.parse.urlsplit

    Fixes ytdl-org#18276 (point 4)
    Fixes ytdl-org#20258
    Fixes ytdl-org#26211 (see comment)
This commit is contained in:
df 2021-06-09 12:45:00 +01:00
parent 0345a064a7
commit 35bf1f5971
2 changed files with 50 additions and 1 deletions

View File

@ -65,6 +65,8 @@ from youtube_dl.utils import (
sanitize_filename,
sanitize_path,
sanitize_url,
extract_user_pass,
sanitized_Request,
expand_path,
prepend_extension,
replace_extension,
@ -237,6 +239,26 @@ class TestUtil(unittest.TestCase):
self.assertEqual(sanitize_url('rmtps://foo.bar'), 'rtmps://foo.bar')
self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar')
def test_extract_user_pass(self):
    # Each entry: (input URL, expected (url, username, password) tuple).
    # URLs without an '@' carry no credentials and come back unchanged.
    cases = (
        ('http://foo.bar', ('http://foo.bar', None, None)),
        ('http://:foo.bar', ('http://:foo.bar', None, None)),
        ('http://@foo.bar', ('http://foo.bar', '', '')),
        ('http://:pass@foo.bar', ('http://foo.bar', '', 'pass')),
        ('http://user:@foo.bar', ('http://foo.bar', 'user', '')),
        ('http://user:pass@foo.bar', ('http://foo.bar', 'user', 'pass')),
    )
    for given_url, expected in cases:
        self.assertEqual(extract_user_pass(given_url), expected)
def test_sanitized_Request(self):
    # URLs without userinfo must not gain an Authorization header.
    for plain_url in ('http://foo.bar', 'http://:foo.bar'):
        request = sanitized_Request(plain_url)
        self.assertFalse(request.has_header('Authorization'))

    # Each entry: (input URL, expected Authorization header value).
    # The payloads are the base64 forms of ':', ':pass', 'user:' and
    # 'user:pass' respectively.
    expected_headers = (
        ('http://@foo.bar', 'Basic Og=='),
        ('http://:pass@foo.bar', 'Basic OnBhc3M='),
        ('http://user:@foo.bar', 'Basic dXNlcjo='),
        ('http://user:pass@foo.bar', 'Basic dXNlcjpwYXNz'),
    )
    for auth_url, header_value in expected_headers:
        request = sanitized_Request(auth_url)
        self.assertEqual(request.get_header('Authorization'), header_value)
def test_expand_path(self):
def env(var):
return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var)

View File

@ -2153,9 +2153,36 @@ def sanitize_url(url):
return re.sub(mistake, fixup, url)
return url
def extract_user_pass(url):
    """Split ``user:pass@`` credentials off a URL.

    Returns a ``(url, username, password)`` tuple.

    When the URL has no userinfo part (no ``@`` in its network location),
    the URL is returned unchanged and both username and password are None.

    When userinfo is present, it is removed from the returned URL and
    username/password are always strings: a missing password becomes ``''``.
    """
    parts = compat_urlparse.urlsplit(url)
    username = parts.username
    password = parts.password
    if username is None:
        return url, username, password

    if password is None:
        password = ''
    # Keep everything after the last '@' verbatim. Rebuilding the netloc from
    # parts.hostname/parts.port would lower-case the host, drop the brackets
    # around IPv6 literals, and requires converting the integer port back to
    # a string.
    host_and_port = parts.netloc.rpartition('@')[2]
    url = compat_urlparse.urlunsplit(parts._replace(netloc=host_and_port))
    return url, username, password
def sanitized_Request(url, *args, **kwargs):
    """Build a urllib Request for *url*, turning URL credentials into a header.

    The URL is first passed through sanitize_url(). If it then carries a
    ``user:pass@`` userinfo part, that part is removed from the URL and sent
    as an HTTP Basic ``Authorization`` header instead. Remaining positional
    and keyword arguments are forwarded to the Request constructor.

    The caller's headers mapping is never modified; a copy is used.
    """
    url, username, password = extract_user_pass(sanitize_url(url))
    if username is not None:
        # extract_user_pass() returns a string password whenever a username
        # is present, so plain concatenation is safe here.
        credentials = (username + ':' + password).encode('utf-8')
        auth_header = 'Basic ' + base64.b64encode(credentials).decode('utf-8')
        if len(args) >= 2:
            # Headers were passed positionally: (data, headers, ...).
            headers = dict(args[1] or {})
            headers['Authorization'] = auth_header
            args = args[:1] + (headers,) + args[2:]
        else:
            headers = dict(kwargs.get('headers') or {})
            headers['Authorization'] = auth_header
            kwargs['headers'] = headers
    return compat_urllib_request.Request(url, *args, **kwargs)
def expand_path(s):