The snippets below show how to download a binary file, and how to fetch and parse JSON.
Note: on Python 2 you would import urllib instead of urllib.request.
import http
import http.client
import json
import os
import ssl
import urllib.error
import urllib.request
def fetch_json(url):
    """Fetch *url* and return its body parsed as JSON.

    Args:
        url: A URL string (or ``urllib.request.Request``) to open.

    Returns:
        The decoded JSON value (dict, list, str, number, bool or None).

    Raises:
        urllib.error.URLError: If the URL cannot be opened.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    # Use the fully qualified name: the file imports ``urllib.request`` as a
    # module, so a bare ``urlopen`` would be undefined.
    with urllib.request.urlopen(url) as response:
        # json.load reads the raw bytes and detects the UTF-8/16/32 encoding
        # itself, so no explicit decode step is needed.
        return json.load(response)
def download(url, target_file):
    """Download the resource at *url* and save it to *target_file* as binary.

    Args:
        url: A URL string (or ``urllib.request.Request``) to open.
        target_file: Path of the file to write; it is overwritten if present.

    Raises:
        urllib.error.URLError: If the URL cannot be opened.
        OSError: If *target_file* cannot be written.
    """
    # The ``cafile`` argument of urlopen() was deprecated in Python 3.6 and
    # removed in 3.13; an SSLContext passed as ``context`` replaces it.
    try:
        # certifi is optional: use its CA bundle when it is installed ...
        import certifi
    except ImportError:
        # ... otherwise fall back to the system's default trust store.
        context = ssl.create_default_context()
    else:
        context = ssl.create_default_context(cafile=certifi.where())

    with urllib.request.urlopen(url, context=context) as response:
        content = response.read()

    # The file is opened only after the whole body has been read, so a failed
    # request does not leave a truncated file behind.
    with open(target_file, "wb") as f:
        f.write(content)
Below is a longer example that shows:
- how to customize the request,
- how to read mime-types from the response
- and which errors might be thrown as a result.
This example first requests the URL using the HEAD method to check the
MIME type, then downloads the file using the download function above if
the file is an image.
def download_image(url, path, filename):
    """Download *url* into *path* if it is a PNG or JPEG image.

    A HEAD request is sent first so that only the response headers are
    transferred; the Content-Type header decides whether the resource is
    downloaded and which file extension it gets.

    Args:
        url: URL of the candidate image.
        path: Directory in which the image is stored.
        filename: Target file name without extension.

    Returns:
        True if the image was downloaded. False if the resource is not a
        PNG/JPEG image, the target file already exists, or the request
        failed with one of the handled errors (which is printed).
    """
    # Supported image subtypes and the file extension each one maps to.
    extensions = {'png': '.png', 'jpeg': '.jpg'}
    try:
        # Perform the request using the HEAD method
        req = urllib.request.Request(url=url, method='HEAD')
        with urllib.request.urlopen(req) as head:
            maintype = head.headers.get_content_maintype()
            subtype = head.headers.get_content_subtype()

        # Let the mime type determine the file extension
        ext = extensions.get(subtype) if maintype == 'image' else None
        if ext is None:
            return False

        output = os.path.join(path, filename) + ext
        if os.path.exists(output):
            # Never overwrite a file that is already there.
            return False

        print(f'Downloading {url} to {output}')
        download(url, output)
        return True
    except UnicodeEncodeError as e:
        print(f'Unicode url not supported: {url} {e}')
    except (
        TimeoutError,
        ConnectionResetError,
        # RemoteDisconnected is a ConnectionResetError subclass; it is listed
        # only to make the possible failure modes explicit.
        http.client.RemoteDisconnected,
        http.client.InvalidURL,
        urllib.error.URLError,
    ) as e:
        print(f'Error loading {url}: {e}')
    return False