cerbero: Add a urllib2 implementation for downloads

The wget that ships with msys is too old and cannot use TLSv1.2 or newer, which GitHub started requiring a few months ago: https://githubengineering.com/crypto-removal-notice/

The implementation is very basic right now and is only used as a fallback when neither a usable wget nor curl is found, which in practice means on Windows. For instance, it does not do certificate checking, but that was always broken on Windows anyway.
author: Nirbheek Chauhan <nirbheek@centricular.com> 2018-03-08 00:20:13 +0530
committer: Nirbheek Chauhan <nirbheek@centricular.com> 2018-03-08 03:21:06 +0530
commit: a7a556ee341d9ccbd98c54509d487a9234f324e5 (patch)
tree: f05008c405dbcc03142d8a648abd512eda96ebe3
parent: b3f80c3bf0c17a9a9aa4394bb567928990900845 (diff)
2 files changed, 58 insertions, 6 deletions
diff --git a/cerbero/hacks.py b/cerbero/hacks.py
index 1b785f94..784a3807 100644
--- a/cerbero/hacks.py
+++ b/cerbero/hacks.py
@@ -166,7 +166,15 @@ shutil.rmtree = rmtree
 
 # use cURL to download instead of wget
 
-if sys.platform.startswith('darwin'):
-    import cerbero.utils.shell as cshell
-    del cshell.download
-    cshell.download = cshell.download_curl
+import cerbero.utils.shell
+# wget shipped with msys fails with an SSL error on github URLs
+# https://githubengineering.com/crypto-removal-notice/
+if not sys.platform.startswith('win') and cerbero.utils.shell.which('wget'):
+    cerbero.utils.shell.download = cerbero.utils.shell.download_wget
+elif cerbero.utils.shell.which('curl'):
+    cerbero.utils.shell.download = cerbero.utils.shell.download_curl
+else:
+    # This is a very basic implementation, replace this with the requests
+    # module or something else when porting to Python 3. We can try to remove
+    # our dependency on wget/curl.
+    cerbero.utils.shell.download = cerbero.utils.shell.download_urllib2
diff --git a/cerbero/utils/shell.py b/cerbero/utils/shell.py
index 804f33ba..bba68bb1 100644
--- a/cerbero/utils/shell.py
+++ b/cerbero/utils/shell.py
@@ -28,6 +28,7 @@ import time
 import glob
 import shutil
 import hashlib
+import urllib2
 
 from cerbero.enums import Platform
 from cerbero.utils import _, system_info, to_unixpath
@@ -210,8 +211,7 @@ def unpack(filepath, output_dir):
     else:
         raise FatalError("Unknown tarball format %s" % filepath)
 
-
-def download(url, destination=None, recursive=False, check_cert=True, overwrite=False):
+def download_wget(url, destination=None, recursive=False, check_cert=True, overwrite=False):
     '''
     Downloads a file with wget
 
@@ -253,6 +253,50 @@ def download(url, destination=None, recursive=False, check_cert=True, overwrite=
             raise e
 
 
+def download_urllib2(url, destination=None, recursive=False, check_cert=False, overwrite=False):
+    '''
+    Download a file with urllib2, which does not rely on external programs
+
+    @param url: url to download
+    @type url: str
+    @param destination: destination where the file will be saved
+    @type destination: str
+    '''
+    if recursive:
+        logging.warn("Recursive download is not implemented with urllib2, trying wget")
+        download_wget(url, destination, recursive, check_cert, overwrite)
+        return
+    ctx = None
+    if not check_cert:
+        import ssl
+        ctx = ssl.create_default_context()
+        ctx.check_hostname = False
+        ctx.verify_mode = ssl.CERT_NONE
+    # This is roughly what wget and curl do
+    if not destination:
+        destination = os.path.basename(url)
+
+    if not overwrite and os.path.exists(destination):
+        if LOGFILE is None:
+            logging.info("File %s already downloaded." % destination)
+        return
+    if not os.path.exists(os.path.dirname(destination)):
+        os.makedirs(os.path.dirname(destination))
+    if LOGFILE:
+        LOGFILE.write("Downloading %s\n" % url)
+    else:
+        logging.info("Downloading %s", url)
+    try:
+        logging.info(destination)
+        with open(destination, 'wb') as d:
+            f = urllib2.urlopen(url, context=ctx)
+            d.write(f.read())
+    except urllib2.HTTPError, e:
+        if os.path.exists(destination):
+            os.remove(destination)
+        raise e
+
+
 def download_curl(url, destination=None, recursive=False, check_cert=True, overwrite=False):
     '''
     Downloads a file with cURL
author	Nirbheek Chauhan <nirbheek@centricular.com>	2018-03-08 00:20:13 +0530
committer	Nirbheek Chauhan <nirbheek@centricular.com>	2018-03-08 03:21:06 +0530
commit	a7a556ee341d9ccbd98c54509d487a9234f324e5 (patch)
tree	f05008c405dbcc03142d8a648abd512eda96ebe3
parent	b3f80c3bf0c17a9a9aa4394bb567928990900845 (diff)