about summary refs log tree commit diff
diff options
context:
space:
mode:
author Silvio Rhatto <rhatto@riseup.net> 2019-05-22 14:04:07 -0300
committer Silvio Rhatto <rhatto@riseup.net> 2019-05-22 14:04:07 -0300
commit 38f37454c20fc4386b59a13b07aede8ec29f9fad (patch)
tree 961d324fe9b4029f6ba90057879060f7538477fb
parent 8f3428e454544c60d3da5e8ccc6873709275bc40 (diff)
download ckandumper-38f37454c20fc4386b59a13b07aede8ec29f9fad.tar.gz
ckandumper-38f37454c20fc4386b59a13b07aede8ec29f9fad.tar.bz2
Support for custom wget invocations
-rwxr-xr-x ckandumper 21
1 files changed, 16 insertions, 5 deletions
diff --git a/ckandumper b/ckandumper
index 237d961..21a922b 100755
--- a/ckandumper
+++ b/ckandumper
@@ -24,7 +24,7 @@ import datetime
import random
import asyncio
import argparse
-import sys, os, json
+import sys, os, subprocess, json
from urllib.parse import urlencode
from hashlib import sha256
from tqdm import tqdm
@@ -33,8 +33,15 @@ class DownloadMultiple:
"""Downloads multiple files simultaneously with error logging and fancy output"""
def __init__(self, limit_rate, limit_concurrent = 20, progress = True, debug = False, wget = '/usr/bin/wget'):
- if not os.path.exists(wget):
- raise FileNotFoundError('Wget not found in path ' + wget + '; please install it first.')
+ # Check for wget
+ wget_bin = wget.split(' ')[0]
+ if '/' in wget_bin and not os.path.exists(wget_bin):
+ raise FileNotFoundError('Wget not found in path ' + wget_bin + '; please install it first.')
+ else:
+ result = subprocess.check_call(wget_bin + ' --help', stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, shell=True)
+ #result = subprocess.call(wget_bin + ' --help', shell=True, stdout='/dev/null', stderr='/dev/null')
+ #if result == 127:
+ # raise FileNotFoundError('Wget not found in path ' + wget_bin + '; please install it first.')
self.limit_rate = limit_rate
self.limit_concurrent = asyncio.Semaphore(int(limit_concurrent))
@@ -325,13 +332,14 @@ if __name__ == "__main__":
ckandumper --limit-concurrent=10 --limit-rate=100k --randomize https://open.canada.ca/data/en/ canada/
ckandumper --limit-concurrent=10 --limit-rate=100k --randomize https://opendata.swiss/en/ switzerland/
+ ckandumper --limit-concurrent=10 --wget="wget --no-check-certificate" --randomize http://dados.gov.br
"""
parser = argparse.ArgumentParser(description='Dump CKAN metadata and datasets.', epilog=examples, formatter_class=argparse.RawDescriptionHelpFormatter,)
parser.add_argument('url', nargs='+', help='CKAN instance URL')
parser.add_argument('dest', nargs='+', help='Destination folder')
- parser.add_argument('--limit-rate', help='Limit the download speed to amount bytes per second, per download')
+ parser.add_argument('--limit-rate', help='Limit the download speed to amount bytes per second, per download, shorthand for "--wget="wget --limit-rate"')
parser.add_argument('--limit-concurrent', help='Limit the total concurrent downloads')
- parser.add_argument('--wget', help='Path of custom wget implementation')
+ parser.add_argument('--wget', help='Custom wget invocation')
parser.add_argument('--debug', dest='debug', action='store_true', help='Enable debug')
parser.add_argument('--no-debug', dest='debug', action='store_false', help='Disable debug')
parser.add_argument('--progress', dest='progress', action='store_true', help='Enable progress')
@@ -360,3 +368,6 @@ if __name__ == "__main__":
except KeyboardInterrupt as e:
print(e)
exit(1)
+ except CalledProcessError as e:
+ print(e)
+ exit(1)