From 38f37454c20fc4386b59a13b07aede8ec29f9fad Mon Sep 17 00:00:00 2001 From: Silvio Rhatto Date: Wed, 22 May 2019 14:04:07 -0300 Subject: Support for custom wget invocations --- ckandumper | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/ckandumper b/ckandumper index 237d961..21a922b 100755 --- a/ckandumper +++ b/ckandumper @@ -24,7 +24,7 @@ import datetime import random import asyncio import argparse -import sys, os, json +import sys, os, subprocess, json from urllib.parse import urlencode from hashlib import sha256 from tqdm import tqdm @@ -33,8 +33,15 @@ class DownloadMultiple: """Downloads multiple files simultaneously with error logging and fancy output""" def __init__(self, limit_rate, limit_concurrent = 20, progress = True, debug = False, wget = '/usr/bin/wget'): - if not os.path.exists(wget): - raise FileNotFoundError('Wget not found in path ' + wget + '; please install it first.') + # Check for wget + wget_bin = wget.split(' ')[0] + if '/' in wget_bin and not os.path.exists(wget_bin): + raise FileNotFoundError('Wget not found in path ' + wget_bin + '; please install it first.') + else: + result = subprocess.check_call(wget_bin + ' --help', stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, shell=True) + #result = subprocess.call(wget_bin + ' --help', shell=True, stdout='/dev/null', stderr='/dev/null') + #if result == 127: + # raise FileNotFoundError('Wget not found in path ' + wget_bin + '; please install it first.') self.limit_rate = limit_rate self.limit_concurrent = asyncio.Semaphore(int(limit_concurrent)) @@ -325,13 +332,14 @@ if __name__ == "__main__": ckandumper --limit-concurrent=10 --limit-rate=100k --randomize https://open.canada.ca/data/en/ canada/ ckandumper --limit-concurrent=10 --limit-rate=100k --randomize https://opendata.swiss/en/ switzerland/ + ckandumper --limit-concurrent=10 --wget="wget --no-check-certificate" --randomize http://dados.gov.br """ parser = argparse.ArgumentParser(description='Dump CKAN metadata and datasets.', epilog=examples, formatter_class=argparse.RawDescriptionHelpFormatter,) parser.add_argument('url', nargs='+', help='CKAN instance URL') parser.add_argument('dest', nargs='+', help='Destination folder') - parser.add_argument('--limit-rate', help='Limit the download speed to amount bytes per second, per download') + parser.add_argument('--limit-rate', help='Limit the download speed to amount bytes per second, per download, shorthand for "--wget="wget --limit-rate"') parser.add_argument('--limit-concurrent', help='Limit the total concurrent downloads') - parser.add_argument('--wget', help='Path of custom wget implementation') + parser.add_argument('--wget', help='Custom wget invocation') parser.add_argument('--debug', dest='debug', action='store_true', help='Enable debug') parser.add_argument('--no-debug', dest='debug', action='store_false', help='Disable debug') parser.add_argument('--progress', dest='progress', action='store_true', help='Enable progress') @@ -360,3 +368,6 @@ if __name__ == "__main__": except KeyboardInterrupt as e: print(e) exit(1) + except CalledProcessError as e: + print(e) + exit(1) -- cgit v1.2.3