aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSilvio Rhatto <rhatto@riseup.net>2019-05-16 08:04:29 -0300
committerSilvio Rhatto <rhatto@riseup.net>2019-05-16 08:04:29 -0300
commit3932b2640920ded3f5966937899626bea2cb37b7 (patch)
tree0ef89c82b8cc3d114b93c51194ee7b01a55f5771
parenta2c49d3701b9a8b4ea27730e94b2e769d7084254 (diff)
downloadckandumper-3932b2640920ded3f5966937899626bea2cb37b7.tar.gz
ckandumper-3932b2640920ded3f5966937899626bea2cb37b7.tar.bz2
Enhanced output with tqdm
-rwxr-xr-xckandumper111
1 files changed, 54 insertions, 57 deletions
diff --git a/ckandumper b/ckandumper
index 9f19f8c..979db18 100755
--- a/ckandumper
+++ b/ckandumper
@@ -21,49 +21,23 @@
# Dependencies
import asyncio
import argparse
-import curses
import sys, os, json
from urllib.parse import urlencode
-
-class StatusLine:
- """Handle printing in a given status line"""
-
- def __init__(self):
- self.stdscr = curses.initscr()
-
- curses.noecho()
- curses.cbreak()
-
- def print(self, line, content):
- """Print content in a specific status line"""
- height, width = self.stdscr.getmaxyx()
-
- if line < height:
- self.stdscr.addstr(line, 0, ' ' * width)
- self.stdscr.addstr(line, 0, content)
- self.stdscr.refresh()
-
- def clear(self, line):
- self.print(line, ' ')
-
- def teardown(self):
- """Finish status lines"""
- curses.echo()
- curses.nocbreak()
- curses.endwin()
+from tqdm import tqdm
class DownloadMultiple:
"""Downloads multiple files simultaneously with error logging and fancy output"""
wget = '/usr/bin/wget'
- def __init__(self, limit_rate, limit_concurrent = 20):
+ def __init__(self, limit_rate, limit_concurrent = 20, progress = True, debug = False):
if not os.path.exists(self.wget):
raise FileNotFoundError('Wget not found in your path; please install it first.')
self.limit_rate = limit_rate
self.limit_concurrent = asyncio.Semaphore(int(limit_concurrent))
- self.status = StatusLine()
+ self.progress = progress
+ self.debug = debug
def ensuredir(self, dest):
"""Ensures that the destination folder exists"""
@@ -81,7 +55,9 @@ class DownloadMultiple:
"""
async with semaphore:
- #print('Downloading ' + url + '...')
+ if self.debug:
+ print('Downloading ' + url + '...')
+
self.ensuredir(os.path.dirname(local_filename));
# Other opts: -q --show-progress
@@ -92,15 +68,21 @@ class DownloadMultiple:
stdout, stderr = await proc.communicate()
- #if stdout:
- # self.status.print(semaphore._value, f'[stdout] {stdout.decode()}')
+ if stdout:
+ if self.debug:
+ print(f'[stdout] {url} {stdout.decode()}')
if stderr:
- self.status.print(semaphore._value, f'[stderr] {stderr.decode()} {url}')
+ if self.debug:
+ print(f'[stderr] {url} {stderr.decode()}')
- #self.status.print(semaphore._value, f'[{cmd!r} exited with {proc.returncode}]')
+ if self.debug:
+ print(f'[{cmd!r} exited with {proc.returncode}]')
- self.status.clear(semaphore._value)
+ if hasattr(self.bar, 'update'):
+ self.bar.update(1)
+
+ return proc.returncode
async def gather(self, filepairs):
"""Gather all files to be downloaded
@@ -116,8 +98,10 @@ class DownloadMultiple:
await asyncio.gather(*jobs)
def get(self, filepairs):
- loop = asyncio.get_event_loop()
- #loop.set_debug(True)
+ self.bar = tqdm(total=len(filepairs)) if self.progress and len(filepairs) > 1 else False
+ loop = asyncio.get_event_loop()
+
+ loop.set_debug(self.debug)
loop.run_until_complete(self.gather(filepairs))
class CkanDumper:
@@ -133,6 +117,12 @@ class CkanDumper:
self.tag_list = '/api/3/action/tag_list'
self.tag_show = '/api/3/action/tag_show?'
+ if args.debug != None:
+ self.debug = args.debug
+
+ if args.progress != None:
+ self.progress = args.progress
+
if args.limit_rate != None:
self.limit_rate = '--limit-rate=' + args.limit_rate
else:
@@ -143,7 +133,7 @@ class CkanDumper:
else:
self.limit_concurrent = '20'
- self.download = DownloadMultiple(self.limit_rate, self.limit_concurrent)
+ self.download = DownloadMultiple(self.limit_rate, self.limit_concurrent, self.progress, self.debug)
def load_json(self, file):
"""Loads a file with contents serialized as JSON"""
@@ -153,12 +143,14 @@ class CkanDumper:
descriptor.close()
return data
+ def iterate(self, iter):
+ if self.progress == True:
+ return tqdm(iter)
+ else:
+ return iter
+
def dump(self):
"""Downloads all content listed in a CKAN repository"""
- # Switch to dest folder
- #self.ensuredir(self.dest)
- #os.chdir(self.dest)
-
package_list = self.dest + os.sep + 'package_list.json'
group_list = self.dest + os.sep + 'group_list.json'
tag_list = self.dest + os.sep + 'tag_list.json'
@@ -166,17 +158,17 @@ class CkanDumper:
#
# Groups
#
+ print(f'Downloading {self.url}{self.group_list}...')
self.download.get([[self.url + self.group_list, group_list]])
groups = self.load_json(group_list)
group_downloads = []
+ print(f'Downloading each group info...')
for group in groups['result']:
group_folder = self.dest + os.sep + 'groups' + os.sep + group
group_file = group_folder + os.sep + 'group.json'
- #print("Downloading " + self.url + self.group_show + 'id=' + group + '...')
- #self.ensuredir(group_folder)
group_downloads.append([self.url + self.group_show + urlencode({ 'id': group }, False, '', 'utf-8'), group_file])
self.download.get(group_downloads)
@@ -184,17 +176,17 @@ class CkanDumper:
#
# Tags
#
+ print(f'Downloading {self.url}{self.tag_list}...')
self.download.get([[self.url + self.tag_list, tag_list]])
tags = self.load_json(tag_list)
tags_downloads = []
+ print(f'Downloading each tag info...')
for tag in tags['result']:
tag_folder = self.dest + os.sep + 'tags' + os.sep + tag
tag_file = tag_folder + os.sep + 'tag.json'
- #print("Downloading " + self.url + self.tag_show + 'id=' + tag + '...')
- #self.ensuredir(tag_folder)
tags_downloads.append([self.url + self.tag_show + urlencode({ 'id': tag }, False, '', 'utf-8'), tag_file])
self.download.get(tags_downloads)
@@ -202,17 +194,18 @@ class CkanDumper:
#
# Packages
#
+ print(f'Downloading {self.url}{self.package_list}...')
self.download.get([[self.url + self.package_list, package_list]])
packages = self.load_json(package_list)
packages_downloads = []
+ print(f'Downloading each package info...')
+
for package in packages['result']:
package_folder = self.dest + os.sep + 'packages' + os.sep + package
package_file = package_folder + os.sep + 'package.json'
- #print("Downloading " + self.url + self.package_show + 'id=' + package + '...')
- #self.ensuredir(package_folder + os.sep + 'data')
packages_downloads.append([self.url + self.package_show + urlencode({ 'id': package }, False, '', 'utf-8'), package_file])
self.download.get(packages_downloads)
@@ -226,6 +219,8 @@ class CkanDumper:
package_file = package_folder + os.sep + 'package.json'
contents = self.load_json(package_file)
+ print(f'Downloading contents of package {package}...')
+
for resource in contents['result']['resources']:
#if resource['name'] != None:
# name = resource['name']
@@ -233,7 +228,6 @@ class CkanDumper:
# name = resource['id']
name = resource['id']
-
if resource['format'] != None:
format = '.' + resource['format'].lower()
else:
@@ -245,16 +239,19 @@ class CkanDumper:
self.download.get(package_downloads)
- # Run only once during development
- #return
-
if __name__ == "__main__":
# Parse CLI
parser = argparse.ArgumentParser(description='Dump CKAN metadata and datasets.')
- parser.add_argument('url', nargs='+', help='CKAN instance URL')
- parser.add_argument('dest', nargs='+', help='Destination folder')
- parser.add_argument("--limit-rate", help="Limit the download speed to amount bytes per second, per download")
- parser.add_argument("--limit-concurrent", help="Limit the total concurrent downloads")
+ parser.add_argument('url', nargs='+', help='CKAN instance URL')
+ parser.add_argument('dest', nargs='+', help='Destination folder')
+ parser.add_argument("--limit-rate", help="Limit the download speed to amount bytes per second, per download")
+ parser.add_argument("--limit-concurrent", help="Limit the total concurrent downloads")
+ parser.add_argument('--debug', dest='debug', action='store_true', help="Enable debug")
+ parser.add_argument('--no-debug', dest='debug', action='store_false', help="Disable debug")
+ parser.add_argument('--progress', dest='progress', action='store_true', help="Enable progress")
+ parser.add_argument('--no-progress', dest='progress', action='store_false', help="Disable progress")
+ parser.set_defaults(debug=False)
+ parser.set_defaults(progress=True)
args = parser.parse_args()
# Dispatch