author     Silvio Rhatto <rhatto@riseup.net>  2019-05-13 10:56:38 -0300
committer  Silvio Rhatto <rhatto@riseup.net>  2019-05-13 10:56:38 -0300
commit     fb23b97ad02b749fa876785fb032162e35a36c85 (patch)
tree       7032b6fd17f70b4adb898b31f9ac7b0065557111
parent     48912fbbd0abc2bea414a658d2bc7019ec519b7d (diff)
download   ckandumper-fb23b97ad02b749fa876785fb032162e35a36c85.tar.gz
           ckandumper-fb23b97ad02b749fa876785fb032162e35a36c85.tar.bz2
Coding style
-rwxr-xr-x  ckandumper  224
1 file changed, 113 insertions, 111 deletions
diff --git a/ckandumper b/ckandumper
index 0d3bd31..3f8f08e 100755
--- a/ckandumper
+++ b/ckandumper
@@ -24,117 +24,119 @@ import sys, os, subprocess, pycurl, json
from urllib.parse import urlencode
class ckandumper:
- """Dumps CKAN data: metadata plus entire datasets"""
-
- def __init__(self, args):
- self.url = args.url[0]
- self.dest = args.dest[0]
- self.package_list = '/api/3/action/package_list'
- self.package_show = '/api/3/action/package_show?'
- self.group_list = '/api/3/action/group_list'
- self.group_show = '/api/3/action/group_show?'
- self.tag_list = '/api/3/action/tag_list'
- self.tag_show = '/api/3/action/tag_show?'
-
- if args.limit_rate != None
- self.limit_rate = '--limit-rate=' + args.limit_rate
-
- # Using wget as it is more reliable
- def download(self, url, local_filename):
- subprocess.call('/usr/bin/wget ' + self.limit_rate + ' -c -O "' + local_filename + '" ' + url, shell=True)
-
- def ensuredir(self, dest):
- # Ensure that the destination folder exists
- if not os.path.exists(dest) and not os.path.isdir(dest):
- os.makedirs(dest, 0o755);
- elif os.path.exists(dest) and not os.path.isdir(dest):
- raise ValueError('File exists and is not a folder:' + dest)
-
- def loadJSON(self, file):
- descriptor = open(file)
- data = json.load(descriptor)
- file.close()
-
- def dump(self):
- self.ensuredir(self.dest)
-
- # Move to dest folder
- #os.chdir(self.dest)
-
- package_list = self.dest + os.sep + 'package_list.json'
- group_list = self.dest + os.sep + 'group_list.json'
- tag_list = self.dest + os.sep + 'tag_list.json'
-
- #
- # Groups
- #
- self.download(self.url + self.group_list, group_list)
- groups = self.loadJSON(group_list)
-
- for group in groups['result']:
- group_folder = self.dest + os.sep + 'groups' + os.sep + group
- group_file = group_folder + os.sep + 'group.json'
- self.ensuredir(group_folder)
- print("Downloading " + self.url + self.group_show + 'id=' + group + '...')
- self.download(self.url + self.group_show + urlencode({ 'id': group }, False, '', 'utf-8'), group_file)
-
- #
- # Tags
- #
- self.download(self.url + self.tag_list, tag_list)
- tags = self.loadJSON(tag_list)
-
- for tag in tags['result']:
- tag_folder = self.dest + os.sep + 'tags' + os.sep + tag
- tag_file = tag_folder + os.sep + 'tag.json'
- self.ensuredir(tag_folder)
- print("Downloading " + self.url + self.tag_show + 'id=' + tag + '...')
- self.download(self.url + self.tag_show + urlencode({ 'id': tag }, False, '', 'utf-8'), tag_file)
-
- #
- # Packages
- #
- self.download(self.url + self.package_list, package_list)
- packages = self.loadJSON(package_list)
-
- for package in packages['result']:
- package_folder = self.dest + os.sep + 'packages' + os.sep + package
- package_file = package_folder + os.sep + 'package.json'
- self.ensuredir(package_folder + os.sep + 'data')
- print("Downloading " + self.url + self.package_show + 'id=' + package + '...')
- self.download(self.url + self.package_show + urlencode({ 'id': package }, False, '', 'utf-8'), package_file)
-
- contents = self.loadJSON(package_file)
-
- for resource in contents['result']['resources']:
- #if resource['name'] != None:
- # name = resource['name']
- #else
- # name = resource['id']
-
- name = resource['id']
-
- if resource['format'] != None:
- format = '.' + resource['format'].lower()
- else:
- format = ''
-
- resource_file = package_folder + os.sep + 'data' + os.sep + name + format
-
- self.download(resource['url'], resource_file)
-
- # Run only once during development
- #return
+ """Dumps CKAN data: metadata plus entire datasets"""
+
+ def __init__(self, args):
+ self.url = args.url[0]
+ self.dest = args.dest[0]
+ self.package_list = '/api/3/action/package_list'
+ self.package_show = '/api/3/action/package_show?'
+ self.group_list = '/api/3/action/group_list'
+ self.group_show = '/api/3/action/group_show?'
+ self.tag_list = '/api/3/action/tag_list'
+ self.tag_show = '/api/3/action/tag_show?'
+
+        if args.limit_rate != None:
+            self.limit_rate = '--limit-rate=' + args.limit_rate
+        else:
+            # Keep the attribute defined so download() can interpolate it unconditionally
+            self.limit_rate = ''
+
+    # Using wget as it is more reliable
+    def download(self, url, local_filename):
+        subprocess.call('/usr/bin/wget ' + self.limit_rate + ' -c -O "' + local_filename + '" ' + url, shell=True)
+
+    def ensuredir(self, dest):
+        # Ensure that the destination folder exists
+        if not os.path.exists(dest) and not os.path.isdir(dest):
+            os.makedirs(dest, 0o755);
+        elif os.path.exists(dest) and not os.path.isdir(dest):
+            raise ValueError('File exists and is not a folder:' + dest)
+
+    def loadJSON(self, file):
+        # Parse a downloaded JSON document and hand the data back to the caller
+        with open(file) as descriptor:
+            return json.load(descriptor)
+
+    def dump(self):
+        self.ensuredir(self.dest)
+
+        # Move to dest folder
+        #os.chdir(self.dest)
+
+        package_list = self.dest + os.sep + 'package_list.json'
+        group_list = self.dest + os.sep + 'group_list.json'
+        tag_list = self.dest + os.sep + 'tag_list.json'
+
+        #
+        # Groups
+        #
+        self.download(self.url + self.group_list, group_list)
+        groups = self.loadJSON(group_list)
+
+        for group in groups['result']:
+            group_folder = self.dest + os.sep + 'groups' + os.sep + group
+            group_file = group_folder + os.sep + 'group.json'
+
+            self.ensuredir(group_folder)
+            print("Downloading " + self.url + self.group_show + 'id=' + group + '...')
+            self.download(self.url + self.group_show + urlencode({ 'id': group }, False, '', 'utf-8'), group_file)
+        #
+        # Tags
+        #
+        self.download(self.url + self.tag_list, tag_list)
+        tags = self.loadJSON(tag_list)
+
+        for tag in tags['result']:
+            tag_folder = self.dest + os.sep + 'tags' + os.sep + tag
+            tag_file = tag_folder + os.sep + 'tag.json'
+
+            self.ensuredir(tag_folder)
+            print("Downloading " + self.url + self.tag_show + 'id=' + tag + '...')
+            self.download(self.url + self.tag_show + urlencode({ 'id': tag }, False, '', 'utf-8'), tag_file)
+
+        #
+        # Packages
+        #
+        self.download(self.url + self.package_list, package_list)
+        packages = self.loadJSON(package_list)
+
+        for package in packages['result']:
+            package_folder = self.dest + os.sep + 'packages' + os.sep + package
+            package_file = package_folder + os.sep + 'package.json'
+
+            self.ensuredir(package_folder + os.sep + 'data')
+            print("Downloading " + self.url + self.package_show + 'id=' + package + '...')
+            self.download(self.url + self.package_show + urlencode({ 'id': package }, False, '', 'utf-8'), package_file)
+
+            contents = self.loadJSON(package_file)
+
+            for resource in contents['result']['resources']:
+                #if resource['name'] != None:
+                #    name = resource['name']
+                #else
+                #    name = resource['id']
+
+                name = resource['id']
+
+                if resource['format'] != None:
+                    format = '.' + resource['format'].lower()
+                else:
+                    format = ''
+
+                resource_file = package_folder + os.sep + 'data' + os.sep + name + format
+
+                self.download(resource['url'], resource_file)
+
+        # Run only once during development
+        #return
# Standalone usage
if __name__ == "__main__":
-    # Parse CLI
-    parser = argparse.ArgumentParser(description='Dump CKAN metadata and datasets.')
-    parser.add_argument('url', nargs='+', help='CKAN instance URL')
-    parser.add_argument('dest', nargs='+', help='Destination folder')
-    parser.add_argument("--limit-rate", help="Limit the download speed to amount bytes per second, per download")
-    args = parser.parse_args()
-
-    # Dispatch
-    ckan = ckandumper(args)
-    ckan.dump()
+    # Parse CLI
+    parser = argparse.ArgumentParser(description='Dump CKAN metadata and datasets.')
+    parser.add_argument('url', nargs='+', help='CKAN instance URL')
+    parser.add_argument('dest', nargs='+', help='Destination folder')
+    parser.add_argument("--limit-rate", help="Limit the download speed to amount bytes per second, per download")
+    args = parser.parse_args()
+
+    # Dispatch
+    ckan = ckandumper(args)
+    ckan.dump()
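
For reference, the standalone block above expects a CKAN base URL, a destination folder and
an optional --limit-rate value on the command line. The snippet below is a minimal sketch of
driving the same class directly from Python; the instance URL, destination folder and rate
value are illustrative placeholders, and it assumes the ckandumper class is available in the
current session:

    # Roughly equivalent to: ./ckandumper --limit-rate=100k https://demo.ckan.org ./dump
    # argparse.Namespace stands in for the parsed command-line arguments; note that
    # 'url' and 'dest' are lists because the parser declares them with nargs='+'.
    from argparse import Namespace

    args = Namespace(url=['https://demo.ckan.org'],   # hypothetical CKAN instance
                     dest=['./dump'],                  # hypothetical destination folder
                     limit_rate='100k')                # forwarded to wget as --limit-rate=100k
    ckan = ckandumper(args)
    ckan.dump()   # writes package_list.json, group_list.json, tag_list.json and per-item folders under ./dump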