From 4649677f1084d1867460fcf0de34fa01ac32cff1 Mon Sep 17 00:00:00 2001 From: Silvio Rhatto Date: Thu, 16 May 2019 09:06:41 -0300 Subject: Dump stats for each batch --- ckandumper | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/ckandumper b/ckandumper index affaf24..5640dee 100755 --- a/ckandumper +++ b/ckandumper @@ -114,7 +114,7 @@ class DownloadMultiple: await asyncio.gather(*jobs) def get(self, filepairs): - self.stats = { 'exitstatus': {} } + self.stats = { 'exitstatus': {} } self.bar = tqdm(total=len(filepairs)) if self.progress and len(filepairs) > 1 else False loop = asyncio.get_event_loop() @@ -168,6 +168,16 @@ class CkanDumper: descriptor.close() return data + def dump_stats(self, stats): + """Dump download batch statistics""" + if stats != None: + print('Statistics (exit status / total downloads): ', end='') + + for status in stats['exitstatus']: + print(status + ': ' + str(len(stats['exitstatus'][status])), end='; ') + + print('') + def dump(self): """Downloads all content listed in a CKAN repository""" package_list = self.dest + os.sep + 'package_list.json' @@ -178,7 +188,7 @@ class CkanDumper: # Groups # print(f'Downloading {self.url}{self.group_list}...') - status = self.download.get([[self.url + self.group_list, group_list]]) + stats = self.download.get([[self.url + self.group_list, group_list]]) groups = self.load_json(group_list) group_downloads = [] @@ -190,13 +200,14 @@ class CkanDumper: group_downloads.append([self.url + self.group_show + urlencode({ 'id': group }, False, '', 'utf-8'), group_file]) - status = self.download.get(group_downloads) + stats = self.download.get(group_downloads) + self.dump_stats(stats) # # Tags # print(f'Downloading {self.url}{self.tag_list}...') - status = self.download.get([[self.url + self.tag_list, tag_list]]) + stats = self.download.get([[self.url + self.tag_list, tag_list]]) tags = self.load_json(tag_list) tags_downloads = [] @@ -208,13 +219,13 @@ class CkanDumper: tags_downloads.append([self.url + self.tag_show + urlencode({ 'id': tag }, False, '', 'utf-8'), tag_file]) - status = self.download.get(tags_downloads) + stats = self.download.get(tags_downloads) # # Packages # print(f'Downloading {self.url}{self.package_list}...') - status = self.download.get([[self.url + self.package_list, package_list]]) + stats = self.download.get([[self.url + self.package_list, package_list]]) packages = self.load_json(package_list) packages_downloads = [] @@ -227,7 +238,7 @@ class CkanDumper: packages_downloads.append([self.url + self.package_show + urlencode({ 'id': package }, False, '', 'utf-8'), package_file]) - status = self.download.get(packages_downloads) + stats = self.download.get(packages_downloads) # # Package contents @@ -256,7 +267,7 @@ class CkanDumper: package_downloads.append([resource['url'], resource_file]) - status = self.download.get(package_downloads) + stats = self.download.get(package_downloads) if __name__ == "__main__": # Parse CLI -- cgit v1.2.3