From 6142e853fa2cf1c1be695ca7cce121e48b177d7b Mon Sep 17 00:00:00 2001
From: Silvio Rhatto
Date: Fri, 29 Jan 2021 18:50:12 -0300
Subject: Feat: change default and test params; use return status

---
 Makefile       |  8 ++++----
 bin/provision  |  2 +-
 csv-hasher.py  | 30 ++++++++++++++++++------------
 csv-sampler.py | 18 +++++++++++-------
 4 files changed, 34 insertions(+), 24 deletions(-)

diff --git a/Makefile b/Makefile
index 8732717..df79e73 100644
--- a/Makefile
+++ b/Makefile
@@ -2,10 +2,10 @@
 # Makefile for csv-hasher
 #
 
-CHUNKSIZE = 10000
-CHECK_LINES = 20
-SAMPLE_ITERATIONS = 1000
-SAMPLE_ROWS_PER_ITERATION = 1000
+CHUNKSIZE = 64K
+CHECK_LINES = 16
+SAMPLE_ITERATIONS = 1024
+SAMPLE_ROWS_PER_ITERATION = 1024
 TESTS = tests
 COLNAME = id
 SAMPLE = $(TESTS)/sample.csv
diff --git a/bin/provision b/bin/provision
index 89da228..df1ef5a 100755
--- a/bin/provision
+++ b/bin/provision
@@ -18,4 +18,4 @@
 # along with this program. If not, see .
 
 # Setuo pipenv
-sudo apt install pipenv
+sudo apt install -y pipenv
diff --git a/csv-hasher.py b/csv-hasher.py
index c07adb0..e76c7b0 100755
--- a/csv-hasher.py
+++ b/csv-hasher.py
@@ -90,7 +90,7 @@ class CsvHasher:
         # Check the input file
         if nlines < 2:
             print('CSV file is too small.')
-            exit (1)
+            return False
 
         # Holds columns definition
         columns = None
@@ -104,7 +104,7 @@ class CsvHasher:
         # Check for the column
         if self.args.colname[0] not in columns:
             print('Column not found: ' + self.args.colname[0])
-            exit (1)
+            return False
 
         # Start with an empty file
         try:
@@ -112,7 +112,7 @@ class CsvHasher:
                 f.truncate(0)
         except IOError:
             print('Error writing to ' + outfile)
-            exit(1)
+            return False
 
         # Initialize progress bar
         progress_bar = tqdm(total=nlines) if self.args.progress else False
@@ -136,7 +136,7 @@ class CsvHasher:
                 df[self.args.colname[0]] = self.apply_hash(df)
             except KeyError as e:
                 print('Column not found: ' + self.args.colname[0])
-                exit (1)
+                return False
 
             # Writing the new CSV output
             df.to_csv(outfile, index=False, mode='a', header=write_header)
@@ -173,7 +173,11 @@ def cmdline():
     :return: Command line arguments.
     """
 
-    basename = os.path.basename(__file__)
+    # Defaults
+    basename = os.path.basename(__file__)
+    chunksize = '1M'
+    hashfunc = 'sha256'
+    progress = True
 
     # Parse CLI
     #examples = "Examples:\n\t" + basename + " --no-progress \n"
@@ -190,23 +194,23 @@ def cmdline():
     parser.add_argument('--sep', dest='sep', help='Separator, defaults to ","')
 
     parser.add_argument('--chunksize', dest='chunksize',
-            help='Read chunks at a time, defaults to 1M, supports human-readable notation')
+            help='Read chunks at a time, supports human-readable notation, defaults to ' + chunksize)
 
-    parser.add_argument('--hashfunc', dest='hashfunc', help='Hash function, defaults do sha256')
+    parser.add_argument('--hashfunc', dest='hashfunc', help='Hash function, defaults do ' + hashfunc)
 
     parser.add_argument('--progress', dest='progress', action='store_true',
-            help='Enable progress bar.')
+            help='Enable progress bar, defaults to ' + str(progress))
 
     parser.add_argument('--no-progress', dest='progress', action='store_false',
             help='Disable progress bar.')
 
     parser.add_argument('--check', dest='check', action='store_true',
-            help='Check both files for differences (test suite), defaults to false.')
+            help='Check both files for differences (test suite), defaults to ' + str(not progress))
 
     # Add default values and get args
     parser.set_defaults(sep=',')
-    parser.set_defaults(chunksize='1M')
-    parser.set_defaults(hashfunc='sha256')
+    parser.set_defaults(chunksize=chunksize)
+    parser.set_defaults(hashfunc=hashfunc)
     parser.set_defaults(progress=True)
     parser.set_defaults(check=False)
     args = parser.parse_args()
@@ -216,8 +220,10 @@ if __name__ == "__main__":
     args = cmdline()
     instance = CsvHasher(args)
+    status = instance.run()
 
-    instance.run()
+    if status is False:
+        exit(1)
 
     if args.check == True:
         instance.check()
 
diff --git a/csv-sampler.py b/csv-sampler.py
index 35d82db..fa861a8 100755
--- a/csv-sampler.py
+++ b/csv-sampler.py
@@ -66,7 +66,11 @@ def cmdline():
     :return: Command line arguments.
     """
 
-    basename = os.path.basename(__file__)
+    # Defaults
+    basename = os.path.basename(__file__)
+    rows_per_iteration = 1024
+    iterations = 1024
+    progress = True
 
     # Parse CLI
     #examples = "Examples:\n\t" + basename + " --no-progress \n"
@@ -76,23 +80,23 @@ def cmdline():
             epilog=epilog,
             formatter_class=argparse.RawDescriptionHelpFormatter,)
 
-    parser.add_argument('outfile', nargs=1, help='CSV output file name')
+    parser.add_argument('outfile', nargs=1, help='CSV output file name')
 
     parser.add_argument('--rows_per_iteration', dest='rows_per_iteration',
-            type=int, help='Rows per iteration, defaults to 1000')
+            type=int, help='Rows per iteration, defaults to ' + str(rows_per_iteration))
 
    parser.add_argument('--iterations', dest='iterations',
-            help='Number of iterations, defaults to 1000')
+            help='Number of iterations, defaults to ' + str(iterations))
 
     parser.add_argument('--progress', dest='progress', action='store_true',
-            help='Enable progress bar.')
+            help='Enable progress bar, defaults to ' + str(progress))
 
     parser.add_argument('--no-progress', dest='progress', action='store_false',
             help='Disable progress bar.')
 
     # Add default values and get args
-    parser.set_defaults(rows_per_iteration=1000)
-    parser.set_defaults(iterations=1000)
+    parser.set_defaults(rows_per_iteration=rows_per_iteration)
+    parser.set_defaults(iterations=iterations)
     parser.set_defaults(progress=True)
     args = parser.parse_args()
 
--
cgit v1.2.3