From fab1f153ca231936908bfb2ed33537adacbebfa6 Mon Sep 17 00:00:00 2001
From: Silvio Rhatto
Date: Thu, 28 Jan 2021 22:10:13 -0300
Subject: Feat: adds --check option for the test suite

---
Review note: the added lines in csv-hasher.py were revised after this commit
was exported, so the commit id above and the post-image blob id on the
csv-hasher.py "index" line refer to the unrevised content.

 Makefile      |  2 +-
 csv-hasher.py | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 1a13c56..8732717 100644
--- a/Makefile
+++ b/Makefile
@@ -19,7 +19,7 @@ sample:
 	pipenv run ./csv-sampler.py --iterations $(SAMPLE_ITERATIONS) --rows_per_iteration $(SAMPLE_ROWS_PER_ITERATION) $(SAMPLE)
 
 test-sample:
-	pipenv run ./csv-hasher.py --chunksize $(CHUNKSIZE) $(SAMPLE) $(OUTPUT) $(COLNAME)
+	pipenv run ./csv-hasher.py --check --chunksize $(CHUNKSIZE) $(SAMPLE) $(OUTPUT) $(COLNAME)
 
 show-test-output:
 	head -$(CHECK_LINES) $(SAMPLE)
diff --git a/csv-hasher.py b/csv-hasher.py
index bdd9950..c07adb0 100755
--- a/csv-hasher.py
+++ b/csv-hasher.py
@@ -151,6 +151,35 @@ class CsvHasher:
         if hasattr(progress_bar, 'close'):
             progress_bar.close()
 
+    def check(self):
+        """Check both files for differences.
+
+        Loads the input and output files with pandas and prints the result
+        of DataFrame.compare() twice: first over every column, then with
+        the column named by the colname argument removed from both frames.
+        """
+        colname = self.args.colname[0]
+        df_infile = pd.read_csv(self.args.infile[0], sep=self.args.sep)
+        df_outfile = pd.read_csv(self.args.outfile[0], sep=self.args.sep)
+
+        # DataFrame.compare() raises ValueError unless both frames carry
+        # identical row and column labels; report that case instead of
+        # ending the check with a traceback.
+        if not (df_infile.index.equals(df_outfile.index)
+                and df_infile.columns.equals(df_outfile.columns)):
+            print('Cannot compare files with different rows or columns: '
+                  + str(df_infile.shape) + ' vs ' + str(df_outfile.shape))
+            return
+
+        print('Comparing both files without excluding the ' + colname + ' column:')
+        print(df_infile.compare(df_outfile))
+
+        del df_infile[colname]
+        del df_outfile[colname]
+
+        print('Comparing both files excluding the ' + colname + ' column:')
+        print(df_infile.compare(df_outfile))
+
 def cmdline():
     """
     Evalutate the command line.
@@ -185,11 +214,15 @@ def cmdline():
     parser.add_argument('--no-progress', dest='progress', action='store_false',
                         help='Disable progress bar.')
 
+    parser.add_argument('--check', dest='check', action='store_true',
+                        help='Check both files for differences (test suite), defaults to false.')
+
     # Add default values and get args
     parser.set_defaults(sep=',')
     parser.set_defaults(chunksize='1M')
     parser.set_defaults(hashfunc='sha256')
     parser.set_defaults(progress=True)
+    parser.set_defaults(check=False)
     args = parser.parse_args()
 
     return args
@@ -199,3 +232,6 @@ if __name__ == "__main__":
 
     instance = CsvHasher(args)
     instance.run()
+
+    if args.check:
+        instance.check()
-- 
cgit v1.2.3