diff options
author | Silvio Rhatto <rhatto@riseup.net> | 2021-01-28 22:10:13 -0300 |
---|---|---|
committer | Silvio Rhatto <rhatto@riseup.net> | 2021-01-28 22:10:13 -0300 |
commit | fab1f153ca231936908bfb2ed33537adacbebfa6 (patch) | |
tree | 48d91e77e8b222bf422b28dceb01f18b023a18df | |
parent | 5b6dd528366b6792e16099c628066931d8762134 (diff) | |
download | csv-hasher-fab1f153ca231936908bfb2ed33537adacbebfa6.tar.gz csv-hasher-fab1f153ca231936908bfb2ed33537adacbebfa6.tar.bz2 |
Feat: adds --check option for the test suite
-rw-r--r-- | Makefile | 2 | ||||
-rwxr-xr-x | csv-hasher.py | 22 |
2 files changed, 23 insertions, 1 deletion
@@ -19,7 +19,7 @@ sample:
 	pipenv run ./csv-sampler.py --iterations $(SAMPLE_ITERATIONS) --rows_per_iteration $(SAMPLE_ROWS_PER_ITERATION) $(SAMPLE)
 
 test-sample:
-	pipenv run ./csv-hasher.py --chunksize $(CHUNKSIZE) $(SAMPLE) $(OUTPUT) $(COLNAME)
+	pipenv run ./csv-hasher.py --check --chunksize $(CHUNKSIZE) $(SAMPLE) $(OUTPUT) $(COLNAME)
 
 show-test-output:
 	head -$(CHECK_LINES) $(SAMPLE)
diff --git a/csv-hasher.py b/csv-hasher.py
index bdd9950..c07adb0 100755
--- a/csv-hasher.py
+++ b/csv-hasher.py
@@ -151,6 +151,21 @@ class CsvHasher:
         if hasattr(progress_bar, 'close'):
             progress_bar.close()
 
+    def check(self):
+        """Check both files for differences"""
+
+        df_infile = pd.read_csv(self.args.infile[0], sep=self.args.sep)
+        df_outfile = pd.read_csv(self.args.outfile[0], sep=self.args.sep)
+
+        print('Comparing both files without excluding the ' + self.args.colname[0] + ' column:')
+        print(df_infile.compare(df_outfile))
+
+        del df_infile[self.args.colname[0]]
+        del df_outfile[self.args.colname[0]]
+
+        print('Comparing both files excluding the ' + self.args.colname[0] + ' column:')
+        print(df_infile.compare(df_outfile))
+
 def cmdline():
     """
     Evalutate the command line.
@@ -185,11 +200,15 @@ def cmdline():
     parser.add_argument('--no-progress', dest='progress', action='store_false',
                         help='Disable progress bar.')
 
+    parser.add_argument('--check', dest='check', action='store_true',
+                        help='Check both files for differences (test suite), defaults to false.')
+
     # Add default values and get args
     parser.set_defaults(sep=',')
     parser.set_defaults(chunksize='1M')
     parser.set_defaults(hashfunc='sha256')
     parser.set_defaults(progress=True)
+    parser.set_defaults(check=False)
     args = parser.parse_args()
 
     return args
@@ -199,3 +218,6 @@ if __name__ == "__main__":
 
     instance = CsvHasher(args)
     instance.run()
+
+    if args.check == True:
+        instance.check()