aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSilvio Rhatto <rhatto@riseup.net>2021-01-28 22:10:13 -0300
committerSilvio Rhatto <rhatto@riseup.net>2021-01-28 22:10:13 -0300
commitfab1f153ca231936908bfb2ed33537adacbebfa6 (patch)
tree48d91e77e8b222bf422b28dceb01f18b023a18df
parent5b6dd528366b6792e16099c628066931d8762134 (diff)
downloadcsv-hasher-fab1f153ca231936908bfb2ed33537adacbebfa6.tar.gz
csv-hasher-fab1f153ca231936908bfb2ed33537adacbebfa6.tar.bz2
Feat: adds --check option for the test suite
-rw-r--r--Makefile2
-rwxr-xr-xcsv-hasher.py22
2 files changed, 23 insertions, 1 deletions
diff --git a/Makefile b/Makefile
index 1a13c56..8732717 100644
--- a/Makefile
+++ b/Makefile
@@ -19,7 +19,7 @@ sample:
pipenv run ./csv-sampler.py --iterations $(SAMPLE_ITERATIONS) --rows_per_iteration $(SAMPLE_ROWS_PER_ITERATION) $(SAMPLE)
test-sample:
- pipenv run ./csv-hasher.py --chunksize $(CHUNKSIZE) $(SAMPLE) $(OUTPUT) $(COLNAME)
+ pipenv run ./csv-hasher.py --check --chunksize $(CHUNKSIZE) $(SAMPLE) $(OUTPUT) $(COLNAME)
show-test-output:
head -$(CHECK_LINES) $(SAMPLE)
diff --git a/csv-hasher.py b/csv-hasher.py
index bdd9950..c07adb0 100755
--- a/csv-hasher.py
+++ b/csv-hasher.py
@@ -151,6 +151,21 @@ class CsvHasher:
if hasattr(progress_bar, 'close'):
progress_bar.close()
+ def check(self):
+ """Check both files for differences"""
+
+ df_infile = pd.read_csv(self.args.infile[0], sep=self.args.sep)
+ df_outfile = pd.read_csv(self.args.outfile[0], sep=self.args.sep)
+
+ print('Comparing both files without excluding the ' + self.args.colname[0] + ' column:')
+ print(df_infile.compare(df_outfile))
+
+ del df_infile[self.args.colname[0]]
+ del df_outfile[self.args.colname[0]]
+
+ print('Comparing both files excluding the ' + self.args.colname[0] + ' column:')
+ print(df_infile.compare(df_outfile))
+
def cmdline():
"""
Evaluate the command line.
@@ -185,11 +200,15 @@ def cmdline():
parser.add_argument('--no-progress', dest='progress', action='store_false',
help='Disable progress bar.')
+ parser.add_argument('--check', dest='check', action='store_true',
+ help='Check both files for differences (test suite), defaults to false.')
+
# Add default values and get args
parser.set_defaults(sep=',')
parser.set_defaults(chunksize='1M')
parser.set_defaults(hashfunc='sha256')
parser.set_defaults(progress=True)
+ parser.set_defaults(check=False)
args = parser.parse_args()
return args
@@ -199,3 +218,6 @@ if __name__ == "__main__":
instance = CsvHasher(args)
instance.run()
+
+ if args.check == True:
+ instance.check()