about summary refs log tree commit diff
diff options
context:
space:
mode:
author Silvio Rhatto <rhatto@riseup.net> 2021-01-28 16:16:02 -0300
committer Silvio Rhatto <rhatto@riseup.net> 2021-01-28 16:16:02 -0300
commit fc5d9ba9e251db721a525974e6a8a7acde58814a (patch)
tree 1435af89b2f0105af76a0ed35e03a74d6e57a4c7
parent 4b075c2096d0e464c848e9c894071330c68dcd73 (diff)
download csv-hasher-fc5d9ba9e251db721a525974e6a8a7acde58814a.tar.gz
csv-hasher-fc5d9ba9e251db721a525974e6a8a7acde58814a.tar.bz2
Fix: coding style
-rwxr-xr-x csv-hasher.py 3
1 files changed, 3 insertions, 0 deletions
diff --git a/csv-hasher.py b/csv-hasher.py
index 090b226..e3f71e5 100755
--- a/csv-hasher.py
+++ b/csv-hasher.py
@@ -45,6 +45,7 @@ class CsvHasher:
exit (1)
def apply_hash(self, df):
+ """Apply the hash function into a column from a dataframe"""
return df[self.args.colname[0]].apply(lambda x: \
getattr(hashlib, self.args.hashfunc)(str(x).encode('utf-8')).hexdigest())
@@ -57,6 +58,7 @@ class CsvHasher:
Thanks https://stackoverflow.com/questions/55775674/how-do-i-hash-specific-columns-from-a-csv-file
Thanks https://stackoverflow.com/questions/11622652/large-persistent-dataframe-in-pandas/12193309#12193309
"""
+
# Read the CSV
df = pd.read_csv(self.args.infile[0], sep=self.args.sep, iterator=True, chunksize=self.args.chunksize)
df = pd.concat(tp, ignore_index=True)
@@ -73,6 +75,7 @@ class CsvHasher:
Thanks https://stackoverflow.com/questions/11622652/large-persistent-dataframe-in-pandas/12193309#12193309
"""
+
infile = self.args.infile[0]
# Get number of lines in the CSV file