about | summary | refs | log | tree | commit | diff
path: root/csv-hasher.py
diff options
context:
space:
mode:
Diffstat (limited to 'csv-hasher.py')
-rwxr-xr-x csv-hasher.py 3
1 files changed, 3 insertions, 0 deletions
diff --git a/csv-hasher.py b/csv-hasher.py
index 090b226..e3f71e5 100755
--- a/csv-hasher.py
+++ b/csv-hasher.py
@@ -45,6 +45,7 @@ class CsvHasher:
exit (1)
def apply_hash(self, df):
+ """Apply the hash function to a column of a dataframe."""
return df[self.args.colname[0]].apply(lambda x: \
getattr(hashlib, self.args.hashfunc)(str(x).encode('utf-8')).hexdigest())
@@ -57,6 +58,7 @@ class CsvHasher:
Thanks https://stackoverflow.com/questions/55775674/how-do-i-hash-specific-columns-from-a-csv-file
Thanks https://stackoverflow.com/questions/11622652/large-persistent-dataframe-in-pandas/12193309#12193309
"""
+
# Read the CSV
df = pd.read_csv(self.args.infile[0], sep=self.args.sep, iterator=True, chunksize=self.args.chunksize)
df = pd.concat(tp, ignore_index=True)
@@ -73,6 +75,7 @@ class CsvHasher:
Thanks https://stackoverflow.com/questions/11622652/large-persistent-dataframe-in-pandas/12193309#12193309
"""
+
infile = self.args.infile[0]
# Get number of lines in the CSV file