diff options
-rwxr-xr-x | csv-hasher.py | 3 |
1 files changed, 3 insertions, 0 deletions
diff --git a/csv-hasher.py b/csv-hasher.py index 090b226..e3f71e5 100755 --- a/csv-hasher.py +++ b/csv-hasher.py @@ -45,6 +45,7 @@ class CsvHasher: exit (1) def apply_hash(self, df): + """Apply the hash function into a column from a dataframe""" return df[self.args.colname[0]].apply(lambda x: \ getattr(hashlib, self.args.hashfunc)(str(x).encode('utf-8')).hexdigest()) @@ -57,6 +58,7 @@ class CsvHasher: Thanks https://stackoverflow.com/questions/55775674/how-do-i-hash-specific-columns-from-a-csv-file Thanks https://stackoverflow.com/questions/11622652/large-persistent-dataframe-in-pandas/12193309#12193309 """ + # Read the CSV df = pd.read_csv(self.args.infile[0], sep=self.args.sep, iterator=True, chunksize=self.args.chunksize) df = pd.concat(tp, ignore_index=True) @@ -73,6 +75,7 @@ class CsvHasher: Thanks https://stackoverflow.com/questions/11622652/large-persistent-dataframe-in-pandas/12193309#12193309 """ + infile = self.args.infile[0] # Get number of lines in the CSV file |