From fc5d9ba9e251db721a525974e6a8a7acde58814a Mon Sep 17 00:00:00 2001 From: Silvio Rhatto Date: Thu, 28 Jan 2021 16:16:02 -0300 Subject: Fix: coding style --- csv-hasher.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'csv-hasher.py') diff --git a/csv-hasher.py b/csv-hasher.py index 090b226..e3f71e5 100755 --- a/csv-hasher.py +++ b/csv-hasher.py @@ -45,6 +45,7 @@ class CsvHasher: exit (1) def apply_hash(self, df): + """Apply the hash function into a column from a dataframe""" return df[self.args.colname[0]].apply(lambda x: \ getattr(hashlib, self.args.hashfunc)(str(x).encode('utf-8')).hexdigest()) @@ -57,6 +58,7 @@ class CsvHasher: Thanks https://stackoverflow.com/questions/55775674/how-do-i-hash-specific-columns-from-a-csv-file Thanks https://stackoverflow.com/questions/11622652/large-persistent-dataframe-in-pandas/12193309#12193309 """ + # Read the CSV df = pd.read_csv(self.args.infile[0], sep=self.args.sep, iterator=True, chunksize=self.args.chunksize) df = pd.concat(tp, ignore_index=True) @@ -73,6 +75,7 @@ class CsvHasher: Thanks https://stackoverflow.com/questions/11622652/large-persistent-dataframe-in-pandas/12193309#12193309 """ + infile = self.args.infile[0] # Get number of lines in the CSV file -- cgit v1.2.3