aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSilvio Rhatto <rhatto@riseup.net>2021-01-28 15:50:11 -0300
committerSilvio Rhatto <rhatto@riseup.net>2021-01-28 15:50:11 -0300
commit2b343942870441b1c0f83cc6afdb030056d45c2e (patch)
tree8773f01a5f8d1cf711e7bcf91f915ba47991b493
parent8f381d2dd5af97f3663449a5ffc7ed76d11976fd (diff)
downloadcsv-hasher-2b343942870441b1c0f83cc6afdb030056d45c2e.tar.gz
csv-hasher-2b343942870441b1c0f83cc6afdb030056d45c2e.tar.bz2
Feat: initial version
-rw-r--r--.gitignore2
-rw-r--r--CODE_OF_CONDUCT.md74
-rw-r--r--Makefile55
-rw-r--r--Pipfile13
-rw-r--r--Pipfile.lock112
-rw-r--r--README.md22
-rwxr-xr-xbin/make-sample59
-rwxr-xr-xbin/provision21
l---------csv-hasher1
-rwxr-xr-xcsv-hasher.py164
-rw-r--r--kvmxfile8
-rw-r--r--tests/.gitgnore0
12 files changed, 484 insertions, 47 deletions
diff --git a/.gitignore b/.gitignore
index e69de29..d1f825c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,2 @@
+tests/output.csv
+tests/sample.csv
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
new file mode 100644
index 0000000..234ed0f
--- /dev/null
+++ b/CODE_OF_CONDUCT.md
@@ -0,0 +1,74 @@
+# Contributor Covenant Code of Conduct
+
+## Our Pledge
+
+In the interest of fostering an open and welcoming environment, we as
+contributors and maintainers pledge to making participation in our project and
+our community a harassment-free experience for everyone, regardless of age, body
+size, disability, ethnicity, gender identity and expression, level of experience,
+nationality, personal appearance, race, religion, or sexual identity and
+orientation.
+
+## Our Standards
+
+Examples of behavior that contributes to creating a positive environment
+include:
+
+* Using welcoming and inclusive language
+* Being respectful of differing viewpoints and experiences
+* Gracefully accepting constructive criticism
+* Focusing on what is best for the community
+* Showing empathy towards other community members
+
+Examples of unacceptable behavior by participants include:
+
+* The use of sexualized language or imagery and unwelcome sexual attention or
+ advances
+* Trolling, insulting/derogatory comments, and personal or political attacks
+* Public or private harassment
+* Publishing others' private information, such as a physical or electronic
+ address, without explicit permission
+* Other conduct which could reasonably be considered inappropriate in a
+ professional setting
+
+## Our Responsibilities
+
+Project maintainers are responsible for clarifying the standards of acceptable
+behavior and are expected to take appropriate and fair corrective action in
+response to any instances of unacceptable behavior.
+
+Project maintainers have the right and responsibility to remove, edit, or
+reject comments, commits, code, wiki edits, issues, and other contributions
+that are not aligned to this Code of Conduct, or to ban temporarily or
+permanently any contributor for other behaviors that they deem inappropriate,
+threatening, offensive, or harmful.
+
+## Scope
+
+This Code of Conduct applies both within project spaces and in public spaces
+when an individual is representing the project or its community. Examples of
+representing a project or community include using an official project e-mail
+address, posting via an official social media account, or acting as an appointed
+representative at an online or offline event. Representation of a project may be
+further defined and clarified by project maintainers.
+
+## Enforcement
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be
+reported by contacting the project team at [INSERT EMAIL ADDRESS]. All
+complaints will be reviewed and investigated and will result in a response that
+is deemed necessary and appropriate to the circumstances. The project team is
+obligated to maintain confidentiality with regard to the reporter of an incident.
+Further details of specific enforcement policies may be posted separately.
+
+Project maintainers who do not follow or enforce the Code of Conduct in good
+faith may face temporary or permanent repercussions as determined by other
+members of the project's leadership.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
+available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
+
+[homepage]: https://www.contributor-covenant.org
+
diff --git a/Makefile b/Makefile
index 3a988d4..16311a1 100644
--- a/Makefile
+++ b/Makefile
@@ -1,50 +1,21 @@
#
-# Global Makefile - https://templater.fluxo.info
+# Makefile for csv-hasher
#
-# This Makefile contains basic, common targets and also includes
-# any Makefile.* available in the current folder.
-#
-
-# Port to serve content
-HTTP_PORT="8000"
-HTTP_SERVER="http.server"
-# Base to serve the content
-HTTP_BASE="."
+# Install the Python dependencies declared in the Pipfile.
+# Phony: an existing "vendor" folder must not mark this target as up to date.
+.PHONY: vendor
+vendor:
+	pipenv install
-# Set CONTAINER based in what we have available in the system
-# This variable can be user in other, included Makefiles to handle virtualization tasks
-ifeq ($(shell which kvmx > /dev/null && test -s kvmxfile && echo yes), yes)
- CONTAINER = kvmx
-else ifeq ($(shell which vagrant > /dev/null && test -s Vagrantfile && echo yes), yes)
- CONTAINER = vagrant
-else ifeq ($(shell which docker > /dev/null && test -s Dockerfile && echo yes), yes)
- CONTAINER = docker
-else
- CONTAINER = ''
-endif
+# Build a sample dataset with 200 rows using bin/make-sample.
+.PHONY: sample
+sample:
+	bin/make-sample 200
-# See http://unix.stackexchange.com/questions/32182/simple-command-line-http-server#32200
-# http://php.net/manual/en/features.commandline.webserver.php
-serve:
- @if [ "$(HTTP_SERVER)" = "SimpleHTTPServer" ]; then cd $(HTTP_BASE) && python -m SimpleHTTPServer $(HTTP_PORT); fi
- @if [ "$(HTTP_SERVER)" = "ssi_server" ]; then cd $(HTTP_BASE) && PYTHONDONTWRITEBYTECODE=0 ssi_server.py $(HTTP_PORT); fi
- @if [ "$(HTTP_SERVER)" = "http.server" ]; then cd $(HTTP_BASE) && python3 -m http.server $(HTTP_PORT); fi
- @if [ "$(HTTP_SERVER)" = "php" ]; then cd $(HTTP_BASE) && php -S localhost:$(HTTP_PORT); fi
+# Hash the "id" column of tests/sample.csv into tests/output.csv,
+# reading 5 rows at a time.
+.PHONY: test-sample
+test-sample:
+	pipenv run ./csv-hasher.py --chunksize 5 tests/sample.csv tests/output.csv id
-# Configure a git post-receive hook
-post_receive:
- git config receive.denyCurrentBranch ignore
- test -s bin/post-receive && cd .git/hooks && ln -sf ../../bin/post-receive
+# Print the first 20 lines of the sample and of the hashed output.
+.PHONY: show-test-output
+show-test-output:
+	head -n 20 tests/sample.csv
+	head -n 20 tests/output.csv
-# Process any other Makefile whose filename matches Makefile.*
-# See https://www.gnu.org/software/make/manual/html_node/Include.html
-#
-# Some of those files might even contain local customizations/overrides
-# that can be .gitignore'd, like a Makefile.local for example.
--include Makefile.*
+# Remove the CSV files from the tests folder.
+# $(RM) is "rm -f", so this does not fail when there is nothing to remove.
+.PHONY: clean-sample
+clean-sample:
+	$(RM) tests/*.csv
-# Customization examples can be as simple as setting variables:
-#CONTAINER = vagrant
-#CONTAINER = docker
-#DESTDIR ?= vendor
+# Full test cycle: clean, build the sample, hash it, show the result, clean.
+# The steps run as sub-makes because make builds a prerequisite only once
+# (a target listed twice is not run twice) and gives no ordering guarantee
+# between prerequisites under "make -j".
+.PHONY: test
+test:
+	$(MAKE) clean-sample
+	$(MAKE) sample
+	$(MAKE) test-sample
+	$(MAKE) show-test-output
+	$(MAKE) clean-sample
diff --git a/Pipfile b/Pipfile
new file mode 100644
index 0000000..b143e89
--- /dev/null
+++ b/Pipfile
@@ -0,0 +1,13 @@
+[[source]]
+url = "https://pypi.python.org/simple"
+verify_ssl = true
+name = "pypi"
+
+[packages]
+pandas = "*"
+tqdm = "*"
+
+[dev-packages]
+
+[requires]
+python_version = "3.7"
diff --git a/Pipfile.lock b/Pipfile.lock
new file mode 100644
index 0000000..194341f
--- /dev/null
+++ b/Pipfile.lock
@@ -0,0 +1,112 @@
+{
+ "_meta": {
+ "hash": {
+ "sha256": "19ab6829f09294559ac6466b24082f8537cb5c7be2d6aec8bbe7b18814d3d587"
+ },
+ "pipfile-spec": 6,
+ "requires": {
+ "python_version": "3.7"
+ },
+ "sources": [
+ {
+ "name": "pypi",
+ "url": "https://pypi.python.org/simple",
+ "verify_ssl": true
+ }
+ ]
+ },
+ "default": {
+ "numpy": {
+ "hashes": [
+ "sha256:012426a41bc9ab63bb158635aecccc7610e3eff5d31d1eb43bc099debc979d94",
+ "sha256:06fab248a088e439402141ea04f0fffb203723148f6ee791e9c75b3e9e82f080",
+ "sha256:0eef32ca3132a48e43f6a0f5a82cb508f22ce5a3d6f67a8329c81c8e226d3f6e",
+ "sha256:1ded4fce9cfaaf24e7a0ab51b7a87be9038ea1ace7f34b841fe3b6894c721d1c",
+ "sha256:2e55195bc1c6b705bfd8ad6f288b38b11b1af32f3c8289d6c50d47f950c12e76",
+ "sha256:2ea52bd92ab9f768cc64a4c3ef8f4b2580a17af0a5436f6126b08efbd1838371",
+ "sha256:36674959eed6957e61f11c912f71e78857a8d0604171dfd9ce9ad5cbf41c511c",
+ "sha256:384ec0463d1c2671170901994aeb6dce126de0a95ccc3976c43b0038a37329c2",
+ "sha256:39b70c19ec771805081578cc936bbe95336798b7edf4732ed102e7a43ec5c07a",
+ "sha256:400580cbd3cff6ffa6293df2278c75aef2d58d8d93d3c5614cd67981dae68ceb",
+ "sha256:43d4c81d5ffdff6bae58d66a3cd7f54a7acd9a0e7b18d97abb255defc09e3140",
+ "sha256:50a4a0ad0111cc1b71fa32dedd05fa239f7fb5a43a40663269bb5dc7877cfd28",
+ "sha256:603aa0706be710eea8884af807b1b3bc9fb2e49b9f4da439e76000f3b3c6ff0f",
+ "sha256:6149a185cece5ee78d1d196938b2a8f9d09f5a5ebfbba66969302a778d5ddd1d",
+ "sha256:759e4095edc3c1b3ac031f34d9459fa781777a93ccc633a472a5468587a190ff",
+ "sha256:7fb43004bce0ca31d8f13a6eb5e943fa73371381e53f7074ed21a4cb786c32f8",
+ "sha256:811daee36a58dc79cf3d8bdd4a490e4277d0e4b7d103a001a4e73ddb48e7e6aa",
+ "sha256:8b5e972b43c8fc27d56550b4120fe6257fdc15f9301914380b27f74856299fea",
+ "sha256:99abf4f353c3d1a0c7a5f27699482c987cf663b1eac20db59b8c7b061eabd7fc",
+ "sha256:a0d53e51a6cb6f0d9082decb7a4cb6dfb33055308c4c44f53103c073f649af73",
+ "sha256:a12ff4c8ddfee61f90a1633a4c4afd3f7bcb32b11c52026c92a12e1325922d0d",
+ "sha256:a4646724fba402aa7504cd48b4b50e783296b5e10a524c7a6da62e4a8ac9698d",
+ "sha256:a76f502430dd98d7546e1ea2250a7360c065a5fdea52b2dffe8ae7180909b6f4",
+ "sha256:a9d17f2be3b427fbb2bce61e596cf555d6f8a56c222bd2ca148baeeb5e5c783c",
+ "sha256:ab83f24d5c52d60dbc8cd0528759532736b56db58adaa7b5f1f76ad551416a1e",
+ "sha256:aeb9ed923be74e659984e321f609b9ba54a48354bfd168d21a2b072ed1e833ea",
+ "sha256:c843b3f50d1ab7361ca4f0b3639bf691569493a56808a0b0c54a051d260b7dbd",
+ "sha256:cae865b1cae1ec2663d8ea56ef6ff185bad091a5e33ebbadd98de2cfa3fa668f",
+ "sha256:cc6bd4fd593cb261332568485e20a0712883cf631f6f5e8e86a52caa8b2b50ff",
+ "sha256:cf2402002d3d9f91c8b01e66fbb436a4ed01c6498fffed0e4c7566da1d40ee1e",
+ "sha256:d051ec1c64b85ecc69531e1137bb9751c6830772ee5c1c426dbcfe98ef5788d7",
+ "sha256:d6631f2e867676b13026e2846180e2c13c1e11289d67da08d71cacb2cd93d4aa",
+ "sha256:dbd18bcf4889b720ba13a27ec2f2aac1981bd41203b3a3b27ba7a33f88ae4827",
+ "sha256:df609c82f18c5b9f6cb97271f03315ff0dbe481a2a02e56aeb1b1a985ce38e60"
+ ],
+ "version": "==1.19.5"
+ },
+ "pandas": {
+ "hashes": [
+ "sha256:050ed2c9d825ef36738e018454e6d055c63d947c1d52010fbadd7584f09df5db",
+ "sha256:055647e7f4c5e66ba92c2a7dcae6c2c57898b605a3fb007745df61cc4015937f",
+ "sha256:23ac77a3a222d9304cb2a7934bb7b4805ff43d513add7a42d1a22dc7df14edd2",
+ "sha256:2de012a36cc507debd9c3351b4d757f828d5a784a5fc4e6766eafc2b56e4b0f5",
+ "sha256:30e9e8bc8c5c17c03d943e8d6f778313efff59e413b8dbdd8214c2ed9aa165f6",
+ "sha256:324e60bea729cf3b55c1bf9e88fe8b9932c26f8669d13b928e3c96b3a1453dff",
+ "sha256:37443199f451f8badfe0add666e43cdb817c59fa36bceedafd9c543a42f236ca",
+ "sha256:47ec0808a8357ab3890ce0eca39a63f79dcf941e2e7f494470fe1c9ec43f6091",
+ "sha256:496fcc29321e9a804d56d5aa5d7ec1320edfd1898eee2f451aa70171cf1d5a29",
+ "sha256:50e6c0a17ef7f831b5565fd0394dbf9bfd5d615ee4dd4bb60a3d8c9d2e872323",
+ "sha256:5527c5475d955c0bc9689c56865aaa2a7b13c504d6c44f0aadbf57b565af5ebd",
+ "sha256:57d5c7ac62925a8d2ab43ea442b297a56cc8452015e71e24f4aa7e4ed6be3d77",
+ "sha256:9d45f58b03af1fea4b48e44aa38a819a33dccb9821ef9e1d68f529995f8a632f",
+ "sha256:b26e2dabda73d347c7af3e6fed58483161c7b87a886a4e06d76ccfe55a044aa9",
+ "sha256:cfd237865d878da9b65cfee883da5e0067f5e2ff839e459466fb90565a77bda3",
+ "sha256:d7cca42dba13bfee369e2944ae31f6549a55831cba3117e17636955176004088",
+ "sha256:fe7de6fed43e7d086e3d947651ec89e55ddf00102f9dd5758763d56d182f0564"
+ ],
+ "index": "pypi",
+ "version": "==1.2.1"
+ },
+ "python-dateutil": {
+ "hashes": [
+ "sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c",
+ "sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a"
+ ],
+ "version": "==2.8.1"
+ },
+ "pytz": {
+ "hashes": [
+ "sha256:16962c5fb8db4a8f63a26646d8886e9d769b6c511543557bc84e9569fb9a9cb4",
+ "sha256:180befebb1927b16f6b57101720075a984c019ac16b1b7575673bea42c6c3da5"
+ ],
+ "version": "==2020.5"
+ },
+ "six": {
+ "hashes": [
+ "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259",
+ "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced"
+ ],
+ "version": "==1.15.0"
+ },
+ "tqdm": {
+ "hashes": [
+ "sha256:4621f6823bab46a9cc33d48105753ccbea671b68bab2c50a9f0be23d4065cb5a",
+ "sha256:fe3d08dd00a526850568d542ff9de9bbc2a09a791da3c334f3213d8d0bbbca65"
+ ],
+ "index": "pypi",
+ "version": "==4.56.0"
+ }
+ },
+ "develop": {}
+}
diff --git a/README.md b/README.md
index 7111eb0..0e9531a 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,20 @@
-csv-hasher
-==========
+# csv-hasher
+
+Hashes a given column of a CSV file.
+
+## Requirements
+
+Running:
+
+* [Python 3](https://python.org).
+* [Pandas](https://pandas.pydata.org).
+* [tqdm](https://tqdm.github.io).
+
+Testing:
+
+* [GNU Make](https://www.gnu.org/software/make/).
+* [Pipenv](https://pipenv.pypa.io).
+
+## Testing
+
+ make vendor
+ make test
diff --git a/bin/make-sample b/bin/make-sample
new file mode 100755
index 0000000..c282a30
--- /dev/null
+++ b/bin/make-sample
@@ -0,0 +1,59 @@
+#!/bin/bash
+#
+# Build a sample dataset.
+#
+# Copyright (C) 2021 Silvio Rhatto - rhatto@riseup.net
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+# Parameters
+DIRNAME="$(dirname "$0")"
+TESTS="$DIRNAME/../tests"
+SAMPLE="$TESTS/sample.csv"
+SEPARATOR=","
+COLS="id a b c d e f g h i"
+NCOLS="$(echo $COLS | wc -w)"
+ENTRIES="${1:-20}"
+
+# Ensure the test folder exists and start from an empty sample file
+# (paths are quoted so a checkout path containing spaces still works)
+mkdir -p "$TESTS"
+: > "$SAMPLE"
+
+# Write sample header: the column names joined by the separator
+echo "${COLS// /$SEPARATOR}" >> "$SAMPLE"
+
+# Write ENTRIES rows, each one made of NCOLS random integers
+for ((row = 1; row <= ENTRIES; row++)); do
+  line="$RANDOM"
+
+  # Remaining columns, each one preceded by the separator
+  for ((col = 1; col < NCOLS; col++)); do
+    line="$line$SEPARATOR$RANDOM"
+  done
+
+  echo "$line" >> "$SAMPLE"
+done
diff --git a/bin/provision b/bin/provision
new file mode 100755
index 0000000..89da228
--- /dev/null
+++ b/bin/provision
@@ -0,0 +1,21 @@
+#!/bin/bash
+#
+# Provision development environment.
+#
+# Copyright (C) 2021 Silvio Rhatto - rhatto@riseup.net
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+# Setup pipenv
+# apt-get with -y is used since this script runs unattended as part of the
+# kvmx provision_command, where there is nobody to answer a prompt
+sudo apt-get install -y pipenv
diff --git a/csv-hasher b/csv-hasher
new file mode 120000
index 0000000..b0c5fe1
--- /dev/null
+++ b/csv-hasher
@@ -0,0 +1 @@
+csv-hasher.py \ No newline at end of file
diff --git a/csv-hasher.py b/csv-hasher.py
new file mode 100755
index 0000000..71c3593
--- /dev/null
+++ b/csv-hasher.py
@@ -0,0 +1,164 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# Hash a given column from a CSV file.
+#
+# Copyright (C) 2021 Silvio Rhatto - rhatto@riseup.net
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import os
+import argparse
+import pandas as pd
+import hashlib
+import subprocess
+from sys import exit
+from tqdm import tqdm
+
+class CsvHasher:
+    """
+    Hashes a column from a CSV file.
+
+    :param args: Parsed command line arguments. The attributes read by this
+                 class are infile, outfile and colname (one-item lists),
+                 sep, chunksize, hashfunc and progress.
+    """
+
+    def __init__(self, args):
+        # Save arguments
+        self.args = args
+
+        # Check if source file exists
+        if not os.path.exists(args.infile[0]):
+            print('File not found: ' + args.infile[0])
+            exit (1)
+
+        # Accept only hash constructors: a plain hasattr() check would also
+        # let through hashlib helpers such as "new" or "pbkdf2_hmac", which
+        # then fail in the middle of the processing
+        if self.args.hashfunc not in hashlib.algorithms_guaranteed:
+            print('Invalid hash function ' + self.args.hashfunc)
+            exit (1)
+
+    def apply_hash(self, df):
+        """
+        Hash every value of the selected column.
+
+        :param df: DataFrame containing the column named by args.colname.
+        :return: Series with the hexadecimal digest of each value, computed
+                 over the UTF-8 encoded string representation of the value.
+        """
+        hasher = getattr(hashlib, self.args.hashfunc)
+
+        return df[self.args.colname[0]].apply(
+            lambda x: hasher(str(x).encode('utf-8')).hexdigest())
+
+    def run_legacy(self):
+        """
+        Process CSV in "legacy" mode: open the input file, process and write the output in a single step.
+        This won't work with CSVs larger than the available memory in the system.
+
+        Thanks https://stackoverflow.com/questions/55775674/how-do-i-hash-specific-columns-from-a-csv-file
+        Thanks https://stackoverflow.com/questions/11622652/large-persistent-dataframe-in-pandas/12193309#12193309
+        """
+        # Read the CSV in chunks and join them in a single DataFrame
+        tp = pd.read_csv(self.args.infile[0], sep=self.args.sep, iterator=True, chunksize=self.args.chunksize)
+        df = pd.concat(tp, ignore_index=True)
+
+        # Hashing the column
+        df[self.args.colname[0]] = self.apply_hash(df)
+
+        # Writing the new CSV output
+        df.to_csv(self.args.outfile[0], index=False)
+
+    def run(self):
+        """
+        Improved CSV processor for large files: the input is read, hashed and
+        written one chunk of args.chunksize rows at a time, so only a single
+        chunk is held in memory.
+
+        The output file is overwritten if it already exists. Exits with
+        status 1 if the input has less than two lines or if the column is
+        not present in the header.
+
+        Thanks https://stackoverflow.com/questions/11622652/large-persistent-dataframe-in-pandas/12193309#12193309
+        """
+        infile = self.args.infile[0]
+        outfile = self.args.outfile[0]
+        colname = self.args.colname[0]
+
+        # Get number of lines in the CSV file; counting in Python avoids
+        # building a shell command out of the file name
+        with open(infile, 'rb') as handle:
+            nlines = sum(1 for line in handle)
+
+        if nlines < 2:
+            print('CSV file is too small.')
+            exit (1)
+
+        # Read only the header, just to get the column names
+        header = pd.read_csv(infile, sep=self.args.sep, nrows=0)
+
+        if colname not in header.columns:
+            print('Column not found: ' + colname)
+            exit (1)
+
+        # Initialize progress bar; the header line is not counted as a row
+        progress_bar = tqdm(total=nlines - 1) if self.args.progress else None
+
+        # The reader parses the header once and then yields DataFrames with
+        # at most chunksize rows each, so the header line is never handled
+        # (and hashed) as if it were data
+        reader = pd.read_csv(infile, sep=self.args.sep, chunksize=self.args.chunksize)
+
+        write_header = True
+
+        try:
+            for df in reader:
+                # Hashing the column
+                df[colname] = self.apply_hash(df)
+
+                # Writing the new CSV output: the first chunk replaces any
+                # previous content and carries the header, the next ones
+                # are appended
+                df.to_csv(outfile, index=False, mode='w' if write_header else 'a', header=write_header)
+
+                write_header = False
+
+                if progress_bar is not None:
+                    progress_bar.update(len(df))
+
+            # No data rows were found: still produce an output with the header
+            if write_header:
+                header.to_csv(outfile, index=False)
+        finally:
+            # Teardown
+            reader.close()
+
+            if progress_bar is not None:
+                progress_bar.close()
+
+def cmdline():
+    """
+    Evaluate the command line.
+
+    Positional arguments (infile, outfile, colname) are parsed with nargs=1,
+    so each of them is available as a one-item list.
+
+    :return: Command line arguments.
+    """
+
+    def positive_int(value):
+        """Parse an integer option, rejecting values lower than 1."""
+        number = int(value)
+
+        if number < 1:
+            raise argparse.ArgumentTypeError('must be a positive integer: %r' % value)
+
+        return number
+
+    # Parse CLI
+    parser = argparse.ArgumentParser(description='Hashes a column from a CSV file.',
+                                     epilog='',
+                                     formatter_class=argparse.RawDescriptionHelpFormatter,)
+
+    parser.add_argument('infile', nargs=1, help='CSV input file name')
+    parser.add_argument('outfile', nargs=1, help='CSV output file name')
+    parser.add_argument('colname', nargs=1, help='Column name')
+
+    parser.add_argument('--sep', dest='sep', help='Separator, defaults to ","')
+
+    # A chunk size of zero or less cannot be used to walk through the file
+    parser.add_argument('--chunksize', dest='chunksize', type=positive_int,
+                        help='Read chunks at a time, defaults to 1000')
+
+    parser.add_argument('--hashfunc', dest='hashfunc', help='Hash function, defaults to sha256')
+
+    # Both switches share the same destination, the last one given wins
+    parser.add_argument('--progress', dest='progress', action='store_true',
+                        help='Enable progress bar.')
+
+    parser.add_argument('--no-progress', dest='progress', action='store_false',
+                        help='Disable progress bar.')
+
+    # Add default values and get args
+    parser.set_defaults(sep=',', chunksize=1000, hashfunc='sha256', progress=True)
+
+    return parser.parse_args()
+
+if __name__ == "__main__":
+    # Parse the command line, build the hasher and process the input file
+    CsvHasher(cmdline()).run()
diff --git a/kvmxfile b/kvmxfile
index 4a6f7f3..7a342f5 100644
--- a/kvmxfile
+++ b/kvmxfile
@@ -7,6 +7,7 @@ hostname="csv-hasher"
# Which base box you should use. Leave unconfigured to use kvmx-create instead.
#basebox="buster"
+basebox="dev"
# First user name
user="user"
@@ -31,10 +32,10 @@ net="user"
# Set this is you want to be able to share a single folder between host and guest.
# Needs ssh_support set to "y" and a workable SSH connection to the guest.
shared_folder="."
-shared_folder_mountpoint="/home/$user/code/$VM"
+#shared_folder_mountpoint="/home/$user/code/$VM"
#shared_folder="$HOME/temp/shared/$VM"
#shared_folder_mountpoint="/home/$user/temp/shared/$VM"
-#shared_folder_mountpoint="/srv/shared"
+shared_folder_mountpoint="/srv/shared"
#shared_folder_mountpoint="/srv/kvmx"
#shared_folder_mountpoint="/vagrant"
@@ -74,6 +75,7 @@ shared_folder_mountpoint="/home/$user/code/$VM"
#provision_command="/usr/local/share/kvmx/provision/debian/development"
#provision_command="/usr/local/share/kvmx/provision/debian/trashman"
#provision_command="/usr/local/share/kvmx/provision/debian/desktop-basic"
+provision_command="/usr/local/share/kvmx/provision/debian/development && /srv/shared/bin/provision"
# Startup command
#startup_command="/path/to/custom/command"
@@ -102,7 +104,7 @@ vnc_client="virt-viewer"
spice="1"
# Set this if you want to attach an spice client when the machine boots.
-run_spice_client="1"
+run_spice_client="0"
# SPICE client
#spice_client="spicec"
diff --git a/tests/.gitgnore b/tests/.gitgnore
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/.gitgnore