#!/usr/bin/env bash
#  vim:ts=4:sts=4:sw=4:et
#
#  Author: Hari Sekhon
#  Date: 2019-03-05 18:27:00 +0000 (Tue, 05 Mar 2019)
#
#  https://github.com/harisekhon/devops-python-tools
#
#  License: see accompanying Hari Sekhon LICENSE file
#
#  If you're using my code you're welcome to connect with me on LinkedIn and optionally send me feedback to help steer this or other code I publish
#
#  https://www.linkedin.com/in/harisekhon
#

# Anonymizes each given file in parallel:
#
#   1. removes any existing "<file>.*" siblings (left-over parts from a previous run)
#   2. splits the file in to $parallelism parts using bash-tools/split.sh
#   3. runs anonymize.py -a on every part concurrently via 'parallel'
#   4. concatenates the anonymized parts in to "<file>.anonymized"
#   5. removes the intermediate parts
#
# Environment:
#   PARALLELISM - default number of parts / concurrent jobs (overridden by -p)
#   DEBUG       - if non-empty, enables shell tracing (set -x)

set -euo pipefail
[ -n "${DEBUG:-}" ] && set -x
srcdir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# shellcheck disable=SC1090
. "$srcdir/bash-tools/lib/utils.sh"

# re-establish srcdir local to this script since the utils.sh include brings its own srcdir
srcdir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# Prints an optional error message followed by the usage text to stderr,
# then exits with status 3.
#
# Arguments: $@ - optional message to print before the usage text
#
# NOTE(review): the synopsis line inside the heredoc was lost in the mangled
# original and has been reconstructed - confirm the wording.
# shellcheck disable=SC2120
usage(){
    if [ -n "$*" ]; then
        echo "$@" >&2
        echo >&2
    fi
    cat >&2 <<EOF

usage: ${0##*/} <files>

-p --parallelism    Number of parts to split files in to and anonymize in parallel before reconstituting
-h --help           Show usage and exit

EOF
    exit 3
}

# cpu_count is not defined in this script - presumably provided by the sourced utils.sh
parallelism="${PARALLELISM:-$(cpu_count)}"

# an array rather than a space separated string so that filenames containing
# whitespace or glob characters are preserved intact
files=()

while [ $# -gt 0 ]; do
    case "$1" in
        # --parallelism is what the usage text documents,
        # --parallel is retained for backwards compatibility
        -p|--parallel|--parallelism)
            # without this check a trailing -p dies with an opaque
            # 'unbound variable' error on $2 under set -u
            if [ $# -lt 2 ]; then
                # shellcheck disable=SC2119
                usage "option $1 requires an argument"
            fi
            parallelism="$2"
            shift
            ;;
        -h|--help|-*)
            # shellcheck disable=SC2119
            usage
            ;;
        *)
            files+=("$1")
            ;;
    esac
    shift
done

# ${files[@]+"${files[@]}"} expands to nothing for an empty array instead of
# raising 'unbound variable' under set -u on Bash versions older than 4.4
for filename in ${files[@]+"${files[@]}"}; do
    echo
    echo "Processing file '$filename':"
    echo
    echo "Removing any pre-existing parts:"
    # best effort - there may legitimately be nothing to remove
    rm -v "$filename".* 2>/dev/null || :
    echo
    "$srcdir/bash-tools/split.sh" --parts "$parallelism" "$filename"
    echo "Anonymizing parts"
    # emit one shell command line per part for 'parallel' to execute,
    # %q escapes the paths so the shell run by 'parallel' sees each as a single word
    for file_part in "$filename".*; do
        # an unmatched glob is left as the literal pattern - don't generate a command for it
        [ -e "$file_part" ] || continue
        printf '%q -a %q > %q\n' "$srcdir/anonymize.py" "$file_part" "$file_part.anonymized"
    done |
    parallel -j "$parallelism"
    echo "Concatenating parts"
    # glob expansion is sorted, so parts are reassembled in suffix order
    cat "$filename".*.anonymized > "$filename".anonymized
    echo
    echo "Removing parts:"
    rm -v "$filename".*.anonymized || :
    # the raw split parts - two or three character alphanumeric suffixes
    rm -v "$filename".[a-z0-9][a-z0-9] "$filename".[a-z0-9][a-z0-9][a-z0-9] 2>/dev/null || :
    echo
    echo "Anonymized file ready: $filename.anonymized"
    echo
done