forked from HariSekhon/DevOps-Python-tools
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_spark_json_to_parquet.sh
More file actions
executable file
·61 lines (54 loc) · 1.65 KB
/
test_spark_json_to_parquet.sh
File metadata and controls
executable file
·61 lines (54 loc) · 1.65 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/env bash
# vim:ts=4:sts=4:sw=4:et
#
# Author: Hari Sekhon
# Date: 2015-11-05 23:29:15 +0000 (Thu, 05 Nov 2015)
#
# https://github.com/harisekhon/devops-python-tools
#
# License: see accompanying Hari Sekhon LICENSE file
#
# If you're using my code you're welcome to connect with me on LinkedIn and optionally send me feedback to help improve or steer this or other code I publish
#
# https://www.linkedin.com/in/harisekhon
#
# Abort on the first failing command and on any reference to an unset variable.
set -o errexit -o nounset

# Setting DEBUG to any non-empty value turns on command tracing.
if [ -n "${DEBUG:-}" ]; then
    set -x
fi

# Resolve the directory containing this script and make it the working
# directory, so the relative paths used further down resolve from there.
srcdir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$srcdir"

# Load the shared helpers from utils.sh in the script directory.
# shellcheck disable=SC1091
source ./utils.sh
# Announce this test through the 'section' helper (not defined in this file;
# presumably provided by utils.sh).
section "Spark JSON => Parquet"

# Skip the whole test, finishing with status 0, when is_inside_docker
# (also defined outside this file) reports true.
if is_inside_docker; then
    printf '%s\n' "detected running inside docker, skipping test..."
    # 'return' works only when this file is being sourced; when it is executed
    # directly 'return' fails (its error is silenced) and we fall through to
    # 'exit' instead.
    return 0 >/dev/null 2>&1 || true
    exit 0
fi
# Spark versions to test: the command-line arguments if any were given,
# otherwise the default list. Kept as one exported, space-separated string.
export SPARK_VERSIONS="${*:-1.3.1 1.4.0 1.5.1 1.6.2 2.0.0}"

# For each version: make sure the Spark distribution directory is present
# (downloading and unpacking the tarball if needed), point SPARK_HOME at it and
# run ../spark_json_to_parquet.py against data/multirecord.json.
# The expansion is deliberately unquoted so it splits into one word per version.
for SPARK_VERSION in $SPARK_VERSIONS; do
    dir="spark-$SPARK_VERSION-bin-hadoop2.6"
    tar="$dir.tgz"
    if ! [ -d "$dir" ]; then
        if ! [ -f "$tar" ]; then
            url="http://d3kbcqa49mib13.cloudfront.net/$tar"
            echo "fetching $tar"
            fetched=true
            # some systems don't have wget
            if type -P wget &>/dev/null; then
                wget "$url" || fetched=false
            else
                # -f: fail on HTTP errors instead of saving the server's error
                # page as the tarball
                curl -fL "$url" > "$tar" || fetched=false
            fi
            if [ "$fetched" != true ]; then
                echo "failed to download $url" >&2
                # don't leave a partial/empty archive behind for the next run
                rm -f "$tar"
                exit 1
            fi
        fi
        echo "untarring $tar"
        if ! tar zxf "$tar"; then
            echo "failed to extract $tar, removing it and any partial extraction" >&2
            # -r is required to remove a partially extracted directory; stop
            # here rather than running the test against a broken SPARK_HOME
            rm -rf "$tar" "$dir"
            exit 1
        fi
    fi
    echo
    export SPARK_HOME="$dir"
    # start from a clean output location for this version
    rm -fr "test-$dir.parquet"
    if ../spark_json_to_parquet.py -j data/multirecord.json -p "test-$dir.parquet"; then
        echo "SUCCEEDED with Spark $SPARK_VERSION"
    else
        echo "FAILED test with Spark $SPARK_VERSION"
        exit 1
    fi
done
echo "SUCCESS"