Skip to content

Commit c6edd60

Browse files
author
Romeo Kienzer
committed
update to latest spec
1 parent eb22f66 commit c6edd60

File tree

1 file changed

+27
-33
lines changed

1 file changed

+27
-33
lines changed

component-library/transform/spark-csv-to-parquet.ipynb

Lines changed: 27 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,15 @@
1414
"tags": []
1515
},
1616
"source": [
17-
"# Converts a CSV file with header to parquet using ApacheSpark"
17+
"# spark-csv-to-parquet"
18+
]
19+
},
20+
{
21+
"cell_type": "markdown",
22+
"id": "63d16770-79ac-4255-a2d0-29412418bebf",
23+
"metadata": {},
24+
"source": [
25+
"Converts a CSV file with header to parquet using ApacheSpark"
1826
]
1927
},
2028
{
@@ -46,28 +54,6 @@
4654
"fi"
4755
]
4856
},
49-
{
50-
"cell_type": "code",
51-
"execution_count": null,
52-
"id": "norman-memory",
53-
"metadata": {
54-
"papermill": {
55-
"duration": 0.023625,
56-
"end_time": "2021-03-22T20:29:25.776130",
57-
"exception": false,
58-
"start_time": "2021-03-22T20:29:25.752505",
59-
"status": "completed"
60-
},
61-
"tags": []
62-
},
63-
"outputs": [],
64-
"source": [
65-
"# @param data_dir temporal data storage for local execution\n",
66-
"# @param data_csv csv path and file name (default: data.csv)\n",
67-
"# @param data_parquet path and parquet file name (default: data.parquet)\n",
68-
"# @param master url of master (default: local mode)"
69-
]
70-
},
7157
{
7258
"cell_type": "code",
7359
"execution_count": null,
@@ -107,9 +93,16 @@
10793
},
10894
"outputs": [],
10995
"source": [
96+
"# source path and file name (default: data.csv)\n",
11097
"data_csv = os.environ.get('data_csv', 'data.csv')\n",
98+
"\n",
99+
"# destination path and parquet file name (default: data.parquet)\n",
111100
"data_parquet = os.environ.get('data_parquet', 'data.parquet')\n",
101+
"\n",
102+
"# url of master (default: local mode)\n",
112103
"master = os.environ.get('master', \"local[*]\")\n",
104+
"\n",
105+
"# temporal data storage for local execution\n",
113106
"data_dir = os.environ.get('data_dir', '../../data/')"
114107
]
115108
},
@@ -122,19 +115,20 @@
122115
"source": [
123116
"# override parameters received from a potential call using %run magic\n",
124117
"parameters = list(\n",
125-
" map(\n",
126-
" lambda s: re.sub('$', '\"', s),\n",
127-
" map(\n",
128-
" lambda s: s.replace('=', '=\"'),\n",
129-
" filter(\n",
130-
" lambda s: s.find('=') > -1,\n",
131-
" sys.argv\n",
132-
" )\n",
133-
" )\n",
134-
" )\n",
118+
" map(\n",
119+
" lambda s: re.sub('$', '\"', s),\n",
120+
" map(\n",
121+
" lambda s: s.replace('=', '=\"'),\n",
122+
" filter(\n",
123+
" lambda s: s.find('=') > -1 and bool(re.match('[A-Za-z0-9_]*=[.\\/A-Za-z0-9]*', s)),\n",
124+
" sys.argv\n",
125+
" )\n",
126+
" )\n",
127+
" )\n",
135128
")\n",
136129
"\n",
137130
"for parameter in parameters:\n",
131+
" logging.warning('Parameter: '+parameter) \n",
138132
" exec(parameter)"
139133
]
140134
},

0 commit comments

Comments
 (0)