Skip to content

Commit bdb948a

Browse files
author
Jesse Whitehouse
committed
Refactor to allow uploads_base_path to be either a single string object
or a list of strings. Signed-off-by: Jesse Whitehouse <[email protected]>
1 parent 469f35f commit bdb948a

File tree

4 files changed

+69
-9
lines changed

4 files changed

+69
-9
lines changed

examples/staging_ingestion.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
REMOVE 'stage://tmp/[email protected]/salesdata/september.csv'
2020
2121
Ingestion queries are passed to cursor.execute() like any other query. For GET and PUT commands, a local file
22-
will be read or written. For security, this local file must be contained within, or descended from, the
22+
will be read or written. For security, this local file must be contained within, or descended from, an
2323
uploads_base_path of the connection.
2424
2525
Additionally, the connection can only manipulate files within the cloud storage location of the authenticated user.
@@ -29,6 +29,8 @@
2929
1. Set the INGESTION_USER constant to the account email address of the authenticated user
3030
2. Set the FILEPATH constant to the path of a file that will be uploaded (this example assumes its a CSV file)
3131
3. Run this file
32+
33+
Note: uploads_base_path can be either a Pathlike object or a list of Pathlike objects.
3234
"""
3335

3436
INGESTION_USER = "[email protected]"

src/databricks/sql/client.py

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -300,29 +300,47 @@ def _check_not_closed(self):
300300
if not self.open:
301301
raise Error("Attempting operation on closed cursor")
302302

303-
def _handle_staging_operation(self, uploads_base_path: str):
303+
def _handle_staging_operation(self, uploads_base_path: Union[List, str]):
304304
"""Fetch the HTTP request instruction from a staging ingestion command
305305
and call the designated handler.
306-
306+
307307
Raise an exception if localFile is specified by the server but the localFile
308308
is not descended from uploads_base_path.
309309
"""
310310

311-
if uploads_base_path is None:
311+
if isinstance(uploads_base_path, type(str())):
312+
_uploads_base_paths = [uploads_base_path]
313+
elif isinstance(uploads_base_path, type(list())):
314+
_uploads_base_paths = uploads_base_path
315+
else:
312316
raise Error(
313-
"You must provide an uploads_base_path when initialising a connection to perform ingestion commands"
317+
"You must provide at least one uploads_base_path when initialising a connection to perform ingestion commands"
314318
)
315319

320+
abs_uploads_base_paths = [os.path.abspath(i) for i in _uploads_base_paths]
321+
316322
row = self.active_result_set.fetchone()
317323

318324
# Must set to None in cases where server response does not include localFile
319325
abs_localFile = None
320326

327+
# Default to not allow staging operations
328+
allow_operation = False
321329
if getattr(row, "localFile", None):
322330
abs_localFile = os.path.abspath(row.localFile)
323-
abs_uploads_base_path = os.path.abspath(uploads_base_path)
324-
if os.path.commonpath([abs_localFile, abs_uploads_base_path]) != abs_uploads_base_path:
325-
raise Error("Local file operations are restricted to paths within the configured uploads_base_path")
331+
for abs_uploads_base_path in abs_uploads_base_paths:
332+
# If the indicated local file matches at least one allowed base path, allow the operation
333+
if (
334+
os.path.commonpath([abs_localFile, abs_uploads_base_path])
335+
== abs_uploads_base_path
336+
):
337+
allow_operation = True
338+
else:
339+
continue
340+
if not allow_operation:
341+
raise Error(
342+
"Local file operations are restricted to paths within the configured uploads_base_path"
343+
)
326344

327345
# TODO: Experiment with DBR sending real headers.
328346
# The specification says headers will be in JSON format but the current null value is actually an empty list []

src/databricks/sql/thrift_backend.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import threading
77
import lz4.frame
88
from ssl import CERT_NONE, CERT_REQUIRED, create_default_context
9+
from typing import List, Union
910

1011
import pyarrow
1112
import thrift.transport.THttpClient
@@ -61,7 +62,7 @@ def __init__(
6162
http_path: str,
6263
http_headers,
6364
auth_provider: AuthProvider,
64-
uploads_base_path: str = None,
65+
uploads_base_path: Union[str, List] = None,
6566
**kwargs,
6667
):
6768
# Internal arguments in **kwargs:

tests/e2e/driver_tests.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -864,6 +864,45 @@ def test_staging_ingestion_invalid_staging_path_fails_at_server(self):
864864
query = f"PUT '{target_file}' INTO 'stageRANDOMSTRINGOFCHARACTERS://tmp/{self.staging_ingestion_user}/tmp/11/15/file1.csv' OVERWRITE"
865865
cursor.execute(query)
866866

867+
def test_staging_ingestion_supports_multiple_uploadsbasepath_values(self):
868+
"""uploads_base_path may be either a path-like object or a list of path-like objects.
869+
870+
This test confirms that two configured base paths:
871+
1 - doesn't raise an exception
872+
2 - allows uploads from both paths
873+
3 - doesn't allow uploads from a third path
874+
"""
875+
876+
def generate_file_and_path_and_queries():
877+
"""
878+
1. Makes a temp file with some contents.
879+
2. Write a query to PUT it into a staging location
880+
3. Write a query to REMOVE it from that location (for cleanup)
881+
"""
882+
fh, temp_path = tempfile.mkstemp()
883+
with open(fh, "wb") as fp:
884+
original_text = "hello world!".encode("utf-8")
885+
fp.write(original_text)
886+
put_query = f"PUT '{temp_path}' INTO 'stage://tmp/{self.staging_ingestion_user}/tmp/11/15/{id(temp_path)}.csv' OVERWRITE"
887+
remove_query = f"REMOVE 'stage://tmp/{self.staging_ingestion_user}/tmp/11/15/{id(temp_path)}.csv'"
888+
return fh, temp_path, put_query, remove_query
889+
890+
fh1, temp_path1, put_query1, remove_query1 = generate_file_and_path_and_queries()
891+
fh2, temp_path2, put_query2, remove_query2 = generate_file_and_path_and_queries()
892+
fh3, temp_path3, put_query3, remove_query3 = generate_file_and_path_and_queries()
893+
894+
with self.connection(extra_params={"uploads_base_path": [temp_path1, temp_path2]}) as conn:
895+
cursor = conn.cursor()
896+
897+
cursor.execute(put_query1)
898+
cursor.execute(put_query2)
899+
900+
with pytest.raises(Error, match="Local file operations are restricted to paths within the configured uploads_base_path"):
901+
cursor.execute(put_query3)
902+
903+
# Then clean up the files we made
904+
cursor.execute(remove_query1)
905+
cursor.execute(remove_query2)
867906

868907

869908
def main(cli_args):

0 commit comments

Comments
 (0)