Skip to content

Commit f40e221

Browse files
author
Jesse Whitehouse
committed
Update all e2e tests per guidance from YD.
Signed-off-by: Jesse Whitehouse <[email protected]>
1 parent 3c70e1b commit f40e221

File tree

5 files changed

+535
-261
lines changed

5 files changed

+535
-261
lines changed

src/databricks/sql/client.py

Lines changed: 105 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -19,17 +19,25 @@
1919
ExecuteResponse,
2020
ParamEscaper,
2121
named_parameters_to_tsparkparams,
22-
inject_parameters
22+
inject_parameters,
23+
ParameterApproach,
2324
)
2425
from databricks.sql.types import Row
2526
from databricks.sql.auth.auth import get_python_sql_connector_auth_provider
2627
from databricks.sql.experimental.oauth_persistence import OAuthPersistence
2728

29+
from databricks.sql.thrift_api.TCLIService.ttypes import (
30+
TSparkParameter,
31+
)
32+
33+
2834
logger = logging.getLogger(__name__)
2935

3036
DEFAULT_RESULT_BUFFER_SIZE_BYTES = 104857600
3137
DEFAULT_ARRAY_SIZE = 100000
3238

39+
NO_NATIVE_PARAMS = []
40+
3341

3442
class Connection:
3543
def __init__(
@@ -41,7 +49,7 @@ def __init__(
4149
session_configuration: Dict[str, Any] = None,
4250
catalog: Optional[str] = None,
4351
schema: Optional[str] = None,
44-
use_inline_params: Optional[bool] = False,
52+
use_inline_params: Optional[bool] = True,
4553
**kwargs,
4654
) -> None:
4755
"""
@@ -67,11 +75,12 @@ def __init__(
6775
:param schema: An optional initial schema to use. Requires DBR version 9.0+
6876
6977
Other Parameters:
70-
use_inline_params: `boolean`, optional (default is False)
78+
use_inline_params: `boolean`, optional (default is True)
7179
When True, parameterized calls to cursor.execute() will try to render parameter values inline with the
7280
query text instead of using native bound parameters supported in DBR. This connector will attempt to
73-
sanitise parameterized inputs to prevent SQL injection. This option should be considered dangerous and
74-
is maintained here for certain legacy use-cases before Databricks had native parameter support.
81+
sanitise parameterized inputs to prevent SQL injection. Before you can switch this to False, you must
82+
update your queries to use the PEP-249 `named` paramstyle instead of the `pyformat` paramstyle used
83+
in INLINE mode.
7584
auth_type: `str`, optional
7685
`databricks-oauth` : to use oauth with fine-grained permission scopes, set to `databricks-oauth`.
7786
This is currently in private preview for Databricks accounts on AWS.
@@ -366,6 +375,78 @@ def __iter__(self):
366375
else:
367376
raise Error("There is no active result set")
368377

378+
def _determine_parameter_approach(self) -> ParameterApproach:
    """Choose whether query parameters are sent in inline or native mode.

    The decision is made from connection state only:

    * If ``self.connection.use_inline_params`` is truthy, inline mode is used
      and the server is never consulted.
    * Otherwise the connection is asked, via
      ``server_parameterized_queries_enabled(protocol_version)``, whether the
      server supports parameterized queries. If it does, native mode is used.

    Returns:
        ``ParameterApproach.INLINE`` or ``ParameterApproach.NATIVE``.

    Raises:
        NotSupportedError: inline mode is disabled and the server does not
            support parameterized queries.
    """
    connection = self.connection

    # Inline rendering was explicitly requested; server capability is irrelevant.
    if connection.use_inline_params:
        return ParameterApproach.INLINE

    if connection.server_parameterized_queries_enabled(connection.protocol_version):
        return ParameterApproach.NATIVE

    raise NotSupportedError(
        "Parameterized operations are not supported by this server. DBR 14.1 is required."
    )
399+
400+
def _prepare_inline_parameters(
    self, stmt: str, params: Union[List, Dict[str, Any]]
) -> Tuple[str, List]:
    """Render parameter values directly into the statement text.

    :stmt:
        A string SQL query containing parameter markers of PEP-249 paramstyle `pyformat`.
        For example `%(param)s`.

    :params:
        An iterable of parameter values to be rendered inline. If passed as a Dict, the keys
        must match the names of the markers included in :stmt:. If passed as a List, its length
        must equal the count of parameter markers in :stmt:.

    Returns a tuple of:
        stmt: the passed statement with the param markers replaced by literal rendered values
        params: an empty list representing the native parameters to be passed with this query.
            The list is always empty because native parameters are never used under the inline approach
    """
    # Escape first, then substitute, so only escaped values reach the SQL text.
    escaped_values = self.escaper.escape_args(params)
    rendered_statement = inject_parameters(stmt, escaped_values)

    # NO_NATIVE_PARAMS is a module-level mutable list. Return a fresh copy so
    # that in-place mutation by a consumer cannot leak into later queries.
    return rendered_statement, list(NO_NATIVE_PARAMS)
424+
425+
def _prepare_native_parameters(
    self, stmt: str, params: Union[List[Any], Dict[str, Any]]
) -> Tuple[str, List[TSparkParameter]]:
    """Return a statement and a list of native parameters to be passed to thrift_backend for execution

    :stmt:
        A string SQL query containing parameter markers of PEP-249 paramstyle `named`.
        For example `:param`.

    :params:
        An iterable of parameter values to be sent natively. If passed as a Dict, the keys
        must match the names of the markers included in :stmt:. If passed as a List, its length
        must equal the count of parameter markers in :stmt:. In list form, any member of the list
        can be wrapped in a DbsqlParameter class.

    Returns a tuple of:
        stmt: the passed statement, returned unchanged. The parameter markers are left in
            place because the values travel separately as native parameters.
        params: a list of TSparkParameters that will be passed in native mode
    """
    # The statement text is not touched in native mode; only the values are converted.
    native_params = named_parameters_to_tsparkparams(params)

    return stmt, native_params
449+
369450
def _close_and_clear_active_result_set(self):
370451
try:
371452
if self.active_result_set:
@@ -534,24 +615,34 @@ def execute(
534615
Will result in the query "SELECT * FROM table WHERE field = 'foo'" being sent to the server
535616
:returns self
536617
"""
537-
if parameters and self.connection.use_inline_params:
538-
_op = inject_parameters(operation, parameters)
539-
_params = []
540-
if parameters and not self.connection.use_inline_params:
541-
_op = operation
542-
_params = named_parameters_to_tsparkparams(parameters)
543-
618+
619+
if parameters:
620+
param_approach = self._determine_parameter_approach()
621+
else:
622+
param_approach = ParameterApproach.NONE
623+
prepared_params = NO_NATIVE_PARAMS
624+
prepared_operation = operation
625+
626+
if param_approach == ParameterApproach.INLINE:
627+
prepared_operation, prepared_params = self._prepare_inline_parameters(
628+
operation, parameters
629+
)
630+
if param_approach == ParameterApproach.NATIVE:
631+
prepared_operation, prepared_params = self._prepare_native_parameters(
632+
operation, parameters
633+
)
634+
544635
self._check_not_closed()
545636
self._close_and_clear_active_result_set()
546637
execute_response = self.thrift_backend.execute_command(
547-
operation=_op,
638+
operation=prepared_operation,
548639
session_handle=self.connection._session_handle,
549640
max_rows=self.arraysize,
550641
max_bytes=self.buffer_size_bytes,
551642
lz4_compression=self.connection.lz4_compression,
552643
cursor=self,
553644
use_cloud_fetch=self.connection.use_cloud_fetch,
554-
parameters=_params,
645+
parameters=prepared_params,
555646
)
556647
self.active_result_set = ResultSet(
557648
self.connection,

src/databricks/sql/utils.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,11 @@
2525
BIT_MASKS = [1, 2, 4, 8, 16, 32, 64, 128]
2626

2727

28+
class ParameterApproach(Enum):
    """How the parameters of a parameterized query are delivered to the server."""

    # Parameter values are escaped and rendered into the SQL text client-side.
    INLINE = 1
    # Parameter values are sent separately from the SQL text as bound parameters.
    NATIVE = 2
    # The query was executed without any parameters.
    NONE = 3
32+
2833
class ResultSetQueue(ABC):
2934
@abstractmethod
3035
def next_n_rows(self, num_rows: int) -> pyarrow.Table:

0 commit comments

Comments
 (0)