Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 4 additions & 43 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ pandas = [
{ version = ">=1.2.5,<2.3.0", python = ">=3.8,<3.13" },
{ version = ">=2.2.3,<2.3.0", python = ">=3.13" }
]
lz4 = "^4.0.2"
cramjam = "^2.7.0"
requests = "^2.18.1"
oauthlib = "^3.1.0"
openpyxl = "^3.0.10"
Expand Down
20 changes: 3 additions & 17 deletions src/databricks/sql/cloudfetch/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import requests
from requests.adapters import HTTPAdapter, Retry
import lz4.frame
import cramjam
import time

from databricks.sql.thrift_api.TCLIService.ttypes import TSparkArrowResultLink
Expand Down Expand Up @@ -158,19 +158,5 @@ def _decompress_data(compressed_data: bytes) -> bytes:

Decompresses data that has been lz4 compressed, either via the whole frame or by series of chunks.
"""
uncompressed_data, bytes_read = lz4.frame.decompress(
compressed_data, return_bytes_read=True
)
# The last cloud fetch file of the entire result is commonly punctuated by frequent end-of-frame markers.
# Full frame decompression above will short-circuit, so chunking is necessary
if bytes_read < len(compressed_data):
d_context = lz4.frame.create_decompression_context()
start = 0
uncompressed_data = bytearray()
while start < len(compressed_data):
data, num_bytes, is_end = lz4.frame.decompress_chunk(
d_context, compressed_data[start:]
)
uncompressed_data += data
start += num_bytes
return uncompressed_data
# cramjam returns a Buffer object; convert to bytes
return bytes(cramjam.lz4.decompress(compressed_data))
4 changes: 2 additions & 2 deletions src/databricks/sql/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from typing import Any, Dict, List, Optional, Union, Sequence
import re

import lz4.frame
import cramjam

try:
import pyarrow
Expand Down Expand Up @@ -613,7 +613,7 @@ def convert_arrow_based_set_to_arrow_table(arrow_batches, lz4_compressed, schema
for arrow_batch in arrow_batches:
n_rows += arrow_batch.rowCount
ba += (
lz4.frame.decompress(arrow_batch.batch)
bytes(cramjam.lz4.decompress(arrow_batch.batch))
if lz4_compressed
else arrow_batch.batch
)
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/test_thrift_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -1438,7 +1438,7 @@ def test_create_arrow_table_calls_correct_conversion_method(
arrow_batches, lz4_compressed, schema
)

@patch("lz4.frame.decompress")
@patch("cramjam.lz4.decompress")
@patch("pyarrow.ipc.open_stream")
def test_convert_arrow_based_set_to_arrow_table(
self, open_stream_mock, lz4_decompress_mock
Expand Down