fermumen · fermumen · Jul 3, 2025
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -15,7 +15,7 @@ pandas = [
     { version = ">=1.2.5,<2.3.0", python = ">=3.8,<3.13" },
     { version = ">=2.2.3,<2.3.0", python = ">=3.13" }
 ]
-lz4 = "^4.0.2"
+cramjam = "^2.7.0"
 requests = "^2.18.1"
 oauthlib = "^3.1.0"
 openpyxl = "^3.0.10"

diff --git a/src/databricks/sql/cloudfetch/downloader.py b/src/databricks/sql/cloudfetch/downloader.py
@@ -3,7 +3,7 @@
 
 import requests
 from requests.adapters import HTTPAdapter, Retry
-import lz4.frame
+import cramjam
 import time
 
 from databricks.sql.thrift_api.TCLIService.ttypes import TSparkArrowResultLink
@@ -158,19 +158,5 @@ def _decompress_data(compressed_data: bytes) -> bytes:
 
         Decompresses data that has been lz4 compressed, either via the whole frame or by series of chunks.
         """
-        uncompressed_data, bytes_read = lz4.frame.decompress(
-            compressed_data, return_bytes_read=True
-        )
-        # The last cloud fetch file of the entire result is commonly punctuated by frequent end-of-frame markers.
-        # Full frame decompression above will short-circuit, so chunking is necessary
-        if bytes_read < len(compressed_data):
-            d_context = lz4.frame.create_decompression_context()
-            start = 0
-            uncompressed_data = bytearray()
-            while start < len(compressed_data):
-                data, num_bytes, is_end = lz4.frame.decompress_chunk(
-                    d_context, compressed_data[start:]
-                )
-                uncompressed_data += data
-                start += num_bytes
-        return uncompressed_data
+        # cramjam returns a Buffer object; convert to bytes
+        return bytes(cramjam.lz4.decompress(compressed_data))
diff --git a/src/databricks/sql/utils.py b/src/databricks/sql/utils.py
@@ -11,7 +11,7 @@
 from typing import Any, Dict, List, Optional, Union, Sequence
 import re
 
-import lz4.frame
+import cramjam
 
 try:
     import pyarrow
@@ -613,7 +613,7 @@ def convert_arrow_based_set_to_arrow_table(arrow_batches, lz4_compressed, schema
     for arrow_batch in arrow_batches:
         n_rows += arrow_batch.rowCount
         ba += (
-            lz4.frame.decompress(arrow_batch.batch)
+            bytes(cramjam.lz4.decompress(arrow_batch.batch))
             if lz4_compressed
             else arrow_batch.batch
         )

diff --git a/tests/unit/test_thrift_backend.py b/tests/unit/test_thrift_backend.py
@@ -1438,7 +1438,7 @@ def test_create_arrow_table_calls_correct_conversion_method(
             arrow_batches, lz4_compressed, schema
         )
 
-    @patch("lz4.frame.decompress")
+    @patch("cramjam.lz4.decompress")
     @patch("pyarrow.ipc.open_stream")
     def test_convert_arrow_based_set_to_arrow_table(
         self, open_stream_mock, lz4_decompress_mock