getindata · grzegorz8 · Dec 29, 2022 · Dec 28, 2022 · Dec 28, 2022 · Dec 29, 2022
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,7 @@
 
 -   Fix execution of SHOW, EXPLAIN and DESCRIBE commands.
 -   Speed up results display.
+-   Do not truncate results when a SHOW, EXPLAIN or DESCRIBE command is executed.
 
 ## [0.12.0] - 2022-11-30
 

diff --git a/streaming_jupyter_integrations/magics.py b/streaming_jupyter_integrations/magics.py
@@ -7,7 +7,7 @@
 import subprocess
 import sys
 from functools import wraps
-from typing import Any, Callable, Dict, Iterable, Tuple, Union, cast
+from typing import Any, Callable, Dict, Iterable, Optional, Tuple, Union, cast
 
 import nest_asyncio
 import pandas as pd
@@ -36,7 +36,8 @@
 from .jar_handler import JarHandler
 from .reflection import get_method_names_for
 from .sql_syntax_highlighting import SQLSyntaxHighlighting
-from .sql_utils import inline_sql_in_cell, is_dml, is_dql, is_query
+from .sql_utils import (inline_sql_in_cell, is_dml, is_dql, is_metadata_query,
+                        is_query)
 from .variable_substitution import CellContentFormatter
 from .yarn import find_session_jm_address
 
@@ -341,10 +342,18 @@ async def __internal_execute_sql(self, stmt: str, display_row_kind: bool) -> Non
         else:
             execution_result = self.st_env.execute_sql(stmt)
         print("Job started")
-        await self.__pull_results(execution_result, display_row_kind, is_dql(stmt))
+        # Pandas truncates the displayed view when the row count exceeds the limit; the same applies to column width.
+        # Metadata queries (e.g. a list of tables or columns) are displayed in full, so no limit is applied to them.
+        pd_display_options = {
+            "display.max_rows": None if is_metadata_query(stmt) else 100,
+            "display.max_colwidth": None if is_metadata_query(stmt) else 100,
+        }
+        await self.__pull_results(execution_result, display_row_kind, is_dql(stmt), pd_display_options)
 
     async def __pull_results(self, execution_result: TableResult, display_row_kind: bool,
-                             display_results: bool) -> None:
+                             display_results: bool, pd_display_options: Optional[Dict[str, Any]] = None) -> None:
+        if not pd_display_options:
+            pd_display_options = {}
         # active polling
         while not self.interrupted:
             try:
@@ -355,7 +364,7 @@ async def __pull_results(self, execution_result: TableResult, display_row_kind:
                     # if a select query has been executing then `wait` returns as soon as the first
                     # row is available. To display the results
                     print("Pulling query results...")
-                    await self.display_execution_result(execution_result, display_row_kind)
+                    await self.display_execution_result(execution_result, display_row_kind, pd_display_options)
                     return
                 else:
                     # if finished then return early even if the user interrupts after this
@@ -384,7 +393,8 @@ async def __pull_results(self, execution_result: TableResult, display_row_kind:
         # usual happy path
         print("Execution successful")
 
-    async def display_execution_result(self, execution_result: TableResult, display_row_kind: bool) -> pd.DataFrame:
+    async def display_execution_result(self, execution_result: TableResult, display_row_kind: bool,
+                                       pd_display_options: Dict[str, Any]) -> pd.DataFrame:
         """
         Displays the execution result and returns a dataframe containing all the results.
         Display is done in a stream-like fashion displaying the results as they come.
@@ -393,6 +403,8 @@ async def display_execution_result(self, execution_result: TableResult, display_
         columns = execution_result.get_table_schema().get_field_names()
         if display_row_kind:
             columns = ["row_kind"] + columns
+        for key, value in pd_display_options.items():
+            pd.set_option(key, value)
         df = pd.DataFrame(columns=columns)
         result_kind = execution_result.get_result_kind()
 

diff --git a/streaming_jupyter_integrations/sql_utils.py b/streaming_jupyter_integrations/sql_utils.py
@@ -17,14 +17,18 @@
     'SELECT'
 }
 
-DQL_KEYWORDS = {
-    *QUERY_KEYWORDS,
+METADATA_KEYWORDS = {
     'DESCRIBE',
     'DESC',
     'EXPLAIN',
     'SHOW'
 }
 
+DQL_KEYWORDS = {
+    *QUERY_KEYWORDS,
+    *METADATA_KEYWORDS
+}
+
 DML_KEYWORDS = {
     "INSERT",
     "EXECUTE"
@@ -54,6 +58,10 @@ def is_dql(sql: str) -> bool:
     return __first_token_is_keyword(sql, DQL_KEYWORDS)
 
 
+def is_metadata_query(sql: str) -> bool:
+    return __first_token_is_keyword(sql, METADATA_KEYWORDS)
+
+
 def __first_token_is_keyword(sql: str, keywords: Iterable[str]) -> bool:
     if not sql or not sql.strip():
         return False