Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix duckdb leading comments #895

Merged
merged 26 commits into from
Oct 11, 2023
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
0940c6d
#892 Fix empty result in certain duckdb `SELECT` and `SUMMARIZE` queries with leading co…
marshallwhiteorg Sep 22, 2023
3058bd1
#892 Fix test
marshallwhiteorg Sep 22, 2023
dfb15f5
Merge branch 'master' into sql-comments
marshallwhiteorg Sep 22, 2023
b10d539
#892 Formatting
marshallwhiteorg Sep 22, 2023
df7ef10
#892 Remove duplicate test
marshallwhiteorg Sep 22, 2023
d75ff84
#892 flake8
marshallwhiteorg Sep 22, 2023
9e4aac9
#892 fake usage for df to satisfy pkgmt
marshallwhiteorg Sep 26, 2023
ed82991
#892 Works with UNION expressions, comments, ... more robust in gener…
marshallwhiteorg Sep 27, 2023
b078476
#892 Works with WITH etc.
marshallwhiteorg Sep 27, 2023
bda0e38
#892 format
marshallwhiteorg Sep 27, 2023
a6170c2
Merge branch 'master' into sql-comments
marshallwhiteorg Sep 27, 2023
a0a3445
#892
marshallwhiteorg Sep 27, 2023
4820b2a
Merge branch 'master' into sql-comments
marshallwhiteorg Sep 27, 2023
1ea147d
#892 Detect SQLAlchemy + duckdb
marshallwhiteorg Sep 27, 2023
2c46490
#892 pass in the case that sqlglot fails to parse the statement. This…
marshallwhiteorg Sep 28, 2023
2ed754e
#892 fix link
marshallwhiteorg Sep 28, 2023
df82836
Merge remote-tracking branch 'upstream/master' into sql-comments
marshallwhiteorg Oct 4, 2023
01cf0c5
#892 Test cases and split parsing into separate function
marshallwhiteorg Oct 5, 2023
16823a1
#892 Use sqlparse instead of sqlglot
marshallwhiteorg Oct 5, 2023
c380372
#892 format
marshallwhiteorg Oct 5, 2023
a909152
#892 Use @parameterize
marshallwhiteorg Oct 5, 2023
5946e65
#892 format
marshallwhiteorg Oct 5, 2023
826fd4e
#892 pr feedback
marshallwhiteorg Oct 6, 2023
1c6db35
#892 Make the error more descriptive
marshallwhiteorg Oct 9, 2023
8c12067
Merge branch 'master' into sql-comments
marshallwhiteorg Oct 11, 2023
172966f
#892 format
marshallwhiteorg Oct 11, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

## 0.10.3dev

* [Fix] Fix empty result in certain duckdb `SELECT` and `SUMMARIZE` queries with leading comments (#892)
* [Fix] Remove force deleted snippets from dependent snippet's `with` (#717)

## 0.10.2 (2023-09-22)
Expand Down
2 changes: 1 addition & 1 deletion doc/jupyterlab/sharing.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,4 @@ you with a unique URL that you can share.
pip install jupysql
```

> **For detailed instructions [click here.](https://docs.cloud.ploomber.io/en/latest/dashboards/jupyterlab-plugin.html)**
> **For detailed instructions [click here.](https://docs.cloud.ploomber.io/en/latest/apps/jupyterlab-plugin.html)**
48 changes: 28 additions & 20 deletions src/sql/connection/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
)
from IPython.core.error import UsageError
import sqlglot
from sqlglot import parse_one, exp
from sqlglot.generator import Generator
import sqlparse
from ploomber_core.exceptions import modify_exceptions

Expand Down Expand Up @@ -717,34 +719,40 @@ def _connection_execute(self, query, parameters=None):
Parameters to use in the query (:variable format)
"""
parameters = parameters or {}

# we do not support multiple statements
if len(sqlparse.split(query)) > 1:
raise NotImplementedError("Only one statement is supported.")

words = query.split()

if words:
first_word_statement = words[0].lower()
else:
first_word_statement = ""

# NOTE: in duckdb db "from TABLE_NAME" is valid
# TODO: we can parse the query to ensure that it's a SELECT statement
# for example, it might start with WITH but the final statement might
# not be a SELECT
# `summarize` is added to support %sql SUMMARIZE table in duckdb
is_select = first_word_statement in {"select", "with", "from", "summarize"}

operation = partial(self._execute_with_parameters, query, parameters)
out = self._execute_with_error_handling(operation)

if self._requires_manual_commit:
# calling connection.commit() when using duckdb-engine will yield
# empty results if we commit after a SELECT statement
# see: https://github.com/Mause/duckdb_engine/issues/734
if is_select and self.dialect == "duckdb":
return out
# Calling connection.commit() when using duckdb-engine will yield
# empty results if we commit after a SELECT or SUMMARIZE statement,
# see: https://github.com/Mause/duckdb_engine/issues/734.
if self.dialect == "duckdb":
edublancas marked this conversation as resolved.
Show resolved Hide resolved
is_duckdb_sqlalchemy = not self.is_dbapi_connection
if is_duckdb_sqlalchemy:
edublancas marked this conversation as resolved.
Show resolved Hide resolved
parse_dialect = "tsql"
else:
parse_dialect = "duckdb"

# Attempt to use sqlglot to detect SELECT and SUMMARIZE.
try:
expression = parse_one(query, dialect=parse_dialect)
sql_stripped = Generator(comments=False).generate(expression)
words = sql_stripped.split()
if (
words
and (
words[0].lower() == "select"
or words[0].lower() == "summarize"
)
or isinstance(expression, exp.Select)
):
return out
except sqlglot.errors.ParseError:
pass

# in sqlalchemy 1.x, connection has no commit attribute
if IS_SQLALCHEMY_ONE:
Expand Down
70 changes: 70 additions & 0 deletions src/tests/test_magic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1987,3 +1987,73 @@ def test_accessing_previously_nonexisting_file(ip_empty, tmp_empty, capsys):
ip_empty.run_cell("%sql SELECT * FROM 'data.csv' LIMIT 3")
out, _ = capsys.readouterr()
assert expected in out


def test_comments_in_duckdb_select_summarize(ip_empty):
    """Check duckdb results when a query carries SQL comments.

    Connects with ``%sql duckdb://`` and runs SUMMARIZE and SELECT
    statements that are preceded by, interleaved with, or followed by
    ``/* */`` and ``--`` comments, using both the line magic and the cell
    magic. Each result's ``.dict()`` must equal the expected mapping.
    """
    # The SQL below refers to this frame by the name `df`, so the variable
    # must keep that exact name.
    # NOTE(review): presumably duckdb resolves `df` by looking up the Python
    # variable of the same name - confirm before moving it elsewhere.
    df = pd.DataFrame(data=dict(memid=[1, 2, 3, 5, 8]))
    _ = df  # fake usage so linters do not report `df` as unused

    summarize_result = {
        "column_name": ("memid",),
        "column_type": ("BIGINT",),
        "min": ("1",),
        "max": ("8",),
        "approx_unique": ("5",),
        "avg": ("3.8",),
        "std": ("2.7748873851023217",),
        "q25": ("2",),
        "q50": ("3",),
        "q75": ("6",),
        "count": (5,),
        "null_percentage": ("0.0%",),
    }
    select_result = {"memid": (1, 2, 3, 5, 8)}

    # (cell source, expected `.dict()` of the result), run in this order.
    scenarios = [
        ("%sql /* x */ SUMMARIZE df", summarize_result),
        ("%sql /*x*//*x*/ SUMMARIZE /*x*/ df", summarize_result),
        (
            """%%sql
/*x*/
SUMMARIZE df
""",
            summarize_result,
        ),
        (
            """%%sql
/*x*/

/*x*/
-- comment
SUMMARIZE df
/*x*/
""",
            summarize_result,
        ),
        (
            """%%sql
/*x*/
SELECT * FROM df
""",
            select_result,
        ),
        (
            """%%sql
/*x*/
FROM df SELECT *
""",
            select_result,
        ),
    ]

    ip_empty.run_cell("%sql duckdb://")

    for cell_source, expected in scenarios:
        out = ip_empty.run_cell(cell_source).result
        assert out.dict() == expected
Loading