Skip to content

Commit

Permalink
Improve Update/Delete/Insert parsers
Browse files Browse the repository at this point in the history
  • Loading branch information
bblommers committed Feb 1, 2024
1 parent 5adcfd4 commit c1bb673
Show file tree
Hide file tree
Showing 10 changed files with 186 additions and 41 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
CHANGELOG
=========

0.5.1
-----

- Support INSERT/DELETE/UPDATE queries:

  - that contain a table name without quotes
  - that contain parameters
  - when calling get_query_metadata()


0.5.0
-----
- Improved typing support
Expand Down
2 changes: 1 addition & 1 deletion py_partiql_parser/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "0.5.0"
__version__ = "0.5.1"


from ._internal.parser import DynamoDBStatementParser, S3SelectParser # noqa
Expand Down
5 changes: 5 additions & 0 deletions py_partiql_parser/_internal/delete_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ def parse(self, query: str) -> Tuple[str, List[Tuple[str, Dict[str, Any]]]]:
assert current_phrase.upper() == "AND"
section = "WHERE"
current_phrase = ""
if section == "TABLE_NAME":
table_name = current_phrase
current_phrase = ""
tokenizer.skip_white_space()
section = "SECTION_WHERE"
continue
elif c in ["'", '"']:
if section == "TABLE_NAME":
Expand Down
5 changes: 5 additions & 0 deletions py_partiql_parser/_internal/insert_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ def parse(self, query: str) -> Tuple[str, Dict[str, Any]]:
attr = JsonParser().parse(tokenizer.give_remaining())
for key, value in attr.items():
attr[key] = serializer.serialize(value)
if section == "TABLE_NAME":
table_name = current_phrase
current_phrase = ""
tokenizer.skip_white_space()
section = "SECTION_VALUE"
continue
elif c in ["'", '"']:
if section == "TABLE_NAME":
Expand Down
65 changes: 51 additions & 14 deletions py_partiql_parser/_internal/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from typing import Dict, Any, List, Optional, Tuple

from ..exceptions import ParserException
from .delete_parser import DeleteParser
from .from_parser import DynamoDBFromParser, S3FromParser, FromParser
from .insert_parser import InsertParser
Expand Down Expand Up @@ -86,7 +87,7 @@ def parse( # type: ignore[return]
return return_data, updates

if query.lower().startswith("update"):
return self._parse_update(query)
return self._parse_update(query, parameters)

if query.lower().startswith("delete"):
return self._parse_delete(query)
Expand Down Expand Up @@ -123,11 +124,31 @@ def _parse_select(
] = {}
return queried_data, updates

def _parse_update(self, query: str) -> TYPE_RESPONSE:
def _parse_update(
self, query: str, parameters: Optional[List[Dict[str, Any]]] = None
) -> TYPE_RESPONSE:
query = query.replace("\n", " ")

table_name, attrs_to_update, attrs_to_filter = UpdateParser().parse(query)

parameters_requested = len(
[_ for _, val in attrs_to_update + attrs_to_filter if val == "?"]
)
if parameters_requested and len(parameters) != parameters_requested: # type: ignore
raise ParserException(
name="ValidationError",
message="Number of parameters in request and statement don't match.",
)

attrs_to_update = [
(key, parameters.pop(0) if val == "?" else val) # type: ignore
for key, val in attrs_to_update
]
attrs_to_filter = [
(key, parameters.pop(0) if val == "?" else val) # type: ignore
for key, val in attrs_to_filter
]

source_data = self.documents[table_name]
updates_per_table: Dict[
str, List[Tuple[Optional[Dict[str, Any]], Optional[Dict[str, Any]]]]
Expand Down Expand Up @@ -172,15 +193,31 @@ def _parse_insert(self, query: str) -> TYPE_RESPONSE:
@classmethod
def get_query_metadata(cls, query: str) -> QueryMetadata:
query = query.replace("\n", " ")
clauses = re.split("SELECT | FROM | WHERE ", query, flags=re.IGNORECASE)

from_parser = FromParser(clauses[2])

# WHERE
if len(clauses) > 3:
where_clause = clauses[3]
where = WhereParser.parse_where_clause(where_clause)
else:
where = None

return QueryMetadata(tables=from_parser.clauses, where_clause=where)
if query.lower().startswith("select"):
clauses = re.split("SELECT | FROM | WHERE ", query, flags=re.IGNORECASE)

from_parser = FromParser(clauses[2])
# WHERE
if len(clauses) > 3:
where_clause = clauses[3]
where = WhereParser.parse_where_clause(where_clause)
else:
where = None

return QueryMetadata(
tables=from_parser.clauses, where_clause=where, is_select_query=True
)
elif query.lower().startswith("update"):
table_name, attrs_to_update, attrs_to_filter = UpdateParser().parse(query)
return QueryMetadata(tables={table_name: table_name}, where_clause=None)
elif query.lower().startswith("delete"):
query = query.replace("\n", " ")

table_name, attrs_to_filter = DeleteParser().parse(query)
return QueryMetadata(tables={table_name: table_name})
elif query.lower().startswith("insert"):
query = query.replace("\n", " ")

table_name, new_item = InsertParser().parse(query)
return QueryMetadata(tables={table_name: table_name})
raise Exception
66 changes: 45 additions & 21 deletions py_partiql_parser/_internal/update_parser.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any, Dict, List, Tuple, Optional
from typing import Any, List, Tuple

from .clause_tokenizer import ClauseTokenizer
from .utils import serializer
Expand All @@ -7,19 +7,15 @@
class UpdateParser:
def parse(
self, query: str
) -> Tuple[
str,
List[Tuple[str, Optional[Dict[str, Any]]]],
List[Tuple[str, Dict[str, Any]]],
]:
) -> Tuple[str, List[Tuple[str, Any]], List[Tuple[str, Any]]]:
tokenizer = ClauseTokenizer(query)

section = "START"
current_phrase = ""

table_name = ""
attrs_to_update: List[Tuple[str, Optional[Dict[str, Any]]]] = []
attr_filters = []
attrs_to_update: List[Tuple[str, Any]] = []
attr_filters: List[Tuple[str, Any]] = []

while True:
c = tokenizer.next()
Expand All @@ -33,6 +29,7 @@ def parse(
), f"{current_phrase} should be UPDATE"
current_phrase = ""
section = "TABLE_NAME"
continue
if section == "ACTION":
assert current_phrase.upper() in ["SET", "REMOVE"]
section = f"ACTION_{current_phrase.upper()}"
Expand All @@ -42,6 +39,7 @@ def parse(
assert current_phrase.upper() == "WHERE"
section = "WHERE"
current_phrase = ""
continue
if section == "WHERE_AND":
assert current_phrase.upper() == "AND"
section = "WHERE"
Expand All @@ -54,38 +52,64 @@ def parse(

tokenizer.skip_white_space()
section = "SECTION_WHERE"
if section == "TABLE_NAME":
table_name = current_phrase
current_phrase = ""
tokenizer.skip_white_space()
section = "ACTION"
continue
elif c in ["'", '"']:
if section == "TABLE_NAME":
table_name = tokenizer.next_until([c])
tokenizer.skip_white_space()
section = "ACTION"
if section == "ACTION_SET":
current_phrase = tokenizer.next_until([c])
tokenizer.skip_white_space()
if section == "WHERE":
current_phrase = tokenizer.next_until([c])
tokenizer.skip_white_space()
continue
elif c == "=":
if section == "ACTION_SET":
attr_name = current_phrase
tokenizer.skip_white_space()
quote_type = tokenizer.current()
assert quote_type in ["'", '"']
if tokenizer.current() == "?":
attrs_to_update.append((attr_name, "?"))
tokenizer.next_until(["?"])
else:
quote_type = tokenizer.current()
assert quote_type in ["'", '"', "?"]

tokenizer.next()
attr_value = tokenizer.next_until([quote_type])
attrs_to_update.append(
(attr_name, serializer.serialize(attr_value))
)
tokenizer.next()
attr_value = tokenizer.next_until([quote_type])
attrs_to_update.append(
(attr_name, serializer.serialize(attr_value))
)
current_phrase = ""

tokenizer.skip_white_space()
section = "SECTION_WHERE"
if tokenizer.current() == ",":
tokenizer.next_until([","])
# Another attr to update
pass
else:
section = "SECTION_WHERE"
elif section == "WHERE":
attr_name = current_phrase
tokenizer.skip_white_space()
quote_type = tokenizer.current()
assert quote_type in ["'", '"']
if tokenizer.current() == "?":
attr_filters.append((current_phrase, "?"))
tokenizer.next_until(["?"])
else:
quote_type = tokenizer.current()
assert quote_type in ["'", '"']

tokenizer.next()
attr_value = tokenizer.next_until([quote_type])
attr_filters.append((attr_name, serializer.serialize(attr_value)))
tokenizer.next()
attr_value = tokenizer.next_until([quote_type])
attr_filters.append(
(attr_name, serializer.serialize(attr_value))
)

tokenizer.skip_white_space()
current_phrase = ""
Expand Down
5 changes: 5 additions & 0 deletions py_partiql_parser/_internal/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,9 +133,11 @@ def __init__(
self,
tables: Dict[str, str],
where_clause: Optional["AbstractWhereClause"] = None,
is_select_query: bool = False,
):
self._tables = tables
self._where_clause = where_clause
self._is_select_query = is_select_query

def get_table_names(self) -> List[str]:
return list(self._tables.values())
Expand All @@ -145,6 +147,9 @@ def get_filter_names(self) -> List[str]:
return self._where_clause.get_filter_names()
return []

def is_select_query(self) -> bool:
return self._is_select_query


class Variable:
def __init__(self, value: Any) -> None:
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "py-partiql-parser"
version = "0.5.0"
version = "0.5.1"
description = "Pure Python PartiQL Parser"
readme = "README.md"
keywords = ["pypartiql", "parser"]
Expand Down
43 changes: 39 additions & 4 deletions tests/test_dynamodb_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,32 @@ def test_update_with_multiple_keys() -> None:
assert updates == {"table": [(items[1], updated_item)]}


def test_update_with_quoted_attributes_and_parameters() -> None:
# Note that the table name is without quotes
query = 'UPDATE users SET "first_name" = ?, "last_name" = ? WHERE "id"= ?'
items = [
{"id": {"S": "yes"}, "first_name": {"S": "old"}, "last_name": {"S": "old"}},
{"id": {"S": "no"}, "first_name": {"S": "old"}, "last_name": {"S": "old"}},
]
return_value, updates = DynamoDBStatementParser(source_data={"users": items}).parse(
query, parameters=[{"S": "fn"}, {"S": "ln"}, {"S": "yes"}]
)

assert return_value == []
assert len(updates["users"]) == 1
old, new = updates["users"][0]
assert old == {
"id": {"S": "yes"},
"first_name": {"S": "old"},
"last_name": {"S": "old"},
}
assert new == {
"id": {"S": "yes"},
"first_name": {"S": "fn"},
"last_name": {"S": "ln"},
}


def test_update_remove() -> None:
query = "UPDATE 'table' REMOVE attr WHERE Id='id1'"
items = [
Expand All @@ -276,8 +302,11 @@ def test_update_remove() -> None:
assert updates == {"table": [(items[0], updated_item)]}


def test_delete() -> None:
query = "DELETE FROM 'tablename' WHERE Id='id1'"
@pytest.mark.parametrize(
"query",
["DELETE FROM 'tablename' WHERE Id='id1'", "DELETE FROM tablename WHERE Id='id1'"],
)
def test_delete(query: str) -> None:
items = [
{"id": {"S": "id1"}, "attr": {"S": "sth"}},
{"id": {"S": "id2"}, "attr": {"S": "oth"}},
Expand Down Expand Up @@ -317,8 +346,14 @@ def test_delete_with_no_hits() -> None:
assert updates == {"tablename": []}


def test_insert() -> None:
query = "INSERT INTO 'mytable' value {'id': 'id1'}"
@pytest.mark.parametrize(
"query",
[
"INSERT INTO 'mytable' value {'id': 'id1'}",
"INSERT INTO mytable value {'id': 'id1'}",
],
)
def test_insert(query: str) -> None:
items = [{"id": {"S": "asdf"}}]
return_value, updates = DynamoDBStatementParser(
source_data={"mytable": items}
Expand Down
24 changes: 24 additions & 0 deletions tests/test_query_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,27 @@ def test_nested_filter_names() -> None:
metadata = DynamoDBStatementParser.get_query_metadata(query)

assert metadata.get_filter_names() == ["k1", "k2.sth"]


def test_update_statement() -> None:
query = 'UPDATE users SET "first_name" = ?, "last_name" = ? WHERE "username"= ?'
metadata = DynamoDBStatementParser.get_query_metadata(query)

assert metadata.get_table_names() == ["users"]
assert metadata.get_filter_names() == []


def test_insert_statement() -> None:
query = "INSERT INTO 'mytable' value {'id': 'id1'}"
metadata = DynamoDBStatementParser.get_query_metadata(query)

assert metadata.get_table_names() == ["mytable"]
assert metadata.get_filter_names() == []


def test_delete_statement() -> None:
query = "DELETE FROM 'tablename' WHERE Id='id1'"
metadata = DynamoDBStatementParser.get_query_metadata(query)

assert metadata.get_table_names() == ["tablename"]
assert metadata.get_filter_names() == []

0 comments on commit c1bb673

Please sign in to comment.