From 8e4c099cbd0b5e29d5b182579e529ba463c9da00 Mon Sep 17 00:00:00 2001 From: Bert Blommers Date: Sat, 11 May 2024 17:57:08 +0000 Subject: [PATCH] API: Add JsonParser, and make parse() a static method --- py_partiql_parser/__init__.py | 2 +- py_partiql_parser/_internal/from_parser.py | 4 +- py_partiql_parser/_internal/insert_parser.py | 2 +- py_partiql_parser/_internal/json_parser.py | 31 ++++++++----- tests/test_json_parser.py | 46 ++++++++++---------- 5 files changed, 45 insertions(+), 40 deletions(-) diff --git a/py_partiql_parser/__init__.py b/py_partiql_parser/__init__.py index cbe3fd1..459d165 100644 --- a/py_partiql_parser/__init__.py +++ b/py_partiql_parser/__init__.py @@ -2,6 +2,6 @@ from ._internal.parser import DynamoDBStatementParser, S3SelectParser # noqa -from ._internal.json_parser import SelectEncoder # noqa +from ._internal.json_parser import SelectEncoder, JsonParser # noqa from ._internal.csv_converter import csv_to_json, json_to_csv # noqa from ._internal.utils import MissingVariable, QueryMetadata # noqa diff --git a/py_partiql_parser/_internal/from_parser.py b/py_partiql_parser/_internal/from_parser.py index afdcba7..d2b271e 100644 --- a/py_partiql_parser/_internal/from_parser.py +++ b/py_partiql_parser/_internal/from_parser.py @@ -88,7 +88,7 @@ def get_source_data(self, documents: Dict[str, str]) -> Any: from_query ].endswith("]") - source_data = list(JsonParser().parse(documents[from_query])) + source_data = list(JsonParser.parse(documents[from_query])) if doc_is_list: return {"_1": source_data[0]} @@ -132,7 +132,7 @@ def _get_nested_source_data(self, documents: Dict[str, Any]) -> Any: doc_is_list = source_data[new_key].startswith("[") and source_data[ new_key ].endswith("]") - source_data = list(JsonParser().parse(source_data[new_key])) # type: ignore + source_data = list(JsonParser.parse(source_data[new_key])) # type: ignore if root_doc and doc_is_list: # AWS behaviour when the root-document is a list source_data = {"_1": source_data[0]} # type: ignore diff --git a/py_partiql_parser/_internal/insert_parser.py b/py_partiql_parser/_internal/insert_parser.py index a5b504c..a6472f9 100644 --- a/py_partiql_parser/_internal/insert_parser.py +++ b/py_partiql_parser/_internal/insert_parser.py @@ -38,7 +38,7 @@ def parse(self, query: str) -> Tuple[str, Dict[str, Any]]: if section == "SECTION_VALUE": assert current_phrase.upper() in ["VALUE"] tokenizer.skip_white_space() - attr = next(JsonParser().parse(tokenizer.give_remaining())) + attr = next(JsonParser.parse(tokenizer.give_remaining())) for key, value in attr.items(): attr[key] = serializer.serialize(value) if section == "TABLE_NAME": diff --git a/py_partiql_parser/_internal/json_parser.py b/py_partiql_parser/_internal/json_parser.py index 4c67716..7e97ab5 100644 --- a/py_partiql_parser/_internal/json_parser.py +++ b/py_partiql_parser/_internal/json_parser.py @@ -1,5 +1,5 @@ from json import JSONEncoder -from typing import Any, List, Optional +from typing import Any, List, Iterator, Optional from .clause_tokenizer import ClauseTokenizer from .utils import CaseInsensitiveDict, Variable @@ -14,23 +14,25 @@ class JsonParser: So we can't use the builtin JSON parser """ - def parse( - self, + @staticmethod + def parse( # type: ignore[misc] original: str, tokenizer: Optional[ClauseTokenizer] = None, only_parse_initial: bool = False, - ) -> Any: + ) -> Iterator[Any]: if not (original.startswith("{") or original.startswith("[")): # Doesn't look like JSON - let's return as a variable yield original if original.isnumeric() else Variable(original) tokenizer = tokenizer or ClauseTokenizer(original) while tokenizer.current() is not None: - result = self._parse(original, tokenizer, only_parse_initial) + result = JsonParser._get_next_document( + original, tokenizer, only_parse_initial + ) if result is not None: yield result - def _parse( - self, + @staticmethod + def _get_next_document( # type: ignore[misc] original: str, tokenizer: ClauseTokenizer, only_parse_initial: bool = False, @@ -48,9 +50,9 @@ def _parse( level += 1 # Start of a list if not section: - return self._parse_list(original, tokenizer) + return JsonParser._parse_list(original, tokenizer) else: - result[dict_key] = self._parse_list(original, tokenizer) + result[dict_key] = JsonParser._parse_list(original, tokenizer) section = None current_phrase = "" elif c in ["{", ","] and (not section or section == "OBJECT_END"): @@ -70,7 +72,7 @@ def _parse( level += 1 # Start of a value with a new dictionary tokenizer.revert() # Ensure we start the new parser with the initial { - result[dict_key] = self._parse(original, tokenizer) + result[dict_key] = JsonParser._get_next_document(original, tokenizer) section = None current_phrase = "" elif c in ACCEPTED_QUOTES and section == "KEY_TO_VALUE": @@ -127,7 +129,8 @@ def _parse( current_phrase += c return result - def _parse_list(self, original: str, tokenizer: ClauseTokenizer) -> Any: + @staticmethod + def _parse_list(original: str, tokenizer: ClauseTokenizer) -> List[Any]: # type: ignore result: List[Any] = list() section = None current_phrase = "" @@ -137,7 +140,11 @@ def _parse_list(self, original: str, tokenizer: ClauseTokenizer) -> Any: break if c == "{": tokenizer.revert() # Ensure we start the new parser with the initial { - result.append(self._parse(original, tokenizer, only_parse_initial=True)) + result.append( + JsonParser._get_next_document( + original, tokenizer, only_parse_initial=True + ) + ) if tokenizer.current() == "]": break tokenizer.skip_until([","]) diff --git a/tests/test_json_parser.py b/tests/test_json_parser.py index fe8dd48..c144e16 100644 --- a/tests/test_json_parser.py +++ b/tests/test_json_parser.py @@ -6,23 +6,21 @@ def test_static_value() -> None: - assert next(JsonParser().parse("a")) == Variable("a") + assert next(JsonParser.parse("a")) == Variable("a") def test_dict() -> None: - assert next(JsonParser().parse(json.dumps({"a": "b"}))) == {"a": "b"} - assert next(JsonParser().parse("{'a': 'b'}")) == {"a": "b"} - assert next(JsonParser().parse('{"a": "b"}')) == {"a": "b"} + assert next(JsonParser.parse(json.dumps({"a": "b"}))) == {"a": "b"} + assert next(JsonParser.parse("{'a': 'b'}")) == {"a": "b"} + assert next(JsonParser.parse('{"a": "b"}')) == {"a": "b"} def test_dict_with_spaces_in_keys_and_values() -> None: - assert next(JsonParser().parse(json.dumps({"a sth": "b sth"}))) == { - "a sth": "b sth" - } + assert next(JsonParser.parse(json.dumps({"a sth": "b sth"}))) == {"a sth": "b sth"} def test_dict_with_multiple_entries() -> None: - assert next(JsonParser().parse(json.dumps({"a": "b", "c": "d"}))) == { + assert next(JsonParser.parse(json.dumps({"a": "b", "c": "d"}))) == { "a": "b", "c": "d", } @@ -30,18 +28,18 @@ def test_dict_with_multiple_entries() -> None: def test_dict_with_nested_entries() -> None: original = {"a": {"b1": {"b1.1": "b1.2"}}, "c": "d"} - assert next(JsonParser().parse(json.dumps(original))) == original + assert next(JsonParser.parse(json.dumps(original))) == original def test_dict_with_list() -> None: - assert next(JsonParser().parse(json.dumps({"a": ["b1", "b2"], "c": "d"}))) == { + assert next(JsonParser.parse(json.dumps({"a": ["b1", "b2"], "c": "d"}))) == { "a": ["b1", "b2"], "c": "d", } def test_list() -> None: - assert next(JsonParser().parse(json.dumps(["a", "b", "asdfasdf"]))) == [ + assert next(JsonParser.parse(json.dumps(["a", "b", "asdfasdf"]))) == [ "a", "b", "asdfasdf", @@ -49,7 +47,7 @@ def test_list() -> None: def test_list_with_only_numbers() -> None: - assert next(JsonParser().parse(json.dumps([1, 1234, 12341234]))) == [ + assert next(JsonParser.parse(json.dumps([1, 1234, 12341234]))) == [ 1, 1234, 12341234, @@ -57,19 +55,19 @@ def test_list_with_only_numbers() -> None: def test_list_with_numbers_and_strings() -> None: - assert next(JsonParser().parse(json.dumps(["x", 1324, "y"]))) == ["x", 1324, "y"] + assert next(JsonParser.parse(json.dumps(["x", 1324, "y"]))) == ["x", 1324, "y"] def test_list_with_variables() -> None: - assert next(JsonParser().parse("[v.a, v.b]")) == [Variable("v.a"), Variable("v.b")] + assert next(JsonParser.parse("[v.a, v.b]")) == [Variable("v.a"), Variable("v.b")] def test_dict_with_key_containing_a_special_char() -> None: - assert next(JsonParser().parse(json.dumps({"a:a": "b"}))) == {"a:a": "b"} + assert next(JsonParser.parse(json.dumps({"a:a": "b"}))) == {"a:a": "b"} def test_dict_with_value_containing_a_special_char() -> None: - assert next(JsonParser().parse(json.dumps({"a": "b:b"}))) == {"a": "b:b"} + assert next(JsonParser.parse(json.dumps({"a": "b:b"}))) == {"a": "b:b"} @pytest.mark.parametrize( @@ -77,12 +75,12 @@ def test_dict_with_value_containing_a_special_char() -> None: [[{"a": "legit", "b": 1}, {"a": 400, "b": 2}], {"a": "legit", "b": {"nr": 25}}], ) def test_dict_containing_a_number(original: str) -> None: - assert next(JsonParser().parse(json.dumps(original))) == original + assert next(JsonParser.parse(json.dumps(original))) == original def test_dict_containing_a_variable() -> None: original = "[{'a':'legit', 'b':1}, {'a':qwer, 'b':'2'}]" - assert next(JsonParser().parse(original)) == [ + assert next(JsonParser.parse(original)) == [ {"a": "legit", "b": 1}, {"a": Variable("qwer"), "b": "2"}, ] @@ -90,7 +88,7 @@ def test_dict_containing_a_variable() -> None: def test_unusual_quotes() -> None: original = "[{’a’:1, ’b’:true}, {’a’:2, ’b’:null}, {’a’:3}]" - assert next(JsonParser().parse(original)) == [ + assert next(JsonParser.parse(original)) == [ {"a": 1, "b": True}, {"a": 2, "b": Variable(None)}, {"a": 3}, @@ -109,7 +107,7 @@ def test_parse_multiple_objects() -> None: } """ - assert list(JsonParser().parse(multi_object_string)) == [ + assert list(JsonParser.parse(multi_object_string)) == [ {"a1": "v1", "a1": "v2"}, {"a2": "w1", "a2": "w2"}, {"a3": "z"}, @@ -125,18 +123,18 @@ def test_parse_multiple_objects() -> None: ], ) def test_list_and_string_are_siblings(source: Any) -> None: # type: ignore[misc] - assert next(JsonParser().parse(json.dumps(source))) == source + assert next(JsonParser.parse(json.dumps(source))) == source def test_bool_parser() -> None: - assert next(JsonParser().parse(json.dumps({"sth": False}))) == {"sth": False} + assert next(JsonParser.parse(json.dumps({"sth": False}))) == {"sth": False} def test_multiline_bool_parser() -> None: obj1 = {"sth": False} obj2 = {"k1": "v1"} combined = json.dumps(obj1) + "\n" + json.dumps(obj2) - assert list(JsonParser().parse(combined)) == [obj1, obj2] + assert list(JsonParser.parse(combined)) == [obj1, obj2] @pytest.mark.parametrize("nr_of_docs", [1, 25, 2500]) @@ -145,5 +143,5 @@ def test_large_object(nr_of_docs: int) -> None: [json.dumps({"pk": f"pk{i}", "data": str(uuid4())}) for i in range(nr_of_docs)] ) - res = list(JsonParser().parse(data)) + res = list(JsonParser.parse(data)) assert len(res) == nr_of_docs