API: Add JsonParser, and make parse() a static method

getmoto · May 11, 2024 · 8e4c099
1 parent cec43ac
commit 8e4c099
Show file tree

Hide file tree

Showing 5 changed files with 45 additions and 40 deletions.
diff --git a/py_partiql_parser/__init__.py b/py_partiql_parser/__init__.py
@@ -2,6 +2,6 @@
 
 
 from ._internal.parser import DynamoDBStatementParser, S3SelectParser  # noqa
-from ._internal.json_parser import SelectEncoder  # noqa
+from ._internal.json_parser import SelectEncoder, JsonParser  # noqa
 from ._internal.csv_converter import csv_to_json, json_to_csv  # noqa
 from ._internal.utils import MissingVariable, QueryMetadata  # noqa
diff --git a/py_partiql_parser/_internal/from_parser.py b/py_partiql_parser/_internal/from_parser.py
@@ -88,7 +88,7 @@ def get_source_data(self, documents: Dict[str, str]) -> Any:
             from_query
         ].endswith("]")
 
-        source_data = list(JsonParser().parse(documents[from_query]))
+        source_data = list(JsonParser.parse(documents[from_query]))
 
         if doc_is_list:
             return {"_1": source_data[0]}
@@ -132,7 +132,7 @@ def _get_nested_source_data(self, documents: Dict[str, Any]) -> Any:
                     doc_is_list = source_data[new_key].startswith("[") and source_data[
                         new_key
                     ].endswith("]")
-                    source_data = list(JsonParser().parse(source_data[new_key]))  # type: ignore
+                    source_data = list(JsonParser.parse(source_data[new_key]))  # type: ignore
                     if root_doc and doc_is_list:
                         # AWS behaviour when the root-document is a list
                         source_data = {"_1": source_data[0]}  # type: ignore

diff --git a/py_partiql_parser/_internal/insert_parser.py b/py_partiql_parser/_internal/insert_parser.py
@@ -38,7 +38,7 @@ def parse(self, query: str) -> Tuple[str, Dict[str, Any]]:
                 if section == "SECTION_VALUE":
                     assert current_phrase.upper() in ["VALUE"]
                     tokenizer.skip_white_space()
-                    attr = next(JsonParser().parse(tokenizer.give_remaining()))
+                    attr = next(JsonParser.parse(tokenizer.give_remaining()))
                     for key, value in attr.items():
                         attr[key] = serializer.serialize(value)
                 if section == "TABLE_NAME":

diff --git a/py_partiql_parser/_internal/json_parser.py b/py_partiql_parser/_internal/json_parser.py
@@ -1,5 +1,5 @@
 from json import JSONEncoder
-from typing import Any, List, Optional
+from typing import Any, List, Iterator, Optional
 
 from .clause_tokenizer import ClauseTokenizer
 from .utils import CaseInsensitiveDict, Variable
@@ -14,23 +14,25 @@ class JsonParser:
     So we can't use the builtin JSON parser
     """
 
-    def parse(
-        self,
+    @staticmethod
+    def parse(  # type: ignore[misc]
         original: str,
         tokenizer: Optional[ClauseTokenizer] = None,
         only_parse_initial: bool = False,
-    ) -> Any:
+    ) -> Iterator[Any]:
         if not (original.startswith("{") or original.startswith("[")):
             # Doesn't look like JSON - let's return as a variable
             yield original if original.isnumeric() else Variable(original)
         tokenizer = tokenizer or ClauseTokenizer(original)
         while tokenizer.current() is not None:
-            result = self._parse(original, tokenizer, only_parse_initial)
+            result = JsonParser._get_next_document(
+                original, tokenizer, only_parse_initial
+            )
             if result is not None:
                 yield result
 
-    def _parse(
-        self,
+    @staticmethod
+    def _get_next_document(  # type: ignore[misc]
         original: str,
         tokenizer: ClauseTokenizer,
         only_parse_initial: bool = False,
@@ -48,9 +50,9 @@ def _parse(
                 level += 1
                 # Start of a list
                 if not section:
-                    return self._parse_list(original, tokenizer)
+                    return JsonParser._parse_list(original, tokenizer)
                 else:
-                    result[dict_key] = self._parse_list(original, tokenizer)
+                    result[dict_key] = JsonParser._parse_list(original, tokenizer)
                     section = None
                     current_phrase = ""
             elif c in ["{", ","] and (not section or section == "OBJECT_END"):
@@ -70,7 +72,7 @@ def _parse(
                 level += 1
                 # Start of a value with a new dictionary
                 tokenizer.revert()  # Ensure we start the new parser with the initial {
-                result[dict_key] = self._parse(original, tokenizer)
+                result[dict_key] = JsonParser._get_next_document(original, tokenizer)
                 section = None
                 current_phrase = ""
             elif c in ACCEPTED_QUOTES and section == "KEY_TO_VALUE":
@@ -127,7 +129,8 @@ def _parse(
                     current_phrase += c
         return result
 
-    def _parse_list(self, original: str, tokenizer: ClauseTokenizer) -> Any:
+    @staticmethod
+    def _parse_list(original: str, tokenizer: ClauseTokenizer) -> List[Any]:  # type: ignore
         result: List[Any] = list()
         section = None
         current_phrase = ""
@@ -137,7 +140,11 @@ def _parse_list(self, original: str, tokenizer: ClauseTokenizer) -> Any:
                 break
             if c == "{":
                 tokenizer.revert()  # Ensure we start the new parser with the initial {
-                result.append(self._parse(original, tokenizer, only_parse_initial=True))
+                result.append(
+                    JsonParser._get_next_document(
+                        original, tokenizer, only_parse_initial=True
+                    )
+                )
                 if tokenizer.current() == "]":
                     break
                 tokenizer.skip_until([","])

diff --git a/tests/test_json_parser.py b/tests/test_json_parser.py
@@ -6,91 +6,89 @@
 
 
 def test_static_value() -> None:
-    assert next(JsonParser().parse("a")) == Variable("a")
+    assert next(JsonParser.parse("a")) == Variable("a")
 
 
 def test_dict() -> None:
-    assert next(JsonParser().parse(json.dumps({"a": "b"}))) == {"a": "b"}
-    assert next(JsonParser().parse("{'a': 'b'}")) == {"a": "b"}
-    assert next(JsonParser().parse('{"a": "b"}')) == {"a": "b"}
+    assert next(JsonParser.parse(json.dumps({"a": "b"}))) == {"a": "b"}
+    assert next(JsonParser.parse("{'a': 'b'}")) == {"a": "b"}
+    assert next(JsonParser.parse('{"a": "b"}')) == {"a": "b"}
 
 
 def test_dict_with_spaces_in_keys_and_values() -> None:
-    assert next(JsonParser().parse(json.dumps({"a sth": "b sth"}))) == {
-        "a sth": "b sth"
-    }
+    assert next(JsonParser.parse(json.dumps({"a sth": "b sth"}))) == {"a sth": "b sth"}
 
 
 def test_dict_with_multiple_entries() -> None:
-    assert next(JsonParser().parse(json.dumps({"a": "b", "c": "d"}))) == {
+    assert next(JsonParser.parse(json.dumps({"a": "b", "c": "d"}))) == {
         "a": "b",
         "c": "d",
     }
 
 
 def test_dict_with_nested_entries() -> None:
     original = {"a": {"b1": {"b1.1": "b1.2"}}, "c": "d"}
-    assert next(JsonParser().parse(json.dumps(original))) == original
+    assert next(JsonParser.parse(json.dumps(original))) == original
 
 
 def test_dict_with_list() -> None:
-    assert next(JsonParser().parse(json.dumps({"a": ["b1", "b2"], "c": "d"}))) == {
+    assert next(JsonParser.parse(json.dumps({"a": ["b1", "b2"], "c": "d"}))) == {
         "a": ["b1", "b2"],
         "c": "d",
     }
 
 
 def test_list() -> None:
-    assert next(JsonParser().parse(json.dumps(["a", "b", "asdfasdf"]))) == [
+    assert next(JsonParser.parse(json.dumps(["a", "b", "asdfasdf"]))) == [
         "a",
         "b",
         "asdfasdf",
     ]
 
 
 def test_list_with_only_numbers() -> None:
-    assert next(JsonParser().parse(json.dumps([1, 1234, 12341234]))) == [
+    assert next(JsonParser.parse(json.dumps([1, 1234, 12341234]))) == [
         1,
         1234,
         12341234,
     ]
 
 
 def test_list_with_numbers_and_strings() -> None:
-    assert next(JsonParser().parse(json.dumps(["x", 1324, "y"]))) == ["x", 1324, "y"]
+    assert next(JsonParser.parse(json.dumps(["x", 1324, "y"]))) == ["x", 1324, "y"]
 
 
 def test_list_with_variables() -> None:
-    assert next(JsonParser().parse("[v.a, v.b]")) == [Variable("v.a"), Variable("v.b")]
+    assert next(JsonParser.parse("[v.a, v.b]")) == [Variable("v.a"), Variable("v.b")]
 
 
 def test_dict_with_key_containing_a_special_char() -> None:
-    assert next(JsonParser().parse(json.dumps({"a:a": "b"}))) == {"a:a": "b"}
+    assert next(JsonParser.parse(json.dumps({"a:a": "b"}))) == {"a:a": "b"}
 
 
 def test_dict_with_value_containing_a_special_char() -> None:
-    assert next(JsonParser().parse(json.dumps({"a": "b:b"}))) == {"a": "b:b"}
+    assert next(JsonParser.parse(json.dumps({"a": "b:b"}))) == {"a": "b:b"}
 
 
 @pytest.mark.parametrize(
     "original",
     [[{"a": "legit", "b": 1}, {"a": 400, "b": 2}], {"a": "legit", "b": {"nr": 25}}],
 )
 def test_dict_containing_a_number(original: str) -> None:
-    assert next(JsonParser().parse(json.dumps(original))) == original
+    assert next(JsonParser.parse(json.dumps(original))) == original
 
 
 def test_dict_containing_a_variable() -> None:
     original = "[{'a':'legit', 'b':1}, {'a':qwer, 'b':'2'}]"
-    assert next(JsonParser().parse(original)) == [
+    assert next(JsonParser.parse(original)) == [
         {"a": "legit", "b": 1},
         {"a": Variable("qwer"), "b": "2"},
     ]
 
 
 def test_unusual_quotes() -> None:
     original = "[{’a’:1, ’b’:true}, {’a’:2, ’b’:null}, {’a’:3}]"
-    assert next(JsonParser().parse(original)) == [
+    assert next(JsonParser.parse(original)) == [
         {"a": 1, "b": True},
         {"a": 2, "b": Variable(None)},
         {"a": 3},
@@ -109,7 +107,7 @@ def test_parse_multiple_objects() -> None:
 }
     
     """
-    assert list(JsonParser().parse(multi_object_string)) == [
+    assert list(JsonParser.parse(multi_object_string)) == [
         {"a1": "v1", "a1": "v2"},
         {"a2": "w1", "a2": "w2"},
         {"a3": "z"},
@@ -125,18 +123,18 @@ def test_parse_multiple_objects() -> None:
     ],
 )
 def test_list_and_string_are_siblings(source: Any) -> None:  # type: ignore[misc]
-    assert next(JsonParser().parse(json.dumps(source))) == source
+    assert next(JsonParser.parse(json.dumps(source))) == source
 
 
 def test_bool_parser() -> None:
-    assert next(JsonParser().parse(json.dumps({"sth": False}))) == {"sth": False}
+    assert next(JsonParser.parse(json.dumps({"sth": False}))) == {"sth": False}
 
 
 def test_multiline_bool_parser() -> None:
     obj1 = {"sth": False}
     obj2 = {"k1": "v1"}
     combined = json.dumps(obj1) + "\n" + json.dumps(obj2)
-    assert list(JsonParser().parse(combined)) == [obj1, obj2]
+    assert list(JsonParser.parse(combined)) == [obj1, obj2]
 
 
 @pytest.mark.parametrize("nr_of_docs", [1, 25, 2500])
@@ -145,5 +143,5 @@ def test_large_object(nr_of_docs: int) -> None:
         [json.dumps({"pk": f"pk{i}", "data": str(uuid4())}) for i in range(nr_of_docs)]
     )
 
-    res = list(JsonParser().parse(data))
+    res = list(JsonParser.parse(data))
     assert len(res) == nr_of_docs