Skip to content

Commit

Permalink
API: Add JsonParser, and make parse() a static method
Browse files: browse the repository at this point in the history
  • Loading branch information
bblommers committed May 11, 2024
1 parent cec43ac commit 8e4c099
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 40 deletions.
2 changes: 1 addition & 1 deletion py_partiql_parser/__init__.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,6 +2,6 @@


from ._internal.parser import DynamoDBStatementParser, S3SelectParser # noqa
from ._internal.json_parser import SelectEncoder # noqa
from ._internal.json_parser import SelectEncoder, JsonParser # noqa
from ._internal.csv_converter import csv_to_json, json_to_csv # noqa
from ._internal.utils import MissingVariable, QueryMetadata # noqa
4 changes: 2 additions & 2 deletions py_partiql_parser/_internal/from_parser.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -88,7 +88,7 @@ def get_source_data(self, documents: Dict[str, str]) -> Any:
from_query
].endswith("]")

source_data = list(JsonParser().parse(documents[from_query]))
source_data = list(JsonParser.parse(documents[from_query]))

if doc_is_list:
return {"_1": source_data[0]}
Expand Down Expand Up @@ -132,7 +132,7 @@ def _get_nested_source_data(self, documents: Dict[str, Any]) -> Any:
doc_is_list = source_data[new_key].startswith("[") and source_data[
new_key
].endswith("]")
source_data = list(JsonParser().parse(source_data[new_key])) # type: ignore
source_data = list(JsonParser.parse(source_data[new_key])) # type: ignore
if root_doc and doc_is_list:
# AWS behaviour when the root-document is a list
source_data = {"_1": source_data[0]} # type: ignore
Expand Down
2 changes: 1 addition & 1 deletion py_partiql_parser/_internal/insert_parser.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -38,7 +38,7 @@ def parse(self, query: str) -> Tuple[str, Dict[str, Any]]:
if section == "SECTION_VALUE":
assert current_phrase.upper() in ["VALUE"]
tokenizer.skip_white_space()
attr = next(JsonParser().parse(tokenizer.give_remaining()))
attr = next(JsonParser.parse(tokenizer.give_remaining()))
for key, value in attr.items():
attr[key] = serializer.serialize(value)
if section == "TABLE_NAME":
Expand Down
31 changes: 19 additions & 12 deletions py_partiql_parser/_internal/json_parser.py
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
from json import JSONEncoder
from typing import Any, List, Optional
from typing import Any, List, Iterator, Optional

from .clause_tokenizer import ClauseTokenizer
from .utils import CaseInsensitiveDict, Variable
Expand All @@ -14,23 +14,25 @@ class JsonParser:
So we can't use the builtin JSON parser
"""

def parse(
self,
@staticmethod
def parse( # type: ignore[misc]
original: str,
tokenizer: Optional[ClauseTokenizer] = None,
only_parse_initial: bool = False,
) -> Any:
) -> Iterator[Any]:
if not (original.startswith("{") or original.startswith("[")):
# Doesn't look like JSON - let's return as a variable
yield original if original.isnumeric() else Variable(original)
tokenizer = tokenizer or ClauseTokenizer(original)
while tokenizer.current() is not None:
result = self._parse(original, tokenizer, only_parse_initial)
result = JsonParser._get_next_document(
original, tokenizer, only_parse_initial
)
if result is not None:
yield result

def _parse(
self,
@staticmethod
def _get_next_document( # type: ignore[misc]
original: str,
tokenizer: ClauseTokenizer,
only_parse_initial: bool = False,
Expand All @@ -48,9 +50,9 @@ def _parse(
level += 1
# Start of a list
if not section:
return self._parse_list(original, tokenizer)
return JsonParser._parse_list(original, tokenizer)
else:
result[dict_key] = self._parse_list(original, tokenizer)
result[dict_key] = JsonParser._parse_list(original, tokenizer)
section = None
current_phrase = ""
elif c in ["{", ","] and (not section or section == "OBJECT_END"):
Expand All @@ -70,7 +72,7 @@ def _parse(
level += 1
# Start of a value with a new dictionary
tokenizer.revert() # Ensure we start the new parser with the initial {
result[dict_key] = self._parse(original, tokenizer)
result[dict_key] = JsonParser._get_next_document(original, tokenizer)
section = None
current_phrase = ""
elif c in ACCEPTED_QUOTES and section == "KEY_TO_VALUE":
Expand Down Expand Up @@ -127,7 +129,8 @@ def _parse(
current_phrase += c
return result

def _parse_list(self, original: str, tokenizer: ClauseTokenizer) -> Any:
@staticmethod
def _parse_list(original: str, tokenizer: ClauseTokenizer) -> List[Any]: # type: ignore
result: List[Any] = list()
section = None
current_phrase = ""
Expand All @@ -137,7 +140,11 @@ def _parse_list(self, original: str, tokenizer: ClauseTokenizer) -> Any:
break
if c == "{":
tokenizer.revert() # Ensure we start the new parser with the initial {
result.append(self._parse(original, tokenizer, only_parse_initial=True))
result.append(
JsonParser._get_next_document(
original, tokenizer, only_parse_initial=True
)
)
if tokenizer.current() == "]":
break
tokenizer.skip_until([","])
Expand Down
46 changes: 22 additions & 24 deletions tests/test_json_parser.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -6,91 +6,89 @@


def test_static_value() -> None:
assert next(JsonParser().parse("a")) == Variable("a")
assert next(JsonParser.parse("a")) == Variable("a")


def test_dict() -> None:
assert next(JsonParser().parse(json.dumps({"a": "b"}))) == {"a": "b"}
assert next(JsonParser().parse("{'a': 'b'}")) == {"a": "b"}
assert next(JsonParser().parse('{"a": "b"}')) == {"a": "b"}
assert next(JsonParser.parse(json.dumps({"a": "b"}))) == {"a": "b"}
assert next(JsonParser.parse("{'a': 'b'}")) == {"a": "b"}
assert next(JsonParser.parse('{"a": "b"}')) == {"a": "b"}


def test_dict_with_spaces_in_keys_and_values() -> None:
assert next(JsonParser().parse(json.dumps({"a sth": "b sth"}))) == {
"a sth": "b sth"
}
assert next(JsonParser.parse(json.dumps({"a sth": "b sth"}))) == {"a sth": "b sth"}


def test_dict_with_multiple_entries() -> None:
assert next(JsonParser().parse(json.dumps({"a": "b", "c": "d"}))) == {
assert next(JsonParser.parse(json.dumps({"a": "b", "c": "d"}))) == {
"a": "b",
"c": "d",
}


def test_dict_with_nested_entries() -> None:
original = {"a": {"b1": {"b1.1": "b1.2"}}, "c": "d"}
assert next(JsonParser().parse(json.dumps(original))) == original
assert next(JsonParser.parse(json.dumps(original))) == original


def test_dict_with_list() -> None:
assert next(JsonParser().parse(json.dumps({"a": ["b1", "b2"], "c": "d"}))) == {
assert next(JsonParser.parse(json.dumps({"a": ["b1", "b2"], "c": "d"}))) == {
"a": ["b1", "b2"],
"c": "d",
}


def test_list() -> None:
assert next(JsonParser().parse(json.dumps(["a", "b", "asdfasdf"]))) == [
assert next(JsonParser.parse(json.dumps(["a", "b", "asdfasdf"]))) == [
"a",
"b",
"asdfasdf",
]


def test_list_with_only_numbers() -> None:
assert next(JsonParser().parse(json.dumps([1, 1234, 12341234]))) == [
assert next(JsonParser.parse(json.dumps([1, 1234, 12341234]))) == [
1,
1234,
12341234,
]


def test_list_with_numbers_and_strings() -> None:
assert next(JsonParser().parse(json.dumps(["x", 1324, "y"]))) == ["x", 1324, "y"]
assert next(JsonParser.parse(json.dumps(["x", 1324, "y"]))) == ["x", 1324, "y"]


def test_list_with_variables() -> None:
assert next(JsonParser().parse("[v.a, v.b]")) == [Variable("v.a"), Variable("v.b")]
assert next(JsonParser.parse("[v.a, v.b]")) == [Variable("v.a"), Variable("v.b")]


def test_dict_with_key_containing_a_special_char() -> None:
assert next(JsonParser().parse(json.dumps({"a:a": "b"}))) == {"a:a": "b"}
assert next(JsonParser.parse(json.dumps({"a:a": "b"}))) == {"a:a": "b"}


def test_dict_with_value_containing_a_special_char() -> None:
assert next(JsonParser().parse(json.dumps({"a": "b:b"}))) == {"a": "b:b"}
assert next(JsonParser.parse(json.dumps({"a": "b:b"}))) == {"a": "b:b"}


@pytest.mark.parametrize(
"original",
[[{"a": "legit", "b": 1}, {"a": 400, "b": 2}], {"a": "legit", "b": {"nr": 25}}],
)
def test_dict_containing_a_number(original: str) -> None:
assert next(JsonParser().parse(json.dumps(original))) == original
assert next(JsonParser.parse(json.dumps(original))) == original


def test_dict_containing_a_variable() -> None:
original = "[{'a':'legit', 'b':1}, {'a':qwer, 'b':'2'}]"
assert next(JsonParser().parse(original)) == [
assert next(JsonParser.parse(original)) == [
{"a": "legit", "b": 1},
{"a": Variable("qwer"), "b": "2"},
]


def test_unusual_quotes() -> None:
original = "[{’a’:1, ’b’:true}, {’a’:2, ’b’:null}, {’a’:3}]"
assert next(JsonParser().parse(original)) == [
assert next(JsonParser.parse(original)) == [
{"a": 1, "b": True},
{"a": 2, "b": Variable(None)},
{"a": 3},
Expand All @@ -109,7 +107,7 @@ def test_parse_multiple_objects() -> None:
}
"""
assert list(JsonParser().parse(multi_object_string)) == [
assert list(JsonParser.parse(multi_object_string)) == [
{"a1": "v1", "a1": "v2"},
{"a2": "w1", "a2": "w2"},
{"a3": "z"},
Expand All @@ -125,18 +123,18 @@ def test_parse_multiple_objects() -> None:
],
)
def test_list_and_string_are_siblings(source: Any) -> None: # type: ignore[misc]
assert next(JsonParser().parse(json.dumps(source))) == source
assert next(JsonParser.parse(json.dumps(source))) == source


def test_bool_parser() -> None:
assert next(JsonParser().parse(json.dumps({"sth": False}))) == {"sth": False}
assert next(JsonParser.parse(json.dumps({"sth": False}))) == {"sth": False}


def test_multiline_bool_parser() -> None:
obj1 = {"sth": False}
obj2 = {"k1": "v1"}
combined = json.dumps(obj1) + "\n" + json.dumps(obj2)
assert list(JsonParser().parse(combined)) == [obj1, obj2]
assert list(JsonParser.parse(combined)) == [obj1, obj2]


@pytest.mark.parametrize("nr_of_docs", [1, 25, 2500])
Expand All @@ -145,5 +143,5 @@ def test_large_object(nr_of_docs: int) -> None:
[json.dumps({"pk": f"pk{i}", "data": str(uuid4())}) for i in range(nr_of_docs)]
)

res = list(JsonParser().parse(data))
res = list(JsonParser.parse(data))
assert len(res) == nr_of_docs

0 comments on commit 8e4c099

Please sign in to comment.