Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CORE-6836] schema_registry/json: Support Internal References #22815

Closed
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 79 additions & 50 deletions src/v/pandaproxy/schema_registry/json.cc
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,20 @@ struct pj {
}
};

class schema_context {
public:
explicit schema_context(json_schema_definition::impl const& schema)
: _schema{schema} {}

private:
json_schema_definition::impl const& _schema;
};

struct context {
schema_context older;
schema_context newer;
};

template<json_schema_dialect Dialect>
jsoncons::jsonschema::json_schema<jsoncons::json> const& get_metaschema() {
static auto const meteschema_doc = [] {
Expand Down Expand Up @@ -328,7 +342,8 @@ result<json::Document> parse_json(iobuf buf) {
// a schema O is a superset of another schema N if every schema that is valid
// for N is also valid for O. precondition: older and newer are both valid
// schemas
bool is_superset(json::Value const& older, json::Value const& newer);
bool is_superset(
context const& ctx, json::Value const& older, json::Value const& newer);

// close the implementation in a namespace to keep it contained
namespace is_superset_impl {
Expand Down Expand Up @@ -485,7 +500,8 @@ json_type_list normalized_type(json::Value const& v) {
}

// helper to convert a boolean to a schema
json::Value::ConstObject get_schema(json::Value const& v) {
json::Value::ConstObject
get_schema(schema_context const&, json::Value const& v) {
if (v.IsObject()) {
return v.GetObject();
}
Expand All @@ -503,12 +519,12 @@ json::Value::ConstObject get_schema(json::Value const& v) {

// helper to retrieve the object value for a key, or an empty object if the key
// is not present
json::Value::ConstObject
get_object_or_empty(json::Value const& v, std::string_view key) {
json::Value::ConstObject get_object_or_empty(
schema_context const& ctx, json::Value const& v, std::string_view key) {
auto it = v.FindMember(
json::Value{key.data(), rapidjson::SizeType(key.size())});
if (it != v.MemberEnd()) {
return get_schema(it->value);
return get_schema(ctx, it->value);
}

return get_true_schema();
Expand Down Expand Up @@ -629,6 +645,7 @@ bool is_numeric_property_value_superset(
enum class additional_field_for { object, array };

bool is_additional_superset(
context const& ctx,
json::Value const& older,
json::Value const& newer,
additional_field_for field_type) {
Expand Down Expand Up @@ -676,25 +693,25 @@ bool is_additional_superset(
// older=false -> newer=true - not compatible
return older;
},
[](bool older, json::Value const* newer) {
[&ctx](bool older, json::Value const* newer) {
if (older) {
// true is compatible with any schema
return true;
}
// likely false, but need to check
return is_superset(get_false_schema(), *newer);
return is_superset(ctx, get_false_schema(), *newer);
},
[](json::Value const* older, bool newer) {
[&ctx](json::Value const* older, bool newer) {
if (!newer) {
// any schema is compatible with false
return true;
}
// convert newer to {} and check against that
return is_superset(*older, get_true_schema());
return is_superset(ctx, *older, get_true_schema());
},
[](json::Value const* older, json::Value const* newer) {
[&ctx](json::Value const* older, json::Value const* newer) {
// check subschemas for compatibility
return is_superset(*older, *newer);
return is_superset(ctx, *older, *newer);
}),
get_additional_props(older),
get_additional_props(newer));
Expand Down Expand Up @@ -839,7 +856,8 @@ bool is_numeric_superset(json::Value const& older, json::Value const& newer) {
return true;
}

bool is_array_superset(json::Value const& older, json::Value const& newer) {
bool is_array_superset(
context const& ctx, json::Value const& older, json::Value const& newer) {
// "type": "array" is used to model an array or a tuple.
// for array, "items" is a schema that validates all the elements.
// for tuple in Draft4, "items" is an array of schemas to validate the
Expand Down Expand Up @@ -913,24 +931,30 @@ bool is_array_superset(json::Value const& older, json::Value const& newer) {
// not used by validation because every element is validated against
// "items"
return is_superset(
get_object_or_empty(older, "items"),
get_object_or_empty(newer, "items"));
ctx,
get_object_or_empty(ctx.older, older, "items"),
get_object_or_empty(ctx.newer, newer, "items"));
}

// both are tuple schemas, validation is similar to object. one side
// effect is that the "items" key is present.

// first check is for "additionalItems" compatibility, it's cheaper than the
// rest
if (!is_additional_superset(older, newer, additional_field_for::array)) {
if (!is_additional_superset(
ctx, older, newer, additional_field_for::array)) {
return false;
}

auto older_tuple_schema = older["items"].GetArray();
auto newer_tuple_schema = newer["items"].GetArray();
// find the first pair of schemas that do not match
auto [older_it, newer_it] = std::ranges::mismatch(
older_tuple_schema, newer_tuple_schema, is_superset);
older_tuple_schema,
newer_tuple_schema,
[&ctx](auto const& older, auto const& newer) {
return is_superset(ctx, older, newer);
});

if (
older_it != older_tuple_schema.end()
Expand All @@ -948,18 +972,18 @@ bool is_array_superset(json::Value const& older, json::Value const& newer) {
// excess elements with older["additionalItems"]

auto older_additional_schema = get_object_or_empty(
older, "additionalItems");
ctx.older, older, "additionalItems");

// check that all excess schemas are compatible with
// older["additionalItems"]
return std::all_of(
newer_it, newer_tuple_schema.end(), [&](json::Value const& n) {
return is_superset(older_additional_schema, n);
return is_superset(ctx, older_additional_schema, n);
});
}

bool is_object_properties_superset(
json::Value const& older, json::Value const& newer) {
context const& ctx, json::Value const& older, json::Value const& newer) {
// check that every property in newer["properties"]
// if it appears in older["properties"],
// then it has to be compatible with the schema
Expand All @@ -968,27 +992,27 @@ bool is_object_properties_superset(
// or
// it has to be compatible with older["additionalProperties"]

auto newer_properties = get_object_or_empty(newer, "properties");
auto newer_properties = get_object_or_empty(ctx.newer, newer, "properties");
if (newer_properties.ObjectEmpty()) {
// no "properties" in newer, all good
return true;
}

// older["properties"] is a map of <prop, schema>
auto older_properties = get_object_or_empty(older, "properties");
auto older_properties = get_object_or_empty(ctx.older, older, "properties");
// older["patternProperties"] is a map of <pattern, schema>
auto older_pattern_properties = get_object_or_empty(
older, "patternProperties");
ctx.older, older, "patternProperties");
// older["additionalProperties"] is a schema
auto older_additional_properties = get_object_or_empty(
older, "additionalProperties");
ctx.older, older, "additionalProperties");
// scan every prop in newer["properties"]
for (auto const& [prop, schema] : newer_properties) {
// it is either an evolution of a schema in older["properties"]
if (auto older_it = older_properties.FindMember(prop);
older_it != older_properties.MemberEnd()) {
// prop exists in both
if (!is_superset(older_it->value, schema)) {
if (!is_superset(ctx, older_it->value, schema)) {
// not compatible
return false;
}
Expand All @@ -1006,7 +1030,7 @@ bool is_object_properties_superset(
auto regex = re2::RE2(as_string_view(propPattern));
if (re2::RE2::PartialMatch(pname, regex)) {
pattern_match_found = true;
if (!is_superset(schemaPattern, schema)) {
if (!is_superset(ctx, schemaPattern, schema)) {
// not compatible
return false;
}
Expand All @@ -1017,7 +1041,7 @@ bool is_object_properties_superset(
// in patternProperties was found
if (
!pattern_match_found
&& !is_superset(older_additional_properties, schema)) {
&& !is_superset(ctx, older_additional_properties, schema)) {
// not compatible
return false;
}
Expand All @@ -1027,15 +1051,15 @@ bool is_object_properties_superset(
}

bool is_object_pattern_properties_superset(
json::Value const& older, json::Value const& newer) {
context const& ctx, json::Value const& older, json::Value const& newer) {
// check that every pattern property in newer["patternProperties"]
// appears in older["patternProperties"] and is compatible with the schema

// "patternProperties" is a map of <pattern, schema>
auto newer_pattern_properties = get_object_or_empty(
newer, "patternProperties");
ctx.newer, newer, "patternProperties");
auto older_pattern_properties = get_object_or_empty(
older, "patternProperties");
ctx.older, older, "patternProperties");

// TODO O(n^2) lookup
for (auto const& [pattern, schema] : newer_pattern_properties) {
Expand All @@ -1046,7 +1070,7 @@ bool is_object_pattern_properties_superset(
return false;
}

if (!is_superset(older_pp_it->value, schema)) {
if (!is_superset(ctx, older_pp_it->value, schema)) {
// not compatible
return false;
}
Expand All @@ -1056,7 +1080,7 @@ bool is_object_pattern_properties_superset(
}

bool is_object_required_superset(
json::Value const& older, json::Value const& newer) {
context const& ctx, json::Value const& older, json::Value const& newer) {
// to pass the check, a required property from newer has to be present in
// older, or if new it needs to be without a default value note that:
// 1. we check only required properties that are in both newer["properties"]
Expand All @@ -1067,8 +1091,8 @@ bool is_object_required_superset(

auto older_req = get_array_or_empty(older, "required");
auto newer_req = get_array_or_empty(newer, "required");
auto older_props = get_object_or_empty(older, "properties");
auto newer_props = get_object_or_empty(newer, "properties");
auto older_props = get_object_or_empty(ctx.older, older, "properties");
auto newer_props = get_object_or_empty(ctx.newer, newer, "properties");

// TODO O(n^2) lookup that can be a set_intersection
auto newer_props_in_older = older_props
Expand Down Expand Up @@ -1104,7 +1128,8 @@ bool is_object_required_superset(
return true;
}

bool is_object_superset(json::Value const& older, json::Value const& newer) {
bool is_object_superset(
context const& ctx, json::Value const& older, json::Value const& newer) {
if (!is_numeric_property_value_superset(
older, newer, "minProperties", std::less_equal<>{}, 0)) {
// newer requires less properties to be set
Expand All @@ -1115,11 +1140,12 @@ bool is_object_superset(json::Value const& older, json::Value const& newer) {
// newer requires more properties to be set
return false;
}
if (!is_additional_superset(older, newer, additional_field_for::object)) {
if (!is_additional_superset(
ctx, older, newer, additional_field_for::object)) {
// additional properties are not compatible
return false;
}
if (!is_object_properties_superset(older, newer)) {
if (!is_object_properties_superset(ctx, older, newer)) {
// "properties" in newer might not be compatible with
// older["properties"] (incompatible evolution) or
// older["patternProperties"] (it is not compatible with the pattern
Expand All @@ -1128,11 +1154,11 @@ bool is_object_superset(json::Value const& older, json::Value const& newer) {
// newer)
return false;
}
if (!is_object_pattern_properties_superset(older, newer)) {
if (!is_object_pattern_properties_superset(ctx, older, newer)) {
// pattern properties checks are not compatible
return false;
}
if (!is_object_required_superset(older, newer)) {
if (!is_object_required_superset(ctx, older, newer)) {
// required properties are not compatible
return false;
}
Expand Down Expand Up @@ -1182,7 +1208,7 @@ bool is_enum_superset(json::Value const& older, json::Value const& newer) {
}

bool is_not_combinator_superset(
json::Value const& older, json::Value const& newer) {
context const& ctx, json::Value const& older, json::Value const& newer) {
auto older_it = older.FindMember("not");
auto newer_it = newer.FindMember("not");
auto older_has_not = older_it != older.MemberEnd();
Expand All @@ -1197,7 +1223,7 @@ bool is_not_combinator_superset(
// for not combinator, we want to check if the "not" newer subschema is
// less strict than the older subschema, because this means that newer
// validated less data than older
return is_superset(newer_it->value, older_it->value);
return is_superset(ctx, newer_it->value, older_it->value);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

need to swap the order of contexts , like newer and older are subschema are swapped.
pseudo code:

Suggested change
return is_superset(ctx, newer_it->value, older_it->value);
return is_superset({ctx.newer, ctx.older}, newer_it->value, older_it->value);

otherwise newer will access the context for older as older will access the context for newer

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good spot!

}

// both do not have a "not" key, compatible
Expand All @@ -1217,7 +1243,7 @@ json::Value to_keyword(p_combinator c) {
}

bool is_positive_combinator_superset(
json::Value const& older, json::Value const& newer) {
context const& ctx, json::Value const& older, json::Value const& newer) {
auto get_combinator = [](json::Value const& v) {
auto res = std::optional<p_combinator>{};
for (auto c :
Expand Down Expand Up @@ -1301,7 +1327,7 @@ bool is_positive_combinator_superset(
auto superset_graph = graph_t{older_schemas.Size() + newer_schemas.Size()};
for (auto o = 0u; o < older_schemas.Size(); ++o) {
for (auto n = 0u; n < newer_schemas.Size(); ++n) {
if (is_superset(older_schemas[o], newer_schemas[n])) {
if (is_superset(ctx, older_schemas[o], newer_schemas[n])) {
// translate n for the graph
auto n_index = n + older_schemas.Size();
add_edge(o, n_index, superset_graph);
Expand Down Expand Up @@ -1335,16 +1361,18 @@ using namespace is_superset_impl;
// for N is also valid for O. precondition: older and newer are both valid
// schemas
bool is_superset(
json::Value const& older_schema, json::Value const& newer_schema) {
context const& ctx,
json::Value const& older_schema,
json::Value const& newer_schema) {
// break recursion if parameters are atoms:
if (is_true_schema(older_schema) || is_false_schema(newer_schema)) {
// either older is the superset of every possible schema, or newer is
// the subset of every possible schema
return true;
}

auto older = get_schema(older_schema);
auto newer = get_schema(newer_schema);
auto older = get_schema(ctx.older, older_schema);
auto newer = get_schema(ctx.newer, newer_schema);

// extract { "type" : ... }
auto older_types = normalized_type(older);
Expand Down Expand Up @@ -1385,12 +1413,12 @@ bool is_superset(
}
break;
case json_type::object:
if (!is_object_superset(older, newer)) {
if (!is_object_superset(ctx, older, newer)) {
return false;
}
break;
case json_type::array:
if (!is_array_superset(older, newer)) {
if (!is_array_superset(ctx, older, newer)) {
return false;
}
break;
Expand All @@ -1407,11 +1435,11 @@ bool is_superset(
return false;
}

if (!is_not_combinator_superset(older, newer)) {
if (!is_not_combinator_superset(ctx, older, newer)) {
return false;
}

if (!is_positive_combinator_superset(older, newer)) {
if (!is_positive_combinator_superset(ctx, older, newer)) {
return false;
}

Expand Down Expand Up @@ -1554,7 +1582,8 @@ bool check_compatible(
}
// reader is a superset of writer iff every schema that is valid for writer
// is also valid for reader
return is_superset(reader().doc, writer().doc);
context ctx{.older{reader()}, .newer{writer()}};
return is_superset(ctx, reader().doc, writer().doc);
}

} // namespace pandaproxy::schema_registry