From aea265d4f4df90b3927c28f03500940436c9ab21 Mon Sep 17 00:00:00 2001 From: Tim Schaub Date: Thu, 20 Jun 2024 13:38:40 -0600 Subject: [PATCH] Allow empty and null geometries in validation --- internal/geo/geo.go | 3 + .../testdata/bad-bbox-type/expected.json | 42 +++---- .../testdata/bad-crs-type/expected.json | 44 ++++---- .../with-empty-geometry/expected.json | 105 ++++++++++++++++++ .../testdata/with-empty-geometry/input.json | 37 ++++++ .../testdata/with-null-geometry/expected.json | 105 ++++++++++++++++++ .../testdata/with-null-geometry/input.json | 34 ++++++ internal/validator/validator.go | 3 + internal/validator/validator_test.go | 74 ++++++++++++ 9 files changed, 404 insertions(+), 43 deletions(-) create mode 100644 internal/validator/testdata/with-empty-geometry/expected.json create mode 100644 internal/validator/testdata/with-empty-geometry/input.json create mode 100644 internal/validator/testdata/with-null-geometry/expected.json create mode 100644 internal/validator/testdata/with-null-geometry/input.json diff --git a/internal/geo/geo.go b/internal/geo/geo.go index ed899b8..f560dbe 100644 --- a/internal/geo/geo.go +++ b/internal/geo/geo.go @@ -117,6 +117,9 @@ func DecodeGeometry(value any, encoding string) (*orbjson.Geometry, error) { if !ok { return nil, fmt.Errorf("expected bytes for wkb geometry, got %T", value) } + if len(data) == 0 { + return nil, nil + } g, err := wkb.Unmarshal(data) if err != nil { return nil, err diff --git a/internal/validator/testdata/bad-bbox-type/expected.json b/internal/validator/testdata/bad-bbox-type/expected.json index 8a0710c..4c8af10 100644 --- a/internal/validator/testdata/bad-bbox-type/expected.json +++ b/internal/validator/testdata/bad-bbox-type/expected.json @@ -47,59 +47,59 @@ }, { "title": "optional \"orientation\" must be a valid string", - "passed": true, - "run": true + "run": true, + "passed": true }, { "title": "optional \"edges\" must be a valid string", - "passed": true, - "run": true + "run": true, + "passed": true }, { "title": "optional \"bbox\" must be an array of 4 or 6 numbers", - "passed": false, "run": true, + "passed": false, "message": "expected \"bbox\" for column \"geometry\" to be a list, got a string: \"bogus\"" }, { "title": "optional \"epoch\" must be a number", - "passed": false, - "run": false + "run": false, + "passed": false }, { "title": "geometry columns must not be grouped", - "passed": false, - "run": false + "run": false, + "passed": false }, { "title": "geometry columns must be stored using the BYTE_ARRAY parquet type", - "passed": false, - "run": false + "run": false, + "passed": false }, { "title": "geometry columns must be required or optional, not repeated", - "passed": false, - "run": false + "run": false, + "passed": false }, { "title": "all geometry values match the \"encoding\" metadata", - "passed": false, - "run": false + "run": false, + "passed": false }, { "title": "all geometry types must be included in the \"geometry_types\" metadata (if not empty)", - "passed": false, - "run": false + "run": false, + "passed": false }, { "title": "all polygon geometries must follow the \"orientation\" metadata (if present)", - "passed": false, - "run": false + "run": false, + "passed": false }, { "title": "all geometries must fall within the \"bbox\" metadata (if present)", - "passed": false, - "run": false + "run": false, + "passed": false } ], "metadataOnly": false diff --git a/internal/validator/testdata/bad-crs-type/expected.json b/internal/validator/testdata/bad-crs-type/expected.json index 181474a..db37812 100644 --- a/internal/validator/testdata/bad-crs-type/expected.json +++ b/internal/validator/testdata/bad-crs-type/expected.json @@ -48,58 +48,58 @@ }, { "title": "optional \"orientation\" must be a valid string", - "passed": false, - "run": false + "run": false, + "passed": false }, { "title": "optional \"edges\" must be a valid string", - "passed": false, - "run": false + "run": false, + "passed": false }, { "title": "optional \"bbox\" must be an array of 4 or 6 numbers", - "passed": false, - "run": false + "run": false, + "passed": false }, { "title": "optional \"epoch\" must be a number", - "passed": false, - "run": false + "run": false, + "passed": false }, { "title": "geometry columns must not be grouped", - "passed": false, - "run": false + "run": false, + "passed": false }, { "title": "geometry columns must be stored using the BYTE_ARRAY parquet type", - "passed": false, - "run": false + "run": false, + "passed": false }, { "title": "geometry columns must be required or optional, not repeated", - "passed": false, - "run": false + "run": false, + "passed": false }, { "title": "all geometry values match the \"encoding\" metadata", - "passed": false, - "run": false + "run": false, + "passed": false }, { "title": "all geometry types must be included in the \"geometry_types\" metadata (if not empty)", - "passed": false, - "run": false + "run": false, + "passed": false }, { "title": "all polygon geometries must follow the \"orientation\" metadata (if present)", - "passed": false, - "run": false + "run": false, + "passed": false }, { "title": "all geometries must fall within the \"bbox\" metadata (if present)", - "passed": false, - "run": false + "run": false, + "passed": false } ], "metadataOnly": false diff --git a/internal/validator/testdata/with-empty-geometry/expected.json b/internal/validator/testdata/with-empty-geometry/expected.json new file mode 100644 index 0000000..313c688 --- /dev/null +++ b/internal/validator/testdata/with-empty-geometry/expected.json @@ -0,0 +1,105 @@ +{ + "checks": [ + { + "title": "file must include a \"geo\" metadata key", + "run": true, + "passed": true + }, + { + "title": "metadata must be a JSON object", + "run": true, + "passed": true + }, + { + "title": "metadata must include a \"version\" string", + "run": true, + "passed": true + }, + { + "title": "metadata must include a \"primary_column\" string", + "run": true, + "passed": true + }, + { + "title": "metadata must include a \"columns\" object", + "run": true, + "passed": true + }, + { + "title": "column metadata must include the \"primary_column\" name", + "run": true, + "passed": true + }, + { + "title": "column metadata must include a valid \"encoding\" string", + "run": true, + "passed": true + }, + { + "title": "column metadata must include a \"geometry_types\" list", + "run": true, + "passed": true + }, + { + "title": "optional \"crs\" must be null or a PROJJSON object", + "run": true, + "passed": true + }, + { + "title": "optional \"orientation\" must be a valid string", + "run": true, + "passed": true + }, + { + "title": "optional \"edges\" must be a valid string", + "run": true, + "passed": true + }, + { + "title": "optional \"bbox\" must be an array of 4 or 6 numbers", + "run": true, + "passed": true + }, + { + "title": "optional \"epoch\" must be a number", + "run": true, + "passed": true + }, + { + "title": "geometry columns must not be grouped", + "run": true, + "passed": true + }, + { + "title": "geometry columns must be stored using the BYTE_ARRAY parquet type", + "run": true, + "passed": true + }, + { + "title": "geometry columns must be required or optional, not repeated", + "run": true, + "passed": true + }, + { + "title": "all geometry values match the \"encoding\" metadata", + "run": true, + "passed": true + }, + { + "title": "all geometry types must be included in the \"geometry_types\" metadata (if not empty)", + "run": true, + "passed": true + }, + { + "title": "all polygon geometries must follow the \"orientation\" metadata (if present)", + "run": true, + "passed": true + }, + { + "title": "all geometries must fall within the \"bbox\" metadata (if present)", + "run": true, + "passed": true + } + ], + "metadataOnly": false +} \ No newline at end of file diff --git a/internal/validator/testdata/with-empty-geometry/input.json b/internal/validator/testdata/with-empty-geometry/input.json new file mode 100644 index 0000000..572fd9e --- /dev/null +++ b/internal/validator/testdata/with-empty-geometry/input.json @@ -0,0 +1,37 @@ +{ + "metadata": { + "version": "1.0.0", + "primary_column": "geometry", + "columns": { + "geometry": { + "encoding": "WKB", + "geometry_types": ["Point"] + } + } + }, + "data": { + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "properties": { + "name": "with geometry" + }, + "geometry": { + "type": "Point", + "coordinates": [0, 0] + } + }, + { + "type": "Feature", + "properties": { + "name": "empty geometry" + }, + "geometry": { + "type": "Point", + "coordinates": [] + } + } + ] + } +} \ No newline at end of file diff --git a/internal/validator/testdata/with-null-geometry/expected.json b/internal/validator/testdata/with-null-geometry/expected.json new file mode 100644 index 0000000..313c688 --- /dev/null +++ b/internal/validator/testdata/with-null-geometry/expected.json @@ -0,0 +1,105 @@ +{ + "checks": [ + { + "title": "file must include a \"geo\" metadata key", + "run": true, + "passed": true + }, + { + "title": "metadata must be a JSON object", + "run": true, + "passed": true + }, + { + "title": "metadata must include a \"version\" string", + "run": true, + "passed": true + }, + { + "title": "metadata must include a \"primary_column\" string", + "run": true, + "passed": true + }, + { + "title": "metadata must include a \"columns\" object", + "run": true, + "passed": true + }, + { + "title": "column metadata must include the \"primary_column\" name", + "run": true, + "passed": true + }, + { + "title": "column metadata must include a valid \"encoding\" string", + "run": true, + "passed": true + }, + { + "title": "column metadata must include a \"geometry_types\" list", + "run": true, + "passed": true + }, + { + "title": "optional \"crs\" must be null or a PROJJSON object", + "run": true, + "passed": true + }, + { + "title": "optional \"orientation\" must be a valid string", + "run": true, + "passed": true + }, + { + "title": "optional \"edges\" must be a valid string", + "run": true, + "passed": true + }, + { + "title": "optional \"bbox\" must be an array of 4 or 6 numbers", + "run": true, + "passed": true + }, + { + "title": "optional \"epoch\" must be a number", + "run": true, + "passed": true + }, + { + "title": "geometry columns must not be grouped", + "run": true, + "passed": true + }, + { + "title": "geometry columns must be stored using the BYTE_ARRAY parquet type", + "run": true, + "passed": true + }, + { + "title": "geometry columns must be required or optional, not repeated", + "run": true, + "passed": true + }, + { + "title": "all geometry values match the \"encoding\" metadata", + "run": true, + "passed": true + }, + { + "title": "all geometry types must be included in the \"geometry_types\" metadata (if not empty)", + "run": true, + "passed": true + }, + { + "title": "all polygon geometries must follow the \"orientation\" metadata (if present)", + "run": true, + "passed": true + }, + { + "title": "all geometries must fall within the \"bbox\" metadata (if present)", + "run": true, + "passed": true + } + ], + "metadataOnly": false +} \ No newline at end of file diff --git a/internal/validator/testdata/with-null-geometry/input.json b/internal/validator/testdata/with-null-geometry/input.json new file mode 100644 index 0000000..a09df02 --- /dev/null +++ b/internal/validator/testdata/with-null-geometry/input.json @@ -0,0 +1,34 @@ +{ + "metadata": { + "version": "1.0.0", + "primary_column": "geometry", + "columns": { + "geometry": { + "encoding": "WKB", + "geometry_types": ["Point"] + } + } + }, + "data": { + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "properties": { + "name": "with geometry" + }, + "geometry": { + "type": "Point", + "coordinates": [0, 0] + } + }, + { + "type": "Feature", + "properties": { + "name": "without geometry" + }, + "geometry": null + } + ] + } +} \ No newline at end of file diff --git a/internal/validator/validator.go b/internal/validator/validator.go index d42ce3f..b330763 100644 --- a/internal/validator/validator.go +++ b/internal/validator/validator.go @@ -237,6 +237,9 @@ func (v *Validator) Report(ctx context.Context, file *file.Reader) (*Report, err if err != nil { return nil, fmt.Errorf("failed to decode geometry for %q: %w", field.Name, err) } + if geometry == nil { + continue + } for i, rule := range decodedGeometryRules { check := decodedGeometryChecks[i] if err := rule.Value(field.Name, geometry.Geometry()); errors.Is(err, ErrFatal) { diff --git a/internal/validator/validator_test.go b/internal/validator/validator_test.go index f1b155f..090bcaf 100644 --- a/internal/validator/validator_test.go +++ b/internal/validator/validator_test.go @@ -280,6 +280,78 @@ func (s *Suite) TestConvertedWKB() { s.assertExpectedReport("all-pass-meta", metaReport) } +func (s *Suite) TestWKBWithNoData() { + type Row struct { + Name string `parquet:"name=name, logical=String" json:"name"` + Geometry []byte `parquet:"name=geometry" json:"geometry"` + } + + rows := []*Row{ + { + Name: "test-point-1", + Geometry: toWKB(s.T(), orb.Point{1, 2}), + }, + { + Name: "nil-geometry", + Geometry: nil, + }, + } + + input := test.ParquetFromStructs(s.T(), rows) + + geoparquetBytes := &bytes.Buffer{} + s.Require().NoError(geoparquet.FromParquet(input, geoparquetBytes, nil)) + + filePath := "test-wkb.parquet" + ctx := context.Background() + validatorAll := validator.New(false) + validatorMeta := validator.New(true) + + allReport, allErr := validatorAll.Validate(ctx, bytes.NewReader(geoparquetBytes.Bytes()), filePath) + s.Require().NoError(allErr) + s.assertExpectedReport("all-pass", allReport) + + metaReport, metaErr := validatorMeta.Validate(ctx, bytes.NewReader(geoparquetBytes.Bytes()), filePath) + s.Require().NoError(metaErr) + s.assertExpectedReport("all-pass-meta", metaReport) +} + +func (s *Suite) TestWKBWithEmptyPoint() { + type Row struct { + Name string `parquet:"name=name, logical=String" json:"name"` + Geometry []byte `parquet:"name=geometry" json:"geometry"` + } + + rows := []*Row{ + { + Name: "test-point-1", + Geometry: toWKB(s.T(), orb.Point{1, 2}), + }, + { + Name: "empty-geometry", + Geometry: toWKB(s.T(), orb.Point{}), + }, + } + + input := test.ParquetFromStructs(s.T(), rows) + + geoparquetBytes := &bytes.Buffer{} + s.Require().NoError(geoparquet.FromParquet(input, geoparquetBytes, nil)) + + filePath := "test-wkb.parquet" + ctx := context.Background() + validatorAll := validator.New(false) + validatorMeta := validator.New(true) + + allReport, allErr := validatorAll.Validate(ctx, bytes.NewReader(geoparquetBytes.Bytes()), filePath) + s.Require().NoError(allErr) + s.assertExpectedReport("all-pass", allReport) + + metaReport, metaErr := validatorMeta.Validate(ctx, bytes.NewReader(geoparquetBytes.Bytes()), filePath) + s.Require().NoError(metaErr) + s.assertExpectedReport("all-pass-meta", metaReport) +} + func (s *Suite) TestConvertedAltPrimaryColumnWKB() { type Row struct { Name string `parquet:"name=name, logical=String" json:"name"` @@ -348,6 +420,8 @@ func (s *Suite) TestReport() { "geometry-outside-bbox", "geometry-inside-antimeridian-spanning-bbox", "geometry-outside-antimeridian-spanning-bbox", + "with-empty-geometry", + "with-null-geometry", } ctx := context.Background()