Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Validateur NeTEx avec enRoute's Chouette #4140

Merged
merged 6 commits into from
Sep 2, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
311 changes: 311 additions & 0 deletions apps/transport/lib/validators/netex_validator.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,311 @@
defmodule Transport.Validators.NeTEx do
import TransportWeb.Gettext, only: [dgettext: 2]

@moduledoc """
Validator for NeTEx files calling enRoute Chouette Valid API.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Documenter que ce validateur effectue la phase de polling d'une API distante et que les appels à validate_ vont prendre plusieurs minutes ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Je peux.

Pour ma gouverne, n'est-ce pas commun à tous les validateurs d'être bloquants et potentiellement longs ?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Si, tout à fait. La différence est que les autres validateurs ont des temps de traitement bien plus rapides, de l'ordre de 1-5s la plupart du temps.

Avoir un temps classique de plusieurs minutes a donc un impact si on veut proposer ça à la demande ou dans une chaine de validations.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

En effet pour enchainer les validations ça peut surprendre. (Pour le on demand le pulling est implémenté via un job oban.)

"""
require Logger
@behaviour Transport.Validators.Validator

@impl Transport.Validators.Validator
def validator_name, do: "enroute-chouette-validator"
AntoineAugusti marked this conversation as resolved.
Show resolved Hide resolved

@impl Transport.Validators.Validator
def validate_and_save(%DB.Resource{format: "NeTEx", id: resource_id}) do
Logger.info("Validating #{resource_id} with enRoute Chouette Valid")

resource_history = DB.ResourceHistory.latest_resource_history(resource_id)
with_resource_file(resource_history, &validate_resource_history(resource_history, &1))
end

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pour les 2 validate_, envisager de stocker des informations pour analyser la performance du validateur : temps d'exécution, nombre de retries etc ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Dans les metadata ?

def validate_resource_history(resource_history, filepath) do
case validate_with_enroute(filepath) do
{:ok, result_url} ->
insert_validation_results(resource_history.id, result_url)
:ok

{:error, {result_url, errors}} ->
insert_validation_results(resource_history.id, result_url, errors)
:ok

{:error, :unexpected_validation_status} ->
Logger.error("Invalid API call to enRoute Chouette Valid")
{:error, "enRoute Chouette Valid: Unexpected validation status"}
end
end

@type validate_options :: [{:graceful_retry, boolean()}]

@doc """
Validate the resource from the given URL.
"""
@spec validate(binary(), validate_options()) :: {:ok, map()} | {:error, binary()}
def validate(url, opts \\ []) do
with_url(url, fn filepath ->
case validate_with_enroute(filepath, opts) do
{:ok, result_url} ->
# result_url in metadata?
Logger.info("Result URL: #{result_url}")
{:ok, %{"validations" => index_messages([]), "metadata" => %{}}}

{:error, {result_url, errors}} ->
Logger.info("Result URL: #{result_url}")
# result_url in metadata?
{:ok, %{"validations" => index_messages(errors), "metadata" => %{}}}

{:error, :unexpected_validation_status} ->
Logger.error("Invalid API call to enRoute Chouette Valid")
{:error, "enRoute Chouette Valid: Unexpected validation status"}
end
end)
end

@spec with_resource_file(DB.ResourceHistory.t(), (Path.t() -> any())) :: any()
def with_resource_file(resource_history, closure) do
%DB.ResourceHistory{payload: %{"permanent_url" => permanent_url}} = resource_history
filepath = tmp_path(resource_history)

with_temp_file(permanent_url, filepath, closure)
end

@spec with_url(binary(), (Path.t() -> any())) :: any()
def with_url(url, closure) do
with_temp_file(url, tmp_path(url), closure)
end

@spec with_temp_file(binary(), Path.t(), (Path.t() -> any())) :: any()
def with_temp_file(url, filepath, closure) do
AntoineAugusti marked this conversation as resolved.
Show resolved Hide resolved
http_client().get!(url, compressed: false, into: File.stream!(filepath))
ptitfred marked this conversation as resolved.
Show resolved Hide resolved
closure.(filepath)
after
File.rm(filepath)
end

defp http_client, do: Transport.Req.impl()

defp tmp_path(%DB.ResourceHistory{id: resource_history_id}) do
System.tmp_dir!() |> Path.join("enroute_validation_netex_#{resource_history_id}")
end

defp tmp_path(_other) do
AntoineAugusti marked this conversation as resolved.
Show resolved Hide resolved
System.tmp_dir!() |> Path.join("enroute_validation_netex_#{Ecto.UUID.generate()}")
end

def insert_validation_results(resource_history_id, result_url, errors \\ []) do
result = index_messages(errors)

%DB.MultiValidation{
validation_timestamp: DateTime.utc_now(),
validator: validator_name(),
result: result,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Se pose la question d'uniformiser ce qui est stocké dans result entre les différents validateurs, ce n'est pas facile. Les validateurs JSONSchema, TableSchema et GBFS ont des clés du style has_errors, nb_errors, errors (liste d'erreurs).

Tu as comparé ce qui était fait entre ceux-ci et GTFS-RT et GTFS ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Initialement je pensais pouvoir faire ça et j'avais quelque chose équivalent à JSONSchema TableSchema et GBFS puis finalement ça s'est rapproché de GTFS (notamment en implémentant la visualisation) et j'ai découvert au passage que ce n'était pas standardisé.

Les erreurs elle-même ne seront pas standardisées si j'en crois les différents templates de rendu, et la présence de helpers spécifiques à chaque format pour interpréter ces résultats ne simplifie pas la tâche.

On pourrait avoir une description plus générale des résultats de validation avec toujours une gestion des spécificités, ce n'est pas un problème, mais je ne suis pas à l'aise pour faire un tel changement actuellement. On aurait quoi qu'il en soit des templates spécifiques à GTFS et NeTEx et déjà du code générique pour les autres formats.

Mon avis est donc en résumé que chercher à généraliser les validateurs est peu bénéfique en l'état. Et si on décidait de le faire je serais bien peu à l'aise pour le faire par le manque de spécification des structures de données que l'on manipule dans la validation de ressources.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Merci pour tes explications, je partage ton point de vue. J'aimerais que ce soit plus simple et évident mais ce n'est pas le cas je crois.

resource_history_id: resource_history_id,
validator_version: validator_version(),
command: result_url,
max_error: get_max_severity_error(result)
}
|> DB.Repo.insert!()
end

@doc """
Returns the maximum issue severity found

iex> validation_result = %{"uic-operating-period" => [%{"criticity" => "error"}], "valid-day-bits" => [%{"criticity" => "error"}], "frame-arret-resources" => [%{"criticity" => "error"}]}
iex> get_max_severity_error(validation_result)
"error"

iex> get_max_severity_error(%{})
"NoError"
"""
@spec get_max_severity_error(map()) :: binary() | nil
def get_max_severity_error(validation_result) do
{severity, _} = validation_result |> count_max_severity()
severity
end

@no_error "NoError"
AntoineAugusti marked this conversation as resolved.
Show resolved Hide resolved

@doc """
Returns the maximum severity, with the issues count

iex> validation_result = %{"uic-operating-period" => [%{"criticity" => "error"}], "valid-day-bits" => [%{"criticity" => "error"}], "frame-arret-resources" => [%{"criticity" => "error"}]}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ajouter un cas warning ici ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok.

iex> count_max_severity(validation_result)
{"error", 3}
iex> count_max_severity(%{})
{"NoError", 0}
"""
@spec count_max_severity(map()) :: {binary(), integer()}
def count_max_severity(validation_result) when validation_result == %{} do
{@no_error, 0}
end

def count_max_severity(%{} = validation_result) do
validation_result
|> count_by_severity()
|> Enum.min_by(fn {severity, _count} -> severity |> severities() |> Map.get(:level) end)
end

@spec severities_map() :: map()
def severities_map,
do: %{
"error" => %{level: 1, text: dgettext("netex-validator", "errors")},
"warning" => %{level: 2, text: dgettext("netex-validator", "warnings")},
"information" => %{level: 3, text: dgettext("netex-validator", "informations")}
}

@spec severities(binary()) :: %{level: integer(), text: binary()}
def severities(key), do: severities_map()[key]
AntoineAugusti marked this conversation as resolved.
Show resolved Hide resolved

@doc """
Returns the number of issues by severity level

iex> validation_result = %{"uic-operating-period" => [%{"criticity" => "warning"}], "valid-day-bits" => [%{"criticity" => "error"}], "frame-arret-resources" => [%{"criticity" => "error"}]}
iex> count_by_severity(validation_result)
%{"warning" => 1, "error" => 2}

iex> count_by_severity(%{})
%{}
"""
@spec count_by_severity(map()) :: map()
def count_by_severity(%{} = validation_result) do
validation_result
|> Enum.flat_map(fn {_, v} -> v end)
|> Enum.reduce(%{}, fn v, acc -> Map.update(acc, v["criticity"], 1, &(&1 + 1)) end)
end

def count_by_severity(_), do: %{}

defp validate_with_enroute(filepath, opts \\ []) do
client().create_a_validation(filepath) |> fetch_validation_results(0, opts)
end

defp fetch_validation_results(validation_id, retries, opts) do
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On a un max retry ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pas encore, je comptais le faire dans un second temps. En l'état ce code espère que le validateur implémentera ce timeout, mais l'expérience a montré que c'était pas le cas.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

D'acc, à voir plus tard alors !

case client().get_a_validation(validation_id) do
:pending ->
if Keyword.get(opts, :graceful_retry, true) do
retries |> poll_interval() |> :timer.sleep()
end
AntoineAugusti marked this conversation as resolved.
Show resolved Hide resolved

fetch_validation_results(validation_id, retries + 1, opts)

{:successful, url} ->
{:ok, url}

value when value in [:warning, :failed] ->
{:error, client().get_messages(validation_id)}

:unexpected_validation_status ->
{:error, :unexpected_validation_status}
end
end

@doc """
iex> index_messages([])
%{}

iex> index_messages([%{"code"=>"a", "id"=> 1}, %{"code"=>"a", "id"=> 2}, %{"code"=>"b", "id"=> 3}])
%{"a"=>[%{"code"=>"a", "id"=> 1}, %{"code"=>"a", "id"=> 2}], "b"=>[%{"code"=>"b", "id"=> 3}]}
"""
def index_messages(messages) do
messages |> Enum.group_by(fn %{"code" => code} -> code end)
end

@doc """
Poll interval to play nice with the tier.

iex> 0..9 |> Enum.map(&poll_interval(&1))
[10000, 10000, 10000, 10000, 10000, 10000, 20000, 20000, 20000, 20000]
ptitfred marked this conversation as resolved.
Show resolved Hide resolved
"""
def poll_interval(nb_tries) when nb_tries < 6, do: 10_000
def poll_interval(_), do: 20_000
AntoineAugusti marked this conversation as resolved.
Show resolved Hide resolved

# This will change with an actual versioning of the validator
def validator_version, do: "saas-production"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Déplacer à côté du nom du validateur ? Ce n'est pas dans le behaviour ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ce n'est pas dans le behaviour ?

Non, c'est un champs de MultiValidation.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Peut-être à ajouter dans

defmodule Transport.Validators.Validator do
un jour


@doc """
iex> validation_result = %{"uic-operating-period" => [%{"code" => "uic-operating-period", "message" => "Resource 23504000009 hasn't expected class but Netex::OperatingPeriod", "criticity" => "error"}], "valid-day-bits" => [%{"code" => "valid-day-bits", "message" => "Mandatory attribute valid_day_bits not found", "criticity" => "error"}], "frame-arret-resources" => [%{"code" => "frame-arret-resources", "message" => "Tag frame_id doesn't match ''", "criticity" => "warning"}]}
iex> summary(validation_result)
[
{"error", [
{"uic-operating-period", %{count: 1, criticity: "error", title: "UIC operating period"}},
{"valid-day-bits", %{count: 1, criticity: "error", title: "Valid day bits"}}
]},
{"warning", [{"frame-arret-resources", %{count: 1, criticity: "warning", title: "Frame arret resources"}}]}
]
iex> summary(%{})
[]
"""
@spec summary(map()) :: list()
def summary(%{} = validation_result) do
validation_result
|> Enum.map(fn {code, errors} ->
{code,
%{
count: length(errors),
criticity: Map.get(hd(errors), "criticity"),
title: Map.get(issues_short_translation(), code, code)
}}
end)
|> Enum.group_by(fn {_, details} -> details.criticity end)
|> Enum.sort_by(fn {criticity, _} -> severities(criticity).level end)
end

@spec issues_short_translation() :: %{binary() => binary()}
def issues_short_translation,
do: %{
"composite-frame-ligne-mandatory" => dgettext("netex-validator", "Composite frame ligne mandatory"),
"frame-arret-resources" => dgettext("netex-validator", "Frame arret resources"),
"frame-calendrier-resources" => dgettext("netex-validator", "Frame calendrier resources"),
"frame-horaire-resources" => dgettext("netex-validator", "Frame horaire resources"),
"frame-ligne-resources" => dgettext("netex-validator", "Frame ligne resources"),
"frame-reseau-resources" => dgettext("netex-validator", "Frame reseau resources"),
"latitude-mandatory" => dgettext("netex-validator", "Latitude mandatory"),
"longitude-mandatory" => dgettext("netex-validator", "Longitude mandatory"),
"uic-operating-period" => dgettext("netex-validator", "UIC operating period"),
"valid-day-bits" => dgettext("netex-validator", "Valid day bits"),
"version-any" => dgettext("netex-validator", "Version any")
}

@doc """
Get issues from validation results. For a specific issue type if specified, or the most severe.

iex> validation_result = %{"uic-operating-period" => [%{"code" => "uic-operating-period", "message" => "Resource 23504000009 hasn't expected class but Netex::OperatingPeriod", "criticity" => "error"}], "valid-day-bits" => [%{"code" => "valid-day-bits", "message" => "Mandatory attribute valid_day_bits not found", "criticity" => "error"}], "frame-arret-resources" => [%{"code" => "frame-arret-resources", "message" => "Tag frame_id doesn't match ''", "criticity" => "warning"}]}
iex> get_issues(validation_result, %{"issue_type" => "uic-operating-period"})
[%{"code" => "uic-operating-period", "message" => "Resource 23504000009 hasn't expected class but Netex::OperatingPeriod", "criticity" => "error"}]
iex> get_issues(validation_result, %{"issue_type" => "broken-file"})
[]
iex> get_issues(validation_result, nil)
[%{"code" => "uic-operating-period", "message" => "Resource 23504000009 hasn't expected class but Netex::OperatingPeriod", "criticity" => "error"}]
iex> get_issues(%{}, nil)
[]
iex> get_issues([], nil)
[]
"""
def get_issues(%{} = validation_result, %{"issue_type" => issue_type}) do
Map.get(validation_result, issue_type, []) |> order_issues_by_location()
end

def get_issues(%{} = validation_result, _) do
validation_result
|> Map.values()
|> Enum.sort_by(fn [%{"criticity" => severity} | _] -> severities(severity).level end)
|> List.first([])
|> order_issues_by_location()
end

def get_issues(_, _), do: []

def order_issues_by_location(issues) do
issues
|> Enum.sort_by(fn issue ->
message = Map.get(issue, "message", "")
resource = Map.get(issue, "resource", %{})
filename = Map.get(resource, "filename", "")
line = Map.get(resource, "line", "")
{filename, line, message}
end)
end

defp client do
Transport.EnRouteChouetteValidClient.Wrapper.impl()
end
end
1 change: 1 addition & 0 deletions apps/transport/lib/validators/validator_selection.ex
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ defmodule Transport.ValidatorsSelection.Impl do
def validators(%{format: "GTFS"}), do: [Validators.GTFSTransport]
def validators(%{format: "gtfs-rt"}), do: [Validators.GTFSRT]
def validators(%{format: "gbfs"}), do: [Validators.GBFSValidator]
def validators(%{format: "NeTEx"}), do: [Validators.NeTEx]

def validators(%{schema_name: schema_name}) when not is_nil(schema_name) do
cond do
Expand Down
68 changes: 68 additions & 0 deletions apps/transport/priv/gettext/en/LC_MESSAGES/netex-validator.po
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
## "msgid"s in this file come from POT (.pot) files.
###
### Do not add, change, or remove "msgid"s manually here as
### they're tied to the ones in the corresponding POT file
### (with the same domain).
###
### Use "mix gettext.extract --merge" or "mix gettext.merge"
### to merge POT files into PO files.
msgid ""
msgstr ""
"Language: en\n"
"Plural-Forms: nplurals=2; plural=(n != 1);\n"

#, elixir-autogen, elixir-format
msgid "Composite frame ligne mandatory"
msgstr ""

#, elixir-autogen, elixir-format
msgid "Frame arret resources"
msgstr ""

#, elixir-autogen, elixir-format
msgid "Frame calendrier resources"
msgstr ""

#, elixir-autogen, elixir-format
msgid "Frame horaire resources"
msgstr ""

#, elixir-autogen, elixir-format
msgid "Frame ligne resources"
msgstr ""

#, elixir-autogen, elixir-format
msgid "Frame reseau resources"
msgstr ""

#, elixir-autogen, elixir-format
msgid "Latitude mandatory"
msgstr ""

#, elixir-autogen, elixir-format
msgid "Longitude mandatory"
msgstr ""

#, elixir-autogen, elixir-format
msgid "UIC operating period"
msgstr ""

#, elixir-autogen, elixir-format
msgid "Valid day bits"
msgstr ""

#, elixir-autogen, elixir-format
msgid "Version any"
msgstr ""

#, elixir-autogen, elixir-format
msgid "errors"
msgstr ""

#, elixir-autogen, elixir-format
msgid "informations"
msgstr ""

#, elixir-autogen, elixir-format
msgid "warnings"
msgstr ""
Loading