Skip to content

Commit

Permalink
DataChecker : gère JDD sans producteur (#4091)
Browse files Browse the repository at this point in the history
  • Loading branch information
AntoineAugusti authored Jul 25, 2024
1 parent 0285419 commit a97ba4b
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 11 deletions.
23 changes: 15 additions & 8 deletions apps/transport/lib/transport/data_checker.ex
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@ defmodule Transport.DataChecker do
@moduledoc """
Used to check data and act on the results, e.g. by sending emails.
"""
alias Datagouvfr.Client.Datasets
alias DB.{Dataset, Repo}
import Ecto.Query
require Logger

@type delay_and_records :: {integer(), [{DB.Dataset.t(), [DB.Resource.t()]}]}
@type dataset_status :: :active | :inactive | :ignore | :no_producer | {:archived, DateTime.t()}
@expiration_reason Transport.NotificationReason.reason(:expiration)
@new_dataset_reason Transport.NotificationReason.reason(:new_dataset)
# If delay < 0, the resource is already expired
Expand Down Expand Up @@ -36,7 +36,11 @@ defmodule Transport.DataChecker do
# Some datasets marked as active in our database may have disappeared
# on the data.gouv.fr side: mark them as inactive.
current_nb_active_datasets = Repo.aggregate(Dataset.base_query(), :count, :id)
inactive_datasets = for {%Dataset{is_active: true} = dataset, :inactive} <- datasets_statuses, do: dataset

inactive_datasets =
for {%DB.Dataset{is_active: true} = dataset, status} <- datasets_statuses,
status in [:inactive, :no_producer],
do: dataset

inactive_ids = Enum.map(inactive_datasets, & &1.id)
desactivates_over_10_percent_datasets = Enum.count(inactive_datasets) > current_nb_active_datasets * 10 / 100
Expand All @@ -60,24 +64,27 @@ defmodule Transport.DataChecker do
send_inactive_datasets_mail(to_reactivate_datasets, inactive_datasets, archived_datasets)
end

@spec datasets_datagouv_statuses :: list
@spec datasets_datagouv_statuses :: [{DB.Dataset.t(), dataset_status()}]
def datasets_datagouv_statuses do
Dataset
DB.Dataset
|> order_by(:id)
|> Repo.all()
|> DB.Repo.all()
|> Enum.map(&{&1, dataset_status(&1)})
end

@spec dataset_status(Dataset.t()) :: :active | :inactive | :ignore | {:archived, DateTime.t()}
defp dataset_status(%Dataset{datagouv_id: datagouv_id}) do
case Datasets.get(datagouv_id) do
@spec dataset_status(DB.Dataset.t()) :: dataset_status()
defp dataset_status(%DB.Dataset{datagouv_id: datagouv_id}) do
case Datagouvfr.Client.Datasets.get(datagouv_id) do
{:ok, %{"archived" => nil}} ->
:active

{:ok, %{"archived" => archived}} ->
{:ok, datetime, 0} = DateTime.from_iso8601(archived)
{:archived, datetime}

{:ok, %{"organization" => nil, "owner" => nil}} ->
:no_producer

{:error, %HTTPoison.Error{} = error} ->
Sentry.capture_message(
"Unable to get Dataset status from data.gouv.fr",
Expand Down
17 changes: 14 additions & 3 deletions apps/transport/test/transport/data_checker_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ defmodule Transport.DataCheckerTest do
{:ok, %HTTPoison.Response{status_code: 200, body: ~s({"archived":null})}}
end)

# running the job (...)
# running the job
Transport.DataChecker.inactive_data()

assert_email_sent(
Expand All @@ -48,7 +48,7 @@ defmodule Transport.DataCheckerTest do
test "warns our team of datasets disappearing on data gouv and mark them as such locally" do
# Create a bunch of random datasets to avoid triggering the safety net
# of deactivating more than 10% of active datasets
Enum.each(1..20, fn _ ->
Enum.each(1..25, fn _ ->
dataset = insert(:dataset, is_active: true, datagouv_id: Ecto.UUID.generate())
api_url = "https://demo.data.gouv.fr/api/1/datasets/#{dataset.datagouv_id}/"

Expand Down Expand Up @@ -100,7 +100,16 @@ defmodule Transport.DataCheckerTest do
{:ok, %HTTPoison.Response{status_code: 410, body: "{\"message\": \"Dataset has been deleted\"}"}}
end)

# running the job (...)
# This dataset does not have a producer anymore
dataset_no_producer = insert(:dataset, is_active: true)
url_no_producer = "https://demo.data.gouv.fr/api/1/datasets/#{dataset_no_producer.datagouv_id}/"

Transport.HTTPoison.Mock
|> expect(:request, fn :get, ^url_no_producer, "", [], [follow_redirect: true] ->
{:ok, %HTTPoison.Response{status_code: 200, body: Jason.encode!(%{owner: nil, organization: nil})}}
end)

# running the job
Transport.DataChecker.inactive_data()

assert_email_sent(
Expand All @@ -119,6 +128,8 @@ defmodule Transport.DataCheckerTest do
assert %DB.Dataset{is_active: true} = DB.Repo.reload!(dataset_500)
# we got a 410 GONE HTTP code: we should deactivate the dataset
assert %DB.Dataset{is_active: false} = DB.Repo.reload!(dataset_410)
# no owner or organization: we should deactivate the dataset
assert %DB.Dataset{is_active: false} = DB.Repo.reload!(dataset_no_producer)

verify!(Transport.HTTPoison.Mock)
end
Expand Down

0 comments on commit a97ba4b

Please sign in to comment.