diff --git a/README.md b/README.md index 23739461e..ab69e7c42 100644 --- a/README.md +++ b/README.md @@ -498,3 +498,7 @@ The following environment variables need to be defined in your .env file in orde OPEN_CATALOG_URLS=delimited list of api endpoint urls that webhooks should be sent to OPEN_CATALOG_WEBHOOK_KEY=secret key that will be used to confirm that webhook requests are legitimate ``` + +# Checking External Resource Availability + +This feature sets up a cron job to validate external resource urls. The workflow for checking external resource availability is described [here](/external_resources/README.md). diff --git a/external_resources/README.md b/external_resources/README.md new file mode 100644 index 000000000..089cc8d6d --- /dev/null +++ b/external_resources/README.md @@ -0,0 +1,48 @@ +# External Resource Availability Workflow + +This document describes the workflow for the external resources validation tasks. + +**SECTIONS** + +1. [Overview](#overview) +1. [Enabling Task](#enabling-task) +1. [Frequency Control](#frequency-control) +1. [Rate Limiting](#rate-limiting) +1. [Task Priority](#task-priority) + + +# Overview + +This assumes that celery beat scheduler is installed and enabled, which is required for the task scheduling. + +Frequency for the task is set to `1/week`. After each week, all external resources, new or existing, will be validated regardless of their last status. + +The high-level description of the process is below, and each subsequent section contains additional details, including links to the relevant code. + +* Task is automatically added in scheduler on system start. +* On execution, all available external resources are retrieved from DB. +* Gathered data is divided into preconfigured batch sizes. +* All batches are grouped into a single celery task and executed. +* Each batch-task iterates over batch to validate availability of each resource and its backup resource if available. +* The status of resource is then added to DB. 
+* Batch tasks have a preconfigured rate-limiter and lower priority by default. + + +## Enabling Task +The task for external resource checking can be enabled/disabled using the `CHECK_EXTERNAL_RESOURCE_TASK_ENABLE` setting defined [here](/main/settings.py), which is read from the `CHECK_EXTERNAL_RESOURCE_TASK_STATUS` environment variable. However, once scheduled, the task can be removed only if `Celery` is restarted along with toggling `CHECK_EXTERNAL_RESOURCE_TASK_ENABLE` to `False`. + +## Frequency Control + +The task frequency (in seconds) is set using the `CHECK_EXTERNAL_RESOURCE_STATUS_FREQUENCY` setting defined [here](/main/settings.py). The default value for the frequency is `604800 seconds -> 1 week`. + + +## Rate Limiting + +The rate-limit for the external resource batch-tasks is set using `EXTERNAL_RESOURCE_TASK_RATE_LIMIT` defined [here](constants.py). The assigned value for the rate-limiter is `100/s`. + + +## Task Priority + +Batch-task priority is set using the `EXTERNAL_RESOURCE_TASK_PRIORITY` defined [here](constants.py). The default priority for each celery task has been preconfigured to `2` out of range `0 (highest) - 4 (lowest)`. External resource tasks have the lowest (`4`) priority by default. + +Priority levels and celery default task priority can be configured by `PRIORITY_STEPS` and `DEFAULT_PRIORITY`, respectively, defined [here](/main/constants.py). 
class ExternalResourceStateAdmin(TimestampedModelAdmin):
    """Admin options for ExternalResourceState records"""

    model = ExternalResourceState

    include_created_on_in_list = True
    ordering = ("-created_on",)
    raw_id_fields = ("content",)
    list_filter = ("status",)
    list_display = (
        "get_content_title",
        "get_content_text_id",
        "get_website_name",
    )
    search_fields = (
        "content__text_id",
        "content__title",
        "content__website__name",
        "content__website__title",
    )

    def get_queryset(self, request):  # noqa: ARG002
        """Return the states together with their WebsiteContent and Website"""
        # select_related pulls content and content.website in the same query,
        # so the display callables below do not issue one query per row.
        states = self.model.objects.get_queryset()
        return states.select_related("content__website")

    @admin.display(description="Content Title", ordering="content__title")
    def get_content_title(self, obj):
        """Return the title of the related WebsiteContent"""
        return obj.content.title

    @admin.display(description="Content Text ID", ordering="content__text_id")
    def get_content_text_id(self, obj):
        """Return the text ID of the related WebsiteContent"""
        return obj.content.text_id

    @admin.display(description="Website", ordering="content__website__name")
    def get_website_name(self, obj):
        """Return the name of the Website that owns the related content"""
        return obj.content.website.name


admin.site.register(ExternalResourceState, ExternalResourceStateAdmin)
# Named after the module so log records can be attributed and filtered.
log = logging.getLogger(__name__)


def is_url_broken(url: Optional[str]) -> tuple[bool, Optional[int]]:
    """
    Check whether the provided url responds with an HTTP error status.

    Args:
        url: The url to check. ``None``, an empty string or a
            whitespace-only string means "no url" and is not requested.

    Returns:
        A ``(is_broken, status_code)`` tuple. ``status_code`` is ``None``
        when no request was made. ``is_broken`` is ``True`` when the status
        code lies in ``[RESOURCE_BROKEN_STATUS_START,
        RESOURCE_BROKEN_STATUS_END)``.

    Raises:
        CheckFailedError: If the HEAD request could not be completed.
    """
    # Metadata may hold an explicit None for a url key, in which case
    # dict.get(key, "") still returns None; treat it like a blank url
    # instead of failing on None.strip().
    if not url or not url.strip():
        return False, None

    log.debug("Making a HEAD request for url: %s", url)

    try:
        response = requests.head(url, allow_redirects=True, timeout=30)
    except Exception as ex:
        # Any failure to obtain a response is reported uniformly so callers
        # only need to handle CheckFailedError.
        log.debug(ex)
        raise CheckFailedError from ex

    status_code = response.status_code
    is_broken = (
        RESOURCE_BROKEN_STATUS_START <= status_code < RESOURCE_BROKEN_STATUS_END
    )
    return is_broken, status_code


def is_external_url_broken(
    external_resource: WebsiteContent,
) -> tuple[bool, Optional[int]]:
    """
    Check if the ``external_url`` stored in the content's metadata is broken.

    Returns the same ``(is_broken, status_code)`` tuple as ``is_url_broken``.
    """
    url = external_resource.metadata.get("external_url", "")
    return is_url_broken(url)


def is_backup_url_broken(
    external_resource: WebsiteContent,
) -> tuple[bool, Optional[int]]:
    """
    Check if the ``backup_url`` stored in the content's metadata is broken.

    Returns the same ``(is_broken, status_code)`` tuple as ``is_url_broken``.
    """
    url = external_resource.metadata.get("backup_url", "")
    return is_url_broken(url)
"""Test for working url""" + mock_response = mocker.Mock(status_code=200) + mocker.patch("external_resources.api.requests.head", return_value=mock_response) + + result, status_code = is_url_broken("http://google.com") + assert not result + assert status_code == 200 + + +@pytest.mark.parametrize("status_code", RESOURCE_UNCHECKED_STATUSES) +def test_is_url_broken_whitelisted(mocker, status_code): + """Test for broken url""" + mock_response = mocker.Mock(status_code=status_code) + mocker.patch("external_resources.api.requests.head", return_value=mock_response) + + result, response_status_code = is_url_broken("http://google.com/") + assert result + assert response_status_code == status_code + + +def test_is_url_broken_empty(): + """Test for empty url""" + result, status_code = is_url_broken("") + assert not result + assert status_code is None + + +def test_is_url_broken_exception(mocker): + """Test for connection error""" + mocker.patch( + "external_resources.api.requests.head", + side_effect=CheckFailedError, + ) + + with pytest.raises(CheckFailedError): + is_url_broken("http://google.com") diff --git a/external_resources/apps.py b/external_resources/apps.py new file mode 100644 index 000000000..0b2f497ea --- /dev/null +++ b/external_resources/apps.py @@ -0,0 +1,10 @@ +"""External Resources Apps""" + +from django.apps import AppConfig + + +class ExternalResourcesConfig(AppConfig): + """App for External Resources""" + + default_auto_field = "django.db.models.BigAutoField" + name = "external_resources" diff --git a/external_resources/constants.py b/external_resources/constants.py new file mode 100644 index 000000000..130fb5e14 --- /dev/null +++ b/external_resources/constants.py @@ -0,0 +1,26 @@ +"""Constants for External Resources module""" + +# HTTP Status Codes +HTTP_BAD_REQUEST = 400 +HTTP_UNAUTHORIZED = 401 +HTTP_PAYMENT_REQUIRED = 402 +HTTP_FORBIDDEN = 403 +HTTP_TOO_MANY_REQUESTS = 429 +HTTP_REQUEST_TIMEOUT = 408 +HTTP_SERVICE_UNAVAILABLE = 503 + +# External 
Resource +RESOURCE_BROKEN_STATUS_START = HTTP_BAD_REQUEST +RESOURCE_BROKEN_STATUS_END = 600 +RESOURCE_UNCHECKED_STATUSES = [ + HTTP_UNAUTHORIZED, + HTTP_PAYMENT_REQUIRED, + HTTP_FORBIDDEN, + HTTP_TOO_MANY_REQUESTS, + HTTP_REQUEST_TIMEOUT, + HTTP_SERVICE_UNAVAILABLE, +] + +# Celery Task +EXTERNAL_RESOURCE_TASK_RATE_LIMIT = "100/s" +EXTERNAL_RESOURCE_TASK_PRIORITY = 4 # Lowest priority from range (0 - 4) diff --git a/external_resources/exceptions.py b/external_resources/exceptions.py new file mode 100644 index 000000000..69a651b5e --- /dev/null +++ b/external_resources/exceptions.py @@ -0,0 +1,5 @@ +"""External Resources Exceptions""" + + +class CheckFailedError(Exception): + """Check Failed Exception""" diff --git a/external_resources/factories.py b/external_resources/factories.py new file mode 100644 index 000000000..882da0c5e --- /dev/null +++ b/external_resources/factories.py @@ -0,0 +1,24 @@ +"""External Resources Factories""" + +import factory +import pytz + +from external_resources.models import ExternalResourceState +from websites.factories import WebsiteContentFactory + + +class ExternalResourceStateFactory(factory.django.DjangoModelFactory): + """External Resource Factory""" + + class Meta: + """Meta class for External Resource State Factory""" + + model = ExternalResourceState + + content = factory.SubFactory(WebsiteContentFactory) + status = ExternalResourceState.Status.UNCHECKED + external_url_response_code = factory.Faker("random_int", min=100, max=599) + backup_url_response_code = factory.Faker("random_int", min=100, max=599) + is_external_url_broken = factory.Faker("boolean") + is_backup_url_broken = factory.Faker("boolean") + last_checked = factory.Faker("date_time", tzinfo=pytz.utc) diff --git a/external_resources/migrations/0001_initial.py b/external_resources/migrations/0001_initial.py new file mode 100644 index 000000000..9b43d9cfc --- /dev/null +++ b/external_resources/migrations/0001_initial.py @@ -0,0 +1,83 @@ +# Generated by Django 4.2.11 on 
2024-05-02 09:34 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + """Initial Migration for External Resources""" + + initial = True + + dependencies = [ + ("websites", "0053_safedelete_deleted_by_cascade"), + ] + + operations = [ + migrations.CreateModel( + name="ExternalResourceState", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("created_on", models.DateTimeField(auto_now_add=True)), + ("updated_on", models.DateTimeField(auto_now=True)), + ( + "status", + models.CharField( + choices=[ + ("unchecked", "Unchecked or pending check"), + ("valid", "Either URL or backup URL is valid"), + ("broken", "Both URL and backup URL are broken"), + ], + default="unchecked", + help_text="The status of this external resource.", + max_length=16, + ), + ), + ( + "external_url_response_code", + models.IntegerField(blank=True, default=None, null=True), + ), + ( + "backup_url_response_code", + models.IntegerField(blank=True, default=None, null=True), + ), + ( + "is_external_url_broken", + models.BooleanField(blank=True, default=None, null=True), + ), + ( + "is_backup_url_broken", + models.BooleanField(blank=True, default=None, null=True), + ), + ( + "last_checked", + models.DateTimeField( + blank=True, + default=None, + help_text="The last time when this resource" + " was checked for breakages.", + null=True, + ), + ), + ( + "content", + models.OneToOneField( + on_delete=django.db.models.deletion.CASCADE, + related_name="external_resource_state", + to="websites.websitecontent", + ), + ), + ], + options={ + "abstract": False, + }, + ), + ] diff --git a/external_resources/migrations/0002_alter_externalresourcestate_status.py b/external_resources/migrations/0002_alter_externalresourcestate_status.py new file mode 100644 index 000000000..66a450a50 --- /dev/null +++ 
b/external_resources/migrations/0002_alter_externalresourcestate_status.py @@ -0,0 +1,32 @@ +# Generated by Django 4.2.11 on 2024-05-03 11:39 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + """Django Migration for External Resources""" + + dependencies = [ + ("external_resources", "0001_initial"), + ] + + operations = [ + migrations.AlterField( + model_name="externalresourcestate", + name="status", + field=models.CharField( + choices=[ + ("unchecked", "Unchecked or pending check"), + ("valid", "Either URL or backup URL is valid"), + ("broken", "Both URL and backup URL are broken"), + ( + "check_failed", + "Last attempt to check the resource failed unexpectedly", + ), + ], + default="unchecked", + help_text="The status of this external resource.", + max_length=16, + ), + ), + ] diff --git a/external_resources/migrations/__init__.py b/external_resources/migrations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/external_resources/models.py b/external_resources/models.py new file mode 100644 index 000000000..4da49804d --- /dev/null +++ b/external_resources/models.py @@ -0,0 +1,73 @@ +"""External Resources models""" + +from bulk_update_or_create import BulkUpdateOrCreateQuerySet +from django.db import models +from mitol.common.models import TimestampedModel + +from websites.models import WebsiteContent + + +class ExternalResourceState(TimestampedModel): + """Data model for tracking the state of external resources""" + + class Status(models.TextChoices): + """Choices for External Resource Status""" + + UNCHECKED = "unchecked", "Unchecked or pending check" + VALID = "valid", "Either URL or backup URL is valid" + BROKEN = "broken", "Both URL and backup URL are broken" + CHECK_FAILED = ( + "check_failed", + "Last attempt to check the resource failed unexpectedly", + ) + + objects = BulkUpdateOrCreateQuerySet.as_manager() + + content = models.OneToOneField( + WebsiteContent, + on_delete=models.CASCADE, + 
# Named after the module so log records can be attributed and filtered.
log = logging.getLogger(__name__)


def _resolve_status(
    is_url_broken, url_status, is_backup_url_broken, backup_url_status
):
    """
    Work out the status implied by the two url checks.

    Returns an ``ExternalResourceState.Status`` value, or ``None`` when the
    checks are inconclusive and the stored status should be left untouched.
    """
    # A status code of None means the url is not set and was never requested,
    # so it must not be counted as a working url.
    checked = [
        (is_broken, code)
        for is_broken, code in (
            (is_url_broken, url_status),
            (is_backup_url_broken, backup_url_status),
        )
        if code is not None
    ]

    # Nothing conclusive: either no url is set, or every response was one of
    # the statuses (401, 403, 429, ...) that do not prove the link is dead.
    if all(code in RESOURCE_UNCHECKED_STATUSES for _, code in checked):
        return None

    if any(not is_broken for is_broken, _ in checked):
        return ExternalResourceState.Status.VALID
    return ExternalResourceState.Status.BROKEN


@app.task(
    acks_late=True,
    rate_limit=EXTERNAL_RESOURCE_TASK_RATE_LIMIT,
    priority=EXTERNAL_RESOURCE_TASK_PRIORITY,
)
def check_external_resources(resources: list[int]):
    """
    Check a batch of external resources for broken links.

    For every WebsiteContent whose id is in ``resources`` the external url
    and the backup url are checked, and the outcome is stored on the related
    ExternalResourceState (created on first check).

    Args:
        resources: ids of the WebsiteContent objects to check.
    """
    contents = WebsiteContent.objects.filter(id__in=resources).select_related(
        "external_resource_state"
    )

    for resource in contents:
        try:
            state = resource.external_resource_state
        except ExternalResourceState.DoesNotExist as ex:
            # First time this resource is checked.
            log.debug(ex)
            state = ExternalResourceState(content=resource)

        try:
            is_url_broken, url_status = api.is_external_url_broken(resource)
            state.external_url_response_code = url_status
            state.is_external_url_broken = is_url_broken

            # The backup url is always checked, whatever the result above.
            is_backup_url_broken, backup_url_status = api.is_backup_url_broken(
                resource
            )
            state.backup_url_response_code = backup_url_status
            state.is_backup_url_broken = is_backup_url_broken
        except CheckFailedError as ex:
            log.debug(ex)
            state.status = ExternalResourceState.Status.CHECK_FAILED
        else:
            new_status = _resolve_status(
                is_url_broken, url_status, is_backup_url_broken, backup_url_status
            )
            if new_status is not None:
                state.status = new_status

            # NOTE(review): `is_broken` is only ever switched on here and is
            # not cleared when the url recovers - confirm this is intended.
            if is_url_broken and not resource.metadata.get("is_broken", False):
                resource.metadata["is_broken"] = True
                resource.save()
        finally:
            state.last_checked = timezone.now()

        state.save()


@app.task(bind=True, acks_late=True)
def check_external_resources_for_breakages(self):
    """
    Check all external resources for broken links.

    Collects the ids of every WebsiteContent of type
    CONTENT_TYPE_EXTERNAL_RESOURCE, splits them into batches of
    BATCH_SIZE_EXTERNAL_RESOURCE_STATUS_CHECK and replaces this task with a
    group of ``check_external_resources`` tasks, one per batch. Does nothing
    when there are no external resources.
    """
    external_resources = list(
        WebsiteContent.objects.filter(type=CONTENT_TYPE_EXTERNAL_RESOURCE).values_list(
            "id", flat=True
        )
    )

    tasks = [
        check_external_resources.s(batch)
        for batch in chunks(
            external_resources, chunk_size=BATCH_SIZE_EXTERNAL_RESOURCE_STATUS_CHECK
        )
    ]
    if tasks:
        raise self.replace(celery.group(tasks))
def test_check_external_resources_for_breakages_zero_websites(
    mocker, mocked_celery: SimpleNamespace
):
    """No batch task is created when there are no external resources"""
    mock_filter = mocker.patch("websites.models.WebsiteContent.objects.filter")
    mock_filter.return_value.values_list.return_value = []

    mock_batch = mocker.patch("external_resources.tasks.check_external_resources.s")

    # The task has to actually run, otherwise the zero-count assertions below
    # hold trivially. With nothing to check it must not call self.replace, so
    # no exception is expected here.
    check_external_resources_for_breakages.delay()

    mock_filter.assert_called_once_with(type=CONTENT_TYPE_EXTERNAL_RESOURCE)
    assert mock_batch.call_count == 0
    assert mocked_celery.group.call_count == 0
    assert mocked_celery.replace.call_count == 0


@pytest.mark.django_db()
@pytest.mark.parametrize(
    (
        "url_status",
        "url_status_code",
        "backup_url_status",
        "backup_url_status_code",
        "resource_status",
    ),
    [
        (False, 200, False, 200, ExternalResourceState.Status.VALID),
        (False, 200, True, 400, ExternalResourceState.Status.VALID),
        (True, 400, False, 200, ExternalResourceState.Status.VALID),
        (True, 400, True, 400, ExternalResourceState.Status.BROKEN),
        (False, 200, True, 401, ExternalResourceState.Status.VALID),
        (True, 401, False, 200, ExternalResourceState.Status.VALID),
        (True, 401, True, 401, ExternalResourceState.Status.UNCHECKED),
    ],
)
def test_check_external_resources(  # noqa: PLR0913
    mocker,
    url_status,
    url_status_code,
    backup_url_status,
    backup_url_status_code,
    resource_status,
):
    """The task stores the check results and the resulting status on the state"""
    external_resource_state = ExternalResourceStateFactory()

    mocker.patch(
        "external_resources.tasks.api.is_external_url_broken",
        return_value=(url_status, url_status_code),
    )
    mocker.patch(
        "external_resources.tasks.api.is_backup_url_broken",
        return_value=(backup_url_status, backup_url_status_code),
    )
    assert external_resource_state.status == ExternalResourceState.Status.UNCHECKED

    # Run the task
    check_external_resources.delay((external_resource_state.content.id,))

    updated_state = ExternalResourceState.objects.get(id=external_resource_state.id)

    assert updated_state.status == resource_status
    assert updated_state.last_checked is not None

    assert updated_state.is_external_url_broken is url_status
    assert updated_state.is_backup_url_broken is backup_url_status

    assert updated_state.external_url_response_code == url_status_code
    assert updated_state.backup_url_response_code == backup_url_status_code

    assert updated_state.content.metadata.get("is_broken", False) is url_status


@pytest.mark.django_db()
def test_check_external_resources_failed(mocker):
    """The state is marked CHECK_FAILED when the url check raises"""
    external_resource_state = ExternalResourceStateFactory()

    mocker.patch(
        "external_resources.tasks.api.is_external_url_broken",
        side_effect=CheckFailedError,
    )

    check_external_resources.delay((external_resource_state.content.id,))

    updated_state = ExternalResourceState.objects.get(id=external_resource_state.id)

    assert updated_state.status == ExternalResourceState.Status.CHECK_FAILED
logging.getLogger(__name__) @@ -19,6 +21,12 @@ # Using a string here means the worker will not have to # pickle the object when using Windows. app.conf.task_default_queue = "default" +app.conf.task_default_priority = DEFAULT_PRIORITY +app.conf.broker_transport_options = { + "priority_steps": list(range(PRIORITY_STEPS)), + "sep": ":", + "queue_order_strategy": "priority", +} app.config_from_object("django.conf:settings", namespace="CELERY") app.autodiscover_tasks() @@ -34,6 +42,7 @@ "content_sync.tasks.trigger_mass_build": {"queue": "batch"}, "content_sync.tasks.publish_website_batch": {"queue": "batch"}, "content_sync.tasks.publish_websites": {"queue": "batch"}, + "external_resources.tasks.check_external_resources": {"queue": "batch"}, } diff --git a/main/constants.py b/main/constants.py index afff352de..2245f18fd 100644 --- a/main/constants.py +++ b/main/constants.py @@ -7,3 +7,6 @@ STATUS_FAILED = "Failed" PRODUCTION_NAMES = {"prod", "production"} + +PRIORITY_STEPS = 5 # priority range (0 - 4) +DEFAULT_PRIORITY = 2 # Half step of range (0 - 4) diff --git a/main/settings.py b/main/settings.py index 6fee6a3b3..a65b854ea 100644 --- a/main/settings.py +++ b/main/settings.py @@ -152,6 +152,7 @@ "content_sync", "gdrive_sync", "videos", + "external_resources", # common apps, need to be after ocw-studio apps for template overridding "mitol.common.apps.CommonApp", "mitol.authentication.apps.AuthenticationApp", @@ -649,6 +650,20 @@ required=False, ) +# Check External Resources settings +CHECK_EXTERNAL_RESOURCE_STATUS_FREQUENCY = get_int( + name="CHECK_EXTERNAL_RESOURCE_STATUS_FREQUENCY", + default=604800, + description="Frequency (in seconds) to check potentially broken external urls", + required=False, +) + +CHECK_EXTERNAL_RESOURCE_TASK_ENABLE = get_bool( + name="CHECK_EXTERNAL_RESOURCE_TASK_STATUS", + default=True, + description="Enables celery task to check potentially broken external urls", + required=False, +) # Celery REDISCLOUD_URL = get_string( @@ -717,6 +732,12 @@ 
}, } +if CHECK_EXTERNAL_RESOURCE_TASK_ENABLE: + CELERY_BEAT_SCHEDULE["check-broken-external-urls"] = { + "task": "external_resources.tasks.check_external_resources_for_breakages", + "schedule": CHECK_EXTERNAL_RESOURCE_STATUS_FREQUENCY, + } + # django cache back-ends CACHES = { "default": { diff --git a/websites/constants.py b/websites/constants.py index fc935c5a1..16bf01cba 100644 --- a/websites/constants.py +++ b/websites/constants.py @@ -13,6 +13,7 @@ CONTENT_TYPE_COURSE_LIST = "course-lists" CONTENT_TYPE_EXTERNAL_RESOURCE = "external-resource" +BATCH_SIZE_EXTERNAL_RESOURCE_STATUS_CHECK = 100 COURSE_PAGE_LAYOUTS = ["instructor_insights"] COURSE_RESOURCE_LAYOUTS = ["pdf", "video"]