Skip to content

Commit

Permalink
Merge pull request #1296 from uktrade/IGUK-200_Sectors
Browse files Browse the repository at this point in the history
Iguk 200 sectors
  • Loading branch information
stuart-mindt authored Jul 25, 2024
2 parents 31a16fc + f1bb125 commit 39ee5ed
Show file tree
Hide file tree
Showing 11 changed files with 407 additions and 0 deletions.
3 changes: 3 additions & 0 deletions conf/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -625,6 +625,9 @@
'dataservices-business-cluster-information-by-dbt-sector',
'dataservices-eyb-salary-data',
'dataservices-eyb-commercial-rent-data',
'dataservices-dbt-sector',
'dataservices-sector-gva-value-band',
'dataservices-dbt-investment-opportunity',
'enrolment-preverified',
'enrolment-claim-preverified',
'offices-by-postcode',
Expand Down
15 changes: 15 additions & 0 deletions conf/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,21 @@
dataservices.views.EYBRentDataView.as_view(),
name='dataservices-eyb-commercial-rent-data',
),
re_path(
r'^dataservices/dbt-sector/$',
dataservices.views.DBTSectorsView.as_view(),
name='dataservices-dbt-sector',
),
re_path(
r'^dataservices/sector-gva-value-band/$',
dataservices.views.SectorGVAValueBandView.as_view(),
name='dataservices-sector-gva-value-band',
),
re_path(
r'^dataservices/dbt-investment-opportunity/$',
dataservices.views.DBTInvestmentOpportunityView.as_view(),
name='dataservices-dbt-investment-opportunity',
),
re_path(r'^testapi/buyer/(?P<email>.*)/$', testapi.views.BuyerTestAPIView.as_view(), name='buyer_by_email'),
re_path(r'^testapi/test-buyers/$', testapi.views.BuyerTestAPIView.as_view(), name='delete_test_buyers'),
re_path(
Expand Down
8 changes: 8 additions & 0 deletions dataservices/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,11 @@ class EYBCommercialRentDataFilter(django_filters.rest_framework.FilterSet):
class Meta:
model = models.EYBCommercialPropertyRent
fields = ['geo_description', 'vertical', 'sub_vertical']


class SectorGVAValueBandFilter(django_filters.rest_framework.FilterSet):
    """Filter ``SectorGVAValueBand`` rows by their full sector name.

    ``full_sector_name`` is declared with ``required=True`` and is matched
    with the case-insensitive ``iexact`` lookup.
    """

    full_sector_name = django_filters.CharFilter(field_name='full_sector_name', lookup_expr='iexact', required=True)

    class Meta:
        model = models.SectorGVAValueBand
        # Only the sector name is exposed as a filterable field.
        fields = ['full_sector_name']
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import pandas as pd
import sqlalchemy as sa

from dataservices.models import DBTInvestmentOpportunity

from .helpers import BaseDataWorkspaceIngestionCommand


class Command(BaseDataWorkspaceIngestionCommand):
    """Build ``DBTInvestmentOpportunity`` instances from the Data Workspace table.

    This class only defines the query and the row-to-model mapping; what is
    done with the returned instances is decided by
    ``BaseDataWorkspaceIngestionCommand``.
    """

    help = 'Import DBT investment opportunities data from Data Workspace'
    # NOTE: ``id``, ``updated_date`` and ``investment_opportunity_code`` are
    # selected but are not copied onto the model in ``load_data``.
    sql = '''
SELECT
id,
updated_date,
investment_opportunity_code,
opportunity_title,
description,
nomination_round,
launched,
opportunity_type,
location,
sub_sector,
levelling_up,
net_zero,
science_technology_superpower,
sector_cluster
FROM public.dit_investment_opportunities
'''

    def load_data(self):
        """Return one unsaved ``DBTInvestmentOpportunity`` per row of ``self.sql``.

        The query is read in chunks of 5000 rows through ``self.engine``
        (not defined in this class - presumably supplied by the base command).

        Returns:
            list: unsaved ``DBTInvestmentOpportunity`` instances, in query order.
        """
        data = []
        chunks = pd.read_sql(sa.text(self.sql), self.engine, chunksize=5000)

        for chunk in chunks:
            # itertuples() is faster than iterrows() and keeps each column's
            # dtype instead of coercing every row into a single Series.
            data.extend(
                DBTInvestmentOpportunity(
                    opportunity_title=row.opportunity_title,
                    description=row.description,
                    nomination_round=row.nomination_round,
                    launched=row.launched,
                    opportunity_type=row.opportunity_type,
                    location=row.location,
                    sub_sector=row.sub_sector,
                    levelling_up=row.levelling_up,
                    net_zero=row.net_zero,
                    science_technology_superpower=row.science_technology_superpower,
                    sector_cluster=row.sector_cluster,
                )
                for row in chunk.itertuples(index=False)
            )

        return data
45 changes: 45 additions & 0 deletions dataservices/management/commands/import_dbt_sectors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import pandas as pd
import sqlalchemy as sa

from dataservices.models import DBTSector

from .helpers import BaseDataWorkspaceIngestionCommand


class Command(BaseDataWorkspaceIngestionCommand):
    """Build ``DBTSector`` instances from the Data Workspace sector reference table.

    This class only defines the query and the row-to-model mapping; what is
    done with the returned instances is decided by
    ``BaseDataWorkspaceIngestionCommand``.
    """

    help = 'Import DBT sector list data from Data Workspace'
    # NOTE: ``id``, ``updated_date``, ``field_03``, ``field_06`` and
    # ``field_07`` are selected but are not copied onto the model in
    # ``load_data``.
    sql = '''
SELECT
id,
updated_date,
field_01,
field_03,
full_sector_name,
field_04,
field_05,
field_02,
field_06,
field_07,
sector_cluster__april_2023
FROM public.ref_dit_sectors
'''

    def load_data(self):
        """Return one unsaved ``DBTSector`` per row of ``self.sql``.

        The query is read in chunks of 5000 rows through ``self.engine``
        (not defined in this class - presumably supplied by the base command).

        Returns:
            list: unsaved ``DBTSector`` instances, in query order.
        """
        data = []
        chunks = pd.read_sql(sa.text(self.sql), self.engine, chunksize=5000)

        for chunk in chunks:
            # itertuples() is faster than iterrows() and keeps each column's
            # dtype instead of coercing every row into a single Series.
            data.extend(
                DBTSector(
                    # The source table uses opaque ``field_NN`` column names;
                    # this is where they are given meaningful model names.
                    sector_id=row.field_01,
                    full_sector_name=row.full_sector_name,
                    sector_cluster_name=row.sector_cluster__april_2023,
                    sector_name=row.field_04,
                    sub_sector_name=row.field_05,
                    sub_sub_sector_name=row.field_02,
                )
                for row in chunk.itertuples(index=False)
            )

        return data
53 changes: 53 additions & 0 deletions dataservices/management/commands/import_sectors_gva_value_bands.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import pandas as pd
import sqlalchemy as sa

from dataservices.models import SectorGVAValueBand

from .helpers import BaseDataWorkspaceIngestionCommand


class Command(BaseDataWorkspaceIngestionCommand):
    """Build ``SectorGVAValueBand`` instances from the Data Workspace table.

    This class only defines the query and the row-to-model mapping; what is
    done with the returned instances is decided by
    ``BaseDataWorkspaceIngestionCommand``.
    """

    help = 'Import sector GVA value bands data from Data Workspace'
    # NOTE: ``id``, ``updated_date``, ``sector_gva_and_value_band_id``,
    # ``gva_grouping`` and ``gva_multiplier`` are selected but are not copied
    # onto the model in ``load_data``.
    sql = '''
SELECT
id,
updated_date,
sector_gva_and_value_band_id,
full_sector_name,
gva_grouping,
gva_multiplier,
value_band_a_minimum,
value_band_b_minimum,
value_band_c_minimum,
value_band_d_minimum,
value_band_e_minimum,
sector_classification_value_band,
sector_classification_gva_multiplier,
start_date,
end_date
FROM public.ref_sectors_gva_value_bands
'''

    def load_data(self):
        """Return one unsaved ``SectorGVAValueBand`` per row of ``self.sql``.

        The query is read in chunks of 5000 rows through ``self.engine``
        (not defined in this class - presumably supplied by the base command).

        Returns:
            list: unsaved ``SectorGVAValueBand`` instances, in query order.
        """
        data = []
        chunks = pd.read_sql(sa.text(self.sql), self.engine, chunksize=5000)

        for chunk in chunks:
            # itertuples() is faster than iterrows() and keeps each column's
            # dtype instead of coercing every row into a single Series.
            data.extend(
                SectorGVAValueBand(
                    full_sector_name=row.full_sector_name,
                    value_band_a_minimum=row.value_band_a_minimum,
                    value_band_b_minimum=row.value_band_b_minimum,
                    value_band_c_minimum=row.value_band_c_minimum,
                    value_band_d_minimum=row.value_band_d_minimum,
                    value_band_e_minimum=row.value_band_e_minimum,
                    start_date=row.start_date,
                    end_date=row.end_date,
                    sector_classification_value_band=row.sector_classification_value_band,
                    sector_classification_gva_multiplier=row.sector_classification_gva_multiplier,
                )
                for row in chunk.itertuples(index=False)
            )

        return data
96 changes: 96 additions & 0 deletions dataservices/management/commands/tests/test_import_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -798,3 +798,99 @@ def test_comtrade_load_data(read_sql_mock):
assert data_chn_wld.first().commodity_code == '010649'
assert data_chn_wld.first().trade_value == 4520000
assert data_chn_wld.first().uk_or_world == 'WLD'


@pytest.mark.django_db
@mock.patch('pandas.read_sql')
@override_settings(DATA_WORKSPACE_DATASETS_URL='postgresql://')
def test_import_dbt_sectors(read_sql_mock):
    """``import_dbt_sectors`` writes rows only with ``--write`` and maps the source columns."""
    data = {
        'id': [1],
        'updated_date': [''],
        'field_01': ['SL0001'],
        'field_03': [''],
        'full_sector_name': ['Advanced engineering'],
        'field_04': ['Advanced engineering'],
        'field_05': [''],
        'field_02': [''],
        'field_06': [''],
        'field_07': [''],
        'sector_cluster__april_2023': ['Sustainability and Infrastructure'],
    }
    # pandas.read_sql is called with ``chunksize``, so the mock returns an
    # iterable of DataFrames (a single chunk here).
    read_sql_mock.return_value = [pd.DataFrame(data)]

    assert models.DBTSector.objects.count() == 0

    # dry run
    management.call_command('import_dbt_sectors')
    assert models.DBTSector.objects.count() == 0

    # write
    management.call_command('import_dbt_sectors', '--write')
    assert models.DBTSector.objects.count() == 1

    # The opaque ``field_NN`` source columns must land on the right model fields.
    sector = models.DBTSector.objects.first()
    assert sector.sector_id == 'SL0001'
    assert sector.full_sector_name == 'Advanced engineering'
    assert sector.sector_name == 'Advanced engineering'
    assert sector.sector_cluster_name == 'Sustainability and Infrastructure'


@pytest.mark.django_db
@mock.patch('pandas.read_sql')
@override_settings(DATA_WORKSPACE_DATASETS_URL='postgresql://')
def test_import_sectors_gva_value_bands(read_sql_mock):
    """``import_sectors_gva_value_bands`` writes rows only with ``--write`` and maps the source columns."""
    data = {
        'id': [1],
        'updated_date': [''],
        # Selected by the command's SQL, so the fixture carries it too.
        'sector_gva_and_value_band_id': [1],
        'full_sector_name': ['Advanced engineering'],
        'gva_grouping': [''],
        # NOTE(review): this value looks copy-pasted from ``full_sector_name``;
        # the command does not read this column, so it does not affect the test.
        'gva_multiplier': ['Advanced engineering'],
        'value_band_a_minimum': [5700000],
        'value_band_b_minimum': [2600000],
        'value_band_c_minimum': [848513],
        'value_band_d_minimum': [260000],
        'value_band_e_minimum': [10000],
        'sector_classification_value_band': ['Capital intensive'],
        'sector_classification_gva_multiplier': ['Capital intensive'],
        'start_date': ['2022-04-01'],
        'end_date': ['2025-03-31'],
    }
    # pandas.read_sql is called with ``chunksize``, so the mock returns an
    # iterable of DataFrames (a single chunk here).
    read_sql_mock.return_value = [pd.DataFrame(data)]

    assert models.SectorGVAValueBand.objects.count() == 0

    # dry run
    management.call_command('import_sectors_gva_value_bands')
    assert models.SectorGVAValueBand.objects.count() == 0

    # write
    management.call_command('import_sectors_gva_value_bands', '--write')
    assert models.SectorGVAValueBand.objects.count() == 1

    # Check the saved values, not just the row count.
    band = models.SectorGVAValueBand.objects.first()
    assert band.full_sector_name == 'Advanced engineering'
    assert band.value_band_a_minimum == 5700000
    assert band.value_band_e_minimum == 10000
    assert band.sector_classification_value_band == 'Capital intensive'
    assert band.sector_classification_gva_multiplier == 'Capital intensive'


@pytest.mark.django_db
@mock.patch('pandas.read_sql')
@override_settings(DATA_WORKSPACE_DATASETS_URL='postgresql://')
def test_import_dbt_investment_opportunities(read_sql_mock):
    """``import_dbt_investment_opportunities`` writes rows only with ``--write`` and maps the source columns."""
    data = {
        'id': [1],
        'updated_date': [''],
        'investment_opportunity_code': ['INVESTMENT_OPPORTUNITY_001'],
        'opportunity_title': ['Precision Agriculture'],
        'description': ['An opportunity to meet the demand for new food production systems to support changing'],
        'nomination_round': [1],
        'launched': [True],
        'opportunity_type': ['High potential opportunity'],
        'location': ['North East'],
        'sub_sector': ['Food and Drink'],
        'levelling_up': [True],
        'net_zero': [True],
        'science_technology_superpower': [False],
        'sector_cluster': ['Agriculture, Food & Drink'],
    }
    # pandas.read_sql is called with ``chunksize``, so the mock returns an
    # iterable of DataFrames (a single chunk here).
    read_sql_mock.return_value = [pd.DataFrame(data)]

    assert models.DBTInvestmentOpportunity.objects.count() == 0

    # dry run
    management.call_command('import_dbt_investment_opportunities')
    assert models.DBTInvestmentOpportunity.objects.count() == 0

    # write
    management.call_command('import_dbt_investment_opportunities', '--write')
    assert models.DBTInvestmentOpportunity.objects.count() == 1

    # Check the saved values, not just the row count.
    opportunity = models.DBTInvestmentOpportunity.objects.first()
    assert opportunity.opportunity_title == 'Precision Agriculture'
    assert opportunity.opportunity_type == 'High potential opportunity'
    assert opportunity.location == 'North East'
    assert opportunity.sector_cluster == 'Agriculture, Food & Drink'
    assert opportunity.launched is True
    assert opportunity.science_technology_superpower is False
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Generated by Django 4.2.11 on 2024-07-25 08:17

from django.db import migrations, models


class Migration(migrations.Migration):
    """Create the ``DBTInvestmentOpportunity``, ``DBTSector`` and ``SectorGVAValueBand`` models."""

    dependencies = [
        ('dataservices', '0054_eybsalarydata_occupation_eybsalarydata_soc_code'),
    ]

    # Every ``CharField`` below is declared without ``max_length``.
    # NOTE(review): that relies on the Django 4.2+ / PostgreSQL behaviour of
    # allowing unbounded varchar columns - confirm the target database.
    operations = [
        migrations.CreateModel(
            name='DBTInvestmentOpportunity',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('opportunity_title', models.CharField()),
                ('description', models.CharField()),
                ('nomination_round', models.FloatField()),
                ('launched', models.BooleanField()),
                ('opportunity_type', models.CharField()),
                ('location', models.CharField()),
                ('sub_sector', models.CharField()),
                ('levelling_up', models.BooleanField()),
                ('net_zero', models.BooleanField()),
                ('science_technology_superpower', models.BooleanField()),
                ('sector_cluster', models.CharField()),
            ],
        ),
        migrations.CreateModel(
            name='DBTSector',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('sector_id', models.CharField()),
                ('full_sector_name', models.CharField()),
                ('sector_cluster_name', models.CharField()),
                ('sector_name', models.CharField()),
                # Only the two sub-level names are nullable.
                ('sub_sector_name', models.CharField(blank=True, null=True)),
                ('sub_sub_sector_name', models.CharField(blank=True, null=True)),
            ],
        ),
        migrations.CreateModel(
            name='SectorGVAValueBand',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('full_sector_name', models.CharField()),
                ('value_band_a_minimum', models.IntegerField()),
                ('value_band_b_minimum', models.IntegerField()),
                ('value_band_c_minimum', models.IntegerField()),
                ('value_band_d_minimum', models.IntegerField()),
                ('value_band_e_minimum', models.IntegerField()),
                ('start_date', models.DateField()),
                ('end_date', models.DateField()),
                ('sector_classification_value_band', models.CharField()),
                ('sector_classification_gva_multiplier', models.CharField()),
            ],
        ),
    ]
36 changes: 36 additions & 0 deletions dataservices/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -497,3 +497,39 @@ class EYBSalaryData(models.Model):
median_salary = models.IntegerField(null=True, blank=True)
mean_salary = models.IntegerField(null=True, blank=True)
dataset_year = models.SmallIntegerField(null=True, blank=True)


class DBTSector(models.Model):
    """A DBT sector row, as built by the ``import_dbt_sectors`` management command.

    NOTE(review): the ``CharField``s have no ``max_length``; this relies on
    Django 4.2+ with PostgreSQL - confirm the target database.
    """

    sector_id = models.CharField()  # source column ``field_01``
    full_sector_name = models.CharField()
    sector_cluster_name = models.CharField()  # source column ``sector_cluster__april_2023``
    sector_name = models.CharField()  # source column ``field_04``
    # Only the two sub-level names are optional.
    sub_sector_name = models.CharField(null=True, blank=True)  # source column ``field_05``
    sub_sub_sector_name = models.CharField(null=True, blank=True)  # source column ``field_02``


class SectorGVAValueBand(models.Model):
    """Value-band minimums for a sector, as built by ``import_sectors_gva_value_bands``.

    Each row carries five band minimums (``a`` to ``e``), a start and end
    date, and two classification labels.

    NOTE(review): the ``CharField``s have no ``max_length``; this relies on
    Django 4.2+ with PostgreSQL - confirm the target database.
    """

    full_sector_name = models.CharField()
    value_band_a_minimum = models.IntegerField()
    value_band_b_minimum = models.IntegerField()
    value_band_c_minimum = models.IntegerField()
    value_band_d_minimum = models.IntegerField()
    value_band_e_minimum = models.IntegerField()
    start_date = models.DateField()
    end_date = models.DateField()
    sector_classification_value_band = models.CharField()
    sector_classification_gva_multiplier = models.CharField()


class DBTInvestmentOpportunity(models.Model):
    """An investment opportunity row, as built by ``import_dbt_investment_opportunities``.

    NOTE(review): the ``CharField``s have no ``max_length``; this relies on
    Django 4.2+ with PostgreSQL - confirm the target database.
    """

    opportunity_title = models.CharField()
    description = models.CharField()
    # NOTE(review): stored as a float although the name suggests a whole
    # number - confirm against the source data before changing the type.
    nomination_round = models.FloatField()
    launched = models.BooleanField()
    opportunity_type = models.CharField()
    location = models.CharField()
    sub_sector = models.CharField()
    levelling_up = models.BooleanField()
    net_zero = models.BooleanField()
    science_technology_superpower = models.BooleanField()
    sector_cluster = models.CharField()
Loading

0 comments on commit 39ee5ed

Please sign in to comment.