Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 21 additions & 8 deletions src/django/api/extended_fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,15 +59,25 @@ def get_facility_and_processing_type_extendfield_value(


def get_isic_4_extendedfield_value(field_value):
    """Normalize a raw isic_4 value into ``{'raw_value': [entries]}``.

    ``field_value`` may be ``None``, a single entry, or a list of entries.
    The result always wraps a list, so callers can treat an empty list as
    "nothing to store". Only dict entries are kept; ``None``, ``''``,
    ``{}``, non-dict values and dicts rejected by ``all_values_empty`` are
    dropped.

    NOTE(review): ``all_values_empty`` is defined outside this view;
    presumably it returns True for a dict whose values are all empty.
    """
    if field_value is None:
        return {'raw_value': []}

    # A single entry is handled as a one-element list so that both input
    # shapes go through the same filtering below.
    entries = field_value if isinstance(field_value, list) else [field_value]

    normalized_entries = []
    for entry in entries:
        # Literal placeholders are rejected first so they never reach the
        # helper call.
        if entry in (None, '', {}):
            continue
        if not isinstance(entry, dict):
            continue
        if all_values_empty(entry):
            continue
        normalized_entries.append(entry)

    return {'raw_value': normalized_entries}


def get_parent_company_extendedfield_value(field_value):
Expand Down Expand Up @@ -140,7 +150,10 @@ def create_extendedfield(field, field_value, item, contributor):
'raw_value': field_value,
}
elif field == ExtendedField.ISIC_4:
field_value = get_isic_4_extendedfield_value(field_value)
normalized_isic = get_isic_4_extendedfield_value(field_value)
if not normalized_isic.get('raw_value'):
return
field_value = normalized_isic

ExtendedField.objects.create(
contributor=contributor,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from django.core.management.base import BaseCommand
from django.core.management.base import BaseCommand, CommandError
from django.db import transaction, IntegrityError, DatabaseError
from django.utils import timezone

Expand All @@ -15,6 +15,25 @@ class Command(BaseCommand):
"where the contributor has a corresponding NonstandardField('isic_4')."
)

@staticmethod
def _normalize_isic_entries(raw_value):
    """Return the normalized isic_4 entries for ``raw_value``.

    Delegates to ``get_isic_4_extendedfield_value`` and unwraps its
    ``'raw_value'`` key, falling back to an empty list if the key is
    missing.
    """
    return get_isic_4_extendedfield_value(raw_value).get('raw_value', [])

@staticmethod
def _build_extended_field(item, normalized_entry):
    """Build an unsaved, unverified isic_4 ``ExtendedField`` for ``item``.

    The contributor, facility and origin source are taken from ``item``;
    ``normalized_entry`` is stored under the ``'raw_value'`` key of the
    field value. The instance is only constructed here, not saved.
    """
    field_kwargs = {
        'contributor': item.source.contributor,
        'facility': item.facility,
        'facility_list_item': item,
        'facility_claim': None,
        'is_verified': False,
        'field_name': ExtendedField.ISIC_4,
        'value': {'raw_value': normalized_entry},
        'origin_source': item.origin_source,
    }
    return ExtendedField(**field_kwargs)

def add_arguments(self, parser):
parser.add_argument(
'--batch-size', type=int, default=1000,
Expand Down Expand Up @@ -45,6 +64,15 @@ def add_arguments(self, parser):
'print the related facility OS ID'
)
)
parser.add_argument(
'--os-id',
type=str,
default=None,
help=(
'Available only with --singleisic. If provided, backfill only '
'the specified OS ID.'
)
)

def handle(self, *args, **options):
self.stdout.write('Backfilling isic_4 extended fields (ORM)...')
Expand All @@ -54,6 +82,12 @@ def handle(self, *args, **options):
continue_on_error = options['continue_on_error']
contributor_filter = options['contributor_id']
single_only = options['singleisic']
os_id_filter = options['os_id']

if os_id_filter and not single_only:
raise CommandError(
'--os-id can only be used together with --singleisic.'
)

if dry_run:
self.stdout.write(self.style.WARNING(
Expand Down Expand Up @@ -101,7 +135,11 @@ def handle(self, *args, **options):

# If only one record should be backfilled, handle here and exit.
if single_only:
item = items_qs.first()
single_qs = items_qs
if os_id_filter:
single_qs = single_qs.filter(facility__id=os_id_filter)

item = single_qs.first()
if item is None:
self.stdout.write(
'No eligible items found for single backfill.'
Expand All @@ -115,35 +153,36 @@ def handle(self, *args, **options):
)
return

# Normalize via shared helper, then wrap in {'raw_value': ...}.
normalized_value = get_isic_4_extendedfield_value(raw)['raw_value']
value = {'raw_value': normalized_value}
normalized_entries = self._normalize_isic_entries(raw)
if not normalized_entries:
self.stdout.write(
'Eligible item does not contain valid isic_4; '
'nothing to do.'
)
return

extended_fields = [
self._build_extended_field(item, normalized_entries)
]

if dry_run:
self.stdout.write(
self.style.WARNING(
"[DRY-RUN] Would backfill one isic_4 row for OS ID "
"[DRY-RUN] Would backfill "
f"{len(extended_fields)} isic_4 row(s) for OS ID "
f"{item.facility.id}"
)
)
return

try:
with transaction.atomic():
ef = ExtendedField(
contributor=item.source.contributor,
facility=item.facility,
facility_list_item=item,
facility_claim=None,
is_verified=False,
field_name=ExtendedField.ISIC_4,
value=value,
origin_source=item.origin_source,
)
ef.save()
ExtendedField.objects.bulk_create(extended_fields)
self.stdout.write(
self.style.SUCCESS(
f"Backfilled 1 isic_4 row. OS ID: {item.facility.id}"
"Backfilled "
f"{len(extended_fields)} isic_4 row(s). "
f"OS ID: {item.facility.id}"
)
)
except (IntegrityError, DatabaseError) as exc:
Expand Down Expand Up @@ -214,21 +253,17 @@ def flush_batch():
stats['skipped_empty_value'] += 1
continue

# Normalize via shared helper, then wrap in {'raw_value': ...}.
normalized_value = get_isic_4_extendedfield_value(raw)['raw_value']
value = {'raw_value': normalized_value}

extended_field = ExtendedField(
contributor=item.source.contributor,
facility=item.facility,
facility_list_item=item,
facility_claim=None,
is_verified=False,
field_name=ExtendedField.ISIC_4,
value=value,
origin_source=item.origin_source,
normalized_entries = self._normalize_isic_entries(raw)
if not normalized_entries:
stats['skipped_empty_value'] += 1
continue

extended_field = self._build_extended_field(
item,
normalized_entries
)
to_create.append(extended_field)

stats['queued'] += 1

if len(to_create) >= batch_size:
Expand Down
54 changes: 54 additions & 0 deletions src/django/api/serializers/v1/isic4_entry_serializer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from rest_framework import serializers


def _isic4_char_field(label):
    """Return the optional, non-blank CharField used for one isic_4 key.

    ``label`` is the payload key name; it is interpolated into the field's
    custom error messages.
    """
    return serializers.CharField(
        required=False,
        allow_blank=False,
        error_messages={
            'required': f'Field {label} is required for isic_4.',
            'blank': f'Field {label} must be a non-empty string.',
            'invalid': f'Field {label} must be a valid string.',
        },
    )


class ISIC4EntrySerializer(serializers.Serializer):
    """Validate a single isic_4 entry.

    Every key (``class``, ``group``, ``section``, ``division``) is
    optional, but a key that is present must hold a non-blank value that
    is already a ``str`` in the incoming payload.
    """

    # Payload keys whose raw values must be genuine strings when present.
    _STRING_KEYS = ('class', 'group', 'section', 'division')

    # ``class`` is a Python keyword, so the field is declared under a
    # placeholder attribute and re-keyed to 'class' in get_fields().
    # Declaring it with source='class' instead would make the field read
    # the input key 'isic_class' and leave the payload's 'class' key
    # without the non-blank check.
    isic_class = _isic4_char_field('class')
    group = _isic4_char_field('group')
    section = _isic4_char_field('section')
    division = _isic4_char_field('division')

    def get_fields(self):
        """Expose the placeholder ``isic_class`` field under the key 'class'."""
        fields = super().get_fields()
        renamed = {'class': fields.pop('isic_class')}
        renamed.update(fields)
        return renamed

    def to_internal_value(self, data):
        """Run field validation, then reject non-string raw values.

        CharField coerces numbers to strings, so the raw payload is
        inspected separately. The check lives here rather than in
        ``validate()`` because ``initial_data`` is not set when this
        serializer is nested as a ListField child, whereas ``data`` is
        always the raw entry being validated.

        Raises:
            serializers.ValidationError: keyed by payload key, when a
                present key holds a non-string value.
        """
        # Field-level errors (blank, invalid, null) are raised first by
        # the parent implementation; the strict type check runs only once
        # those pass.
        validated = super().to_internal_value(data)
        errors = {
            key: [f'Field {key} must be a string.']
            for key in self._STRING_KEYS
            if key in data and not isinstance(data[key], str)
        }
        if errors:
            raise serializers.ValidationError(errors)
        return validated
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from api.serializers.v1.isic4_entry_serializer \
import ISIC4EntrySerializer
from api.serializers.v1.coordinates_serializer \
import CoordinatesSerializer
import CoordinatesSerializer
from api.serializers.v1.number_of_workers_serializer \
import NumberOfWorkersSerializer
import NumberOfWorkersSerializer
from api.serializers.v1.string_or_list_field import StringOrListField
from rest_framework import serializers

Expand Down Expand Up @@ -53,6 +55,19 @@ class ProductionLocationSchemaSerializer(serializers.Serializer):
'invalid': 'Field coordinates must be a valid geopoint.'
},
)
isic_4 = serializers.ListField(
child=ISIC4EntrySerializer(),
required=False,
allow_empty=False,
min_length=1,
max_length=15,
error_messages={
'min_length': 'Provide at least one isic_4 object.',
'max_length': 'Provide at most 15 isic_4 objects.',
'invalid': 'Field isic_4 must be a list of objects.',
'empty': 'Field isic_4 cannot be empty.',
},
)

# Use only subclasses.
def __init__(self, *args, **kwargs):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@ def assert_extended_fields_creation(self, response, status_code):
expected_value = {
'raw_value': self.moderation_event.cleaned_data[
'fields'
]['isic_4'][0]
]['isic_4']
}
self.assertEqual(isic_field.value, expected_value)

Expand Down
73 changes: 73 additions & 0 deletions src/django/api/tests/test_isic4_entry_serializer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
from django.test import SimpleTestCase

from api.serializers.v1.isic4_entry_serializer import ISIC4EntrySerializer
from api.serializers.v1.production_location_post_schema_serializer import (
ProductionLocationPostSchemaSerializer,
)


class ISIC4EntrySerializerTest(SimpleTestCase):
    """Exercise ISIC4EntrySerializer on its own, outside a parent schema."""

    def setUp(self):
        # A fully populated entry; individual tests override single keys.
        self.valid_payload = dict([
            ('class', '620 - Computer programming, consultancy and related '
                      'activities'),
            ('group', '62 - Computer programming, consultancy and related '
                      'activities'),
            ('section', 'J - Information and communication'),
            ('division', '62 - Computer programming, consultancy and related '
                         'activities'),
        ])

    def test_valid_isic4_payload(self):
        # A complete entry validates and keeps the submitted group text.
        entry_serializer = ISIC4EntrySerializer(data=self.valid_payload)

        self.assertTrue(
            entry_serializer.is_valid(),
            entry_serializer.errors,
        )
        expected_group = self.valid_payload['group']
        self.assertEqual(
            entry_serializer.validated_data['group'],
            expected_group,
        )

    def test_missing_fields_are_allowed(self):
        # Every key is optional, so an empty entry is accepted.
        entry_serializer = ISIC4EntrySerializer(data={})

        self.assertTrue(
            entry_serializer.is_valid(),
            entry_serializer.errors,
        )

    def test_blank_strings_are_rejected(self):
        # An empty string for a present key must produce a field error.
        payload_with_blank = dict(self.valid_payload, section='')
        entry_serializer = ISIC4EntrySerializer(data=payload_with_blank)

        self.assertFalse(entry_serializer.is_valid())
        self.assertIn('section', entry_serializer.errors)


class ProductionLocationSchemaIsic4Test(SimpleTestCase):
    """Check isic_4 handling in the production location POST schema."""

    def setUp(self):
        # Minimal payload used by the tests, before isic_4 is added.
        self.base_payload = dict(
            name='Test Facility',
            address='123 Test Street',
            country='US',
        )
        self.valid_isic_entry = dict([
            ('class', '620 - Computer programming, consultancy and related '
                      'activities'),
            ('group', '62 - Computer programming, consultancy and related '
                      'activities'),
            ('section', 'J - Information and communication'),
            ('division', '62 - Computer programming, consultancy and related '
                         'activities'),
        ])

    def test_multiple_isic_entries_allowed(self):
        # Two entries in the isic_4 list must pass schema validation.
        isic_entries = [self.valid_isic_entry, self.valid_isic_entry]
        request_payload = dict(self.base_payload, isic_4=isic_entries)
        schema_serializer = ProductionLocationPostSchemaSerializer(
            data=request_payload,
        )

        self.assertTrue(
            schema_serializer.is_valid(),
            schema_serializer.errors,
        )
Loading