From ed028740d1e2b8582966ce2e546b2b22adad4292 Mon Sep 17 00:00:00 2001
From: psyciknz
Date: Thu, 19 Jun 2025 12:38:52 +1200
Subject: [PATCH 01/10] Adding book import. Uses own format and reads title or
 ISBN

---
 src/integrations/imports/book_import.py | 174 ++++++++++++++++++
 .../tests/mock_data/import_books.csv    |   6 +
 src/integrations/tests/test_imports.py  |  48 ++---
 3 files changed, 206 insertions(+), 22 deletions(-)
 create mode 100644 src/integrations/imports/book_import.py
 create mode 100644 src/integrations/tests/mock_data/import_books.csv

diff --git a/src/integrations/imports/book_import.py b/src/integrations/imports/book_import.py
new file mode 100644
index 000000000..44bf2b61e
--- /dev/null
+++ b/src/integrations/imports/book_import.py
@@ -0,0 +1,174 @@
+import logging
+from collections import defaultdict
+from csv import DictReader
+
+from django.apps import apps
+from django.conf import settings
+from django.utils.dateparse import parse_datetime
+
+import app
+from app.models import MediaTypes, Sources, Status
+from app.providers import services
+from integrations.imports import helpers
+from integrations.imports.helpers import (MediaImportError,
+                                          MediaImportUnexpectedError)
+
+logger = logging.getLogger(__name__)
+
+
+def importer(file, user, mode):
+    """Import media from CSV file using the class-based importer."""
+    csv_importer = BookImporter(file, user, mode)
+    return csv_importer.import_data()
+
+
+class BookImporter:
+    """Class to handle importing user data from CSV files."""
+
+    def __init__(self, file, user, mode):
+        """Initialize the importer with file, user, and mode.
+
+        Args:
+            file: Uploaded CSV file object
+            user: Django user object to import data for
+            mode (str): Import mode ("new" or "overwrite")
+        """
+        self.file = file
+        self.user = user
+        self.mode = mode
+        self.warnings = []
+
+        # Track existing media for "new" mode
+        self.existing_media = helpers.get_existing_media(user)
+
+        # Track media IDs to delete in overwrite mode
+        self.to_delete = defaultdict(lambda: defaultdict(set))
+
+        # Track bulk creation lists for each media type
+        self.bulk_media = defaultdict(list)
+
+        logger.info(
+            "Initialized Book CSV importer for user %s with mode %s",
+            user.username,
+            mode,
+        )
+
+    def import_data(self):
+        """Import all user data from the CSV file."""
+        try:
+            decoded_file = self.file.read().decode("utf-8").splitlines()
+            # format:
+            # isbn,providerid,provider,title,read_start,read_end
+        except UnicodeDecodeError as e:
+            msg = "Invalid file format. Please upload a CSV file."
+            raise MediaImportError(msg) from e
+
+        fieldnames = ['isbn','providerid','provider','title','read_start','read_end','sourcee','media_id','progress','status']
+        reader = DictReader(decoded_file,fieldnames=fieldnames)
+
+        for row in reader:
+            try:
+                self._process_row(row)
+            except Exception as error:
+                error_msg = f"Error processing entry: {row}"
+                raise MediaImportUnexpectedError(error_msg) from error
+
+        helpers.cleanup_existing_media(self.to_delete, self.user)
+        helpers.bulk_create_media(self.bulk_media, self.user)
+
+        imported_counts = {
+            media_type: len(media_list)
+            for media_type, media_list in self.bulk_media.items()
+        }
+
+        deduplicated_messages = "\n".join(dict.fromkeys(self.warnings))
+        return imported_counts, deduplicated_messages
+
+    def _process_row(self, row):
+        """Process a single row from the CSV file."""
+        media_type = MediaTypes.BOOK.value
+
+        # Check if we should process this book based on mode
+        if not helpers.should_process_media(
+            self.existing_media,
+            self.to_delete,
+            media_type,
+            row["provider"],
+            row["providerid"],
+            self.mode,
+        ):
+            return
+
+        if row["title"] == "" or row["provider"] or row["providerid"] == "":
+            self._handle_missing_metadata(
+                row,
+                media_type
+            )
+
+        item, _ = app.models.Item.objects.update_or_create(
+            media_id=row["media_id"],
+            source=row["source"][0],
+            media_type=media_type,
+            season_number=None,
+            episode_number=None,
+            defaults={
+                "title": row["title"],
+                "image": row["image"],
+            },
+        )
+
+        model = apps.get_model(app_label="app", model_name=media_type)
+        instance = model(item=item)
+        if media_type != MediaTypes.EPISODE.value:  # episode has no user field
+            instance.user = self.user
+
+        row["item"] = item
+        form = app.forms.get_form_class(media_type)(
+            row,
+            instance=instance,
+        )
+
+        if form.is_valid():
+            progressed_at = row.get("progressed_at")
+            if progressed_at:
+                form.instance._history_date = parse_datetime(progressed_at)
+            self.bulk_media[media_type].append(form.instance)
+        else:
+            error_msg = f"{row['title']} ({media_type}): {form.errors.as_json()}"
+            self.warnings.append(error_msg)
+            logger.error(error_msg)
+
+    def _handle_missing_metadata(self, row, media_type):
+        """Handle missing metadata by fetching from provider -
+        Format #isbn,providerid,provider,title,read_start,read_end """
+        try:
+            searchquery = row["isbn"] or row["title"]
+            if row["provider"] != "":
+                metadata = services.get_media_metadata(
+                    media_type,
+                    row["providerid"],
+                    row["provider"],
+                )
+                row["title"] = metadata["title"]
+                row["image"] = metadata["image"]
+                row["media_id"] = row["providerid"],
+                row["source"] = row["provider"],
+            else:
+                metadata = services.search(
+                    media_type,
+                    searchquery,
+                    1,
+                    Sources.HARDCOVER.value,
+                )
+                row["title"] = metadata["results"][0]["title"],
+                row["source"] = Sources.HARDCOVER.value,
+                row["media_id"] = metadata["results"][0]["media_id"],
+                row["media_type"] = media_type,
+                row["image"] = metadata["results"][0]["image"],
+                row["status"] = Status.COMPLETED.value,
+                row["progress"] = '0'
+        except services.ProviderAPIError as e:
+            self.warnings.append(
+                f"Failed to fetch metadata for {row['media_id']}: {e!s}",
+            )
+            raise
diff --git a/src/integrations/tests/mock_data/import_books.csv b/src/integrations/tests/mock_data/import_books.csv
new file mode 100644
index 000000000..a9f8ebc6d
--- /dev/null
+++ b/src/integrations/tests/mock_data/import_books.csv
@@ -0,0 +1,6 @@
+isbn,providerid,provider,title,read_start,read_end
+0141017899,,,The World According to Clarkson,,2010/09/09
+9780671027360,wiX1O1ZBdYoC,google_books,"Angels & Demons (Robert Langdon, #1),2005/04/01
+0312980388,,,Warlock,2005/07/22
+0312980388,,,Warlock,2009/08/12
+,OL20037497W,openlibrary,2025/04/27
diff --git a/src/integrations/tests/test_imports.py b/src/integrations/tests/test_imports.py
index 50efbfabc..ca1a7605a 100644
--- a/src/integrations/tests/test_imports.py
+++ b/src/integrations/tests/test_imports.py
@@ -8,28 +8,10 @@
 from django.test import TestCase
 from django_celery_beat.models import CrontabSchedule, PeriodicTask
 
-from app.models import (
-    TV,
-    Anime,
-    Episode,
-    Game,
-    Item,
-    Manga,
-    MediaTypes,
-    Movie,
-    Season,
-    Sources,
-    Status,
-)
-from integrations.imports import (
-    anilist,
-    helpers,
-    hltb,
-    kitsu,
-    mal,
-    simkl,
-    yamtrack,
-)
+from app.models import (TV, Anime, Book, Episode, Game, Item, Manga,
+                        MediaTypes, Movie, Season, Sources, Status)
+from integrations.imports import (anilist, book_import, helpers, hltb, kitsu,
+                                  mal, simkl, yamtrack)
 from integrations.imports.trakt import TraktImporter, importer
 
 mock_path = Path(__file__).resolve().parent / "mock_data"
@@ -765,3 +747,25 @@ def test_create_import_schedule_every_2_days(self):
         schedule = CrontabSchedule.objects.first()
         self.assertEqual(schedule.day_of_week, "*/2")
 
+class ImportBooks(TestCase):
+    """Test importing book media from Book CSV."""
+
+    def setUp(self):
+        """Create user for the tests."""
+        self.credentials = {"username": "test", "password": "12345"}
+        self.user = get_user_model().objects.create_user(**self.credentials)
+        with Path(mock_path / "import_books.csv").open("rb") as file:
+            self.import_results = book_import.importer(file, self.user, "new")
+
+    def test_import_counts(self):
+        """Test basic counts of imported media."""
+        self.assertEqual(Book.objects.filter(user=self.user).count(), 4)
+
+    def test_historical_records(self):
+        """Test historical records creation during import."""
+        book = Book.objects.filter(user=self.user).first()
+        self.assertEqual(book.history.count(), 1)
+        self.assertEqual(
+            book.history.first().history_date,
+            datetime(2024, 2, 9, 10, 0, 0, tzinfo=UTC),
+        )

From 66ff2dcaa10861da7300644917cddb700c0be5ce Mon Sep 17 00:00:00 2001
From: psyciknz
Date: Thu, 19 Jun 2025 15:22:47 +1200
Subject: [PATCH 02/10] Latest changes

---
 src/integrations/imports/book_import.py | 31 +++++++++++++------
 .../tests/mock_data/import_books.csv    |  8 ++---
 2 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/src/integrations/imports/book_import.py b/src/integrations/imports/book_import.py
index 44bf2b61e..4dd36e8e3 100644
--- a/src/integrations/imports/book_import.py
+++ b/src/integrations/imports/book_import.py
@@ -1,3 +1,4 @@
+import datetime
 import logging
 from collections import defaultdict
 from csv import DictReader
@@ -63,8 +64,9 @@ def import_data(self):
             msg = "Invalid file format. Please upload a CSV file."
raise MediaImportError(msg) from e - fieldnames = ['isbn','providerid','provider','title','read_start','read_end','sourcee','media_id','progress','status'] + fieldnames = ['isbn','providerid','provider','title','read_start','read_end','source','media_id','progress','status'] reader = DictReader(decoded_file,fieldnames=fieldnames) + #reader = DictReader(decoded_file) for row in reader: try: @@ -87,6 +89,9 @@ def import_data(self): def _process_row(self, row): """Process a single row from the CSV file.""" media_type = MediaTypes.BOOK.value + + if "provider" in row and row["provider"] == 'provider': + return # Check if we should process this movie based on mode if not helpers.should_process_media( @@ -119,6 +124,7 @@ def _process_row(self, row): model = apps.get_model(app_label="app", model_name=media_type) instance = model(item=item) + if media_type != MediaTypes.EPISODE.value: # episode has no user field instance.user = self.user @@ -128,15 +134,22 @@ def _process_row(self, row): instance=instance, ) - if form.is_valid(): - progressed_at = row.get("progressed_at") - if progressed_at: - form.instance._history_date = parse_datetime(progressed_at) - self.bulk_media[media_type].append(form.instance) + progressed_at = row.get("read_end") + if progressed_at: + instance._history_date = parse_datetime(progressed_at) else: - error_msg = f"{row['title']} ({media_type}): {form.errors.as_json()}" - self.warnings.append(error_msg) - logger.error(error_msg) + instance._history_date = datetime.datetime.now() + + self.bulk_media[media_type].append(instance) + # if form.is_valid(): + # progressed_at = row.get("end_date") + # if progressed_at: + # form.instance._history_date = parse_datetime(progressed_at) + # self.bulk_media[media_type].append(form.instance) + # else: + # error_msg = f"{row['title']} ({media_type}): {form.errors.as_json()}" + # self.warnings.append(error_msg) + # logger.error(error_msg) def _handle_missing_metadata(self, row, media_type): """Handle missing metadata by fetching from provider - diff --git a/src/integrations/tests/mock_data/import_books.csv b/src/integrations/tests/mock_data/import_books.csv index a9f8ebc6d..67b136da4 100644 --- a/src/integrations/tests/mock_data/import_books.csv +++ b/src/integrations/tests/mock_data/import_books.csv @@ -1,6 +1,6 @@ isbn,providerid,provider,title,read_start,read_end 0141017899,,,The World According to Clarkson,,2010/09/09 -9780671027360,wiX1O1ZBdYoC,google_books,"Angels & Demons (Robert Langdon, #1),2005/04/01 -0312980388,,,Warlock,2005/07/22 -0312980388,,,Warlock,2009/08/12 -,OL20037497W,openlibrary,2025/04/27 +0671027360,,,Angels & Demons,,2005/04/01 +0312980388,,,Warlock,,2005/07/22 +0312980388,,,Warlock,,2009/08/12 +,OL20037497W,openlibrary,,,2025/04/27 From 445fbda88494953c37073ccc6b420d18b35a57c8 Mon Sep 17 00:00:00 2001 From: psyciknz Date: Thu, 19 Jun 2025 22:31:58 +1200 Subject: [PATCH 03/10] adding date parsing, fixed metadata. 
---
 src/integrations/imports/book_import.py | 87 +++++++++++++------
 .../tests/mock_data/import_books.csv    |  4 +-
 src/integrations/tests/test_imports.py  | 13 ++-
 3 files changed, 72 insertions(+), 32 deletions(-)

diff --git a/src/integrations/imports/book_import.py b/src/integrations/imports/book_import.py
index 4dd36e8e3..8d99383a8 100644
--- a/src/integrations/imports/book_import.py
+++ b/src/integrations/imports/book_import.py
@@ -1,10 +1,11 @@
-import datetime
 import logging
 from collections import defaultdict
 from csv import DictReader
+from datetime import datetime
 
 from django.apps import apps
 from django.conf import settings
+from django.utils import timezone
 from django.utils.dateparse import parse_datetime
 
 import app
@@ -112,34 +113,41 @@ def _process_row(self, row):
 
         item, _ = app.models.Item.objects.update_or_create(
             media_id=row["media_id"],
-            source=row["source"][0],
+            source=row["source"],
             media_type=media_type,
-            season_number=None,
-            episode_number=None,
             defaults={
                 "title": row["title"],
                 "image": row["image"],
             },
         )
-
-        model = apps.get_model(app_label="app", model_name=media_type)
-        instance = model(item=item)
 
-        if media_type != MediaTypes.EPISODE.value:  # episode has no user field
-            instance.user = self.user
-
-        row["item"] = item
-        form = app.forms.get_form_class(media_type)(
-            row,
-            instance=instance,
-        )
+        end_date_string = row.get("read_end")
+        if end_date_string:
+            end_date = self._parse_csv_date(end_date_string)
+        else:
+            end_date = datetime.now()
+
+        model = apps.get_model(app_label="app", model_name=media_type)
+        instance = model(
+            item=item,
+            user=self.user,
+            status=Status.COMPLETED.value,
+            score = 0,
+            progress = 0,
+            end_date = end_date,
+            notes = row["isbn"] or 'unknown',
+
+        )
+        instance._history_date = end_date
+
+
+        # row["item"] = item
+        # form = app.forms.get_form_class(media_type)(
+        #     row,
+        #     instance=instance,
+        #     )
 
         self.bulk_media[media_type].append(instance)
         # if form.is_valid():
@@ -164,8 +172,8 @@ def _handle_missing_metadata(self, row, media_type):
                 row["title"] = metadata["title"]
                 row["image"] = metadata["image"]
-                row["media_id"] = row["providerid"],
-                row["source"] = row["provider"],
+                row["media_id"] = row["providerid"]
+                row["source"] = row["provider"]
             else:
                 metadata = services.search(
@@ -173,15 +181,38 @@ def _handle_missing_metadata(self, row, media_type):
                 )
-                row["title"] = metadata["results"][0]["title"],
-                row["source"] = Sources.HARDCOVER.value,
-                row["media_id"] = metadata["results"][0]["media_id"],
-                row["media_type"] = media_type,
-                row["image"] = metadata["results"][0]["image"],
-                row["status"] = Status.COMPLETED.value,
+                row["title"] = metadata["results"][0]["title"]
+                logger.info(f"Added title from hardcover: {row["title"]}")
+                row["source"] = Sources.HARDCOVER.value
+                row["media_id"] = metadata["results"][0]["media_id"]
+                logger.info(f"Obtained media id hardcover: {row["media_id"]}")
+
+                row["media_type"] = media_type
+                row["image"] = metadata["results"][0]["image"]
+                row["status"] = Status.COMPLETED.value
                 row["progress"] = '0'
         except services.ProviderAPIError as e:
             self.warnings.append(
                 f"Failed to fetch metadata for {row['media_id']}: {e!s}",
             )
             raise
+
+    def _parse_csv_date(self, date_str):
+        """Parse MAL date string (YYYY-MM-DD or YYYY/MM/DD) into datetime object."""
+        if date_str is None:
+            return None
+        result = datetime.now()
+        fmts = ["%Y-%m-%d","%Y/%m/%d"]
+        for fmt in fmts:
+            try:
+                result = datetime.strptime(date_str, fmt).replace(
+                    hour=0,
+                    minute=0,
+                    second=0,
+                    tzinfo=timezone.get_current_timezone(),
+                )
+                return result
+            except:
+                pass
+
+        return None
\ No newline at end of file
diff --git a/src/integrations/tests/mock_data/import_books.csv b/src/integrations/tests/mock_data/import_books.csv
index 67b136da4..d091f5242 100644
--- a/src/integrations/tests/mock_data/import_books.csv
+++ b/src/integrations/tests/mock_data/import_books.csv
@@ -1,6 +1,6 @@
 isbn,providerid,provider,title,read_start,read_end
-0141017899,,,The World According to Clarkson,,2010/09/09
+0141017899,,,The World According to Clarkson,,2010-09-09
 0671027360,,,Angels & Demons,,2005/04/01
-0312980388,,,Warlock,,2005/07/22
+0312980388,429650,hardcover,Warlock,,2005/07/22
 0312980388,,,Warlock,,2009/08/12
 ,OL20037497W,openlibrary,,,2025/04/27
diff --git a/src/integrations/tests/test_imports.py b/src/integrations/tests/test_imports.py
index ca1a7605a..a9aea41f9 100644
--- a/src/integrations/tests/test_imports.py
+++ b/src/integrations/tests/test_imports.py
@@ -759,7 +759,7 @@ def setUp(self):
 
     def test_import_counts(self):
         """Test basic counts of imported media."""
-        self.assertEqual(Book.objects.filter(user=self.user).count(), 4)
+        self.assertEqual(Book.objects.filter(user=self.user).count(), 5)
 
     def test_historical_records(self):
         """Test historical records creation during import."""
@@ -767,5 +767,14 @@ def test_historical_records(self):
         self.assertEqual(book.history.count(), 1)
         self.assertEqual(
             book.history.first().history_date,
-            datetime(2024, 2, 9, 10, 0, 0, tzinfo=UTC),
+            datetime(2005, 4, 1, 0, 0, 0, tzinfo=UTC),
+        )
+        book = Book.objects.filter(
+            user=self.user,
+            item__title="Warlock",
+        ).first()
+        self.assertEqual(book.history.count(), 2)
+        self.assertEqual(
+            book.history.first().history_date,
+            datetime(2005, 7, 22, 0, 0, 0, tzinfo=UTC),
         )

From 32d379fc759bdbddc37aacfd33921ebc810927fb Mon Sep 17 00:00:00 2001
From: psyciknz
Date: Thu, 19 Jun 2025 22:32:29 +1200
Subject: [PATCH 04/10] more fixes

---
 src/integrations/tests/test_imports.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/integrations/tests/test_imports.py b/src/integrations/tests/test_imports.py
index a9aea41f9..2cf5ba969 100644
--- a/src/integrations/tests/test_imports.py
+++ b/src/integrations/tests/test_imports.py
@@ -773,7 +773,7 @@ def test_historical_records(self):
             user=self.user,
             item__title="Warlock",
         ).first()
-        self.assertEqual(book.history.count(), 2)
+        self.assertEqual(book.history.count(), 1)
         self.assertEqual(
             book.history.first().history_date,
             datetime(2005, 7, 22, 0, 0, 0, tzinfo=UTC),

From cb9efbfcad109f51b403534061a09e6d1d33b23b Mon Sep 17 00:00:00 2001
From: psyciknz
Date: Mon, 23 Jun 2025 17:08:19 +1200
Subject: [PATCH 05/10] Added book processing specifics to yamtrack importer

---
 src/integrations/imports/yamtrack.py          | 39 ++++++++++++++++++-
 .../tests/mock_data/import_books_yamtrack.csv |  4 ++
 src/integrations/tests/test_imports.py        | 34 ++++++++++------
 3 files changed, 64 insertions(+), 13 deletions(-)
 create mode 100644 src/integrations/tests/mock_data/import_books_yamtrack.csv

diff --git a/src/integrations/imports/yamtrack.py b/src/integrations/imports/yamtrack.py
index a5efee0ad..2bb0df233 100644
--- a/src/integrations/imports/yamtrack.py
+++ b/src/integrations/imports/yamtrack.py
@@ -10,7 +10,8 @@
 from app.models import MediaTypes, Sources
 from app.providers import services
 from integrations.imports import helpers
-from integrations.imports.helpers import MediaImportError, MediaImportUnexpectedError
+from integrations.imports.helpers import (MediaImportError,
+                                          MediaImportUnexpectedError)
 
 logger = logging.getLogger(__name__)
@@ -156,6 +157,8 @@ def _handle_missing_metadata(self, row, media_type, season_number, episode_numbe
         """Handle missing metadata by fetching from provider."""
         if row["source"] == Sources.MANUAL.value and row["image"] == "":
             row["image"] = settings.IMG_NONE
+        elif media_type == MediaTypes.BOOK:
+            self._handle_missing_book_metadata(row,media_type)
         else:
             try:
                 metadata = services.get_media_metadata(
@@ -172,3 +175,37 @@ def _handle_missing_metadata(self, row, media_type, season_number, episode_numbe
                 f"Failed to fetch metadata for {row['media_id']}: {e!s}",
             )
             raise
+
+    def _handle_missing_book_metadata(self, row, media_type):
+        """Handle missing metadata by fetching from provider -
+        Format #isbn,providerid,provider,title,read_start,read_end """
+        try:
+            searchquery = row["title"]
+            if row["source"] != "":
+                metadata = services.get_media_metadata(
+                    media_type,
+                    row["media_id"],
+                    row["source"],
+                )
+                row["title"] = metadata["title"]
+                row["image"] = metadata["image"]
+            else:
+                metadata = services.search(
+                    media_type,
+                    searchquery,
+                    1,
+                    Sources.HARDCOVER.value,
+                )
+                row["title"] = metadata["results"][0]["title"]
+                logger.info(f"Added title from hardcover: {row["title"]}")
+                row["source"] = Sources.HARDCOVER.value
+                row["media_id"] = metadata["results"][0]["media_id"]
+                logger.info(f"Obtained media id hardcover: {row["media_id"]}")
+
+                row["media_type"] = media_type
+                row["image"] = metadata["results"][0]["image"]
+        except services.ProviderAPIError as e:
+            self.warnings.append(
+                f"Failed to fetch metadata for {row['media_id']}: {e!s}",
+            )
+            raise
\ No newline at end of file
diff --git a/src/integrations/tests/mock_data/import_books_yamtrack.csv b/src/integrations/tests/mock_data/import_books_yamtrack.csv
new file mode 100644
index 000000000..57e6a13ff
--- /dev/null
+++ b/src/integrations/tests/mock_data/import_books_yamtrack.csv
@@ -0,0 +1,4 @@
+"media_id","source","media_type","title","image","season_number","episode_number","score","progress","status","start_date","end_date","notes","progressed_at"
+"","","book","Warlock","","","","","","Completed","2024-02-09","2024-03-09","Title Only",""
+"429650","hardcover","book","Warlock","","","","","","In progress","","2024-04-09","Media ID",""
+"","","book","0312980388","","","","","","Completed","2024-01-09","2024-05-09","ISBN 10 in title",""
diff --git a/src/integrations/tests/test_imports.py b/src/integrations/tests/test_imports.py
index 2cf5ba969..35ec95037 100644
--- a/src/integrations/tests/test_imports.py
+++ b/src/integrations/tests/test_imports.py
@@ -754,27 +754,37 @@ def setUp(self):
         """Create user for the tests."""
         self.credentials = {"username": "test", "password": "12345"}
         self.user = get_user_model().objects.create_user(**self.credentials)
-        with Path(mock_path / "import_books.csv").open("rb") as file:
-            self.import_results = book_import.importer(file, self.user, "new")
+        with Path(mock_path / "import_books_yamtrack.csv").open("rb") as file:
+            self.import_results = yamtrack.importer(file, self.user, "new")
 
     def test_import_counts(self):
         """Test basic counts of imported media."""
-        self.assertEqual(Book.objects.filter(user=self.user).count(), 5)
+        self.assertEqual(Book.objects.filter(user=self.user).count(), 3)
 
     def test_historical_records(self):
         """Test historical records creation during import."""
         book = Book.objects.filter(user=self.user).first()
         self.assertEqual(book.history.count(), 1)
-        self.assertEqual(
-            book.history.first().history_date,
-            datetime(2005, 4, 1, 0, 0, 0, tzinfo=UTC),
-        )
-        book = Book.objects.filter(
+        #self.assertEqual(
+        #    book.history.first().history_date,
+        #    datetime(2005, 4, 1, 0, 0, 0, tzinfo=UTC),
+        #)
+        bookqs = Book.objects.filter(
             user=self.user,
             item__title="Warlock",
-        ).first()
-        self.assertEqual(book.history.count(), 1)
+        )
+        books = list(bookqs)
+
+        self.assertEqual(len(books),3)
+        self.assertEqual(
+            books[0].end_date,
+            datetime(2024, 4, 9, 0, 0, 0, tzinfo=UTC),
+        )
+        self.assertEqual(
+            books[1].end_date,
+            datetime(2024, 5, 9, 0, 0, 0, tzinfo=UTC),
+        )
         self.assertEqual(
-            book.history.first().history_date,
-            datetime(2005, 7, 22, 0, 0, 0, tzinfo=UTC),
+            books[2].end_date,
+            datetime(2024, 3, 9, 0, 0, 0, tzinfo=UTC),
         )

From fe62472b370cd58258b0bfc547413b6e56b5fef4 Mon Sep 17 00:00:00 2001
From: psyciknz
Date: Mon, 23 Jun 2025 18:59:06 +1200
Subject: [PATCH 06/10] Removed book import after code migrated to yamtrack.py

---
 src/integrations/imports/book_import.py | 218 ------------------------
 1 file changed, 218 deletions(-)
 delete mode 100644 src/integrations/imports/book_import.py

diff --git a/src/integrations/imports/book_import.py b/src/integrations/imports/book_import.py
deleted file mode 100644
index 8d99383a8..000000000
--- a/src/integrations/imports/book_import.py
+++ /dev/null
@@ -1,218 +0,0 @@
-import logging
-from collections import defaultdict
-from csv import DictReader
-from datetime import datetime
-
-from django.apps import apps
-from django.conf import settings
-from django.utils import timezone
-from django.utils.dateparse import parse_datetime
-
-import app
-from app.models import MediaTypes, Sources, Status
-from app.providers import services
-from integrations.imports import helpers
-from integrations.imports.helpers import (MediaImportError,
-                                          MediaImportUnexpectedError)
-
-logger = logging.getLogger(__name__)
-
-
-def importer(file, user, mode):
-    """Import media from CSV file using the class-based importer."""
-    csv_importer = BookImporter(file, user, mode)
-    return csv_importer.import_data()
-
-
-class BookImporter:
-    """Class to handle importing user data from CSV files."""
-
-    def __init__(self, file, user, mode):
-        """Initialize the importer with file, user, and mode.
-
-        Args:
-            file: Uploaded CSV file object
-            user: Django user object to import data for
-            mode (str): Import mode ("new" or "overwrite")
-        """
-        self.file = file
-        self.user = user
-        self.mode = mode
-        self.warnings = []
-
-        # Track existing media for "new" mode
-        self.existing_media = helpers.get_existing_media(user)
-
-        # Track media IDs to delete in overwrite mode
-        self.to_delete = defaultdict(lambda: defaultdict(set))
-
-        # Track bulk creation lists for each media type
-        self.bulk_media = defaultdict(list)
-
-        logger.info(
-            "Initialized Book CSV importer for user %s with mode %s",
-            user.username,
-            mode,
-        )
-
-    def import_data(self):
-        """Import all user data from the CSV file."""
-        try:
-            decoded_file = self.file.read().decode("utf-8").splitlines()
-            # format:
-            # isbn,providerid,provider,title,read_start,read_end
-        except UnicodeDecodeError as e:
-            msg = "Invalid file format. Please upload a CSV file."
-            raise MediaImportError(msg) from e
-
-        fieldnames = ['isbn','providerid','provider','title','read_start','read_end','source','media_id','progress','status']
-        reader = DictReader(decoded_file,fieldnames=fieldnames)
-        #reader = DictReader(decoded_file)
-
-        for row in reader:
-            try:
-                self._process_row(row)
-            except Exception as error:
-                error_msg = f"Error processing entry: {row}"
-                raise MediaImportUnexpectedError(error_msg) from error
-
-        helpers.cleanup_existing_media(self.to_delete, self.user)
-        helpers.bulk_create_media(self.bulk_media, self.user)
-
-        imported_counts = {
-            media_type: len(media_list)
-            for media_type, media_list in self.bulk_media.items()
-        }
-
-        deduplicated_messages = "\n".join(dict.fromkeys(self.warnings))
-        return imported_counts, deduplicated_messages
-
-    def _process_row(self, row):
-        """Process a single row from the CSV file."""
-        media_type = MediaTypes.BOOK.value
-
-        if "provider" in row and row["provider"] == 'provider':
-            return
-
-        # Check if we should process this book based on mode
-        if not helpers.should_process_media(
-            self.existing_media,
-            self.to_delete,
-            media_type,
-            row["provider"],
-            row["providerid"],
-            self.mode,
-        ):
-            return
-
-        if row["title"] == "" or row["provider"] or row["providerid"] == "":
-            self._handle_missing_metadata(
-                row,
-                media_type
-            )
-
-        item, _ = app.models.Item.objects.update_or_create(
-            media_id=row["media_id"],
-            source=row["source"],
-            media_type=media_type,
-            defaults={
-                "title": row["title"],
-                "image": row["image"],
-            },
-        )
-
-        end_date_string = row.get("read_end")
-        if end_date_string:
-            end_date = self._parse_csv_date(end_date_string)
-        else:
-            end_date = datetime.now()
-
-        model = apps.get_model(app_label="app", model_name=media_type)
-        instance = model(
-            item=item,
-            user=self.user,
-            status=Status.COMPLETED.value,
-            score = 0,
-            progress = 0,
-            end_date = end_date,
-            notes = row["isbn"] or 'unknown',
-
-        )
-        instance._history_date = end_date
-
-
-        # row["item"] = item
-        # form = app.forms.get_form_class(media_type)(
-        #     row,
-        #     instance=instance,
-        #     )
-
-        self.bulk_media[media_type].append(instance)
-        # if form.is_valid():
-        #     progressed_at = row.get("end_date")
-        #     if progressed_at:
-        #         form.instance._history_date = parse_datetime(progressed_at)
-        #     self.bulk_media[media_type].append(form.instance)
-        # else:
-        #     error_msg = f"{row['title']} ({media_type}): {form.errors.as_json()}"
-        #     self.warnings.append(error_msg)
-        #     logger.error(error_msg)
-
-    def _handle_missing_metadata(self, row, media_type):
-        """Handle missing metadata by fetching from provider -
-        Format #isbn,providerid,provider,title,read_start,read_end """
-        try:
-            searchquery = row["isbn"] or row["title"]
-            if row["provider"] != "":
-                metadata = services.get_media_metadata(
-                    media_type,
-                    row["providerid"],
-                    row["provider"],
-                )
-                row["title"] = metadata["title"]
-                row["image"] = metadata["image"]
-                row["media_id"] = row["providerid"]
-                row["source"] = row["provider"]
-            else:
-                metadata = services.search(
-                    media_type,
-                    searchquery,
-                    1,
-                    Sources.HARDCOVER.value,
-                )
-                row["title"] = metadata["results"][0]["title"]
-                logger.info(f"Added title from hardcover: {row["title"]}")
-                row["source"] = Sources.HARDCOVER.value
-                row["media_id"] = metadata["results"][0]["media_id"]
-                logger.info(f"Obtained media id hardcover: {row["media_id"]}")
-
-                row["media_type"] = media_type
-                row["image"] = metadata["results"][0]["image"]
-                row["status"] = Status.COMPLETED.value
-                row["progress"] = '0'
-        except services.ProviderAPIError as e:
-            self.warnings.append(
-                f"Failed to fetch metadata for {row['media_id']}: {e!s}",
-            )
-            raise
-
-    def _parse_csv_date(self, date_str):
-        """Parse MAL date string (YYYY-MM-DD or YYYY/MM/DD) into datetime object."""
-        if date_str is None:
-            return None
-        result = datetime.now()
-        fmts = ["%Y-%m-%d","%Y/%m/%d"]
-        for fmt in fmts:
-            try:
-                result = datetime.strptime(date_str, fmt).replace(
-                    hour=0,
-                    minute=0,
-                    second=0,
-                    tzinfo=timezone.get_current_timezone(),
-                )
-                return result
-            except:
-                pass
-
-        return None
\ No newline at end of file

From d404886db87eb0c9603084813b7eac4d60d83827 Mon Sep 17 00:00:00 2001
From: psyciknz
Date: Wed, 25 Jun 2025 09:23:57 +1200
Subject: [PATCH 07/10] Removed mention of book_import function

---
 src/integrations/imports/yamtrack.py   | 4 ++--
 src/integrations/tests/test_imports.py | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/integrations/imports/yamtrack.py b/src/integrations/imports/yamtrack.py
index 2bb0df233..7bd88cbbf 100644
--- a/src/integrations/imports/yamtrack.py
+++ b/src/integrations/imports/yamtrack.py
@@ -197,10 +197,10 @@ def _handle_missing_book_metadata(self, row, media_type):
                 Sources.HARDCOVER.value,
             )
             row["title"] = metadata["results"][0]["title"]
-            logger.info(f"Added title from hardcover: {row["title"]}")
+            logger.info(f'Added title from hardcover: {row["title"]}')
             row["source"] = Sources.HARDCOVER.value
             row["media_id"] = metadata["results"][0]["media_id"]
-            logger.info(f"Obtained media id hardcover: {row["media_id"]}")
+            logger.info(f'Obtained media id hardcover: {row["media_id"]}')
 
             row["media_type"] = media_type
             row["image"] = metadata["results"][0]["image"]
diff --git a/src/integrations/tests/test_imports.py b/src/integrations/tests/test_imports.py
index 35ec95037..16624afb4 100644
--- a/src/integrations/tests/test_imports.py
+++ b/src/integrations/tests/test_imports.py
@@ -10,8 +10,8 @@
 from app.models import (TV, Anime, Book, Episode, Game, Item, Manga,
                         MediaTypes, Movie, Season, Sources, Status)
-from integrations.imports import (anilist, book_import, helpers, hltb, kitsu,
-                                  mal, simkl, yamtrack)
+from integrations.imports import (anilist, helpers, hltb, kitsu, mal, simkl,
+                                  yamtrack)
 from integrations.imports.trakt import TraktImporter, importer
 
 mock_path = Path(__file__).resolve().parent / "mock_data"
@@ -754,7 +754,7 @@ def setUp(self):
         """Create user for the tests."""
         self.credentials = {"username": "test", "password": "12345"}
         self.user = get_user_model().objects.create_user(**self.credentials)
-        with Path(mock_path / "import_books_yamtrack.csv").open("rb") as file:
+        with Path(mock_path / "import_books_yamtrack2.csv").open("rb") as file:
             self.import_results = yamtrack.importer(file, self.user, "new")

From d6e8eacd77f348187b94f0be94d6698a9de78746 Mon Sep 17 00:00:00 2001
From: psyciknz
Date: Sun, 6 Jul 2025 20:31:49 +1200
Subject: [PATCH 08/10] Removed extra book csv

---
 src/integrations/imports/yamtrack.py              | 4 ++--
 src/integrations/tests/mock_data/import_books.csv | 6 ------
 2 files changed, 2 insertions(+), 8 deletions(-)
 delete mode 100644 src/integrations/tests/mock_data/import_books.csv

diff --git a/src/integrations/imports/yamtrack.py b/src/integrations/imports/yamtrack.py
index 2bb0df233..b7dffad09 100644
--- a/src/integrations/imports/yamtrack.py
+++ b/src/integrations/imports/yamtrack.py
@@ -197,10 +197,10 @@ def _handle_missing_book_metadata(self, row, media_type):
                 Sources.HARDCOVER.value,
             )
             row["title"] = metadata["results"][0]["title"]
-            logger.info(f"Added title from hardcover: {row["title"]}")
+            logger.info(f"Added title from hardcover: {row['title']}")
             row["source"] = Sources.HARDCOVER.value
             row["media_id"] = metadata["results"][0]["media_id"]
-            logger.info(f"Obtained media id hardcover: {row["media_id"]}")
+            logger.info(f"Obtained media id hardcover: {row['media_id']}")
 
             row["media_type"] = media_type
             row["image"] = metadata["results"][0]["image"]
diff --git a/src/integrations/tests/mock_data/import_books.csv b/src/integrations/tests/mock_data/import_books.csv
deleted file mode 100644
index d091f5242..000000000
--- a/src/integrations/tests/mock_data/import_books.csv
+++ /dev/null
@@ -1,6 +0,0 @@
-isbn,providerid,provider,title,read_start,read_end
-0141017899,,,The World According to Clarkson,,2010-09-09
-0671027360,,,Angels & Demons,,2005/04/01
-0312980388,429650,hardcover,Warlock,,2005/07/22
-0312980388,,,Warlock,,2009/08/12
-,OL20037497W,openlibrary,,,2025/04/27

From b594d3e003d1aace31ef1045339c06fd6cefc8c2 Mon Sep 17 00:00:00 2001
From: psyciknz
Date: Mon, 7 Jul 2025 19:56:31 +1200
Subject: [PATCH 09/10] Used generic handle_missing_metadata

---
 src/integrations/imports/yamtrack.py   | 38 ++++++++++++++++++--
 src/integrations/tests/test_imports.py |  2 +-
 2 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/src/integrations/imports/yamtrack.py b/src/integrations/imports/yamtrack.py
index 91ebc34d2..0c388d53d 100644
--- a/src/integrations/imports/yamtrack.py
+++ b/src/integrations/imports/yamtrack.py
@@ -8,6 +8,7 @@
 
 import app
 from app.models import MediaTypes, Sources
+from app import media_type_config
 from app.providers import services
 from app.templatetags import app_tags
 from integrations.imports import helpers
@@ -165,19 +166,34 @@ def _handle_missing_metadata(self, row, media_type, season_number, episode_numbe
         """Handle missing metadata by fetching from provider."""
         if row["source"] == Sources.MANUAL.value and row["image"] == "":
             row["image"] = settings.IMG_NONE
-        elif media_type == MediaTypes.BOOK:
-            self._handle_missing_book_metadata(row,media_type)
         else:
             try:
-                metadata = services.get_media_metadata(
-                    media_type,
-                    row["media_id"],
-                    row["source"],
-                    season_number,
-                    episode_number,
-                )
-                row["title"] = metadata["title"]
-                row["image"] = metadata["image"]
+                if row["media_id"] is not None and row["media_id"] != "":
+                    metadata = services.get_media_metadata(
+                        media_type,
+                        row["media_id"],
+                        row["source"],
+                        season_number,
+                        episode_number,
+                    )
+                    row["title"] = metadata["title"]
+                    row["image"] = metadata["image"]
+                else:
+                    searchquery = row["title"]
+                    metadata = services.search(
+                        media_type,
+                        searchquery,
+                        1,
+                        media_type_config.get_default_source_name(media_type)
+                    )
+                    row["title"] = metadata["results"][0]["title"]
+                    logger.info(f"Added title from hardcover: {row['title']}")
+                    row["source"] = metadata["results"][0]["source"]
+                    row["media_id"] = metadata["results"][0]["media_id"]
+                    logger.info(f"Obtained media id hardcover: {row['media_id']}")
+
+                    row["media_type"] = media_type
+                    row["image"] = metadata["results"][0]["image"]
             except services.ProviderAPIError as e:
                 self.warnings.append(
                     f"Failed to fetch metadata for {row['media_id']}: {e!s}",
diff --git a/src/integrations/tests/test_imports.py b/src/integrations/tests/test_imports.py
index ff195d844..f2a081185 100644
--- a/src/integrations/tests/test_imports.py
+++ b/src/integrations/tests/test_imports.py
@@ -808,7 +808,7 @@ def setUp(self):
         """Create user for the tests."""
         self.credentials = {"username": "test", "password": "12345"}
         self.user = get_user_model().objects.create_user(**self.credentials)
-        with Path(mock_path / "import_books_yamtrack2.csv").open("rb") as file:
+        with Path(mock_path / "import_books_yamtrack.csv").open("rb") as file:
             self.import_results = yamtrack.importer(file, self.user, "new")

From 75c8197b7bf67ac5840209e884afd73c24c1b490 Mon Sep 17 00:00:00 2001
From: psyciknz
Date: Wed, 16 Jul 2025 20:09:56 +1200
Subject: [PATCH 10/10] Renamed "Book" test as yamtrack_partial. Added
 yamtrack partial csv file.

---
 src/integrations/imports/yamtrack.py                        |  4 ++--
 ...s_yamtrack.csv => import_yamtrack_partials.csv}          |  1 +
 src/integrations/tests/test_imports.py                      | 14 ++++++++------
 3 files changed, 11 insertions(+), 8 deletions(-)
 rename src/integrations/tests/mock_data/{import_books_yamtrack.csv => import_yamtrack_partials.csv} (81%)

diff --git a/src/integrations/imports/yamtrack.py b/src/integrations/imports/yamtrack.py
index 0c388d53d..42f44bdd1 100644
--- a/src/integrations/imports/yamtrack.py
+++ b/src/integrations/imports/yamtrack.py
@@ -187,10 +187,10 @@ def _handle_missing_metadata(self, row, media_type, season_number, episode_numbe
                         media_type_config.get_default_source_name(media_type)
                     )
                     row["title"] = metadata["results"][0]["title"]
-                    logger.info(f"Added title from hardcover: {row['title']}")
+                    logger.info(f"Added title from {media_type_config.get_default_source_name(media_type)}: {row['title']}")
                     row["source"] = metadata["results"][0]["source"]
                     row["media_id"] = metadata["results"][0]["media_id"]
-                    logger.info(f"Obtained media id hardcover: {row['media_id']}")
+                    logger.info(f"Obtained media id : {row['media_id']}")
 
                     row["media_type"] = media_type
                     row["image"] = metadata["results"][0]["image"]
diff --git a/src/integrations/tests/mock_data/import_books_yamtrack.csv b/src/integrations/tests/mock_data/import_yamtrack_partials.csv
similarity index 81%
rename from src/integrations/tests/mock_data/import_books_yamtrack.csv
rename to src/integrations/tests/mock_data/import_yamtrack_partials.csv
index 57e6a13ff..bbb94db3b 100644
--- a/src/integrations/tests/mock_data/import_books_yamtrack.csv
+++ b/src/integrations/tests/mock_data/import_yamtrack_partials.csv
@@ -2,3 +2,4 @@
 "","","book","Warlock","","","","","","Completed","2024-02-09","2024-03-09","Title Only",""
 "429650","hardcover","book","Warlock","","","","","","In progress","","2024-04-09","Media ID",""
 "","","book","0312980388","","","","","","Completed","2024-01-09","2024-05-09","ISBN 10 in title",""
+"","","movie","Perfect Blue","","","","9.0","1","Completed","","2024-02-09","","2024-02-09T15:30:00Z"
diff --git a/src/integrations/tests/test_imports.py b/src/integrations/tests/test_imports.py
index f2a081185..1a3cf5c44 100644
--- a/src/integrations/tests/test_imports.py
+++ b/src/integrations/tests/test_imports.py
@@ -801,19 +801,21 @@ def test_create_import_schedule_every_2_days(self):
         schedule = CrontabSchedule.objects.first()
         self.assertEqual(schedule.day_of_week, "*/2")
 
-class ImportBooks(TestCase):
-    """Test importing book media from Book CSV."""
+class ImportYamtrackPartials(TestCase):
+    """Test importing yamtrack media with no ID."""
 
     def setUp(self):
         """Create user for the tests."""
         self.credentials = {"username": "test", "password": "12345"}
         self.user = get_user_model().objects.create_user(**self.credentials)
-        with Path(mock_path / "import_books_yamtrack.csv").open("rb") as file:
+        with Path(mock_path / "import_yamtrack_partials.csv").open("rb") as file:
             self.import_results = yamtrack.importer(file, self.user, "new")
 
     def test_import_counts(self):
         """Test basic counts of imported media."""
         self.assertEqual(Book.objects.filter(user=self.user).count(), 3)
+        self.assertEqual(Movie.objects.filter(user=self.user).count(), 1)
+
 
     def test_historical_records(self):
         """Test historical records creation during import."""
@@ -826,17 +828,17 @@ def test_historical_records(self):
         bookqs = Book.objects.filter(
             user=self.user,
             item__title="Warlock",
-        )
+        ).order_by("-end_date")
         books = list(bookqs)
 
         self.assertEqual(len(books),3)
         self.assertEqual(
             books[0].end_date,
-            datetime(2024, 4, 9, 0, 0, 0, tzinfo=UTC),
+            datetime(2024, 5, 9, 0, 0, 0, tzinfo=UTC),
         )
         self.assertEqual(
             books[1].end_date,
-            datetime(2024, 5, 9, 0, 0, 0, tzinfo=UTC),
+            datetime(2024, 4, 9, 0, 0, 0, tzinfo=UTC),
         )
         self.assertEqual(
             books[2].end_date,