diff --git a/src/co_op_translator/config/constants.py b/src/co_op_translator/config/constants.py index 9a392a2a..a8e7256f 100644 --- a/src/co_op_translator/config/constants.py +++ b/src/co_op_translator/config/constants.py @@ -3,6 +3,9 @@ RGB_IMAGE_EXTENSIONS = {".jpg", ".jpeg"} SUPPORTED_IMAGE_EXTENSIONS = RGBA_IMAGE_EXTENSIONS.union(RGB_IMAGE_EXTENSIONS) +# Supported notebook file extensions +SUPPORTED_NOTEBOOK_EXTENSIONS = {".ipynb"} + EXCLUDED_DIRS = { "translations", "translated_images", diff --git a/src/co_op_translator/core/llm/__init__.py b/src/co_op_translator/core/llm/__init__.py index 68d59bf0..621308c4 100644 --- a/src/co_op_translator/core/llm/__init__.py +++ b/src/co_op_translator/core/llm/__init__.py @@ -6,10 +6,14 @@ OpenAITextTranslator, OpenAIMarkdownTranslator, ) +from co_op_translator.core.llm.jupyter_notebook_translator import ( + JupyterNotebookTranslator, +) __all__ = [ "AzureTextTranslator", "AzureMarkdownTranslator", "OpenAITextTranslator", "OpenAIMarkdownTranslator", + "JupyterNotebookTranslator", ] diff --git a/src/co_op_translator/core/llm/jupyter_notebook_translator.py b/src/co_op_translator/core/llm/jupyter_notebook_translator.py new file mode 100644 index 00000000..4a9d126e --- /dev/null +++ b/src/co_op_translator/core/llm/jupyter_notebook_translator.py @@ -0,0 +1,135 @@ +""" +Jupyter Notebook translator for translating .ipynb files. + +This module provides functionality to translate Jupyter Notebook files by +extracting markdown cells, translating them, and preserving code cells unchanged. +""" + +import json +import logging +from pathlib import Path +from typing import Dict, Any, List + +from .markdown_translator import MarkdownTranslator + +logger = logging.getLogger(__name__) + + +class JupyterNotebookTranslator: + """Handles translation of Jupyter Notebook (.ipynb) files. + + Translates markdown cells while preserving code cells, metadata, + and the overall notebook structure. 
+ """ + + def __init__(self, root_dir: Path = None): + """Initialize the notebook translator. + + Args: + root_dir: Root directory of the project for path calculations + """ + self.root_dir = root_dir + self.markdown_translator = MarkdownTranslator.create(root_dir) + + async def translate_notebook( + self, + notebook_path: str | Path, + language_code: str, + markdown_only: bool = False, + ) -> str: + """Translate a Jupyter Notebook file to the target language. + + Extracts markdown cells from the notebook, translates them using + the existing markdown translator, and reconstructs the notebook + with translated content. + + Args: + notebook_path: Path to the .ipynb file + language_code: Target language code + markdown_only: Skip embedded image translation if True + + Returns: + str: The translated notebook content as JSON string + """ + notebook_path = Path(notebook_path) + + # Read the notebook file + with open(notebook_path, 'r', encoding='utf-8') as f: + notebook = json.load(f) + + # Track which cells were translated for logging + translated_cells = 0 + total_markdown_cells = 0 + + # Process each cell + for cell in notebook.get('cells', []): + if cell.get('cell_type') == 'markdown': + total_markdown_cells += 1 + + # Extract the source content + source = cell.get('source', []) + if not source: + continue + + # Convert source to string (it might be a list of lines) + if isinstance(source, list): + markdown_content = ''.join(source) + else: + markdown_content = str(source) + + # Skip empty cells + if not markdown_content.strip(): + continue + + try: + # Translate the markdown content + # Don't add metadata and disclaimer to individual cells + translated_content = await self.markdown_translator.translate_markdown( + markdown_content, + language_code, + notebook_path, + markdown_only=markdown_only, + add_metadata=False, + add_disclaimer=False, + ) + + # Convert back to the original format (list or string) + if isinstance(source, list): + # Split by lines but preserve 
the original line ending behavior + translated_lines = translated_content.splitlines(keepends=True) + # Ensure lines end with \n if they don't already + translated_lines = [line if line.endswith('\n') else line + '\n' + for line in translated_lines] + cell['source'] = translated_lines + else: + cell['source'] = translated_content + + translated_cells += 1 + + except Exception as e: + logger.warning( + f"Failed to translate cell in {notebook_path}: {e}. " + f"Keeping original content." + ) + + logger.info( + f"Translated {translated_cells}/{total_markdown_cells} markdown cells " + f"in {notebook_path.name}" + ) + + # Return the modified notebook as JSON string + return json.dumps(notebook, ensure_ascii=False, indent=1) + + @classmethod + def create(cls, root_dir: Path = None) -> "JupyterNotebookTranslator": + """Create a Jupyter Notebook translator instance. + + Factory method for creating the translator. + + Args: + root_dir: Root directory of the project for path calculations + + Returns: + JupyterNotebookTranslator instance + """ + return cls(root_dir) \ No newline at end of file diff --git a/src/co_op_translator/core/llm/markdown_translator.py b/src/co_op_translator/core/llm/markdown_translator.py index 8c637a5f..8b66df27 100644 --- a/src/co_op_translator/core/llm/markdown_translator.py +++ b/src/co_op_translator/core/llm/markdown_translator.py @@ -90,6 +90,8 @@ async def translate_markdown( language_code: str, md_file_path: str | Path, markdown_only: bool = False, + add_metadata: bool = True, + add_disclaimer: bool = True, ) -> str: """Translate markdown document to target language. 
@@ -101,15 +103,19 @@ async def translate_markdown( language_code: Target language code md_file_path: Path to the markdown file markdown_only: Skip embedded image translation if True + add_metadata: Whether to add metadata comment at the beginning + add_disclaimer: Whether to add disclaimer at the end Returns: - str: The translated content with metadata, updated links and a disclaimer. + str: The translated content with optional metadata and disclaimer. """ md_file_path = Path(md_file_path) - # Create and format metadata - metadata = self.create_metadata(md_file_path, language_code) - metadata_comment = self.format_metadata_comment(metadata) + # Create and format metadata (only if requested) + metadata_comment = "" + if add_metadata: + metadata = self.create_metadata(md_file_path, language_code) + metadata_comment = self.format_metadata_comment(metadata) # Step 1: Replace code blocks and inline code with placeholders ( @@ -147,7 +153,7 @@ async def translate_markdown( translated_content, placeholder_map ) - # Step 5: Update links and add disclaimer + # Step 5: Update links updated_content = update_links( md_file_path, translated_content, @@ -155,10 +161,16 @@ async def translate_markdown( self.root_dir, markdown_only=markdown_only, ) - disclaimer = await self.generate_disclaimer(language_code) - updated_content = metadata_comment + updated_content + "\n\n" + disclaimer - - return updated_content + + # Step 6: Add metadata and disclaimer (only if requested) + result = updated_content + if add_metadata: + result = metadata_comment + result + if add_disclaimer: + disclaimer = await self.generate_disclaimer(language_code) + result = result + "\n\n" + disclaimer + + return result async def _run_prompts_sequentially(self, prompts): """Execute translation prompts in sequence with timeout protection. 
diff --git a/src/co_op_translator/core/project/project_translator.py b/src/co_op_translator/core/project/project_translator.py index f9f8913b..1b35eb85 100644 --- a/src/co_op_translator/core/project/project_translator.py +++ b/src/co_op_translator/core/project/project_translator.py @@ -4,6 +4,7 @@ from co_op_translator.core.llm import ( markdown_translator, text_translator, + JupyterNotebookTranslator, ) from co_op_translator.core.vision import ( image_translator, @@ -11,6 +12,7 @@ from co_op_translator.config.constants import ( EXCLUDED_DIRS, SUPPORTED_IMAGE_EXTENSIONS, + SUPPORTED_NOTEBOOK_EXTENSIONS, ) from .directory_manager import DirectoryManager @@ -67,6 +69,9 @@ def __init__(self, language_codes, root_dir=".", markdown_only=False): self.root_dir ) + # Initialize notebook translator + self.notebook_translator = JupyterNotebookTranslator.create(self.root_dir) + # Initialize directory and translation managers self.directory_manager = DirectoryManager( self.root_dir, self.translations_dir, self.language_codes, EXCLUDED_DIRS @@ -78,8 +83,10 @@ def __init__(self, language_codes, root_dir=".", markdown_only=False): self.language_codes, EXCLUDED_DIRS, SUPPORTED_IMAGE_EXTENSIONS, + SUPPORTED_NOTEBOOK_EXTENSIONS, self.markdown_translator, self.image_translator, + self.notebook_translator, self.markdown_only, ) @@ -105,10 +112,11 @@ def translate_project( async def check_and_retry_translations(self): """Check for outdated translations and translate missing content. - Performs a three-step process: + Performs a four-step process: 1. Identifies and updates outdated translations 2. Translates all markdown files that need translation - 3. Translates all image files that need translation (when available) + 3. Translates all notebook files that need translation + 4. 
Translates all image files that need translation (when available) Returns: Tuple containing (total_translated_count, combined_errors_list) @@ -128,6 +136,15 @@ async def check_and_retry_translations(self): if markdown_errors: logger.warning(f"Errors during markdown translation: {markdown_errors}") + # Translate all notebook files + ( + notebook_count, + notebook_errors, + ) = await self.translation_manager.translate_all_notebook_files() + logger.info(f"Translated {notebook_count} notebook files") + if notebook_errors: + logger.warning(f"Errors during notebook translation: {notebook_errors}") + # Translate images if image translator is available ( image_count, @@ -138,6 +155,6 @@ async def check_and_retry_translations(self): logger.warning(f"Errors during image translation: {image_errors}") return ( - modified_count + markdown_count + image_count, - errors + markdown_errors + image_errors, + modified_count + markdown_count + notebook_count + image_count, + errors + markdown_errors + notebook_errors + image_errors, ) diff --git a/src/co_op_translator/core/project/translation_manager.py b/src/co_op_translator/core/project/translation_manager.py index 5d061fc9..4beba6c9 100644 --- a/src/co_op_translator/core/project/translation_manager.py +++ b/src/co_op_translator/core/project/translation_manager.py @@ -18,6 +18,7 @@ ) from co_op_translator.utils.common.metadata_utils import calculate_file_hash from co_op_translator.core.llm.markdown_translator import MarkdownTranslator +from co_op_translator.core.llm.jupyter_notebook_translator import JupyterNotebookTranslator from co_op_translator.core.project.directory_manager import DirectoryManager from co_op_translator.config.constants import SUPPORTED_IMAGE_EXTENSIONS from co_op_translator.utils.common.task_utils import worker @@ -41,8 +42,10 @@ def __init__( language_codes: list[str], excluded_dirs: list[str], supported_image_extensions: list[str], + supported_notebook_extensions: list[str], markdown_translator: 
async def translate_notebook(self, file_path: Path, language_code: str) -> str:
    """Translate a Jupyter notebook file to the specified language.

    The translated notebook is written under
    ``self.translations_dir / language_code`` at the same path the source
    has relative to ``self.root_dir``. An empty source document is delegated
    to ``handle_empty_document`` instead of being translated.

    Args:
        file_path: Path to the notebook file.
        language_code: Target language code.

    Returns:
        Path to the translated notebook file if successful, otherwise an
        empty string. Failures are logged, never raised.
    """
    file_path = Path(file_path).resolve()
    try:
        document = read_input_file(file_path)

        # The destination is the same for the empty-document and the
        # translated case, so it is computed once.
        relative_path = file_path.relative_to(self.root_dir)
        translated_path = self.translations_dir / language_code / relative_path

        if not document:
            handle_empty_document(file_path, translated_path)
            return str(translated_path)

        # The constructor default for the notebook translator is None; report
        # that explicitly instead of surfacing an AttributeError message.
        if self.notebook_translator is None:
            logger.error(
                f"Translation failed for {file_path}: Notebook translator is not configured"
            )
            return ""

        translated_content = await self.notebook_translator.translate_notebook(
            file_path, language_code, markdown_only=self.markdown_only
        )
        if not translated_content:
            logger.error(
                f"Translation failed for {file_path}: Empty translation result"
            )
            return ""

        translated_path.parent.mkdir(parents=True, exist_ok=True)

        try:
            with open(translated_path, "w", encoding="utf-8") as f:
                f.write(translated_content)
        except Exception as e:
            logger.error(f"Failed to write translation to {translated_path}: {e}")
            return ""

        logger.info(
            f"Translated {file_path} to {language_code} and saved to {translated_path}"
        )
        return str(translated_path)

    except Exception as e:
        # Best effort per file: the caller counts an empty string as a failure.
        logger.error(f"Failed to translate {file_path}: {e}")
        return ""
async def translate_all_notebook_files(
    self, update: bool = False
) -> tuple[int, list[str]]:
    """Process and translate all Jupyter notebook files in the project directory.

    Notebook files are those returned by ``filter_files`` whose suffix is in
    ``self.supported_notebook_extensions``. One translation task is queued
    per (notebook, language) pair; pairs whose translated file already exists
    are skipped unless ``update`` is True.

    Args:
        update: Whether to update existing translations. When True, existing
            translated notebooks are deleted first and everything is
            translated again.

    Returns:
        Tuple containing (number_of_modified_files, error_messages_list)
    """
    modified_count = 0
    errors = []

    if not self.notebook_translator:
        logger.info("Notebook translator not available, skipping notebook files")
        return modified_count, errors

    # Delete existing translations when update mode is enabled
    if update:
        for language_code in self.language_codes:
            translation_dir = self.translations_dir / language_code
            if not translation_dir.exists():
                continue
            # Use the same extension set as discovery below, and collect the
            # paths before deleting so the tree is not modified mid-walk.
            stale_notebooks = [
                candidate
                for candidate in translation_dir.rglob("*")
                if candidate.is_file()
                and candidate.suffix.lower() in self.supported_notebook_extensions
            ]
            for notebook_file in stale_notebooks:
                notebook_file.unlink()
                logger.info(f"Deleted translated notebook: {notebook_file}")

    # Discover notebook files requiring translation
    notebook_files = [
        file.resolve()
        for file in filter_files(self.root_dir, self.excluded_dirs)
        if file.suffix.lower() in self.supported_notebook_extensions
    ]

    tasks = []
    task_info = []  # (file_path, language_code), kept parallel to tasks

    for notebook_file_path in notebook_files:
        for language_code in self.language_codes:
            relative_path = notebook_file_path.relative_to(self.root_dir)
            translated_notebook_path = (
                self.translations_dir / language_code / relative_path
            )

            if not update and translated_notebook_path.exists():
                logger.info(
                    f"Skipping already translated notebook file: {translated_notebook_path}"
                )
                continue

            logger.info(
                f"Translating notebook file: {notebook_file_path} for language: {language_code}"
            )
            # Default arguments bind the current loop values; a plain closure
            # would see only the last iteration's values when it is run later.
            tasks.append(
                lambda notebook_file_path=notebook_file_path, language_code=language_code: self.translate_notebook(
                    notebook_file_path, language_code
                )
            )
            task_info.append((str(notebook_file_path), language_code))

    if not tasks:
        if notebook_files:
            # Notebooks exist but every translation is already present.
            logger.info("All notebook files are already translated.")
        else:
            logger.warning("No notebook files found for translation.")
        return modified_count, errors

    # Process translations sequentially to avoid rate limiting
    results = await self.process_api_requests_sequential(
        tasks, "📓 Translating notebook files"
    )

    # A truthy result is the path of the written file; falsy means failure.
    modified_count = sum(1 for r in results if r)
    errors = [
        f"Failed to translate notebook file: {file_path} (lang: {lang_code})"
        for (file_path, lang_code), result in zip(task_info, results)
        if not result
    ]

    return modified_count, errors
"""
Tests for JupyterNotebookTranslator functionality.
"""

import json
import pytest
from pathlib import Path
from unittest.mock import AsyncMock, MagicMock, patch

from co_op_translator.core.llm.jupyter_notebook_translator import JupyterNotebookTranslator

# Patch target: the MarkdownTranslator name as imported by the module under test.
MARKDOWN_TRANSLATOR_TARGET = (
    "co_op_translator.core.llm.jupyter_notebook_translator.MarkdownTranslator"
)


def _write_notebook(path, cells):
    """Write a minimal nbformat-4 notebook containing *cells* to *path*."""
    notebook = {"cells": cells, "metadata": {}, "nbformat": 4, "nbformat_minor": 4}
    with open(path, "w", encoding="utf-8") as f:
        json.dump(notebook, f)
    return path


@pytest.fixture
def sample_notebook():
    """Sample notebook content for testing."""
    return {
        "cells": [
            {
                "cell_type": "markdown",
                "metadata": {},
                "source": ["# Hello World\n", "\n", "This is a test notebook."],
            },
            {
                "cell_type": "code",
                "execution_count": None,
                "metadata": {},
                "outputs": [],
                "source": [
                    "print('Hello, World!')\n",
                    "# This should not be translated",
                ],
            },
            {
                "cell_type": "markdown",
                "metadata": {},
                "source": ["## Section 2\n", "\n", "Another markdown cell."],
            },
        ],
        "metadata": {
            "kernelspec": {
                "display_name": "Python 3",
                "language": "python",
                "name": "python3",
            }
        },
        "nbformat": 4,
        "nbformat_minor": 4,
    }


@pytest.fixture
def temp_notebook_file(tmp_path, sample_notebook):
    """Create a temporary notebook file for testing."""
    notebook_file = tmp_path / "test.ipynb"
    with open(notebook_file, "w", encoding="utf-8") as f:
        json.dump(sample_notebook, f, ensure_ascii=False, indent=1)
    return notebook_file


class TestJupyterNotebookTranslator:
    """Test cases for JupyterNotebookTranslator."""

    @patch(MARKDOWN_TRANSLATOR_TARGET)
    def test_create_translator(self, mock_markdown_translator_class):
        """Test that translator can be created."""
        mock_markdown_translator_class.create.return_value = MagicMock()
        translator = JupyterNotebookTranslator.create()
        assert translator is not None
        assert translator.root_dir is None

    @patch(MARKDOWN_TRANSLATOR_TARGET)
    def test_create_translator_with_root_dir(self, mock_markdown_translator_class, tmp_path):
        """Test that translator can be created with root directory."""
        mock_markdown_translator_class.create.return_value = MagicMock()
        translator = JupyterNotebookTranslator.create(tmp_path)
        assert translator.root_dir == tmp_path

    @patch(MARKDOWN_TRANSLATOR_TARGET)
    @pytest.mark.asyncio
    async def test_translate_notebook_basic(
        self, mock_markdown_translator_class, temp_notebook_file, sample_notebook
    ):
        """Test basic notebook translation functionality."""
        mock_translator = AsyncMock()
        mock_translator.translate_markdown = AsyncMock(
            return_value="# Translated Content\n\nTranslated text."
        )
        mock_markdown_translator_class.create.return_value = mock_translator

        translator = JupyterNotebookTranslator.create()
        result = await translator.translate_notebook(
            temp_notebook_file, "es", markdown_only=True
        )

        # Result is valid JSON with the same cell count and notebook metadata
        translated_notebook = json.loads(result)
        assert "cells" in translated_notebook
        assert len(translated_notebook["cells"]) == 3
        assert translated_notebook["metadata"] == sample_notebook["metadata"]

        # Exactly the two markdown cells were sent for translation, with
        # per-cell metadata and disclaimer generation switched off.
        assert mock_translator.translate_markdown.call_count == 2
        for call in mock_translator.translate_markdown.call_args_list:
            assert call.args[1] == "es"
            assert call.kwargs["markdown_only"] is True
            assert call.kwargs["add_metadata"] is False
            assert call.kwargs["add_disclaimer"] is False

        # The translated text actually landed in the markdown cells
        for index in (0, 2):
            cell = translated_notebook["cells"][index]
            assert cell["cell_type"] == "markdown"
            assert "".join(cell["source"]).startswith("# Translated Content")

        # Code cell remains unchanged
        code_cell = translated_notebook["cells"][1]
        assert code_cell["cell_type"] == "code"
        assert code_cell["source"] == sample_notebook["cells"][1]["source"]

    @patch(MARKDOWN_TRANSLATOR_TARGET)
    @pytest.mark.asyncio
    async def test_translate_notebook_empty_cells(self, mock_markdown_translator_class, tmp_path):
        """Test translation with empty markdown cells."""
        notebook_file = _write_notebook(
            tmp_path / "empty_test.ipynb",
            [
                {"cell_type": "markdown", "metadata": {}, "source": []},
                {"cell_type": "markdown", "metadata": {}, "source": ""},
                {"cell_type": "code", "metadata": {}, "source": ["print('test')"]},
            ],
        )

        mock_translator = AsyncMock()
        mock_markdown_translator_class.create.return_value = mock_translator

        translator = JupyterNotebookTranslator.create()
        result = await translator.translate_notebook(notebook_file, "es")

        # No translation calls are made for empty cells
        mock_translator.translate_markdown.assert_not_called()

        translated_notebook = json.loads(result)
        assert len(translated_notebook["cells"]) == 3

    @patch(MARKDOWN_TRANSLATOR_TARGET)
    @pytest.mark.asyncio
    async def test_translate_notebook_with_list_source(self, mock_markdown_translator_class, tmp_path):
        """Test translation with source as list of strings."""
        notebook_file = _write_notebook(
            tmp_path / "list_test.ipynb",
            [
                {
                    "cell_type": "markdown",
                    "metadata": {},
                    "source": ["# Title\n", "\n", "Some content"],
                }
            ],
        )

        mock_translator = AsyncMock()
        mock_translator.translate_markdown = AsyncMock(
            return_value="# Translated Title\n\nTranslated content"
        )
        mock_markdown_translator_class.create.return_value = mock_translator

        translator = JupyterNotebookTranslator.create()
        result = await translator.translate_notebook(notebook_file, "es")

        # The list was joined into one string before being translated
        mock_translator.translate_markdown.assert_called_once()
        assert mock_translator.translate_markdown.call_args.args[0] == "# Title\n\nSome content"

        # Result maintains list format
        translated_notebook = json.loads(result)
        cell_source = translated_notebook["cells"][0]["source"]
        assert isinstance(cell_source, list)
        assert all(isinstance(line, str) for line in cell_source)
        assert "".join(cell_source).startswith("# Translated Title\n\nTranslated content")

    @patch(MARKDOWN_TRANSLATOR_TARGET)
    @pytest.mark.asyncio
    async def test_translate_notebook_with_string_source(self, mock_markdown_translator_class, tmp_path):
        """Test that a string-typed source stays a string after translation."""
        notebook_file = _write_notebook(
            tmp_path / "string_test.ipynb",
            [{"cell_type": "markdown", "metadata": {}, "source": "## Section\n\nText"}],
        )

        mock_translator = AsyncMock()
        mock_translator.translate_markdown = AsyncMock(return_value="## Seccion\n\nTexto")
        mock_markdown_translator_class.create.return_value = mock_translator

        translator = JupyterNotebookTranslator.create()
        result = await translator.translate_notebook(notebook_file, "es")

        translated_notebook = json.loads(result)
        assert translated_notebook["cells"][0]["source"] == "## Seccion\n\nTexto"

    @patch(MARKDOWN_TRANSLATOR_TARGET)
    @pytest.mark.asyncio
    async def test_translate_notebook_keeps_original_on_failure(
        self, mock_markdown_translator_class, temp_notebook_file, sample_notebook
    ):
        """Test that cells keep their original content when translation raises."""
        mock_translator = AsyncMock()
        mock_translator.translate_markdown = AsyncMock(side_effect=RuntimeError("boom"))
        mock_markdown_translator_class.create.return_value = mock_translator

        translator = JupyterNotebookTranslator.create()
        result = await translator.translate_notebook(temp_notebook_file, "es")

        # Both markdown cells were attempted, none was modified
        assert mock_translator.translate_markdown.call_count == 2
        translated_notebook = json.loads(result)
        assert translated_notebook["cells"] == sample_notebook["cells"]
"""
Integration tests for Jupyter Notebook translation functionality.
"""

import json
import pytest
from pathlib import Path
from unittest.mock import AsyncMock, MagicMock, patch

from co_op_translator.core.project.project_translator import ProjectTranslator


@pytest.fixture
def temp_project_with_notebook(tmp_path):
    """Create a temporary project directory with notebook files."""
    notebook_content = {
        "cells": [
            {
                "cell_type": "markdown",
                "metadata": {},
                "source": ["# Test Notebook\n", "\n", "This is a test for translation."],
            },
            {
                "cell_type": "code",
                "execution_count": None,
                "metadata": {},
                "outputs": [],
                "source": ["print('Hello, World!')"],
            },
            {
                "cell_type": "markdown",
                "metadata": {},
                "source": "## Section 2\n\nAnother section to translate.",
            },
        ],
        "metadata": {
            "kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}
        },
        "nbformat": 4,
        "nbformat_minor": 4,
    }

    notebook_file = tmp_path / "test_notebook.ipynb"
    with open(notebook_file, "w", encoding="utf-8") as f:
        json.dump(notebook_content, f, ensure_ascii=False, indent=1)

    # Markdown file alongside the notebook, for comparison
    (tmp_path / "test.md").write_text("# Test Document\nThis is a test.")

    return tmp_path


class TestJupyterNotebookIntegration:
    """Integration tests for Jupyter Notebook translation."""

    @patch("co_op_translator.core.llm.text_translator.TextTranslator")
    @patch("co_op_translator.core.llm.markdown_translator.MarkdownTranslator")
    @patch("co_op_translator.core.llm.jupyter_notebook_translator.MarkdownTranslator")
    @patch("co_op_translator.config.llm_config.config.LLMConfig.get_available_provider")
    def test_project_translator_includes_notebooks(
        self,
        mock_get_provider,
        mock_jupyter_markdown_translator,
        mock_markdown_translator,
        mock_text_translator,
        temp_project_with_notebook,
    ):
        """Test that ProjectTranslator correctly initializes notebook translator."""
        mock_get_provider.return_value = "azure"
        mock_text_translator.create.return_value = MagicMock()
        mock_markdown_translator.create.return_value = MagicMock()
        mock_jupyter_markdown_translator.create.return_value = MagicMock()

        translator = ProjectTranslator("ko", root_dir=temp_project_with_notebook, markdown_only=True)

        # The notebook translator exists and the manager holds that same object
        assert translator.notebook_translator is not None
        assert translator.translation_manager.notebook_translator is translator.notebook_translator

    @patch("co_op_translator.core.llm.text_translator.TextTranslator")
    @patch("co_op_translator.core.llm.markdown_translator.MarkdownTranslator")
    @patch("co_op_translator.core.llm.jupyter_notebook_translator.MarkdownTranslator")
    @patch("co_op_translator.config.llm_config.config.LLMConfig.get_available_provider")
    @pytest.mark.asyncio
    async def test_translation_manager_processes_notebooks(
        self,
        mock_get_provider,
        mock_jupyter_markdown_translator,
        mock_markdown_translator,
        mock_text_translator,
        temp_project_with_notebook,
    ):
        """Test that TranslationManager processes notebook files."""
        mock_get_provider.return_value = "azure"
        mock_text_translator.create.return_value = MagicMock()

        mock_md_translator = AsyncMock()
        mock_md_translator.translate_markdown = AsyncMock(return_value="# Translated\nTranslated content")
        mock_markdown_translator.create.return_value = mock_md_translator

        mock_jupyter_md_translator = AsyncMock()
        mock_jupyter_md_translator.translate_markdown = AsyncMock(return_value="# Translated\nTranslated content")
        mock_jupyter_markdown_translator.create.return_value = mock_jupyter_md_translator

        translator = ProjectTranslator("ko", root_dir=temp_project_with_notebook, markdown_only=True)

        # Replace the notebook translator with a mock returning a fixed notebook
        mock_notebook_translator = AsyncMock()
        translated_notebook = {
            "cells": [
                {"cell_type": "markdown", "metadata": {}, "source": ["# Translated\n", "Translated content"]},
                {"cell_type": "code", "metadata": {}, "source": ["print('Hello, World!')"]},
                {"cell_type": "markdown", "metadata": {}, "source": "## Translated Section\nTranslated content"},
            ],
            "metadata": {
                "kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}
            },
            "nbformat": 4,
            "nbformat_minor": 4,
        }
        mock_notebook_translator.translate_notebook = AsyncMock(
            return_value=json.dumps(translated_notebook, ensure_ascii=False, indent=1)
        )
        translator.translation_manager.notebook_translator = mock_notebook_translator

        total_modified, errors = await translator.translation_manager.translate_all_notebook_files()

        # The project's only notebook was handed to the notebook translator
        assert mock_notebook_translator.translate_notebook.call_count >= 1
        mock_notebook_translator.translate_notebook.assert_awaited()
        awaited_path = mock_notebook_translator.translate_notebook.call_args.args[0]
        assert Path(awaited_path).name == "test_notebook.ipynb"

        # Every queued task ends up as either a success or a reported error
        assert isinstance(errors, list)
        assert total_modified + len(errors) >= 1
        assert all("notebook" in message for message in errors)

    @patch("co_op_translator.core.llm.text_translator.TextTranslator")
    @patch("co_op_translator.core.llm.markdown_translator.MarkdownTranslator")
    @patch("co_op_translator.core.llm.jupyter_notebook_translator.MarkdownTranslator")
    @patch("co_op_translator.config.llm_config.config.LLMConfig.get_available_provider")
    @pytest.mark.asyncio
    async def test_project_translator_handles_no_notebook_translator(
        self,
        mock_get_provider,
        mock_jupyter_markdown_translator,
        mock_markdown_translator,
        mock_text_translator,
        temp_project_with_notebook,
    ):
        """Test that system gracefully handles missing notebook translator."""
        mock_get_provider.return_value = "azure"
        mock_text_translator.create.return_value = MagicMock()
        mock_markdown_translator.create.return_value = MagicMock()
        mock_jupyter_markdown_translator.create.return_value = MagicMock()

        translator = ProjectTranslator("ko", root_dir=temp_project_with_notebook, markdown_only=True)

        # Simulate missing notebook translator
        translator.translation_manager.notebook_translator = None

        # Notebook processing is skipped: nothing translated, nothing reported
        result = await translator.translation_manager.translate_all_notebook_files()
        assert result == (0, [])

    def test_constants_include_notebook_extensions(self):
        """Test that notebook extensions are included in constants."""
        from co_op_translator.config.constants import SUPPORTED_NOTEBOOK_EXTENSIONS

        assert ".ipynb" in SUPPORTED_NOTEBOOK_EXTENSIONS