diff --git a/.gitignore b/.gitignore index 81be09e..1814dca 100644 --- a/.gitignore +++ b/.gitignore @@ -22,6 +22,9 @@ sdist/ var/ .idea/ .idea +.cached +*.pdf +*.log wheels/ *.egg-info/ .installed.cfg diff --git a/PDF-Translator-for-Human.jpg b/PDF-Translator-for-Human.jpg new file mode 100644 index 0000000..7199a9d Binary files /dev/null and b/PDF-Translator-for-Human.jpg differ diff --git a/README.md b/README.md new file mode 100644 index 0000000..a141dcb --- /dev/null +++ b/README.md @@ -0,0 +1,123 @@ +--- +title: Pdf Translator For Human +emoji: 🦀 +colorFrom: indigo +colorTo: green +sdk: streamlit +sdk_version: 1.42.0 +app_file: app.py +pinned: false +license: apache-2.0 +short_description: PDF Translator powered by local llm, side by side reading +--- + +# PDF Translator for Human: A PDF Reader/Translator with Local LLM/ChatGPT or Google + +## Use Case + +There are tons of AI-supported PDF readers/translators. However, none of them meets my needs. I want one that can run entirely locally with local LLMs. + +I hope to read both the original PDF and the translated pages side by side. +Also, I don't want to translate a 1000-page PDF file all at once; it costs a lot of time and tokens. And most of the time, I never finish reading the entire content of a long paper. + +## Features in PDF Translator for Human +You can read both the original PDF file and the translated content side by side. + +The local/remote translation API is invoked on a per-page basis as needed, triggered by page turns during reading. + +## Snapshot + +![PDF Translator for Human](PDF-Translator-for-Human.jpg) + +## Huggingface Space + +https://huggingface.co/spaces/davideuler/pdf-translator-for-human + +## Supported translators and LLMs: +* Google Translator (no API key needed, it is totally free) +* Locally deployed LLMs (ollama, llama.cpp, mlx_lm ... etc.) 
+* ChatGPT +* DeepSeek (Use the OpenAI Compatible endpoint at https://api.deepseek.com/v1) +* Qwen (Use the OpenAI Compatible endpoint) + +* Other OpenAI Compatible LLMs like GLM/Moonshot etc. + +## Start the Web Application for PDF Translator for Human + + +``` bash +./run_translator_web.sh + +# or just start the streamlit application if you have run the previous script: +streamlit run app.py + +``` + +## Notes on deployment and starting a local llm inference service + +### Option 1. Start a local LLM with mlx_lm (works on Apple Silicon Macs) + +Here I download aya-expanse-8b 4bit as an example. + +``` Bash +# download mlx models from huggingface to local folder +git clone https://huggingface.co/mlx-community/aya-expanse-8b-4bit + +# install mlx_lm +pip install mlx_lm + +# start the server +mlx_lm.server --model ./aya-expanse-8b-4bit --port 8080 + +``` + +### Option 2. By llama.cpp (Works on CPU/GPU/Mac Machines) + +llama.cpp works on CPU machines and on Intel/Apple Silicon Macs; you need 48GB of memory for aya-expanse-32b-q4_k_m.gguf. + +``` Bash +# download gguf models from huggingface to local folder +wget https://hf-mirror.co/bartowski/aya-expanse-32b-GGUF/resolve/main/aya-expanse-32b-Q4_K_M.gguf -O aya-expanse-32b-Q4_K_M.gguf + +# download llama.cpp and install llama.cpp +git clone https://github.com/ggerganov/llama.cpp +cd llama.cpp +mkdir -p build && cmake -B build +cmake --build build --config Release -j 12 + +# start llama.cpp server +./llama-server -m ~/models/aya-expanse-32b-Q4_K_M.gguf --port 8080 + +``` + +### Option 3. Local inference service by ollama/vLLM and other applications such as LMStudio + +Please read the official guide for your LLM inference tool. + +### Option 4. 
Note on using OpenAI Compatible LLM service provider + +For example, run the following command before start the streamlit application to enable translation by deepseek : + +``` bash +export OPENAI_MODEL=deepseek-chat +export OPENAI_API_BASE=https://api.deepseek.com/v1 +export OPENAI_API_KEY=sk-xxxx +``` + +Run the following command before start the streamlit application to enable translation by moonshot : + +``` bash +export OPENAI_MODEL=moonshot-v1-8k +export OPENAI_API_BASE=https://api.moonshot.cn/v1 +export OPENAI_API_KEY=sk-xxxx +``` + + + +## Acknowlegement + +https://github.com/nidhaloff/deep-translator + +The project is based on the awesome deep-translator. Thanks to the excellent work in the original project, I can integrate it to the pdf translator tool. + +Pull Requests are welcome. \ No newline at end of file diff --git a/app.py b/app.py new file mode 100644 index 0000000..6fa8837 --- /dev/null +++ b/app.py @@ -0,0 +1,567 @@ +import os +import json +import hashlib +from pathlib import Path +import streamlit as st +import pymupdf +from deep_translator import ( + GoogleTranslator, +) +from deep_translator.openai_compatible import OpenAICompatibleTranslator +import logging +import argparse + +# Constants +DEFAULT_PAGES_PER_LOAD = 2 +DEFAULT_MODEL = "default_model" +DEFAULT_API_BASE = "http://localhost:8080/v1" + +# Supported translators +TRANSLATORS = { + 'OpenAI Compatible': OpenAICompatibleTranslator, + 'OpenAI': OpenAICompatibleTranslator, + 'Google': GoogleTranslator, +} + +# Color options +COLOR_MAP = { + "darkred": (0.8, 0, 0), + "black": (0, 0, 0), + "blue": (0, 0, 0.8), + "darkgreen": (0, 0.5, 0), + "purple": (0.5, 0, 0.5), +} + +# Target language options for ChatGPT +LANGUAGE_OPTIONS = { + "简体中文": "zh-CN", + "繁體中文": "zh-TW", + "English": "en", + "日本語": "ja", + "한국어": "ko", + "Español": "es", + "Français": "fr", + "Deutsch": "de", +} + +# Add source language options +SOURCE_LANGUAGE_OPTIONS = { + "English": "en", + "简体中文": "zh-CN", + "繁體中文": 
"zh-TW", + "日本語": "ja", + "한국어": "ko", + "Español": "es", + "Français": "fr", + "Deutsch": "de", + "Auto": "auto", +} + +# Global translation configuration +TRANSLATOR_CONFIG = { + "type": "Google", # Options: "Google" or "OpenAI" + # OpenAI settings (used only if type is "OpenAI") + "openai": { + "default_api_base": DEFAULT_API_BASE, + "default_model": DEFAULT_MODEL, # "gpt-4o-mini", + "default_api_key": "sk-xxx" + }, + # Google settings (used only if type is "Google") + "google": { + "default_api_base": "https://translate.googleapis.com" + } +} + +# Add argument parser +def parse_args(): + parser = argparse.ArgumentParser(description='PDF Translator Application') + parser.add_argument( + '--translator', + type=str, + choices=['google', 'openai'], + default='google', + help='Specify translator type: google or openai' + ) + parser.add_argument( + '--api-base', + type=str, + help='API base URL for the translator' + ) + parser.add_argument( + '--api-key', + type=str, + help='API key for OpenAI compatible translator' + ) + parser.add_argument( + '--model', + type=str, + help='Model name for OpenAI compatible translator' + ) + return parser.parse_args() + +# Update TRANSLATOR_CONFIG based on command line arguments +def update_translator_config(args): + global TRANSLATOR_CONFIG + + TRANSLATOR_CONFIG["type"] = "Google" if args.translator.lower() == "google" else "OpenAI" + + if args.translator.lower() == "google": + if args.api_base: + TRANSLATOR_CONFIG["google"]["default_api_base"] = args.api_base + else: # OpenAI + if args.api_base: + TRANSLATOR_CONFIG["openai"]["default_api_base"] = args.api_base + if args.api_key: + TRANSLATOR_CONFIG["openai"]["default_api_key"] = args.api_key + if args.model: + TRANSLATOR_CONFIG["openai"]["default_model"] = args.model + +def get_cache_dir(): + """Get or create cache directory""" + cache_dir = Path('.cached') + cache_dir.mkdir(exist_ok=True) + return cache_dir + +def get_cache_key(doc_info: dict, page_num: int, translator_name: str, 
target_lang: str, text_content: str): + """Generate cache key for a specific page translation""" + # 使用文档信息和页面内容的组合生成唯一标识 + content_hash = hashlib.md5(text_content.encode('utf-8')).hexdigest()[:8] + doc_id = f"{doc_info.get('title', '')}_{doc_info.get('author', '')}_{doc_info.get('pagecount', '')}" + doc_hash = hashlib.md5(doc_id.encode('utf-8')).hexdigest()[:8] + return f"{doc_hash}_{content_hash}_page{page_num}_{translator_name}_{target_lang}.pdf" + +def get_cached_translation(cache_key: str) -> pymupdf.Document: + """Get cached translation if exists""" + cache_path = get_cache_dir() / cache_key + if cache_path.exists(): + try: + return pymupdf.open(str(cache_path)) + except Exception as e: + logging.error(f"Error loading cache: {str(e)}") + return None + return None + +def save_translation_cache(doc: pymupdf.Document, cache_key: str): + """Save translation to cache""" + cache_path = get_cache_dir() / cache_key + doc.save(str(cache_path)) # 确保提供文件路径字符串 + +def translate_pdf_pages(doc, doc_bytes, start_page, num_pages, translator, text_color, translator_name, target_lang): + """Translate specific pages of a PDF document with progress and caching""" + # Log translator information + logging.info(f"Using translator: {translator_name}, source: {translator._source}, target: {translator._target}") + logging.info(f"Selected translator: {translator_name}, Class: {translator.__class__.__name__}") + + WHITE = pymupdf.pdfcolor["white"] + rgb_color = COLOR_MAP.get(text_color.lower(), COLOR_MAP["darkred"]) + + translated_pages = [] + total_pages = min(start_page + num_pages, doc.page_count) - start_page + cache_hits = 0 + + # Create a progress bar + progress_bar = st.progress(0) + status_text = st.empty() + + for i, page_num in enumerate(range(start_page, min(start_page + num_pages, doc.page_count))): + status_text.text(f"Translating page {page_num + 1}...") + + # Extract text content for cache key + page = doc[page_num] + text_content = page.get_text("text") + + # Check cache 
first using text content + cache_key = get_cache_key( + doc.metadata, + page_num, + translator_name, + target_lang, + text_content + ) + + cached_doc = get_cached_translation(cache_key) + + if cached_doc is not None: + translated_pages.append(cached_doc) + cache_hits += 1 + logging.info(f"Cache hit: Using cached translation for page {page_num + 1}") + status_text.text(f"Using cached translation for page {page_num + 1}") + else: + logging.info(f"Cache miss: Translating page {page_num + 1}") + status_text.text(f"Translating page {page_num + 1} (not in cache)") + + # Create a new PDF document for this page + new_doc = pymupdf.open() + new_doc.insert_pdf(doc, from_page=page_num, to_page=page_num) + page = new_doc[0] + + # Extract and translate text blocks + blocks = page.get_text("blocks", flags=pymupdf.TEXT_DEHYPHENATE) + + for block in blocks: + bbox = block[:4] + text = block[4] + translated = translator.translate(text) + translated = str(translated) # Ensure the value is a string + + # Cover original text with white and add translation in color + page.draw_rect(bbox, color=None, fill=WHITE) + page.insert_htmlbox( + bbox, + translated, + css=f"* {{font-family: sans-serif; color: rgb({int(rgb_color[0]*255)}, {int(rgb_color[1]*255)}, {int(rgb_color[2]*255)});}}" + ) + + # Save to cache + save_translation_cache(new_doc, cache_key) + translated_pages.append(new_doc) + logging.info(f"Cached new translation for page {page_num + 1}") + + # Update progress + progress = (i + 1) / total_pages + progress_bar.progress(progress) + + # Clear progress indicators and show summary + progress_bar.empty() + if cache_hits > 0: + st.info(f"Used cache for {cache_hits} out of {total_pages} pages") + + return translated_pages + +def get_page_image(page, scale=2): + """Get high quality image from PDF page""" + # 计算缩放后的尺寸 + zoom = scale + mat = pymupdf.Matrix(zoom, zoom) + + # 使用较低分辨率渲染页面,但保持清晰度 + pix = page.get_pixmap( + matrix=mat, + alpha=False, + colorspace="rgb", # Use RGB instead of 
RGBA + ) + + return pix + +def translate_all_pages( + input_doc, + output_doc, + translator, + progress_bar, + batch_size=1, + **kwargs +): + """Translate all pages of the PDF document""" + # Log translator information for full document translation + logging.info(f"Starting full document translation with: {kwargs.get('translator_name', 'unknown')}") + logging.info(f"Translator settings - source: {translator._source}, target: {translator._target}") + + # Define colors + WHITE = pymupdf.pdfcolor["white"] + rgb_color = COLOR_MAP.get(kwargs.get('text_color', 'darkred').lower(), COLOR_MAP["darkred"]) + + total_pages = input_doc.page_count + + # Create a progress bar for overall progress + status_text = st.empty() + + # Translate all pages using translate_pdf_pages + translated_pages = translate_pdf_pages( + input_doc, + None, # doc_bytes not needed as we're using text content for cache + 0, # start from first page + total_pages, # translate all pages + translator, + kwargs.get('text_color', 'darkred'), + kwargs.get('translator_name', 'google'), + kwargs.get('target_lang', 'zh-CN') + ) + + # Combine all pages into one PDF with compression + output_path = kwargs.get('output_path', 'output.pdf') + for trans_doc in translated_pages: + output_doc.insert_pdf(trans_doc) + + # Save with compression options + output_doc.save( + output_path, + garbage=4, + deflate=True, + clean=True, + linear=True + ) + + return output_doc + +def init_session_state(): + """Initialize session state variables""" + if 'current_page' not in st.session_state: + st.session_state.current_page = 0 + if 'translation_started' not in st.session_state: + st.session_state.translation_started = True + if 'all_translated' not in st.session_state: + st.session_state.all_translated = False + if 'translated_doc' not in st.session_state: + st.session_state.translated_doc = None + if 'previous_file' not in st.session_state: + st.session_state.previous_file = None + if 'api_settings' not in st.session_state: + 
st.session_state.api_settings = {} + +def main(): + st.set_page_config(layout="wide", page_title="PDF Translator for Human") + st.title("PDF Translator for Human") + + # Initialize session state + init_session_state() + + # Sidebar configuration + with st.sidebar: + st.header("Settings") + + uploaded_file = st.file_uploader("Choose a PDF file", type="pdf") + + # Reset session state when a new file is uploaded + if uploaded_file is not None and (st.session_state.previous_file is None or + uploaded_file.name != st.session_state.previous_file): + st.session_state.current_page = 0 + st.session_state.translation_started = True + st.session_state.all_translated = False + st.session_state.translated_doc = None + st.session_state.previous_file = uploaded_file.name + st.rerun() + + # Add source language selection + source_lang_name = st.selectbox( + "Source Language", + options=list(SOURCE_LANGUAGE_OPTIONS.keys()), + index=0 # Default to English + ) + source_lang = SOURCE_LANGUAGE_OPTIONS[source_lang_name] + + pages_per_load = st.number_input( + "Pages per load", + min_value=1, + max_value=5, + value=DEFAULT_PAGES_PER_LOAD + ) + + text_color = st.selectbox( + "Translation Color", + options=list(COLOR_MAP.keys()), + index=0 + ) + + target_lang = st.selectbox( + "Target Language", + options=list(LANGUAGE_OPTIONS.keys()), + index=0 + ) + target_lang_code = LANGUAGE_OPTIONS[target_lang] + + # Add translator selection + st.subheader("Translator Settings") + translator_type = st.radio( + "Translator", + options=["Google", "OpenAI Compatible"], + index=0 if TRANSLATOR_CONFIG["type"] == "Google" else 1 + ) + + # API Configuration based on translator selection + if translator_type == "OpenAI Compatible": + api_key = st.text_input( + "API Key", + value=TRANSLATOR_CONFIG["openai"]["default_api_key"], + type="password" + ) + api_base = st.text_input( + "API Base URL", + value=TRANSLATOR_CONFIG["openai"]["default_api_base"] + ) + model = st.text_input( + "Model Name", + 
value=TRANSLATOR_CONFIG["openai"]["default_model"] + ) + + # Store API settings + st.session_state.api_settings.update({ + 'api_key': api_key, + 'api_base': api_base, + 'model': model + }) + else: # Google Translator + # No configuration needed for Google Translator + st.session_state.api_settings.update({ + 'api_base': TRANSLATOR_CONFIG["google"]["default_api_base"] + }) + + # Main content area + if uploaded_file is not None: + doc_bytes = uploaded_file.read() + doc = pymupdf.open(stream=doc_bytes) + + # Create two columns for side-by-side display + col1, col2 = st.columns(2) + + # Display original pages + with col1: + st.header("Original") + for page_num in range(st.session_state.current_page, + min(st.session_state.current_page + pages_per_load, doc.page_count)): + page = doc[page_num] + pix = get_page_image(page) + st.image(pix.tobytes(), caption=f"Page {page_num + 1}", use_container_width=True) + + # Translation column + with col2: + st.header("Translated") + + try: + # Initialize translator based on user selection + if translator_type == "Google": + translator = GoogleTranslator( + source=source_lang, + target=target_lang_code + ) + else: + translator = OpenAICompatibleTranslator( + source=source_lang, + target=target_lang_code, + api_key=st.session_state.api_settings.get('api_key'), + base_url=st.session_state.api_settings.get('api_base'), + model=st.session_state.api_settings.get('model') + ) + + # Translate current batch of pages + translated_pages = translate_pdf_pages( + doc, + doc_bytes, + st.session_state.current_page, + pages_per_load, + translator, + text_color, + translator_type, + target_lang_code + ) + + # Display translated pages + for i, trans_doc in enumerate(translated_pages): + page = trans_doc[0] + pix = get_page_image(page) + st.image(pix.tobytes(), caption=f"Page {st.session_state.current_page + i + 1}", use_container_width=True) + + except Exception as e: + st.error(f"Translation error: {str(e)}") + logging.error(f"Translation error: 
{str(e)}") + return + + # Navigation and action buttons + st.markdown("---") # Add a separator + button_col1, button_col2, button_col3, button_col4 = st.columns(4) + + # Previous Pages button + with button_col1: + if st.session_state.current_page > 0: + if st.button("Previous Pages", use_container_width=True): + st.session_state.current_page = max(0, st.session_state.current_page - pages_per_load) + st.rerun() + else: + st.button("Previous Pages", disabled=True, use_container_width=True) + + # Next Pages button + with button_col2: + if st.session_state.current_page + pages_per_load < doc.page_count: + if st.button("Next Pages", use_container_width=True): + st.session_state.current_page = min( + doc.page_count - 1, + st.session_state.current_page + pages_per_load + ) + st.rerun() + else: + st.button("Next Pages", disabled=True, use_container_width=True) + + # Translate All button + with button_col3: + if st.button("Translate All", + disabled=st.session_state.all_translated, + use_container_width=True): + try: + # Initialize translator based on user selection + if translator_type == "Google": + translator = GoogleTranslator( + source=source_lang, + target=target_lang_code + ) + else: + translator = OpenAICompatibleTranslator( + source=source_lang, + target=target_lang_code, + api_key=st.session_state.api_settings.get('api_key'), + base_url=st.session_state.api_settings.get('api_base'), + model=st.session_state.api_settings.get('model') + ) + + # Translate all pages + output_doc = pymupdf.open() + output_path = f"translated_{uploaded_file.name}" + output_doc = translate_all_pages( + doc, + output_doc, + translator, + st.empty(), + pages_per_load, + text_color=text_color, + translator_name=translator_type, + target_lang=target_lang_code, + output_path=output_path + ) + + st.session_state.all_translated = True + st.session_state.translated_doc = output_path + st.rerun() + except Exception as e: + st.error(f"Translation error: {str(e)}") + logging.error(f"Translation 
error: {str(e)}") + return + + # Download button + with button_col4: + if not st.session_state.all_translated: + st.markdown( + """ +
+ +
+ """, + unsafe_allow_html=True + ) + else: + with open(st.session_state.translated_doc, "rb") as file: + st.download_button( + "Download", + file, + file_name=f"translated_{uploaded_file.name}", + mime="application/pdf", + use_container_width=True + ) + else: + st.info("Please upload a PDF file to begin translation") + + + # 使用Google翻译(默认): + # streamlit run app.py + + # 使用Google翻译并指定API base: + # streamlit run app.py --translator google --api-base https://translate.googleapis.com + + # 使用OpenAI兼容模型: + # python app.py --translator openai --model default_model --api-key sk-xxx --api-base http://localhost:8080/v1 + + # 使用OpenAI翻译并指定API base: + # python app.py --translator openai --api-base https://api.openai.com/v1 --model gpt-4o-mini --api-key sk-xxx + + +if __name__ == "__main__": + args = parse_args() + update_translator_config(args) + main() \ No newline at end of file diff --git a/deep_translator/chatgpt.py b/deep_translator/chatgpt.py index 2ab3777..78cf937 100644 --- a/deep_translator/chatgpt.py +++ b/deep_translator/chatgpt.py @@ -4,7 +4,11 @@ from typing import List, Optional from deep_translator.base import BaseTranslator -from deep_translator.constants import OPEN_AI_ENV_VAR +from deep_translator.constants import ( + OPEN_AI_ENV_VAR, + OPEN_AI_BASE_URL_ENV_VAR, + OPEN_AI_MODEL_ENV_VAR, +) from deep_translator.exceptions import ApiKeyException @@ -19,19 +23,23 @@ def __init__( source: str = "auto", target: str = "english", api_key: Optional[str] = os.getenv(OPEN_AI_ENV_VAR, None), - model: Optional[str] = "gpt-3.5-turbo", + model: Optional[str] = os.getenv(OPEN_AI_MODEL_ENV_VAR, "gpt-4o-mini"), + base_url: Optional[str] = os.getenv(OPEN_AI_BASE_URL_ENV_VAR, None), **kwargs, ): """ @param api_key: your openai api key. 
@param source: source language @param target: target language + @param model: OpenAI model to use + @param base_url: custom OpenAI API base URL """ if not api_key: raise ApiKeyException(env_var=OPEN_AI_ENV_VAR) self.api_key = api_key self.model = model + self.base_url = base_url super().__init__(source=source, target=target, **kwargs) @@ -42,13 +50,18 @@ def translate(self, text: str, **kwargs) -> str: """ import openai - openai.api_key = self.api_key + client = openai.OpenAI( + api_key=self.api_key, + base_url=self.base_url if self.base_url else None + ) prompt = f"Translate the text below into {self.target}.\n" prompt += f'Text: "{text}"' - response = openai.ChatCompletion.create( - model=self.model, + # if model is empty (for mlx_lm.server, the model should be default_model) + # export OPENAI_MODEL=default_model + response = client.chat.completions.create( + model=self.model if self.model else "default_model", messages=[ { "role": "user", @@ -56,6 +69,7 @@ def translate(self, text: str, **kwargs) -> str: } ], ) + return response.choices[0].message.content diff --git a/deep_translator/constants.py b/deep_translator/constants.py index 447c81f..b474293 100644 --- a/deep_translator/constants.py +++ b/deep_translator/constants.py @@ -1,7 +1,9 @@ __copyright__ = "Copyright (C) 2020 Nidhal Baccouri" -OPEN_AI_ENV_VAR = "OPEN_API_KEY" +OPEN_AI_ENV_VAR = "OPENAI_API_KEY" +OPEN_AI_BASE_URL_ENV_VAR = "OPENAI_API_BASE" +OPEN_AI_MODEL_ENV_VAR = "OPENAI_MODEL" DEEPL_ENV_VAR = "DEEPL_API_KEY" LIBRE_ENV_VAR = "LIBRE_API_KEY" MSFT_ENV_VAR = "MICROSOFT_API_KEY" diff --git a/deep_translator/openai_compatible.py b/deep_translator/openai_compatible.py new file mode 100644 index 0000000..a71c4b9 --- /dev/null +++ b/deep_translator/openai_compatible.py @@ -0,0 +1,38 @@ +import json +import time +import os,logging + +import streamlit as st +from .chatgpt import ChatGptTranslator + +logging.basicConfig(filename='application.log', level=logging.INFO, format='%(asctime)s - %(levelname)-5s 
%(lineno)d %(filename)s:%(funcName)s - %(message)s') + +class OpenAICompatibleTranslator(ChatGptTranslator): + """Translator that handles OpenAI compatible APIs with better error handling""" + def __init__(self, source="en", target="zh-CN", **kwargs): + super().__init__(source=source, target=target, **kwargs) + self.retry_count = 3 + self.retry_delay = 1 # seconds + + def translate(self, text: str, **kwargs) -> str: + """ + Translate text with retry mechanism and error handling + """ + if not text.strip(): + return text + + for attempt in range(self.retry_count): + try: + logging.info(f"Request OpenAI compatible api, base_url: {self.base_url}") + return super().translate(text, **kwargs) + except json.JSONDecodeError: + logging.warn(f"Translation API response JSONDecodeError, will retry later...") + if attempt == self.retry_count - 1: + logging.error(f"Translation API response error, using original text") + st.warning(f"Translation API response error, using original text") + return text + time.sleep(self.retry_delay) + except Exception as e: + logging.error(f"Translation error: {str(e)}") + st.error(f"Translation error: {str(e)}") + return text \ No newline at end of file diff --git a/docs/README.rst b/docs/README.rst index aee43ff..74bc54f 100644 --- a/docs/README.rst +++ b/docs/README.rst @@ -184,7 +184,7 @@ or even directly from terminal: or shorter - $ dt -tg de -txt "hello world" + $ dt --translator chatgpt -tg de -txt "hello world" ===== @@ -592,13 +592,17 @@ ChatGpt Translator There are two required attributes, namely "api_key" (string) and "target" (string or list). Attribute "source" is optional. - You can provide your api key as an argument or you can export it as an env var - e.g. `export OPENAI_API_KEY="your_key"` + You can provide your api key, api base as an argument or you can export it as an env var + e.g. + + `export OPENAI_API_KEY="your_key"` + + `export OPENAI_API_BASE=https://api.openai.com/v1` .. 
code-block:: python text = 'happy coding' - translated = ChatGptTranslator(api_key='your_key', target='german').translate(text=text) + translated = ChatGptTranslator(api_key='your_key', base_url='https://api.xxx.com/v1', target='german').translate(text=text) - Translate batch of texts @@ -824,13 +828,15 @@ To translate a string or line of text: .. code-block:: console - $ deep_translator google --source "english" --target "german" --text "happy coding" + $ deep_translator --translator google --source "english" --target "german" --text "happy coding" + $ Alternate short option names, along with using language abbreviations: .. code-block:: console - $ deep_translator google -src "en" -tgt "de" -txt "happy coding" + $ dt -trans google -src "en" -tg "de" -txt "happy coding" + $ dt -trans chatgpt -src "en" -tg "ja" -txt "happy coding" Finally, to retrieve a list of available languages for a given translator: diff --git a/pyproject.toml b/pyproject.toml index fb77272..2725dc4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,7 @@ description = "A flexible free and unlimited python tool to translate between di license = "MIT" authors = ["Nidhal Baccouri "] maintainers = ["Nidhal Baccouri ", "Chris Trenthem "] -readme = "docs/README.rst" +readme = "README.md" homepage = "https://github.com/nidhaloff/deep_translator" repository = "https://github.com/nidhaloff/deep_translator" documentation = "https://deep-translator.readthedocs.io/en/latest/" diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..7ea9f86 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +git+https://github.com/davideuler/pdf-translator-for-human.git +openai==1.61.0 +PyMuPDF==1.25.2 +streamlit==1.41.1 \ No newline at end of file diff --git a/run_translator_web.sh b/run_translator_web.sh new file mode 100755 index 0000000..0c6ff02 --- /dev/null +++ b/run_translator_web.sh @@ -0,0 +1,9 @@ +# install the project and dependencies + +git clone 
https://github.com/davideuler/pdf-translator-for-human +cd pdf-translator-for-human +pip install -e . +pip install streamlit pymupdf openai + +# Start the Web Application +streamlit run app.py \ No newline at end of file diff --git a/translator_cli.py b/translator_cli.py new file mode 100644 index 0000000..6f2fbf7 --- /dev/null +++ b/translator_cli.py @@ -0,0 +1,161 @@ +import argparse +import pymupdf +from deep_translator import ( + GoogleTranslator, + ChatGptTranslator, +) + +# Map of supported translators +TRANSLATORS = { + 'google': GoogleTranslator, + 'chatgpt': ChatGptTranslator, +} + +def translate_pdf(input_file: str, source_lang: str, target_lang: str, layer: str = "Text", + translator_name: str = "google", text_color: str = "darkred", keep_original: bool = True): + """ + Translate a PDF file from source language to target language + + Args: + input_file: Path to input PDF file + source_lang: Source language code (e.g. 'en', 'fr') + target_lang: Target language code (e.g. 'ko', 'ja') + layer: Name of the OCG layer (default: "Text") + translator_name: Name of the translator to use (default: "google") + text_color: Color of translated text (default: "darkred") + keep_original: Whether to keep original text visible (default: True) + """ + # Define colors + WHITE = pymupdf.pdfcolor["white"] + + # Color mapping + COLOR_MAP = { + "darkred": (0.8, 0, 0), + "black": (0, 0, 0), + "blue": (0, 0, 0.8), + "darkgreen": (0, 0.5, 0), + "purple": (0.5, 0, 0.5), + } + + # Get RGB color values, default to darkred if color not found + rgb_color = COLOR_MAP.get(text_color.lower(), COLOR_MAP["darkred"]) + + # This flag ensures that text will be dehyphenated after extraction. + textflags = pymupdf.TEXT_DEHYPHENATE + + # Get the translator class + if translator_name not in TRANSLATORS: + raise ValueError(f"Unsupported translator: {translator_name}. 
Available translators: {', '.join(TRANSLATORS.keys())}") + + TranslatorClass = TRANSLATORS[translator_name] + + # Configure the translator + translator = TranslatorClass(source=source_lang, target=target_lang) + + # Generate output filename + output_file = input_file.rsplit('.', 1)[0] + f'-{target_lang}.pdf' + + # Open the document + doc = pymupdf.open(input_file) + + # Define an Optional Content layer for translation + ocg_trans = doc.add_ocg(layer, on=True) + + # If not keeping original, create a layer for original text and hide it + if not keep_original: + ocg_orig = doc.add_ocg("Original", on=False) + + # Iterate over all pages + for page in doc: + # Extract text grouped like lines in a paragraph. + blocks = page.get_text("blocks", flags=textflags) + + # Every block of text is contained in a rectangle ("bbox") + for block in blocks: + bbox = block[:4] # area containing the text + text = block[4] # the text of this block + + # Invoke the actual translation + translated = translator.translate(text) + + if not keep_original: + # Move original text to hidden layer + page.insert_htmlbox( + bbox, + text, + css="* {font-family: sans-serif;}", + oc=ocg_orig + ) + # Clear original text area in base layer + page.draw_rect(bbox, color=None, fill=WHITE) + else: + # Cover the original text only in translation layer + page.draw_rect(bbox, color=None, fill=WHITE, oc=ocg_trans) + + # Write the translated text in specified color + page.insert_htmlbox( + bbox, + translated, + css=f"* {{font-family: sans-serif; color: rgb({int(rgb_color[0]*255)}, {int(rgb_color[1]*255)}, {int(rgb_color[2]*255)});}}", + oc=ocg_trans + ) + + doc.subset_fonts() + doc.ez_save(output_file) + print(f"Translated PDF saved as: {output_file}") + +def main(): + """ + can be invoked like this: + ``` + # Basic usage + python translator_cli.py --source english --target zh-CN input.pdf + + # With custom color and hiding original text + python translator_cli.py --source english --target zh-CN --color blue 
--no-original input.pdf + + # Using ChatGPT translator + export OPENAI_API_KEY=sk-proj-xxxx + export OPENAI_API_BASE=https://api.xxxx.com/v1 + export OPENAI_API_BASE=http://localhost:8080/v1 # for local llm api + export OPENAI_MODEL=default_model + + python translator_cli.py --source english --translator chatgpt --target zh-CN input.pdf + + # do not keep original text as an optional layer: + python translator_cli.py --source english --translator chatgpt --target zh-CN --no-original input.pdf + + ``` + + The translated content is an optional content layer in the new PDF file. + The optional layer can be hidden in Acrobat PDF Reader and Foxit Reader. + """ + + parser = argparse.ArgumentParser(description='Translate PDF documents.') + parser.add_argument('input_file', help='Input PDF file path') + parser.add_argument('--source', '-s', default='en', + help='Source language code (default: en)') + parser.add_argument('--target', '-t', default='zh-CN', + help='Target language code (default: zh-CN)') + parser.add_argument('--layer', '-l', default='Text', + help='Name of the OCG layer (default: Text)') + parser.add_argument('--translator', '-tr', default='google', + choices=list(TRANSLATORS.keys()), + help='Translator to use (default: google)') + parser.add_argument('--color', '-c', default='darkred', + choices=['darkred', 'black', 'blue', 'darkgreen', 'purple'], + help='Color of translated text (default: darkred)') + parser.add_argument('--no-original', action='store_true', + help='Do not keep original text in base layer (default: False)') + + args = parser.parse_args() + + try: + translate_pdf(args.input_file, args.source, args.target, args.layer, + args.translator, args.color, not args.no_original) + except Exception as e: + print(f"Error: {str(e)}") + exit(1) + +if __name__ == "__main__": + main() \ No newline at end of file