diff --git a/.gitignore b/.gitignore index 81be09e..1814dca 100644 --- a/.gitignore +++ b/.gitignore @@ -22,6 +22,9 @@ sdist/ var/ .idea/ .idea +.cached +*.pdf +*.log wheels/ *.egg-info/ .installed.cfg diff --git a/PDF-Translator-for-Human.jpg b/PDF-Translator-for-Human.jpg new file mode 100644 index 0000000..7199a9d Binary files /dev/null and b/PDF-Translator-for-Human.jpg differ diff --git a/README.md b/README.md new file mode 100644 index 0000000..a141dcb --- /dev/null +++ b/README.md @@ -0,0 +1,123 @@ +--- +title: Pdf Translator For Human +emoji: 🦀 +colorFrom: indigo +colorTo: green +sdk: streamlit +sdk_version: 1.42.0 +app_file: app.py +pinned: false +license: apache-2.0 +short_description: PDF Translator powered by local llm, side by side reading +--- + +# PDF Translator for Human: A PDF Reader/Translator with Local LLM/ChatGPT or Google + +## Use Case + +There are tons of AI-powered PDF readers/translators. However, none of them meets my needs. I want one that can run entirely locally with local LLMs. + +I want to read both the original PDF and the translated pages side by side. +Also, I don't want to translate a 1000-page PDF file all at once; it costs a lot of time and tokens. And most of the time, I never finish reading the entire content of a long paper. + +## Features in PDF Translator for Human +You can read both the original PDF file and the translated content side by side. + +The local/remote translation API is invoked on a per-page basis as needed, triggered by page turns during reading. + +## Snapshot + + + +## Huggingface Space + +https://huggingface.co/spaces/davideuler/pdf-translator-for-human + +## Supported translators and LLMs: +* Google Translator (no API key needed, it is totally free) +* Locally deployed LLMs (ollama, llama.cpp, mlx_lm, etc.) +* ChatGPT +* DeepSeek (Use the OpenAI Compatible endpoint at https://api.deepseek.com/v1) +* Qwen (Use the OpenAI Compatible endpoint) + +* Other OpenAI Compatible LLMs like GLM/Moonshot etc. 
+ +## Start the Web Application for PDF Translator for Human + + +``` bash +./run_translator_web.sh + +# or just start the streamlit application if you have run the previous script: +streamlit run app.py + +``` + +## Notes on deployment and starting a local llm inference service + +### Option 1. Start a local LLM with mlx_lm (works on Apple Silicon Macs) + +Here I download aya-expanse-8b 4bit as an example. + +``` Bash +# download mlx models from huggingface to local folder +git clone https://huggingface.co/mlx-community/aya-expanse-8b-4bit + +# install mlx_lm +pip install mlx_lm + +# start the server +mlx_lm.server --model ./aya-expanse-8b-4bit --port 8080 + +``` + +### Option 2. By llama.cpp (Works on CPU/GPU/Mac Machines) + +Llama.cpp works on CPU machines and on Intel/Apple Silicon Macs; you need 48GB of memory for aya-expanse-32b-q4_k_m.gguf. + +``` Bash +# download gguf models from huggingface to local folder +wget https://hf-mirror.co/bartowski/aya-expanse-32b-GGUF/resolve/main/aya-expanse-32b-Q4_K_M.gguf -O aya-expanse-32b-Q4_K_M.gguf + +# download llama.cpp and install llama.cpp +git clone https://github.com/ggerganov/llama.cpp +cd llama.cpp +mkdir -p build && cmake -B build +cmake --build build --config Release -j 12 + +# start llama.cpp server +./llama-server -m ~/models/aya-expanse-32b-Q4_K_M.gguf --port 8080 + +``` + +### Option 3. Local inference service by ollama/vLLM and other applications such as LMStudio + +Please read the official guide for your LLM inference tool. + +### Option 4. 
Note on using an OpenAI Compatible LLM service provider + +For example, run the following commands before starting the streamlit application to enable translation by DeepSeek: + +``` bash +export OPENAI_MODEL=deepseek-chat +export OPENAI_API_BASE=https://api.deepseek.com/v1 +export OPENAI_API_KEY=sk-xxxx +``` + +Run the following commands before starting the streamlit application to enable translation by Moonshot: + +``` bash +export OPENAI_MODEL=moonshot-v1-8k +export OPENAI_API_BASE=https://api.moonshot.cn/v1 +export OPENAI_API_KEY=sk-xxxx +``` + + + +## Acknowledgement + +https://github.com/nidhaloff/deep-translator + +The project is based on the awesome deep-translator. Thanks to the excellent work in the original project, I can integrate it into the PDF translator tool. + +Pull Requests are welcome. \ No newline at end of file diff --git a/app.py b/app.py new file mode 100644 index 0000000..6fa8837 --- /dev/null +++ b/app.py @@ -0,0 +1,567 @@ +import os +import json +import hashlib +from pathlib import Path +import streamlit as st +import pymupdf +from deep_translator import ( + GoogleTranslator, +) +from deep_translator.openai_compatible import OpenAICompatibleTranslator +import logging +import argparse + +# Constants +DEFAULT_PAGES_PER_LOAD = 2 +DEFAULT_MODEL = "default_model" +DEFAULT_API_BASE = "http://localhost:8080/v1" + +# Supported translators +TRANSLATORS = { + 'OpenAI Compatible': OpenAICompatibleTranslator, + 'OpenAI': OpenAICompatibleTranslator, + 'Google': GoogleTranslator, +} + +# Color options +COLOR_MAP = { + "darkred": (0.8, 0, 0), + "black": (0, 0, 0), + "blue": (0, 0, 0.8), + "darkgreen": (0, 0.5, 0), + "purple": (0.5, 0, 0.5), +} + +# Target language options for ChatGPT +LANGUAGE_OPTIONS = { + "简体中文": "zh-CN", + "繁體中文": "zh-TW", + "English": "en", + "日本語": "ja", + "한국어": "ko", + "Español": "es", + "Français": "fr", + "Deutsch": "de", +} + +# Add source language options +SOURCE_LANGUAGE_OPTIONS = { + "English": "en", + "简体中文": "zh-CN", + "繁體中文": 
"zh-TW", + "日本語": "ja", + "한국어": "ko", + "Español": "es", + "Français": "fr", + "Deutsch": "de", + "Auto": "auto", +} + +# Global translation configuration +TRANSLATOR_CONFIG = { + "type": "Google", # Options: "Google" or "OpenAI" + # OpenAI settings (used only if type is "OpenAI") + "openai": { + "default_api_base": DEFAULT_API_BASE, + "default_model": DEFAULT_MODEL, # "gpt-4o-mini", + "default_api_key": "sk-xxx" + }, + # Google settings (used only if type is "Google") + "google": { + "default_api_base": "https://translate.googleapis.com" + } +} + +# Add argument parser +def parse_args(): + parser = argparse.ArgumentParser(description='PDF Translator Application') + parser.add_argument( + '--translator', + type=str, + choices=['google', 'openai'], + default='google', + help='Specify translator type: google or openai' + ) + parser.add_argument( + '--api-base', + type=str, + help='API base URL for the translator' + ) + parser.add_argument( + '--api-key', + type=str, + help='API key for OpenAI compatible translator' + ) + parser.add_argument( + '--model', + type=str, + help='Model name for OpenAI compatible translator' + ) + return parser.parse_args() + +# Update TRANSLATOR_CONFIG based on command line arguments +def update_translator_config(args): + global TRANSLATOR_CONFIG + + TRANSLATOR_CONFIG["type"] = "Google" if args.translator.lower() == "google" else "OpenAI" + + if args.translator.lower() == "google": + if args.api_base: + TRANSLATOR_CONFIG["google"]["default_api_base"] = args.api_base + else: # OpenAI + if args.api_base: + TRANSLATOR_CONFIG["openai"]["default_api_base"] = args.api_base + if args.api_key: + TRANSLATOR_CONFIG["openai"]["default_api_key"] = args.api_key + if args.model: + TRANSLATOR_CONFIG["openai"]["default_model"] = args.model + +def get_cache_dir(): + """Get or create cache directory""" + cache_dir = Path('.cached') + cache_dir.mkdir(exist_ok=True) + return cache_dir + +def get_cache_key(doc_info: dict, page_num: int, translator_name: str, 
target_lang: str, text_content: str): + """Generate cache key for a specific page translation""" + # 使用文档信息和页面内容的组合生成唯一标识 + content_hash = hashlib.md5(text_content.encode('utf-8')).hexdigest()[:8] + doc_id = f"{doc_info.get('title', '')}_{doc_info.get('author', '')}_{doc_info.get('pagecount', '')}" + doc_hash = hashlib.md5(doc_id.encode('utf-8')).hexdigest()[:8] + return f"{doc_hash}_{content_hash}_page{page_num}_{translator_name}_{target_lang}.pdf" + +def get_cached_translation(cache_key: str) -> pymupdf.Document: + """Get cached translation if exists""" + cache_path = get_cache_dir() / cache_key + if cache_path.exists(): + try: + return pymupdf.open(str(cache_path)) + except Exception as e: + logging.error(f"Error loading cache: {str(e)}") + return None + return None + +def save_translation_cache(doc: pymupdf.Document, cache_key: str): + """Save translation to cache""" + cache_path = get_cache_dir() / cache_key + doc.save(str(cache_path)) # 确保提供文件路径字符串 + +def translate_pdf_pages(doc, doc_bytes, start_page, num_pages, translator, text_color, translator_name, target_lang): + """Translate specific pages of a PDF document with progress and caching""" + # Log translator information + logging.info(f"Using translator: {translator_name}, source: {translator._source}, target: {translator._target}") + logging.info(f"Selected translator: {translator_name}, Class: {translator.__class__.__name__}") + + WHITE = pymupdf.pdfcolor["white"] + rgb_color = COLOR_MAP.get(text_color.lower(), COLOR_MAP["darkred"]) + + translated_pages = [] + total_pages = min(start_page + num_pages, doc.page_count) - start_page + cache_hits = 0 + + # Create a progress bar + progress_bar = st.progress(0) + status_text = st.empty() + + for i, page_num in enumerate(range(start_page, min(start_page + num_pages, doc.page_count))): + status_text.text(f"Translating page {page_num + 1}...") + + # Extract text content for cache key + page = doc[page_num] + text_content = page.get_text("text") + + # Check cache 
first using text content + cache_key = get_cache_key( + doc.metadata, + page_num, + translator_name, + target_lang, + text_content + ) + + cached_doc = get_cached_translation(cache_key) + + if cached_doc is not None: + translated_pages.append(cached_doc) + cache_hits += 1 + logging.info(f"Cache hit: Using cached translation for page {page_num + 1}") + status_text.text(f"Using cached translation for page {page_num + 1}") + else: + logging.info(f"Cache miss: Translating page {page_num + 1}") + status_text.text(f"Translating page {page_num + 1} (not in cache)") + + # Create a new PDF document for this page + new_doc = pymupdf.open() + new_doc.insert_pdf(doc, from_page=page_num, to_page=page_num) + page = new_doc[0] + + # Extract and translate text blocks + blocks = page.get_text("blocks", flags=pymupdf.TEXT_DEHYPHENATE) + + for block in blocks: + bbox = block[:4] + text = block[4] + translated = translator.translate(text) + translated = str(translated) # Ensure the value is a string + + # Cover original text with white and add translation in color + page.draw_rect(bbox, color=None, fill=WHITE) + page.insert_htmlbox( + bbox, + translated, + css=f"* {{font-family: sans-serif; color: rgb({int(rgb_color[0]*255)}, {int(rgb_color[1]*255)}, {int(rgb_color[2]*255)});}}" + ) + + # Save to cache + save_translation_cache(new_doc, cache_key) + translated_pages.append(new_doc) + logging.info(f"Cached new translation for page {page_num + 1}") + + # Update progress + progress = (i + 1) / total_pages + progress_bar.progress(progress) + + # Clear progress indicators and show summary + progress_bar.empty() + if cache_hits > 0: + st.info(f"Used cache for {cache_hits} out of {total_pages} pages") + + return translated_pages + +def get_page_image(page, scale=2): + """Get high quality image from PDF page""" + # 计算缩放后的尺寸 + zoom = scale + mat = pymupdf.Matrix(zoom, zoom) + + # 使用较低分辨率渲染页面,但保持清晰度 + pix = page.get_pixmap( + matrix=mat, + alpha=False, + colorspace="rgb", # Use RGB instead of 
RGBA + ) + + return pix + +def translate_all_pages( + input_doc, + output_doc, + translator, + progress_bar, + batch_size=1, + **kwargs +): + """Translate all pages of the PDF document""" + # Log translator information for full document translation + logging.info(f"Starting full document translation with: {kwargs.get('translator_name', 'unknown')}") + logging.info(f"Translator settings - source: {translator._source}, target: {translator._target}") + + # Define colors + WHITE = pymupdf.pdfcolor["white"] + rgb_color = COLOR_MAP.get(kwargs.get('text_color', 'darkred').lower(), COLOR_MAP["darkred"]) + + total_pages = input_doc.page_count + + # Create a progress bar for overall progress + status_text = st.empty() + + # Translate all pages using translate_pdf_pages + translated_pages = translate_pdf_pages( + input_doc, + None, # doc_bytes not needed as we're using text content for cache + 0, # start from first page + total_pages, # translate all pages + translator, + kwargs.get('text_color', 'darkred'), + kwargs.get('translator_name', 'google'), + kwargs.get('target_lang', 'zh-CN') + ) + + # Combine all pages into one PDF with compression + output_path = kwargs.get('output_path', 'output.pdf') + for trans_doc in translated_pages: + output_doc.insert_pdf(trans_doc) + + # Save with compression options + output_doc.save( + output_path, + garbage=4, + deflate=True, + clean=True, + linear=True + ) + + return output_doc + +def init_session_state(): + """Initialize session state variables""" + if 'current_page' not in st.session_state: + st.session_state.current_page = 0 + if 'translation_started' not in st.session_state: + st.session_state.translation_started = True + if 'all_translated' not in st.session_state: + st.session_state.all_translated = False + if 'translated_doc' not in st.session_state: + st.session_state.translated_doc = None + if 'previous_file' not in st.session_state: + st.session_state.previous_file = None + if 'api_settings' not in st.session_state: + 
st.session_state.api_settings = {} + +def main(): + st.set_page_config(layout="wide", page_title="PDF Translator for Human") + st.title("PDF Translator for Human") + + # Initialize session state + init_session_state() + + # Sidebar configuration + with st.sidebar: + st.header("Settings") + + uploaded_file = st.file_uploader("Choose a PDF file", type="pdf") + + # Reset session state when a new file is uploaded + if uploaded_file is not None and (st.session_state.previous_file is None or + uploaded_file.name != st.session_state.previous_file): + st.session_state.current_page = 0 + st.session_state.translation_started = True + st.session_state.all_translated = False + st.session_state.translated_doc = None + st.session_state.previous_file = uploaded_file.name + st.rerun() + + # Add source language selection + source_lang_name = st.selectbox( + "Source Language", + options=list(SOURCE_LANGUAGE_OPTIONS.keys()), + index=0 # Default to English + ) + source_lang = SOURCE_LANGUAGE_OPTIONS[source_lang_name] + + pages_per_load = st.number_input( + "Pages per load", + min_value=1, + max_value=5, + value=DEFAULT_PAGES_PER_LOAD + ) + + text_color = st.selectbox( + "Translation Color", + options=list(COLOR_MAP.keys()), + index=0 + ) + + target_lang = st.selectbox( + "Target Language", + options=list(LANGUAGE_OPTIONS.keys()), + index=0 + ) + target_lang_code = LANGUAGE_OPTIONS[target_lang] + + # Add translator selection + st.subheader("Translator Settings") + translator_type = st.radio( + "Translator", + options=["Google", "OpenAI Compatible"], + index=0 if TRANSLATOR_CONFIG["type"] == "Google" else 1 + ) + + # API Configuration based on translator selection + if translator_type == "OpenAI Compatible": + api_key = st.text_input( + "API Key", + value=TRANSLATOR_CONFIG["openai"]["default_api_key"], + type="password" + ) + api_base = st.text_input( + "API Base URL", + value=TRANSLATOR_CONFIG["openai"]["default_api_base"] + ) + model = st.text_input( + "Model Name", + 
value=TRANSLATOR_CONFIG["openai"]["default_model"] + ) + + # Store API settings + st.session_state.api_settings.update({ + 'api_key': api_key, + 'api_base': api_base, + 'model': model + }) + else: # Google Translator + # No configuration needed for Google Translator + st.session_state.api_settings.update({ + 'api_base': TRANSLATOR_CONFIG["google"]["default_api_base"] + }) + + # Main content area + if uploaded_file is not None: + doc_bytes = uploaded_file.read() + doc = pymupdf.open(stream=doc_bytes) + + # Create two columns for side-by-side display + col1, col2 = st.columns(2) + + # Display original pages + with col1: + st.header("Original") + for page_num in range(st.session_state.current_page, + min(st.session_state.current_page + pages_per_load, doc.page_count)): + page = doc[page_num] + pix = get_page_image(page) + st.image(pix.tobytes(), caption=f"Page {page_num + 1}", use_container_width=True) + + # Translation column + with col2: + st.header("Translated") + + try: + # Initialize translator based on user selection + if translator_type == "Google": + translator = GoogleTranslator( + source=source_lang, + target=target_lang_code + ) + else: + translator = OpenAICompatibleTranslator( + source=source_lang, + target=target_lang_code, + api_key=st.session_state.api_settings.get('api_key'), + base_url=st.session_state.api_settings.get('api_base'), + model=st.session_state.api_settings.get('model') + ) + + # Translate current batch of pages + translated_pages = translate_pdf_pages( + doc, + doc_bytes, + st.session_state.current_page, + pages_per_load, + translator, + text_color, + translator_type, + target_lang_code + ) + + # Display translated pages + for i, trans_doc in enumerate(translated_pages): + page = trans_doc[0] + pix = get_page_image(page) + st.image(pix.tobytes(), caption=f"Page {st.session_state.current_page + i + 1}", use_container_width=True) + + except Exception as e: + st.error(f"Translation error: {str(e)}") + logging.error(f"Translation error: 
{str(e)}") + return + + # Navigation and action buttons + st.markdown("---") # Add a separator + button_col1, button_col2, button_col3, button_col4 = st.columns(4) + + # Previous Pages button + with button_col1: + if st.session_state.current_page > 0: + if st.button("Previous Pages", use_container_width=True): + st.session_state.current_page = max(0, st.session_state.current_page - pages_per_load) + st.rerun() + else: + st.button("Previous Pages", disabled=True, use_container_width=True) + + # Next Pages button + with button_col2: + if st.session_state.current_page + pages_per_load < doc.page_count: + if st.button("Next Pages", use_container_width=True): + st.session_state.current_page = min( + doc.page_count - 1, + st.session_state.current_page + pages_per_load + ) + st.rerun() + else: + st.button("Next Pages", disabled=True, use_container_width=True) + + # Translate All button + with button_col3: + if st.button("Translate All", + disabled=st.session_state.all_translated, + use_container_width=True): + try: + # Initialize translator based on user selection + if translator_type == "Google": + translator = GoogleTranslator( + source=source_lang, + target=target_lang_code + ) + else: + translator = OpenAICompatibleTranslator( + source=source_lang, + target=target_lang_code, + api_key=st.session_state.api_settings.get('api_key'), + base_url=st.session_state.api_settings.get('api_base'), + model=st.session_state.api_settings.get('model') + ) + + # Translate all pages + output_doc = pymupdf.open() + output_path = f"translated_{uploaded_file.name}" + output_doc = translate_all_pages( + doc, + output_doc, + translator, + st.empty(), + pages_per_load, + text_color=text_color, + translator_name=translator_type, + target_lang=target_lang_code, + output_path=output_path + ) + + st.session_state.all_translated = True + st.session_state.translated_doc = output_path + st.rerun() + except Exception as e: + st.error(f"Translation error: {str(e)}") + logging.error(f"Translation 
error: {str(e)}") + return + + # Download button + with button_col4: + if not st.session_state.all_translated: + st.markdown( + """ +