Web2Ebook/convert_to_both.py at main · JonathanJing/Web2Ebook · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/usr/bin/env python3
"""
Combined converter to extract website content and convert to both EPUB and PDF
Usage: python3 convert_to_both.py [URL] [OUTPUT_NAME]
"""

import sys
import os
from ebook_extractor import EbookExtractor
from pdf_converter import PDFConverter

def _report_output(label, path, succeeded):
    """Print the summary line for one output file.

    Args:
        label: Short format name shown in the summary (e.g. "EPUB").
        path: Path of the file the converter was asked to write.
        succeeded: Truthy if the converter reported success.
    """
    if not succeeded:
        print(f"✗ {label}: Failed to create")
        return

    # A converter may report success without leaving a readable file behind;
    # report 0 bytes in that case rather than crashing the summary.
    try:
        size = os.path.getsize(path)
    except OSError:
        size = 0
    print(f"✓ {label}: {path} ({size:,} bytes)")


def main(argv=None):
    """Extract a website once and write it out as both EPUB and PDF.

    Args:
        argv: Optional list of command-line arguments (without the program
            name): ``[URL, OUTPUT_NAME]``. Both are optional. When ``argv``
            is None, ``sys.argv[1:]`` is used.

    Returns:
        Process exit status: 0 if at least one output file was created,
        1 if extraction failed or neither output could be created.
    """
    args = sys.argv[1:] if argv is None else list(argv)

    # Positional arguments fall back to the built-in defaults when omitted.
    url = (
        args[0]
        if len(args) > 0
        else "https://wellsofgrace.com/books/spiritual/rskndam/index.htm"
    )
    output_name = args[1] if len(args) > 1 else "extracted_book"

    epub_file = f"{output_name}.epub"
    pdf_file = f"{output_name}.pdf"

    print("Converting website to both EPUB and PDF")
    print(f"URL: {url}")
    print(f"Output files: {epub_file}, {pdf_file}")
    print("=" * 60)

    # Extract content once; both converters reuse the same chapters.
    print("Step 1: Extracting content from website...")
    extractor = EbookExtractor(url)

    if not extractor.analyze_site_structure():
        print("✗ Failed to analyze site structure")
        return 1

    if not extractor.chapters:
        print("✗ No chapters found")
        return 1

    print(f"✓ Successfully extracted {len(extractor.chapters)} chapters")
    print(f"✓ Book title: {extractor.book_title}")
    print()

    # Create EPUB
    print("Step 2: Creating EPUB...")
    epub_success = extractor.create_epub(epub_file)
    if epub_success:
        print(f"✓ EPUB created successfully: {epub_file}")
    else:
        print("✗ Failed to create EPUB")
    print()

    # Create PDF
    print("Step 3: Creating PDF...")
    pdf_converter = PDFConverter()
    pdf_success = pdf_converter.create_pdf_from_chapters(
        extractor.chapters,
        extractor.book_title,
        pdf_file,
    )
    if pdf_success:
        print(f"✓ PDF created successfully: {pdf_file}")
    else:
        print("✗ Failed to create PDF")
    print()

    # Summary
    print("=" * 60)
    print("CONVERSION SUMMARY")
    print("=" * 60)
    print(f"Source URL: {url}")
    print(f"Book title: {extractor.book_title}")
    print(f"Chapters extracted: {len(extractor.chapters)}")
    print()

    _report_output("EPUB", epub_file, epub_success)
    _report_output("PDF", pdf_file, pdf_success)

    print()
    if epub_success or pdf_success:
        print("✓ Conversion completed successfully!")
        return 0

    print("✗ Conversion failed")
    return 1


if __name__ == "__main__":
    # Propagate the result so shells and CI can detect a failed conversion.
    sys.exit(main())