OBrainRot/main.py at master · harvestingmoon/OBrainRot · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
from scraping import *
from audio import *
from force_alignment import *
#from dict import *
from video_generator import *
from search import *
import os
from dotenv import load_dotenv
from dict import *
from image_overlay import *
load_dotenv()


# Character asset set used by main(): it builds the default `speaker_wav`
# path (assets/<asset_name>.mp3) and the assets/<asset_name> path handed to
# the image-overlay step.
asset_name = "trump" #either trump, lebron, spongebob or griffin

def main(
    reddit_url,
    llm=False,
    scraped_url='texts/scraped_url.txt',
    output_pre='texts/processed_output.txt',
    final_output='texts/oof.txt',
    speech_final='audio/output_converted.wav',
    subtitle_path='texts/testing.ass',
    output_path_before_overlay='final/before_overlay.mp4',
    output_path="final/final.mp4",
    speaker_wav=f"assets/{asset_name}.mp3",
    video_path='assets/subway.mp4',
):
    """Run the whole pipeline: scrape, synthesize speech, align, render video.

    The stages run strictly in order and communicate through files on disk.

    Args:
        reddit_url: URL handed to ``scrape`` (or ``scrape_llm`` when ``llm``
            is truthy).
        llm: When truthy, the scraped result goes through ``vader`` and
            ``groq`` (using the ``GROQ_API_KEY`` environment variable)
            instead of calling ``scrape`` directly.
        scraped_url: Text file the scraped content is saved to; it is the
            input of both the audio step and the first text-processing pass.
        output_pre: Output of ``process_text`` and input of
            ``process_text_section2``.
        final_output: Output of ``process_text_section2``; read back and
            stripped to obtain the text used for alignment.
        speech_final: Destination of ``convert_audio`` (its source is the
            fixed path ``audio/output.wav``); used for alignment and as the
            video's audio track.
        subtitle_path: Subtitle file produced by ``convert_timing_to_ass``.
        output_path_before_overlay: Intermediate video written by
            ``add_subtitles_and_overlay_audio``.
        output_path: Final video written by ``overlay_images_on_video``.
        speaker_wav: Speaker sample passed to ``audio``. The default is
            computed once, at definition time, from the module-level
            ``asset_name``.
        video_path: Background video passed to
            ``add_subtitles_and_overlay_audio``.

    Returns:
        None. As a side effect a ``testing.txt`` file holding one
        ``word start end`` line per aligned word is written to the current
        working directory.
    """
    # ---- Stage 1: obtain the text to narrate --------------------------------
    print("L1: SCRAPING RIGHT NOW")
    if llm:
        print("Using LLM to determine best thread to scrape")
        print("-------------------")
        candidates = scrape_llm(reddit_url)
        scored_text = vader(candidates)
        groq_key = os.getenv('GROQ_API_KEY')
        map_request = groq(scored_text, groq_key)
    else:
        map_request = scrape(reddit_url)
    print(map_request)
    save_map_to_txt(map_request, scraped_url)

    # ---- Stage 2: speech synthesis ------------------------------------------
    print("L2: AUDIO CONVERSION NOW (TAKES THE LONGEST)")
    audio(scraped_url, speaker_wav=speaker_wav)
    convert_audio('audio/output.wav', speech_final)

    # Two text-processing passes must run before alignment; the result of the
    # second pass is what gets aligned against the audio.
    process_text(scraped_url, output_pre)
    process_text_section2(output_pre, final_output)

    with open(final_output, 'r') as processed:
        narration = processed.read().strip()

    # ---- Stage 3: forced alignment ------------------------------------------
    print("L3: FORCE ALIGNMENT")
    transcript = format_text(narration)
    bundle, waveform, labels, raw_emission = class_label_prob(speech_final)
    trellis, emission, tokens = trellis_algo(labels, narration, raw_emission)
    path = backtrack(trellis, emission, tokens)
    segments = merge_repeats(path, transcript)
    word_segments = merge_words(segments)
    timing_list = [
        display_segment(bundle, trellis, word_segments, waveform, idx)
        for idx, _ in enumerate(word_segments)
    ]

    # Dump the per-word timings, one "word start end" triple per line.
    with open("testing.txt", "w") as dump:
        dump.writelines(
            f"{token} {begin} {finish}\n" for token, begin, finish in timing_list
        )

    # ---- Stage 4: subtitles + audio on top of the background video ----------
    print("L4: VIDEO GENERATION")
    convert_timing_to_ass(timing_list, subtitle_path)
    add_subtitles_and_overlay_audio(
        video_path, speech_final, subtitle_path, output_path_before_overlay
    )

    # ---- Stage 5: character image overlay -----------------------------------
    # The asset path comes from the module-level asset_name, independently of
    # whatever speaker_wav the caller supplied.
    print("L5: IMAGE OVERLAY!!")
    overlay_images_on_video(
        output_path_before_overlay,
        f"assets/{asset_name}",
        output_path,
        "texts/image_overlay.txt",
        timing_list,
    )

    print("DONE! SAVED AT " + output_path)

# if __name__ == "__main__":

#     main("https://www.reddit.com/r/confessions/comments/1jt63ey/i_pooped_during_my_run_yesterday/")