monadical.com/server at master · Monadical-SAS/monadical.com · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
#!/usr/bin/env python3

import json
import os
import subprocess
import sys
from datetime import datetime
from bs4 import BeautifulSoup
from markupsafe import Markup
import markdown2

# Library to implement rss
from feedgen.feed import FeedGenerator
from flask import Flask, render_template, redirect, make_response, abort

### Config
app = Flask(__name__)

def get_doc_content(filename):
    """Render the markdown inside a template's ``<div id="doc">`` to HTML.

    The raw (unrendered) source of ``filename`` is fetched through the app's
    Jinja loader and parsed as HTML. The inner contents of the first ``div``
    whose id is ``doc`` are then converted with markdown2, with the
    ``fenced-code-blocks`` and ``header-ids`` extras enabled.

    Returns the converted HTML wrapped in ``Markup``, or an empty ``Markup``
    when the template contains no such div.
    """
    template_source = app.jinja_loader.get_source(app.jinja_env, filename)[0]
    container = BeautifulSoup(template_source, 'html.parser').find('div', {'id': 'doc'})

    # Nothing to convert when the wrapper div is absent.
    if not container:
        return Markup('')

    rendered = markdown2.markdown(
        container.decode_contents(),
        extras=['fenced-code-blocks', 'header-ids'],
    )
    return Markup(rendered)

# Jinja global: return the unrendered source text of a template file,
# wrapped in Markup so Jinja inserts it without escaping.
app.jinja_env.globals['include_raw'] = lambda filename: Markup(app.jinja_loader.get_source(app.jinja_env, filename)[0])
# Jinja global: make get_doc_content() callable from templates.
app.jinja_env.globals['get_doc_content'] = get_doc_content

# JSON data files, each read through load_config().
CONFIG_FILE = 'content.json'  # main site config; loaded once into CONFIG
TEAM_FILE = 'team.json'  # loaded by render_page() for the "Team" page
STACK_FILE = 'stacks.json'  # loaded by render_page() for the "Services" page
OPEN_POSITIONS = 'open-positions.json'  # loaded by render_page() for the "Team" page
PORTFOLIO = 'portfolio.json'  # loaded by render_page() for the "Portfolio" page
# Prefix prepended to each URL printed by the command-line listing flags.
HOST = 'http://127.0.0.1:5000'


def load_config(fname=CONFIG_FILE):
    """Read a JSON file and return its decoded contents.

    Args:
        fname: Path of the JSON file to read; defaults to ``CONFIG_FILE``.

    Returns:
        Whatever ``json.load`` produces for the file (a dict for the main
        content file).

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    # Decode explicitly as UTF-8 so non-ASCII text in the JSON does not
    # depend on the platform's locale encoding.
    with open(fname, 'r', encoding='utf-8') as f:
        return json.load(f)


def download_post(post):
    """Download a post's source document with wget and clean it in place.

    Args:
        post: Mapping with a ``'template'`` entry (path, relative to
            ``templates/``, where the download is written) and a
            ``'source'`` entry (the URL passed to wget).

    After the download, the file at the template path is rewritten with:

    * a trailing ``" - HedgeDoc"`` stripped from the ``<title>`` text, and
    * the Open Graph / Twitter ``<meta>`` tags listed below removed, so they
      do not override the meta tags of the template that includes the file.

    A non-zero wget exit status is reported on stderr; processing then
    continues best-effort with whatever is present at the template path.
    """
    template_path = os.path.join('templates', post['template'])
    cmd = [
        "wget",
        "--convert-links",
        "--output-document",
        template_path,
        post['source'],
    ]
    proc = subprocess.run(cmd, capture_output=True)

    # Surface download failures instead of silently cleaning a bad file.
    if proc.returncode != 0:
        stderr_lines = proc.stderr.decode('utf-8', errors='replace').strip().splitlines()
        detail = stderr_lines[-1] if stderr_lines else ''
        print(
            f"warning: wget exited with status {proc.returncode} "
            f"for {post['source']}: {detail}",
            file=sys.stderr,
        )

    with open(template_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Parse and clean HTML
    soup = BeautifulSoup(content, 'html.parser')

    # Remove " - HedgeDoc" suffix from the title tag
    suffix = ' - HedgeDoc'
    title_tag = soup.find('title')
    if title_tag and title_tag.string:
        title_text = title_tag.string
        if title_text.endswith(suffix):
            title_tag.string = title_text[:-len(suffix)]

    # Meta tags that would override our template's own meta tags.
    meta_tags_to_remove = (
        'og:image', 'og:image:alt', 'og:image:type', 'og:title', 'og:description', 'og:type', 'og:url', 'og:site_name',
        'twitter:image', 'twitter:title', 'twitter:description', 'twitter:url', 'twitter:card',
    )

    for tag in soup.find_all('meta'):
        # og: tags are identified by "property", twitter: tags by "name".
        if tag.get('property') in meta_tags_to_remove or tag.get('name') in meta_tags_to_remove:
            tag.decompose()

    # Write cleaned content back
    with open(template_path, 'w', encoding='utf-8') as f:
        f.write(str(soup))


CONFIG = load_config(CONFIG_FILE)

# Lookup tables: each maps an entry's 'url' field to the entry itself, so a
# route can resolve its request path straight to the content.json data.
PAGES = {entry['url']: entry for entry in CONFIG['PAGES'].values()}
POSTS = {entry['url']: entry for entry in CONFIG['POSTS'].values()}
JOBS = {entry['url']: entry for entry in CONFIG['JOB_POSTS'].values()}
WEBINARS = {entry['url']: entry for entry in CONFIG['WEBINARS'].values()}
CASES = {entry['url']: entry for entry in CONFIG['CASES'].values()}
RATES = {entry['url']: entry for entry in CONFIG['RATES'].values()}
REPORTS = {entry['url']: entry for entry in CONFIG['REPORTS'].values()}


### Routes

# Similar to WordPress, pages and posts are separate.  Every page names its own
# template (the 'template' field of its content.json entry), while all posts
# share the same template (post.html) + a source URL for the post content.

@app.route('/')
def index():
    """Redirect the site root to the home page."""
    home_url = "/index.html"
    return redirect(home_url)


@app.route('/favicon.ico')
def favicon():
    """Redirect root-level favicon requests to the copy under /static."""
    icon_url = "/static/favicon.ico"
    return redirect(icon_url)


@app.route('/robots.txt')
def static_from_root():
    """Redirect root-level robots.txt requests to the copy under /static."""
    robots_url = "/static/robots.txt"
    return redirect(robots_url)


@app.route('/cases.html')
def render_projects():
    """Serve /cases.html from the page entry registered under /projects.html."""
    # Copy the content.json entry so the shared PAGES dict is left untouched.
    projects_page = dict(PAGES['/projects.html'])
    template_name = projects_page['template']
    return render_template(template_name, now=datetime.now(), **CONFIG, **projects_page)


@app.route('/<path>')
def render_page(path):
    """Render a top-level page described in content.json.

    The page entry is looked up in ``PAGES`` by ``/<path>``. A few pages,
    identified by title, get extra data loaded from their own JSON files
    before rendering; the "Principles" page is delegated to ``render_post``.

    Responds with 404 when no page is registered under the requested path.
    """
    try:
        # Copy so the extra keys added below do not leak into the shared entry.
        page = PAGES[f'/{path}'].copy()
    except KeyError:
        abort(404)

    # special pages that need additional data
    if page['title'] == "Team":
        page['team'] = load_config(TEAM_FILE)
        page['positions'] = load_config(OPEN_POSITIONS)

    elif page['title'] == "Services":
        page['stacks'] = load_config(STACK_FILE)

    elif page['title'] == "Portfolio":
        portfolio = load_config(PORTFOLIO)
        page['ai_projects'] = portfolio['ai-projects']
        page['web_projects'] = portfolio['web-projects']
        page['oss'] = portfolio['oss']
        page['crypto'] = portfolio['crypto']
        page['public'] = portfolio['public']

    elif page['title'] == "Principles":
        # This page is backed by a post rather than its own template.
        return render_post('principles-handbook.html')

    # common case
    return render_template(page['template'], now=datetime.now(), **CONFIG, **page)


@app.route('/rates/<path>')
def render_pricing_model(path):
    """Download and render the rate document registered under /rates/<path>.

    Responds with 404 when no rate entry exists for the requested path.
    """
    try:
        rate = RATES[f'/rates/{path}']
    except KeyError:
        abort(404)

    # Refresh the downloaded source before rendering.
    download_post(rate)
    return render_template('rate.html', now=datetime.now(), **CONFIG, **rate)


@app.route('/posts/<path>')
def render_post(path):
    """Download and render the post registered under /posts/<path>.

    Responds with 404 when no post entry exists for the requested path.
    """
    try:
        post = POSTS[f'/posts/{path}']
    except KeyError:
        abort(404)

    # Refresh the downloaded source before rendering.
    download_post(post)
    return render_template('post.html', now=datetime.now(), **CONFIG, **post)


@app.route('/jobs/<path>')
def render_job(path):
    """Render the job post registered under /jobs/<path>.

    Responds with 404 when no job entry exists for the requested path.
    """
    try:
        job = JOBS[f'/jobs/{path}']
    except KeyError:
        abort(404)

    return render_template('job_post.html', now=datetime.now(), **CONFIG, **job)


def _ordinal_suffix(day):
    """Return the English ordinal suffix ('st', 'nd', 'rd' or 'th') for a day number."""
    # 11, 12 and 13 take 'th' even though they end in 1, 2 and 3.
    if 11 <= day % 100 <= 13:
        return 'th'
    return {1: 'st', 2: 'nd', 3: 'rd'}.get(day % 10, 'th')


def _format_webinar_date(webinar_date):
    """Format a datetime for display, e.g. 'Tuesday, March 22nd @ 2PM EST'."""
    # Day and hour are formatted by hand rather than with '%-d' / '%-I',
    # which are platform-specific strftime extensions.
    hour_12 = webinar_date.hour % 12 or 12
    return (
        f"{webinar_date.strftime('%A, %B')} {webinar_date.day}"
        f"{_ordinal_suffix(webinar_date.day)} @ {hour_12}{webinar_date.strftime('%p')} EST"
    )


@app.route('/webinars/<path>')
def render_webinar(path):
    """Render the webinar registered under /webinars/<path>.

    Downloads the promotion and transcription documents when the webinar
    entry names a source for them, works out whether the webinar is still in
    the future, and formats its date for display.

    Responds with 404 when no webinar entry exists for the requested path.
    """
    try:
        webinar = WEBINARS[f'/webinars/{path}']
    except KeyError:
        abort(404)

    # Template paths (relative to templates/) the downloads are written to
    promotion_template = f"webinars/{webinar['key']}_promotion.html"
    transcription_template = f"webinars/{webinar['key']}_transcription.html"

    # Download promotion and transcription content
    # NOTE(review): the assignments below write into the shared WEBINARS
    # entry, so they persist across requests - confirm that is intended.
    if 'promotion_source' in webinar:
        download_post({
            'template': promotion_template,
            'source': webinar['promotion_source']
        })
        webinar['promotion_template'] = promotion_template

    if 'transcription_source' in webinar:
        download_post({
            'template': transcription_template,
            'source': webinar['transcription_source']
        })
        webinar['transcription_template'] = transcription_template

    # Compare against "now" in the webinar's own timezone (naive if the
    # stored date is naive) so the comparison never mixes naive and aware.
    webinar_date = datetime.fromisoformat(webinar['date'])
    current_time = datetime.now(webinar_date.tzinfo)
    is_future_webinar = webinar_date > current_time

    formatted_date = _format_webinar_date(webinar_date)

    # For now, access is controlled client-side via localStorage
    # In a real implementation, this would be server-side session management
    access_granted = False

    return render_template(
        'webinar.html',
        now=current_time,
        webinar_date=webinar_date,
        is_future_webinar=is_future_webinar,
        access_granted=access_granted,
        formatted_date=formatted_date,
        **CONFIG,
        **webinar
    )


@app.route('/reports/<path>')
def render_report(path):
    """Download and render the report registered under /reports/<path>.

    Responds with 404 when no report entry exists for the requested path.
    """
    try:
        report = REPORTS[f'/reports/{path}']
    except KeyError:
        abort(404)

    # Refresh the downloaded source before rendering.
    download_post(report)
    return render_template('report.html', now=datetime.now(), **CONFIG, **report)


# Add feed rss
@app.route("/posts/feed.xml")
def generate_rss():
    """Build the blog's RSS feed from the POSTS section of the config.

    Each post contributes one entry carrying its title, url (as guid),
    date (as publication date at midnight UTC), description and link.
    Returns the feed as a response with an XML content type.
    """
    feed = FeedGenerator()
    feed.title("Monadical Blog Posts")
    feed.description("Here you can find our publications")
    feed.link(href="https://monadical.com/blog.html")

    for post in CONFIG["POSTS"].values():
        published = "{date} 00:00:00 UTC".format(**post)
        post_link = "https://{SITE_DOMAIN}{url}".format(**CONFIG, **post)

        entry = feed.add_entry()
        entry.title(post["title"])
        entry.guid(post["url"])
        entry.pubDate(published)
        entry.description(post['description'])
        entry.link(href=post_link)

    response = make_response(feed.rss_str(pretty=True))
    # Headers don't survive when serving the static site; this only applies
    # under the development server. It is still useful: it lets wget (or
    # whatever tool generates the static site) see that the content is XML
    # rather than guessing application/octet-stream, xhtml, or another MIME
    # type.
    response.headers.set('Content-Type', 'application/xml')

    return response


@app.route('/projects/<path>')
def render_case(path):
    """Render the case study registered under /projects/<path>.

    Each case entry names its own template. Responds with 404 when no case
    entry exists for the requested path.
    """
    try:
        case = CASES[f'/projects/{path}']
    except KeyError:
        abort(404)

    return render_template(case['template'], now=datetime.now(), **CONFIG, **case)


if __name__ == '__main__':
    # Each listing flag prints the URLs of one content table, one per line,
    # prefixed with HOST. Any other invocation starts the Flask dev server.
    url_tables = {
        '--pages': PAGES,
        '--posts': POSTS,
        '--rates': RATES,
        '--reports': REPORTS,
        '--webinars': WEBINARS,
        '--jobs': JOBS,
    }
    flag = sys.argv[1] if len(sys.argv) > 1 else None

    if flag in url_tables:
        urls = url_tables[flag]
        # Print nothing at all (not even a blank line) for an empty table.
        if urls:
            print('\n'.join(HOST + url for url in urls))
    else:
        # run the flask http server
        port = int(os.environ.get('PORT', 5000))
        app.run(port=port)