@@ -4,24 +4,31 @@ Reads the mkdocs.yml file and constructs a JSON file with title and URL for each
44Includes semantic embeddings for each title for similarity search.
55"""
66
7+ import from byllm .lib {Model }
78import yaml ;
9+ import from dotenv {load_dotenv }
810import json ;
911import os ;
1012import from pathlib {Path }
1113import from typing {List , Dict , Any }
1214import numpy as np ;
1315import from sentence_transformers {SentenceTransformer }
1416
# Module-level model handle; functions declared with `by llm` in this file delegate their bodies to it.
17+ glob llm = Model(model_name = " gpt-4.1-mini" );
1518
16- # Create custom YAML loader that ignores MkDocs-specific tags like !python/name:...
# Load variables from a .env file when the module runs; override = True lets .env values
# replace variables that are already set in the environment.
19+ with entry {
20+ load_dotenv(override = True );
21+ }
22+
23+ """ Create custom YAML loader that ignores MkDocs-specific tags like !python/name:..."""
def get_yaml_loader() -> Any {
    # Returns a loader class for the `Loader=` argument of yaml.load that
    # resolves unknown tags (e.g. !python/name:...) to None instead of raising.
    #
    # The constructor is registered on a private subclass rather than on
    # yaml.SafeLoader itself: add_multi_constructor stores the registration on
    # the class it is called on, so mutating yaml.SafeLoader would make every
    # later yaml.safe_load in the process silently accept unknown tags too.
    loader = type("MkDocsSafeLoader", (yaml.SafeLoader,), {});

    # Constructor used for any tag without a dedicated constructor.
    def ignore_unknown(ldr: Any, tag_suffix: str, node: Any) -> Any {
        return None;
    }

    # The empty prefix matches every tag that has no exact constructor.
    loader.add_multi_constructor('', ignore_unknown);
    return loader;
}
@@ -46,14 +53,13 @@ def construct_url(base_url: str, md_path: str) -> str {
4653 return f " { base_url}{ url_path} " ;
4754}
4855
49- """
50- Recursively extract navigation items from the mkdocs nav structure.
56+ """ Recursively extract navigation items from the mkdocs nav structure.
5157
5258Args:
53- nav_item: Current navigation item (can be dict, list, or string)
54- parent_sections: List of parent section titles for building hierarchical titles
55- base_url: Base URL for the documentation site
56- site_url: Site URL from mkdocs.yml
59+ nav_item: Current navigation item (can be dict, list, or string)
60+ parent_sections: List of parent section titles for building hierarchical titles
61+ base_url: Base URL for the documentation site
62+ site_url: Site URL from mkdocs.yml
5763
5864Returns:
5965 List of dictionaries containing title and url
@@ -63,20 +69,15 @@ def extract_nav_items(nav_item: Any, parent_sections: List[str], base_url: str,
6369
6470 if isinstance (nav_item, dict ) {
6571 for (key, value) in nav_item.items() {
66- # Key is the section title
6772 if isinstance (value, str ) {
68- # This is a direct link: "Title": "path/to/file.md"
6973 # Only process .md files
7074 if value.endswith(' .md' ) {
7175 title = " ● " .join(parent_sections + [key]) if parent_sections else key;
7276 url = construct_url(site_url, value);
73- results.append({
74- " title" : title,
75- " url" : url
76- });
77+ results.append({" title" : title, " url" : url});
7778 }
7879 } elif isinstance (value, list ) {
79- # This is a section with nested items
80+ # Section with nested items
8081 new_parent_sections = parent_sections + [key];
8182 for item in value {
8283 results.extend(extract_nav_items(item, new_parent_sections, base_url, site_url));
@@ -92,31 +93,25 @@ def extract_nav_items(nav_item: Any, parent_sections: List[str], base_url: str,
9293 results.extend(extract_nav_items(item, parent_sections, base_url, site_url));
9394 }
9495 } elif isinstance (nav_item, str ) {
95- # Direct path without title
9696 # Only process .md files
9797 if nav_item.endswith(' .md' ) {
9898 title = " ● " .join(parent_sections) if parent_sections else nav_item;
9999 url = construct_url(site_url, nav_item);
100- results.append({
101- " title" : title,
102- " url" : url
103- });
100+ results.append({" title" : title, " url" : url});
104101 }
105102 }
106103
107104 return results;
108105}
109106
110107
111- """
112- Parse mkdocs.yml and generate all_section_links.json.
108+ """ Parse mkdocs.yml and generate all_section_links.json.
113109
114110Args:
115111 mkdocs_path: Path to mkdocs.yml file
116112 output_path: Path where all_section_links.json should be created
117113"""
118114def parse_mkdocs_nav (mkdocs_path : str , output_path : str ) {
119-
120115 # Read mkdocs.yml (using custom loader to ignore MkDocs-specific tags)
121116 with open (mkdocs_path , ' r' , encoding = ' utf-8' ) as f {
122117 mkdocs_config = yaml.load(f, Loader = get_yaml_loader());
@@ -152,15 +147,73 @@ def parse_mkdocs_nav(mkdocs_path: str, output_path: str) {
152147 json.dump(all_links, f, indent = 2 , ensure_ascii = False );
153148 }
154149
155- print (f " Generated { len (all_links)} links with embeddings in { output_path} " );
156- return all_links;
150+ print (f " Successfully generated { len (all_links)} documentation links with embeddings " );
151+ }
152+
153+
# NOTE(review): this function has no body; `by llm` delegates it to the module-level `llm` model.
# The docstring below presumably serves as the prompt context for that call, so treat its
# wording as behavior rather than documentation. Confirm against the byLLM docs before editing.
154+ """
155+ Match each default title to the most semantically related documentation link from all available links.
156+
157+ Given:
158+ - default_titles: A list of topic/concept titles (e.g., "AI integration", "Object Spatial Programming")
159+ - all_links: A list of documentation pages, where each item contains:
160+ - 'title': The page title (may include hierarchical sections separated by ●)
161+ - 'url': The full URL to the documentation page
162+
163+ Task:
164+ For each default_title, analyze all available documentation links and select the single most relevant match based on semantic similarity and topic relevance. Consider the full hierarchical context in link titles.
165+
166+ Return a JSON object mapping each default title to its best matching documentation URL:
167+ {
168+ "AI integration": "https://jac-lang.org/learn/guide/...",
169+ "Object Spatial Programming": "https://jac-lang.org/learn/..."
170+ }
171+
172+ Ensure all default titles are included in the output, even if the match is approximate.
173+ """
174+ def determine_default_doc_links (default_titles : List[str ], all_links : List[Dict[str , str ]]) -> Dict [str , str ] by llm (method = " Reason" , temperature = 0.4 );
175+
176+
"""Generate the default suggestion links file.

Maps a fixed list of topic titles to documentation pages taken from the
mkdocs nav (via determine_default_doc_links) and writes the result to
output_path as a JSON array of {"url", "title"} objects.

Args:
    mkdocs_path: Path to mkdocs.yml file
    output_path: Path where the default links JSON file should be created

Returns:
    The title -> URL mapping produced by determine_default_doc_links
"""
def generate_default_doc_links(mkdocs_path: str, output_path: str) -> Dict[str, str] {
    titles = [
        "Introduction to Jac programming language",
        "Object Spatial Programming",
        "AI integration",
        "Building full stack applications"
    ];

    # Read mkdocs.yml (using custom loader to ignore MkDocs-specific tags)
    with open(mkdocs_path, 'r', encoding='utf-8') as f {
        mkdocs_config = yaml.load(f, Loader=get_yaml_loader());
    }

    # Ensure site_url has trailing slash
    site_url = mkdocs_config.get('site_url', 'https://jac-lang.org/');
    if not site_url.endswith('/') {
        site_url += '/';
    }

    all_links = extract_nav_items(mkdocs_config.get('nav', []), [], site_url, site_url);
    result = determine_default_doc_links(titles, all_links);

    # Convert dict to array format for JSON file
    links_array = [{"url": url, "title": title} for (title, url) in result.items()];

    # Ensure output directory exists
    Path(output_path).parent.mkdir(parents=True, exist_ok=True);

    # Write to JSON file
    with open(output_path, 'w', encoding='utf-8') as f {
        json.dump(links_array, f, indent=2, ensure_ascii=False);
    }

    print(f"Successfully generated {len(links_array)} default documentation links");

    # The signature promises Dict[str, str]; previously nothing was returned.
    return result;
}
158210
159211with entry {
160212 # Determine paths relative to this script
161213 script_dir = Path(__file__ ).parent;
162214 mkdocs_path = script_dir.parent.parent / " jaseci" / " docs" / " mkdocs.yml" ;
163215 output_path = script_dir / " docs_links" / " all_section_links.json" ;
216+ default_links_path = script_dir / " docs_links" / " default_suggestion_links.json" ;
164217
165218 # Ensure output directory exists
166219 output_path.parent.mkdir(parents = True , exist_ok = True );
@@ -172,9 +225,9 @@ with entry {
172225 exit (1 );
173226 }
174227
175- # Parse and generate
176- all_links = parse_mkdocs_nav(str (mkdocs_path), str (output_path));
228+ # Generate all documentation links with embeddings
229+ parse_mkdocs_nav(str (mkdocs_path), str (output_path));
177230
178- print ( f " \n ✅ Successfully generated { len (all_links) } documentation links with embeddings! " );
179- print ( f " 📄 File saved at: { output_path } " );
231+ # Generate default suggestion links
232+ generate_default_doc_links( str (mkdocs_path), str (default_links_path) );
180233}
0 commit comments