diff --git a/go.mod b/go.mod index f60be046f1..098843666c 100644 --- a/go.mod +++ b/go.mod @@ -11,6 +11,7 @@ require ( github.com/ergochat/irc-go v0.5.0 github.com/gdamore/tcell/v2 v2.13.8 github.com/google/uuid v1.6.0 + github.com/gomarkdown/markdown v0.0.0-20260217112301-37c66b85d6ab github.com/gorilla/websocket v1.5.3 github.com/h2non/filetype v1.1.3 github.com/larksuite/oapi-sdk-go/v3 v3.5.3 diff --git a/go.sum b/go.sum index 4060997f8a..2e2b1a1ec4 100644 --- a/go.sum +++ b/go.sum @@ -79,6 +79,8 @@ github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvq github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/gomarkdown/markdown v0.0.0-20260217112301-37c66b85d6ab h1:VYNivV7P8IRHUam2swVUNkhIdp0LRRFKe4hXNnoZKTc= +github.com/gomarkdown/markdown v0.0.0-20260217112301-37c66b85d6ab/go.mod h1:JDGcbDT52eL4fju3sZ4TeHGsQwhG9nbDV21aMyhwPoA= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= diff --git a/pkg/skills/loader.go b/pkg/skills/loader.go index 30d84635a5..f5985a662a 100644 --- a/pkg/skills/loader.go +++ b/pkg/skills/loader.go @@ -10,14 +10,15 @@ import ( "regexp" "strings" + "github.com/gomarkdown/markdown" + "github.com/gomarkdown/markdown/ast" + "github.com/gomarkdown/markdown/parser" + "gopkg.in/yaml.v3" + "github.com/sipeed/picoclaw/pkg/logger" ) -var ( - namePattern = regexp.MustCompile(`^[a-zA-Z0-9]+(-[a-zA-Z0-9]+)*$`) - reFrontmatter = regexp.MustCompile(`(?s)^---(?:\r\n|\n|\r)(.*?)(?:\r\n|\n|\r)---`) - reStripFrontmatter = regexp.MustCompile(`(?s)^---(?:\r\n|\n|\r)(.*?)(?:\r\n|\n|\r)---(?:\r\n|\n|\r)*`) -) +var namePattern = regexp.MustCompile(`^[a-zA-Z0-9]+(-[a-zA-Z0-9]+)*$`) const ( MaxNameLength = 64 @@ -226,11 +227,20 @@ func (sl *SkillsLoader) getSkillMetadata(skillPath string) *SkillMetadata { return nil } - frontmatter := sl.extractFrontmatter(string(content)) + frontmatter, bodyContent := splitFrontmatter(string(content)) + dirName := filepath.Base(filepath.Dir(skillPath)) + title, bodyDescription := extractMarkdownMetadata(bodyContent) + + metadata := &SkillMetadata{ + Name: dirName, + Description: bodyDescription, + } + if title != "" && namePattern.MatchString(title) && len(title) <= MaxNameLength { + metadata.Name = title + } + if frontmatter == "" { - return &SkillMetadata{ - Name: filepath.Base(filepath.Dir(skillPath)), - } + return metadata } // Try JSON first (for backward compatibility) @@ -239,60 +249,133 @@ func (sl *SkillsLoader) getSkillMetadata(skillPath string) *SkillMetadata { Description string `json:"description"` } if err := json.Unmarshal([]byte(frontmatter), &jsonMeta); err == nil { - return &SkillMetadata{ - Name: jsonMeta.Name, - Description: jsonMeta.Description, + if jsonMeta.Name != "" { + metadata.Name = jsonMeta.Name + } + if jsonMeta.Description != "" { + metadata.Description = jsonMeta.Description } + return metadata } // Fall back to simple YAML parsing yamlMeta := sl.parseSimpleYAML(frontmatter) - return &SkillMetadata{ - Name: yamlMeta["name"], - Description: yamlMeta["description"], + if name := yamlMeta["name"]; name != "" { + metadata.Name = name } + if description := yamlMeta["description"]; description != "" { + metadata.Description = description + } + return metadata } -// parseSimpleYAML parses simple key: value YAML format -// Example: name: github\n description: "..." -// Normalizes line endings to handle \n (Unix), \r\n (Windows), and \r (classic Mac) -func (sl *SkillsLoader) parseSimpleYAML(content string) map[string]string { - result := make(map[string]string) +func extractMarkdownMetadata(content string) (title, description string) { + p := parser.NewWithExtensions(parser.CommonExtensions) + doc := markdown.Parse([]byte(content), p) + if doc == nil { + return "", "" + } - // Normalize line endings: convert \r\n and \r to \n - normalized := strings.ReplaceAll(content, "\r\n", "\n") - normalized = strings.ReplaceAll(normalized, "\r", "\n") + ast.WalkFunc(doc, func(node ast.Node, entering bool) ast.WalkStatus { + if !entering { + return ast.GoToNext + } - for line := range strings.SplitSeq(normalized, "\n") { - line = strings.TrimSpace(line) - if line == "" || strings.HasPrefix(line, "#") { - continue + switch n := node.(type) { + case *ast.Heading: + if title == "" && n.Level == 1 { + title = nodeText(n) + if title != "" && description != "" { + return ast.Terminate + } + } + case *ast.Paragraph: + if description == "" { + description = nodeText(n) + if title != "" && description != "" { + return ast.Terminate + } + } } + return ast.GoToNext + }) + + return title, description +} - parts := strings.SplitN(line, ":", 2) - if len(parts) == 2 { - key := strings.TrimSpace(parts[0]) - value := strings.TrimSpace(parts[1]) - // Remove quotes if present - value = strings.Trim(value, "\"'") - result[key] = value +func nodeText(n ast.Node) string { + var b strings.Builder + ast.WalkFunc(n, func(node ast.Node, entering bool) ast.WalkStatus { + if !entering { + return ast.GoToNext } + + switch t := node.(type) { + case *ast.Text: + b.Write(t.Literal) + case *ast.Code: + b.Write(t.Literal) + case *ast.Softbreak, *ast.Hardbreak, *ast.NonBlockingSpace: + b.WriteByte(' ') + } + return ast.GoToNext + }) + return strings.Join(strings.Fields(b.String()), " ") +} + +// parseSimpleYAML parses YAML frontmatter and extracts known metadata fields. +func (sl *SkillsLoader) parseSimpleYAML(content string) map[string]string { + result := make(map[string]string) + + var meta struct { + Name string `yaml:"name"` + Description string `yaml:"description"` + } + if err := yaml.Unmarshal([]byte(content), &meta); err != nil { + return result + } + if meta.Name != "" { + result["name"] = meta.Name + } + if meta.Description != "" { + result["description"] = meta.Description } return result } func (sl *SkillsLoader) extractFrontmatter(content string) string { - // Support \n (Unix), \r\n (Windows), and \r (classic Mac) line endings for frontmatter blocks - match := reFrontmatter.FindStringSubmatch(content) - if len(match) > 1 { - return match[1] - } - return "" + frontmatter, _ := splitFrontmatter(content) + return frontmatter } func (sl *SkillsLoader) stripFrontmatter(content string) string { - return reStripFrontmatter.ReplaceAllString(content, "") + _, body := splitFrontmatter(content) + return body +} + +func splitFrontmatter(content string) (frontmatter, body string) { + normalized := string(parser.NormalizeNewlines([]byte(content))) + lines := strings.Split(normalized, "\n") + if len(lines) == 0 || lines[0] != "---" { + return "", content + } + + end := -1 + for i := 1; i < len(lines); i++ { + if lines[i] == "---" { + end = i + break + } + } + if end == -1 { + return "", content + } + + frontmatter = strings.Join(lines[1:end], "\n") + body = strings.Join(lines[end+1:], "\n") + body = strings.TrimLeft(body, "\n") + return frontmatter, body } func escapeXML(s string) string { diff --git a/pkg/skills/loader_test.go b/pkg/skills/loader_test.go index 31619f9c25..645d8b7ac0 100644 --- a/pkg/skills/loader_test.go +++ b/pkg/skills/loader_test.go @@ -342,3 +342,78 @@ func TestSkillRootsTrimsWhitespaceAndDedups(t *testing.T) { builtin, }, roots) } + +func TestGetSkillMetadata_UsesMarkdownParagraphWhenNoFrontmatter(t *testing.T) { + tmp := t.TempDir() + skillDir := filepath.Join(tmp, "workspace", "skills", "plain-skill") + require.NoError(t, os.MkdirAll(skillDir, 0o755)) + + content := "# Plain Skill\n\nThis is parsed from markdown paragraph.\n" + require.NoError(t, os.WriteFile(filepath.Join(skillDir, "SKILL.md"), []byte(content), 0o644)) + + sl := &SkillsLoader{} + meta := sl.getSkillMetadata(filepath.Join(skillDir, "SKILL.md")) + require.NotNil(t, meta) + assert.Equal(t, "plain-skill", meta.Name) + assert.Equal(t, "This is parsed from markdown paragraph.", meta.Description) +} + +func TestGetSkillMetadata_FrontmatterOverridesMarkdown(t *testing.T) { + tmp := t.TempDir() + skillDir := filepath.Join(tmp, "workspace", "skills", "plain-skill") + require.NoError(t, os.MkdirAll(skillDir, 0o755)) + + content := "---\nname: frontmatter-skill\ndescription: frontmatter description\n---\n\n# Plain Skill\n\nBody description.\n" + require.NoError(t, os.WriteFile(filepath.Join(skillDir, "SKILL.md"), []byte(content), 0o644)) + + sl := &SkillsLoader{} + meta := sl.getSkillMetadata(filepath.Join(skillDir, "SKILL.md")) + require.NotNil(t, meta) + assert.Equal(t, "frontmatter-skill", meta.Name) + assert.Equal(t, "frontmatter description", meta.Description) +} + +func TestGetSkillMetadata_YAMLMultilineDescription(t *testing.T) { + tmp := t.TempDir() + skillDir := filepath.Join(tmp, "workspace", "skills", "plain-skill") + require.NoError(t, os.MkdirAll(skillDir, 0o755)) + + content := "---\nname: frontmatter-skill\ndescription: |\n line 1: with colon\n line 2\n---\n\n# Plain Skill\n\nBody description.\n" + require.NoError(t, os.WriteFile(filepath.Join(skillDir, "SKILL.md"), []byte(content), 0o644)) + + sl := &SkillsLoader{} + meta := sl.getSkillMetadata(filepath.Join(skillDir, "SKILL.md")) + require.NotNil(t, meta) + assert.Equal(t, "frontmatter-skill", meta.Name) + assert.Equal(t, "line 1: with colon\nline 2", meta.Description) +} + +func TestGetSkillMetadata_InvalidHeadingNameFallsBackToDirName(t *testing.T) { + tmp := t.TempDir() + skillDir := filepath.Join(tmp, "workspace", "skills", "valid-name") + require.NoError(t, os.MkdirAll(skillDir, 0o755)) + + content := "# Invalid Heading Name\n\nBody description.\n" + require.NoError(t, os.WriteFile(filepath.Join(skillDir, "SKILL.md"), []byte(content), 0o644)) + + sl := &SkillsLoader{} + meta := sl.getSkillMetadata(filepath.Join(skillDir, "SKILL.md")) + require.NotNil(t, meta) + assert.Equal(t, "valid-name", meta.Name) + assert.Equal(t, "Body description.", meta.Description) +} + +func TestGetSkillMetadata_IgnoresHTMLCommentBlocks(t *testing.T) { + tmp := t.TempDir() + skillDir := filepath.Join(tmp, "workspace", "skills", "biomed-skill") + require.NoError(t, os.MkdirAll(skillDir, 0o755)) + + content := "\n\n# Biomed Skill\n\nSummarize biomedical papers.\n" + require.NoError(t, os.WriteFile(filepath.Join(skillDir, "SKILL.md"), []byte(content), 0o644)) + + sl := &SkillsLoader{} + meta := sl.getSkillMetadata(filepath.Join(skillDir, "SKILL.md")) + require.NotNil(t, meta) + assert.Equal(t, "biomed-skill", meta.Name) + assert.Equal(t, "Summarize biomedical papers.", meta.Description) +}