generated from ossf/project-template
-
Notifications
You must be signed in to change notification settings - Fork 64
Implementation of Git repository support for the Malicious Packages repository. #1047
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
13 commits
Select commit
Hold shift + click to select a range
12fbbd2
Commit WIP for git-repo support
calebbrown 7d94af3
Implement initial git-based repository support.
calebbrown 8a49b90
Improve documentation and shrink the gitname API.
calebbrown 546bed6
Fix lint errors
calebbrown 37a861e
Fix the nolint lint warning
calebbrown 8ec6c6b
Improve the git-based report testing and fix some edge cases.
calebbrown debc8af
Add initial doc changes for Git repo implementation.
calebbrown 4ac2d9e
Minor readme change.
calebbrown 12d296f
Merge branch 'main' into git_repo_impl
calebbrown 8239e1e
Move git support doc changes out to another PR.
calebbrown 2595588
Allow versions for git repos. Require versions OR ranges otherwise.
calebbrown b088290
Respond to code review comments.
calebbrown 3e7794b
Merge branch 'main' into git_repo_impl
calebbrown File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,59 @@ | ||
| package gitname | ||
|
|
||
| import ( | ||
| "net/url" | ||
| "strings" | ||
| ) | ||
|
|
||
| // Canon will canonicalize the a git repository name as returned by Parse. | ||
| // | ||
| // The method adjusts it's behaviour based on the hostname to ensure that | ||
| // URLs are correctly canonicalized for each git hosting provider. | ||
| // | ||
| // Any password present in the URL will be stripped. | ||
| func Canon(name *url.URL) *url.URL { | ||
| u := *name // shallow copy | ||
|
|
||
| u.Host = strings.ToLower(u.Host) | ||
|
|
||
| // We can only adjust the hosts we are aware of. | ||
| if handler := handlerForHost(u.Host); handler != nil { | ||
| handler.Canon(&u) | ||
| } | ||
|
|
||
| // Always strip passwords if they are present. | ||
| if _, ok := u.User.Password(); ok { | ||
| u.User = url.User(u.User.Username()) | ||
| } | ||
|
|
||
| return &u | ||
| } | ||
|
|
||
| // CanonForStorage canonicalizes the git repository name and ensures that it | ||
| // is nicely formatted for use in output. | ||
| // | ||
| // The scheme is dropped from the URL. | ||
| // If the username is "git" it is dropped. | ||
| // The ".git" suffix is removed. | ||
| // | ||
| // If the repository name is invalid, the string is returned without changes. | ||
| func CanonForStorage(name string) string { | ||
| u, err := Parse(name) | ||
| if err != nil { | ||
| return name | ||
| } | ||
|
|
||
| u = Canon(u) | ||
|
|
||
| u.Scheme = "" | ||
|
|
||
| if u.User.Username() == "git" { | ||
| u.User = nil | ||
| } | ||
|
|
||
| u.Path, _ = strings.CutSuffix(u.Path, ".git") | ||
|
|
||
| canon := u.String() | ||
| canon = canon[2:] // Strip "//" prefix | ||
| return canon | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,56 @@ | ||
| package gitname_test | ||
|
|
||
| import ( | ||
| "net/url" | ||
| "testing" | ||
|
|
||
| "github.com/ossf/malicious-packages/internal/gitname" | ||
| ) | ||
|
|
||
| func TestCanon(t *testing.T) { | ||
| tests := map[string]string{ | ||
| "https://github.com/org/repo.git": "https://github.com/org/repo.git", | ||
| "https://github.com/org/repo": "https://github.com/org/repo.git", | ||
| "http://github.com/org/repo.git": "https://github.com/org/repo.git", | ||
| "ssh://[email protected]/Org/Repo.git": "https://github.com/org/repo.git", | ||
| "ssh://[email protected]/org/REPO.git": "https://gitlab.com/org/repo.git", | ||
| "https://go.googlesource.com/go": "https://go.googlesource.com/go", | ||
| "https://gitee.com/ignOre/CASE.git": "https://gitee.com/ignore/CASE.git", | ||
| "git://user:[email protected]/repo.git": "git://[email protected]/repo.git", | ||
| } | ||
| for repo, want := range tests { | ||
| t.Run(repo, func(t *testing.T) { | ||
| u, err := url.Parse(repo) | ||
| if err != nil { | ||
| t.Fatalf("url.Parse() = %v; want no error", err) | ||
| } | ||
| got := gitname.Canon(u).String() | ||
| if got != want { | ||
| t.Fatalf("Canon() = %q; want %q", got, want) | ||
| } | ||
| }) | ||
| } | ||
| } | ||
|
|
||
| func TestCanonForStorage(t *testing.T) { | ||
| tests := map[string]string{ | ||
| "invalid:": "invalid:", | ||
| "invalid": "invalid", | ||
| "ftp://invalid.com/repo.git": "ftp://invalid.com/repo.git", | ||
| "https://github.com/org/repo.git": "github.com/org/repo", | ||
| "[email protected]:org/repo.git": "github.com/org/repo", | ||
| "ssh://[email protected]/Org/Repo.git": "github.com/org/repo", | ||
| "https://go.googlesource.com/go": "go.googlesource.com/go", | ||
| "[email protected]:path/to/repo.git": "[email protected]/path/to/repo", | ||
| "[email protected]:ignOre/CASE.git": "gitee.com/ignore/CASE", | ||
| "[email protected]:repo.git": "example.com/repo", | ||
| } | ||
| for repo, want := range tests { | ||
| t.Run(repo, func(t *testing.T) { | ||
| got := gitname.CanonForStorage(repo) | ||
| if got != want { | ||
| t.Fatalf("CanonForStorage() = %q; want %q", got, want) | ||
| } | ||
| }) | ||
| } | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,141 @@ | ||
| package gitname | ||
|
|
||
| import ( | ||
| "net/url" | ||
| "strings" | ||
| ) | ||
|
|
||
// gitHostHandler describes how repository URLs for one git hosting service
// are validated and canonicalized.
type gitHostHandler struct {
	// CheckPath reports whether a URL path is acceptable for the host.
	CheckPath func(string) bool
	// CanonScheme, when non-empty, replaces the URL scheme.
	CanonScheme string
	// CanonPath, when non-nil, rewrites the URL path into its canonical form.
	CanonPath func(string) string
	// KeepUser controls whether user information is kept; when false it is
	// removed from the URL.
	KeepUser bool
	// EnsureGitExt causes ".git" to be appended to paths that lack it.
	EnsureGitExt bool
}

// Canon canonicalizes the supplied url in place for a specific git hosting
// service, based on the configuration in the gitHostHandler.
func (h *gitHostHandler) Canon(u *url.URL) {
	// Replace the scheme if we have an override.
	if h.CanonScheme != "" {
		u.Scheme = h.CanonScheme
	}

	// Fix the path. A handler without a CanonPath leaves the path untouched
	// instead of panicking on a nil function value.
	if h.CanonPath != nil {
		u.Path = h.CanonPath(u.Path)
	}

	// Strip the user if we should not keep it.
	if !h.KeepUser {
		u.User = nil
	}

	// Ensure the .git extension is always present.
	if h.EnsureGitExt && !strings.HasSuffix(u.Path, ".git") {
		u.Path += ".git"
	}
}
|
|
||
| // defaultGitHost covers common git hosting services that have a url structure | ||
| // of "example.com/org/repo.git", where "org" and "repo" are case-insensitive. | ||
| var defaultGitHost = &gitHostHandler{ | ||
| CheckPath: checkOrgRepoPath, | ||
| CanonScheme: "https", | ||
| CanonPath: strings.ToLower, | ||
| KeepUser: false, | ||
| EnsureGitExt: true, | ||
| } | ||
|
|
||
| // sensitiveRepoGitHost is similar to defaultGitHost, except it preserves the | ||
| // case on the "repo" part of the URL. | ||
| var sensitiveRepoGitHost = &gitHostHandler{ | ||
| CheckPath: checkOrgRepoPath, | ||
| CanonScheme: "https", | ||
| CanonPath: canonLowerOrgPath, | ||
| KeepUser: false, | ||
| EnsureGitExt: true, | ||
| } | ||
|
|
||
| // googlesourceGitHost is specifically for .googlesource.com git repositories. | ||
| var googlesourceGitHost = &gitHostHandler{ | ||
| CheckPath: checkRepoOnlyPath, | ||
| CanonScheme: "https", | ||
| CanonPath: strings.ToLower, | ||
| KeepUser: false, | ||
| EnsureGitExt: false, | ||
| } | ||
|
|
||
| // gitHosts maps either entire host matches or host suffixes to a gitHostHandler | ||
| // instance. | ||
| // Any key starting with a "." will be checked as a suffix. The order the | ||
| // suffixes are checked is random. | ||
| var gitHosts = map[string]*gitHostHandler{ | ||
| ".googlesource.com": googlesourceGitHost, | ||
| "github.com": defaultGitHost, | ||
| "gitlab.com": defaultGitHost, | ||
| "bitbucket.org": defaultGitHost, | ||
| "codeberg.org": defaultGitHost, | ||
| "gitee.com": sensitiveRepoGitHost, | ||
| "gitee.cn": sensitiveRepoGitHost, | ||
| } | ||
|
|
||
| func handlerForHost(host string) *gitHostHandler { | ||
| if handler, ok := gitHosts[host]; ok { | ||
| // There is a direct match, so return the handler immediately. | ||
| return handler | ||
| } | ||
| for suffix, handler := range gitHosts { | ||
| if suffix[0] != '.' { | ||
| // The suffix must start with a "." to ensure subdomains are | ||
| // matched correctly. | ||
| continue | ||
| } | ||
| if strings.HasSuffix(host, suffix) { | ||
| // The suffix matches the given host, so return the handler. | ||
| return handler | ||
| } | ||
| } | ||
| return nil | ||
| } | ||
|
|
||
| // checkRepoOnlyPath ensures that the path being supplied only has one path | ||
| // component. | ||
| func checkRepoOnlyPath(path string) bool { | ||
| return checkPathParts(path, 1) | ||
| } | ||
|
|
||
| // checkOrgRepoPath ensures that the path being supplied only has two path | ||
| // components. | ||
| func checkOrgRepoPath(path string) bool { | ||
| return checkPathParts(path, 2) | ||
| } | ||
|
|
||
// checkPathParts reports whether path is made up of exactly count
// slash-separated components, none of which is empty. Any leading slashes are
// ignored.
func checkPathParts(path string, count int) bool {
	rest := strings.TrimLeft(path, "/")

	// N separators delimit N+1 components.
	if strings.Count(rest, "/")+1 != count {
		return false
	}

	// With leading slashes already removed, an empty component can only come
	// from an empty string, a trailing slash, or two adjacent slashes.
	return rest != "" &&
		!strings.HasSuffix(rest, "/") &&
		!strings.Contains(rest, "//")
}
|
|
||
// canonLowerOrgPath lowercases the first non-empty component of path and
// leaves everything else, including any leading slashes, untouched.
func canonLowerOrgPath(path string) string {
	// Separate the run of leading slashes from the remainder.
	rest := strings.TrimLeft(path, "/")
	slashes := path[:len(path)-len(rest)]

	// The first component ends at the next slash, if there is one.
	org, tail, hasTail := strings.Cut(rest, "/")
	if !hasTail {
		return slashes + strings.ToLower(org)
	}
	return slashes + strings.ToLower(org) + "/" + tail
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,109 @@ | ||
| package gitname | ||
|
|
||
| import ( | ||
| "errors" | ||
| "fmt" | ||
| "net/url" | ||
| "slices" | ||
| "strings" | ||
| ) | ||
|
|
||
var (
	// ErrInvalidGitRepo is wrapped by any errors returned by Parse.
	ErrInvalidGitRepo = errors.New("invalid git repository")

	// validGitRemoteSchemes lists the URL schemes accepted for URL-style
	// repository names.
	validGitRemoteSchemes = []string{"http", "https", "ssh", "git"}
)
|
|
||
| // Parse parses a git repository name into a url.URL. If the name cannot be | ||
| // parsed an error will be returned, and the url will be nil. | ||
| // | ||
| // Both URL and SCP-like git repository names are supported. | ||
| func Parse(name string) (*url.URL, error) { | ||
| u, err := url.Parse(name) | ||
| if err == nil { | ||
| // Apply some further validation to the parsed URL for of the repo. | ||
| err = validateURLRepo(u) | ||
| } | ||
| if err != nil { | ||
| // Assume if we still have an error we *might* have an SSH-based repo. | ||
| u, err = parseSSH(name) | ||
| if err != nil { | ||
| return nil, err | ||
| } | ||
| } | ||
| if handler := handlerForHost(u.Host); handler != nil && !handler.CheckPath(u.Path) { | ||
| return nil, fmt.Errorf("%w: invalid path for host %q", ErrInvalidGitRepo, u.Host) | ||
| } | ||
| return u, nil | ||
| } | ||
|
|
||
| func parseSSH(name string) (*url.URL, error) { | ||
| // Hunt for the end of an IPv6 address first, to avoid matching the colons | ||
| // in the IPv6 path itself. | ||
| ipv6End := strings.Index(name, "]:") | ||
| pathIdx := 0 | ||
|
|
||
| if ipv6End >= 0 { | ||
| // Skip the separator "]:" | ||
| pathIdx = ipv6End + 2 | ||
| } else { | ||
| i := strings.Index(name, ":") | ||
| if i < 0 { | ||
| return nil, fmt.Errorf("%w: no path separator", ErrInvalidGitRepo) | ||
| } | ||
| // Skip the separator ":" | ||
| pathIdx = i + 1 | ||
| } | ||
|
|
||
| path := name[pathIdx:] | ||
| if len(path) == 0 { | ||
| return nil, fmt.Errorf("%w: empty path", ErrInvalidGitRepo) | ||
| } else if path[0] == '/' { | ||
| return nil, fmt.Errorf("%w: absolute path", ErrInvalidGitRepo) | ||
| } | ||
| // TODO: should we force a ".git" suffix? | ||
|
|
||
| userHost := name[0 : pathIdx-1] | ||
| if len(userHost) == 0 { | ||
| return nil, fmt.Errorf("%w: no user or host", ErrInvalidGitRepo) | ||
| } | ||
|
|
||
| // Build a raw URL string that we parse later from the components of the | ||
| // Git scp-like repository. | ||
| raw := "ssh://" | ||
|
|
||
| userEnd := strings.LastIndex(userHost, "@") | ||
| switch { | ||
| case userEnd == 0: | ||
| return nil, fmt.Errorf("%w: empty user", ErrInvalidGitRepo) | ||
| case userEnd == len(userHost)-1: | ||
| return nil, fmt.Errorf("%w: empty host", ErrInvalidGitRepo) | ||
| default: | ||
| raw += userHost | ||
| } | ||
|
|
||
| raw += "/" + path | ||
| u, err := url.Parse(raw) | ||
| if err != nil { | ||
| return nil, fmt.Errorf("%w: %w", ErrInvalidGitRepo, err) | ||
| } | ||
|
|
||
| return u, nil | ||
| } | ||
|
|
||
| func validateURLRepo(u *url.URL) error { | ||
| if !slices.Contains(validGitRemoteSchemes, u.Scheme) { | ||
| return fmt.Errorf("%w: unsupported git scheme", ErrInvalidGitRepo) | ||
| } | ||
| if u.Host == "" { | ||
| return fmt.Errorf("%w: empty host", ErrInvalidGitRepo) | ||
| } | ||
| if u.Path == "" || u.Path == "/" { | ||
| return fmt.Errorf("%w: empty path", ErrInvalidGitRepo) | ||
| } | ||
| return nil | ||
| } |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess iteration order will be random, if we ever have two suffix patterns that would match.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That is true. I have added a comment above to make this clearer.
For now I am okay with this.