Skip to content

Commit 656d58d

Browse files
DmitriyLewenyutatokoi
authored and committed
fix(python): improve package name normalization (#9290)
1 parent 67262cd commit 656d58d

9 files changed

Lines changed: 177 additions & 43 deletions

File tree

pkg/dependency/parser/python/poetry/parse.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ func (p *Parser) parseDependencies(deps map[string]any, pkgVersions map[string][
105105
}
106106

107107
func (p *Parser) parseDependency(name string, versRange any, pkgVersions map[string][]string) (string, error) {
108-
name = python.NormalizePkgName(name)
108+
name = python.NormalizePkgName(name, true)
109109
vers, ok := pkgVersions[name]
110110
if !ok {
111111
return "", xerrors.Errorf("no version found for %q", name)

pkg/dependency/parser/python/pyproject/pyproject.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ func (d *Dependencies) UnmarshalTOML(data any) error {
5555
switch deps := data.(type) {
5656
case map[string]any: // For Poetry v1
5757
d.Set = set.New[string](lo.MapToSlice(deps, func(pkgName string, _ any) string {
58-
return python.NormalizePkgName(pkgName)
58+
return python.NormalizePkgName(pkgName, true)
5959
})...)
6060
case []any: // For Poetry v2
6161
d.Set = set.New[string]()
Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,22 @@
11
package python
22

3-
import "strings"
4-
5-
// NormalizePkgName normalizes the package name based on pep-0426
6-
func NormalizePkgName(name string) string {
7-
// The package names don't use `_`, `.` or upper case, but dependency names can contain them.
8-
// We need to normalize those names.
9-
// cf. https://peps.python.org/pep-0426/#name
10-
name = strings.ToLower(name) // e.g. https://github.com/python-poetry/poetry/blob/c8945eb110aeda611cc6721565d7ad0c657d453a/poetry.lock#L819
11-
name = strings.ReplaceAll(name, "_", "-") // e.g. https://github.com/python-poetry/poetry/blob/c8945eb110aeda611cc6721565d7ad0c657d453a/poetry.lock#L50
12-
name = strings.ReplaceAll(name, ".", "-") // e.g. https://github.com/python-poetry/poetry/blob/c8945eb110aeda611cc6721565d7ad0c657d453a/poetry.lock#L816
3+
import (
4+
"regexp"
5+
"strings"
6+
)
7+
8+
var normalizePkgNameRegexp = regexp.MustCompile(`[-_.]+`)
9+
10+
// NormalizePkgName normalizes the package name based on pep-0503 (with the option to disable conversion to lowercase).
11+
// cf. https://peps.python.org/pep-0503/#normalized-names:
12+
// The name should be lowercased with all runs of the characters ., -, or _ replaced with a single - character.
13+
func NormalizePkgName(name string, inLowerCase bool) string {
14+
name = normalizePkgNameRegexp.ReplaceAllString(name, "-")
15+
16+
// pep-0503 requires that all package names MUST be lowercase.
17+
// But there are cases where the original case should be preserved (e.g. dist-info dir names).
18+
if inLowerCase {
19+
name = strings.ToLower(name)
20+
}
1321
return name
1422
}

pkg/dependency/parser/python/python_test.go

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,30 +10,45 @@ import (
1010

1111
func Test_NormalizePkgName(t *testing.T) {
1212
tests := []struct {
13-
pkgName string
14-
expected string
13+
pkgName string
14+
lowerCase bool
15+
expected string
1516
}{
1617
{
17-
pkgName: "SecretStorage",
18-
expected: "secretstorage",
18+
pkgName: "SecretStorage",
19+
lowerCase: true,
20+
expected: "secretstorage",
1921
},
2022
{
21-
pkgName: "pywin32-ctypes",
22-
expected: "pywin32-ctypes",
23+
pkgName: "SecretStorage",
24+
lowerCase: false,
25+
expected: "SecretStorage",
2326
},
2427
{
25-
pkgName: "jaraco.classes",
26-
expected: "jaraco-classes",
28+
pkgName: "pywin32-ctypes",
29+
lowerCase: true,
30+
expected: "pywin32-ctypes",
2731
},
2832
{
29-
pkgName: "green_gdk",
30-
expected: "green-gdk",
33+
pkgName: "jaraco.classes",
34+
lowerCase: true,
35+
expected: "jaraco-classes",
36+
},
37+
{
38+
pkgName: "green_gdk",
39+
lowerCase: true,
40+
expected: "green-gdk",
41+
},
42+
{
43+
pkgName: "foo--bar__baz",
44+
lowerCase: true,
45+
expected: "foo-bar-baz",
3146
},
3247
}
3348

3449
for _, tt := range tests {
3550
t.Run(tt.pkgName, func(t *testing.T) {
36-
assert.Equal(t, tt.expected, python.NormalizePkgName(tt.pkgName))
51+
assert.Equal(t, tt.expected, python.NormalizePkgName(tt.pkgName, tt.lowerCase))
3752
})
3853
}
3954
}

pkg/fanal/analyzer/language/python/pip/pip.go

Lines changed: 40 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ package pip
22

33
import (
44
"context"
5-
"fmt"
65
"io"
76
"io/fs"
87
"os"
@@ -15,6 +14,7 @@ import (
1514
"golang.org/x/xerrors"
1615

1716
goversion "github.com/aquasecurity/go-version/pkg/version"
17+
"github.com/aquasecurity/trivy/pkg/dependency/parser/python"
1818
"github.com/aquasecurity/trivy/pkg/dependency/parser/python/packaging"
1919
"github.com/aquasecurity/trivy/pkg/dependency/parser/python/pip"
2020
"github.com/aquasecurity/trivy/pkg/fanal/analyzer"
@@ -109,19 +109,15 @@ func (a pipLibraryAnalyzer) Version() int {
109109

110110
// pkgLicense parses `METADATA` pkg file to look for licenses
111111
func (a pipLibraryAnalyzer) pkgLicense(pkgName, pkgVer, spDir string) []string {
112-
// METADATA path is `**/site-packages/<pkg_name>-<pkg_version>.dist-info/METADATA`
113-
pkgDir := fmt.Sprintf("%s-%s.dist-info", pkgName, pkgVer)
114-
metadataPath := filepath.Join(spDir, pkgDir, "METADATA")
115-
metadataFile, err := os.Open(metadataPath)
116-
if os.IsNotExist(err) {
117-
a.logger.Debug("No package metadata found", log.String("site-packages", pkgDir),
118-
log.String("name", pkgName), log.String("version", pkgVer))
112+
metadataFile := a.metadataFile(pkgName, pkgVer, spDir)
113+
if metadataFile == nil {
119114
return nil
120115
}
116+
defer metadataFile.Close()
121117

122118
metadataPkg, _, err := a.metadataParser.Parse(metadataFile)
123119
if err != nil {
124-
a.logger.Warn("Unable to parse METADATA file", log.FilePath(metadataPath), log.Err(err))
120+
a.logger.Warn("Unable to parse METADATA file", log.FilePath(metadataFile.Name()), log.Err(err))
125121
return nil
126122
}
127123

@@ -231,3 +227,38 @@ func (a pipLibraryAnalyzer) sortPythonDirs(entries []os.DirEntry) []string {
231227
return "python" + v.String()
232228
})
233229
}
230+
231+
// metadataFile returns the opened METADATA file for the package, or nil if none of the candidate dist-info dirs contains one
232+
func (a pipLibraryAnalyzer) metadataFile(pkgName, pkgVer, spDir string) *os.File {
233+
pkgDirs := distInfoDirs(pkgName, pkgVer)
234+
for _, pkgDir := range distInfoDirs(pkgName, pkgVer) {
235+
metadataPath := filepath.Join(spDir, pkgDir, "METADATA")
236+
metadataFile, err := os.Open(metadataPath)
237+
if err == nil {
238+
return metadataFile
239+
}
240+
}
241+
242+
a.logger.Debug("No package metadata found", log.String("site-packages", spDir),
243+
log.String("dist-info", strings.Join(pkgDirs, ", ")), log.String("name", pkgName), log.String("version", pkgVer))
244+
return nil
245+
}
246+
247+
// distInfoDirs returns the candidate dist-info dir names for the package (original-case and lowercase variants)
248+
// cf. https://packaging.python.org/en/latest/specifications/recording-installed-packages/#the-dist-info-directory
249+
// e.g. `foo-1.0.dist-info` or `foo_bar-1.0.dist-info`
250+
func distInfoDirs(name, version string) []string {
251+
dirs := []string{
252+
// Some packages don't use lower case.
253+
// e.g. Flask uses `Flask-2.0.1.dist-info`
254+
python.NormalizePkgName(name, false),
255+
python.NormalizePkgName(name, true),
256+
}
257+
258+
for i := range dirs {
259+
dirs[i] = strings.ReplaceAll(dirs[i], "-", "_")
260+
dirs[i] = dirs[i] + "-" + version + ".dist-info"
261+
}
262+
263+
return dirs
264+
}

pkg/fanal/analyzer/language/python/pip/pip_test.go

Lines changed: 34 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -23,21 +23,21 @@ func Test_pipAnalyzer_Analyze(t *testing.T) {
2323
FilePath: "requirements.txt",
2424
Packages: types.Packages{
2525
{
26-
Name: "click",
27-
Version: "8.0.0",
26+
Name: "annotated-types",
27+
Version: "0.7.0",
2828
Locations: []types.Location{
2929
{
3030
StartLine: 1,
3131
EndLine: 1,
3232
},
3333
},
3434
Licenses: []string{
35-
"BSD License",
35+
"MIT License",
3636
},
3737
},
3838
{
39-
Name: "Flask",
40-
Version: "2.0.0",
39+
Name: "click",
40+
Version: "8.0.0",
4141
Locations: []types.Location{
4242
{
4343
StartLine: 2,
@@ -49,14 +49,27 @@ func Test_pipAnalyzer_Analyze(t *testing.T) {
4949
},
5050
},
5151
{
52-
Name: "itsdangerous",
52+
Name: "Flask",
5353
Version: "2.0.0",
5454
Locations: []types.Location{
5555
{
5656
StartLine: 3,
5757
EndLine: 3,
5858
},
5959
},
60+
Licenses: []string{
61+
"BSD License",
62+
},
63+
},
64+
{
65+
Name: "itsdangerous",
66+
Version: "2.0.0",
67+
Locations: []types.Location{
68+
{
69+
StartLine: 4,
70+
EndLine: 4,
71+
},
72+
},
6073
},
6174
},
6275
},
@@ -100,8 +113,8 @@ func Test_pipAnalyzer_Analyze(t *testing.T) {
100113
FilePath: "requirements.txt",
101114
Packages: types.Packages{
102115
{
103-
Name: "click",
104-
Version: "8.0.0",
116+
Name: "annotated-types",
117+
Version: "0.7.0",
105118
Locations: []types.Location{
106119
{
107120
StartLine: 1,
@@ -110,8 +123,8 @@ func Test_pipAnalyzer_Analyze(t *testing.T) {
110123
},
111124
},
112125
{
113-
Name: "Flask",
114-
Version: "2.0.0",
126+
Name: "click",
127+
Version: "8.0.0",
115128
Locations: []types.Location{
116129
{
117130
StartLine: 2,
@@ -120,7 +133,7 @@ func Test_pipAnalyzer_Analyze(t *testing.T) {
120133
},
121134
},
122135
{
123-
Name: "itsdangerous",
136+
Name: "Flask",
124137
Version: "2.0.0",
125138
Locations: []types.Location{
126139
{
@@ -129,6 +142,16 @@ func Test_pipAnalyzer_Analyze(t *testing.T) {
129142
},
130143
},
131144
},
145+
{
146+
Name: "itsdangerous",
147+
Version: "2.0.0",
148+
Locations: []types.Location{
149+
{
150+
StartLine: 4,
151+
EndLine: 4,
152+
},
153+
},
154+
},
132155
},
133156
},
134157
},

pkg/fanal/analyzer/language/python/pip/testdata/happy/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
annotated-types==0.7.0
12
click==8.0.0
23
Flask==2.0.0
34
itsdangerous==2.0.0
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
Metadata-Version: 2.3
2+
Name: annotated-types
3+
Version: 0.7.0
4+
Summary: Reusable constraint types to use with typing.Annotated
5+
Project-URL: Homepage, https://github.com/annotated-types/annotated-types
6+
Project-URL: Source, https://github.com/annotated-types/annotated-types
7+
Project-URL: Changelog, https://github.com/annotated-types/annotated-types/releases
8+
Author-email: Adrian Garcia Badaracco <[email protected]>, Samuel Colvin <[email protected]>, Zac Hatfield-Dodds <[email protected]>
9+
License-File: LICENSE
10+
Classifier: Development Status :: 4 - Beta
11+
Classifier: Environment :: Console
12+
Classifier: Environment :: MacOS X
13+
Classifier: Intended Audience :: Developers
14+
Classifier: Intended Audience :: Information Technology
15+
Classifier: License :: OSI Approved :: MIT License
16+
Classifier: Operating System :: POSIX :: Linux
17+
Classifier: Operating System :: Unix
18+
Classifier: Programming Language :: Python :: 3 :: Only
19+
Classifier: Programming Language :: Python :: 3.8
20+
Classifier: Programming Language :: Python :: 3.9
21+
Classifier: Programming Language :: Python :: 3.10
22+
Classifier: Programming Language :: Python :: 3.11
23+
Classifier: Programming Language :: Python :: 3.12
24+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
25+
Classifier: Typing :: Typed
26+
Requires-Python: >=3.8
27+
Requires-Dist: typing-extensions>=4.0.0; python_version < '3.9'
28+
Description-Content-Type: text/markdown
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
Metadata-Version: 2.3
2+
Name: annotated-types
3+
Version: 0.7.0
4+
Summary: Reusable constraint types to use with typing.Annotated
5+
Project-URL: Homepage, https://github.com/annotated-types/annotated-types
6+
Project-URL: Source, https://github.com/annotated-types/annotated-types
7+
Project-URL: Changelog, https://github.com/annotated-types/annotated-types/releases
8+
Author-email: Adrian Garcia Badaracco <[email protected]>, Samuel Colvin <[email protected]>, Zac Hatfield-Dodds <[email protected]>
9+
License-File: LICENSE
10+
Classifier: Development Status :: 4 - Beta
11+
Classifier: Environment :: Console
12+
Classifier: Environment :: MacOS X
13+
Classifier: Intended Audience :: Developers
14+
Classifier: Intended Audience :: Information Technology
15+
Classifier: License :: OSI Approved :: MIT License
16+
Classifier: Operating System :: POSIX :: Linux
17+
Classifier: Operating System :: Unix
18+
Classifier: Programming Language :: Python :: 3 :: Only
19+
Classifier: Programming Language :: Python :: 3.8
20+
Classifier: Programming Language :: Python :: 3.9
21+
Classifier: Programming Language :: Python :: 3.10
22+
Classifier: Programming Language :: Python :: 3.11
23+
Classifier: Programming Language :: Python :: 3.12
24+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
25+
Classifier: Typing :: Typed
26+
Requires-Python: >=3.8
27+
Requires-Dist: typing-extensions>=4.0.0; python_version < '3.9'
28+
Description-Content-Type: text/markdown

0 commit comments

Comments
 (0)