Skip to content

Commit

Permalink
Add the --hash per-requirement options to a metadata field instead of…
Browse files Browse the repository at this point in the history
… to the version.

These flags were previously included in the version, resulting in very large purls.

PiperOrigin-RevId: 640634059
  • Loading branch information
SCALIBR Team authored and copybara-github committed Jun 5, 2024
1 parent c45fdee commit 058884d
Show file tree
Hide file tree
Showing 4 changed files with 147 additions and 0 deletions.
21 changes: 21 additions & 0 deletions extractor/filesystem/language/python/requirements/extractor.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) ([]
continue
}

// Per-requirement options may be present. We extract the --hash options, and discard the others.
l, hashOptions := splitPerRequirementOptions(l)

l = removeWhiteSpaces(l)
l = ignorePythonSpecifier(l)
l = removeExtras(l)
Expand Down Expand Up @@ -99,10 +102,15 @@ func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) ([]
continue
}

var metadata any
if len(hashOptions) > 0 {
metadata = &Metadata{HashCheckingModeValues: hashOptions}
}
inventory = append(inventory, &extractor.Inventory{
Name: name,
Version: version,
Locations: []string{input.Path},
Metadata: metadata,
})
}

Expand Down Expand Up @@ -153,6 +161,19 @@ func hasEnvVariable(s string) bool {
return regexp.MustCompile(`(?P<var>\$\{(?P<name>[A-Z0-9_]+)\})`).FindString(s) != ""
}

// Regular expressions used by splitPerRequirementOptions. They are compiled once at package
// initialization instead of on every call, because the function runs once per requirement line.
var (
	// perRequirementOptionsRe matches from the first per-requirement option to the end of the line.
	perRequirementOptionsRe = regexp.MustCompile(`(?:--hash|--global-option|--config-settings).*`)
	// hashOptionRe captures the value of a --hash option. The value is a run of non-whitespace
	// characters, so an empty "--hash=" followed by another option yields no value instead of
	// swallowing the rest of the line.
	hashOptionRe = regexp.MustCompile(`--hash=(\S+)`)
)

// splitPerRequirementOptions removes from the input all text after the first per-requirement
// option (inclusive) and returns the remaining input along with the values of the --hash options.
// See the documentation in
// https://pip.pypa.io/en/stable/reference/requirements-file-format/#per-requirement-options.
//
// The first result is s truncated right before the first --hash, --global-option or
// --config-settings option; whitespace preceding that option is kept. The second result holds the
// --hash values in order of appearance and is nil when there are none. The values are not
// validated: anything up to the next whitespace is accepted, and an empty value is ignored.
func splitPerRequirementOptions(s string) (string, []string) {
	var hashes []string
	for _, m := range hashOptionRe.FindAllStringSubmatch(s, -1) {
		// m[1] is the capture group holding the option value.
		hashes = append(hashes, m[1])
	}
	return perRequirementOptionsRe.ReplaceAllString(s, ""), hashes
}

// ToPURL converts an inventory created by this extractor into a PURL.
func (e Extractor) ToPURL(i *extractor.Inventory) (*purl.PackageURL, error) {
return &purl.PackageURL{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,85 @@ func TestExtract(t *testing.T) {
path: "testdata/invalid.txt",
wantInventory: []*extractor.Inventory{},
},
{
name: "per requirement options",
path: "testdata/per_req_options.txt",
wantInventory: []*extractor.Inventory{
{
// foo1==1.0 --hash=sha256:123
Name: "foo1",
Version: "1.0",
Metadata: &requirements.Metadata{HashCheckingModeValues: []string{"sha256:123"}},
},
{
// foo2==1.0 --hash=sha256:123 --global-option=foo --config-settings=bar
Name: "foo2",
Version: "1.0",
Metadata: &requirements.Metadata{HashCheckingModeValues: []string{"sha256:123"}},
},
{
// foo3==1.0 --config-settings=bar --global-option=foo --hash=sha256:123
Name: "foo3",
Version: "1.0",
Metadata: &requirements.Metadata{HashCheckingModeValues: []string{"sha256:123"}},
},
{
// foo4==1.0 --hash=wrongformatbutok
Name: "foo4",
Version: "1.0",
Metadata: &requirements.Metadata{HashCheckingModeValues: []string{"wrongformatbutok"}},
},
{
// foo5==1.0; python_version < "2.7" --hash=sha256:123
Name: "foo5",
Version: "1.0",
Metadata: &requirements.Metadata{HashCheckingModeValues: []string{"sha256:123"}},
},
{
// foo6==1.0 --hash=sha256:123 unexpected_text_after_options_does_not_stay_around
Name: "foo6",
Version: "1.0",
Metadata: &requirements.Metadata{HashCheckingModeValues: []string{"sha256:123"}},
},
{
// foo7==1.0 unexpected_text_before_options_stays_around --hash=sha256:123
Name: "foo7",
Version: "1.0unexpected_text_before_options_stays_around",
Metadata: &requirements.Metadata{HashCheckingModeValues: []string{"sha256:123"}},
},
{
// foo8==1.0 --hash=sha256:123 --hash=sha256:456
Name: "foo8",
Version: "1.0",
Metadata: &requirements.Metadata{HashCheckingModeValues: []string{"sha256:123", "sha256:456"}},
},
{
// foo9==1.0 --hash=sha256:123 \
// --hash=sha256:456
Name: "foo9",
Version: "1.0",
Metadata: &requirements.Metadata{HashCheckingModeValues: []string{"sha256:123", "sha256:456"}},
},

// The following case might be valid, where "--hash=sha256:123" is the version and not a
// per-requirement option, per the grammar in https://peps.python.org/pep-0508/#grammar.
// This is a contrived input, though, so we interpret it as missing the version.
//
// foo10== --hash=sha256:123 --hash=sha256:123

{
// foo11==1.0 --hash=sha256:not_base16_encoded_is_ok_;#
Name: "foo11",
Version: "1.0",
Metadata: &requirements.Metadata{HashCheckingModeValues: []string{"sha256:not_base16_encoded_is_ok_;#"}},
},
{
// foo12==1.0 --hash=
Name: "foo12",
Version: "1.0",
},
},
},
}

// fill Location and Extractor
Expand Down Expand Up @@ -172,6 +251,7 @@ func TestToPURL(t *testing.T) {
Name: "Name",
Version: "1.2.3",
Locations: []string{"location"},
Metadata: &requirements.Metadata{HashCheckingModeValues: []string{"sha256:123xyz"}},
}
want := &purl.PackageURL{
Type: purl.TypePyPi,
Expand Down
22 changes: 22 additions & 0 deletions extractor/filesystem/language/python/requirements/metadata.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package requirements

// Metadata contains additional information about a package listed in a requirements file,
// beyond its name and version.
type Metadata struct {
// HashCheckingModeValues holds the values of the --hash per-requirement options, as described in
// https://pip.pypa.io/en/stable/topics/secure-installs/#hash-checking-mode.
// These are the hashes of the distributions of the package, in the order they appear on the
// requirement line. The values are stored as written and are not validated.
HashCheckingModeValues []string
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
foo1==1.0 --hash=sha256:123

foo2==1.0 --hash=sha256:123 --global-option=foo --config-settings=bar

foo3==1.0 --config-settings=bar --global-option=foo --hash=sha256:123

foo4==1.0 --hash=wrongformatbutok

foo5==1.0; python_version < "2.7" --hash=sha256:123

foo6==1.0 --hash=sha256:123 unexpected_text_after_options_does_not_stay_around

foo7==1.0 unexpected_text_before_options_stays_around --hash=sha256:123

foo8==1.0 --hash=sha256:123 --hash=sha256:456

foo9==1.0 --hash=sha256:123 \
--hash=sha256:456

foo10== --hash=sha256:123 --hash=sha256:123

foo11==1.0 --hash=sha256:not_base16_encoded_is_ok_;#

foo12==1.0 --hash=

0 comments on commit 058884d

Please sign in to comment.