diff --git a/extractor/extractor.go b/extractor/extractor.go index 30978bb7..4329daa5 100644 --- a/extractor/extractor.go +++ b/extractor/extractor.go @@ -23,12 +23,10 @@ import ( "os" "path/filepath" "regexp" - "runtime" "slices" "time" "github.com/google/osv-scalibr/extractor/internal" - "github.com/google/osv-scalibr/extractor/internal/units" "github.com/google/osv-scalibr/log" "github.com/google/osv-scalibr/plugin" "github.com/google/osv-scalibr/purl" @@ -79,8 +77,6 @@ type Config struct { ReadSymlinks bool // Optional: Limit for visited inodes. If 0, no limit is applied. MaxInodes int - // Optional: Logs extractor name and path, which trigger a high memory increase. - LogMemoryUsage bool } // LINT.IfChange @@ -124,17 +120,16 @@ func RunFS(ctx context.Context, config *Config) ([]*Inventory, []*plugin.Status, } start := time.Now() wc := walkContext{ - ctx: ctx, - stats: config.Stats, - extractors: config.Extractors, - fs: config.FS, - scanRoot: config.ScanRoot, - dirsToSkip: stringListToMap(config.DirsToSkip), - skipDirRegex: config.SkipDirRegex, - readSymlinks: config.ReadSymlinks, - maxInodes: config.MaxInodes, - logMemoryUsage: config.LogMemoryUsage, - inodesVisited: 0, + ctx: ctx, + stats: config.Stats, + extractors: config.Extractors, + fs: config.FS, + scanRoot: config.ScanRoot, + dirsToSkip: stringListToMap(config.DirsToSkip), + skipDirRegex: config.SkipDirRegex, + readSymlinks: config.ReadSymlinks, + maxInodes: config.MaxInodes, + inodesVisited: 0, lastStatus: time.Now(), @@ -156,16 +151,15 @@ func RunFS(ctx context.Context, config *Config) ([]*Inventory, []*plugin.Status, } type walkContext struct { - ctx context.Context - stats stats.Collector - extractors []InventoryExtractor - fs fs.FS - scanRoot string - dirsToSkip map[string]bool // Anything under these paths should be skipped. - skipDirRegex *regexp.Regexp - maxInodes int - inodesVisited int - logMemoryUsage bool + ctx context.Context + stats stats.Collector + extractors []InventoryExtractor + fs fs.FS + scanRoot string + dirsToSkip map[string]bool // Anything under these paths should be skipped. + skipDirRegex *regexp.Regexp + maxInodes int + inodesVisited int // Inventories found. inventory []*Inventory @@ -234,24 +228,8 @@ func (wc *walkContext) handleFile(path string, d fs.DirEntry, fserr error) error wc.mapInodes[internal.ParentDir(filepath.Dir(path), 3)]++ - var before int64 - if wc.logMemoryUsage { - before = internal.MaxResident() * units.KiB - } for _, ex := range wc.extractors { - extractRun := wc.runExtractor(ex, path, s.Mode()) - if wc.logMemoryUsage && extractRun { - // Assuming the Extract function is the memory intense function. If no extract run, we don't - // need to query MaxResident again. - after := internal.MaxResident() * units.KiB - if after > before+5*units.MiB { - runtime.GC() - afterGC := internal.MaxResident() * units.KiB - log.Infof("Memory increase: before: %d, after: %d, after GC: %d extractor: %s path: %s\n", - before, after, afterGC, ex.Name(), path) - } - before = after - } + wc.runExtractor(ex, path, s.Mode()) } return nil } @@ -266,21 +244,21 @@ func (wc *walkContext) shouldSkipDir(path string) bool { return false } -func (wc *walkContext) runExtractor(ex InventoryExtractor, path string, mode fs.FileMode) bool { +func (wc *walkContext) runExtractor(ex InventoryExtractor, path string, mode fs.FileMode) { if !ex.FileRequired(path, mode) { - return false + return } rc, err := wc.fs.Open(path) if err != nil { addErrToMap(wc.errors, ex.Name(), fmt.Errorf("Open(%s): %v", path, err)) - return false + return } defer rc.Close() info, err := rc.Stat() if err != nil { addErrToMap(wc.errors, ex.Name(), fmt.Errorf("stat(%s): %v", path, err)) - return false + return } wc.mapExtracts[internal.ParentDir(filepath.Dir(path), 3)]++ @@ -299,7 +277,6 @@ func (wc *walkContext) runExtractor(ex InventoryExtractor, path string, mode fs. wc.inventory = append(wc.inventory, r) } } - return true } func stringListToMap(paths []string) map[string]bool { diff --git a/extractor/internal/memory_unix.go b/extractor/internal/memory_unix.go deleted file mode 100644 index 7d652883..00000000 --- a/extractor/internal/memory_unix.go +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright 2024 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package internal - -import ( - "syscall" - - "github.com/google/osv-scalibr/log" -) - -// MaxResident returns the max resident memory. This can be bytes or kilobytes, depending on the -// operating system. -func MaxResident() int64 { - var u syscall.Rusage - err := syscall.Getrusage(syscall.RUSAGE_SELF, &u) - if err != nil { - log.Warnf("Failed to get rusage: %v", err) - return 0 - } - - return u.Maxrss -} diff --git a/extractor/language/javascript/packagejson/extractor.go b/extractor/language/javascript/packagejson/extractor.go index 9ca26958..73d0305d 100644 --- a/extractor/language/javascript/packagejson/extractor.go +++ b/extractor/language/javascript/packagejson/extractor.go @@ -97,9 +97,7 @@ func (e Extractor) FileRequired(path string, _ fs.FileMode) bool { // Extract extracts packages from package.json files passed through the scan input. func (e Extractor) Extract(ctx context.Context, input *extractor.ScanInput) ([]*extractor.Inventory, error) { if input.Info != nil && input.Info.Size() > e.maxJSONSize { - err := fmt.Errorf("package.json file %s is too large: %d", input.Path, input.Info.Size()) - log.Error(err) - return nil, err + return nil, fmt.Errorf("package.json file %s is too large: %d", input.Path, input.Info.Size()) } i, err := parse(input.Path, input.Reader) if err != nil { diff --git a/scalibr.go b/scalibr.go index 80e6ba21..b11d3a96 100644 --- a/scalibr.go +++ b/scalibr.go @@ -54,8 +54,6 @@ type ScanConfig struct { ReadSymlinks bool // Optional: Limit for visited inodes. If 0, no limit is applied. MaxInodes int - // Optional: Logs extractor name and path, which trigger a high memory increase. - LogMemoryUsage bool } // LINT.IfChange @@ -96,7 +94,6 @@ func (Scanner) Scan(ctx context.Context, config *ScanConfig) (sr *ScanResult) { SkipDirRegex: config.SkipDirRegex, ScanRoot: config.ScanRoot, MaxInodes: config.MaxInodes, - LogMemoryUsage: config.LogMemoryUsage, } inventories, extractorStatus, err := extractor.Run(ctx, extractorConfig) sro.Inventories = inventories