Skip to content

Commit

Permalink
Add logging for memory increases
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 627840494
  • Loading branch information
SCALIBR Team authored and copybara-github committed Apr 24, 2024
1 parent f24d408 commit 8fcf720
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 87 deletions.
71 changes: 24 additions & 47 deletions extractor/extractor.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,10 @@ import (
"os"
"path/filepath"
"regexp"
"runtime"
"slices"
"time"

"github.com/google/osv-scalibr/extractor/internal"
"github.com/google/osv-scalibr/extractor/internal/units"
"github.com/google/osv-scalibr/log"
"github.com/google/osv-scalibr/plugin"
"github.com/google/osv-scalibr/purl"
Expand Down Expand Up @@ -79,8 +77,6 @@ type Config struct {
ReadSymlinks bool
// Optional: Limit for visited inodes. If 0, no limit is applied.
MaxInodes int
// Optional: If true, logs the extractor name and file path whenever an extraction triggers a large memory increase.
LogMemoryUsage bool
}

// LINT.IfChange
Expand Down Expand Up @@ -124,17 +120,16 @@ func RunFS(ctx context.Context, config *Config) ([]*Inventory, []*plugin.Status,
}
start := time.Now()
wc := walkContext{
ctx: ctx,
stats: config.Stats,
extractors: config.Extractors,
fs: config.FS,
scanRoot: config.ScanRoot,
dirsToSkip: stringListToMap(config.DirsToSkip),
skipDirRegex: config.SkipDirRegex,
readSymlinks: config.ReadSymlinks,
maxInodes: config.MaxInodes,
logMemoryUsage: config.LogMemoryUsage,
inodesVisited: 0,
ctx: ctx,
stats: config.Stats,
extractors: config.Extractors,
fs: config.FS,
scanRoot: config.ScanRoot,
dirsToSkip: stringListToMap(config.DirsToSkip),
skipDirRegex: config.SkipDirRegex,
readSymlinks: config.ReadSymlinks,
maxInodes: config.MaxInodes,
inodesVisited: 0,

lastStatus: time.Now(),

Expand All @@ -156,16 +151,15 @@ func RunFS(ctx context.Context, config *Config) ([]*Inventory, []*plugin.Status,
}

type walkContext struct {
ctx context.Context
stats stats.Collector
extractors []InventoryExtractor
fs fs.FS
scanRoot string
dirsToSkip map[string]bool // Anything under these paths should be skipped.
skipDirRegex *regexp.Regexp
maxInodes int
inodesVisited int
logMemoryUsage bool
ctx context.Context
stats stats.Collector
extractors []InventoryExtractor
fs fs.FS
scanRoot string
dirsToSkip map[string]bool // Anything under these paths should be skipped.
skipDirRegex *regexp.Regexp
maxInodes int
inodesVisited int

// Inventories found.
inventory []*Inventory
Expand Down Expand Up @@ -234,24 +228,8 @@ func (wc *walkContext) handleFile(path string, d fs.DirEntry, fserr error) error

wc.mapInodes[internal.ParentDir(filepath.Dir(path), 3)]++

var before int64
if wc.logMemoryUsage {
before = internal.MaxResident() * units.KiB
}
for _, ex := range wc.extractors {
extractRun := wc.runExtractor(ex, path, s.Mode())
if wc.logMemoryUsage && extractRun {
// Assume Extract is the memory-intensive step. If the extractor did not run, there is no
// need to query MaxResident again.
after := internal.MaxResident() * units.KiB
if after > before+5*units.MiB {
runtime.GC()
afterGC := internal.MaxResident() * units.KiB
log.Infof("Memory increase: before: %d, after: %d, after GC: %d extractor: %s path: %s\n",
before, after, afterGC, ex.Name(), path)
}
before = after
}
wc.runExtractor(ex, path, s.Mode())
}
return nil
}
Expand All @@ -266,21 +244,21 @@ func (wc *walkContext) shouldSkipDir(path string) bool {
return false
}

func (wc *walkContext) runExtractor(ex InventoryExtractor, path string, mode fs.FileMode) bool {
func (wc *walkContext) runExtractor(ex InventoryExtractor, path string, mode fs.FileMode) {
if !ex.FileRequired(path, mode) {
return false
return
}
rc, err := wc.fs.Open(path)
if err != nil {
addErrToMap(wc.errors, ex.Name(), fmt.Errorf("Open(%s): %v", path, err))
return false
return
}
defer rc.Close()

info, err := rc.Stat()
if err != nil {
addErrToMap(wc.errors, ex.Name(), fmt.Errorf("stat(%s): %v", path, err))
return false
return
}

wc.mapExtracts[internal.ParentDir(filepath.Dir(path), 3)]++
Expand All @@ -299,7 +277,6 @@ func (wc *walkContext) runExtractor(ex InventoryExtractor, path string, mode fs.
wc.inventory = append(wc.inventory, r)
}
}
return true
}

func stringListToMap(paths []string) map[string]bool {
Expand Down
34 changes: 0 additions & 34 deletions extractor/internal/memory_unix.go

This file was deleted.

4 changes: 1 addition & 3 deletions extractor/language/javascript/packagejson/extractor.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,7 @@ func (e Extractor) FileRequired(path string, _ fs.FileMode) bool {
// Extract extracts packages from package.json files passed through the scan input.
func (e Extractor) Extract(ctx context.Context, input *extractor.ScanInput) ([]*extractor.Inventory, error) {
if input.Info != nil && input.Info.Size() > e.maxJSONSize {
err := fmt.Errorf("package.json file %s is too large: %d", input.Path, input.Info.Size())
log.Error(err)
return nil, err
return nil, fmt.Errorf("package.json file %s is too large: %d", input.Path, input.Info.Size())
}
i, err := parse(input.Path, input.Reader)
if err != nil {
Expand Down
3 changes: 0 additions & 3 deletions scalibr.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,6 @@ type ScanConfig struct {
ReadSymlinks bool
// Optional: Limit for visited inodes. If 0, no limit is applied.
MaxInodes int
// Optional: If true, logs the extractor name and file path whenever an extraction triggers a large memory increase.
LogMemoryUsage bool
}

// LINT.IfChange
Expand Down Expand Up @@ -96,7 +94,6 @@ func (Scanner) Scan(ctx context.Context, config *ScanConfig) (sr *ScanResult) {
SkipDirRegex: config.SkipDirRegex,
ScanRoot: config.ScanRoot,
MaxInodes: config.MaxInodes,
LogMemoryUsage: config.LogMemoryUsage,
}
inventories, extractorStatus, err := extractor.Run(ctx, extractorConfig)
sro.Inventories = inventories
Expand Down

0 comments on commit 8fcf720

Please sign in to comment.