Skip to content

Commit

Permalink
Add logging for memory increases
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 627041617
  • Loading branch information
vpasdf authored and copybara-github committed Apr 24, 2024
1 parent 291aaee commit 337dbb0
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 25 deletions.
71 changes: 47 additions & 24 deletions extractor/extractor.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@ import (
"os"
"path/filepath"
"regexp"
"runtime"
"slices"
"time"

"github.com/google/osv-scalibr/extractor/internal"
"github.com/google/osv-scalibr/extractor/internal/units"
"github.com/google/osv-scalibr/log"
"github.com/google/osv-scalibr/plugin"
"github.com/google/osv-scalibr/purl"
Expand Down Expand Up @@ -77,6 +79,8 @@ type Config struct {
ReadSymlinks bool
// Optional: Limit for visited inodes. If 0, no limit is applied.
MaxInodes int
// Optional: If true, logs the extractor name and file path whenever an extraction triggers a high memory increase.
LogMemoryUsage bool
}

// LINT.IfChange
Expand Down Expand Up @@ -120,16 +124,17 @@ func RunFS(ctx context.Context, config *Config) ([]*Inventory, []*plugin.Status,
}
start := time.Now()
wc := walkContext{
ctx: ctx,
stats: config.Stats,
extractors: config.Extractors,
fs: config.FS,
scanRoot: config.ScanRoot,
dirsToSkip: stringListToMap(config.DirsToSkip),
skipDirRegex: config.SkipDirRegex,
readSymlinks: config.ReadSymlinks,
maxInodes: config.MaxInodes,
inodesVisited: 0,
ctx: ctx,
stats: config.Stats,
extractors: config.Extractors,
fs: config.FS,
scanRoot: config.ScanRoot,
dirsToSkip: stringListToMap(config.DirsToSkip),
skipDirRegex: config.SkipDirRegex,
readSymlinks: config.ReadSymlinks,
maxInodes: config.MaxInodes,
logMemoryUsage: config.LogMemoryUsage,
inodesVisited: 0,

lastStatus: time.Now(),

Expand All @@ -151,15 +156,16 @@ func RunFS(ctx context.Context, config *Config) ([]*Inventory, []*plugin.Status,
}

type walkContext struct {
ctx context.Context
stats stats.Collector
extractors []InventoryExtractor
fs fs.FS
scanRoot string
dirsToSkip map[string]bool // Anything under these paths should be skipped.
skipDirRegex *regexp.Regexp
maxInodes int
inodesVisited int
ctx context.Context
stats stats.Collector
extractors []InventoryExtractor
fs fs.FS
scanRoot string
dirsToSkip map[string]bool // Anything under these paths should be skipped.
skipDirRegex *regexp.Regexp
maxInodes int
inodesVisited int
logMemoryUsage bool

// Inventories found.
inventory []*Inventory
Expand Down Expand Up @@ -228,8 +234,24 @@ func (wc *walkContext) handleFile(path string, d fs.DirEntry, fserr error) error

wc.mapInodes[internal.ParentDir(filepath.Dir(path), 3)]++

var before int64
if wc.logMemoryUsage {
before = internal.MaxResident() * units.KiB
}
for _, ex := range wc.extractors {
wc.runExtractor(ex, path, s.Mode())
extractRun := wc.runExtractor(ex, path, s.Mode())
if wc.logMemoryUsage && extractRun {
// Assume Extract is the memory-intensive step. If the extractor did not run, there is no
// need to query MaxResident again.
after := internal.MaxResident() * units.KiB
if after > before+5*units.MiB {
runtime.GC()
afterGC := internal.MaxResident() * units.KiB
log.Infof("Memory increase: before: %d, after: %d, after GC: %d extractor: %s path: %s\n",
before, after, afterGC, ex.Name(), path)
}
before = after
}
}
return nil
}
Expand All @@ -244,21 +266,21 @@ func (wc *walkContext) shouldSkipDir(path string) bool {
return false
}

func (wc *walkContext) runExtractor(ex InventoryExtractor, path string, mode fs.FileMode) {
func (wc *walkContext) runExtractor(ex InventoryExtractor, path string, mode fs.FileMode) bool {
if !ex.FileRequired(path, mode) {
return
return false
}
rc, err := wc.fs.Open(path)
if err != nil {
addErrToMap(wc.errors, ex.Name(), fmt.Errorf("Open(%s): %v", path, err))
return
return false
}
defer rc.Close()

info, err := rc.Stat()
if err != nil {
addErrToMap(wc.errors, ex.Name(), fmt.Errorf("stat(%s): %v", path, err))
return
return false
}

wc.mapExtracts[internal.ParentDir(filepath.Dir(path), 3)]++
Expand All @@ -277,6 +299,7 @@ func (wc *walkContext) runExtractor(ex InventoryExtractor, path string, mode fs.
wc.inventory = append(wc.inventory, r)
}
}
return true
}

func stringListToMap(paths []string) map[string]bool {
Expand Down
34 changes: 34 additions & 0 deletions extractor/internal/memory_unix.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package internal

import (
"syscall"

"github.com/google/osv-scalibr/log"
)

// MaxResident returns the peak resident set size of the current process as reported by
// getrusage(2). The unit is operating-system dependent (e.g. kilobytes on Linux, bytes on
// macOS), so callers must scale the value accordingly.
//
// If the rusage query fails, a warning is logged and 0 is returned.
func MaxResident() int64 {
	var u syscall.Rusage
	if err := syscall.Getrusage(syscall.RUSAGE_SELF, &u); err != nil {
		log.Warnf("Failed to get rusage: %v", err)
		return 0
	}

	// Maxrss is int64 on 64-bit targets but int32 on some 32-bit ones (e.g. linux/386), so
	// convert explicitly to keep this file compiling on every unix architecture.
	return int64(u.Maxrss)
}
4 changes: 3 additions & 1 deletion extractor/language/javascript/packagejson/extractor.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,9 @@ func (e Extractor) FileRequired(path string, _ fs.FileMode) bool {
// Extract extracts packages from package.json files passed through the scan input.
func (e Extractor) Extract(ctx context.Context, input *extractor.ScanInput) ([]*extractor.Inventory, error) {
if input.Info != nil && input.Info.Size() > e.maxJSONSize {
return nil, fmt.Errorf("package.json file %s is too large: %d", input.Path, input.Info.Size())
err := fmt.Errorf("package.json file %s is too large: %d", input.Path, input.Info.Size())
log.Error(err)
return nil, err
}
i, err := parse(input.Path, input.Reader)
if err != nil {
Expand Down
3 changes: 3 additions & 0 deletions scalibr.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ type ScanConfig struct {
ReadSymlinks bool
// Optional: Limit for visited inodes. If 0, no limit is applied.
MaxInodes int
// Optional: If true, logs the extractor name and file path whenever an extraction triggers a high memory increase.
LogMemoryUsage bool
}

// LINT.IfChange
Expand Down Expand Up @@ -94,6 +96,7 @@ func (Scanner) Scan(ctx context.Context, config *ScanConfig) (sr *ScanResult) {
SkipDirRegex: config.SkipDirRegex,
ScanRoot: config.ScanRoot,
MaxInodes: config.MaxInodes,
LogMemoryUsage: config.LogMemoryUsage,
}
inventories, extractorStatus, err := extractor.Run(ctx, extractorConfig)
sro.Inventories = inventories
Expand Down

0 comments on commit 337dbb0

Please sign in to comment.