Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add logging for memory increases #23

Merged
merged 1 commit into from
Apr 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 47 additions & 24 deletions extractor/extractor.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@ import (
"os"
"path/filepath"
"regexp"
"runtime"
"slices"
"time"

"github.com/google/osv-scalibr/extractor/internal"
"github.com/google/osv-scalibr/extractor/internal/units"
"github.com/google/osv-scalibr/log"
"github.com/google/osv-scalibr/plugin"
"github.com/google/osv-scalibr/purl"
Expand Down Expand Up @@ -77,6 +79,8 @@ type Config struct {
ReadSymlinks bool
// Optional: Limit for visited inodes. If 0, no limit is applied.
MaxInodes int
// Optional: Logs the extractor name and path that trigger a high memory increase.
LogMemoryUsage bool
}

// LINT.IfChange
Expand Down Expand Up @@ -120,16 +124,17 @@ func RunFS(ctx context.Context, config *Config) ([]*Inventory, []*plugin.Status,
}
start := time.Now()
wc := walkContext{
ctx: ctx,
stats: config.Stats,
extractors: config.Extractors,
fs: config.FS,
scanRoot: config.ScanRoot,
dirsToSkip: stringListToMap(config.DirsToSkip),
skipDirRegex: config.SkipDirRegex,
readSymlinks: config.ReadSymlinks,
maxInodes: config.MaxInodes,
inodesVisited: 0,
ctx: ctx,
stats: config.Stats,
extractors: config.Extractors,
fs: config.FS,
scanRoot: config.ScanRoot,
dirsToSkip: stringListToMap(config.DirsToSkip),
skipDirRegex: config.SkipDirRegex,
readSymlinks: config.ReadSymlinks,
maxInodes: config.MaxInodes,
logMemoryUsage: config.LogMemoryUsage,
inodesVisited: 0,

lastStatus: time.Now(),

Expand All @@ -151,15 +156,16 @@ func RunFS(ctx context.Context, config *Config) ([]*Inventory, []*plugin.Status,
}

type walkContext struct {
ctx context.Context
stats stats.Collector
extractors []InventoryExtractor
fs fs.FS
scanRoot string
dirsToSkip map[string]bool // Anything under these paths should be skipped.
skipDirRegex *regexp.Regexp
maxInodes int
inodesVisited int
ctx context.Context
stats stats.Collector
extractors []InventoryExtractor
fs fs.FS
scanRoot string
dirsToSkip map[string]bool // Anything under these paths should be skipped.
skipDirRegex *regexp.Regexp
maxInodes int
inodesVisited int
logMemoryUsage bool

// Inventories found.
inventory []*Inventory
Expand Down Expand Up @@ -228,8 +234,24 @@ func (wc *walkContext) handleFile(path string, d fs.DirEntry, fserr error) error

wc.mapInodes[internal.ParentDir(filepath.Dir(path), 3)]++

var before int64
if wc.logMemoryUsage {
before = internal.MaxResident() * units.KiB
}
for _, ex := range wc.extractors {
wc.runExtractor(ex, path, s.Mode())
extractRun := wc.runExtractor(ex, path, s.Mode())
if wc.logMemoryUsage && extractRun {
// Assuming the Extract function is the memory-intensive part. If no extractor ran, we don't
// need to query MaxResident again.
after := internal.MaxResident() * units.KiB
if after > before+5*units.MiB {
runtime.GC()
afterGC := internal.MaxResident() * units.KiB
log.Infof("Memory increase: before: %d, after: %d, after GC: %d extractor: %s path: %s\n",
before, after, afterGC, ex.Name(), path)
}
before = after
}
}
return nil
}
Expand All @@ -244,21 +266,21 @@ func (wc *walkContext) shouldSkipDir(path string) bool {
return false
}

func (wc *walkContext) runExtractor(ex InventoryExtractor, path string, mode fs.FileMode) {
func (wc *walkContext) runExtractor(ex InventoryExtractor, path string, mode fs.FileMode) bool {
if !ex.FileRequired(path, mode) {
return
return false
}
rc, err := wc.fs.Open(path)
if err != nil {
addErrToMap(wc.errors, ex.Name(), fmt.Errorf("Open(%s): %v", path, err))
return
return false
}
defer rc.Close()

info, err := rc.Stat()
if err != nil {
addErrToMap(wc.errors, ex.Name(), fmt.Errorf("stat(%s): %v", path, err))
return
return false
}

wc.mapExtracts[internal.ParentDir(filepath.Dir(path), 3)]++
Expand All @@ -277,6 +299,7 @@ func (wc *walkContext) runExtractor(ex InventoryExtractor, path string, mode fs.
wc.inventory = append(wc.inventory, r)
}
}
return true
}

func stringListToMap(paths []string) map[string]bool {
Expand Down
34 changes: 34 additions & 0 deletions extractor/internal/memory_unix.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package internal

import (
"syscall"

"github.com/google/osv-scalibr/log"
)

// MaxResident reports the calling process's maximum resident set size as
// returned by getrusage(2). The unit is OS-dependent (e.g. kilobytes on
// Linux, bytes on macOS), so callers must apply the appropriate scaling.
// Returns 0 when the rusage query fails; the failure is logged.
func MaxResident() int64 {
	var usage syscall.Rusage
	if err := syscall.Getrusage(syscall.RUSAGE_SELF, &usage); err != nil {
		log.Warnf("Failed to get rusage: %v", err)
		return 0
	}

	return usage.Maxrss
}
4 changes: 3 additions & 1 deletion extractor/language/javascript/packagejson/extractor.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,9 @@ func (e Extractor) FileRequired(path string, _ fs.FileMode) bool {
// Extract extracts packages from package.json files passed through the scan input.
func (e Extractor) Extract(ctx context.Context, input *extractor.ScanInput) ([]*extractor.Inventory, error) {
if input.Info != nil && input.Info.Size() > e.maxJSONSize {
return nil, fmt.Errorf("package.json file %s is too large: %d", input.Path, input.Info.Size())
err := fmt.Errorf("package.json file %s is too large: %d", input.Path, input.Info.Size())
log.Error(err)
return nil, err
}
i, err := parse(input.Path, input.Reader)
if err != nil {
Expand Down
3 changes: 3 additions & 0 deletions scalibr.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ type ScanConfig struct {
ReadSymlinks bool
// Optional: Limit for visited inodes. If 0, no limit is applied.
MaxInodes int
// Optional: Logs the extractor name and path that trigger a high memory increase.
LogMemoryUsage bool
}

// LINT.IfChange
Expand Down Expand Up @@ -94,6 +96,7 @@ func (Scanner) Scan(ctx context.Context, config *ScanConfig) (sr *ScanResult) {
SkipDirRegex: config.SkipDirRegex,
ScanRoot: config.ScanRoot,
MaxInodes: config.MaxInodes,
LogMemoryUsage: config.LogMemoryUsage,
}
inventories, extractorStatus, err := extractor.Run(ctx, extractorConfig)
sro.Inventories = inventories
Expand Down
Loading