diff --git a/artifact/image/layerscanning/image/file_node.go b/artifact/image/layerscanning/image/file_node.go index fbf1e7b6..0071f4ca 100644 --- a/artifact/image/layerscanning/image/file_node.go +++ b/artifact/image/layerscanning/image/file_node.go @@ -18,6 +18,7 @@ import ( "io/fs" "os" "path" + "path/filepath" ) const ( @@ -36,6 +37,7 @@ type fileNode struct { originLayerID string isWhiteout bool virtualPath string + targetPath string mode fs.FileMode file *os.File } @@ -94,7 +96,7 @@ func (f *fileNode) Close() error { // RealFilePath returns the real file path of the fileNode. This is the concatenation of the // root image extract directory, origin layer ID, and the virtual path. func (f *fileNode) RealFilePath() string { - return path.Join(f.extractDir, f.originLayerID, f.virtualPath) + return filepath.Join(f.extractDir, f.originLayerID, filepath.FromSlash(f.virtualPath)) } // ======================================================== diff --git a/artifact/image/layerscanning/image/file_node_test.go b/artifact/image/layerscanning/image/file_node_test.go index 277f5c9f..8e56ebff 100644 --- a/artifact/image/layerscanning/image/file_node_test.go +++ b/artifact/image/layerscanning/image/file_node_test.go @@ -19,6 +19,7 @@ import ( "io/fs" "os" "path" + "path/filepath" "testing" "github.com/google/go-cmp/cmp" @@ -435,22 +436,22 @@ func TestRealFilePath(t *testing.T) { { name: "root directory", node: rootDirectory, - want: "/tmp/extract/layer1", + want: filepath.FromSlash("/tmp/extract/layer1"), }, { name: "root file", node: rootFile, - want: "/tmp/extract/layer1/bar", + want: filepath.FromSlash("/tmp/extract/layer1/bar"), }, { name: "non-root file", node: nonRootFile, - want: "/tmp/extract/layer1/dir1/foo", + want: filepath.FromSlash("/tmp/extract/layer1/dir1/foo"), }, { name: "non-root directory", node: nonRootDirectory, - want: "/tmp/extract/layer1/dir1/dir2", + want: filepath.FromSlash("/tmp/extract/layer1/dir1/dir2"), }, } diff --git a/artifact/image/layerscanning/image/image.go b/artifact/image/layerscanning/image/image.go index e797d34d..5b708ccf 100644 --- a/artifact/image/layerscanning/image/image.go +++ b/artifact/image/layerscanning/image/image.go @@ -33,7 +33,9 @@ import ( "github.com/google/go-containerregistry/pkg/v1/tarball" scalibrImage "github.com/google/osv-scalibr/artifact/image" "github.com/google/osv-scalibr/artifact/image/pathtree" + "github.com/google/osv-scalibr/artifact/image/symlink" "github.com/google/osv-scalibr/artifact/image/whiteout" + "github.com/google/osv-scalibr/log" ) const ( @@ -46,6 +48,8 @@ var ( // ErrFileReadLimitExceeded is returned when a file exceeds the read limit. This is intended to // prevent zip bomb attacks, for example. ErrFileReadLimitExceeded = errors.New("file exceeds read limit") + // ErrSymlinkPointsOutsideRoot is returned when a symlink points outside the root. + ErrSymlinkPointsOutsideRoot = errors.New("symlink points outside the root") ) // ======================================================== @@ -278,7 +282,7 @@ func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLay // Some tools prepend everything with "./", so if we don't Clean the // name, we may have duplicate entries, which angers tar-split. // Using path instead of filepath to keep `/` and deterministic behavior - cleanedFilePath := path.Clean(header.Name) + cleanedFilePath := path.Clean(filepath.ToSlash(header.Name)) // Prevent "Zip Slip" if strings.HasPrefix(cleanedFilePath, "../") { @@ -320,72 +324,112 @@ func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLay // realFilePath is where the file will be written to disk. filepath.Clean first to convert // to OS specific file path. // TODO: b/377553499 - Escape invalid characters on windows that's valid on linux - realFilePath := filepath.Join(dirPath, filepath.Clean(cleanedFilePath)) + // realFilePath := filepath.Join(dirPath, filepath.Clean(cleanedFilePath)) + realFilePath := filepath.Join(dirPath, filepath.FromSlash(cleanedFilePath)) - var fileMode fs.FileMode - // Write out the file/dir to disk. + var newNode *fileNode switch header.Typeflag { case tar.TypeDir: - fileMode, err = img.handleDir(realFilePath, tarReader, header) - if err != nil { - return fmt.Errorf("failed to handle directory: %w", err) - } - + newNode, err = img.handleDir(realFilePath, virtualPath, originLayerID, tarReader, header, tombstone) + case tar.TypeReg: + newNode, err = img.handleFile(realFilePath, virtualPath, originLayerID, tarReader, header, tombstone) + case tar.TypeSymlink, tar.TypeLink: + newNode, err = img.handleSymlink(realFilePath, virtualPath, originLayerID, tarReader, header, tombstone) default: - // TODO: b/374769529 - Handle symlinks. - // Assume if it's not a directory, it's a normal file. - fileMode, err = img.handleFile(realFilePath, tarReader, header) - if err != nil { - return fmt.Errorf("failed to handle file: %w", err) - } + log.Warnf("unsupported file type: %v, path: %s", header.Typeflag, header.Name) + continue + } + + if err != nil { + return fmt.Errorf("failed to handle tar entry with path %s: %w", virtualPath, err) } // In each outer loop, a layer is added to each relevant output chainLayer slice. Because the // outer loop is looping backwards (latest layer first), we ignore any files that are already in // each chainLayer, as they would have been overwritten. - fillChainLayersWithVirtualPath(img, chainLayersToFill, originLayerID, virtualPath, tombstone, fileMode) + fillChainLayersWithFileNode(chainLayersToFill, newNode) } - return nil } +// handleSymlink returns the symlink header mode. Symlinks are handled by creating a fileNode with +// the symlink mode with additional metadata. +func (img *Image) handleSymlink(realFilePath, virtualPath, originLayerID string, tarReader *tar.Reader, header *tar.Header, isWhiteout bool) (*fileNode, error) { + targetPath := filepath.ToSlash(header.Linkname) + if targetPath == "" { + return nil, fmt.Errorf("symlink header has no target path") + } + + if symlink.TargetOutsideRoot(virtualPath, targetPath) { + log.Warnf("Found symlink that points outside the root, skipping: %q -> %q", virtualPath, targetPath) + return nil, fmt.Errorf("%w: %q -> %q", ErrSymlinkPointsOutsideRoot, virtualPath, targetPath) + } + + // Resolve the relative symlink path to an absolute path. + if !path.IsAbs(targetPath) { + targetPath = path.Clean(path.Join(path.Dir(virtualPath), targetPath)) + } + + return &fileNode{ + extractDir: img.ExtractDir, + originLayerID: originLayerID, + virtualPath: virtualPath, + targetPath: targetPath, + isWhiteout: isWhiteout, + mode: fs.FileMode(header.Mode) | fs.ModeSymlink, + }, nil +} + // handleDir creates the directory specified by path, if it doesn't exist. -func (img *Image) handleDir(path string, tarReader *tar.Reader, header *tar.Header) (fs.FileMode, error) { - if _, err := os.Stat(path); err != nil { - if err := os.MkdirAll(path, dirPermission); err != nil { - return 0, fmt.Errorf("failed to create directory with path %s: %w", path, err) +func (img *Image) handleDir(realFilePath, virtualPath, originLayerID string, tarReader *tar.Reader, header *tar.Header, isWhiteout bool) (*fileNode, error) { + if _, err := os.Stat(realFilePath); err != nil { + if err := os.MkdirAll(realFilePath, dirPermission); err != nil { + return nil, fmt.Errorf("failed to create directory with realFilePath %s: %w", realFilePath, err) } } - return fs.FileMode(header.Mode) | fs.ModeDir, nil + return &fileNode{ + extractDir: img.ExtractDir, + originLayerID: originLayerID, + virtualPath: virtualPath, + isWhiteout: isWhiteout, + mode: fs.FileMode(header.Mode) | fs.ModeDir, + }, nil } // handleFile creates the file specified by path, and then copies the contents of the tarReader into // the file. -func (img *Image) handleFile(path string, tarReader *tar.Reader, header *tar.Header) (fs.FileMode, error) { +func (img *Image) handleFile(realFilePath, virtualPath, originLayerID string, tarReader *tar.Reader, header *tar.Header, isWhiteout bool) (*fileNode, error) { // Write all files as read/writable by the current user, inaccessible by anyone else // Actual permission bits are stored in FileNode - f, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR, filePermission) + f, err := os.OpenFile(realFilePath, os.O_CREATE|os.O_RDWR, filePermission) if err != nil { - return 0, err + return nil, err } defer f.Close() numBytes, err := io.Copy(f, io.LimitReader(tarReader, img.maxFileBytes)) if numBytes >= img.maxFileBytes || errors.Is(err, io.EOF) { - return 0, ErrFileReadLimitExceeded + return nil, ErrFileReadLimitExceeded } if err != nil { - return 0, fmt.Errorf("unable to copy file: %w", err) + return nil, fmt.Errorf("unable to copy file: %w", err) } - return fs.FileMode(header.Mode), nil + return &fileNode{ + extractDir: img.ExtractDir, + originLayerID: originLayerID, + virtualPath: virtualPath, + isWhiteout: isWhiteout, + mode: fs.FileMode(header.Mode), + }, nil } -// fillChainLayersWithVirtualPath fills the chain layers with the virtual path. -func fillChainLayersWithVirtualPath(img *Image, chainLayers []*chainLayer, originLayerID, virtualPath string, isWhiteout bool, fileMode fs.FileMode) { - for _, chainLayer := range chainLayers { +// fillChainLayersWithFileNode fills the chain layers with a new fileNode. +func fillChainLayersWithFileNode(chainLayersToFill []*chainLayer, newNode *fileNode) { + virtualPath := newNode.virtualPath + for _, chainLayer := range chainLayersToFill { if node := chainLayer.fileNodeTree.Get(virtualPath); node != nil { // A newer version of the file already exists on a later chainLayer. // Since we do not want to overwrite a later layer with information @@ -393,21 +437,15 @@ func fillChainLayersWithVirtualPath(img *Image, chainLayers []*chainLayer, origi continue } - // check for a whited out parent directory + // Check for a whited out parent directory. if inWhiteoutDir(chainLayer, virtualPath) { - // The entire directory has been deleted, so no need to save this file + // The entire directory has been deleted, so no need to save this file. continue } // Add the file to the chain layer. If there is an error, then we fail open. // TODO: b/379154069 - Add logging for fail open errors. - chainLayer.fileNodeTree.Insert(virtualPath, &fileNode{ - extractDir: img.ExtractDir, - originLayerID: originLayerID, - virtualPath: virtualPath, - isWhiteout: isWhiteout, - mode: fileMode, - }) + chainLayer.fileNodeTree.Insert(virtualPath, newNode) } } diff --git a/artifact/image/layerscanning/image/image_test.go b/artifact/image/layerscanning/image/image_test.go index f58a92b3..44d47070 100644 --- a/artifact/image/layerscanning/image/image_test.go +++ b/artifact/image/layerscanning/image/image_test.go @@ -18,6 +18,7 @@ import ( "errors" "fmt" "io" + "io/fs" "path/filepath" "strings" "testing" @@ -98,6 +99,8 @@ func (fakeV1Image *fakeV1Image) LayerByDiffID(v1.Hash) (v1.Layer, error) { } // Testing plan: +// +// Basic Cases: // 1. Create a scratch image with a single layer that adds a file. There should be one layer with // one file. // 2. Create a scratch image with two layers. The first layer adds a file, the second layer adds a @@ -111,13 +114,23 @@ func (fakeV1Image *fakeV1Image) LayerByDiffID(v1.Hash) (v1.Layer, error) { // 6. Create a scratch image with three layers. The first layer adds file X, the second layer // deletes file X, and the third layer adds file X back. The second chain layer should // have no files, and the third chain layer should have file X. +// +// Symlink Cases: +// 1. Create a scratch image with one layer. The layer contains a regular file and two symlinks (A +// and B). The symlink chain is A -> B -> file. +// 2. Create an image that has a symlink chain whose size is greater than the max symlink depth. +// An error should be returned. +// 3. Create an image that has a symlink cycle. An error should be returned. +// 4. Create an image that has a dangling symlink. +// 5. Create an image that has a symlink that point to a file outside of the virtual root. func TestFromTarball(t *testing.T) { tests := []struct { - name string - tarPath string - config *Config - wantChainLayerEntries []chainLayerEntries - wantErr error + name string + tarPath string + config *Config + wantChainLayerEntries []chainLayerEntries + wantErrDuringImageCreation error + wantErrWhileReadingFiles error }{ { name: "image with one file", @@ -133,7 +146,6 @@ func TestFromTarball(t *testing.T) { }, }, }, - wantErr: nil, }, { name: "image with two files", @@ -161,7 +173,6 @@ func TestFromTarball(t *testing.T) { }, }, }, - wantErr: nil, }, { name: "second layer overwrites file with different content", @@ -185,7 +196,6 @@ func TestFromTarball(t *testing.T) { }, }, }, - wantErr: nil, }, { name: "second layer deletes file", @@ -212,7 +222,6 @@ func TestFromTarball(t *testing.T) { filepathContentPairs: []filepathContentPair{}, }, }, - wantErr: nil, }, { name: "multiple files and directories added across layers", @@ -245,7 +254,6 @@ func TestFromTarball(t *testing.T) { }, }, }, - wantErr: nil, }, { name: "file is deleted and later added back", @@ -280,7 +288,6 @@ func TestFromTarball(t *testing.T) { }, }, }, - wantErr: nil, }, { name: "image with file surpassing max file size", @@ -288,7 +295,125 @@ func TestFromTarball(t *testing.T) { config: &Config{ MaxFileBytes: 1, }, - wantErr: ErrFileReadLimitExceeded, + wantErrDuringImageCreation: ErrFileReadLimitExceeded, + }, + { + name: "image with relative, absolute, and chain symlinks", + tarPath: filepath.Join(testdataDir, "symlink-basic.tar"), + config: DefaultConfig(), + wantChainLayerEntries: []chainLayerEntries{ + { + filepathContentPairs: []filepathContentPair{ + { + filepath: "dir1/sample.txt", + content: "sample text\n", + }, + { + filepath: "dir1/absolute-symlink.txt", + content: "sample text\n", + }, + { + filepath: "dir1/relative-dot-symlink.txt", + content: "sample text\n", + }, + { + filepath: "dir1/relative-symlink.txt", + content: "sample text\n", + }, + { + filepath: "dir1/chain-symlink.txt", + content: "sample text\n", + }, + }, + }, + }, + }, + { + name: "image with symlink cycle", + tarPath: filepath.Join(testdataDir, "symlink-cycle.tar"), + config: DefaultConfig(), + wantChainLayerEntries: []chainLayerEntries{ + { + filepathContentPairs: []filepathContentPair{ + { + filepath: "dir1/sample.txt", + }, + { + filepath: "dir1/absolute-symlink.txt", + }, + { + filepath: "dir1/chain-symlink.txt", + }, + }, + }, + }, + wantErrWhileReadingFiles: ErrSymlinkCycle, + }, + { + name: "image with symlink depth exceeded", + tarPath: filepath.Join(testdataDir, "symlink-depth-exceeded.tar"), + config: DefaultConfig(), + wantChainLayerEntries: []chainLayerEntries{ + { + filepathContentPairs: []filepathContentPair{ + { + filepath: "dir1/symlink7.txt", + content: "sample text\n", + }, + }, + }, + }, + wantErrWhileReadingFiles: ErrSymlinkDepthExceeded, + }, + { + name: "image with dangling symlinks", + tarPath: filepath.Join(testdataDir, "symlink-dangling.tar"), + config: DefaultConfig(), + wantChainLayerEntries: []chainLayerEntries{ + { + filepathContentPairs: []filepathContentPair{ + { + filepath: "dir1/absolute-symlink.txt", + content: "sample text\n", + }, + { + filepath: "dir1/relative-dot-symlink.txt", + content: "sample text\n", + }, + { + filepath: "dir1/relative-symlink.txt", + content: "sample text\n", + }, + { + filepath: "dir1/chain-symlink.txt", + content: "sample text\n", + }, + }, + }, + { + filepathContentPairs: []filepathContentPair{ + { + filepath: "dir2/dir3/relative-subfolder-symlink.txt", + content: "sample text\n", + }, + { + filepath: "dir2/dir3/absolute-subfolder-symlink.txt", + content: "sample text\n", + }, + { + filepath: "dir2/dir3/absolute-chain-symlink.txt", + content: "sample text\n", + }, + }, + }, + }, + wantErrWhileReadingFiles: fs.ErrNotExist, + }, + { + name: "image with symlink pointing outside of root", + tarPath: filepath.Join(testdataDir, "symlink-attack.tar"), + config: DefaultConfig(), + wantErrDuringImageCreation: ErrSymlinkPointsOutsideRoot, }, } @@ -297,19 +422,18 @@ func TestFromTarball(t *testing.T) { gotImage, gotErr := FromTarball(tc.tarPath, tc.config) defer gotImage.CleanUp() - if tc.wantErr != nil { - if errors.Is(gotErr, tc.wantErr) { + if tc.wantErrDuringImageCreation != nil { + if errors.Is(gotErr, tc.wantErrDuringImageCreation) { return } - t.Fatalf("Load(%v) returned error: %v, want error: %v", tc.tarPath, gotErr, tc.wantErr) + t.Fatalf("FromTarball(%v) returned error: %v, want error: %v", tc.tarPath, gotErr, tc.wantErrDuringImageCreation) } if gotErr != nil { - t.Fatalf("Load(%v) returned unexpected error: %v", tc.tarPath, gotErr) + t.Fatalf("FromTarball(%v) returned unexpected error: %v", tc.tarPath, gotErr) } chainLayers, err := gotImage.ChainLayers() - if err != nil { t.Fatalf("ChainLayers() returned error: %v", err) } @@ -326,7 +450,7 @@ func TestFromTarball(t *testing.T) { continue } - compareChainLayerEntries(t, chainLayer, wantChainLayerEntries) + compareChainLayerEntries(t, chainLayer, wantChainLayerEntries, tc.wantErrWhileReadingFiles) } }) } @@ -385,23 +509,35 @@ func TestFromV1Image(t *testing.T) { // compareChainLayerEntries compares the files in a chain layer to the expected files in the // chainLayerEntries. -func compareChainLayerEntries(t *testing.T, gotChainLayer image.ChainLayer, wantChainLayerEntries chainLayerEntries) { +func compareChainLayerEntries(t *testing.T, gotChainLayer image.ChainLayer, wantChainLayerEntries chainLayerEntries, wantErrWhileReadingFiles error) { t.Helper() - chainfs := gotChainLayer.FS() for _, filepathContentPair := range wantChainLayerEntries.filepathContentPairs { - gotFile, err := chainfs.Open(filepathContentPair.filepath) - if err != nil { - t.Fatalf("Open(%v) returned error: %v", filepathContentPair.filepath, err) - } - contentBytes, err := io.ReadAll(gotFile) - if err != nil { - t.Fatalf("ReadAll(%v) returned error: %v", filepathContentPair.filepath, err) - } - gotContent := string(contentBytes[:]) - if diff := cmp.Diff(gotContent, filepathContentPair.content); diff != "" { - t.Errorf("Open(%v) returned incorrect content: got %s, want %s", filepathContentPair.filepath, gotContent, filepathContentPair.content) - } + func() { + gotFile, gotErr := chainfs.Open(filepathContentPair.filepath) + if wantErrWhileReadingFiles != nil { + if errors.Is(gotErr, wantErrWhileReadingFiles) { + return + } + t.Fatalf("Open(%v) returned error: %v", filepathContentPair.filepath, gotErr) + } + + if gotErr != nil { + t.Fatalf("Open(%v) returned unexpected error: %v", filepathContentPair.filepath, gotErr) + } + + defer gotFile.Close() + + contentBytes, err := io.ReadAll(gotFile) + if err != nil { + t.Fatalf("ReadAll(%v) returned error: %v", filepathContentPair.filepath, err) + } + + gotContent := string(contentBytes[:]) + if diff := cmp.Diff(gotContent, filepathContentPair.content); diff != "" { + t.Errorf("Open(%v) returned incorrect content: got \"%s\", want \"%s\"", filepathContentPair.filepath, gotContent, filepathContentPair.content) + } + }() } } diff --git a/artifact/image/layerscanning/image/layer.go b/artifact/image/layerscanning/image/layer.go index f8834bc2..5faa961b 100644 --- a/artifact/image/layerscanning/image/layer.go +++ b/artifact/image/layerscanning/image/layer.go @@ -33,6 +33,13 @@ var ( ErrDiffIDMissingFromLayer = errors.New("failed to get diffID from v1 layer") // ErrUncompressedReaderMissingFromLayer is returned when the uncompressed reader is missing from a v1 layer. ErrUncompressedReaderMissingFromLayer = errors.New("failed to get uncompressed reader from v1 layer") + // ErrSymlinkDepthExceeded is returned when the symlink depth is exceeded. + ErrSymlinkDepthExceeded = errors.New("symlink depth exceeded") + // ErrSymlinkCycle is returned when a symlink cycle is found. + ErrSymlinkCycle = errors.New("symlink cycle found") + + // DefaultMaxSymlinkDepth is the default maximum symlink depth. + DefaultMaxSymlinkDepth = 6 ) // ======================================================== @@ -109,7 +116,8 @@ type chainLayer struct { func (chainLayer *chainLayer) FS() scalibrfs.FS { // root should be "/" given we are dealing with file paths. return &FS{ - tree: chainLayer.fileNodeTree, + tree: chainLayer.fileNodeTree, + maxSymlinkDepth: DefaultMaxSymlinkDepth, } } @@ -129,7 +137,55 @@ func (chainLayer *chainLayer) Layer() image.Layer { // FS implements the scalibrfs.FS interface that will be used when scanning for inventory. type FS struct { - tree *pathtree.Node[fileNode] + tree *pathtree.Node[fileNode] + maxSymlinkDepth int +} + +// resolveSymlink resolves a symlink by following the target path. It returns the resolved file +// node or an error if the symlink depth is exceeded or a cycle is found. An ErrSymlinkDepthExceeded +// error if the symlink depth is exceeded or an ErrSymlinkCycle error if a cycle is +// found. Whichever error is encountered first is returned. The cycle detection leverages Floyd's +// tortoise and hare algorithm, which utilizes a slow and fast pointer. +func (chainfs FS) resolveSymlink(node *fileNode, depth int) (*fileNode, error) { + // slowNode is used to keep track of the slow moving node. + slowNode := node + advanceSlowNode := false + initialSymlinkPath := node.virtualPath + + var err error + for { + if depth < 0 { + return nil, ErrSymlinkDepthExceeded + } + + isSymlink := node.mode&fs.ModeSymlink != 0 + if !isSymlink { + return node, nil + } + + // Move on to the next fileNode. + node, err = chainfs.getFileNode(node.targetPath) + if err != nil { + return nil, err + } + + // If the slowNode is the same as the current node, then a cycle is found since the node caught + // up to the slowNode. + if node == slowNode { + return nil, fmt.Errorf("%w, initial symlink: %s", ErrSymlinkCycle, initialSymlinkPath) + } + + if advanceSlowNode { + slowNode, err = chainfs.getFileNode(slowNode.targetPath) + if err != nil { + fmt.Println("failed to get SLOW node") + return nil, err + } + } + + advanceSlowNode = !advanceSlowNode + depth-- + } } // Open opens a file from the virtual filesystem. @@ -138,24 +194,47 @@ func (chainfs FS) Open(name string) (fs.File, error) { if err != nil { return nil, fmt.Errorf("failed to get file node to open %s: %w", name, err) } - return fileNode, nil + + resolvedNode, err := chainfs.resolveSymlink(fileNode, chainfs.maxSymlinkDepth) + if err != nil { + return nil, fmt.Errorf("failed to resolve symlink for file node %s: %w", fileNode.virtualPath, err) + } + + fmt.Printf("resolved path: %s\n", resolvedNode.virtualPath) + return resolvedNode, nil } // Stat returns a FileInfo object describing the file found at name. func (chainfs *FS) Stat(name string) (fs.FileInfo, error) { - fileNode, err := chainfs.getFileNode(name) + node, err := chainfs.getFileNode(name) if err != nil { return nil, fmt.Errorf("failed to get file node to stat %s: %w", name, err) } - return fileNode.Stat() + + resolvedNode, err := chainfs.resolveSymlink(node, chainfs.maxSymlinkDepth) + if err != nil { + return nil, fmt.Errorf("failed to resolve symlink for file node %s: %w", node.virtualPath, err) + } + return resolvedNode.Stat() } // ReadDir returns the directory entries found at path name. func (chainfs *FS) ReadDir(name string) ([]fs.DirEntry, error) { - children, err := chainfs.getFileNodeChildren(name) + node, err := chainfs.getFileNode(name) + if err != nil { + return nil, fmt.Errorf("failed to get file node to read directory %s: %w", name, err) + } + + resolvedNode, err := chainfs.resolveSymlink(node, chainfs.maxSymlinkDepth) + if err != nil { + return nil, fmt.Errorf("failed to resolve symlink for file node %s: %w", node.virtualPath, err) + } + + children, err := chainfs.getFileNodeChildren(resolvedNode.virtualPath) if err != nil { return nil, err } + dirEntries := []fs.DirEntry{} for _, child := range children { if child.isWhiteout { diff --git a/artifact/image/layerscanning/image/layer_test.go b/artifact/image/layerscanning/image/layer_test.go index 3b351302..09597e40 100644 --- a/artifact/image/layerscanning/image/layer_test.go +++ b/artifact/image/layerscanning/image/layer_test.go @@ -168,7 +168,7 @@ func TestChainLayerFS(t *testing.T) { } func TestChainFSOpen(t *testing.T) { - populatedChainFS, extractDir := setUpChainFS(t) + populatedChainFS, extractDir := setUpChainFS(t, 3) tests := []struct { name string @@ -239,6 +239,50 @@ func TestChainFSOpen(t *testing.T) { mode: filePermission, }, }, + { + name: "open absolute symlink from filled tree with depth 1", + chainfs: populatedChainFS, + path: "/symlink1", + // The node the symlink points to is expected. + wantNode: &fileNode{ + extractDir: extractDir, + originLayerID: "layer2", + virtualPath: "/dir2/bar", + isWhiteout: false, + mode: filePermission, + }, + }, + { + name: "open absolute symlink from filled tree with depth 2", + chainfs: populatedChainFS, + path: "/symlink2", + // The node the symlink points to is expected. + wantNode: &fileNode{ + extractDir: extractDir, + originLayerID: "layer2", + virtualPath: "/dir2/bar", + isWhiteout: false, + mode: filePermission, + }, + }, + { + name: "error opening symlink due to non-existent target", + chainfs: populatedChainFS, + path: "/symlink-to-non-existent-file", + wantErr: fs.ErrNotExist, + }, + { + name: "error opening symlink due to depth exceeded", + chainfs: populatedChainFS, + path: "/symlink4", + wantErr: ErrSymlinkDepthExceeded, + }, + { + name: "error opening symlink due to cycle", + chainfs: populatedChainFS, + path: "/symlink-cycle1", + wantErr: ErrSymlinkCycle, + }, } for _, tc := range tests { @@ -265,7 +309,7 @@ func TestChainFSOpen(t *testing.T) { } func TestChainFSStat(t *testing.T) { - populatedChainFS, _ := setUpChainFS(t) + populatedChainFS, _ := setUpChainFS(t, DefaultMaxSymlinkDepth) tests := []struct { name string @@ -322,7 +366,7 @@ func TestChainFSStat(t *testing.T) { } func TestChainFSReadDir(t *testing.T) { - populatedChainFS, extractDir := setUpChainFS(t) + populatedChainFS, extractDir := setUpChainFS(t, DefaultMaxSymlinkDepth) tests := []struct { name string @@ -340,10 +384,10 @@ func TestChainFSReadDir(t *testing.T) { wantErr: fs.ErrNotExist, }, { - name: "root directory", - chainfs: setUpEmptyChainFS(t), - path: "/", - wantNodes: []*fileNode{}, + name: "root directory", + chainfs: setUpEmptyChainFS(t), + path: "/", + wantErr: fs.ErrNotExist, }, { name: "non-root directory in empty tree", @@ -361,25 +405,94 @@ func TestChainFSReadDir(t *testing.T) { extractDir: extractDir, originLayerID: "layer1", virtualPath: "/dir1", - - isWhiteout: false, - mode: fs.ModeDir | dirPermission, + isWhiteout: false, + mode: fs.ModeDir | dirPermission, }, { extractDir: extractDir, originLayerID: "layer1", virtualPath: "/baz", - - isWhiteout: false, - mode: filePermission, + isWhiteout: false, + mode: filePermission, }, { extractDir: extractDir, originLayerID: "layer2", virtualPath: "/dir2", - - isWhiteout: false, - mode: fs.ModeDir | dirPermission, + isWhiteout: false, + mode: fs.ModeDir | dirPermission, + }, + { + extractDir: extractDir, + originLayerID: "layer2", + virtualPath: "/symlink1", + isWhiteout: false, + mode: fs.ModeSymlink, + targetPath: "/dir2/bar", + }, + { + extractDir: extractDir, + originLayerID: "layer2", + virtualPath: "/symlink2", + isWhiteout: false, + mode: fs.ModeSymlink, + targetPath: "/symlink1", + }, + { + extractDir: extractDir, + originLayerID: "layer2", + virtualPath: "/symlink3", + isWhiteout: false, + mode: fs.ModeSymlink, + targetPath: "/symlink2", + }, + { + extractDir: extractDir, + originLayerID: "layer2", + virtualPath: "/symlink4", + isWhiteout: false, + mode: fs.ModeSymlink, + targetPath: "/symlink3", + }, + { + extractDir: extractDir, + originLayerID: "layer2", + virtualPath: "/symlink-cycle1", + isWhiteout: false, + mode: fs.ModeSymlink, + targetPath: "/symlink-cycle2", + }, + { + extractDir: extractDir, + originLayerID: "layer2", + virtualPath: "/symlink-cycle2", + isWhiteout: false, + mode: fs.ModeSymlink, + targetPath: "/symlink-cycle3", + }, + { + extractDir: extractDir, + originLayerID: "layer2", + virtualPath: "/symlink-cycle3", + isWhiteout: false, + mode: fs.ModeSymlink, + targetPath: "/symlink-cycle2", + }, + { + extractDir: extractDir, + originLayerID: "layer2", + virtualPath: "/symlink-to-non-existent-file", + isWhiteout: false, + mode: fs.ModeSymlink, + targetPath: "/non-existent-file", + }, + { + extractDir: extractDir, + originLayerID: "layer2", + virtualPath: "/symlink-to-dir", + isWhiteout: false, + mode: fs.ModeSymlink, + targetPath: "/dir2", }, }, }, @@ -392,9 +505,8 @@ func TestChainFSReadDir(t *testing.T) { extractDir: extractDir, originLayerID: "layer2", virtualPath: "/dir1/foo", - - isWhiteout: false, - mode: filePermission, + isWhiteout: false, + mode: filePermission, }, }, }, @@ -404,14 +516,36 @@ func TestChainFSReadDir(t *testing.T) { path: "/dir1/foo", wantNodes: []*fileNode{}, }, + { + name: "read symlink from filled tree", + chainfs: populatedChainFS, + path: "/symlink-to-dir", + wantNodes: []*fileNode{ + { + extractDir: extractDir, + originLayerID: "layer2", + virtualPath: "/dir2/bar", + isWhiteout: false, + mode: filePermission, + }, + }, + }, } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { gotDirEntries, gotErr := tc.chainfs.ReadDir(tc.path) - if tc.wantErr != nil && tc.wantErr != gotErr { - t.Errorf("ReadDir(%v) returned error: %v, want error: %v", tc.path, gotErr, tc.wantErr) + if tc.wantErr != nil { + if !errors.Is(gotErr, tc.wantErr) { + t.Fatalf("ReadDir(%v) returned error: %v, want error: %v", tc.path, gotErr, tc.wantErr) + } + return + } + + if gotErr != nil { + t.Fatalf("ReadDir(%v) returned error: %v", tc.path, gotErr) + return } // Convert fileNodes to DirEntries for comparison. @@ -472,18 +606,20 @@ func setUpEmptyChainFS(t *testing.T) FS { t.Helper() return FS{ - tree: pathtree.NewNode[fileNode](), + tree: pathtree.NewNode[fileNode](), + maxSymlinkDepth: DefaultMaxSymlinkDepth, } } // setUpChainFS creates a chainFS with a populated tree and creates the corresponding files in a // temporary directory. It returns the chainFS and the temporary directory path. -func setUpChainFS(t *testing.T) (FS, string) { +func setUpChainFS(t *testing.T, maxSymlinkDepth int) (FS, string) { t.Helper() tempDir := t.TempDir() chainfs := FS{ - tree: pathtree.NewNode[fileNode](), + tree: pathtree.NewNode[fileNode](), + maxSymlinkDepth: maxSymlinkDepth, } vfsMap := map[string]*fileNode{ @@ -531,7 +667,6 @@ func setUpChainFS(t *testing.T) (FS, string) { isWhiteout: false, mode: filePermission, }, - "/wh.foobar": &fileNode{ extractDir: tempDir, originLayerID: "layer2", @@ -539,6 +674,78 @@ func setUpChainFS(t *testing.T) (FS, string) { isWhiteout: true, mode: filePermission, }, + "/symlink1": &fileNode{ + extractDir: tempDir, + originLayerID: "layer2", + virtualPath: "/symlink1", + isWhiteout: false, + mode: fs.ModeSymlink, + targetPath: "/dir2/bar", + }, + "/symlink2": &fileNode{ + extractDir: tempDir, + originLayerID: "layer2", + virtualPath: "symlink2", + isWhiteout: false, + mode: fs.ModeSymlink, + targetPath: "/symlink1", + }, + "/symlink3": &fileNode{ + extractDir: tempDir, + originLayerID: "layer2", + virtualPath: "symlink3", + isWhiteout: false, + mode: fs.ModeSymlink, + targetPath: "/symlink2", + }, + "/symlink4": &fileNode{ + extractDir: tempDir, + originLayerID: "layer2", + virtualPath: "symlink4", + isWhiteout: false, + mode: fs.ModeSymlink, + targetPath: "/symlink3", + }, + "/symlink-to-dir": &fileNode{ + extractDir: tempDir, + originLayerID: "layer2", + virtualPath: "symlink-to-dir", + isWhiteout: false, + mode: fs.ModeSymlink, + targetPath: "/dir2", + }, + "/symlink-to-non-existent-file": &fileNode{ + extractDir: tempDir, + originLayerID: "layer2", + virtualPath: "symlink-to-non-existent-file", + isWhiteout: false, + mode: fs.ModeSymlink, + targetPath: "/non-existent-file", + }, + "/symlink-cycle1": &fileNode{ + extractDir: tempDir, + originLayerID: "layer2", + virtualPath: "symlink-cycle1", + isWhiteout: false, + mode: fs.ModeSymlink, + targetPath: "/symlink-cycle2", + }, + "/symlink-cycle2": &fileNode{ + extractDir: tempDir, + originLayerID: "layer2", + virtualPath: "symlink-cycle2", + isWhiteout: false, + mode: fs.ModeSymlink, + targetPath: "/symlink-cycle3", + }, + "/symlink-cycle3": &fileNode{ + extractDir: tempDir, + originLayerID: "layer2", + virtualPath: "symlink-cycle3", + isWhiteout: false, + mode: fs.ModeSymlink, + targetPath: "/symlink-cycle1", + }, } for path, node := range vfsMap { @@ -547,6 +754,9 @@ func setUpChainFS(t *testing.T) (FS, string) { if node.IsDir() { os.MkdirAll(node.RealFilePath(), dirPermission) } else { + if node.mode == fs.ModeSymlink { + continue + } os.WriteFile(node.RealFilePath(), []byte(path), filePermission) } } diff --git a/artifact/image/layerscanning/image/testdata/symlink-attack.tar b/artifact/image/layerscanning/image/testdata/symlink-attack.tar new file mode 100644 index 00000000..1d190df3 Binary files /dev/null and b/artifact/image/layerscanning/image/testdata/symlink-attack.tar differ diff --git a/artifact/image/layerscanning/image/testdata/symlink-basic.tar b/artifact/image/layerscanning/image/testdata/symlink-basic.tar new file mode 100644 index 00000000..d799726c Binary files /dev/null and b/artifact/image/layerscanning/image/testdata/symlink-basic.tar differ diff --git a/artifact/image/layerscanning/image/testdata/symlink-cycle.tar b/artifact/image/layerscanning/image/testdata/symlink-cycle.tar new file mode 100644 index 00000000..faf9466a Binary files /dev/null and b/artifact/image/layerscanning/image/testdata/symlink-cycle.tar differ diff --git a/artifact/image/layerscanning/image/testdata/symlink-dangling.tar b/artifact/image/layerscanning/image/testdata/symlink-dangling.tar new file mode 100644 index 00000000..807bbcb0 Binary files /dev/null and b/artifact/image/layerscanning/image/testdata/symlink-dangling.tar differ diff --git a/artifact/image/layerscanning/image/testdata/symlink-depth-exceeded.tar b/artifact/image/layerscanning/image/testdata/symlink-depth-exceeded.tar new file mode 100644 index 00000000..3305a56a Binary files /dev/null and b/artifact/image/layerscanning/image/testdata/symlink-depth-exceeded.tar differ diff --git a/artifact/image/testfixtures/symlink-attack/Dockerfile b/artifact/image/testfixtures/symlink-attack/Dockerfile new file mode 100644 index 00000000..7ebf248f --- /dev/null +++ b/artifact/image/testfixtures/symlink-attack/Dockerfile @@ -0,0 +1,18 @@ +# Use Alpine as the builder since the final image is built on scratch +# which doesn't contain the `ln` command to generate symlinks. +FROM alpine:latest as builder + +RUN mkdir dir1 + +RUN ln -s ../../secret.txt /dir1/attack-symlink.txt +RUN ln -s /../secret.txt /dir1/attack-symlink-absolute.txt + +# - root +# - dir1 +# - attack-symlink.txt -> ../../secret.txt +# - attack-symlink-absolute.txt -> /../secret.txt + +FROM scratch + +# Must copy over the entire directory to preserve the symlinks. +COPY --from=builder /dir1/ /dir1/ diff --git a/artifact/image/testfixtures/symlink-basic/Dockerfile b/artifact/image/testfixtures/symlink-basic/Dockerfile new file mode 100644 index 00000000..dd0e314f --- /dev/null +++ b/artifact/image/testfixtures/symlink-basic/Dockerfile @@ -0,0 +1,24 @@ +# Use Alpine as the builder since the final image is built on scratch +# which doesn't contain the `ln` command to generate symlinks. +FROM alpine:latest as builder + +RUN mkdir dir1 + +RUN echo "sample text" > dir1/sample.txt + +RUN ln -s /dir1/sample.txt /dir1/absolute-symlink.txt +RUN ln -s ./sample.txt /dir1/relative-dot-symlink.txt +RUN ln -s sample.txt /dir1/relative-symlink.txt +RUN ln -s absolute-symlink.txt /dir1/chain-symlink.txt + +# - root +# - dir1 +# - sample.txt +# - absolute-symlink.txt -> /dir1/sample.txt +# - relative-dot-symlink.txt -> ./sample.txt +# - relative-symlink.txt -> sample.txt + +FROM scratch + +# Must copy over the entire directory to preserve the symlinks. +COPY --from=builder /dir1/ /dir1/ diff --git a/artifact/image/testfixtures/symlink-cycle/Dockerfile b/artifact/image/testfixtures/symlink-cycle/Dockerfile new file mode 100644 index 00000000..6c4a809d --- /dev/null +++ b/artifact/image/testfixtures/symlink-cycle/Dockerfile @@ -0,0 +1,20 @@ +# Use Alpine as the builder since the final image is built on scratch +# which doesn't contain the `ln` command to generate symlinks. +FROM alpine:latest as builder + +RUN mkdir dir1 + +RUN ln -s /dir1/sample.txt /dir1/absolute-symlink.txt +RUN ln -s absolute-symlink.txt /dir1/chain-symlink.txt +RUN ln -s /dir1/chain-symlink.txt /dir1/sample.txt + +# - root +# - dir1 +# - /dir1/sample.txt -> /dir1/absolute-symlink.txt +# - absolute-symlink.txt -> /dir1/chain-symlink.txt +# - /dir1/chain-symlink.txt -> /dir1/sample.txt + +FROM scratch + +# Must copy over the entire directory to preserve the symlinks. +COPY --from=builder /dir1/ /dir1/ diff --git a/artifact/image/testfixtures/symlink-depth-exceeded/Dockerfile b/artifact/image/testfixtures/symlink-depth-exceeded/Dockerfile new file mode 100644 index 00000000..0fbec92e --- /dev/null +++ b/artifact/image/testfixtures/symlink-depth-exceeded/Dockerfile @@ -0,0 +1,33 @@ +# Use Alpine as the builder since the final image is built on scratch +# which doesn't contain the `ln` command to generate symlinks. +FROM alpine:latest as builder + +RUN mkdir dir1 +RUN mkdir dir2 +RUN mkdir dir2/dir3 + +RUN echo "sample text" > dir1/sample.txt + +RUN ln -s /dir1/sample.txt /dir1/symlink1.txt +RUN ln -s /dir1/symlink1.txt /dir1/symlink2.txt +RUN ln -s /dir1/symlink2.txt /dir1/symlink3.txt +RUN ln -s /dir1/symlink3.txt /dir1/symlink4.txt +RUN ln -s /dir1/symlink4.txt /dir1/symlink5.txt +RUN ln -s /dir1/symlink5.txt /dir1/symlink6.txt +RUN ln -s /dir1/symlink6.txt /dir1/symlink7.txt + +# - root +# - dir1 +# - sample.txt +# - symlink1.txt -> /dir1/sample.txt +# - symlink2.txt -> /dir1/symlink1.txt +# - symlink3.txt -> /dir1/symlink2.txt +# - symlink4.txt -> /dir1/symlink3.txt +# - symlink5.txt -> /dir1/symlink4.txt +# - symlink6.txt -> /dir1/symlink5.txt +# - symlink7.txt -> /dir1/symlink6.txt + +FROM scratch + +# Must copy over the entire directory to preserve the symlinks. +COPY --from=builder /dir1/ /dir1/