diff --git a/Cargo.toml b/Cargo.toml index 34eb3d4b3..cd5ed3a1a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -47,6 +47,8 @@ tokio-uring = { version = "0.4.0", optional = true } tokio-test = "0.4.2" vmm-sys-util = "0.11" vm-memory = { version = "0.10", features = ["backend-mmap", "backend-bitmap"] } +[target.'cfg(target_os = "macos")'.dev-dependencies] +tempfile = "3.2.0" [features] default = ["fusedev"] diff --git a/src/api/server/sync_io.rs b/src/api/server/sync_io.rs index d9925cca7..0330678e3 100644 --- a/src/api/server/sync_io.rs +++ b/src/api/server/sync_io.rs @@ -143,9 +143,9 @@ impl Server { x if x == Opcode::Rename2 as u32 => self.rename2(ctx), #[cfg(target_os = "linux")] x if x == Opcode::Lseek as u32 => self.lseek(ctx), - #[cfg(feature = "virtiofs")] + #[cfg(all(target_os = "linux", feature = "virtiofs"))] x if x == Opcode::SetupMapping as u32 => self.setupmapping(ctx, vu_req), - #[cfg(feature = "virtiofs")] + #[cfg(all(target_os = "linux", feature = "virtiofs"))] x if x == Opcode::RemoveMapping as u32 => self.removemapping(ctx, vu_req), // Group reqeusts don't need reply together x => match x { diff --git a/src/lib.rs b/src/lib.rs index b8756920a..1eb2782cd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -117,7 +117,10 @@ pub type Result = ::std::result::Result; pub mod abi; pub mod api; -#[cfg(all(any(feature = "fusedev", feature = "virtiofs"), target_os = "linux"))] +#[cfg(all( + any(feature = "fusedev", feature = "virtiofs"), + any(target_os = "macos", target_os = "linux") +))] pub mod passthrough; pub mod transport; diff --git a/src/passthrough/inode_store.rs b/src/passthrough/inode_store.rs index 1cfeab40a..20eddd13f 100644 --- a/src/passthrough/inode_store.rs +++ b/src/passthrough/inode_store.rs @@ -4,15 +4,23 @@ use std::collections::BTreeMap; use std::sync::Arc; +#[cfg(target_os = "linux")] use super::file_handle::FileHandle; +#[cfg(target_os = "macos")] +use super::stat::Stat as StatExt; +#[cfg(target_os = "linux")] use super::statx::StatExt; -use super::{Inode, InodeData, InodeHandle}; + +#[cfg(target_os = "linux")] +use super::InodeHandle; +use super::{InoT, Inode, InodeData}; #[derive(Clone, Copy, Default, PartialOrd, Ord, PartialEq, Eq, Debug)] /// Identify an inode in `PassthroughFs` by `InodeId`. pub struct InodeId { - pub ino: libc::ino64_t, + pub ino: InoT, pub dev: libc::dev_t, + #[cfg(target_os = "linux")] pub mnt: u64, } @@ -22,6 +30,7 @@ impl InodeId { InodeId { ino: st.st.st_ino, dev: st.st.st_dev, + #[cfg(target_os = "linux")] mnt: st.mnt_id, } } @@ -31,6 +40,7 @@ impl InodeId { pub struct InodeStore { data: BTreeMap>, by_id: BTreeMap, + #[cfg(target_os = "linux")] by_handle: BTreeMap, Inode>, } @@ -41,6 +51,7 @@ impl InodeStore { /// will get lost. pub fn insert(&mut self, data: Arc) { self.by_id.insert(data.id, data.inode); + #[cfg(target_os = "linux")] if let InodeHandle::Handle(handle) = &data.handle { self.by_handle .insert(handle.file_handle().clone(), data.inode); @@ -59,6 +70,7 @@ impl InodeStore { } if let Some(data) = data.as_ref() { + #[cfg(target_os = "linux")] if let InodeHandle::Handle(handle) = &data.handle { self.by_handle.remove(handle.file_handle()); } @@ -69,6 +81,7 @@ impl InodeStore { pub fn clear(&mut self) { self.data.clear(); + #[cfg(target_os = "linux")] self.by_handle.clear(); self.by_id.clear(); } @@ -82,6 +95,7 @@ impl InodeStore { self.get(inode) } + #[cfg(target_os = "linux")] pub fn get_by_handle(&self, handle: &FileHandle) -> Option<&Arc> { let inode = self.inode_by_handle(handle)?; self.get(inode) @@ -91,6 +105,7 @@ impl InodeStore { self.by_id.get(id) } + #[cfg(target_os = "linux")] pub fn inode_by_handle(&self, handle: &FileHandle) -> Option<&Inode> { self.by_handle.get(handle) } @@ -101,12 +116,20 @@ mod test { use super::super::*; use super::*; + #[cfg(target_os = "linux")] use std::ffi::CStr; + #[cfg(target_os = "linux")] use std::mem::MaybeUninit; use std::os::unix::io::AsRawFd; use std::sync::atomic::Ordering; + #[cfg(target_os = "macos")] + use tempfile::Builder; + #[cfg(target_os = "linux")] use vmm_sys_util::tempfile::TempFile; + #[cfg(target_os = "macos")] + use stat::stat; + impl PartialEq for InodeData { fn eq(&self, other: &Self) -> bool { if self.inode != other.inode @@ -117,6 +140,7 @@ mod test { return false; } + #[cfg(target_os = "linux")] match (&self.handle, &other.handle) { (InodeHandle::File(f1), InodeHandle::File(f2)) => f1.as_raw_fd() == f2.as_raw_fd(), (InodeHandle::Handle(h1), InodeHandle::Handle(h2)) => { @@ -124,9 +148,18 @@ mod test { } _ => false, } + + #[cfg(target_os = "macos")] + match (&self.handle, &other.handle) { + (InodeHandle::File(f1, _), InodeHandle::File(f2, _)) => { + f1.as_raw_fd() == f2.as_raw_fd() + } + _ => false, + } } } + #[cfg(target_os = "linux")] fn stat_fd(fd: &impl AsRawFd) -> io::Result { let mut st = MaybeUninit::::zeroed(); let null_path = unsafe { CStr::from_bytes_with_nul_unchecked(b"\0") }; @@ -148,6 +181,7 @@ mod test { } } + #[cfg(target_os = "linux")] #[test] fn test_inode_store() { let mut m = InodeStore::default(); @@ -214,4 +248,65 @@ mod test { assert!(m.get(&inode2).is_none()); assert!(m.get_by_id(&id2).is_none()); } + + #[cfg(target_os = "macos")] + #[test] + fn test_inode_store() { + let mut m = InodeStore::default(); + let tmpfile1 = Builder::new().tempfile().unwrap(); + let tmpfile2 = Builder::new().tempfile().unwrap(); + + let inode1: Inode = 3; + let inode2: Inode = 4; + let inode_stat1 = stat(tmpfile1.as_file()).unwrap(); + let inode_stat2 = stat(tmpfile2.as_file()).unwrap(); + let id1 = InodeId::from_stat(&inode_stat1); + let id2 = InodeId::from_stat(&inode_stat2); + let cstr1 = CString::new(tmpfile1.path().to_string_lossy().to_string()).unwrap(); + let cstr2 = CString::new(tmpfile2.path().to_string_lossy().to_string()).unwrap(); + let file_or_handle1 = InodeHandle::File(tmpfile1.into_file(), cstr1); + let file_or_handle2 = InodeHandle::File(tmpfile2.into_file(), cstr2); + let data1 = InodeData::new(inode1, file_or_handle1, 2, id1, inode_stat1.st.st_mode); + let data2 = InodeData::new(inode2, file_or_handle2, 2, id2, inode_stat2.st.st_mode); + let data1 = Arc::new(data1); + let data2 = Arc::new(data2); + + m.insert(data1.clone()); + + // get not present key, expect none + assert!(m.get(&1).is_none()); + + // get just inserted value by key, by id, by handle + assert!(m.get_by_id(&InodeId::default()).is_none()); + assert_eq!(m.get(&inode1).unwrap(), &data1); + assert_eq!(m.get_by_id(&id1).unwrap(), &data1); + + // insert another value, and check again + m.insert(data2.clone()); + assert!(m.get(&1).is_none()); + assert!(m.get_by_id(&InodeId::default()).is_none()); + assert_eq!(m.get(&inode1).unwrap(), &data1); + assert_eq!(m.get_by_id(&id1).unwrap(), &data1); + assert_eq!(m.get(&inode2).unwrap(), &data2); + assert_eq!(m.get_by_id(&id2).unwrap(), &data2); + + // remove non-present key + assert!(m.remove(&1, false).is_none()); + + // remove present key, return its value + assert_eq!(m.remove(&inode1, false).unwrap(), data1.clone()); + assert!(m.get(&inode1).is_none()); + assert!(m.get_by_id(&id1).is_none()); + assert_eq!(m.get(&inode2).unwrap(), &data2); + assert_eq!(m.get_by_id(&id2).unwrap(), &data2); + + // clear the map + m.clear(); + assert!(m.get(&1).is_none()); + assert!(m.get_by_id(&InodeId::default()).is_none()); + assert!(m.get(&inode1).is_none()); + assert!(m.get_by_id(&id1).is_none()); + assert!(m.get(&inode2).is_none()); + assert!(m.get_by_id(&id2).is_none()); + } } diff --git a/src/passthrough/mod.rs b/src/passthrough/mod.rs index f4859006e..71a91d6a0 100644 --- a/src/passthrough/mod.rs +++ b/src/passthrough/mod.rs @@ -10,52 +10,75 @@ //! The code is derived from the //! [CrosVM](https://chromium.googlesource.com/chromiumos/platform/crosvm/) project, //! with heavy modification/enhancements from Alibaba Cloud OS team. +#![allow(missing_docs)] use std::any::Any; use std::collections::{btree_map, BTreeMap}; -use std::ffi::{CStr, CString, OsString}; +use std::ffi::{CStr, CString}; use std::fs::File; use std::io; use std::marker::PhantomData; use std::ops::{Deref, DerefMut}; use std::os::fd::{AsFd, BorrowedFd}; -use std::os::unix::ffi::OsStringExt; use std::os::unix::io::{AsRawFd, RawFd}; +#[cfg(target_os = "macos")] use std::path::PathBuf; use std::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, Ordering}; use std::sync::{Arc, Mutex, MutexGuard, RwLock, RwLockWriteGuard}; use std::time::Duration; -use vm_memory::{bitmap::BitmapSlice, ByteValued}; - pub use self::config::{CachePolicy, Config}; -use self::file_handle::{FileHandle, OpenableFileHandle}; +#[cfg(target_os = "linux")] +use self::file_handle::FileHandle; use self::inode_store::{InodeId, InodeStore}; +#[cfg(target_os = "linux")] use self::mount_fd::MountFds; -use self::statx::{statx, StatExt}; -use self::util::{ - ebadf, einval, enosys, eperm, is_dir, is_safe_inode, openat, reopen_fd_through_proc, stat_fd, - UniqueInodeGenerator, -}; +use vm_memory::bitmap::BitmapSlice; + +#[cfg(target_os = "macos")] +use self::stat::stat; +use self::util::{ebadf, einval, enosys, is_dir, is_safe_inode, openat, UniqueInodeGenerator}; use crate::abi::fuse_abi as fuse; -use crate::abi::fuse_abi::Opcode; use crate::api::filesystem::Entry; +#[cfg(target_os = "linux")] +use crate::api::PROC_SELF_FD_CSTR; use crate::api::{ validate_path_component, BackendFileSystem, CURRENT_DIR_CSTR, EMPTY_CSTR, PARENT_DIR_CSTR, - PROC_SELF_FD_CSTR, SLASH_ASCII, VFS_MAX_INO, + SLASH_ASCII, VFS_MAX_INO, }; #[cfg(feature = "async-io")] mod async_io; mod config; +#[cfg(target_os = "linux")] mod file_handle; mod inode_store; +#[cfg(target_os = "linux")] mod mount_fd; +#[cfg(target_os = "linux")] mod os_compat; +#[cfg(target_os = "macos")] +mod stat; +#[cfg(target_os = "linux")] mod statx; mod sync_io; mod util; +#[cfg(target_os = "linux")] +mod passthrough_fs_linux; +#[cfg(target_os = "macos")] +mod passthrough_fs_macos; + +#[cfg(target_os = "linux")] +mod sync_io_linux; +#[cfg(target_os = "macos")] +mod sync_io_macos; + +#[cfg(target_os = "linux")] +pub use passthrough_fs_linux::*; +#[cfg(target_os = "macos")] +pub use passthrough_fs_macos::*; + type Inode = u64; type Handle = u64; @@ -70,7 +93,8 @@ const MAX_HOST_INO: u64 = 0x7fff_ffff_ffff; * case the object's lifetime is that of the respective `InodeData` object. */ #[derive(Debug)] -enum InodeFile<'a> { +pub enum InodeFile<'a> { + #[cfg(target_os = "linux")] Owned(File), Ref(&'a File), } @@ -80,6 +104,7 @@ impl AsRawFd for InodeFile<'_> { /// Note: This fd is only valid as long as the `InodeFile` exists. fn as_raw_fd(&self) -> RawFd { match self { + #[cfg(target_os = "linux")] Self::Owned(file) => file.as_raw_fd(), Self::Ref(file_ref) => file_ref.as_raw_fd(), } @@ -89,54 +114,13 @@ impl AsRawFd for InodeFile<'_> { impl AsFd for InodeFile<'_> { fn as_fd(&self) -> BorrowedFd<'_> { match self { + #[cfg(target_os = "linux")] Self::Owned(file) => file.as_fd(), Self::Ref(file_ref) => file_ref.as_fd(), } } } -#[derive(Debug)] -enum InodeHandle { - File(File), - Handle(Arc), -} - -impl InodeHandle { - fn file_handle(&self) -> Option<&FileHandle> { - match self { - InodeHandle::File(_) => None, - InodeHandle::Handle(h) => Some(h.file_handle().deref()), - } - } - - fn get_file(&self) -> io::Result> { - match self { - InodeHandle::File(f) => Ok(InodeFile::Ref(f)), - InodeHandle::Handle(h) => { - let f = h.open(libc::O_PATH)?; - Ok(InodeFile::Owned(f)) - } - } - } - - fn open_file(&self, flags: libc::c_int, proc_self_fd: &File) -> io::Result { - match self { - InodeHandle::File(f) => reopen_fd_through_proc(f, flags, proc_self_fd), - InodeHandle::Handle(h) => h.open(flags), - } - } - - fn stat(&self) -> io::Result { - match self { - InodeHandle::File(f) => stat_fd(f, None), - InodeHandle::Handle(_h) => { - let file = self.get_file()?; - stat_fd(&file, None) - } - } - } -} - /// Represents an inode in `PassthroughFs`. #[derive(Debug)] pub struct InodeData { @@ -146,11 +130,11 @@ pub struct InodeData { id: InodeId, refcount: AtomicU64, // File type and mode - mode: u32, + mode: InodeMode, } impl InodeData { - fn new(inode: Inode, f: InodeHandle, refcount: u64, id: InodeId, mode: u32) -> Self { + fn new(inode: Inode, f: InodeHandle, refcount: u64, id: InodeId, mode: InodeMode) -> Self { InodeData { inode, handle: f, @@ -163,10 +147,6 @@ impl InodeData { fn get_file(&self) -> io::Result> { self.handle.get_file() } - - fn open_file(&self, flags: libc::c_int, proc_self_fd: &File) -> io::Result { - self.handle.open_file(flags, proc_self_fd) - } } /// Data structures to manage accessed inodes. @@ -196,47 +176,6 @@ impl InodeMap { .ok_or_else(ebadf) } - fn get_inode_locked( - inodes: &InodeStore, - id: &InodeId, - handle: Option<&FileHandle>, - ) -> Option { - match handle { - Some(h) => inodes.inode_by_handle(h).copied(), - None => inodes.inode_by_id(id).copied(), - } - } - - fn get_alt(&self, id: &InodeId, handle: Option<&FileHandle>) -> Option> { - // Do not expect poisoned lock here, so safe to unwrap(). - let inodes = self.inodes.read().unwrap(); - - Self::get_alt_locked(inodes.deref(), id, handle) - } - - fn get_alt_locked( - inodes: &InodeStore, - id: &InodeId, - handle: Option<&FileHandle>, - ) -> Option> { - handle - .and_then(|h| inodes.get_by_handle(h)) - .or_else(|| { - inodes.get_by_id(id).filter(|data| { - // When we have to fall back to looking up an inode by its IDs, ensure that - // we hit an entry that does not have a file handle. Entries with file - // handles must also have a handle alt key, so if we have not found it by - // that handle alt key, we must have found an entry with a mismatching - // handle; i.e. an entry for a different file, even though it has the same - // inode ID. - // (This can happen when we look up a new file that has reused the inode ID - // of some previously unlinked inode we still have in `.inodes`.) - handle.is_none() || data.handle.file_handle().is_none() - }) - }) - .map(Arc::clone) - } - fn get_map_mut(&self) -> RwLockWriteGuard { // Do not expect poisoned lock here, so safe to unwrap(). self.inodes.write().unwrap() @@ -253,7 +192,7 @@ impl InodeMap { } } -struct HandleData { +pub struct HandleData { inode: Inode, file: File, lock: Mutex<()>, @@ -362,13 +301,16 @@ pub struct PassthroughFs { // Use to generate unique inode ino_allocator: UniqueInodeGenerator, + // Maps mount IDs to an open FD on the respective ID for the purpose of open_by_handle_at(). + #[cfg(target_os = "linux")] mount_fds: MountFds, // File descriptor pointing to the `/proc/self/fd` directory. This is used to convert an fd from // `inodes` into one that can go into `handles`. This is accomplished by reading the // `/proc/self/fd/{}` symlink. We keep an open fd here in case the file system tree that we are meant // to be serving doesn't have access to `/proc/self/fd`. + #[cfg(target_os = "linux")] proc_self_fd: File, // Whether writeback caching is enabled for this directory. This will only be true when @@ -382,6 +324,7 @@ pub struct PassthroughFs { no_opendir: AtomicBool, // Whether kill_priv_v2 is enabled. + #[cfg(target_os = "linux")] killpriv_v2: AtomicBool, // Whether no_readdir is enabled. @@ -417,7 +360,9 @@ impl PassthroughFs { } // Safe because this is a constant value and a valid C string. + #[cfg(target_os = "linux")] let proc_self_fd_cstr = unsafe { CStr::from_bytes_with_nul_unchecked(PROC_SELF_FD_CSTR) }; + #[cfg(target_os = "linux")] let proc_self_fd = Self::open_file( &libc::AT_FDCWD, proc_self_fd_cstr, @@ -433,6 +378,7 @@ impl PassthroughFs { (None, None) => (cfg.entry_timeout, cfg.attr_timeout), }; + #[cfg(target_os = "linux")] let mount_fds = MountFds::new(None)?; Ok(PassthroughFs { @@ -443,12 +389,15 @@ impl PassthroughFs { handle_map: HandleMap::new(), next_handle: AtomicU64::new(1), + #[cfg(target_os = "linux")] mount_fds, + #[cfg(target_os = "linux")] proc_self_fd, writeback: AtomicBool::new(false), no_open: AtomicBool::new(false), no_opendir: AtomicBool::new(false), + #[cfg(target_os = "linux")] killpriv_v2: AtomicBool::new(false), no_readdir: AtomicBool::new(cfg.no_readdir), seal_size: AtomicBool::new(cfg.seal_size), @@ -465,16 +414,33 @@ impl PassthroughFs { pub fn import(&self) -> io::Result<()> { let root = CString::new(self.cfg.root_dir.as_str()).expect("CString::new failed"); - let (path_fd, handle_opt, st) = Self::open_file_and_handle(self, &libc::AT_FDCWD, &root) - .map_err(|e| { - error!("fuse: import: failed to get file or handle: {:?}", e); - e - })?; - let id = InodeId::from_stat(&st); - let handle = if let Some(h) = handle_opt { - InodeHandle::Handle(self.to_openable_handle(h)?) - } else { - InodeHandle::File(path_fd) + #[cfg(target_os = "linux")] + let (st, id, handle) = { + let (path_fd, handle_opt, st) = + Self::open_file_and_handle(self, &libc::AT_FDCWD, &root).map_err(|e| { + error!("fuse: import: failed to get file or handle: {:?}", e); + e + })?; + + let id = InodeId::from_stat(&st); + + let handle = if let Some(h) = handle_opt { + InodeHandle::Handle(self.to_openable_handle(h)?) + } else { + InodeHandle::File(path_fd) + }; + + (st, id, handle) + }; + + #[cfg(target_os = "macos")] + let (st, id, handle) = { + let (path_fd, st) = self.open_file(&root).unwrap(); + + let id = InodeId::from_stat(&st); + + let handle = InodeHandle::File(path_fd, root); + (st, id, handle) }; // Safe because this doesn't modify any memory and there is no need to check the return @@ -494,49 +460,6 @@ impl PassthroughFs { Ok(()) } - /// Get the list of file descriptors which should be reserved across live upgrade. - pub fn keep_fds(&self) -> Vec { - vec![self.proc_self_fd.as_raw_fd()] - } - - fn readlinkat(dfd: i32, pathname: &CStr) -> io::Result { - let mut buf = Vec::with_capacity(libc::PATH_MAX as usize); - - // Safe because the kernel will only write data to buf and we check the return value - let buf_read = unsafe { - libc::readlinkat( - dfd, - pathname.as_ptr(), - buf.as_mut_ptr() as *mut libc::c_char, - buf.capacity(), - ) - }; - if buf_read < 0 { - error!("fuse: readlinkat error"); - return Err(io::Error::last_os_error()); - } - - // Safe because we trust the value returned by kernel. - unsafe { buf.set_len(buf_read as usize) }; - buf.shrink_to_fit(); - - // Be careful: - // - readlink() does not append a terminating null byte to buf - // - OsString instances are not NUL terminated - Ok(PathBuf::from(OsString::from_vec(buf))) - } - - /// Get the file pathname corresponding to the Inode - /// This function is used by Nydus blobfs - pub fn readlinkat_proc_file(&self, inode: Inode) -> io::Result { - let data = self.inode_map.get(inode)?; - let file = data.get_file()?; - let pathname = CString::new(format!("{}", file.as_raw_fd())) - .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; - - Self::readlinkat(self.proc_self_fd.as_raw_fd(), &pathname) - } - fn create_file_excl( dir: &impl AsRawFd, pathname: &CStr, @@ -558,83 +481,6 @@ impl PassthroughFs { } } - fn open_file(dfd: &impl AsRawFd, pathname: &CStr, flags: i32, mode: u32) -> io::Result { - openat(dfd, pathname, flags, mode) - } - - fn open_file_restricted( - &self, - dir: &impl AsRawFd, - pathname: &CStr, - flags: i32, - mode: u32, - ) -> io::Result { - let flags = libc::O_NOFOLLOW | libc::O_CLOEXEC | flags; - - // TODO - //if self.os_facts.has_openat2 { - // oslib::do_open_relative_to(dir, pathname, flags, mode) - //} else { - openat(dir, pathname, flags, mode) - //} - } - - /// Create a File or File Handle for `name` under directory `dir_fd` to support `lookup()`. - fn open_file_and_handle( - &self, - dir: &impl AsRawFd, - name: &CStr, - ) -> io::Result<(File, Option, StatExt)> { - let path_file = self.open_file_restricted(dir, name, libc::O_PATH, 0)?; - let st = statx(&path_file, None)?; - let handle = if self.cfg.inode_file_handles { - FileHandle::from_fd(&path_file)? - } else { - None - }; - - Ok((path_file, handle, st)) - } - - fn to_openable_handle(&self, fh: FileHandle) -> io::Result> { - fh.into_openable(&self.mount_fds, |fd, flags, _mode| { - reopen_fd_through_proc(&fd, flags, &self.proc_self_fd) - }) - .map(Arc::new) - .map_err(|e| { - if !e.silent() { - error!("{}", e); - } - e.into_inner() - }) - } - - fn allocate_inode( - &self, - inodes: &InodeStore, - id: &InodeId, - handle_opt: Option<&FileHandle>, - ) -> io::Result { - if !self.cfg.use_host_ino { - // If the inode has already been assigned before, the new inode is not reassigned, - // ensuring that the same file is always the same inode - Ok(InodeMap::get_inode_locked(inodes, id, handle_opt) - .unwrap_or_else(|| self.next_inode.fetch_add(1, Ordering::Relaxed))) - } else { - let inode = if id.ino > MAX_HOST_INO { - // Prefer looking for previous mappings from memory - match InodeMap::get_inode_locked(inodes, id, handle_opt) { - Some(ino) => ino, - None => self.ino_allocator.get_unique_inode(id)?, - } - } else { - self.ino_allocator.get_unique_inode(id)? - }; - - Ok(inode) - } - } - fn do_lookup(&self, parent: Inode, name: &CStr) -> io::Result { let name = if parent == fuse::ROOT_ID && name.to_bytes_with_nul().starts_with(PARENT_DIR_CSTR) { @@ -645,13 +491,34 @@ impl PassthroughFs { }; let dir = self.inode_map.get(parent)?; - let dir_file = dir.get_file()?; - let (path_fd, handle_opt, st) = Self::open_file_and_handle(self, &dir_file, name)?; + + #[cfg(target_os = "linux")] + let (path_fd, handle_opt, st) = { + let dir_file = dir.get_file()?; + Self::open_file_and_handle(self, &dir_file, name)? + }; + + #[cfg(target_os = "macos")] + let (path_fd, st, cstring_path) = { + let string_from_name: String = name.to_string_lossy().to_string(); + let dir_path = dir.get_path()?.into_string().unwrap(); + let mut full_path = PathBuf::from(dir_path); + full_path.push(string_from_name); + let string_path = full_path.to_string_lossy().to_string(); + let cstring_path = CString::new(string_path).expect("Failed to convert to CString"); + let (path_fd, st) = self.open_file(&cstring_path)?; + (path_fd, st, cstring_path) + }; + let id = InodeId::from_stat(&st); let mut found = None; 'search: loop { - match self.inode_map.get_alt(&id, handle_opt.as_ref()) { + match self.inode_map.get_alt( + &id, + #[cfg(target_os = "linux")] + handle_opt.as_ref(), + ) { // No existing entry found None => break 'search, Some(data) => { @@ -680,12 +547,16 @@ impl PassthroughFs { let inode = if let Some(v) = found { v } else { + #[cfg(target_os = "linux")] let handle = if let Some(h) = handle_opt.clone() { InodeHandle::Handle(self.to_openable_handle(h)?) } else { InodeHandle::File(path_fd) }; + #[cfg(target_os = "macos")] + let handle = InodeHandle::File(path_fd, cstring_path); + // Write guard get_alt_locked() and insert_lock() to avoid race conditions. let mut inodes = self.inode_map.get_map_mut(); @@ -693,7 +564,12 @@ impl PassthroughFs { // racing thread already added an inode with the same id while we're not holding // the lock. If so just use the newly added inode, otherwise the inode will be replaced // and results in EBADF. - match InodeMap::get_alt_locked(inodes.deref(), &id, handle_opt.as_ref()) { + match InodeMap::get_alt_locked( + inodes.deref(), + &id, + #[cfg(target_os = "linux")] + handle_opt.as_ref(), + ) { Some(data) => { // An inode was added concurrently while we did not hold a lock on // `self.inodes_map`, so we use that instead. `handle` will be dropped. @@ -701,7 +577,12 @@ impl PassthroughFs { data.inode } None => { - let inode = self.allocate_inode(inodes.deref(), &id, handle_opt.as_ref())?; + let inode = self.allocate_inode( + inodes.deref(), + &id, + #[cfg(target_os = "linux")] + handle_opt.as_ref(), + )?; if inode > VFS_MAX_INO { error!("fuse: max inode number reached: {}", VFS_MAX_INO); @@ -799,57 +680,6 @@ impl PassthroughFs { validate_path_component(name) } - // When seal_size is set, we don't allow operations that could change file size nor allocate - // space beyond EOF - fn seal_size_check( - &self, - opcode: Opcode, - file_size: u64, - offset: u64, - size: u64, - mode: i32, - ) -> io::Result<()> { - if offset.checked_add(size).is_none() { - error!( - "fuse: {:?}: invalid `offset` + `size` ({}+{}) overflows u64::MAX", - opcode, offset, size - ); - return Err(einval()); - } - - match opcode { - // write should not exceed the file size. - Opcode::Write => { - if size + offset > file_size { - return Err(eperm()); - } - } - - Opcode::Fallocate => { - let op = mode & !(libc::FALLOC_FL_KEEP_SIZE | libc::FALLOC_FL_UNSHARE_RANGE); - match op { - // Allocate, punch and zero, must not change file size. - 0 | libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_ZERO_RANGE => { - if size + offset > file_size { - return Err(eperm()); - } - } - // collapse and insert will change file size, forbid. - libc::FALLOC_FL_COLLAPSE_RANGE | libc::FALLOC_FL_INSERT_RANGE => { - return Err(eperm()); - } - // Invalid operation - _ => return Err(einval()), - } - } - - // setattr operation should be handled in setattr handler. - _ => return Err(enosys()), - } - - Ok(()) - } - fn get_writeback_open_flags(&self, flags: i32) -> i32 { let mut new_flags = flags; let writeback = self.writeback.load(Ordering::Relaxed); @@ -916,7 +746,10 @@ macro_rules! scoped_cred { // This call is safe because it doesn't modify any memory and we // check the return value. + #[cfg(target_os = "linux")] let res = unsafe { libc::syscall($syscall_nr, -1, val, -1) }; + #[cfg(target_os = "macos")] + let res = unsafe { $syscall_nr(val) }; if res == 0 { Ok(Some($name)) } else { @@ -927,7 +760,10 @@ macro_rules! scoped_cred { impl Drop for $name { fn drop(&mut self) { + #[cfg(target_os = "linux")] let res = unsafe { libc::syscall($syscall_nr, -1, 0, -1) }; + #[cfg(target_os = "macos")] + let res = unsafe { $syscall_nr(0) }; if res < 0 { error!( "fuse: failed to change credentials back to root: {}", @@ -938,9 +774,17 @@ macro_rules! scoped_cred { } }; } + +#[cfg(target_os = "linux")] scoped_cred!(ScopedUid, libc::uid_t, libc::SYS_setresuid); +#[cfg(target_os = "linux")] scoped_cred!(ScopedGid, libc::gid_t, libc::SYS_setresgid); +#[cfg(target_os = "macos")] +scoped_cred!(ScopedUid, libc::uid_t, libc::seteuid); +#[cfg(target_os = "macos")] +scoped_cred!(ScopedGid, libc::gid_t, libc::setegid); + fn set_creds( uid: libc::uid_t, gid: libc::gid_t, @@ -950,48 +794,35 @@ fn set_creds( ScopedGid::new(gid).and_then(|gid| Ok((ScopedUid::new(uid)?, gid))) } -struct CapFsetid {} - -impl Drop for CapFsetid { - fn drop(&mut self) { - if let Err(e) = caps::raise(None, caps::CapSet::Effective, caps::Capability::CAP_FSETID) { - error!("fail to restore thread cap_fsetid: {}", e); - }; - } -} - -fn drop_cap_fsetid() -> io::Result> { - if !caps::has_cap(None, caps::CapSet::Effective, caps::Capability::CAP_FSETID) - .map_err(|_e| io::Error::new(io::ErrorKind::PermissionDenied, "no CAP_FSETID capability"))? - { - return Ok(None); - } - caps::drop(None, caps::CapSet::Effective, caps::Capability::CAP_FSETID).map_err(|_e| { - io::Error::new( - io::ErrorKind::PermissionDenied, - "failed to drop CAP_FSETID capability", - ) - })?; - Ok(Some(CapFsetid {})) -} - #[cfg(test)] mod tests { use super::*; use crate::abi::fuse_abi::CreateIn; use crate::api::filesystem::*; - use crate::api::filesystem::{ZeroCopyReader, ZeroCopyWriter}; + #[cfg(target_os = "linux")] use crate::api::{Vfs, VfsOptions}; use crate::common::file_buf::FileVolatileSlice; use crate::common::file_traits::FileReadWriteVolatile; - + #[cfg(target_os = "linux")] use caps::{CapSet, Capability}; use log; use std::io::{Read, Seek, SeekFrom, Write}; + #[cfg(target_os = "linux")] use std::ops::Deref; use std::os::unix::prelude::MetadataExt; + + #[cfg(target_os = "macos")] + use std::fs; + #[cfg(target_os = "macos")] + use std::os::unix::fs::PermissionsExt; + + #[cfg(target_os = "linux")] use vmm_sys_util::{tempdir::TempDir, tempfile::TempFile}; + #[cfg(target_os = "macos")] + use tempfile::{tempdir, tempdir_in, tempfile, NamedTempFile}; + + #[cfg(target_os = "linux")] fn prepare_passthroughfs() -> PassthroughFs { let source = TempDir::new().expect("Cannot create temporary directory."); let parent_path = @@ -1017,14 +848,77 @@ mod tests { fs } + #[cfg(target_os = "macos")] + fn set_dir_permissions(dir_path: &str) { + let mut permissions = fs::metadata(&dir_path) + .expect("Failed to get directory metadata") + .permissions(); + permissions.set_mode(0o40777); + let _r = permissions.mode(); + set_permissions_recursive(&dir_path).expect("Failed to set permissions"); + } + + #[cfg(target_os = "macos")] + fn set_permissions_recursive(path: &str) -> std::io::Result<()> { + let mut permissions = fs::metadata(path) + .expect("Failed to get directory metadata") + .permissions(); + permissions.set_mode(0o40777); + + let entries = fs::read_dir(path)?; + + for entry in entries { + let entry = entry?; + let entry_path = entry.path(); + + if entry_path.is_dir() { + set_permissions_recursive(entry_path.to_str().unwrap())?; + } else { + let mut permissions = fs::metadata(entry_path) + .expect("Failed to get directory metadata") + .permissions(); + permissions.set_mode(0o40777); + } + } + + Ok(()) + } + + #[cfg(target_os = "macos")] + fn prepare_passthroughfs() -> PassthroughFs { + let source = tempdir().expect("Cannot create temporary directory."); + let tmp_path = source.into_path(); + let parent = tempdir_in(&tmp_path).expect("Cannot create temporary directory."); + let parent_path = parent.into_path(); + let child = NamedTempFile::new_in(&parent_path).expect("Cannot create temporary file."); + child.keep().unwrap(); + set_dir_permissions(tmp_path.to_str().unwrap()); + + // Rest of the code remains unchanged + let fs_cfg = Config { + writeback: true, + do_import: true, + no_open: true, + inode_file_handles: false, + root_dir: tmp_path.to_string_lossy().to_string(), + ..Default::default() + }; + let fs = PassthroughFs::<()>::new(fs_cfg).unwrap(); + fs.import().unwrap(); + fs + } + + #[cfg(target_os = "linux")] fn passthroughfs_no_open(cfg: bool) { let opts = VfsOptions { + #[cfg(target_os = "linux")] no_open: cfg, ..Default::default() }; let vfs = &Vfs::new(opts); // Assume that fuse kernel supports no_open. + vfs.init(FsOptions::ZERO_MESSAGE_OPEN).unwrap(); let fs_cfg = Config { @@ -1046,6 +940,7 @@ mod tests { .unwrap(); } + #[cfg(target_os = "linux")] #[test] fn test_passthroughfs_no_open() { passthroughfs_no_open(true); @@ -1056,6 +951,7 @@ mod tests { fn test_passthroughfs_inode_file_handles() { log::set_max_level(log::LevelFilter::Trace); + #[cfg(target_os = "linux")] match caps::has_cap(None, CapSet::Effective, Capability::CAP_DAC_READ_SEARCH) { Ok(false) | Err(_) => { println!("invoking open_by_handle_at needs CAP_DAC_READ_SEARCH"); @@ -1064,22 +960,40 @@ mod tests { Ok(true) => {} } - let source = TempDir::new().expect("Cannot create temporary directory."); - let parent_path = - TempDir::new_in(source.as_path()).expect("Cannot create temporary directory."); - let child_path = - TempFile::new_in(parent_path.as_path()).expect("Cannot create temporary file."); + #[cfg(target_os = "linux")] + let (source, parent_path, child_path) = { + let source = TempDir::new().expect("Cannot create temporary directory."); + let parent_path = + TempDir::new_in(source.as_path()).expect("Cannot create temporary directory."); + let child_path = + TempFile::new_in(parent_path.as_path()).expect("Cannot create temporary file."); + (source, parent_path, child_path) + }; + + #[cfg(target_os = "macos")] + let (source, parent_path, child_path) = { + let source = tempdir().expect("Cannot create temporary directory."); + let tmp_path = source.into_path(); + let parent = tempdir_in(&tmp_path).expect("Cannot create temporary directory."); + let parent_path = parent.into_path(); + let child = NamedTempFile::new_in(&parent_path).expect("Cannot create temporary file."); + let (_, child_path) = child.keep().unwrap(); + (tmp_path, parent_path, child_path) + }; let fs_cfg = Config { writeback: true, do_import: true, no_open: true, inode_file_handles: true, + #[cfg(target_os = "linux")] root_dir: source .as_path() .to_str() .expect("source path to string") .to_string(), + #[cfg(target_os = "macos")] + root_dir: source.to_str().expect("source path to string").to_string(), ..Default::default() }; let fs = PassthroughFs::<()>::new(fs_cfg).unwrap(); @@ -1089,24 +1003,38 @@ mod tests { // read a few files to inode map. let parent = CString::new( + #[cfg(target_os = "linux")] parent_path .as_path() .file_name() .unwrap() .to_str() .expect("path to string"), + #[cfg(target_os = "macos")] + parent_path + .file_name() + .unwrap() + .to_str() + .expect("path to string"), ) .unwrap(); let p_entry = fs.lookup(&ctx, ROOT_ID, &parent).unwrap(); let p_inode = p_entry.inode; let child = CString::new( + #[cfg(target_os = "linux")] child_path .as_path() .file_name() .unwrap() .to_str() .expect("path to string"), + #[cfg(target_os = "macos")] + child_path + .file_name() + .unwrap() + .to_str() + .expect("path to string"), ) .unwrap(); let c_entry = fs.lookup(&ctx, p_inode, &child).unwrap(); @@ -1201,24 +1129,43 @@ mod tests { fn test_writeback_open_and_create() { // prepare a fs with writeback cache and open being true, so a write-only opened file // should have read permission as well. + #[cfg(target_os = "linux")] let source = TempDir::new().expect("Cannot create temporary directory."); + #[cfg(target_os = "macos")] + let source = tempdir().expect("Cannot create temporary directory."); + #[cfg(target_os = "linux")] let _ = std::process::Command::new("sh") .arg("-c") .arg(format!("touch {}/existfile", source.as_path().to_str().unwrap()).as_str()) .output() .unwrap(); + #[cfg(target_os = "macos")] + let _ = std::process::Command::new("sh") + .arg("-c") + .arg(format!("touch {}/existfile", source.path().to_str().unwrap()).as_str()) + .output() + .unwrap(); let fs_cfg = Config { writeback: true, do_import: true, no_open: false, inode_file_handles: false, + #[cfg(target_os = "linux")] root_dir: source .as_path() .to_str() .expect("source path to string") .to_string(), + #[cfg(target_os = "macos")] + root_dir: source + .path() + .to_str() + .expect("source path to string") + .to_string(), ..Default::default() }; + #[cfg(target_os = "macos")] + set_dir_permissions(source.path().to_str().unwrap()); let mut fs = PassthroughFs::<()>::new(fs_cfg).unwrap(); fs.writeback = AtomicBool::new(true); fs.no_open = AtomicBool::new(false); @@ -1233,7 +1180,10 @@ mod tests { let fname = CString::new("testfile").unwrap(); let args = CreateIn { flags: libc::O_WRONLY as u32, + #[cfg(target_os = "linux")] mode: 0644, + #[cfg(target_os = "macos")] + mode: 0o40777, umask: 0, fuse_flags: 0, }; @@ -1262,11 +1212,26 @@ mod tests { fn test_passthroughfs_dir_timeout() { log::set_max_level(log::LevelFilter::Trace); - let source = TempDir::new().expect("Cannot create temporary directory."); - let parent_path = - TempDir::new_in(source.as_path()).expect("Cannot create temporary directory."); - let child_path = - TempFile::new_in(parent_path.as_path()).expect("Cannot create temporary file."); + #[cfg(target_os = "linux")] + let (source, parent_path, child_path) = { + let source = TempDir::new().expect("Cannot create temporary directory."); + let parent_path = + TempDir::new_in(source.as_path()).expect("Cannot create temporary directory."); + let child_path = + TempFile::new_in(parent_path.as_path()).expect("Cannot create temporary file."); + (source, parent_path, child_path) + }; + + #[cfg(target_os = "macos")] + let (source, parent_path, child_path) = { + let source = tempdir().expect("Cannot create temporary directory."); + let tmp_path = source.into_path(); + let parent = tempdir_in(&tmp_path).expect("Cannot create temporary directory."); + let parent_path = parent.into_path(); + let child = NamedTempFile::new_in(&parent_path).expect("Cannot create temporary file."); + let (_, child_path) = child.keep().unwrap(); + (tmp_path, parent_path, child_path) + }; // passthroughfs with cache=none, but non-zero dir entry/attr timeout. let fs_cfg = Config { @@ -1324,8 +1289,21 @@ mod tests { #[test] fn test_stable_inode() { use std::os::unix::fs::MetadataExt; - let source = TempDir::new().expect("Cannot create temporary directory."); - let child_path = TempFile::new_in(source.as_path()).expect("Cannot create temporary file."); + #[cfg(target_os = "linux")] + let (source, child_path) = { + let source = TempDir::new().expect("Cannot create temporary directory."); + let child_path = + TempFile::new_in(source.as_path()).expect("Cannot create temporary file."); + (source, child_path) + }; + #[cfg(target_os = "macos")] + let (source, chile_file, child_path) = { + let source = tempdir().expect("Cannot create temporary directory."); + let tmp_path = source.into_path(); + let child = NamedTempFile::new_in(&tmp_path).expect("Cannot create temporary file."); + let (chile_file, child_path) = child.keep().unwrap(); + (tmp_path, chile_file, child_path) + }; let child = CString::new( child_path .as_path() @@ -1335,7 +1313,10 @@ mod tests { .expect("path to string"), ) .unwrap(); + #[cfg(target_os = "linux")] let meta = child_path.as_file().metadata().unwrap(); + #[cfg(target_os = "macos")] + let meta = chile_file.metadata().unwrap(); let ctx = Context::default(); { let fs_cfg = Config { @@ -1390,11 +1371,16 @@ mod tests { let id = InodeId { ino: MAX_HOST_INO + 1, dev: 1, + #[cfg(target_os = "linux")] mnt: 1, }; // Default + #[cfg(target_os = "linux")] let inode = fs.allocate_inode(&m, &id, None).unwrap(); + + #[cfg(target_os = "macos")] + let inode = fs.allocate_inode(&m, &id).unwrap(); assert_eq!(inode, 2); } @@ -1405,10 +1391,15 @@ mod tests { let id = InodeId { ino: 12345, dev: 1, + #[cfg(target_os = "linux")] mnt: 1, }; // direct return host inode 12345 + #[cfg(target_os = "linux")] let inode = fs.allocate_inode(&m, &id, None).unwrap(); + + #[cfg(target_os = "macos")] + let inode = fs.allocate_inode(&m, &id).unwrap(); assert_eq!(inode & MAX_HOST_INO, 12345) } @@ -1419,17 +1410,48 @@ mod tests { let id = InodeId { ino: MAX_HOST_INO + 1, dev: 1, + #[cfg(target_os = "linux")] mnt: 1, }; // allocate a virtual inode + #[cfg(target_os = "linux")] let inode = fs.allocate_inode(&m, &id, None).unwrap(); + + #[cfg(target_os = "macos")] + let inode = fs.allocate_inode(&m, &id).unwrap(); assert_eq!(inode & MAX_HOST_INO, 2); + #[cfg(target_os = "linux")] let file = TempFile::new().expect("Cannot create temporary file."); + #[cfg(target_os = "macos")] + let (_, file, child_path) = { + let source = tempdir().expect("Cannot create temporary directory."); + let tmp_path = source.into_path(); + let child = + NamedTempFile::new_in(&tmp_path).expect("Cannot create temporary file."); + let (chile_file, child_path) = child.keep().unwrap(); + (tmp_path, chile_file, child_path) + }; + #[cfg(target_os = "linux")] let mode = file.as_file().metadata().unwrap().mode(); + #[cfg(target_os = "macos")] + let (mode, cstring_path) = { + let mode = file.metadata().unwrap().mode(); + let cstring_path = CString::new(child_path.to_string_lossy().to_string()) + .expect("Failed to convert to CString"); + (mode as u16, cstring_path) + }; + #[cfg(target_os = "linux")] let inode_data = InodeData::new(inode, InodeHandle::File(file.into_file()), 1, id, mode); + #[cfg(target_os = "macos")] + let inode_data = + InodeData::new(inode, InodeHandle::File(file, cstring_path), 1, id, mode); m.insert(Arc::new(inode_data)); + #[cfg(target_os = "linux")] let inode = fs.allocate_inode(&m, &id, None).unwrap(); + + #[cfg(target_os = "macos")] + let inode = fs.allocate_inode(&m, &id).unwrap(); assert_eq!(inode & MAX_HOST_INO, 2); } } @@ -1539,17 +1561,28 @@ mod tests { #[test] fn test_generic_read_write_noopen() { + #[cfg(target_os = "linux")] let tmpdir = TempDir::new().expect("Cannot create temporary directory."); + #[cfg(target_os = "macos")] + let source = tempdir().expect("Cannot create temporary directory."); + #[cfg(target_os = "macos")] + let tmp_path = source.into_path(); // Prepare passthrough fs. let fs_cfg = Config { do_import: false, no_open: true, + #[cfg(target_os = "linux")] root_dir: tmpdir.as_path().to_string_lossy().to_string(), + #[cfg(target_os = "macos")] + root_dir: tmp_path.to_string_lossy().to_string(), ..Default::default() }; let fs = PassthroughFs::<()>::new(fs_cfg.clone()).unwrap(); fs.import().unwrap(); + #[cfg(target_os = "linux")] fs.init(FsOptions::ZERO_MESSAGE_OPEN).unwrap(); + #[cfg(target_os = "macos")] + fs.init(FsOptions::ASYNC_READ).unwrap(); fs.mount().unwrap(); // Create a new file for testing. @@ -1572,8 +1605,12 @@ mod tests { // Write on the inode let data = b"hello world"; // Write to one intermidiate temp file. + #[cfg(target_os = "linux")] let buffer_file = TempFile::new().expect("Cannot create temporary file."); + #[cfg(target_os = "linux")] let mut buffer_file = buffer_file.into_file(); + #[cfg(target_os = "macos")] + let mut buffer_file = tempfile().expect("Cannot create temporary file."); buffer_file.write_all(data).unwrap(); let _ = buffer_file.flush(); @@ -1602,8 +1639,12 @@ mod tests { assert_eq!(write_sz, data.len()); // Create a new temp file as read buffer. + #[cfg(target_os = "linux")] let read_buffer_file = TempFile::new().expect("Cannot create temporary file."); + #[cfg(target_os = "linux")] let mut read_buffer_file = read_buffer_file.into_file(); + #[cfg(target_os = "macos")] + let mut read_buffer_file = tempfile().expect("Cannot create temporary file."); let read_sz = fs .read( &ctx, diff --git a/src/passthrough/passthrough_fs_linux.rs b/src/passthrough/passthrough_fs_linux.rs new file mode 100644 index 000000000..d57c31876 --- /dev/null +++ b/src/passthrough/passthrough_fs_linux.rs @@ -0,0 +1,330 @@ +// Copyright (C) 2023 Alibaba Cloud. All rights reserved. +// Copyright 2021 Red Hat, Inc. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE-BSD-3-Clause file. + +use std::{ + ffi::{CStr, OsString}, + fs::File, + io, + ops::Deref, + os::{ + fd::{AsRawFd, RawFd}, + unix::ffi::OsStringExt, + }, + path::PathBuf, + sync::{atomic::Ordering, Arc}, +}; + +use vm_memory::bitmap::BitmapSlice; + +use crate::{abi::fuse_abi::Opcode, passthrough::util::einval}; + +use super::{ + file_handle::{FileHandle, OpenableFileHandle}, + inode_store::{InodeId, InodeStore}, + statx::{statx, StatExt}, + util::{enosys, eperm, openat, reopen_fd_through_proc, stat_fd}, + Inode, InodeData, InodeFile, InodeMap, PassthroughFs, MAX_HOST_INO, +}; + +pub type InoT = libc::ino64_t; +pub type InodeMode = u32; +pub type LibcStat = libc::stat64; +pub type OffT = libc::off64_t; +pub type StatVfs = libc::statvfs64; + +#[derive(Debug)] +pub enum InodeHandle { + File(File), + Handle(Arc), +} + +impl InodeHandle { + pub fn file_handle(&self) -> Option<&FileHandle> { + match self { + InodeHandle::File(_) => None, + InodeHandle::Handle(h) => Some(h.file_handle().deref()), + } + } + + pub fn get_file(&self) -> io::Result> { + match self { + InodeHandle::File(f) => Ok(InodeFile::Ref(f)), + InodeHandle::Handle(h) => { + let f = h.open(libc::O_PATH)?; + Ok(InodeFile::Owned(f)) + } + } + } + + pub fn open_file(&self, flags: libc::c_int, proc_self_fd: &File) -> io::Result { + match self { + InodeHandle::File(f) => reopen_fd_through_proc(f, flags, proc_self_fd), + InodeHandle::Handle(h) => h.open(flags), + } + } + + pub fn stat(&self) -> io::Result { + match self { + InodeHandle::File(f) => stat_fd(f, None), + InodeHandle::Handle(_h) => { + let file = self.get_file()?; + stat_fd(&file, None) + } + } + } +} + +impl InodeData { + pub fn open_file(&self, flags: libc::c_int, proc_self_fd: &File) -> io::Result { + self.handle.open_file(flags, proc_self_fd) + } +} + +impl InodeMap { + fn get_inode_locked( + inodes: &InodeStore, + id: &InodeId, + handle: Option<&FileHandle>, + ) -> Option { + match handle { + Some(h) => inodes.inode_by_handle(h).copied(), + None => inodes.inode_by_id(id).copied(), + } + } + + pub fn get_alt(&self, id: &InodeId, handle: Option<&FileHandle>) -> Option> { + // Do not expect poisoned lock here, so safe to unwrap(). + let inodes = self.inodes.read().unwrap(); + + Self::get_alt_locked(inodes.deref(), id, handle) + } + + pub fn get_alt_locked( + inodes: &InodeStore, + id: &InodeId, + handle: Option<&FileHandle>, + ) -> Option> { + handle + .and_then(|h| inodes.get_by_handle(h)) + .or_else(|| { + inodes.get_by_id(id).filter(|data| { + // When we have to fall back to looking up an inode by its IDs, ensure that + // we hit an entry that does not have a file handle. Entries with file + // handles must also have a handle alt key, so if we have not found it by + // that handle alt key, we must have found an entry with a mismatching + // handle; i.e. an entry for a different file, even though it has the same + // inode ID. + // (This can happen when we look up a new file that has reused the inode ID + // of some previously unlinked inode we still have in `.inodes`.) + handle.is_none() || data.handle.file_handle().is_none() + }) + }) + .map(Arc::clone) + } +} + +impl PassthroughFs { + pub fn keep_fds(&self) -> Vec { + vec![self.proc_self_fd.as_raw_fd()] + } + + fn readlinkat(dfd: i32, pathname: &CStr) -> io::Result { + let mut buf = Vec::with_capacity(libc::PATH_MAX as usize); + + // Safe because the kernel will only write data to buf and we check the return value + let buf_read = unsafe { + libc::readlinkat( + dfd, + pathname.as_ptr(), + buf.as_mut_ptr() as *mut libc::c_char, + buf.capacity(), + ) + }; + if buf_read < 0 { + error!("fuse: readlinkat error"); + return Err(io::Error::last_os_error()); + } + + // Safe because we trust the value returned by kernel. + unsafe { buf.set_len(buf_read as usize) }; + buf.shrink_to_fit(); + + // Be careful: + // - readlink() does not append a terminating null byte to buf + // - OsString instances are not NUL terminated + Ok(PathBuf::from(OsString::from_vec(buf))) + } + + /// Get the file pathname corresponding to the Inode + /// This function is used by Nydus blobfs + pub fn readlinkat_proc_file(&self, inode: Inode) -> io::Result { + use std::ffi::CString; + + let data = self.inode_map.get(inode)?; + let file = data.get_file()?; + let pathname = CString::new(format!("{}", file.as_raw_fd())) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; + + Self::readlinkat(self.proc_self_fd.as_raw_fd(), &pathname) + } + + pub fn open_file( + dfd: &impl AsRawFd, + pathname: &CStr, + flags: i32, + mode: u32, + ) -> io::Result { + openat(dfd, pathname, flags, mode) + } + + fn open_file_restricted( + &self, + dir: &impl AsRawFd, + pathname: &CStr, + flags: i32, + mode: u32, + ) -> io::Result { + let flags = libc::O_NOFOLLOW | libc::O_CLOEXEC | flags; + + // TODO + //if self.os_facts.has_openat2 { + // oslib::do_open_relative_to(dir, pathname, flags, mode) + //} else { + openat(dir, pathname, flags, mode) + //} + } + + /// Create a File or File Handle for `name` under directory `dir_fd` to support `lookup()`. + pub fn open_file_and_handle( + &self, + dir: &impl AsRawFd, + name: &CStr, + ) -> io::Result<(File, Option, StatExt)> { + let path_file = self.open_file_restricted(dir, name, libc::O_PATH, 0)?; + let st = statx(&path_file, None)?; + let handle = if self.cfg.inode_file_handles { + FileHandle::from_fd(&path_file)? + } else { + None + }; + + Ok((path_file, handle, st)) + } + + pub fn to_openable_handle(&self, fh: FileHandle) -> io::Result> { + fh.into_openable(&self.mount_fds, |fd, flags, _mode| { + reopen_fd_through_proc(&fd, flags, &self.proc_self_fd) + }) + .map(Arc::new) + .map_err(|e| { + if !e.silent() { + error!("{}", e); + } + e.into_inner() + }) + } + + pub fn allocate_inode( + &self, + inodes: &InodeStore, + id: &InodeId, + handle_opt: Option<&FileHandle>, + ) -> io::Result { + if !self.cfg.use_host_ino { + // If the inode has already been assigned before, the new inode is not reassigned, + // ensuring that the same file is always the same inode + Ok(InodeMap::get_inode_locked(inodes, id, handle_opt) + .unwrap_or_else(|| self.next_inode.fetch_add(1, Ordering::Relaxed))) + } else { + let inode = if id.ino > MAX_HOST_INO { + // Prefer looking for previous mappings from memory + match InodeMap::get_inode_locked(inodes, id, handle_opt) { + Some(ino) => ino, + None => self.ino_allocator.get_unique_inode(id)?, + } + } else { + self.ino_allocator.get_unique_inode(id)? + }; + + Ok(inode) + } + } + + // When seal_size is set, we don't allow operations that could change file size nor allocate + // space beyond EOF + pub fn seal_size_check( + &self, + opcode: Opcode, + file_size: u64, + offset: u64, + size: u64, + mode: i32, + ) -> io::Result<()> { + if offset.checked_add(size).is_none() { + error!( + "fuse: {:?}: invalid `offset` + `size` ({}+{}) overflows u64::MAX", + opcode, offset, size + ); + return Err(einval()); + } + + match opcode { + // write should not exceed the file size. + Opcode::Write => { + if size + offset > file_size { + return Err(eperm()); + } + } + + Opcode::Fallocate => { + let op = mode & !(libc::FALLOC_FL_KEEP_SIZE | libc::FALLOC_FL_UNSHARE_RANGE); + match op { + // Allocate, punch and zero, must not change file size. + 0 | libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_ZERO_RANGE => { + if size + offset > file_size { + return Err(eperm()); + } + } + // collapse and insert will change file size, forbid. + libc::FALLOC_FL_COLLAPSE_RANGE | libc::FALLOC_FL_INSERT_RANGE => { + return Err(eperm()); + } + // Invalid operation + _ => return Err(einval()), + } + } + + // setattr operation should be handled in setattr handler. + _ => return Err(enosys()), + } + + Ok(()) + } +} + +pub struct CapFsetid {} + +impl Drop for CapFsetid { + fn drop(&mut self) { + if let Err(e) = caps::raise(None, caps::CapSet::Effective, caps::Capability::CAP_FSETID) { + error!("fail to restore thread cap_fsetid: {}", e); + }; + } +} + +pub fn drop_cap_fsetid() -> io::Result> { + if !caps::has_cap(None, caps::CapSet::Effective, caps::Capability::CAP_FSETID) + .map_err(|_e| io::Error::new(io::ErrorKind::PermissionDenied, "no CAP_FSETID capability"))? + { + return Ok(None); + } + caps::drop(None, caps::CapSet::Effective, caps::Capability::CAP_FSETID).map_err(|_e| { + io::Error::new( + io::ErrorKind::PermissionDenied, + "failed to drop CAP_FSETID capability", + ) + })?; + Ok(Some(CapFsetid {})) +} diff --git a/src/passthrough/passthrough_fs_macos.rs b/src/passthrough/passthrough_fs_macos.rs new file mode 100644 index 000000000..b6335d66e --- /dev/null +++ b/src/passthrough/passthrough_fs_macos.rs @@ -0,0 +1,152 @@ +// Copyright (C) 2023 Alibaba Cloud. All rights reserved. +// Copyright 2021 Red Hat, Inc. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE-BSD-3-Clause file. + +use std::{ + ffi::{CStr, CString}, + fs::File, + io, + ops::Deref, + sync::{atomic::Ordering, Arc}, +}; + +use vm_memory::bitmap::BitmapSlice; + +use crate::{abi::fuse_abi::Opcode, passthrough::util::einval}; + +use super::{ + inode_store::{InodeId, InodeStore}, + stat::{open, stat, Stat}, + util::{enosys, eperm}, + Inode, InodeData, InodeFile, InodeMap, PassthroughFs, MAX_HOST_INO, +}; + +pub type InoT = libc::ino_t; +pub type InodeMode = u16; +pub type LibcStat = libc::stat; +pub type OffT = libc::off_t; +pub type StatVfs = libc::statvfs; + +#[derive(Debug)] +pub enum InodeHandle { + File(File, CString), +} + +impl InodeHandle { + pub fn get_file(&self) -> io::Result> { + match self { + InodeHandle::File(f, _) => Ok(InodeFile::Ref(f)), + } + } + + fn open_file(&self, flags: libc::c_int) -> io::Result { + match self { + InodeHandle::File(_, pathname) => open(pathname, flags, 0), + } + } + + pub fn stat(&self) -> io::Result { + match self { + InodeHandle::File(f, _) => stat(f), + } + } + + fn get_path(&self) -> io::Result { + match self { + InodeHandle::File(_, pathname) => Ok(pathname.clone()), + } + } +} + +impl InodeData { + pub fn open_file(&self, flags: libc::c_int) -> io::Result { + self.handle.open_file(flags) + } + + pub fn get_path(&self) -> io::Result { + self.handle.get_path() + } +} + +impl InodeMap { + pub fn get_inode_locked(inodes: &InodeStore, id: &InodeId) -> Option { + inodes.inode_by_id(id).copied() + } + + pub fn get_alt(&self, id: &InodeId) -> Option> { + let inodes = self.inodes.read().unwrap(); + + Self::get_alt_locked(inodes.deref(), id) + } + + pub fn get_alt_locked(inodes: &InodeStore, id: &InodeId) -> Option> { + inodes.get_by_id(id).map(Arc::clone) + } +} + +impl PassthroughFs { + pub fn open_file(&self, pathname: &CStr) -> io::Result<(File, Stat)> { + let path_file = self.open_file_restricted(pathname, libc::O_NOFOLLOW, 0o40777)?; + let st = stat(&path_file)?; + + Ok((path_file, st)) + } + + fn open_file_restricted(&self, pathname: &CStr, flags: i32, mode: u32) -> io::Result { + let flags = libc::O_NOFOLLOW | libc::O_CLOEXEC | flags; + open(pathname, flags, mode) + } + + pub fn allocate_inode(&self, inodes: &InodeStore, id: &InodeId) -> io::Result { + if !self.cfg.use_host_ino { + // If the inode has already been assigned before, the new inode is not reassigned, + // ensuring that the same file is always the same inode + Ok(InodeMap::get_inode_locked(inodes, id) + .unwrap_or_else(|| self.next_inode.fetch_add(1, Ordering::Relaxed))) + } else { + let inode = if id.ino > MAX_HOST_INO { + // Prefer looking for previous mappings from memory + match InodeMap::get_inode_locked(inodes, id) { + Some(ino) => ino, + None => self.ino_allocator.get_unique_inode(id)?, + } + } else { + self.ino_allocator.get_unique_inode(id)? + }; + + Ok(inode) + } + } + + pub fn seal_size_check( + &self, + opcode: Opcode, + file_size: u64, + offset: u64, + size: u64, + _mode: i32, + ) -> io::Result<()> { + if offset.checked_add(size).is_none() { + error!( + "fuse: {:?}: invalid `offset` + `size` ({}+{}) overflows u64::MAX", + opcode, offset, size + ); + return Err(einval()); + } + + match opcode { + // write should not exceed the file size. + Opcode::Write => { + if size + offset > file_size { + return Err(eperm()); + } + } + + // setattr operation should be handled in setattr handler. + _ => return Err(enosys()), + } + + Ok(()) + } +} diff --git a/src/passthrough/stat.rs b/src/passthrough/stat.rs new file mode 100644 index 000000000..9898decb4 --- /dev/null +++ b/src/passthrough/stat.rs @@ -0,0 +1,52 @@ +// Copyright (C) 2020-2022 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Fuse passthrough file system, mirroring an existing FS hierarchy. +//! +//! This file system mirrors the existing file system hierarchy of the system, starting at the +//! root file system. This is implemented by just "passing through" all requests to the +//! corresponding underlying file system. +//! +//! The code is derived from the +//! [CrosVM](https://chromium.googlesource.com/chromiumos/platform/crosvm/) project, +//! with heavy modification/enhancements from Alibaba Cloud OS team. +use std::{ + ffi::{CStr, CString}, + fs::File, + io, + mem::MaybeUninit, + os::fd::{AsRawFd, FromRawFd}, +}; + +pub struct Stat { + pub st: libc::stat, +} + +pub fn stat(path_file: &impl AsRawFd) -> io::Result { + let mut st_ui = MaybeUninit::::zeroed(); + + let res = unsafe { libc::fstat(path_file.as_raw_fd(), st_ui.as_mut_ptr()) }; + if res >= 0 { + let st = unsafe { st_ui.assume_init() }; + Ok(Stat { st }) + } else { + Err(io::Error::last_os_error()) + } +} + +pub fn open(path: &CStr, flags: libc::c_int, mode: u32) -> io::Result { + let path_cstr = CString::new(path.to_bytes()).expect("CString conversion failed"); + let fd = unsafe { + if flags & libc::O_CREAT == libc::O_CREAT { + libc::open(path_cstr.as_ptr(), flags, mode) + } else { + libc::open(path_cstr.as_ptr(), flags) + } + }; + + if fd >= 0 { + Ok(unsafe { File::from_raw_fd(fd) }) + } else { + Err(io::Error::last_os_error()) + } +} diff --git a/src/passthrough/sync_io.rs b/src/passthrough/sync_io.rs index 584d2b93d..f081c3677 100644 --- a/src/passthrough/sync_io.rs +++ b/src/passthrough/sync_io.rs @@ -8,23 +8,26 @@ use std::ffi::{CStr, CString}; use std::fs::File; use std::io; -use std::mem::{self, size_of, ManuallyDrop, MaybeUninit}; +use std::mem::{ManuallyDrop, MaybeUninit}; use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; use std::sync::atomic::Ordering; use std::sync::Arc; use std::time::Duration; -use super::os_compat::LinuxDirent64; +#[cfg(target_os = "macos")] +use super::stat::stat as stat_fd; +#[cfg(target_os = "linux")] use super::util::stat_fd; use super::*; -use crate::abi::fuse_abi::{CreateIn, Opcode, FOPEN_IN_KILL_SUIDGID, WRITE_KILL_PRIV}; +use crate::abi::fuse_abi::{CreateIn, Opcode}; +#[cfg(target_os = "linux")] +use crate::abi::fuse_abi::{FOPEN_IN_KILL_SUIDGID, WRITE_KILL_PRIV}; #[cfg(any(feature = "vhost-user-fs", feature = "virtiofs"))] use crate::abi::virtio_fs; use crate::api::filesystem::{ Context, DirEntry, Entry, FileSystem, FsOptions, GetxattrReply, ListxattrReply, OpenOptions, SetattrValid, ZeroCopyReader, ZeroCopyWriter, }; -use crate::bytes_to_cstr; #[cfg(any(feature = "vhost-user-fs", feature = "virtiofs"))] use crate::transport::FsCacheReqHandler; @@ -35,7 +38,10 @@ impl PassthroughFs { Err(ebadf()) } else { let new_flags = self.get_writeback_open_flags(flags); - data.open_file(new_flags | libc::O_CLOEXEC, &self.proc_self_fd) + #[cfg(target_os = "linux")] + return data.open_file(new_flags | libc::O_CLOEXEC, &self.proc_self_fd); + #[cfg(target_os = "macos")] + return data.open_file(new_flags | libc::O_CLOEXEC); } } @@ -55,133 +61,52 @@ impl PassthroughFs { Ok(()) } - fn do_readdir( + pub fn do_getattr( &self, inode: Inode, - handle: Handle, - size: u32, - offset: u64, - add_entry: &mut dyn FnMut(DirEntry, RawFd) -> io::Result, - ) -> io::Result<()> { - if size == 0 { - return Ok(()); - } - - let mut buf = Vec::::with_capacity(size as usize); - let data = self.get_dirdata(handle, inode, libc::O_RDONLY)?; - - { - // Since we are going to work with the kernel offset, we have to acquire the file lock - // for both the `lseek64` and `getdents64` syscalls to ensure that no other thread - // changes the kernel offset while we are using it. - let (guard, dir) = data.get_file_mut(); - - // Safe because this doesn't modify any memory and we check the return value. - let res = - unsafe { libc::lseek64(dir.as_raw_fd(), offset as libc::off64_t, libc::SEEK_SET) }; - if res < 0 { - return Err(io::Error::last_os_error()); - } - - // Safe because the kernel guarantees that it will only write to `buf` and we check the - // return value. - let res = unsafe { - libc::syscall( - libc::SYS_getdents64, - dir.as_raw_fd(), - buf.as_mut_ptr() as *mut LinuxDirent64, - size as libc::c_int, - ) - }; - if res < 0 { - return Err(io::Error::last_os_error()); - } - - // Safe because we trust the value returned by kernel. - unsafe { buf.set_len(res as usize) }; - - // Explicitly drop the lock so that it's not held while we fill in the fuse buffer. - mem::drop(guard); - } - - let mut rem = &buf[..]; - let orig_rem_len = rem.len(); - while !rem.is_empty() { - // We only use debug asserts here because these values are coming from the kernel and we - // trust them implicitly. - debug_assert!( - rem.len() >= size_of::(), - "fuse: not enough space left in `rem`" - ); - - let (front, back) = rem.split_at(size_of::()); - - let dirent64 = LinuxDirent64::from_slice(front) - .expect("fuse: unable to get LinuxDirent64 from slice"); - - let namelen = dirent64.d_reclen as usize - size_of::(); - debug_assert!( - namelen <= back.len(), - "fuse: back is smaller than `namelen`" - ); - - let name = &back[..namelen]; - let res = if name.starts_with(CURRENT_DIR_CSTR) || name.starts_with(PARENT_DIR_CSTR) { - // We don't want to report the "." and ".." entries. However, returning `Ok(0)` will - // break the loop so return `Ok` with a non-zero value instead. - Ok(1) - } else { - // The Sys_getdents64 in kernel will pad the name with '\0' - // bytes up to 8-byte alignment, so @name may contain a few null - // terminators. This causes an extra lookup from fuse when - // called by readdirplus, because kernel path walking only takes - // name without null terminators, the dentry with more than 1 - // null terminators added by readdirplus doesn't satisfy the - // path walking. - let name = bytes_to_cstr(name) - .map_err(|e| { - error!("fuse: do_readdir: {:?}", e); - einval() - })? - .to_bytes(); - - add_entry( - DirEntry { - ino: dirent64.d_ino, - offset: dirent64.d_off as u64, - type_: u32::from(dirent64.d_ty), - name, - }, - data.borrow_fd().as_raw_fd(), - ) - }; + handle: Option, + ) -> io::Result<(LibcStat, Duration)> { + let st; + let data = self.inode_map.get(inode).map_err(|e| { + error!("fuse: do_getattr ino {} Not find err {:?}", inode, e); + e + })?; - debug_assert!( - rem.len() >= dirent64.d_reclen as usize, - "fuse: rem is smaller than `d_reclen`" - ); - - match res { - Ok(0) => break, - Ok(_) => rem = &rem[dirent64.d_reclen as usize..], - // If there's an error, we can only signal it if we haven't - // stored any entries yet - otherwise we'd end up with wrong - // lookup counts for the entries that are already in the - // buffer. So we return what we've collected until that point. - Err(e) if rem.len() == orig_rem_len => return Err(e), - Err(_) => return Ok(()), - } + // kernel sends 0 as handle in case of no_open, and it depends on fuse server to handle + // this case correctly. + if !self.no_open.load(Ordering::Relaxed) && handle.is_some() { + // Safe as we just checked handle + let hd = self.handle_map.get(handle.unwrap(), inode)?; + st = stat_fd( + hd.get_file(), + #[cfg(target_os = "linux")] + None, + ) + } else { + st = data.handle.stat(); } - Ok(()) + let st = st.map_err(|e| { + error!("fuse: do_getattr stat failed ino {} err {:?}", inode, e); + e + })?; + Ok(( + #[cfg(target_os = "linux")] + st, + #[cfg(target_os = "macos")] + st.st, + self.cfg.attr_timeout, + )) } fn do_open( &self, inode: Inode, flags: u32, - fuse_flags: u32, + #[cfg(target_os = "linux")] fuse_flags: u32, + #[cfg(target_os = "macos")] _fuse_flags: u32, ) -> io::Result<(Option, OpenOptions, Option)> { + #[cfg(target_os = "linux")] let killpriv = if self.killpriv_v2.load(Ordering::Relaxed) && (fuse_flags & FOPEN_IN_KILL_SUIDGID != 0) { @@ -190,6 +115,7 @@ impl PassthroughFs { None }; let file = self.open_inode(inode, flags as i32)?; + #[cfg(target_os = "linux")] drop(killpriv); let data = HandleData::new(inode, file, flags); @@ -204,14 +130,23 @@ impl PassthroughFs { flags & (libc::O_DIRECTORY as u32) == 0, ), CachePolicy::Metadata => { + #[cfg(target_os = "linux")] if flags & (libc::O_DIRECTORY as u32) == 0 { opts |= OpenOptions::DIRECT_IO; } else { opts |= OpenOptions::CACHE_DIR | OpenOptions::KEEP_CACHE; } + + #[cfg(target_os = "macos")] + if flags & (libc::O_DIRECTORY as u32) == 0 { + opts |= OpenOptions::DIRECT_IO; + } else { + opts |= OpenOptions::KEEP_CACHE; + } } CachePolicy::Always => { opts |= OpenOptions::KEEP_CACHE; + #[cfg(target_os = "linux")] if flags & (libc::O_DIRECTORY as u32) != 0 { opts |= OpenOptions::CACHE_DIR; } @@ -222,40 +157,12 @@ impl PassthroughFs { Ok((Some(handle), opts, None)) } - fn do_getattr( - &self, - inode: Inode, - handle: Option, - ) -> io::Result<(libc::stat64, Duration)> { - let st; - let data = self.inode_map.get(inode).map_err(|e| { - error!("fuse: do_getattr ino {} Not find err {:?}", inode, e); - e - })?; - - // kernel sends 0 as handle in case of no_open, and it depends on fuse server to handle - // this case correctly. - if !self.no_open.load(Ordering::Relaxed) && handle.is_some() { - // Safe as we just checked handle - let hd = self.handle_map.get(handle.unwrap(), inode)?; - st = stat_fd(hd.get_file(), None); - } else { - st = data.handle.stat(); - } - - let st = st.map_err(|e| { - error!("fuse: do_getattr stat failed ino {} err {:?}", inode, e); - e - })?; - - Ok((st, self.cfg.attr_timeout)) - } - fn do_unlink(&self, parent: Inode, name: &CStr, flags: libc::c_int) -> io::Result<()> { let data = self.inode_map.get(parent)?; let file = data.get_file()?; // Safe because this doesn't modify any memory and we check the return value. let res = unsafe { libc::unlinkat(file.as_raw_fd(), name.as_ptr(), flags) }; + if res == 0 { Ok(()) } else { @@ -263,7 +170,7 @@ impl PassthroughFs { } } - fn get_dirdata( + pub fn get_dirdata( &self, handle: Handle, inode: Inode, @@ -298,6 +205,7 @@ impl FileSystem for PassthroughFs { type Inode = Inode; type Handle = Handle; + #[cfg(target_os = "linux")] fn init(&self, capable: FsOptions) -> io::Result { if self.cfg.do_import { self.import()?; @@ -306,6 +214,7 @@ impl FileSystem for PassthroughFs { let mut opts = FsOptions::DO_READDIRPLUS | FsOptions::READDIRPLUS_AUTO; // !cfg.do_import means we are under vfs, in which case capable is already // negotiated and must be honored. + if (!self.cfg.do_import || self.cfg.writeback) && capable.contains(FsOptions::WRITEBACK_CACHE) { @@ -341,6 +250,27 @@ impl FileSystem for PassthroughFs { Ok(opts) } + #[cfg(target_os = "macos")] + fn init(&self, _capable: FsOptions) -> io::Result { + if self.cfg.do_import { + self.import()?; + } + + let opts = FsOptions::ASYNC_READ | FsOptions::BIG_WRITES | FsOptions::ATOMIC_O_TRUNC; + + if !self.cfg.do_import || self.cfg.writeback { + self.writeback.store(true, Ordering::Relaxed); + } + if !self.cfg.do_import || self.cfg.no_open { + self.no_open.store(true, Ordering::Relaxed); + } + if !self.cfg.do_import || self.cfg.no_opendir { + self.no_opendir.store(true, Ordering::Relaxed); + } + + Ok(opts) + } + fn destroy(&self) { self.handle_map.clear(); self.inode_map.clear(); @@ -350,17 +280,24 @@ impl FileSystem for PassthroughFs { }; } - fn statfs(&self, _ctx: &Context, inode: Inode) -> io::Result { - let mut out = MaybeUninit::::zeroed(); + fn statfs(&self, _ctx: &Context, inode: Inode) -> io::Result { + let mut out = MaybeUninit::::zeroed(); let data = self.inode_map.get(inode)?; let file = data.get_file()?; // Safe because this will only modify `out` and we check the return value. + #[cfg(target_os = "linux")] match unsafe { libc::fstatvfs64(file.as_raw_fd(), out.as_mut_ptr()) } { // Safe because the kernel guarantees that `out` has been initialized. 0 => Ok(unsafe { out.assume_init() }), _ => Err(io::Error::last_os_error()), } + #[cfg(target_os = "macos")] + match unsafe { libc::fstatvfs(file.as_raw_fd(), out.as_mut_ptr()) } { + // Safe because the kernel guarantees that `out` has been initialized. + 0 => Ok(unsafe { out.assume_init() }), + _ => Err(io::Error::last_os_error()), + } } fn lookup(&self, _ctx: &Context, parent: Inode, name: &CStr) -> io::Result { @@ -427,7 +364,18 @@ impl FileSystem for PassthroughFs { let file = data.get_file()?; // Safe because this doesn't modify any memory and we check the return value. - unsafe { libc::mkdirat(file.as_raw_fd(), name.as_ptr(), mode & !umask) } + #[cfg(target_os = "macos")] + unsafe { + libc::mkdirat( + file.as_raw_fd(), + name.as_ptr(), + (mode & !umask) as libc::mode_t, + ) + } + #[cfg(target_os = "linux")] + unsafe { + libc::mkdirat(file.as_raw_fd(), name.as_ptr(), mode & !umask) + } }; if res < 0 { return Err(io::Error::last_os_error()); @@ -570,6 +518,7 @@ impl FileSystem for PassthroughFs { // open_inode(). None => { // Cap restored when _killpriv is dropped + #[cfg(target_os = "linux")] let _killpriv = if self.killpriv_v2.load(Ordering::Relaxed) && (args.fuse_flags & FOPEN_IN_KILL_SUIDGID != 0) { @@ -683,7 +632,8 @@ impl FileSystem for PassthroughFs { _lock_owner: Option, _delayed_write: bool, flags: u32, - fuse_flags: u32, + #[cfg(target_os = "linux")] fuse_flags: u32, + #[cfg(target_os = "macos")] _fuse_flags: u32, ) -> io::Result { let data = self.get_data(handle, inode, libc::O_RDWR)?; @@ -695,13 +645,18 @@ impl FileSystem for PassthroughFs { self.check_fd_flags(data.clone(), f.as_raw_fd(), flags)?; if self.seal_size.load(Ordering::Relaxed) { + #[cfg(target_os = "linux")] let st = stat_fd(&f, None)?; + + #[cfg(target_os = "macos")] + let st = stat(&f)?.st; self.seal_size_check(Opcode::Write, st.st_size as u64, offset, size as u64, 0)?; } let mut f = ManuallyDrop::new(f); // Cap restored when _killpriv is dropped + #[cfg(target_os = "linux")] let _killpriv = if self.killpriv_v2.load(Ordering::Relaxed) && (fuse_flags & WRITE_KILL_PRIV != 0) { self::drop_cap_fsetid()? @@ -717,7 +672,7 @@ impl FileSystem for PassthroughFs { _ctx: &Context, inode: Inode, handle: Option, - ) -> io::Result<(libc::stat64, Duration)> { + ) -> io::Result<(LibcStat, Duration)> { self.do_getattr(inode, handle) } @@ -725,10 +680,10 @@ impl FileSystem for PassthroughFs { &self, _ctx: &Context, inode: Inode, - attr: libc::stat64, + attr: LibcStat, handle: Option, valid: SetattrValid, - ) -> io::Result<(libc::stat64, Duration)> { + ) -> io::Result<(LibcStat, Duration)> { let inode_data = self.inode_map.get(inode)?; enum Data { @@ -737,6 +692,7 @@ impl FileSystem for PassthroughFs { } let file = inode_data.get_file()?; + #[cfg(target_os = "linux")] let data = if self.no_open.load(Ordering::Relaxed) { let pathname = CString::new(format!("{}", file.as_raw_fd())) .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; @@ -753,6 +709,21 @@ impl FileSystem for PassthroughFs { } }; + #[cfg(target_os = "macos")] + let data = if self.no_open.load(Ordering::Relaxed) { + let pathname = inode_data.get_path()?; + Data::ProcPath(pathname) + } else { + // If we have a handle then use it otherwise get a new fd from the inode. + if let Some(handle) = handle { + let hd = self.handle_map.get(handle, inode)?; + Data::Handle(hd) + } else { + let pathname = inode_data.get_path()?; + Data::ProcPath(pathname) + } + }; + if valid.contains(SetattrValid::SIZE) && self.seal_size.load(Ordering::Relaxed) { return Err(io::Error::from_raw_os_error(libc::EPERM)); } @@ -762,9 +733,12 @@ impl FileSystem for PassthroughFs { let res = unsafe { match data { Data::Handle(ref h) => libc::fchmod(h.borrow_fd().as_raw_fd(), attr.st_mode), + #[cfg(target_os = "linux")] Data::ProcPath(ref p) => { libc::fchmodat(self.proc_self_fd.as_raw_fd(), p.as_ptr(), attr.st_mode, 0) } + #[cfg(target_os = "macos")] + Data::ProcPath(ref p) => libc::chmod(p.as_ptr(), attr.st_mode), } }; if res < 0 { @@ -790,6 +764,7 @@ impl FileSystem for PassthroughFs { let empty = unsafe { CStr::from_bytes_with_nul_unchecked(EMPTY_CSTR) }; // Safe because this doesn't modify any memory and we check the return value. + #[cfg(target_os = "linux")] let res = unsafe { libc::fchownat( file.as_raw_fd(), @@ -799,6 +774,16 @@ impl FileSystem for PassthroughFs { libc::AT_EMPTY_PATH | libc::AT_SYMLINK_NOFOLLOW, ) }; + #[cfg(target_os = "macos")] + let res = unsafe { + libc::fchownat( + file.as_raw_fd(), + empty.as_ptr(), + uid, + gid, + libc::AT_SYMLINK_NOFOLLOW, + ) + }; if res < 0 { return Err(io::Error::last_os_error()); } @@ -806,6 +791,7 @@ impl FileSystem for PassthroughFs { if valid.contains(SetattrValid::SIZE) { // Cap restored when _killpriv is dropped + #[cfg(target_os = "linux")] let _killpriv = if self.killpriv_v2.load(Ordering::Relaxed) && valid.contains(SetattrValid::KILL_SUIDGID) { @@ -861,9 +847,24 @@ impl FileSystem for PassthroughFs { Data::Handle(ref h) => unsafe { libc::futimens(h.borrow_fd().as_raw_fd(), tvs.as_ptr()) }, + #[cfg(target_os = "linux")] Data::ProcPath(ref p) => unsafe { libc::utimensat(self.proc_self_fd.as_raw_fd(), p.as_ptr(), tvs.as_ptr(), 0) }, + #[cfg(target_os = "macos")] + Data::ProcPath(ref p) => { + let tvs = [ + libc::timeval { + tv_sec: tvs[0].tv_sec, + tv_usec: (tvs[0].tv_nsec / 1000) as i32, + }, + libc::timeval { + tv_sec: tvs[1].tv_sec, + tv_usec: (tvs[1].tv_nsec / 1000) as i32, + }, + ]; + unsafe { libc::utimes(p.as_ptr(), tvs.as_ptr()) } + } }; if res < 0 { return Err(io::Error::last_os_error()); @@ -880,7 +881,8 @@ impl FileSystem for PassthroughFs { oldname: &CStr, newdir: Inode, newname: &CStr, - flags: u32, + #[cfg(target_os = "linux")] flags: u32, + #[cfg(target_os = "macos")] _flags: u32, ) -> io::Result<()> { self.validate_path_component(oldname)?; self.validate_path_component(newname)?; @@ -893,6 +895,7 @@ impl FileSystem for PassthroughFs { // Safe because this doesn't modify any memory and we check the return value. // TODO: Switch to libc::renameat2 once https://github.com/rust-lang/libc/pull/1508 lands // and we have glibc 2.28. + #[cfg(target_os = "linux")] let res = unsafe { libc::syscall( libc::SYS_renameat2, @@ -903,6 +906,16 @@ impl FileSystem for PassthroughFs { flags, ) }; + + #[cfg(target_os = "macos")] + let res = unsafe { + libc::renameat( + old_file.as_raw_fd(), + oldname.as_ptr(), + new_file.as_raw_fd(), + newname.as_ptr(), + ) + }; if res == 0 { Ok(()) } else { @@ -922,12 +935,16 @@ impl FileSystem for PassthroughFs { self.validate_path_component(name)?; let data = self.inode_map.get(parent)?; + #[cfg(target_os = "linux")] let file = data.get_file()?; + #[cfg(target_os = "macos")] + let pathname = data.get_path()?; let res = { let (_uid, _gid) = set_creds(ctx.uid, ctx.gid)?; // Safe because this doesn't modify any memory and we check the return value. + #[cfg(target_os = "linux")] unsafe { libc::mknodat( file.as_raw_fd(), @@ -936,6 +953,14 @@ impl FileSystem for PassthroughFs { u64::from(rdev), ) } + #[cfg(target_os = "macos")] + unsafe { + libc::mknod( + pathname.as_ptr(), + (mode & !umask) as libc::mode_t, + rdev as i32, + ) + } }; if res < 0 { Err(io::Error::last_os_error()) @@ -962,6 +987,7 @@ impl FileSystem for PassthroughFs { let empty = unsafe { CStr::from_bytes_with_nul_unchecked(EMPTY_CSTR) }; // Safe because this doesn't modify any memory and we check the return value. + #[cfg(target_os = "linux")] let res = unsafe { libc::linkat( file.as_raw_fd(), @@ -971,6 +997,16 @@ impl FileSystem for PassthroughFs { libc::AT_EMPTY_PATH, ) }; + #[cfg(target_os = "macos")] + let res = unsafe { + libc::linkat( + file.as_raw_fd(), + empty.as_ptr(), + new_file.as_raw_fd(), + newname.as_ptr(), + libc::AT_FDCWD, + ) + }; if res == 0 { self.do_lookup(newparent, newname) } else { @@ -1063,13 +1099,15 @@ impl FileSystem for PassthroughFs { &self, _ctx: &Context, inode: Inode, - datasync: bool, + #[cfg(target_os = "linux")] datasync: bool, + #[cfg(target_os = "macos")] _datasync: bool, handle: Handle, ) -> io::Result<()> { let data = self.get_data(handle, inode, libc::O_RDONLY)?; let fd = data.borrow_fd(); // Safe because this doesn't modify any memory and we check the return value. + #[cfg(target_os = "linux")] let res = unsafe { if datasync { libc::fdatasync(fd.as_raw_fd()) @@ -1077,6 +1115,8 @@ impl FileSystem for PassthroughFs { libc::fsync(fd.as_raw_fd()) } }; + #[cfg(target_os = "macos")] + let res = unsafe { libc::fsync(fd.as_raw_fd()) }; if res == 0 { Ok(()) } else { @@ -1096,7 +1136,10 @@ impl FileSystem for PassthroughFs { fn access(&self, ctx: &Context, inode: Inode, mask: u32) -> io::Result<()> { let data = self.inode_map.get(inode)?; + #[cfg(target_os = "linux")] let st = stat_fd(&data.get_file()?, None)?; + #[cfg(target_os = "macos")] + let st = stat(&data.get_file()?)?.st; let mode = mask as i32 & (libc::R_OK | libc::W_OK | libc::X_OK); if mode == libc::F_OK { @@ -1149,19 +1192,36 @@ impl FileSystem for PassthroughFs { } let data = self.inode_map.get(inode)?; + #[cfg(target_os = "linux")] let file = data.get_file()?; + #[cfg(target_os = "linux")] let pathname = CString::new(format!("/proc/self/fd/{}", file.as_raw_fd())) .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; + #[cfg(target_os = "macos")] + let pathname = data.get_path()?; + // The f{set,get,remove,list}xattr functions don't work on an fd opened with `O_PATH` so we // need to use the {set,get,remove,list}xattr variants. // Safe because this doesn't modify any memory and we check the return value. + #[cfg(target_os = "linux")] + let res = unsafe { + libc::setxattr( + pathname.as_ptr(), + name.as_ptr(), + value.as_ptr() as *const libc::c_void, + value.len(), + flags as libc::c_int, + ) + }; + #[cfg(target_os = "macos")] let res = unsafe { libc::setxattr( pathname.as_ptr(), name.as_ptr(), value.as_ptr() as *const libc::c_void, value.len(), + 0, flags as libc::c_int, ) }; @@ -1184,20 +1244,37 @@ impl FileSystem for PassthroughFs { } let data = self.inode_map.get(inode)?; + #[cfg(target_os = "linux")] let file = data.get_file()?; let mut buf = Vec::::with_capacity(size as usize); + #[cfg(target_os = "linux")] let pathname = CString::new(format!("/proc/self/fd/{}", file.as_raw_fd(),)) .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; + #[cfg(target_os = "macos")] + let pathname = data.get_path()?; + // The f{set,get,remove,list}xattr functions don't work on an fd opened with `O_PATH` so we // need to use the {set,get,remove,list}xattr variants. // Safe because this will only modify the contents of `buf`. + #[cfg(target_os = "linux")] + let res = unsafe { + libc::getxattr( + pathname.as_ptr(), + name.as_ptr(), + buf.as_mut_ptr() as *mut libc::c_void, + size as libc::size_t, + ) + }; + #[cfg(target_os = "macos")] let res = unsafe { libc::getxattr( pathname.as_ptr(), name.as_ptr(), buf.as_mut_ptr() as *mut libc::c_void, size as libc::size_t, + 0, + 0, ) }; if res < 0 { @@ -1219,14 +1296,20 @@ impl FileSystem for PassthroughFs { } let data = self.inode_map.get(inode)?; + #[cfg(target_os = "linux")] let file = data.get_file()?; let mut buf = Vec::::with_capacity(size as usize); + #[cfg(target_os = "linux")] let pathname = CString::new(format!("/proc/self/fd/{}", file.as_raw_fd())) .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; + #[cfg(target_os = "macos")] + let pathname = data.get_path()?; + // The f{set,get,remove,list}xattr functions don't work on an fd opened with `O_PATH` so we // need to use the {set,get,remove,list}xattr variants. // Safe because this will only modify the contents of `buf`. + #[cfg(target_os = "linux")] let res = unsafe { libc::listxattr( pathname.as_ptr(), @@ -1234,6 +1317,15 @@ impl FileSystem for PassthroughFs { size as libc::size_t, ) }; + #[cfg(target_os = "macos")] + let res = unsafe { + libc::listxattr( + pathname.as_ptr(), + buf.as_mut_ptr() as *mut libc::c_char, + size as libc::size_t, + 0, + ) + }; if res < 0 { return Err(io::Error::last_os_error()); } @@ -1253,14 +1345,22 @@ impl FileSystem for PassthroughFs { } let data = self.inode_map.get(inode)?; + #[cfg(target_os = "linux")] let file = data.get_file()?; + #[cfg(target_os = "linux")] let pathname = CString::new(format!("/proc/self/fd/{}", file.as_raw_fd())) .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; + #[cfg(target_os = "macos")] + let pathname = data.get_path()?; + // The f{set,get,remove,list}xattr functions don't work on an fd opened with `O_PATH` so we // need to use the {set,get,remove,list}xattr variants. // Safe because this doesn't modify any memory and we check the return value. + #[cfg(target_os = "linux")] let res = unsafe { libc::removexattr(pathname.as_ptr(), name.as_ptr()) }; + #[cfg(target_os = "macos")] + let res = unsafe { libc::removexattr(pathname.as_ptr(), name.as_ptr(), 0) }; if res == 0 { Ok(()) } else { @@ -1268,6 +1368,7 @@ impl FileSystem for PassthroughFs { } } + #[cfg(target_os = "linux")] fn fallocate( &self, _ctx: &Context, @@ -1323,13 +1424,7 @@ impl FileSystem for PassthroughFs { let (_guard, file) = data.get_file_mut(); // Safe because this doesn't modify any memory and we check the return value. - let res = unsafe { - libc::lseek( - file.as_raw_fd(), - offset as libc::off64_t, - whence as libc::c_int, - ) - }; + let res = unsafe { libc::lseek(file.as_raw_fd(), offset as OffT, whence as libc::c_int) }; if res < 0 { Err(io::Error::last_os_error()) } else { diff --git a/src/passthrough/sync_io_linux.rs b/src/passthrough/sync_io_linux.rs new file mode 100644 index 000000000..304597c31 --- /dev/null +++ b/src/passthrough/sync_io_linux.rs @@ -0,0 +1,149 @@ +// Copyright (C) 2020-2022 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Fuse passthrough file system, mirroring an existing FS hierarchy. +//! +//! This file system mirrors the existing file system hierarchy of the system, starting at the +//! root file system. This is implemented by just "passing through" all requests to the +//! corresponding underlying file system. +//! +//! The code is derived from the +//! [CrosVM](https://chromium.googlesource.com/chromiumos/platform/crosvm/) project, +//! with heavy modification/enhancements from Alibaba Cloud OS team. +use std::{ + io, + mem::{self, size_of}, + os::fd::{AsRawFd, RawFd}, +}; +use vm_memory::{bitmap::BitmapSlice, ByteValued}; + +use crate::{ + api::{filesystem::DirEntry, CURRENT_DIR_CSTR, PARENT_DIR_CSTR}, + bytes_to_cstr, + passthrough::{os_compat::LinuxDirent64, util::einval}, +}; + +use super::{Handle, Inode, OffT, PassthroughFs}; + +impl PassthroughFs { + pub fn do_readdir( + &self, + inode: Inode, + handle: Handle, + size: u32, + offset: u64, + add_entry: &mut dyn FnMut(DirEntry, RawFd) -> io::Result, + ) -> io::Result<()> { + if size == 0 { + return Ok(()); + } + + let mut buf = Vec::::with_capacity(size as usize); + let data = self.get_dirdata(handle, inode, libc::O_RDONLY)?; + + { + // Since we are going to work with the kernel offset, we have to acquire the file lock + // for both the `lseek64` and `getdents64` syscalls to ensure that no other thread + // changes the kernel offset while we are using it. + let (guard, dir) = data.get_file_mut(); + + // Safe because this doesn't modify any memory and we check the return value. + let res = unsafe { libc::lseek64(dir.as_raw_fd(), offset as OffT, libc::SEEK_SET) }; + if res < 0 { + return Err(io::Error::last_os_error()); + } + + // Safe because the kernel guarantees that it will only write to `buf` and we check the + // return value. + let res = unsafe { + libc::syscall( + libc::SYS_getdents64, + dir.as_raw_fd(), + buf.as_mut_ptr() as *mut LinuxDirent64, + size as libc::c_int, + ) + }; + if res < 0 { + return Err(io::Error::last_os_error()); + } + + // Safe because we trust the value returned by kernel. + unsafe { buf.set_len(res as usize) }; + + // Explicitly drop the lock so that it's not held while we fill in the fuse buffer. + mem::drop(guard); + } + + let mut rem = &buf[..]; + let orig_rem_len = rem.len(); + + while !rem.is_empty() { + // We only use debug asserts here because these values are coming from the kernel and we + // trust them implicitly. + debug_assert!( + rem.len() >= size_of::(), + "fuse: not enough space left in `rem`" + ); + + let (front, back) = rem.split_at(size_of::()); + + let dirent64 = LinuxDirent64::from_slice(front) + .expect("fuse: unable to get LinuxDirent64 from slice"); + + let namelen = dirent64.d_reclen as usize - size_of::(); + debug_assert!( + namelen <= back.len(), + "fuse: back is smaller than `namelen`" + ); + + let name = &back[..namelen]; + let res = if name.starts_with(CURRENT_DIR_CSTR) || name.starts_with(PARENT_DIR_CSTR) { + // We don't want to report the "." and ".." entries. However, returning `Ok(0)` will + // break the loop so return `Ok` with a non-zero value instead. + Ok(1) + } else { + // The Sys_getdents64 in kernel will pad the name with '\0' + // bytes up to 8-byte alignment, so @name may contain a few null + // terminators. This causes an extra lookup from fuse when + // called by readdirplus, because kernel path walking only takes + // name without null terminators, the dentry with more than 1 + // null terminators added by readdirplus doesn't satisfy the + // path walking. + let name = bytes_to_cstr(name) + .map_err(|e| { + error!("fuse: do_readdir: {:?}", e); + einval() + })? + .to_bytes(); + + add_entry( + DirEntry { + ino: dirent64.d_ino, + offset: dirent64.d_off as u64, + type_: u32::from(dirent64.d_ty), + name, + }, + data.borrow_fd().as_raw_fd(), + ) + }; + + debug_assert!( + rem.len() >= dirent64.d_reclen as usize, + "fuse: rem is smaller than `d_reclen`" + ); + + match res { + Ok(0) => break, + Ok(_) => rem = &rem[dirent64.d_reclen as usize..], + // If there's an error, we can only signal it if we haven't + // stored any entries yet - otherwise we'd end up with wrong + // lookup counts for the entries that are already in the + // buffer. So we return what we've collected until that point. + Err(e) if rem.len() == orig_rem_len => return Err(e), + Err(_) => return Ok(()), + } + } + + Ok(()) + } +} diff --git a/src/passthrough/sync_io_macos.rs b/src/passthrough/sync_io_macos.rs new file mode 100644 index 000000000..312427cf2 --- /dev/null +++ b/src/passthrough/sync_io_macos.rs @@ -0,0 +1,85 @@ +// Copyright (C) 2020-2022 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Fuse passthrough file system, mirroring an existing FS hierarchy. +//! +//! This file system mirrors the existing file system hierarchy of the system, starting at the +//! root file system. This is implemented by just "passing through" all requests to the +//! corresponding underlying file system. +//! +//! The code is derived from the +//! [CrosVM](https://chromium.googlesource.com/chromiumos/platform/crosvm/) project, +//! with heavy modification/enhancements from Alibaba Cloud OS team. +use std::{ + ffi::CStr, + io, + os::fd::{AsRawFd, RawFd}, + ptr, +}; + +use vm_memory::bitmap::BitmapSlice; + +use crate::api::filesystem::DirEntry; + +use super::{Handle, Inode, OffT, PassthroughFs}; + +impl PassthroughFs { + pub fn do_readdir( + &self, + inode: Inode, + handle: Handle, + size: u32, + offset: u64, + add_entry: &mut dyn FnMut(DirEntry, RawFd) -> io::Result, + ) -> io::Result<()> { + if size == 0 { + return Ok(()); + } + + let data = self.get_dirdata(handle, inode, libc::O_RDONLY)?; + + let (_guard, dir) = data.get_file_mut(); + if dir.metadata()?.is_dir() { + return Ok(()); + } + // Safe because this doesn't modify any memory and we check the return value. + let res = unsafe { libc::lseek(dir.as_raw_fd(), offset as OffT, libc::SEEK_SET) }; + if res < 0 { + return Err(io::Error::last_os_error()); + } + + let dir = unsafe { libc::fdopendir(dir.as_raw_fd()) }; + loop { + let entry_ptr = unsafe { libc::readdir(dir) }; + + if entry_ptr.is_null() { + break; + } + + let entry: libc::dirent = unsafe { ptr::read(entry_ptr) }; + + let cstr = unsafe { CStr::from_ptr(entry.d_name.as_ptr()) }; + let name_str = cstr.to_str().expect("Failed to convert CStr to str"); + let res = if name_str == "." || name_str == ".." { + Ok(1) + } else { + add_entry( + DirEntry { + ino: entry.d_ino, + offset: entry.d_seekoff, + type_: entry.d_type as u32, + name: cstr.to_bytes(), + }, + data.borrow_fd().as_raw_fd(), + ) + }; + match res { + Ok(0) => break, + Ok(_) => continue, + Err(_) => return Ok(()), + } + } + unsafe { libc::closedir(dir) }; + Ok(()) + } +} diff --git a/src/passthrough/util.rs b/src/passthrough/util.rs index 08a31031f..3f2a810c5 100644 --- a/src/passthrough/util.rs +++ b/src/passthrough/util.rs @@ -3,17 +3,23 @@ // Copyright (C) 2023 Alibaba Cloud. All rights reserved. use std::collections::{btree_map, BTreeMap}; -use std::ffi::{CStr, CString}; +use std::ffi::CStr; +#[cfg(target_os = "linux")] +use std::ffi::CString; use std::fs::File; use std::io; + +#[cfg(target_os = "linux")] use std::mem::MaybeUninit; use std::os::unix::io::{AsRawFd, FromRawFd}; use std::sync::atomic::{AtomicU64, AtomicU8, Ordering}; use std::sync::Mutex; use super::inode_store::InodeId; -use super::MAX_HOST_INO; +use super::{InoT, InodeMode, MAX_HOST_INO}; use crate::abi::fuse_abi as fuse; + +#[cfg(target_os = "linux")] use crate::api::EMPTY_CSTR; /// the 56th bit used to set the inode to 1 indicates virtual inode @@ -45,9 +51,12 @@ impl UniqueInodeGenerator { } } - pub fn get_unique_inode(&self, id: &InodeId) -> io::Result { + pub fn get_unique_inode(&self, id: &InodeId) -> io::Result { let unique_id = { + #[cfg(target_os = "linux")] let id: DevMntIDPair = DevMntIDPair(id.dev, id.mnt); + #[cfg(target_os = "macos")] + let id: DevMntIDPair = DevMntIDPair(id.dev, id.ino); let mut id_map_guard = self.dev_mntid_map.lock().unwrap(); match id_map_guard.entry(id) { btree_map::Entry::Occupied(v) => *v.get(), @@ -81,7 +90,7 @@ impl UniqueInodeGenerator { } #[cfg(test)] - fn decode_unique_inode(&self, inode: libc::ino64_t) -> io::Result { + fn decode_unique_inode(&self, inode: InoT) -> io::Result { if inode > crate::api::VFS_MAX_INO { return Err(io::Error::new( io::ErrorKind::InvalidInput, @@ -123,6 +132,7 @@ impl UniqueInodeGenerator { Ok(InodeId { ino: inode & MAX_HOST_INO, dev, + #[cfg(target_os = "linux")] mnt, }) } @@ -158,6 +168,7 @@ pub fn openat( /// Open `/proc/self/fd/{fd}` with the given flags to effectively duplicate the given `fd` with new /// flags (e.g. to turn an `O_PATH` file descriptor into one that can be used for I/O). +#[cfg(target_os = "linux")] pub fn reopen_fd_through_proc( fd: &impl AsRawFd, flags: libc::c_int, @@ -174,6 +185,7 @@ pub fn reopen_fd_through_proc( ) } +#[cfg(target_os = "linux")] pub fn stat_fd(dir: &impl AsRawFd, path: Option<&CStr>) -> io::Result { // Safe because this is a constant value and a valid C string. let pathname = @@ -198,14 +210,14 @@ pub fn stat_fd(dir: &impl AsRawFd, path: Option<&CStr>) -> io::Result bool { +pub fn is_safe_inode(mode: InodeMode) -> bool { // Only regular files and directories are considered safe to be opened from the file // server without O_PATH. matches!(mode & libc::S_IFMT, libc::S_IFREG | libc::S_IFDIR) } /// Returns true if the mode is for a directory. -pub fn is_dir(mode: u32) -> bool { +pub fn is_dir(mode: InodeMode) -> bool { (mode & libc::S_IFMT) == libc::S_IFDIR } @@ -271,6 +283,7 @@ mod tests { let inode_alt_key = InodeId { ino: 1, dev: 0, + #[cfg(target_os = "linux")] mnt: 0, }; let unique_inode = generator.get_unique_inode(&inode_alt_key).unwrap(); @@ -284,19 +297,24 @@ mod tests { let inode_alt_key = InodeId { ino: 1, dev: 0, + #[cfg(target_os = "linux")] mnt: 1, }; let unique_inode = generator.get_unique_inode(&inode_alt_key).unwrap(); // 56 bit = 0 // 55~48 bit = 0000 0010 // 47~1 bit = 1 + #[cfg(target_os = "linux")] assert_eq!(unique_inode, 0x01000000000001); + #[cfg(target_os = "macos")] + assert_eq!(unique_inode, 0x00800000000001); let expect_inode_alt_key = generator.decode_unique_inode(unique_inode).unwrap(); assert_eq!(expect_inode_alt_key, inode_alt_key); let inode_alt_key = InodeId { ino: 2, dev: 0, + #[cfg(target_os = "linux")] mnt: 1, }; let unique_inode = generator.get_unique_inode(&inode_alt_key).unwrap(); @@ -310,13 +328,17 @@ mod tests { let inode_alt_key = InodeId { ino: MAX_HOST_INO, dev: 0, + #[cfg(target_os = "linux")] mnt: 1, }; let unique_inode = generator.get_unique_inode(&inode_alt_key).unwrap(); // 56 bit = 0 // 55~48 bit = 0000 0010 // 47~1 bit = 0x7fffffffffff + #[cfg(target_os = "linux")] assert_eq!(unique_inode, 0x017fffffffffff); + #[cfg(target_os = "macos")] + assert_eq!(unique_inode, 0x01ffffffffffff); let expect_inode_alt_key = generator.decode_unique_inode(unique_inode).unwrap(); assert_eq!(expect_inode_alt_key, inode_alt_key); } @@ -326,7 +348,11 @@ mod tests { let generator = UniqueInodeGenerator::new(); let inode_alt_key = InodeId { ino: MAX_HOST_INO + 1, + #[cfg(target_os = "macos")] + dev: i32::MAX, + #[cfg(target_os = "linux")] dev: u64::MAX, + #[cfg(target_os = "linux")] mnt: u64::MAX, }; let unique_inode = generator.get_unique_inode(&inode_alt_key).unwrap(); @@ -337,40 +363,62 @@ mod tests { let inode_alt_key = InodeId { ino: MAX_HOST_INO + 2, + #[cfg(target_os = "macos")] + dev: i32::MAX, + #[cfg(target_os = "linux")] dev: u64::MAX, + #[cfg(target_os = "linux")] mnt: u64::MAX, }; let unique_inode = generator.get_unique_inode(&inode_alt_key).unwrap(); // 56 bit = 1 // 55~48 bit = 0000 0001 // 47~1 bit = 3 + #[cfg(target_os = "linux")] assert_eq!(unique_inode, 0x80800000000003); + #[cfg(target_os = "macos")] + assert_eq!(format!("0x{:x}", unique_inode), "0x81000000000003"); let inode_alt_key = InodeId { ino: MAX_HOST_INO + 3, + #[cfg(target_os = "macos")] + dev: i32::MAX, + #[cfg(target_os = "linux")] dev: u64::MAX, + #[cfg(target_os = "linux")] mnt: 0, }; let unique_inode = generator.get_unique_inode(&inode_alt_key).unwrap(); // 56 bit = 1 // 55~48 bit = 0000 0010 // 47~1 bit = 4 + #[cfg(target_os = "linux")] assert_eq!(unique_inode, 0x81000000000004); + #[cfg(target_os = "macos")] + assert_eq!(unique_inode, 0x81800000000004); let inode_alt_key = InodeId { ino: u64::MAX, + #[cfg(target_os = "macos")] + dev: i32::MAX, + #[cfg(target_os = "linux")] dev: u64::MAX, + #[cfg(target_os = "linux")] mnt: u64::MAX, }; let unique_inode = generator.get_unique_inode(&inode_alt_key).unwrap(); // 56 bit = 1 // 55~48 bit = 0000 0001 // 47~1 bit = 5 + #[cfg(target_os = "linux")] assert_eq!(unique_inode, 0x80800000000005); + #[cfg(target_os = "macos")] + assert_eq!(format!("0x{:x}", unique_inode), "0x82000000000005"); } } #[test] + #[cfg(target_os = "linux")] fn test_stat_fd() { let topdir = env!("CARGO_MANIFEST_DIR"); let dir = File::open(topdir).unwrap(); diff --git a/tests/example/mod.rs b/tests/example/mod.rs index b2ed97ba8..af7211e1c 100644 --- a/tests/example/mod.rs +++ b/tests/example/mod.rs @@ -2,7 +2,7 @@ // // SPDX-License-Identifier: Apache-2.0 -#[cfg(all(feature = "fusedev", target_os = "linux"))] +#[cfg(all(feature = "fusedev"))] pub(crate) mod passthroughfs; #[cfg(all(feature = "fusedev", target_os = "macos"))] diff --git a/tests/example/passthroughfs.rs b/tests/example/passthroughfs.rs index 856c8a261..4f086667b 100644 --- a/tests/example/passthroughfs.rs +++ b/tests/example/passthroughfs.rs @@ -27,7 +27,9 @@ impl Daemon { pub fn new(src: &str, mountpoint: &str, thread_cnt: u32) -> Result { // create vfs let vfs = Vfs::new(VfsOptions { + #[cfg(target_os = "linux")] no_open: false, + #[cfg(target_os = "linux")] no_opendir: false, ..Default::default() }); diff --git a/tests/smoke.rs b/tests/smoke.rs index c3261f692..d0220c43f 100644 --- a/tests/smoke.rs +++ b/tests/smoke.rs @@ -3,20 +3,24 @@ // SPDX-License-Identifier: Apache-2.0 // -#[cfg(all(feature = "fusedev", target_os = "linux"))] +#[cfg(all(feature = "fusedev"))] #[macro_use] extern crate log; mod example; -#[cfg(all(feature = "fusedev", target_os = "linux"))] +#[cfg(feature = "fusedev")] mod fusedev_tests { use std::io::Result; use std::path::Path; use std::process::Command; + #[cfg(target_os = "linux")] use vmm_sys_util::tempdir::TempDir; + #[cfg(target_os = "macos")] + use tempfile::tempdir; + use crate::example::passthroughfs; fn validate_two_git_directory(src: &str, dest: &str) -> bool { @@ -37,7 +41,6 @@ mod fusedev_tests { ); return false; } - let src_md5 = exec( format!( "cd {}; git ls-files --recurse-submodules | grep --invert-match rust-vmm-ci | xargs md5sum; cd - > /dev/null", @@ -85,7 +88,16 @@ mod fusedev_tests { // test the fuse-rs repository let src = Path::new(".").canonicalize().unwrap(); let src_dir = src.to_str().unwrap(); + + #[cfg(target_os = "linux")] let tmp_dir = TempDir::new().unwrap(); + + #[cfg(target_os = "macos")] + let tmp_dir = { + let source = tempdir().expect("Cannot create temporary directory."); + source.into_path() + }; + let mnt_dir = tmp_dir.as_path().to_str().unwrap(); info!( "test passthroughfs src {:?} mountpoint {}",