From 266b3bd3f26d30f7be56b7ec9d31f3db2285b4ce Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Mon, 30 Oct 2023 19:56:04 -0400 Subject: [PATCH] Unify arc_prune_async() code, fix excessive ARC pruning There is no reason to have separate implementations for FreeBSD and Linux. Make the Linux code shared, as it is more functional, and just register a FreeBSD-specific prune callback with the arc_add_prune_callback() API. Aside from the code cleanup, this fixes excessive pruning on FreeBSD. [olce: This code comes from the OpenZFS pull request: https://github.com/openzfs/zfs/pull/16083, vendor-merged into our tree. Its commit message has been slightly adapted to the present context. The upstream pull request has been reviewed and merged into 'zfs-2.1.16-staging' as 5b81b1bf5e6d6aeb8a87175dcb12b529185cac2f, which should come into our tree at the next vendor import. This is the same code that was merged into stable/14 and main as part of vendor merges, and released as an EN (FreeBSD-EN-23:18.openzfs) over releng/14.0 by markj@.] PR: 275594, 274698 Reported by: Seigo Tanimura, markj, and others Tested by: olce Approved by: emaste (mentor) Approved by: so Obtained from: OpenZFS Sponsored by: iXsystems, Inc. 
Sponsored by: The FreeBSD Foundation Signed-off-by: Alexander Motin (cherry picked from commit 330954bdb822af6bc07d487b1ecd7f8fda9c4def) --- .../openzfs/include/os/linux/zfs/sys/zpl.h | 2 +- sys/contrib/openzfs/include/sys/arc.h | 2 +- sys/contrib/openzfs/include/sys/arc_impl.h | 1 - .../openzfs/module/os/freebsd/zfs/arc_os.c | 62 ------------------- .../module/os/freebsd/zfs/zfs_vfsops.c | 32 ++++++++++ .../openzfs/module/os/linux/zfs/arc_os.c | 51 --------------- .../openzfs/module/os/linux/zfs/zpl_super.c | 2 +- sys/contrib/openzfs/module/zfs/arc.c | 52 ++++++++++++++++ 8 files changed, 87 insertions(+), 117 deletions(-) diff --git a/sys/contrib/openzfs/include/os/linux/zfs/sys/zpl.h b/sys/contrib/openzfs/include/os/linux/zfs/sys/zpl.h index 4e08470e794faa..9d048d19aece37 100644 --- a/sys/contrib/openzfs/include/os/linux/zfs/sys/zpl.h +++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/zpl.h @@ -52,7 +52,7 @@ extern const struct file_operations zpl_file_operations; extern const struct file_operations zpl_dir_file_operations; /* zpl_super.c */ -extern void zpl_prune_sb(int64_t nr_to_scan, void *arg); +extern void zpl_prune_sb(uint64_t nr_to_scan, void *arg); extern const struct super_operations zpl_super_operations; extern const struct export_operations zpl_export_operations; diff --git a/sys/contrib/openzfs/include/sys/arc.h b/sys/contrib/openzfs/include/sys/arc.h index 5d8176894e6046..9b762c01c159b9 100644 --- a/sys/contrib/openzfs/include/sys/arc.h +++ b/sys/contrib/openzfs/include/sys/arc.h @@ -81,7 +81,7 @@ typedef struct arc_prune arc_prune_t; typedef void arc_read_done_func_t(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp, arc_buf_t *buf, void *priv); typedef void arc_write_done_func_t(zio_t *zio, arc_buf_t *buf, void *priv); -typedef void arc_prune_func_t(int64_t bytes, void *priv); +typedef void arc_prune_func_t(uint64_t bytes, void *priv); /* Shared module parameters */ extern int zfs_arc_average_blocksize; diff --git 
a/sys/contrib/openzfs/include/sys/arc_impl.h b/sys/contrib/openzfs/include/sys/arc_impl.h index db6238fda61e8b..118bf33632fb51 100644 --- a/sys/contrib/openzfs/include/sys/arc_impl.h +++ b/sys/contrib/openzfs/include/sys/arc_impl.h @@ -994,7 +994,6 @@ extern void arc_wait_for_eviction(uint64_t, boolean_t); extern void arc_lowmem_init(void); extern void arc_lowmem_fini(void); -extern void arc_prune_async(int64_t); extern int arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg); extern uint64_t arc_free_memory(void); extern int64_t arc_available_memory(void); diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c index 3dd49f05521bf9..9641bf8bd5910b 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c @@ -51,11 +51,6 @@ #include #include -#if __FreeBSD_version >= 1300139 -static struct sx arc_vnlru_lock; -static struct vnode *arc_vnlru_marker; -#endif - extern struct vfsops zfs_vfsops; uint_t zfs_arc_free_target = 0; @@ -151,53 +146,6 @@ arc_default_max(uint64_t min, uint64_t allmem) return (MAX(allmem * 5 / 8, size)); } -/* - * Helper function for arc_prune_async() it is responsible for safely - * handling the execution of a registered arc_prune_func_t. - */ -static void -arc_prune_task(void *arg) -{ - int64_t nr_scan = (intptr_t)arg; - -#ifndef __ILP32__ - if (nr_scan > INT_MAX) - nr_scan = INT_MAX; -#endif - -#if __FreeBSD_version >= 1300139 - sx_xlock(&arc_vnlru_lock); - vnlru_free_vfsops(nr_scan, &zfs_vfsops, arc_vnlru_marker); - sx_xunlock(&arc_vnlru_lock); -#else - vnlru_free(nr_scan, &zfs_vfsops); -#endif -} - -/* - * Notify registered consumers they must drop holds on a portion of the ARC - * buffered they reference. This provides a mechanism to ensure the ARC can - * honor the arc_meta_limit and reclaim otherwise pinned ARC buffers. This - * is analogous to dnlc_reduce_cache() but more generic. 
- * - * This operation is performed asynchronously so it may be safely called - * in the context of the arc_reclaim_thread(). A reference is taken here - * for each registered arc_prune_t and the arc_prune_task() is responsible - * for releasing it once the registered arc_prune_func_t has completed. - */ -void -arc_prune_async(int64_t adjust) -{ - -#ifndef __LP64__ - if (adjust > INTPTR_MAX) - adjust = INTPTR_MAX; -#endif - taskq_dispatch(arc_prune_taskq, arc_prune_task, - (void *)(intptr_t)adjust, TQ_SLEEP); - ARCSTAT_BUMP(arcstat_prune); -} - uint64_t arc_all_memory(void) { @@ -248,10 +196,6 @@ arc_lowmem_init(void) { arc_event_lowmem = EVENTHANDLER_REGISTER(vm_lowmem, arc_lowmem, NULL, EVENTHANDLER_PRI_FIRST); -#if __FreeBSD_version >= 1300139 - arc_vnlru_marker = vnlru_alloc_marker(); - sx_init(&arc_vnlru_lock, "arc vnlru lock"); -#endif } void @@ -259,12 +203,6 @@ arc_lowmem_fini(void) { if (arc_event_lowmem != NULL) EVENTHANDLER_DEREGISTER(vm_lowmem, arc_event_lowmem); -#if __FreeBSD_version >= 1300139 - if (arc_vnlru_marker != NULL) { - vnlru_free_marker(arc_vnlru_marker); - sx_destroy(&arc_vnlru_lock); - } -#endif } void diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c index 6ffd3688565564..33581d018256d2 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c @@ -2097,6 +2097,26 @@ zfs_vnodes_adjust_back(void) #endif } +#if __FreeBSD_version >= 1300139 +static struct sx zfs_vnlru_lock; +static struct vnode *zfs_vnlru_marker; +#endif +static arc_prune_t *zfs_prune; + +static void +zfs_prune_task(uint64_t nr_to_scan, void *arg __unused) +{ + if (nr_to_scan > INT_MAX) + nr_to_scan = INT_MAX; +#if __FreeBSD_version >= 1300139 + sx_xlock(&zfs_vnlru_lock); + vnlru_free_vfsops(nr_to_scan, &zfs_vfsops, zfs_vnlru_marker); + sx_xunlock(&zfs_vnlru_lock); +#else + vnlru_free(nr_to_scan, &zfs_vfsops); +#endif +} + void 
zfs_init(void) { @@ -2123,11 +2143,23 @@ zfs_init(void) dmu_objset_register_type(DMU_OST_ZFS, zpl_get_file_info); zfsvfs_taskq = taskq_create("zfsvfs", 1, minclsyspri, 0, 0, 0); + +#if __FreeBSD_version >= 1300139 + zfs_vnlru_marker = vnlru_alloc_marker(); + sx_init(&zfs_vnlru_lock, "zfs vnlru lock"); +#endif + zfs_prune = arc_add_prune_callback(zfs_prune_task, NULL); } void zfs_fini(void) { + arc_remove_prune_callback(zfs_prune); +#if __FreeBSD_version >= 1300139 + vnlru_free_marker(zfs_vnlru_marker); + sx_destroy(&zfs_vnlru_lock); +#endif + taskq_destroy(zfsvfs_taskq); zfsctl_fini(); zfs_znode_fini(); diff --git a/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c b/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c index fc76fe0e0b5ca2..496dce5abe61e8 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c @@ -488,57 +488,6 @@ arc_unregister_hotplug(void) } #endif /* _KERNEL */ -/* - * Helper function for arc_prune_async() it is responsible for safely - * handling the execution of a registered arc_prune_func_t. - */ -static void -arc_prune_task(void *ptr) -{ - arc_prune_t *ap = (arc_prune_t *)ptr; - arc_prune_func_t *func = ap->p_pfunc; - - if (func != NULL) - func(ap->p_adjust, ap->p_private); - - zfs_refcount_remove(&ap->p_refcnt, func); -} - -/* - * Notify registered consumers they must drop holds on a portion of the ARC - * buffered they reference. This provides a mechanism to ensure the ARC can - * honor the arc_meta_limit and reclaim otherwise pinned ARC buffers. This - * is analogous to dnlc_reduce_cache() but more generic. - * - * This operation is performed asynchronously so it may be safely called - * in the context of the arc_reclaim_thread(). A reference is taken here - * for each registered arc_prune_t and the arc_prune_task() is responsible - * for releasing it once the registered arc_prune_func_t has completed. 
- */ -void -arc_prune_async(int64_t adjust) -{ - arc_prune_t *ap; - - mutex_enter(&arc_prune_mtx); - for (ap = list_head(&arc_prune_list); ap != NULL; - ap = list_next(&arc_prune_list, ap)) { - - if (zfs_refcount_count(&ap->p_refcnt) >= 2) - continue; - - zfs_refcount_add(&ap->p_refcnt, ap->p_pfunc); - ap->p_adjust = adjust; - if (taskq_dispatch(arc_prune_taskq, arc_prune_task, - ap, TQ_SLEEP) == TASKQID_INVALID) { - zfs_refcount_remove(&ap->p_refcnt, ap->p_pfunc); - continue; - } - ARCSTAT_BUMP(arcstat_prune); - } - mutex_exit(&arc_prune_mtx); -} - /* BEGIN CSTYLED */ ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, shrinker_limit, INT, ZMOD_RW, "Limit on number of pages that ARC shrinker can reclaim at once"); diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c index c2fd3fee140184..9300a71992967e 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c @@ -334,7 +334,7 @@ zpl_kill_sb(struct super_block *sb) } void -zpl_prune_sb(int64_t nr_to_scan, void *arg) +zpl_prune_sb(uint64_t nr_to_scan, void *arg) { struct super_block *sb = (struct super_block *)arg; int objects = 0; diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c index 1180853da03821..eacc2104a6a642 100644 --- a/sys/contrib/openzfs/module/zfs/arc.c +++ b/sys/contrib/openzfs/module/zfs/arc.c @@ -868,6 +868,8 @@ static void l2arc_do_free_on_write(void); static void l2arc_hdr_arcstats_update(arc_buf_hdr_t *hdr, boolean_t incr, boolean_t state_only); +static void arc_prune_async(uint64_t adjust); + #define l2arc_hdr_arcstats_increment(hdr) \ l2arc_hdr_arcstats_update((hdr), B_TRUE, B_FALSE) #define l2arc_hdr_arcstats_decrement(hdr) \ @@ -6521,6 +6523,56 @@ arc_remove_prune_callback(arc_prune_t *p) kmem_free(p, sizeof (*p)); } +/* + * Helper function for arc_prune_async() it is responsible for safely + * handling the execution of a registered 
arc_prune_func_t. + */ +static void +arc_prune_task(void *ptr) +{ + arc_prune_t *ap = (arc_prune_t *)ptr; + arc_prune_func_t *func = ap->p_pfunc; + + if (func != NULL) + func(ap->p_adjust, ap->p_private); + + zfs_refcount_remove(&ap->p_refcnt, func); +} + +/* + * Notify registered consumers they must drop holds on a portion of the ARC + * buffers they reference. This provides a mechanism to ensure the ARC can + * honor the metadata limit and reclaim otherwise pinned ARC buffers. + * + * This operation is performed asynchronously so it may be safely called + * in the context of the arc_reclaim_thread(). A reference is taken here + * for each registered arc_prune_t and the arc_prune_task() is responsible + * for releasing it once the registered arc_prune_func_t has completed. + */ +static void +arc_prune_async(uint64_t adjust) +{ + arc_prune_t *ap; + + mutex_enter(&arc_prune_mtx); + for (ap = list_head(&arc_prune_list); ap != NULL; + ap = list_next(&arc_prune_list, ap)) { + + if (zfs_refcount_count(&ap->p_refcnt) >= 2) + continue; + + zfs_refcount_add(&ap->p_refcnt, ap->p_pfunc); + ap->p_adjust = adjust; + if (taskq_dispatch(arc_prune_taskq, arc_prune_task, + ap, TQ_SLEEP) == TASKQID_INVALID) { + zfs_refcount_remove(&ap->p_refcnt, ap->p_pfunc); + continue; + } + ARCSTAT_BUMP(arcstat_prune); + } + mutex_exit(&arc_prune_mtx); +} + /* * Notify the arc that a block was freed, and thus will never be used again. */