From 7f016bcf76d6879cbc8a1efbf5f4d55c581a4906 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Thu, 20 Apr 2023 19:46:00 +0200 Subject: [PATCH] 6.2: Update bore Scheduler, update for 6.2.12 Signed-off-by: Peter Jung --- 6.2/0001-bbr2.patch | 2 +- 6.2/0002-bfq.patch | 2 +- 6.2/0003-bitmap.patch | 2 +- 6.2/0004-cachy.patch | 12 +- 6.2/0005-clr.patch | 4 +- 6.2/0006-fixes.patch | 281 +++- 6.2/0007-fs-patches.patch | 23 +- ...tate-epp-and-amd-pstate-guided-drive.patch | 2 +- 6.2/0009-ksm.patch | 2 +- 6.2/0010-maple-lru.patch | 786 ++++++++++-- 6.2/0011-objtool.patch | 2 +- 6.2/0012-sched.patch | 16 +- 6.2/0013-zram.patch | 2 +- 6.2/0014-zstd-import-1.5.5.patch | 2 +- 6.2/0015-v4l2-core-add-v4l2loopback.patch | 2 +- 6.2/all/0001-cachyos-base-all.patch | 1140 ++++++++++++++--- ...1-Add-latency-priority-for-CFS-class.patch | 16 +- 6.2/sched/0001-bore-cachy.patch | 104 +- 6.2/sched/0001-bore.patch | 104 +- 19 files changed, 2056 insertions(+), 448 deletions(-) diff --git a/6.2/0001-bbr2.patch b/6.2/0001-bbr2.patch index 37765ee6..6ec24f50 100644 --- a/6.2/0001-bbr2.patch +++ b/6.2/0001-bbr2.patch @@ -1,4 +1,4 @@ -From 89f3a4d45dc91c408f5c02b09982c7262b55c48d Mon Sep 17 00:00:00 2001 +From 9ee19b282653761511acbde09e77416a96f55a5b Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Fri, 10 Mar 2023 17:59:47 +0100 Subject: [PATCH 01/15] bbr2 diff --git a/6.2/0002-bfq.patch b/6.2/0002-bfq.patch index 3d624437..1d1bf61c 100644 --- a/6.2/0002-bfq.patch +++ b/6.2/0002-bfq.patch @@ -1,4 +1,4 @@ -From 0a4585a3f29f2acb9bf8e27f2fe61172243c3163 Mon Sep 17 00:00:00 2001 +From 5610d33c45f6785da3a9b856d1fee11fb06c78e8 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Thu, 13 Apr 2023 18:13:28 +0200 Subject: [PATCH 02/15] bfq diff --git a/6.2/0003-bitmap.patch b/6.2/0003-bitmap.patch index d0df1f62..3ad63bda 100644 --- a/6.2/0003-bitmap.patch +++ b/6.2/0003-bitmap.patch @@ -1,4 +1,4 @@ -From 7b09e20a0ef54c18d97ddfa479d39d1daab8be85 Mon Sep 17 00:00:00 2001 +From f3810e5d25c92b9a4a0da90dc5c39fe1421ee533 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Mon, 13 Feb 2023 11:26:20 +0100 Subject: [PATCH 03/15] bitmap diff --git a/6.2/0004-cachy.patch b/6.2/0004-cachy.patch index 573f6c7d..af2ab41b 100644 --- a/6.2/0004-cachy.patch +++ b/6.2/0004-cachy.patch @@ -1,4 +1,4 @@ -From 538ff5a6a2aa503cf813248d2a9deb19842b2d93 Mon Sep 17 00:00:00 2001 +From 955ced3981bf578dc6cc299ed5f620f68ae29c39 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Thu, 13 Apr 2023 18:13:45 +0200 Subject: [PATCH 04/15] cachy @@ -124,7 +124,7 @@ index 352ff53a2306..7c210744d84c 100644 vmlinuz voffset.h diff --git a/Makefile b/Makefile -index 416490daa76a..e2a454ac73d2 100644 +index 068374cc2601..f7890837c555 100644 --- a/Makefile +++ b/Makefile @@ -834,6 +834,9 @@ KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) @@ -2852,7 +2852,7 @@ index 7b0fe741a088..77ad9e033358 100644 out: diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index e046a2bff207..1ea94874b0ce 100644 +index 661226e38835..735994022fe0 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -69,9 +69,13 @@ @@ -3082,10 +3082,10 @@ index 5b7b8d4f5297..160acbbdf111 100644 static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) { diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c -index 0d0cc4ef2b85..544104f9f4b3 100644 +index 40fe70fc2015..3028e27897d9 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c -@@ -1467,6 +1467,13 @@ static struct ctl_table ipv4_net_table[] = { +@@ -1470,6 +1470,13 @@ static struct ctl_table ipv4_net_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = &tcp_plb_max_cong_thresh, }, @@ -3161,7 +3161,7 @@ index 754e0212c951..b6d7faeb737a 100644 * drop receive data on the floor. It will get retransmitted * and hopefully then we'll have sufficient space. diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c -index 8320d0ecb13a..37a09cd767a1 100644 +index 339a9cea9047..34bd711a1e7c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -3274,6 +3274,8 @@ static int __net_init tcp_sk_init(struct net *net) diff --git a/6.2/0005-clr.patch b/6.2/0005-clr.patch index b8bcbbf5..68d17653 100644 --- a/6.2/0005-clr.patch +++ b/6.2/0005-clr.patch @@ -1,4 +1,4 @@ -From 3ecdb9dc32a4858395c48328399eee7833664748 Mon Sep 17 00:00:00 2001 +From 5e20c6e278ccac0b0774a4644c2aacdd924d3584 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Fri, 10 Mar 2023 18:00:48 +0100 Subject: [PATCH 05/15] clr @@ -1240,7 +1240,7 @@ index 6d2dd03dafa8..750440803883 100644 EXPORT_SYMBOL(dst_release_immediate); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c -index 64289bc98887..228c54bbdecc 100644 +index f5114b2395ae..6a7be9d3dd10 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -840,7 +840,7 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, diff --git a/6.2/0006-fixes.patch b/6.2/0006-fixes.patch index 65455af3..621aa7e0 100644 --- a/6.2/0006-fixes.patch +++ b/6.2/0006-fixes.patch @@ -1,4 +1,4 @@ -From 4a31e787aefebf5586016b69ae7d54abc6428d22 Mon Sep 17 00:00:00 2001 +From c0762e6bc0c17ce7f3d4c6d75626d574266d4f02 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Thu, 13 Apr 2023 18:16:34 +0200 Subject: [PATCH 06/15] fixes @@ -10,14 +10,26 @@ Signed-off-by: Peter Jung Documentation/admin-guide/mm/ksm.rst | 7 + Documentation/leds/index.rst | 1 + Documentation/leds/ledtrig-blkdev.rst | 158 +++ + Documentation/x86/topology.rst | 26 + arch/x86/boot/compressed/Makefile | 2 +- arch/x86/events/rapl.c | 20 +- + arch/x86/include/asm/cacheinfo.h | 1 + + arch/x86/kernel/cpu/amd.c | 1 + + arch/x86/kernel/cpu/cacheinfo.c | 36 + + arch/x86/kernel/cpu/hygon.c | 1 + arch/x86/mm/tlb.c | 2 +- arch/x86/net/bpf_jit_comp.c | 5 +- drivers/bluetooth/btusb.c | 2 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 3 +- + .../drm/amd/display/dc/bios/bios_parser2.c | 7 +- + .../drm/amd/display/dc/dcn20/dcn20_resource.c | 2 +- + .../drm/amd/display/dc/dcn21/dcn21_resource.c | 2 +- + .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 3 +- + drivers/gpu/drm/scheduler/sched_main.c | 3 +- drivers/leds/trigger/Kconfig | 9 + drivers/leds/trigger/Makefile | 1 + drivers/leds/trigger/ledtrig-blkdev.c | 1221 +++++++++++++++++ + .../net/wireless/mediatek/mt76/mt7921/init.c | 7 +- fs/eventpoll.c | 188 ++- fs/proc/base.c | 1 + include/linux/mm_types.h | 7 +- @@ -35,7 +47,7 @@ Signed-off-by: Peter Jung scripts/Makefile.vmlinux_o | 2 +- sound/pci/hda/cs35l41_hda.c | 2 +- .../selftests/vm/ksm_functional_tests.c | 96 +- - 30 files changed, 1961 insertions(+), 175 deletions(-) + 42 files changed, 2041 insertions(+), 187 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-class-led-trigger-blkdev create mode 100644 Documentation/leds/ledtrig-blkdev.rst create mode 100644 drivers/leds/trigger/ledtrig-blkdev.c @@ -339,6 +351,64 @@ index 000000000000..9ff5b99de451 +* The ``blkdev`` LED trigger supports many-to-many device/LED associations. + A device can be associated with multiple LEDs, and an LED can be associated + with multiple devices. +diff --git a/Documentation/x86/topology.rst b/Documentation/x86/topology.rst +index 7f58010ea86a..9de14f3f7783 100644 +--- a/Documentation/x86/topology.rst ++++ b/Documentation/x86/topology.rst +@@ -33,6 +33,7 @@ historical nature and should be cleaned up. + The topology of a system is described in the units of: + + - packages ++ - cluster + - cores + - threads + +@@ -90,6 +91,22 @@ Package-related topology information in the kernel: + Cache. In general, it is a number identifying an LLC uniquely on the + system. + ++Clusters ++======== ++A cluster consists of threads of one or more cores sharing the same L2 cache. ++ ++Cluster-related topology information in the kernel: ++ ++ - cluster_id: ++ ++ A per-CPU variable containing: ++ ++ - Upper bits extracted from the APIC ID. CPUs which have the same value ++ in these bits share an L2 and have the same cluster_id. ++ ++ CPUs for which cluster information is unavailable will show 65535 ++ (BAD_APICID) as the cluster_id. ++ + Cores + ===== + A core consists of 1 or more threads. It does not matter whether the threads +@@ -125,6 +142,11 @@ Thread-related topology information in the kernel: + + The number of online threads is also printed in /proc/cpuinfo "siblings." + ++ - topology_cluster_cpumask(): ++ ++ The cpumask contains all online threads in the cluster to which a thread ++ belongs. ++ + - topology_sibling_cpumask(): + + The cpumask contains all online threads in the core to which a thread +@@ -138,6 +160,10 @@ Thread-related topology information in the kernel: + + The physical package ID to which a thread belongs. + ++ - topology_cluster_id(); ++ ++ The ID of the cluster to which a thread belongs. ++ + - topology_core_id(); + + The ID of the core to which a thread belongs. It is also printed in /proc/cpuinfo diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index d995595394bb..19d1fb601796 100644 --- a/arch/x86/boot/compressed/Makefile @@ -410,6 +480,89 @@ index 52e6e7ed4f78..f000cc16d128 100644 .msr_power_unit = MSR_AMD_RAPL_POWER_UNIT, .rapl_msrs = amd_rapl_msrs, }; +diff --git a/arch/x86/include/asm/cacheinfo.h b/arch/x86/include/asm/cacheinfo.h +index ce9685fc78d8..2034cd556c07 100644 +--- a/arch/x86/include/asm/cacheinfo.h ++++ b/arch/x86/include/asm/cacheinfo.h +@@ -7,6 +7,7 @@ extern unsigned int memory_caching_control; + #define CACHE_MTRR 0x01 + #define CACHE_PAT 0x02 + ++void cacheinfo_topoext_init_l2c_id(struct cpuinfo_x86 *c, int cpu); + void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu); + void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu); + +diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c +index 06f2ede1544f..84c250027a50 100644 +--- a/arch/x86/kernel/cpu/amd.c ++++ b/arch/x86/kernel/cpu/amd.c +@@ -358,6 +358,7 @@ static void amd_get_topology(struct cpuinfo_x86 *c) + if (!err) + c->x86_coreid_bits = get_count_order(c->x86_max_cores); + ++ cacheinfo_topoext_init_l2c_id(c, cpu); + cacheinfo_amd_init_llc_id(c, cpu); + + } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { +diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c +index f4e5aa27eec6..bed7b9633451 100644 +--- a/arch/x86/kernel/cpu/cacheinfo.c ++++ b/arch/x86/kernel/cpu/cacheinfo.c +@@ -659,6 +659,42 @@ static int find_num_cache_leaves(struct cpuinfo_x86 *c) + return i; + } + ++void cacheinfo_topoext_init_l2c_id(struct cpuinfo_x86 *c, int cpu) ++{ ++ u32 eax, ebx, ecx, edx, num_sharing_cache; ++ int i = 0, bits; ++ ++ /* Check if L2 cache identifiers exists. */ ++ if (!cpuid_ecx(0x80000006)) ++ return; ++ ++ while (true) { ++ u32 level; ++ ++ cpuid_count(0x8000001d, i, &eax, &ebx, &ecx, &edx); ++ if (!eax) ++ return; ++ ++ /* ++ * Check if the current leaf is for L2 cache using ++ * eax[7:5] used to describe the cache level. ++ */ ++ level = (eax >> 5) & 0x7; ++ if (level == 2) ++ break; ++ ++ ++i; ++ } ++ ++ /* ++ * L2 ID is calculated from the number of threads ++ * sharing the L2 cache. ++ */ ++ num_sharing_cache = ((eax >> 14) & 0xfff) + 1; ++ bits = get_count_order(num_sharing_cache); ++ per_cpu(cpu_l2c_id, cpu) = c->apicid >> bits; ++} ++ + void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu) + { + /* +diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c +index 5a2962c492d3..cb0025b4a2fd 100644 +--- a/arch/x86/kernel/cpu/hygon.c ++++ b/arch/x86/kernel/cpu/hygon.c +@@ -89,6 +89,7 @@ static void hygon_get_topology(struct cpuinfo_x86 *c) + /* Socket ID is ApicId[6] for these processors. */ + c->phys_proc_id = c->apicid >> APICID_SOCKET_ID_BIT; + ++ cacheinfo_topoext_init_l2c_id(c, cpu); + cacheinfo_hygon_init_llc_id(c, cpu); + } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { + u64 value; diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index c1e31e9a85d7..92d73ccede70 100644 --- a/arch/x86/mm/tlb.c @@ -453,6 +606,92 @@ index 5c536151ef83..5a80379253a7 100644 gpiod_set_value_cansleep(reset_gpio, 1); return; +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +index 9fa1d814508a..43d6a9d6a538 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +@@ -453,7 +453,8 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, + /* Limit maximum size to 2GiB due to SG table limitations */ + size = min(remaining_size, 2ULL << 30); + +- if (size >= (u64)pages_per_block << PAGE_SHIFT) ++ if ((size >= (u64)pages_per_block << PAGE_SHIFT) && ++ !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1))) + min_block_size = (u64)pages_per_block << PAGE_SHIFT; + + cur_size = size; +diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +index 074e70a5c458..e507d2e1410b 100644 +--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c ++++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +@@ -516,11 +516,8 @@ static enum bp_result get_gpio_i2c_info( + info->i2c_slave_address = record->i2c_slave_addr; + + /* TODO: check how to get register offset for en, Y, etc. */ +- info->gpio_info.clk_a_register_index = +- le16_to_cpu( +- header->gpio_pin[table_index].data_a_reg_index); +- info->gpio_info.clk_a_shift = +- header->gpio_pin[table_index].gpio_bitshift; ++ info->gpio_info.clk_a_register_index = le16_to_cpu(pin->data_a_reg_index); ++ info->gpio_info.clk_a_shift = pin->gpio_bitshift; + + return BP_RESULT_OK; + } +diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +index 8a0dd0d7134b..481a15b02126 100644 +--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c ++++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +@@ -714,7 +714,7 @@ static const struct dc_debug_options debug_defaults_drv = { + .timing_trace = false, + .clock_trace = true, + .disable_pplib_clock_request = true, +- .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, ++ .pipe_split_policy = MPC_SPLIT_DYNAMIC, + .force_single_disp_pipe_split = false, + .disable_dcc = DCC_ENABLE, + .vsr_support = true, +diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +index fbcf0afeae0d..ec30d171e7de 100644 +--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c ++++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +@@ -642,7 +642,7 @@ static const struct dc_debug_options debug_defaults_drv = { + .clock_trace = true, + .disable_pplib_clock_request = true, + .min_disp_clk_khz = 100000, +- .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, ++ .pipe_split_policy = MPC_SPLIT_DYNAMIC, + .force_single_disp_pipe_split = false, + .disable_dcc = DCC_ENABLE, + .vsr_support = true, +diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +index 0bcd4fe0ef17..5b7a780cbd54 100644 +--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c ++++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +@@ -304,7 +304,8 @@ navi10_get_allowed_feature_mask(struct smu_context *smu, + | FEATURE_MASK(FEATURE_GFX_SS_BIT) + | FEATURE_MASK(FEATURE_APCC_DFLL_BIT) + | FEATURE_MASK(FEATURE_FW_CTF_BIT) +- | FEATURE_MASK(FEATURE_OUT_OF_BAND_MONITOR_BIT); ++ | FEATURE_MASK(FEATURE_OUT_OF_BAND_MONITOR_BIT) ++ | FEATURE_MASK(FEATURE_TEMP_DEPENDENT_VMIN_BIT); + + if (adev->pm.pp_feature & PP_SCLK_DPM_MASK) + *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_GFXCLK_BIT); +diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c +index fd22d753b4ed..fcd4bfef7415 100644 +--- a/drivers/gpu/drm/scheduler/sched_main.c ++++ b/drivers/gpu/drm/scheduler/sched_main.c +@@ -308,7 +308,8 @@ static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched) + */ + void drm_sched_fault(struct drm_gpu_scheduler *sched) + { +- mod_delayed_work(sched->timeout_wq, &sched->work_tdr, 0); ++ if (sched->ready) ++ mod_delayed_work(sched->timeout_wq, &sched->work_tdr, 0); + } + EXPORT_SYMBOL(drm_sched_fault); + diff --git a/drivers/leds/trigger/Kconfig b/drivers/leds/trigger/Kconfig index dc6816d36d06..bda249068182 100644 --- a/drivers/leds/trigger/Kconfig @@ -1707,6 +1946,44 @@ index 000000000000..067eedb003b5 +MODULE_DESCRIPTION("Block device LED trigger"); +MODULE_AUTHOR("Ian Pilcher "); +MODULE_LICENSE("GPL v2"); +diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c +index d4b681d7e1d2..f2c6ec4d8e2e 100644 +--- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c ++++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c +@@ -162,12 +162,12 @@ mt7921_mac_init_band(struct mt7921_dev *dev, u8 band) + + u8 mt7921_check_offload_capability(struct device *dev, const char *fw_wm) + { +- struct mt7921_fw_features *features = NULL; + const struct mt76_connac2_fw_trailer *hdr; + struct mt7921_realease_info *rel_info; + const struct firmware *fw; + int ret, i, offset = 0; + const u8 *data, *end; ++ u8 offload_caps = 0; + + ret = request_firmware(&fw, fw_wm, dev); + if (ret) +@@ -199,7 +199,10 @@ u8 mt7921_check_offload_capability(struct device *dev, const char *fw_wm) + data += sizeof(*rel_info); + + if (rel_info->tag == MT7921_FW_TAG_FEATURE) { ++ struct mt7921_fw_features *features; ++ + features = (struct mt7921_fw_features *)data; ++ offload_caps = features->data; + break; + } + +@@ -209,7 +212,7 @@ u8 mt7921_check_offload_capability(struct device *dev, const char *fw_wm) + out: + release_firmware(fw); + +- return features ? features->data : 0; ++ return offload_caps; + } + EXPORT_SYMBOL_GPL(mt7921_check_offload_capability); + diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 64659b110973..4cad490028ab 100644 --- a/fs/eventpoll.c diff --git a/6.2/0007-fs-patches.patch b/6.2/0007-fs-patches.patch index c13efb4d..4e0c9fab 100644 --- a/6.2/0007-fs-patches.patch +++ b/6.2/0007-fs-patches.patch @@ -1,4 +1,4 @@ -From 3e1082da2acc077e4c66bf84f225770589ffef86 Mon Sep 17 00:00:00 2001 +From dc7a1604c3a8761a1f980e5dd5f1b2901b775a84 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Thu, 6 Apr 2023 17:34:29 +0200 Subject: [PATCH 07/15] fs-patches @@ -52,7 +52,7 @@ Signed-off-by: Peter Jung fs/btrfs/relocation.c | 2 +- fs/btrfs/scrub.c | 2 +- fs/btrfs/send.c | 684 +++++++++-------- - fs/btrfs/super.c | 5 +- + fs/btrfs/super.c | 3 +- fs/btrfs/sysfs.c | 54 +- fs/btrfs/tests/extent-map-tests.c | 2 +- fs/btrfs/transaction.c | 29 + @@ -105,7 +105,7 @@ Signed-off-by: Peter Jung include/trace/events/btrfs.h | 127 +++- include/trace/events/ext4.h | 7 - include/uapi/linux/btrfs.h | 12 +- - 100 files changed, 3340 insertions(+), 3789 deletions(-) + 100 files changed, 3338 insertions(+), 3789 deletions(-) create mode 100644 fs/btrfs/lru_cache.c create mode 100644 fs/btrfs/lru_cache.h @@ -2362,7 +2362,7 @@ index 317aeff6c1da..c48abc817ed2 100644 spin_unlock(&discard_ctl->lock); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c -index fde40112a259..b53f0e30ce2b 100644 +index 174d196d6960..9e1596bb208d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -78,23 +78,6 @@ static void btrfs_free_csum_hash(struct btrfs_fs_info *fs_info) @@ -2608,7 +2608,7 @@ index fde40112a259..b53f0e30ce2b 100644 static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info, u64 objectid) { -@@ -5162,11 +4979,12 @@ static int btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info, +@@ -5176,11 +4993,12 @@ static int btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info, start += fs_info->nodesize; if (!eb) continue; @@ -7788,7 +7788,7 @@ index d50182b6deec..e5c963bb873d 100644 kfree(sctx); } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c -index 433ce221dc5c..dd6d5b6844f1 100644 +index 3f3c8f9186f9..366fb4cde145 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -58,6 +58,7 @@ @@ -7799,16 +7799,7 @@ index 433ce221dc5c..dd6d5b6844f1 100644 #define CREATE_TRACE_POINTS #include -@@ -1630,6 +1631,8 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info, - btrfs_workqueue_set_max(fs_info->hipri_workers, new_pool_size); - btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size); - btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size); -+ workqueue_set_max_active(fs_info->endio_workers, new_pool_size); -+ workqueue_set_max_active(fs_info->endio_meta_workers, new_pool_size); - btrfs_workqueue_set_max(fs_info->endio_write_workers, new_pool_size); - btrfs_workqueue_set_max(fs_info->endio_freespace_worker, new_pool_size); - btrfs_workqueue_set_max(fs_info->delayed_workers, new_pool_size); -@@ -2049,7 +2052,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) +@@ -2049,7 +2050,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) } /* diff --git a/6.2/0008-Implement-amd-pstate-epp-and-amd-pstate-guided-drive.patch b/6.2/0008-Implement-amd-pstate-epp-and-amd-pstate-guided-drive.patch index 431c41a2..2c6c500f 100644 --- a/6.2/0008-Implement-amd-pstate-epp-and-amd-pstate-guided-drive.patch +++ b/6.2/0008-Implement-amd-pstate-epp-and-amd-pstate-guided-drive.patch @@ -1,4 +1,4 @@ -From 0ae7f3e01c6d3f6e596eb17315ae6ee6c6e30538 Mon Sep 17 00:00:00 2001 +From 498e79383c15c846059b24391fce696d83ee9f83 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Fri, 10 Mar 2023 18:05:48 +0100 Subject: [PATCH 08/15] Implement amd-pstate-epp and amd-pstate-guided driver diff --git a/6.2/0009-ksm.patch b/6.2/0009-ksm.patch index 5a852e45..adb62b28 100644 --- a/6.2/0009-ksm.patch +++ b/6.2/0009-ksm.patch @@ -1,4 +1,4 @@ -From a7b105f93faf08a10fd8461d0eb11f82943c7fe3 Mon Sep 17 00:00:00 2001 +From 0cc7a04fbadb3ea4948f19e025e158f486f84663 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Fri, 17 Mar 2023 17:40:46 +0100 Subject: [PATCH 09/15] ksm diff --git a/6.2/0010-maple-lru.patch b/6.2/0010-maple-lru.patch index 9f17e7c8..86b191d2 100644 --- a/6.2/0010-maple-lru.patch +++ b/6.2/0010-maple-lru.patch @@ -1,6 +1,6 @@ -From 5e1a0be395f154252ed7d0c8212923a7ac0381c4 Mon Sep 17 00:00:00 2001 +From 330db0b8fd28b6036a1dc14bfbad31f64999d8b5 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Thu, 13 Apr 2023 18:17:33 +0200 +Date: Thu, 20 Apr 2023 19:38:17 +0200 Subject: [PATCH 10/15] maple-lru Signed-off-by: Peter Jung @@ -18,30 +18,33 @@ Signed-off-by: Peter Jung arch/powerpc/kvm/book3s.h | 2 + arch/powerpc/kvm/book3s_64_mmu_radix.c | 78 +- arch/powerpc/kvm/book3s_hv.c | 10 +- + arch/s390/mm/hugetlbpage.c | 2 +- + arch/s390/mm/mmap.c | 2 +- arch/x86/include/asm/kvm_host.h | 27 + arch/x86/kvm/mmu/spte.h | 12 - arch/x86/kvm/mmu/tdp_mmu.c | 41 + + fs/hugetlbfs/inode.c | 2 +- include/linux/fs.h | 2 + include/linux/kvm_host.h | 29 + include/linux/maple_tree.h | 8 +- include/linux/memcontrol.h | 10 + include/linux/mm_inline.h | 19 +- include/linux/mmu_notifier.h | 40 + - include/linux/mmzone.h | 130 ++- - lib/maple_tree.c | 103 +- + include/linux/mmzone.h | 138 ++- + lib/maple_tree.c | 191 ++-- mm/fadvise.c | 5 +- mm/memcontrol.c | 12 + mm/memory.c | 7 +- - mm/mmap.c | 16 +- + mm/mmap.c | 73 +- mm/mmu_notifier.c | 26 + mm/nommu.c | 8 +- mm/page_alloc.c | 1 + mm/rmap.c | 48 +- - mm/vmscan.c | 1210 +++++++++++++++-------- + mm/vmscan.c | 1316 +++++++++++++++-------- mm/workingset.c | 4 +- tools/testing/radix-tree/maple.c | 56 +- - virt/kvm/kvm_main.c | 58 ++ - 36 files changed, 1668 insertions(+), 648 deletions(-) + virt/kvm/kvm_main.c | 58 + + 39 files changed, 1805 insertions(+), 776 deletions(-) diff --git a/Documentation/mm/multigen_lru.rst b/Documentation/mm/multigen_lru.rst index d7062c6a8946..52ed5092022f 100644 @@ -314,7 +317,7 @@ index c8dca8ae359c..350437661d4b 100644 + #endif /* __ARM64_S2_PGTABLE_H_ */ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c -index 9c5573bc4614..6770bc47f5c9 100644 +index e57f8ae09387..0b71117ffc7e 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -191,6 +191,7 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) @@ -736,6 +739,32 @@ index 6ba68dd6190b..17b415661282 100644 static int kvmppc_book3s_init_hv(void) { int r; +diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c +index c299a18273ff..c718f2a0de94 100644 +--- a/arch/s390/mm/hugetlbpage.c ++++ b/arch/s390/mm/hugetlbpage.c +@@ -273,7 +273,7 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, + + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + info.length = len; +- info.low_limit = max(PAGE_SIZE, mmap_min_addr); ++ info.low_limit = PAGE_SIZE; + info.high_limit = current->mm->mmap_base; + info.align_mask = PAGE_MASK & ~huge_page_mask(h); + info.align_offset = 0; +diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c +index 3327c47bc181..fc9a7dc26c5e 100644 +--- a/arch/s390/mm/mmap.c ++++ b/arch/s390/mm/mmap.c +@@ -136,7 +136,7 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp, unsigned long ad + + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + info.length = len; +- info.low_limit = max(PAGE_SIZE, mmap_min_addr); ++ info.low_limit = PAGE_SIZE; + info.high_limit = mm->mmap_base; + if (filp || (flags & MAP_SHARED)) + info.align_mask = MMAP_ALIGN_MASK << PAGE_SHIFT; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 24480b4f1c57..a076e337b3db 100644 --- a/arch/x86/include/asm/kvm_host.h @@ -861,6 +890,19 @@ index d6df38d371a0..9028e09f1aab 100644 static bool test_age_gfn(struct kvm *kvm, struct tdp_iter *iter, struct kvm_gfn_range *range) { +diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c +index 790d2727141a..07297fac7de8 100644 +--- a/fs/hugetlbfs/inode.c ++++ b/fs/hugetlbfs/inode.c +@@ -208,7 +208,7 @@ hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long addr, + + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + info.length = len; +- info.low_limit = max(PAGE_SIZE, mmap_min_addr); ++ info.low_limit = PAGE_SIZE; + info.high_limit = arch_get_mmap_base(addr, current->mm->mmap_base); + info.align_mask = PAGE_MASK & ~huge_page_mask(h); + info.align_offset = 0; diff --git a/include/linux/fs.h b/include/linux/fs.h index c1769a2c5d70..d353c262d669 100644 --- a/include/linux/fs.h @@ -1110,7 +1152,7 @@ index d6c06e140277..521f71ad0467 100644 unsigned long address) { diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h -index cd28a100d9e4..0ddbf712708d 100644 +index cd28a100d9e4..5eeca358d043 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -7,6 +7,7 @@ @@ -1182,7 +1224,26 @@ index cd28a100d9e4..0ddbf712708d 100644 }; enum { -@@ -461,7 +471,7 @@ struct lru_gen_mm_state { +@@ -444,24 +454,20 @@ enum { + struct lru_gen_mm_state { + /* set to max_seq after each iteration */ + unsigned long seq; +- /* where the current iteration continues (inclusive) */ ++ /* where the current iteration continues after */ + struct list_head *head; +- /* where the last iteration ended (exclusive) */ ++ /* where the last iteration ended before */ + struct list_head *tail; +- /* to wait for the last page table walker to finish */ +- struct wait_queue_head wait; + /* Bloom filters flip after each iteration */ + unsigned long *filters[NR_BLOOM_FILTERS]; + /* the mm stats for debugging */ + unsigned long stats[NR_HIST_GENS][NR_MM_STATS]; +- /* the number of concurrent page table walkers */ +- int nr_walkers; + }; + struct lru_gen_mm_walk { /* the lruvec under reclaim */ struct lruvec *lruvec; @@ -1191,7 +1252,7 @@ index cd28a100d9e4..0ddbf712708d 100644 unsigned long max_seq; /* the next address within an mm to scan */ unsigned long next_addr; -@@ -476,24 +486,101 @@ struct lru_gen_mm_walk { +@@ -476,24 +482,101 @@ struct lru_gen_mm_walk { }; void lru_gen_init_lruvec(struct lruvec *lruvec); @@ -1296,7 +1357,7 @@ index cd28a100d9e4..0ddbf712708d 100644 static inline void lru_gen_init_memcg(struct mem_cgroup *memcg) { } -@@ -501,7 +588,24 @@ static inline void lru_gen_init_memcg(struct mem_cgroup *memcg) +@@ -501,7 +584,24 @@ static inline void lru_gen_init_memcg(struct mem_cgroup *memcg) static inline void lru_gen_exit_memcg(struct mem_cgroup *memcg) { } @@ -1322,7 +1383,7 @@ index cd28a100d9e4..0ddbf712708d 100644 #endif /* CONFIG_LRU_GEN */ -@@ -524,7 +628,7 @@ struct lruvec { +@@ -524,7 +624,7 @@ struct lruvec { unsigned long flags; #ifdef CONFIG_LRU_GEN /* evictable pages divided into generations */ @@ -1331,7 +1392,7 @@ index cd28a100d9e4..0ddbf712708d 100644 /* to concurrently iterate lru_gen_mm_list */ struct lru_gen_mm_state mm_state; #endif -@@ -1242,7 +1346,9 @@ typedef struct pglist_data { +@@ -1242,7 +1342,9 @@ typedef struct pglist_data { #ifdef CONFIG_LRU_GEN /* kswap mm walk data */ @@ -1343,7 +1404,7 @@ index cd28a100d9e4..0ddbf712708d 100644 CACHELINE_PADDING(_pad2_); diff --git a/lib/maple_tree.c b/lib/maple_tree.c -index fb452873914f..c167efc70e60 100644 +index 022573f49957..110a36479dce 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -146,6 +146,13 @@ struct maple_subtree_state { @@ -1433,7 +1494,7 @@ index fb452873914f..c167efc70e60 100644 } /* -@@ -1950,10 +1948,9 @@ static inline int mab_calc_split(struct ma_state *mas, +@@ -1952,10 +1950,9 @@ static inline int mab_calc_split(struct ma_state *mas, /* Avoid ending a node on a NULL entry */ split = mab_no_null_split(bn, split, slot_count); @@ -1446,7 +1507,7 @@ index fb452873914f..c167efc70e60 100644 return split; } -@@ -2176,7 +2173,7 @@ static inline void mas_bulk_rebalance(struct ma_state *mas, unsigned char end, +@@ -2178,7 +2175,7 @@ static inline void mas_bulk_rebalance(struct ma_state *mas, unsigned char end, * * Return: The actual end of the data stored in @b_node */ @@ -1455,7 +1516,7 @@ index fb452873914f..c167efc70e60 100644 struct maple_big_node *b_node, unsigned char offset_end) { unsigned char slot; -@@ -2313,9 +2310,7 @@ static inline struct maple_enode *mte_node_or_none(struct maple_enode *enode) +@@ -2315,9 +2312,7 @@ static inline struct maple_enode *mte_node_or_none(struct maple_enode *enode) static inline void mas_wr_node_walk(struct ma_wr_state *wr_mas) { struct ma_state *mas = wr_mas->mas; @@ -1466,14 +1527,14 @@ index fb452873914f..c167efc70e60 100644 if (unlikely(ma_is_dense(wr_mas->type))) { wr_mas->r_max = wr_mas->r_min = mas->index; -@@ -2328,34 +2323,12 @@ static inline void mas_wr_node_walk(struct ma_wr_state *wr_mas) +@@ -2330,34 +2325,12 @@ static inline void mas_wr_node_walk(struct ma_wr_state *wr_mas) count = wr_mas->node_end = ma_data_end(wr_mas->node, wr_mas->type, wr_mas->pivots, mas->max); offset = mas->offset; - min = mas_safe_min(mas, wr_mas->pivots, offset); - if (unlikely(offset == count)) - goto max; - +- - max = wr_mas->pivots[offset]; - index = mas->index; - if (unlikely(index <= max)) @@ -1489,12 +1550,12 @@ index fb452873914f..c167efc70e60 100644 - goto done; - else if (unlikely(!max)) - break; -+ while (offset < count && mas->index > wr_mas->pivots[offset]) -+ offset++; - min = max + 1; - } -- ++ while (offset < count && mas->index > wr_mas->pivots[offset]) ++ offset++; + -max: - max = mas->max; -done: @@ -1505,7 +1566,7 @@ index fb452873914f..c167efc70e60 100644 wr_mas->offset_end = mas->offset = offset; } -@@ -3010,7 +2983,7 @@ static inline void *mtree_range_walk(struct ma_state *mas) +@@ -3012,7 +2985,7 @@ static inline void *mtree_range_walk(struct ma_state *mas) mas->min = prev_min; mas->max = prev_max; mas->node = last; @@ -1514,7 +1575,7 @@ index fb452873914f..c167efc70e60 100644 dead_node: mas_reset(mas); -@@ -3283,7 +3256,7 @@ static inline void mas_destroy_rebalance(struct ma_state *mas, unsigned char end +@@ -3285,7 +3258,7 @@ static inline void mas_destroy_rebalance(struct ma_state *mas, unsigned char end if (tmp < max_p) memset(pivs + tmp, 0, @@ -1523,7 +1584,7 @@ index fb452873914f..c167efc70e60 100644 if (tmp < mt_slots[mt]) memset(slots + tmp, 0, sizeof(void *) * (max_s - tmp)); -@@ -3530,7 +3503,6 @@ static inline bool mas_push_data(struct ma_state *mas, int height, +@@ -3532,7 +3505,6 @@ static inline bool mas_push_data(struct ma_state *mas, int height, */ static int mas_split(struct ma_state *mas, struct maple_big_node *b_node) { @@ -1531,7 +1592,7 @@ index fb452873914f..c167efc70e60 100644 struct maple_subtree_state mast; int height = 0; unsigned char mid_split, split = 0; -@@ -3649,7 +3621,7 @@ static inline bool mas_reuse_node(struct ma_wr_state *wr_mas, +@@ -3651,7 +3623,7 @@ static inline bool mas_reuse_node(struct ma_wr_state *wr_mas, * @b_node: The maple big node * @end: The end of the data. */ @@ -1540,7 +1601,7 @@ index fb452873914f..c167efc70e60 100644 struct maple_big_node *b_node, unsigned char end) { struct maple_node *node; -@@ -3950,7 +3922,7 @@ static inline void *mtree_lookup_walk(struct ma_state *mas) +@@ -3952,7 +3924,7 @@ static inline void *mtree_lookup_walk(struct ma_state *mas) goto dead_node; } while (!ma_is_leaf(type)); @@ -1549,7 +1610,7 @@ index fb452873914f..c167efc70e60 100644 dead_node: mas_reset(mas); -@@ -4785,15 +4757,11 @@ static inline void *mas_next_nentry(struct ma_state *mas, +@@ -4788,15 +4760,11 @@ static inline void *mas_next_nentry(struct ma_state *mas, static inline void mas_rewalk(struct ma_state *mas, unsigned long index) { @@ -1565,7 +1626,179 @@ index fb452873914f..c167efc70e60 100644 } /* -@@ -5675,8 +5643,8 @@ static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt, +@@ -4973,7 +4941,8 @@ static inline void *mas_prev_entry(struct ma_state *mas, unsigned long min) + * Return: True if found in a leaf, false otherwise. + * + */ +-static bool mas_rev_awalk(struct ma_state *mas, unsigned long size) ++static bool mas_rev_awalk(struct ma_state *mas, unsigned long size, ++ unsigned long *gap_min, unsigned long *gap_max) + { + enum maple_type type = mte_node_type(mas->node); + struct maple_node *node = mas_mn(mas); +@@ -5038,8 +5007,8 @@ static bool mas_rev_awalk(struct ma_state *mas, unsigned long size) + + if (unlikely(ma_is_leaf(type))) { + mas->offset = offset; +- mas->min = min; +- mas->max = min + gap - 1; ++ *gap_min = min; ++ *gap_max = min + gap - 1; + return true; + } + +@@ -5063,10 +5032,10 @@ static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size) + { + enum maple_type type = mte_node_type(mas->node); + unsigned long pivot, min, gap = 0; +- unsigned char offset; +- unsigned long *gaps; +- unsigned long *pivots = ma_pivots(mas_mn(mas), type); +- void __rcu **slots = ma_slots(mas_mn(mas), type); ++ unsigned char offset, data_end; ++ unsigned long *gaps, *pivots; ++ void __rcu **slots; ++ struct maple_node *node; + bool found = false; + + if (ma_is_dense(type)) { +@@ -5074,13 +5043,15 @@ static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size) + return true; + } + +- gaps = ma_gaps(mte_to_node(mas->node), type); ++ node = mas_mn(mas); ++ pivots = ma_pivots(node, type); ++ slots = ma_slots(node, type); ++ gaps = ma_gaps(node, type); + offset = mas->offset; + min = mas_safe_min(mas, pivots, offset); +- for (; offset < mt_slots[type]; offset++) { +- pivot = mas_safe_pivot(mas, pivots, offset, type); +- if (offset && !pivot) +- break; ++ data_end = ma_data_end(node, type, pivots, mas->max); ++ for (; offset <= data_end; offset++) { ++ pivot = mas_logical_pivot(mas, pivots, offset, type); + + /* Not within lower bounds */ + if (mas->index > pivot) +@@ -5279,25 +5250,28 @@ static inline void mas_fill_gap(struct ma_state *mas, void *entry, + * @size: The size of the gap + * @fwd: Searching forward or back + */ +-static inline void mas_sparse_area(struct ma_state *mas, unsigned long min, ++static inline int mas_sparse_area(struct ma_state *mas, unsigned long min, + unsigned long max, unsigned long size, bool fwd) + { +- unsigned long start = 0; +- +- if (!unlikely(mas_is_none(mas))) +- start++; ++ if (!unlikely(mas_is_none(mas)) && min == 0) { ++ min++; ++ /* ++ * At this time, min is increased, we need to recheck whether ++ * the size is satisfied. ++ */ ++ if (min > max || max - min + 1 < size) ++ return -EBUSY; ++ } + /* mas_is_ptr */ + +- if (start < min) +- start = min; +- + if (fwd) { +- mas->index = start; +- mas->last = start + size - 1; +- return; ++ mas->index = min; ++ mas->last = min + size - 1; ++ } else { ++ mas->last = max; ++ mas->index = max - size + 1; + } +- +- mas->index = max; ++ return 0; + } + + /* +@@ -5315,6 +5289,9 @@ int mas_empty_area(struct ma_state *mas, unsigned long min, + unsigned long *pivots; + enum maple_type mt; + ++ if (min >= max) ++ return -EINVAL; ++ + if (mas_is_start(mas)) + mas_start(mas); + else if (mas->offset >= 2) +@@ -5323,10 +5300,8 @@ int mas_empty_area(struct ma_state *mas, unsigned long min, + return -EBUSY; + + /* Empty set */ +- if (mas_is_none(mas) || mas_is_ptr(mas)) { +- mas_sparse_area(mas, min, max, size, true); +- return 0; +- } ++ if (mas_is_none(mas) || mas_is_ptr(mas)) ++ return mas_sparse_area(mas, min, max, size, true); + + /* The start of the window can only be within these values */ + mas->index = min; +@@ -5369,6 +5344,9 @@ int mas_empty_area_rev(struct ma_state *mas, unsigned long min, + { + struct maple_enode *last = mas->node; + ++ if (min >= max) ++ return -EINVAL; ++ + if (mas_is_start(mas)) { + mas_start(mas); + mas->offset = mas_data_end(mas); +@@ -5379,16 +5357,14 @@ int mas_empty_area_rev(struct ma_state *mas, unsigned long min, + } + + /* Empty set. */ +- if (mas_is_none(mas) || mas_is_ptr(mas)) { +- mas_sparse_area(mas, min, max, size, false); +- return 0; +- } ++ if (mas_is_none(mas) || mas_is_ptr(mas)) ++ return mas_sparse_area(mas, min, max, size, false); + + /* The start of the window can only be within these values. */ + mas->index = min; + mas->last = max; + +- while (!mas_rev_awalk(mas, size)) { ++ while (!mas_rev_awalk(mas, size, &min, &max)) { + if (last == mas->node) { + if (!mas_rewind_node(mas)) + return -EBUSY; +@@ -5403,17 +5379,9 @@ int mas_empty_area_rev(struct ma_state *mas, unsigned long min, + if (unlikely(mas->offset == MAPLE_NODE_SLOTS)) + return -EBUSY; + +- /* +- * mas_rev_awalk() has set mas->min and mas->max to the gap values. If +- * the maximum is outside the window we are searching, then use the last +- * location in the search. +- * mas->max and mas->min is the range of the gap. +- * mas->index and mas->last are currently set to the search range. +- */ +- + /* Trim the upper limit to the max. */ +- if (mas->max <= mas->last) +- mas->last = mas->max; ++ if (max <= mas->last) ++ mas->last = max; + + mas->index = mas->last - size + 1; + return 0; +@@ -5678,8 +5646,8 @@ static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt, /* * mte_destroy_walk() - Free a tree or sub-tree. @@ -1576,7 +1809,7 @@ index fb452873914f..c167efc70e60 100644 * * Must hold the write lock. */ -@@ -5708,7 +5676,6 @@ static void mas_wr_store_setup(struct ma_wr_state *wr_mas) +@@ -5711,7 +5679,6 @@ static void mas_wr_store_setup(struct ma_wr_state *wr_mas) mas_reset(wr_mas->mas); } } @@ -1584,7 +1817,7 @@ index fb452873914f..c167efc70e60 100644 } /* Interface */ -@@ -5800,12 +5767,11 @@ EXPORT_SYMBOL_GPL(mas_store_prealloc); +@@ -5803,12 +5770,11 @@ EXPORT_SYMBOL_GPL(mas_store_prealloc); /** * mas_preallocate() - Preallocate enough nodes for a store operation * @mas: The maple state @@ -1598,7 +1831,7 @@ index fb452873914f..c167efc70e60 100644 { int ret; -@@ -5821,6 +5787,7 @@ int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp) +@@ -5824,6 +5790,7 @@ int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp) mas_reset(mas); return ret; } @@ -1606,7 +1839,7 @@ index fb452873914f..c167efc70e60 100644 /* * mas_destroy() - destroy a maple state. -@@ -6833,7 +6800,7 @@ static void mt_dump_range64(const struct maple_tree *mt, void *entry, +@@ -6836,7 +6803,7 @@ static void mt_dump_range64(const struct maple_tree *mt, void *entry, if (i < (MAPLE_RANGE64_SLOTS - 1)) last = node->pivot[i]; @@ -1615,7 +1848,7 @@ index fb452873914f..c167efc70e60 100644 break; if (last == 0 && i > 0) break; -@@ -6940,7 +6907,7 @@ void mt_dump(const struct maple_tree *mt) +@@ -6943,7 +6910,7 @@ void mt_dump(const struct maple_tree *mt) if (!xa_is_node(entry)) mt_dump_entry(entry, 0, 0, 0); else if (entry) @@ -1724,7 +1957,7 @@ index 6a99e9dc07e6..8a26ee4dc4d4 100644 static void lru_gen_exit_fault(void) diff --git a/mm/mmap.c b/mm/mmap.c -index 1931da077b2f..c1a09b21a22a 100644 +index 1931da077b2f..b7380077336d 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -472,7 +472,7 @@ static int vma_link(struct mm_struct *mm, struct vm_area_struct *vma) @@ -1754,7 +1987,107 @@ index 1931da077b2f..c1a09b21a22a 100644 return -ENOMEM; vma_adjust_trans_huge(orig_vma, start, end, adjust_next); -@@ -1938,7 +1938,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) +@@ -1566,6 +1566,8 @@ static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags) + static unsigned long unmapped_area(struct vm_unmapped_area_info *info) + { + unsigned long length, gap; ++ unsigned long low_limit, high_limit; ++ struct vm_area_struct *tmp; + + MA_STATE(mas, ¤t->mm->mm_mt, 0, 0); + +@@ -1574,12 +1576,32 @@ static unsigned long unmapped_area(struct vm_unmapped_area_info *info) + if (length < info->length) + return -ENOMEM; + +- if (mas_empty_area(&mas, info->low_limit, info->high_limit - 1, +- length)) ++ low_limit = info->low_limit; ++ if (low_limit < mmap_min_addr) ++ low_limit = mmap_min_addr; ++ high_limit = info->high_limit; ++retry: ++ if (mas_empty_area(&mas, low_limit, high_limit - 1, length)) + return -ENOMEM; + + gap = mas.index; + gap += (info->align_offset - gap) & info->align_mask; ++ tmp = mas_next(&mas, ULONG_MAX); ++ if (tmp && (tmp->vm_flags & VM_GROWSDOWN)) { /* Avoid prev check if possible */ ++ if (vm_start_gap(tmp) < gap + length - 1) { ++ low_limit = tmp->vm_end; ++ mas_reset(&mas); ++ goto retry; ++ } ++ } else { ++ tmp = mas_prev(&mas, 0); ++ if (tmp && vm_end_gap(tmp) > gap) { ++ low_limit = vm_end_gap(tmp); ++ mas_reset(&mas); ++ goto retry; ++ } ++ } ++ + return gap; + } + +@@ -1595,7 +1617,9 @@ static unsigned long unmapped_area(struct vm_unmapped_area_info *info) + */ + static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info) + { +- unsigned long length, gap; ++ unsigned long length, gap, gap_end; ++ unsigned long low_limit, high_limit; ++ struct vm_area_struct *tmp; + + MA_STATE(mas, ¤t->mm->mm_mt, 0, 0); + /* Adjust search length to account for worst case alignment overhead */ +@@ -1603,12 +1627,33 @@ static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info) + if (length < info->length) + return -ENOMEM; + +- if (mas_empty_area_rev(&mas, info->low_limit, info->high_limit - 1, +- length)) ++ low_limit = info->low_limit; ++ if (low_limit < mmap_min_addr) ++ low_limit = mmap_min_addr; ++ high_limit = info->high_limit; ++retry: ++ if (mas_empty_area_rev(&mas, low_limit, high_limit - 1, length)) + return -ENOMEM; + + gap = mas.last + 1 - info->length; + gap -= (gap - info->align_offset) & info->align_mask; ++ gap_end = mas.last; ++ tmp = mas_next(&mas, ULONG_MAX); ++ if (tmp && (tmp->vm_flags & VM_GROWSDOWN)) { /* Avoid prev check if possible */ ++ if (vm_start_gap(tmp) <= gap_end) { ++ high_limit = vm_start_gap(tmp); ++ mas_reset(&mas); ++ goto retry; ++ } ++ } else { ++ tmp = mas_prev(&mas, 0); ++ if (tmp && vm_end_gap(tmp) > gap) { ++ high_limit = tmp->vm_start; ++ mas_reset(&mas); ++ goto retry; ++ } ++ } ++ + return gap; + } + +@@ -1722,7 +1767,7 @@ generic_get_unmapped_area_topdown(struct file *filp, unsigned long addr, + + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + info.length = len; +- info.low_limit = max(PAGE_SIZE, mmap_min_addr); ++ info.low_limit = PAGE_SIZE; + info.high_limit = arch_get_mmap_base(addr, mm->mmap_base); + info.align_mask = 0; + info.align_offset = 0; +@@ -1938,7 +1983,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) /* Check that both stack segments have the same anon_vma? */ } @@ -1763,7 +2096,7 @@ index 1931da077b2f..c1a09b21a22a 100644 return -ENOMEM; /* We must make sure the anon_vma is allocated. */ -@@ -2019,7 +2019,7 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address) +@@ -2019,7 +2064,7 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address) return -ENOMEM; } @@ -1772,7 +2105,7 @@ index 1931da077b2f..c1a09b21a22a 100644 return -ENOMEM; /* We must make sure the anon_vma is allocated. */ -@@ -2311,7 +2311,7 @@ do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma, +@@ -2311,7 +2356,7 @@ do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma, mt_init_flags(&mt_detach, mas->tree->ma_flags & MT_FLAGS_LOCK_MASK); mt_set_external_lock(&mt_detach, &mm->mmap_lock); @@ -1781,7 +2114,7 @@ index 1931da077b2f..c1a09b21a22a 100644 return -ENOMEM; mas->last = end - 1; -@@ -2680,7 +2680,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, +@@ -2680,7 +2725,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, goto free_vma; } @@ -1790,7 +2123,7 @@ index 1931da077b2f..c1a09b21a22a 100644 error = -ENOMEM; if (file) goto close_and_free_vma; -@@ -2953,7 +2953,7 @@ static int do_brk_flags(struct ma_state *mas, struct vm_area_struct *vma, +@@ -2953,7 +2998,7 @@ static int do_brk_flags(struct ma_state *mas, struct vm_area_struct *vma, can_vma_merge_after(vma, flags, NULL, NULL, addr >> PAGE_SHIFT, NULL_VM_UFFD_CTX, NULL)) { mas_set_range(mas, vma->vm_start, addr + len - 1); @@ -1971,7 +2304,7 @@ index 3b45d049069e..8ecbbadab752 100644 *vm_flags = pra.vm_flags; diff --git a/mm/vmscan.c b/mm/vmscan.c -index 160acbbdf111..ec0142165ce7 100644 +index 160acbbdf111..4af7fd442b4a 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -55,6 +55,10 @@ @@ -2148,7 +2481,31 @@ index 160acbbdf111..ec0142165ce7 100644 /****************************************************************************** * mm_struct list ******************************************************************************/ -@@ -3348,94 +3454,6 @@ void lru_gen_migrate_mm(struct mm_struct *mm) +@@ -3294,18 +3400,13 @@ void lru_gen_del_mm(struct mm_struct *mm) + for_each_node(nid) { + struct lruvec *lruvec = get_lruvec(memcg, nid); + +- /* where the last iteration ended (exclusive) */ ++ /* where the current iteration continues after */ ++ if (lruvec->mm_state.head == &mm->lru_gen.list) ++ lruvec->mm_state.head = lruvec->mm_state.head->prev; ++ ++ /* where the last iteration ended before */ + if (lruvec->mm_state.tail == &mm->lru_gen.list) + lruvec->mm_state.tail = lruvec->mm_state.tail->next; +- +- /* where the current iteration continues (inclusive) */ +- if (lruvec->mm_state.head != &mm->lru_gen.list) +- continue; +- +- lruvec->mm_state.head = lruvec->mm_state.head->next; +- /* the deletion ends the current iteration */ +- if (lruvec->mm_state.head == &mm_list->fifo) +- WRITE_ONCE(lruvec->mm_state.seq, lruvec->mm_state.seq + 1); + } + + list_del_init(&mm->lru_gen.list); +@@ -3348,94 +3449,6 @@ void lru_gen_migrate_mm(struct mm_struct *mm) } #endif @@ -2243,7 +2600,112 @@ index 160acbbdf111..ec0142165ce7 100644 static void reset_mm_stats(struct lruvec *lruvec, struct lru_gen_mm_walk *walk, bool last) { int i; -@@ -3592,7 +3610,7 @@ static bool iterate_mm_list_nowalk(struct lruvec *lruvec, unsigned long max_seq) +@@ -3489,68 +3502,54 @@ static bool iterate_mm_list(struct lruvec *lruvec, struct lru_gen_mm_walk *walk, + struct mm_struct **iter) + { + bool first = false; +- bool last = true; ++ bool last = false; + struct mm_struct *mm = NULL; + struct mem_cgroup *memcg = lruvec_memcg(lruvec); + struct lru_gen_mm_list *mm_list = get_mm_list(memcg); + struct lru_gen_mm_state *mm_state = &lruvec->mm_state; + + /* +- * There are four interesting cases for this page table walker: +- * 1. It tries to start a new iteration of mm_list with a stale max_seq; +- * there is nothing left to do. +- * 2. It's the first of the current generation, and it needs to reset +- * the Bloom filter for the next generation. +- * 3. It reaches the end of mm_list, and it needs to increment +- * mm_state->seq; the iteration is done. +- * 4. It's the last of the current generation, and it needs to reset the +- * mm stats counters for the next generation. ++ * mm_state->seq is incremented after each iteration of mm_list. There ++ * are three interesting cases for this page table walker: ++ * 1. It tries to start a new iteration with a stale max_seq: there is ++ * nothing left to do. ++ * 2. It started the next iteration: it needs to reset the Bloom filter ++ * so that a fresh set of PTE tables can be recorded. ++ * 3. It ended the current iteration: it needs to reset the mm stats ++ * counters and tell its caller to increment max_seq. + */ + spin_lock(&mm_list->lock); + + VM_WARN_ON_ONCE(mm_state->seq + 1 < walk->max_seq); +- VM_WARN_ON_ONCE(*iter && mm_state->seq > walk->max_seq); +- VM_WARN_ON_ONCE(*iter && !mm_state->nr_walkers); + +- if (walk->max_seq <= mm_state->seq) { +- if (!*iter) +- last = false; ++ if (walk->max_seq <= mm_state->seq) + goto done; +- } + +- if (!mm_state->nr_walkers) { +- VM_WARN_ON_ONCE(mm_state->head && mm_state->head != &mm_list->fifo); ++ if (!mm_state->head) ++ mm_state->head = &mm_list->fifo; + +- mm_state->head = mm_list->fifo.next; ++ if (mm_state->head == &mm_list->fifo) + first = true; +- } +- +- while (!mm && mm_state->head != &mm_list->fifo) { +- mm = list_entry(mm_state->head, struct mm_struct, lru_gen.list); + ++ do { + mm_state->head = mm_state->head->next; ++ if (mm_state->head == &mm_list->fifo) { ++ WRITE_ONCE(mm_state->seq, mm_state->seq + 1); ++ last = true; ++ break; ++ } + + /* force scan for those added after the last iteration */ +- if (!mm_state->tail || mm_state->tail == &mm->lru_gen.list) { +- mm_state->tail = mm_state->head; ++ if (!mm_state->tail || mm_state->tail == mm_state->head) { ++ mm_state->tail = mm_state->head->next; + walk->force_scan = true; + } + ++ mm = list_entry(mm_state->head, struct mm_struct, lru_gen.list); + if (should_skip_mm(mm, walk)) + mm = NULL; +- } +- +- if (mm_state->head == &mm_list->fifo) +- WRITE_ONCE(mm_state->seq, mm_state->seq + 1); ++ } while (!mm); + done: +- if (*iter && !mm) +- mm_state->nr_walkers--; +- if (!*iter && mm) +- mm_state->nr_walkers++; +- +- if (mm_state->nr_walkers) +- last = false; +- + if (*iter || last) + reset_mm_stats(lruvec, walk, last); + +@@ -3578,9 +3577,9 @@ static bool iterate_mm_list_nowalk(struct lruvec *lruvec, unsigned long max_seq) + + VM_WARN_ON_ONCE(mm_state->seq + 1 < max_seq); + +- if (max_seq > mm_state->seq && !mm_state->nr_walkers) { +- VM_WARN_ON_ONCE(mm_state->head && mm_state->head != &mm_list->fifo); +- ++ if (max_seq > mm_state->seq) { ++ mm_state->head = NULL; ++ mm_state->tail = NULL; + WRITE_ONCE(mm_state->seq, mm_state->seq + 1); + reset_mm_stats(lruvec, NULL, true); + success = true; +@@ -3592,7 +3591,7 @@ static bool iterate_mm_list_nowalk(struct lruvec *lruvec, unsigned long max_seq) } /****************************************************************************** @@ -2252,7 +2714,7 @@ index 160acbbdf111..ec0142165ce7 100644 ******************************************************************************/ /* -@@ -3623,7 +3641,7 @@ struct ctrl_pos { +@@ -3623,7 +3622,7 @@ struct ctrl_pos { static void read_ctrl_pos(struct lruvec *lruvec, int type, int tier, int gain, struct ctrl_pos *pos) { @@ -2261,7 +2723,7 @@ index 160acbbdf111..ec0142165ce7 100644 int hist = lru_hist_from_seq(lrugen->min_seq[type]); pos->refaulted = lrugen->avg_refaulted[type][tier] + -@@ -3638,7 +3656,7 @@ static void read_ctrl_pos(struct lruvec *lruvec, int type, int tier, int gain, +@@ -3638,7 +3637,7 @@ static void read_ctrl_pos(struct lruvec *lruvec, int type, int tier, int gain, static void reset_ctrl_pos(struct lruvec *lruvec, int type, bool carryover) { int hist, tier; @@ -2270,7 +2732,7 @@ index 160acbbdf111..ec0142165ce7 100644 bool clear = carryover ? NR_HIST_GENS == 1 : NR_HIST_GENS > 1; unsigned long seq = carryover ? lrugen->min_seq[type] : lrugen->max_seq + 1; -@@ -3715,7 +3733,7 @@ static int folio_update_gen(struct folio *folio, int gen) +@@ -3715,7 +3714,7 @@ static int folio_update_gen(struct folio *folio, int gen) static int folio_inc_gen(struct lruvec *lruvec, struct folio *folio, bool reclaiming) { int type = folio_is_file_lru(folio); @@ -2279,7 +2741,7 @@ index 160acbbdf111..ec0142165ce7 100644 int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]); unsigned long new_flags, old_flags = READ_ONCE(folio->flags); -@@ -3760,7 +3778,7 @@ static void update_batch_size(struct lru_gen_mm_walk *walk, struct folio *folio, +@@ -3760,7 +3759,7 @@ static void update_batch_size(struct lru_gen_mm_walk *walk, struct folio *folio, static void reset_batch_size(struct lruvec *lruvec, struct lru_gen_mm_walk *walk) { int gen, type, zone; @@ -2288,7 +2750,7 @@ index 160acbbdf111..ec0142165ce7 100644 walk->batched = 0; -@@ -3793,7 +3811,10 @@ static int should_skip_vma(unsigned long start, unsigned long end, struct mm_wal +@@ -3793,7 +3792,10 @@ static int should_skip_vma(unsigned long start, unsigned long end, struct mm_wal if (is_vm_hugetlb_page(vma)) return true; @@ -2300,7 +2762,7 @@ index 160acbbdf111..ec0142165ce7 100644 return true; if (vma == get_gate_vma(vma->vm_mm)) -@@ -3908,6 +3929,55 @@ static struct folio *get_pfn_folio(unsigned long pfn, struct mem_cgroup *memcg, +@@ -3908,6 +3910,55 @@ static struct folio *get_pfn_folio(unsigned long pfn, struct mem_cgroup *memcg, return folio; } @@ -2356,7 +2818,7 @@ index 160acbbdf111..ec0142165ce7 100644 static bool suitable_to_scan(int total, int young) { int n = clamp_t(int, cache_line_size() / sizeof(pte_t), 2, 8); -@@ -3923,6 +3993,8 @@ static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end, +@@ -3923,6 +3974,8 @@ static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end, pte_t *pte; spinlock_t *ptl; unsigned long addr; @@ -2365,7 +2827,7 @@ index 160acbbdf111..ec0142165ce7 100644 int total = 0; int young = 0; struct lru_gen_mm_walk *walk = args->private; -@@ -3941,6 +4013,7 @@ static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end, +@@ -3941,6 +3994,7 @@ static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end, pte = pte_offset_map(pmd, start & PMD_MASK); restart: for (i = pte_index(start), addr = start; addr != end; i++, addr += PAGE_SIZE) { @@ -2373,7 +2835,7 @@ index 160acbbdf111..ec0142165ce7 100644 unsigned long pfn; struct folio *folio; -@@ -3948,20 +4021,27 @@ static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end, +@@ -3948,20 +4002,27 @@ static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end, walk->mm_stats[MM_LEAF_TOTAL]++; pfn = get_pte_pfn(pte[i], args->vma, addr); @@ -2406,7 +2868,7 @@ index 160acbbdf111..ec0142165ce7 100644 young++; walk->mm_stats[MM_LEAF_YOUNG]++; -@@ -3988,8 +4068,8 @@ static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end, +@@ -3988,8 +4049,8 @@ static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end, } #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) @@ -2417,7 +2879,7 @@ index 160acbbdf111..ec0142165ce7 100644 { int i; pmd_t *pmd; -@@ -4002,18 +4082,19 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area +@@ -4002,18 +4063,19 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area VM_WARN_ON_ONCE(pud_leaf(*pud)); /* try to batch at most 1+MIN_LRU_BATCH+1 entries */ @@ -2441,7 +2903,7 @@ index 160acbbdf111..ec0142165ce7 100644 ptl = pmd_lockptr(args->mm, pmd); if (!spin_trylock(ptl)) -@@ -4024,15 +4105,16 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area +@@ -4024,15 +4086,16 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area do { unsigned long pfn; struct folio *folio; @@ -2461,7 +2923,7 @@ index 160acbbdf111..ec0142165ce7 100644 pmdp_test_and_clear_young(vma, addr, pmd + i); goto next; } -@@ -4061,12 +4143,11 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area +@@ -4061,12 +4124,11 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area arch_leave_lazy_mmu_mode(); spin_unlock(ptl); done: @@ -2477,7 +2939,7 @@ index 160acbbdf111..ec0142165ce7 100644 { } #endif -@@ -4079,9 +4160,9 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end, +@@ -4079,9 +4141,9 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end, unsigned long next; unsigned long addr; struct vm_area_struct *vma; @@ -2489,7 +2951,7 @@ index 160acbbdf111..ec0142165ce7 100644 VM_WARN_ON_ONCE(pud_leaf(*pud)); -@@ -4120,18 +4201,17 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end, +@@ -4120,18 +4182,17 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end, if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat)) continue; @@ -2511,7 +2973,7 @@ index 160acbbdf111..ec0142165ce7 100644 } if (!walk->force_scan && !test_bloom_filter(walk->lruvec, walk->max_seq, pmd + i)) -@@ -4148,7 +4228,7 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end, +@@ -4148,7 +4209,7 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end, update_bloom_filter(walk->lruvec, walk->max_seq + 1, pmd + i); } @@ -2520,7 +2982,33 @@ index 160acbbdf111..ec0142165ce7 100644 if (i < PTRS_PER_PMD && get_next_vma(PUD_MASK, PMD_SIZE, args, &start, &end)) goto restart; -@@ -4238,7 +4318,7 @@ static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_ +@@ -4177,10 +4238,6 @@ static int walk_pud_range(p4d_t *p4d, unsigned long start, unsigned long end, + + walk_pmd_range(&val, addr, next, args); + +- /* a racy check to curtail the waiting time */ +- if (wq_has_sleeper(&walk->lruvec->mm_state.wait)) +- return 1; +- + if (need_resched() || walk->batched >= MAX_LRU_BATCH) { + end = (addr | ~PUD_MASK) + 1; + goto done; +@@ -4213,8 +4270,14 @@ static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_ + walk->next_addr = FIRST_USER_ADDRESS; + + do { ++ DEFINE_MAX_SEQ(lruvec); ++ + err = -EBUSY; + ++ /* another thread might have called inc_max_seq() */ ++ if (walk->max_seq != max_seq) ++ break; ++ + /* folio_update_gen() requires stable folio_memcg() */ + if (!mem_cgroup_trylock_pages(memcg)) + break; +@@ -4238,7 +4301,7 @@ static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_ } while (err == -EAGAIN); } @@ -2529,7 +3017,7 @@ index 160acbbdf111..ec0142165ce7 100644 { struct lru_gen_mm_walk *walk = current->reclaim_state->mm_walk; -@@ -4246,7 +4326,7 @@ static struct lru_gen_mm_walk *set_mm_walk(struct pglist_data *pgdat) +@@ -4246,7 +4309,7 @@ static struct lru_gen_mm_walk *set_mm_walk(struct pglist_data *pgdat) VM_WARN_ON_ONCE(walk); walk = &pgdat->mm_walk; @@ -2538,7 +3026,7 @@ index 160acbbdf111..ec0142165ce7 100644 VM_WARN_ON_ONCE(current_is_kswapd()); walk = kzalloc(sizeof(*walk), __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN); -@@ -4274,7 +4354,7 @@ static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap) +@@ -4274,7 +4337,7 @@ static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap) { int zone; int remaining = MAX_LRU_BATCH; @@ -2547,7 +3035,7 @@ index 160acbbdf111..ec0142165ce7 100644 int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]); if (type == LRU_GEN_ANON && !can_swap) -@@ -4282,7 +4362,7 @@ static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap) +@@ -4282,7 +4345,7 @@ static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap) /* prevent cold/hot inversion if force_scan is true */ for (zone = 0; zone < MAX_NR_ZONES; zone++) { @@ -2556,7 +3044,7 @@ index 160acbbdf111..ec0142165ce7 100644 while (!list_empty(head)) { struct folio *folio = lru_to_folio(head); -@@ -4293,7 +4373,7 @@ static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap) +@@ -4293,7 +4356,7 @@ static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap) VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio); new_gen = folio_inc_gen(lruvec, folio, false); @@ -2565,7 +3053,7 @@ index 160acbbdf111..ec0142165ce7 100644 if (!--remaining) return false; -@@ -4310,7 +4390,7 @@ static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap) +@@ -4310,7 +4373,7 @@ static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap) { int gen, type, zone; bool success = false; @@ -2574,7 +3062,7 @@ index 160acbbdf111..ec0142165ce7 100644 DEFINE_MIN_SEQ(lruvec); VM_WARN_ON_ONCE(!seq_is_valid(lruvec)); -@@ -4321,7 +4401,7 @@ static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap) +@@ -4321,7 +4384,7 @@ static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap) gen = lru_gen_from_seq(min_seq[type]); for (zone = 0; zone < MAX_NR_ZONES; zone++) { @@ -2583,7 +3071,7 @@ index 160acbbdf111..ec0142165ce7 100644 goto next; } -@@ -4331,7 +4411,7 @@ static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap) +@@ -4331,7 +4394,7 @@ static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap) ; } @@ -2592,7 +3080,7 @@ index 160acbbdf111..ec0142165ce7 100644 if (can_swap) { min_seq[LRU_GEN_ANON] = min(min_seq[LRU_GEN_ANON], min_seq[LRU_GEN_FILE]); min_seq[LRU_GEN_FILE] = max(min_seq[LRU_GEN_ANON], lrugen->min_seq[LRU_GEN_FILE]); -@@ -4353,7 +4433,7 @@ static void inc_max_seq(struct lruvec *lruvec, bool can_swap, bool force_scan) +@@ -4353,7 +4416,7 @@ static void inc_max_seq(struct lruvec *lruvec, bool can_swap, bool force_scan) { int prev, next; int type, zone; @@ -2601,7 +3089,7 @@ index 160acbbdf111..ec0142165ce7 100644 spin_lock_irq(&lruvec->lru_lock); -@@ -4411,7 +4491,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, +@@ -4411,7 +4474,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, bool success; struct lru_gen_mm_walk *walk; struct mm_struct *mm = NULL; @@ -2610,7 +3098,7 @@ index 160acbbdf111..ec0142165ce7 100644 VM_WARN_ON_ONCE(max_seq > READ_ONCE(lrugen->max_seq)); -@@ -4427,12 +4507,12 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, +@@ -4427,12 +4490,12 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, * handful of PTEs. Spreading the work out over a period of time usually * is less efficient, but it avoids bursty page faults. */ @@ -2625,26 +3113,41 @@ index 160acbbdf111..ec0142165ce7 100644 if (!walk) { success = iterate_mm_list_nowalk(lruvec, max_seq); goto done; -@@ -4455,8 +4535,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, - if (sc->priority <= DEF_PRIORITY - 2) - wait_event_killable(lruvec->mm_state.wait, - max_seq < READ_ONCE(lrugen->max_seq)); +@@ -4447,119 +4510,64 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, + success = iterate_mm_list(lruvec, walk, &mm); + if (mm) + walk_mm(lruvec, mm, walk); - -- return max_seq < READ_ONCE(lrugen->max_seq); -+ return false; - } +- cond_resched(); + } while (mm); + done: +- if (!success) { +- if (sc->priority <= DEF_PRIORITY - 2) +- wait_event_killable(lruvec->mm_state.wait, +- max_seq < READ_ONCE(lrugen->max_seq)); ++ if (success) ++ inc_max_seq(lruvec, can_swap, force_scan); - VM_WARN_ON_ONCE(max_seq != READ_ONCE(lrugen->max_seq)); -@@ -4469,97 +4548,56 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, - return true; - } +- return max_seq < READ_ONCE(lrugen->max_seq); +- } +- +- VM_WARN_ON_ONCE(max_seq != READ_ONCE(lrugen->max_seq)); ++ return success; ++} --static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsigned long *min_seq, -- struct scan_control *sc, bool can_swap, unsigned long *nr_to_scan) +- inc_max_seq(lruvec, can_swap, force_scan); +- /* either this sees any waiters or they will see updated max_seq */ +- if (wq_has_sleeper(&lruvec->mm_state.wait)) +- wake_up_all(&lruvec->mm_state.wait); +- +- return true; +-} +/****************************************************************************** + * working set protection + ******************************************************************************/ -+ + +-static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsigned long *min_seq, +- struct scan_control *sc, bool can_swap, unsigned long *nr_to_scan) +static bool lruvec_is_sizable(struct lruvec *lruvec, struct scan_control *sc) { int gen, type, zone; @@ -2680,10 +3183,7 @@ index 160acbbdf111..ec0142165ce7 100644 - /* try to scrape all its memory if this memcg was deleted */ - *nr_to_scan = mem_cgroup_online(memcg) ? (total >> sc->priority) : total; -+ /* whether the size is big enough to be helpful */ -+ return mem_cgroup_online(memcg) ? (total >> sc->priority) : total; -+} - +- - /* - * The aging tries to be lazy to reduce the overhead, while the eviction - * stalls when the number of generations reaches MIN_NR_GENS. Hence, the @@ -2707,8 +3207,10 @@ index 160acbbdf111..ec0142165ce7 100644 - return true; - - return false; --} -- ++ /* whether the size is big enough to be helpful */ ++ return mem_cgroup_online(memcg) ? (total >> sc->priority) : total; + } + -static bool age_lruvec(struct lruvec *lruvec, struct scan_control *sc, unsigned long min_ttl) +static bool lruvec_is_reclaimable(struct lruvec *lruvec, struct scan_control *sc, + unsigned long min_ttl) @@ -2758,7 +3260,7 @@ index 160acbbdf111..ec0142165ce7 100644 } /* to protect the working set of the last N jiffies */ -@@ -4572,46 +4610,30 @@ static unsigned long lru_gen_min_ttl __read_mostly; +@@ -4572,46 +4580,30 @@ static unsigned long lru_gen_min_ttl __read_mostly; static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) { struct mem_cgroup *memcg; @@ -2812,7 +3314,7 @@ index 160acbbdf111..ec0142165ce7 100644 */ if (mutex_trylock(&oom_lock)) { struct oom_control oc = { -@@ -4624,6 +4646,28 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) +@@ -4624,6 +4616,28 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) } } @@ -2841,7 +3343,7 @@ index 160acbbdf111..ec0142165ce7 100644 /* * This function exploits spatial locality when shrink_folio_list() walks the * rmap. It scans the adjacent PTEs of a young PTE and promotes hot pages. If -@@ -4631,16 +4675,17 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) +@@ -4631,16 +4645,17 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) * the PTE table to the Bloom filter. This forms a feedback loop between the * eviction and the aging. */ @@ -2863,7 +3365,7 @@ index 160acbbdf111..ec0142165ce7 100644 struct folio *folio = pfn_folio(pvmw->pfn); struct mem_cgroup *memcg = folio_memcg(folio); struct pglist_data *pgdat = folio_pgdat(folio); -@@ -4651,47 +4696,65 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) +@@ -4651,47 +4666,65 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) lockdep_assert_held(pvmw->ptl); VM_WARN_ON_ONCE_FOLIO(folio_test_lru(folio), folio); @@ -2943,7 +3445,7 @@ index 160acbbdf111..ec0142165ce7 100644 young++; -@@ -4700,58 +4763,173 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) +@@ -4700,58 +4733,173 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) !folio_test_swapcache(folio))) folio_mark_dirty(folio); @@ -3122,7 +3624,9 @@ index 160acbbdf111..ec0142165ce7 100644 + spin_unlock(&pgdat->memcg_lru.lock); } +} -+ + +- if (!walk) +- spin_unlock_irq(&lruvec->lru_lock); +void lru_gen_soft_reclaim(struct lruvec *lruvec) +{ + /* see the comment on MEMCG_NR_GENS */ @@ -3130,11 +3634,9 @@ index 160acbbdf111..ec0142165ce7 100644 + lru_gen_rotate_memcg(lruvec, MEMCG_LRU_HEAD); +} -- if (!walk) -- spin_unlock_irq(&lruvec->lru_lock); -+#else /* !CONFIG_MEMCG */ - - mem_cgroup_unlock_pages(); ++#else /* !CONFIG_MEMCG */ ++ +static int lru_gen_memcg_seg(struct lruvec *lruvec) +{ + return 0; @@ -3145,7 +3647,7 @@ index 160acbbdf111..ec0142165ce7 100644 /****************************************************************************** * the eviction ******************************************************************************/ -@@ -4765,7 +4943,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, int tier_idx) +@@ -4765,7 +4913,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, int tier_idx) int delta = folio_nr_pages(folio); int refs = folio_lru_refs(folio); int tier = lru_tier_from_refs(refs); @@ -3154,7 +3656,7 @@ index 160acbbdf111..ec0142165ce7 100644 VM_WARN_ON_ONCE_FOLIO(gen >= MAX_NR_GENS, folio); -@@ -4790,7 +4968,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, int tier_idx) +@@ -4790,7 +4938,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, int tier_idx) /* promoted */ if (gen != lru_gen_from_seq(lrugen->min_seq[type])) { @@ -3163,7 +3665,7 @@ index 160acbbdf111..ec0142165ce7 100644 return true; } -@@ -4799,7 +4977,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, int tier_idx) +@@ -4799,7 +4947,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, int tier_idx) int hist = lru_hist_from_seq(lrugen->min_seq[type]); gen = folio_inc_gen(lruvec, folio, false); @@ -3172,7 +3674,7 @@ index 160acbbdf111..ec0142165ce7 100644 WRITE_ONCE(lrugen->protected[hist][type][tier - 1], lrugen->protected[hist][type][tier - 1] + delta); -@@ -4811,7 +4989,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, int tier_idx) +@@ -4811,7 +4959,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, int tier_idx) if (folio_test_locked(folio) || folio_test_writeback(folio) || (type == LRU_GEN_FILE && folio_test_dirty(folio))) { gen = folio_inc_gen(lruvec, folio, true); @@ -3181,7 +3683,7 @@ index 160acbbdf111..ec0142165ce7 100644 return true; } -@@ -4822,12 +5000,8 @@ static bool isolate_folio(struct lruvec *lruvec, struct folio *folio, struct sca +@@ -4822,12 +4970,8 @@ static bool isolate_folio(struct lruvec *lruvec, struct folio *folio, struct sca { bool success; @@ -3195,7 +3697,7 @@ index 160acbbdf111..ec0142165ce7 100644 (folio_test_dirty(folio) || (folio_test_anon(folio) && !folio_test_swapcache(folio)))) return false; -@@ -4865,7 +5039,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc, +@@ -4865,7 +5009,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc, int scanned = 0; int isolated = 0; int remaining = MAX_LRU_BATCH; @@ -3204,7 +3706,7 @@ index 160acbbdf111..ec0142165ce7 100644 struct mem_cgroup *memcg = lruvec_memcg(lruvec); VM_WARN_ON_ONCE(!list_empty(list)); -@@ -4878,7 +5052,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc, +@@ -4878,7 +5022,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc, for (zone = sc->reclaim_idx; zone >= 0; zone--) { LIST_HEAD(moved); int skipped = 0; @@ -3213,7 +3715,7 @@ index 160acbbdf111..ec0142165ce7 100644 while (!list_empty(head)) { struct folio *folio = lru_to_folio(head); -@@ -4924,9 +5098,8 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc, +@@ -4924,9 +5068,8 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc, __count_vm_events(PGSCAN_ANON + type, isolated); /* @@ -3225,7 +3727,7 @@ index 160acbbdf111..ec0142165ce7 100644 */ return isolated || !remaining ? scanned : 0; } -@@ -5021,8 +5194,7 @@ static int isolate_folios(struct lruvec *lruvec, struct scan_control *sc, int sw +@@ -5021,8 +5164,7 @@ static int isolate_folios(struct lruvec *lruvec, struct scan_control *sc, int sw return scanned; } @@ -3235,7 +3737,7 @@ index 160acbbdf111..ec0142165ce7 100644 { int type; int scanned; -@@ -5111,153 +5283,348 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap +@@ -5111,153 +5253,348 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap goto retry; } @@ -3674,7 +4176,7 @@ index 160acbbdf111..ec0142165ce7 100644 } /****************************************************************************** -@@ -5266,7 +5633,7 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc +@@ -5266,7 +5603,7 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc static bool __maybe_unused state_is_valid(struct lruvec *lruvec) { @@ -3683,7 +4185,7 @@ index 160acbbdf111..ec0142165ce7 100644 if (lrugen->enabled) { enum lru_list lru; -@@ -5279,7 +5646,7 @@ static bool __maybe_unused state_is_valid(struct lruvec *lruvec) +@@ -5279,7 +5616,7 @@ static bool __maybe_unused state_is_valid(struct lruvec *lruvec) int gen, type, zone; for_each_gen_type_zone(gen, type, zone) { @@ -3692,7 +4194,7 @@ index 160acbbdf111..ec0142165ce7 100644 return false; } } -@@ -5324,7 +5691,7 @@ static bool drain_evictable(struct lruvec *lruvec) +@@ -5324,7 +5661,7 @@ static bool drain_evictable(struct lruvec *lruvec) int remaining = MAX_LRU_BATCH; for_each_gen_type_zone(gen, type, zone) { @@ -3701,7 +4203,7 @@ index 160acbbdf111..ec0142165ce7 100644 while (!list_empty(head)) { bool success; -@@ -5402,14 +5769,14 @@ static void lru_gen_change_state(bool enabled) +@@ -5402,14 +5739,14 @@ static void lru_gen_change_state(bool enabled) * sysfs interface ******************************************************************************/ @@ -3720,7 +4222,7 @@ index 160acbbdf111..ec0142165ce7 100644 { unsigned int msecs; -@@ -5421,11 +5788,9 @@ static ssize_t store_min_ttl(struct kobject *kobj, struct kobj_attribute *attr, +@@ -5421,11 +5758,9 @@ static ssize_t store_min_ttl(struct kobject *kobj, struct kobj_attribute *attr, return len; } @@ -3734,7 +4236,7 @@ index 160acbbdf111..ec0142165ce7 100644 { unsigned int caps = 0; -@@ -5438,11 +5803,14 @@ static ssize_t show_enabled(struct kobject *kobj, struct kobj_attribute *attr, c +@@ -5438,11 +5773,14 @@ static ssize_t show_enabled(struct kobject *kobj, struct kobj_attribute *attr, c if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG)) caps |= BIT(LRU_GEN_NONLEAF_YOUNG); @@ -3750,7 +4252,7 @@ index 160acbbdf111..ec0142165ce7 100644 const char *buf, size_t len) { int i; -@@ -5469,9 +5837,7 @@ static ssize_t store_enabled(struct kobject *kobj, struct kobj_attribute *attr, +@@ -5469,9 +5807,7 @@ static ssize_t store_enabled(struct kobject *kobj, struct kobj_attribute *attr, return len; } @@ -3761,7 +4263,7 @@ index 160acbbdf111..ec0142165ce7 100644 static struct attribute *lru_gen_attrs[] = { &lru_gen_min_ttl_attr.attr, -@@ -5479,7 +5845,7 @@ static struct attribute *lru_gen_attrs[] = { +@@ -5479,7 +5815,7 @@ static struct attribute *lru_gen_attrs[] = { NULL }; @@ -3770,7 +4272,7 @@ index 160acbbdf111..ec0142165ce7 100644 .name = "lru_gen", .attrs = lru_gen_attrs, }; -@@ -5545,7 +5911,7 @@ static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec, +@@ -5545,7 +5881,7 @@ static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec, int i; int type, tier; int hist = lru_hist_from_seq(seq); @@ -3779,7 +4281,7 @@ index 160acbbdf111..ec0142165ce7 100644 for (tier = 0; tier < MAX_NR_TIERS; tier++) { seq_printf(m, " %10d", tier); -@@ -5595,7 +5961,7 @@ static int lru_gen_seq_show(struct seq_file *m, void *v) +@@ -5595,7 +5931,7 @@ static int lru_gen_seq_show(struct seq_file *m, void *v) unsigned long seq; bool full = !debugfs_real_fops(m->file)->write; struct lruvec *lruvec = v; @@ -3788,7 +4290,7 @@ index 160acbbdf111..ec0142165ce7 100644 int nid = lruvec_pgdat(lruvec)->node_id; struct mem_cgroup *memcg = lruvec_memcg(lruvec); DEFINE_MAX_SEQ(lruvec); -@@ -5692,7 +6058,7 @@ static int run_eviction(struct lruvec *lruvec, unsigned long seq, struct scan_co +@@ -5692,7 +6028,7 @@ static int run_eviction(struct lruvec *lruvec, unsigned long seq, struct scan_co if (sc->nr_reclaimed >= nr_to_reclaim) return 0; @@ -3797,7 +4299,7 @@ index 160acbbdf111..ec0142165ce7 100644 return 0; cond_resched(); -@@ -5713,11 +6079,11 @@ static int run_cmd(char cmd, int memcg_id, int nid, unsigned long seq, +@@ -5713,11 +6049,11 @@ static int run_cmd(char cmd, int memcg_id, int nid, unsigned long seq, if (!mem_cgroup_disabled()) { rcu_read_lock(); @@ -3812,7 +4314,7 @@ index 160acbbdf111..ec0142165ce7 100644 rcu_read_unlock(); if (!memcg) -@@ -5777,7 +6143,7 @@ static ssize_t lru_gen_seq_write(struct file *file, const char __user *src, +@@ -5777,7 +6113,7 @@ static ssize_t lru_gen_seq_write(struct file *file, const char __user *src, set_task_reclaim_state(current, &sc.reclaim_state); flags = memalloc_noreclaim_save(); blk_start_plug(&plug); @@ -3821,7 +4323,7 @@ index 160acbbdf111..ec0142165ce7 100644 err = -ENOMEM; goto done; } -@@ -5849,7 +6215,7 @@ void lru_gen_init_lruvec(struct lruvec *lruvec) +@@ -5849,7 +6185,7 @@ void lru_gen_init_lruvec(struct lruvec *lruvec) { int i; int gen, type, zone; @@ -3830,7 +4332,7 @@ index 160acbbdf111..ec0142165ce7 100644 lrugen->max_seq = MIN_NR_GENS + 1; lrugen->enabled = lru_gen_enabled(); -@@ -5858,13 +6224,26 @@ void lru_gen_init_lruvec(struct lruvec *lruvec) +@@ -5858,13 +6194,25 @@ void lru_gen_init_lruvec(struct lruvec *lruvec) lrugen->timestamps[i] = jiffies; for_each_gen_type_zone(gen, type, zone) @@ -3838,7 +4340,7 @@ index 160acbbdf111..ec0142165ce7 100644 + INIT_LIST_HEAD(&lrugen->folios[gen][type][zone]); lruvec->mm_state.seq = MIN_NR_GENS; - init_waitqueue_head(&lruvec->mm_state.wait); +- init_waitqueue_head(&lruvec->mm_state.wait); } #ifdef CONFIG_MEMCG @@ -3858,7 +4360,7 @@ index 160acbbdf111..ec0142165ce7 100644 void lru_gen_init_memcg(struct mem_cgroup *memcg) { INIT_LIST_HEAD(&memcg->mm_list.fifo); -@@ -5876,19 +6255,25 @@ void lru_gen_exit_memcg(struct mem_cgroup *memcg) +@@ -5876,19 +6224,24 @@ void lru_gen_exit_memcg(struct mem_cgroup *memcg) int i; int nid; @@ -3867,7 +4369,6 @@ index 160acbbdf111..ec0142165ce7 100644 for_each_node(nid) { struct lruvec *lruvec = get_lruvec(memcg, nid); -+ VM_WARN_ON_ONCE(lruvec->mm_state.nr_walkers); VM_WARN_ON_ONCE(memchr_inv(lruvec->lrugen.nr_pages, 0, sizeof(lruvec->lrugen.nr_pages))); @@ -3885,7 +4386,7 @@ index 160acbbdf111..ec0142165ce7 100644 static int __init init_lru_gen(void) { -@@ -5915,6 +6300,10 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc +@@ -5915,6 +6268,10 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc { } @@ -3896,7 +4397,7 @@ index 160acbbdf111..ec0142165ce7 100644 #endif /* CONFIG_LRU_GEN */ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) -@@ -5928,7 +6317,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) +@@ -5928,7 +6285,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) bool proportional_reclaim; struct blk_plug plug; @@ -3905,7 +4406,7 @@ index 160acbbdf111..ec0142165ce7 100644 lru_gen_shrink_lruvec(lruvec, sc); return; } -@@ -6171,6 +6560,11 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) +@@ -6171,6 +6528,11 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) struct lruvec *target_lruvec; bool reclaimable = false; @@ -3940,7 +4441,7 @@ index 1a86645b7b3c..fd666584515c 100644 struct pglist_data *pgdat; int type = folio_is_file_lru(folio); diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c -index 1f36bc1c5d36..26389e0dcfff 100644 +index 2a16939cf028..9286d3baa12d 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -55,6 +55,28 @@ struct rcu_reader_struct { @@ -3981,7 +4482,7 @@ index 1f36bc1c5d36..26389e0dcfff 100644 /* Try allocating 3 nodes */ mtree_lock(mt); mt_set_non_kernel(0); -@@ -35342,7 +35366,7 @@ static noinline void check_prealloc(struct maple_tree *mt) +@@ -35355,7 +35379,7 @@ static noinline void check_prealloc(struct maple_tree *mt) for (i = 0; i <= max; i++) mtree_test_store_range(mt, i * 10, i * 10 + 5, &i); @@ -3990,7 +4491,7 @@ index 1f36bc1c5d36..26389e0dcfff 100644 allocated = mas_allocated(&mas); height = mas_mt_height(&mas); MT_BUG_ON(mt, allocated == 0); -@@ -35351,18 +35375,18 @@ static noinline void check_prealloc(struct maple_tree *mt) +@@ -35364,18 +35388,18 @@ static noinline void check_prealloc(struct maple_tree *mt) allocated = mas_allocated(&mas); MT_BUG_ON(mt, allocated != 0); @@ -4012,9 +4513,9 @@ index 1f36bc1c5d36..26389e0dcfff 100644 allocated = mas_allocated(&mas); height = mas_mt_height(&mas); MT_BUG_ON(mt, allocated == 0); -@@ -35370,25 +35394,25 @@ static noinline void check_prealloc(struct maple_tree *mt) - mn = mas_pop_node(&mas); +@@ -35384,26 +35408,26 @@ static noinline void check_prealloc(struct maple_tree *mt) MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1); + mn->parent = ma_parent_ptr(mn); ma_free_rcu(mn); - MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); + MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); @@ -4035,6 +4536,7 @@ index 1f36bc1c5d36..26389e0dcfff 100644 mas_destroy(&mas); allocated = mas_allocated(&mas); MT_BUG_ON(mt, allocated != 0); + mn->parent = ma_parent_ptr(mn); ma_free_rcu(mn); - MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); @@ -4042,7 +4544,7 @@ index 1f36bc1c5d36..26389e0dcfff 100644 allocated = mas_allocated(&mas); height = mas_mt_height(&mas); MT_BUG_ON(mt, allocated == 0); -@@ -35397,12 +35421,12 @@ static noinline void check_prealloc(struct maple_tree *mt) +@@ -35412,12 +35436,12 @@ static noinline void check_prealloc(struct maple_tree *mt) MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1); mas_push_node(&mas, mn); MT_BUG_ON(mt, mas_allocated(&mas) != allocated); @@ -4057,7 +4559,7 @@ index 1f36bc1c5d36..26389e0dcfff 100644 allocated = mas_allocated(&mas); height = mas_mt_height(&mas); MT_BUG_ON(mt, allocated == 0); -@@ -35410,21 +35434,21 @@ static noinline void check_prealloc(struct maple_tree *mt) +@@ -35425,21 +35449,21 @@ static noinline void check_prealloc(struct maple_tree *mt) mas_store_prealloc(&mas, ptr); MT_BUG_ON(mt, mas_allocated(&mas) != 0); @@ -4082,7 +4584,7 @@ index 1f36bc1c5d36..26389e0dcfff 100644 allocated = mas_allocated(&mas); height = mas_mt_height(&mas); MT_BUG_ON(mt, allocated == 0); -@@ -35432,14 +35456,14 @@ static noinline void check_prealloc(struct maple_tree *mt) +@@ -35447,14 +35471,14 @@ static noinline void check_prealloc(struct maple_tree *mt) mas_store_prealloc(&mas, ptr); MT_BUG_ON(mt, mas_allocated(&mas) != 0); mt_set_non_kernel(1); @@ -4099,7 +4601,7 @@ index 1f36bc1c5d36..26389e0dcfff 100644 allocated = mas_allocated(&mas); height = mas_mt_height(&mas); MT_BUG_ON(mt, allocated == 0); -@@ -35447,7 +35471,7 @@ static noinline void check_prealloc(struct maple_tree *mt) +@@ -35462,7 +35486,7 @@ static noinline void check_prealloc(struct maple_tree *mt) mas_store_prealloc(&mas, ptr); MT_BUG_ON(mt, mas_allocated(&mas) != 0); mt_set_non_kernel(1); diff --git a/6.2/0011-objtool.patch b/6.2/0011-objtool.patch index 1d2dbce3..507ec411 100644 --- a/6.2/0011-objtool.patch +++ b/6.2/0011-objtool.patch @@ -1,4 +1,4 @@ -From 408f428355e56ebba78d9b13e73d90f3a61057cc Mon Sep 17 00:00:00 2001 +From a9b5ae8237970121057f450cbc5f8e54081aceab Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Mon, 13 Feb 2023 09:26:09 +0100 Subject: [PATCH 11/15] objtool diff --git a/6.2/0012-sched.patch b/6.2/0012-sched.patch index 3de7da5c..3c237daa 100644 --- a/6.2/0012-sched.patch +++ b/6.2/0012-sched.patch @@ -1,4 +1,4 @@ -From 142ae5ede709849f18055b9531bc73df4e53679f Mon Sep 17 00:00:00 2001 +From 4b90d86e2ae379b4e8d1aa5b67a4312e2bf0ee31 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Thu, 30 Mar 2023 17:54:54 +0200 Subject: [PATCH 12/15] sched @@ -311,7 +311,7 @@ index 1637b65ba07a..8d64fba16cfe 100644 P(se.avg.load_sum); P(se.avg.runnable_sum); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index 1ea94874b0ce..1a8ae34c9464 100644 +index 735994022fe0..ca8bbe5c1cb4 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1082,6 +1082,23 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se) @@ -903,7 +903,7 @@ index 1ea94874b0ce..1a8ae34c9464 100644 if (env->sd->flags & SD_NUMA) env->fbq_type = fbq_classify_group(&sds->busiest_stat); -@@ -10203,24 +10223,23 @@ static struct sched_group *find_busiest_group(struct lb_env *env) +@@ -10213,24 +10233,23 @@ static struct sched_group *find_busiest_group(struct lb_env *env) */ update_sd_lb_stats(env, &sds); @@ -937,7 +937,7 @@ index 1ea94874b0ce..1a8ae34c9464 100644 /* ASYM feature bypasses nice load balance check */ if (busiest->group_type == group_asym_packing) goto force_balance; -@@ -10233,6 +10252,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env) +@@ -10243,6 +10262,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env) if (busiest->group_type == group_imbalanced) goto force_balance; @@ -945,7 +945,7 @@ index 1ea94874b0ce..1a8ae34c9464 100644 /* * If the local group is busier than the selected busiest group * don't try and pull any tasks. -@@ -10272,7 +10292,6 @@ static struct sched_group *find_busiest_group(struct lb_env *env) +@@ -10282,7 +10302,6 @@ static struct sched_group *find_busiest_group(struct lb_env *env) goto out_balanced; } @@ -953,7 +953,7 @@ index 1ea94874b0ce..1a8ae34c9464 100644 if (sds.prefer_sibling && local->group_type == group_has_spare && busiest->sum_nr_running > local->sum_nr_running + 1) goto force_balance; -@@ -10374,11 +10393,20 @@ static struct rq *find_busiest_queue(struct lb_env *env, +@@ -10384,11 +10403,20 @@ static struct rq *find_busiest_queue(struct lb_env *env, nr_running == 1) continue; @@ -977,7 +977,7 @@ index 1ea94874b0ce..1a8ae34c9464 100644 switch (env->migration_type) { case migrate_load: -@@ -10468,8 +10496,20 @@ asym_active_balance(struct lb_env *env) +@@ -10478,8 +10506,20 @@ asym_active_balance(struct lb_env *env) * lower priority CPUs in order to pack all tasks in the * highest priority CPUs. */ @@ -1000,7 +1000,7 @@ index 1ea94874b0ce..1a8ae34c9464 100644 } static inline bool -@@ -11206,8 +11246,17 @@ static void nohz_balancer_kick(struct rq *rq) +@@ -11216,8 +11256,17 @@ static void nohz_balancer_kick(struct rq *rq) */ for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) { if (sched_asym_prefer(i, cpu)) { diff --git a/6.2/0013-zram.patch b/6.2/0013-zram.patch index 12cc0b8d..0d2fba2a 100644 --- a/6.2/0013-zram.patch +++ b/6.2/0013-zram.patch @@ -1,4 +1,4 @@ -From 9bf62fd4fa835ffda2dd24b87c8e1feff3e98061 Mon Sep 17 00:00:00 2001 +From 661d513d0987b55f3b00273a1411d2f43e27f2d0 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Mon, 6 Feb 2023 09:53:13 +0100 Subject: [PATCH 13/15] zram diff --git a/6.2/0014-zstd-import-1.5.5.patch b/6.2/0014-zstd-import-1.5.5.patch index cab2cd3b..9859715d 100644 --- a/6.2/0014-zstd-import-1.5.5.patch +++ b/6.2/0014-zstd-import-1.5.5.patch @@ -1,4 +1,4 @@ -From 581756bfa197a0f8011730304fbcc3d9fc547ddb Mon Sep 17 00:00:00 2001 +From 5226c6f74024424daecec7e0d3f70268db60b200 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Thu, 6 Apr 2023 17:10:48 +0200 Subject: [PATCH 14/15] zstd import 1.5.5 diff --git a/6.2/0015-v4l2-core-add-v4l2loopback.patch b/6.2/0015-v4l2-core-add-v4l2loopback.patch index d25b3ae3..172123f1 100644 --- a/6.2/0015-v4l2-core-add-v4l2loopback.patch +++ b/6.2/0015-v4l2-core-add-v4l2loopback.patch @@ -1,4 +1,4 @@ -From ab74a3305ccf63616c74a3bc7c4bd7f6ee55e4f9 Mon Sep 17 00:00:00 2001 +From e63a0a13d76c3d3d65edb7452b6cab1cbff1b237 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Fri, 10 Mar 2023 19:28:54 +0100 Subject: [PATCH 15/15] v4l2-core: add v4l2loopback diff --git a/6.2/all/0001-cachyos-base-all.patch b/6.2/all/0001-cachyos-base-all.patch index da382448..244ba1d4 100644 --- a/6.2/all/0001-cachyos-base-all.patch +++ b/6.2/all/0001-cachyos-base-all.patch @@ -1,4 +1,4 @@ -From 89f3a4d45dc91c408f5c02b09982c7262b55c48d Mon Sep 17 00:00:00 2001 +From 9ee19b282653761511acbde09e77416a96f55a5b Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Fri, 10 Mar 2023 17:59:47 +0100 Subject: [PATCH 01/15] bbr2 @@ -3283,7 +3283,7 @@ index cb79127f45c3..70e4de876a7f 100644 -- 2.40.0 -From 0a4585a3f29f2acb9bf8e27f2fe61172243c3163 Mon Sep 17 00:00:00 2001 +From 5610d33c45f6785da3a9b856d1fee11fb06c78e8 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Thu, 13 Apr 2023 18:13:28 +0200 Subject: [PATCH 02/15] bfq @@ -5567,7 +5567,7 @@ index e835f21d48af..ad384230d0c7 100644 -- 2.40.0 -From 7b09e20a0ef54c18d97ddfa479d39d1daab8be85 Mon Sep 17 00:00:00 2001 +From f3810e5d25c92b9a4a0da90dc5c39fe1421ee533 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Mon, 13 Feb 2023 11:26:20 +0100 Subject: [PATCH 03/15] bitmap @@ -6918,7 +6918,7 @@ index bb0ee80526b2..8c04254c5284 100644 -- 2.40.0 -From 538ff5a6a2aa503cf813248d2a9deb19842b2d93 Mon Sep 17 00:00:00 2001 +From 955ced3981bf578dc6cc299ed5f620f68ae29c39 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Thu, 13 Apr 2023 18:13:45 +0200 Subject: [PATCH 04/15] cachy @@ -7044,7 +7044,7 @@ index 352ff53a2306..7c210744d84c 100644 vmlinuz voffset.h diff --git a/Makefile b/Makefile -index 416490daa76a..e2a454ac73d2 100644 +index 068374cc2601..f7890837c555 100644 --- a/Makefile +++ b/Makefile @@ -834,6 +834,9 @@ KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) @@ -9772,7 +9772,7 @@ index 7b0fe741a088..77ad9e033358 100644 out: diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index e046a2bff207..1ea94874b0ce 100644 +index 661226e38835..735994022fe0 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -69,9 +69,13 @@ @@ -10002,10 +10002,10 @@ index 5b7b8d4f5297..160acbbdf111 100644 static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) { diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c -index 0d0cc4ef2b85..544104f9f4b3 100644 +index 40fe70fc2015..3028e27897d9 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c -@@ -1467,6 +1467,13 @@ static struct ctl_table ipv4_net_table[] = { +@@ -1470,6 +1470,13 @@ static struct ctl_table ipv4_net_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = &tcp_plb_max_cong_thresh, }, @@ -10081,7 +10081,7 @@ index 754e0212c951..b6d7faeb737a 100644 * drop receive data on the floor. It will get retransmitted * and hopefully then we'll have sufficient space. diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c -index 8320d0ecb13a..37a09cd767a1 100644 +index 339a9cea9047..34bd711a1e7c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -3274,6 +3274,8 @@ static int __net_init tcp_sk_init(struct net *net) @@ -10144,7 +10144,7 @@ index 4815a8e32227..6a3c36713045 100644 -- 2.40.0 -From 3ecdb9dc32a4858395c48328399eee7833664748 Mon Sep 17 00:00:00 2001 +From 5e20c6e278ccac0b0774a4644c2aacdd924d3584 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Fri, 10 Mar 2023 18:00:48 +0100 Subject: [PATCH 05/15] clr @@ -11386,7 +11386,7 @@ index 6d2dd03dafa8..750440803883 100644 EXPORT_SYMBOL(dst_release_immediate); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c -index 64289bc98887..228c54bbdecc 100644 +index f5114b2395ae..6a7be9d3dd10 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -840,7 +840,7 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, @@ -11482,7 +11482,7 @@ index 029171379884..bc9dc51828f7 100644 -- 2.40.0 -From 4a31e787aefebf5586016b69ae7d54abc6428d22 Mon Sep 17 00:00:00 2001 +From c0762e6bc0c17ce7f3d4c6d75626d574266d4f02 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Thu, 13 Apr 2023 18:16:34 +0200 Subject: [PATCH 06/15] fixes @@ -11494,14 +11494,26 @@ Signed-off-by: Peter Jung Documentation/admin-guide/mm/ksm.rst | 7 + Documentation/leds/index.rst | 1 + Documentation/leds/ledtrig-blkdev.rst | 158 +++ + Documentation/x86/topology.rst | 26 + arch/x86/boot/compressed/Makefile | 2 +- arch/x86/events/rapl.c | 20 +- + arch/x86/include/asm/cacheinfo.h | 1 + + arch/x86/kernel/cpu/amd.c | 1 + + arch/x86/kernel/cpu/cacheinfo.c | 36 + + arch/x86/kernel/cpu/hygon.c | 1 + arch/x86/mm/tlb.c | 2 +- arch/x86/net/bpf_jit_comp.c | 5 +- drivers/bluetooth/btusb.c | 2 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 3 +- + .../drm/amd/display/dc/bios/bios_parser2.c | 7 +- + .../drm/amd/display/dc/dcn20/dcn20_resource.c | 2 +- + .../drm/amd/display/dc/dcn21/dcn21_resource.c | 2 +- + .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 3 +- + drivers/gpu/drm/scheduler/sched_main.c | 3 +- drivers/leds/trigger/Kconfig | 9 + drivers/leds/trigger/Makefile | 1 + drivers/leds/trigger/ledtrig-blkdev.c | 1221 +++++++++++++++++ + .../net/wireless/mediatek/mt76/mt7921/init.c | 7 +- fs/eventpoll.c | 188 ++- fs/proc/base.c | 1 + include/linux/mm_types.h | 7 +- @@ -11519,7 +11531,7 @@ Signed-off-by: Peter Jung scripts/Makefile.vmlinux_o | 2 +- sound/pci/hda/cs35l41_hda.c | 2 +- .../selftests/vm/ksm_functional_tests.c | 96 +- - 30 files changed, 1961 insertions(+), 175 deletions(-) + 42 files changed, 2041 insertions(+), 187 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-class-led-trigger-blkdev create mode 100644 Documentation/leds/ledtrig-blkdev.rst create mode 100644 drivers/leds/trigger/ledtrig-blkdev.c @@ -11823,6 +11835,64 @@ index 000000000000..9ff5b99de451 +* The ``blkdev`` LED trigger supports many-to-many device/LED associations. + A device can be associated with multiple LEDs, and an LED can be associated + with multiple devices. +diff --git a/Documentation/x86/topology.rst b/Documentation/x86/topology.rst +index 7f58010ea86a..9de14f3f7783 100644 +--- a/Documentation/x86/topology.rst ++++ b/Documentation/x86/topology.rst +@@ -33,6 +33,7 @@ historical nature and should be cleaned up. + The topology of a system is described in the units of: + + - packages ++ - cluster + - cores + - threads + +@@ -90,6 +91,22 @@ Package-related topology information in the kernel: + Cache. In general, it is a number identifying an LLC uniquely on the + system. + ++Clusters ++======== ++A cluster consists of threads of one or more cores sharing the same L2 cache. ++ ++Cluster-related topology information in the kernel: ++ ++ - cluster_id: ++ ++ A per-CPU variable containing: ++ ++ - Upper bits extracted from the APIC ID. CPUs which have the same value ++ in these bits share an L2 and have the same cluster_id. ++ ++ CPUs for which cluster information is unavailable will show 65535 ++ (BAD_APICID) as the cluster_id. ++ + Cores + ===== + A core consists of 1 or more threads. It does not matter whether the threads +@@ -125,6 +142,11 @@ Thread-related topology information in the kernel: + + The number of online threads is also printed in /proc/cpuinfo "siblings." + ++ - topology_cluster_cpumask(): ++ ++ The cpumask contains all online threads in the cluster to which a thread ++ belongs. ++ + - topology_sibling_cpumask(): + + The cpumask contains all online threads in the core to which a thread +@@ -138,6 +160,10 @@ Thread-related topology information in the kernel: + + The physical package ID to which a thread belongs. + ++ - topology_cluster_id(); ++ ++ The ID of the cluster to which a thread belongs. ++ + - topology_core_id(); + + The ID of the core to which a thread belongs. It is also printed in /proc/cpuinfo diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index d995595394bb..19d1fb601796 100644 --- a/arch/x86/boot/compressed/Makefile @@ -11894,6 +11964,89 @@ index 52e6e7ed4f78..f000cc16d128 100644 .msr_power_unit = MSR_AMD_RAPL_POWER_UNIT, .rapl_msrs = amd_rapl_msrs, }; +diff --git a/arch/x86/include/asm/cacheinfo.h b/arch/x86/include/asm/cacheinfo.h +index ce9685fc78d8..2034cd556c07 100644 +--- a/arch/x86/include/asm/cacheinfo.h ++++ b/arch/x86/include/asm/cacheinfo.h +@@ -7,6 +7,7 @@ extern unsigned int memory_caching_control; + #define CACHE_MTRR 0x01 + #define CACHE_PAT 0x02 + ++void cacheinfo_topoext_init_l2c_id(struct cpuinfo_x86 *c, int cpu); + void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu); + void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu); + +diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c +index 06f2ede1544f..84c250027a50 100644 +--- a/arch/x86/kernel/cpu/amd.c ++++ b/arch/x86/kernel/cpu/amd.c +@@ -358,6 +358,7 @@ static void amd_get_topology(struct cpuinfo_x86 *c) + if (!err) + c->x86_coreid_bits = get_count_order(c->x86_max_cores); + ++ cacheinfo_topoext_init_l2c_id(c, cpu); + cacheinfo_amd_init_llc_id(c, cpu); + + } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { +diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c +index f4e5aa27eec6..bed7b9633451 100644 +--- a/arch/x86/kernel/cpu/cacheinfo.c ++++ b/arch/x86/kernel/cpu/cacheinfo.c +@@ -659,6 +659,42 @@ static int find_num_cache_leaves(struct cpuinfo_x86 *c) + return i; + } + ++void cacheinfo_topoext_init_l2c_id(struct cpuinfo_x86 *c, int cpu) ++{ ++ u32 eax, ebx, ecx, edx, num_sharing_cache; ++ int i = 0, bits; ++ ++ /* Check if L2 cache identifiers exists. */ ++ if (!cpuid_ecx(0x80000006)) ++ return; ++ ++ while (true) { ++ u32 level; ++ ++ cpuid_count(0x8000001d, i, &eax, &ebx, &ecx, &edx); ++ if (!eax) ++ return; ++ ++ /* ++ * Check if the current leaf is for L2 cache using ++ * eax[7:5] used to describe the cache level. ++ */ ++ level = (eax >> 5) & 0x7; ++ if (level == 2) ++ break; ++ ++ ++i; ++ } ++ ++ /* ++ * L2 ID is calculated from the number of threads ++ * sharing the L2 cache. ++ */ ++ num_sharing_cache = ((eax >> 14) & 0xfff) + 1; ++ bits = get_count_order(num_sharing_cache); ++ per_cpu(cpu_l2c_id, cpu) = c->apicid >> bits; ++} ++ + void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu) + { + /* +diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c +index 5a2962c492d3..cb0025b4a2fd 100644 +--- a/arch/x86/kernel/cpu/hygon.c ++++ b/arch/x86/kernel/cpu/hygon.c +@@ -89,6 +89,7 @@ static void hygon_get_topology(struct cpuinfo_x86 *c) + /* Socket ID is ApicId[6] for these processors. */ + c->phys_proc_id = c->apicid >> APICID_SOCKET_ID_BIT; + ++ cacheinfo_topoext_init_l2c_id(c, cpu); + cacheinfo_hygon_init_llc_id(c, cpu); + } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { + u64 value; diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index c1e31e9a85d7..92d73ccede70 100644 --- a/arch/x86/mm/tlb.c @@ -11937,6 +12090,92 @@ index 5c536151ef83..5a80379253a7 100644 gpiod_set_value_cansleep(reset_gpio, 1); return; +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +index 9fa1d814508a..43d6a9d6a538 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +@@ -453,7 +453,8 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, + /* Limit maximum size to 2GiB due to SG table limitations */ + size = min(remaining_size, 2ULL << 30); + +- if (size >= (u64)pages_per_block << PAGE_SHIFT) ++ if ((size >= (u64)pages_per_block << PAGE_SHIFT) && ++ !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1))) + min_block_size = (u64)pages_per_block << PAGE_SHIFT; + + cur_size = size; +diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +index 074e70a5c458..e507d2e1410b 100644 +--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c ++++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +@@ -516,11 +516,8 @@ static enum bp_result get_gpio_i2c_info( + info->i2c_slave_address = record->i2c_slave_addr; + + /* TODO: check how to get register offset for en, Y, etc. */ +- info->gpio_info.clk_a_register_index = +- le16_to_cpu( +- header->gpio_pin[table_index].data_a_reg_index); +- info->gpio_info.clk_a_shift = +- header->gpio_pin[table_index].gpio_bitshift; ++ info->gpio_info.clk_a_register_index = le16_to_cpu(pin->data_a_reg_index); ++ info->gpio_info.clk_a_shift = pin->gpio_bitshift; + + return BP_RESULT_OK; + } +diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +index 8a0dd0d7134b..481a15b02126 100644 +--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c ++++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +@@ -714,7 +714,7 @@ static const struct dc_debug_options debug_defaults_drv = { + .timing_trace = false, + .clock_trace = true, + .disable_pplib_clock_request = true, +- .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, ++ .pipe_split_policy = MPC_SPLIT_DYNAMIC, + .force_single_disp_pipe_split = false, + .disable_dcc = DCC_ENABLE, + .vsr_support = true, +diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +index fbcf0afeae0d..ec30d171e7de 100644 +--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c ++++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +@@ -642,7 +642,7 @@ static const struct dc_debug_options debug_defaults_drv = { + .clock_trace = true, + .disable_pplib_clock_request = true, + .min_disp_clk_khz = 100000, +- .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, ++ .pipe_split_policy = MPC_SPLIT_DYNAMIC, + .force_single_disp_pipe_split = false, + .disable_dcc = DCC_ENABLE, + .vsr_support = true, +diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +index 0bcd4fe0ef17..5b7a780cbd54 100644 +--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c ++++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +@@ -304,7 +304,8 @@ navi10_get_allowed_feature_mask(struct smu_context *smu, + | FEATURE_MASK(FEATURE_GFX_SS_BIT) + | FEATURE_MASK(FEATURE_APCC_DFLL_BIT) + | FEATURE_MASK(FEATURE_FW_CTF_BIT) +- | FEATURE_MASK(FEATURE_OUT_OF_BAND_MONITOR_BIT); ++ | FEATURE_MASK(FEATURE_OUT_OF_BAND_MONITOR_BIT) ++ | FEATURE_MASK(FEATURE_TEMP_DEPENDENT_VMIN_BIT); + + if (adev->pm.pp_feature & PP_SCLK_DPM_MASK) + *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_GFXCLK_BIT); +diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c +index fd22d753b4ed..fcd4bfef7415 100644 +--- a/drivers/gpu/drm/scheduler/sched_main.c ++++ b/drivers/gpu/drm/scheduler/sched_main.c +@@ -308,7 +308,8 @@ static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched) + */ + void drm_sched_fault(struct drm_gpu_scheduler *sched) + { +- mod_delayed_work(sched->timeout_wq, &sched->work_tdr, 0); ++ if (sched->ready) ++ mod_delayed_work(sched->timeout_wq, &sched->work_tdr, 0); + } + EXPORT_SYMBOL(drm_sched_fault); + diff --git a/drivers/leds/trigger/Kconfig b/drivers/leds/trigger/Kconfig index dc6816d36d06..bda249068182 100644 --- a/drivers/leds/trigger/Kconfig @@ -13191,6 +13430,44 @@ index 000000000000..067eedb003b5 +MODULE_DESCRIPTION("Block device LED trigger"); +MODULE_AUTHOR("Ian Pilcher "); +MODULE_LICENSE("GPL v2"); +diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c +index d4b681d7e1d2..f2c6ec4d8e2e 100644 +--- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c ++++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c +@@ -162,12 +162,12 @@ mt7921_mac_init_band(struct mt7921_dev *dev, u8 band) + + u8 mt7921_check_offload_capability(struct device *dev, const char *fw_wm) + { +- struct mt7921_fw_features *features = NULL; + const struct mt76_connac2_fw_trailer *hdr; + struct mt7921_realease_info *rel_info; + const struct firmware *fw; + int ret, i, offset = 0; + const u8 *data, *end; ++ u8 offload_caps = 0; + + ret = request_firmware(&fw, fw_wm, dev); + if (ret) +@@ -199,7 +199,10 @@ u8 mt7921_check_offload_capability(struct device *dev, const char *fw_wm) + data += sizeof(*rel_info); + + if (rel_info->tag == MT7921_FW_TAG_FEATURE) { ++ struct mt7921_fw_features *features; ++ + features = (struct mt7921_fw_features *)data; ++ offload_caps = features->data; + break; + } + +@@ -209,7 +212,7 @@ u8 mt7921_check_offload_capability(struct device *dev, const char *fw_wm) + out: + release_firmware(fw); + +- return features ? features->data : 0; ++ return offload_caps; + } + EXPORT_SYMBOL_GPL(mt7921_check_offload_capability); + diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 64659b110973..4cad490028ab 100644 --- a/fs/eventpoll.c @@ -14494,7 +14771,7 @@ index b11b7e5115dc..3033cd6ed3b4 100644 -- 2.40.0 -From 3e1082da2acc077e4c66bf84f225770589ffef86 Mon Sep 17 00:00:00 2001 +From dc7a1604c3a8761a1f980e5dd5f1b2901b775a84 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Thu, 6 Apr 2023 17:34:29 +0200 Subject: [PATCH 07/15] fs-patches @@ -14548,7 +14825,7 @@ Signed-off-by: Peter Jung fs/btrfs/relocation.c | 2 +- fs/btrfs/scrub.c | 2 +- fs/btrfs/send.c | 684 +++++++++-------- - fs/btrfs/super.c | 5 +- + fs/btrfs/super.c | 3 +- fs/btrfs/sysfs.c | 54 +- fs/btrfs/tests/extent-map-tests.c | 2 +- fs/btrfs/transaction.c | 29 + @@ -14601,7 +14878,7 @@ Signed-off-by: Peter Jung include/trace/events/btrfs.h | 127 +++- include/trace/events/ext4.h | 7 - include/uapi/linux/btrfs.h | 12 +- - 100 files changed, 3340 insertions(+), 3789 deletions(-) + 100 files changed, 3338 insertions(+), 3789 deletions(-) create mode 100644 fs/btrfs/lru_cache.c create mode 100644 fs/btrfs/lru_cache.h @@ -16858,7 +17135,7 @@ index 317aeff6c1da..c48abc817ed2 100644 spin_unlock(&discard_ctl->lock); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c -index fde40112a259..b53f0e30ce2b 100644 +index 174d196d6960..9e1596bb208d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -78,23 +78,6 @@ static void btrfs_free_csum_hash(struct btrfs_fs_info *fs_info) @@ -17104,7 +17381,7 @@ index fde40112a259..b53f0e30ce2b 100644 static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info, u64 objectid) { -@@ -5162,11 +4979,12 @@ static int btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info, +@@ -5176,11 +4993,12 @@ static int btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info, start += fs_info->nodesize; if (!eb) continue; @@ -22284,7 +22561,7 @@ index d50182b6deec..e5c963bb873d 100644 kfree(sctx); } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c -index 433ce221dc5c..dd6d5b6844f1 100644 +index 3f3c8f9186f9..366fb4cde145 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -58,6 +58,7 @@ @@ -22295,16 +22572,7 @@ index 433ce221dc5c..dd6d5b6844f1 100644 #define CREATE_TRACE_POINTS #include -@@ -1630,6 +1631,8 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info, - btrfs_workqueue_set_max(fs_info->hipri_workers, new_pool_size); - btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size); - btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size); -+ workqueue_set_max_active(fs_info->endio_workers, new_pool_size); -+ workqueue_set_max_active(fs_info->endio_meta_workers, new_pool_size); - btrfs_workqueue_set_max(fs_info->endio_write_workers, new_pool_size); - btrfs_workqueue_set_max(fs_info->endio_freespace_worker, new_pool_size); - btrfs_workqueue_set_max(fs_info->delayed_workers, new_pool_size); -@@ -2049,7 +2052,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) +@@ -2049,7 +2050,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) } /* @@ -27071,7 +27339,7 @@ index b4f0f9531119..ada0a489bf2b 100644 -- 2.40.0 -From 0ae7f3e01c6d3f6e596eb17315ae6ee6c6e30538 Mon Sep 17 00:00:00 2001 +From 498e79383c15c846059b24391fce696d83ee9f83 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Fri, 10 Mar 2023 18:05:48 +0100 Subject: [PATCH 08/15] Implement amd-pstate-epp and amd-pstate-guided driver @@ -28672,7 +28940,7 @@ index 6a94a6eaad27..65623233ab2f 100644 -- 2.40.0 -From a7b105f93faf08a10fd8461d0eb11f82943c7fe3 Mon Sep 17 00:00:00 2001 +From 0cc7a04fbadb3ea4948f19e025e158f486f84663 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Fri, 17 Mar 2023 17:40:46 +0100 Subject: [PATCH 09/15] ksm @@ -29173,9 +29441,9 @@ index b6ea204d4e23..0064dcafb812 100644 -- 2.40.0 -From 5e1a0be395f154252ed7d0c8212923a7ac0381c4 Mon Sep 17 00:00:00 2001 +From 330db0b8fd28b6036a1dc14bfbad31f64999d8b5 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Thu, 13 Apr 2023 18:17:33 +0200 +Date: Thu, 20 Apr 2023 19:38:17 +0200 Subject: [PATCH 10/15] maple-lru Signed-off-by: Peter Jung @@ -29193,30 +29461,33 @@ Signed-off-by: Peter Jung arch/powerpc/kvm/book3s.h | 2 + arch/powerpc/kvm/book3s_64_mmu_radix.c | 78 +- arch/powerpc/kvm/book3s_hv.c | 10 +- + arch/s390/mm/hugetlbpage.c | 2 +- + arch/s390/mm/mmap.c | 2 +- arch/x86/include/asm/kvm_host.h | 27 + arch/x86/kvm/mmu/spte.h | 12 - arch/x86/kvm/mmu/tdp_mmu.c | 41 + + fs/hugetlbfs/inode.c | 2 +- include/linux/fs.h | 2 + include/linux/kvm_host.h | 29 + include/linux/maple_tree.h | 8 +- include/linux/memcontrol.h | 10 + include/linux/mm_inline.h | 19 +- include/linux/mmu_notifier.h | 40 + - include/linux/mmzone.h | 130 ++- - lib/maple_tree.c | 103 +- + include/linux/mmzone.h | 138 ++- + lib/maple_tree.c | 191 ++-- mm/fadvise.c | 5 +- mm/memcontrol.c | 12 + mm/memory.c | 7 +- - mm/mmap.c | 16 +- + mm/mmap.c | 73 +- mm/mmu_notifier.c | 26 + mm/nommu.c | 8 +- mm/page_alloc.c | 1 + mm/rmap.c | 48 +- - mm/vmscan.c | 1210 +++++++++++++++-------- + mm/vmscan.c | 1316 +++++++++++++++-------- mm/workingset.c | 4 +- tools/testing/radix-tree/maple.c | 56 +- - virt/kvm/kvm_main.c | 58 ++ - 36 files changed, 1668 insertions(+), 648 deletions(-) + virt/kvm/kvm_main.c | 58 + + 39 files changed, 1805 insertions(+), 776 deletions(-) diff --git a/Documentation/mm/multigen_lru.rst b/Documentation/mm/multigen_lru.rst index d7062c6a8946..52ed5092022f 100644 @@ -29489,7 +29760,7 @@ index c8dca8ae359c..350437661d4b 100644 + #endif /* __ARM64_S2_PGTABLE_H_ */ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c -index 9c5573bc4614..6770bc47f5c9 100644 +index e57f8ae09387..0b71117ffc7e 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -191,6 +191,7 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) @@ -29911,6 +30182,32 @@ index 6ba68dd6190b..17b415661282 100644 static int kvmppc_book3s_init_hv(void) { int r; +diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c +index c299a18273ff..c718f2a0de94 100644 +--- a/arch/s390/mm/hugetlbpage.c ++++ b/arch/s390/mm/hugetlbpage.c +@@ -273,7 +273,7 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, + + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + info.length = len; +- info.low_limit = max(PAGE_SIZE, mmap_min_addr); ++ info.low_limit = PAGE_SIZE; + info.high_limit = current->mm->mmap_base; + info.align_mask = PAGE_MASK & ~huge_page_mask(h); + info.align_offset = 0; +diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c +index 3327c47bc181..fc9a7dc26c5e 100644 +--- a/arch/s390/mm/mmap.c ++++ b/arch/s390/mm/mmap.c +@@ -136,7 +136,7 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp, unsigned long ad + + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + info.length = len; +- info.low_limit = max(PAGE_SIZE, mmap_min_addr); ++ info.low_limit = PAGE_SIZE; + info.high_limit = mm->mmap_base; + if (filp || (flags & MAP_SHARED)) + info.align_mask = MMAP_ALIGN_MASK << PAGE_SHIFT; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 24480b4f1c57..a076e337b3db 100644 --- a/arch/x86/include/asm/kvm_host.h @@ -30036,6 +30333,19 @@ index d6df38d371a0..9028e09f1aab 100644 static bool test_age_gfn(struct kvm *kvm, struct tdp_iter *iter, struct kvm_gfn_range *range) { +diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c +index 790d2727141a..07297fac7de8 100644 +--- a/fs/hugetlbfs/inode.c ++++ b/fs/hugetlbfs/inode.c +@@ -208,7 +208,7 @@ hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long addr, + + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + info.length = len; +- info.low_limit = max(PAGE_SIZE, mmap_min_addr); ++ info.low_limit = PAGE_SIZE; + info.high_limit = arch_get_mmap_base(addr, current->mm->mmap_base); + info.align_mask = PAGE_MASK & ~huge_page_mask(h); + info.align_offset = 0; diff --git a/include/linux/fs.h b/include/linux/fs.h index c1769a2c5d70..d353c262d669 100644 --- a/include/linux/fs.h @@ -30285,7 +30595,7 @@ index d6c06e140277..521f71ad0467 100644 unsigned long address) { diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h -index cd28a100d9e4..0ddbf712708d 100644 +index cd28a100d9e4..5eeca358d043 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -7,6 +7,7 @@ @@ -30357,7 +30667,26 @@ index cd28a100d9e4..0ddbf712708d 100644 }; enum { -@@ -461,7 +471,7 @@ struct lru_gen_mm_state { +@@ -444,24 +454,20 @@ enum { + struct lru_gen_mm_state { + /* set to max_seq after each iteration */ + unsigned long seq; +- /* where the current iteration continues (inclusive) */ ++ /* where the current iteration continues after */ + struct list_head *head; +- /* where the last iteration ended (exclusive) */ ++ /* where the last iteration ended before */ + struct list_head *tail; +- /* to wait for the last page table walker to finish */ +- struct wait_queue_head wait; + /* Bloom filters flip after each iteration */ + unsigned long *filters[NR_BLOOM_FILTERS]; + /* the mm stats for debugging */ + unsigned long stats[NR_HIST_GENS][NR_MM_STATS]; +- /* the number of concurrent page table walkers */ +- int nr_walkers; + }; + struct lru_gen_mm_walk { /* the lruvec under reclaim */ struct lruvec *lruvec; @@ -30366,7 +30695,7 @@ index cd28a100d9e4..0ddbf712708d 100644 unsigned long max_seq; /* the next address within an mm to scan */ unsigned long next_addr; -@@ -476,24 +486,101 @@ struct lru_gen_mm_walk { +@@ -476,24 +482,101 @@ struct lru_gen_mm_walk { }; void lru_gen_init_lruvec(struct lruvec *lruvec); @@ -30471,7 +30800,7 @@ index cd28a100d9e4..0ddbf712708d 100644 static inline void lru_gen_init_memcg(struct mem_cgroup *memcg) { } -@@ -501,7 +588,24 @@ static inline void lru_gen_init_memcg(struct mem_cgroup *memcg) +@@ -501,7 +584,24 @@ static inline void lru_gen_init_memcg(struct mem_cgroup *memcg) static inline void lru_gen_exit_memcg(struct mem_cgroup *memcg) { } @@ -30497,7 +30826,7 @@ index cd28a100d9e4..0ddbf712708d 100644 #endif /* CONFIG_LRU_GEN */ -@@ -524,7 +628,7 @@ struct lruvec { +@@ -524,7 +624,7 @@ struct lruvec { unsigned long flags; #ifdef CONFIG_LRU_GEN /* evictable pages divided into generations */ @@ -30506,7 +30835,7 @@ index cd28a100d9e4..0ddbf712708d 100644 /* to concurrently iterate lru_gen_mm_list */ struct lru_gen_mm_state mm_state; #endif -@@ -1242,7 +1346,9 @@ typedef struct pglist_data { +@@ -1242,7 +1342,9 @@ typedef struct pglist_data { #ifdef CONFIG_LRU_GEN /* kswap mm walk data */ @@ -30518,7 +30847,7 @@ index cd28a100d9e4..0ddbf712708d 100644 CACHELINE_PADDING(_pad2_); diff --git a/lib/maple_tree.c b/lib/maple_tree.c -index fb452873914f..c167efc70e60 100644 +index 022573f49957..110a36479dce 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -146,6 +146,13 @@ struct maple_subtree_state { @@ -30608,7 +30937,7 @@ index fb452873914f..c167efc70e60 100644 } /* -@@ -1950,10 +1948,9 @@ static inline int mab_calc_split(struct ma_state *mas, +@@ -1952,10 +1950,9 @@ static inline int mab_calc_split(struct ma_state *mas, /* Avoid ending a node on a NULL entry */ split = mab_no_null_split(bn, split, slot_count); @@ -30621,7 +30950,7 @@ index fb452873914f..c167efc70e60 100644 return split; } -@@ -2176,7 +2173,7 @@ static inline void mas_bulk_rebalance(struct ma_state *mas, unsigned char end, +@@ -2178,7 +2175,7 @@ static inline void mas_bulk_rebalance(struct ma_state *mas, unsigned char end, * * Return: The actual end of the data stored in @b_node */ @@ -30630,7 +30959,7 @@ index fb452873914f..c167efc70e60 100644 struct maple_big_node *b_node, unsigned char offset_end) { unsigned char slot; -@@ -2313,9 +2310,7 @@ static inline struct maple_enode *mte_node_or_none(struct maple_enode *enode) +@@ -2315,9 +2312,7 @@ static inline struct maple_enode *mte_node_or_none(struct maple_enode *enode) static inline void mas_wr_node_walk(struct ma_wr_state *wr_mas) { struct ma_state *mas = wr_mas->mas; @@ -30641,14 +30970,14 @@ index fb452873914f..c167efc70e60 100644 if (unlikely(ma_is_dense(wr_mas->type))) { wr_mas->r_max = wr_mas->r_min = mas->index; -@@ -2328,34 +2323,12 @@ static inline void mas_wr_node_walk(struct ma_wr_state *wr_mas) +@@ -2330,34 +2325,12 @@ static inline void mas_wr_node_walk(struct ma_wr_state *wr_mas) count = wr_mas->node_end = ma_data_end(wr_mas->node, wr_mas->type, wr_mas->pivots, mas->max); offset = mas->offset; - min = mas_safe_min(mas, wr_mas->pivots, offset); - if (unlikely(offset == count)) - goto max; - +- - max = wr_mas->pivots[offset]; - index = mas->index; - if (unlikely(index <= max)) @@ -30664,12 +30993,12 @@ index fb452873914f..c167efc70e60 100644 - goto done; - else if (unlikely(!max)) - break; -+ while (offset < count && mas->index > wr_mas->pivots[offset]) -+ offset++; - min = max + 1; - } -- ++ while (offset < count && mas->index > wr_mas->pivots[offset]) ++ offset++; + -max: - max = mas->max; -done: @@ -30680,7 +31009,7 @@ index fb452873914f..c167efc70e60 100644 wr_mas->offset_end = mas->offset = offset; } -@@ -3010,7 +2983,7 @@ static inline void *mtree_range_walk(struct ma_state *mas) +@@ -3012,7 +2985,7 @@ static inline void *mtree_range_walk(struct ma_state *mas) mas->min = prev_min; mas->max = prev_max; mas->node = last; @@ -30689,7 +31018,7 @@ index fb452873914f..c167efc70e60 100644 dead_node: mas_reset(mas); -@@ -3283,7 +3256,7 @@ static inline void mas_destroy_rebalance(struct ma_state *mas, unsigned char end +@@ -3285,7 +3258,7 @@ static inline void mas_destroy_rebalance(struct ma_state *mas, unsigned char end if (tmp < max_p) memset(pivs + tmp, 0, @@ -30698,7 +31027,7 @@ index fb452873914f..c167efc70e60 100644 if (tmp < mt_slots[mt]) memset(slots + tmp, 0, sizeof(void *) * (max_s - tmp)); -@@ -3530,7 +3503,6 @@ static inline bool mas_push_data(struct ma_state *mas, int height, +@@ -3532,7 +3505,6 @@ static inline bool mas_push_data(struct ma_state *mas, int height, */ static int mas_split(struct ma_state *mas, struct maple_big_node *b_node) { @@ -30706,7 +31035,7 @@ index fb452873914f..c167efc70e60 100644 struct maple_subtree_state mast; int height = 0; unsigned char mid_split, split = 0; -@@ -3649,7 +3621,7 @@ static inline bool mas_reuse_node(struct ma_wr_state *wr_mas, +@@ -3651,7 +3623,7 @@ static inline bool mas_reuse_node(struct ma_wr_state *wr_mas, * @b_node: The maple big node * @end: The end of the data. */ @@ -30715,7 +31044,7 @@ index fb452873914f..c167efc70e60 100644 struct maple_big_node *b_node, unsigned char end) { struct maple_node *node; -@@ -3950,7 +3922,7 @@ static inline void *mtree_lookup_walk(struct ma_state *mas) +@@ -3952,7 +3924,7 @@ static inline void *mtree_lookup_walk(struct ma_state *mas) goto dead_node; } while (!ma_is_leaf(type)); @@ -30724,7 +31053,7 @@ index fb452873914f..c167efc70e60 100644 dead_node: mas_reset(mas); -@@ -4785,15 +4757,11 @@ static inline void *mas_next_nentry(struct ma_state *mas, +@@ -4788,15 +4760,11 @@ static inline void *mas_next_nentry(struct ma_state *mas, static inline void mas_rewalk(struct ma_state *mas, unsigned long index) { @@ -30740,7 +31069,179 @@ index fb452873914f..c167efc70e60 100644 } /* -@@ -5675,8 +5643,8 @@ static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt, +@@ -4973,7 +4941,8 @@ static inline void *mas_prev_entry(struct ma_state *mas, unsigned long min) + * Return: True if found in a leaf, false otherwise. + * + */ +-static bool mas_rev_awalk(struct ma_state *mas, unsigned long size) ++static bool mas_rev_awalk(struct ma_state *mas, unsigned long size, ++ unsigned long *gap_min, unsigned long *gap_max) + { + enum maple_type type = mte_node_type(mas->node); + struct maple_node *node = mas_mn(mas); +@@ -5038,8 +5007,8 @@ static bool mas_rev_awalk(struct ma_state *mas, unsigned long size) + + if (unlikely(ma_is_leaf(type))) { + mas->offset = offset; +- mas->min = min; +- mas->max = min + gap - 1; ++ *gap_min = min; ++ *gap_max = min + gap - 1; + return true; + } + +@@ -5063,10 +5032,10 @@ static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size) + { + enum maple_type type = mte_node_type(mas->node); + unsigned long pivot, min, gap = 0; +- unsigned char offset; +- unsigned long *gaps; +- unsigned long *pivots = ma_pivots(mas_mn(mas), type); +- void __rcu **slots = ma_slots(mas_mn(mas), type); ++ unsigned char offset, data_end; ++ unsigned long *gaps, *pivots; ++ void __rcu **slots; ++ struct maple_node *node; + bool found = false; + + if (ma_is_dense(type)) { +@@ -5074,13 +5043,15 @@ static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size) + return true; + } + +- gaps = ma_gaps(mte_to_node(mas->node), type); ++ node = mas_mn(mas); ++ pivots = ma_pivots(node, type); ++ slots = ma_slots(node, type); ++ gaps = ma_gaps(node, type); + offset = mas->offset; + min = mas_safe_min(mas, pivots, offset); +- for (; offset < mt_slots[type]; offset++) { +- pivot = mas_safe_pivot(mas, pivots, offset, type); +- if (offset && !pivot) +- break; ++ data_end = ma_data_end(node, type, pivots, mas->max); ++ for (; offset <= data_end; offset++) { ++ pivot = mas_logical_pivot(mas, pivots, offset, type); + + /* Not within lower bounds */ + if (mas->index > pivot) +@@ -5279,25 +5250,28 @@ static inline void mas_fill_gap(struct ma_state *mas, void *entry, + * @size: The size of the gap + * @fwd: Searching forward or back + */ +-static inline void mas_sparse_area(struct ma_state *mas, unsigned long min, ++static inline int mas_sparse_area(struct ma_state *mas, unsigned long min, + unsigned long max, unsigned long size, bool fwd) + { +- unsigned long start = 0; +- +- if (!unlikely(mas_is_none(mas))) +- start++; ++ if (!unlikely(mas_is_none(mas)) && min == 0) { ++ min++; ++ /* ++ * At this time, min is increased, we need to recheck whether ++ * the size is satisfied. ++ */ ++ if (min > max || max - min + 1 < size) ++ return -EBUSY; ++ } + /* mas_is_ptr */ + +- if (start < min) +- start = min; +- + if (fwd) { +- mas->index = start; +- mas->last = start + size - 1; +- return; ++ mas->index = min; ++ mas->last = min + size - 1; ++ } else { ++ mas->last = max; ++ mas->index = max - size + 1; + } +- +- mas->index = max; ++ return 0; + } + + /* +@@ -5315,6 +5289,9 @@ int mas_empty_area(struct ma_state *mas, unsigned long min, + unsigned long *pivots; + enum maple_type mt; + ++ if (min >= max) ++ return -EINVAL; ++ + if (mas_is_start(mas)) + mas_start(mas); + else if (mas->offset >= 2) +@@ -5323,10 +5300,8 @@ int mas_empty_area(struct ma_state *mas, unsigned long min, + return -EBUSY; + + /* Empty set */ +- if (mas_is_none(mas) || mas_is_ptr(mas)) { +- mas_sparse_area(mas, min, max, size, true); +- return 0; +- } ++ if (mas_is_none(mas) || mas_is_ptr(mas)) ++ return mas_sparse_area(mas, min, max, size, true); + + /* The start of the window can only be within these values */ + mas->index = min; +@@ -5369,6 +5344,9 @@ int mas_empty_area_rev(struct ma_state *mas, unsigned long min, + { + struct maple_enode *last = mas->node; + ++ if (min >= max) ++ return -EINVAL; ++ + if (mas_is_start(mas)) { + mas_start(mas); + mas->offset = mas_data_end(mas); +@@ -5379,16 +5357,14 @@ int mas_empty_area_rev(struct ma_state *mas, unsigned long min, + } + + /* Empty set. */ +- if (mas_is_none(mas) || mas_is_ptr(mas)) { +- mas_sparse_area(mas, min, max, size, false); +- return 0; +- } ++ if (mas_is_none(mas) || mas_is_ptr(mas)) ++ return mas_sparse_area(mas, min, max, size, false); + + /* The start of the window can only be within these values. */ + mas->index = min; + mas->last = max; + +- while (!mas_rev_awalk(mas, size)) { ++ while (!mas_rev_awalk(mas, size, &min, &max)) { + if (last == mas->node) { + if (!mas_rewind_node(mas)) + return -EBUSY; +@@ -5403,17 +5379,9 @@ int mas_empty_area_rev(struct ma_state *mas, unsigned long min, + if (unlikely(mas->offset == MAPLE_NODE_SLOTS)) + return -EBUSY; + +- /* +- * mas_rev_awalk() has set mas->min and mas->max to the gap values. If +- * the maximum is outside the window we are searching, then use the last +- * location in the search. +- * mas->max and mas->min is the range of the gap. +- * mas->index and mas->last are currently set to the search range. +- */ +- + /* Trim the upper limit to the max. */ +- if (mas->max <= mas->last) +- mas->last = mas->max; ++ if (max <= mas->last) ++ mas->last = max; + + mas->index = mas->last - size + 1; + return 0; +@@ -5678,8 +5646,8 @@ static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt, /* * mte_destroy_walk() - Free a tree or sub-tree. @@ -30751,7 +31252,7 @@ index fb452873914f..c167efc70e60 100644 * * Must hold the write lock. */ -@@ -5708,7 +5676,6 @@ static void mas_wr_store_setup(struct ma_wr_state *wr_mas) +@@ -5711,7 +5679,6 @@ static void mas_wr_store_setup(struct ma_wr_state *wr_mas) mas_reset(wr_mas->mas); } } @@ -30759,7 +31260,7 @@ index fb452873914f..c167efc70e60 100644 } /* Interface */ -@@ -5800,12 +5767,11 @@ EXPORT_SYMBOL_GPL(mas_store_prealloc); +@@ -5803,12 +5770,11 @@ EXPORT_SYMBOL_GPL(mas_store_prealloc); /** * mas_preallocate() - Preallocate enough nodes for a store operation * @mas: The maple state @@ -30773,7 +31274,7 @@ index fb452873914f..c167efc70e60 100644 { int ret; -@@ -5821,6 +5787,7 @@ int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp) +@@ -5824,6 +5790,7 @@ int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp) mas_reset(mas); return ret; } @@ -30781,7 +31282,7 @@ index fb452873914f..c167efc70e60 100644 /* * mas_destroy() - destroy a maple state. -@@ -6833,7 +6800,7 @@ static void mt_dump_range64(const struct maple_tree *mt, void *entry, +@@ -6836,7 +6803,7 @@ static void mt_dump_range64(const struct maple_tree *mt, void *entry, if (i < (MAPLE_RANGE64_SLOTS - 1)) last = node->pivot[i]; @@ -30790,7 +31291,7 @@ index fb452873914f..c167efc70e60 100644 break; if (last == 0 && i > 0) break; -@@ -6940,7 +6907,7 @@ void mt_dump(const struct maple_tree *mt) +@@ -6943,7 +6910,7 @@ void mt_dump(const struct maple_tree *mt) if (!xa_is_node(entry)) mt_dump_entry(entry, 0, 0, 0); else if (entry) @@ -30899,7 +31400,7 @@ index 6a99e9dc07e6..8a26ee4dc4d4 100644 static void lru_gen_exit_fault(void) diff --git a/mm/mmap.c b/mm/mmap.c -index 1931da077b2f..c1a09b21a22a 100644 +index 1931da077b2f..b7380077336d 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -472,7 +472,7 @@ static int vma_link(struct mm_struct *mm, struct vm_area_struct *vma) @@ -30929,7 +31430,107 @@ index 1931da077b2f..c1a09b21a22a 100644 return -ENOMEM; vma_adjust_trans_huge(orig_vma, start, end, adjust_next); -@@ -1938,7 +1938,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) +@@ -1566,6 +1566,8 @@ static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags) + static unsigned long unmapped_area(struct vm_unmapped_area_info *info) + { + unsigned long length, gap; ++ unsigned long low_limit, high_limit; ++ struct vm_area_struct *tmp; + + MA_STATE(mas, ¤t->mm->mm_mt, 0, 0); + +@@ -1574,12 +1576,32 @@ static unsigned long unmapped_area(struct vm_unmapped_area_info *info) + if (length < info->length) + return -ENOMEM; + +- if (mas_empty_area(&mas, info->low_limit, info->high_limit - 1, +- length)) ++ low_limit = info->low_limit; ++ if (low_limit < mmap_min_addr) ++ low_limit = mmap_min_addr; ++ high_limit = info->high_limit; ++retry: ++ if (mas_empty_area(&mas, low_limit, high_limit - 1, length)) + return -ENOMEM; + + gap = mas.index; + gap += (info->align_offset - gap) & info->align_mask; ++ tmp = mas_next(&mas, ULONG_MAX); ++ if (tmp && (tmp->vm_flags & VM_GROWSDOWN)) { /* Avoid prev check if possible */ ++ if (vm_start_gap(tmp) < gap + length - 1) { ++ low_limit = tmp->vm_end; ++ mas_reset(&mas); ++ goto retry; ++ } ++ } else { ++ tmp = mas_prev(&mas, 0); ++ if (tmp && vm_end_gap(tmp) > gap) { ++ low_limit = vm_end_gap(tmp); ++ mas_reset(&mas); ++ goto retry; ++ } ++ } ++ + return gap; + } + +@@ -1595,7 +1617,9 @@ static unsigned long unmapped_area(struct vm_unmapped_area_info *info) + */ + static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info) + { +- unsigned long length, gap; ++ unsigned long length, gap, gap_end; ++ unsigned long low_limit, high_limit; ++ struct vm_area_struct *tmp; + + MA_STATE(mas, ¤t->mm->mm_mt, 0, 0); + /* Adjust search length to account for worst case alignment overhead */ +@@ -1603,12 +1627,33 @@ static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info) + if (length < info->length) + return -ENOMEM; + +- if (mas_empty_area_rev(&mas, info->low_limit, info->high_limit - 1, +- length)) ++ low_limit = info->low_limit; ++ if (low_limit < mmap_min_addr) ++ low_limit = mmap_min_addr; ++ high_limit = info->high_limit; ++retry: ++ if (mas_empty_area_rev(&mas, low_limit, high_limit - 1, length)) + return -ENOMEM; + + gap = mas.last + 1 - info->length; + gap -= (gap - info->align_offset) & info->align_mask; ++ gap_end = mas.last; ++ tmp = mas_next(&mas, ULONG_MAX); ++ if (tmp && (tmp->vm_flags & VM_GROWSDOWN)) { /* Avoid prev check if possible */ ++ if (vm_start_gap(tmp) <= gap_end) { ++ high_limit = vm_start_gap(tmp); ++ mas_reset(&mas); ++ goto retry; ++ } ++ } else { ++ tmp = mas_prev(&mas, 0); ++ if (tmp && vm_end_gap(tmp) > gap) { ++ high_limit = tmp->vm_start; ++ mas_reset(&mas); ++ goto retry; ++ } ++ } ++ + return gap; + } + +@@ -1722,7 +1767,7 @@ generic_get_unmapped_area_topdown(struct file *filp, unsigned long addr, + + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + info.length = len; +- info.low_limit = max(PAGE_SIZE, mmap_min_addr); ++ info.low_limit = PAGE_SIZE; + info.high_limit = arch_get_mmap_base(addr, mm->mmap_base); + info.align_mask = 0; + info.align_offset = 0; +@@ -1938,7 +1983,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) /* Check that both stack segments have the same anon_vma? */ } @@ -30938,7 +31539,7 @@ index 1931da077b2f..c1a09b21a22a 100644 return -ENOMEM; /* We must make sure the anon_vma is allocated. */ -@@ -2019,7 +2019,7 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address) +@@ -2019,7 +2064,7 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address) return -ENOMEM; } @@ -30947,7 +31548,7 @@ index 1931da077b2f..c1a09b21a22a 100644 return -ENOMEM; /* We must make sure the anon_vma is allocated. */ -@@ -2311,7 +2311,7 @@ do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma, +@@ -2311,7 +2356,7 @@ do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma, mt_init_flags(&mt_detach, mas->tree->ma_flags & MT_FLAGS_LOCK_MASK); mt_set_external_lock(&mt_detach, &mm->mmap_lock); @@ -30956,7 +31557,7 @@ index 1931da077b2f..c1a09b21a22a 100644 return -ENOMEM; mas->last = end - 1; -@@ -2680,7 +2680,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, +@@ -2680,7 +2725,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, goto free_vma; } @@ -30965,7 +31566,7 @@ index 1931da077b2f..c1a09b21a22a 100644 error = -ENOMEM; if (file) goto close_and_free_vma; -@@ -2953,7 +2953,7 @@ static int do_brk_flags(struct ma_state *mas, struct vm_area_struct *vma, +@@ -2953,7 +2998,7 @@ static int do_brk_flags(struct ma_state *mas, struct vm_area_struct *vma, can_vma_merge_after(vma, flags, NULL, NULL, addr >> PAGE_SHIFT, NULL_VM_UFFD_CTX, NULL)) { mas_set_range(mas, vma->vm_start, addr + len - 1); @@ -31146,7 +31747,7 @@ index 3b45d049069e..8ecbbadab752 100644 *vm_flags = pra.vm_flags; diff --git a/mm/vmscan.c b/mm/vmscan.c -index 160acbbdf111..ec0142165ce7 100644 +index 160acbbdf111..4af7fd442b4a 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -55,6 +55,10 @@ @@ -31323,7 +31924,31 @@ index 160acbbdf111..ec0142165ce7 100644 /****************************************************************************** * mm_struct list ******************************************************************************/ -@@ -3348,94 +3454,6 @@ void lru_gen_migrate_mm(struct mm_struct *mm) +@@ -3294,18 +3400,13 @@ void lru_gen_del_mm(struct mm_struct *mm) + for_each_node(nid) { + struct lruvec *lruvec = get_lruvec(memcg, nid); + +- /* where the last iteration ended (exclusive) */ ++ /* where the current iteration continues after */ ++ if (lruvec->mm_state.head == &mm->lru_gen.list) ++ lruvec->mm_state.head = lruvec->mm_state.head->prev; ++ ++ /* where the last iteration ended before */ + if (lruvec->mm_state.tail == &mm->lru_gen.list) + lruvec->mm_state.tail = lruvec->mm_state.tail->next; +- +- /* where the current iteration continues (inclusive) */ +- if (lruvec->mm_state.head != &mm->lru_gen.list) +- continue; +- +- lruvec->mm_state.head = lruvec->mm_state.head->next; +- /* the deletion ends the current iteration */ +- if (lruvec->mm_state.head == &mm_list->fifo) +- WRITE_ONCE(lruvec->mm_state.seq, lruvec->mm_state.seq + 1); + } + + list_del_init(&mm->lru_gen.list); +@@ -3348,94 +3449,6 @@ void lru_gen_migrate_mm(struct mm_struct *mm) } #endif @@ -31418,7 +32043,112 @@ index 160acbbdf111..ec0142165ce7 100644 static void reset_mm_stats(struct lruvec *lruvec, struct lru_gen_mm_walk *walk, bool last) { int i; -@@ -3592,7 +3610,7 @@ static bool iterate_mm_list_nowalk(struct lruvec *lruvec, unsigned long max_seq) +@@ -3489,68 +3502,54 @@ static bool iterate_mm_list(struct lruvec *lruvec, struct lru_gen_mm_walk *walk, + struct mm_struct **iter) + { + bool first = false; +- bool last = true; ++ bool last = false; + struct mm_struct *mm = NULL; + struct mem_cgroup *memcg = lruvec_memcg(lruvec); + struct lru_gen_mm_list *mm_list = get_mm_list(memcg); + struct lru_gen_mm_state *mm_state = &lruvec->mm_state; + + /* +- * There are four interesting cases for this page table walker: +- * 1. It tries to start a new iteration of mm_list with a stale max_seq; +- * there is nothing left to do. +- * 2. It's the first of the current generation, and it needs to reset +- * the Bloom filter for the next generation. +- * 3. It reaches the end of mm_list, and it needs to increment +- * mm_state->seq; the iteration is done. +- * 4. It's the last of the current generation, and it needs to reset the +- * mm stats counters for the next generation. ++ * mm_state->seq is incremented after each iteration of mm_list. There ++ * are three interesting cases for this page table walker: ++ * 1. It tries to start a new iteration with a stale max_seq: there is ++ * nothing left to do. ++ * 2. It started the next iteration: it needs to reset the Bloom filter ++ * so that a fresh set of PTE tables can be recorded. ++ * 3. It ended the current iteration: it needs to reset the mm stats ++ * counters and tell its caller to increment max_seq. + */ + spin_lock(&mm_list->lock); + + VM_WARN_ON_ONCE(mm_state->seq + 1 < walk->max_seq); +- VM_WARN_ON_ONCE(*iter && mm_state->seq > walk->max_seq); +- VM_WARN_ON_ONCE(*iter && !mm_state->nr_walkers); + +- if (walk->max_seq <= mm_state->seq) { +- if (!*iter) +- last = false; ++ if (walk->max_seq <= mm_state->seq) + goto done; +- } + +- if (!mm_state->nr_walkers) { +- VM_WARN_ON_ONCE(mm_state->head && mm_state->head != &mm_list->fifo); ++ if (!mm_state->head) ++ mm_state->head = &mm_list->fifo; + +- mm_state->head = mm_list->fifo.next; ++ if (mm_state->head == &mm_list->fifo) + first = true; +- } +- +- while (!mm && mm_state->head != &mm_list->fifo) { +- mm = list_entry(mm_state->head, struct mm_struct, lru_gen.list); + ++ do { + mm_state->head = mm_state->head->next; ++ if (mm_state->head == &mm_list->fifo) { ++ WRITE_ONCE(mm_state->seq, mm_state->seq + 1); ++ last = true; ++ break; ++ } + + /* force scan for those added after the last iteration */ +- if (!mm_state->tail || mm_state->tail == &mm->lru_gen.list) { +- mm_state->tail = mm_state->head; ++ if (!mm_state->tail || mm_state->tail == mm_state->head) { ++ mm_state->tail = mm_state->head->next; + walk->force_scan = true; + } + ++ mm = list_entry(mm_state->head, struct mm_struct, lru_gen.list); + if (should_skip_mm(mm, walk)) + mm = NULL; +- } +- +- if (mm_state->head == &mm_list->fifo) +- WRITE_ONCE(mm_state->seq, mm_state->seq + 1); ++ } while (!mm); + done: +- if (*iter && !mm) +- mm_state->nr_walkers--; +- if (!*iter && mm) +- mm_state->nr_walkers++; +- +- if (mm_state->nr_walkers) +- last = false; +- + if (*iter || last) + reset_mm_stats(lruvec, walk, last); + +@@ -3578,9 +3577,9 @@ static bool iterate_mm_list_nowalk(struct lruvec *lruvec, unsigned long max_seq) + + VM_WARN_ON_ONCE(mm_state->seq + 1 < max_seq); + +- if (max_seq > mm_state->seq && !mm_state->nr_walkers) { +- VM_WARN_ON_ONCE(mm_state->head && mm_state->head != &mm_list->fifo); +- ++ if (max_seq > mm_state->seq) { ++ mm_state->head = NULL; ++ mm_state->tail = NULL; + WRITE_ONCE(mm_state->seq, mm_state->seq + 1); + reset_mm_stats(lruvec, NULL, true); + success = true; +@@ -3592,7 +3591,7 @@ static bool iterate_mm_list_nowalk(struct lruvec *lruvec, unsigned long max_seq) } /****************************************************************************** @@ -31427,7 +32157,7 @@ index 160acbbdf111..ec0142165ce7 100644 ******************************************************************************/ /* -@@ -3623,7 +3641,7 @@ struct ctrl_pos { +@@ -3623,7 +3622,7 @@ struct ctrl_pos { static void read_ctrl_pos(struct lruvec *lruvec, int type, int tier, int gain, struct ctrl_pos *pos) { @@ -31436,7 +32166,7 @@ index 160acbbdf111..ec0142165ce7 100644 int hist = lru_hist_from_seq(lrugen->min_seq[type]); pos->refaulted = lrugen->avg_refaulted[type][tier] + -@@ -3638,7 +3656,7 @@ static void read_ctrl_pos(struct lruvec *lruvec, int type, int tier, int gain, +@@ -3638,7 +3637,7 @@ static void read_ctrl_pos(struct lruvec *lruvec, int type, int tier, int gain, static void reset_ctrl_pos(struct lruvec *lruvec, int type, bool carryover) { int hist, tier; @@ -31445,7 +32175,7 @@ index 160acbbdf111..ec0142165ce7 100644 bool clear = carryover ? NR_HIST_GENS == 1 : NR_HIST_GENS > 1; unsigned long seq = carryover ? lrugen->min_seq[type] : lrugen->max_seq + 1; -@@ -3715,7 +3733,7 @@ static int folio_update_gen(struct folio *folio, int gen) +@@ -3715,7 +3714,7 @@ static int folio_update_gen(struct folio *folio, int gen) static int folio_inc_gen(struct lruvec *lruvec, struct folio *folio, bool reclaiming) { int type = folio_is_file_lru(folio); @@ -31454,7 +32184,7 @@ index 160acbbdf111..ec0142165ce7 100644 int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]); unsigned long new_flags, old_flags = READ_ONCE(folio->flags); -@@ -3760,7 +3778,7 @@ static void update_batch_size(struct lru_gen_mm_walk *walk, struct folio *folio, +@@ -3760,7 +3759,7 @@ static void update_batch_size(struct lru_gen_mm_walk *walk, struct folio *folio, static void reset_batch_size(struct lruvec *lruvec, struct lru_gen_mm_walk *walk) { int gen, type, zone; @@ -31463,7 +32193,7 @@ index 160acbbdf111..ec0142165ce7 100644 walk->batched = 0; -@@ -3793,7 +3811,10 @@ static int should_skip_vma(unsigned long start, unsigned long end, struct mm_wal +@@ -3793,7 +3792,10 @@ static int should_skip_vma(unsigned long start, unsigned long end, struct mm_wal if (is_vm_hugetlb_page(vma)) return true; @@ -31475,7 +32205,7 @@ index 160acbbdf111..ec0142165ce7 100644 return true; if (vma == get_gate_vma(vma->vm_mm)) -@@ -3908,6 +3929,55 @@ static struct folio *get_pfn_folio(unsigned long pfn, struct mem_cgroup *memcg, +@@ -3908,6 +3910,55 @@ static struct folio *get_pfn_folio(unsigned long pfn, struct mem_cgroup *memcg, return folio; } @@ -31531,7 +32261,7 @@ index 160acbbdf111..ec0142165ce7 100644 static bool suitable_to_scan(int total, int young) { int n = clamp_t(int, cache_line_size() / sizeof(pte_t), 2, 8); -@@ -3923,6 +3993,8 @@ static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end, +@@ -3923,6 +3974,8 @@ static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end, pte_t *pte; spinlock_t *ptl; unsigned long addr; @@ -31540,7 +32270,7 @@ index 160acbbdf111..ec0142165ce7 100644 int total = 0; int young = 0; struct lru_gen_mm_walk *walk = args->private; -@@ -3941,6 +4013,7 @@ static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end, +@@ -3941,6 +3994,7 @@ static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end, pte = pte_offset_map(pmd, start & PMD_MASK); restart: for (i = pte_index(start), addr = start; addr != end; i++, addr += PAGE_SIZE) { @@ -31548,7 +32278,7 @@ index 160acbbdf111..ec0142165ce7 100644 unsigned long pfn; struct folio *folio; -@@ -3948,20 +4021,27 @@ static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end, +@@ -3948,20 +4002,27 @@ static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end, walk->mm_stats[MM_LEAF_TOTAL]++; pfn = get_pte_pfn(pte[i], args->vma, addr); @@ -31581,7 +32311,7 @@ index 160acbbdf111..ec0142165ce7 100644 young++; walk->mm_stats[MM_LEAF_YOUNG]++; -@@ -3988,8 +4068,8 @@ static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end, +@@ -3988,8 +4049,8 @@ static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end, } #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) @@ -31592,7 +32322,7 @@ index 160acbbdf111..ec0142165ce7 100644 { int i; pmd_t *pmd; -@@ -4002,18 +4082,19 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area +@@ -4002,18 +4063,19 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area VM_WARN_ON_ONCE(pud_leaf(*pud)); /* try to batch at most 1+MIN_LRU_BATCH+1 entries */ @@ -31616,7 +32346,7 @@ index 160acbbdf111..ec0142165ce7 100644 ptl = pmd_lockptr(args->mm, pmd); if (!spin_trylock(ptl)) -@@ -4024,15 +4105,16 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area +@@ -4024,15 +4086,16 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area do { unsigned long pfn; struct folio *folio; @@ -31636,7 +32366,7 @@ index 160acbbdf111..ec0142165ce7 100644 pmdp_test_and_clear_young(vma, addr, pmd + i); goto next; } -@@ -4061,12 +4143,11 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area +@@ -4061,12 +4124,11 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area arch_leave_lazy_mmu_mode(); spin_unlock(ptl); done: @@ -31652,7 +32382,7 @@ index 160acbbdf111..ec0142165ce7 100644 { } #endif -@@ -4079,9 +4160,9 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end, +@@ -4079,9 +4141,9 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end, unsigned long next; unsigned long addr; struct vm_area_struct *vma; @@ -31664,7 +32394,7 @@ index 160acbbdf111..ec0142165ce7 100644 VM_WARN_ON_ONCE(pud_leaf(*pud)); -@@ -4120,18 +4201,17 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end, +@@ -4120,18 +4182,17 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end, if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat)) continue; @@ -31686,7 +32416,7 @@ index 160acbbdf111..ec0142165ce7 100644 } if (!walk->force_scan && !test_bloom_filter(walk->lruvec, walk->max_seq, pmd + i)) -@@ -4148,7 +4228,7 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end, +@@ -4148,7 +4209,7 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end, update_bloom_filter(walk->lruvec, walk->max_seq + 1, pmd + i); } @@ -31695,7 +32425,33 @@ index 160acbbdf111..ec0142165ce7 100644 if (i < PTRS_PER_PMD && get_next_vma(PUD_MASK, PMD_SIZE, args, &start, &end)) goto restart; -@@ -4238,7 +4318,7 @@ static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_ +@@ -4177,10 +4238,6 @@ static int walk_pud_range(p4d_t *p4d, unsigned long start, unsigned long end, + + walk_pmd_range(&val, addr, next, args); + +- /* a racy check to curtail the waiting time */ +- if (wq_has_sleeper(&walk->lruvec->mm_state.wait)) +- return 1; +- + if (need_resched() || walk->batched >= MAX_LRU_BATCH) { + end = (addr | ~PUD_MASK) + 1; + goto done; +@@ -4213,8 +4270,14 @@ static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_ + walk->next_addr = FIRST_USER_ADDRESS; + + do { ++ DEFINE_MAX_SEQ(lruvec); ++ + err = -EBUSY; + ++ /* another thread might have called inc_max_seq() */ ++ if (walk->max_seq != max_seq) ++ break; ++ + /* folio_update_gen() requires stable folio_memcg() */ + if (!mem_cgroup_trylock_pages(memcg)) + break; +@@ -4238,7 +4301,7 @@ static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_ } while (err == -EAGAIN); } @@ -31704,7 +32460,7 @@ index 160acbbdf111..ec0142165ce7 100644 { struct lru_gen_mm_walk *walk = current->reclaim_state->mm_walk; -@@ -4246,7 +4326,7 @@ static struct lru_gen_mm_walk *set_mm_walk(struct pglist_data *pgdat) +@@ -4246,7 +4309,7 @@ static struct lru_gen_mm_walk *set_mm_walk(struct pglist_data *pgdat) VM_WARN_ON_ONCE(walk); walk = &pgdat->mm_walk; @@ -31713,7 +32469,7 @@ index 160acbbdf111..ec0142165ce7 100644 VM_WARN_ON_ONCE(current_is_kswapd()); walk = kzalloc(sizeof(*walk), __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN); -@@ -4274,7 +4354,7 @@ static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap) +@@ -4274,7 +4337,7 @@ static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap) { int zone; int remaining = MAX_LRU_BATCH; @@ -31722,7 +32478,7 @@ index 160acbbdf111..ec0142165ce7 100644 int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]); if (type == LRU_GEN_ANON && !can_swap) -@@ -4282,7 +4362,7 @@ static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap) +@@ -4282,7 +4345,7 @@ static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap) /* prevent cold/hot inversion if force_scan is true */ for (zone = 0; zone < MAX_NR_ZONES; zone++) { @@ -31731,7 +32487,7 @@ index 160acbbdf111..ec0142165ce7 100644 while (!list_empty(head)) { struct folio *folio = lru_to_folio(head); -@@ -4293,7 +4373,7 @@ static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap) +@@ -4293,7 +4356,7 @@ static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap) VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio); new_gen = folio_inc_gen(lruvec, folio, false); @@ -31740,7 +32496,7 @@ index 160acbbdf111..ec0142165ce7 100644 if (!--remaining) return false; -@@ -4310,7 +4390,7 @@ static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap) +@@ -4310,7 +4373,7 @@ static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap) { int gen, type, zone; bool success = false; @@ -31749,7 +32505,7 @@ index 160acbbdf111..ec0142165ce7 100644 DEFINE_MIN_SEQ(lruvec); VM_WARN_ON_ONCE(!seq_is_valid(lruvec)); -@@ -4321,7 +4401,7 @@ static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap) +@@ -4321,7 +4384,7 @@ static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap) gen = lru_gen_from_seq(min_seq[type]); for (zone = 0; zone < MAX_NR_ZONES; zone++) { @@ -31758,7 +32514,7 @@ index 160acbbdf111..ec0142165ce7 100644 goto next; } -@@ -4331,7 +4411,7 @@ static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap) +@@ -4331,7 +4394,7 @@ static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap) ; } @@ -31767,7 +32523,7 @@ index 160acbbdf111..ec0142165ce7 100644 if (can_swap) { min_seq[LRU_GEN_ANON] = min(min_seq[LRU_GEN_ANON], min_seq[LRU_GEN_FILE]); min_seq[LRU_GEN_FILE] = max(min_seq[LRU_GEN_ANON], lrugen->min_seq[LRU_GEN_FILE]); -@@ -4353,7 +4433,7 @@ static void inc_max_seq(struct lruvec *lruvec, bool can_swap, bool force_scan) +@@ -4353,7 +4416,7 @@ static void inc_max_seq(struct lruvec *lruvec, bool can_swap, bool force_scan) { int prev, next; int type, zone; @@ -31776,7 +32532,7 @@ index 160acbbdf111..ec0142165ce7 100644 spin_lock_irq(&lruvec->lru_lock); -@@ -4411,7 +4491,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, +@@ -4411,7 +4474,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, bool success; struct lru_gen_mm_walk *walk; struct mm_struct *mm = NULL; @@ -31785,7 +32541,7 @@ index 160acbbdf111..ec0142165ce7 100644 VM_WARN_ON_ONCE(max_seq > READ_ONCE(lrugen->max_seq)); -@@ -4427,12 +4507,12 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, +@@ -4427,12 +4490,12 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, * handful of PTEs. Spreading the work out over a period of time usually * is less efficient, but it avoids bursty page faults. */ @@ -31800,26 +32556,41 @@ index 160acbbdf111..ec0142165ce7 100644 if (!walk) { success = iterate_mm_list_nowalk(lruvec, max_seq); goto done; -@@ -4455,8 +4535,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, - if (sc->priority <= DEF_PRIORITY - 2) - wait_event_killable(lruvec->mm_state.wait, - max_seq < READ_ONCE(lrugen->max_seq)); +@@ -4447,119 +4510,64 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, + success = iterate_mm_list(lruvec, walk, &mm); + if (mm) + walk_mm(lruvec, mm, walk); - -- return max_seq < READ_ONCE(lrugen->max_seq); -+ return false; - } +- cond_resched(); + } while (mm); + done: +- if (!success) { +- if (sc->priority <= DEF_PRIORITY - 2) +- wait_event_killable(lruvec->mm_state.wait, +- max_seq < READ_ONCE(lrugen->max_seq)); ++ if (success) ++ inc_max_seq(lruvec, can_swap, force_scan); - VM_WARN_ON_ONCE(max_seq != READ_ONCE(lrugen->max_seq)); -@@ -4469,97 +4548,56 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, - return true; - } +- return max_seq < READ_ONCE(lrugen->max_seq); +- } +- +- VM_WARN_ON_ONCE(max_seq != READ_ONCE(lrugen->max_seq)); ++ return success; ++} --static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsigned long *min_seq, -- struct scan_control *sc, bool can_swap, unsigned long *nr_to_scan) +- inc_max_seq(lruvec, can_swap, force_scan); +- /* either this sees any waiters or they will see updated max_seq */ +- if (wq_has_sleeper(&lruvec->mm_state.wait)) +- wake_up_all(&lruvec->mm_state.wait); +- +- return true; +-} +/****************************************************************************** + * working set protection + ******************************************************************************/ -+ + +-static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsigned long *min_seq, +- struct scan_control *sc, bool can_swap, unsigned long *nr_to_scan) +static bool lruvec_is_sizable(struct lruvec *lruvec, struct scan_control *sc) { int gen, type, zone; @@ -31855,10 +32626,7 @@ index 160acbbdf111..ec0142165ce7 100644 - /* try to scrape all its memory if this memcg was deleted */ - *nr_to_scan = mem_cgroup_online(memcg) ? (total >> sc->priority) : total; -+ /* whether the size is big enough to be helpful */ -+ return mem_cgroup_online(memcg) ? (total >> sc->priority) : total; -+} - +- - /* - * The aging tries to be lazy to reduce the overhead, while the eviction - * stalls when the number of generations reaches MIN_NR_GENS. Hence, the @@ -31882,8 +32650,10 @@ index 160acbbdf111..ec0142165ce7 100644 - return true; - - return false; --} -- ++ /* whether the size is big enough to be helpful */ ++ return mem_cgroup_online(memcg) ? (total >> sc->priority) : total; + } + -static bool age_lruvec(struct lruvec *lruvec, struct scan_control *sc, unsigned long min_ttl) +static bool lruvec_is_reclaimable(struct lruvec *lruvec, struct scan_control *sc, + unsigned long min_ttl) @@ -31933,7 +32703,7 @@ index 160acbbdf111..ec0142165ce7 100644 } /* to protect the working set of the last N jiffies */ -@@ -4572,46 +4610,30 @@ static unsigned long lru_gen_min_ttl __read_mostly; +@@ -4572,46 +4580,30 @@ static unsigned long lru_gen_min_ttl __read_mostly; static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) { struct mem_cgroup *memcg; @@ -31987,7 +32757,7 @@ index 160acbbdf111..ec0142165ce7 100644 */ if (mutex_trylock(&oom_lock)) { struct oom_control oc = { -@@ -4624,6 +4646,28 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) +@@ -4624,6 +4616,28 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) } } @@ -32016,7 +32786,7 @@ index 160acbbdf111..ec0142165ce7 100644 /* * This function exploits spatial locality when shrink_folio_list() walks the * rmap. It scans the adjacent PTEs of a young PTE and promotes hot pages. If -@@ -4631,16 +4675,17 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) +@@ -4631,16 +4645,17 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) * the PTE table to the Bloom filter. This forms a feedback loop between the * eviction and the aging. */ @@ -32038,7 +32808,7 @@ index 160acbbdf111..ec0142165ce7 100644 struct folio *folio = pfn_folio(pvmw->pfn); struct mem_cgroup *memcg = folio_memcg(folio); struct pglist_data *pgdat = folio_pgdat(folio); -@@ -4651,47 +4696,65 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) +@@ -4651,47 +4666,65 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) lockdep_assert_held(pvmw->ptl); VM_WARN_ON_ONCE_FOLIO(folio_test_lru(folio), folio); @@ -32118,7 +32888,7 @@ index 160acbbdf111..ec0142165ce7 100644 young++; -@@ -4700,58 +4763,173 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) +@@ -4700,58 +4733,173 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) !folio_test_swapcache(folio))) folio_mark_dirty(folio); @@ -32297,7 +33067,9 @@ index 160acbbdf111..ec0142165ce7 100644 + spin_unlock(&pgdat->memcg_lru.lock); } +} -+ + +- if (!walk) +- spin_unlock_irq(&lruvec->lru_lock); +void lru_gen_soft_reclaim(struct lruvec *lruvec) +{ + /* see the comment on MEMCG_NR_GENS */ @@ -32305,11 +33077,9 @@ index 160acbbdf111..ec0142165ce7 100644 + lru_gen_rotate_memcg(lruvec, MEMCG_LRU_HEAD); +} -- if (!walk) -- spin_unlock_irq(&lruvec->lru_lock); -+#else /* !CONFIG_MEMCG */ - - mem_cgroup_unlock_pages(); ++#else /* !CONFIG_MEMCG */ ++ +static int lru_gen_memcg_seg(struct lruvec *lruvec) +{ + return 0; @@ -32320,7 +33090,7 @@ index 160acbbdf111..ec0142165ce7 100644 /****************************************************************************** * the eviction ******************************************************************************/ -@@ -4765,7 +4943,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, int tier_idx) +@@ -4765,7 +4913,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, int tier_idx) int delta = folio_nr_pages(folio); int refs = folio_lru_refs(folio); int tier = lru_tier_from_refs(refs); @@ -32329,7 +33099,7 @@ index 160acbbdf111..ec0142165ce7 100644 VM_WARN_ON_ONCE_FOLIO(gen >= MAX_NR_GENS, folio); -@@ -4790,7 +4968,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, int tier_idx) +@@ -4790,7 +4938,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, int tier_idx) /* promoted */ if (gen != lru_gen_from_seq(lrugen->min_seq[type])) { @@ -32338,7 +33108,7 @@ index 160acbbdf111..ec0142165ce7 100644 return true; } -@@ -4799,7 +4977,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, int tier_idx) +@@ -4799,7 +4947,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, int tier_idx) int hist = lru_hist_from_seq(lrugen->min_seq[type]); gen = folio_inc_gen(lruvec, folio, false); @@ -32347,7 +33117,7 @@ index 160acbbdf111..ec0142165ce7 100644 WRITE_ONCE(lrugen->protected[hist][type][tier - 1], lrugen->protected[hist][type][tier - 1] + delta); -@@ -4811,7 +4989,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, int tier_idx) +@@ -4811,7 +4959,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, int tier_idx) if (folio_test_locked(folio) || folio_test_writeback(folio) || (type == LRU_GEN_FILE && folio_test_dirty(folio))) { gen = folio_inc_gen(lruvec, folio, true); @@ -32356,7 +33126,7 @@ index 160acbbdf111..ec0142165ce7 100644 return true; } -@@ -4822,12 +5000,8 @@ static bool isolate_folio(struct lruvec *lruvec, struct folio *folio, struct sca +@@ -4822,12 +4970,8 @@ static bool isolate_folio(struct lruvec *lruvec, struct folio *folio, struct sca { bool success; @@ -32370,7 +33140,7 @@ index 160acbbdf111..ec0142165ce7 100644 (folio_test_dirty(folio) || (folio_test_anon(folio) && !folio_test_swapcache(folio)))) return false; -@@ -4865,7 +5039,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc, +@@ -4865,7 +5009,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc, int scanned = 0; int isolated = 0; int remaining = MAX_LRU_BATCH; @@ -32379,7 +33149,7 @@ index 160acbbdf111..ec0142165ce7 100644 struct mem_cgroup *memcg = lruvec_memcg(lruvec); VM_WARN_ON_ONCE(!list_empty(list)); -@@ -4878,7 +5052,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc, +@@ -4878,7 +5022,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc, for (zone = sc->reclaim_idx; zone >= 0; zone--) { LIST_HEAD(moved); int skipped = 0; @@ -32388,7 +33158,7 @@ index 160acbbdf111..ec0142165ce7 100644 while (!list_empty(head)) { struct folio *folio = lru_to_folio(head); -@@ -4924,9 +5098,8 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc, +@@ -4924,9 +5068,8 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc, __count_vm_events(PGSCAN_ANON + type, isolated); /* @@ -32400,7 +33170,7 @@ index 160acbbdf111..ec0142165ce7 100644 */ return isolated || !remaining ? scanned : 0; } -@@ -5021,8 +5194,7 @@ static int isolate_folios(struct lruvec *lruvec, struct scan_control *sc, int sw +@@ -5021,8 +5164,7 @@ static int isolate_folios(struct lruvec *lruvec, struct scan_control *sc, int sw return scanned; } @@ -32410,7 +33180,7 @@ index 160acbbdf111..ec0142165ce7 100644 { int type; int scanned; -@@ -5111,153 +5283,348 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap +@@ -5111,153 +5253,348 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap goto retry; } @@ -32849,7 +33619,7 @@ index 160acbbdf111..ec0142165ce7 100644 } /****************************************************************************** -@@ -5266,7 +5633,7 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc +@@ -5266,7 +5603,7 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc static bool __maybe_unused state_is_valid(struct lruvec *lruvec) { @@ -32858,7 +33628,7 @@ index 160acbbdf111..ec0142165ce7 100644 if (lrugen->enabled) { enum lru_list lru; -@@ -5279,7 +5646,7 @@ static bool __maybe_unused state_is_valid(struct lruvec *lruvec) +@@ -5279,7 +5616,7 @@ static bool __maybe_unused state_is_valid(struct lruvec *lruvec) int gen, type, zone; for_each_gen_type_zone(gen, type, zone) { @@ -32867,7 +33637,7 @@ index 160acbbdf111..ec0142165ce7 100644 return false; } } -@@ -5324,7 +5691,7 @@ static bool drain_evictable(struct lruvec *lruvec) +@@ -5324,7 +5661,7 @@ static bool drain_evictable(struct lruvec *lruvec) int remaining = MAX_LRU_BATCH; for_each_gen_type_zone(gen, type, zone) { @@ -32876,7 +33646,7 @@ index 160acbbdf111..ec0142165ce7 100644 while (!list_empty(head)) { bool success; -@@ -5402,14 +5769,14 @@ static void lru_gen_change_state(bool enabled) +@@ -5402,14 +5739,14 @@ static void lru_gen_change_state(bool enabled) * sysfs interface ******************************************************************************/ @@ -32895,7 +33665,7 @@ index 160acbbdf111..ec0142165ce7 100644 { unsigned int msecs; -@@ -5421,11 +5788,9 @@ static ssize_t store_min_ttl(struct kobject *kobj, struct kobj_attribute *attr, +@@ -5421,11 +5758,9 @@ static ssize_t store_min_ttl(struct kobject *kobj, struct kobj_attribute *attr, return len; } @@ -32909,7 +33679,7 @@ index 160acbbdf111..ec0142165ce7 100644 { unsigned int caps = 0; -@@ -5438,11 +5803,14 @@ static ssize_t show_enabled(struct kobject *kobj, struct kobj_attribute *attr, c +@@ -5438,11 +5773,14 @@ static ssize_t show_enabled(struct kobject *kobj, struct kobj_attribute *attr, c if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG)) caps |= BIT(LRU_GEN_NONLEAF_YOUNG); @@ -32925,7 +33695,7 @@ index 160acbbdf111..ec0142165ce7 100644 const char *buf, size_t len) { int i; -@@ -5469,9 +5837,7 @@ static ssize_t store_enabled(struct kobject *kobj, struct kobj_attribute *attr, +@@ -5469,9 +5807,7 @@ static ssize_t store_enabled(struct kobject *kobj, struct kobj_attribute *attr, return len; } @@ -32936,7 +33706,7 @@ index 160acbbdf111..ec0142165ce7 100644 static struct attribute *lru_gen_attrs[] = { &lru_gen_min_ttl_attr.attr, -@@ -5479,7 +5845,7 @@ static struct attribute *lru_gen_attrs[] = { +@@ -5479,7 +5815,7 @@ static struct attribute *lru_gen_attrs[] = { NULL }; @@ -32945,7 +33715,7 @@ index 160acbbdf111..ec0142165ce7 100644 .name = "lru_gen", .attrs = lru_gen_attrs, }; -@@ -5545,7 +5911,7 @@ static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec, +@@ -5545,7 +5881,7 @@ static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec, int i; int type, tier; int hist = lru_hist_from_seq(seq); @@ -32954,7 +33724,7 @@ index 160acbbdf111..ec0142165ce7 100644 for (tier = 0; tier < MAX_NR_TIERS; tier++) { seq_printf(m, " %10d", tier); -@@ -5595,7 +5961,7 @@ static int lru_gen_seq_show(struct seq_file *m, void *v) +@@ -5595,7 +5931,7 @@ static int lru_gen_seq_show(struct seq_file *m, void *v) unsigned long seq; bool full = !debugfs_real_fops(m->file)->write; struct lruvec *lruvec = v; @@ -32963,7 +33733,7 @@ index 160acbbdf111..ec0142165ce7 100644 int nid = lruvec_pgdat(lruvec)->node_id; struct mem_cgroup *memcg = lruvec_memcg(lruvec); DEFINE_MAX_SEQ(lruvec); -@@ -5692,7 +6058,7 @@ static int run_eviction(struct lruvec *lruvec, unsigned long seq, struct scan_co +@@ -5692,7 +6028,7 @@ static int run_eviction(struct lruvec *lruvec, unsigned long seq, struct scan_co if (sc->nr_reclaimed >= nr_to_reclaim) return 0; @@ -32972,7 +33742,7 @@ index 160acbbdf111..ec0142165ce7 100644 return 0; cond_resched(); -@@ -5713,11 +6079,11 @@ static int run_cmd(char cmd, int memcg_id, int nid, unsigned long seq, +@@ -5713,11 +6049,11 @@ static int run_cmd(char cmd, int memcg_id, int nid, unsigned long seq, if (!mem_cgroup_disabled()) { rcu_read_lock(); @@ -32987,7 +33757,7 @@ index 160acbbdf111..ec0142165ce7 100644 rcu_read_unlock(); if (!memcg) -@@ -5777,7 +6143,7 @@ static ssize_t lru_gen_seq_write(struct file *file, const char __user *src, +@@ -5777,7 +6113,7 @@ static ssize_t lru_gen_seq_write(struct file *file, const char __user *src, set_task_reclaim_state(current, &sc.reclaim_state); flags = memalloc_noreclaim_save(); blk_start_plug(&plug); @@ -32996,7 +33766,7 @@ index 160acbbdf111..ec0142165ce7 100644 err = -ENOMEM; goto done; } -@@ -5849,7 +6215,7 @@ void lru_gen_init_lruvec(struct lruvec *lruvec) +@@ -5849,7 +6185,7 @@ void lru_gen_init_lruvec(struct lruvec *lruvec) { int i; int gen, type, zone; @@ -33005,7 +33775,7 @@ index 160acbbdf111..ec0142165ce7 100644 lrugen->max_seq = MIN_NR_GENS + 1; lrugen->enabled = lru_gen_enabled(); -@@ -5858,13 +6224,26 @@ void lru_gen_init_lruvec(struct lruvec *lruvec) +@@ -5858,13 +6194,25 @@ void lru_gen_init_lruvec(struct lruvec *lruvec) lrugen->timestamps[i] = jiffies; for_each_gen_type_zone(gen, type, zone) @@ -33013,7 +33783,7 @@ index 160acbbdf111..ec0142165ce7 100644 + INIT_LIST_HEAD(&lrugen->folios[gen][type][zone]); lruvec->mm_state.seq = MIN_NR_GENS; - init_waitqueue_head(&lruvec->mm_state.wait); +- init_waitqueue_head(&lruvec->mm_state.wait); } #ifdef CONFIG_MEMCG @@ -33033,7 +33803,7 @@ index 160acbbdf111..ec0142165ce7 100644 void lru_gen_init_memcg(struct mem_cgroup *memcg) { INIT_LIST_HEAD(&memcg->mm_list.fifo); -@@ -5876,19 +6255,25 @@ void lru_gen_exit_memcg(struct mem_cgroup *memcg) +@@ -5876,19 +6224,24 @@ void lru_gen_exit_memcg(struct mem_cgroup *memcg) int i; int nid; @@ -33042,7 +33812,6 @@ index 160acbbdf111..ec0142165ce7 100644 for_each_node(nid) { struct lruvec *lruvec = get_lruvec(memcg, nid); -+ VM_WARN_ON_ONCE(lruvec->mm_state.nr_walkers); VM_WARN_ON_ONCE(memchr_inv(lruvec->lrugen.nr_pages, 0, sizeof(lruvec->lrugen.nr_pages))); @@ -33060,7 +33829,7 @@ index 160acbbdf111..ec0142165ce7 100644 static int __init init_lru_gen(void) { -@@ -5915,6 +6300,10 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc +@@ -5915,6 +6268,10 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc { } @@ -33071,7 +33840,7 @@ index 160acbbdf111..ec0142165ce7 100644 #endif /* CONFIG_LRU_GEN */ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) -@@ -5928,7 +6317,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) +@@ -5928,7 +6285,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) bool proportional_reclaim; struct blk_plug plug; @@ -33080,7 +33849,7 @@ index 160acbbdf111..ec0142165ce7 100644 lru_gen_shrink_lruvec(lruvec, sc); return; } -@@ -6171,6 +6560,11 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) +@@ -6171,6 +6528,11 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) struct lruvec *target_lruvec; bool reclaimable = false; @@ -33115,7 +33884,7 @@ index 1a86645b7b3c..fd666584515c 100644 struct pglist_data *pgdat; int type = folio_is_file_lru(folio); diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c -index 1f36bc1c5d36..26389e0dcfff 100644 +index 2a16939cf028..9286d3baa12d 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -55,6 +55,28 @@ struct rcu_reader_struct { @@ -33156,7 +33925,7 @@ index 1f36bc1c5d36..26389e0dcfff 100644 /* Try allocating 3 nodes */ mtree_lock(mt); mt_set_non_kernel(0); -@@ -35342,7 +35366,7 @@ static noinline void check_prealloc(struct maple_tree *mt) +@@ -35355,7 +35379,7 @@ static noinline void check_prealloc(struct maple_tree *mt) for (i = 0; i <= max; i++) mtree_test_store_range(mt, i * 10, i * 10 + 5, &i); @@ -33165,7 +33934,7 @@ index 1f36bc1c5d36..26389e0dcfff 100644 allocated = mas_allocated(&mas); height = mas_mt_height(&mas); MT_BUG_ON(mt, allocated == 0); -@@ -35351,18 +35375,18 @@ static noinline void check_prealloc(struct maple_tree *mt) +@@ -35364,18 +35388,18 @@ static noinline void check_prealloc(struct maple_tree *mt) allocated = mas_allocated(&mas); MT_BUG_ON(mt, allocated != 0); @@ -33187,9 +33956,9 @@ index 1f36bc1c5d36..26389e0dcfff 100644 allocated = mas_allocated(&mas); height = mas_mt_height(&mas); MT_BUG_ON(mt, allocated == 0); -@@ -35370,25 +35394,25 @@ static noinline void check_prealloc(struct maple_tree *mt) - mn = mas_pop_node(&mas); +@@ -35384,26 +35408,26 @@ static noinline void check_prealloc(struct maple_tree *mt) MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1); + mn->parent = ma_parent_ptr(mn); ma_free_rcu(mn); - MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); + MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); @@ -33210,6 +33979,7 @@ index 1f36bc1c5d36..26389e0dcfff 100644 mas_destroy(&mas); allocated = mas_allocated(&mas); MT_BUG_ON(mt, allocated != 0); + mn->parent = ma_parent_ptr(mn); ma_free_rcu(mn); - MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); @@ -33217,7 +33987,7 @@ index 1f36bc1c5d36..26389e0dcfff 100644 allocated = mas_allocated(&mas); height = mas_mt_height(&mas); MT_BUG_ON(mt, allocated == 0); -@@ -35397,12 +35421,12 @@ static noinline void check_prealloc(struct maple_tree *mt) +@@ -35412,12 +35436,12 @@ static noinline void check_prealloc(struct maple_tree *mt) MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1); mas_push_node(&mas, mn); MT_BUG_ON(mt, mas_allocated(&mas) != allocated); @@ -33232,7 +34002,7 @@ index 1f36bc1c5d36..26389e0dcfff 100644 allocated = mas_allocated(&mas); height = mas_mt_height(&mas); MT_BUG_ON(mt, allocated == 0); -@@ -35410,21 +35434,21 @@ static noinline void check_prealloc(struct maple_tree *mt) +@@ -35425,21 +35449,21 @@ static noinline void check_prealloc(struct maple_tree *mt) mas_store_prealloc(&mas, ptr); MT_BUG_ON(mt, mas_allocated(&mas) != 0); @@ -33257,7 +34027,7 @@ index 1f36bc1c5d36..26389e0dcfff 100644 allocated = mas_allocated(&mas); height = mas_mt_height(&mas); MT_BUG_ON(mt, allocated == 0); -@@ -35432,14 +35456,14 @@ static noinline void check_prealloc(struct maple_tree *mt) +@@ -35447,14 +35471,14 @@ static noinline void check_prealloc(struct maple_tree *mt) mas_store_prealloc(&mas, ptr); MT_BUG_ON(mt, mas_allocated(&mas) != 0); mt_set_non_kernel(1); @@ -33274,7 +34044,7 @@ index 1f36bc1c5d36..26389e0dcfff 100644 allocated = mas_allocated(&mas); height = mas_mt_height(&mas); MT_BUG_ON(mt, allocated == 0); -@@ -35447,7 +35471,7 @@ static noinline void check_prealloc(struct maple_tree *mt) +@@ -35462,7 +35486,7 @@ static noinline void check_prealloc(struct maple_tree *mt) mas_store_prealloc(&mas, ptr); MT_BUG_ON(mt, mas_allocated(&mas) != 0); mt_set_non_kernel(1); @@ -33362,7 +34132,7 @@ index 07aae60288f9..a115a27b375e 100644 -- 2.40.0 -From 408f428355e56ebba78d9b13e73d90f3a61057cc Mon Sep 17 00:00:00 2001 +From a9b5ae8237970121057f450cbc5f8e54081aceab Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Mon, 13 Feb 2023 09:26:09 +0100 Subject: [PATCH 11/15] objtool @@ -33752,7 +34522,7 @@ index 9c8d827f69af..baa85c31526b 100644 -- 2.40.0 -From 142ae5ede709849f18055b9531bc73df4e53679f Mon Sep 17 00:00:00 2001 +From 4b90d86e2ae379b4e8d1aa5b67a4312e2bf0ee31 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Thu, 30 Mar 2023 17:54:54 +0200 Subject: [PATCH 12/15] sched @@ -34065,7 +34835,7 @@ index 1637b65ba07a..8d64fba16cfe 100644 P(se.avg.load_sum); P(se.avg.runnable_sum); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index 1ea94874b0ce..1a8ae34c9464 100644 +index 735994022fe0..ca8bbe5c1cb4 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1082,6 +1082,23 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se) @@ -34657,7 +35427,7 @@ index 1ea94874b0ce..1a8ae34c9464 100644 if (env->sd->flags & SD_NUMA) env->fbq_type = fbq_classify_group(&sds->busiest_stat); -@@ -10203,24 +10223,23 @@ static struct sched_group *find_busiest_group(struct lb_env *env) +@@ -10213,24 +10233,23 @@ static struct sched_group *find_busiest_group(struct lb_env *env) */ update_sd_lb_stats(env, &sds); @@ -34691,7 +35461,7 @@ index 1ea94874b0ce..1a8ae34c9464 100644 /* ASYM feature bypasses nice load balance check */ if (busiest->group_type == group_asym_packing) goto force_balance; -@@ -10233,6 +10252,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env) +@@ -10243,6 +10262,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env) if (busiest->group_type == group_imbalanced) goto force_balance; @@ -34699,7 +35469,7 @@ index 1ea94874b0ce..1a8ae34c9464 100644 /* * If the local group is busier than the selected busiest group * don't try and pull any tasks. -@@ -10272,7 +10292,6 @@ static struct sched_group *find_busiest_group(struct lb_env *env) +@@ -10282,7 +10302,6 @@ static struct sched_group *find_busiest_group(struct lb_env *env) goto out_balanced; } @@ -34707,7 +35477,7 @@ index 1ea94874b0ce..1a8ae34c9464 100644 if (sds.prefer_sibling && local->group_type == group_has_spare && busiest->sum_nr_running > local->sum_nr_running + 1) goto force_balance; -@@ -10374,11 +10393,20 @@ static struct rq *find_busiest_queue(struct lb_env *env, +@@ -10384,11 +10403,20 @@ static struct rq *find_busiest_queue(struct lb_env *env, nr_running == 1) continue; @@ -34731,7 +35501,7 @@ index 1ea94874b0ce..1a8ae34c9464 100644 switch (env->migration_type) { case migrate_load: -@@ -10468,8 +10496,20 @@ asym_active_balance(struct lb_env *env) +@@ -10478,8 +10506,20 @@ asym_active_balance(struct lb_env *env) * lower priority CPUs in order to pack all tasks in the * highest priority CPUs. */ @@ -34754,7 +35524,7 @@ index 1ea94874b0ce..1a8ae34c9464 100644 } static inline bool -@@ -11206,8 +11246,17 @@ static void nohz_balancer_kick(struct rq *rq) +@@ -11216,8 +11256,17 @@ static void nohz_balancer_kick(struct rq *rq) */ for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) { if (sched_asym_prefer(i, cpu)) { @@ -34988,7 +35758,7 @@ index 771f8ddb7053..0927d16631fb 100644 -- 2.40.0 -From 9bf62fd4fa835ffda2dd24b87c8e1feff3e98061 Mon Sep 17 00:00:00 2001 +From 661d513d0987b55f3b00273a1411d2f43e27f2d0 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Mon, 6 Feb 2023 09:53:13 +0100 Subject: [PATCH 13/15] zram @@ -35481,7 +36251,7 @@ index c5254626f051..2afdbf76a1aa 100644 -- 2.40.0 -From 581756bfa197a0f8011730304fbcc3d9fc547ddb Mon Sep 17 00:00:00 2001 +From 5226c6f74024424daecec7e0d3f70268db60b200 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Thu, 6 Apr 2023 17:10:48 +0200 Subject: [PATCH 14/15] zstd import 1.5.5 @@ -49261,7 +50031,7 @@ index f4ed952ed485..7d31518e9d5a 100644 -- 2.40.0 -From ab74a3305ccf63616c74a3bc7c4bd7f6ee55e4f9 Mon Sep 17 00:00:00 2001 +From e63a0a13d76c3d3d65edb7452b6cab1cbff1b237 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Fri, 10 Mar 2023 19:28:54 +0100 Subject: [PATCH 15/15] v4l2-core: add v4l2loopback diff --git a/6.2/misc/0001-Add-latency-priority-for-CFS-class.patch b/6.2/misc/0001-Add-latency-priority-for-CFS-class.patch index 41c4b889..5cc2d94c 100644 --- a/6.2/misc/0001-Add-latency-priority-for-CFS-class.patch +++ b/6.2/misc/0001-Add-latency-priority-for-CFS-class.patch @@ -1,4 +1,4 @@ -From 122a44ab2772010fc31e4ff9050a1ff21ecc61eb Mon Sep 17 00:00:00 2001 +From cb5968987cb68d54d733cb669bff4f48188c8bae Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Thu, 30 Mar 2023 19:27:28 +0200 Subject: [PATCH] Add latency priority for CFS class @@ -348,7 +348,7 @@ index ff6c4b9bfe6b..071deff8dbd1 100644 .cpus_ptr = &init_task.cpus_mask, .user_cpus_ptr = NULL, diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index 5640cac31f9a..32a564dfb3e6 100644 +index e92e0a5c19aa..ec7d13b138eb 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1283,6 +1283,11 @@ static void set_load_weight(struct task_struct *p, bool update_load) @@ -513,7 +513,7 @@ index 8d64fba16cfe..177934290ec4 100644 P(dl.runtime); P(dl.deadline); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index 1a8ae34c9464..4a378f404336 100644 +index ca8bbe5c1cb4..959d6fe0e30d 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -698,7 +698,85 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) @@ -791,7 +791,7 @@ index 1a8ae34c9464..4a378f404336 100644 if (vdiff > gran) return 1; -@@ -11914,6 +12022,9 @@ bool cfs_prio_less(struct task_struct *a, struct task_struct *b, bool in_fi) +@@ -11924,6 +12032,9 @@ bool cfs_prio_less(struct task_struct *a, struct task_struct *b, bool in_fi) delta = (s64)(sea->vruntime - seb->vruntime) + (s64)(cfs_rqb->min_vruntime_fi - cfs_rqa->min_vruntime_fi); @@ -801,7 +801,7 @@ index 1a8ae34c9464..4a378f404336 100644 return delta > 0; } #else -@@ -12184,6 +12295,7 @@ static void set_next_task_fair(struct rq *rq, struct task_struct *p, bool first) +@@ -12194,6 +12305,7 @@ static void set_next_task_fair(struct rq *rq, struct task_struct *p, bool first) void init_cfs_rq(struct cfs_rq *cfs_rq) { cfs_rq->tasks_timeline = RB_ROOT_CACHED; @@ -809,7 +809,7 @@ index 1a8ae34c9464..4a378f404336 100644 u64_u32_store(cfs_rq->min_vruntime, (u64)(-(1LL << 20))); #ifdef CONFIG_SMP raw_spin_lock_init(&cfs_rq->removed.lock); -@@ -12239,6 +12351,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) +@@ -12249,6 +12361,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) goto err; tg->shares = NICE_0_LOAD; @@ -817,7 +817,7 @@ index 1a8ae34c9464..4a378f404336 100644 init_cfs_bandwidth(tg_cfs_bandwidth(tg)); -@@ -12337,6 +12450,10 @@ void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, +@@ -12347,6 +12460,10 @@ void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, } se->my_q = cfs_rq; @@ -828,7 +828,7 @@ index 1a8ae34c9464..4a378f404336 100644 /* guarantee group entities always have weight */ update_load_set(&se->load, NICE_0_LOAD); se->parent = parent; -@@ -12467,6 +12584,45 @@ int sched_group_set_idle(struct task_group *tg, long idle) +@@ -12477,6 +12594,45 @@ int sched_group_set_idle(struct task_group *tg, long idle) return 0; } diff --git a/6.2/sched/0001-bore-cachy.patch b/6.2/sched/0001-bore-cachy.patch index 47f06a0d..7da1d3e8 100644 --- a/6.2/sched/0001-bore-cachy.patch +++ b/6.2/sched/0001-bore-cachy.patch @@ -1,20 +1,20 @@ -From d39405a7c6d67669059122becce33e2ac23a0b82 Mon Sep 17 00:00:00 2001 +From fe78b2fbce250d802fee7b12899668c85bc6b8a8 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Fri, 7 Apr 2023 11:28:43 +0200 -Subject: [PATCH] bore-cachy-lat +Date: Thu, 20 Apr 2023 19:41:31 +0200 +Subject: [PATCH] bore-cachy Signed-off-by: Peter Jung --- - include/linux/sched.h | 6 ++ + include/linux/sched.h | 10 +++ init/Kconfig | 20 +++++ - kernel/sched/core.c | 30 ++++++++ + kernel/sched/core.c | 45 +++++++++++ kernel/sched/debug.c | 3 + - kernel/sched/fair.c | 159 +++++++++++++++++++++++++++++++++++++++- + kernel/sched/fair.c | 167 +++++++++++++++++++++++++++++++++++++++- kernel/sched/features.h | 8 ++ - 6 files changed, 222 insertions(+), 4 deletions(-) + 6 files changed, 249 insertions(+), 4 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h -index df219c7cd6aa..956a85b5f56a 100644 +index df219c7cd6aa..ba3882e13a4d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -556,6 +556,12 @@ struct sched_entity { @@ -30,6 +30,17 @@ index df219c7cd6aa..956a85b5f56a 100644 u64 nr_migrations; u64 prev_sleep_sum_runtime; +@@ -990,6 +996,10 @@ struct task_struct { + struct list_head children; + struct list_head sibling; + struct task_struct *group_leader; ++#ifdef CONFIG_SCHED_BORE ++ u64 child_burst_cache; ++ u64 child_burst_last_cached; ++#endif // CONFIG_SCHED_BORE + + /* + * 'ptraced' is the list of tasks this task is using ptrace() on. diff --git a/init/Kconfig b/init/Kconfig index 748a9491ca12..d10f1e6257cd 100644 --- a/init/Kconfig @@ -62,33 +73,48 @@ index 748a9491ca12..d10f1e6257cd 100644 bool "Automatic process group scheduling" select CGROUPS diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index ec7d13b138eb..811eb92367b4 100644 +index ec7d13b138eb..ec11a317dd85 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -4421,6 +4421,22 @@ int wake_up_state(struct task_struct *p, unsigned int state) +@@ -4421,6 +4421,37 @@ int wake_up_state(struct task_struct *p, unsigned int state) return try_to_wake_up(p, state, 0); } +#ifdef CONFIG_SCHED_BORE -+static inline void adjust_prev_burst(struct task_struct *p) -+{ ++extern unsigned int sched_burst_cache_lifetime; ++ ++static inline void update_burst_cache(struct task_struct *p) { + u32 cnt = 0; + u64 sum = 0, avg = 0; -+ struct task_struct *sib; -+ list_for_each_entry(sib, &p->sibling, sibling) { ++ struct task_struct *child; ++ list_for_each_entry(child, &p->children, sibling) { + cnt++; -+ sum += sib->se.max_burst_time >> 8; ++ sum += child->se.max_burst_time >> 8; + } + if (cnt) avg = div_u64(sum, cnt) << 8; -+ if (p->se.prev_burst_time < avg) p->se.prev_burst_time = avg; -+ p->se.max_burst_time = p->se.prev_burst_time; ++ p->child_burst_cache = max(avg, p->se.max_burst_time); +} ++ ++static void adjust_prev_burst(struct task_struct *p) { ++ struct task_struct *parent = p->parent; ++ u64 ktime = ktime_to_ns(ktime_get()); ++ ++ if (likely(parent)) { ++ if (parent->child_burst_last_cached + sched_burst_cache_lifetime < ktime) { ++ parent->child_burst_last_cached = ktime; ++ update_burst_cache(parent); ++ } ++ if (p->se.prev_burst_time < parent->child_burst_cache) ++ p->se.prev_burst_time = parent->child_burst_cache; ++ } ++ p->se.max_burst_time = p->se.prev_burst_time; ++} +#endif // CONFIG_SCHED_BORE + /* * Perform scheduler related setup for a newly forked process p. * p is forked by current. -@@ -4439,6 +4455,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) +@@ -4439,6 +4470,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) p->se.vruntime = 0; p->se.dur_avg = 0; p->se.prev_sleep_sum_runtime = 0; @@ -98,7 +124,7 @@ index ec7d13b138eb..811eb92367b4 100644 INIT_LIST_HEAD(&p->se.group_node); RB_CLEAR_NODE(&p->se.latency_node); -@@ -4665,6 +4684,9 @@ late_initcall(sched_core_sysctl_init); +@@ -4665,6 +4699,9 @@ late_initcall(sched_core_sysctl_init); int sched_fork(unsigned long clone_flags, struct task_struct *p) { __sched_fork(clone_flags, p); @@ -108,7 +134,7 @@ index ec7d13b138eb..811eb92367b4 100644 /* * We mark the process as NEW here. This guarantees that * nobody will actually run it, and a signal or other external -@@ -9155,6 +9177,10 @@ void __init init_idle(struct task_struct *idle, int cpu) +@@ -9155,6 +9192,10 @@ void __init init_idle(struct task_struct *idle, int cpu) idle->__state = TASK_RUNNING; idle->se.exec_start = sched_clock(); @@ -119,12 +145,12 @@ index ec7d13b138eb..811eb92367b4 100644 /* * PF_KTHREAD should already be set at this point; regardless, make it * look like a proper per-CPU kthread. -@@ -9822,6 +9848,10 @@ void __init sched_init(void) +@@ -9822,6 +9863,10 @@ void __init sched_init(void) BUG_ON(&dl_sched_class != &stop_sched_class + 1); #endif +#ifdef CONFIG_SCHED_BORE -+ printk(KERN_INFO "BORE (Burst-Oriented Response Enhancer) CPU Scheduler modification 2.1.1 by Masahito Suzuki"); ++ printk(KERN_INFO "BORE (Burst-Oriented Response Enhancer) CPU Scheduler modification 2.2.1 by Masahito Suzuki"); +#endif // CONFIG_SCHED_BORE + wait_bit_init(); @@ -145,7 +171,7 @@ index 177934290ec4..e51c5f727c09 100644 SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p)); #endif diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index 4a378f404336..3914ecc9a900 100644 +index 959d6fe0e30d..5acb402c5035 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -19,6 +19,9 @@ @@ -158,12 +184,13 @@ index 4a378f404336..3914ecc9a900 100644 */ #include #include -@@ -140,6 +143,16 @@ static unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL; +@@ -140,6 +143,17 @@ static unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL; const_debug unsigned int sysctl_sched_migration_cost = 500000UL; +#ifdef CONFIG_SCHED_BORE +unsigned int __read_mostly sched_bore = 3; ++unsigned int __read_mostly sched_burst_cache_lifetime = 50000000; +unsigned int __read_mostly sched_burst_penalty_offset = 12; +unsigned int __read_mostly sched_burst_penalty_scale = 1292; +unsigned int __read_mostly sched_burst_smoothness = 1; @@ -175,7 +202,7 @@ index 4a378f404336..3914ecc9a900 100644 int sched_thermal_decay_shift; static int __init setup_sched_thermal_decay_shift(char *str) { -@@ -203,6 +216,44 @@ static unsigned int sysctl_numa_balancing_promote_rate_limit = 65536; +@@ -203,6 +217,51 @@ static unsigned int sysctl_numa_balancing_promote_rate_limit = 65536; #ifdef CONFIG_SYSCTL static struct ctl_table sched_fair_sysctls[] = { @@ -190,6 +217,13 @@ index 4a378f404336..3914ecc9a900 100644 + .extra2 = &three, + }, + { ++ .procname = "sched_burst_cache_lifetime", ++ .data = &sched_burst_cache_lifetime, ++ .maxlen = sizeof(unsigned int), ++ .mode = 0644, ++ .proc_handler = proc_dointvec, ++ }, ++ { + .procname = "sched_burst_penalty_offset", + .data = &sched_burst_penalty_offset, + .maxlen = sizeof(unsigned int), @@ -220,7 +254,7 @@ index 4a378f404336..3914ecc9a900 100644 { .procname = "sched_child_runs_first", .data = &sysctl_sched_child_runs_first, -@@ -987,6 +1038,47 @@ static void update_tg_load_avg(struct cfs_rq *cfs_rq) +@@ -987,6 +1046,47 @@ static void update_tg_load_avg(struct cfs_rq *cfs_rq) } #endif /* CONFIG_SMP */ @@ -268,7 +302,7 @@ index 4a378f404336..3914ecc9a900 100644 /* * Update the current task's runtime statistics. */ -@@ -1016,6 +1108,14 @@ static void update_curr(struct cfs_rq *cfs_rq) +@@ -1016,6 +1116,14 @@ static void update_curr(struct cfs_rq *cfs_rq) curr->sum_exec_runtime += delta_exec; schedstat_add(cfs_rq->exec_clock, delta_exec); @@ -283,7 +317,7 @@ index 4a378f404336..3914ecc9a900 100644 curr->vruntime += calc_delta_fair(delta_exec, curr); update_min_vruntime(cfs_rq); -@@ -5102,8 +5202,14 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) +@@ -5102,8 +5210,14 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) se->prev_sum_exec_runtime = se->sum_exec_runtime; } @@ -298,7 +332,7 @@ index 4a378f404336..3914ecc9a900 100644 /* * Pick the next process, keeping these things in mind, in this order: -@@ -5142,16 +5248,34 @@ pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr) +@@ -5142,16 +5256,34 @@ pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr) second = curr; } @@ -335,7 +369,7 @@ index 4a378f404336..3914ecc9a900 100644 /* * Prefer last buddy, try to return the CPU to a preempted task. */ -@@ -5160,8 +5284,13 @@ pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr) +@@ -5160,8 +5292,13 @@ pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr) /* Check for latency sensitive entity waiting for running */ latency = __pick_first_latency(cfs_rq); @@ -349,7 +383,7 @@ index 4a378f404336..3914ecc9a900 100644 se = latency; return se; -@@ -6315,6 +6444,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) +@@ -6315,6 +6452,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) util_est_dequeue(&rq->cfs, p); for_each_sched_entity(se) { @@ -359,7 +393,7 @@ index 4a378f404336..3914ecc9a900 100644 cfs_rq = cfs_rq_of(se); dequeue_entity(cfs_rq, se, flags); -@@ -7784,7 +7916,12 @@ static unsigned long wakeup_gran(struct sched_entity *se) +@@ -7784,7 +7924,12 @@ static unsigned long wakeup_gran(struct sched_entity *se) * */ static int @@ -372,7 +406,7 @@ index 4a378f404336..3914ecc9a900 100644 { s64 gran, vdiff = curr->vruntime - se->vruntime; s64 offset = wakeup_latency_gran(curr, se); -@@ -7792,7 +7929,13 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se) +@@ -7792,7 +7937,13 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se) if (vdiff < offset) return -1; @@ -387,7 +421,7 @@ index 4a378f404336..3914ecc9a900 100644 /* * At wake up, the vruntime of a task is capped to not be older than -@@ -7909,7 +8052,12 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ +@@ -7909,7 +8060,12 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ return; update_curr(cfs_rq_of(se)); @@ -401,7 +435,7 @@ index 4a378f404336..3914ecc9a900 100644 /* * Bias pick_next to pick the sched entity that is * triggering this preemption. -@@ -8145,6 +8293,9 @@ static void yield_task_fair(struct rq *rq) +@@ -8145,6 +8301,9 @@ static void yield_task_fair(struct rq *rq) struct task_struct *curr = rq->curr; struct cfs_rq *cfs_rq = task_cfs_rq(curr); struct sched_entity *se = &curr->se; diff --git a/6.2/sched/0001-bore.patch b/6.2/sched/0001-bore.patch index cc65a175..a56e9cad 100644 --- a/6.2/sched/0001-bore.patch +++ b/6.2/sched/0001-bore.patch @@ -1,20 +1,20 @@ -From 32fa1fa5b5e04d0debb93cf9785df6b64f0152b9 Mon Sep 17 00:00:00 2001 +From 62a50ec9d91fe1c4f6cceb51e46ac4e7c0f6a78b Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Thu, 6 Apr 2023 19:10:58 +0200 +Date: Thu, 20 Apr 2023 19:45:28 +0200 Subject: [PATCH] bore Signed-off-by: Peter Jung --- - include/linux/sched.h | 6 ++ - init/Kconfig | 20 ++++++ - kernel/sched/core.c | 30 ++++++++ + include/linux/sched.h | 10 +++ + init/Kconfig | 20 +++++ + kernel/sched/core.c | 45 ++++++++++++ kernel/sched/debug.c | 3 + - kernel/sched/fair.c | 149 +++++++++++++++++++++++++++++++++++++++- - kernel/sched/features.h | 8 +++ - 6 files changed, 213 insertions(+), 3 deletions(-) + kernel/sched/fair.c | 157 +++++++++++++++++++++++++++++++++++++++- + kernel/sched/features.h | 8 ++ + 6 files changed, 240 insertions(+), 3 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h -index 853d08f7562b..d673ab1b885f 100644 +index 853d08f7562b..d4670632f67d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -555,6 +555,12 @@ struct sched_entity { @@ -30,6 +30,17 @@ index 853d08f7562b..d673ab1b885f 100644 u64 nr_migrations; +@@ -983,6 +989,10 @@ struct task_struct { + struct list_head children; + struct list_head sibling; + struct task_struct *group_leader; ++#ifdef CONFIG_SCHED_BORE ++ u64 child_burst_cache; ++ u64 child_burst_last_cached; ++#endif // CONFIG_SCHED_BORE + + /* + * 'ptraced' is the list of tasks this task is using ptrace() on. diff --git a/init/Kconfig b/init/Kconfig index 44e90b28a30f..f2854e244381 100644 --- a/init/Kconfig @@ -62,33 +73,48 @@ index 44e90b28a30f..f2854e244381 100644 bool "Automatic process group scheduling" select CGROUPS diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index 57d84b534cde..62de0463e4ef 100644 +index 57d84b534cde..fdb589fa3dd5 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -4411,6 +4411,22 @@ int wake_up_state(struct task_struct *p, unsigned int state) +@@ -4411,6 +4411,37 @@ int wake_up_state(struct task_struct *p, unsigned int state) return try_to_wake_up(p, state, 0); } +#ifdef CONFIG_SCHED_BORE -+static inline void adjust_prev_burst(struct task_struct *p) -+{ ++extern unsigned int sched_burst_cache_lifetime; ++ ++static inline void update_burst_cache(struct task_struct *p) { + u32 cnt = 0; + u64 sum = 0, avg = 0; -+ struct task_struct *sib; -+ list_for_each_entry(sib, &p->sibling, sibling) { ++ struct task_struct *child; ++ list_for_each_entry(child, &p->children, sibling) { + cnt++; -+ sum += sib->se.max_burst_time >> 8; ++ sum += child->se.max_burst_time >> 8; + } + if (cnt) avg = div_u64(sum, cnt) << 8; -+ if (p->se.prev_burst_time < avg) p->se.prev_burst_time = avg; -+ p->se.max_burst_time = p->se.prev_burst_time; ++ p->child_burst_cache = max(avg, p->se.max_burst_time); +} ++ ++static void adjust_prev_burst(struct task_struct *p) { ++ struct task_struct *parent = p->parent; ++ u64 ktime = ktime_to_ns(ktime_get()); ++ ++ if (likely(parent)) { ++ if (parent->child_burst_last_cached + sched_burst_cache_lifetime < ktime) { ++ parent->child_burst_last_cached = ktime; ++ update_burst_cache(parent); ++ } ++ if (p->se.prev_burst_time < parent->child_burst_cache) ++ p->se.prev_burst_time = parent->child_burst_cache; ++ } ++ p->se.max_burst_time = p->se.prev_burst_time; ++} +#endif // CONFIG_SCHED_BORE + /* * Perform scheduler related setup for a newly forked process p. * p is forked by current. -@@ -4427,6 +4443,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) +@@ -4427,6 +4458,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) p->se.prev_sum_exec_runtime = 0; p->se.nr_migrations = 0; p->se.vruntime = 0; @@ -98,7 +124,7 @@ index 57d84b534cde..62de0463e4ef 100644 INIT_LIST_HEAD(&p->se.group_node); #ifdef CONFIG_FAIR_GROUP_SCHED -@@ -4652,6 +4671,9 @@ late_initcall(sched_core_sysctl_init); +@@ -4652,6 +4686,9 @@ late_initcall(sched_core_sysctl_init); int sched_fork(unsigned long clone_flags, struct task_struct *p) { __sched_fork(clone_flags, p); @@ -108,7 +134,7 @@ index 57d84b534cde..62de0463e4ef 100644 /* * We mark the process as NEW here. This guarantees that * nobody will actually run it, and a signal or other external -@@ -9113,6 +9135,10 @@ void __init init_idle(struct task_struct *idle, int cpu) +@@ -9113,6 +9150,10 @@ void __init init_idle(struct task_struct *idle, int cpu) idle->__state = TASK_RUNNING; idle->se.exec_start = sched_clock(); @@ -119,12 +145,12 @@ index 57d84b534cde..62de0463e4ef 100644 /* * PF_KTHREAD should already be set at this point; regardless, make it * look like a proper per-CPU kthread. -@@ -9780,6 +9806,10 @@ void __init sched_init(void) +@@ -9780,6 +9821,10 @@ void __init sched_init(void) BUG_ON(&dl_sched_class != &stop_sched_class + 1); #endif +#ifdef CONFIG_SCHED_BORE -+ printk(KERN_INFO "BORE (Burst-Oriented Response Enhancer) CPU Scheduler modification 2.1.1 by Masahito Suzuki"); ++ printk(KERN_INFO "BORE (Burst-Oriented Response Enhancer) CPU Scheduler modification 2.2.1 by Masahito Suzuki"); +#endif // CONFIG_SCHED_BORE + wait_bit_init(); @@ -145,7 +171,7 @@ index 1637b65ba07a..752c43a9ff13 100644 SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p)); #endif diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index e046a2bff207..e1062b628e41 100644 +index 661226e38835..7fca60baba72 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -19,6 +19,9 @@ @@ -158,12 +184,13 @@ index e046a2bff207..e1062b628e41 100644 */ #include #include -@@ -126,6 +129,16 @@ static unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL; +@@ -126,6 +129,17 @@ static unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL; const_debug unsigned int sysctl_sched_migration_cost = 500000UL; +#ifdef CONFIG_SCHED_BORE +unsigned int __read_mostly sched_bore = 3; ++unsigned int __read_mostly sched_burst_cache_lifetime = 50000000; +unsigned int __read_mostly sched_burst_penalty_offset = 12; +unsigned int __read_mostly sched_burst_penalty_scale = 1292; +unsigned int __read_mostly sched_burst_smoothness = 1; @@ -175,7 +202,7 @@ index e046a2bff207..e1062b628e41 100644 int sched_thermal_decay_shift; static int __init setup_sched_thermal_decay_shift(char *str) { -@@ -185,6 +198,44 @@ static unsigned int sysctl_numa_balancing_promote_rate_limit = 65536; +@@ -185,6 +199,51 @@ static unsigned int sysctl_numa_balancing_promote_rate_limit = 65536; #ifdef CONFIG_SYSCTL static struct ctl_table sched_fair_sysctls[] = { @@ -190,6 +217,13 @@ index e046a2bff207..e1062b628e41 100644 + .extra2 = &three, + }, + { ++ .procname = "sched_burst_cache_lifetime", ++ .data = &sched_burst_cache_lifetime, ++ .maxlen = sizeof(unsigned int), ++ .mode = 0644, ++ .proc_handler = proc_dointvec, ++ }, ++ { + .procname = "sched_burst_penalty_offset", + .data = &sched_burst_penalty_offset, + .maxlen = sizeof(unsigned int), @@ -220,7 +254,7 @@ index e046a2bff207..e1062b628e41 100644 { .procname = "sched_child_runs_first", .data = &sysctl_sched_child_runs_first, -@@ -891,6 +942,47 @@ static void update_tg_load_avg(struct cfs_rq *cfs_rq) +@@ -891,6 +950,47 @@ static void update_tg_load_avg(struct cfs_rq *cfs_rq) } #endif /* CONFIG_SMP */ @@ -268,7 +302,7 @@ index e046a2bff207..e1062b628e41 100644 /* * Update the current task's runtime statistics. */ -@@ -920,6 +1012,14 @@ static void update_curr(struct cfs_rq *cfs_rq) +@@ -920,6 +1020,14 @@ static void update_curr(struct cfs_rq *cfs_rq) curr->sum_exec_runtime += delta_exec; schedstat_add(cfs_rq->exec_clock, delta_exec); @@ -283,7 +317,7 @@ index e046a2bff207..e1062b628e41 100644 curr->vruntime += calc_delta_fair(delta_exec, curr); update_min_vruntime(cfs_rq); -@@ -5011,8 +5111,14 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) +@@ -5011,8 +5119,14 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) se->prev_sum_exec_runtime = se->sum_exec_runtime; } @@ -298,7 +332,7 @@ index e046a2bff207..e1062b628e41 100644 /* * Pick the next process, keeping these things in mind, in this order: -@@ -5051,16 +5157,34 @@ pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr) +@@ -5051,16 +5165,34 @@ pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr) second = curr; } @@ -335,7 +369,7 @@ index e046a2bff207..e1062b628e41 100644 /* * Prefer last buddy, try to return the CPU to a preempted task. */ -@@ -6204,6 +6328,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) +@@ -6204,6 +6336,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) util_est_dequeue(&rq->cfs, p); for_each_sched_entity(se) { @@ -345,7 +379,7 @@ index e046a2bff207..e1062b628e41 100644 cfs_rq = cfs_rq_of(se); dequeue_entity(cfs_rq, se, flags); -@@ -7565,7 +7692,12 @@ static unsigned long wakeup_gran(struct sched_entity *se) +@@ -7565,7 +7700,12 @@ static unsigned long wakeup_gran(struct sched_entity *se) * */ static int @@ -358,7 +392,7 @@ index e046a2bff207..e1062b628e41 100644 { s64 gran, vdiff = curr->vruntime - se->vruntime; -@@ -7573,6 +7705,9 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se) +@@ -7573,6 +7713,9 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se) return -1; gran = wakeup_gran(se); @@ -368,7 +402,7 @@ index e046a2bff207..e1062b628e41 100644 if (vdiff > gran) return 1; -@@ -7677,7 +7812,12 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ +@@ -7677,7 +7820,12 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ return; update_curr(cfs_rq_of(se)); @@ -382,7 +416,7 @@ index e046a2bff207..e1062b628e41 100644 /* * Bias pick_next to pick the sched entity that is * triggering this preemption. -@@ -7913,6 +8053,9 @@ static void yield_task_fair(struct rq *rq) +@@ -7913,6 +8061,9 @@ static void yield_task_fair(struct rq *rq) struct task_struct *curr = rq->curr; struct cfs_rq *cfs_rq = task_cfs_rq(curr); struct sched_entity *se = &curr->se;