From 0e7abfd91ef5f628f9d9f6f370ff8dbe468f43ef Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Mon, 20 Jan 2025 15:06:01 +0100 Subject: [PATCH] 6.13: Sync, update tlb-broadcast-series Signed-off-by: Peter Jung --- 6.13/0001-amd-pstate.patch | 4 +- 6.13/0002-amd-tlb-broadcast.patch | 873 +++++++++----- 6.13/0003-bbr3.patch | 4 +- 6.13/0004-cachy.patch | 477 +++++++- 6.13/0005-crypto.patch | 4 +- 6.13/0006-fixes.patch | 184 ++- 6.13/0007-itmt-core-ranking.patch | 20 +- 6.13/0008-ntsync.patch | 8 +- 6.13/0009-perf-per-core.patch | 4 +- 6.13/0010-pksm.patch | 4 +- 6.13/0011-t2.patch | 8 +- 6.13/0012-zstd.patch | 4 +- 6.13/all/0001-cachyos-base-all.patch | 1590 ++++++++++++++++++-------- 6.13/sched-dev/0001-bore-cachy.patch | 1030 ----------------- 6.13/sched-dev/0001-bore.patch | 1005 ---------------- 6.13/sched/0001-bore-cachy.patch | 59 +- 6.13/sched/0001-bore.patch | 59 +- 17 files changed, 2290 insertions(+), 3047 deletions(-) delete mode 100644 6.13/sched-dev/0001-bore-cachy.patch delete mode 100644 6.13/sched-dev/0001-bore.patch diff --git a/6.13/0001-amd-pstate.patch b/6.13/0001-amd-pstate.patch index e095f0e4..e100c061 100644 --- a/6.13/0001-amd-pstate.patch +++ b/6.13/0001-amd-pstate.patch @@ -1,6 +1,6 @@ -From 2af576964728ca6af63da3c61dae669b5ae945c7 Mon Sep 17 00:00:00 2001 +From 1ec94c7b86986796d5d14135302e81dd3ddbe223 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:37:09 +0100 +Date: Mon, 20 Jan 2025 13:21:23 +0100 Subject: [PATCH 01/12] amd-pstate Signed-off-by: Peter Jung diff --git a/6.13/0002-amd-tlb-broadcast.patch b/6.13/0002-amd-tlb-broadcast.patch index 6fa53f3f..070bd8f0 100644 --- a/6.13/0002-amd-tlb-broadcast.patch +++ b/6.13/0002-amd-tlb-broadcast.patch @@ -1,39 +1,41 @@ -From 1d6b426b59b09163dbcaac857551295ad4b343d5 Mon Sep 17 00:00:00 2001 +From b74b9b0459100443f73ce718d0191bf58d6cb4b4 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:51:04 +0100 +Date: Mon, 20 Jan 2025 13:21:35 +0100 Subject: [PATCH 02/12] amd-tlb-broadcast Signed-off-by: Peter Jung --- - arch/x86/Kconfig | 2 +- - arch/x86/hyperv/mmu.c | 1 - - arch/x86/include/asm/cpufeatures.h | 1 + - arch/x86/include/asm/invlpgb.h | 93 ++++++ - arch/x86/include/asm/mmu.h | 6 + - arch/x86/include/asm/mmu_context.h | 12 + - arch/x86/include/asm/paravirt.h | 5 - - arch/x86/include/asm/paravirt_types.h | 2 - - arch/x86/include/asm/tlbbatch.h | 1 + - arch/x86/include/asm/tlbflush.h | 31 +- - arch/x86/kernel/cpu/amd.c | 16 ++ - arch/x86/kernel/kvm.c | 1 - - arch/x86/kernel/paravirt.c | 6 - - arch/x86/kernel/setup.c | 4 + - arch/x86/mm/pgtable.c | 16 +- - arch/x86/mm/tlb.c | 393 +++++++++++++++++++++++++- - arch/x86/xen/mmu_pv.c | 1 - - mm/memory.c | 1 - - mm/mmap.c | 2 - - mm/swap_state.c | 1 - - mm/vma.c | 2 - - 21 files changed, 541 insertions(+), 56 deletions(-) + arch/x86/Kconfig | 2 +- + arch/x86/Kconfig.cpu | 5 + + arch/x86/hyperv/mmu.c | 1 - + arch/x86/include/asm/cpufeatures.h | 1 + + arch/x86/include/asm/invlpgb.h | 103 +++++ + arch/x86/include/asm/mmu.h | 6 + + arch/x86/include/asm/mmu_context.h | 14 + + arch/x86/include/asm/msr-index.h | 2 + + arch/x86/include/asm/paravirt.h | 5 - + arch/x86/include/asm/paravirt_types.h | 2 - + arch/x86/include/asm/tlbbatch.h | 1 + + arch/x86/include/asm/tlbflush.h | 92 ++++- + arch/x86/kernel/cpu/amd.c | 12 + + arch/x86/kernel/kvm.c | 1 - + arch/x86/kernel/paravirt.c | 6 - + arch/x86/mm/pgtable.c | 16 +- + arch/x86/mm/tlb.c | 496 +++++++++++++++++++++++-- + arch/x86/xen/mmu_pv.c | 1 - + mm/memory.c | 1 - + mm/mmap.c | 2 - + 
mm/swap_state.c | 1 - + mm/vma.c | 2 - + tools/arch/x86/include/asm/msr-index.h | 2 + + 23 files changed, 695 insertions(+), 79 deletions(-) create mode 100644 arch/x86/include/asm/invlpgb.h diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig -index 9d7bd0ae48c4..e8743f8c9fd0 100644 +index ef6cfea9df73..1f824dcab4dc 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig -@@ -274,7 +274,7 @@ config X86 +@@ -273,7 +273,7 @@ config X86 select HAVE_PCI select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP @@ -42,6 +44,29 @@ index 9d7bd0ae48c4..e8743f8c9fd0 100644 select MMU_GATHER_MERGE_VMAS select HAVE_POSIX_CPU_TIMERS_TASK_WORK select HAVE_REGS_AND_STACK_ACCESS_API +diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu +index 2a7279d80460..bacdc502903f 100644 +--- a/arch/x86/Kconfig.cpu ++++ b/arch/x86/Kconfig.cpu +@@ -395,6 +395,10 @@ config X86_VMX_FEATURE_NAMES + def_bool y + depends on IA32_FEAT_CTL + ++config X86_BROADCAST_TLB_FLUSH ++ def_bool y ++ depends on CPU_SUP_AMD ++ + menuconfig PROCESSOR_SELECT + bool "Supported processor vendors" if EXPERT + help +@@ -431,6 +435,7 @@ config CPU_SUP_CYRIX_32 + config CPU_SUP_AMD + default y + bool "Support AMD processors" if PROCESSOR_SELECT ++ select X86_BROADCAST_TLB_FLUSH + help + This enables detection, tunings and quirks for AMD processors + diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index 1cc113200ff5..cbe6c71e17c1 100644 --- a/arch/x86/hyperv/mmu.c @@ -53,27 +78,28 @@ index 1cc113200ff5..cbe6c71e17c1 100644 - pv_ops.mmu.tlb_remove_table = tlb_remove_table; } diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h -index 645aa360628d..742c138d011a 100644 +index 645aa360628d..989e4c9cad2e 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -338,6 +338,7 @@ #define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+ 1) /* "irperf" Instructions Retired Count */ #define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* "xsaveerptr" Always save/restore FP error pointers */ -+#define X86_FEATURE_INVLPGB (13*32+ 3) /* "invlpgb" INVLPGB instruction */ ++#define X86_FEATURE_INVLPGB (13*32+ 3) /* INVLPGB and TLBSYNC instruction supported. */ #define X86_FEATURE_RDPRU (13*32+ 4) /* "rdpru" Read processor register at user level */ #define X86_FEATURE_WBNOINVD (13*32+ 9) /* "wbnoinvd" WBNOINVD instruction */ #define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ diff --git a/arch/x86/include/asm/invlpgb.h b/arch/x86/include/asm/invlpgb.h new file mode 100644 -index 000000000000..2669ebfffe81 +index 000000000000..418402535319 --- /dev/null +++ b/arch/x86/include/asm/invlpgb.h -@@ -0,0 +1,93 @@ +@@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_INVLPGB +#define _ASM_X86_INVLPGB + ++#include +#include + +/* @@ -85,21 +111,31 @@ index 000000000000..2669ebfffe81 + * TLBSYNC is used to ensure that pending INVLPGB invalidations initiated from + * this CPU have completed. 
+ */ -+static inline void __invlpgb(unsigned long asid, unsigned long pcid, unsigned long addr, -+ int extra_count, bool pmd_stride, unsigned long flags) ++static inline void __invlpgb(unsigned long asid, unsigned long pcid, ++ unsigned long addr, u16 extra_count, ++ bool pmd_stride, unsigned long flags) +{ -+ u64 rax = addr | flags; -+ u32 ecx = (pmd_stride << 31) | extra_count; + u32 edx = (pcid << 16) | asid; ++ u32 ecx = (pmd_stride << 31) | extra_count; ++ u64 rax = addr | flags; + -+ asm volatile("invlpgb" : : "a" (rax), "c" (ecx), "d" (edx)); ++ /* INVLPGB; supported in binutils >= 2.36. */ ++ asm volatile(".byte 0x0f, 0x01, 0xfe" : : "a" (rax), "c" (ecx), "d" (edx)); ++} ++ ++/* Wait for INVLPGB originated by this CPU to complete. */ ++static inline void tlbsync(void) ++{ ++ cant_migrate(); ++ /* TLBSYNC: supported in binutils >= 0.36. */ ++ asm volatile(".byte 0x0f, 0x01, 0xff" ::: "memory"); +} + +/* + * INVLPGB can be targeted by virtual address, PCID, ASID, or any combination + * of the three. For example: + * - INVLPGB_VA | INVLPGB_INCLUDE_GLOBAL: invalidate all TLB entries at the address -+ * - INVLPGB_PCID: invalidate all TLB entries matching the PCID ++ * - INVLPGB_PCID: invalidate all TLB entries matching the PCID + * + * The first can be used to invalidate (kernel) mappings at a particular + * address across all processes. @@ -118,22 +154,25 @@ index 000000000000..2669ebfffe81 + unsigned long addr) +{ + __invlpgb(0, pcid, addr, 0, 0, INVLPGB_PCID | INVLPGB_VA); ++ tlbsync(); +} + -+static inline void invlpgb_flush_user_nr(unsigned long pcid, unsigned long addr, -+ int nr, bool pmd_stride) ++static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid, ++ unsigned long addr, ++ u16 nr, ++ bool pmd_stride, ++ bool freed_tables) +{ -+ __invlpgb(0, pcid, addr, nr - 1, pmd_stride, INVLPGB_PCID | INVLPGB_VA | INVLPGB_FINAL_ONLY); -+} ++ unsigned long flags = INVLPGB_PCID | INVLPGB_VA; + -+/* Flush all mappings for a given ASID, not including globals. */ -+static inline void invlpgb_flush_single_asid(unsigned long asid) -+{ -+ __invlpgb(asid, 0, 0, 0, 0, INVLPGB_ASID); ++ if (!freed_tables) ++ flags |= INVLPGB_FINAL_ONLY; ++ ++ __invlpgb(0, pcid, addr, nr - 1, pmd_stride, flags); +} + +/* Flush all mappings for a given PCID, not including globals. */ -+static inline void invlpgb_flush_single_pcid(unsigned long pcid) ++static inline void invlpgb_flush_single_pcid_nosync(unsigned long pcid) +{ + __invlpgb(0, pcid, 0, 0, 0, INVLPGB_PCID); +} @@ -142,10 +181,11 @@ index 000000000000..2669ebfffe81 +static inline void invlpgb_flush_all(void) +{ + __invlpgb(0, 0, 0, 0, 0, INVLPGB_INCLUDE_GLOBAL); ++ tlbsync(); +} + +/* Flush addr, including globals, for all PCIDs. */ -+static inline void invlpgb_flush_addr(unsigned long addr, int nr) ++static inline void invlpgb_flush_addr_nosync(unsigned long addr, u16 nr) +{ + __invlpgb(0, 0, addr, nr - 1, 0, INVLPGB_INCLUDE_GLOBAL); +} @@ -154,69 +194,86 @@ index 000000000000..2669ebfffe81 +static inline void invlpgb_flush_all_nonglobals(void) +{ + __invlpgb(0, 0, 0, 0, 0, 0); -+} -+ -+/* Wait for INVLPGB originated by this CPU to complete. 
*/ -+static inline void tlbsync(void) -+{ -+ asm volatile("tlbsync"); ++ tlbsync(); +} + +#endif /* _ASM_X86_INVLPGB */ diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h -index ce4677b8b735..83d0986295d3 100644 +index ce4677b8b735..51f25d38de86 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h -@@ -46,6 +46,12 @@ typedef struct { - unsigned long flags; +@@ -67,6 +67,12 @@ typedef struct { + u16 pkey_allocation_map; + s16 execute_only_pkey; #endif - -+#ifdef CONFIG_CPU_SUP_AMD -+ struct list_head broadcast_asid_list; -+ u16 broadcast_asid; ++ ++#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH ++ u16 global_asid; + bool asid_transition; +#endif + - #ifdef CONFIG_ADDRESS_MASKING - /* Active LAM mode: X86_CR3_LAM_U48 or X86_CR3_LAM_U57 or 0 (disabled) */ - unsigned long lam_cr3_mask; + } mm_context_t; + + #define INIT_MM_CONTEXT(mm) \ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h -index 2886cb668d7f..2c347b51d9b9 100644 +index 2886cb668d7f..65f50464b5c3 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -139,6 +139,8 @@ static inline void mm_reset_untag_mask(struct mm_struct *mm) #define enter_lazy_tlb enter_lazy_tlb extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); -+extern void destroy_context_free_broadcast_asid(struct mm_struct *mm); ++extern void destroy_context_free_global_asid(struct mm_struct *mm); + /* * Init a new mm. Used on mm copies, like at fork() * and on mm's that are brand-new, like at execve(). -@@ -160,6 +162,13 @@ static inline int init_new_context(struct task_struct *tsk, +@@ -160,6 +162,14 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.execute_only_pkey = -1; } #endif + -+#ifdef CONFIG_CPU_SUP_AMD -+ INIT_LIST_HEAD(&mm->context.broadcast_asid_list); -+ mm->context.broadcast_asid = 0; -+ mm->context.asid_transition = false; ++#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH ++ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) { ++ mm->context.global_asid = 0; ++ mm->context.asid_transition = false; ++ } +#endif + mm_reset_untag_mask(mm); init_new_context_ldt(mm); return 0; -@@ -169,6 +178,9 @@ static inline int init_new_context(struct task_struct *tsk, +@@ -169,6 +179,10 @@ static inline int init_new_context(struct task_struct *tsk, static inline void destroy_context(struct mm_struct *mm) { destroy_context_ldt(mm); -+#ifdef CONFIG_CPU_SUP_AMD -+ destroy_context_free_broadcast_asid(mm); ++#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH ++ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) ++ destroy_context_free_global_asid(mm); +#endif } extern void switch_mm(struct mm_struct *prev, struct mm_struct *next, +diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h +index 3ae84c3b8e6d..dc1c1057f26e 100644 +--- a/arch/x86/include/asm/msr-index.h ++++ b/arch/x86/include/asm/msr-index.h +@@ -25,6 +25,7 @@ + #define _EFER_SVME 12 /* Enable virtualization */ + #define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */ + #define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ ++#define _EFER_TCE 15 /* Enable Translation Cache Extensions */ + #define _EFER_AUTOIBRS 21 /* Enable Automatic IBRS */ + + #define EFER_SCE (1<<_EFER_SCE) +@@ -34,6 +35,7 @@ + #define EFER_SVME (1<<_EFER_SVME) + #define EFER_LMSLE (1<<_EFER_LMSLE) + #define EFER_FFXSR (1<<_EFER_FFXSR) ++#define EFER_TCE (1<<_EFER_TCE) + #define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS) + + /* diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 
d4eb9e1d61b8..794ba3647c6c 100644 --- a/arch/x86/include/asm/paravirt.h @@ -259,7 +316,7 @@ index 1ad56eb3e8a8..f9a17edf63ad 100644 #endif /* _ARCH_X86_TLBBATCH_H */ diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h -index 69e79fff41b8..a2f9b7370717 100644 +index 69e79fff41b8..5490ca71e27f 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -10,6 +10,7 @@ @@ -270,39 +327,100 @@ index 69e79fff41b8..a2f9b7370717 100644 #include #include #include -@@ -64,6 +65,23 @@ static inline void cr4_clear_bits(unsigned long mask) - */ - #define TLB_NR_DYN_ASIDS 6 - -+#ifdef CONFIG_CPU_SUP_AMD -+#define is_dyn_asid(asid) (asid) < TLB_NR_DYN_ASIDS -+#define is_broadcast_asid(asid) (asid) >= TLB_NR_DYN_ASIDS -+#define in_asid_transition(info) (info->mm && info->mm->context.asid_transition) -+#define mm_broadcast_asid(mm) (mm->context.broadcast_asid) +@@ -183,6 +184,13 @@ static inline void cr4_init_shadow(void) + extern unsigned long mmu_cr4_features; + extern u32 *trampoline_cr4_features; + ++/* How many pages can we invalidate with one INVLPGB. */ ++#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH ++extern u16 invlpgb_count_max; +#else -+#define is_dyn_asid(asid) true -+#define is_broadcast_asid(asid) false -+#define in_asid_transition(info) false -+#define mm_broadcast_asid(mm) 0 ++#define invlpgb_count_max 1 ++#endif ++ + extern void initialize_tlbstate_and_flush(void); + + /* +@@ -230,6 +238,78 @@ void flush_tlb_one_kernel(unsigned long addr); + void flush_tlb_multi(const struct cpumask *cpumask, + const struct flush_tlb_info *info); + ++#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH ++static inline bool is_dyn_asid(u16 asid) ++{ ++ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) ++ return true; ++ ++ return asid < TLB_NR_DYN_ASIDS; ++} ++ ++static inline bool is_global_asid(u16 asid) ++{ ++ return !is_dyn_asid(asid); ++} ++ ++static inline bool in_asid_transition(const struct flush_tlb_info *info) ++{ ++ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) ++ return false; ++ ++ return info->mm && READ_ONCE(info->mm->context.asid_transition); ++} + -+inline bool needs_broadcast_asid_reload(struct mm_struct *next, u16 prev_asid) ++static inline u16 mm_global_asid(struct mm_struct *mm) ++{ ++ u16 asid; ++ ++ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) ++ return 0; ++ ++ asid = READ_ONCE(mm->context.global_asid); ++ ++ /* mm->context.global_asid is either 0, or a global ASID */ ++ VM_WARN_ON_ONCE(is_dyn_asid(asid)); ++ ++ return asid; ++} ++#else ++static inline bool is_dyn_asid(u16 asid) ++{ ++ return true; ++} ++ ++static inline bool is_global_asid(u16 asid) ++{ ++ return false; ++} ++ ++static inline bool in_asid_transition(const struct flush_tlb_info *info) +{ + return false; +} ++ ++static inline u16 mm_global_asid(struct mm_struct *mm) ++{ ++ return 0; ++} ++ ++static inline bool needs_global_asid_reload(struct mm_struct *next, u16 prev_asid) ++{ ++ return false; ++} ++ ++static inline void broadcast_tlb_flush(struct flush_tlb_info *info) ++{ ++ VM_WARN_ON_ONCE(1); ++} ++ ++static inline void consider_global_asid(struct mm_struct *mm) ++{ ++} +#endif + - struct tlb_context { - u64 ctx_id; - u64 tlb_gen; -@@ -182,6 +200,7 @@ static inline void cr4_init_shadow(void) - - extern unsigned long mmu_cr4_features; - extern u32 *trampoline_cr4_features; -+extern u16 invlpgb_count_max; - - extern void initialize_tlbstate_and_flush(void); - -@@ -277,21 +296,15 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) + #ifdef CONFIG_PARAVIRT + #include + #endif +@@ 
-277,21 +357,15 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) return atomic64_inc_return(&mm->context.tlb_gen); } @@ -328,28 +446,38 @@ index 69e79fff41b8..a2f9b7370717 100644 static inline bool pte_flags_need_flush(unsigned long oldflags, unsigned long newflags, diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c -index 79d2e17f6582..4dc42705aaca 100644 +index 79d2e17f6582..21076252a491 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c -@@ -1135,6 +1135,22 @@ static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c) - tlb_lli_2m[ENTRIES] = eax & mask; +@@ -29,6 +29,8 @@ - tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1; + #include "cpu.h" + ++u16 invlpgb_count_max __ro_after_init; + -+ if (c->extended_cpuid_level < 0x80000008) -+ return; + static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) + { + u32 gprs[8] = { 0 }; +@@ -1069,6 +1071,10 @@ static void init_amd(struct cpuinfo_x86 *c) + + /* AMD CPUs don't need fencing after x2APIC/TSC_DEADLINE MSR writes. */ + clear_cpu_cap(c, X86_FEATURE_APIC_MSRS_FENCE); + -+ cpuid(0x80000008, &eax, &ebx, &ecx, &edx); ++ /* Enable Translation Cache Extension */ ++ if (cpu_feature_enabled(X86_FEATURE_TCE)) ++ msr_set_bit(MSR_EFER, _EFER_TCE); + } + + #ifdef CONFIG_X86_32 +@@ -1135,6 +1141,12 @@ static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c) + tlb_lli_2m[ENTRIES] = eax & mask; + + tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1; + + /* Max number of pages INVLPGB can invalidate in one shot */ -+ invlpgb_count_max = (edx & 0xffff) + 1; -+ -+ /* If supported, enable translation cache extensions (TCE) */ -+ cpuid(0x80000001, &eax, &ebx, &ecx, &edx); -+ if (ecx & BIT(17)) { -+ u64 msr = native_read_msr(MSR_EFER);; -+ msr |= BIT(15); -+ wrmsrl(MSR_EFER, msr); ++ if (boot_cpu_has(X86_FEATURE_INVLPGB)) { ++ cpuid(0x80000008, &eax, &ebx, &ecx, &edx); ++ invlpgb_count_max = (edx & 0xffff) + 1; + } } @@ -390,21 +518,6 @@ index fec381533555..c019771e0123 100644 .mmu.exit_mmap = paravirt_nop, .mmu.notify_page_enc_status_changed = paravirt_nop, -diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c -index f1fea506e20f..6c4d08f8f7b1 100644 ---- a/arch/x86/kernel/setup.c -+++ b/arch/x86/kernel/setup.c -@@ -138,6 +138,10 @@ __visible unsigned long mmu_cr4_features __ro_after_init; - __visible unsigned long mmu_cr4_features __ro_after_init = X86_CR4_PAE; - #endif - -+#ifdef CONFIG_CPU_SUP_AMD -+u16 invlpgb_count_max __ro_after_init; -+#endif -+ - #ifdef CONFIG_IMA - static phys_addr_t ima_kexec_buffer_phys; - static size_t ima_kexec_buffer_size; diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 5745a354a241..3dc4af1f7868 100644 --- a/arch/x86/mm/pgtable.c @@ -460,7 +573,7 @@ index 5745a354a241..3dc4af1f7868 100644 #endif /* CONFIG_PGTABLE_LEVELS > 4 */ #endif /* CONFIG_PGTABLE_LEVELS > 3 */ diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c -index a2becb85bea7..0080175153ef 100644 +index a2becb85bea7..6449ac701c88 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -74,13 +74,15 @@ @@ -482,120 +595,136 @@ index a2becb85bea7..0080175153ef 100644 * for KPTI each mm has two address spaces and thus needs two * PCID values, but we can still do with a single ASID denomination * for each mm. Corresponds to kPCID + 2048. 
-@@ -225,6 +227,18 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, +@@ -225,6 +227,20 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, return; } + /* -+ * TLB consistency for this ASID is maintained with INVLPGB; -+ * TLB flushes happen even while the process isn't running. ++ * TLB consistency for global ASIDs is maintained with broadcast TLB ++ * flushing. The TLB is never outdated, and does not need flushing. + */ -+#ifdef CONFIG_CPU_SUP_AMD -+ if (static_cpu_has(X86_FEATURE_INVLPGB) && mm_broadcast_asid(next)) { -+ *new_asid = mm_broadcast_asid(next); -+ *need_flush = false; -+ return; ++ if (IS_ENABLED(CONFIG_X86_BROADCAST_TLB_FLUSH) && static_cpu_has(X86_FEATURE_INVLPGB)) { ++ u16 global_asid = mm_global_asid(next); ++ ++ if (global_asid) { ++ *new_asid = global_asid; ++ *need_flush = false; ++ return; ++ } + } -+#endif + if (this_cpu_read(cpu_tlbstate.invalidate_other)) clear_asid_other(); -@@ -251,6 +265,245 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, +@@ -251,6 +267,290 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, *need_flush = true; } -+#ifdef CONFIG_CPU_SUP_AMD ++#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH +/* -+ * Logic for AMD INVLPGB support. ++ * Logic for broadcast TLB invalidation. + */ -+static DEFINE_RAW_SPINLOCK(broadcast_asid_lock); -+static u16 last_broadcast_asid = TLB_NR_DYN_ASIDS; -+static DECLARE_BITMAP(broadcast_asid_used, MAX_ASID_AVAILABLE) = { 0 }; -+static LIST_HEAD(broadcast_asid_list); -+static int broadcast_asid_available = MAX_ASID_AVAILABLE - TLB_NR_DYN_ASIDS - 1; ++static DEFINE_RAW_SPINLOCK(global_asid_lock); ++static u16 last_global_asid = MAX_ASID_AVAILABLE; ++static DECLARE_BITMAP(global_asid_used, MAX_ASID_AVAILABLE) = { 0 }; ++static DECLARE_BITMAP(global_asid_freed, MAX_ASID_AVAILABLE) = { 0 }; ++static int global_asid_available = MAX_ASID_AVAILABLE - TLB_NR_DYN_ASIDS - 1; + -+static void reset_broadcast_asid_space(void) ++static void reset_global_asid_space(void) +{ -+ mm_context_t *context; -+ -+ lockdep_assert_held(&broadcast_asid_lock); ++ lockdep_assert_held(&global_asid_lock); + + /* -+ * Flush once when we wrap around the ASID space, so we won't need -+ * to flush every time we allocate an ASID for boradcast flushing. ++ * A global TLB flush guarantees that any stale entries from ++ * previously freed global ASIDs get flushed from the TLB ++ * everywhere, making these global ASIDs safe to reuse. + */ + invlpgb_flush_all_nonglobals(); -+ tlbsync(); + + /* -+ * Leave the currently used broadcast ASIDs set in the bitmap, since -+ * those cannot be reused before the next wraparound and flush.. ++ * Clear all the previously freed global ASIDs from the ++ * broadcast_asid_used bitmap, now that the global TLB flush ++ * has made them actually available for re-use. + */ -+ bitmap_clear(broadcast_asid_used, 0, MAX_ASID_AVAILABLE); -+ list_for_each_entry(context, &broadcast_asid_list, broadcast_asid_list) -+ __set_bit(context->broadcast_asid, broadcast_asid_used); ++ bitmap_andnot(global_asid_used, global_asid_used, ++ global_asid_freed, MAX_ASID_AVAILABLE); ++ bitmap_clear(global_asid_freed, 0, MAX_ASID_AVAILABLE); + -+ last_broadcast_asid = TLB_NR_DYN_ASIDS; ++ /* ++ * ASIDs 0-TLB_NR_DYN_ASIDS are used for CPU-local ASID ++ * assignments, for tasks doing IPI based TLB shootdowns. ++ * Restart the search from the start of the global ASID space. 
++ */ ++ last_global_asid = TLB_NR_DYN_ASIDS; +} + -+static u16 get_broadcast_asid(void) ++static u16 get_global_asid(void) +{ -+ lockdep_assert_held(&broadcast_asid_lock); ++ lockdep_assert_held(&global_asid_lock); + + do { -+ u16 start = last_broadcast_asid; -+ u16 asid = find_next_zero_bit(broadcast_asid_used, MAX_ASID_AVAILABLE, start); ++ u16 start = last_global_asid; ++ u16 asid = find_next_zero_bit(global_asid_used, MAX_ASID_AVAILABLE, start); + + if (asid >= MAX_ASID_AVAILABLE) { -+ reset_broadcast_asid_space(); ++ reset_global_asid_space(); + continue; + } + -+ /* Try claiming this broadcast ASID. */ -+ if (!test_and_set_bit(asid, broadcast_asid_used)) { -+ last_broadcast_asid = asid; -+ return asid; -+ } ++ /* Claim this global ASID. */ ++ __set_bit(asid, global_asid_used); ++ last_global_asid = asid; ++ global_asid_available--; ++ return asid; + } while (1); +} + +/* -+ * Returns true if the mm is transitioning from a CPU-local ASID to a broadcast ++ * Returns true if the mm is transitioning from a CPU-local ASID to a global + * (INVLPGB) ASID, or the other way around. + */ -+static bool needs_broadcast_asid_reload(struct mm_struct *next, u16 prev_asid) ++static bool needs_global_asid_reload(struct mm_struct *next, u16 prev_asid) +{ -+ u16 broadcast_asid = mm_broadcast_asid(next); ++ u16 global_asid = mm_global_asid(next); + -+ if (broadcast_asid && prev_asid != broadcast_asid) ++ if (global_asid && prev_asid != global_asid) + return true; + -+ if (!broadcast_asid && is_broadcast_asid(prev_asid)) ++ if (!global_asid && is_global_asid(prev_asid)) + return true; + + return false; +} + -+void destroy_context_free_broadcast_asid(struct mm_struct *mm) ++void destroy_context_free_global_asid(struct mm_struct *mm) +{ -+ if (!mm->context.broadcast_asid) ++ if (!mm->context.global_asid) + return; + -+ guard(raw_spinlock_irqsave)(&broadcast_asid_lock); -+ mm->context.broadcast_asid = 0; -+ list_del(&mm->context.broadcast_asid_list); -+ broadcast_asid_available++; ++ guard(raw_spinlock_irqsave)(&global_asid_lock); ++ ++ /* The global ASID can be re-used only after flush at wrap-around. */ ++ __set_bit(mm->context.global_asid, global_asid_freed); ++ ++ mm->context.global_asid = 0; ++ global_asid_available++; +} + ++/* ++ * Check whether a process is currently active on more than "threshold" CPUs. ++ * This is a cheap estimation on whether or not it may make sense to assign ++ * a global ASID to this process, and use broadcast TLB invalidation. ++ */ +static bool mm_active_cpus_exceeds(struct mm_struct *mm, int threshold) +{ + int count = 0; + int cpu; + ++ /* This quick check should eliminate most single threaded programs. */ + if (cpumask_weight(mm_cpumask(mm)) <= threshold) + return false; + ++ /* Slower check to make sure. */ + for_each_cpu(cpu, mm_cpumask(mm)) { + /* Skip the CPUs that aren't really running this process. */ + if (per_cpu(cpu_tlbstate.loaded_mm, cpu) != mm) @@ -611,40 +740,56 @@ index a2becb85bea7..0080175153ef 100644 +} + +/* -+ * Assign a broadcast ASID to the current process, protecting against ++ * Assign a global ASID to the current process, protecting against + * races between multiple threads in the process. + */ -+static void use_broadcast_asid(struct mm_struct *mm) ++static void use_global_asid(struct mm_struct *mm) +{ -+ guard(raw_spinlock_irqsave)(&broadcast_asid_lock); ++ guard(raw_spinlock_irqsave)(&global_asid_lock); + + /* This process is already using broadcast TLB invalidation. 
*/ -+ if (mm->context.broadcast_asid) ++ if (mm->context.global_asid) ++ return; ++ ++ /* The last global ASID was consumed while waiting for the lock. */ ++ if (!global_asid_available) + return; + -+ mm->context.broadcast_asid = get_broadcast_asid(); -+ mm->context.asid_transition = true; -+ list_add(&mm->context.broadcast_asid_list, &broadcast_asid_list); -+ broadcast_asid_available--; ++ /* ++ * The transition from IPI TLB flushing, with a dynamic ASID, ++ * and broadcast TLB flushing, using a global ASID, uses memory ++ * ordering for synchronization. ++ * ++ * While the process has threads still using a dynamic ASID, ++ * TLB invalidation IPIs continue to get sent. ++ * ++ * This code sets asid_transition first, before assigning the ++ * global ASID. ++ * ++ * The TLB flush code will only verify the ASID transition ++ * after it has seen the new global ASID for the process. ++ */ ++ WRITE_ONCE(mm->context.asid_transition, true); ++ WRITE_ONCE(mm->context.global_asid, get_global_asid()); +} + +/* -+ * Figure out whether to assign a broadcast (global) ASID to a process. -+ * We vary the threshold by how empty or full broadcast ASID space is. ++ * Figure out whether to assign a global ASID to a process. ++ * We vary the threshold by how empty or full global ASID space is. + * 1/4 full: >= 4 active threads + * 1/2 full: >= 8 active threads + * 3/4 full: >= 16 active threads + * 7/8 full: >= 32 active threads + * etc + * -+ * This way we should never exhaust the broadcast ASID space, even on very ++ * This way we should never exhaust the global ASID space, even on very + * large systems, and the processes with the largest number of active + * threads should be able to use broadcast TLB invalidation. + */ +#define HALFFULL_THRESHOLD 8 -+static bool meets_broadcast_asid_threshold(struct mm_struct *mm) ++static bool meets_global_asid_threshold(struct mm_struct *mm) +{ -+ int avail = broadcast_asid_available; ++ int avail = global_asid_available; + int threshold = HALFFULL_THRESHOLD; + + if (!avail) @@ -664,7 +809,7 @@ index a2becb85bea7..0080175153ef 100644 + return mm_active_cpus_exceeds(mm, threshold); +} + -+static void count_tlb_flush(struct mm_struct *mm) ++static void consider_global_asid(struct mm_struct *mm) +{ + if (!static_cpu_has(X86_FEATURE_INVLPGB)) + return; @@ -673,43 +818,54 @@ index a2becb85bea7..0080175153ef 100644 + if ((current->pid & 0x1f) != (jiffies & 0x1f)) + return; + -+ if (meets_broadcast_asid_threshold(mm)) -+ use_broadcast_asid(mm); ++ if (meets_global_asid_threshold(mm)) ++ use_global_asid(mm); +} + +static void finish_asid_transition(struct flush_tlb_info *info) +{ + struct mm_struct *mm = info->mm; -+ int bc_asid = mm_broadcast_asid(mm); ++ int bc_asid = mm_global_asid(mm); + int cpu; + -+ if (!mm->context.asid_transition) ++ if (!READ_ONCE(mm->context.asid_transition)) + return; + + for_each_cpu(cpu, mm_cpumask(mm)) { ++ /* ++ * The remote CPU is context switching. Wait for that to ++ * finish, to catch the unlikely case of it switching to ++ * the target mm with an out of date ASID. ++ */ ++ while (READ_ONCE(per_cpu(cpu_tlbstate.loaded_mm, cpu)) == LOADED_MM_SWITCHING) ++ cpu_relax(); ++ + if (READ_ONCE(per_cpu(cpu_tlbstate.loaded_mm, cpu)) != mm) + continue; + + /* -+ * If at least one CPU is not using the broadcast ASID yet, ++ * If at least one CPU is not using the global ASID yet, + * send a TLB flush IPI. The IPI should cause stragglers + * to transition soon. 
++ * ++ * This can race with the CPU switching to another task; ++ * that results in a (harmless) extra IPI. + */ -+ if (per_cpu(cpu_tlbstate.loaded_mm_asid, cpu) != bc_asid) { ++ if (READ_ONCE(per_cpu(cpu_tlbstate.loaded_mm_asid, cpu)) != bc_asid) { + flush_tlb_multi(mm_cpumask(info->mm), info); + return; + } + } + -+ /* All the CPUs running this process are using the broadcast ASID. */ -+ mm->context.asid_transition = 0; ++ /* All the CPUs running this process are using the global ASID. */ ++ WRITE_ONCE(mm->context.asid_transition, false); +} + +static void broadcast_tlb_flush(struct flush_tlb_info *info) +{ + bool pmd = info->stride_shift == PMD_SHIFT; + unsigned long maxnr = invlpgb_count_max; -+ unsigned long asid = info->mm->context.broadcast_asid; ++ unsigned long asid = info->mm->context.global_asid; + unsigned long addr = info->start; + unsigned long nr; + @@ -717,12 +873,17 @@ index a2becb85bea7..0080175153ef 100644 + if (info->stride_shift > PMD_SHIFT) + maxnr = 1; + -+ if (info->end == TLB_FLUSH_ALL || info->freed_tables) { -+ invlpgb_flush_single_pcid(kern_pcid(asid)); ++ /* ++ * TLB flushes with INVLPGB are kicked off asynchronously. ++ * The inc_mm_tlb_gen() guarantees page table updates are done ++ * before these TLB flushes happen. ++ */ ++ if (info->end == TLB_FLUSH_ALL) { ++ invlpgb_flush_single_pcid_nosync(kern_pcid(asid)); + /* Do any CPUs supporting INVLPGB need PTI? */ + if (static_cpu_has(X86_FEATURE_PTI)) -+ invlpgb_flush_single_pcid(user_pcid(asid)); -+ } else do { ++ invlpgb_flush_single_pcid_nosync(user_pcid(asid)); ++ } else for (; addr < info->end; addr += nr << info->stride_shift) { + /* + * Calculate how many pages can be flushed at once; if the + * remainder of the range is less than one page, flush one. @@ -730,43 +891,42 @@ index a2becb85bea7..0080175153ef 100644 + nr = min(maxnr, (info->end - addr) >> info->stride_shift); + nr = max(nr, 1); + -+ invlpgb_flush_user_nr(kern_pcid(asid), addr, nr, pmd); ++ invlpgb_flush_user_nr_nosync(kern_pcid(asid), addr, nr, pmd, info->freed_tables); + /* Do any CPUs supporting INVLPGB need PTI? */ + if (static_cpu_has(X86_FEATURE_PTI)) -+ invlpgb_flush_user_nr(user_pcid(asid), addr, nr, pmd); -+ addr += nr << info->stride_shift; -+ } while (addr < info->end); ++ invlpgb_flush_user_nr_nosync(user_pcid(asid), addr, nr, pmd, info->freed_tables); ++ } + + finish_asid_transition(info); + + /* Wait for the INVLPGBs kicked off above to finish. */ + tlbsync(); +} -+#endif /* CONFIG_CPU_SUP_AMD */ ++#endif /* CONFIG_X86_BROADCAST_TLB_FLUSH */ + /* * Given an ASID, flush the corresponding user ASID. We can delay this * until the next time we switch to it. 
-@@ -556,8 +809,9 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, +@@ -556,8 +856,9 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, */ if (prev == next) { /* Not actually switching mm's */ - VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) != - next->context.ctx_id); -+ if (is_dyn_asid(prev_asid)) -+ VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) != -+ next->context.ctx_id); ++ VM_WARN_ON(is_dyn_asid(prev_asid) && ++ this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) != ++ next->context.ctx_id); /* * If this races with another thread that enables lam, 'new_lam' -@@ -573,6 +827,23 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, +@@ -573,6 +874,23 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, !cpumask_test_cpu(cpu, mm_cpumask(next)))) cpumask_set_cpu(cpu, mm_cpumask(next)); + /* + * Check if the current mm is transitioning to a new ASID. + */ -+ if (needs_broadcast_asid_reload(next, prev_asid)) { ++ if (needs_global_asid_reload(next, prev_asid)) { + next_tlb_gen = atomic64_read(&next->context.tlb_gen); + + choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush); @@ -777,24 +937,44 @@ index a2becb85bea7..0080175153ef 100644 + * Broadcast TLB invalidation keeps this PCID up to date + * all the time. + */ -+ if (is_broadcast_asid(prev_asid)) ++ if (is_global_asid(prev_asid)) + return; + /* * If the CPU is not in lazy TLB mode, we are just switching * from one thread in a process to another thread in the same -@@ -629,8 +900,10 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, - barrier(); +@@ -606,6 +924,13 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, + */ + cond_mitigation(tsk); + ++ /* ++ * Let nmi_uaccess_okay() and finish_asid_transition() ++ * know that we're changing CR3. ++ */ ++ this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING); ++ barrier(); ++ + /* + * Stop remote flushes for the previous mm. + * Skip kernel threads; we never send init_mm TLB flushing IPIs, +@@ -623,14 +948,12 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, + next_tlb_gen = atomic64_read(&next->context.tlb_gen); + + choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush); +- +- /* Let nmi_uaccess_okay() know that we're changing CR3. 
*/ +- this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING); +- barrier(); } +reload_tlb: new_lam = mm_lam_cr3_mask(next); if (need_flush) { -+ VM_BUG_ON(is_broadcast_asid(new_asid)); ++ VM_WARN_ON_ONCE(is_global_asid(new_asid)); this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id); this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen); load_new_mm_cr3(next->pgd, new_asid, new_lam, true); -@@ -749,7 +1022,7 @@ static void flush_tlb_func(void *info) +@@ -749,7 +1072,7 @@ static void flush_tlb_func(void *info) const struct flush_tlb_info *f = info; struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm); u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); @@ -803,24 +983,24 @@ index a2becb85bea7..0080175153ef 100644 bool local = smp_processor_id() == f->initiating_cpu; unsigned long nr_invalidate = 0; u64 mm_tlb_gen; -@@ -769,6 +1042,16 @@ static void flush_tlb_func(void *info) +@@ -769,6 +1092,16 @@ static void flush_tlb_func(void *info) if (unlikely(loaded_mm == &init_mm)) return; -+ /* Reload the ASID if transitioning into or out of a broadcast ASID */ -+ if (needs_broadcast_asid_reload(loaded_mm, loaded_mm_asid)) { ++ /* Reload the ASID if transitioning into or out of a global ASID */ ++ if (needs_global_asid_reload(loaded_mm, loaded_mm_asid)) { + switch_mm_irqs_off(NULL, loaded_mm, NULL); + loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); + } + + /* Broadcast ASIDs are always kept up to date with INVLPGB. */ -+ if (is_broadcast_asid(loaded_mm_asid)) ++ if (is_global_asid(loaded_mm_asid)) + return; + VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) != loaded_mm->context.ctx_id); -@@ -786,6 +1069,8 @@ static void flush_tlb_func(void *info) +@@ -786,6 +1119,8 @@ static void flush_tlb_func(void *info) return; } @@ -829,32 +1009,7 @@ index a2becb85bea7..0080175153ef 100644 if (unlikely(f->new_tlb_gen != TLB_GENERATION_INVALID && f->new_tlb_gen <= local_tlb_gen)) { /* -@@ -825,7 +1110,7 @@ static void flush_tlb_func(void *info) - * - * The only question is whether to do a full or partial flush. - * -- * We do a partial flush if requested and two extra conditions -+ * We do a partial flush if requested and three extra conditions - * are met: - * - * 1. f->new_tlb_gen == local_tlb_gen + 1. We have an invariant that -@@ -852,10 +1137,14 @@ static void flush_tlb_func(void *info) - * date. By doing a full flush instead, we can increase - * local_tlb_gen all the way to mm_tlb_gen and we can probably - * avoid another flush in the very near future. -+ * -+ * 3. No page tables were freed. If page tables were freed, a full -+ * flush ensures intermediate translations in the TLB get flushed. - */ - if (f->end != TLB_FLUSH_ALL && - f->new_tlb_gen == local_tlb_gen + 1 && -- f->new_tlb_gen == mm_tlb_gen) { -+ f->new_tlb_gen == mm_tlb_gen && -+ !f->freed_tables) { - /* Partial flush */ - unsigned long addr = f->start; - -@@ -926,7 +1215,7 @@ STATIC_NOPV void native_flush_tlb_multi(const struct cpumask *cpumask, +@@ -926,7 +1261,7 @@ STATIC_NOPV void native_flush_tlb_multi(const struct cpumask *cpumask, * up on the new contents of what used to be page tables, while * doing a speculative memory access. 
*/ @@ -863,102 +1018,155 @@ index a2becb85bea7..0080175153ef 100644 on_each_cpu_mask(cpumask, flush_tlb_func, (void *)info, true); else on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func, -@@ -998,14 +1287,18 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, +@@ -981,6 +1316,15 @@ static struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm, + info->new_tlb_gen = new_tlb_gen; + info->initiating_cpu = smp_processor_id(); + ++ /* ++ * If the number of flushes is so large that a full flush ++ * would be faster, do a full flush. ++ */ ++ if ((end - start) >> stride_shift > tlb_single_page_flush_ceiling) { ++ info->start = 0; ++ info->end = TLB_FLUSH_ALL; ++ } ++ + return info; + } + +@@ -998,17 +1342,8 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, bool freed_tables) { struct flush_tlb_info *info; -+ unsigned long threshold = tlb_single_page_flush_ceiling; ++ int cpu = get_cpu(); u64 new_tlb_gen; - int cpu; - -+ if (static_cpu_has(X86_FEATURE_INVLPGB)) -+ threshold *= invlpgb_count_max; -+ - cpu = get_cpu(); - - /* Should we flush just the requested range? */ - if ((end == TLB_FLUSH_ALL) || +- int cpu; +- +- cpu = get_cpu(); +- +- /* Should we flush just the requested range? */ +- if ((end == TLB_FLUSH_ALL) || - ((end - start) >> stride_shift) > tlb_single_page_flush_ceiling) { -+ ((end - start) >> stride_shift) > threshold) { - start = 0; - end = TLB_FLUSH_ALL; - } -@@ -1021,8 +1314,11 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, +- start = 0; +- end = TLB_FLUSH_ALL; +- } + + /* This is also a barrier that synchronizes with switch_mm(). */ + new_tlb_gen = inc_mm_tlb_gen(mm); +@@ -1021,8 +1356,11 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, * a local TLB flush is needed. Optimize this use-case by calling * flush_tlb_func_local() directly in this case. 
*/ - if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) { -+ if (IS_ENABLED(CONFIG_CPU_SUP_AMD) && mm_broadcast_asid(mm)) { ++ if (mm_global_asid(mm)) { + broadcast_tlb_flush(info); + } else if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) { flush_tlb_multi(mm_cpumask(mm), info); -+ count_tlb_flush(mm); ++ consider_global_asid(mm); } else if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) { lockdep_assert_irqs_enabled(); local_irq_disable(); -@@ -1045,9 +1341,41 @@ static void do_flush_tlb_all(void *info) +@@ -1036,6 +1374,19 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, + } + + ++static bool broadcast_flush_tlb_all(void) ++{ ++ if (!IS_ENABLED(CONFIG_X86_BROADCAST_TLB_FLUSH)) ++ return false; ++ ++ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) ++ return false; ++ ++ guard(preempt)(); ++ invlpgb_flush_all(); ++ return true; ++} ++ + static void do_flush_tlb_all(void *info) + { + count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); +@@ -1044,10 +1395,36 @@ static void do_flush_tlb_all(void *info) + void flush_tlb_all(void) { - count_vm_tlb_event(NR_TLB_REMOTE_FLUSH); -+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) { -+ guard(preempt)(); -+ invlpgb_flush_all(); -+ tlbsync(); ++ if (broadcast_flush_tlb_all()) + return; -+ } + count_vm_tlb_event(NR_TLB_REMOTE_FLUSH); on_each_cpu(do_flush_tlb_all, NULL, 1); } -+static void broadcast_kernel_range_flush(unsigned long start, unsigned long end) ++static bool broadcast_kernel_range_flush(struct flush_tlb_info *info) +{ + unsigned long addr; -+ unsigned long maxnr = invlpgb_count_max; -+ unsigned long threshold = tlb_single_page_flush_ceiling * maxnr; ++ unsigned long nr; + -+ /* -+ * TLBSYNC only waits for flushes originating on the same CPU. -+ * Disabling migration allows us to wait on all flushes. 
-+ */ -+ guard(preempt)(); ++ if (!IS_ENABLED(CONFIG_X86_BROADCAST_TLB_FLUSH)) ++ return false; ++ ++ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) ++ return false; + -+ if (end == TLB_FLUSH_ALL || -+ (end - start) > threshold << PAGE_SHIFT) { ++ if (info->end == TLB_FLUSH_ALL) { + invlpgb_flush_all(); -+ } else { -+ unsigned long nr; -+ for (addr = start; addr < end; addr += nr << PAGE_SHIFT) { -+ nr = min((end - addr) >> PAGE_SHIFT, maxnr); -+ invlpgb_flush_addr(addr, nr); -+ } ++ return true; + } + ++ for (addr = info->start; addr < info->end; addr += nr << PAGE_SHIFT) { ++ nr = min((info->end - addr) >> PAGE_SHIFT, invlpgb_count_max); ++ invlpgb_flush_addr_nosync(addr, nr); ++ } + tlbsync(); ++ return true; +} + static void do_kernel_range_flush(void *info) { struct flush_tlb_info *f = info; -@@ -1060,6 +1388,11 @@ static void do_kernel_range_flush(void *info) +@@ -1060,22 +1437,21 @@ static void do_kernel_range_flush(void *info) void flush_tlb_kernel_range(unsigned long start, unsigned long end) { -+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) { -+ broadcast_kernel_range_flush(start, end); -+ return; -+ } -+ - /* Balance as user space task's flush, a bit conservative */ - if (end == TLB_FLUSH_ALL || - (end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) { -@@ -1244,7 +1577,6 @@ EXPORT_SYMBOL_GPL(__flush_tlb_all); - void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) - { - struct flush_tlb_info *info; -- +- /* Balance as user space task's flush, a bit conservative */ +- if (end == TLB_FLUSH_ALL || +- (end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) { +- on_each_cpu(do_flush_tlb_all, NULL, 1); +- } else { +- struct flush_tlb_info *info; ++ struct flush_tlb_info *info; + +- preempt_disable(); +- info = get_flush_tlb_info(NULL, start, end, 0, false, +- TLB_GENERATION_INVALID); ++ guard(preempt)(); + ++ info = get_flush_tlb_info(NULL, start, end, PAGE_SHIFT, false, ++ TLB_GENERATION_INVALID); ++ ++ if (broadcast_kernel_range_flush(info)) ++ ; /* Fall through. */ ++ else if (info->end == TLB_FLUSH_ALL) ++ on_each_cpu(do_flush_tlb_all, NULL, 1); ++ else + on_each_cpu(do_kernel_range_flush, info, 1); + +- put_flush_tlb_info(); +- preempt_enable(); +- } ++ put_flush_tlb_info(); + } + + /* +@@ -1247,7 +1623,7 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) + int cpu = get_cpu(); - info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false, -@@ -1263,12 +1595,49 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) +- info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false, ++ info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, PAGE_SHIFT, false, + TLB_GENERATION_INVALID); + /* + * flush_tlb_multi() is not optimized for the common case in which only +@@ -1263,12 +1639,62 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) local_irq_enable(); } @@ -983,8 +1191,8 @@ index a2becb85bea7..0080175153ef 100644 + struct mm_struct *mm, + unsigned long uaddr) +{ -+ if (static_cpu_has(X86_FEATURE_INVLPGB) && mm_broadcast_asid(mm)) { -+ u16 asid = mm_broadcast_asid(mm); ++ if (static_cpu_has(X86_FEATURE_INVLPGB) && mm_global_asid(mm)) { ++ u16 asid = mm_global_asid(mm); + /* + * Queue up an asynchronous invalidation. 
The corresponding + * TLBSYNC is done in arch_tlbbatch_flush(), and must be done @@ -994,11 +1202,24 @@ index a2becb85bea7..0080175153ef 100644 + batch->used_invlpgb = true; + migrate_disable(); + } -+ invlpgb_flush_user_nr(kern_pcid(asid), uaddr, 1, 0); ++ invlpgb_flush_user_nr_nosync(kern_pcid(asid), uaddr, 1, false, false); + /* Do any CPUs supporting INVLPGB need PTI? */ + if (static_cpu_has(X86_FEATURE_PTI)) -+ invlpgb_flush_user_nr(user_pcid(asid), uaddr, 1, 0); ++ invlpgb_flush_user_nr_nosync(user_pcid(asid), uaddr, 1, false, false); ++ ++ /* ++ * Some CPUs might still be using a local ASID for this ++ * process, and require IPIs, while others are using the ++ * global ASID. ++ * ++ * In this corner case we need to do both the broadcast ++ * TLB invalidation, and send IPIs. The IPIs will help ++ * stragglers transition to the broadcast ASID. ++ */ ++ if (READ_ONCE(mm->context.asid_transition)) ++ goto also_send_ipi; + } else { ++also_send_ipi: + inc_mm_tlb_gen(mm); + cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm)); + } @@ -1084,6 +1305,26 @@ index bb2119e5a0d0..a593d5edfd88 100644 tlb_gather_mmu(&tlb, vms->vma->vm_mm); update_hiwater_rss(vms->vma->vm_mm); unmap_vmas(&tlb, mas_detach, vms->vma, vms->start, vms->end, +diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h +index 3ae84c3b8e6d..dc1c1057f26e 100644 +--- a/tools/arch/x86/include/asm/msr-index.h ++++ b/tools/arch/x86/include/asm/msr-index.h +@@ -25,6 +25,7 @@ + #define _EFER_SVME 12 /* Enable virtualization */ + #define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */ + #define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ ++#define _EFER_TCE 15 /* Enable Translation Cache Extensions */ + #define _EFER_AUTOIBRS 21 /* Enable Automatic IBRS */ + + #define EFER_SCE (1<<_EFER_SCE) +@@ -34,6 +35,7 @@ + #define EFER_SVME (1<<_EFER_SVME) + #define EFER_LMSLE (1<<_EFER_LMSLE) + #define EFER_FFXSR (1<<_EFER_FFXSR) ++#define EFER_TCE (1<<_EFER_TCE) + #define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS) + + /* -- 2.48.0.rc1 diff --git a/6.13/0003-bbr3.patch b/6.13/0003-bbr3.patch index 75d9ec9b..8967721b 100644 --- a/6.13/0003-bbr3.patch +++ b/6.13/0003-bbr3.patch @@ -1,6 +1,6 @@ -From 9341991abd224336e551e90c7179e2e221fdf466 Mon Sep 17 00:00:00 2001 +From 1fc2e15c0c690b276928953ff73277b4d66e67f3 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:37:35 +0100 +Date: Mon, 20 Jan 2025 13:21:45 +0100 Subject: [PATCH 03/12] bbr3 Signed-off-by: Peter Jung diff --git a/6.13/0004-cachy.patch b/6.13/0004-cachy.patch index 77d9cf11..fa60f8b7 100644 --- a/6.13/0004-cachy.patch +++ b/6.13/0004-cachy.patch @@ -1,6 +1,6 @@ -From 32089eb0a217a8d425f387e5e613d498ad760f34 Mon Sep 17 00:00:00 2001 +From e01619bda1e69eea53c0f3ef61476fb02da06868 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:37:49 +0100 +Date: Mon, 20 Jan 2025 13:21:55 +0100 Subject: [PATCH 04/12] cachy Signed-off-by: Peter Jung @@ -19,12 +19,23 @@ Signed-off-by: Peter Jung drivers/cpufreq/Kconfig.x86 | 2 - drivers/cpufreq/intel_pstate.c | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + + drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c | 44 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h | 1 + + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 +- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 10 + + drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c | 19 + + drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h | 1 + + drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 1 + drivers/gpu/drm/amd/display/Kconfig | 6 + - 
.../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- + .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 69 +- + .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 7 + .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 2 +- .../amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 6 +- .../amd/display/amdgpu_dm/amdgpu_dm_plane.c | 6 +- + .../drm/amd/display/dc/bios/bios_parser2.c | 13 +- + .../drm/amd/display/dc/core/dc_link_exports.c | 6 + + drivers/gpu/drm/amd/display/dc/dc.h | 3 + + .../dc/resource/dce120/dce120_resource.c | 17 + drivers/gpu/drm/amd/pm/amdgpu_pm.c | 3 + drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 14 +- drivers/input/evdev.c | 19 +- @@ -66,7 +77,7 @@ Signed-off-by: Peter Jung mm/vmpressure.c | 4 + mm/vmscan.c | 143 + net/ipv4/inet_connection_sock.c | 2 +- - 61 files changed, 6557 insertions(+), 65 deletions(-) + 72 files changed, 6714 insertions(+), 93 deletions(-) create mode 100644 drivers/media/v4l2-core/v4l2loopback.c create mode 100644 drivers/media/v4l2-core/v4l2loopback.h create mode 100644 drivers/media/v4l2-core/v4l2loopback_formats.h @@ -203,7 +214,7 @@ index f48eaa98d22d..fc777c14cff6 100644 unprivileged_userfaultfd ======================== diff --git a/Makefile b/Makefile -index e20a62ad397f..9a63ab456ffc 100644 +index b9464c88ac72..ea555e6a8bf1 100644 --- a/Makefile +++ b/Makefile @@ -860,11 +860,19 @@ KBUILD_CFLAGS += -fno-delete-null-pointer-checks @@ -227,7 +238,7 @@ index e20a62ad397f..9a63ab456ffc 100644 # depends on `opt-level` and `debug-assertions`, respectively. KBUILD_RUSTFLAGS += -Cdebug-assertions=$(if $(CONFIG_RUST_DEBUG_ASSERTIONS),y,n) diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu -index 2a7279d80460..301ced02b077 100644 +index bacdc502903f..f2c97bdcef58 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -155,9 +155,8 @@ config MPENTIUM4 @@ -1070,6 +1081,118 @@ index 4653a8d2823a..6590e83dfbf0 100644 extern int amdgpu_vis_vram_limit; extern int amdgpu_gart_size; extern int amdgpu_gtt_size; +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +index 093141ad6ed0..e476e45b996a 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +@@ -36,13 +36,6 @@ + #include "atombios_encoders.h" + #include "bif/bif_4_1_d.h" + +-static void amdgpu_atombios_lookup_i2c_gpio_quirks(struct amdgpu_device *adev, +- ATOM_GPIO_I2C_ASSIGMENT *gpio, +- u8 index) +-{ +- +-} +- + static struct amdgpu_i2c_bus_rec amdgpu_atombios_get_bus_rec_for_i2c_gpio(ATOM_GPIO_I2C_ASSIGMENT *gpio) + { + struct amdgpu_i2c_bus_rec i2c; +@@ -108,9 +101,6 @@ struct amdgpu_i2c_bus_rec amdgpu_atombios_lookup_i2c_gpio(struct amdgpu_device * + + gpio = &i2c_info->asGPIO_Info[0]; + for (i = 0; i < num_indices; i++) { +- +- amdgpu_atombios_lookup_i2c_gpio_quirks(adev, gpio, i); +- + if (gpio->sucI2cId.ucAccess == id) { + i2c = amdgpu_atombios_get_bus_rec_for_i2c_gpio(gpio); + break; +@@ -142,8 +132,6 @@ void amdgpu_atombios_i2c_init(struct amdgpu_device *adev) + + gpio = &i2c_info->asGPIO_Info[0]; + for (i = 0; i < num_indices; i++) { +- amdgpu_atombios_lookup_i2c_gpio_quirks(adev, gpio, i); +- + i2c = amdgpu_atombios_get_bus_rec_for_i2c_gpio(gpio); + + if (i2c.valid) { +@@ -156,6 +144,38 @@ void amdgpu_atombios_i2c_init(struct amdgpu_device *adev) + } + } + ++void amdgpu_atombios_oem_i2c_init(struct amdgpu_device *adev, u8 i2c_id) ++{ ++ struct atom_context *ctx = adev->mode_info.atom_context; ++ ATOM_GPIO_I2C_ASSIGMENT *gpio; ++ struct amdgpu_i2c_bus_rec i2c; ++ int index = 
GetIndexIntoMasterTable(DATA, GPIO_I2C_Info); ++ struct _ATOM_GPIO_I2C_INFO *i2c_info; ++ uint16_t data_offset, size; ++ int i, num_indices; ++ char stmp[32]; ++ ++ if (amdgpu_atom_parse_data_header(ctx, index, &size, NULL, NULL, &data_offset)) { ++ i2c_info = (struct _ATOM_GPIO_I2C_INFO *)(ctx->bios + data_offset); ++ ++ num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) / ++ sizeof(ATOM_GPIO_I2C_ASSIGMENT); ++ ++ gpio = &i2c_info->asGPIO_Info[0]; ++ for (i = 0; i < num_indices; i++) { ++ i2c = amdgpu_atombios_get_bus_rec_for_i2c_gpio(gpio); ++ ++ if (i2c.valid && i2c.i2c_id == i2c_id) { ++ sprintf(stmp, "OEM 0x%x", i2c.i2c_id); ++ adev->i2c_bus[i] = amdgpu_i2c_create(adev_to_drm(adev), &i2c, stmp); ++ break; ++ } ++ gpio = (ATOM_GPIO_I2C_ASSIGMENT *) ++ ((u8 *)gpio + sizeof(ATOM_GPIO_I2C_ASSIGMENT)); ++ } ++ } ++} ++ + struct amdgpu_gpio_rec + amdgpu_atombios_lookup_gpio(struct amdgpu_device *adev, + u8 id) +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h +index 0e16432d9a72..867bc5c5ce67 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h +@@ -136,6 +136,7 @@ amdgpu_atombios_lookup_gpio(struct amdgpu_device *adev, + struct amdgpu_i2c_bus_rec amdgpu_atombios_lookup_i2c_gpio(struct amdgpu_device *adev, + uint8_t id); + void amdgpu_atombios_i2c_init(struct amdgpu_device *adev); ++void amdgpu_atombios_oem_i2c_init(struct amdgpu_device *adev, u8 i2c_id); + + bool amdgpu_atombios_has_dce_engine_info(struct amdgpu_device *adev); + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +index cd4fac120834..1ab433d774cc 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +@@ -4461,8 +4461,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, + goto failed; + } + /* init i2c buses */ +- if (!amdgpu_device_has_dc_support(adev)) +- amdgpu_atombios_i2c_init(adev); ++ amdgpu_i2c_init(adev); + } + } + +@@ -4724,8 +4723,7 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) + amdgpu_reset_fini(adev); + + /* free i2c buses */ +- if (!amdgpu_device_has_dc_support(adev)) +- amdgpu_i2c_fini(adev); ++ amdgpu_i2c_fini(adev); + + if (amdgpu_emu_mode != 1) + amdgpu_atombios_fini(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 38686203bea6..811d020f3f4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1098,6 +1221,60 @@ index 38686203bea6..811d020f3f4b 100644 /** * DOC: vramlimit (int) * Restrict the total amount of VRAM in MiB for testing. The default is 0 (Use full VRAM). 
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c +index f0765ccde668..8179d0814db9 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c +@@ -225,6 +225,25 @@ void amdgpu_i2c_destroy(struct amdgpu_i2c_chan *i2c) + kfree(i2c); + } + ++void amdgpu_i2c_init(struct amdgpu_device *adev) ++{ ++ if (!adev->is_atom_fw) { ++ if (!amdgpu_device_has_dc_support(adev)) { ++ amdgpu_atombios_i2c_init(adev); ++ } else { ++ switch (adev->asic_type) { ++ case CHIP_POLARIS10: ++ case CHIP_POLARIS11: ++ case CHIP_POLARIS12: ++ amdgpu_atombios_oem_i2c_init(adev, 0x97); ++ break; ++ default: ++ break; ++ } ++ } ++ } ++} ++ + /* remove all the buses */ + void amdgpu_i2c_fini(struct amdgpu_device *adev) + { +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h +index 21e3d1dad0a1..1d3d3806e0dd 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h +@@ -28,6 +28,7 @@ struct amdgpu_i2c_chan *amdgpu_i2c_create(struct drm_device *dev, + const struct amdgpu_i2c_bus_rec *rec, + const char *name); + void amdgpu_i2c_destroy(struct amdgpu_i2c_chan *i2c); ++void amdgpu_i2c_init(struct amdgpu_device *adev); + void amdgpu_i2c_fini(struct amdgpu_device *adev); + struct amdgpu_i2c_chan * + amdgpu_i2c_lookup(struct amdgpu_device *adev, +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +index 5e3faefc5510..6da4f946cac0 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +@@ -609,6 +609,7 @@ struct amdgpu_i2c_adapter { + struct i2c_adapter base; + + struct ddc_service *ddc_service; ++ bool oem; + }; + + #define TO_DM_AUX(x) container_of((x), struct amdgpu_dm_dp_aux, aux) diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index 11e3f2f3b174..7b1bd69dc29e 100644 --- a/drivers/gpu/drm/amd/display/Kconfig @@ -1114,10 +1291,73 @@ index 11e3f2f3b174..7b1bd69dc29e 100644 + endmenu diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c -index cd16dae534dc..1508978f92dd 100644 +index 5f216d626cbb..382af92c4ff1 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c -@@ -4516,7 +4516,7 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) +@@ -177,6 +177,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev); + static void amdgpu_dm_fini(struct amdgpu_device *adev); + static bool is_freesync_video_mode(const struct drm_display_mode *mode, struct amdgpu_dm_connector *aconnector); + static void reset_freesync_config_for_crtc(struct dm_crtc_state *new_crtc_state); ++static struct amdgpu_i2c_adapter * ++create_i2c(struct ddc_service *ddc_service, bool oem); + + static enum drm_mode_subconnector get_subconnector_type(struct dc_link *link) + { +@@ -2839,6 +2841,33 @@ static int amdgpu_dm_smu_write_watermarks_table(struct amdgpu_device *adev) + return 0; + } + ++static int dm_oem_i2c_hw_init(struct amdgpu_device *adev) ++{ ++ struct amdgpu_display_manager *dm = &adev->dm; ++ struct amdgpu_i2c_adapter *oem_i2c; ++ struct ddc_service *oem_ddc_service; ++ int r; ++ ++ oem_ddc_service = dc_get_oem_i2c_device(adev->dm.dc); ++ if (oem_ddc_service) { ++ oem_i2c = create_i2c(oem_ddc_service, true); ++ if (!oem_i2c) { ++ dev_info(adev->dev, "Failed to create oem i2c adapter data\n"); ++ return -ENOMEM; ++ 
} ++ ++ r = i2c_add_adapter(&oem_i2c->base); ++ if (r) { ++ dev_info(adev->dev, "Failed to register oem i2c\n"); ++ kfree(oem_i2c); ++ return r; ++ } ++ dm->oem_i2c = oem_i2c; ++ } ++ ++ return 0; ++} ++ + /** + * dm_hw_init() - Initialize DC device + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. +@@ -2870,6 +2899,10 @@ static int dm_hw_init(struct amdgpu_ip_block *ip_block) + return r; + amdgpu_dm_hpd_init(adev); + ++ r = dm_oem_i2c_hw_init(adev); ++ if (r) ++ dev_info(adev->dev, "Failed to add OEM i2c bus\n"); ++ + return 0; + } + +@@ -2885,6 +2918,8 @@ static int dm_hw_fini(struct amdgpu_ip_block *ip_block) + { + struct amdgpu_device *adev = ip_block->adev; + ++ kfree(adev->dm.oem_i2c); ++ + amdgpu_dm_hpd_fini(adev); + + amdgpu_dm_irq_fini(adev); +@@ -4516,7 +4551,7 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) return r; } @@ -1126,6 +1366,93 @@ index cd16dae534dc..1508978f92dd 100644 if (amdgpu_dm_create_color_properties(adev)) { dc_state_release(state->context); kfree(state); +@@ -8218,7 +8253,7 @@ static int amdgpu_dm_i2c_xfer(struct i2c_adapter *i2c_adap, + int i; + int result = -EIO; + +- if (!ddc_service->ddc_pin || !ddc_service->ddc_pin->hw_info.hw_supported) ++ if (!ddc_service->ddc_pin) + return result; + + cmd.payloads = kcalloc(num, sizeof(struct i2c_payload), GFP_KERNEL); +@@ -8237,11 +8272,18 @@ static int amdgpu_dm_i2c_xfer(struct i2c_adapter *i2c_adap, + cmd.payloads[i].data = msgs[i].buf; + } + +- if (dc_submit_i2c( +- ddc_service->ctx->dc, +- ddc_service->link->link_index, +- &cmd)) +- result = num; ++ if (i2c->oem) { ++ if (dc_submit_i2c_oem( ++ ddc_service->ctx->dc, ++ &cmd)) ++ result = num; ++ } else { ++ if (dc_submit_i2c( ++ ddc_service->ctx->dc, ++ ddc_service->link->link_index, ++ &cmd)) ++ result = num; ++ } + + kfree(cmd.payloads); + return result; +@@ -8258,9 +8300,7 @@ static const struct i2c_algorithm amdgpu_dm_i2c_algo = { + }; + + static struct amdgpu_i2c_adapter * +-create_i2c(struct ddc_service *ddc_service, +- int link_index, +- int *res) ++create_i2c(struct ddc_service *ddc_service, bool oem) + { + struct amdgpu_device *adev = ddc_service->ctx->driver_context; + struct amdgpu_i2c_adapter *i2c; +@@ -8271,9 +8311,14 @@ create_i2c(struct ddc_service *ddc_service, + i2c->base.owner = THIS_MODULE; + i2c->base.dev.parent = &adev->pdev->dev; + i2c->base.algo = &amdgpu_dm_i2c_algo; +- snprintf(i2c->base.name, sizeof(i2c->base.name), "AMDGPU DM i2c hw bus %d", link_index); ++ if (oem) ++ snprintf(i2c->base.name, sizeof(i2c->base.name), "AMDGPU DM i2c OEM bus"); ++ else ++ snprintf(i2c->base.name, sizeof(i2c->base.name), "AMDGPU DM i2c hw bus %d", ++ ddc_service->link->link_index); + i2c_set_adapdata(&i2c->base, i2c); + i2c->ddc_service = ddc_service; ++ i2c->oem = oem; + + return i2c; + } +@@ -8298,7 +8343,7 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm, + link->priv = aconnector; + + +- i2c = create_i2c(link->ddc, link->link_index, &res); ++ i2c = create_i2c(link->ddc, false); + if (!i2c) { + DRM_ERROR("Failed to create i2c adapter data\n"); + return -ENOMEM; +diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +index 2227cd8e4a89..5710776bb0e2 100644 +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +@@ -606,6 +606,13 @@ struct amdgpu_display_manager { + * Bounding box data read from dmub during early initialization for DCN4+ + */ + struct dml2_soc_bb 
*bb_from_dmub; ++ ++ /** ++ * @oem_i2c: ++ * ++ * OEM i2c bus ++ */ ++ struct amdgpu_i2c_adapter *oem_i2c; + }; + + enum dsc_clock_force_state { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index ebabfe3a512f..4d3ebcaacca1 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -1140,7 +1467,7 @@ index ebabfe3a512f..4d3ebcaacca1 100644 * * AMD driver supports pre-defined mathematical functions for transferring diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c -index 64a041c2af05..08790bcfe109 100644 +index 36a830a7440f..a8fc8bd52d51 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -470,7 +470,7 @@ static int amdgpu_dm_crtc_late_register(struct drm_crtc *crtc) @@ -1201,6 +1528,124 @@ index 495e3cd70426..704a48209657 100644 dm_atomic_plane_attach_color_mgmt_properties(dm, plane); #endif /* Create (reset) the plane state */ +diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +index c9a6de110b74..470ec970217b 100644 +--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c ++++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +@@ -1778,6 +1778,7 @@ static enum bp_result get_firmware_info_v3_1( + struct dc_firmware_info *info) + { + struct atom_firmware_info_v3_1 *firmware_info; ++ struct atom_firmware_info_v3_2 *firmware_info32; + struct atom_display_controller_info_v4_1 *dce_info = NULL; + + if (!info) +@@ -1785,6 +1786,8 @@ static enum bp_result get_firmware_info_v3_1( + + firmware_info = GET_IMAGE(struct atom_firmware_info_v3_1, + DATA_TABLES(firmwareinfo)); ++ firmware_info32 = GET_IMAGE(struct atom_firmware_info_v3_2, ++ DATA_TABLES(firmwareinfo)); + + dce_info = GET_IMAGE(struct atom_display_controller_info_v4_1, + DATA_TABLES(dce_info)); +@@ -1817,7 +1820,15 @@ static enum bp_result get_firmware_info_v3_1( + bp->cmd_tbl.get_smu_clock_info(bp, SMU9_SYSPLL0_ID) * 10; + } + +- info->oem_i2c_present = false; ++ /* These fields are marked as reserved in v3_1, but they appear to be populated ++ * properly. 
++ */ ++ if (firmware_info32->board_i2c_feature_id == 0x2) { ++ info->oem_i2c_present = true; ++ info->oem_i2c_obj_id = firmware_info32->board_i2c_feature_gpio_id; ++ } else { ++ info->oem_i2c_present = false; ++ } + + return BP_RESULT_OK; + } +diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c +index 457d60eeb486..13636eb4ec3f 100644 +--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c ++++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c +@@ -142,6 +142,12 @@ bool dc_link_update_dsc_config(struct pipe_ctx *pipe_ctx) + return link->dc->link_srv->update_dsc_config(pipe_ctx); + } + ++struct ddc_service * ++dc_get_oem_i2c_device(struct dc *dc) ++{ ++ return dc->res_pool->oem_device; ++} ++ + bool dc_is_oem_i2c_device_present( + struct dc *dc, + size_t slave_address) +diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h +index 08c5a315b3a6..70d6005ecd64 100644 +--- a/drivers/gpu/drm/amd/display/dc/dc.h ++++ b/drivers/gpu/drm/amd/display/dc/dc.h +@@ -1939,6 +1939,9 @@ int dc_link_aux_transfer_raw(struct ddc_service *ddc, + struct aux_payload *payload, + enum aux_return_code_type *operation_result); + ++struct ddc_service * ++dc_get_oem_i2c_device(struct dc *dc); ++ + bool dc_is_oem_i2c_device_present( + struct dc *dc, + size_t slave_address +diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c +index c63c59623433..eb1e158d3436 100644 +--- a/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c ++++ b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c +@@ -67,6 +67,7 @@ + #include "reg_helper.h" + + #include "dce100/dce100_resource.h" ++#include "link.h" + + #ifndef mmDP0_DP_DPHY_INTERNAL_CTRL + #define mmDP0_DP_DPHY_INTERNAL_CTRL 0x210f +@@ -659,6 +660,12 @@ static void dce120_resource_destruct(struct dce110_resource_pool *pool) + + if (pool->base.dmcu != NULL) + dce_dmcu_destroy(&pool->base.dmcu); ++ ++ if (pool->base.oem_device != NULL) { ++ struct dc *dc = pool->base.oem_device->ctx->dc; ++ ++ dc->link_srv->destroy_ddc_service(&pool->base.oem_device); ++ } + } + + static void read_dce_straps( +@@ -1054,6 +1061,7 @@ static bool dce120_resource_construct( + struct dc *dc, + struct dce110_resource_pool *pool) + { ++ struct ddc_service_init_data ddc_init_data = {0}; + unsigned int i; + int j; + struct dc_context *ctx = dc->ctx; +@@ -1257,6 +1265,15 @@ static bool dce120_resource_construct( + + bw_calcs_data_update_from_pplib(dc); + ++ if (dc->ctx->dc_bios->fw_info.oem_i2c_present) { ++ ddc_init_data.ctx = dc->ctx; ++ ddc_init_data.link = NULL; ++ ddc_init_data.id.id = dc->ctx->dc_bios->fw_info.oem_i2c_obj_id; ++ ddc_init_data.id.enum_id = 0; ++ ddc_init_data.id.type = OBJECT_TYPE_GENERIC; ++ pool->base.oem_device = dc->link_srv->create_ddc_service(&ddc_init_data); ++ } ++ + return true; + + irqs_create_fail: diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index e8ae7681bf0a..8a0d873983f3 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -7128,7 +7573,7 @@ index 2ddb827e3bea..464049c4af3f 100644 return state; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index 3e9ca38512de..463fe1dc6de8 100644 +index 26958431deb7..8c0f17a96d4f 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -73,10 +73,19 @@ unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG; @@ -7408,7 +7853,7 @@ index 
a2b16b08cbbf..48d611e58ad3 100644 static int __read_mostly sysctl_compact_memory; diff --git a/mm/huge_memory.c b/mm/huge_memory.c -index e53d83b3e5cf..b4c205f2042a 100644 +index db64116a4f84..3e0266c973e1 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -64,7 +64,11 @@ unsigned long transparent_hugepage_flags __read_mostly = @@ -7436,7 +7881,7 @@ index 24b68b425afb..081ddb92db87 100644 /* diff --git a/mm/page-writeback.c b/mm/page-writeback.c -index d213ead95675..0430a97b30fd 100644 +index d9861e42b2bd..13ab2294f0bb 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -71,7 +71,11 @@ static long ratelimit_pages = 32; @@ -7464,7 +7909,7 @@ index d213ead95675..0430a97b30fd 100644 EXPORT_SYMBOL_GPL(dirty_writeback_interval); diff --git a/mm/page_alloc.c b/mm/page_alloc.c -index cae7b93864c2..57038052c153 100644 +index 01eab25edf89..3ea393f1311a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -271,7 +271,11 @@ const char * const migratetype_names[MIGRATE_TYPES] = { @@ -7517,7 +7962,7 @@ index bd5183dfd879..3a410f53a07c 100644 /* diff --git a/mm/vmscan.c b/mm/vmscan.c -index 9a859b7d18d7..ec7f96bb0e9f 100644 +index b1ec5ece067e..e258174d240a 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -148,6 +148,15 @@ struct scan_control { @@ -7712,7 +8157,7 @@ index 9a859b7d18d7..ec7f96bb0e9f 100644 else if (min_seq[LRU_GEN_ANON] < min_seq[LRU_GEN_FILE]) type = LRU_GEN_ANON; else if (swappiness == 1) -@@ -4826,6 +4965,8 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc) +@@ -4829,6 +4968,8 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc) struct mem_cgroup *memcg = lruvec_memcg(lruvec); struct pglist_data *pgdat = lruvec_pgdat(lruvec); @@ -7721,7 +8166,7 @@ index 9a859b7d18d7..ec7f96bb0e9f 100644 /* lru_gen_age_node() called mem_cgroup_calculate_protection() */ if (mem_cgroup_below_min(NULL, memcg)) return MEMCG_LRU_YOUNG; -@@ -5974,6 +6115,8 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) +@@ -5977,6 +6118,8 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) prepare_scan_control(pgdat, sc); diff --git a/6.13/0005-crypto.patch b/6.13/0005-crypto.patch index e6f240c0..e19af187 100644 --- a/6.13/0005-crypto.patch +++ b/6.13/0005-crypto.patch @@ -1,6 +1,6 @@ -From 8d1fa2a8636c551dd33500837e87e2c3f889d95c Mon Sep 17 00:00:00 2001 +From 7bc012030531a472b823293e167a86cd58da545c Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:38:01 +0100 +Date: Mon, 20 Jan 2025 13:22:05 +0100 Subject: [PATCH 05/12] crypto Signed-off-by: Peter Jung diff --git a/6.13/0006-fixes.patch b/6.13/0006-fixes.patch index 18a8692e..aaa180f2 100644 --- a/6.13/0006-fixes.patch +++ b/6.13/0006-fixes.patch @@ -1,22 +1,22 @@ -From e094aa9f2a3d8ac13a8bca382f0f5585f80926ee Mon Sep 17 00:00:00 2001 +From 2f514dfe8b006e7fa976b6265bef4b8efb81ec11 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:38:11 +0100 +Date: Mon, 20 Jan 2025 13:22:15 +0100 Subject: [PATCH 06/12] fixes Signed-off-by: Peter Jung --- arch/Kconfig | 4 +- - drivers/acpi/acpi_video.c | 50 +++++++++++-------- .../link/protocols/link_edp_panel_control.c | 3 +- - drivers/gpu/drm/drm_edid.c | 47 +++++++++++++++-- - drivers/gpu/drm/nouveau/nouveau_acpi.c | 2 +- + drivers/gpu/drm/drm_edid.c | 47 +++++++++++++++++-- + drivers/hid/hid-asus.c | 26 ++++++++++ drivers/hid/hid-ids.h | 1 + + include/linux/platform_data/x86/asus-wmi.h | 5 ++ kernel/fork.c | 9 ++-- - kernel/kprobes.c | 23 ++++----- + kernel/kprobes.c | 23 +++++---- kernel/sched/ext.c | 
4 +- scripts/package/PKGBUILD | 5 ++ - sound/pci/hda/patch_realtek.c | 2 + - 11 files changed, 103 insertions(+), 47 deletions(-) + sound/pci/hda/patch_realtek.c | 4 +- + 11 files changed, 105 insertions(+), 26 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 6682b2a53e34..fe54298ae05c 100644 @@ -40,99 +40,6 @@ index 6682b2a53e34..fe54298ae05c 100644 depends on HAVE_ARCH_MMAP_RND_COMPAT_BITS help This value can be used to select the number of bits to use to -diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c -index 8274a17872ed..3c627bdf2d1b 100644 ---- a/drivers/acpi/acpi_video.c -+++ b/drivers/acpi/acpi_video.c -@@ -610,16 +610,29 @@ acpi_video_device_lcd_get_level_current(struct acpi_video_device *device, - return 0; - } - -+/* -+ * Arg: -+ * device : video output device (LCD, CRT, ..) -+ * edid : address for returned EDID pointer -+ * length : _DDC length to request (must be a multiple of 128) -+ * -+ * Return Value: -+ * Length of EDID (positive value) or error (negative value) -+ * -+ * Get EDID from ACPI _DDC. On success, a pointer to the EDID data is written -+ * to the edid address, and the length of the EDID is returned. The caller is -+ * responsible for freeing the edid pointer. -+ */ -+ - static int --acpi_video_device_EDID(struct acpi_video_device *device, -- union acpi_object **edid, int length) -+acpi_video_device_EDID(struct acpi_video_device *device, void **edid, int length) - { -- int status; -+ acpi_status status; - struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; - union acpi_object *obj; - union acpi_object arg0 = { ACPI_TYPE_INTEGER }; - struct acpi_object_list args = { 1, &arg0 }; -- -+ int ret; - - *edid = NULL; - -@@ -636,16 +649,17 @@ acpi_video_device_EDID(struct acpi_video_device *device, - - obj = buffer.pointer; - -- if (obj && obj->type == ACPI_TYPE_BUFFER) -- *edid = obj; -- else { -+ if (obj && obj->type == ACPI_TYPE_BUFFER) { -+ *edid = kmemdup(obj->buffer.pointer, obj->buffer.length, GFP_KERNEL); -+ ret = *edid ? 
obj->buffer.length : -ENOMEM; -+ } else { - acpi_handle_debug(device->dev->handle, - "Invalid _DDC data for length %d\n", length); -- status = -EFAULT; -- kfree(obj); -+ ret = -EFAULT; - } - -- return status; -+ kfree(obj); -+ return ret; - } - - /* bus */ -@@ -1435,9 +1449,7 @@ int acpi_video_get_edid(struct acpi_device *device, int type, int device_id, - { - struct acpi_video_bus *video; - struct acpi_video_device *video_device; -- union acpi_object *buffer = NULL; -- acpi_status status; -- int i, length; -+ int i, length, ret; - - if (!device || !acpi_driver_data(device)) - return -EINVAL; -@@ -1477,16 +1489,10 @@ int acpi_video_get_edid(struct acpi_device *device, int type, int device_id, - } - - for (length = 512; length > 0; length -= 128) { -- status = acpi_video_device_EDID(video_device, &buffer, -- length); -- if (ACPI_SUCCESS(status)) -- break; -+ ret = acpi_video_device_EDID(video_device, edid, length); -+ if (ret > 0) -+ return ret; - } -- if (!length) -- continue; -- -- *edid = buffer->buffer.pointer; -- return length; - } - - return -ENODEV; diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index e0e3bb865359..ba98d56a0fe4 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -240,19 +147,50 @@ index 855beafb76ff..ad78059ee954 100644 if (!newmode) continue; -diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c -index 8f0c69aad248..21b56cc7605c 100644 ---- a/drivers/gpu/drm/nouveau/nouveau_acpi.c -+++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c -@@ -384,7 +384,7 @@ nouveau_acpi_edid(struct drm_device *dev, struct drm_connector *connector) - if (ret < 0) - return NULL; - -- return kmemdup(edid, EDID_LENGTH, GFP_KERNEL); -+ return edid; +diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c +index 506c6f377e7d..46e3e42f9eb5 100644 +--- a/drivers/hid/hid-asus.c ++++ b/drivers/hid/hid-asus.c +@@ -432,6 +432,26 @@ static int asus_kbd_get_functions(struct hid_device *hdev, + return ret; } - bool nouveau_acpi_video_backlight_use_native(void) ++static int asus_kbd_disable_oobe(struct hid_device *hdev) ++{ ++ const u8 init[][6] = { ++ { FEATURE_KBD_REPORT_ID, 0x05, 0x20, 0x31, 0x00, 0x08 }, ++ { FEATURE_KBD_REPORT_ID, 0xBA, 0xC5, 0xC4 }, ++ { FEATURE_KBD_REPORT_ID, 0xD0, 0x8F, 0x01 }, ++ { FEATURE_KBD_REPORT_ID, 0xD0, 0x85, 0xFF } ++ }; ++ int ret; ++ ++ for (size_t i = 0; i < ARRAY_SIZE(init); i++) { ++ ret = asus_kbd_set_report(hdev, init[i], sizeof(init[i])); ++ if (ret < 0) ++ return ret; ++ } ++ ++ hid_info(hdev, "Disabled OOBE for keyboard\n"); ++ return 0; ++} ++ + static void asus_schedule_work(struct asus_kbd_leds *led) + { + unsigned long flags; +@@ -534,6 +554,12 @@ static int asus_kbd_register_leds(struct hid_device *hdev) + ret = asus_kbd_init(hdev, FEATURE_KBD_LED_REPORT_ID2); + if (ret < 0) + return ret; ++ ++ if (dmi_match(DMI_PRODUCT_FAMILY, "ProArt P16")) { ++ ret = asus_kbd_disable_oobe(hdev); ++ if (ret < 0) ++ return ret; ++ } + } else { + /* Initialize keyboard */ + ret = asus_kbd_init(hdev, FEATURE_KBD_REPORT_ID); diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 1f47fda809b9..6c2df0d37b3b 100644 --- a/drivers/hid/hid-ids.h @@ -265,6 +203,22 @@ index 1f47fda809b9..6c2df0d37b3b 100644 #define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY 0x1abe #define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY_X 0x1b4c #define USB_DEVICE_ID_ASUSTEK_ROG_CLAYMORE_II_KEYBOARD 0x196b +diff --git 
a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h +index 365e119bebaa..783e2a336861 100644 +--- a/include/linux/platform_data/x86/asus-wmi.h ++++ b/include/linux/platform_data/x86/asus-wmi.h +@@ -184,6 +184,11 @@ static const struct dmi_system_id asus_use_hid_led_dmi_ids[] = { + DMI_MATCH(DMI_PRODUCT_FAMILY, "ROG Flow"), + }, + }, ++ { ++ .matches = { ++ DMI_MATCH(DMI_PRODUCT_FAMILY, "ProArt P16"), ++ }, ++ }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GA403U"), diff --git a/kernel/fork.c b/kernel/fork.c index 0cb5431b4d7e..e919c8c3a121 100644 --- a/kernel/fork.c @@ -388,13 +342,15 @@ index dca706617adc..89d3aef160b7 100644 mkdir -p "${builddir}" cp System.map "${builddir}/System.map" diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c -index b74b566f675e..070dd1ab89c6 100644 +index ad66378d7321..4210bc8f12e1 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c -@@ -10641,6 +10641,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = { +@@ -10641,8 +10641,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1e1f, "ASUS Vivobook 15 X1504VAP", ALC2XX_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1e51, "ASUS Zephyrus M15", ALC294_FIXUP_ASUS_GU502_PINS), SND_PCI_QUIRK(0x1043, 0x1e5e, "ASUS ROG Strix G513", ALC294_FIXUP_ASUS_G513_PINS), +- SND_PCI_QUIRK(0x1043, 0x1e63, "ASUS H7606W", ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1), +- SND_PCI_QUIRK(0x1043, 0x1e83, "ASUS GA605W", ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1), + SND_PCI_QUIRK(0x1043, 0x1e63, "ASUS H7606W", ALC285_FIXUP_ASUS_GA403U_HEADSET_MIC), + SND_PCI_QUIRK(0x1043, 0x1e83, "ASUS GA605W", ALC285_FIXUP_ASUS_GA403U_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1e8e, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA401), diff --git a/6.13/0007-itmt-core-ranking.patch b/6.13/0007-itmt-core-ranking.patch index 13a8b040..f9edbbec 100644 --- a/6.13/0007-itmt-core-ranking.patch +++ b/6.13/0007-itmt-core-ranking.patch @@ -1,6 +1,6 @@ -From 25702dae4d4390c6e804bfe18eef1341a854b9f2 Mon Sep 17 00:00:00 2001 +From edca92ed206343ae09ee1af6ae0dfc26a68085b1 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:38:22 +0100 +Date: Mon, 20 Jan 2025 13:22:28 +0100 Subject: [PATCH 07/12] itmt-core-ranking Signed-off-by: Peter Jung @@ -235,10 +235,10 @@ index b5a8f0891135..ef63b1c0b491 100644 } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index 463fe1dc6de8..f849298a4cc1 100644 +index 8c0f17a96d4f..c532ffb153b4 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c -@@ -9941,6 +9941,8 @@ struct sg_lb_stats { +@@ -9836,6 +9836,8 @@ struct sg_lb_stats { unsigned int group_weight; enum group_type group_type; unsigned int group_asym_packing; /* Tasks should be moved to preferred CPU */ @@ -247,7 +247,7 @@ index 463fe1dc6de8..f849298a4cc1 100644 unsigned int group_smt_balance; /* Task on busy SMT be moved */ unsigned long group_misfit_task_load; /* A CPU has a task too big for its capacity */ #ifdef CONFIG_NUMA_BALANCING -@@ -10270,7 +10272,7 @@ sched_group_asym(struct lb_env *env, struct sg_lb_stats *sgs, struct sched_group +@@ -10165,7 +10167,7 @@ sched_group_asym(struct lb_env *env, struct sg_lb_stats *sgs, struct sched_group (sgs->group_weight - sgs->idle_cpus != 1)) return false; @@ -256,7 +256,7 @@ index 463fe1dc6de8..f849298a4cc1 100644 } /* One group has more than one SMT CPU while the other group does not */ -@@ -10351,6 +10353,17 @@ sched_reduced_capacity(struct rq *rq, struct sched_domain *sd) +@@ -10246,6 
+10248,17 @@ sched_reduced_capacity(struct rq *rq, struct sched_domain *sd) return check_cpu_capacity(rq, sd); } @@ -274,7 +274,7 @@ index 463fe1dc6de8..f849298a4cc1 100644 /** * update_sg_lb_stats - Update sched_group's statistics for load balancing. * @env: The load balancing environment. -@@ -10367,11 +10380,13 @@ static inline void update_sg_lb_stats(struct lb_env *env, +@@ -10262,11 +10275,13 @@ static inline void update_sg_lb_stats(struct lb_env *env, bool *sg_overloaded, bool *sg_overutilized) { @@ -289,7 +289,7 @@ index 463fe1dc6de8..f849298a4cc1 100644 for_each_cpu_and(i, sched_group_span(group), env->cpus) { struct rq *rq = cpu_rq(i); -@@ -10385,16 +10400,12 @@ static inline void update_sg_lb_stats(struct lb_env *env, +@@ -10280,16 +10295,12 @@ static inline void update_sg_lb_stats(struct lb_env *env, nr_running = rq->nr_running; sgs->sum_nr_running += nr_running; @@ -308,7 +308,7 @@ index 463fe1dc6de8..f849298a4cc1 100644 /* * No need to call idle_cpu() if nr_running is not 0 */ -@@ -10404,10 +10415,21 @@ static inline void update_sg_lb_stats(struct lb_env *env, +@@ -10299,10 +10310,21 @@ static inline void update_sg_lb_stats(struct lb_env *env, continue; } @@ -331,7 +331,7 @@ index 463fe1dc6de8..f849298a4cc1 100644 /* Check for a misfit task on the cpu */ if (sgs->group_misfit_task_load < rq->misfit_task_load) { sgs->group_misfit_task_load = rq->misfit_task_load; -@@ -10502,7 +10524,7 @@ static bool update_sd_pick_busiest(struct lb_env *env, +@@ -10397,7 +10419,7 @@ static bool update_sd_pick_busiest(struct lb_env *env, case group_asym_packing: /* Prefer to move from lowest priority CPU's work */ diff --git a/6.13/0008-ntsync.patch b/6.13/0008-ntsync.patch index 82b27d47..76a91251 100644 --- a/6.13/0008-ntsync.patch +++ b/6.13/0008-ntsync.patch @@ -1,6 +1,6 @@ -From 1871388db87b6e7114a28eec15fc03e4c0497e52 Mon Sep 17 00:00:00 2001 +From dad63380fd4bccaf1df47a5d2a14b3622a828bbf Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:38:39 +0100 +Date: Mon, 20 Jan 2025 13:22:39 +0100 Subject: [PATCH 08/12] ntsync Signed-off-by: Peter Jung @@ -427,10 +427,10 @@ index 000000000000..25e7c4aef968 + ``objs`` and in ``alert``. If this is attempted, the function fails + with ``EINVAL``. 
diff --git a/MAINTAINERS b/MAINTAINERS -index a87ddad78e26..69c7e0c9cbfd 100644 +index 0fa7c5728f1e..efecb59adfe6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS -@@ -16708,6 +16708,15 @@ T: git https://github.com/Paragon-Software-Group/linux-ntfs3.git +@@ -16709,6 +16709,15 @@ T: git https://github.com/Paragon-Software-Group/linux-ntfs3.git F: Documentation/filesystems/ntfs3.rst F: fs/ntfs3/ diff --git a/6.13/0009-perf-per-core.patch b/6.13/0009-perf-per-core.patch index 91886a65..ceb745db 100644 --- a/6.13/0009-perf-per-core.patch +++ b/6.13/0009-perf-per-core.patch @@ -1,6 +1,6 @@ -From ecafa3b39e7691288beb920eb362064d548d45e7 Mon Sep 17 00:00:00 2001 +From d0d15e3d79a2d5bb2c94b8ff3d2ab51f0b0100fe Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:38:51 +0100 +Date: Mon, 20 Jan 2025 13:22:50 +0100 Subject: [PATCH 09/12] perf-per-core Signed-off-by: Peter Jung diff --git a/6.13/0010-pksm.patch b/6.13/0010-pksm.patch index 369779b6..b877ca5c 100644 --- a/6.13/0010-pksm.patch +++ b/6.13/0010-pksm.patch @@ -1,6 +1,6 @@ -From b0522d38174d109d02042dc5591c1ab52de16a94 Mon Sep 17 00:00:00 2001 +From 6a7ea67c66634276802b4b9b0964a0b00db97d9c Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:39:03 +0100 +Date: Mon, 20 Jan 2025 13:23:02 +0100 Subject: [PATCH 10/12] pksm Signed-off-by: Peter Jung diff --git a/6.13/0011-t2.patch b/6.13/0011-t2.patch index 988a3380..3278629e 100644 --- a/6.13/0011-t2.patch +++ b/6.13/0011-t2.patch @@ -1,6 +1,6 @@ -From 5662d52675419bbe7b47731ad55c01ecf94b8426 Mon Sep 17 00:00:00 2001 +From 5e459e48f274c34d701726a61a96140381b1de2b Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:39:12 +0100 +Date: Mon, 20 Jan 2025 13:23:11 +0100 Subject: [PATCH 11/12] t2 Signed-off-by: Peter Jung @@ -156,10 +156,10 @@ index ecccc0473da9..6de6b0e6abf3 100644 ---- diff --git a/MAINTAINERS b/MAINTAINERS -index 69c7e0c9cbfd..01be85b7d886 100644 +index efecb59adfe6..16af42c68cca 100644 --- a/MAINTAINERS +++ b/MAINTAINERS -@@ -7065,6 +7065,12 @@ S: Supported +@@ -7066,6 +7066,12 @@ S: Supported T: git https://gitlab.freedesktop.org/drm/misc/kernel.git F: drivers/gpu/drm/sun4i/sun8i* diff --git a/6.13/0012-zstd.patch b/6.13/0012-zstd.patch index bee631a9..f534c712 100644 --- a/6.13/0012-zstd.patch +++ b/6.13/0012-zstd.patch @@ -1,6 +1,6 @@ -From 91beebc1e962374c32c95b975d59ff5aa90b66c1 Mon Sep 17 00:00:00 2001 +From 6f96c228cd968c7f47eb90d9e7ad6d679bf5a7f0 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:39:21 +0100 +Date: Mon, 20 Jan 2025 13:23:20 +0100 Subject: [PATCH 12/12] zstd Signed-off-by: Peter Jung diff --git a/6.13/all/0001-cachyos-base-all.patch b/6.13/all/0001-cachyos-base-all.patch index b08fa650..83b4d000 100644 --- a/6.13/all/0001-cachyos-base-all.patch +++ b/6.13/all/0001-cachyos-base-all.patch @@ -1,6 +1,6 @@ -From 2af576964728ca6af63da3c61dae669b5ae945c7 Mon Sep 17 00:00:00 2001 +From 1ec94c7b86986796d5d14135302e81dd3ddbe223 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:37:09 +0100 +Date: Mon, 20 Jan 2025 13:21:23 +0100 Subject: [PATCH 01/12] amd-pstate Signed-off-by: Peter Jung @@ -1003,42 +1003,44 @@ index cd573bc6b6db..9747e3be6cee 100644 -- 2.48.0.rc1 -From 1d6b426b59b09163dbcaac857551295ad4b343d5 Mon Sep 17 00:00:00 2001 +From b74b9b0459100443f73ce718d0191bf58d6cb4b4 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:51:04 +0100 +Date: Mon, 20 Jan 2025 13:21:35 +0100 Subject: [PATCH 02/12] amd-tlb-broadcast Signed-off-by: Peter Jung --- - arch/x86/Kconfig | 2 
+- - arch/x86/hyperv/mmu.c | 1 - - arch/x86/include/asm/cpufeatures.h | 1 + - arch/x86/include/asm/invlpgb.h | 93 ++++++ - arch/x86/include/asm/mmu.h | 6 + - arch/x86/include/asm/mmu_context.h | 12 + - arch/x86/include/asm/paravirt.h | 5 - - arch/x86/include/asm/paravirt_types.h | 2 - - arch/x86/include/asm/tlbbatch.h | 1 + - arch/x86/include/asm/tlbflush.h | 31 +- - arch/x86/kernel/cpu/amd.c | 16 ++ - arch/x86/kernel/kvm.c | 1 - - arch/x86/kernel/paravirt.c | 6 - - arch/x86/kernel/setup.c | 4 + - arch/x86/mm/pgtable.c | 16 +- - arch/x86/mm/tlb.c | 393 +++++++++++++++++++++++++- - arch/x86/xen/mmu_pv.c | 1 - - mm/memory.c | 1 - - mm/mmap.c | 2 - - mm/swap_state.c | 1 - - mm/vma.c | 2 - - 21 files changed, 541 insertions(+), 56 deletions(-) + arch/x86/Kconfig | 2 +- + arch/x86/Kconfig.cpu | 5 + + arch/x86/hyperv/mmu.c | 1 - + arch/x86/include/asm/cpufeatures.h | 1 + + arch/x86/include/asm/invlpgb.h | 103 +++++ + arch/x86/include/asm/mmu.h | 6 + + arch/x86/include/asm/mmu_context.h | 14 + + arch/x86/include/asm/msr-index.h | 2 + + arch/x86/include/asm/paravirt.h | 5 - + arch/x86/include/asm/paravirt_types.h | 2 - + arch/x86/include/asm/tlbbatch.h | 1 + + arch/x86/include/asm/tlbflush.h | 92 ++++- + arch/x86/kernel/cpu/amd.c | 12 + + arch/x86/kernel/kvm.c | 1 - + arch/x86/kernel/paravirt.c | 6 - + arch/x86/mm/pgtable.c | 16 +- + arch/x86/mm/tlb.c | 496 +++++++++++++++++++++++-- + arch/x86/xen/mmu_pv.c | 1 - + mm/memory.c | 1 - + mm/mmap.c | 2 - + mm/swap_state.c | 1 - + mm/vma.c | 2 - + tools/arch/x86/include/asm/msr-index.h | 2 + + 23 files changed, 695 insertions(+), 79 deletions(-) create mode 100644 arch/x86/include/asm/invlpgb.h diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig -index 9d7bd0ae48c4..e8743f8c9fd0 100644 +index ef6cfea9df73..1f824dcab4dc 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig -@@ -274,7 +274,7 @@ config X86 +@@ -273,7 +273,7 @@ config X86 select HAVE_PCI select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP @@ -1047,6 +1049,29 @@ index 9d7bd0ae48c4..e8743f8c9fd0 100644 select MMU_GATHER_MERGE_VMAS select HAVE_POSIX_CPU_TIMERS_TASK_WORK select HAVE_REGS_AND_STACK_ACCESS_API +diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu +index 2a7279d80460..bacdc502903f 100644 +--- a/arch/x86/Kconfig.cpu ++++ b/arch/x86/Kconfig.cpu +@@ -395,6 +395,10 @@ config X86_VMX_FEATURE_NAMES + def_bool y + depends on IA32_FEAT_CTL + ++config X86_BROADCAST_TLB_FLUSH ++ def_bool y ++ depends on CPU_SUP_AMD ++ + menuconfig PROCESSOR_SELECT + bool "Supported processor vendors" if EXPERT + help +@@ -431,6 +435,7 @@ config CPU_SUP_CYRIX_32 + config CPU_SUP_AMD + default y + bool "Support AMD processors" if PROCESSOR_SELECT ++ select X86_BROADCAST_TLB_FLUSH + help + This enables detection, tunings and quirks for AMD processors + diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index 1cc113200ff5..cbe6c71e17c1 100644 --- a/arch/x86/hyperv/mmu.c @@ -1058,27 +1083,28 @@ index 1cc113200ff5..cbe6c71e17c1 100644 - pv_ops.mmu.tlb_remove_table = tlb_remove_table; } diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h -index 645aa360628d..742c138d011a 100644 +index 645aa360628d..989e4c9cad2e 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -338,6 +338,7 @@ #define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+ 1) /* "irperf" Instructions Retired Count */ #define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* "xsaveerptr" Always save/restore FP error pointers */ 
-+#define X86_FEATURE_INVLPGB (13*32+ 3) /* "invlpgb" INVLPGB instruction */ ++#define X86_FEATURE_INVLPGB (13*32+ 3) /* INVLPGB and TLBSYNC instruction supported. */ #define X86_FEATURE_RDPRU (13*32+ 4) /* "rdpru" Read processor register at user level */ #define X86_FEATURE_WBNOINVD (13*32+ 9) /* "wbnoinvd" WBNOINVD instruction */ #define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ diff --git a/arch/x86/include/asm/invlpgb.h b/arch/x86/include/asm/invlpgb.h new file mode 100644 -index 000000000000..2669ebfffe81 +index 000000000000..418402535319 --- /dev/null +++ b/arch/x86/include/asm/invlpgb.h -@@ -0,0 +1,93 @@ +@@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_INVLPGB +#define _ASM_X86_INVLPGB + ++#include +#include + +/* @@ -1090,21 +1116,31 @@ index 000000000000..2669ebfffe81 + * TLBSYNC is used to ensure that pending INVLPGB invalidations initiated from + * this CPU have completed. + */ -+static inline void __invlpgb(unsigned long asid, unsigned long pcid, unsigned long addr, -+ int extra_count, bool pmd_stride, unsigned long flags) ++static inline void __invlpgb(unsigned long asid, unsigned long pcid, ++ unsigned long addr, u16 extra_count, ++ bool pmd_stride, unsigned long flags) +{ -+ u64 rax = addr | flags; -+ u32 ecx = (pmd_stride << 31) | extra_count; + u32 edx = (pcid << 16) | asid; ++ u32 ecx = (pmd_stride << 31) | extra_count; ++ u64 rax = addr | flags; ++ ++ /* INVLPGB; supported in binutils >= 2.36. */ ++ asm volatile(".byte 0x0f, 0x01, 0xfe" : : "a" (rax), "c" (ecx), "d" (edx)); ++} + -+ asm volatile("invlpgb" : : "a" (rax), "c" (ecx), "d" (edx)); ++/* Wait for INVLPGB originated by this CPU to complete. */ ++static inline void tlbsync(void) ++{ ++ cant_migrate(); ++ /* TLBSYNC: supported in binutils >= 0.36. */ ++ asm volatile(".byte 0x0f, 0x01, 0xff" ::: "memory"); +} + +/* + * INVLPGB can be targeted by virtual address, PCID, ASID, or any combination + * of the three. For example: + * - INVLPGB_VA | INVLPGB_INCLUDE_GLOBAL: invalidate all TLB entries at the address -+ * - INVLPGB_PCID: invalidate all TLB entries matching the PCID ++ * - INVLPGB_PCID: invalidate all TLB entries matching the PCID + * + * The first can be used to invalidate (kernel) mappings at a particular + * address across all processes. @@ -1123,22 +1159,25 @@ index 000000000000..2669ebfffe81 + unsigned long addr) +{ + __invlpgb(0, pcid, addr, 0, 0, INVLPGB_PCID | INVLPGB_VA); ++ tlbsync(); +} + -+static inline void invlpgb_flush_user_nr(unsigned long pcid, unsigned long addr, -+ int nr, bool pmd_stride) ++static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid, ++ unsigned long addr, ++ u16 nr, ++ bool pmd_stride, ++ bool freed_tables) +{ -+ __invlpgb(0, pcid, addr, nr - 1, pmd_stride, INVLPGB_PCID | INVLPGB_VA | INVLPGB_FINAL_ONLY); -+} ++ unsigned long flags = INVLPGB_PCID | INVLPGB_VA; + -+/* Flush all mappings for a given ASID, not including globals. */ -+static inline void invlpgb_flush_single_asid(unsigned long asid) -+{ -+ __invlpgb(asid, 0, 0, 0, 0, INVLPGB_ASID); ++ if (!freed_tables) ++ flags |= INVLPGB_FINAL_ONLY; ++ ++ __invlpgb(0, pcid, addr, nr - 1, pmd_stride, flags); +} + +/* Flush all mappings for a given PCID, not including globals. 
*/ -+static inline void invlpgb_flush_single_pcid(unsigned long pcid) ++static inline void invlpgb_flush_single_pcid_nosync(unsigned long pcid) +{ + __invlpgb(0, pcid, 0, 0, 0, INVLPGB_PCID); +} @@ -1147,10 +1186,11 @@ index 000000000000..2669ebfffe81 +static inline void invlpgb_flush_all(void) +{ + __invlpgb(0, 0, 0, 0, 0, INVLPGB_INCLUDE_GLOBAL); ++ tlbsync(); +} + +/* Flush addr, including globals, for all PCIDs. */ -+static inline void invlpgb_flush_addr(unsigned long addr, int nr) ++static inline void invlpgb_flush_addr_nosync(unsigned long addr, u16 nr) +{ + __invlpgb(0, 0, addr, nr - 1, 0, INVLPGB_INCLUDE_GLOBAL); +} @@ -1159,69 +1199,86 @@ index 000000000000..2669ebfffe81 +static inline void invlpgb_flush_all_nonglobals(void) +{ + __invlpgb(0, 0, 0, 0, 0, 0); -+} -+ -+/* Wait for INVLPGB originated by this CPU to complete. */ -+static inline void tlbsync(void) -+{ -+ asm volatile("tlbsync"); ++ tlbsync(); +} + +#endif /* _ASM_X86_INVLPGB */ diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h -index ce4677b8b735..83d0986295d3 100644 +index ce4677b8b735..51f25d38de86 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h -@@ -46,6 +46,12 @@ typedef struct { - unsigned long flags; +@@ -67,6 +67,12 @@ typedef struct { + u16 pkey_allocation_map; + s16 execute_only_pkey; #endif - -+#ifdef CONFIG_CPU_SUP_AMD -+ struct list_head broadcast_asid_list; -+ u16 broadcast_asid; ++ ++#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH ++ u16 global_asid; + bool asid_transition; +#endif + - #ifdef CONFIG_ADDRESS_MASKING - /* Active LAM mode: X86_CR3_LAM_U48 or X86_CR3_LAM_U57 or 0 (disabled) */ - unsigned long lam_cr3_mask; + } mm_context_t; + + #define INIT_MM_CONTEXT(mm) \ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h -index 2886cb668d7f..2c347b51d9b9 100644 +index 2886cb668d7f..65f50464b5c3 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -139,6 +139,8 @@ static inline void mm_reset_untag_mask(struct mm_struct *mm) #define enter_lazy_tlb enter_lazy_tlb extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); -+extern void destroy_context_free_broadcast_asid(struct mm_struct *mm); ++extern void destroy_context_free_global_asid(struct mm_struct *mm); + /* * Init a new mm. Used on mm copies, like at fork() * and on mm's that are brand-new, like at execve(). 
-@@ -160,6 +162,13 @@ static inline int init_new_context(struct task_struct *tsk, +@@ -160,6 +162,14 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.execute_only_pkey = -1; } #endif + -+#ifdef CONFIG_CPU_SUP_AMD -+ INIT_LIST_HEAD(&mm->context.broadcast_asid_list); -+ mm->context.broadcast_asid = 0; -+ mm->context.asid_transition = false; ++#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH ++ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) { ++ mm->context.global_asid = 0; ++ mm->context.asid_transition = false; ++ } +#endif + mm_reset_untag_mask(mm); init_new_context_ldt(mm); return 0; -@@ -169,6 +178,9 @@ static inline int init_new_context(struct task_struct *tsk, +@@ -169,6 +179,10 @@ static inline int init_new_context(struct task_struct *tsk, static inline void destroy_context(struct mm_struct *mm) { destroy_context_ldt(mm); -+#ifdef CONFIG_CPU_SUP_AMD -+ destroy_context_free_broadcast_asid(mm); ++#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH ++ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) ++ destroy_context_free_global_asid(mm); +#endif } extern void switch_mm(struct mm_struct *prev, struct mm_struct *next, +diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h +index 3ae84c3b8e6d..dc1c1057f26e 100644 +--- a/arch/x86/include/asm/msr-index.h ++++ b/arch/x86/include/asm/msr-index.h +@@ -25,6 +25,7 @@ + #define _EFER_SVME 12 /* Enable virtualization */ + #define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */ + #define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ ++#define _EFER_TCE 15 /* Enable Translation Cache Extensions */ + #define _EFER_AUTOIBRS 21 /* Enable Automatic IBRS */ + + #define EFER_SCE (1<<_EFER_SCE) +@@ -34,6 +35,7 @@ + #define EFER_SVME (1<<_EFER_SVME) + #define EFER_LMSLE (1<<_EFER_LMSLE) + #define EFER_FFXSR (1<<_EFER_FFXSR) ++#define EFER_TCE (1<<_EFER_TCE) + #define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS) + + /* diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index d4eb9e1d61b8..794ba3647c6c 100644 --- a/arch/x86/include/asm/paravirt.h @@ -1264,7 +1321,7 @@ index 1ad56eb3e8a8..f9a17edf63ad 100644 #endif /* _ARCH_X86_TLBBATCH_H */ diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h -index 69e79fff41b8..a2f9b7370717 100644 +index 69e79fff41b8..5490ca71e27f 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -10,6 +10,7 @@ @@ -1275,39 +1332,100 @@ index 69e79fff41b8..a2f9b7370717 100644 #include #include #include -@@ -64,6 +65,23 @@ static inline void cr4_clear_bits(unsigned long mask) - */ - #define TLB_NR_DYN_ASIDS 6 +@@ -183,6 +184,13 @@ static inline void cr4_init_shadow(void) + extern unsigned long mmu_cr4_features; + extern u32 *trampoline_cr4_features; + ++/* How many pages can we invalidate with one INVLPGB. 
*/ ++#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH ++extern u16 invlpgb_count_max; ++#else ++#define invlpgb_count_max 1 ++#endif ++ + extern void initialize_tlbstate_and_flush(void); -+#ifdef CONFIG_CPU_SUP_AMD -+#define is_dyn_asid(asid) (asid) < TLB_NR_DYN_ASIDS -+#define is_broadcast_asid(asid) (asid) >= TLB_NR_DYN_ASIDS -+#define in_asid_transition(info) (info->mm && info->mm->context.asid_transition) -+#define mm_broadcast_asid(mm) (mm->context.broadcast_asid) + /* +@@ -230,6 +238,78 @@ void flush_tlb_one_kernel(unsigned long addr); + void flush_tlb_multi(const struct cpumask *cpumask, + const struct flush_tlb_info *info); + ++#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH ++static inline bool is_dyn_asid(u16 asid) ++{ ++ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) ++ return true; ++ ++ return asid < TLB_NR_DYN_ASIDS; ++} ++ ++static inline bool is_global_asid(u16 asid) ++{ ++ return !is_dyn_asid(asid); ++} ++ ++static inline bool in_asid_transition(const struct flush_tlb_info *info) ++{ ++ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) ++ return false; ++ ++ return info->mm && READ_ONCE(info->mm->context.asid_transition); ++} ++ ++static inline u16 mm_global_asid(struct mm_struct *mm) ++{ ++ u16 asid; ++ ++ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) ++ return 0; ++ ++ asid = READ_ONCE(mm->context.global_asid); ++ ++ /* mm->context.global_asid is either 0, or a global ASID */ ++ VM_WARN_ON_ONCE(is_dyn_asid(asid)); ++ ++ return asid; ++} +#else -+#define is_dyn_asid(asid) true -+#define is_broadcast_asid(asid) false -+#define in_asid_transition(info) false -+#define mm_broadcast_asid(mm) 0 ++static inline bool is_dyn_asid(u16 asid) ++{ ++ return true; ++} ++ ++static inline bool is_global_asid(u16 asid) ++{ ++ return false; ++} + -+inline bool needs_broadcast_asid_reload(struct mm_struct *next, u16 prev_asid) ++static inline bool in_asid_transition(const struct flush_tlb_info *info) +{ + return false; +} ++ ++static inline u16 mm_global_asid(struct mm_struct *mm) ++{ ++ return 0; ++} ++ ++static inline bool needs_global_asid_reload(struct mm_struct *next, u16 prev_asid) ++{ ++ return false; ++} ++ ++static inline void broadcast_tlb_flush(struct flush_tlb_info *info) ++{ ++ VM_WARN_ON_ONCE(1); ++} ++ ++static inline void consider_global_asid(struct mm_struct *mm) ++{ ++} +#endif + - struct tlb_context { - u64 ctx_id; - u64 tlb_gen; -@@ -182,6 +200,7 @@ static inline void cr4_init_shadow(void) - - extern unsigned long mmu_cr4_features; - extern u32 *trampoline_cr4_features; -+extern u16 invlpgb_count_max; - - extern void initialize_tlbstate_and_flush(void); - -@@ -277,21 +296,15 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) + #ifdef CONFIG_PARAVIRT + #include + #endif +@@ -277,21 +357,15 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) return atomic64_inc_return(&mm->context.tlb_gen); } @@ -1333,28 +1451,38 @@ index 69e79fff41b8..a2f9b7370717 100644 static inline bool pte_flags_need_flush(unsigned long oldflags, unsigned long newflags, diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c -index 79d2e17f6582..4dc42705aaca 100644 +index 79d2e17f6582..21076252a491 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c -@@ -1135,6 +1135,22 @@ static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c) - tlb_lli_2m[ENTRIES] = eax & mask; +@@ -29,6 +29,8 @@ - tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1; + #include "cpu.h" + ++u16 invlpgb_count_max __ro_after_init; + -+ if (c->extended_cpuid_level < 0x80000008) -+ return; + static inline int 
rdmsrl_amd_safe(unsigned msr, unsigned long long *p) + { + u32 gprs[8] = { 0 }; +@@ -1069,6 +1071,10 @@ static void init_amd(struct cpuinfo_x86 *c) + + /* AMD CPUs don't need fencing after x2APIC/TSC_DEADLINE MSR writes. */ + clear_cpu_cap(c, X86_FEATURE_APIC_MSRS_FENCE); + -+ cpuid(0x80000008, &eax, &ebx, &ecx, &edx); ++ /* Enable Translation Cache Extension */ ++ if (cpu_feature_enabled(X86_FEATURE_TCE)) ++ msr_set_bit(MSR_EFER, _EFER_TCE); + } + + #ifdef CONFIG_X86_32 +@@ -1135,6 +1141,12 @@ static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c) + tlb_lli_2m[ENTRIES] = eax & mask; + + tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1; + + /* Max number of pages INVLPGB can invalidate in one shot */ -+ invlpgb_count_max = (edx & 0xffff) + 1; -+ -+ /* If supported, enable translation cache extensions (TCE) */ -+ cpuid(0x80000001, &eax, &ebx, &ecx, &edx); -+ if (ecx & BIT(17)) { -+ u64 msr = native_read_msr(MSR_EFER);; -+ msr |= BIT(15); -+ wrmsrl(MSR_EFER, msr); ++ if (boot_cpu_has(X86_FEATURE_INVLPGB)) { ++ cpuid(0x80000008, &eax, &ebx, &ecx, &edx); ++ invlpgb_count_max = (edx & 0xffff) + 1; + } } @@ -1395,21 +1523,6 @@ index fec381533555..c019771e0123 100644 .mmu.exit_mmap = paravirt_nop, .mmu.notify_page_enc_status_changed = paravirt_nop, -diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c -index f1fea506e20f..6c4d08f8f7b1 100644 ---- a/arch/x86/kernel/setup.c -+++ b/arch/x86/kernel/setup.c -@@ -138,6 +138,10 @@ __visible unsigned long mmu_cr4_features __ro_after_init; - __visible unsigned long mmu_cr4_features __ro_after_init = X86_CR4_PAE; - #endif - -+#ifdef CONFIG_CPU_SUP_AMD -+u16 invlpgb_count_max __ro_after_init; -+#endif -+ - #ifdef CONFIG_IMA - static phys_addr_t ima_kexec_buffer_phys; - static size_t ima_kexec_buffer_size; diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 5745a354a241..3dc4af1f7868 100644 --- a/arch/x86/mm/pgtable.c @@ -1465,7 +1578,7 @@ index 5745a354a241..3dc4af1f7868 100644 #endif /* CONFIG_PGTABLE_LEVELS > 4 */ #endif /* CONFIG_PGTABLE_LEVELS > 3 */ diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c -index a2becb85bea7..0080175153ef 100644 +index a2becb85bea7..6449ac701c88 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -74,13 +74,15 @@ @@ -1487,120 +1600,136 @@ index a2becb85bea7..0080175153ef 100644 * for KPTI each mm has two address spaces and thus needs two * PCID values, but we can still do with a single ASID denomination * for each mm. Corresponds to kPCID + 2048. -@@ -225,6 +227,18 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, +@@ -225,6 +227,20 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, return; } + /* -+ * TLB consistency for this ASID is maintained with INVLPGB; -+ * TLB flushes happen even while the process isn't running. ++ * TLB consistency for global ASIDs is maintained with broadcast TLB ++ * flushing. The TLB is never outdated, and does not need flushing. 
+ */ -+#ifdef CONFIG_CPU_SUP_AMD -+ if (static_cpu_has(X86_FEATURE_INVLPGB) && mm_broadcast_asid(next)) { -+ *new_asid = mm_broadcast_asid(next); -+ *need_flush = false; -+ return; ++ if (IS_ENABLED(CONFIG_X86_BROADCAST_TLB_FLUSH) && static_cpu_has(X86_FEATURE_INVLPGB)) { ++ u16 global_asid = mm_global_asid(next); ++ ++ if (global_asid) { ++ *new_asid = global_asid; ++ *need_flush = false; ++ return; ++ } + } -+#endif + if (this_cpu_read(cpu_tlbstate.invalidate_other)) clear_asid_other(); -@@ -251,6 +265,245 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, +@@ -251,6 +267,290 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, *need_flush = true; } -+#ifdef CONFIG_CPU_SUP_AMD ++#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH +/* -+ * Logic for AMD INVLPGB support. ++ * Logic for broadcast TLB invalidation. + */ -+static DEFINE_RAW_SPINLOCK(broadcast_asid_lock); -+static u16 last_broadcast_asid = TLB_NR_DYN_ASIDS; -+static DECLARE_BITMAP(broadcast_asid_used, MAX_ASID_AVAILABLE) = { 0 }; -+static LIST_HEAD(broadcast_asid_list); -+static int broadcast_asid_available = MAX_ASID_AVAILABLE - TLB_NR_DYN_ASIDS - 1; ++static DEFINE_RAW_SPINLOCK(global_asid_lock); ++static u16 last_global_asid = MAX_ASID_AVAILABLE; ++static DECLARE_BITMAP(global_asid_used, MAX_ASID_AVAILABLE) = { 0 }; ++static DECLARE_BITMAP(global_asid_freed, MAX_ASID_AVAILABLE) = { 0 }; ++static int global_asid_available = MAX_ASID_AVAILABLE - TLB_NR_DYN_ASIDS - 1; + -+static void reset_broadcast_asid_space(void) ++static void reset_global_asid_space(void) +{ -+ mm_context_t *context; -+ -+ lockdep_assert_held(&broadcast_asid_lock); ++ lockdep_assert_held(&global_asid_lock); + + /* -+ * Flush once when we wrap around the ASID space, so we won't need -+ * to flush every time we allocate an ASID for boradcast flushing. ++ * A global TLB flush guarantees that any stale entries from ++ * previously freed global ASIDs get flushed from the TLB ++ * everywhere, making these global ASIDs safe to reuse. + */ + invlpgb_flush_all_nonglobals(); -+ tlbsync(); + + /* -+ * Leave the currently used broadcast ASIDs set in the bitmap, since -+ * those cannot be reused before the next wraparound and flush.. ++ * Clear all the previously freed global ASIDs from the ++ * broadcast_asid_used bitmap, now that the global TLB flush ++ * has made them actually available for re-use. + */ -+ bitmap_clear(broadcast_asid_used, 0, MAX_ASID_AVAILABLE); -+ list_for_each_entry(context, &broadcast_asid_list, broadcast_asid_list) -+ __set_bit(context->broadcast_asid, broadcast_asid_used); ++ bitmap_andnot(global_asid_used, global_asid_used, ++ global_asid_freed, MAX_ASID_AVAILABLE); ++ bitmap_clear(global_asid_freed, 0, MAX_ASID_AVAILABLE); + -+ last_broadcast_asid = TLB_NR_DYN_ASIDS; ++ /* ++ * ASIDs 0-TLB_NR_DYN_ASIDS are used for CPU-local ASID ++ * assignments, for tasks doing IPI based TLB shootdowns. ++ * Restart the search from the start of the global ASID space. 
++ */ ++ last_global_asid = TLB_NR_DYN_ASIDS; +} + -+static u16 get_broadcast_asid(void) ++static u16 get_global_asid(void) +{ -+ lockdep_assert_held(&broadcast_asid_lock); ++ lockdep_assert_held(&global_asid_lock); + + do { -+ u16 start = last_broadcast_asid; -+ u16 asid = find_next_zero_bit(broadcast_asid_used, MAX_ASID_AVAILABLE, start); ++ u16 start = last_global_asid; ++ u16 asid = find_next_zero_bit(global_asid_used, MAX_ASID_AVAILABLE, start); + + if (asid >= MAX_ASID_AVAILABLE) { -+ reset_broadcast_asid_space(); ++ reset_global_asid_space(); + continue; + } + -+ /* Try claiming this broadcast ASID. */ -+ if (!test_and_set_bit(asid, broadcast_asid_used)) { -+ last_broadcast_asid = asid; -+ return asid; -+ } ++ /* Claim this global ASID. */ ++ __set_bit(asid, global_asid_used); ++ last_global_asid = asid; ++ global_asid_available--; ++ return asid; + } while (1); +} + +/* -+ * Returns true if the mm is transitioning from a CPU-local ASID to a broadcast ++ * Returns true if the mm is transitioning from a CPU-local ASID to a global + * (INVLPGB) ASID, or the other way around. + */ -+static bool needs_broadcast_asid_reload(struct mm_struct *next, u16 prev_asid) ++static bool needs_global_asid_reload(struct mm_struct *next, u16 prev_asid) +{ -+ u16 broadcast_asid = mm_broadcast_asid(next); ++ u16 global_asid = mm_global_asid(next); + -+ if (broadcast_asid && prev_asid != broadcast_asid) ++ if (global_asid && prev_asid != global_asid) + return true; + -+ if (!broadcast_asid && is_broadcast_asid(prev_asid)) ++ if (!global_asid && is_global_asid(prev_asid)) + return true; + + return false; +} + -+void destroy_context_free_broadcast_asid(struct mm_struct *mm) ++void destroy_context_free_global_asid(struct mm_struct *mm) +{ -+ if (!mm->context.broadcast_asid) ++ if (!mm->context.global_asid) + return; + -+ guard(raw_spinlock_irqsave)(&broadcast_asid_lock); -+ mm->context.broadcast_asid = 0; -+ list_del(&mm->context.broadcast_asid_list); -+ broadcast_asid_available++; ++ guard(raw_spinlock_irqsave)(&global_asid_lock); ++ ++ /* The global ASID can be re-used only after flush at wrap-around. */ ++ __set_bit(mm->context.global_asid, global_asid_freed); ++ ++ mm->context.global_asid = 0; ++ global_asid_available++; +} + ++/* ++ * Check whether a process is currently active on more than "threshold" CPUs. ++ * This is a cheap estimation on whether or not it may make sense to assign ++ * a global ASID to this process, and use broadcast TLB invalidation. ++ */ +static bool mm_active_cpus_exceeds(struct mm_struct *mm, int threshold) +{ + int count = 0; + int cpu; + ++ /* This quick check should eliminate most single threaded programs. */ + if (cpumask_weight(mm_cpumask(mm)) <= threshold) + return false; + ++ /* Slower check to make sure. */ + for_each_cpu(cpu, mm_cpumask(mm)) { + /* Skip the CPUs that aren't really running this process. */ + if (per_cpu(cpu_tlbstate.loaded_mm, cpu) != mm) @@ -1616,40 +1745,56 @@ index a2becb85bea7..0080175153ef 100644 +} + +/* -+ * Assign a broadcast ASID to the current process, protecting against ++ * Assign a global ASID to the current process, protecting against + * races between multiple threads in the process. + */ -+static void use_broadcast_asid(struct mm_struct *mm) ++static void use_global_asid(struct mm_struct *mm) +{ -+ guard(raw_spinlock_irqsave)(&broadcast_asid_lock); ++ guard(raw_spinlock_irqsave)(&global_asid_lock); + + /* This process is already using broadcast TLB invalidation. 
*/ -+ if (mm->context.broadcast_asid) ++ if (mm->context.global_asid) ++ return; ++ ++ /* The last global ASID was consumed while waiting for the lock. */ ++ if (!global_asid_available) + return; + -+ mm->context.broadcast_asid = get_broadcast_asid(); -+ mm->context.asid_transition = true; -+ list_add(&mm->context.broadcast_asid_list, &broadcast_asid_list); -+ broadcast_asid_available--; ++ /* ++ * The transition from IPI TLB flushing, with a dynamic ASID, ++ * and broadcast TLB flushing, using a global ASID, uses memory ++ * ordering for synchronization. ++ * ++ * While the process has threads still using a dynamic ASID, ++ * TLB invalidation IPIs continue to get sent. ++ * ++ * This code sets asid_transition first, before assigning the ++ * global ASID. ++ * ++ * The TLB flush code will only verify the ASID transition ++ * after it has seen the new global ASID for the process. ++ */ ++ WRITE_ONCE(mm->context.asid_transition, true); ++ WRITE_ONCE(mm->context.global_asid, get_global_asid()); +} + +/* -+ * Figure out whether to assign a broadcast (global) ASID to a process. -+ * We vary the threshold by how empty or full broadcast ASID space is. ++ * Figure out whether to assign a global ASID to a process. ++ * We vary the threshold by how empty or full global ASID space is. + * 1/4 full: >= 4 active threads + * 1/2 full: >= 8 active threads + * 3/4 full: >= 16 active threads + * 7/8 full: >= 32 active threads + * etc + * -+ * This way we should never exhaust the broadcast ASID space, even on very ++ * This way we should never exhaust the global ASID space, even on very + * large systems, and the processes with the largest number of active + * threads should be able to use broadcast TLB invalidation. + */ +#define HALFFULL_THRESHOLD 8 -+static bool meets_broadcast_asid_threshold(struct mm_struct *mm) ++static bool meets_global_asid_threshold(struct mm_struct *mm) +{ -+ int avail = broadcast_asid_available; ++ int avail = global_asid_available; + int threshold = HALFFULL_THRESHOLD; + + if (!avail) @@ -1669,7 +1814,7 @@ index a2becb85bea7..0080175153ef 100644 + return mm_active_cpus_exceeds(mm, threshold); +} + -+static void count_tlb_flush(struct mm_struct *mm) ++static void consider_global_asid(struct mm_struct *mm) +{ + if (!static_cpu_has(X86_FEATURE_INVLPGB)) + return; @@ -1678,43 +1823,54 @@ index a2becb85bea7..0080175153ef 100644 + if ((current->pid & 0x1f) != (jiffies & 0x1f)) + return; + -+ if (meets_broadcast_asid_threshold(mm)) -+ use_broadcast_asid(mm); ++ if (meets_global_asid_threshold(mm)) ++ use_global_asid(mm); +} + +static void finish_asid_transition(struct flush_tlb_info *info) +{ + struct mm_struct *mm = info->mm; -+ int bc_asid = mm_broadcast_asid(mm); ++ int bc_asid = mm_global_asid(mm); + int cpu; + -+ if (!mm->context.asid_transition) ++ if (!READ_ONCE(mm->context.asid_transition)) + return; + + for_each_cpu(cpu, mm_cpumask(mm)) { ++ /* ++ * The remote CPU is context switching. Wait for that to ++ * finish, to catch the unlikely case of it switching to ++ * the target mm with an out of date ASID. ++ */ ++ while (READ_ONCE(per_cpu(cpu_tlbstate.loaded_mm, cpu)) == LOADED_MM_SWITCHING) ++ cpu_relax(); ++ + if (READ_ONCE(per_cpu(cpu_tlbstate.loaded_mm, cpu)) != mm) + continue; + + /* -+ * If at least one CPU is not using the broadcast ASID yet, ++ * If at least one CPU is not using the global ASID yet, + * send a TLB flush IPI. The IPI should cause stragglers + * to transition soon. 
++ * ++ * This can race with the CPU switching to another task; ++ * that results in a (harmless) extra IPI. + */ -+ if (per_cpu(cpu_tlbstate.loaded_mm_asid, cpu) != bc_asid) { ++ if (READ_ONCE(per_cpu(cpu_tlbstate.loaded_mm_asid, cpu)) != bc_asid) { + flush_tlb_multi(mm_cpumask(info->mm), info); + return; + } + } + -+ /* All the CPUs running this process are using the broadcast ASID. */ -+ mm->context.asid_transition = 0; ++ /* All the CPUs running this process are using the global ASID. */ ++ WRITE_ONCE(mm->context.asid_transition, false); +} + +static void broadcast_tlb_flush(struct flush_tlb_info *info) +{ + bool pmd = info->stride_shift == PMD_SHIFT; + unsigned long maxnr = invlpgb_count_max; -+ unsigned long asid = info->mm->context.broadcast_asid; ++ unsigned long asid = info->mm->context.global_asid; + unsigned long addr = info->start; + unsigned long nr; + @@ -1722,12 +1878,17 @@ index a2becb85bea7..0080175153ef 100644 + if (info->stride_shift > PMD_SHIFT) + maxnr = 1; + -+ if (info->end == TLB_FLUSH_ALL || info->freed_tables) { -+ invlpgb_flush_single_pcid(kern_pcid(asid)); ++ /* ++ * TLB flushes with INVLPGB are kicked off asynchronously. ++ * The inc_mm_tlb_gen() guarantees page table updates are done ++ * before these TLB flushes happen. ++ */ ++ if (info->end == TLB_FLUSH_ALL) { ++ invlpgb_flush_single_pcid_nosync(kern_pcid(asid)); + /* Do any CPUs supporting INVLPGB need PTI? */ + if (static_cpu_has(X86_FEATURE_PTI)) -+ invlpgb_flush_single_pcid(user_pcid(asid)); -+ } else do { ++ invlpgb_flush_single_pcid_nosync(user_pcid(asid)); ++ } else for (; addr < info->end; addr += nr << info->stride_shift) { + /* + * Calculate how many pages can be flushed at once; if the + * remainder of the range is less than one page, flush one. @@ -1735,43 +1896,42 @@ index a2becb85bea7..0080175153ef 100644 + nr = min(maxnr, (info->end - addr) >> info->stride_shift); + nr = max(nr, 1); + -+ invlpgb_flush_user_nr(kern_pcid(asid), addr, nr, pmd); ++ invlpgb_flush_user_nr_nosync(kern_pcid(asid), addr, nr, pmd, info->freed_tables); + /* Do any CPUs supporting INVLPGB need PTI? */ + if (static_cpu_has(X86_FEATURE_PTI)) -+ invlpgb_flush_user_nr(user_pcid(asid), addr, nr, pmd); -+ addr += nr << info->stride_shift; -+ } while (addr < info->end); ++ invlpgb_flush_user_nr_nosync(user_pcid(asid), addr, nr, pmd, info->freed_tables); ++ } + + finish_asid_transition(info); + + /* Wait for the INVLPGBs kicked off above to finish. */ + tlbsync(); +} -+#endif /* CONFIG_CPU_SUP_AMD */ ++#endif /* CONFIG_X86_BROADCAST_TLB_FLUSH */ + /* * Given an ASID, flush the corresponding user ASID. We can delay this * until the next time we switch to it. 
-@@ -556,8 +809,9 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, +@@ -556,8 +856,9 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, */ if (prev == next) { /* Not actually switching mm's */ - VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) != - next->context.ctx_id); -+ if (is_dyn_asid(prev_asid)) -+ VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) != -+ next->context.ctx_id); ++ VM_WARN_ON(is_dyn_asid(prev_asid) && ++ this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) != ++ next->context.ctx_id); /* * If this races with another thread that enables lam, 'new_lam' -@@ -573,6 +827,23 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, +@@ -573,6 +874,23 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, !cpumask_test_cpu(cpu, mm_cpumask(next)))) cpumask_set_cpu(cpu, mm_cpumask(next)); + /* + * Check if the current mm is transitioning to a new ASID. + */ -+ if (needs_broadcast_asid_reload(next, prev_asid)) { ++ if (needs_global_asid_reload(next, prev_asid)) { + next_tlb_gen = atomic64_read(&next->context.tlb_gen); + + choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush); @@ -1782,24 +1942,44 @@ index a2becb85bea7..0080175153ef 100644 + * Broadcast TLB invalidation keeps this PCID up to date + * all the time. + */ -+ if (is_broadcast_asid(prev_asid)) ++ if (is_global_asid(prev_asid)) + return; + /* * If the CPU is not in lazy TLB mode, we are just switching * from one thread in a process to another thread in the same -@@ -629,8 +900,10 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, - barrier(); +@@ -606,6 +924,13 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, + */ + cond_mitigation(tsk); + ++ /* ++ * Let nmi_uaccess_okay() and finish_asid_transition() ++ * know that we're changing CR3. ++ */ ++ this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING); ++ barrier(); ++ + /* + * Stop remote flushes for the previous mm. + * Skip kernel threads; we never send init_mm TLB flushing IPIs, +@@ -623,14 +948,12 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, + next_tlb_gen = atomic64_read(&next->context.tlb_gen); + + choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush); +- +- /* Let nmi_uaccess_okay() know that we're changing CR3. 
*/ +- this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING); +- barrier(); } +reload_tlb: new_lam = mm_lam_cr3_mask(next); if (need_flush) { -+ VM_BUG_ON(is_broadcast_asid(new_asid)); ++ VM_WARN_ON_ONCE(is_global_asid(new_asid)); this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id); this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen); load_new_mm_cr3(next->pgd, new_asid, new_lam, true); -@@ -749,7 +1022,7 @@ static void flush_tlb_func(void *info) +@@ -749,7 +1072,7 @@ static void flush_tlb_func(void *info) const struct flush_tlb_info *f = info; struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm); u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); @@ -1808,24 +1988,24 @@ index a2becb85bea7..0080175153ef 100644 bool local = smp_processor_id() == f->initiating_cpu; unsigned long nr_invalidate = 0; u64 mm_tlb_gen; -@@ -769,6 +1042,16 @@ static void flush_tlb_func(void *info) +@@ -769,6 +1092,16 @@ static void flush_tlb_func(void *info) if (unlikely(loaded_mm == &init_mm)) return; -+ /* Reload the ASID if transitioning into or out of a broadcast ASID */ -+ if (needs_broadcast_asid_reload(loaded_mm, loaded_mm_asid)) { ++ /* Reload the ASID if transitioning into or out of a global ASID */ ++ if (needs_global_asid_reload(loaded_mm, loaded_mm_asid)) { + switch_mm_irqs_off(NULL, loaded_mm, NULL); + loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); + } + + /* Broadcast ASIDs are always kept up to date with INVLPGB. */ -+ if (is_broadcast_asid(loaded_mm_asid)) ++ if (is_global_asid(loaded_mm_asid)) + return; + VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) != loaded_mm->context.ctx_id); -@@ -786,6 +1069,8 @@ static void flush_tlb_func(void *info) +@@ -786,6 +1119,8 @@ static void flush_tlb_func(void *info) return; } @@ -1834,32 +2014,7 @@ index a2becb85bea7..0080175153ef 100644 if (unlikely(f->new_tlb_gen != TLB_GENERATION_INVALID && f->new_tlb_gen <= local_tlb_gen)) { /* -@@ -825,7 +1110,7 @@ static void flush_tlb_func(void *info) - * - * The only question is whether to do a full or partial flush. - * -- * We do a partial flush if requested and two extra conditions -+ * We do a partial flush if requested and three extra conditions - * are met: - * - * 1. f->new_tlb_gen == local_tlb_gen + 1. We have an invariant that -@@ -852,10 +1137,14 @@ static void flush_tlb_func(void *info) - * date. By doing a full flush instead, we can increase - * local_tlb_gen all the way to mm_tlb_gen and we can probably - * avoid another flush in the very near future. -+ * -+ * 3. No page tables were freed. If page tables were freed, a full -+ * flush ensures intermediate translations in the TLB get flushed. - */ - if (f->end != TLB_FLUSH_ALL && - f->new_tlb_gen == local_tlb_gen + 1 && -- f->new_tlb_gen == mm_tlb_gen) { -+ f->new_tlb_gen == mm_tlb_gen && -+ !f->freed_tables) { - /* Partial flush */ - unsigned long addr = f->start; - -@@ -926,7 +1215,7 @@ STATIC_NOPV void native_flush_tlb_multi(const struct cpumask *cpumask, +@@ -926,7 +1261,7 @@ STATIC_NOPV void native_flush_tlb_multi(const struct cpumask *cpumask, * up on the new contents of what used to be page tables, while * doing a speculative memory access. 
*/ @@ -1868,102 +2023,155 @@ index a2becb85bea7..0080175153ef 100644 on_each_cpu_mask(cpumask, flush_tlb_func, (void *)info, true); else on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func, -@@ -998,14 +1287,18 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, +@@ -981,6 +1316,15 @@ static struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm, + info->new_tlb_gen = new_tlb_gen; + info->initiating_cpu = smp_processor_id(); + ++ /* ++ * If the number of flushes is so large that a full flush ++ * would be faster, do a full flush. ++ */ ++ if ((end - start) >> stride_shift > tlb_single_page_flush_ceiling) { ++ info->start = 0; ++ info->end = TLB_FLUSH_ALL; ++ } ++ + return info; + } + +@@ -998,17 +1342,8 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, bool freed_tables) { struct flush_tlb_info *info; -+ unsigned long threshold = tlb_single_page_flush_ceiling; ++ int cpu = get_cpu(); u64 new_tlb_gen; - int cpu; - -+ if (static_cpu_has(X86_FEATURE_INVLPGB)) -+ threshold *= invlpgb_count_max; -+ - cpu = get_cpu(); - - /* Should we flush just the requested range? */ - if ((end == TLB_FLUSH_ALL) || +- int cpu; +- +- cpu = get_cpu(); +- +- /* Should we flush just the requested range? */ +- if ((end == TLB_FLUSH_ALL) || - ((end - start) >> stride_shift) > tlb_single_page_flush_ceiling) { -+ ((end - start) >> stride_shift) > threshold) { - start = 0; - end = TLB_FLUSH_ALL; - } -@@ -1021,8 +1314,11 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, +- start = 0; +- end = TLB_FLUSH_ALL; +- } + + /* This is also a barrier that synchronizes with switch_mm(). */ + new_tlb_gen = inc_mm_tlb_gen(mm); +@@ -1021,8 +1356,11 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, * a local TLB flush is needed. Optimize this use-case by calling * flush_tlb_func_local() directly in this case. 
*/ - if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) { -+ if (IS_ENABLED(CONFIG_CPU_SUP_AMD) && mm_broadcast_asid(mm)) { ++ if (mm_global_asid(mm)) { + broadcast_tlb_flush(info); + } else if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) { flush_tlb_multi(mm_cpumask(mm), info); -+ count_tlb_flush(mm); ++ consider_global_asid(mm); } else if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) { lockdep_assert_irqs_enabled(); local_irq_disable(); -@@ -1045,9 +1341,41 @@ static void do_flush_tlb_all(void *info) +@@ -1036,6 +1374,19 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, + } + + ++static bool broadcast_flush_tlb_all(void) ++{ ++ if (!IS_ENABLED(CONFIG_X86_BROADCAST_TLB_FLUSH)) ++ return false; ++ ++ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) ++ return false; ++ ++ guard(preempt)(); ++ invlpgb_flush_all(); ++ return true; ++} ++ + static void do_flush_tlb_all(void *info) + { + count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); +@@ -1044,10 +1395,36 @@ static void do_flush_tlb_all(void *info) + void flush_tlb_all(void) { - count_vm_tlb_event(NR_TLB_REMOTE_FLUSH); -+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) { -+ guard(preempt)(); -+ invlpgb_flush_all(); -+ tlbsync(); ++ if (broadcast_flush_tlb_all()) + return; -+ } + count_vm_tlb_event(NR_TLB_REMOTE_FLUSH); on_each_cpu(do_flush_tlb_all, NULL, 1); } -+static void broadcast_kernel_range_flush(unsigned long start, unsigned long end) ++static bool broadcast_kernel_range_flush(struct flush_tlb_info *info) +{ + unsigned long addr; -+ unsigned long maxnr = invlpgb_count_max; -+ unsigned long threshold = tlb_single_page_flush_ceiling * maxnr; ++ unsigned long nr; + -+ /* -+ * TLBSYNC only waits for flushes originating on the same CPU. -+ * Disabling migration allows us to wait on all flushes. 
-+ */ -+ guard(preempt)(); ++ if (!IS_ENABLED(CONFIG_X86_BROADCAST_TLB_FLUSH)) ++ return false; + -+ if (end == TLB_FLUSH_ALL || -+ (end - start) > threshold << PAGE_SHIFT) { ++ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) ++ return false; ++ ++ if (info->end == TLB_FLUSH_ALL) { + invlpgb_flush_all(); -+ } else { -+ unsigned long nr; -+ for (addr = start; addr < end; addr += nr << PAGE_SHIFT) { -+ nr = min((end - addr) >> PAGE_SHIFT, maxnr); -+ invlpgb_flush_addr(addr, nr); -+ } ++ return true; + } + ++ for (addr = info->start; addr < info->end; addr += nr << PAGE_SHIFT) { ++ nr = min((info->end - addr) >> PAGE_SHIFT, invlpgb_count_max); ++ invlpgb_flush_addr_nosync(addr, nr); ++ } + tlbsync(); ++ return true; +} + static void do_kernel_range_flush(void *info) { struct flush_tlb_info *f = info; -@@ -1060,6 +1388,11 @@ static void do_kernel_range_flush(void *info) +@@ -1060,22 +1437,21 @@ static void do_kernel_range_flush(void *info) void flush_tlb_kernel_range(unsigned long start, unsigned long end) { -+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) { -+ broadcast_kernel_range_flush(start, end); -+ return; -+ } +- /* Balance as user space task's flush, a bit conservative */ +- if (end == TLB_FLUSH_ALL || +- (end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) { +- on_each_cpu(do_flush_tlb_all, NULL, 1); +- } else { +- struct flush_tlb_info *info; ++ struct flush_tlb_info *info; + +- preempt_disable(); +- info = get_flush_tlb_info(NULL, start, end, 0, false, +- TLB_GENERATION_INVALID); ++ guard(preempt)(); + ++ info = get_flush_tlb_info(NULL, start, end, PAGE_SHIFT, false, ++ TLB_GENERATION_INVALID); + - /* Balance as user space task's flush, a bit conservative */ - if (end == TLB_FLUSH_ALL || - (end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) { -@@ -1244,7 +1577,6 @@ EXPORT_SYMBOL_GPL(__flush_tlb_all); - void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) - { - struct flush_tlb_info *info; -- ++ if (broadcast_kernel_range_flush(info)) ++ ; /* Fall through. */ ++ else if (info->end == TLB_FLUSH_ALL) ++ on_each_cpu(do_flush_tlb_all, NULL, 1); ++ else + on_each_cpu(do_kernel_range_flush, info, 1); + +- put_flush_tlb_info(); +- preempt_enable(); +- } ++ put_flush_tlb_info(); + } + + /* +@@ -1247,7 +1623,7 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) + int cpu = get_cpu(); - info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false, -@@ -1263,12 +1595,49 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) +- info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false, ++ info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, PAGE_SHIFT, false, + TLB_GENERATION_INVALID); + /* + * flush_tlb_multi() is not optimized for the common case in which only +@@ -1263,12 +1639,62 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) local_irq_enable(); } @@ -1988,8 +2196,8 @@ index a2becb85bea7..0080175153ef 100644 + struct mm_struct *mm, + unsigned long uaddr) +{ -+ if (static_cpu_has(X86_FEATURE_INVLPGB) && mm_broadcast_asid(mm)) { -+ u16 asid = mm_broadcast_asid(mm); ++ if (static_cpu_has(X86_FEATURE_INVLPGB) && mm_global_asid(mm)) { ++ u16 asid = mm_global_asid(mm); + /* + * Queue up an asynchronous invalidation. 
The corresponding + * TLBSYNC is done in arch_tlbbatch_flush(), and must be done @@ -1999,11 +2207,24 @@ index a2becb85bea7..0080175153ef 100644 + batch->used_invlpgb = true; + migrate_disable(); + } -+ invlpgb_flush_user_nr(kern_pcid(asid), uaddr, 1, 0); ++ invlpgb_flush_user_nr_nosync(kern_pcid(asid), uaddr, 1, false, false); + /* Do any CPUs supporting INVLPGB need PTI? */ + if (static_cpu_has(X86_FEATURE_PTI)) -+ invlpgb_flush_user_nr(user_pcid(asid), uaddr, 1, 0); ++ invlpgb_flush_user_nr_nosync(user_pcid(asid), uaddr, 1, false, false); ++ ++ /* ++ * Some CPUs might still be using a local ASID for this ++ * process, and require IPIs, while others are using the ++ * global ASID. ++ * ++ * In this corner case we need to do both the broadcast ++ * TLB invalidation, and send IPIs. The IPIs will help ++ * stragglers transition to the broadcast ASID. ++ */ ++ if (READ_ONCE(mm->context.asid_transition)) ++ goto also_send_ipi; + } else { ++also_send_ipi: + inc_mm_tlb_gen(mm); + cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm)); + } @@ -2089,12 +2310,32 @@ index bb2119e5a0d0..a593d5edfd88 100644 tlb_gather_mmu(&tlb, vms->vma->vm_mm); update_hiwater_rss(vms->vma->vm_mm); unmap_vmas(&tlb, mas_detach, vms->vma, vms->start, vms->end, +diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h +index 3ae84c3b8e6d..dc1c1057f26e 100644 +--- a/tools/arch/x86/include/asm/msr-index.h ++++ b/tools/arch/x86/include/asm/msr-index.h +@@ -25,6 +25,7 @@ + #define _EFER_SVME 12 /* Enable virtualization */ + #define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */ + #define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ ++#define _EFER_TCE 15 /* Enable Translation Cache Extensions */ + #define _EFER_AUTOIBRS 21 /* Enable Automatic IBRS */ + + #define EFER_SCE (1<<_EFER_SCE) +@@ -34,6 +35,7 @@ + #define EFER_SVME (1<<_EFER_SVME) + #define EFER_LMSLE (1<<_EFER_LMSLE) + #define EFER_FFXSR (1<<_EFER_FFXSR) ++#define EFER_TCE (1<<_EFER_TCE) + #define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS) + + /* -- 2.48.0.rc1 -From 9341991abd224336e551e90c7179e2e221fdf466 Mon Sep 17 00:00:00 2001 +From 1fc2e15c0c690b276928953ff73277b4d66e67f3 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:37:35 +0100 +Date: Mon, 20 Jan 2025 13:21:45 +0100 Subject: [PATCH 03/12] bbr3 Signed-off-by: Peter Jung @@ -5478,9 +5719,9 @@ index b412ed88ccd9..d70f8b742b21 100644 -- 2.48.0.rc1 -From 32089eb0a217a8d425f387e5e613d498ad760f34 Mon Sep 17 00:00:00 2001 +From e01619bda1e69eea53c0f3ef61476fb02da06868 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:37:49 +0100 +Date: Mon, 20 Jan 2025 13:21:55 +0100 Subject: [PATCH 04/12] cachy Signed-off-by: Peter Jung @@ -5499,12 +5740,23 @@ Signed-off-by: Peter Jung drivers/cpufreq/Kconfig.x86 | 2 - drivers/cpufreq/intel_pstate.c | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + + drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c | 44 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h | 1 + + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 +- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 10 + + drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c | 19 + + drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h | 1 + + drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 1 + drivers/gpu/drm/amd/display/Kconfig | 6 + - .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- + .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 69 +- + .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 7 + .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 2 +- .../amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 6 +- 
.../amd/display/amdgpu_dm/amdgpu_dm_plane.c | 6 +- + .../drm/amd/display/dc/bios/bios_parser2.c | 13 +- + .../drm/amd/display/dc/core/dc_link_exports.c | 6 + + drivers/gpu/drm/amd/display/dc/dc.h | 3 + + .../dc/resource/dce120/dce120_resource.c | 17 + drivers/gpu/drm/amd/pm/amdgpu_pm.c | 3 + drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 14 +- drivers/input/evdev.c | 19 +- @@ -5546,7 +5798,7 @@ Signed-off-by: Peter Jung mm/vmpressure.c | 4 + mm/vmscan.c | 143 + net/ipv4/inet_connection_sock.c | 2 +- - 61 files changed, 6557 insertions(+), 65 deletions(-) + 72 files changed, 6714 insertions(+), 93 deletions(-) create mode 100644 drivers/media/v4l2-core/v4l2loopback.c create mode 100644 drivers/media/v4l2-core/v4l2loopback.h create mode 100644 drivers/media/v4l2-core/v4l2loopback_formats.h @@ -5683,7 +5935,7 @@ index f48eaa98d22d..fc777c14cff6 100644 unprivileged_userfaultfd ======================== diff --git a/Makefile b/Makefile -index e20a62ad397f..9a63ab456ffc 100644 +index b9464c88ac72..ea555e6a8bf1 100644 --- a/Makefile +++ b/Makefile @@ -860,11 +860,19 @@ KBUILD_CFLAGS += -fno-delete-null-pointer-checks @@ -5707,7 +5959,7 @@ index e20a62ad397f..9a63ab456ffc 100644 # depends on `opt-level` and `debug-assertions`, respectively. KBUILD_RUSTFLAGS += -Cdebug-assertions=$(if $(CONFIG_RUST_DEBUG_ASSERTIONS),y,n) diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu -index 2a7279d80460..301ced02b077 100644 +index bacdc502903f..f2c97bdcef58 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -155,9 +155,8 @@ config MPENTIUM4 @@ -6550,6 +6802,118 @@ index 4653a8d2823a..6590e83dfbf0 100644 extern int amdgpu_vis_vram_limit; extern int amdgpu_gart_size; extern int amdgpu_gtt_size; +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +index 093141ad6ed0..e476e45b996a 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +@@ -36,13 +36,6 @@ + #include "atombios_encoders.h" + #include "bif/bif_4_1_d.h" + +-static void amdgpu_atombios_lookup_i2c_gpio_quirks(struct amdgpu_device *adev, +- ATOM_GPIO_I2C_ASSIGMENT *gpio, +- u8 index) +-{ +- +-} +- + static struct amdgpu_i2c_bus_rec amdgpu_atombios_get_bus_rec_for_i2c_gpio(ATOM_GPIO_I2C_ASSIGMENT *gpio) + { + struct amdgpu_i2c_bus_rec i2c; +@@ -108,9 +101,6 @@ struct amdgpu_i2c_bus_rec amdgpu_atombios_lookup_i2c_gpio(struct amdgpu_device * + + gpio = &i2c_info->asGPIO_Info[0]; + for (i = 0; i < num_indices; i++) { +- +- amdgpu_atombios_lookup_i2c_gpio_quirks(adev, gpio, i); +- + if (gpio->sucI2cId.ucAccess == id) { + i2c = amdgpu_atombios_get_bus_rec_for_i2c_gpio(gpio); + break; +@@ -142,8 +132,6 @@ void amdgpu_atombios_i2c_init(struct amdgpu_device *adev) + + gpio = &i2c_info->asGPIO_Info[0]; + for (i = 0; i < num_indices; i++) { +- amdgpu_atombios_lookup_i2c_gpio_quirks(adev, gpio, i); +- + i2c = amdgpu_atombios_get_bus_rec_for_i2c_gpio(gpio); + + if (i2c.valid) { +@@ -156,6 +144,38 @@ void amdgpu_atombios_i2c_init(struct amdgpu_device *adev) + } + } + ++void amdgpu_atombios_oem_i2c_init(struct amdgpu_device *adev, u8 i2c_id) ++{ ++ struct atom_context *ctx = adev->mode_info.atom_context; ++ ATOM_GPIO_I2C_ASSIGMENT *gpio; ++ struct amdgpu_i2c_bus_rec i2c; ++ int index = GetIndexIntoMasterTable(DATA, GPIO_I2C_Info); ++ struct _ATOM_GPIO_I2C_INFO *i2c_info; ++ uint16_t data_offset, size; ++ int i, num_indices; ++ char stmp[32]; ++ ++ if (amdgpu_atom_parse_data_header(ctx, index, &size, NULL, NULL, &data_offset)) { ++ i2c_info = 
(struct _ATOM_GPIO_I2C_INFO *)(ctx->bios + data_offset); ++ ++ num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) / ++ sizeof(ATOM_GPIO_I2C_ASSIGMENT); ++ ++ gpio = &i2c_info->asGPIO_Info[0]; ++ for (i = 0; i < num_indices; i++) { ++ i2c = amdgpu_atombios_get_bus_rec_for_i2c_gpio(gpio); ++ ++ if (i2c.valid && i2c.i2c_id == i2c_id) { ++ sprintf(stmp, "OEM 0x%x", i2c.i2c_id); ++ adev->i2c_bus[i] = amdgpu_i2c_create(adev_to_drm(adev), &i2c, stmp); ++ break; ++ } ++ gpio = (ATOM_GPIO_I2C_ASSIGMENT *) ++ ((u8 *)gpio + sizeof(ATOM_GPIO_I2C_ASSIGMENT)); ++ } ++ } ++} ++ + struct amdgpu_gpio_rec + amdgpu_atombios_lookup_gpio(struct amdgpu_device *adev, + u8 id) +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h +index 0e16432d9a72..867bc5c5ce67 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h +@@ -136,6 +136,7 @@ amdgpu_atombios_lookup_gpio(struct amdgpu_device *adev, + struct amdgpu_i2c_bus_rec amdgpu_atombios_lookup_i2c_gpio(struct amdgpu_device *adev, + uint8_t id); + void amdgpu_atombios_i2c_init(struct amdgpu_device *adev); ++void amdgpu_atombios_oem_i2c_init(struct amdgpu_device *adev, u8 i2c_id); + + bool amdgpu_atombios_has_dce_engine_info(struct amdgpu_device *adev); + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +index cd4fac120834..1ab433d774cc 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +@@ -4461,8 +4461,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, + goto failed; + } + /* init i2c buses */ +- if (!amdgpu_device_has_dc_support(adev)) +- amdgpu_atombios_i2c_init(adev); ++ amdgpu_i2c_init(adev); + } + } + +@@ -4724,8 +4723,7 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) + amdgpu_reset_fini(adev); + + /* free i2c buses */ +- if (!amdgpu_device_has_dc_support(adev)) +- amdgpu_i2c_fini(adev); ++ amdgpu_i2c_fini(adev); + + if (amdgpu_emu_mode != 1) + amdgpu_atombios_fini(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 38686203bea6..811d020f3f4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -6578,6 +6942,60 @@ index 38686203bea6..811d020f3f4b 100644 /** * DOC: vramlimit (int) * Restrict the total amount of VRAM in MiB for testing. The default is 0 (Use full VRAM). 
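As an illustrative aside (not part of the patch itself): the OEM DDC plumbing in this patch — amdgpu_atombios_oem_i2c_init() above, plus the DC-side adapter registered later in amdgpu_dm.c — ends up exposing one more ordinary i2c adapter ("OEM 0x<id>" on the legacy ATOM path, "AMDGPU DM i2c OEM bus" on the DC path), so whatever OEM controller sits behind it can be reached with the stock i2c-dev interface. A minimal sketch follows; the bus number /dev/i2c-9 and slave address 0x2a are placeholders for the real adapter and device, which can be located by matching the adapter name in /sys/class/i2c-adapter/i2c-N/name.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/i2c-dev.h>

int main(void)
{
	/* Placeholder bus number; match the adapter name in sysfs first. */
	int fd = open("/dev/i2c-9", O_RDWR);
	unsigned char reg = 0x00, val;

	if (fd < 0)
		return 1;

	/* Placeholder slave address for the OEM device behind the bus. */
	if (ioctl(fd, I2C_SLAVE, 0x2a) < 0) {
		close(fd);
		return 1;
	}

	/* Write a register index, then read one byte back. */
	if (write(fd, &reg, 1) == 1 && read(fd, &val, 1) == 1)
		printf("reg 0x%02x = 0x%02x\n", reg, val);

	close(fd);
	return 0;
}
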
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c +index f0765ccde668..8179d0814db9 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c +@@ -225,6 +225,25 @@ void amdgpu_i2c_destroy(struct amdgpu_i2c_chan *i2c) + kfree(i2c); + } + ++void amdgpu_i2c_init(struct amdgpu_device *adev) ++{ ++ if (!adev->is_atom_fw) { ++ if (!amdgpu_device_has_dc_support(adev)) { ++ amdgpu_atombios_i2c_init(adev); ++ } else { ++ switch (adev->asic_type) { ++ case CHIP_POLARIS10: ++ case CHIP_POLARIS11: ++ case CHIP_POLARIS12: ++ amdgpu_atombios_oem_i2c_init(adev, 0x97); ++ break; ++ default: ++ break; ++ } ++ } ++ } ++} ++ + /* remove all the buses */ + void amdgpu_i2c_fini(struct amdgpu_device *adev) + { +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h +index 21e3d1dad0a1..1d3d3806e0dd 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h +@@ -28,6 +28,7 @@ struct amdgpu_i2c_chan *amdgpu_i2c_create(struct drm_device *dev, + const struct amdgpu_i2c_bus_rec *rec, + const char *name); + void amdgpu_i2c_destroy(struct amdgpu_i2c_chan *i2c); ++void amdgpu_i2c_init(struct amdgpu_device *adev); + void amdgpu_i2c_fini(struct amdgpu_device *adev); + struct amdgpu_i2c_chan * + amdgpu_i2c_lookup(struct amdgpu_device *adev, +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +index 5e3faefc5510..6da4f946cac0 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +@@ -609,6 +609,7 @@ struct amdgpu_i2c_adapter { + struct i2c_adapter base; + + struct ddc_service *ddc_service; ++ bool oem; + }; + + #define TO_DM_AUX(x) container_of((x), struct amdgpu_dm_dp_aux, aux) diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index 11e3f2f3b174..7b1bd69dc29e 100644 --- a/drivers/gpu/drm/amd/display/Kconfig @@ -6594,10 +7012,73 @@ index 11e3f2f3b174..7b1bd69dc29e 100644 + endmenu diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c -index cd16dae534dc..1508978f92dd 100644 +index 5f216d626cbb..382af92c4ff1 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c -@@ -4516,7 +4516,7 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) +@@ -177,6 +177,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev); + static void amdgpu_dm_fini(struct amdgpu_device *adev); + static bool is_freesync_video_mode(const struct drm_display_mode *mode, struct amdgpu_dm_connector *aconnector); + static void reset_freesync_config_for_crtc(struct dm_crtc_state *new_crtc_state); ++static struct amdgpu_i2c_adapter * ++create_i2c(struct ddc_service *ddc_service, bool oem); + + static enum drm_mode_subconnector get_subconnector_type(struct dc_link *link) + { +@@ -2839,6 +2841,33 @@ static int amdgpu_dm_smu_write_watermarks_table(struct amdgpu_device *adev) + return 0; + } + ++static int dm_oem_i2c_hw_init(struct amdgpu_device *adev) ++{ ++ struct amdgpu_display_manager *dm = &adev->dm; ++ struct amdgpu_i2c_adapter *oem_i2c; ++ struct ddc_service *oem_ddc_service; ++ int r; ++ ++ oem_ddc_service = dc_get_oem_i2c_device(adev->dm.dc); ++ if (oem_ddc_service) { ++ oem_i2c = create_i2c(oem_ddc_service, true); ++ if (!oem_i2c) { ++ dev_info(adev->dev, "Failed to create oem i2c adapter data\n"); ++ return -ENOMEM; ++ 
} ++ ++ r = i2c_add_adapter(&oem_i2c->base); ++ if (r) { ++ dev_info(adev->dev, "Failed to register oem i2c\n"); ++ kfree(oem_i2c); ++ return r; ++ } ++ dm->oem_i2c = oem_i2c; ++ } ++ ++ return 0; ++} ++ + /** + * dm_hw_init() - Initialize DC device + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. +@@ -2870,6 +2899,10 @@ static int dm_hw_init(struct amdgpu_ip_block *ip_block) + return r; + amdgpu_dm_hpd_init(adev); + ++ r = dm_oem_i2c_hw_init(adev); ++ if (r) ++ dev_info(adev->dev, "Failed to add OEM i2c bus\n"); ++ + return 0; + } + +@@ -2885,6 +2918,8 @@ static int dm_hw_fini(struct amdgpu_ip_block *ip_block) + { + struct amdgpu_device *adev = ip_block->adev; + ++ kfree(adev->dm.oem_i2c); ++ + amdgpu_dm_hpd_fini(adev); + + amdgpu_dm_irq_fini(adev); +@@ -4516,7 +4551,7 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) return r; } @@ -6606,6 +7087,93 @@ index cd16dae534dc..1508978f92dd 100644 if (amdgpu_dm_create_color_properties(adev)) { dc_state_release(state->context); kfree(state); +@@ -8218,7 +8253,7 @@ static int amdgpu_dm_i2c_xfer(struct i2c_adapter *i2c_adap, + int i; + int result = -EIO; + +- if (!ddc_service->ddc_pin || !ddc_service->ddc_pin->hw_info.hw_supported) ++ if (!ddc_service->ddc_pin) + return result; + + cmd.payloads = kcalloc(num, sizeof(struct i2c_payload), GFP_KERNEL); +@@ -8237,11 +8272,18 @@ static int amdgpu_dm_i2c_xfer(struct i2c_adapter *i2c_adap, + cmd.payloads[i].data = msgs[i].buf; + } + +- if (dc_submit_i2c( +- ddc_service->ctx->dc, +- ddc_service->link->link_index, +- &cmd)) +- result = num; ++ if (i2c->oem) { ++ if (dc_submit_i2c_oem( ++ ddc_service->ctx->dc, ++ &cmd)) ++ result = num; ++ } else { ++ if (dc_submit_i2c( ++ ddc_service->ctx->dc, ++ ddc_service->link->link_index, ++ &cmd)) ++ result = num; ++ } + + kfree(cmd.payloads); + return result; +@@ -8258,9 +8300,7 @@ static const struct i2c_algorithm amdgpu_dm_i2c_algo = { + }; + + static struct amdgpu_i2c_adapter * +-create_i2c(struct ddc_service *ddc_service, +- int link_index, +- int *res) ++create_i2c(struct ddc_service *ddc_service, bool oem) + { + struct amdgpu_device *adev = ddc_service->ctx->driver_context; + struct amdgpu_i2c_adapter *i2c; +@@ -8271,9 +8311,14 @@ create_i2c(struct ddc_service *ddc_service, + i2c->base.owner = THIS_MODULE; + i2c->base.dev.parent = &adev->pdev->dev; + i2c->base.algo = &amdgpu_dm_i2c_algo; +- snprintf(i2c->base.name, sizeof(i2c->base.name), "AMDGPU DM i2c hw bus %d", link_index); ++ if (oem) ++ snprintf(i2c->base.name, sizeof(i2c->base.name), "AMDGPU DM i2c OEM bus"); ++ else ++ snprintf(i2c->base.name, sizeof(i2c->base.name), "AMDGPU DM i2c hw bus %d", ++ ddc_service->link->link_index); + i2c_set_adapdata(&i2c->base, i2c); + i2c->ddc_service = ddc_service; ++ i2c->oem = oem; + + return i2c; + } +@@ -8298,7 +8343,7 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm, + link->priv = aconnector; + + +- i2c = create_i2c(link->ddc, link->link_index, &res); ++ i2c = create_i2c(link->ddc, false); + if (!i2c) { + DRM_ERROR("Failed to create i2c adapter data\n"); + return -ENOMEM; +diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +index 2227cd8e4a89..5710776bb0e2 100644 +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +@@ -606,6 +606,13 @@ struct amdgpu_display_manager { + * Bounding box data read from dmub during early initialization for DCN4+ + */ + struct dml2_soc_bb 
*bb_from_dmub; ++ ++ /** ++ * @oem_i2c: ++ * ++ * OEM i2c bus ++ */ ++ struct amdgpu_i2c_adapter *oem_i2c; + }; + + enum dsc_clock_force_state { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index ebabfe3a512f..4d3ebcaacca1 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -6620,7 +7188,7 @@ index ebabfe3a512f..4d3ebcaacca1 100644 * * AMD driver supports pre-defined mathematical functions for transferring diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c -index 64a041c2af05..08790bcfe109 100644 +index 36a830a7440f..a8fc8bd52d51 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -470,7 +470,7 @@ static int amdgpu_dm_crtc_late_register(struct drm_crtc *crtc) @@ -6681,6 +7249,124 @@ index 495e3cd70426..704a48209657 100644 dm_atomic_plane_attach_color_mgmt_properties(dm, plane); #endif /* Create (reset) the plane state */ +diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +index c9a6de110b74..470ec970217b 100644 +--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c ++++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +@@ -1778,6 +1778,7 @@ static enum bp_result get_firmware_info_v3_1( + struct dc_firmware_info *info) + { + struct atom_firmware_info_v3_1 *firmware_info; ++ struct atom_firmware_info_v3_2 *firmware_info32; + struct atom_display_controller_info_v4_1 *dce_info = NULL; + + if (!info) +@@ -1785,6 +1786,8 @@ static enum bp_result get_firmware_info_v3_1( + + firmware_info = GET_IMAGE(struct atom_firmware_info_v3_1, + DATA_TABLES(firmwareinfo)); ++ firmware_info32 = GET_IMAGE(struct atom_firmware_info_v3_2, ++ DATA_TABLES(firmwareinfo)); + + dce_info = GET_IMAGE(struct atom_display_controller_info_v4_1, + DATA_TABLES(dce_info)); +@@ -1817,7 +1820,15 @@ static enum bp_result get_firmware_info_v3_1( + bp->cmd_tbl.get_smu_clock_info(bp, SMU9_SYSPLL0_ID) * 10; + } + +- info->oem_i2c_present = false; ++ /* These fields are marked as reserved in v3_1, but they appear to be populated ++ * properly. 
++ */ ++ if (firmware_info32->board_i2c_feature_id == 0x2) { ++ info->oem_i2c_present = true; ++ info->oem_i2c_obj_id = firmware_info32->board_i2c_feature_gpio_id; ++ } else { ++ info->oem_i2c_present = false; ++ } + + return BP_RESULT_OK; + } +diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c +index 457d60eeb486..13636eb4ec3f 100644 +--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c ++++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c +@@ -142,6 +142,12 @@ bool dc_link_update_dsc_config(struct pipe_ctx *pipe_ctx) + return link->dc->link_srv->update_dsc_config(pipe_ctx); + } + ++struct ddc_service * ++dc_get_oem_i2c_device(struct dc *dc) ++{ ++ return dc->res_pool->oem_device; ++} ++ + bool dc_is_oem_i2c_device_present( + struct dc *dc, + size_t slave_address) +diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h +index 08c5a315b3a6..70d6005ecd64 100644 +--- a/drivers/gpu/drm/amd/display/dc/dc.h ++++ b/drivers/gpu/drm/amd/display/dc/dc.h +@@ -1939,6 +1939,9 @@ int dc_link_aux_transfer_raw(struct ddc_service *ddc, + struct aux_payload *payload, + enum aux_return_code_type *operation_result); + ++struct ddc_service * ++dc_get_oem_i2c_device(struct dc *dc); ++ + bool dc_is_oem_i2c_device_present( + struct dc *dc, + size_t slave_address +diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c +index c63c59623433..eb1e158d3436 100644 +--- a/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c ++++ b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c +@@ -67,6 +67,7 @@ + #include "reg_helper.h" + + #include "dce100/dce100_resource.h" ++#include "link.h" + + #ifndef mmDP0_DP_DPHY_INTERNAL_CTRL + #define mmDP0_DP_DPHY_INTERNAL_CTRL 0x210f +@@ -659,6 +660,12 @@ static void dce120_resource_destruct(struct dce110_resource_pool *pool) + + if (pool->base.dmcu != NULL) + dce_dmcu_destroy(&pool->base.dmcu); ++ ++ if (pool->base.oem_device != NULL) { ++ struct dc *dc = pool->base.oem_device->ctx->dc; ++ ++ dc->link_srv->destroy_ddc_service(&pool->base.oem_device); ++ } + } + + static void read_dce_straps( +@@ -1054,6 +1061,7 @@ static bool dce120_resource_construct( + struct dc *dc, + struct dce110_resource_pool *pool) + { ++ struct ddc_service_init_data ddc_init_data = {0}; + unsigned int i; + int j; + struct dc_context *ctx = dc->ctx; +@@ -1257,6 +1265,15 @@ static bool dce120_resource_construct( + + bw_calcs_data_update_from_pplib(dc); + ++ if (dc->ctx->dc_bios->fw_info.oem_i2c_present) { ++ ddc_init_data.ctx = dc->ctx; ++ ddc_init_data.link = NULL; ++ ddc_init_data.id.id = dc->ctx->dc_bios->fw_info.oem_i2c_obj_id; ++ ddc_init_data.id.enum_id = 0; ++ ddc_init_data.id.type = OBJECT_TYPE_GENERIC; ++ pool->base.oem_device = dc->link_srv->create_ddc_service(&ddc_init_data); ++ } ++ + return true; + + irqs_create_fail: diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index e8ae7681bf0a..8a0d873983f3 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -12608,7 +13294,7 @@ index 2ddb827e3bea..464049c4af3f 100644 return state; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index 3e9ca38512de..463fe1dc6de8 100644 +index 26958431deb7..8c0f17a96d4f 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -73,10 +73,19 @@ unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG; @@ -12888,7 +13574,7 @@ index 
a2b16b08cbbf..48d611e58ad3 100644 static int __read_mostly sysctl_compact_memory; diff --git a/mm/huge_memory.c b/mm/huge_memory.c -index e53d83b3e5cf..b4c205f2042a 100644 +index db64116a4f84..3e0266c973e1 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -64,7 +64,11 @@ unsigned long transparent_hugepage_flags __read_mostly = @@ -12916,7 +13602,7 @@ index 24b68b425afb..081ddb92db87 100644 /* diff --git a/mm/page-writeback.c b/mm/page-writeback.c -index d213ead95675..0430a97b30fd 100644 +index d9861e42b2bd..13ab2294f0bb 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -71,7 +71,11 @@ static long ratelimit_pages = 32; @@ -12944,7 +13630,7 @@ index d213ead95675..0430a97b30fd 100644 EXPORT_SYMBOL_GPL(dirty_writeback_interval); diff --git a/mm/page_alloc.c b/mm/page_alloc.c -index cae7b93864c2..57038052c153 100644 +index 01eab25edf89..3ea393f1311a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -271,7 +271,11 @@ const char * const migratetype_names[MIGRATE_TYPES] = { @@ -12997,7 +13683,7 @@ index bd5183dfd879..3a410f53a07c 100644 /* diff --git a/mm/vmscan.c b/mm/vmscan.c -index 9a859b7d18d7..ec7f96bb0e9f 100644 +index b1ec5ece067e..e258174d240a 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -148,6 +148,15 @@ struct scan_control { @@ -13192,7 +13878,7 @@ index 9a859b7d18d7..ec7f96bb0e9f 100644 else if (min_seq[LRU_GEN_ANON] < min_seq[LRU_GEN_FILE]) type = LRU_GEN_ANON; else if (swappiness == 1) -@@ -4826,6 +4965,8 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc) +@@ -4829,6 +4968,8 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc) struct mem_cgroup *memcg = lruvec_memcg(lruvec); struct pglist_data *pgdat = lruvec_pgdat(lruvec); @@ -13201,7 +13887,7 @@ index 9a859b7d18d7..ec7f96bb0e9f 100644 /* lru_gen_age_node() called mem_cgroup_calculate_protection() */ if (mem_cgroup_below_min(NULL, memcg)) return MEMCG_LRU_YOUNG; -@@ -5974,6 +6115,8 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) +@@ -5977,6 +6118,8 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) prepare_scan_control(pgdat, sc); @@ -13226,9 +13912,9 @@ index 6872b5aff73e..1910fe1b2471 100644 -- 2.48.0.rc1 -From 8d1fa2a8636c551dd33500837e87e2c3f889d95c Mon Sep 17 00:00:00 2001 +From 7bc012030531a472b823293e167a86cd58da545c Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:38:01 +0100 +Date: Mon, 20 Jan 2025 13:22:05 +0100 Subject: [PATCH 05/12] crypto Signed-off-by: Peter Jung @@ -14000,25 +14686,25 @@ index fbf43482e1f5..11e95fc62636 100644 -- 2.48.0.rc1 -From e094aa9f2a3d8ac13a8bca382f0f5585f80926ee Mon Sep 17 00:00:00 2001 +From 2f514dfe8b006e7fa976b6265bef4b8efb81ec11 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:38:11 +0100 +Date: Mon, 20 Jan 2025 13:22:15 +0100 Subject: [PATCH 06/12] fixes Signed-off-by: Peter Jung --- arch/Kconfig | 4 +- - drivers/acpi/acpi_video.c | 50 +++++++++++-------- .../link/protocols/link_edp_panel_control.c | 3 +- - drivers/gpu/drm/drm_edid.c | 47 +++++++++++++++-- - drivers/gpu/drm/nouveau/nouveau_acpi.c | 2 +- + drivers/gpu/drm/drm_edid.c | 47 +++++++++++++++++-- + drivers/hid/hid-asus.c | 26 ++++++++++ drivers/hid/hid-ids.h | 1 + + include/linux/platform_data/x86/asus-wmi.h | 5 ++ kernel/fork.c | 9 ++-- - kernel/kprobes.c | 23 ++++----- + kernel/kprobes.c | 23 +++++---- kernel/sched/ext.c | 4 +- scripts/package/PKGBUILD | 5 ++ - sound/pci/hda/patch_realtek.c | 2 + - 11 files changed, 103 insertions(+), 47 deletions(-) + sound/pci/hda/patch_realtek.c | 
4 +- + 11 files changed, 105 insertions(+), 26 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 6682b2a53e34..fe54298ae05c 100644 @@ -14042,99 +14728,6 @@ index 6682b2a53e34..fe54298ae05c 100644 depends on HAVE_ARCH_MMAP_RND_COMPAT_BITS help This value can be used to select the number of bits to use to -diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c -index 8274a17872ed..3c627bdf2d1b 100644 ---- a/drivers/acpi/acpi_video.c -+++ b/drivers/acpi/acpi_video.c -@@ -610,16 +610,29 @@ acpi_video_device_lcd_get_level_current(struct acpi_video_device *device, - return 0; - } - -+/* -+ * Arg: -+ * device : video output device (LCD, CRT, ..) -+ * edid : address for returned EDID pointer -+ * length : _DDC length to request (must be a multiple of 128) -+ * -+ * Return Value: -+ * Length of EDID (positive value) or error (negative value) -+ * -+ * Get EDID from ACPI _DDC. On success, a pointer to the EDID data is written -+ * to the edid address, and the length of the EDID is returned. The caller is -+ * responsible for freeing the edid pointer. -+ */ -+ - static int --acpi_video_device_EDID(struct acpi_video_device *device, -- union acpi_object **edid, int length) -+acpi_video_device_EDID(struct acpi_video_device *device, void **edid, int length) - { -- int status; -+ acpi_status status; - struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; - union acpi_object *obj; - union acpi_object arg0 = { ACPI_TYPE_INTEGER }; - struct acpi_object_list args = { 1, &arg0 }; -- -+ int ret; - - *edid = NULL; - -@@ -636,16 +649,17 @@ acpi_video_device_EDID(struct acpi_video_device *device, - - obj = buffer.pointer; - -- if (obj && obj->type == ACPI_TYPE_BUFFER) -- *edid = obj; -- else { -+ if (obj && obj->type == ACPI_TYPE_BUFFER) { -+ *edid = kmemdup(obj->buffer.pointer, obj->buffer.length, GFP_KERNEL); -+ ret = *edid ? 
obj->buffer.length : -ENOMEM; -+ } else { - acpi_handle_debug(device->dev->handle, - "Invalid _DDC data for length %d\n", length); -- status = -EFAULT; -- kfree(obj); -+ ret = -EFAULT; - } - -- return status; -+ kfree(obj); -+ return ret; - } - - /* bus */ -@@ -1435,9 +1449,7 @@ int acpi_video_get_edid(struct acpi_device *device, int type, int device_id, - { - struct acpi_video_bus *video; - struct acpi_video_device *video_device; -- union acpi_object *buffer = NULL; -- acpi_status status; -- int i, length; -+ int i, length, ret; - - if (!device || !acpi_driver_data(device)) - return -EINVAL; -@@ -1477,16 +1489,10 @@ int acpi_video_get_edid(struct acpi_device *device, int type, int device_id, - } - - for (length = 512; length > 0; length -= 128) { -- status = acpi_video_device_EDID(video_device, &buffer, -- length); -- if (ACPI_SUCCESS(status)) -- break; -+ ret = acpi_video_device_EDID(video_device, edid, length); -+ if (ret > 0) -+ return ret; - } -- if (!length) -- continue; -- -- *edid = buffer->buffer.pointer; -- return length; - } - - return -ENODEV; diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index e0e3bb865359..ba98d56a0fe4 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -14242,19 +14835,50 @@ index 855beafb76ff..ad78059ee954 100644 if (!newmode) continue; -diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c -index 8f0c69aad248..21b56cc7605c 100644 ---- a/drivers/gpu/drm/nouveau/nouveau_acpi.c -+++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c -@@ -384,7 +384,7 @@ nouveau_acpi_edid(struct drm_device *dev, struct drm_connector *connector) - if (ret < 0) - return NULL; - -- return kmemdup(edid, EDID_LENGTH, GFP_KERNEL); -+ return edid; +diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c +index 506c6f377e7d..46e3e42f9eb5 100644 +--- a/drivers/hid/hid-asus.c ++++ b/drivers/hid/hid-asus.c +@@ -432,6 +432,26 @@ static int asus_kbd_get_functions(struct hid_device *hdev, + return ret; } - bool nouveau_acpi_video_backlight_use_native(void) ++static int asus_kbd_disable_oobe(struct hid_device *hdev) ++{ ++ const u8 init[][6] = { ++ { FEATURE_KBD_REPORT_ID, 0x05, 0x20, 0x31, 0x00, 0x08 }, ++ { FEATURE_KBD_REPORT_ID, 0xBA, 0xC5, 0xC4 }, ++ { FEATURE_KBD_REPORT_ID, 0xD0, 0x8F, 0x01 }, ++ { FEATURE_KBD_REPORT_ID, 0xD0, 0x85, 0xFF } ++ }; ++ int ret; ++ ++ for (size_t i = 0; i < ARRAY_SIZE(init); i++) { ++ ret = asus_kbd_set_report(hdev, init[i], sizeof(init[i])); ++ if (ret < 0) ++ return ret; ++ } ++ ++ hid_info(hdev, "Disabled OOBE for keyboard\n"); ++ return 0; ++} ++ + static void asus_schedule_work(struct asus_kbd_leds *led) + { + unsigned long flags; +@@ -534,6 +554,12 @@ static int asus_kbd_register_leds(struct hid_device *hdev) + ret = asus_kbd_init(hdev, FEATURE_KBD_LED_REPORT_ID2); + if (ret < 0) + return ret; ++ ++ if (dmi_match(DMI_PRODUCT_FAMILY, "ProArt P16")) { ++ ret = asus_kbd_disable_oobe(hdev); ++ if (ret < 0) ++ return ret; ++ } + } else { + /* Initialize keyboard */ + ret = asus_kbd_init(hdev, FEATURE_KBD_REPORT_ID); diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 1f47fda809b9..6c2df0d37b3b 100644 --- a/drivers/hid/hid-ids.h @@ -14267,6 +14891,22 @@ index 1f47fda809b9..6c2df0d37b3b 100644 #define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY 0x1abe #define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY_X 0x1b4c #define USB_DEVICE_ID_ASUSTEK_ROG_CLAYMORE_II_KEYBOARD 0x196b +diff --git 
a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h +index 365e119bebaa..783e2a336861 100644 +--- a/include/linux/platform_data/x86/asus-wmi.h ++++ b/include/linux/platform_data/x86/asus-wmi.h +@@ -184,6 +184,11 @@ static const struct dmi_system_id asus_use_hid_led_dmi_ids[] = { + DMI_MATCH(DMI_PRODUCT_FAMILY, "ROG Flow"), + }, + }, ++ { ++ .matches = { ++ DMI_MATCH(DMI_PRODUCT_FAMILY, "ProArt P16"), ++ }, ++ }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GA403U"), diff --git a/kernel/fork.c b/kernel/fork.c index 0cb5431b4d7e..e919c8c3a121 100644 --- a/kernel/fork.c @@ -14390,13 +15030,15 @@ index dca706617adc..89d3aef160b7 100644 mkdir -p "${builddir}" cp System.map "${builddir}/System.map" diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c -index b74b566f675e..070dd1ab89c6 100644 +index ad66378d7321..4210bc8f12e1 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c -@@ -10641,6 +10641,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = { +@@ -10641,8 +10641,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1e1f, "ASUS Vivobook 15 X1504VAP", ALC2XX_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1e51, "ASUS Zephyrus M15", ALC294_FIXUP_ASUS_GU502_PINS), SND_PCI_QUIRK(0x1043, 0x1e5e, "ASUS ROG Strix G513", ALC294_FIXUP_ASUS_G513_PINS), +- SND_PCI_QUIRK(0x1043, 0x1e63, "ASUS H7606W", ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1), +- SND_PCI_QUIRK(0x1043, 0x1e83, "ASUS GA605W", ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1), + SND_PCI_QUIRK(0x1043, 0x1e63, "ASUS H7606W", ALC285_FIXUP_ASUS_GA403U_HEADSET_MIC), + SND_PCI_QUIRK(0x1043, 0x1e83, "ASUS GA605W", ALC285_FIXUP_ASUS_GA403U_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1e8e, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA401), @@ -14405,9 +15047,9 @@ index b74b566f675e..070dd1ab89c6 100644 -- 2.48.0.rc1 -From 25702dae4d4390c6e804bfe18eef1341a854b9f2 Mon Sep 17 00:00:00 2001 +From edca92ed206343ae09ee1af6ae0dfc26a68085b1 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:38:22 +0100 +Date: Mon, 20 Jan 2025 13:22:28 +0100 Subject: [PATCH 07/12] itmt-core-ranking Signed-off-by: Peter Jung @@ -14642,10 +15284,10 @@ index b5a8f0891135..ef63b1c0b491 100644 } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index 463fe1dc6de8..f849298a4cc1 100644 +index 8c0f17a96d4f..c532ffb153b4 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c -@@ -9941,6 +9941,8 @@ struct sg_lb_stats { +@@ -9836,6 +9836,8 @@ struct sg_lb_stats { unsigned int group_weight; enum group_type group_type; unsigned int group_asym_packing; /* Tasks should be moved to preferred CPU */ @@ -14654,7 +15296,7 @@ index 463fe1dc6de8..f849298a4cc1 100644 unsigned int group_smt_balance; /* Task on busy SMT be moved */ unsigned long group_misfit_task_load; /* A CPU has a task too big for its capacity */ #ifdef CONFIG_NUMA_BALANCING -@@ -10270,7 +10272,7 @@ sched_group_asym(struct lb_env *env, struct sg_lb_stats *sgs, struct sched_group +@@ -10165,7 +10167,7 @@ sched_group_asym(struct lb_env *env, struct sg_lb_stats *sgs, struct sched_group (sgs->group_weight - sgs->idle_cpus != 1)) return false; @@ -14663,7 +15305,7 @@ index 463fe1dc6de8..f849298a4cc1 100644 } /* One group has more than one SMT CPU while the other group does not */ -@@ -10351,6 +10353,17 @@ sched_reduced_capacity(struct rq *rq, struct sched_domain *sd) +@@ -10246,6 +10248,17 @@ sched_reduced_capacity(struct rq *rq, struct sched_domain *sd) return check_cpu_capacity(rq, sd); } @@ 
-14681,7 +15323,7 @@ index 463fe1dc6de8..f849298a4cc1 100644 /** * update_sg_lb_stats - Update sched_group's statistics for load balancing. * @env: The load balancing environment. -@@ -10367,11 +10380,13 @@ static inline void update_sg_lb_stats(struct lb_env *env, +@@ -10262,11 +10275,13 @@ static inline void update_sg_lb_stats(struct lb_env *env, bool *sg_overloaded, bool *sg_overutilized) { @@ -14696,7 +15338,7 @@ index 463fe1dc6de8..f849298a4cc1 100644 for_each_cpu_and(i, sched_group_span(group), env->cpus) { struct rq *rq = cpu_rq(i); -@@ -10385,16 +10400,12 @@ static inline void update_sg_lb_stats(struct lb_env *env, +@@ -10280,16 +10295,12 @@ static inline void update_sg_lb_stats(struct lb_env *env, nr_running = rq->nr_running; sgs->sum_nr_running += nr_running; @@ -14715,7 +15357,7 @@ index 463fe1dc6de8..f849298a4cc1 100644 /* * No need to call idle_cpu() if nr_running is not 0 */ -@@ -10404,10 +10415,21 @@ static inline void update_sg_lb_stats(struct lb_env *env, +@@ -10299,10 +10310,21 @@ static inline void update_sg_lb_stats(struct lb_env *env, continue; } @@ -14738,7 +15380,7 @@ index 463fe1dc6de8..f849298a4cc1 100644 /* Check for a misfit task on the cpu */ if (sgs->group_misfit_task_load < rq->misfit_task_load) { sgs->group_misfit_task_load = rq->misfit_task_load; -@@ -10502,7 +10524,7 @@ static bool update_sd_pick_busiest(struct lb_env *env, +@@ -10397,7 +10419,7 @@ static bool update_sd_pick_busiest(struct lb_env *env, case group_asym_packing: /* Prefer to move from lowest priority CPU's work */ @@ -14795,9 +15437,9 @@ index 9748a4c8d668..59b8157cb114 100644 -- 2.48.0.rc1 -From 1871388db87b6e7114a28eec15fc03e4c0497e52 Mon Sep 17 00:00:00 2001 +From dad63380fd4bccaf1df47a5d2a14b3622a828bbf Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:38:39 +0100 +Date: Mon, 20 Jan 2025 13:22:39 +0100 Subject: [PATCH 08/12] ntsync Signed-off-by: Peter Jung @@ -15224,10 +15866,10 @@ index 000000000000..25e7c4aef968 + ``objs`` and in ``alert``. If this is attempted, the function fails + with ``EINVAL``. 
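As an illustrative aside (not part of the patch itself), a minimal userspace sketch of the wait semantics documented above: it assumes the uapi revision in which the create ioctls return the new object's file descriptor, and it uses designated initializers so the exact field order of struct ntsync_wait_args does not matter; ioctl and field names should be checked against the include/uapi/linux/ntsync.h added by this series before relying on them.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/ntsync.h>

int main(void)
{
	int dev = open("/dev/ntsync", O_RDWR);
	if (dev < 0)
		return 1;

	/* Semaphore that starts signaled: count 1, maximum 1. */
	struct ntsync_sem_args sem_args = { .count = 1, .max = 1 };
	int sem = ioctl(dev, NTSYNC_IOC_CREATE_SEM, &sem_args);
	if (sem < 0)
		return 1;

	int objs[1] = { sem };
	struct ntsync_wait_args wait = {
		.timeout = UINT64_MAX,         /* never time out */
		.objs    = (uintptr_t)objs,    /* array of object fds */
		.count   = 1,
		.owner   = (uint32_t)getpid(), /* nonzero id; only used for mutexes */
		.alert   = 0,                  /* no alert object */
	};

	/*
	 * Acquires the semaphore (its count drops to 0) and reports which
	 * entry in objs[] was signaled, as described in the text above.
	 */
	if (ioctl(dev, NTSYNC_IOC_WAIT_ANY, &wait) == 0)
		printf("acquired object index %u\n", wait.index);

	close(sem);
	close(dev);
	return 0;
}
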
diff --git a/MAINTAINERS b/MAINTAINERS -index a87ddad78e26..69c7e0c9cbfd 100644 +index 0fa7c5728f1e..efecb59adfe6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS -@@ -16708,6 +16708,15 @@ T: git https://github.com/Paragon-Software-Group/linux-ntfs3.git +@@ -16709,6 +16709,15 @@ T: git https://github.com/Paragon-Software-Group/linux-ntfs3.git F: Documentation/filesystems/ntfs3.rst F: fs/ntfs3/ @@ -17825,9 +18467,9 @@ index 000000000000..3aad311574c4 -- 2.48.0.rc1 -From ecafa3b39e7691288beb920eb362064d548d45e7 Mon Sep 17 00:00:00 2001 +From d0d15e3d79a2d5bb2c94b8ff3d2ab51f0b0100fe Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:38:51 +0100 +Date: Mon, 20 Jan 2025 13:22:50 +0100 Subject: [PATCH 09/12] perf-per-core Signed-off-by: Peter Jung @@ -18723,9 +19365,9 @@ index 8277c64f88db..b5a5e1411469 100644 -- 2.48.0.rc1 -From b0522d38174d109d02042dc5591c1ab52de16a94 Mon Sep 17 00:00:00 2001 +From 6a7ea67c66634276802b4b9b0964a0b00db97d9c Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:39:03 +0100 +Date: Mon, 20 Jan 2025 13:23:02 +0100 Subject: [PATCH 10/12] pksm Signed-off-by: Peter Jung @@ -19156,9 +19798,9 @@ index e9115b4d8b63..2afc778f2d17 100644 -- 2.48.0.rc1 -From 5662d52675419bbe7b47731ad55c01ecf94b8426 Mon Sep 17 00:00:00 2001 +From 5e459e48f274c34d701726a61a96140381b1de2b Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:39:12 +0100 +Date: Mon, 20 Jan 2025 13:23:11 +0100 Subject: [PATCH 11/12] t2 Signed-off-by: Peter Jung @@ -19314,10 +19956,10 @@ index ecccc0473da9..6de6b0e6abf3 100644 ---- diff --git a/MAINTAINERS b/MAINTAINERS -index 69c7e0c9cbfd..01be85b7d886 100644 +index efecb59adfe6..16af42c68cca 100644 --- a/MAINTAINERS +++ b/MAINTAINERS -@@ -7065,6 +7065,12 @@ S: Supported +@@ -7066,6 +7066,12 @@ S: Supported T: git https://gitlab.freedesktop.org/drm/misc/kernel.git F: drivers/gpu/drm/sun4i/sun8i* @@ -29483,9 +30125,9 @@ index 9eed3683ad76..7ddbf75f4c26 100755 -- 2.48.0.rc1 -From 91beebc1e962374c32c95b975d59ff5aa90b66c1 Mon Sep 17 00:00:00 2001 +From 6f96c228cd968c7f47eb90d9e7ad6d679bf5a7f0 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:39:21 +0100 +Date: Mon, 20 Jan 2025 13:23:20 +0100 Subject: [PATCH 12/12] zstd Signed-off-by: Peter Jung diff --git a/6.13/sched-dev/0001-bore-cachy.patch b/6.13/sched-dev/0001-bore-cachy.patch deleted file mode 100644 index 1eb64cb1..00000000 --- a/6.13/sched-dev/0001-bore-cachy.patch +++ /dev/null @@ -1,1030 +0,0 @@ -From 2aaaad0215c8d15c5133eb2bc1c77c021edff609 Mon Sep 17 00:00:00 2001 -From: Eric Naim -Date: Mon, 20 Jan 2025 09:19:36 +0700 -Subject: [PATCH] bore-cachy - -Signed-off-by: Eric Naim ---- - include/linux/sched.h | 18 ++ - include/linux/sched/bore.h | 40 ++++ - init/Kconfig | 17 ++ - kernel/Kconfig.hz | 17 ++ - kernel/fork.c | 6 + - kernel/sched/Makefile | 1 + - kernel/sched/bore.c | 443 +++++++++++++++++++++++++++++++++++++ - kernel/sched/core.c | 6 + - kernel/sched/debug.c | 61 ++++- - kernel/sched/fair.c | 86 +++++-- - kernel/sched/sched.h | 9 + - 11 files changed, 686 insertions(+), 18 deletions(-) - create mode 100644 include/linux/sched/bore.h - create mode 100644 kernel/sched/bore.c - -diff --git a/include/linux/sched.h b/include/linux/sched.h -index 64934e0830af..7ec02a323014 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -538,6 +538,15 @@ struct sched_statistics { - #endif /* CONFIG_SCHEDSTATS */ - } ____cacheline_aligned; - -+#ifdef CONFIG_SCHED_BORE -+struct sched_burst_cache { -+ u8 score; -+ u32 count; -+ u64 timestamp; 
-+ spinlock_t lock; -+}; -+#endif // CONFIG_SCHED_BORE -+ - struct sched_entity { - /* For load-balancing: */ - struct load_weight load; -@@ -557,6 +566,15 @@ struct sched_entity { - u64 sum_exec_runtime; - u64 prev_sum_exec_runtime; - u64 vruntime; -+#ifdef CONFIG_SCHED_BORE -+ u64 burst_time; -+ u8 prev_burst_penalty; -+ u8 curr_burst_penalty; -+ u8 burst_penalty; -+ u8 burst_score; -+ struct sched_burst_cache child_burst; -+ struct sched_burst_cache group_burst; -+#endif // CONFIG_SCHED_BORE - s64 vlag; - u64 slice; - -diff --git a/include/linux/sched/bore.h b/include/linux/sched/bore.h -new file mode 100644 -index 000000000000..a8faabc2885e ---- /dev/null -+++ b/include/linux/sched/bore.h -@@ -0,0 +1,40 @@ -+ -+#include -+#include -+ -+#ifndef _LINUX_SCHED_BORE_H -+#define _LINUX_SCHED_BORE_H -+#define SCHED_BORE_VERSION "5.9.6" -+ -+#ifdef CONFIG_SCHED_BORE -+extern u8 __read_mostly sched_bore; -+extern u8 __read_mostly sched_burst_exclude_kthreads; -+extern u8 __read_mostly sched_burst_smoothness_long; -+extern u8 __read_mostly sched_burst_smoothness_short; -+extern u8 __read_mostly sched_burst_fork_atavistic; -+extern u8 __read_mostly sched_burst_parity_threshold; -+extern u8 __read_mostly sched_burst_penalty_offset; -+extern uint __read_mostly sched_burst_penalty_scale; -+extern uint __read_mostly sched_burst_cache_stop_count; -+extern uint __read_mostly sched_burst_cache_lifetime; -+extern uint __read_mostly sched_deadline_boost_mask; -+ -+extern void update_burst_score(struct sched_entity *se); -+extern void update_burst_penalty(struct sched_entity *se); -+ -+extern void restart_burst(struct sched_entity *se); -+extern void restart_burst_rescale_deadline(struct sched_entity *se); -+ -+extern int sched_bore_update_handler(const struct ctl_table *table, int write, -+ void __user *buffer, size_t *lenp, loff_t *ppos); -+ -+extern void sched_clone_bore( -+ struct task_struct *p, struct task_struct *parent, u64 clone_flags, u64 now); -+ -+extern void reset_task_bore(struct task_struct *p); -+extern void sched_bore_init(void); -+ -+extern void reweight_entity( -+ struct cfs_rq *cfs_rq, struct sched_entity *se, unsigned long weight); -+#endif // CONFIG_SCHED_BORE -+#endif // _LINUX_SCHED_BORE_H -diff --git a/init/Kconfig b/init/Kconfig -index 9437171030e2..c6f811d72dfd 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -1362,6 +1362,23 @@ config CHECKPOINT_RESTORE - - If unsure, say N here. - -+config SCHED_BORE -+ bool "Burst-Oriented Response Enhancer" -+ default y -+ help -+ In Desktop and Mobile computing, one might prefer interactive -+ tasks to keep responsive no matter what they run in the background. -+ -+ Enabling this kernel feature modifies the scheduler to discriminate -+ tasks by their burst time (runtime since it last went sleeping or -+ yielding state) and prioritize those that run less bursty. -+ Such tasks usually include window compositor, widgets backend, -+ terminal emulator, video playback, games and so on. -+ With a little impact to scheduling fairness, it may improve -+ responsiveness especially under heavy background workload. -+ -+ If unsure, say Y here. 
-+ - config SCHED_AUTOGROUP - bool "Automatic process group scheduling" - select CGROUPS -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 0f78364efd4f..83a6b919ab29 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -79,5 +79,22 @@ config HZ - default 750 if HZ_750 - default 1000 if HZ_1000 - -+config MIN_BASE_SLICE_NS -+ int "Default value for min_base_slice_ns" -+ default 2000000 -+ help -+ The BORE Scheduler automatically calculates the optimal base -+ slice for the configured HZ using the following equation: -+ -+ base_slice_ns = -+ 1000000000/HZ * DIV_ROUNDUP(min_base_slice_ns, 1000000000/HZ) -+ -+ This option sets the default lower bound limit of the base slice -+ to prevent the loss of task throughput due to overscheduling. -+ -+ Setting this value too high can cause the system to boot with -+ an unnecessarily large base slice, resulting in high scheduling -+ latency and poor system responsiveness. -+ - config SCHED_HRTICK - def_bool HIGH_RES_TIMERS -diff --git a/kernel/fork.c b/kernel/fork.c -index e919c8c3a121..726d3daa0498 100644 ---- a/kernel/fork.c -+++ b/kernel/fork.c -@@ -116,6 +116,8 @@ - #include - #include - -+#include -+ - #include - - #define CREATE_TRACE_POINTS -@@ -2524,6 +2526,10 @@ __latent_entropy struct task_struct *copy_process( - p->start_time = ktime_get_ns(); - p->start_boottime = ktime_get_boottime_ns(); - -+#ifdef CONFIG_SCHED_BORE -+ if (likely(p->pid)) -+ sched_clone_bore(p, current, clone_flags, p->start_time); -+#endif // CONFIG_SCHED_BORE - /* - * Make it visible to the rest of the system, but dont wake it up yet. - * Need tasklist lock for parent etc handling! -diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile -index 976092b7bd45..293aad675444 100644 ---- a/kernel/sched/Makefile -+++ b/kernel/sched/Makefile -@@ -32,3 +32,4 @@ obj-y += core.o - obj-y += fair.o - obj-y += build_policy.o - obj-y += build_utility.o -+obj-y += bore.o -diff --git a/kernel/sched/bore.c b/kernel/sched/bore.c -new file mode 100644 -index 000000000000..23aeb5649479 ---- /dev/null -+++ b/kernel/sched/bore.c -@@ -0,0 +1,443 @@ -+/* -+ * Burst-Oriented Response Enhancer (BORE) CPU Scheduler -+ * Copyright (C) 2021-2024 Masahito Suzuki -+ */ -+#include -+#include -+#include -+#include "sched.h" -+ -+#ifdef CONFIG_SCHED_BORE -+u8 __read_mostly sched_bore = 1; -+u8 __read_mostly sched_burst_exclude_kthreads = 1; -+u8 __read_mostly sched_burst_smoothness_long = 1; -+u8 __read_mostly sched_burst_smoothness_short = 0; -+u8 __read_mostly sched_burst_fork_atavistic = 2; -+u8 __read_mostly sched_burst_parity_threshold = 2; -+u8 __read_mostly sched_burst_penalty_offset = 24; -+uint __read_mostly sched_burst_penalty_scale = 1280; -+uint __read_mostly sched_burst_cache_stop_count = 64; -+uint __read_mostly sched_burst_cache_lifetime = 75000000; -+uint __read_mostly sched_deadline_boost_mask = ENQUEUE_INITIAL -+ | ENQUEUE_WAKEUP; -+static int __maybe_unused sixty_four = 64; -+static int __maybe_unused maxval_u8 = 255; -+static int __maybe_unused maxval_12_bits = 4095; -+ -+#define MAX_BURST_PENALTY (39U <<2) -+ -+static inline u32 log2plus1_u64_u32f8(u64 v) { -+ u32 integral = fls64(v); -+ u8 fractional = v << (64 - integral) >> 55; -+ return integral << 8 | fractional; -+} -+ -+static inline u32 calc_burst_penalty(u64 burst_time) { -+ u32 greed, tolerance, penalty, scaled_penalty; -+ -+ greed = log2plus1_u64_u32f8(burst_time); -+ tolerance = sched_burst_penalty_offset << 8; -+ penalty = max(0, (s32)(greed - tolerance)); -+ scaled_penalty = penalty * 
sched_burst_penalty_scale >> 16; -+ -+ return min(MAX_BURST_PENALTY, scaled_penalty); -+} -+ -+static inline u64 __scale_slice(u64 delta, u8 score) -+{return mul_u64_u32_shr(delta, sched_prio_to_wmult[score], 22);} -+ -+static inline u64 __unscale_slice(u64 delta, u8 score) -+{return mul_u64_u32_shr(delta, sched_prio_to_weight[score], 10);} -+ -+static void reweight_task_by_prio(struct task_struct *p, int prio) { -+ struct sched_entity *se = &p->se; -+ unsigned long weight = scale_load(sched_prio_to_weight[prio]); -+ -+ reweight_entity(cfs_rq_of(se), se, weight); -+ se->load.inv_weight = sched_prio_to_wmult[prio]; -+} -+ -+static inline u8 effective_prio(struct task_struct *p) { -+ u8 prio = p->static_prio - MAX_RT_PRIO; -+ if (likely(sched_bore)) -+ prio += p->se.burst_score; -+ return min(39, prio); -+} -+ -+void update_burst_score(struct sched_entity *se) { -+ if (!entity_is_task(se)) return; -+ struct task_struct *p = task_of(se); -+ u8 prev_prio = effective_prio(p); -+ -+ u8 burst_score = 0; -+ if (!((p->flags & PF_KTHREAD) && likely(sched_burst_exclude_kthreads))) -+ burst_score = se->burst_penalty >> 2; -+ se->burst_score = burst_score; -+ -+ u8 new_prio = effective_prio(p); -+ if (new_prio != prev_prio) -+ reweight_task_by_prio(p, new_prio); -+} -+ -+void update_burst_penalty(struct sched_entity *se) { -+ se->curr_burst_penalty = calc_burst_penalty(se->burst_time); -+ se->burst_penalty = max(se->prev_burst_penalty, se->curr_burst_penalty); -+ update_burst_score(se); -+} -+ -+static inline u32 binary_smooth(u32 new, u32 old) { -+ int increment = new - old; -+ return (0 <= increment)? -+ old + ( increment >> (int)sched_burst_smoothness_long): -+ old - (-increment >> (int)sched_burst_smoothness_short); -+} -+ -+static void revolve_burst_penalty(struct sched_entity *se) { -+ se->prev_burst_penalty = -+ binary_smooth(se->curr_burst_penalty, se->prev_burst_penalty); -+ se->burst_time = 0; -+ se->curr_burst_penalty = 0; -+} -+ -+inline void restart_burst(struct sched_entity *se) { -+ revolve_burst_penalty(se); -+ se->burst_penalty = se->prev_burst_penalty; -+ update_burst_score(se); -+} -+ -+void restart_burst_rescale_deadline(struct sched_entity *se) { -+ s64 vscaled, wremain, vremain = se->deadline - se->vruntime; -+ struct task_struct *p = task_of(se); -+ u8 prev_prio = effective_prio(p); -+ restart_burst(se); -+ u8 new_prio = effective_prio(p); -+ if (prev_prio > new_prio) { -+ wremain = __unscale_slice(abs(vremain), prev_prio); -+ vscaled = __scale_slice(wremain, new_prio); -+ if (unlikely(vremain < 0)) -+ vscaled = -vscaled; -+ se->deadline = se->vruntime + vscaled; -+ } -+} -+ -+static inline bool task_is_bore_eligible(struct task_struct *p) -+{return p && p->sched_class == &fair_sched_class && !p->exit_state;} -+ -+static void reset_task_weights_bore(void) { -+ struct task_struct *task; -+ struct rq *rq; -+ struct rq_flags rf; -+ -+ write_lock_irq(&tasklist_lock); -+ for_each_process(task) { -+ if (!task_is_bore_eligible(task)) continue; -+ rq = task_rq(task); -+ rq_pin_lock(rq, &rf); -+ update_rq_clock(rq); -+ reweight_task_by_prio(task, effective_prio(task)); -+ rq_unpin_lock(rq, &rf); -+ } -+ write_unlock_irq(&tasklist_lock); -+} -+ -+int sched_bore_update_handler(const struct ctl_table *table, int write, -+ void __user *buffer, size_t *lenp, loff_t *ppos) { -+ int ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos); -+ if (ret || !write) -+ return ret; -+ -+ reset_task_weights_bore(); -+ -+ return 0; -+} -+ -+#define for_each_child(p, t) \ -+ list_for_each_entry(t, 
&(p)->children, sibling) -+ -+static u32 count_entries_upto2(struct list_head *head) { -+ struct list_head *next = head->next; -+ return (next != head) + (next->next != head); -+} -+ -+static inline void init_task_burst_cache_lock(struct task_struct *p) { -+ spin_lock_init(&p->se.child_burst.lock); -+ spin_lock_init(&p->se.group_burst.lock); -+} -+ -+static inline bool burst_cache_expired(struct sched_burst_cache *bc, u64 now) -+{return (s64)(bc->timestamp + sched_burst_cache_lifetime - now) < 0;} -+ -+static void update_burst_cache(struct sched_burst_cache *bc, -+ struct task_struct *p, u32 cnt, u32 sum, u64 now) { -+ u8 avg = cnt ? sum / cnt : 0; -+ bc->score = max(avg, p->se.burst_penalty); -+ bc->count = cnt; -+ bc->timestamp = now; -+} -+ -+static inline void update_child_burst_direct(struct task_struct *p, u64 now) { -+ u32 cnt = 0, sum = 0; -+ struct task_struct *child; -+ -+ for_each_child(p, child) { -+ if (!task_is_bore_eligible(child)) continue; -+ cnt++; -+ sum += child->se.burst_penalty; -+ } -+ -+ update_burst_cache(&p->se.child_burst, p, cnt, sum, now); -+} -+ -+static inline u8 inherit_burst_direct( -+ struct task_struct *p, u64 now, u64 clone_flags) { -+ struct task_struct *parent = p; -+ struct sched_burst_cache *bc; -+ -+ if (clone_flags & CLONE_PARENT) -+ parent = parent->real_parent; -+ -+ bc = &parent->se.child_burst; -+ guard(spinlock)(&bc->lock); -+ if (burst_cache_expired(bc, now)) -+ update_child_burst_direct(parent, now); -+ -+ return bc->score; -+} -+ -+static void update_child_burst_topological( -+ struct task_struct *p, u64 now, u32 depth, u32 *acnt, u32 *asum) { -+ u32 cnt = 0, dcnt = 0, sum = 0; -+ struct task_struct *child, *dec; -+ struct sched_burst_cache *bc __maybe_unused; -+ -+ for_each_child(p, child) { -+ dec = child; -+ while ((dcnt = count_entries_upto2(&dec->children)) == 1) -+ dec = list_first_entry(&dec->children, struct task_struct, sibling); -+ -+ if (!dcnt || !depth) { -+ if (!task_is_bore_eligible(dec)) continue; -+ cnt++; -+ sum += dec->se.burst_penalty; -+ continue; -+ } -+ bc = &dec->se.child_burst; -+ spin_lock(&bc->lock); -+ if (!burst_cache_expired(bc, now)) { -+ cnt += bc->count; -+ sum += (u32)bc->score * bc->count; -+ if (sched_burst_cache_stop_count <= cnt) { -+ spin_unlock(&bc->lock); -+ break; -+ } -+ spin_unlock(&bc->lock); -+ continue; -+ } -+ update_child_burst_topological(dec, now, depth - 1, &cnt, &sum); -+ spin_unlock(&bc->lock); -+ } -+ -+ update_burst_cache(&p->se.child_burst, p, cnt, sum, now); -+ *acnt += cnt; -+ *asum += sum; -+} -+ -+static inline u8 inherit_burst_topological( -+ struct task_struct *p, u64 now, u64 clone_flags) { -+ struct task_struct *anc = p; -+ struct sched_burst_cache *bc; -+ u32 cnt = 0, sum = 0; -+ u32 base_child_cnt = 0; -+ -+ if (clone_flags & CLONE_PARENT) { -+ anc = anc->real_parent; -+ base_child_cnt = 1; -+ } -+ -+ for (struct task_struct *next; -+ anc != (next = anc->real_parent) && -+ count_entries_upto2(&anc->children) <= base_child_cnt;) { -+ anc = next; -+ base_child_cnt = 1; -+ } -+ -+ bc = &anc->se.child_burst; -+ guard(spinlock)(&bc->lock); -+ if (burst_cache_expired(bc, now)) -+ update_child_burst_topological( -+ anc, now, sched_burst_fork_atavistic - 1, &cnt, &sum); -+ -+ return bc->score; -+} -+ -+static inline void update_tg_burst(struct task_struct *p, u64 now) { -+ struct task_struct *task; -+ u32 cnt = 0, sum = 0; -+ -+ for_each_thread(p, task) { -+ if (!task_is_bore_eligible(task)) continue; -+ cnt++; -+ sum += task->se.burst_penalty; -+ } -+ -+ 
update_burst_cache(&p->se.group_burst, p, cnt, sum, now); -+} -+ -+static inline u8 inherit_burst_tg(struct task_struct *p, u64 now) { -+ struct task_struct *parent = rcu_dereference(p->group_leader); -+ struct sched_burst_cache *bc = &parent->se.group_burst; -+ guard(spinlock)(&bc->lock); -+ if (burst_cache_expired(bc, now)) -+ update_tg_burst(parent, now); -+ -+ return bc->score; -+} -+ -+void sched_clone_bore(struct task_struct *p, -+ struct task_struct *parent, u64 clone_flags, u64 now) { -+ struct sched_entity *se = &p->se; -+ u8 penalty; -+ -+ init_task_burst_cache_lock(p); -+ -+ if (!task_is_bore_eligible(p)) return; -+ -+ if (clone_flags & CLONE_THREAD) { -+ rcu_read_lock(); -+ penalty = inherit_burst_tg(parent, now); -+ rcu_read_unlock(); -+ } else { -+ read_lock(&tasklist_lock); -+ penalty = likely(sched_burst_fork_atavistic) ? -+ inherit_burst_topological(parent, now, clone_flags): -+ inherit_burst_direct(parent, now, clone_flags); -+ read_unlock(&tasklist_lock); -+ } -+ -+ revolve_burst_penalty(se); -+ se->burst_penalty = se->prev_burst_penalty = -+ max(se->prev_burst_penalty, penalty); -+ se->child_burst.timestamp = 0; -+ se->group_burst.timestamp = 0; -+} -+ -+void reset_task_bore(struct task_struct *p) { -+ p->se.burst_time = 0; -+ p->se.prev_burst_penalty = 0; -+ p->se.curr_burst_penalty = 0; -+ p->se.burst_penalty = 0; -+ p->se.burst_score = 0; -+ memset(&p->se.child_burst, 0, sizeof(struct sched_burst_cache)); -+ memset(&p->se.group_burst, 0, sizeof(struct sched_burst_cache)); -+} -+ -+void __init sched_bore_init(void) { -+ printk(KERN_INFO "BORE (Burst-Oriented Response Enhancer) CPU Scheduler modification %s by Masahito Suzuki", SCHED_BORE_VERSION); -+ reset_task_bore(&init_task); -+ init_task_burst_cache_lock(&init_task); -+} -+ -+#ifdef CONFIG_SYSCTL -+static struct ctl_table sched_bore_sysctls[] = { -+ { -+ .procname = "sched_bore", -+ .data = &sched_bore, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = sched_bore_update_handler, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_ONE, -+ }, -+ { -+ .procname = "sched_burst_exclude_kthreads", -+ .data = &sched_burst_exclude_kthreads, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = proc_dou8vec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_ONE, -+ }, -+ { -+ .procname = "sched_burst_smoothness_long", -+ .data = &sched_burst_smoothness_long, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = proc_dou8vec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_ONE, -+ }, -+ { -+ .procname = "sched_burst_smoothness_short", -+ .data = &sched_burst_smoothness_short, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = proc_dou8vec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_ONE, -+ }, -+ { -+ .procname = "sched_burst_fork_atavistic", -+ .data = &sched_burst_fork_atavistic, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = proc_dou8vec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_THREE, -+ }, -+ { -+ .procname = "sched_burst_parity_threshold", -+ .data = &sched_burst_parity_threshold, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = proc_dou8vec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = &maxval_u8, -+ }, -+ { -+ .procname = "sched_burst_penalty_offset", -+ .data = &sched_burst_penalty_offset, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = proc_dou8vec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = &sixty_four, -+ }, -+ { -+ .procname = "sched_burst_penalty_scale", -+ .data = &sched_burst_penalty_scale, -+ .maxlen = 
sizeof(uint), -+ .mode = 0644, -+ .proc_handler = proc_douintvec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = &maxval_12_bits, -+ }, -+ { -+ .procname = "sched_burst_cache_stop_count", -+ .data = &sched_burst_cache_stop_count, -+ .maxlen = sizeof(uint), -+ .mode = 0644, -+ .proc_handler = proc_douintvec, -+ }, -+ { -+ .procname = "sched_burst_cache_lifetime", -+ .data = &sched_burst_cache_lifetime, -+ .maxlen = sizeof(uint), -+ .mode = 0644, -+ .proc_handler = proc_douintvec, -+ }, -+ { -+ .procname = "sched_deadline_boost_mask", -+ .data = &sched_deadline_boost_mask, -+ .maxlen = sizeof(uint), -+ .mode = 0644, -+ .proc_handler = proc_douintvec, -+ }, -+}; -+ -+static int __init sched_bore_sysctl_init(void) { -+ register_sysctl_init("kernel", sched_bore_sysctls); -+ return 0; -+} -+late_initcall(sched_bore_sysctl_init); -+#endif // CONFIG_SYSCTL -+#endif // CONFIG_SCHED_BORE -diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index 3e5a6bf587f9..fb4bb3fa5a96 100644 ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -97,6 +97,8 @@ - #include "../../io_uring/io-wq.h" - #include "../smpboot.h" - -+#include -+ - EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpu); - EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpumask); - -@@ -8481,6 +8483,10 @@ void __init sched_init(void) - BUG_ON(!sched_class_above(&ext_sched_class, &idle_sched_class)); - #endif - -+#ifdef CONFIG_SCHED_BORE -+ sched_bore_init(); -+#endif // CONFIG_SCHED_BORE -+ - wait_bit_init(); - - #ifdef CONFIG_FAIR_GROUP_SCHED -diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c -index a1be00a988bf..66fcb229007d 100644 ---- a/kernel/sched/debug.c -+++ b/kernel/sched/debug.c -@@ -167,7 +167,53 @@ static const struct file_operations sched_feat_fops = { - }; - - #ifdef CONFIG_SMP -+#ifdef CONFIG_SCHED_BORE -+#define DEFINE_SYSCTL_SCHED_FUNC(name, update_func) \ -+static ssize_t sched_##name##_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) \ -+{ \ -+ char buf[16]; \ -+ unsigned int value; \ -+\ -+ if (cnt > 15) \ -+ cnt = 15; \ -+\ -+ if (copy_from_user(&buf, ubuf, cnt)) \ -+ return -EFAULT; \ -+ buf[cnt] = '\0'; \ -+\ -+ if (kstrtouint(buf, 10, &value)) \ -+ return -EINVAL; \ -+\ -+ sysctl_sched_##name = value; \ -+ sched_update_##update_func(); \ -+\ -+ *ppos += cnt; \ -+ return cnt; \ -+} \ -+\ -+static int sched_##name##_show(struct seq_file *m, void *v) \ -+{ \ -+ seq_printf(m, "%d\n", sysctl_sched_##name); \ -+ return 0; \ -+} \ -+\ -+static int sched_##name##_open(struct inode *inode, struct file *filp) \ -+{ \ -+ return single_open(filp, sched_##name##_show, NULL); \ -+} \ -+\ -+static const struct file_operations sched_##name##_fops = { \ -+ .open = sched_##name##_open, \ -+ .write = sched_##name##_write, \ -+ .read = seq_read, \ -+ .llseek = seq_lseek, \ -+ .release = single_release, \ -+}; - -+DEFINE_SYSCTL_SCHED_FUNC(min_base_slice, min_base_slice) -+ -+#undef DEFINE_SYSCTL_SCHED_FUNC -+#else // !CONFIG_SCHED_BORE - static ssize_t sched_scaling_write(struct file *filp, const char __user *ubuf, - size_t cnt, loff_t *ppos) - { -@@ -213,7 +259,7 @@ static const struct file_operations sched_scaling_fops = { - .llseek = seq_lseek, - .release = single_release, - }; -- -+#endif // CONFIG_SCHED_BORE - #endif /* SMP */ - - #ifdef CONFIG_PREEMPT_DYNAMIC -@@ -505,13 +551,20 @@ static __init int sched_init_debug(void) - debugfs_create_file("preempt", 0644, debugfs_sched, NULL, &sched_dynamic_fops); - #endif - -+#ifdef CONFIG_SCHED_BORE -+ debugfs_create_file("min_base_slice_ns", 0644, debugfs_sched, NULL, 
&sched_min_base_slice_fops); -+ debugfs_create_u32("base_slice_ns", 0444, debugfs_sched, &sysctl_sched_base_slice); -+#else // !CONFIG_SCHED_BORE - debugfs_create_u32("base_slice_ns", 0644, debugfs_sched, &sysctl_sched_base_slice); -+#endif // CONFIG_SCHED_BORE - - debugfs_create_u32("latency_warn_ms", 0644, debugfs_sched, &sysctl_resched_latency_warn_ms); - debugfs_create_u32("latency_warn_once", 0644, debugfs_sched, &sysctl_resched_latency_warn_once); - - #ifdef CONFIG_SMP -+#if !defined(CONFIG_SCHED_BORE) - debugfs_create_file("tunable_scaling", 0644, debugfs_sched, NULL, &sched_scaling_fops); -+#endif // CONFIG_SCHED_BORE - debugfs_create_u32("migration_cost_ns", 0644, debugfs_sched, &sysctl_sched_migration_cost); - debugfs_create_u32("nr_migrate", 0644, debugfs_sched, &sysctl_sched_nr_migrate); - -@@ -756,6 +809,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) - SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)), - SPLIT_NS(schedstat_val_or_zero(p->stats.sum_block_runtime))); - -+#ifdef CONFIG_SCHED_BORE -+ SEQ_printf(m, " %2d", p->se.burst_score); -+#endif // CONFIG_SCHED_BORE - #ifdef CONFIG_NUMA_BALANCING - SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p)); - #endif -@@ -1245,6 +1301,9 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, - - P(se.load.weight); - #ifdef CONFIG_SMP -+#ifdef CONFIG_SCHED_BORE -+ P(se.burst_score); -+#endif // CONFIG_SCHED_BORE - P(se.avg.load_sum); - P(se.avg.runnable_sum); - P(se.avg.util_sum); -diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index c532ffb153b4..c55d61977364 100644 ---- a/kernel/sched/fair.c -+++ b/kernel/sched/fair.c -@@ -55,6 +55,8 @@ - #include "stats.h" - #include "autogroup.h" - -+#include -+ - /* - * The initial- and re-scaling of tunables is configurable - * -@@ -64,28 +66,32 @@ - * SCHED_TUNABLESCALING_LOG - scaled logarithmically, *1+ilog(ncpus) - * SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus - * -- * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus)) -+ * BORE : default SCHED_TUNABLESCALING_NONE = *1 constant -+ * EEVDF: default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus)) - */ -+#ifdef CONFIG_SCHED_BORE -+unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE; -+#else // !CONFIG_SCHED_BORE - unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG; -+#endif // CONFIG_SCHED_BORE - - /* - * Minimal preemption granularity for CPU-bound tasks: - * -- * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds) -+ * BORE : base_slice = minimum multiple of nsecs_per_tick >= min_base_slice -+ * (default min_base_slice = 2000000 constant, units: nanoseconds) -+ * EEVDF: default 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds - */ --#ifdef CONFIG_CACHY --unsigned int sysctl_sched_base_slice = 350000ULL; --static unsigned int normalized_sysctl_sched_base_slice = 350000ULL; --#else -+#ifdef CONFIG_SCHED_BORE -+static const unsigned int nsecs_per_tick = 1000000000ULL / HZ; -+unsigned int sysctl_sched_min_base_slice = CONFIG_MIN_BASE_SLICE_NS; -+__read_mostly uint sysctl_sched_base_slice = nsecs_per_tick; -+#else // !CONFIG_SCHED_BORE - unsigned int sysctl_sched_base_slice = 750000ULL; - static unsigned int normalized_sysctl_sched_base_slice = 750000ULL; --#endif -+#endif // CONFIG_SCHED_BORE - --#ifdef CONFIG_CACHY --const_debug unsigned int sysctl_sched_migration_cost = 300000UL; --#else - const_debug unsigned int sysctl_sched_migration_cost = 500000UL; --#endif - - static int __init 
setup_sched_thermal_decay_shift(char *str) - { -@@ -130,12 +136,8 @@ int __weak arch_asym_cpu_priority(int cpu) - * - * (default: 5 msec, units: microseconds) - */ --#ifdef CONFIG_CACHY --static unsigned int sysctl_sched_cfs_bandwidth_slice = 3000UL; --#else - static unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL; - #endif --#endif - - #ifdef CONFIG_NUMA_BALANCING - /* Restrict the NUMA promotion throughput (MB/s) for each target node. */ -@@ -201,6 +203,13 @@ static inline void update_load_set(struct load_weight *lw, unsigned long w) - * - * This idea comes from the SD scheduler of Con Kolivas: - */ -+#ifdef CONFIG_SCHED_BORE -+static void update_sysctl(void) { -+ sysctl_sched_base_slice = nsecs_per_tick * -+ max(1UL, DIV_ROUND_UP(sysctl_sched_min_base_slice, nsecs_per_tick)); -+} -+void sched_update_min_base_slice(void) { update_sysctl(); } -+#else // !CONFIG_SCHED_BORE - static unsigned int get_update_sysctl_factor(void) - { - unsigned int cpus = min_t(unsigned int, num_online_cpus(), 8); -@@ -231,6 +240,7 @@ static void update_sysctl(void) - SET_SYSCTL(sched_base_slice); - #undef SET_SYSCTL - } -+#endif // CONFIG_SCHED_BORE - - void __init sched_init_granularity(void) - { -@@ -710,6 +720,9 @@ static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se) - - vlag = avg_vruntime(cfs_rq) - se->vruntime; - limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se); -+#ifdef CONFIG_SCHED_BORE -+ limit >>= !!sched_bore; -+#endif // CONFIG_SCHED_BORE - - se->vlag = clamp(vlag, -limit, limit); - } -@@ -934,6 +947,10 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq) - * until it gets a new slice. See the HACK in set_next_entity(). - */ - if (sched_feat(RUN_TO_PARITY) && curr && curr->vlag == curr->deadline) -+#ifdef CONFIG_SCHED_BORE -+ if (!(likely(sched_bore) && likely(sched_burst_parity_threshold) && -+ sched_burst_parity_threshold < cfs_rq->nr_running)) -+#endif // CONFIG_SCHED_BORE - return curr; - - /* Pick the leftmost entity if it's eligible */ -@@ -992,6 +1009,7 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) - * Scheduling class statistics methods: - */ - #ifdef CONFIG_SMP -+#if !defined(CONFIG_SCHED_BORE) - int sched_update_scaling(void) - { - unsigned int factor = get_update_sysctl_factor(); -@@ -1003,6 +1021,7 @@ int sched_update_scaling(void) - - return 0; - } -+#endif // CONFIG_SCHED_BORE - #endif - #endif - -@@ -1233,6 +1252,10 @@ static void update_curr(struct cfs_rq *cfs_rq) - if (unlikely(delta_exec <= 0)) - return; - -+#ifdef CONFIG_SCHED_BORE -+ curr->burst_time += delta_exec; -+ update_burst_penalty(curr); -+#endif // CONFIG_SCHED_BORE - curr->vruntime += calc_delta_fair(delta_exec, curr); - resched = update_deadline(cfs_rq, curr); - update_min_vruntime(cfs_rq); -@@ -3784,7 +3807,7 @@ dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { } - - static void place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags); - --static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, -+void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, - unsigned long weight) - { - bool curr = cfs_rq->curr == se; -@@ -5272,7 +5295,11 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) - se->rel_deadline = 0; - return; - } -- -+#ifdef CONFIG_SCHED_BORE -+ else if (likely(sched_bore)) -+ vslice >>= !!(flags & sched_deadline_boost_mask); -+ else -+#endif // CONFIG_SCHED_BORE - /* - * When joining the competition; the existing tasks will be, - * on average, 
halfway through their slice, as such start tasks -@@ -7148,6 +7175,15 @@ static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) - util_est_dequeue(&rq->cfs, p); - - util_est_update(&rq->cfs, p, flags & DEQUEUE_SLEEP); -+#ifdef CONFIG_SCHED_BORE -+ struct cfs_rq *cfs_rq = &rq->cfs; -+ struct sched_entity *se = &p->se; -+ if (flags & DEQUEUE_SLEEP && entity_is_task(se)) { -+ if (cfs_rq->curr == se) -+ update_curr(cfs_rq); -+ restart_burst(se); -+ } -+#endif // CONFIG_SCHED_BORE - if (dequeue_entities(rq, &p->se, flags) < 0) - return false; - -@@ -8961,16 +8997,25 @@ static void yield_task_fair(struct rq *rq) - /* - * Are we the only task in the tree? - */ -+#if !defined(CONFIG_SCHED_BORE) - if (unlikely(rq->nr_running == 1)) - return; - - clear_buddies(cfs_rq, se); -+#endif // CONFIG_SCHED_BORE - - update_rq_clock(rq); - /* - * Update run-time statistics of the 'current'. - */ - update_curr(cfs_rq); -+#ifdef CONFIG_SCHED_BORE -+ restart_burst_rescale_deadline(se); -+ if (unlikely(rq->nr_running == 1)) -+ return; -+ -+ clear_buddies(cfs_rq, se); -+#endif // CONFIG_SCHED_BORE - /* - * Tell update_rq_clock() that we've just updated, - * so we don't do microscopic update in schedule() -@@ -13044,6 +13089,9 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) - static void task_fork_fair(struct task_struct *p) - { - set_task_max_allowed_capacity(p); -+#ifdef CONFIG_SCHED_BORE -+ update_burst_score(&p->se); -+#endif // CONFIG_SCHED_BORE - } - - /* -@@ -13154,6 +13202,10 @@ static void attach_task_cfs_rq(struct task_struct *p) - - static void switched_from_fair(struct rq *rq, struct task_struct *p) - { -+ p->se.rel_deadline = 0; -+#ifdef CONFIG_SCHED_BORE -+ reset_task_bore(p); -+#endif // CONFIG_SCHED_BORE - detach_task_cfs_rq(p); - } - -diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h -index dee2797009e3..bdc0b9c037d4 100644 ---- a/kernel/sched/sched.h -+++ b/kernel/sched/sched.h -@@ -2091,7 +2091,11 @@ static inline void update_sched_domain_debugfs(void) { } - static inline void dirty_sched_domain_sysctl(int cpu) { } - #endif - -+#ifdef CONFIG_SCHED_BORE -+extern void sched_update_min_base_slice(void); -+#else // !CONFIG_SCHED_BORE - extern int sched_update_scaling(void); -+#endif // CONFIG_SCHED_BORE - - static inline const struct cpumask *task_user_cpus(struct task_struct *p) - { -@@ -2828,7 +2832,12 @@ extern void wakeup_preempt(struct rq *rq, struct task_struct *p, int flags); - extern const_debug unsigned int sysctl_sched_nr_migrate; - extern const_debug unsigned int sysctl_sched_migration_cost; - -+#ifdef CONFIG_SCHED_BORE -+extern unsigned int sysctl_sched_min_base_slice; -+extern __read_mostly uint sysctl_sched_base_slice; -+#else // !CONFIG_SCHED_BORE - extern unsigned int sysctl_sched_base_slice; -+#endif // CONFIG_SCHED_BORE - - #ifdef CONFIG_SCHED_DEBUG - extern int sysctl_resched_latency_warn_ms; --- -2.48.1 - diff --git a/6.13/sched-dev/0001-bore.patch b/6.13/sched-dev/0001-bore.patch deleted file mode 100644 index e000df8e..00000000 --- a/6.13/sched-dev/0001-bore.patch +++ /dev/null @@ -1,1005 +0,0 @@ -From 9e3f11411e7128d3ebbbe546df56fb110f0d9370 Mon Sep 17 00:00:00 2001 -From: Masahito S -Date: Mon, 20 Jan 2025 07:24:54 +0900 -Subject: [PATCH] linux6.13.y-bore5.9.6 - ---- - include/linux/sched.h | 18 ++ - include/linux/sched/bore.h | 40 ++++ - init/Kconfig | 17 ++ - kernel/Kconfig.hz | 17 ++ - kernel/fork.c | 6 + - kernel/sched/Makefile | 1 + - kernel/sched/bore.c | 443 +++++++++++++++++++++++++++++++++++++ - 
kernel/sched/core.c | 6 + - kernel/sched/debug.c | 61 ++++- - kernel/sched/fair.c | 73 +++++- - kernel/sched/sched.h | 9 + - 11 files changed, 686 insertions(+), 5 deletions(-) - create mode 100644 include/linux/sched/bore.h - create mode 100644 kernel/sched/bore.c - -diff --git a/include/linux/sched.h b/include/linux/sched.h -index 66b311fbd5..43a00a7308 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -538,6 +538,15 @@ struct sched_statistics { - #endif /* CONFIG_SCHEDSTATS */ - } ____cacheline_aligned; - -+#ifdef CONFIG_SCHED_BORE -+struct sched_burst_cache { -+ u8 score; -+ u32 count; -+ u64 timestamp; -+ spinlock_t lock; -+}; -+#endif // CONFIG_SCHED_BORE -+ - struct sched_entity { - /* For load-balancing: */ - struct load_weight load; -@@ -557,6 +566,15 @@ struct sched_entity { - u64 sum_exec_runtime; - u64 prev_sum_exec_runtime; - u64 vruntime; -+#ifdef CONFIG_SCHED_BORE -+ u64 burst_time; -+ u8 prev_burst_penalty; -+ u8 curr_burst_penalty; -+ u8 burst_penalty; -+ u8 burst_score; -+ struct sched_burst_cache child_burst; -+ struct sched_burst_cache group_burst; -+#endif // CONFIG_SCHED_BORE - s64 vlag; - u64 slice; - -diff --git a/include/linux/sched/bore.h b/include/linux/sched/bore.h -new file mode 100644 -index 0000000000..a8faabc288 ---- /dev/null -+++ b/include/linux/sched/bore.h -@@ -0,0 +1,40 @@ -+ -+#include -+#include -+ -+#ifndef _LINUX_SCHED_BORE_H -+#define _LINUX_SCHED_BORE_H -+#define SCHED_BORE_VERSION "5.9.6" -+ -+#ifdef CONFIG_SCHED_BORE -+extern u8 __read_mostly sched_bore; -+extern u8 __read_mostly sched_burst_exclude_kthreads; -+extern u8 __read_mostly sched_burst_smoothness_long; -+extern u8 __read_mostly sched_burst_smoothness_short; -+extern u8 __read_mostly sched_burst_fork_atavistic; -+extern u8 __read_mostly sched_burst_parity_threshold; -+extern u8 __read_mostly sched_burst_penalty_offset; -+extern uint __read_mostly sched_burst_penalty_scale; -+extern uint __read_mostly sched_burst_cache_stop_count; -+extern uint __read_mostly sched_burst_cache_lifetime; -+extern uint __read_mostly sched_deadline_boost_mask; -+ -+extern void update_burst_score(struct sched_entity *se); -+extern void update_burst_penalty(struct sched_entity *se); -+ -+extern void restart_burst(struct sched_entity *se); -+extern void restart_burst_rescale_deadline(struct sched_entity *se); -+ -+extern int sched_bore_update_handler(const struct ctl_table *table, int write, -+ void __user *buffer, size_t *lenp, loff_t *ppos); -+ -+extern void sched_clone_bore( -+ struct task_struct *p, struct task_struct *parent, u64 clone_flags, u64 now); -+ -+extern void reset_task_bore(struct task_struct *p); -+extern void sched_bore_init(void); -+ -+extern void reweight_entity( -+ struct cfs_rq *cfs_rq, struct sched_entity *se, unsigned long weight); -+#endif // CONFIG_SCHED_BORE -+#endif // _LINUX_SCHED_BORE_H -diff --git a/init/Kconfig b/init/Kconfig -index a20e6efd3f..0b17af19d3 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -1342,6 +1342,23 @@ config CHECKPOINT_RESTORE - - If unsure, say N here. - -+config SCHED_BORE -+ bool "Burst-Oriented Response Enhancer" -+ default y -+ help -+ In Desktop and Mobile computing, one might prefer interactive -+ tasks to keep responsive no matter what they run in the background. -+ -+ Enabling this kernel feature modifies the scheduler to discriminate -+ tasks by their burst time (runtime since it last went sleeping or -+ yielding state) and prioritize those that run less bursty. 
-+ Such tasks usually include window compositor, widgets backend, -+ terminal emulator, video playback, games and so on. -+ With a little impact to scheduling fairness, it may improve -+ responsiveness especially under heavy background workload. -+ -+ If unsure, say Y here. -+ - config SCHED_AUTOGROUP - bool "Automatic process group scheduling" - select CGROUPS -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 38ef6d0688..253c566b59 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -55,5 +55,22 @@ config HZ - default 300 if HZ_300 - default 1000 if HZ_1000 - -+config MIN_BASE_SLICE_NS -+ int "Default value for min_base_slice_ns" -+ default 2000000 -+ help -+ The BORE Scheduler automatically calculates the optimal base -+ slice for the configured HZ using the following equation: -+ -+ base_slice_ns = -+ 1000000000/HZ * DIV_ROUNDUP(min_base_slice_ns, 1000000000/HZ) -+ -+ This option sets the default lower bound limit of the base slice -+ to prevent the loss of task throughput due to overscheduling. -+ -+ Setting this value too high can cause the system to boot with -+ an unnecessarily large base slice, resulting in high scheduling -+ latency and poor system responsiveness. -+ - config SCHED_HRTICK - def_bool HIGH_RES_TIMERS -diff --git a/kernel/fork.c b/kernel/fork.c -index 9b301180fd..e2ca4830c3 100644 ---- a/kernel/fork.c -+++ b/kernel/fork.c -@@ -112,6 +112,8 @@ - #include - #include - -+#include -+ - #include - - #define CREATE_TRACE_POINTS -@@ -2515,6 +2517,10 @@ __latent_entropy struct task_struct *copy_process( - p->start_time = ktime_get_ns(); - p->start_boottime = ktime_get_boottime_ns(); - -+#ifdef CONFIG_SCHED_BORE -+ if (likely(p->pid)) -+ sched_clone_bore(p, current, clone_flags, p->start_time); -+#endif // CONFIG_SCHED_BORE - /* - * Make it visible to the rest of the system, but dont wake it up yet. - * Need tasklist lock for parent etc handling! 
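To make the base_slice_ns equation in the MIN_BASE_SLICE_NS help text above concrete (taking its default of 2,000,000 ns): at HZ=1000 a tick is 1,000,000 ns and DIV_ROUND_UP(2,000,000, 1,000,000) = 2, so base_slice_ns becomes 2,000,000 ns (2 ms); at HZ=300 a tick is 3,333,333 ns and DIV_ROUND_UP(2,000,000, 3,333,333) = 1, so base_slice_ns is rounded up to one whole tick, 3,333,333 ns. This is exactly the computation performed by the update_sysctl() helper added to kernel/sched/fair.c later in this patch: nsecs_per_tick * max(1, DIV_ROUND_UP(sysctl_sched_min_base_slice, nsecs_per_tick)).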
-diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile -index 976092b7bd..293aad6754 100644 ---- a/kernel/sched/Makefile -+++ b/kernel/sched/Makefile -@@ -32,3 +32,4 @@ obj-y += core.o - obj-y += fair.o - obj-y += build_policy.o - obj-y += build_utility.o -+obj-y += bore.o -diff --git a/kernel/sched/bore.c b/kernel/sched/bore.c -new file mode 100644 -index 0000000000..23aeb56494 ---- /dev/null -+++ b/kernel/sched/bore.c -@@ -0,0 +1,443 @@ -+/* -+ * Burst-Oriented Response Enhancer (BORE) CPU Scheduler -+ * Copyright (C) 2021-2024 Masahito Suzuki -+ */ -+#include -+#include -+#include -+#include "sched.h" -+ -+#ifdef CONFIG_SCHED_BORE -+u8 __read_mostly sched_bore = 1; -+u8 __read_mostly sched_burst_exclude_kthreads = 1; -+u8 __read_mostly sched_burst_smoothness_long = 1; -+u8 __read_mostly sched_burst_smoothness_short = 0; -+u8 __read_mostly sched_burst_fork_atavistic = 2; -+u8 __read_mostly sched_burst_parity_threshold = 2; -+u8 __read_mostly sched_burst_penalty_offset = 24; -+uint __read_mostly sched_burst_penalty_scale = 1280; -+uint __read_mostly sched_burst_cache_stop_count = 64; -+uint __read_mostly sched_burst_cache_lifetime = 75000000; -+uint __read_mostly sched_deadline_boost_mask = ENQUEUE_INITIAL -+ | ENQUEUE_WAKEUP; -+static int __maybe_unused sixty_four = 64; -+static int __maybe_unused maxval_u8 = 255; -+static int __maybe_unused maxval_12_bits = 4095; -+ -+#define MAX_BURST_PENALTY (39U <<2) -+ -+static inline u32 log2plus1_u64_u32f8(u64 v) { -+ u32 integral = fls64(v); -+ u8 fractional = v << (64 - integral) >> 55; -+ return integral << 8 | fractional; -+} -+ -+static inline u32 calc_burst_penalty(u64 burst_time) { -+ u32 greed, tolerance, penalty, scaled_penalty; -+ -+ greed = log2plus1_u64_u32f8(burst_time); -+ tolerance = sched_burst_penalty_offset << 8; -+ penalty = max(0, (s32)(greed - tolerance)); -+ scaled_penalty = penalty * sched_burst_penalty_scale >> 16; -+ -+ return min(MAX_BURST_PENALTY, scaled_penalty); -+} -+ -+static inline u64 __scale_slice(u64 delta, u8 score) -+{return mul_u64_u32_shr(delta, sched_prio_to_wmult[score], 22);} -+ -+static inline u64 __unscale_slice(u64 delta, u8 score) -+{return mul_u64_u32_shr(delta, sched_prio_to_weight[score], 10);} -+ -+static void reweight_task_by_prio(struct task_struct *p, int prio) { -+ struct sched_entity *se = &p->se; -+ unsigned long weight = scale_load(sched_prio_to_weight[prio]); -+ -+ reweight_entity(cfs_rq_of(se), se, weight); -+ se->load.inv_weight = sched_prio_to_wmult[prio]; -+} -+ -+static inline u8 effective_prio(struct task_struct *p) { -+ u8 prio = p->static_prio - MAX_RT_PRIO; -+ if (likely(sched_bore)) -+ prio += p->se.burst_score; -+ return min(39, prio); -+} -+ -+void update_burst_score(struct sched_entity *se) { -+ if (!entity_is_task(se)) return; -+ struct task_struct *p = task_of(se); -+ u8 prev_prio = effective_prio(p); -+ -+ u8 burst_score = 0; -+ if (!((p->flags & PF_KTHREAD) && likely(sched_burst_exclude_kthreads))) -+ burst_score = se->burst_penalty >> 2; -+ se->burst_score = burst_score; -+ -+ u8 new_prio = effective_prio(p); -+ if (new_prio != prev_prio) -+ reweight_task_by_prio(p, new_prio); -+} -+ -+void update_burst_penalty(struct sched_entity *se) { -+ se->curr_burst_penalty = calc_burst_penalty(se->burst_time); -+ se->burst_penalty = max(se->prev_burst_penalty, se->curr_burst_penalty); -+ update_burst_score(se); -+} -+ -+static inline u32 binary_smooth(u32 new, u32 old) { -+ int increment = new - old; -+ return (0 <= increment)? 
-+ old + ( increment >> (int)sched_burst_smoothness_long): -+ old - (-increment >> (int)sched_burst_smoothness_short); -+} -+ -+static void revolve_burst_penalty(struct sched_entity *se) { -+ se->prev_burst_penalty = -+ binary_smooth(se->curr_burst_penalty, se->prev_burst_penalty); -+ se->burst_time = 0; -+ se->curr_burst_penalty = 0; -+} -+ -+inline void restart_burst(struct sched_entity *se) { -+ revolve_burst_penalty(se); -+ se->burst_penalty = se->prev_burst_penalty; -+ update_burst_score(se); -+} -+ -+void restart_burst_rescale_deadline(struct sched_entity *se) { -+ s64 vscaled, wremain, vremain = se->deadline - se->vruntime; -+ struct task_struct *p = task_of(se); -+ u8 prev_prio = effective_prio(p); -+ restart_burst(se); -+ u8 new_prio = effective_prio(p); -+ if (prev_prio > new_prio) { -+ wremain = __unscale_slice(abs(vremain), prev_prio); -+ vscaled = __scale_slice(wremain, new_prio); -+ if (unlikely(vremain < 0)) -+ vscaled = -vscaled; -+ se->deadline = se->vruntime + vscaled; -+ } -+} -+ -+static inline bool task_is_bore_eligible(struct task_struct *p) -+{return p && p->sched_class == &fair_sched_class && !p->exit_state;} -+ -+static void reset_task_weights_bore(void) { -+ struct task_struct *task; -+ struct rq *rq; -+ struct rq_flags rf; -+ -+ write_lock_irq(&tasklist_lock); -+ for_each_process(task) { -+ if (!task_is_bore_eligible(task)) continue; -+ rq = task_rq(task); -+ rq_pin_lock(rq, &rf); -+ update_rq_clock(rq); -+ reweight_task_by_prio(task, effective_prio(task)); -+ rq_unpin_lock(rq, &rf); -+ } -+ write_unlock_irq(&tasklist_lock); -+} -+ -+int sched_bore_update_handler(const struct ctl_table *table, int write, -+ void __user *buffer, size_t *lenp, loff_t *ppos) { -+ int ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos); -+ if (ret || !write) -+ return ret; -+ -+ reset_task_weights_bore(); -+ -+ return 0; -+} -+ -+#define for_each_child(p, t) \ -+ list_for_each_entry(t, &(p)->children, sibling) -+ -+static u32 count_entries_upto2(struct list_head *head) { -+ struct list_head *next = head->next; -+ return (next != head) + (next->next != head); -+} -+ -+static inline void init_task_burst_cache_lock(struct task_struct *p) { -+ spin_lock_init(&p->se.child_burst.lock); -+ spin_lock_init(&p->se.group_burst.lock); -+} -+ -+static inline bool burst_cache_expired(struct sched_burst_cache *bc, u64 now) -+{return (s64)(bc->timestamp + sched_burst_cache_lifetime - now) < 0;} -+ -+static void update_burst_cache(struct sched_burst_cache *bc, -+ struct task_struct *p, u32 cnt, u32 sum, u64 now) { -+ u8 avg = cnt ? 
sum / cnt : 0; -+ bc->score = max(avg, p->se.burst_penalty); -+ bc->count = cnt; -+ bc->timestamp = now; -+} -+ -+static inline void update_child_burst_direct(struct task_struct *p, u64 now) { -+ u32 cnt = 0, sum = 0; -+ struct task_struct *child; -+ -+ for_each_child(p, child) { -+ if (!task_is_bore_eligible(child)) continue; -+ cnt++; -+ sum += child->se.burst_penalty; -+ } -+ -+ update_burst_cache(&p->se.child_burst, p, cnt, sum, now); -+} -+ -+static inline u8 inherit_burst_direct( -+ struct task_struct *p, u64 now, u64 clone_flags) { -+ struct task_struct *parent = p; -+ struct sched_burst_cache *bc; -+ -+ if (clone_flags & CLONE_PARENT) -+ parent = parent->real_parent; -+ -+ bc = &parent->se.child_burst; -+ guard(spinlock)(&bc->lock); -+ if (burst_cache_expired(bc, now)) -+ update_child_burst_direct(parent, now); -+ -+ return bc->score; -+} -+ -+static void update_child_burst_topological( -+ struct task_struct *p, u64 now, u32 depth, u32 *acnt, u32 *asum) { -+ u32 cnt = 0, dcnt = 0, sum = 0; -+ struct task_struct *child, *dec; -+ struct sched_burst_cache *bc __maybe_unused; -+ -+ for_each_child(p, child) { -+ dec = child; -+ while ((dcnt = count_entries_upto2(&dec->children)) == 1) -+ dec = list_first_entry(&dec->children, struct task_struct, sibling); -+ -+ if (!dcnt || !depth) { -+ if (!task_is_bore_eligible(dec)) continue; -+ cnt++; -+ sum += dec->se.burst_penalty; -+ continue; -+ } -+ bc = &dec->se.child_burst; -+ spin_lock(&bc->lock); -+ if (!burst_cache_expired(bc, now)) { -+ cnt += bc->count; -+ sum += (u32)bc->score * bc->count; -+ if (sched_burst_cache_stop_count <= cnt) { -+ spin_unlock(&bc->lock); -+ break; -+ } -+ spin_unlock(&bc->lock); -+ continue; -+ } -+ update_child_burst_topological(dec, now, depth - 1, &cnt, &sum); -+ spin_unlock(&bc->lock); -+ } -+ -+ update_burst_cache(&p->se.child_burst, p, cnt, sum, now); -+ *acnt += cnt; -+ *asum += sum; -+} -+ -+static inline u8 inherit_burst_topological( -+ struct task_struct *p, u64 now, u64 clone_flags) { -+ struct task_struct *anc = p; -+ struct sched_burst_cache *bc; -+ u32 cnt = 0, sum = 0; -+ u32 base_child_cnt = 0; -+ -+ if (clone_flags & CLONE_PARENT) { -+ anc = anc->real_parent; -+ base_child_cnt = 1; -+ } -+ -+ for (struct task_struct *next; -+ anc != (next = anc->real_parent) && -+ count_entries_upto2(&anc->children) <= base_child_cnt;) { -+ anc = next; -+ base_child_cnt = 1; -+ } -+ -+ bc = &anc->se.child_burst; -+ guard(spinlock)(&bc->lock); -+ if (burst_cache_expired(bc, now)) -+ update_child_burst_topological( -+ anc, now, sched_burst_fork_atavistic - 1, &cnt, &sum); -+ -+ return bc->score; -+} -+ -+static inline void update_tg_burst(struct task_struct *p, u64 now) { -+ struct task_struct *task; -+ u32 cnt = 0, sum = 0; -+ -+ for_each_thread(p, task) { -+ if (!task_is_bore_eligible(task)) continue; -+ cnt++; -+ sum += task->se.burst_penalty; -+ } -+ -+ update_burst_cache(&p->se.group_burst, p, cnt, sum, now); -+} -+ -+static inline u8 inherit_burst_tg(struct task_struct *p, u64 now) { -+ struct task_struct *parent = rcu_dereference(p->group_leader); -+ struct sched_burst_cache *bc = &parent->se.group_burst; -+ guard(spinlock)(&bc->lock); -+ if (burst_cache_expired(bc, now)) -+ update_tg_burst(parent, now); -+ -+ return bc->score; -+} -+ -+void sched_clone_bore(struct task_struct *p, -+ struct task_struct *parent, u64 clone_flags, u64 now) { -+ struct sched_entity *se = &p->se; -+ u8 penalty; -+ -+ init_task_burst_cache_lock(p); -+ -+ if (!task_is_bore_eligible(p)) return; -+ -+ if (clone_flags & CLONE_THREAD) 
{ -+ rcu_read_lock(); -+ penalty = inherit_burst_tg(parent, now); -+ rcu_read_unlock(); -+ } else { -+ read_lock(&tasklist_lock); -+ penalty = likely(sched_burst_fork_atavistic) ? -+ inherit_burst_topological(parent, now, clone_flags): -+ inherit_burst_direct(parent, now, clone_flags); -+ read_unlock(&tasklist_lock); -+ } -+ -+ revolve_burst_penalty(se); -+ se->burst_penalty = se->prev_burst_penalty = -+ max(se->prev_burst_penalty, penalty); -+ se->child_burst.timestamp = 0; -+ se->group_burst.timestamp = 0; -+} -+ -+void reset_task_bore(struct task_struct *p) { -+ p->se.burst_time = 0; -+ p->se.prev_burst_penalty = 0; -+ p->se.curr_burst_penalty = 0; -+ p->se.burst_penalty = 0; -+ p->se.burst_score = 0; -+ memset(&p->se.child_burst, 0, sizeof(struct sched_burst_cache)); -+ memset(&p->se.group_burst, 0, sizeof(struct sched_burst_cache)); -+} -+ -+void __init sched_bore_init(void) { -+ printk(KERN_INFO "BORE (Burst-Oriented Response Enhancer) CPU Scheduler modification %s by Masahito Suzuki", SCHED_BORE_VERSION); -+ reset_task_bore(&init_task); -+ init_task_burst_cache_lock(&init_task); -+} -+ -+#ifdef CONFIG_SYSCTL -+static struct ctl_table sched_bore_sysctls[] = { -+ { -+ .procname = "sched_bore", -+ .data = &sched_bore, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = sched_bore_update_handler, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_ONE, -+ }, -+ { -+ .procname = "sched_burst_exclude_kthreads", -+ .data = &sched_burst_exclude_kthreads, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = proc_dou8vec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_ONE, -+ }, -+ { -+ .procname = "sched_burst_smoothness_long", -+ .data = &sched_burst_smoothness_long, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = proc_dou8vec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_ONE, -+ }, -+ { -+ .procname = "sched_burst_smoothness_short", -+ .data = &sched_burst_smoothness_short, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = proc_dou8vec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_ONE, -+ }, -+ { -+ .procname = "sched_burst_fork_atavistic", -+ .data = &sched_burst_fork_atavistic, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = proc_dou8vec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_THREE, -+ }, -+ { -+ .procname = "sched_burst_parity_threshold", -+ .data = &sched_burst_parity_threshold, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = proc_dou8vec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = &maxval_u8, -+ }, -+ { -+ .procname = "sched_burst_penalty_offset", -+ .data = &sched_burst_penalty_offset, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = proc_dou8vec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = &sixty_four, -+ }, -+ { -+ .procname = "sched_burst_penalty_scale", -+ .data = &sched_burst_penalty_scale, -+ .maxlen = sizeof(uint), -+ .mode = 0644, -+ .proc_handler = proc_douintvec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = &maxval_12_bits, -+ }, -+ { -+ .procname = "sched_burst_cache_stop_count", -+ .data = &sched_burst_cache_stop_count, -+ .maxlen = sizeof(uint), -+ .mode = 0644, -+ .proc_handler = proc_douintvec, -+ }, -+ { -+ .procname = "sched_burst_cache_lifetime", -+ .data = &sched_burst_cache_lifetime, -+ .maxlen = sizeof(uint), -+ .mode = 0644, -+ .proc_handler = proc_douintvec, -+ }, -+ { -+ .procname = "sched_deadline_boost_mask", -+ .data = &sched_deadline_boost_mask, -+ .maxlen = sizeof(uint), -+ .mode = 0644, -+ .proc_handler = proc_douintvec, -+ }, -+}; -+ 
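Because sched_bore_sysctl_init() just below registers this table under "kernel", the knobs surface as /proc/sys/kernel/sched_bore and friends. A minimal userspace sketch for flipping the main switch at runtime (needs root; error handling kept to the bare minimum, purely illustrative):

	/* sketch: turn BORE off at runtime via the sysctl file */
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("/proc/sys/kernel/sched_bore", O_WRONLY);

		if (fd < 0) {
			perror("open");    /* kernel built without CONFIG_SCHED_BORE? */
			return 1;
		}
		if (write(fd, "0", 1) != 1)
			perror("write");
		close(fd);
		return 0;
	}

Writing through this file goes through sched_bore_update_handler(), so existing tasks are reweighted right away.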
-+static int __init sched_bore_sysctl_init(void) { -+ register_sysctl_init("kernel", sched_bore_sysctls); -+ return 0; -+} -+late_initcall(sched_bore_sysctl_init); -+#endif // CONFIG_SYSCTL -+#endif // CONFIG_SCHED_BORE -diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index 3e5a6bf587..fb4bb3fa5a 100644 ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -97,6 +97,8 @@ - #include "../../io_uring/io-wq.h" - #include "../smpboot.h" - -+#include -+ - EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpu); - EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpumask); - -@@ -8481,6 +8483,10 @@ void __init sched_init(void) - BUG_ON(!sched_class_above(&ext_sched_class, &idle_sched_class)); - #endif - -+#ifdef CONFIG_SCHED_BORE -+ sched_bore_init(); -+#endif // CONFIG_SCHED_BORE -+ - wait_bit_init(); - - #ifdef CONFIG_FAIR_GROUP_SCHED -diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c -index a1be00a988..66fcb22900 100644 ---- a/kernel/sched/debug.c -+++ b/kernel/sched/debug.c -@@ -167,7 +167,53 @@ static const struct file_operations sched_feat_fops = { - }; - - #ifdef CONFIG_SMP -+#ifdef CONFIG_SCHED_BORE -+#define DEFINE_SYSCTL_SCHED_FUNC(name, update_func) \ -+static ssize_t sched_##name##_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) \ -+{ \ -+ char buf[16]; \ -+ unsigned int value; \ -+\ -+ if (cnt > 15) \ -+ cnt = 15; \ -+\ -+ if (copy_from_user(&buf, ubuf, cnt)) \ -+ return -EFAULT; \ -+ buf[cnt] = '\0'; \ -+\ -+ if (kstrtouint(buf, 10, &value)) \ -+ return -EINVAL; \ -+\ -+ sysctl_sched_##name = value; \ -+ sched_update_##update_func(); \ -+\ -+ *ppos += cnt; \ -+ return cnt; \ -+} \ -+\ -+static int sched_##name##_show(struct seq_file *m, void *v) \ -+{ \ -+ seq_printf(m, "%d\n", sysctl_sched_##name); \ -+ return 0; \ -+} \ -+\ -+static int sched_##name##_open(struct inode *inode, struct file *filp) \ -+{ \ -+ return single_open(filp, sched_##name##_show, NULL); \ -+} \ -+\ -+static const struct file_operations sched_##name##_fops = { \ -+ .open = sched_##name##_open, \ -+ .write = sched_##name##_write, \ -+ .read = seq_read, \ -+ .llseek = seq_lseek, \ -+ .release = single_release, \ -+}; -+ -+DEFINE_SYSCTL_SCHED_FUNC(min_base_slice, min_base_slice) - -+#undef DEFINE_SYSCTL_SCHED_FUNC -+#else // !CONFIG_SCHED_BORE - static ssize_t sched_scaling_write(struct file *filp, const char __user *ubuf, - size_t cnt, loff_t *ppos) - { -@@ -213,7 +259,7 @@ static const struct file_operations sched_scaling_fops = { - .llseek = seq_lseek, - .release = single_release, - }; -- -+#endif // CONFIG_SCHED_BORE - #endif /* SMP */ - - #ifdef CONFIG_PREEMPT_DYNAMIC -@@ -505,13 +551,20 @@ static __init int sched_init_debug(void) - debugfs_create_file("preempt", 0644, debugfs_sched, NULL, &sched_dynamic_fops); - #endif - -+#ifdef CONFIG_SCHED_BORE -+ debugfs_create_file("min_base_slice_ns", 0644, debugfs_sched, NULL, &sched_min_base_slice_fops); -+ debugfs_create_u32("base_slice_ns", 0444, debugfs_sched, &sysctl_sched_base_slice); -+#else // !CONFIG_SCHED_BORE - debugfs_create_u32("base_slice_ns", 0644, debugfs_sched, &sysctl_sched_base_slice); -+#endif // CONFIG_SCHED_BORE - - debugfs_create_u32("latency_warn_ms", 0644, debugfs_sched, &sysctl_resched_latency_warn_ms); - debugfs_create_u32("latency_warn_once", 0644, debugfs_sched, &sysctl_resched_latency_warn_once); - - #ifdef CONFIG_SMP -+#if !defined(CONFIG_SCHED_BORE) - debugfs_create_file("tunable_scaling", 0644, debugfs_sched, NULL, &sched_scaling_fops); -+#endif // CONFIG_SCHED_BORE - debugfs_create_u32("migration_cost_ns", 0644, 
debugfs_sched, &sysctl_sched_migration_cost); - debugfs_create_u32("nr_migrate", 0644, debugfs_sched, &sysctl_sched_nr_migrate); - -@@ -756,6 +809,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) - SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)), - SPLIT_NS(schedstat_val_or_zero(p->stats.sum_block_runtime))); - -+#ifdef CONFIG_SCHED_BORE -+ SEQ_printf(m, " %2d", p->se.burst_score); -+#endif // CONFIG_SCHED_BORE - #ifdef CONFIG_NUMA_BALANCING - SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p)); - #endif -@@ -1245,6 +1301,9 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, - - P(se.load.weight); - #ifdef CONFIG_SMP -+#ifdef CONFIG_SCHED_BORE -+ P(se.burst_score); -+#endif // CONFIG_SCHED_BORE - P(se.avg.load_sum); - P(se.avg.runnable_sum); - P(se.avg.util_sum); -diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index 26958431de..9331896e5d 100644 ---- a/kernel/sched/fair.c -+++ b/kernel/sched/fair.c -@@ -55,6 +55,8 @@ - #include "stats.h" - #include "autogroup.h" - -+#include -+ - /* - * The initial- and re-scaling of tunables is configurable - * -@@ -64,17 +66,30 @@ - * SCHED_TUNABLESCALING_LOG - scaled logarithmically, *1+ilog(ncpus) - * SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus - * -- * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus)) -+ * BORE : default SCHED_TUNABLESCALING_NONE = *1 constant -+ * EEVDF: default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus)) - */ -+#ifdef CONFIG_SCHED_BORE -+unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE; -+#else // !CONFIG_SCHED_BORE - unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG; -+#endif // CONFIG_SCHED_BORE - - /* - * Minimal preemption granularity for CPU-bound tasks: - * -- * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds) -+ * BORE : base_slice = minimum multiple of nsecs_per_tick >= min_base_slice -+ * (default min_base_slice = 2000000 constant, units: nanoseconds) -+ * EEVDF: default 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds - */ -+#ifdef CONFIG_SCHED_BORE -+static const unsigned int nsecs_per_tick = 1000000000ULL / HZ; -+unsigned int sysctl_sched_min_base_slice = CONFIG_MIN_BASE_SLICE_NS; -+__read_mostly uint sysctl_sched_base_slice = nsecs_per_tick; -+#else // !CONFIG_SCHED_BORE - unsigned int sysctl_sched_base_slice = 750000ULL; - static unsigned int normalized_sysctl_sched_base_slice = 750000ULL; -+#endif // CONFIG_SCHED_BORE - - const_debug unsigned int sysctl_sched_migration_cost = 500000UL; - -@@ -188,6 +203,13 @@ static inline void update_load_set(struct load_weight *lw, unsigned long w) - * - * This idea comes from the SD scheduler of Con Kolivas: - */ -+#ifdef CONFIG_SCHED_BORE -+static void update_sysctl(void) { -+ sysctl_sched_base_slice = nsecs_per_tick * -+ max(1UL, DIV_ROUND_UP(sysctl_sched_min_base_slice, nsecs_per_tick)); -+} -+void sched_update_min_base_slice(void) { update_sysctl(); } -+#else // !CONFIG_SCHED_BORE - static unsigned int get_update_sysctl_factor(void) - { - unsigned int cpus = min_t(unsigned int, num_online_cpus(), 8); -@@ -218,6 +240,7 @@ static void update_sysctl(void) - SET_SYSCTL(sched_base_slice); - #undef SET_SYSCTL - } -+#endif // CONFIG_SCHED_BORE - - void __init sched_init_granularity(void) - { -@@ -697,6 +720,9 @@ static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se) - - vlag = avg_vruntime(cfs_rq) - se->vruntime; - limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se); -+#ifdef CONFIG_SCHED_BORE -+ limit >>= 
!!sched_bore; -+#endif // CONFIG_SCHED_BORE - - se->vlag = clamp(vlag, -limit, limit); - } -@@ -921,6 +947,10 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq) - * until it gets a new slice. See the HACK in set_next_entity(). - */ - if (sched_feat(RUN_TO_PARITY) && curr && curr->vlag == curr->deadline) -+#ifdef CONFIG_SCHED_BORE -+ if (!(likely(sched_bore) && likely(sched_burst_parity_threshold) && -+ sched_burst_parity_threshold < cfs_rq->nr_running)) -+#endif // CONFIG_SCHED_BORE - return curr; - - /* Pick the leftmost entity if it's eligible */ -@@ -979,6 +1009,7 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) - * Scheduling class statistics methods: - */ - #ifdef CONFIG_SMP -+#if !defined(CONFIG_SCHED_BORE) - int sched_update_scaling(void) - { - unsigned int factor = get_update_sysctl_factor(); -@@ -990,6 +1021,7 @@ int sched_update_scaling(void) - - return 0; - } -+#endif // CONFIG_SCHED_BORE - #endif - #endif - -@@ -1220,6 +1252,10 @@ static void update_curr(struct cfs_rq *cfs_rq) - if (unlikely(delta_exec <= 0)) - return; - -+#ifdef CONFIG_SCHED_BORE -+ curr->burst_time += delta_exec; -+ update_burst_penalty(curr); -+#endif // CONFIG_SCHED_BORE - curr->vruntime += calc_delta_fair(delta_exec, curr); - resched = update_deadline(cfs_rq, curr); - update_min_vruntime(cfs_rq); -@@ -3771,7 +3807,7 @@ dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { } - - static void place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags); - --static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, -+void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, - unsigned long weight) - { - bool curr = cfs_rq->curr == se; -@@ -5259,7 +5295,11 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) - se->rel_deadline = 0; - return; - } -- -+#ifdef CONFIG_SCHED_BORE -+ else if (likely(sched_bore)) -+ vslice >>= !!(flags & sched_deadline_boost_mask); -+ else -+#endif // CONFIG_SCHED_BORE - /* - * When joining the competition; the existing tasks will be, - * on average, halfway through their slice, as such start tasks -@@ -7135,6 +7175,15 @@ static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) - util_est_dequeue(&rq->cfs, p); - - util_est_update(&rq->cfs, p, flags & DEQUEUE_SLEEP); -+#ifdef CONFIG_SCHED_BORE -+ struct cfs_rq *cfs_rq = &rq->cfs; -+ struct sched_entity *se = &p->se; -+ if (flags & DEQUEUE_SLEEP && entity_is_task(se)) { -+ if (cfs_rq->curr == se) -+ update_curr(cfs_rq); -+ restart_burst(se); -+ } -+#endif // CONFIG_SCHED_BORE - if (dequeue_entities(rq, &p->se, flags) < 0) - return false; - -@@ -8948,16 +8997,25 @@ static void yield_task_fair(struct rq *rq) - /* - * Are we the only task in the tree? - */ -+#if !defined(CONFIG_SCHED_BORE) - if (unlikely(rq->nr_running == 1)) - return; - - clear_buddies(cfs_rq, se); -+#endif // CONFIG_SCHED_BORE - - update_rq_clock(rq); - /* - * Update run-time statistics of the 'current'. 
- */ - update_curr(cfs_rq); -+#ifdef CONFIG_SCHED_BORE -+ restart_burst_rescale_deadline(se); -+ if (unlikely(rq->nr_running == 1)) -+ return; -+ -+ clear_buddies(cfs_rq, se); -+#endif // CONFIG_SCHED_BORE - /* - * Tell update_rq_clock() that we've just updated, - * so we don't do microscopic update in schedule() -@@ -13009,6 +13067,9 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) - static void task_fork_fair(struct task_struct *p) - { - set_task_max_allowed_capacity(p); -+#ifdef CONFIG_SCHED_BORE -+ update_burst_score(&p->se); -+#endif // CONFIG_SCHED_BORE - } - - /* -@@ -13119,6 +13180,10 @@ static void attach_task_cfs_rq(struct task_struct *p) - - static void switched_from_fair(struct rq *rq, struct task_struct *p) - { -+ p->se.rel_deadline = 0; -+#ifdef CONFIG_SCHED_BORE -+ reset_task_bore(p); -+#endif // CONFIG_SCHED_BORE - detach_task_cfs_rq(p); - } - -diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h -index c5d67a43fe..e14855d24a 100644 ---- a/kernel/sched/sched.h -+++ b/kernel/sched/sched.h -@@ -2092,7 +2092,11 @@ static inline void update_sched_domain_debugfs(void) { } - static inline void dirty_sched_domain_sysctl(int cpu) { } - #endif - -+#ifdef CONFIG_SCHED_BORE -+extern void sched_update_min_base_slice(void); -+#else // !CONFIG_SCHED_BORE - extern int sched_update_scaling(void); -+#endif // CONFIG_SCHED_BORE - - static inline const struct cpumask *task_user_cpus(struct task_struct *p) - { -@@ -2829,7 +2833,12 @@ extern void wakeup_preempt(struct rq *rq, struct task_struct *p, int flags); - extern const_debug unsigned int sysctl_sched_nr_migrate; - extern const_debug unsigned int sysctl_sched_migration_cost; - -+#ifdef CONFIG_SCHED_BORE -+extern unsigned int sysctl_sched_min_base_slice; -+extern __read_mostly uint sysctl_sched_base_slice; -+#else // !CONFIG_SCHED_BORE - extern unsigned int sysctl_sched_base_slice; -+#endif // CONFIG_SCHED_BORE - - #ifdef CONFIG_SCHED_DEBUG - extern int sysctl_resched_latency_warn_ms; --- -2.34.1 - diff --git a/6.13/sched/0001-bore-cachy.patch b/6.13/sched/0001-bore-cachy.patch index ab8bf50c..1eb64cb1 100644 --- a/6.13/sched/0001-bore-cachy.patch +++ b/6.13/sched/0001-bore-cachy.patch @@ -1,6 +1,6 @@ -From 80f8bf0adb51a725636db3fdabab2c6209f5348a Mon Sep 17 00:00:00 2001 +From 2aaaad0215c8d15c5133eb2bc1c77c021edff609 Mon Sep 17 00:00:00 2001 From: Eric Naim -Date: Tue, 31 Dec 2024 20:17:12 +0700 +Date: Mon, 20 Jan 2025 09:19:36 +0700 Subject: [PATCH] bore-cachy Signed-off-by: Eric Naim @@ -11,12 +11,12 @@ Signed-off-by: Eric Naim kernel/Kconfig.hz | 17 ++ kernel/fork.c | 6 + kernel/sched/Makefile | 1 + - kernel/sched/bore.c | 446 +++++++++++++++++++++++++++++++++++++ + kernel/sched/bore.c | 443 +++++++++++++++++++++++++++++++++++++ kernel/sched/core.c | 6 + kernel/sched/debug.c | 61 ++++- kernel/sched/fair.c | 86 +++++-- kernel/sched/sched.h | 9 + - 11 files changed, 689 insertions(+), 18 deletions(-) + 11 files changed, 686 insertions(+), 18 deletions(-) create mode 100644 include/linux/sched/bore.h create mode 100644 kernel/sched/bore.c @@ -58,7 +58,7 @@ index 64934e0830af..7ec02a323014 100644 diff --git a/include/linux/sched/bore.h b/include/linux/sched/bore.h new file mode 100644 -index 000000000000..a36947e12c2f +index 000000000000..a8faabc2885e --- /dev/null +++ b/include/linux/sched/bore.h @@ -0,0 +1,40 @@ @@ -68,7 +68,7 @@ index 000000000000..a36947e12c2f + +#ifndef _LINUX_SCHED_BORE_H +#define _LINUX_SCHED_BORE_H -+#define SCHED_BORE_VERSION "5.9.5" ++#define SCHED_BORE_VERSION "5.9.6" + 
+#ifdef CONFIG_SCHED_BORE +extern u8 __read_mostly sched_bore; @@ -192,10 +192,10 @@ index 976092b7bd45..293aad675444 100644 +obj-y += bore.o diff --git a/kernel/sched/bore.c b/kernel/sched/bore.c new file mode 100644 -index 000000000000..d55cd32b34ea +index 000000000000..23aeb5649479 --- /dev/null +++ b/kernel/sched/bore.c -@@ -0,0 +1,446 @@ +@@ -0,0 +1,443 @@ +/* + * Burst-Oriented Response Enhancer (BORE) CPU Scheduler + * Copyright (C) 2021-2024 Masahito Suzuki @@ -395,10 +395,9 @@ index 000000000000..d55cd32b34ea + parent = parent->real_parent; + + bc = &parent->se.child_burst; -+ spin_lock(&bc->lock); ++ guard(spinlock)(&bc->lock); + if (burst_cache_expired(bc, now)) + update_child_burst_direct(parent, now); -+ spin_unlock(&bc->lock); + + return bc->score; +} @@ -461,11 +460,10 @@ index 000000000000..d55cd32b34ea + } + + bc = &anc->se.child_burst; -+ spin_lock(&bc->lock); ++ guard(spinlock)(&bc->lock); + if (burst_cache_expired(bc, now)) + update_child_burst_topological( + anc, now, sched_burst_fork_atavistic - 1, &cnt, &sum); -+ spin_unlock(&bc->lock); + + return bc->score; +} @@ -486,10 +484,9 @@ index 000000000000..d55cd32b34ea +static inline u8 inherit_burst_tg(struct task_struct *p, u64 now) { + struct task_struct *parent = rcu_dereference(p->group_leader); + struct sched_burst_cache *bc = &parent->se.group_burst; -+ spin_lock(&bc->lock); ++ guard(spinlock)(&bc->lock); + if (burst_cache_expired(bc, now)) + update_tg_burst(parent, now); -+ spin_unlock(&bc->lock); + + return bc->score; +} @@ -775,7 +772,7 @@ index a1be00a988bf..66fcb229007d 100644 P(se.avg.runnable_sum); P(se.avg.util_sum); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index d38a4feac0c9..c455ba008d8b 100644 +index c532ffb153b4..c55d61977364 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -55,6 +55,8 @@ @@ -866,17 +863,17 @@ index d38a4feac0c9..c455ba008d8b 100644 void __init sched_init_granularity(void) { -@@ -708,6 +718,9 @@ static s64 entity_lag(u64 avruntime, struct sched_entity *se) +@@ -710,6 +720,9 @@ static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se) - vlag = avruntime - se->vruntime; + vlag = avg_vruntime(cfs_rq) - se->vruntime; limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se); +#ifdef CONFIG_SCHED_BORE + limit >>= !!sched_bore; +#endif // CONFIG_SCHED_BORE - return clamp(vlag, -limit, limit); + se->vlag = clamp(vlag, -limit, limit); } -@@ -939,6 +952,10 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq) +@@ -934,6 +947,10 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq) * until it gets a new slice. See the HACK in set_next_entity(). 
*/ if (sched_feat(RUN_TO_PARITY) && curr && curr->vlag == curr->deadline) @@ -887,7 +884,7 @@ index d38a4feac0c9..c455ba008d8b 100644 return curr; /* Pick the leftmost entity if it's eligible */ -@@ -997,6 +1014,7 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) +@@ -992,6 +1009,7 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) * Scheduling class statistics methods: */ #ifdef CONFIG_SMP @@ -895,7 +892,7 @@ index d38a4feac0c9..c455ba008d8b 100644 int sched_update_scaling(void) { unsigned int factor = get_update_sysctl_factor(); -@@ -1008,6 +1026,7 @@ int sched_update_scaling(void) +@@ -1003,6 +1021,7 @@ int sched_update_scaling(void) return 0; } @@ -903,7 +900,7 @@ index d38a4feac0c9..c455ba008d8b 100644 #endif #endif -@@ -1238,6 +1257,10 @@ static void update_curr(struct cfs_rq *cfs_rq) +@@ -1233,6 +1252,10 @@ static void update_curr(struct cfs_rq *cfs_rq) if (unlikely(delta_exec <= 0)) return; @@ -914,16 +911,16 @@ index d38a4feac0c9..c455ba008d8b 100644 curr->vruntime += calc_delta_fair(delta_exec, curr); resched = update_deadline(cfs_rq, curr); update_min_vruntime(cfs_rq); -@@ -3893,7 +3916,7 @@ static void reweight_eevdf(struct sched_entity *se, u64 avruntime, - se->deadline = avruntime + vslice; - } +@@ -3784,7 +3807,7 @@ dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { } + + static void place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags); -static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, +void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, unsigned long weight) { bool curr = cfs_rq->curr == se; -@@ -5377,7 +5400,11 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) +@@ -5272,7 +5295,11 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) se->rel_deadline = 0; return; } @@ -936,7 +933,7 @@ index d38a4feac0c9..c455ba008d8b 100644 /* * When joining the competition; the existing tasks will be, * on average, halfway through their slice, as such start tasks -@@ -7253,6 +7280,15 @@ static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) +@@ -7148,6 +7175,15 @@ static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) util_est_dequeue(&rq->cfs, p); util_est_update(&rq->cfs, p, flags & DEQUEUE_SLEEP); @@ -952,7 +949,7 @@ index d38a4feac0c9..c455ba008d8b 100644 if (dequeue_entities(rq, &p->se, flags) < 0) return false; -@@ -9066,16 +9102,25 @@ static void yield_task_fair(struct rq *rq) +@@ -8961,16 +8997,25 @@ static void yield_task_fair(struct rq *rq) /* * Are we the only task in the tree? 
*/ @@ -978,7 +975,7 @@ index d38a4feac0c9..c455ba008d8b 100644 /* * Tell update_rq_clock() that we've just updated, * so we don't do microscopic update in schedule() -@@ -13148,6 +13193,9 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) +@@ -13044,6 +13089,9 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) static void task_fork_fair(struct task_struct *p) { set_task_max_allowed_capacity(p); @@ -988,7 +985,7 @@ index d38a4feac0c9..c455ba008d8b 100644 } /* -@@ -13258,6 +13306,10 @@ static void attach_task_cfs_rq(struct task_struct *p) +@@ -13154,6 +13202,10 @@ static void attach_task_cfs_rq(struct task_struct *p) static void switched_from_fair(struct rq *rq, struct task_struct *p) { @@ -1029,5 +1026,5 @@ index dee2797009e3..bdc0b9c037d4 100644 #ifdef CONFIG_SCHED_DEBUG extern int sysctl_resched_latency_warn_ms; -- -2.47.1 +2.48.1 diff --git a/6.13/sched/0001-bore.patch b/6.13/sched/0001-bore.patch index 496fa884..e000df8e 100644 --- a/6.13/sched/0001-bore.patch +++ b/6.13/sched/0001-bore.patch @@ -1,7 +1,7 @@ -From 327b1f8f9cf94ef8561a9c6624b0a54342a4a8d3 Mon Sep 17 00:00:00 2001 +From 9e3f11411e7128d3ebbbe546df56fb110f0d9370 Mon Sep 17 00:00:00 2001 From: Masahito S -Date: Tue, 31 Dec 2024 21:49:13 +0900 -Subject: [PATCH] linux6.13.y-bore5.9.5 +Date: Mon, 20 Jan 2025 07:24:54 +0900 +Subject: [PATCH] linux6.13.y-bore5.9.6 --- include/linux/sched.h | 18 ++ @@ -10,12 +10,12 @@ Subject: [PATCH] linux6.13.y-bore5.9.5 kernel/Kconfig.hz | 17 ++ kernel/fork.c | 6 + kernel/sched/Makefile | 1 + - kernel/sched/bore.c | 446 +++++++++++++++++++++++++++++++++++++ + kernel/sched/bore.c | 443 +++++++++++++++++++++++++++++++++++++ kernel/sched/core.c | 6 + kernel/sched/debug.c | 61 ++++- kernel/sched/fair.c | 73 +++++- kernel/sched/sched.h | 9 + - 11 files changed, 689 insertions(+), 5 deletions(-) + 11 files changed, 686 insertions(+), 5 deletions(-) create mode 100644 include/linux/sched/bore.h create mode 100644 kernel/sched/bore.c @@ -57,7 +57,7 @@ index 66b311fbd5..43a00a7308 100644 diff --git a/include/linux/sched/bore.h b/include/linux/sched/bore.h new file mode 100644 -index 0000000000..a36947e12c +index 0000000000..a8faabc288 --- /dev/null +++ b/include/linux/sched/bore.h @@ -0,0 +1,40 @@ @@ -67,7 +67,7 @@ index 0000000000..a36947e12c + +#ifndef _LINUX_SCHED_BORE_H +#define _LINUX_SCHED_BORE_H -+#define SCHED_BORE_VERSION "5.9.5" ++#define SCHED_BORE_VERSION "5.9.6" + +#ifdef CONFIG_SCHED_BORE +extern u8 __read_mostly sched_bore; @@ -191,10 +191,10 @@ index 976092b7bd..293aad6754 100644 +obj-y += bore.o diff --git a/kernel/sched/bore.c b/kernel/sched/bore.c new file mode 100644 -index 0000000000..d55cd32b34 +index 0000000000..23aeb56494 --- /dev/null +++ b/kernel/sched/bore.c -@@ -0,0 +1,446 @@ +@@ -0,0 +1,443 @@ +/* + * Burst-Oriented Response Enhancer (BORE) CPU Scheduler + * Copyright (C) 2021-2024 Masahito Suzuki @@ -394,10 +394,9 @@ index 0000000000..d55cd32b34 + parent = parent->real_parent; + + bc = &parent->se.child_burst; -+ spin_lock(&bc->lock); ++ guard(spinlock)(&bc->lock); + if (burst_cache_expired(bc, now)) + update_child_burst_direct(parent, now); -+ spin_unlock(&bc->lock); + + return bc->score; +} @@ -460,11 +459,10 @@ index 0000000000..d55cd32b34 + } + + bc = &anc->se.child_burst; -+ spin_lock(&bc->lock); ++ guard(spinlock)(&bc->lock); + if (burst_cache_expired(bc, now)) + update_child_burst_topological( + anc, now, sched_burst_fork_atavistic - 1, &cnt, &sum); -+ spin_unlock(&bc->lock); + + return bc->score; +} @@ 
-485,10 +483,9 @@ index 0000000000..d55cd32b34 +static inline u8 inherit_burst_tg(struct task_struct *p, u64 now) { + struct task_struct *parent = rcu_dereference(p->group_leader); + struct sched_burst_cache *bc = &parent->se.group_burst; -+ spin_lock(&bc->lock); ++ guard(spinlock)(&bc->lock); + if (burst_cache_expired(bc, now)) + update_tg_burst(parent, now); -+ spin_unlock(&bc->lock); + + return bc->score; +} @@ -774,7 +771,7 @@ index a1be00a988..66fcb22900 100644 P(se.avg.runnable_sum); P(se.avg.util_sum); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index 3e9ca38512..647b25840d 100644 +index 26958431de..9331896e5d 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -55,6 +55,8 @@ @@ -841,17 +838,17 @@ index 3e9ca38512..647b25840d 100644 void __init sched_init_granularity(void) { -@@ -695,6 +718,9 @@ static s64 entity_lag(u64 avruntime, struct sched_entity *se) +@@ -697,6 +720,9 @@ static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se) - vlag = avruntime - se->vruntime; + vlag = avg_vruntime(cfs_rq) - se->vruntime; limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se); +#ifdef CONFIG_SCHED_BORE + limit >>= !!sched_bore; +#endif // CONFIG_SCHED_BORE - return clamp(vlag, -limit, limit); + se->vlag = clamp(vlag, -limit, limit); } -@@ -926,6 +952,10 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq) +@@ -921,6 +947,10 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq) * until it gets a new slice. See the HACK in set_next_entity(). */ if (sched_feat(RUN_TO_PARITY) && curr && curr->vlag == curr->deadline) @@ -862,7 +859,7 @@ index 3e9ca38512..647b25840d 100644 return curr; /* Pick the leftmost entity if it's eligible */ -@@ -984,6 +1014,7 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) +@@ -979,6 +1009,7 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) * Scheduling class statistics methods: */ #ifdef CONFIG_SMP @@ -870,7 +867,7 @@ index 3e9ca38512..647b25840d 100644 int sched_update_scaling(void) { unsigned int factor = get_update_sysctl_factor(); -@@ -995,6 +1026,7 @@ int sched_update_scaling(void) +@@ -990,6 +1021,7 @@ int sched_update_scaling(void) return 0; } @@ -878,7 +875,7 @@ index 3e9ca38512..647b25840d 100644 #endif #endif -@@ -1225,6 +1257,10 @@ static void update_curr(struct cfs_rq *cfs_rq) +@@ -1220,6 +1252,10 @@ static void update_curr(struct cfs_rq *cfs_rq) if (unlikely(delta_exec <= 0)) return; @@ -889,16 +886,16 @@ index 3e9ca38512..647b25840d 100644 curr->vruntime += calc_delta_fair(delta_exec, curr); resched = update_deadline(cfs_rq, curr); update_min_vruntime(cfs_rq); -@@ -3880,7 +3916,7 @@ static void reweight_eevdf(struct sched_entity *se, u64 avruntime, - se->deadline = avruntime + vslice; - } +@@ -3771,7 +3807,7 @@ dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { } + + static void place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags); -static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, +void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, unsigned long weight) { bool curr = cfs_rq->curr == se; -@@ -5364,7 +5400,11 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) +@@ -5259,7 +5295,11 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) se->rel_deadline = 0; return; } @@ -911,7 +908,7 @@ index 3e9ca38512..647b25840d 100644 /* * When joining the competition; the existing tasks will be, * on average, halfway through their slice, as such 
start tasks -@@ -7240,6 +7280,15 @@ static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) +@@ -7135,6 +7175,15 @@ static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) util_est_dequeue(&rq->cfs, p); util_est_update(&rq->cfs, p, flags & DEQUEUE_SLEEP); @@ -927,7 +924,7 @@ index 3e9ca38512..647b25840d 100644 if (dequeue_entities(rq, &p->se, flags) < 0) return false; -@@ -9053,16 +9102,25 @@ static void yield_task_fair(struct rq *rq) +@@ -8948,16 +8997,25 @@ static void yield_task_fair(struct rq *rq) /* * Are we the only task in the tree? */ @@ -953,7 +950,7 @@ index 3e9ca38512..647b25840d 100644 /* * Tell update_rq_clock() that we've just updated, * so we don't do microscopic update in schedule() -@@ -13114,6 +13172,9 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) +@@ -13009,6 +13067,9 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) static void task_fork_fair(struct task_struct *p) { set_task_max_allowed_capacity(p); @@ -963,7 +960,7 @@ index 3e9ca38512..647b25840d 100644 } /* -@@ -13224,6 +13285,10 @@ static void attach_task_cfs_rq(struct task_struct *p) +@@ -13119,6 +13180,10 @@ static void attach_task_cfs_rq(struct task_struct *p) static void switched_from_fair(struct rq *rq, struct task_struct *p) {