From 0e7abfd91ef5f628f9d9f6f370ff8dbe468f43ef Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Mon, 20 Jan 2025 15:06:01 +0100 Subject: [PATCH] 6.13: Sync, update tlb-broadcast-series Signed-off-by: Peter Jung --- 6.13/0001-amd-pstate.patch | 4 +- 6.13/0002-amd-tlb-broadcast.patch | 873 +++++++++----- 6.13/0003-bbr3.patch | 4 +- 6.13/0004-cachy.patch | 477 +++++++- 6.13/0005-crypto.patch | 4 +- 6.13/0006-fixes.patch | 184 ++- 6.13/0007-itmt-core-ranking.patch | 20 +- 6.13/0008-ntsync.patch | 8 +- 6.13/0009-perf-per-core.patch | 4 +- 6.13/0010-pksm.patch | 4 +- 6.13/0011-t2.patch | 8 +- 6.13/0012-zstd.patch | 4 +- 6.13/all/0001-cachyos-base-all.patch | 1590 ++++++++++++++++++-------- 6.13/sched-dev/0001-bore-cachy.patch | 1030 ----------------- 6.13/sched-dev/0001-bore.patch | 1005 ---------------- 6.13/sched/0001-bore-cachy.patch | 59 +- 6.13/sched/0001-bore.patch | 59 +- 17 files changed, 2290 insertions(+), 3047 deletions(-) delete mode 100644 6.13/sched-dev/0001-bore-cachy.patch delete mode 100644 6.13/sched-dev/0001-bore.patch diff --git a/6.13/0001-amd-pstate.patch b/6.13/0001-amd-pstate.patch index e095f0e4..e100c061 100644 --- a/6.13/0001-amd-pstate.patch +++ b/6.13/0001-amd-pstate.patch @@ -1,6 +1,6 @@ -From 2af576964728ca6af63da3c61dae669b5ae945c7 Mon Sep 17 00:00:00 2001 +From 1ec94c7b86986796d5d14135302e81dd3ddbe223 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:37:09 +0100 +Date: Mon, 20 Jan 2025 13:21:23 +0100 Subject: [PATCH 01/12] amd-pstate Signed-off-by: Peter Jung diff --git a/6.13/0002-amd-tlb-broadcast.patch b/6.13/0002-amd-tlb-broadcast.patch index 6fa53f3f..070bd8f0 100644 --- a/6.13/0002-amd-tlb-broadcast.patch +++ b/6.13/0002-amd-tlb-broadcast.patch @@ -1,39 +1,41 @@ -From 1d6b426b59b09163dbcaac857551295ad4b343d5 Mon Sep 17 00:00:00 2001 +From b74b9b0459100443f73ce718d0191bf58d6cb4b4 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:51:04 +0100 +Date: Mon, 20 Jan 2025 13:21:35 +0100 Subject: [PATCH 02/12] amd-tlb-broadcast Signed-off-by: Peter Jung --- - arch/x86/Kconfig | 2 +- - arch/x86/hyperv/mmu.c | 1 - - arch/x86/include/asm/cpufeatures.h | 1 + - arch/x86/include/asm/invlpgb.h | 93 ++++++ - arch/x86/include/asm/mmu.h | 6 + - arch/x86/include/asm/mmu_context.h | 12 + - arch/x86/include/asm/paravirt.h | 5 - - arch/x86/include/asm/paravirt_types.h | 2 - - arch/x86/include/asm/tlbbatch.h | 1 + - arch/x86/include/asm/tlbflush.h | 31 +- - arch/x86/kernel/cpu/amd.c | 16 ++ - arch/x86/kernel/kvm.c | 1 - - arch/x86/kernel/paravirt.c | 6 - - arch/x86/kernel/setup.c | 4 + - arch/x86/mm/pgtable.c | 16 +- - arch/x86/mm/tlb.c | 393 +++++++++++++++++++++++++- - arch/x86/xen/mmu_pv.c | 1 - - mm/memory.c | 1 - - mm/mmap.c | 2 - - mm/swap_state.c | 1 - - mm/vma.c | 2 - - 21 files changed, 541 insertions(+), 56 deletions(-) + arch/x86/Kconfig | 2 +- + arch/x86/Kconfig.cpu | 5 + + arch/x86/hyperv/mmu.c | 1 - + arch/x86/include/asm/cpufeatures.h | 1 + + arch/x86/include/asm/invlpgb.h | 103 +++++ + arch/x86/include/asm/mmu.h | 6 + + arch/x86/include/asm/mmu_context.h | 14 + + arch/x86/include/asm/msr-index.h | 2 + + arch/x86/include/asm/paravirt.h | 5 - + arch/x86/include/asm/paravirt_types.h | 2 - + arch/x86/include/asm/tlbbatch.h | 1 + + arch/x86/include/asm/tlbflush.h | 92 ++++- + arch/x86/kernel/cpu/amd.c | 12 + + arch/x86/kernel/kvm.c | 1 - + arch/x86/kernel/paravirt.c | 6 - + arch/x86/mm/pgtable.c | 16 +- + arch/x86/mm/tlb.c | 496 +++++++++++++++++++++++-- + arch/x86/xen/mmu_pv.c | 1 - + mm/memory.c | 1 - + mm/mmap.c | 2 - + 
mm/swap_state.c | 1 - + mm/vma.c | 2 - + tools/arch/x86/include/asm/msr-index.h | 2 + + 23 files changed, 695 insertions(+), 79 deletions(-) create mode 100644 arch/x86/include/asm/invlpgb.h diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig -index 9d7bd0ae48c4..e8743f8c9fd0 100644 +index ef6cfea9df73..1f824dcab4dc 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig -@@ -274,7 +274,7 @@ config X86 +@@ -273,7 +273,7 @@ config X86 select HAVE_PCI select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP @@ -42,6 +44,29 @@ index 9d7bd0ae48c4..e8743f8c9fd0 100644 select MMU_GATHER_MERGE_VMAS select HAVE_POSIX_CPU_TIMERS_TASK_WORK select HAVE_REGS_AND_STACK_ACCESS_API +diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu +index 2a7279d80460..bacdc502903f 100644 +--- a/arch/x86/Kconfig.cpu ++++ b/arch/x86/Kconfig.cpu +@@ -395,6 +395,10 @@ config X86_VMX_FEATURE_NAMES + def_bool y + depends on IA32_FEAT_CTL + ++config X86_BROADCAST_TLB_FLUSH ++ def_bool y ++ depends on CPU_SUP_AMD ++ + menuconfig PROCESSOR_SELECT + bool "Supported processor vendors" if EXPERT + help +@@ -431,6 +435,7 @@ config CPU_SUP_CYRIX_32 + config CPU_SUP_AMD + default y + bool "Support AMD processors" if PROCESSOR_SELECT ++ select X86_BROADCAST_TLB_FLUSH + help + This enables detection, tunings and quirks for AMD processors + diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index 1cc113200ff5..cbe6c71e17c1 100644 --- a/arch/x86/hyperv/mmu.c @@ -53,27 +78,28 @@ index 1cc113200ff5..cbe6c71e17c1 100644 - pv_ops.mmu.tlb_remove_table = tlb_remove_table; } diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h -index 645aa360628d..742c138d011a 100644 +index 645aa360628d..989e4c9cad2e 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -338,6 +338,7 @@ #define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+ 1) /* "irperf" Instructions Retired Count */ #define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* "xsaveerptr" Always save/restore FP error pointers */ -+#define X86_FEATURE_INVLPGB (13*32+ 3) /* "invlpgb" INVLPGB instruction */ ++#define X86_FEATURE_INVLPGB (13*32+ 3) /* INVLPGB and TLBSYNC instruction supported. */ #define X86_FEATURE_RDPRU (13*32+ 4) /* "rdpru" Read processor register at user level */ #define X86_FEATURE_WBNOINVD (13*32+ 9) /* "wbnoinvd" WBNOINVD instruction */ #define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ diff --git a/arch/x86/include/asm/invlpgb.h b/arch/x86/include/asm/invlpgb.h new file mode 100644 -index 000000000000..2669ebfffe81 +index 000000000000..418402535319 --- /dev/null +++ b/arch/x86/include/asm/invlpgb.h -@@ -0,0 +1,93 @@ +@@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_INVLPGB +#define _ASM_X86_INVLPGB + ++#include +#include + +/* @@ -85,21 +111,31 @@ index 000000000000..2669ebfffe81 + * TLBSYNC is used to ensure that pending INVLPGB invalidations initiated from + * this CPU have completed. 
+ */ -+static inline void __invlpgb(unsigned long asid, unsigned long pcid, unsigned long addr, -+ int extra_count, bool pmd_stride, unsigned long flags) ++static inline void __invlpgb(unsigned long asid, unsigned long pcid, ++ unsigned long addr, u16 extra_count, ++ bool pmd_stride, unsigned long flags) +{ -+ u64 rax = addr | flags; -+ u32 ecx = (pmd_stride << 31) | extra_count; + u32 edx = (pcid << 16) | asid; ++ u32 ecx = (pmd_stride << 31) | extra_count; ++ u64 rax = addr | flags; + -+ asm volatile("invlpgb" : : "a" (rax), "c" (ecx), "d" (edx)); ++ /* INVLPGB; supported in binutils >= 2.36. */ ++ asm volatile(".byte 0x0f, 0x01, 0xfe" : : "a" (rax), "c" (ecx), "d" (edx)); ++} ++ ++/* Wait for INVLPGB originated by this CPU to complete. */ ++static inline void tlbsync(void) ++{ ++ cant_migrate(); ++ /* TLBSYNC: supported in binutils >= 0.36. */ ++ asm volatile(".byte 0x0f, 0x01, 0xff" ::: "memory"); +} + +/* + * INVLPGB can be targeted by virtual address, PCID, ASID, or any combination + * of the three. For example: + * - INVLPGB_VA | INVLPGB_INCLUDE_GLOBAL: invalidate all TLB entries at the address -+ * - INVLPGB_PCID: invalidate all TLB entries matching the PCID ++ * - INVLPGB_PCID: invalidate all TLB entries matching the PCID + * + * The first can be used to invalidate (kernel) mappings at a particular + * address across all processes. @@ -118,22 +154,25 @@ index 000000000000..2669ebfffe81 + unsigned long addr) +{ + __invlpgb(0, pcid, addr, 0, 0, INVLPGB_PCID | INVLPGB_VA); ++ tlbsync(); +} + -+static inline void invlpgb_flush_user_nr(unsigned long pcid, unsigned long addr, -+ int nr, bool pmd_stride) ++static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid, ++ unsigned long addr, ++ u16 nr, ++ bool pmd_stride, ++ bool freed_tables) +{ -+ __invlpgb(0, pcid, addr, nr - 1, pmd_stride, INVLPGB_PCID | INVLPGB_VA | INVLPGB_FINAL_ONLY); -+} ++ unsigned long flags = INVLPGB_PCID | INVLPGB_VA; + -+/* Flush all mappings for a given ASID, not including globals. */ -+static inline void invlpgb_flush_single_asid(unsigned long asid) -+{ -+ __invlpgb(asid, 0, 0, 0, 0, INVLPGB_ASID); ++ if (!freed_tables) ++ flags |= INVLPGB_FINAL_ONLY; ++ ++ __invlpgb(0, pcid, addr, nr - 1, pmd_stride, flags); +} + +/* Flush all mappings for a given PCID, not including globals. */ -+static inline void invlpgb_flush_single_pcid(unsigned long pcid) ++static inline void invlpgb_flush_single_pcid_nosync(unsigned long pcid) +{ + __invlpgb(0, pcid, 0, 0, 0, INVLPGB_PCID); +} @@ -142,10 +181,11 @@ index 000000000000..2669ebfffe81 +static inline void invlpgb_flush_all(void) +{ + __invlpgb(0, 0, 0, 0, 0, INVLPGB_INCLUDE_GLOBAL); ++ tlbsync(); +} + +/* Flush addr, including globals, for all PCIDs. */ -+static inline void invlpgb_flush_addr(unsigned long addr, int nr) ++static inline void invlpgb_flush_addr_nosync(unsigned long addr, u16 nr) +{ + __invlpgb(0, 0, addr, nr - 1, 0, INVLPGB_INCLUDE_GLOBAL); +} @@ -154,69 +194,86 @@ index 000000000000..2669ebfffe81 +static inline void invlpgb_flush_all_nonglobals(void) +{ + __invlpgb(0, 0, 0, 0, 0, 0); -+} -+ -+/* Wait for INVLPGB originated by this CPU to complete. 
*/ -+static inline void tlbsync(void) -+{ -+ asm volatile("tlbsync"); ++ tlbsync(); +} + +#endif /* _ASM_X86_INVLPGB */ diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h -index ce4677b8b735..83d0986295d3 100644 +index ce4677b8b735..51f25d38de86 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h -@@ -46,6 +46,12 @@ typedef struct { - unsigned long flags; +@@ -67,6 +67,12 @@ typedef struct { + u16 pkey_allocation_map; + s16 execute_only_pkey; #endif - -+#ifdef CONFIG_CPU_SUP_AMD -+ struct list_head broadcast_asid_list; -+ u16 broadcast_asid; ++ ++#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH ++ u16 global_asid; + bool asid_transition; +#endif + - #ifdef CONFIG_ADDRESS_MASKING - /* Active LAM mode: X86_CR3_LAM_U48 or X86_CR3_LAM_U57 or 0 (disabled) */ - unsigned long lam_cr3_mask; + } mm_context_t; + + #define INIT_MM_CONTEXT(mm) \ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h -index 2886cb668d7f..2c347b51d9b9 100644 +index 2886cb668d7f..65f50464b5c3 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -139,6 +139,8 @@ static inline void mm_reset_untag_mask(struct mm_struct *mm) #define enter_lazy_tlb enter_lazy_tlb extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); -+extern void destroy_context_free_broadcast_asid(struct mm_struct *mm); ++extern void destroy_context_free_global_asid(struct mm_struct *mm); + /* * Init a new mm. Used on mm copies, like at fork() * and on mm's that are brand-new, like at execve(). -@@ -160,6 +162,13 @@ static inline int init_new_context(struct task_struct *tsk, +@@ -160,6 +162,14 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.execute_only_pkey = -1; } #endif + -+#ifdef CONFIG_CPU_SUP_AMD -+ INIT_LIST_HEAD(&mm->context.broadcast_asid_list); -+ mm->context.broadcast_asid = 0; -+ mm->context.asid_transition = false; ++#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH ++ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) { ++ mm->context.global_asid = 0; ++ mm->context.asid_transition = false; ++ } +#endif + mm_reset_untag_mask(mm); init_new_context_ldt(mm); return 0; -@@ -169,6 +178,9 @@ static inline int init_new_context(struct task_struct *tsk, +@@ -169,6 +179,10 @@ static inline int init_new_context(struct task_struct *tsk, static inline void destroy_context(struct mm_struct *mm) { destroy_context_ldt(mm); -+#ifdef CONFIG_CPU_SUP_AMD -+ destroy_context_free_broadcast_asid(mm); ++#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH ++ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) ++ destroy_context_free_global_asid(mm); +#endif } extern void switch_mm(struct mm_struct *prev, struct mm_struct *next, +diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h +index 3ae84c3b8e6d..dc1c1057f26e 100644 +--- a/arch/x86/include/asm/msr-index.h ++++ b/arch/x86/include/asm/msr-index.h +@@ -25,6 +25,7 @@ + #define _EFER_SVME 12 /* Enable virtualization */ + #define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */ + #define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ ++#define _EFER_TCE 15 /* Enable Translation Cache Extensions */ + #define _EFER_AUTOIBRS 21 /* Enable Automatic IBRS */ + + #define EFER_SCE (1<<_EFER_SCE) +@@ -34,6 +35,7 @@ + #define EFER_SVME (1<<_EFER_SVME) + #define EFER_LMSLE (1<<_EFER_LMSLE) + #define EFER_FFXSR (1<<_EFER_FFXSR) ++#define EFER_TCE (1<<_EFER_TCE) + #define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS) + + /* diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 
d4eb9e1d61b8..794ba3647c6c 100644 --- a/arch/x86/include/asm/paravirt.h @@ -259,7 +316,7 @@ index 1ad56eb3e8a8..f9a17edf63ad 100644 #endif /* _ARCH_X86_TLBBATCH_H */ diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h -index 69e79fff41b8..a2f9b7370717 100644 +index 69e79fff41b8..5490ca71e27f 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -10,6 +10,7 @@ @@ -270,39 +327,100 @@ index 69e79fff41b8..a2f9b7370717 100644 #include #include #include -@@ -64,6 +65,23 @@ static inline void cr4_clear_bits(unsigned long mask) - */ - #define TLB_NR_DYN_ASIDS 6 - -+#ifdef CONFIG_CPU_SUP_AMD -+#define is_dyn_asid(asid) (asid) < TLB_NR_DYN_ASIDS -+#define is_broadcast_asid(asid) (asid) >= TLB_NR_DYN_ASIDS -+#define in_asid_transition(info) (info->mm && info->mm->context.asid_transition) -+#define mm_broadcast_asid(mm) (mm->context.broadcast_asid) +@@ -183,6 +184,13 @@ static inline void cr4_init_shadow(void) + extern unsigned long mmu_cr4_features; + extern u32 *trampoline_cr4_features; + ++/* How many pages can we invalidate with one INVLPGB. */ ++#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH ++extern u16 invlpgb_count_max; +#else -+#define is_dyn_asid(asid) true -+#define is_broadcast_asid(asid) false -+#define in_asid_transition(info) false -+#define mm_broadcast_asid(mm) 0 ++#define invlpgb_count_max 1 ++#endif ++ + extern void initialize_tlbstate_and_flush(void); + + /* +@@ -230,6 +238,78 @@ void flush_tlb_one_kernel(unsigned long addr); + void flush_tlb_multi(const struct cpumask *cpumask, + const struct flush_tlb_info *info); + ++#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH ++static inline bool is_dyn_asid(u16 asid) ++{ ++ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) ++ return true; ++ ++ return asid < TLB_NR_DYN_ASIDS; ++} ++ ++static inline bool is_global_asid(u16 asid) ++{ ++ return !is_dyn_asid(asid); ++} ++ ++static inline bool in_asid_transition(const struct flush_tlb_info *info) ++{ ++ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) ++ return false; ++ ++ return info->mm && READ_ONCE(info->mm->context.asid_transition); ++} + -+inline bool needs_broadcast_asid_reload(struct mm_struct *next, u16 prev_asid) ++static inline u16 mm_global_asid(struct mm_struct *mm) ++{ ++ u16 asid; ++ ++ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) ++ return 0; ++ ++ asid = READ_ONCE(mm->context.global_asid); ++ ++ /* mm->context.global_asid is either 0, or a global ASID */ ++ VM_WARN_ON_ONCE(is_dyn_asid(asid)); ++ ++ return asid; ++} ++#else ++static inline bool is_dyn_asid(u16 asid) ++{ ++ return true; ++} ++ ++static inline bool is_global_asid(u16 asid) ++{ ++ return false; ++} ++ ++static inline bool in_asid_transition(const struct flush_tlb_info *info) +{ + return false; +} ++ ++static inline u16 mm_global_asid(struct mm_struct *mm) ++{ ++ return 0; ++} ++ ++static inline bool needs_global_asid_reload(struct mm_struct *next, u16 prev_asid) ++{ ++ return false; ++} ++ ++static inline void broadcast_tlb_flush(struct flush_tlb_info *info) ++{ ++ VM_WARN_ON_ONCE(1); ++} ++ ++static inline void consider_global_asid(struct mm_struct *mm) ++{ ++} +#endif + - struct tlb_context { - u64 ctx_id; - u64 tlb_gen; -@@ -182,6 +200,7 @@ static inline void cr4_init_shadow(void) - - extern unsigned long mmu_cr4_features; - extern u32 *trampoline_cr4_features; -+extern u16 invlpgb_count_max; - - extern void initialize_tlbstate_and_flush(void); - -@@ -277,21 +296,15 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) + #ifdef CONFIG_PARAVIRT + #include + #endif +@@ 
-277,21 +357,15 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) return atomic64_inc_return(&mm->context.tlb_gen); } @@ -328,28 +446,38 @@ index 69e79fff41b8..a2f9b7370717 100644 static inline bool pte_flags_need_flush(unsigned long oldflags, unsigned long newflags, diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c -index 79d2e17f6582..4dc42705aaca 100644 +index 79d2e17f6582..21076252a491 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c -@@ -1135,6 +1135,22 @@ static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c) - tlb_lli_2m[ENTRIES] = eax & mask; +@@ -29,6 +29,8 @@ - tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1; + #include "cpu.h" + ++u16 invlpgb_count_max __ro_after_init; + -+ if (c->extended_cpuid_level < 0x80000008) -+ return; + static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) + { + u32 gprs[8] = { 0 }; +@@ -1069,6 +1071,10 @@ static void init_amd(struct cpuinfo_x86 *c) + + /* AMD CPUs don't need fencing after x2APIC/TSC_DEADLINE MSR writes. */ + clear_cpu_cap(c, X86_FEATURE_APIC_MSRS_FENCE); + -+ cpuid(0x80000008, &eax, &ebx, &ecx, &edx); ++ /* Enable Translation Cache Extension */ ++ if (cpu_feature_enabled(X86_FEATURE_TCE)) ++ msr_set_bit(MSR_EFER, _EFER_TCE); + } + + #ifdef CONFIG_X86_32 +@@ -1135,6 +1141,12 @@ static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c) + tlb_lli_2m[ENTRIES] = eax & mask; + + tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1; + + /* Max number of pages INVLPGB can invalidate in one shot */ -+ invlpgb_count_max = (edx & 0xffff) + 1; -+ -+ /* If supported, enable translation cache extensions (TCE) */ -+ cpuid(0x80000001, &eax, &ebx, &ecx, &edx); -+ if (ecx & BIT(17)) { -+ u64 msr = native_read_msr(MSR_EFER);; -+ msr |= BIT(15); -+ wrmsrl(MSR_EFER, msr); ++ if (boot_cpu_has(X86_FEATURE_INVLPGB)) { ++ cpuid(0x80000008, &eax, &ebx, &ecx, &edx); ++ invlpgb_count_max = (edx & 0xffff) + 1; + } } @@ -390,21 +518,6 @@ index fec381533555..c019771e0123 100644 .mmu.exit_mmap = paravirt_nop, .mmu.notify_page_enc_status_changed = paravirt_nop, -diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c -index f1fea506e20f..6c4d08f8f7b1 100644 ---- a/arch/x86/kernel/setup.c -+++ b/arch/x86/kernel/setup.c -@@ -138,6 +138,10 @@ __visible unsigned long mmu_cr4_features __ro_after_init; - __visible unsigned long mmu_cr4_features __ro_after_init = X86_CR4_PAE; - #endif - -+#ifdef CONFIG_CPU_SUP_AMD -+u16 invlpgb_count_max __ro_after_init; -+#endif -+ - #ifdef CONFIG_IMA - static phys_addr_t ima_kexec_buffer_phys; - static size_t ima_kexec_buffer_size; diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 5745a354a241..3dc4af1f7868 100644 --- a/arch/x86/mm/pgtable.c @@ -460,7 +573,7 @@ index 5745a354a241..3dc4af1f7868 100644 #endif /* CONFIG_PGTABLE_LEVELS > 4 */ #endif /* CONFIG_PGTABLE_LEVELS > 3 */ diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c -index a2becb85bea7..0080175153ef 100644 +index a2becb85bea7..6449ac701c88 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -74,13 +74,15 @@ @@ -482,120 +595,136 @@ index a2becb85bea7..0080175153ef 100644 * for KPTI each mm has two address spaces and thus needs two * PCID values, but we can still do with a single ASID denomination * for each mm. Corresponds to kPCID + 2048. 
-@@ -225,6 +227,18 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, +@@ -225,6 +227,20 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, return; } + /* -+ * TLB consistency for this ASID is maintained with INVLPGB; -+ * TLB flushes happen even while the process isn't running. ++ * TLB consistency for global ASIDs is maintained with broadcast TLB ++ * flushing. The TLB is never outdated, and does not need flushing. + */ -+#ifdef CONFIG_CPU_SUP_AMD -+ if (static_cpu_has(X86_FEATURE_INVLPGB) && mm_broadcast_asid(next)) { -+ *new_asid = mm_broadcast_asid(next); -+ *need_flush = false; -+ return; ++ if (IS_ENABLED(CONFIG_X86_BROADCAST_TLB_FLUSH) && static_cpu_has(X86_FEATURE_INVLPGB)) { ++ u16 global_asid = mm_global_asid(next); ++ ++ if (global_asid) { ++ *new_asid = global_asid; ++ *need_flush = false; ++ return; ++ } + } -+#endif + if (this_cpu_read(cpu_tlbstate.invalidate_other)) clear_asid_other(); -@@ -251,6 +265,245 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, +@@ -251,6 +267,290 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, *need_flush = true; } -+#ifdef CONFIG_CPU_SUP_AMD ++#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH +/* -+ * Logic for AMD INVLPGB support. ++ * Logic for broadcast TLB invalidation. + */ -+static DEFINE_RAW_SPINLOCK(broadcast_asid_lock); -+static u16 last_broadcast_asid = TLB_NR_DYN_ASIDS; -+static DECLARE_BITMAP(broadcast_asid_used, MAX_ASID_AVAILABLE) = { 0 }; -+static LIST_HEAD(broadcast_asid_list); -+static int broadcast_asid_available = MAX_ASID_AVAILABLE - TLB_NR_DYN_ASIDS - 1; ++static DEFINE_RAW_SPINLOCK(global_asid_lock); ++static u16 last_global_asid = MAX_ASID_AVAILABLE; ++static DECLARE_BITMAP(global_asid_used, MAX_ASID_AVAILABLE) = { 0 }; ++static DECLARE_BITMAP(global_asid_freed, MAX_ASID_AVAILABLE) = { 0 }; ++static int global_asid_available = MAX_ASID_AVAILABLE - TLB_NR_DYN_ASIDS - 1; + -+static void reset_broadcast_asid_space(void) ++static void reset_global_asid_space(void) +{ -+ mm_context_t *context; -+ -+ lockdep_assert_held(&broadcast_asid_lock); ++ lockdep_assert_held(&global_asid_lock); + + /* -+ * Flush once when we wrap around the ASID space, so we won't need -+ * to flush every time we allocate an ASID for boradcast flushing. ++ * A global TLB flush guarantees that any stale entries from ++ * previously freed global ASIDs get flushed from the TLB ++ * everywhere, making these global ASIDs safe to reuse. + */ + invlpgb_flush_all_nonglobals(); -+ tlbsync(); + + /* -+ * Leave the currently used broadcast ASIDs set in the bitmap, since -+ * those cannot be reused before the next wraparound and flush.. ++ * Clear all the previously freed global ASIDs from the ++ * broadcast_asid_used bitmap, now that the global TLB flush ++ * has made them actually available for re-use. + */ -+ bitmap_clear(broadcast_asid_used, 0, MAX_ASID_AVAILABLE); -+ list_for_each_entry(context, &broadcast_asid_list, broadcast_asid_list) -+ __set_bit(context->broadcast_asid, broadcast_asid_used); ++ bitmap_andnot(global_asid_used, global_asid_used, ++ global_asid_freed, MAX_ASID_AVAILABLE); ++ bitmap_clear(global_asid_freed, 0, MAX_ASID_AVAILABLE); + -+ last_broadcast_asid = TLB_NR_DYN_ASIDS; ++ /* ++ * ASIDs 0-TLB_NR_DYN_ASIDS are used for CPU-local ASID ++ * assignments, for tasks doing IPI based TLB shootdowns. ++ * Restart the search from the start of the global ASID space. 
++ */ ++ last_global_asid = TLB_NR_DYN_ASIDS; +} + -+static u16 get_broadcast_asid(void) ++static u16 get_global_asid(void) +{ -+ lockdep_assert_held(&broadcast_asid_lock); ++ lockdep_assert_held(&global_asid_lock); + + do { -+ u16 start = last_broadcast_asid; -+ u16 asid = find_next_zero_bit(broadcast_asid_used, MAX_ASID_AVAILABLE, start); ++ u16 start = last_global_asid; ++ u16 asid = find_next_zero_bit(global_asid_used, MAX_ASID_AVAILABLE, start); + + if (asid >= MAX_ASID_AVAILABLE) { -+ reset_broadcast_asid_space(); ++ reset_global_asid_space(); + continue; + } + -+ /* Try claiming this broadcast ASID. */ -+ if (!test_and_set_bit(asid, broadcast_asid_used)) { -+ last_broadcast_asid = asid; -+ return asid; -+ } ++ /* Claim this global ASID. */ ++ __set_bit(asid, global_asid_used); ++ last_global_asid = asid; ++ global_asid_available--; ++ return asid; + } while (1); +} + +/* -+ * Returns true if the mm is transitioning from a CPU-local ASID to a broadcast ++ * Returns true if the mm is transitioning from a CPU-local ASID to a global + * (INVLPGB) ASID, or the other way around. + */ -+static bool needs_broadcast_asid_reload(struct mm_struct *next, u16 prev_asid) ++static bool needs_global_asid_reload(struct mm_struct *next, u16 prev_asid) +{ -+ u16 broadcast_asid = mm_broadcast_asid(next); ++ u16 global_asid = mm_global_asid(next); + -+ if (broadcast_asid && prev_asid != broadcast_asid) ++ if (global_asid && prev_asid != global_asid) + return true; + -+ if (!broadcast_asid && is_broadcast_asid(prev_asid)) ++ if (!global_asid && is_global_asid(prev_asid)) + return true; + + return false; +} + -+void destroy_context_free_broadcast_asid(struct mm_struct *mm) ++void destroy_context_free_global_asid(struct mm_struct *mm) +{ -+ if (!mm->context.broadcast_asid) ++ if (!mm->context.global_asid) + return; + -+ guard(raw_spinlock_irqsave)(&broadcast_asid_lock); -+ mm->context.broadcast_asid = 0; -+ list_del(&mm->context.broadcast_asid_list); -+ broadcast_asid_available++; ++ guard(raw_spinlock_irqsave)(&global_asid_lock); ++ ++ /* The global ASID can be re-used only after flush at wrap-around. */ ++ __set_bit(mm->context.global_asid, global_asid_freed); ++ ++ mm->context.global_asid = 0; ++ global_asid_available++; +} + ++/* ++ * Check whether a process is currently active on more than "threshold" CPUs. ++ * This is a cheap estimation on whether or not it may make sense to assign ++ * a global ASID to this process, and use broadcast TLB invalidation. ++ */ +static bool mm_active_cpus_exceeds(struct mm_struct *mm, int threshold) +{ + int count = 0; + int cpu; + ++ /* This quick check should eliminate most single threaded programs. */ + if (cpumask_weight(mm_cpumask(mm)) <= threshold) + return false; + ++ /* Slower check to make sure. */ + for_each_cpu(cpu, mm_cpumask(mm)) { + /* Skip the CPUs that aren't really running this process. */ + if (per_cpu(cpu_tlbstate.loaded_mm, cpu) != mm) @@ -611,40 +740,56 @@ index a2becb85bea7..0080175153ef 100644 +} + +/* -+ * Assign a broadcast ASID to the current process, protecting against ++ * Assign a global ASID to the current process, protecting against + * races between multiple threads in the process. + */ -+static void use_broadcast_asid(struct mm_struct *mm) ++static void use_global_asid(struct mm_struct *mm) +{ -+ guard(raw_spinlock_irqsave)(&broadcast_asid_lock); ++ guard(raw_spinlock_irqsave)(&global_asid_lock); + + /* This process is already using broadcast TLB invalidation. 
*/ -+ if (mm->context.broadcast_asid) ++ if (mm->context.global_asid) ++ return; ++ ++ /* The last global ASID was consumed while waiting for the lock. */ ++ if (!global_asid_available) + return; + -+ mm->context.broadcast_asid = get_broadcast_asid(); -+ mm->context.asid_transition = true; -+ list_add(&mm->context.broadcast_asid_list, &broadcast_asid_list); -+ broadcast_asid_available--; ++ /* ++ * The transition from IPI TLB flushing, with a dynamic ASID, ++ * and broadcast TLB flushing, using a global ASID, uses memory ++ * ordering for synchronization. ++ * ++ * While the process has threads still using a dynamic ASID, ++ * TLB invalidation IPIs continue to get sent. ++ * ++ * This code sets asid_transition first, before assigning the ++ * global ASID. ++ * ++ * The TLB flush code will only verify the ASID transition ++ * after it has seen the new global ASID for the process. ++ */ ++ WRITE_ONCE(mm->context.asid_transition, true); ++ WRITE_ONCE(mm->context.global_asid, get_global_asid()); +} + +/* -+ * Figure out whether to assign a broadcast (global) ASID to a process. -+ * We vary the threshold by how empty or full broadcast ASID space is. ++ * Figure out whether to assign a global ASID to a process. ++ * We vary the threshold by how empty or full global ASID space is. + * 1/4 full: >= 4 active threads + * 1/2 full: >= 8 active threads + * 3/4 full: >= 16 active threads + * 7/8 full: >= 32 active threads + * etc + * -+ * This way we should never exhaust the broadcast ASID space, even on very ++ * This way we should never exhaust the global ASID space, even on very + * large systems, and the processes with the largest number of active + * threads should be able to use broadcast TLB invalidation. + */ +#define HALFFULL_THRESHOLD 8 -+static bool meets_broadcast_asid_threshold(struct mm_struct *mm) ++static bool meets_global_asid_threshold(struct mm_struct *mm) +{ -+ int avail = broadcast_asid_available; ++ int avail = global_asid_available; + int threshold = HALFFULL_THRESHOLD; + + if (!avail) @@ -664,7 +809,7 @@ index a2becb85bea7..0080175153ef 100644 + return mm_active_cpus_exceeds(mm, threshold); +} + -+static void count_tlb_flush(struct mm_struct *mm) ++static void consider_global_asid(struct mm_struct *mm) +{ + if (!static_cpu_has(X86_FEATURE_INVLPGB)) + return; @@ -673,43 +818,54 @@ index a2becb85bea7..0080175153ef 100644 + if ((current->pid & 0x1f) != (jiffies & 0x1f)) + return; + -+ if (meets_broadcast_asid_threshold(mm)) -+ use_broadcast_asid(mm); ++ if (meets_global_asid_threshold(mm)) ++ use_global_asid(mm); +} + +static void finish_asid_transition(struct flush_tlb_info *info) +{ + struct mm_struct *mm = info->mm; -+ int bc_asid = mm_broadcast_asid(mm); ++ int bc_asid = mm_global_asid(mm); + int cpu; + -+ if (!mm->context.asid_transition) ++ if (!READ_ONCE(mm->context.asid_transition)) + return; + + for_each_cpu(cpu, mm_cpumask(mm)) { ++ /* ++ * The remote CPU is context switching. Wait for that to ++ * finish, to catch the unlikely case of it switching to ++ * the target mm with an out of date ASID. ++ */ ++ while (READ_ONCE(per_cpu(cpu_tlbstate.loaded_mm, cpu)) == LOADED_MM_SWITCHING) ++ cpu_relax(); ++ + if (READ_ONCE(per_cpu(cpu_tlbstate.loaded_mm, cpu)) != mm) + continue; + + /* -+ * If at least one CPU is not using the broadcast ASID yet, ++ * If at least one CPU is not using the global ASID yet, + * send a TLB flush IPI. The IPI should cause stragglers + * to transition soon. 
++ * ++ * This can race with the CPU switching to another task; ++ * that results in a (harmless) extra IPI. + */ -+ if (per_cpu(cpu_tlbstate.loaded_mm_asid, cpu) != bc_asid) { ++ if (READ_ONCE(per_cpu(cpu_tlbstate.loaded_mm_asid, cpu)) != bc_asid) { + flush_tlb_multi(mm_cpumask(info->mm), info); + return; + } + } + -+ /* All the CPUs running this process are using the broadcast ASID. */ -+ mm->context.asid_transition = 0; ++ /* All the CPUs running this process are using the global ASID. */ ++ WRITE_ONCE(mm->context.asid_transition, false); +} + +static void broadcast_tlb_flush(struct flush_tlb_info *info) +{ + bool pmd = info->stride_shift == PMD_SHIFT; + unsigned long maxnr = invlpgb_count_max; -+ unsigned long asid = info->mm->context.broadcast_asid; ++ unsigned long asid = info->mm->context.global_asid; + unsigned long addr = info->start; + unsigned long nr; + @@ -717,12 +873,17 @@ index a2becb85bea7..0080175153ef 100644 + if (info->stride_shift > PMD_SHIFT) + maxnr = 1; + -+ if (info->end == TLB_FLUSH_ALL || info->freed_tables) { -+ invlpgb_flush_single_pcid(kern_pcid(asid)); ++ /* ++ * TLB flushes with INVLPGB are kicked off asynchronously. ++ * The inc_mm_tlb_gen() guarantees page table updates are done ++ * before these TLB flushes happen. ++ */ ++ if (info->end == TLB_FLUSH_ALL) { ++ invlpgb_flush_single_pcid_nosync(kern_pcid(asid)); + /* Do any CPUs supporting INVLPGB need PTI? */ + if (static_cpu_has(X86_FEATURE_PTI)) -+ invlpgb_flush_single_pcid(user_pcid(asid)); -+ } else do { ++ invlpgb_flush_single_pcid_nosync(user_pcid(asid)); ++ } else for (; addr < info->end; addr += nr << info->stride_shift) { + /* + * Calculate how many pages can be flushed at once; if the + * remainder of the range is less than one page, flush one. @@ -730,43 +891,42 @@ index a2becb85bea7..0080175153ef 100644 + nr = min(maxnr, (info->end - addr) >> info->stride_shift); + nr = max(nr, 1); + -+ invlpgb_flush_user_nr(kern_pcid(asid), addr, nr, pmd); ++ invlpgb_flush_user_nr_nosync(kern_pcid(asid), addr, nr, pmd, info->freed_tables); + /* Do any CPUs supporting INVLPGB need PTI? */ + if (static_cpu_has(X86_FEATURE_PTI)) -+ invlpgb_flush_user_nr(user_pcid(asid), addr, nr, pmd); -+ addr += nr << info->stride_shift; -+ } while (addr < info->end); ++ invlpgb_flush_user_nr_nosync(user_pcid(asid), addr, nr, pmd, info->freed_tables); ++ } + + finish_asid_transition(info); + + /* Wait for the INVLPGBs kicked off above to finish. */ + tlbsync(); +} -+#endif /* CONFIG_CPU_SUP_AMD */ ++#endif /* CONFIG_X86_BROADCAST_TLB_FLUSH */ + /* * Given an ASID, flush the corresponding user ASID. We can delay this * until the next time we switch to it. 
-@@ -556,8 +809,9 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, +@@ -556,8 +856,9 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, */ if (prev == next) { /* Not actually switching mm's */ - VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) != - next->context.ctx_id); -+ if (is_dyn_asid(prev_asid)) -+ VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) != -+ next->context.ctx_id); ++ VM_WARN_ON(is_dyn_asid(prev_asid) && ++ this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) != ++ next->context.ctx_id); /* * If this races with another thread that enables lam, 'new_lam' -@@ -573,6 +827,23 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, +@@ -573,6 +874,23 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, !cpumask_test_cpu(cpu, mm_cpumask(next)))) cpumask_set_cpu(cpu, mm_cpumask(next)); + /* + * Check if the current mm is transitioning to a new ASID. + */ -+ if (needs_broadcast_asid_reload(next, prev_asid)) { ++ if (needs_global_asid_reload(next, prev_asid)) { + next_tlb_gen = atomic64_read(&next->context.tlb_gen); + + choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush); @@ -777,24 +937,44 @@ index a2becb85bea7..0080175153ef 100644 + * Broadcast TLB invalidation keeps this PCID up to date + * all the time. + */ -+ if (is_broadcast_asid(prev_asid)) ++ if (is_global_asid(prev_asid)) + return; + /* * If the CPU is not in lazy TLB mode, we are just switching * from one thread in a process to another thread in the same -@@ -629,8 +900,10 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, - barrier(); +@@ -606,6 +924,13 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, + */ + cond_mitigation(tsk); + ++ /* ++ * Let nmi_uaccess_okay() and finish_asid_transition() ++ * know that we're changing CR3. ++ */ ++ this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING); ++ barrier(); ++ + /* + * Stop remote flushes for the previous mm. + * Skip kernel threads; we never send init_mm TLB flushing IPIs, +@@ -623,14 +948,12 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, + next_tlb_gen = atomic64_read(&next->context.tlb_gen); + + choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush); +- +- /* Let nmi_uaccess_okay() know that we're changing CR3. 
*/ +- this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING); +- barrier(); } +reload_tlb: new_lam = mm_lam_cr3_mask(next); if (need_flush) { -+ VM_BUG_ON(is_broadcast_asid(new_asid)); ++ VM_WARN_ON_ONCE(is_global_asid(new_asid)); this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id); this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen); load_new_mm_cr3(next->pgd, new_asid, new_lam, true); -@@ -749,7 +1022,7 @@ static void flush_tlb_func(void *info) +@@ -749,7 +1072,7 @@ static void flush_tlb_func(void *info) const struct flush_tlb_info *f = info; struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm); u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); @@ -803,24 +983,24 @@ index a2becb85bea7..0080175153ef 100644 bool local = smp_processor_id() == f->initiating_cpu; unsigned long nr_invalidate = 0; u64 mm_tlb_gen; -@@ -769,6 +1042,16 @@ static void flush_tlb_func(void *info) +@@ -769,6 +1092,16 @@ static void flush_tlb_func(void *info) if (unlikely(loaded_mm == &init_mm)) return; -+ /* Reload the ASID if transitioning into or out of a broadcast ASID */ -+ if (needs_broadcast_asid_reload(loaded_mm, loaded_mm_asid)) { ++ /* Reload the ASID if transitioning into or out of a global ASID */ ++ if (needs_global_asid_reload(loaded_mm, loaded_mm_asid)) { + switch_mm_irqs_off(NULL, loaded_mm, NULL); + loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); + } + + /* Broadcast ASIDs are always kept up to date with INVLPGB. */ -+ if (is_broadcast_asid(loaded_mm_asid)) ++ if (is_global_asid(loaded_mm_asid)) + return; + VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) != loaded_mm->context.ctx_id); -@@ -786,6 +1069,8 @@ static void flush_tlb_func(void *info) +@@ -786,6 +1119,8 @@ static void flush_tlb_func(void *info) return; } @@ -829,32 +1009,7 @@ index a2becb85bea7..0080175153ef 100644 if (unlikely(f->new_tlb_gen != TLB_GENERATION_INVALID && f->new_tlb_gen <= local_tlb_gen)) { /* -@@ -825,7 +1110,7 @@ static void flush_tlb_func(void *info) - * - * The only question is whether to do a full or partial flush. - * -- * We do a partial flush if requested and two extra conditions -+ * We do a partial flush if requested and three extra conditions - * are met: - * - * 1. f->new_tlb_gen == local_tlb_gen + 1. We have an invariant that -@@ -852,10 +1137,14 @@ static void flush_tlb_func(void *info) - * date. By doing a full flush instead, we can increase - * local_tlb_gen all the way to mm_tlb_gen and we can probably - * avoid another flush in the very near future. -+ * -+ * 3. No page tables were freed. If page tables were freed, a full -+ * flush ensures intermediate translations in the TLB get flushed. - */ - if (f->end != TLB_FLUSH_ALL && - f->new_tlb_gen == local_tlb_gen + 1 && -- f->new_tlb_gen == mm_tlb_gen) { -+ f->new_tlb_gen == mm_tlb_gen && -+ !f->freed_tables) { - /* Partial flush */ - unsigned long addr = f->start; - -@@ -926,7 +1215,7 @@ STATIC_NOPV void native_flush_tlb_multi(const struct cpumask *cpumask, +@@ -926,7 +1261,7 @@ STATIC_NOPV void native_flush_tlb_multi(const struct cpumask *cpumask, * up on the new contents of what used to be page tables, while * doing a speculative memory access. 
*/ @@ -863,102 +1018,155 @@ index a2becb85bea7..0080175153ef 100644 on_each_cpu_mask(cpumask, flush_tlb_func, (void *)info, true); else on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func, -@@ -998,14 +1287,18 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, +@@ -981,6 +1316,15 @@ static struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm, + info->new_tlb_gen = new_tlb_gen; + info->initiating_cpu = smp_processor_id(); + ++ /* ++ * If the number of flushes is so large that a full flush ++ * would be faster, do a full flush. ++ */ ++ if ((end - start) >> stride_shift > tlb_single_page_flush_ceiling) { ++ info->start = 0; ++ info->end = TLB_FLUSH_ALL; ++ } ++ + return info; + } + +@@ -998,17 +1342,8 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, bool freed_tables) { struct flush_tlb_info *info; -+ unsigned long threshold = tlb_single_page_flush_ceiling; ++ int cpu = get_cpu(); u64 new_tlb_gen; - int cpu; - -+ if (static_cpu_has(X86_FEATURE_INVLPGB)) -+ threshold *= invlpgb_count_max; -+ - cpu = get_cpu(); - - /* Should we flush just the requested range? */ - if ((end == TLB_FLUSH_ALL) || +- int cpu; +- +- cpu = get_cpu(); +- +- /* Should we flush just the requested range? */ +- if ((end == TLB_FLUSH_ALL) || - ((end - start) >> stride_shift) > tlb_single_page_flush_ceiling) { -+ ((end - start) >> stride_shift) > threshold) { - start = 0; - end = TLB_FLUSH_ALL; - } -@@ -1021,8 +1314,11 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, +- start = 0; +- end = TLB_FLUSH_ALL; +- } + + /* This is also a barrier that synchronizes with switch_mm(). */ + new_tlb_gen = inc_mm_tlb_gen(mm); +@@ -1021,8 +1356,11 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, * a local TLB flush is needed. Optimize this use-case by calling * flush_tlb_func_local() directly in this case. 
*/ - if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) { -+ if (IS_ENABLED(CONFIG_CPU_SUP_AMD) && mm_broadcast_asid(mm)) { ++ if (mm_global_asid(mm)) { + broadcast_tlb_flush(info); + } else if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) { flush_tlb_multi(mm_cpumask(mm), info); -+ count_tlb_flush(mm); ++ consider_global_asid(mm); } else if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) { lockdep_assert_irqs_enabled(); local_irq_disable(); -@@ -1045,9 +1341,41 @@ static void do_flush_tlb_all(void *info) +@@ -1036,6 +1374,19 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, + } + + ++static bool broadcast_flush_tlb_all(void) ++{ ++ if (!IS_ENABLED(CONFIG_X86_BROADCAST_TLB_FLUSH)) ++ return false; ++ ++ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) ++ return false; ++ ++ guard(preempt)(); ++ invlpgb_flush_all(); ++ return true; ++} ++ + static void do_flush_tlb_all(void *info) + { + count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); +@@ -1044,10 +1395,36 @@ static void do_flush_tlb_all(void *info) + void flush_tlb_all(void) { - count_vm_tlb_event(NR_TLB_REMOTE_FLUSH); -+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) { -+ guard(preempt)(); -+ invlpgb_flush_all(); -+ tlbsync(); ++ if (broadcast_flush_tlb_all()) + return; -+ } + count_vm_tlb_event(NR_TLB_REMOTE_FLUSH); on_each_cpu(do_flush_tlb_all, NULL, 1); } -+static void broadcast_kernel_range_flush(unsigned long start, unsigned long end) ++static bool broadcast_kernel_range_flush(struct flush_tlb_info *info) +{ + unsigned long addr; -+ unsigned long maxnr = invlpgb_count_max; -+ unsigned long threshold = tlb_single_page_flush_ceiling * maxnr; ++ unsigned long nr; + -+ /* -+ * TLBSYNC only waits for flushes originating on the same CPU. -+ * Disabling migration allows us to wait on all flushes. 
-+ */ -+ guard(preempt)(); ++ if (!IS_ENABLED(CONFIG_X86_BROADCAST_TLB_FLUSH)) ++ return false; ++ ++ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) ++ return false; + -+ if (end == TLB_FLUSH_ALL || -+ (end - start) > threshold << PAGE_SHIFT) { ++ if (info->end == TLB_FLUSH_ALL) { + invlpgb_flush_all(); -+ } else { -+ unsigned long nr; -+ for (addr = start; addr < end; addr += nr << PAGE_SHIFT) { -+ nr = min((end - addr) >> PAGE_SHIFT, maxnr); -+ invlpgb_flush_addr(addr, nr); -+ } ++ return true; + } + ++ for (addr = info->start; addr < info->end; addr += nr << PAGE_SHIFT) { ++ nr = min((info->end - addr) >> PAGE_SHIFT, invlpgb_count_max); ++ invlpgb_flush_addr_nosync(addr, nr); ++ } + tlbsync(); ++ return true; +} + static void do_kernel_range_flush(void *info) { struct flush_tlb_info *f = info; -@@ -1060,6 +1388,11 @@ static void do_kernel_range_flush(void *info) +@@ -1060,22 +1437,21 @@ static void do_kernel_range_flush(void *info) void flush_tlb_kernel_range(unsigned long start, unsigned long end) { -+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) { -+ broadcast_kernel_range_flush(start, end); -+ return; -+ } -+ - /* Balance as user space task's flush, a bit conservative */ - if (end == TLB_FLUSH_ALL || - (end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) { -@@ -1244,7 +1577,6 @@ EXPORT_SYMBOL_GPL(__flush_tlb_all); - void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) - { - struct flush_tlb_info *info; -- +- /* Balance as user space task's flush, a bit conservative */ +- if (end == TLB_FLUSH_ALL || +- (end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) { +- on_each_cpu(do_flush_tlb_all, NULL, 1); +- } else { +- struct flush_tlb_info *info; ++ struct flush_tlb_info *info; + +- preempt_disable(); +- info = get_flush_tlb_info(NULL, start, end, 0, false, +- TLB_GENERATION_INVALID); ++ guard(preempt)(); + ++ info = get_flush_tlb_info(NULL, start, end, PAGE_SHIFT, false, ++ TLB_GENERATION_INVALID); ++ ++ if (broadcast_kernel_range_flush(info)) ++ ; /* Fall through. */ ++ else if (info->end == TLB_FLUSH_ALL) ++ on_each_cpu(do_flush_tlb_all, NULL, 1); ++ else + on_each_cpu(do_kernel_range_flush, info, 1); + +- put_flush_tlb_info(); +- preempt_enable(); +- } ++ put_flush_tlb_info(); + } + + /* +@@ -1247,7 +1623,7 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) + int cpu = get_cpu(); - info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false, -@@ -1263,12 +1595,49 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) +- info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false, ++ info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, PAGE_SHIFT, false, + TLB_GENERATION_INVALID); + /* + * flush_tlb_multi() is not optimized for the common case in which only +@@ -1263,12 +1639,62 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) local_irq_enable(); } @@ -983,8 +1191,8 @@ index a2becb85bea7..0080175153ef 100644 + struct mm_struct *mm, + unsigned long uaddr) +{ -+ if (static_cpu_has(X86_FEATURE_INVLPGB) && mm_broadcast_asid(mm)) { -+ u16 asid = mm_broadcast_asid(mm); ++ if (static_cpu_has(X86_FEATURE_INVLPGB) && mm_global_asid(mm)) { ++ u16 asid = mm_global_asid(mm); + /* + * Queue up an asynchronous invalidation. 
The corresponding + * TLBSYNC is done in arch_tlbbatch_flush(), and must be done @@ -994,11 +1202,24 @@ index a2becb85bea7..0080175153ef 100644 + batch->used_invlpgb = true; + migrate_disable(); + } -+ invlpgb_flush_user_nr(kern_pcid(asid), uaddr, 1, 0); ++ invlpgb_flush_user_nr_nosync(kern_pcid(asid), uaddr, 1, false, false); + /* Do any CPUs supporting INVLPGB need PTI? */ + if (static_cpu_has(X86_FEATURE_PTI)) -+ invlpgb_flush_user_nr(user_pcid(asid), uaddr, 1, 0); ++ invlpgb_flush_user_nr_nosync(user_pcid(asid), uaddr, 1, false, false); ++ ++ /* ++ * Some CPUs might still be using a local ASID for this ++ * process, and require IPIs, while others are using the ++ * global ASID. ++ * ++ * In this corner case we need to do both the broadcast ++ * TLB invalidation, and send IPIs. The IPIs will help ++ * stragglers transition to the broadcast ASID. ++ */ ++ if (READ_ONCE(mm->context.asid_transition)) ++ goto also_send_ipi; + } else { ++also_send_ipi: + inc_mm_tlb_gen(mm); + cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm)); + } @@ -1084,6 +1305,26 @@ index bb2119e5a0d0..a593d5edfd88 100644 tlb_gather_mmu(&tlb, vms->vma->vm_mm); update_hiwater_rss(vms->vma->vm_mm); unmap_vmas(&tlb, mas_detach, vms->vma, vms->start, vms->end, +diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h +index 3ae84c3b8e6d..dc1c1057f26e 100644 +--- a/tools/arch/x86/include/asm/msr-index.h ++++ b/tools/arch/x86/include/asm/msr-index.h +@@ -25,6 +25,7 @@ + #define _EFER_SVME 12 /* Enable virtualization */ + #define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */ + #define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ ++#define _EFER_TCE 15 /* Enable Translation Cache Extensions */ + #define _EFER_AUTOIBRS 21 /* Enable Automatic IBRS */ + + #define EFER_SCE (1<<_EFER_SCE) +@@ -34,6 +35,7 @@ + #define EFER_SVME (1<<_EFER_SVME) + #define EFER_LMSLE (1<<_EFER_LMSLE) + #define EFER_FFXSR (1<<_EFER_FFXSR) ++#define EFER_TCE (1<<_EFER_TCE) + #define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS) + + /* -- 2.48.0.rc1 diff --git a/6.13/0003-bbr3.patch b/6.13/0003-bbr3.patch index 75d9ec9b..8967721b 100644 --- a/6.13/0003-bbr3.patch +++ b/6.13/0003-bbr3.patch @@ -1,6 +1,6 @@ -From 9341991abd224336e551e90c7179e2e221fdf466 Mon Sep 17 00:00:00 2001 +From 1fc2e15c0c690b276928953ff73277b4d66e67f3 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:37:35 +0100 +Date: Mon, 20 Jan 2025 13:21:45 +0100 Subject: [PATCH 03/12] bbr3 Signed-off-by: Peter Jung diff --git a/6.13/0004-cachy.patch b/6.13/0004-cachy.patch index 77d9cf11..fa60f8b7 100644 --- a/6.13/0004-cachy.patch +++ b/6.13/0004-cachy.patch @@ -1,6 +1,6 @@ -From 32089eb0a217a8d425f387e5e613d498ad760f34 Mon Sep 17 00:00:00 2001 +From e01619bda1e69eea53c0f3ef61476fb02da06868 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:37:49 +0100 +Date: Mon, 20 Jan 2025 13:21:55 +0100 Subject: [PATCH 04/12] cachy Signed-off-by: Peter Jung @@ -19,12 +19,23 @@ Signed-off-by: Peter Jung drivers/cpufreq/Kconfig.x86 | 2 - drivers/cpufreq/intel_pstate.c | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + + drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c | 44 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h | 1 + + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 +- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 10 + + drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c | 19 + + drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h | 1 + + drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 1 + drivers/gpu/drm/amd/display/Kconfig | 6 + - 
.../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- + .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 69 +- + .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 7 + .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 2 +- .../amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 6 +- .../amd/display/amdgpu_dm/amdgpu_dm_plane.c | 6 +- + .../drm/amd/display/dc/bios/bios_parser2.c | 13 +- + .../drm/amd/display/dc/core/dc_link_exports.c | 6 + + drivers/gpu/drm/amd/display/dc/dc.h | 3 + + .../dc/resource/dce120/dce120_resource.c | 17 + drivers/gpu/drm/amd/pm/amdgpu_pm.c | 3 + drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 14 +- drivers/input/evdev.c | 19 +- @@ -66,7 +77,7 @@ Signed-off-by: Peter Jung mm/vmpressure.c | 4 + mm/vmscan.c | 143 + net/ipv4/inet_connection_sock.c | 2 +- - 61 files changed, 6557 insertions(+), 65 deletions(-) + 72 files changed, 6714 insertions(+), 93 deletions(-) create mode 100644 drivers/media/v4l2-core/v4l2loopback.c create mode 100644 drivers/media/v4l2-core/v4l2loopback.h create mode 100644 drivers/media/v4l2-core/v4l2loopback_formats.h @@ -203,7 +214,7 @@ index f48eaa98d22d..fc777c14cff6 100644 unprivileged_userfaultfd ======================== diff --git a/Makefile b/Makefile -index e20a62ad397f..9a63ab456ffc 100644 +index b9464c88ac72..ea555e6a8bf1 100644 --- a/Makefile +++ b/Makefile @@ -860,11 +860,19 @@ KBUILD_CFLAGS += -fno-delete-null-pointer-checks @@ -227,7 +238,7 @@ index e20a62ad397f..9a63ab456ffc 100644 # depends on `opt-level` and `debug-assertions`, respectively. KBUILD_RUSTFLAGS += -Cdebug-assertions=$(if $(CONFIG_RUST_DEBUG_ASSERTIONS),y,n) diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu -index 2a7279d80460..301ced02b077 100644 +index bacdc502903f..f2c97bdcef58 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -155,9 +155,8 @@ config MPENTIUM4 @@ -1070,6 +1081,118 @@ index 4653a8d2823a..6590e83dfbf0 100644 extern int amdgpu_vis_vram_limit; extern int amdgpu_gart_size; extern int amdgpu_gtt_size; +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +index 093141ad6ed0..e476e45b996a 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +@@ -36,13 +36,6 @@ + #include "atombios_encoders.h" + #include "bif/bif_4_1_d.h" + +-static void amdgpu_atombios_lookup_i2c_gpio_quirks(struct amdgpu_device *adev, +- ATOM_GPIO_I2C_ASSIGMENT *gpio, +- u8 index) +-{ +- +-} +- + static struct amdgpu_i2c_bus_rec amdgpu_atombios_get_bus_rec_for_i2c_gpio(ATOM_GPIO_I2C_ASSIGMENT *gpio) + { + struct amdgpu_i2c_bus_rec i2c; +@@ -108,9 +101,6 @@ struct amdgpu_i2c_bus_rec amdgpu_atombios_lookup_i2c_gpio(struct amdgpu_device * + + gpio = &i2c_info->asGPIO_Info[0]; + for (i = 0; i < num_indices; i++) { +- +- amdgpu_atombios_lookup_i2c_gpio_quirks(adev, gpio, i); +- + if (gpio->sucI2cId.ucAccess == id) { + i2c = amdgpu_atombios_get_bus_rec_for_i2c_gpio(gpio); + break; +@@ -142,8 +132,6 @@ void amdgpu_atombios_i2c_init(struct amdgpu_device *adev) + + gpio = &i2c_info->asGPIO_Info[0]; + for (i = 0; i < num_indices; i++) { +- amdgpu_atombios_lookup_i2c_gpio_quirks(adev, gpio, i); +- + i2c = amdgpu_atombios_get_bus_rec_for_i2c_gpio(gpio); + + if (i2c.valid) { +@@ -156,6 +144,38 @@ void amdgpu_atombios_i2c_init(struct amdgpu_device *adev) + } + } + ++void amdgpu_atombios_oem_i2c_init(struct amdgpu_device *adev, u8 i2c_id) ++{ ++ struct atom_context *ctx = adev->mode_info.atom_context; ++ ATOM_GPIO_I2C_ASSIGMENT *gpio; ++ struct amdgpu_i2c_bus_rec i2c; ++ int index = 
GetIndexIntoMasterTable(DATA, GPIO_I2C_Info); ++ struct _ATOM_GPIO_I2C_INFO *i2c_info; ++ uint16_t data_offset, size; ++ int i, num_indices; ++ char stmp[32]; ++ ++ if (amdgpu_atom_parse_data_header(ctx, index, &size, NULL, NULL, &data_offset)) { ++ i2c_info = (struct _ATOM_GPIO_I2C_INFO *)(ctx->bios + data_offset); ++ ++ num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) / ++ sizeof(ATOM_GPIO_I2C_ASSIGMENT); ++ ++ gpio = &i2c_info->asGPIO_Info[0]; ++ for (i = 0; i < num_indices; i++) { ++ i2c = amdgpu_atombios_get_bus_rec_for_i2c_gpio(gpio); ++ ++ if (i2c.valid && i2c.i2c_id == i2c_id) { ++ sprintf(stmp, "OEM 0x%x", i2c.i2c_id); ++ adev->i2c_bus[i] = amdgpu_i2c_create(adev_to_drm(adev), &i2c, stmp); ++ break; ++ } ++ gpio = (ATOM_GPIO_I2C_ASSIGMENT *) ++ ((u8 *)gpio + sizeof(ATOM_GPIO_I2C_ASSIGMENT)); ++ } ++ } ++} ++ + struct amdgpu_gpio_rec + amdgpu_atombios_lookup_gpio(struct amdgpu_device *adev, + u8 id) +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h +index 0e16432d9a72..867bc5c5ce67 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h +@@ -136,6 +136,7 @@ amdgpu_atombios_lookup_gpio(struct amdgpu_device *adev, + struct amdgpu_i2c_bus_rec amdgpu_atombios_lookup_i2c_gpio(struct amdgpu_device *adev, + uint8_t id); + void amdgpu_atombios_i2c_init(struct amdgpu_device *adev); ++void amdgpu_atombios_oem_i2c_init(struct amdgpu_device *adev, u8 i2c_id); + + bool amdgpu_atombios_has_dce_engine_info(struct amdgpu_device *adev); + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +index cd4fac120834..1ab433d774cc 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +@@ -4461,8 +4461,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, + goto failed; + } + /* init i2c buses */ +- if (!amdgpu_device_has_dc_support(adev)) +- amdgpu_atombios_i2c_init(adev); ++ amdgpu_i2c_init(adev); + } + } + +@@ -4724,8 +4723,7 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) + amdgpu_reset_fini(adev); + + /* free i2c buses */ +- if (!amdgpu_device_has_dc_support(adev)) +- amdgpu_i2c_fini(adev); ++ amdgpu_i2c_fini(adev); + + if (amdgpu_emu_mode != 1) + amdgpu_atombios_fini(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 38686203bea6..811d020f3f4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1098,6 +1221,60 @@ index 38686203bea6..811d020f3f4b 100644 /** * DOC: vramlimit (int) * Restrict the total amount of VRAM in MiB for testing. The default is 0 (Use full VRAM). 
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c +index f0765ccde668..8179d0814db9 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c +@@ -225,6 +225,25 @@ void amdgpu_i2c_destroy(struct amdgpu_i2c_chan *i2c) + kfree(i2c); + } + ++void amdgpu_i2c_init(struct amdgpu_device *adev) ++{ ++ if (!adev->is_atom_fw) { ++ if (!amdgpu_device_has_dc_support(adev)) { ++ amdgpu_atombios_i2c_init(adev); ++ } else { ++ switch (adev->asic_type) { ++ case CHIP_POLARIS10: ++ case CHIP_POLARIS11: ++ case CHIP_POLARIS12: ++ amdgpu_atombios_oem_i2c_init(adev, 0x97); ++ break; ++ default: ++ break; ++ } ++ } ++ } ++} ++ + /* remove all the buses */ + void amdgpu_i2c_fini(struct amdgpu_device *adev) + { +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h +index 21e3d1dad0a1..1d3d3806e0dd 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h +@@ -28,6 +28,7 @@ struct amdgpu_i2c_chan *amdgpu_i2c_create(struct drm_device *dev, + const struct amdgpu_i2c_bus_rec *rec, + const char *name); + void amdgpu_i2c_destroy(struct amdgpu_i2c_chan *i2c); ++void amdgpu_i2c_init(struct amdgpu_device *adev); + void amdgpu_i2c_fini(struct amdgpu_device *adev); + struct amdgpu_i2c_chan * + amdgpu_i2c_lookup(struct amdgpu_device *adev, +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +index 5e3faefc5510..6da4f946cac0 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +@@ -609,6 +609,7 @@ struct amdgpu_i2c_adapter { + struct i2c_adapter base; + + struct ddc_service *ddc_service; ++ bool oem; + }; + + #define TO_DM_AUX(x) container_of((x), struct amdgpu_dm_dp_aux, aux) diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index 11e3f2f3b174..7b1bd69dc29e 100644 --- a/drivers/gpu/drm/amd/display/Kconfig @@ -1114,10 +1291,73 @@ index 11e3f2f3b174..7b1bd69dc29e 100644 + endmenu diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c -index cd16dae534dc..1508978f92dd 100644 +index 5f216d626cbb..382af92c4ff1 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c -@@ -4516,7 +4516,7 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) +@@ -177,6 +177,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev); + static void amdgpu_dm_fini(struct amdgpu_device *adev); + static bool is_freesync_video_mode(const struct drm_display_mode *mode, struct amdgpu_dm_connector *aconnector); + static void reset_freesync_config_for_crtc(struct dm_crtc_state *new_crtc_state); ++static struct amdgpu_i2c_adapter * ++create_i2c(struct ddc_service *ddc_service, bool oem); + + static enum drm_mode_subconnector get_subconnector_type(struct dc_link *link) + { +@@ -2839,6 +2841,33 @@ static int amdgpu_dm_smu_write_watermarks_table(struct amdgpu_device *adev) + return 0; + } + ++static int dm_oem_i2c_hw_init(struct amdgpu_device *adev) ++{ ++ struct amdgpu_display_manager *dm = &adev->dm; ++ struct amdgpu_i2c_adapter *oem_i2c; ++ struct ddc_service *oem_ddc_service; ++ int r; ++ ++ oem_ddc_service = dc_get_oem_i2c_device(adev->dm.dc); ++ if (oem_ddc_service) { ++ oem_i2c = create_i2c(oem_ddc_service, true); ++ if (!oem_i2c) { ++ dev_info(adev->dev, "Failed to create oem i2c adapter data\n"); ++ return -ENOMEM; ++ 
} ++ ++ r = i2c_add_adapter(&oem_i2c->base); ++ if (r) { ++ dev_info(adev->dev, "Failed to register oem i2c\n"); ++ kfree(oem_i2c); ++ return r; ++ } ++ dm->oem_i2c = oem_i2c; ++ } ++ ++ return 0; ++} ++ + /** + * dm_hw_init() - Initialize DC device + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. +@@ -2870,6 +2899,10 @@ static int dm_hw_init(struct amdgpu_ip_block *ip_block) + return r; + amdgpu_dm_hpd_init(adev); + ++ r = dm_oem_i2c_hw_init(adev); ++ if (r) ++ dev_info(adev->dev, "Failed to add OEM i2c bus\n"); ++ + return 0; + } + +@@ -2885,6 +2918,8 @@ static int dm_hw_fini(struct amdgpu_ip_block *ip_block) + { + struct amdgpu_device *adev = ip_block->adev; + ++ kfree(adev->dm.oem_i2c); ++ + amdgpu_dm_hpd_fini(adev); + + amdgpu_dm_irq_fini(adev); +@@ -4516,7 +4551,7 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) return r; } @@ -1126,6 +1366,93 @@ index cd16dae534dc..1508978f92dd 100644 if (amdgpu_dm_create_color_properties(adev)) { dc_state_release(state->context); kfree(state); +@@ -8218,7 +8253,7 @@ static int amdgpu_dm_i2c_xfer(struct i2c_adapter *i2c_adap, + int i; + int result = -EIO; + +- if (!ddc_service->ddc_pin || !ddc_service->ddc_pin->hw_info.hw_supported) ++ if (!ddc_service->ddc_pin) + return result; + + cmd.payloads = kcalloc(num, sizeof(struct i2c_payload), GFP_KERNEL); +@@ -8237,11 +8272,18 @@ static int amdgpu_dm_i2c_xfer(struct i2c_adapter *i2c_adap, + cmd.payloads[i].data = msgs[i].buf; + } + +- if (dc_submit_i2c( +- ddc_service->ctx->dc, +- ddc_service->link->link_index, +- &cmd)) +- result = num; ++ if (i2c->oem) { ++ if (dc_submit_i2c_oem( ++ ddc_service->ctx->dc, ++ &cmd)) ++ result = num; ++ } else { ++ if (dc_submit_i2c( ++ ddc_service->ctx->dc, ++ ddc_service->link->link_index, ++ &cmd)) ++ result = num; ++ } + + kfree(cmd.payloads); + return result; +@@ -8258,9 +8300,7 @@ static const struct i2c_algorithm amdgpu_dm_i2c_algo = { + }; + + static struct amdgpu_i2c_adapter * +-create_i2c(struct ddc_service *ddc_service, +- int link_index, +- int *res) ++create_i2c(struct ddc_service *ddc_service, bool oem) + { + struct amdgpu_device *adev = ddc_service->ctx->driver_context; + struct amdgpu_i2c_adapter *i2c; +@@ -8271,9 +8311,14 @@ create_i2c(struct ddc_service *ddc_service, + i2c->base.owner = THIS_MODULE; + i2c->base.dev.parent = &adev->pdev->dev; + i2c->base.algo = &amdgpu_dm_i2c_algo; +- snprintf(i2c->base.name, sizeof(i2c->base.name), "AMDGPU DM i2c hw bus %d", link_index); ++ if (oem) ++ snprintf(i2c->base.name, sizeof(i2c->base.name), "AMDGPU DM i2c OEM bus"); ++ else ++ snprintf(i2c->base.name, sizeof(i2c->base.name), "AMDGPU DM i2c hw bus %d", ++ ddc_service->link->link_index); + i2c_set_adapdata(&i2c->base, i2c); + i2c->ddc_service = ddc_service; ++ i2c->oem = oem; + + return i2c; + } +@@ -8298,7 +8343,7 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm, + link->priv = aconnector; + + +- i2c = create_i2c(link->ddc, link->link_index, &res); ++ i2c = create_i2c(link->ddc, false); + if (!i2c) { + DRM_ERROR("Failed to create i2c adapter data\n"); + return -ENOMEM; +diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +index 2227cd8e4a89..5710776bb0e2 100644 +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +@@ -606,6 +606,13 @@ struct amdgpu_display_manager { + * Bounding box data read from dmub during early initialization for DCN4+ + */ + struct dml2_soc_bb 
*bb_from_dmub; ++ ++ /** ++ * @oem_i2c: ++ * ++ * OEM i2c bus ++ */ ++ struct amdgpu_i2c_adapter *oem_i2c; + }; + + enum dsc_clock_force_state { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index ebabfe3a512f..4d3ebcaacca1 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -1140,7 +1467,7 @@ index ebabfe3a512f..4d3ebcaacca1 100644 * * AMD driver supports pre-defined mathematical functions for transferring diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c -index 64a041c2af05..08790bcfe109 100644 +index 36a830a7440f..a8fc8bd52d51 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -470,7 +470,7 @@ static int amdgpu_dm_crtc_late_register(struct drm_crtc *crtc) @@ -1201,6 +1528,124 @@ index 495e3cd70426..704a48209657 100644 dm_atomic_plane_attach_color_mgmt_properties(dm, plane); #endif /* Create (reset) the plane state */ +diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +index c9a6de110b74..470ec970217b 100644 +--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c ++++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +@@ -1778,6 +1778,7 @@ static enum bp_result get_firmware_info_v3_1( + struct dc_firmware_info *info) + { + struct atom_firmware_info_v3_1 *firmware_info; ++ struct atom_firmware_info_v3_2 *firmware_info32; + struct atom_display_controller_info_v4_1 *dce_info = NULL; + + if (!info) +@@ -1785,6 +1786,8 @@ static enum bp_result get_firmware_info_v3_1( + + firmware_info = GET_IMAGE(struct atom_firmware_info_v3_1, + DATA_TABLES(firmwareinfo)); ++ firmware_info32 = GET_IMAGE(struct atom_firmware_info_v3_2, ++ DATA_TABLES(firmwareinfo)); + + dce_info = GET_IMAGE(struct atom_display_controller_info_v4_1, + DATA_TABLES(dce_info)); +@@ -1817,7 +1820,15 @@ static enum bp_result get_firmware_info_v3_1( + bp->cmd_tbl.get_smu_clock_info(bp, SMU9_SYSPLL0_ID) * 10; + } + +- info->oem_i2c_present = false; ++ /* These fields are marked as reserved in v3_1, but they appear to be populated ++ * properly. 
++ */ ++ if (firmware_info32->board_i2c_feature_id == 0x2) { ++ info->oem_i2c_present = true; ++ info->oem_i2c_obj_id = firmware_info32->board_i2c_feature_gpio_id; ++ } else { ++ info->oem_i2c_present = false; ++ } + + return BP_RESULT_OK; + } +diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c +index 457d60eeb486..13636eb4ec3f 100644 +--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c ++++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c +@@ -142,6 +142,12 @@ bool dc_link_update_dsc_config(struct pipe_ctx *pipe_ctx) + return link->dc->link_srv->update_dsc_config(pipe_ctx); + } + ++struct ddc_service * ++dc_get_oem_i2c_device(struct dc *dc) ++{ ++ return dc->res_pool->oem_device; ++} ++ + bool dc_is_oem_i2c_device_present( + struct dc *dc, + size_t slave_address) +diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h +index 08c5a315b3a6..70d6005ecd64 100644 +--- a/drivers/gpu/drm/amd/display/dc/dc.h ++++ b/drivers/gpu/drm/amd/display/dc/dc.h +@@ -1939,6 +1939,9 @@ int dc_link_aux_transfer_raw(struct ddc_service *ddc, + struct aux_payload *payload, + enum aux_return_code_type *operation_result); + ++struct ddc_service * ++dc_get_oem_i2c_device(struct dc *dc); ++ + bool dc_is_oem_i2c_device_present( + struct dc *dc, + size_t slave_address +diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c +index c63c59623433..eb1e158d3436 100644 +--- a/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c ++++ b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c +@@ -67,6 +67,7 @@ + #include "reg_helper.h" + + #include "dce100/dce100_resource.h" ++#include "link.h" + + #ifndef mmDP0_DP_DPHY_INTERNAL_CTRL + #define mmDP0_DP_DPHY_INTERNAL_CTRL 0x210f +@@ -659,6 +660,12 @@ static void dce120_resource_destruct(struct dce110_resource_pool *pool) + + if (pool->base.dmcu != NULL) + dce_dmcu_destroy(&pool->base.dmcu); ++ ++ if (pool->base.oem_device != NULL) { ++ struct dc *dc = pool->base.oem_device->ctx->dc; ++ ++ dc->link_srv->destroy_ddc_service(&pool->base.oem_device); ++ } + } + + static void read_dce_straps( +@@ -1054,6 +1061,7 @@ static bool dce120_resource_construct( + struct dc *dc, + struct dce110_resource_pool *pool) + { ++ struct ddc_service_init_data ddc_init_data = {0}; + unsigned int i; + int j; + struct dc_context *ctx = dc->ctx; +@@ -1257,6 +1265,15 @@ static bool dce120_resource_construct( + + bw_calcs_data_update_from_pplib(dc); + ++ if (dc->ctx->dc_bios->fw_info.oem_i2c_present) { ++ ddc_init_data.ctx = dc->ctx; ++ ddc_init_data.link = NULL; ++ ddc_init_data.id.id = dc->ctx->dc_bios->fw_info.oem_i2c_obj_id; ++ ddc_init_data.id.enum_id = 0; ++ ddc_init_data.id.type = OBJECT_TYPE_GENERIC; ++ pool->base.oem_device = dc->link_srv->create_ddc_service(&ddc_init_data); ++ } ++ + return true; + + irqs_create_fail: diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index e8ae7681bf0a..8a0d873983f3 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -7128,7 +7573,7 @@ index 2ddb827e3bea..464049c4af3f 100644 return state; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index 3e9ca38512de..463fe1dc6de8 100644 +index 26958431deb7..8c0f17a96d4f 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -73,10 +73,19 @@ unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG; @@ -7408,7 +7853,7 @@ index 
a2b16b08cbbf..48d611e58ad3 100644 static int __read_mostly sysctl_compact_memory; diff --git a/mm/huge_memory.c b/mm/huge_memory.c -index e53d83b3e5cf..b4c205f2042a 100644 +index db64116a4f84..3e0266c973e1 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -64,7 +64,11 @@ unsigned long transparent_hugepage_flags __read_mostly = @@ -7436,7 +7881,7 @@ index 24b68b425afb..081ddb92db87 100644 /* diff --git a/mm/page-writeback.c b/mm/page-writeback.c -index d213ead95675..0430a97b30fd 100644 +index d9861e42b2bd..13ab2294f0bb 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -71,7 +71,11 @@ static long ratelimit_pages = 32; @@ -7464,7 +7909,7 @@ index d213ead95675..0430a97b30fd 100644 EXPORT_SYMBOL_GPL(dirty_writeback_interval); diff --git a/mm/page_alloc.c b/mm/page_alloc.c -index cae7b93864c2..57038052c153 100644 +index 01eab25edf89..3ea393f1311a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -271,7 +271,11 @@ const char * const migratetype_names[MIGRATE_TYPES] = { @@ -7517,7 +7962,7 @@ index bd5183dfd879..3a410f53a07c 100644 /* diff --git a/mm/vmscan.c b/mm/vmscan.c -index 9a859b7d18d7..ec7f96bb0e9f 100644 +index b1ec5ece067e..e258174d240a 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -148,6 +148,15 @@ struct scan_control { @@ -7712,7 +8157,7 @@ index 9a859b7d18d7..ec7f96bb0e9f 100644 else if (min_seq[LRU_GEN_ANON] < min_seq[LRU_GEN_FILE]) type = LRU_GEN_ANON; else if (swappiness == 1) -@@ -4826,6 +4965,8 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc) +@@ -4829,6 +4968,8 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc) struct mem_cgroup *memcg = lruvec_memcg(lruvec); struct pglist_data *pgdat = lruvec_pgdat(lruvec); @@ -7721,7 +8166,7 @@ index 9a859b7d18d7..ec7f96bb0e9f 100644 /* lru_gen_age_node() called mem_cgroup_calculate_protection() */ if (mem_cgroup_below_min(NULL, memcg)) return MEMCG_LRU_YOUNG; -@@ -5974,6 +6115,8 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) +@@ -5977,6 +6118,8 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) prepare_scan_control(pgdat, sc); diff --git a/6.13/0005-crypto.patch b/6.13/0005-crypto.patch index e6f240c0..e19af187 100644 --- a/6.13/0005-crypto.patch +++ b/6.13/0005-crypto.patch @@ -1,6 +1,6 @@ -From 8d1fa2a8636c551dd33500837e87e2c3f889d95c Mon Sep 17 00:00:00 2001 +From 7bc012030531a472b823293e167a86cd58da545c Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:38:01 +0100 +Date: Mon, 20 Jan 2025 13:22:05 +0100 Subject: [PATCH 05/12] crypto Signed-off-by: Peter Jung diff --git a/6.13/0006-fixes.patch b/6.13/0006-fixes.patch index 18a8692e..aaa180f2 100644 --- a/6.13/0006-fixes.patch +++ b/6.13/0006-fixes.patch @@ -1,22 +1,22 @@ -From e094aa9f2a3d8ac13a8bca382f0f5585f80926ee Mon Sep 17 00:00:00 2001 +From 2f514dfe8b006e7fa976b6265bef4b8efb81ec11 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:38:11 +0100 +Date: Mon, 20 Jan 2025 13:22:15 +0100 Subject: [PATCH 06/12] fixes Signed-off-by: Peter Jung --- arch/Kconfig | 4 +- - drivers/acpi/acpi_video.c | 50 +++++++++++-------- .../link/protocols/link_edp_panel_control.c | 3 +- - drivers/gpu/drm/drm_edid.c | 47 +++++++++++++++-- - drivers/gpu/drm/nouveau/nouveau_acpi.c | 2 +- + drivers/gpu/drm/drm_edid.c | 47 +++++++++++++++++-- + drivers/hid/hid-asus.c | 26 ++++++++++ drivers/hid/hid-ids.h | 1 + + include/linux/platform_data/x86/asus-wmi.h | 5 ++ kernel/fork.c | 9 ++-- - kernel/kprobes.c | 23 ++++----- + kernel/kprobes.c | 23 +++++---- kernel/sched/ext.c | 
4 +- scripts/package/PKGBUILD | 5 ++ - sound/pci/hda/patch_realtek.c | 2 + - 11 files changed, 103 insertions(+), 47 deletions(-) + sound/pci/hda/patch_realtek.c | 4 +- + 11 files changed, 105 insertions(+), 26 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 6682b2a53e34..fe54298ae05c 100644 @@ -40,99 +40,6 @@ index 6682b2a53e34..fe54298ae05c 100644 depends on HAVE_ARCH_MMAP_RND_COMPAT_BITS help This value can be used to select the number of bits to use to -diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c -index 8274a17872ed..3c627bdf2d1b 100644 ---- a/drivers/acpi/acpi_video.c -+++ b/drivers/acpi/acpi_video.c -@@ -610,16 +610,29 @@ acpi_video_device_lcd_get_level_current(struct acpi_video_device *device, - return 0; - } - -+/* -+ * Arg: -+ * device : video output device (LCD, CRT, ..) -+ * edid : address for returned EDID pointer -+ * length : _DDC length to request (must be a multiple of 128) -+ * -+ * Return Value: -+ * Length of EDID (positive value) or error (negative value) -+ * -+ * Get EDID from ACPI _DDC. On success, a pointer to the EDID data is written -+ * to the edid address, and the length of the EDID is returned. The caller is -+ * responsible for freeing the edid pointer. -+ */ -+ - static int --acpi_video_device_EDID(struct acpi_video_device *device, -- union acpi_object **edid, int length) -+acpi_video_device_EDID(struct acpi_video_device *device, void **edid, int length) - { -- int status; -+ acpi_status status; - struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; - union acpi_object *obj; - union acpi_object arg0 = { ACPI_TYPE_INTEGER }; - struct acpi_object_list args = { 1, &arg0 }; -- -+ int ret; - - *edid = NULL; - -@@ -636,16 +649,17 @@ acpi_video_device_EDID(struct acpi_video_device *device, - - obj = buffer.pointer; - -- if (obj && obj->type == ACPI_TYPE_BUFFER) -- *edid = obj; -- else { -+ if (obj && obj->type == ACPI_TYPE_BUFFER) { -+ *edid = kmemdup(obj->buffer.pointer, obj->buffer.length, GFP_KERNEL); -+ ret = *edid ? 
obj->buffer.length : -ENOMEM; -+ } else { - acpi_handle_debug(device->dev->handle, - "Invalid _DDC data for length %d\n", length); -- status = -EFAULT; -- kfree(obj); -+ ret = -EFAULT; - } - -- return status; -+ kfree(obj); -+ return ret; - } - - /* bus */ -@@ -1435,9 +1449,7 @@ int acpi_video_get_edid(struct acpi_device *device, int type, int device_id, - { - struct acpi_video_bus *video; - struct acpi_video_device *video_device; -- union acpi_object *buffer = NULL; -- acpi_status status; -- int i, length; -+ int i, length, ret; - - if (!device || !acpi_driver_data(device)) - return -EINVAL; -@@ -1477,16 +1489,10 @@ int acpi_video_get_edid(struct acpi_device *device, int type, int device_id, - } - - for (length = 512; length > 0; length -= 128) { -- status = acpi_video_device_EDID(video_device, &buffer, -- length); -- if (ACPI_SUCCESS(status)) -- break; -+ ret = acpi_video_device_EDID(video_device, edid, length); -+ if (ret > 0) -+ return ret; - } -- if (!length) -- continue; -- -- *edid = buffer->buffer.pointer; -- return length; - } - - return -ENODEV; diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index e0e3bb865359..ba98d56a0fe4 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -240,19 +147,50 @@ index 855beafb76ff..ad78059ee954 100644 if (!newmode) continue; -diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c -index 8f0c69aad248..21b56cc7605c 100644 ---- a/drivers/gpu/drm/nouveau/nouveau_acpi.c -+++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c -@@ -384,7 +384,7 @@ nouveau_acpi_edid(struct drm_device *dev, struct drm_connector *connector) - if (ret < 0) - return NULL; - -- return kmemdup(edid, EDID_LENGTH, GFP_KERNEL); -+ return edid; +diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c +index 506c6f377e7d..46e3e42f9eb5 100644 +--- a/drivers/hid/hid-asus.c ++++ b/drivers/hid/hid-asus.c +@@ -432,6 +432,26 @@ static int asus_kbd_get_functions(struct hid_device *hdev, + return ret; } - bool nouveau_acpi_video_backlight_use_native(void) ++static int asus_kbd_disable_oobe(struct hid_device *hdev) ++{ ++ const u8 init[][6] = { ++ { FEATURE_KBD_REPORT_ID, 0x05, 0x20, 0x31, 0x00, 0x08 }, ++ { FEATURE_KBD_REPORT_ID, 0xBA, 0xC5, 0xC4 }, ++ { FEATURE_KBD_REPORT_ID, 0xD0, 0x8F, 0x01 }, ++ { FEATURE_KBD_REPORT_ID, 0xD0, 0x85, 0xFF } ++ }; ++ int ret; ++ ++ for (size_t i = 0; i < ARRAY_SIZE(init); i++) { ++ ret = asus_kbd_set_report(hdev, init[i], sizeof(init[i])); ++ if (ret < 0) ++ return ret; ++ } ++ ++ hid_info(hdev, "Disabled OOBE for keyboard\n"); ++ return 0; ++} ++ + static void asus_schedule_work(struct asus_kbd_leds *led) + { + unsigned long flags; +@@ -534,6 +554,12 @@ static int asus_kbd_register_leds(struct hid_device *hdev) + ret = asus_kbd_init(hdev, FEATURE_KBD_LED_REPORT_ID2); + if (ret < 0) + return ret; ++ ++ if (dmi_match(DMI_PRODUCT_FAMILY, "ProArt P16")) { ++ ret = asus_kbd_disable_oobe(hdev); ++ if (ret < 0) ++ return ret; ++ } + } else { + /* Initialize keyboard */ + ret = asus_kbd_init(hdev, FEATURE_KBD_REPORT_ID); diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 1f47fda809b9..6c2df0d37b3b 100644 --- a/drivers/hid/hid-ids.h @@ -265,6 +203,22 @@ index 1f47fda809b9..6c2df0d37b3b 100644 #define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY 0x1abe #define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY_X 0x1b4c #define USB_DEVICE_ID_ASUSTEK_ROG_CLAYMORE_II_KEYBOARD 0x196b +diff --git 
a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h +index 365e119bebaa..783e2a336861 100644 +--- a/include/linux/platform_data/x86/asus-wmi.h ++++ b/include/linux/platform_data/x86/asus-wmi.h +@@ -184,6 +184,11 @@ static const struct dmi_system_id asus_use_hid_led_dmi_ids[] = { + DMI_MATCH(DMI_PRODUCT_FAMILY, "ROG Flow"), + }, + }, ++ { ++ .matches = { ++ DMI_MATCH(DMI_PRODUCT_FAMILY, "ProArt P16"), ++ }, ++ }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GA403U"), diff --git a/kernel/fork.c b/kernel/fork.c index 0cb5431b4d7e..e919c8c3a121 100644 --- a/kernel/fork.c @@ -388,13 +342,15 @@ index dca706617adc..89d3aef160b7 100644 mkdir -p "${builddir}" cp System.map "${builddir}/System.map" diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c -index b74b566f675e..070dd1ab89c6 100644 +index ad66378d7321..4210bc8f12e1 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c -@@ -10641,6 +10641,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = { +@@ -10641,8 +10641,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1e1f, "ASUS Vivobook 15 X1504VAP", ALC2XX_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1e51, "ASUS Zephyrus M15", ALC294_FIXUP_ASUS_GU502_PINS), SND_PCI_QUIRK(0x1043, 0x1e5e, "ASUS ROG Strix G513", ALC294_FIXUP_ASUS_G513_PINS), +- SND_PCI_QUIRK(0x1043, 0x1e63, "ASUS H7606W", ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1), +- SND_PCI_QUIRK(0x1043, 0x1e83, "ASUS GA605W", ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1), + SND_PCI_QUIRK(0x1043, 0x1e63, "ASUS H7606W", ALC285_FIXUP_ASUS_GA403U_HEADSET_MIC), + SND_PCI_QUIRK(0x1043, 0x1e83, "ASUS GA605W", ALC285_FIXUP_ASUS_GA403U_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1e8e, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA401), diff --git a/6.13/0007-itmt-core-ranking.patch b/6.13/0007-itmt-core-ranking.patch index 13a8b040..f9edbbec 100644 --- a/6.13/0007-itmt-core-ranking.patch +++ b/6.13/0007-itmt-core-ranking.patch @@ -1,6 +1,6 @@ -From 25702dae4d4390c6e804bfe18eef1341a854b9f2 Mon Sep 17 00:00:00 2001 +From edca92ed206343ae09ee1af6ae0dfc26a68085b1 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:38:22 +0100 +Date: Mon, 20 Jan 2025 13:22:28 +0100 Subject: [PATCH 07/12] itmt-core-ranking Signed-off-by: Peter Jung @@ -235,10 +235,10 @@ index b5a8f0891135..ef63b1c0b491 100644 } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index 463fe1dc6de8..f849298a4cc1 100644 +index 8c0f17a96d4f..c532ffb153b4 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c -@@ -9941,6 +9941,8 @@ struct sg_lb_stats { +@@ -9836,6 +9836,8 @@ struct sg_lb_stats { unsigned int group_weight; enum group_type group_type; unsigned int group_asym_packing; /* Tasks should be moved to preferred CPU */ @@ -247,7 +247,7 @@ index 463fe1dc6de8..f849298a4cc1 100644 unsigned int group_smt_balance; /* Task on busy SMT be moved */ unsigned long group_misfit_task_load; /* A CPU has a task too big for its capacity */ #ifdef CONFIG_NUMA_BALANCING -@@ -10270,7 +10272,7 @@ sched_group_asym(struct lb_env *env, struct sg_lb_stats *sgs, struct sched_group +@@ -10165,7 +10167,7 @@ sched_group_asym(struct lb_env *env, struct sg_lb_stats *sgs, struct sched_group (sgs->group_weight - sgs->idle_cpus != 1)) return false; @@ -256,7 +256,7 @@ index 463fe1dc6de8..f849298a4cc1 100644 } /* One group has more than one SMT CPU while the other group does not */ -@@ -10351,6 +10353,17 @@ sched_reduced_capacity(struct rq *rq, struct sched_domain *sd) +@@ -10246,6 
+10248,17 @@ sched_reduced_capacity(struct rq *rq, struct sched_domain *sd) return check_cpu_capacity(rq, sd); } @@ -274,7 +274,7 @@ index 463fe1dc6de8..f849298a4cc1 100644 /** * update_sg_lb_stats - Update sched_group's statistics for load balancing. * @env: The load balancing environment. -@@ -10367,11 +10380,13 @@ static inline void update_sg_lb_stats(struct lb_env *env, +@@ -10262,11 +10275,13 @@ static inline void update_sg_lb_stats(struct lb_env *env, bool *sg_overloaded, bool *sg_overutilized) { @@ -289,7 +289,7 @@ index 463fe1dc6de8..f849298a4cc1 100644 for_each_cpu_and(i, sched_group_span(group), env->cpus) { struct rq *rq = cpu_rq(i); -@@ -10385,16 +10400,12 @@ static inline void update_sg_lb_stats(struct lb_env *env, +@@ -10280,16 +10295,12 @@ static inline void update_sg_lb_stats(struct lb_env *env, nr_running = rq->nr_running; sgs->sum_nr_running += nr_running; @@ -308,7 +308,7 @@ index 463fe1dc6de8..f849298a4cc1 100644 /* * No need to call idle_cpu() if nr_running is not 0 */ -@@ -10404,10 +10415,21 @@ static inline void update_sg_lb_stats(struct lb_env *env, +@@ -10299,10 +10310,21 @@ static inline void update_sg_lb_stats(struct lb_env *env, continue; } @@ -331,7 +331,7 @@ index 463fe1dc6de8..f849298a4cc1 100644 /* Check for a misfit task on the cpu */ if (sgs->group_misfit_task_load < rq->misfit_task_load) { sgs->group_misfit_task_load = rq->misfit_task_load; -@@ -10502,7 +10524,7 @@ static bool update_sd_pick_busiest(struct lb_env *env, +@@ -10397,7 +10419,7 @@ static bool update_sd_pick_busiest(struct lb_env *env, case group_asym_packing: /* Prefer to move from lowest priority CPU's work */ diff --git a/6.13/0008-ntsync.patch b/6.13/0008-ntsync.patch index 82b27d47..76a91251 100644 --- a/6.13/0008-ntsync.patch +++ b/6.13/0008-ntsync.patch @@ -1,6 +1,6 @@ -From 1871388db87b6e7114a28eec15fc03e4c0497e52 Mon Sep 17 00:00:00 2001 +From dad63380fd4bccaf1df47a5d2a14b3622a828bbf Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:38:39 +0100 +Date: Mon, 20 Jan 2025 13:22:39 +0100 Subject: [PATCH 08/12] ntsync Signed-off-by: Peter Jung @@ -427,10 +427,10 @@ index 000000000000..25e7c4aef968 + ``objs`` and in ``alert``. If this is attempted, the function fails + with ``EINVAL``. 
diff --git a/MAINTAINERS b/MAINTAINERS -index a87ddad78e26..69c7e0c9cbfd 100644 +index 0fa7c5728f1e..efecb59adfe6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS -@@ -16708,6 +16708,15 @@ T: git https://github.com/Paragon-Software-Group/linux-ntfs3.git +@@ -16709,6 +16709,15 @@ T: git https://github.com/Paragon-Software-Group/linux-ntfs3.git F: Documentation/filesystems/ntfs3.rst F: fs/ntfs3/ diff --git a/6.13/0009-perf-per-core.patch b/6.13/0009-perf-per-core.patch index 91886a65..ceb745db 100644 --- a/6.13/0009-perf-per-core.patch +++ b/6.13/0009-perf-per-core.patch @@ -1,6 +1,6 @@ -From ecafa3b39e7691288beb920eb362064d548d45e7 Mon Sep 17 00:00:00 2001 +From d0d15e3d79a2d5bb2c94b8ff3d2ab51f0b0100fe Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:38:51 +0100 +Date: Mon, 20 Jan 2025 13:22:50 +0100 Subject: [PATCH 09/12] perf-per-core Signed-off-by: Peter Jung diff --git a/6.13/0010-pksm.patch b/6.13/0010-pksm.patch index 369779b6..b877ca5c 100644 --- a/6.13/0010-pksm.patch +++ b/6.13/0010-pksm.patch @@ -1,6 +1,6 @@ -From b0522d38174d109d02042dc5591c1ab52de16a94 Mon Sep 17 00:00:00 2001 +From 6a7ea67c66634276802b4b9b0964a0b00db97d9c Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:39:03 +0100 +Date: Mon, 20 Jan 2025 13:23:02 +0100 Subject: [PATCH 10/12] pksm Signed-off-by: Peter Jung diff --git a/6.13/0011-t2.patch b/6.13/0011-t2.patch index 988a3380..3278629e 100644 --- a/6.13/0011-t2.patch +++ b/6.13/0011-t2.patch @@ -1,6 +1,6 @@ -From 5662d52675419bbe7b47731ad55c01ecf94b8426 Mon Sep 17 00:00:00 2001 +From 5e459e48f274c34d701726a61a96140381b1de2b Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:39:12 +0100 +Date: Mon, 20 Jan 2025 13:23:11 +0100 Subject: [PATCH 11/12] t2 Signed-off-by: Peter Jung @@ -156,10 +156,10 @@ index ecccc0473da9..6de6b0e6abf3 100644 ---- diff --git a/MAINTAINERS b/MAINTAINERS -index 69c7e0c9cbfd..01be85b7d886 100644 +index efecb59adfe6..16af42c68cca 100644 --- a/MAINTAINERS +++ b/MAINTAINERS -@@ -7065,6 +7065,12 @@ S: Supported +@@ -7066,6 +7066,12 @@ S: Supported T: git https://gitlab.freedesktop.org/drm/misc/kernel.git F: drivers/gpu/drm/sun4i/sun8i* diff --git a/6.13/0012-zstd.patch b/6.13/0012-zstd.patch index bee631a9..f534c712 100644 --- a/6.13/0012-zstd.patch +++ b/6.13/0012-zstd.patch @@ -1,6 +1,6 @@ -From 91beebc1e962374c32c95b975d59ff5aa90b66c1 Mon Sep 17 00:00:00 2001 +From 6f96c228cd968c7f47eb90d9e7ad6d679bf5a7f0 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:39:21 +0100 +Date: Mon, 20 Jan 2025 13:23:20 +0100 Subject: [PATCH 12/12] zstd Signed-off-by: Peter Jung diff --git a/6.13/all/0001-cachyos-base-all.patch b/6.13/all/0001-cachyos-base-all.patch index b08fa650..83b4d000 100644 --- a/6.13/all/0001-cachyos-base-all.patch +++ b/6.13/all/0001-cachyos-base-all.patch @@ -1,6 +1,6 @@ -From 2af576964728ca6af63da3c61dae669b5ae945c7 Mon Sep 17 00:00:00 2001 +From 1ec94c7b86986796d5d14135302e81dd3ddbe223 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:37:09 +0100 +Date: Mon, 20 Jan 2025 13:21:23 +0100 Subject: [PATCH 01/12] amd-pstate Signed-off-by: Peter Jung @@ -1003,42 +1003,44 @@ index cd573bc6b6db..9747e3be6cee 100644 -- 2.48.0.rc1 -From 1d6b426b59b09163dbcaac857551295ad4b343d5 Mon Sep 17 00:00:00 2001 +From b74b9b0459100443f73ce718d0191bf58d6cb4b4 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:51:04 +0100 +Date: Mon, 20 Jan 2025 13:21:35 +0100 Subject: [PATCH 02/12] amd-tlb-broadcast Signed-off-by: Peter Jung --- - arch/x86/Kconfig | 2 
+- - arch/x86/hyperv/mmu.c | 1 - - arch/x86/include/asm/cpufeatures.h | 1 + - arch/x86/include/asm/invlpgb.h | 93 ++++++ - arch/x86/include/asm/mmu.h | 6 + - arch/x86/include/asm/mmu_context.h | 12 + - arch/x86/include/asm/paravirt.h | 5 - - arch/x86/include/asm/paravirt_types.h | 2 - - arch/x86/include/asm/tlbbatch.h | 1 + - arch/x86/include/asm/tlbflush.h | 31 +- - arch/x86/kernel/cpu/amd.c | 16 ++ - arch/x86/kernel/kvm.c | 1 - - arch/x86/kernel/paravirt.c | 6 - - arch/x86/kernel/setup.c | 4 + - arch/x86/mm/pgtable.c | 16 +- - arch/x86/mm/tlb.c | 393 +++++++++++++++++++++++++- - arch/x86/xen/mmu_pv.c | 1 - - mm/memory.c | 1 - - mm/mmap.c | 2 - - mm/swap_state.c | 1 - - mm/vma.c | 2 - - 21 files changed, 541 insertions(+), 56 deletions(-) + arch/x86/Kconfig | 2 +- + arch/x86/Kconfig.cpu | 5 + + arch/x86/hyperv/mmu.c | 1 - + arch/x86/include/asm/cpufeatures.h | 1 + + arch/x86/include/asm/invlpgb.h | 103 +++++ + arch/x86/include/asm/mmu.h | 6 + + arch/x86/include/asm/mmu_context.h | 14 + + arch/x86/include/asm/msr-index.h | 2 + + arch/x86/include/asm/paravirt.h | 5 - + arch/x86/include/asm/paravirt_types.h | 2 - + arch/x86/include/asm/tlbbatch.h | 1 + + arch/x86/include/asm/tlbflush.h | 92 ++++- + arch/x86/kernel/cpu/amd.c | 12 + + arch/x86/kernel/kvm.c | 1 - + arch/x86/kernel/paravirt.c | 6 - + arch/x86/mm/pgtable.c | 16 +- + arch/x86/mm/tlb.c | 496 +++++++++++++++++++++++-- + arch/x86/xen/mmu_pv.c | 1 - + mm/memory.c | 1 - + mm/mmap.c | 2 - + mm/swap_state.c | 1 - + mm/vma.c | 2 - + tools/arch/x86/include/asm/msr-index.h | 2 + + 23 files changed, 695 insertions(+), 79 deletions(-) create mode 100644 arch/x86/include/asm/invlpgb.h diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig -index 9d7bd0ae48c4..e8743f8c9fd0 100644 +index ef6cfea9df73..1f824dcab4dc 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig -@@ -274,7 +274,7 @@ config X86 +@@ -273,7 +273,7 @@ config X86 select HAVE_PCI select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP @@ -1047,6 +1049,29 @@ index 9d7bd0ae48c4..e8743f8c9fd0 100644 select MMU_GATHER_MERGE_VMAS select HAVE_POSIX_CPU_TIMERS_TASK_WORK select HAVE_REGS_AND_STACK_ACCESS_API +diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu +index 2a7279d80460..bacdc502903f 100644 +--- a/arch/x86/Kconfig.cpu ++++ b/arch/x86/Kconfig.cpu +@@ -395,6 +395,10 @@ config X86_VMX_FEATURE_NAMES + def_bool y + depends on IA32_FEAT_CTL + ++config X86_BROADCAST_TLB_FLUSH ++ def_bool y ++ depends on CPU_SUP_AMD ++ + menuconfig PROCESSOR_SELECT + bool "Supported processor vendors" if EXPERT + help +@@ -431,6 +435,7 @@ config CPU_SUP_CYRIX_32 + config CPU_SUP_AMD + default y + bool "Support AMD processors" if PROCESSOR_SELECT ++ select X86_BROADCAST_TLB_FLUSH + help + This enables detection, tunings and quirks for AMD processors + diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index 1cc113200ff5..cbe6c71e17c1 100644 --- a/arch/x86/hyperv/mmu.c @@ -1058,27 +1083,28 @@ index 1cc113200ff5..cbe6c71e17c1 100644 - pv_ops.mmu.tlb_remove_table = tlb_remove_table; } diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h -index 645aa360628d..742c138d011a 100644 +index 645aa360628d..989e4c9cad2e 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -338,6 +338,7 @@ #define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+ 1) /* "irperf" Instructions Retired Count */ #define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* "xsaveerptr" Always save/restore FP error pointers */ 
-+#define X86_FEATURE_INVLPGB (13*32+ 3) /* "invlpgb" INVLPGB instruction */ ++#define X86_FEATURE_INVLPGB (13*32+ 3) /* INVLPGB and TLBSYNC instruction supported. */ #define X86_FEATURE_RDPRU (13*32+ 4) /* "rdpru" Read processor register at user level */ #define X86_FEATURE_WBNOINVD (13*32+ 9) /* "wbnoinvd" WBNOINVD instruction */ #define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ diff --git a/arch/x86/include/asm/invlpgb.h b/arch/x86/include/asm/invlpgb.h new file mode 100644 -index 000000000000..2669ebfffe81 +index 000000000000..418402535319 --- /dev/null +++ b/arch/x86/include/asm/invlpgb.h -@@ -0,0 +1,93 @@ +@@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_INVLPGB +#define _ASM_X86_INVLPGB + ++#include +#include + +/* @@ -1090,21 +1116,31 @@ index 000000000000..2669ebfffe81 + * TLBSYNC is used to ensure that pending INVLPGB invalidations initiated from + * this CPU have completed. + */ -+static inline void __invlpgb(unsigned long asid, unsigned long pcid, unsigned long addr, -+ int extra_count, bool pmd_stride, unsigned long flags) ++static inline void __invlpgb(unsigned long asid, unsigned long pcid, ++ unsigned long addr, u16 extra_count, ++ bool pmd_stride, unsigned long flags) +{ -+ u64 rax = addr | flags; -+ u32 ecx = (pmd_stride << 31) | extra_count; + u32 edx = (pcid << 16) | asid; ++ u32 ecx = (pmd_stride << 31) | extra_count; ++ u64 rax = addr | flags; ++ ++ /* INVLPGB; supported in binutils >= 2.36. */ ++ asm volatile(".byte 0x0f, 0x01, 0xfe" : : "a" (rax), "c" (ecx), "d" (edx)); ++} + -+ asm volatile("invlpgb" : : "a" (rax), "c" (ecx), "d" (edx)); ++/* Wait for INVLPGB originated by this CPU to complete. */ ++static inline void tlbsync(void) ++{ ++ cant_migrate(); ++ /* TLBSYNC: supported in binutils >= 0.36. */ ++ asm volatile(".byte 0x0f, 0x01, 0xff" ::: "memory"); +} + +/* + * INVLPGB can be targeted by virtual address, PCID, ASID, or any combination + * of the three. For example: + * - INVLPGB_VA | INVLPGB_INCLUDE_GLOBAL: invalidate all TLB entries at the address -+ * - INVLPGB_PCID: invalidate all TLB entries matching the PCID ++ * - INVLPGB_PCID: invalidate all TLB entries matching the PCID + * + * The first can be used to invalidate (kernel) mappings at a particular + * address across all processes. @@ -1123,22 +1159,25 @@ index 000000000000..2669ebfffe81 + unsigned long addr) +{ + __invlpgb(0, pcid, addr, 0, 0, INVLPGB_PCID | INVLPGB_VA); ++ tlbsync(); +} + -+static inline void invlpgb_flush_user_nr(unsigned long pcid, unsigned long addr, -+ int nr, bool pmd_stride) ++static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid, ++ unsigned long addr, ++ u16 nr, ++ bool pmd_stride, ++ bool freed_tables) +{ -+ __invlpgb(0, pcid, addr, nr - 1, pmd_stride, INVLPGB_PCID | INVLPGB_VA | INVLPGB_FINAL_ONLY); -+} ++ unsigned long flags = INVLPGB_PCID | INVLPGB_VA; + -+/* Flush all mappings for a given ASID, not including globals. */ -+static inline void invlpgb_flush_single_asid(unsigned long asid) -+{ -+ __invlpgb(asid, 0, 0, 0, 0, INVLPGB_ASID); ++ if (!freed_tables) ++ flags |= INVLPGB_FINAL_ONLY; ++ ++ __invlpgb(0, pcid, addr, nr - 1, pmd_stride, flags); +} + +/* Flush all mappings for a given PCID, not including globals. 
*/ -+static inline void invlpgb_flush_single_pcid(unsigned long pcid) ++static inline void invlpgb_flush_single_pcid_nosync(unsigned long pcid) +{ + __invlpgb(0, pcid, 0, 0, 0, INVLPGB_PCID); +} @@ -1147,10 +1186,11 @@ index 000000000000..2669ebfffe81 +static inline void invlpgb_flush_all(void) +{ + __invlpgb(0, 0, 0, 0, 0, INVLPGB_INCLUDE_GLOBAL); ++ tlbsync(); +} + +/* Flush addr, including globals, for all PCIDs. */ -+static inline void invlpgb_flush_addr(unsigned long addr, int nr) ++static inline void invlpgb_flush_addr_nosync(unsigned long addr, u16 nr) +{ + __invlpgb(0, 0, addr, nr - 1, 0, INVLPGB_INCLUDE_GLOBAL); +} @@ -1159,69 +1199,86 @@ index 000000000000..2669ebfffe81 +static inline void invlpgb_flush_all_nonglobals(void) +{ + __invlpgb(0, 0, 0, 0, 0, 0); -+} -+ -+/* Wait for INVLPGB originated by this CPU to complete. */ -+static inline void tlbsync(void) -+{ -+ asm volatile("tlbsync"); ++ tlbsync(); +} + +#endif /* _ASM_X86_INVLPGB */ diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h -index ce4677b8b735..83d0986295d3 100644 +index ce4677b8b735..51f25d38de86 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h -@@ -46,6 +46,12 @@ typedef struct { - unsigned long flags; +@@ -67,6 +67,12 @@ typedef struct { + u16 pkey_allocation_map; + s16 execute_only_pkey; #endif - -+#ifdef CONFIG_CPU_SUP_AMD -+ struct list_head broadcast_asid_list; -+ u16 broadcast_asid; ++ ++#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH ++ u16 global_asid; + bool asid_transition; +#endif + - #ifdef CONFIG_ADDRESS_MASKING - /* Active LAM mode: X86_CR3_LAM_U48 or X86_CR3_LAM_U57 or 0 (disabled) */ - unsigned long lam_cr3_mask; + } mm_context_t; + + #define INIT_MM_CONTEXT(mm) \ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h -index 2886cb668d7f..2c347b51d9b9 100644 +index 2886cb668d7f..65f50464b5c3 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -139,6 +139,8 @@ static inline void mm_reset_untag_mask(struct mm_struct *mm) #define enter_lazy_tlb enter_lazy_tlb extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); -+extern void destroy_context_free_broadcast_asid(struct mm_struct *mm); ++extern void destroy_context_free_global_asid(struct mm_struct *mm); + /* * Init a new mm. Used on mm copies, like at fork() * and on mm's that are brand-new, like at execve(). 
-@@ -160,6 +162,13 @@ static inline int init_new_context(struct task_struct *tsk, +@@ -160,6 +162,14 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.execute_only_pkey = -1; } #endif + -+#ifdef CONFIG_CPU_SUP_AMD -+ INIT_LIST_HEAD(&mm->context.broadcast_asid_list); -+ mm->context.broadcast_asid = 0; -+ mm->context.asid_transition = false; ++#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH ++ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) { ++ mm->context.global_asid = 0; ++ mm->context.asid_transition = false; ++ } +#endif + mm_reset_untag_mask(mm); init_new_context_ldt(mm); return 0; -@@ -169,6 +178,9 @@ static inline int init_new_context(struct task_struct *tsk, +@@ -169,6 +179,10 @@ static inline int init_new_context(struct task_struct *tsk, static inline void destroy_context(struct mm_struct *mm) { destroy_context_ldt(mm); -+#ifdef CONFIG_CPU_SUP_AMD -+ destroy_context_free_broadcast_asid(mm); ++#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH ++ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) ++ destroy_context_free_global_asid(mm); +#endif } extern void switch_mm(struct mm_struct *prev, struct mm_struct *next, +diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h +index 3ae84c3b8e6d..dc1c1057f26e 100644 +--- a/arch/x86/include/asm/msr-index.h ++++ b/arch/x86/include/asm/msr-index.h +@@ -25,6 +25,7 @@ + #define _EFER_SVME 12 /* Enable virtualization */ + #define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */ + #define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ ++#define _EFER_TCE 15 /* Enable Translation Cache Extensions */ + #define _EFER_AUTOIBRS 21 /* Enable Automatic IBRS */ + + #define EFER_SCE (1<<_EFER_SCE) +@@ -34,6 +35,7 @@ + #define EFER_SVME (1<<_EFER_SVME) + #define EFER_LMSLE (1<<_EFER_LMSLE) + #define EFER_FFXSR (1<<_EFER_FFXSR) ++#define EFER_TCE (1<<_EFER_TCE) + #define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS) + + /* diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index d4eb9e1d61b8..794ba3647c6c 100644 --- a/arch/x86/include/asm/paravirt.h @@ -1264,7 +1321,7 @@ index 1ad56eb3e8a8..f9a17edf63ad 100644 #endif /* _ARCH_X86_TLBBATCH_H */ diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h -index 69e79fff41b8..a2f9b7370717 100644 +index 69e79fff41b8..5490ca71e27f 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -10,6 +10,7 @@ @@ -1275,39 +1332,100 @@ index 69e79fff41b8..a2f9b7370717 100644 #include #include #include -@@ -64,6 +65,23 @@ static inline void cr4_clear_bits(unsigned long mask) - */ - #define TLB_NR_DYN_ASIDS 6 +@@ -183,6 +184,13 @@ static inline void cr4_init_shadow(void) + extern unsigned long mmu_cr4_features; + extern u32 *trampoline_cr4_features; + ++/* How many pages can we invalidate with one INVLPGB. 
*/ ++#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH ++extern u16 invlpgb_count_max; ++#else ++#define invlpgb_count_max 1 ++#endif ++ + extern void initialize_tlbstate_and_flush(void); -+#ifdef CONFIG_CPU_SUP_AMD -+#define is_dyn_asid(asid) (asid) < TLB_NR_DYN_ASIDS -+#define is_broadcast_asid(asid) (asid) >= TLB_NR_DYN_ASIDS -+#define in_asid_transition(info) (info->mm && info->mm->context.asid_transition) -+#define mm_broadcast_asid(mm) (mm->context.broadcast_asid) + /* +@@ -230,6 +238,78 @@ void flush_tlb_one_kernel(unsigned long addr); + void flush_tlb_multi(const struct cpumask *cpumask, + const struct flush_tlb_info *info); + ++#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH ++static inline bool is_dyn_asid(u16 asid) ++{ ++ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) ++ return true; ++ ++ return asid < TLB_NR_DYN_ASIDS; ++} ++ ++static inline bool is_global_asid(u16 asid) ++{ ++ return !is_dyn_asid(asid); ++} ++ ++static inline bool in_asid_transition(const struct flush_tlb_info *info) ++{ ++ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) ++ return false; ++ ++ return info->mm && READ_ONCE(info->mm->context.asid_transition); ++} ++ ++static inline u16 mm_global_asid(struct mm_struct *mm) ++{ ++ u16 asid; ++ ++ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) ++ return 0; ++ ++ asid = READ_ONCE(mm->context.global_asid); ++ ++ /* mm->context.global_asid is either 0, or a global ASID */ ++ VM_WARN_ON_ONCE(is_dyn_asid(asid)); ++ ++ return asid; ++} +#else -+#define is_dyn_asid(asid) true -+#define is_broadcast_asid(asid) false -+#define in_asid_transition(info) false -+#define mm_broadcast_asid(mm) 0 ++static inline bool is_dyn_asid(u16 asid) ++{ ++ return true; ++} ++ ++static inline bool is_global_asid(u16 asid) ++{ ++ return false; ++} + -+inline bool needs_broadcast_asid_reload(struct mm_struct *next, u16 prev_asid) ++static inline bool in_asid_transition(const struct flush_tlb_info *info) +{ + return false; +} ++ ++static inline u16 mm_global_asid(struct mm_struct *mm) ++{ ++ return 0; ++} ++ ++static inline bool needs_global_asid_reload(struct mm_struct *next, u16 prev_asid) ++{ ++ return false; ++} ++ ++static inline void broadcast_tlb_flush(struct flush_tlb_info *info) ++{ ++ VM_WARN_ON_ONCE(1); ++} ++ ++static inline void consider_global_asid(struct mm_struct *mm) ++{ ++} +#endif + - struct tlb_context { - u64 ctx_id; - u64 tlb_gen; -@@ -182,6 +200,7 @@ static inline void cr4_init_shadow(void) - - extern unsigned long mmu_cr4_features; - extern u32 *trampoline_cr4_features; -+extern u16 invlpgb_count_max; - - extern void initialize_tlbstate_and_flush(void); - -@@ -277,21 +296,15 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) + #ifdef CONFIG_PARAVIRT + #include + #endif +@@ -277,21 +357,15 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) return atomic64_inc_return(&mm->context.tlb_gen); } @@ -1333,28 +1451,38 @@ index 69e79fff41b8..a2f9b7370717 100644 static inline bool pte_flags_need_flush(unsigned long oldflags, unsigned long newflags, diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c -index 79d2e17f6582..4dc42705aaca 100644 +index 79d2e17f6582..21076252a491 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c -@@ -1135,6 +1135,22 @@ static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c) - tlb_lli_2m[ENTRIES] = eax & mask; +@@ -29,6 +29,8 @@ - tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1; + #include "cpu.h" + ++u16 invlpgb_count_max __ro_after_init; + -+ if (c->extended_cpuid_level < 0x80000008) -+ return; + static inline int 
rdmsrl_amd_safe(unsigned msr, unsigned long long *p) + { + u32 gprs[8] = { 0 }; +@@ -1069,6 +1071,10 @@ static void init_amd(struct cpuinfo_x86 *c) + + /* AMD CPUs don't need fencing after x2APIC/TSC_DEADLINE MSR writes. */ + clear_cpu_cap(c, X86_FEATURE_APIC_MSRS_FENCE); + -+ cpuid(0x80000008, &eax, &ebx, &ecx, &edx); ++ /* Enable Translation Cache Extension */ ++ if (cpu_feature_enabled(X86_FEATURE_TCE)) ++ msr_set_bit(MSR_EFER, _EFER_TCE); + } + + #ifdef CONFIG_X86_32 +@@ -1135,6 +1141,12 @@ static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c) + tlb_lli_2m[ENTRIES] = eax & mask; + + tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1; + + /* Max number of pages INVLPGB can invalidate in one shot */ -+ invlpgb_count_max = (edx & 0xffff) + 1; -+ -+ /* If supported, enable translation cache extensions (TCE) */ -+ cpuid(0x80000001, &eax, &ebx, &ecx, &edx); -+ if (ecx & BIT(17)) { -+ u64 msr = native_read_msr(MSR_EFER);; -+ msr |= BIT(15); -+ wrmsrl(MSR_EFER, msr); ++ if (boot_cpu_has(X86_FEATURE_INVLPGB)) { ++ cpuid(0x80000008, &eax, &ebx, &ecx, &edx); ++ invlpgb_count_max = (edx & 0xffff) + 1; + } } @@ -1395,21 +1523,6 @@ index fec381533555..c019771e0123 100644 .mmu.exit_mmap = paravirt_nop, .mmu.notify_page_enc_status_changed = paravirt_nop, -diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c -index f1fea506e20f..6c4d08f8f7b1 100644 ---- a/arch/x86/kernel/setup.c -+++ b/arch/x86/kernel/setup.c -@@ -138,6 +138,10 @@ __visible unsigned long mmu_cr4_features __ro_after_init; - __visible unsigned long mmu_cr4_features __ro_after_init = X86_CR4_PAE; - #endif - -+#ifdef CONFIG_CPU_SUP_AMD -+u16 invlpgb_count_max __ro_after_init; -+#endif -+ - #ifdef CONFIG_IMA - static phys_addr_t ima_kexec_buffer_phys; - static size_t ima_kexec_buffer_size; diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 5745a354a241..3dc4af1f7868 100644 --- a/arch/x86/mm/pgtable.c @@ -1465,7 +1578,7 @@ index 5745a354a241..3dc4af1f7868 100644 #endif /* CONFIG_PGTABLE_LEVELS > 4 */ #endif /* CONFIG_PGTABLE_LEVELS > 3 */ diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c -index a2becb85bea7..0080175153ef 100644 +index a2becb85bea7..6449ac701c88 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -74,13 +74,15 @@ @@ -1487,120 +1600,136 @@ index a2becb85bea7..0080175153ef 100644 * for KPTI each mm has two address spaces and thus needs two * PCID values, but we can still do with a single ASID denomination * for each mm. Corresponds to kPCID + 2048. -@@ -225,6 +227,18 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, +@@ -225,6 +227,20 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, return; } + /* -+ * TLB consistency for this ASID is maintained with INVLPGB; -+ * TLB flushes happen even while the process isn't running. ++ * TLB consistency for global ASIDs is maintained with broadcast TLB ++ * flushing. The TLB is never outdated, and does not need flushing. 
+ */ -+#ifdef CONFIG_CPU_SUP_AMD -+ if (static_cpu_has(X86_FEATURE_INVLPGB) && mm_broadcast_asid(next)) { -+ *new_asid = mm_broadcast_asid(next); -+ *need_flush = false; -+ return; ++ if (IS_ENABLED(CONFIG_X86_BROADCAST_TLB_FLUSH) && static_cpu_has(X86_FEATURE_INVLPGB)) { ++ u16 global_asid = mm_global_asid(next); ++ ++ if (global_asid) { ++ *new_asid = global_asid; ++ *need_flush = false; ++ return; ++ } + } -+#endif + if (this_cpu_read(cpu_tlbstate.invalidate_other)) clear_asid_other(); -@@ -251,6 +265,245 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, +@@ -251,6 +267,290 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, *need_flush = true; } -+#ifdef CONFIG_CPU_SUP_AMD ++#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH +/* -+ * Logic for AMD INVLPGB support. ++ * Logic for broadcast TLB invalidation. + */ -+static DEFINE_RAW_SPINLOCK(broadcast_asid_lock); -+static u16 last_broadcast_asid = TLB_NR_DYN_ASIDS; -+static DECLARE_BITMAP(broadcast_asid_used, MAX_ASID_AVAILABLE) = { 0 }; -+static LIST_HEAD(broadcast_asid_list); -+static int broadcast_asid_available = MAX_ASID_AVAILABLE - TLB_NR_DYN_ASIDS - 1; ++static DEFINE_RAW_SPINLOCK(global_asid_lock); ++static u16 last_global_asid = MAX_ASID_AVAILABLE; ++static DECLARE_BITMAP(global_asid_used, MAX_ASID_AVAILABLE) = { 0 }; ++static DECLARE_BITMAP(global_asid_freed, MAX_ASID_AVAILABLE) = { 0 }; ++static int global_asid_available = MAX_ASID_AVAILABLE - TLB_NR_DYN_ASIDS - 1; + -+static void reset_broadcast_asid_space(void) ++static void reset_global_asid_space(void) +{ -+ mm_context_t *context; -+ -+ lockdep_assert_held(&broadcast_asid_lock); ++ lockdep_assert_held(&global_asid_lock); + + /* -+ * Flush once when we wrap around the ASID space, so we won't need -+ * to flush every time we allocate an ASID for boradcast flushing. ++ * A global TLB flush guarantees that any stale entries from ++ * previously freed global ASIDs get flushed from the TLB ++ * everywhere, making these global ASIDs safe to reuse. + */ + invlpgb_flush_all_nonglobals(); -+ tlbsync(); + + /* -+ * Leave the currently used broadcast ASIDs set in the bitmap, since -+ * those cannot be reused before the next wraparound and flush.. ++ * Clear all the previously freed global ASIDs from the ++ * broadcast_asid_used bitmap, now that the global TLB flush ++ * has made them actually available for re-use. + */ -+ bitmap_clear(broadcast_asid_used, 0, MAX_ASID_AVAILABLE); -+ list_for_each_entry(context, &broadcast_asid_list, broadcast_asid_list) -+ __set_bit(context->broadcast_asid, broadcast_asid_used); ++ bitmap_andnot(global_asid_used, global_asid_used, ++ global_asid_freed, MAX_ASID_AVAILABLE); ++ bitmap_clear(global_asid_freed, 0, MAX_ASID_AVAILABLE); + -+ last_broadcast_asid = TLB_NR_DYN_ASIDS; ++ /* ++ * ASIDs 0-TLB_NR_DYN_ASIDS are used for CPU-local ASID ++ * assignments, for tasks doing IPI based TLB shootdowns. ++ * Restart the search from the start of the global ASID space. 
++ */ ++ last_global_asid = TLB_NR_DYN_ASIDS; +} + -+static u16 get_broadcast_asid(void) ++static u16 get_global_asid(void) +{ -+ lockdep_assert_held(&broadcast_asid_lock); ++ lockdep_assert_held(&global_asid_lock); + + do { -+ u16 start = last_broadcast_asid; -+ u16 asid = find_next_zero_bit(broadcast_asid_used, MAX_ASID_AVAILABLE, start); ++ u16 start = last_global_asid; ++ u16 asid = find_next_zero_bit(global_asid_used, MAX_ASID_AVAILABLE, start); + + if (asid >= MAX_ASID_AVAILABLE) { -+ reset_broadcast_asid_space(); ++ reset_global_asid_space(); + continue; + } + -+ /* Try claiming this broadcast ASID. */ -+ if (!test_and_set_bit(asid, broadcast_asid_used)) { -+ last_broadcast_asid = asid; -+ return asid; -+ } ++ /* Claim this global ASID. */ ++ __set_bit(asid, global_asid_used); ++ last_global_asid = asid; ++ global_asid_available--; ++ return asid; + } while (1); +} + +/* -+ * Returns true if the mm is transitioning from a CPU-local ASID to a broadcast ++ * Returns true if the mm is transitioning from a CPU-local ASID to a global + * (INVLPGB) ASID, or the other way around. + */ -+static bool needs_broadcast_asid_reload(struct mm_struct *next, u16 prev_asid) ++static bool needs_global_asid_reload(struct mm_struct *next, u16 prev_asid) +{ -+ u16 broadcast_asid = mm_broadcast_asid(next); ++ u16 global_asid = mm_global_asid(next); + -+ if (broadcast_asid && prev_asid != broadcast_asid) ++ if (global_asid && prev_asid != global_asid) + return true; + -+ if (!broadcast_asid && is_broadcast_asid(prev_asid)) ++ if (!global_asid && is_global_asid(prev_asid)) + return true; + + return false; +} + -+void destroy_context_free_broadcast_asid(struct mm_struct *mm) ++void destroy_context_free_global_asid(struct mm_struct *mm) +{ -+ if (!mm->context.broadcast_asid) ++ if (!mm->context.global_asid) + return; + -+ guard(raw_spinlock_irqsave)(&broadcast_asid_lock); -+ mm->context.broadcast_asid = 0; -+ list_del(&mm->context.broadcast_asid_list); -+ broadcast_asid_available++; ++ guard(raw_spinlock_irqsave)(&global_asid_lock); ++ ++ /* The global ASID can be re-used only after flush at wrap-around. */ ++ __set_bit(mm->context.global_asid, global_asid_freed); ++ ++ mm->context.global_asid = 0; ++ global_asid_available++; +} + ++/* ++ * Check whether a process is currently active on more than "threshold" CPUs. ++ * This is a cheap estimation on whether or not it may make sense to assign ++ * a global ASID to this process, and use broadcast TLB invalidation. ++ */ +static bool mm_active_cpus_exceeds(struct mm_struct *mm, int threshold) +{ + int count = 0; + int cpu; + ++ /* This quick check should eliminate most single threaded programs. */ + if (cpumask_weight(mm_cpumask(mm)) <= threshold) + return false; + ++ /* Slower check to make sure. */ + for_each_cpu(cpu, mm_cpumask(mm)) { + /* Skip the CPUs that aren't really running this process. */ + if (per_cpu(cpu_tlbstate.loaded_mm, cpu) != mm) @@ -1616,40 +1745,56 @@ index a2becb85bea7..0080175153ef 100644 +} + +/* -+ * Assign a broadcast ASID to the current process, protecting against ++ * Assign a global ASID to the current process, protecting against + * races between multiple threads in the process. + */ -+static void use_broadcast_asid(struct mm_struct *mm) ++static void use_global_asid(struct mm_struct *mm) +{ -+ guard(raw_spinlock_irqsave)(&broadcast_asid_lock); ++ guard(raw_spinlock_irqsave)(&global_asid_lock); + + /* This process is already using broadcast TLB invalidation. 
*/ -+ if (mm->context.broadcast_asid) ++ if (mm->context.global_asid) ++ return; ++ ++ /* The last global ASID was consumed while waiting for the lock. */ ++ if (!global_asid_available) + return; + -+ mm->context.broadcast_asid = get_broadcast_asid(); -+ mm->context.asid_transition = true; -+ list_add(&mm->context.broadcast_asid_list, &broadcast_asid_list); -+ broadcast_asid_available--; ++ /* ++ * The transition from IPI TLB flushing, with a dynamic ASID, ++ * and broadcast TLB flushing, using a global ASID, uses memory ++ * ordering for synchronization. ++ * ++ * While the process has threads still using a dynamic ASID, ++ * TLB invalidation IPIs continue to get sent. ++ * ++ * This code sets asid_transition first, before assigning the ++ * global ASID. ++ * ++ * The TLB flush code will only verify the ASID transition ++ * after it has seen the new global ASID for the process. ++ */ ++ WRITE_ONCE(mm->context.asid_transition, true); ++ WRITE_ONCE(mm->context.global_asid, get_global_asid()); +} + +/* -+ * Figure out whether to assign a broadcast (global) ASID to a process. -+ * We vary the threshold by how empty or full broadcast ASID space is. ++ * Figure out whether to assign a global ASID to a process. ++ * We vary the threshold by how empty or full global ASID space is. + * 1/4 full: >= 4 active threads + * 1/2 full: >= 8 active threads + * 3/4 full: >= 16 active threads + * 7/8 full: >= 32 active threads + * etc + * -+ * This way we should never exhaust the broadcast ASID space, even on very ++ * This way we should never exhaust the global ASID space, even on very + * large systems, and the processes with the largest number of active + * threads should be able to use broadcast TLB invalidation. + */ +#define HALFFULL_THRESHOLD 8 -+static bool meets_broadcast_asid_threshold(struct mm_struct *mm) ++static bool meets_global_asid_threshold(struct mm_struct *mm) +{ -+ int avail = broadcast_asid_available; ++ int avail = global_asid_available; + int threshold = HALFFULL_THRESHOLD; + + if (!avail) @@ -1669,7 +1814,7 @@ index a2becb85bea7..0080175153ef 100644 + return mm_active_cpus_exceeds(mm, threshold); +} + -+static void count_tlb_flush(struct mm_struct *mm) ++static void consider_global_asid(struct mm_struct *mm) +{ + if (!static_cpu_has(X86_FEATURE_INVLPGB)) + return; @@ -1678,43 +1823,54 @@ index a2becb85bea7..0080175153ef 100644 + if ((current->pid & 0x1f) != (jiffies & 0x1f)) + return; + -+ if (meets_broadcast_asid_threshold(mm)) -+ use_broadcast_asid(mm); ++ if (meets_global_asid_threshold(mm)) ++ use_global_asid(mm); +} + +static void finish_asid_transition(struct flush_tlb_info *info) +{ + struct mm_struct *mm = info->mm; -+ int bc_asid = mm_broadcast_asid(mm); ++ int bc_asid = mm_global_asid(mm); + int cpu; + -+ if (!mm->context.asid_transition) ++ if (!READ_ONCE(mm->context.asid_transition)) + return; + + for_each_cpu(cpu, mm_cpumask(mm)) { ++ /* ++ * The remote CPU is context switching. Wait for that to ++ * finish, to catch the unlikely case of it switching to ++ * the target mm with an out of date ASID. ++ */ ++ while (READ_ONCE(per_cpu(cpu_tlbstate.loaded_mm, cpu)) == LOADED_MM_SWITCHING) ++ cpu_relax(); ++ + if (READ_ONCE(per_cpu(cpu_tlbstate.loaded_mm, cpu)) != mm) + continue; + + /* -+ * If at least one CPU is not using the broadcast ASID yet, ++ * If at least one CPU is not using the global ASID yet, + * send a TLB flush IPI. The IPI should cause stragglers + * to transition soon. 
++ * ++ * This can race with the CPU switching to another task; ++ * that results in a (harmless) extra IPI. + */ -+ if (per_cpu(cpu_tlbstate.loaded_mm_asid, cpu) != bc_asid) { ++ if (READ_ONCE(per_cpu(cpu_tlbstate.loaded_mm_asid, cpu)) != bc_asid) { + flush_tlb_multi(mm_cpumask(info->mm), info); + return; + } + } + -+ /* All the CPUs running this process are using the broadcast ASID. */ -+ mm->context.asid_transition = 0; ++ /* All the CPUs running this process are using the global ASID. */ ++ WRITE_ONCE(mm->context.asid_transition, false); +} + +static void broadcast_tlb_flush(struct flush_tlb_info *info) +{ + bool pmd = info->stride_shift == PMD_SHIFT; + unsigned long maxnr = invlpgb_count_max; -+ unsigned long asid = info->mm->context.broadcast_asid; ++ unsigned long asid = info->mm->context.global_asid; + unsigned long addr = info->start; + unsigned long nr; + @@ -1722,12 +1878,17 @@ index a2becb85bea7..0080175153ef 100644 + if (info->stride_shift > PMD_SHIFT) + maxnr = 1; + -+ if (info->end == TLB_FLUSH_ALL || info->freed_tables) { -+ invlpgb_flush_single_pcid(kern_pcid(asid)); ++ /* ++ * TLB flushes with INVLPGB are kicked off asynchronously. ++ * The inc_mm_tlb_gen() guarantees page table updates are done ++ * before these TLB flushes happen. ++ */ ++ if (info->end == TLB_FLUSH_ALL) { ++ invlpgb_flush_single_pcid_nosync(kern_pcid(asid)); + /* Do any CPUs supporting INVLPGB need PTI? */ + if (static_cpu_has(X86_FEATURE_PTI)) -+ invlpgb_flush_single_pcid(user_pcid(asid)); -+ } else do { ++ invlpgb_flush_single_pcid_nosync(user_pcid(asid)); ++ } else for (; addr < info->end; addr += nr << info->stride_shift) { + /* + * Calculate how many pages can be flushed at once; if the + * remainder of the range is less than one page, flush one. @@ -1735,43 +1896,42 @@ index a2becb85bea7..0080175153ef 100644 + nr = min(maxnr, (info->end - addr) >> info->stride_shift); + nr = max(nr, 1); + -+ invlpgb_flush_user_nr(kern_pcid(asid), addr, nr, pmd); ++ invlpgb_flush_user_nr_nosync(kern_pcid(asid), addr, nr, pmd, info->freed_tables); + /* Do any CPUs supporting INVLPGB need PTI? */ + if (static_cpu_has(X86_FEATURE_PTI)) -+ invlpgb_flush_user_nr(user_pcid(asid), addr, nr, pmd); -+ addr += nr << info->stride_shift; -+ } while (addr < info->end); ++ invlpgb_flush_user_nr_nosync(user_pcid(asid), addr, nr, pmd, info->freed_tables); ++ } + + finish_asid_transition(info); + + /* Wait for the INVLPGBs kicked off above to finish. */ + tlbsync(); +} -+#endif /* CONFIG_CPU_SUP_AMD */ ++#endif /* CONFIG_X86_BROADCAST_TLB_FLUSH */ + /* * Given an ASID, flush the corresponding user ASID. We can delay this * until the next time we switch to it. 
-@@ -556,8 +809,9 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, +@@ -556,8 +856,9 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, */ if (prev == next) { /* Not actually switching mm's */ - VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) != - next->context.ctx_id); -+ if (is_dyn_asid(prev_asid)) -+ VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) != -+ next->context.ctx_id); ++ VM_WARN_ON(is_dyn_asid(prev_asid) && ++ this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) != ++ next->context.ctx_id); /* * If this races with another thread that enables lam, 'new_lam' -@@ -573,6 +827,23 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, +@@ -573,6 +874,23 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, !cpumask_test_cpu(cpu, mm_cpumask(next)))) cpumask_set_cpu(cpu, mm_cpumask(next)); + /* + * Check if the current mm is transitioning to a new ASID. + */ -+ if (needs_broadcast_asid_reload(next, prev_asid)) { ++ if (needs_global_asid_reload(next, prev_asid)) { + next_tlb_gen = atomic64_read(&next->context.tlb_gen); + + choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush); @@ -1782,24 +1942,44 @@ index a2becb85bea7..0080175153ef 100644 + * Broadcast TLB invalidation keeps this PCID up to date + * all the time. + */ -+ if (is_broadcast_asid(prev_asid)) ++ if (is_global_asid(prev_asid)) + return; + /* * If the CPU is not in lazy TLB mode, we are just switching * from one thread in a process to another thread in the same -@@ -629,8 +900,10 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, - barrier(); +@@ -606,6 +924,13 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, + */ + cond_mitigation(tsk); + ++ /* ++ * Let nmi_uaccess_okay() and finish_asid_transition() ++ * know that we're changing CR3. ++ */ ++ this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING); ++ barrier(); ++ + /* + * Stop remote flushes for the previous mm. + * Skip kernel threads; we never send init_mm TLB flushing IPIs, +@@ -623,14 +948,12 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, + next_tlb_gen = atomic64_read(&next->context.tlb_gen); + + choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush); +- +- /* Let nmi_uaccess_okay() know that we're changing CR3. 
*/ +- this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING); +- barrier(); } +reload_tlb: new_lam = mm_lam_cr3_mask(next); if (need_flush) { -+ VM_BUG_ON(is_broadcast_asid(new_asid)); ++ VM_WARN_ON_ONCE(is_global_asid(new_asid)); this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id); this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen); load_new_mm_cr3(next->pgd, new_asid, new_lam, true); -@@ -749,7 +1022,7 @@ static void flush_tlb_func(void *info) +@@ -749,7 +1072,7 @@ static void flush_tlb_func(void *info) const struct flush_tlb_info *f = info; struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm); u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); @@ -1808,24 +1988,24 @@ index a2becb85bea7..0080175153ef 100644 bool local = smp_processor_id() == f->initiating_cpu; unsigned long nr_invalidate = 0; u64 mm_tlb_gen; -@@ -769,6 +1042,16 @@ static void flush_tlb_func(void *info) +@@ -769,6 +1092,16 @@ static void flush_tlb_func(void *info) if (unlikely(loaded_mm == &init_mm)) return; -+ /* Reload the ASID if transitioning into or out of a broadcast ASID */ -+ if (needs_broadcast_asid_reload(loaded_mm, loaded_mm_asid)) { ++ /* Reload the ASID if transitioning into or out of a global ASID */ ++ if (needs_global_asid_reload(loaded_mm, loaded_mm_asid)) { + switch_mm_irqs_off(NULL, loaded_mm, NULL); + loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); + } + + /* Broadcast ASIDs are always kept up to date with INVLPGB. */ -+ if (is_broadcast_asid(loaded_mm_asid)) ++ if (is_global_asid(loaded_mm_asid)) + return; + VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) != loaded_mm->context.ctx_id); -@@ -786,6 +1069,8 @@ static void flush_tlb_func(void *info) +@@ -786,6 +1119,8 @@ static void flush_tlb_func(void *info) return; } @@ -1834,32 +2014,7 @@ index a2becb85bea7..0080175153ef 100644 if (unlikely(f->new_tlb_gen != TLB_GENERATION_INVALID && f->new_tlb_gen <= local_tlb_gen)) { /* -@@ -825,7 +1110,7 @@ static void flush_tlb_func(void *info) - * - * The only question is whether to do a full or partial flush. - * -- * We do a partial flush if requested and two extra conditions -+ * We do a partial flush if requested and three extra conditions - * are met: - * - * 1. f->new_tlb_gen == local_tlb_gen + 1. We have an invariant that -@@ -852,10 +1137,14 @@ static void flush_tlb_func(void *info) - * date. By doing a full flush instead, we can increase - * local_tlb_gen all the way to mm_tlb_gen and we can probably - * avoid another flush in the very near future. -+ * -+ * 3. No page tables were freed. If page tables were freed, a full -+ * flush ensures intermediate translations in the TLB get flushed. - */ - if (f->end != TLB_FLUSH_ALL && - f->new_tlb_gen == local_tlb_gen + 1 && -- f->new_tlb_gen == mm_tlb_gen) { -+ f->new_tlb_gen == mm_tlb_gen && -+ !f->freed_tables) { - /* Partial flush */ - unsigned long addr = f->start; - -@@ -926,7 +1215,7 @@ STATIC_NOPV void native_flush_tlb_multi(const struct cpumask *cpumask, +@@ -926,7 +1261,7 @@ STATIC_NOPV void native_flush_tlb_multi(const struct cpumask *cpumask, * up on the new contents of what used to be page tables, while * doing a speculative memory access. 
*/ @@ -1868,102 +2023,155 @@ index a2becb85bea7..0080175153ef 100644 on_each_cpu_mask(cpumask, flush_tlb_func, (void *)info, true); else on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func, -@@ -998,14 +1287,18 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, +@@ -981,6 +1316,15 @@ static struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm, + info->new_tlb_gen = new_tlb_gen; + info->initiating_cpu = smp_processor_id(); + ++ /* ++ * If the number of flushes is so large that a full flush ++ * would be faster, do a full flush. ++ */ ++ if ((end - start) >> stride_shift > tlb_single_page_flush_ceiling) { ++ info->start = 0; ++ info->end = TLB_FLUSH_ALL; ++ } ++ + return info; + } + +@@ -998,17 +1342,8 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, bool freed_tables) { struct flush_tlb_info *info; -+ unsigned long threshold = tlb_single_page_flush_ceiling; ++ int cpu = get_cpu(); u64 new_tlb_gen; - int cpu; - -+ if (static_cpu_has(X86_FEATURE_INVLPGB)) -+ threshold *= invlpgb_count_max; -+ - cpu = get_cpu(); - - /* Should we flush just the requested range? */ - if ((end == TLB_FLUSH_ALL) || +- int cpu; +- +- cpu = get_cpu(); +- +- /* Should we flush just the requested range? */ +- if ((end == TLB_FLUSH_ALL) || - ((end - start) >> stride_shift) > tlb_single_page_flush_ceiling) { -+ ((end - start) >> stride_shift) > threshold) { - start = 0; - end = TLB_FLUSH_ALL; - } -@@ -1021,8 +1314,11 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, +- start = 0; +- end = TLB_FLUSH_ALL; +- } + + /* This is also a barrier that synchronizes with switch_mm(). */ + new_tlb_gen = inc_mm_tlb_gen(mm); +@@ -1021,8 +1356,11 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, * a local TLB flush is needed. Optimize this use-case by calling * flush_tlb_func_local() directly in this case. 
*/ - if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) { -+ if (IS_ENABLED(CONFIG_CPU_SUP_AMD) && mm_broadcast_asid(mm)) { ++ if (mm_global_asid(mm)) { + broadcast_tlb_flush(info); + } else if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) { flush_tlb_multi(mm_cpumask(mm), info); -+ count_tlb_flush(mm); ++ consider_global_asid(mm); } else if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) { lockdep_assert_irqs_enabled(); local_irq_disable(); -@@ -1045,9 +1341,41 @@ static void do_flush_tlb_all(void *info) +@@ -1036,6 +1374,19 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, + } + + ++static bool broadcast_flush_tlb_all(void) ++{ ++ if (!IS_ENABLED(CONFIG_X86_BROADCAST_TLB_FLUSH)) ++ return false; ++ ++ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) ++ return false; ++ ++ guard(preempt)(); ++ invlpgb_flush_all(); ++ return true; ++} ++ + static void do_flush_tlb_all(void *info) + { + count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); +@@ -1044,10 +1395,36 @@ static void do_flush_tlb_all(void *info) + void flush_tlb_all(void) { - count_vm_tlb_event(NR_TLB_REMOTE_FLUSH); -+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) { -+ guard(preempt)(); -+ invlpgb_flush_all(); -+ tlbsync(); ++ if (broadcast_flush_tlb_all()) + return; -+ } + count_vm_tlb_event(NR_TLB_REMOTE_FLUSH); on_each_cpu(do_flush_tlb_all, NULL, 1); } -+static void broadcast_kernel_range_flush(unsigned long start, unsigned long end) ++static bool broadcast_kernel_range_flush(struct flush_tlb_info *info) +{ + unsigned long addr; -+ unsigned long maxnr = invlpgb_count_max; -+ unsigned long threshold = tlb_single_page_flush_ceiling * maxnr; ++ unsigned long nr; + -+ /* -+ * TLBSYNC only waits for flushes originating on the same CPU. -+ * Disabling migration allows us to wait on all flushes. 
-+ */ -+ guard(preempt)(); ++ if (!IS_ENABLED(CONFIG_X86_BROADCAST_TLB_FLUSH)) ++ return false; + -+ if (end == TLB_FLUSH_ALL || -+ (end - start) > threshold << PAGE_SHIFT) { ++ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) ++ return false; ++ ++ if (info->end == TLB_FLUSH_ALL) { + invlpgb_flush_all(); -+ } else { -+ unsigned long nr; -+ for (addr = start; addr < end; addr += nr << PAGE_SHIFT) { -+ nr = min((end - addr) >> PAGE_SHIFT, maxnr); -+ invlpgb_flush_addr(addr, nr); -+ } ++ return true; + } + ++ for (addr = info->start; addr < info->end; addr += nr << PAGE_SHIFT) { ++ nr = min((info->end - addr) >> PAGE_SHIFT, invlpgb_count_max); ++ invlpgb_flush_addr_nosync(addr, nr); ++ } + tlbsync(); ++ return true; +} + static void do_kernel_range_flush(void *info) { struct flush_tlb_info *f = info; -@@ -1060,6 +1388,11 @@ static void do_kernel_range_flush(void *info) +@@ -1060,22 +1437,21 @@ static void do_kernel_range_flush(void *info) void flush_tlb_kernel_range(unsigned long start, unsigned long end) { -+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) { -+ broadcast_kernel_range_flush(start, end); -+ return; -+ } +- /* Balance as user space task's flush, a bit conservative */ +- if (end == TLB_FLUSH_ALL || +- (end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) { +- on_each_cpu(do_flush_tlb_all, NULL, 1); +- } else { +- struct flush_tlb_info *info; ++ struct flush_tlb_info *info; + +- preempt_disable(); +- info = get_flush_tlb_info(NULL, start, end, 0, false, +- TLB_GENERATION_INVALID); ++ guard(preempt)(); + ++ info = get_flush_tlb_info(NULL, start, end, PAGE_SHIFT, false, ++ TLB_GENERATION_INVALID); + - /* Balance as user space task's flush, a bit conservative */ - if (end == TLB_FLUSH_ALL || - (end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) { -@@ -1244,7 +1577,6 @@ EXPORT_SYMBOL_GPL(__flush_tlb_all); - void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) - { - struct flush_tlb_info *info; -- ++ if (broadcast_kernel_range_flush(info)) ++ ; /* Fall through. */ ++ else if (info->end == TLB_FLUSH_ALL) ++ on_each_cpu(do_flush_tlb_all, NULL, 1); ++ else + on_each_cpu(do_kernel_range_flush, info, 1); + +- put_flush_tlb_info(); +- preempt_enable(); +- } ++ put_flush_tlb_info(); + } + + /* +@@ -1247,7 +1623,7 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) + int cpu = get_cpu(); - info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false, -@@ -1263,12 +1595,49 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) +- info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false, ++ info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, PAGE_SHIFT, false, + TLB_GENERATION_INVALID); + /* + * flush_tlb_multi() is not optimized for the common case in which only +@@ -1263,12 +1639,62 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) local_irq_enable(); } @@ -1988,8 +2196,8 @@ index a2becb85bea7..0080175153ef 100644 + struct mm_struct *mm, + unsigned long uaddr) +{ -+ if (static_cpu_has(X86_FEATURE_INVLPGB) && mm_broadcast_asid(mm)) { -+ u16 asid = mm_broadcast_asid(mm); ++ if (static_cpu_has(X86_FEATURE_INVLPGB) && mm_global_asid(mm)) { ++ u16 asid = mm_global_asid(mm); + /* + * Queue up an asynchronous invalidation. 
The corresponding + * TLBSYNC is done in arch_tlbbatch_flush(), and must be done @@ -1999,11 +2207,24 @@ index a2becb85bea7..0080175153ef 100644 + batch->used_invlpgb = true; + migrate_disable(); + } -+ invlpgb_flush_user_nr(kern_pcid(asid), uaddr, 1, 0); ++ invlpgb_flush_user_nr_nosync(kern_pcid(asid), uaddr, 1, false, false); + /* Do any CPUs supporting INVLPGB need PTI? */ + if (static_cpu_has(X86_FEATURE_PTI)) -+ invlpgb_flush_user_nr(user_pcid(asid), uaddr, 1, 0); ++ invlpgb_flush_user_nr_nosync(user_pcid(asid), uaddr, 1, false, false); ++ ++ /* ++ * Some CPUs might still be using a local ASID for this ++ * process, and require IPIs, while others are using the ++ * global ASID. ++ * ++ * In this corner case we need to do both the broadcast ++ * TLB invalidation, and send IPIs. The IPIs will help ++ * stragglers transition to the broadcast ASID. ++ */ ++ if (READ_ONCE(mm->context.asid_transition)) ++ goto also_send_ipi; + } else { ++also_send_ipi: + inc_mm_tlb_gen(mm); + cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm)); + } @@ -2089,12 +2310,32 @@ index bb2119e5a0d0..a593d5edfd88 100644 tlb_gather_mmu(&tlb, vms->vma->vm_mm); update_hiwater_rss(vms->vma->vm_mm); unmap_vmas(&tlb, mas_detach, vms->vma, vms->start, vms->end, +diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h +index 3ae84c3b8e6d..dc1c1057f26e 100644 +--- a/tools/arch/x86/include/asm/msr-index.h ++++ b/tools/arch/x86/include/asm/msr-index.h +@@ -25,6 +25,7 @@ + #define _EFER_SVME 12 /* Enable virtualization */ + #define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */ + #define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ ++#define _EFER_TCE 15 /* Enable Translation Cache Extensions */ + #define _EFER_AUTOIBRS 21 /* Enable Automatic IBRS */ + + #define EFER_SCE (1<<_EFER_SCE) +@@ -34,6 +35,7 @@ + #define EFER_SVME (1<<_EFER_SVME) + #define EFER_LMSLE (1<<_EFER_LMSLE) + #define EFER_FFXSR (1<<_EFER_FFXSR) ++#define EFER_TCE (1<<_EFER_TCE) + #define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS) + + /* -- 2.48.0.rc1 -From 9341991abd224336e551e90c7179e2e221fdf466 Mon Sep 17 00:00:00 2001 +From 1fc2e15c0c690b276928953ff73277b4d66e67f3 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:37:35 +0100 +Date: Mon, 20 Jan 2025 13:21:45 +0100 Subject: [PATCH 03/12] bbr3 Signed-off-by: Peter Jung @@ -5478,9 +5719,9 @@ index b412ed88ccd9..d70f8b742b21 100644 -- 2.48.0.rc1 -From 32089eb0a217a8d425f387e5e613d498ad760f34 Mon Sep 17 00:00:00 2001 +From e01619bda1e69eea53c0f3ef61476fb02da06868 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:37:49 +0100 +Date: Mon, 20 Jan 2025 13:21:55 +0100 Subject: [PATCH 04/12] cachy Signed-off-by: Peter Jung @@ -5499,12 +5740,23 @@ Signed-off-by: Peter Jung drivers/cpufreq/Kconfig.x86 | 2 - drivers/cpufreq/intel_pstate.c | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + + drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c | 44 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h | 1 + + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 +- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 10 + + drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c | 19 + + drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h | 1 + + drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 1 + drivers/gpu/drm/amd/display/Kconfig | 6 + - .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- + .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 69 +- + .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 7 + .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 2 +- .../amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 6 +- 
.../amd/display/amdgpu_dm/amdgpu_dm_plane.c | 6 +- + .../drm/amd/display/dc/bios/bios_parser2.c | 13 +- + .../drm/amd/display/dc/core/dc_link_exports.c | 6 + + drivers/gpu/drm/amd/display/dc/dc.h | 3 + + .../dc/resource/dce120/dce120_resource.c | 17 + drivers/gpu/drm/amd/pm/amdgpu_pm.c | 3 + drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 14 +- drivers/input/evdev.c | 19 +- @@ -5546,7 +5798,7 @@ Signed-off-by: Peter Jung mm/vmpressure.c | 4 + mm/vmscan.c | 143 + net/ipv4/inet_connection_sock.c | 2 +- - 61 files changed, 6557 insertions(+), 65 deletions(-) + 72 files changed, 6714 insertions(+), 93 deletions(-) create mode 100644 drivers/media/v4l2-core/v4l2loopback.c create mode 100644 drivers/media/v4l2-core/v4l2loopback.h create mode 100644 drivers/media/v4l2-core/v4l2loopback_formats.h @@ -5683,7 +5935,7 @@ index f48eaa98d22d..fc777c14cff6 100644 unprivileged_userfaultfd ======================== diff --git a/Makefile b/Makefile -index e20a62ad397f..9a63ab456ffc 100644 +index b9464c88ac72..ea555e6a8bf1 100644 --- a/Makefile +++ b/Makefile @@ -860,11 +860,19 @@ KBUILD_CFLAGS += -fno-delete-null-pointer-checks @@ -5707,7 +5959,7 @@ index e20a62ad397f..9a63ab456ffc 100644 # depends on `opt-level` and `debug-assertions`, respectively. KBUILD_RUSTFLAGS += -Cdebug-assertions=$(if $(CONFIG_RUST_DEBUG_ASSERTIONS),y,n) diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu -index 2a7279d80460..301ced02b077 100644 +index bacdc502903f..f2c97bdcef58 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -155,9 +155,8 @@ config MPENTIUM4 @@ -6550,6 +6802,118 @@ index 4653a8d2823a..6590e83dfbf0 100644 extern int amdgpu_vis_vram_limit; extern int amdgpu_gart_size; extern int amdgpu_gtt_size; +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +index 093141ad6ed0..e476e45b996a 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +@@ -36,13 +36,6 @@ + #include "atombios_encoders.h" + #include "bif/bif_4_1_d.h" + +-static void amdgpu_atombios_lookup_i2c_gpio_quirks(struct amdgpu_device *adev, +- ATOM_GPIO_I2C_ASSIGMENT *gpio, +- u8 index) +-{ +- +-} +- + static struct amdgpu_i2c_bus_rec amdgpu_atombios_get_bus_rec_for_i2c_gpio(ATOM_GPIO_I2C_ASSIGMENT *gpio) + { + struct amdgpu_i2c_bus_rec i2c; +@@ -108,9 +101,6 @@ struct amdgpu_i2c_bus_rec amdgpu_atombios_lookup_i2c_gpio(struct amdgpu_device * + + gpio = &i2c_info->asGPIO_Info[0]; + for (i = 0; i < num_indices; i++) { +- +- amdgpu_atombios_lookup_i2c_gpio_quirks(adev, gpio, i); +- + if (gpio->sucI2cId.ucAccess == id) { + i2c = amdgpu_atombios_get_bus_rec_for_i2c_gpio(gpio); + break; +@@ -142,8 +132,6 @@ void amdgpu_atombios_i2c_init(struct amdgpu_device *adev) + + gpio = &i2c_info->asGPIO_Info[0]; + for (i = 0; i < num_indices; i++) { +- amdgpu_atombios_lookup_i2c_gpio_quirks(adev, gpio, i); +- + i2c = amdgpu_atombios_get_bus_rec_for_i2c_gpio(gpio); + + if (i2c.valid) { +@@ -156,6 +144,38 @@ void amdgpu_atombios_i2c_init(struct amdgpu_device *adev) + } + } + ++void amdgpu_atombios_oem_i2c_init(struct amdgpu_device *adev, u8 i2c_id) ++{ ++ struct atom_context *ctx = adev->mode_info.atom_context; ++ ATOM_GPIO_I2C_ASSIGMENT *gpio; ++ struct amdgpu_i2c_bus_rec i2c; ++ int index = GetIndexIntoMasterTable(DATA, GPIO_I2C_Info); ++ struct _ATOM_GPIO_I2C_INFO *i2c_info; ++ uint16_t data_offset, size; ++ int i, num_indices; ++ char stmp[32]; ++ ++ if (amdgpu_atom_parse_data_header(ctx, index, &size, NULL, NULL, &data_offset)) { ++ i2c_info = 
(struct _ATOM_GPIO_I2C_INFO *)(ctx->bios + data_offset); ++ ++ num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) / ++ sizeof(ATOM_GPIO_I2C_ASSIGMENT); ++ ++ gpio = &i2c_info->asGPIO_Info[0]; ++ for (i = 0; i < num_indices; i++) { ++ i2c = amdgpu_atombios_get_bus_rec_for_i2c_gpio(gpio); ++ ++ if (i2c.valid && i2c.i2c_id == i2c_id) { ++ sprintf(stmp, "OEM 0x%x", i2c.i2c_id); ++ adev->i2c_bus[i] = amdgpu_i2c_create(adev_to_drm(adev), &i2c, stmp); ++ break; ++ } ++ gpio = (ATOM_GPIO_I2C_ASSIGMENT *) ++ ((u8 *)gpio + sizeof(ATOM_GPIO_I2C_ASSIGMENT)); ++ } ++ } ++} ++ + struct amdgpu_gpio_rec + amdgpu_atombios_lookup_gpio(struct amdgpu_device *adev, + u8 id) +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h +index 0e16432d9a72..867bc5c5ce67 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h +@@ -136,6 +136,7 @@ amdgpu_atombios_lookup_gpio(struct amdgpu_device *adev, + struct amdgpu_i2c_bus_rec amdgpu_atombios_lookup_i2c_gpio(struct amdgpu_device *adev, + uint8_t id); + void amdgpu_atombios_i2c_init(struct amdgpu_device *adev); ++void amdgpu_atombios_oem_i2c_init(struct amdgpu_device *adev, u8 i2c_id); + + bool amdgpu_atombios_has_dce_engine_info(struct amdgpu_device *adev); + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +index cd4fac120834..1ab433d774cc 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +@@ -4461,8 +4461,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, + goto failed; + } + /* init i2c buses */ +- if (!amdgpu_device_has_dc_support(adev)) +- amdgpu_atombios_i2c_init(adev); ++ amdgpu_i2c_init(adev); + } + } + +@@ -4724,8 +4723,7 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) + amdgpu_reset_fini(adev); + + /* free i2c buses */ +- if (!amdgpu_device_has_dc_support(adev)) +- amdgpu_i2c_fini(adev); ++ amdgpu_i2c_fini(adev); + + if (amdgpu_emu_mode != 1) + amdgpu_atombios_fini(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 38686203bea6..811d020f3f4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -6578,6 +6942,60 @@ index 38686203bea6..811d020f3f4b 100644 /** * DOC: vramlimit (int) * Restrict the total amount of VRAM in MiB for testing. The default is 0 (Use full VRAM). 
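As an illustrative aside (not part of the patch itself): the OEM DDC plumbing in this patch — amdgpu_atombios_oem_i2c_init() above, plus the DC-side adapter registered later in amdgpu_dm.c — ends up exposing one more ordinary i2c adapter ("OEM 0x<id>" on the legacy ATOM path, "AMDGPU DM i2c OEM bus" on the DC path), so whatever OEM controller sits behind it can be reached with the stock i2c-dev interface. A minimal sketch follows; the bus number /dev/i2c-9 and slave address 0x2a are placeholders for the real adapter and device, which can be located by matching the adapter name in /sys/class/i2c-adapter/i2c-N/name.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/i2c-dev.h>

int main(void)
{
	/* Placeholder bus number; match the adapter name in sysfs first. */
	int fd = open("/dev/i2c-9", O_RDWR);
	unsigned char reg = 0x00, val;

	if (fd < 0)
		return 1;

	/* Placeholder slave address for the OEM device behind the bus. */
	if (ioctl(fd, I2C_SLAVE, 0x2a) < 0) {
		close(fd);
		return 1;
	}

	/* Write a register index, then read one byte back. */
	if (write(fd, &reg, 1) == 1 && read(fd, &val, 1) == 1)
		printf("reg 0x%02x = 0x%02x\n", reg, val);

	close(fd);
	return 0;
}
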
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c +index f0765ccde668..8179d0814db9 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c +@@ -225,6 +225,25 @@ void amdgpu_i2c_destroy(struct amdgpu_i2c_chan *i2c) + kfree(i2c); + } + ++void amdgpu_i2c_init(struct amdgpu_device *adev) ++{ ++ if (!adev->is_atom_fw) { ++ if (!amdgpu_device_has_dc_support(adev)) { ++ amdgpu_atombios_i2c_init(adev); ++ } else { ++ switch (adev->asic_type) { ++ case CHIP_POLARIS10: ++ case CHIP_POLARIS11: ++ case CHIP_POLARIS12: ++ amdgpu_atombios_oem_i2c_init(adev, 0x97); ++ break; ++ default: ++ break; ++ } ++ } ++ } ++} ++ + /* remove all the buses */ + void amdgpu_i2c_fini(struct amdgpu_device *adev) + { +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h +index 21e3d1dad0a1..1d3d3806e0dd 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h +@@ -28,6 +28,7 @@ struct amdgpu_i2c_chan *amdgpu_i2c_create(struct drm_device *dev, + const struct amdgpu_i2c_bus_rec *rec, + const char *name); + void amdgpu_i2c_destroy(struct amdgpu_i2c_chan *i2c); ++void amdgpu_i2c_init(struct amdgpu_device *adev); + void amdgpu_i2c_fini(struct amdgpu_device *adev); + struct amdgpu_i2c_chan * + amdgpu_i2c_lookup(struct amdgpu_device *adev, +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +index 5e3faefc5510..6da4f946cac0 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +@@ -609,6 +609,7 @@ struct amdgpu_i2c_adapter { + struct i2c_adapter base; + + struct ddc_service *ddc_service; ++ bool oem; + }; + + #define TO_DM_AUX(x) container_of((x), struct amdgpu_dm_dp_aux, aux) diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index 11e3f2f3b174..7b1bd69dc29e 100644 --- a/drivers/gpu/drm/amd/display/Kconfig @@ -6594,10 +7012,73 @@ index 11e3f2f3b174..7b1bd69dc29e 100644 + endmenu diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c -index cd16dae534dc..1508978f92dd 100644 +index 5f216d626cbb..382af92c4ff1 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c -@@ -4516,7 +4516,7 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) +@@ -177,6 +177,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev); + static void amdgpu_dm_fini(struct amdgpu_device *adev); + static bool is_freesync_video_mode(const struct drm_display_mode *mode, struct amdgpu_dm_connector *aconnector); + static void reset_freesync_config_for_crtc(struct dm_crtc_state *new_crtc_state); ++static struct amdgpu_i2c_adapter * ++create_i2c(struct ddc_service *ddc_service, bool oem); + + static enum drm_mode_subconnector get_subconnector_type(struct dc_link *link) + { +@@ -2839,6 +2841,33 @@ static int amdgpu_dm_smu_write_watermarks_table(struct amdgpu_device *adev) + return 0; + } + ++static int dm_oem_i2c_hw_init(struct amdgpu_device *adev) ++{ ++ struct amdgpu_display_manager *dm = &adev->dm; ++ struct amdgpu_i2c_adapter *oem_i2c; ++ struct ddc_service *oem_ddc_service; ++ int r; ++ ++ oem_ddc_service = dc_get_oem_i2c_device(adev->dm.dc); ++ if (oem_ddc_service) { ++ oem_i2c = create_i2c(oem_ddc_service, true); ++ if (!oem_i2c) { ++ dev_info(adev->dev, "Failed to create oem i2c adapter data\n"); ++ return -ENOMEM; ++ 
} ++ ++ r = i2c_add_adapter(&oem_i2c->base); ++ if (r) { ++ dev_info(adev->dev, "Failed to register oem i2c\n"); ++ kfree(oem_i2c); ++ return r; ++ } ++ dm->oem_i2c = oem_i2c; ++ } ++ ++ return 0; ++} ++ + /** + * dm_hw_init() - Initialize DC device + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. +@@ -2870,6 +2899,10 @@ static int dm_hw_init(struct amdgpu_ip_block *ip_block) + return r; + amdgpu_dm_hpd_init(adev); + ++ r = dm_oem_i2c_hw_init(adev); ++ if (r) ++ dev_info(adev->dev, "Failed to add OEM i2c bus\n"); ++ + return 0; + } + +@@ -2885,6 +2918,8 @@ static int dm_hw_fini(struct amdgpu_ip_block *ip_block) + { + struct amdgpu_device *adev = ip_block->adev; + ++ kfree(adev->dm.oem_i2c); ++ + amdgpu_dm_hpd_fini(adev); + + amdgpu_dm_irq_fini(adev); +@@ -4516,7 +4551,7 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) return r; } @@ -6606,6 +7087,93 @@ index cd16dae534dc..1508978f92dd 100644 if (amdgpu_dm_create_color_properties(adev)) { dc_state_release(state->context); kfree(state); +@@ -8218,7 +8253,7 @@ static int amdgpu_dm_i2c_xfer(struct i2c_adapter *i2c_adap, + int i; + int result = -EIO; + +- if (!ddc_service->ddc_pin || !ddc_service->ddc_pin->hw_info.hw_supported) ++ if (!ddc_service->ddc_pin) + return result; + + cmd.payloads = kcalloc(num, sizeof(struct i2c_payload), GFP_KERNEL); +@@ -8237,11 +8272,18 @@ static int amdgpu_dm_i2c_xfer(struct i2c_adapter *i2c_adap, + cmd.payloads[i].data = msgs[i].buf; + } + +- if (dc_submit_i2c( +- ddc_service->ctx->dc, +- ddc_service->link->link_index, +- &cmd)) +- result = num; ++ if (i2c->oem) { ++ if (dc_submit_i2c_oem( ++ ddc_service->ctx->dc, ++ &cmd)) ++ result = num; ++ } else { ++ if (dc_submit_i2c( ++ ddc_service->ctx->dc, ++ ddc_service->link->link_index, ++ &cmd)) ++ result = num; ++ } + + kfree(cmd.payloads); + return result; +@@ -8258,9 +8300,7 @@ static const struct i2c_algorithm amdgpu_dm_i2c_algo = { + }; + + static struct amdgpu_i2c_adapter * +-create_i2c(struct ddc_service *ddc_service, +- int link_index, +- int *res) ++create_i2c(struct ddc_service *ddc_service, bool oem) + { + struct amdgpu_device *adev = ddc_service->ctx->driver_context; + struct amdgpu_i2c_adapter *i2c; +@@ -8271,9 +8311,14 @@ create_i2c(struct ddc_service *ddc_service, + i2c->base.owner = THIS_MODULE; + i2c->base.dev.parent = &adev->pdev->dev; + i2c->base.algo = &amdgpu_dm_i2c_algo; +- snprintf(i2c->base.name, sizeof(i2c->base.name), "AMDGPU DM i2c hw bus %d", link_index); ++ if (oem) ++ snprintf(i2c->base.name, sizeof(i2c->base.name), "AMDGPU DM i2c OEM bus"); ++ else ++ snprintf(i2c->base.name, sizeof(i2c->base.name), "AMDGPU DM i2c hw bus %d", ++ ddc_service->link->link_index); + i2c_set_adapdata(&i2c->base, i2c); + i2c->ddc_service = ddc_service; ++ i2c->oem = oem; + + return i2c; + } +@@ -8298,7 +8343,7 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm, + link->priv = aconnector; + + +- i2c = create_i2c(link->ddc, link->link_index, &res); ++ i2c = create_i2c(link->ddc, false); + if (!i2c) { + DRM_ERROR("Failed to create i2c adapter data\n"); + return -ENOMEM; +diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +index 2227cd8e4a89..5710776bb0e2 100644 +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +@@ -606,6 +606,13 @@ struct amdgpu_display_manager { + * Bounding box data read from dmub during early initialization for DCN4+ + */ + struct dml2_soc_bb 
*bb_from_dmub; ++ ++ /** ++ * @oem_i2c: ++ * ++ * OEM i2c bus ++ */ ++ struct amdgpu_i2c_adapter *oem_i2c; + }; + + enum dsc_clock_force_state { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index ebabfe3a512f..4d3ebcaacca1 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -6620,7 +7188,7 @@ index ebabfe3a512f..4d3ebcaacca1 100644 * * AMD driver supports pre-defined mathematical functions for transferring diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c -index 64a041c2af05..08790bcfe109 100644 +index 36a830a7440f..a8fc8bd52d51 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -470,7 +470,7 @@ static int amdgpu_dm_crtc_late_register(struct drm_crtc *crtc) @@ -6681,6 +7249,124 @@ index 495e3cd70426..704a48209657 100644 dm_atomic_plane_attach_color_mgmt_properties(dm, plane); #endif /* Create (reset) the plane state */ +diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +index c9a6de110b74..470ec970217b 100644 +--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c ++++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +@@ -1778,6 +1778,7 @@ static enum bp_result get_firmware_info_v3_1( + struct dc_firmware_info *info) + { + struct atom_firmware_info_v3_1 *firmware_info; ++ struct atom_firmware_info_v3_2 *firmware_info32; + struct atom_display_controller_info_v4_1 *dce_info = NULL; + + if (!info) +@@ -1785,6 +1786,8 @@ static enum bp_result get_firmware_info_v3_1( + + firmware_info = GET_IMAGE(struct atom_firmware_info_v3_1, + DATA_TABLES(firmwareinfo)); ++ firmware_info32 = GET_IMAGE(struct atom_firmware_info_v3_2, ++ DATA_TABLES(firmwareinfo)); + + dce_info = GET_IMAGE(struct atom_display_controller_info_v4_1, + DATA_TABLES(dce_info)); +@@ -1817,7 +1820,15 @@ static enum bp_result get_firmware_info_v3_1( + bp->cmd_tbl.get_smu_clock_info(bp, SMU9_SYSPLL0_ID) * 10; + } + +- info->oem_i2c_present = false; ++ /* These fields are marked as reserved in v3_1, but they appear to be populated ++ * properly. 
++ */ ++ if (firmware_info32->board_i2c_feature_id == 0x2) { ++ info->oem_i2c_present = true; ++ info->oem_i2c_obj_id = firmware_info32->board_i2c_feature_gpio_id; ++ } else { ++ info->oem_i2c_present = false; ++ } + + return BP_RESULT_OK; + } +diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c +index 457d60eeb486..13636eb4ec3f 100644 +--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c ++++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c +@@ -142,6 +142,12 @@ bool dc_link_update_dsc_config(struct pipe_ctx *pipe_ctx) + return link->dc->link_srv->update_dsc_config(pipe_ctx); + } + ++struct ddc_service * ++dc_get_oem_i2c_device(struct dc *dc) ++{ ++ return dc->res_pool->oem_device; ++} ++ + bool dc_is_oem_i2c_device_present( + struct dc *dc, + size_t slave_address) +diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h +index 08c5a315b3a6..70d6005ecd64 100644 +--- a/drivers/gpu/drm/amd/display/dc/dc.h ++++ b/drivers/gpu/drm/amd/display/dc/dc.h +@@ -1939,6 +1939,9 @@ int dc_link_aux_transfer_raw(struct ddc_service *ddc, + struct aux_payload *payload, + enum aux_return_code_type *operation_result); + ++struct ddc_service * ++dc_get_oem_i2c_device(struct dc *dc); ++ + bool dc_is_oem_i2c_device_present( + struct dc *dc, + size_t slave_address +diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c +index c63c59623433..eb1e158d3436 100644 +--- a/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c ++++ b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c +@@ -67,6 +67,7 @@ + #include "reg_helper.h" + + #include "dce100/dce100_resource.h" ++#include "link.h" + + #ifndef mmDP0_DP_DPHY_INTERNAL_CTRL + #define mmDP0_DP_DPHY_INTERNAL_CTRL 0x210f +@@ -659,6 +660,12 @@ static void dce120_resource_destruct(struct dce110_resource_pool *pool) + + if (pool->base.dmcu != NULL) + dce_dmcu_destroy(&pool->base.dmcu); ++ ++ if (pool->base.oem_device != NULL) { ++ struct dc *dc = pool->base.oem_device->ctx->dc; ++ ++ dc->link_srv->destroy_ddc_service(&pool->base.oem_device); ++ } + } + + static void read_dce_straps( +@@ -1054,6 +1061,7 @@ static bool dce120_resource_construct( + struct dc *dc, + struct dce110_resource_pool *pool) + { ++ struct ddc_service_init_data ddc_init_data = {0}; + unsigned int i; + int j; + struct dc_context *ctx = dc->ctx; +@@ -1257,6 +1265,15 @@ static bool dce120_resource_construct( + + bw_calcs_data_update_from_pplib(dc); + ++ if (dc->ctx->dc_bios->fw_info.oem_i2c_present) { ++ ddc_init_data.ctx = dc->ctx; ++ ddc_init_data.link = NULL; ++ ddc_init_data.id.id = dc->ctx->dc_bios->fw_info.oem_i2c_obj_id; ++ ddc_init_data.id.enum_id = 0; ++ ddc_init_data.id.type = OBJECT_TYPE_GENERIC; ++ pool->base.oem_device = dc->link_srv->create_ddc_service(&ddc_init_data); ++ } ++ + return true; + + irqs_create_fail: diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index e8ae7681bf0a..8a0d873983f3 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -12608,7 +13294,7 @@ index 2ddb827e3bea..464049c4af3f 100644 return state; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index 3e9ca38512de..463fe1dc6de8 100644 +index 26958431deb7..8c0f17a96d4f 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -73,10 +73,19 @@ unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG; @@ -12888,7 +13574,7 @@ index 
a2b16b08cbbf..48d611e58ad3 100644 static int __read_mostly sysctl_compact_memory; diff --git a/mm/huge_memory.c b/mm/huge_memory.c -index e53d83b3e5cf..b4c205f2042a 100644 +index db64116a4f84..3e0266c973e1 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -64,7 +64,11 @@ unsigned long transparent_hugepage_flags __read_mostly = @@ -12916,7 +13602,7 @@ index 24b68b425afb..081ddb92db87 100644 /* diff --git a/mm/page-writeback.c b/mm/page-writeback.c -index d213ead95675..0430a97b30fd 100644 +index d9861e42b2bd..13ab2294f0bb 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -71,7 +71,11 @@ static long ratelimit_pages = 32; @@ -12944,7 +13630,7 @@ index d213ead95675..0430a97b30fd 100644 EXPORT_SYMBOL_GPL(dirty_writeback_interval); diff --git a/mm/page_alloc.c b/mm/page_alloc.c -index cae7b93864c2..57038052c153 100644 +index 01eab25edf89..3ea393f1311a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -271,7 +271,11 @@ const char * const migratetype_names[MIGRATE_TYPES] = { @@ -12997,7 +13683,7 @@ index bd5183dfd879..3a410f53a07c 100644 /* diff --git a/mm/vmscan.c b/mm/vmscan.c -index 9a859b7d18d7..ec7f96bb0e9f 100644 +index b1ec5ece067e..e258174d240a 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -148,6 +148,15 @@ struct scan_control { @@ -13192,7 +13878,7 @@ index 9a859b7d18d7..ec7f96bb0e9f 100644 else if (min_seq[LRU_GEN_ANON] < min_seq[LRU_GEN_FILE]) type = LRU_GEN_ANON; else if (swappiness == 1) -@@ -4826,6 +4965,8 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc) +@@ -4829,6 +4968,8 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc) struct mem_cgroup *memcg = lruvec_memcg(lruvec); struct pglist_data *pgdat = lruvec_pgdat(lruvec); @@ -13201,7 +13887,7 @@ index 9a859b7d18d7..ec7f96bb0e9f 100644 /* lru_gen_age_node() called mem_cgroup_calculate_protection() */ if (mem_cgroup_below_min(NULL, memcg)) return MEMCG_LRU_YOUNG; -@@ -5974,6 +6115,8 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) +@@ -5977,6 +6118,8 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) prepare_scan_control(pgdat, sc); @@ -13226,9 +13912,9 @@ index 6872b5aff73e..1910fe1b2471 100644 -- 2.48.0.rc1 -From 8d1fa2a8636c551dd33500837e87e2c3f889d95c Mon Sep 17 00:00:00 2001 +From 7bc012030531a472b823293e167a86cd58da545c Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:38:01 +0100 +Date: Mon, 20 Jan 2025 13:22:05 +0100 Subject: [PATCH 05/12] crypto Signed-off-by: Peter Jung @@ -14000,25 +14686,25 @@ index fbf43482e1f5..11e95fc62636 100644 -- 2.48.0.rc1 -From e094aa9f2a3d8ac13a8bca382f0f5585f80926ee Mon Sep 17 00:00:00 2001 +From 2f514dfe8b006e7fa976b6265bef4b8efb81ec11 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:38:11 +0100 +Date: Mon, 20 Jan 2025 13:22:15 +0100 Subject: [PATCH 06/12] fixes Signed-off-by: Peter Jung --- arch/Kconfig | 4 +- - drivers/acpi/acpi_video.c | 50 +++++++++++-------- .../link/protocols/link_edp_panel_control.c | 3 +- - drivers/gpu/drm/drm_edid.c | 47 +++++++++++++++-- - drivers/gpu/drm/nouveau/nouveau_acpi.c | 2 +- + drivers/gpu/drm/drm_edid.c | 47 +++++++++++++++++-- + drivers/hid/hid-asus.c | 26 ++++++++++ drivers/hid/hid-ids.h | 1 + + include/linux/platform_data/x86/asus-wmi.h | 5 ++ kernel/fork.c | 9 ++-- - kernel/kprobes.c | 23 ++++----- + kernel/kprobes.c | 23 +++++---- kernel/sched/ext.c | 4 +- scripts/package/PKGBUILD | 5 ++ - sound/pci/hda/patch_realtek.c | 2 + - 11 files changed, 103 insertions(+), 47 deletions(-) + sound/pci/hda/patch_realtek.c | 
4 +- + 11 files changed, 105 insertions(+), 26 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 6682b2a53e34..fe54298ae05c 100644 @@ -14042,99 +14728,6 @@ index 6682b2a53e34..fe54298ae05c 100644 depends on HAVE_ARCH_MMAP_RND_COMPAT_BITS help This value can be used to select the number of bits to use to -diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c -index 8274a17872ed..3c627bdf2d1b 100644 ---- a/drivers/acpi/acpi_video.c -+++ b/drivers/acpi/acpi_video.c -@@ -610,16 +610,29 @@ acpi_video_device_lcd_get_level_current(struct acpi_video_device *device, - return 0; - } - -+/* -+ * Arg: -+ * device : video output device (LCD, CRT, ..) -+ * edid : address for returned EDID pointer -+ * length : _DDC length to request (must be a multiple of 128) -+ * -+ * Return Value: -+ * Length of EDID (positive value) or error (negative value) -+ * -+ * Get EDID from ACPI _DDC. On success, a pointer to the EDID data is written -+ * to the edid address, and the length of the EDID is returned. The caller is -+ * responsible for freeing the edid pointer. -+ */ -+ - static int --acpi_video_device_EDID(struct acpi_video_device *device, -- union acpi_object **edid, int length) -+acpi_video_device_EDID(struct acpi_video_device *device, void **edid, int length) - { -- int status; -+ acpi_status status; - struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; - union acpi_object *obj; - union acpi_object arg0 = { ACPI_TYPE_INTEGER }; - struct acpi_object_list args = { 1, &arg0 }; -- -+ int ret; - - *edid = NULL; - -@@ -636,16 +649,17 @@ acpi_video_device_EDID(struct acpi_video_device *device, - - obj = buffer.pointer; - -- if (obj && obj->type == ACPI_TYPE_BUFFER) -- *edid = obj; -- else { -+ if (obj && obj->type == ACPI_TYPE_BUFFER) { -+ *edid = kmemdup(obj->buffer.pointer, obj->buffer.length, GFP_KERNEL); -+ ret = *edid ? 
obj->buffer.length : -ENOMEM; -+ } else { - acpi_handle_debug(device->dev->handle, - "Invalid _DDC data for length %d\n", length); -- status = -EFAULT; -- kfree(obj); -+ ret = -EFAULT; - } - -- return status; -+ kfree(obj); -+ return ret; - } - - /* bus */ -@@ -1435,9 +1449,7 @@ int acpi_video_get_edid(struct acpi_device *device, int type, int device_id, - { - struct acpi_video_bus *video; - struct acpi_video_device *video_device; -- union acpi_object *buffer = NULL; -- acpi_status status; -- int i, length; -+ int i, length, ret; - - if (!device || !acpi_driver_data(device)) - return -EINVAL; -@@ -1477,16 +1489,10 @@ int acpi_video_get_edid(struct acpi_device *device, int type, int device_id, - } - - for (length = 512; length > 0; length -= 128) { -- status = acpi_video_device_EDID(video_device, &buffer, -- length); -- if (ACPI_SUCCESS(status)) -- break; -+ ret = acpi_video_device_EDID(video_device, edid, length); -+ if (ret > 0) -+ return ret; - } -- if (!length) -- continue; -- -- *edid = buffer->buffer.pointer; -- return length; - } - - return -ENODEV; diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index e0e3bb865359..ba98d56a0fe4 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -14242,19 +14835,50 @@ index 855beafb76ff..ad78059ee954 100644 if (!newmode) continue; -diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c -index 8f0c69aad248..21b56cc7605c 100644 ---- a/drivers/gpu/drm/nouveau/nouveau_acpi.c -+++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c -@@ -384,7 +384,7 @@ nouveau_acpi_edid(struct drm_device *dev, struct drm_connector *connector) - if (ret < 0) - return NULL; - -- return kmemdup(edid, EDID_LENGTH, GFP_KERNEL); -+ return edid; +diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c +index 506c6f377e7d..46e3e42f9eb5 100644 +--- a/drivers/hid/hid-asus.c ++++ b/drivers/hid/hid-asus.c +@@ -432,6 +432,26 @@ static int asus_kbd_get_functions(struct hid_device *hdev, + return ret; } - bool nouveau_acpi_video_backlight_use_native(void) ++static int asus_kbd_disable_oobe(struct hid_device *hdev) ++{ ++ const u8 init[][6] = { ++ { FEATURE_KBD_REPORT_ID, 0x05, 0x20, 0x31, 0x00, 0x08 }, ++ { FEATURE_KBD_REPORT_ID, 0xBA, 0xC5, 0xC4 }, ++ { FEATURE_KBD_REPORT_ID, 0xD0, 0x8F, 0x01 }, ++ { FEATURE_KBD_REPORT_ID, 0xD0, 0x85, 0xFF } ++ }; ++ int ret; ++ ++ for (size_t i = 0; i < ARRAY_SIZE(init); i++) { ++ ret = asus_kbd_set_report(hdev, init[i], sizeof(init[i])); ++ if (ret < 0) ++ return ret; ++ } ++ ++ hid_info(hdev, "Disabled OOBE for keyboard\n"); ++ return 0; ++} ++ + static void asus_schedule_work(struct asus_kbd_leds *led) + { + unsigned long flags; +@@ -534,6 +554,12 @@ static int asus_kbd_register_leds(struct hid_device *hdev) + ret = asus_kbd_init(hdev, FEATURE_KBD_LED_REPORT_ID2); + if (ret < 0) + return ret; ++ ++ if (dmi_match(DMI_PRODUCT_FAMILY, "ProArt P16")) { ++ ret = asus_kbd_disable_oobe(hdev); ++ if (ret < 0) ++ return ret; ++ } + } else { + /* Initialize keyboard */ + ret = asus_kbd_init(hdev, FEATURE_KBD_REPORT_ID); diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 1f47fda809b9..6c2df0d37b3b 100644 --- a/drivers/hid/hid-ids.h @@ -14267,6 +14891,22 @@ index 1f47fda809b9..6c2df0d37b3b 100644 #define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY 0x1abe #define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY_X 0x1b4c #define USB_DEVICE_ID_ASUSTEK_ROG_CLAYMORE_II_KEYBOARD 0x196b +diff --git 
a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h +index 365e119bebaa..783e2a336861 100644 +--- a/include/linux/platform_data/x86/asus-wmi.h ++++ b/include/linux/platform_data/x86/asus-wmi.h +@@ -184,6 +184,11 @@ static const struct dmi_system_id asus_use_hid_led_dmi_ids[] = { + DMI_MATCH(DMI_PRODUCT_FAMILY, "ROG Flow"), + }, + }, ++ { ++ .matches = { ++ DMI_MATCH(DMI_PRODUCT_FAMILY, "ProArt P16"), ++ }, ++ }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GA403U"), diff --git a/kernel/fork.c b/kernel/fork.c index 0cb5431b4d7e..e919c8c3a121 100644 --- a/kernel/fork.c @@ -14390,13 +15030,15 @@ index dca706617adc..89d3aef160b7 100644 mkdir -p "${builddir}" cp System.map "${builddir}/System.map" diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c -index b74b566f675e..070dd1ab89c6 100644 +index ad66378d7321..4210bc8f12e1 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c -@@ -10641,6 +10641,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = { +@@ -10641,8 +10641,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1e1f, "ASUS Vivobook 15 X1504VAP", ALC2XX_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1e51, "ASUS Zephyrus M15", ALC294_FIXUP_ASUS_GU502_PINS), SND_PCI_QUIRK(0x1043, 0x1e5e, "ASUS ROG Strix G513", ALC294_FIXUP_ASUS_G513_PINS), +- SND_PCI_QUIRK(0x1043, 0x1e63, "ASUS H7606W", ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1), +- SND_PCI_QUIRK(0x1043, 0x1e83, "ASUS GA605W", ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1), + SND_PCI_QUIRK(0x1043, 0x1e63, "ASUS H7606W", ALC285_FIXUP_ASUS_GA403U_HEADSET_MIC), + SND_PCI_QUIRK(0x1043, 0x1e83, "ASUS GA605W", ALC285_FIXUP_ASUS_GA403U_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1e8e, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA401), @@ -14405,9 +15047,9 @@ index b74b566f675e..070dd1ab89c6 100644 -- 2.48.0.rc1 -From 25702dae4d4390c6e804bfe18eef1341a854b9f2 Mon Sep 17 00:00:00 2001 +From edca92ed206343ae09ee1af6ae0dfc26a68085b1 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:38:22 +0100 +Date: Mon, 20 Jan 2025 13:22:28 +0100 Subject: [PATCH 07/12] itmt-core-ranking Signed-off-by: Peter Jung @@ -14642,10 +15284,10 @@ index b5a8f0891135..ef63b1c0b491 100644 } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index 463fe1dc6de8..f849298a4cc1 100644 +index 8c0f17a96d4f..c532ffb153b4 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c -@@ -9941,6 +9941,8 @@ struct sg_lb_stats { +@@ -9836,6 +9836,8 @@ struct sg_lb_stats { unsigned int group_weight; enum group_type group_type; unsigned int group_asym_packing; /* Tasks should be moved to preferred CPU */ @@ -14654,7 +15296,7 @@ index 463fe1dc6de8..f849298a4cc1 100644 unsigned int group_smt_balance; /* Task on busy SMT be moved */ unsigned long group_misfit_task_load; /* A CPU has a task too big for its capacity */ #ifdef CONFIG_NUMA_BALANCING -@@ -10270,7 +10272,7 @@ sched_group_asym(struct lb_env *env, struct sg_lb_stats *sgs, struct sched_group +@@ -10165,7 +10167,7 @@ sched_group_asym(struct lb_env *env, struct sg_lb_stats *sgs, struct sched_group (sgs->group_weight - sgs->idle_cpus != 1)) return false; @@ -14663,7 +15305,7 @@ index 463fe1dc6de8..f849298a4cc1 100644 } /* One group has more than one SMT CPU while the other group does not */ -@@ -10351,6 +10353,17 @@ sched_reduced_capacity(struct rq *rq, struct sched_domain *sd) +@@ -10246,6 +10248,17 @@ sched_reduced_capacity(struct rq *rq, struct sched_domain *sd) return check_cpu_capacity(rq, sd); } @@ 
-14681,7 +15323,7 @@ index 463fe1dc6de8..f849298a4cc1 100644 /** * update_sg_lb_stats - Update sched_group's statistics for load balancing. * @env: The load balancing environment. -@@ -10367,11 +10380,13 @@ static inline void update_sg_lb_stats(struct lb_env *env, +@@ -10262,11 +10275,13 @@ static inline void update_sg_lb_stats(struct lb_env *env, bool *sg_overloaded, bool *sg_overutilized) { @@ -14696,7 +15338,7 @@ index 463fe1dc6de8..f849298a4cc1 100644 for_each_cpu_and(i, sched_group_span(group), env->cpus) { struct rq *rq = cpu_rq(i); -@@ -10385,16 +10400,12 @@ static inline void update_sg_lb_stats(struct lb_env *env, +@@ -10280,16 +10295,12 @@ static inline void update_sg_lb_stats(struct lb_env *env, nr_running = rq->nr_running; sgs->sum_nr_running += nr_running; @@ -14715,7 +15357,7 @@ index 463fe1dc6de8..f849298a4cc1 100644 /* * No need to call idle_cpu() if nr_running is not 0 */ -@@ -10404,10 +10415,21 @@ static inline void update_sg_lb_stats(struct lb_env *env, +@@ -10299,10 +10310,21 @@ static inline void update_sg_lb_stats(struct lb_env *env, continue; } @@ -14738,7 +15380,7 @@ index 463fe1dc6de8..f849298a4cc1 100644 /* Check for a misfit task on the cpu */ if (sgs->group_misfit_task_load < rq->misfit_task_load) { sgs->group_misfit_task_load = rq->misfit_task_load; -@@ -10502,7 +10524,7 @@ static bool update_sd_pick_busiest(struct lb_env *env, +@@ -10397,7 +10419,7 @@ static bool update_sd_pick_busiest(struct lb_env *env, case group_asym_packing: /* Prefer to move from lowest priority CPU's work */ @@ -14795,9 +15437,9 @@ index 9748a4c8d668..59b8157cb114 100644 -- 2.48.0.rc1 -From 1871388db87b6e7114a28eec15fc03e4c0497e52 Mon Sep 17 00:00:00 2001 +From dad63380fd4bccaf1df47a5d2a14b3622a828bbf Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:38:39 +0100 +Date: Mon, 20 Jan 2025 13:22:39 +0100 Subject: [PATCH 08/12] ntsync Signed-off-by: Peter Jung @@ -15224,10 +15866,10 @@ index 000000000000..25e7c4aef968 + ``objs`` and in ``alert``. If this is attempted, the function fails + with ``EINVAL``. 
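As an illustrative aside (not part of the patch itself), a minimal userspace sketch of the wait semantics documented above: it assumes the uapi revision in which the create ioctls return the new object's file descriptor, and it uses designated initializers so the exact field order of struct ntsync_wait_args does not matter; ioctl and field names should be checked against the include/uapi/linux/ntsync.h added by this series before relying on them.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/ntsync.h>

int main(void)
{
	int dev = open("/dev/ntsync", O_RDWR);
	if (dev < 0)
		return 1;

	/* Semaphore that starts signaled: count 1, maximum 1. */
	struct ntsync_sem_args sem_args = { .count = 1, .max = 1 };
	int sem = ioctl(dev, NTSYNC_IOC_CREATE_SEM, &sem_args);
	if (sem < 0)
		return 1;

	int objs[1] = { sem };
	struct ntsync_wait_args wait = {
		.timeout = UINT64_MAX,         /* never time out */
		.objs    = (uintptr_t)objs,    /* array of object fds */
		.count   = 1,
		.owner   = (uint32_t)getpid(), /* nonzero id; only used for mutexes */
		.alert   = 0,                  /* no alert object */
	};

	/*
	 * Acquires the semaphore (its count drops to 0) and reports which
	 * entry in objs[] was signaled, as described in the text above.
	 */
	if (ioctl(dev, NTSYNC_IOC_WAIT_ANY, &wait) == 0)
		printf("acquired object index %u\n", wait.index);

	close(sem);
	close(dev);
	return 0;
}
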
diff --git a/MAINTAINERS b/MAINTAINERS -index a87ddad78e26..69c7e0c9cbfd 100644 +index 0fa7c5728f1e..efecb59adfe6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS -@@ -16708,6 +16708,15 @@ T: git https://github.com/Paragon-Software-Group/linux-ntfs3.git +@@ -16709,6 +16709,15 @@ T: git https://github.com/Paragon-Software-Group/linux-ntfs3.git F: Documentation/filesystems/ntfs3.rst F: fs/ntfs3/ @@ -17825,9 +18467,9 @@ index 000000000000..3aad311574c4 -- 2.48.0.rc1 -From ecafa3b39e7691288beb920eb362064d548d45e7 Mon Sep 17 00:00:00 2001 +From d0d15e3d79a2d5bb2c94b8ff3d2ab51f0b0100fe Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:38:51 +0100 +Date: Mon, 20 Jan 2025 13:22:50 +0100 Subject: [PATCH 09/12] perf-per-core Signed-off-by: Peter Jung @@ -18723,9 +19365,9 @@ index 8277c64f88db..b5a5e1411469 100644 -- 2.48.0.rc1 -From b0522d38174d109d02042dc5591c1ab52de16a94 Mon Sep 17 00:00:00 2001 +From 6a7ea67c66634276802b4b9b0964a0b00db97d9c Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:39:03 +0100 +Date: Mon, 20 Jan 2025 13:23:02 +0100 Subject: [PATCH 10/12] pksm Signed-off-by: Peter Jung @@ -19156,9 +19798,9 @@ index e9115b4d8b63..2afc778f2d17 100644 -- 2.48.0.rc1 -From 5662d52675419bbe7b47731ad55c01ecf94b8426 Mon Sep 17 00:00:00 2001 +From 5e459e48f274c34d701726a61a96140381b1de2b Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:39:12 +0100 +Date: Mon, 20 Jan 2025 13:23:11 +0100 Subject: [PATCH 11/12] t2 Signed-off-by: Peter Jung @@ -19314,10 +19956,10 @@ index ecccc0473da9..6de6b0e6abf3 100644 ---- diff --git a/MAINTAINERS b/MAINTAINERS -index 69c7e0c9cbfd..01be85b7d886 100644 +index efecb59adfe6..16af42c68cca 100644 --- a/MAINTAINERS +++ b/MAINTAINERS -@@ -7065,6 +7065,12 @@ S: Supported +@@ -7066,6 +7066,12 @@ S: Supported T: git https://gitlab.freedesktop.org/drm/misc/kernel.git F: drivers/gpu/drm/sun4i/sun8i* @@ -29483,9 +30125,9 @@ index 9eed3683ad76..7ddbf75f4c26 100755 -- 2.48.0.rc1 -From 91beebc1e962374c32c95b975d59ff5aa90b66c1 Mon Sep 17 00:00:00 2001 +From 6f96c228cd968c7f47eb90d9e7ad6d679bf5a7f0 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 13 Jan 2025 15:39:21 +0100 +Date: Mon, 20 Jan 2025 13:23:20 +0100 Subject: [PATCH 12/12] zstd Signed-off-by: Peter Jung diff --git a/6.13/sched-dev/0001-bore-cachy.patch b/6.13/sched-dev/0001-bore-cachy.patch deleted file mode 100644 index 1eb64cb1..00000000 --- a/6.13/sched-dev/0001-bore-cachy.patch +++ /dev/null @@ -1,1030 +0,0 @@ -From 2aaaad0215c8d15c5133eb2bc1c77c021edff609 Mon Sep 17 00:00:00 2001 -From: Eric Naim -Date: Mon, 20 Jan 2025 09:19:36 +0700 -Subject: [PATCH] bore-cachy - -Signed-off-by: Eric Naim ---- - include/linux/sched.h | 18 ++ - include/linux/sched/bore.h | 40 ++++ - init/Kconfig | 17 ++ - kernel/Kconfig.hz | 17 ++ - kernel/fork.c | 6 + - kernel/sched/Makefile | 1 + - kernel/sched/bore.c | 443 +++++++++++++++++++++++++++++++++++++ - kernel/sched/core.c | 6 + - kernel/sched/debug.c | 61 ++++- - kernel/sched/fair.c | 86 +++++-- - kernel/sched/sched.h | 9 + - 11 files changed, 686 insertions(+), 18 deletions(-) - create mode 100644 include/linux/sched/bore.h - create mode 100644 kernel/sched/bore.c - -diff --git a/include/linux/sched.h b/include/linux/sched.h -index 64934e0830af..7ec02a323014 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -538,6 +538,15 @@ struct sched_statistics { - #endif /* CONFIG_SCHEDSTATS */ - } ____cacheline_aligned; - -+#ifdef CONFIG_SCHED_BORE -+struct sched_burst_cache { -+ u8 score; -+ u32 count; -+ u64 timestamp; 
-+ spinlock_t lock; -+}; -+#endif // CONFIG_SCHED_BORE -+ - struct sched_entity { - /* For load-balancing: */ - struct load_weight load; -@@ -557,6 +566,15 @@ struct sched_entity { - u64 sum_exec_runtime; - u64 prev_sum_exec_runtime; - u64 vruntime; -+#ifdef CONFIG_SCHED_BORE -+ u64 burst_time; -+ u8 prev_burst_penalty; -+ u8 curr_burst_penalty; -+ u8 burst_penalty; -+ u8 burst_score; -+ struct sched_burst_cache child_burst; -+ struct sched_burst_cache group_burst; -+#endif // CONFIG_SCHED_BORE - s64 vlag; - u64 slice; - -diff --git a/include/linux/sched/bore.h b/include/linux/sched/bore.h -new file mode 100644 -index 000000000000..a8faabc2885e ---- /dev/null -+++ b/include/linux/sched/bore.h -@@ -0,0 +1,40 @@ -+ -+#include -+#include -+ -+#ifndef _LINUX_SCHED_BORE_H -+#define _LINUX_SCHED_BORE_H -+#define SCHED_BORE_VERSION "5.9.6" -+ -+#ifdef CONFIG_SCHED_BORE -+extern u8 __read_mostly sched_bore; -+extern u8 __read_mostly sched_burst_exclude_kthreads; -+extern u8 __read_mostly sched_burst_smoothness_long; -+extern u8 __read_mostly sched_burst_smoothness_short; -+extern u8 __read_mostly sched_burst_fork_atavistic; -+extern u8 __read_mostly sched_burst_parity_threshold; -+extern u8 __read_mostly sched_burst_penalty_offset; -+extern uint __read_mostly sched_burst_penalty_scale; -+extern uint __read_mostly sched_burst_cache_stop_count; -+extern uint __read_mostly sched_burst_cache_lifetime; -+extern uint __read_mostly sched_deadline_boost_mask; -+ -+extern void update_burst_score(struct sched_entity *se); -+extern void update_burst_penalty(struct sched_entity *se); -+ -+extern void restart_burst(struct sched_entity *se); -+extern void restart_burst_rescale_deadline(struct sched_entity *se); -+ -+extern int sched_bore_update_handler(const struct ctl_table *table, int write, -+ void __user *buffer, size_t *lenp, loff_t *ppos); -+ -+extern void sched_clone_bore( -+ struct task_struct *p, struct task_struct *parent, u64 clone_flags, u64 now); -+ -+extern void reset_task_bore(struct task_struct *p); -+extern void sched_bore_init(void); -+ -+extern void reweight_entity( -+ struct cfs_rq *cfs_rq, struct sched_entity *se, unsigned long weight); -+#endif // CONFIG_SCHED_BORE -+#endif // _LINUX_SCHED_BORE_H -diff --git a/init/Kconfig b/init/Kconfig -index 9437171030e2..c6f811d72dfd 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -1362,6 +1362,23 @@ config CHECKPOINT_RESTORE - - If unsure, say N here. - -+config SCHED_BORE -+ bool "Burst-Oriented Response Enhancer" -+ default y -+ help -+ In Desktop and Mobile computing, one might prefer interactive -+ tasks to keep responsive no matter what they run in the background. -+ -+ Enabling this kernel feature modifies the scheduler to discriminate -+ tasks by their burst time (runtime since it last went sleeping or -+ yielding state) and prioritize those that run less bursty. -+ Such tasks usually include window compositor, widgets backend, -+ terminal emulator, video playback, games and so on. -+ With a little impact to scheduling fairness, it may improve -+ responsiveness especially under heavy background workload. -+ -+ If unsure, say Y here. 
-+ - config SCHED_AUTOGROUP - bool "Automatic process group scheduling" - select CGROUPS -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 0f78364efd4f..83a6b919ab29 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -79,5 +79,22 @@ config HZ - default 750 if HZ_750 - default 1000 if HZ_1000 - -+config MIN_BASE_SLICE_NS -+ int "Default value for min_base_slice_ns" -+ default 2000000 -+ help -+ The BORE Scheduler automatically calculates the optimal base -+ slice for the configured HZ using the following equation: -+ -+ base_slice_ns = -+ 1000000000/HZ * DIV_ROUNDUP(min_base_slice_ns, 1000000000/HZ) -+ -+ This option sets the default lower bound limit of the base slice -+ to prevent the loss of task throughput due to overscheduling. -+ -+ Setting this value too high can cause the system to boot with -+ an unnecessarily large base slice, resulting in high scheduling -+ latency and poor system responsiveness. -+ - config SCHED_HRTICK - def_bool HIGH_RES_TIMERS -diff --git a/kernel/fork.c b/kernel/fork.c -index e919c8c3a121..726d3daa0498 100644 ---- a/kernel/fork.c -+++ b/kernel/fork.c -@@ -116,6 +116,8 @@ - #include - #include - -+#include -+ - #include - - #define CREATE_TRACE_POINTS -@@ -2524,6 +2526,10 @@ __latent_entropy struct task_struct *copy_process( - p->start_time = ktime_get_ns(); - p->start_boottime = ktime_get_boottime_ns(); - -+#ifdef CONFIG_SCHED_BORE -+ if (likely(p->pid)) -+ sched_clone_bore(p, current, clone_flags, p->start_time); -+#endif // CONFIG_SCHED_BORE - /* - * Make it visible to the rest of the system, but dont wake it up yet. - * Need tasklist lock for parent etc handling! -diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile -index 976092b7bd45..293aad675444 100644 ---- a/kernel/sched/Makefile -+++ b/kernel/sched/Makefile -@@ -32,3 +32,4 @@ obj-y += core.o - obj-y += fair.o - obj-y += build_policy.o - obj-y += build_utility.o -+obj-y += bore.o -diff --git a/kernel/sched/bore.c b/kernel/sched/bore.c -new file mode 100644 -index 000000000000..23aeb5649479 ---- /dev/null -+++ b/kernel/sched/bore.c -@@ -0,0 +1,443 @@ -+/* -+ * Burst-Oriented Response Enhancer (BORE) CPU Scheduler -+ * Copyright (C) 2021-2024 Masahito Suzuki -+ */ -+#include -+#include -+#include -+#include "sched.h" -+ -+#ifdef CONFIG_SCHED_BORE -+u8 __read_mostly sched_bore = 1; -+u8 __read_mostly sched_burst_exclude_kthreads = 1; -+u8 __read_mostly sched_burst_smoothness_long = 1; -+u8 __read_mostly sched_burst_smoothness_short = 0; -+u8 __read_mostly sched_burst_fork_atavistic = 2; -+u8 __read_mostly sched_burst_parity_threshold = 2; -+u8 __read_mostly sched_burst_penalty_offset = 24; -+uint __read_mostly sched_burst_penalty_scale = 1280; -+uint __read_mostly sched_burst_cache_stop_count = 64; -+uint __read_mostly sched_burst_cache_lifetime = 75000000; -+uint __read_mostly sched_deadline_boost_mask = ENQUEUE_INITIAL -+ | ENQUEUE_WAKEUP; -+static int __maybe_unused sixty_four = 64; -+static int __maybe_unused maxval_u8 = 255; -+static int __maybe_unused maxval_12_bits = 4095; -+ -+#define MAX_BURST_PENALTY (39U <<2) -+ -+static inline u32 log2plus1_u64_u32f8(u64 v) { -+ u32 integral = fls64(v); -+ u8 fractional = v << (64 - integral) >> 55; -+ return integral << 8 | fractional; -+} -+ -+static inline u32 calc_burst_penalty(u64 burst_time) { -+ u32 greed, tolerance, penalty, scaled_penalty; -+ -+ greed = log2plus1_u64_u32f8(burst_time); -+ tolerance = sched_burst_penalty_offset << 8; -+ penalty = max(0, (s32)(greed - tolerance)); -+ scaled_penalty = penalty * 
sched_burst_penalty_scale >> 16; -+ -+ return min(MAX_BURST_PENALTY, scaled_penalty); -+} -+ -+static inline u64 __scale_slice(u64 delta, u8 score) -+{return mul_u64_u32_shr(delta, sched_prio_to_wmult[score], 22);} -+ -+static inline u64 __unscale_slice(u64 delta, u8 score) -+{return mul_u64_u32_shr(delta, sched_prio_to_weight[score], 10);} -+ -+static void reweight_task_by_prio(struct task_struct *p, int prio) { -+ struct sched_entity *se = &p->se; -+ unsigned long weight = scale_load(sched_prio_to_weight[prio]); -+ -+ reweight_entity(cfs_rq_of(se), se, weight); -+ se->load.inv_weight = sched_prio_to_wmult[prio]; -+} -+ -+static inline u8 effective_prio(struct task_struct *p) { -+ u8 prio = p->static_prio - MAX_RT_PRIO; -+ if (likely(sched_bore)) -+ prio += p->se.burst_score; -+ return min(39, prio); -+} -+ -+void update_burst_score(struct sched_entity *se) { -+ if (!entity_is_task(se)) return; -+ struct task_struct *p = task_of(se); -+ u8 prev_prio = effective_prio(p); -+ -+ u8 burst_score = 0; -+ if (!((p->flags & PF_KTHREAD) && likely(sched_burst_exclude_kthreads))) -+ burst_score = se->burst_penalty >> 2; -+ se->burst_score = burst_score; -+ -+ u8 new_prio = effective_prio(p); -+ if (new_prio != prev_prio) -+ reweight_task_by_prio(p, new_prio); -+} -+ -+void update_burst_penalty(struct sched_entity *se) { -+ se->curr_burst_penalty = calc_burst_penalty(se->burst_time); -+ se->burst_penalty = max(se->prev_burst_penalty, se->curr_burst_penalty); -+ update_burst_score(se); -+} -+ -+static inline u32 binary_smooth(u32 new, u32 old) { -+ int increment = new - old; -+ return (0 <= increment)? -+ old + ( increment >> (int)sched_burst_smoothness_long): -+ old - (-increment >> (int)sched_burst_smoothness_short); -+} -+ -+static void revolve_burst_penalty(struct sched_entity *se) { -+ se->prev_burst_penalty = -+ binary_smooth(se->curr_burst_penalty, se->prev_burst_penalty); -+ se->burst_time = 0; -+ se->curr_burst_penalty = 0; -+} -+ -+inline void restart_burst(struct sched_entity *se) { -+ revolve_burst_penalty(se); -+ se->burst_penalty = se->prev_burst_penalty; -+ update_burst_score(se); -+} -+ -+void restart_burst_rescale_deadline(struct sched_entity *se) { -+ s64 vscaled, wremain, vremain = se->deadline - se->vruntime; -+ struct task_struct *p = task_of(se); -+ u8 prev_prio = effective_prio(p); -+ restart_burst(se); -+ u8 new_prio = effective_prio(p); -+ if (prev_prio > new_prio) { -+ wremain = __unscale_slice(abs(vremain), prev_prio); -+ vscaled = __scale_slice(wremain, new_prio); -+ if (unlikely(vremain < 0)) -+ vscaled = -vscaled; -+ se->deadline = se->vruntime + vscaled; -+ } -+} -+ -+static inline bool task_is_bore_eligible(struct task_struct *p) -+{return p && p->sched_class == &fair_sched_class && !p->exit_state;} -+ -+static void reset_task_weights_bore(void) { -+ struct task_struct *task; -+ struct rq *rq; -+ struct rq_flags rf; -+ -+ write_lock_irq(&tasklist_lock); -+ for_each_process(task) { -+ if (!task_is_bore_eligible(task)) continue; -+ rq = task_rq(task); -+ rq_pin_lock(rq, &rf); -+ update_rq_clock(rq); -+ reweight_task_by_prio(task, effective_prio(task)); -+ rq_unpin_lock(rq, &rf); -+ } -+ write_unlock_irq(&tasklist_lock); -+} -+ -+int sched_bore_update_handler(const struct ctl_table *table, int write, -+ void __user *buffer, size_t *lenp, loff_t *ppos) { -+ int ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos); -+ if (ret || !write) -+ return ret; -+ -+ reset_task_weights_bore(); -+ -+ return 0; -+} -+ -+#define for_each_child(p, t) \ -+ list_for_each_entry(t, 
&(p)->children, sibling) -+ -+static u32 count_entries_upto2(struct list_head *head) { -+ struct list_head *next = head->next; -+ return (next != head) + (next->next != head); -+} -+ -+static inline void init_task_burst_cache_lock(struct task_struct *p) { -+ spin_lock_init(&p->se.child_burst.lock); -+ spin_lock_init(&p->se.group_burst.lock); -+} -+ -+static inline bool burst_cache_expired(struct sched_burst_cache *bc, u64 now) -+{return (s64)(bc->timestamp + sched_burst_cache_lifetime - now) < 0;} -+ -+static void update_burst_cache(struct sched_burst_cache *bc, -+ struct task_struct *p, u32 cnt, u32 sum, u64 now) { -+ u8 avg = cnt ? sum / cnt : 0; -+ bc->score = max(avg, p->se.burst_penalty); -+ bc->count = cnt; -+ bc->timestamp = now; -+} -+ -+static inline void update_child_burst_direct(struct task_struct *p, u64 now) { -+ u32 cnt = 0, sum = 0; -+ struct task_struct *child; -+ -+ for_each_child(p, child) { -+ if (!task_is_bore_eligible(child)) continue; -+ cnt++; -+ sum += child->se.burst_penalty; -+ } -+ -+ update_burst_cache(&p->se.child_burst, p, cnt, sum, now); -+} -+ -+static inline u8 inherit_burst_direct( -+ struct task_struct *p, u64 now, u64 clone_flags) { -+ struct task_struct *parent = p; -+ struct sched_burst_cache *bc; -+ -+ if (clone_flags & CLONE_PARENT) -+ parent = parent->real_parent; -+ -+ bc = &parent->se.child_burst; -+ guard(spinlock)(&bc->lock); -+ if (burst_cache_expired(bc, now)) -+ update_child_burst_direct(parent, now); -+ -+ return bc->score; -+} -+ -+static void update_child_burst_topological( -+ struct task_struct *p, u64 now, u32 depth, u32 *acnt, u32 *asum) { -+ u32 cnt = 0, dcnt = 0, sum = 0; -+ struct task_struct *child, *dec; -+ struct sched_burst_cache *bc __maybe_unused; -+ -+ for_each_child(p, child) { -+ dec = child; -+ while ((dcnt = count_entries_upto2(&dec->children)) == 1) -+ dec = list_first_entry(&dec->children, struct task_struct, sibling); -+ -+ if (!dcnt || !depth) { -+ if (!task_is_bore_eligible(dec)) continue; -+ cnt++; -+ sum += dec->se.burst_penalty; -+ continue; -+ } -+ bc = &dec->se.child_burst; -+ spin_lock(&bc->lock); -+ if (!burst_cache_expired(bc, now)) { -+ cnt += bc->count; -+ sum += (u32)bc->score * bc->count; -+ if (sched_burst_cache_stop_count <= cnt) { -+ spin_unlock(&bc->lock); -+ break; -+ } -+ spin_unlock(&bc->lock); -+ continue; -+ } -+ update_child_burst_topological(dec, now, depth - 1, &cnt, &sum); -+ spin_unlock(&bc->lock); -+ } -+ -+ update_burst_cache(&p->se.child_burst, p, cnt, sum, now); -+ *acnt += cnt; -+ *asum += sum; -+} -+ -+static inline u8 inherit_burst_topological( -+ struct task_struct *p, u64 now, u64 clone_flags) { -+ struct task_struct *anc = p; -+ struct sched_burst_cache *bc; -+ u32 cnt = 0, sum = 0; -+ u32 base_child_cnt = 0; -+ -+ if (clone_flags & CLONE_PARENT) { -+ anc = anc->real_parent; -+ base_child_cnt = 1; -+ } -+ -+ for (struct task_struct *next; -+ anc != (next = anc->real_parent) && -+ count_entries_upto2(&anc->children) <= base_child_cnt;) { -+ anc = next; -+ base_child_cnt = 1; -+ } -+ -+ bc = &anc->se.child_burst; -+ guard(spinlock)(&bc->lock); -+ if (burst_cache_expired(bc, now)) -+ update_child_burst_topological( -+ anc, now, sched_burst_fork_atavistic - 1, &cnt, &sum); -+ -+ return bc->score; -+} -+ -+static inline void update_tg_burst(struct task_struct *p, u64 now) { -+ struct task_struct *task; -+ u32 cnt = 0, sum = 0; -+ -+ for_each_thread(p, task) { -+ if (!task_is_bore_eligible(task)) continue; -+ cnt++; -+ sum += task->se.burst_penalty; -+ } -+ -+ 
update_burst_cache(&p->se.group_burst, p, cnt, sum, now); -+} -+ -+static inline u8 inherit_burst_tg(struct task_struct *p, u64 now) { -+ struct task_struct *parent = rcu_dereference(p->group_leader); -+ struct sched_burst_cache *bc = &parent->se.group_burst; -+ guard(spinlock)(&bc->lock); -+ if (burst_cache_expired(bc, now)) -+ update_tg_burst(parent, now); -+ -+ return bc->score; -+} -+ -+void sched_clone_bore(struct task_struct *p, -+ struct task_struct *parent, u64 clone_flags, u64 now) { -+ struct sched_entity *se = &p->se; -+ u8 penalty; -+ -+ init_task_burst_cache_lock(p); -+ -+ if (!task_is_bore_eligible(p)) return; -+ -+ if (clone_flags & CLONE_THREAD) { -+ rcu_read_lock(); -+ penalty = inherit_burst_tg(parent, now); -+ rcu_read_unlock(); -+ } else { -+ read_lock(&tasklist_lock); -+ penalty = likely(sched_burst_fork_atavistic) ? -+ inherit_burst_topological(parent, now, clone_flags): -+ inherit_burst_direct(parent, now, clone_flags); -+ read_unlock(&tasklist_lock); -+ } -+ -+ revolve_burst_penalty(se); -+ se->burst_penalty = se->prev_burst_penalty = -+ max(se->prev_burst_penalty, penalty); -+ se->child_burst.timestamp = 0; -+ se->group_burst.timestamp = 0; -+} -+ -+void reset_task_bore(struct task_struct *p) { -+ p->se.burst_time = 0; -+ p->se.prev_burst_penalty = 0; -+ p->se.curr_burst_penalty = 0; -+ p->se.burst_penalty = 0; -+ p->se.burst_score = 0; -+ memset(&p->se.child_burst, 0, sizeof(struct sched_burst_cache)); -+ memset(&p->se.group_burst, 0, sizeof(struct sched_burst_cache)); -+} -+ -+void __init sched_bore_init(void) { -+ printk(KERN_INFO "BORE (Burst-Oriented Response Enhancer) CPU Scheduler modification %s by Masahito Suzuki", SCHED_BORE_VERSION); -+ reset_task_bore(&init_task); -+ init_task_burst_cache_lock(&init_task); -+} -+ -+#ifdef CONFIG_SYSCTL -+static struct ctl_table sched_bore_sysctls[] = { -+ { -+ .procname = "sched_bore", -+ .data = &sched_bore, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = sched_bore_update_handler, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_ONE, -+ }, -+ { -+ .procname = "sched_burst_exclude_kthreads", -+ .data = &sched_burst_exclude_kthreads, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = proc_dou8vec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_ONE, -+ }, -+ { -+ .procname = "sched_burst_smoothness_long", -+ .data = &sched_burst_smoothness_long, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = proc_dou8vec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_ONE, -+ }, -+ { -+ .procname = "sched_burst_smoothness_short", -+ .data = &sched_burst_smoothness_short, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = proc_dou8vec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_ONE, -+ }, -+ { -+ .procname = "sched_burst_fork_atavistic", -+ .data = &sched_burst_fork_atavistic, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = proc_dou8vec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_THREE, -+ }, -+ { -+ .procname = "sched_burst_parity_threshold", -+ .data = &sched_burst_parity_threshold, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = proc_dou8vec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = &maxval_u8, -+ }, -+ { -+ .procname = "sched_burst_penalty_offset", -+ .data = &sched_burst_penalty_offset, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = proc_dou8vec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = &sixty_four, -+ }, -+ { -+ .procname = "sched_burst_penalty_scale", -+ .data = &sched_burst_penalty_scale, -+ .maxlen = 
sizeof(uint), -+ .mode = 0644, -+ .proc_handler = proc_douintvec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = &maxval_12_bits, -+ }, -+ { -+ .procname = "sched_burst_cache_stop_count", -+ .data = &sched_burst_cache_stop_count, -+ .maxlen = sizeof(uint), -+ .mode = 0644, -+ .proc_handler = proc_douintvec, -+ }, -+ { -+ .procname = "sched_burst_cache_lifetime", -+ .data = &sched_burst_cache_lifetime, -+ .maxlen = sizeof(uint), -+ .mode = 0644, -+ .proc_handler = proc_douintvec, -+ }, -+ { -+ .procname = "sched_deadline_boost_mask", -+ .data = &sched_deadline_boost_mask, -+ .maxlen = sizeof(uint), -+ .mode = 0644, -+ .proc_handler = proc_douintvec, -+ }, -+}; -+ -+static int __init sched_bore_sysctl_init(void) { -+ register_sysctl_init("kernel", sched_bore_sysctls); -+ return 0; -+} -+late_initcall(sched_bore_sysctl_init); -+#endif // CONFIG_SYSCTL -+#endif // CONFIG_SCHED_BORE -diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index 3e5a6bf587f9..fb4bb3fa5a96 100644 ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -97,6 +97,8 @@ - #include "../../io_uring/io-wq.h" - #include "../smpboot.h" - -+#include -+ - EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpu); - EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpumask); - -@@ -8481,6 +8483,10 @@ void __init sched_init(void) - BUG_ON(!sched_class_above(&ext_sched_class, &idle_sched_class)); - #endif - -+#ifdef CONFIG_SCHED_BORE -+ sched_bore_init(); -+#endif // CONFIG_SCHED_BORE -+ - wait_bit_init(); - - #ifdef CONFIG_FAIR_GROUP_SCHED -diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c -index a1be00a988bf..66fcb229007d 100644 ---- a/kernel/sched/debug.c -+++ b/kernel/sched/debug.c -@@ -167,7 +167,53 @@ static const struct file_operations sched_feat_fops = { - }; - - #ifdef CONFIG_SMP -+#ifdef CONFIG_SCHED_BORE -+#define DEFINE_SYSCTL_SCHED_FUNC(name, update_func) \ -+static ssize_t sched_##name##_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) \ -+{ \ -+ char buf[16]; \ -+ unsigned int value; \ -+\ -+ if (cnt > 15) \ -+ cnt = 15; \ -+\ -+ if (copy_from_user(&buf, ubuf, cnt)) \ -+ return -EFAULT; \ -+ buf[cnt] = '\0'; \ -+\ -+ if (kstrtouint(buf, 10, &value)) \ -+ return -EINVAL; \ -+\ -+ sysctl_sched_##name = value; \ -+ sched_update_##update_func(); \ -+\ -+ *ppos += cnt; \ -+ return cnt; \ -+} \ -+\ -+static int sched_##name##_show(struct seq_file *m, void *v) \ -+{ \ -+ seq_printf(m, "%d\n", sysctl_sched_##name); \ -+ return 0; \ -+} \ -+\ -+static int sched_##name##_open(struct inode *inode, struct file *filp) \ -+{ \ -+ return single_open(filp, sched_##name##_show, NULL); \ -+} \ -+\ -+static const struct file_operations sched_##name##_fops = { \ -+ .open = sched_##name##_open, \ -+ .write = sched_##name##_write, \ -+ .read = seq_read, \ -+ .llseek = seq_lseek, \ -+ .release = single_release, \ -+}; - -+DEFINE_SYSCTL_SCHED_FUNC(min_base_slice, min_base_slice) -+ -+#undef DEFINE_SYSCTL_SCHED_FUNC -+#else // !CONFIG_SCHED_BORE - static ssize_t sched_scaling_write(struct file *filp, const char __user *ubuf, - size_t cnt, loff_t *ppos) - { -@@ -213,7 +259,7 @@ static const struct file_operations sched_scaling_fops = { - .llseek = seq_lseek, - .release = single_release, - }; -- -+#endif // CONFIG_SCHED_BORE - #endif /* SMP */ - - #ifdef CONFIG_PREEMPT_DYNAMIC -@@ -505,13 +551,20 @@ static __init int sched_init_debug(void) - debugfs_create_file("preempt", 0644, debugfs_sched, NULL, &sched_dynamic_fops); - #endif - -+#ifdef CONFIG_SCHED_BORE -+ debugfs_create_file("min_base_slice_ns", 0644, debugfs_sched, NULL, 
&sched_min_base_slice_fops); -+ debugfs_create_u32("base_slice_ns", 0444, debugfs_sched, &sysctl_sched_base_slice); -+#else // !CONFIG_SCHED_BORE - debugfs_create_u32("base_slice_ns", 0644, debugfs_sched, &sysctl_sched_base_slice); -+#endif // CONFIG_SCHED_BORE - - debugfs_create_u32("latency_warn_ms", 0644, debugfs_sched, &sysctl_resched_latency_warn_ms); - debugfs_create_u32("latency_warn_once", 0644, debugfs_sched, &sysctl_resched_latency_warn_once); - - #ifdef CONFIG_SMP -+#if !defined(CONFIG_SCHED_BORE) - debugfs_create_file("tunable_scaling", 0644, debugfs_sched, NULL, &sched_scaling_fops); -+#endif // CONFIG_SCHED_BORE - debugfs_create_u32("migration_cost_ns", 0644, debugfs_sched, &sysctl_sched_migration_cost); - debugfs_create_u32("nr_migrate", 0644, debugfs_sched, &sysctl_sched_nr_migrate); - -@@ -756,6 +809,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) - SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)), - SPLIT_NS(schedstat_val_or_zero(p->stats.sum_block_runtime))); - -+#ifdef CONFIG_SCHED_BORE -+ SEQ_printf(m, " %2d", p->se.burst_score); -+#endif // CONFIG_SCHED_BORE - #ifdef CONFIG_NUMA_BALANCING - SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p)); - #endif -@@ -1245,6 +1301,9 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, - - P(se.load.weight); - #ifdef CONFIG_SMP -+#ifdef CONFIG_SCHED_BORE -+ P(se.burst_score); -+#endif // CONFIG_SCHED_BORE - P(se.avg.load_sum); - P(se.avg.runnable_sum); - P(se.avg.util_sum); -diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index c532ffb153b4..c55d61977364 100644 ---- a/kernel/sched/fair.c -+++ b/kernel/sched/fair.c -@@ -55,6 +55,8 @@ - #include "stats.h" - #include "autogroup.h" - -+#include -+ - /* - * The initial- and re-scaling of tunables is configurable - * -@@ -64,28 +66,32 @@ - * SCHED_TUNABLESCALING_LOG - scaled logarithmically, *1+ilog(ncpus) - * SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus - * -- * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus)) -+ * BORE : default SCHED_TUNABLESCALING_NONE = *1 constant -+ * EEVDF: default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus)) - */ -+#ifdef CONFIG_SCHED_BORE -+unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE; -+#else // !CONFIG_SCHED_BORE - unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG; -+#endif // CONFIG_SCHED_BORE - - /* - * Minimal preemption granularity for CPU-bound tasks: - * -- * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds) -+ * BORE : base_slice = minimum multiple of nsecs_per_tick >= min_base_slice -+ * (default min_base_slice = 2000000 constant, units: nanoseconds) -+ * EEVDF: default 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds - */ --#ifdef CONFIG_CACHY --unsigned int sysctl_sched_base_slice = 350000ULL; --static unsigned int normalized_sysctl_sched_base_slice = 350000ULL; --#else -+#ifdef CONFIG_SCHED_BORE -+static const unsigned int nsecs_per_tick = 1000000000ULL / HZ; -+unsigned int sysctl_sched_min_base_slice = CONFIG_MIN_BASE_SLICE_NS; -+__read_mostly uint sysctl_sched_base_slice = nsecs_per_tick; -+#else // !CONFIG_SCHED_BORE - unsigned int sysctl_sched_base_slice = 750000ULL; - static unsigned int normalized_sysctl_sched_base_slice = 750000ULL; --#endif -+#endif // CONFIG_SCHED_BORE - --#ifdef CONFIG_CACHY --const_debug unsigned int sysctl_sched_migration_cost = 300000UL; --#else - const_debug unsigned int sysctl_sched_migration_cost = 500000UL; --#endif - - static int __init 
setup_sched_thermal_decay_shift(char *str) - { -@@ -130,12 +136,8 @@ int __weak arch_asym_cpu_priority(int cpu) - * - * (default: 5 msec, units: microseconds) - */ --#ifdef CONFIG_CACHY --static unsigned int sysctl_sched_cfs_bandwidth_slice = 3000UL; --#else - static unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL; - #endif --#endif - - #ifdef CONFIG_NUMA_BALANCING - /* Restrict the NUMA promotion throughput (MB/s) for each target node. */ -@@ -201,6 +203,13 @@ static inline void update_load_set(struct load_weight *lw, unsigned long w) - * - * This idea comes from the SD scheduler of Con Kolivas: - */ -+#ifdef CONFIG_SCHED_BORE -+static void update_sysctl(void) { -+ sysctl_sched_base_slice = nsecs_per_tick * -+ max(1UL, DIV_ROUND_UP(sysctl_sched_min_base_slice, nsecs_per_tick)); -+} -+void sched_update_min_base_slice(void) { update_sysctl(); } -+#else // !CONFIG_SCHED_BORE - static unsigned int get_update_sysctl_factor(void) - { - unsigned int cpus = min_t(unsigned int, num_online_cpus(), 8); -@@ -231,6 +240,7 @@ static void update_sysctl(void) - SET_SYSCTL(sched_base_slice); - #undef SET_SYSCTL - } -+#endif // CONFIG_SCHED_BORE - - void __init sched_init_granularity(void) - { -@@ -710,6 +720,9 @@ static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se) - - vlag = avg_vruntime(cfs_rq) - se->vruntime; - limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se); -+#ifdef CONFIG_SCHED_BORE -+ limit >>= !!sched_bore; -+#endif // CONFIG_SCHED_BORE - - se->vlag = clamp(vlag, -limit, limit); - } -@@ -934,6 +947,10 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq) - * until it gets a new slice. See the HACK in set_next_entity(). - */ - if (sched_feat(RUN_TO_PARITY) && curr && curr->vlag == curr->deadline) -+#ifdef CONFIG_SCHED_BORE -+ if (!(likely(sched_bore) && likely(sched_burst_parity_threshold) && -+ sched_burst_parity_threshold < cfs_rq->nr_running)) -+#endif // CONFIG_SCHED_BORE - return curr; - - /* Pick the leftmost entity if it's eligible */ -@@ -992,6 +1009,7 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) - * Scheduling class statistics methods: - */ - #ifdef CONFIG_SMP -+#if !defined(CONFIG_SCHED_BORE) - int sched_update_scaling(void) - { - unsigned int factor = get_update_sysctl_factor(); -@@ -1003,6 +1021,7 @@ int sched_update_scaling(void) - - return 0; - } -+#endif // CONFIG_SCHED_BORE - #endif - #endif - -@@ -1233,6 +1252,10 @@ static void update_curr(struct cfs_rq *cfs_rq) - if (unlikely(delta_exec <= 0)) - return; - -+#ifdef CONFIG_SCHED_BORE -+ curr->burst_time += delta_exec; -+ update_burst_penalty(curr); -+#endif // CONFIG_SCHED_BORE - curr->vruntime += calc_delta_fair(delta_exec, curr); - resched = update_deadline(cfs_rq, curr); - update_min_vruntime(cfs_rq); -@@ -3784,7 +3807,7 @@ dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { } - - static void place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags); - --static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, -+void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, - unsigned long weight) - { - bool curr = cfs_rq->curr == se; -@@ -5272,7 +5295,11 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) - se->rel_deadline = 0; - return; - } -- -+#ifdef CONFIG_SCHED_BORE -+ else if (likely(sched_bore)) -+ vslice >>= !!(flags & sched_deadline_boost_mask); -+ else -+#endif // CONFIG_SCHED_BORE - /* - * When joining the competition; the existing tasks will be, - * on average, 
halfway through their slice, as such start tasks -@@ -7148,6 +7175,15 @@ static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) - util_est_dequeue(&rq->cfs, p); - - util_est_update(&rq->cfs, p, flags & DEQUEUE_SLEEP); -+#ifdef CONFIG_SCHED_BORE -+ struct cfs_rq *cfs_rq = &rq->cfs; -+ struct sched_entity *se = &p->se; -+ if (flags & DEQUEUE_SLEEP && entity_is_task(se)) { -+ if (cfs_rq->curr == se) -+ update_curr(cfs_rq); -+ restart_burst(se); -+ } -+#endif // CONFIG_SCHED_BORE - if (dequeue_entities(rq, &p->se, flags) < 0) - return false; - -@@ -8961,16 +8997,25 @@ static void yield_task_fair(struct rq *rq) - /* - * Are we the only task in the tree? - */ -+#if !defined(CONFIG_SCHED_BORE) - if (unlikely(rq->nr_running == 1)) - return; - - clear_buddies(cfs_rq, se); -+#endif // CONFIG_SCHED_BORE - - update_rq_clock(rq); - /* - * Update run-time statistics of the 'current'. - */ - update_curr(cfs_rq); -+#ifdef CONFIG_SCHED_BORE -+ restart_burst_rescale_deadline(se); -+ if (unlikely(rq->nr_running == 1)) -+ return; -+ -+ clear_buddies(cfs_rq, se); -+#endif // CONFIG_SCHED_BORE - /* - * Tell update_rq_clock() that we've just updated, - * so we don't do microscopic update in schedule() -@@ -13044,6 +13089,9 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) - static void task_fork_fair(struct task_struct *p) - { - set_task_max_allowed_capacity(p); -+#ifdef CONFIG_SCHED_BORE -+ update_burst_score(&p->se); -+#endif // CONFIG_SCHED_BORE - } - - /* -@@ -13154,6 +13202,10 @@ static void attach_task_cfs_rq(struct task_struct *p) - - static void switched_from_fair(struct rq *rq, struct task_struct *p) - { -+ p->se.rel_deadline = 0; -+#ifdef CONFIG_SCHED_BORE -+ reset_task_bore(p); -+#endif // CONFIG_SCHED_BORE - detach_task_cfs_rq(p); - } - -diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h -index dee2797009e3..bdc0b9c037d4 100644 ---- a/kernel/sched/sched.h -+++ b/kernel/sched/sched.h -@@ -2091,7 +2091,11 @@ static inline void update_sched_domain_debugfs(void) { } - static inline void dirty_sched_domain_sysctl(int cpu) { } - #endif - -+#ifdef CONFIG_SCHED_BORE -+extern void sched_update_min_base_slice(void); -+#else // !CONFIG_SCHED_BORE - extern int sched_update_scaling(void); -+#endif // CONFIG_SCHED_BORE - - static inline const struct cpumask *task_user_cpus(struct task_struct *p) - { -@@ -2828,7 +2832,12 @@ extern void wakeup_preempt(struct rq *rq, struct task_struct *p, int flags); - extern const_debug unsigned int sysctl_sched_nr_migrate; - extern const_debug unsigned int sysctl_sched_migration_cost; - -+#ifdef CONFIG_SCHED_BORE -+extern unsigned int sysctl_sched_min_base_slice; -+extern __read_mostly uint sysctl_sched_base_slice; -+#else // !CONFIG_SCHED_BORE - extern unsigned int sysctl_sched_base_slice; -+#endif // CONFIG_SCHED_BORE - - #ifdef CONFIG_SCHED_DEBUG - extern int sysctl_resched_latency_warn_ms; --- -2.48.1 - diff --git a/6.13/sched-dev/0001-bore.patch b/6.13/sched-dev/0001-bore.patch deleted file mode 100644 index e000df8e..00000000 --- a/6.13/sched-dev/0001-bore.patch +++ /dev/null @@ -1,1005 +0,0 @@ -From 9e3f11411e7128d3ebbbe546df56fb110f0d9370 Mon Sep 17 00:00:00 2001 -From: Masahito S -Date: Mon, 20 Jan 2025 07:24:54 +0900 -Subject: [PATCH] linux6.13.y-bore5.9.6 - ---- - include/linux/sched.h | 18 ++ - include/linux/sched/bore.h | 40 ++++ - init/Kconfig | 17 ++ - kernel/Kconfig.hz | 17 ++ - kernel/fork.c | 6 + - kernel/sched/Makefile | 1 + - kernel/sched/bore.c | 443 +++++++++++++++++++++++++++++++++++++ - 
kernel/sched/core.c | 6 + - kernel/sched/debug.c | 61 ++++- - kernel/sched/fair.c | 73 +++++- - kernel/sched/sched.h | 9 + - 11 files changed, 686 insertions(+), 5 deletions(-) - create mode 100644 include/linux/sched/bore.h - create mode 100644 kernel/sched/bore.c - -diff --git a/include/linux/sched.h b/include/linux/sched.h -index 66b311fbd5..43a00a7308 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -538,6 +538,15 @@ struct sched_statistics { - #endif /* CONFIG_SCHEDSTATS */ - } ____cacheline_aligned; - -+#ifdef CONFIG_SCHED_BORE -+struct sched_burst_cache { -+ u8 score; -+ u32 count; -+ u64 timestamp; -+ spinlock_t lock; -+}; -+#endif // CONFIG_SCHED_BORE -+ - struct sched_entity { - /* For load-balancing: */ - struct load_weight load; -@@ -557,6 +566,15 @@ struct sched_entity { - u64 sum_exec_runtime; - u64 prev_sum_exec_runtime; - u64 vruntime; -+#ifdef CONFIG_SCHED_BORE -+ u64 burst_time; -+ u8 prev_burst_penalty; -+ u8 curr_burst_penalty; -+ u8 burst_penalty; -+ u8 burst_score; -+ struct sched_burst_cache child_burst; -+ struct sched_burst_cache group_burst; -+#endif // CONFIG_SCHED_BORE - s64 vlag; - u64 slice; - -diff --git a/include/linux/sched/bore.h b/include/linux/sched/bore.h -new file mode 100644 -index 0000000000..a8faabc288 ---- /dev/null -+++ b/include/linux/sched/bore.h -@@ -0,0 +1,40 @@ -+ -+#include -+#include -+ -+#ifndef _LINUX_SCHED_BORE_H -+#define _LINUX_SCHED_BORE_H -+#define SCHED_BORE_VERSION "5.9.6" -+ -+#ifdef CONFIG_SCHED_BORE -+extern u8 __read_mostly sched_bore; -+extern u8 __read_mostly sched_burst_exclude_kthreads; -+extern u8 __read_mostly sched_burst_smoothness_long; -+extern u8 __read_mostly sched_burst_smoothness_short; -+extern u8 __read_mostly sched_burst_fork_atavistic; -+extern u8 __read_mostly sched_burst_parity_threshold; -+extern u8 __read_mostly sched_burst_penalty_offset; -+extern uint __read_mostly sched_burst_penalty_scale; -+extern uint __read_mostly sched_burst_cache_stop_count; -+extern uint __read_mostly sched_burst_cache_lifetime; -+extern uint __read_mostly sched_deadline_boost_mask; -+ -+extern void update_burst_score(struct sched_entity *se); -+extern void update_burst_penalty(struct sched_entity *se); -+ -+extern void restart_burst(struct sched_entity *se); -+extern void restart_burst_rescale_deadline(struct sched_entity *se); -+ -+extern int sched_bore_update_handler(const struct ctl_table *table, int write, -+ void __user *buffer, size_t *lenp, loff_t *ppos); -+ -+extern void sched_clone_bore( -+ struct task_struct *p, struct task_struct *parent, u64 clone_flags, u64 now); -+ -+extern void reset_task_bore(struct task_struct *p); -+extern void sched_bore_init(void); -+ -+extern void reweight_entity( -+ struct cfs_rq *cfs_rq, struct sched_entity *se, unsigned long weight); -+#endif // CONFIG_SCHED_BORE -+#endif // _LINUX_SCHED_BORE_H -diff --git a/init/Kconfig b/init/Kconfig -index a20e6efd3f..0b17af19d3 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -1342,6 +1342,23 @@ config CHECKPOINT_RESTORE - - If unsure, say N here. - -+config SCHED_BORE -+ bool "Burst-Oriented Response Enhancer" -+ default y -+ help -+ In Desktop and Mobile computing, one might prefer interactive -+ tasks to keep responsive no matter what they run in the background. -+ -+ Enabling this kernel feature modifies the scheduler to discriminate -+ tasks by their burst time (runtime since it last went sleeping or -+ yielding state) and prioritize those that run less bursty. 
-+ Such tasks usually include window compositor, widgets backend, -+ terminal emulator, video playback, games and so on. -+ With a little impact to scheduling fairness, it may improve -+ responsiveness especially under heavy background workload. -+ -+ If unsure, say Y here. -+ - config SCHED_AUTOGROUP - bool "Automatic process group scheduling" - select CGROUPS -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 38ef6d0688..253c566b59 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -55,5 +55,22 @@ config HZ - default 300 if HZ_300 - default 1000 if HZ_1000 - -+config MIN_BASE_SLICE_NS -+ int "Default value for min_base_slice_ns" -+ default 2000000 -+ help -+ The BORE Scheduler automatically calculates the optimal base -+ slice for the configured HZ using the following equation: -+ -+ base_slice_ns = -+ 1000000000/HZ * DIV_ROUNDUP(min_base_slice_ns, 1000000000/HZ) -+ -+ This option sets the default lower bound limit of the base slice -+ to prevent the loss of task throughput due to overscheduling. -+ -+ Setting this value too high can cause the system to boot with -+ an unnecessarily large base slice, resulting in high scheduling -+ latency and poor system responsiveness. -+ - config SCHED_HRTICK - def_bool HIGH_RES_TIMERS -diff --git a/kernel/fork.c b/kernel/fork.c -index 9b301180fd..e2ca4830c3 100644 ---- a/kernel/fork.c -+++ b/kernel/fork.c -@@ -112,6 +112,8 @@ - #include - #include - -+#include -+ - #include - - #define CREATE_TRACE_POINTS -@@ -2515,6 +2517,10 @@ __latent_entropy struct task_struct *copy_process( - p->start_time = ktime_get_ns(); - p->start_boottime = ktime_get_boottime_ns(); - -+#ifdef CONFIG_SCHED_BORE -+ if (likely(p->pid)) -+ sched_clone_bore(p, current, clone_flags, p->start_time); -+#endif // CONFIG_SCHED_BORE - /* - * Make it visible to the rest of the system, but dont wake it up yet. - * Need tasklist lock for parent etc handling! 
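To make the base_slice_ns equation in the MIN_BASE_SLICE_NS help text above concrete (taking its default of 2,000,000 ns): at HZ=1000 a tick is 1,000,000 ns and DIV_ROUND_UP(2,000,000, 1,000,000) = 2, so base_slice_ns becomes 2,000,000 ns (2 ms); at HZ=300 a tick is 3,333,333 ns and DIV_ROUND_UP(2,000,000, 3,333,333) = 1, so base_slice_ns is rounded up to one whole tick, 3,333,333 ns. This is exactly the computation performed by the update_sysctl() helper added to kernel/sched/fair.c later in this patch: nsecs_per_tick * max(1, DIV_ROUND_UP(sysctl_sched_min_base_slice, nsecs_per_tick)).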
-diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile -index 976092b7bd..293aad6754 100644 ---- a/kernel/sched/Makefile -+++ b/kernel/sched/Makefile -@@ -32,3 +32,4 @@ obj-y += core.o - obj-y += fair.o - obj-y += build_policy.o - obj-y += build_utility.o -+obj-y += bore.o -diff --git a/kernel/sched/bore.c b/kernel/sched/bore.c -new file mode 100644 -index 0000000000..23aeb56494 ---- /dev/null -+++ b/kernel/sched/bore.c -@@ -0,0 +1,443 @@ -+/* -+ * Burst-Oriented Response Enhancer (BORE) CPU Scheduler -+ * Copyright (C) 2021-2024 Masahito Suzuki -+ */ -+#include -+#include -+#include -+#include "sched.h" -+ -+#ifdef CONFIG_SCHED_BORE -+u8 __read_mostly sched_bore = 1; -+u8 __read_mostly sched_burst_exclude_kthreads = 1; -+u8 __read_mostly sched_burst_smoothness_long = 1; -+u8 __read_mostly sched_burst_smoothness_short = 0; -+u8 __read_mostly sched_burst_fork_atavistic = 2; -+u8 __read_mostly sched_burst_parity_threshold = 2; -+u8 __read_mostly sched_burst_penalty_offset = 24; -+uint __read_mostly sched_burst_penalty_scale = 1280; -+uint __read_mostly sched_burst_cache_stop_count = 64; -+uint __read_mostly sched_burst_cache_lifetime = 75000000; -+uint __read_mostly sched_deadline_boost_mask = ENQUEUE_INITIAL -+ | ENQUEUE_WAKEUP; -+static int __maybe_unused sixty_four = 64; -+static int __maybe_unused maxval_u8 = 255; -+static int __maybe_unused maxval_12_bits = 4095; -+ -+#define MAX_BURST_PENALTY (39U <<2) -+ -+static inline u32 log2plus1_u64_u32f8(u64 v) { -+ u32 integral = fls64(v); -+ u8 fractional = v << (64 - integral) >> 55; -+ return integral << 8 | fractional; -+} -+ -+static inline u32 calc_burst_penalty(u64 burst_time) { -+ u32 greed, tolerance, penalty, scaled_penalty; -+ -+ greed = log2plus1_u64_u32f8(burst_time); -+ tolerance = sched_burst_penalty_offset << 8; -+ penalty = max(0, (s32)(greed - tolerance)); -+ scaled_penalty = penalty * sched_burst_penalty_scale >> 16; -+ -+ return min(MAX_BURST_PENALTY, scaled_penalty); -+} -+ -+static inline u64 __scale_slice(u64 delta, u8 score) -+{return mul_u64_u32_shr(delta, sched_prio_to_wmult[score], 22);} -+ -+static inline u64 __unscale_slice(u64 delta, u8 score) -+{return mul_u64_u32_shr(delta, sched_prio_to_weight[score], 10);} -+ -+static void reweight_task_by_prio(struct task_struct *p, int prio) { -+ struct sched_entity *se = &p->se; -+ unsigned long weight = scale_load(sched_prio_to_weight[prio]); -+ -+ reweight_entity(cfs_rq_of(se), se, weight); -+ se->load.inv_weight = sched_prio_to_wmult[prio]; -+} -+ -+static inline u8 effective_prio(struct task_struct *p) { -+ u8 prio = p->static_prio - MAX_RT_PRIO; -+ if (likely(sched_bore)) -+ prio += p->se.burst_score; -+ return min(39, prio); -+} -+ -+void update_burst_score(struct sched_entity *se) { -+ if (!entity_is_task(se)) return; -+ struct task_struct *p = task_of(se); -+ u8 prev_prio = effective_prio(p); -+ -+ u8 burst_score = 0; -+ if (!((p->flags & PF_KTHREAD) && likely(sched_burst_exclude_kthreads))) -+ burst_score = se->burst_penalty >> 2; -+ se->burst_score = burst_score; -+ -+ u8 new_prio = effective_prio(p); -+ if (new_prio != prev_prio) -+ reweight_task_by_prio(p, new_prio); -+} -+ -+void update_burst_penalty(struct sched_entity *se) { -+ se->curr_burst_penalty = calc_burst_penalty(se->burst_time); -+ se->burst_penalty = max(se->prev_burst_penalty, se->curr_burst_penalty); -+ update_burst_score(se); -+} -+ -+static inline u32 binary_smooth(u32 new, u32 old) { -+ int increment = new - old; -+ return (0 <= increment)? 
-+ old + ( increment >> (int)sched_burst_smoothness_long): -+ old - (-increment >> (int)sched_burst_smoothness_short); -+} -+ -+static void revolve_burst_penalty(struct sched_entity *se) { -+ se->prev_burst_penalty = -+ binary_smooth(se->curr_burst_penalty, se->prev_burst_penalty); -+ se->burst_time = 0; -+ se->curr_burst_penalty = 0; -+} -+ -+inline void restart_burst(struct sched_entity *se) { -+ revolve_burst_penalty(se); -+ se->burst_penalty = se->prev_burst_penalty; -+ update_burst_score(se); -+} -+ -+void restart_burst_rescale_deadline(struct sched_entity *se) { -+ s64 vscaled, wremain, vremain = se->deadline - se->vruntime; -+ struct task_struct *p = task_of(se); -+ u8 prev_prio = effective_prio(p); -+ restart_burst(se); -+ u8 new_prio = effective_prio(p); -+ if (prev_prio > new_prio) { -+ wremain = __unscale_slice(abs(vremain), prev_prio); -+ vscaled = __scale_slice(wremain, new_prio); -+ if (unlikely(vremain < 0)) -+ vscaled = -vscaled; -+ se->deadline = se->vruntime + vscaled; -+ } -+} -+ -+static inline bool task_is_bore_eligible(struct task_struct *p) -+{return p && p->sched_class == &fair_sched_class && !p->exit_state;} -+ -+static void reset_task_weights_bore(void) { -+ struct task_struct *task; -+ struct rq *rq; -+ struct rq_flags rf; -+ -+ write_lock_irq(&tasklist_lock); -+ for_each_process(task) { -+ if (!task_is_bore_eligible(task)) continue; -+ rq = task_rq(task); -+ rq_pin_lock(rq, &rf); -+ update_rq_clock(rq); -+ reweight_task_by_prio(task, effective_prio(task)); -+ rq_unpin_lock(rq, &rf); -+ } -+ write_unlock_irq(&tasklist_lock); -+} -+ -+int sched_bore_update_handler(const struct ctl_table *table, int write, -+ void __user *buffer, size_t *lenp, loff_t *ppos) { -+ int ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos); -+ if (ret || !write) -+ return ret; -+ -+ reset_task_weights_bore(); -+ -+ return 0; -+} -+ -+#define for_each_child(p, t) \ -+ list_for_each_entry(t, &(p)->children, sibling) -+ -+static u32 count_entries_upto2(struct list_head *head) { -+ struct list_head *next = head->next; -+ return (next != head) + (next->next != head); -+} -+ -+static inline void init_task_burst_cache_lock(struct task_struct *p) { -+ spin_lock_init(&p->se.child_burst.lock); -+ spin_lock_init(&p->se.group_burst.lock); -+} -+ -+static inline bool burst_cache_expired(struct sched_burst_cache *bc, u64 now) -+{return (s64)(bc->timestamp + sched_burst_cache_lifetime - now) < 0;} -+ -+static void update_burst_cache(struct sched_burst_cache *bc, -+ struct task_struct *p, u32 cnt, u32 sum, u64 now) { -+ u8 avg = cnt ? 
sum / cnt : 0; -+ bc->score = max(avg, p->se.burst_penalty); -+ bc->count = cnt; -+ bc->timestamp = now; -+} -+ -+static inline void update_child_burst_direct(struct task_struct *p, u64 now) { -+ u32 cnt = 0, sum = 0; -+ struct task_struct *child; -+ -+ for_each_child(p, child) { -+ if (!task_is_bore_eligible(child)) continue; -+ cnt++; -+ sum += child->se.burst_penalty; -+ } -+ -+ update_burst_cache(&p->se.child_burst, p, cnt, sum, now); -+} -+ -+static inline u8 inherit_burst_direct( -+ struct task_struct *p, u64 now, u64 clone_flags) { -+ struct task_struct *parent = p; -+ struct sched_burst_cache *bc; -+ -+ if (clone_flags & CLONE_PARENT) -+ parent = parent->real_parent; -+ -+ bc = &parent->se.child_burst; -+ guard(spinlock)(&bc->lock); -+ if (burst_cache_expired(bc, now)) -+ update_child_burst_direct(parent, now); -+ -+ return bc->score; -+} -+ -+static void update_child_burst_topological( -+ struct task_struct *p, u64 now, u32 depth, u32 *acnt, u32 *asum) { -+ u32 cnt = 0, dcnt = 0, sum = 0; -+ struct task_struct *child, *dec; -+ struct sched_burst_cache *bc __maybe_unused; -+ -+ for_each_child(p, child) { -+ dec = child; -+ while ((dcnt = count_entries_upto2(&dec->children)) == 1) -+ dec = list_first_entry(&dec->children, struct task_struct, sibling); -+ -+ if (!dcnt || !depth) { -+ if (!task_is_bore_eligible(dec)) continue; -+ cnt++; -+ sum += dec->se.burst_penalty; -+ continue; -+ } -+ bc = &dec->se.child_burst; -+ spin_lock(&bc->lock); -+ if (!burst_cache_expired(bc, now)) { -+ cnt += bc->count; -+ sum += (u32)bc->score * bc->count; -+ if (sched_burst_cache_stop_count <= cnt) { -+ spin_unlock(&bc->lock); -+ break; -+ } -+ spin_unlock(&bc->lock); -+ continue; -+ } -+ update_child_burst_topological(dec, now, depth - 1, &cnt, &sum); -+ spin_unlock(&bc->lock); -+ } -+ -+ update_burst_cache(&p->se.child_burst, p, cnt, sum, now); -+ *acnt += cnt; -+ *asum += sum; -+} -+ -+static inline u8 inherit_burst_topological( -+ struct task_struct *p, u64 now, u64 clone_flags) { -+ struct task_struct *anc = p; -+ struct sched_burst_cache *bc; -+ u32 cnt = 0, sum = 0; -+ u32 base_child_cnt = 0; -+ -+ if (clone_flags & CLONE_PARENT) { -+ anc = anc->real_parent; -+ base_child_cnt = 1; -+ } -+ -+ for (struct task_struct *next; -+ anc != (next = anc->real_parent) && -+ count_entries_upto2(&anc->children) <= base_child_cnt;) { -+ anc = next; -+ base_child_cnt = 1; -+ } -+ -+ bc = &anc->se.child_burst; -+ guard(spinlock)(&bc->lock); -+ if (burst_cache_expired(bc, now)) -+ update_child_burst_topological( -+ anc, now, sched_burst_fork_atavistic - 1, &cnt, &sum); -+ -+ return bc->score; -+} -+ -+static inline void update_tg_burst(struct task_struct *p, u64 now) { -+ struct task_struct *task; -+ u32 cnt = 0, sum = 0; -+ -+ for_each_thread(p, task) { -+ if (!task_is_bore_eligible(task)) continue; -+ cnt++; -+ sum += task->se.burst_penalty; -+ } -+ -+ update_burst_cache(&p->se.group_burst, p, cnt, sum, now); -+} -+ -+static inline u8 inherit_burst_tg(struct task_struct *p, u64 now) { -+ struct task_struct *parent = rcu_dereference(p->group_leader); -+ struct sched_burst_cache *bc = &parent->se.group_burst; -+ guard(spinlock)(&bc->lock); -+ if (burst_cache_expired(bc, now)) -+ update_tg_burst(parent, now); -+ -+ return bc->score; -+} -+ -+void sched_clone_bore(struct task_struct *p, -+ struct task_struct *parent, u64 clone_flags, u64 now) { -+ struct sched_entity *se = &p->se; -+ u8 penalty; -+ -+ init_task_burst_cache_lock(p); -+ -+ if (!task_is_bore_eligible(p)) return; -+ -+ if (clone_flags & CLONE_THREAD) 
{ -+ rcu_read_lock(); -+ penalty = inherit_burst_tg(parent, now); -+ rcu_read_unlock(); -+ } else { -+ read_lock(&tasklist_lock); -+ penalty = likely(sched_burst_fork_atavistic) ? -+ inherit_burst_topological(parent, now, clone_flags): -+ inherit_burst_direct(parent, now, clone_flags); -+ read_unlock(&tasklist_lock); -+ } -+ -+ revolve_burst_penalty(se); -+ se->burst_penalty = se->prev_burst_penalty = -+ max(se->prev_burst_penalty, penalty); -+ se->child_burst.timestamp = 0; -+ se->group_burst.timestamp = 0; -+} -+ -+void reset_task_bore(struct task_struct *p) { -+ p->se.burst_time = 0; -+ p->se.prev_burst_penalty = 0; -+ p->se.curr_burst_penalty = 0; -+ p->se.burst_penalty = 0; -+ p->se.burst_score = 0; -+ memset(&p->se.child_burst, 0, sizeof(struct sched_burst_cache)); -+ memset(&p->se.group_burst, 0, sizeof(struct sched_burst_cache)); -+} -+ -+void __init sched_bore_init(void) { -+ printk(KERN_INFO "BORE (Burst-Oriented Response Enhancer) CPU Scheduler modification %s by Masahito Suzuki", SCHED_BORE_VERSION); -+ reset_task_bore(&init_task); -+ init_task_burst_cache_lock(&init_task); -+} -+ -+#ifdef CONFIG_SYSCTL -+static struct ctl_table sched_bore_sysctls[] = { -+ { -+ .procname = "sched_bore", -+ .data = &sched_bore, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = sched_bore_update_handler, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_ONE, -+ }, -+ { -+ .procname = "sched_burst_exclude_kthreads", -+ .data = &sched_burst_exclude_kthreads, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = proc_dou8vec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_ONE, -+ }, -+ { -+ .procname = "sched_burst_smoothness_long", -+ .data = &sched_burst_smoothness_long, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = proc_dou8vec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_ONE, -+ }, -+ { -+ .procname = "sched_burst_smoothness_short", -+ .data = &sched_burst_smoothness_short, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = proc_dou8vec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_ONE, -+ }, -+ { -+ .procname = "sched_burst_fork_atavistic", -+ .data = &sched_burst_fork_atavistic, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = proc_dou8vec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_THREE, -+ }, -+ { -+ .procname = "sched_burst_parity_threshold", -+ .data = &sched_burst_parity_threshold, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = proc_dou8vec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = &maxval_u8, -+ }, -+ { -+ .procname = "sched_burst_penalty_offset", -+ .data = &sched_burst_penalty_offset, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = proc_dou8vec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = &sixty_four, -+ }, -+ { -+ .procname = "sched_burst_penalty_scale", -+ .data = &sched_burst_penalty_scale, -+ .maxlen = sizeof(uint), -+ .mode = 0644, -+ .proc_handler = proc_douintvec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = &maxval_12_bits, -+ }, -+ { -+ .procname = "sched_burst_cache_stop_count", -+ .data = &sched_burst_cache_stop_count, -+ .maxlen = sizeof(uint), -+ .mode = 0644, -+ .proc_handler = proc_douintvec, -+ }, -+ { -+ .procname = "sched_burst_cache_lifetime", -+ .data = &sched_burst_cache_lifetime, -+ .maxlen = sizeof(uint), -+ .mode = 0644, -+ .proc_handler = proc_douintvec, -+ }, -+ { -+ .procname = "sched_deadline_boost_mask", -+ .data = &sched_deadline_boost_mask, -+ .maxlen = sizeof(uint), -+ .mode = 0644, -+ .proc_handler = proc_douintvec, -+ }, -+}; -+ 
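Because sched_bore_sysctl_init() just below registers this table under "kernel", the knobs surface as /proc/sys/kernel/sched_bore and friends. A minimal userspace sketch for flipping the main switch at runtime (needs root; error handling kept to the bare minimum, purely illustrative):

	/* sketch: turn BORE off at runtime via the sysctl file */
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("/proc/sys/kernel/sched_bore", O_WRONLY);

		if (fd < 0) {
			perror("open");    /* kernel built without CONFIG_SCHED_BORE? */
			return 1;
		}
		if (write(fd, "0", 1) != 1)
			perror("write");
		close(fd);
		return 0;
	}

Writing through this file goes through sched_bore_update_handler(), so existing tasks are reweighted right away.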
-+static int __init sched_bore_sysctl_init(void) { -+ register_sysctl_init("kernel", sched_bore_sysctls); -+ return 0; -+} -+late_initcall(sched_bore_sysctl_init); -+#endif // CONFIG_SYSCTL -+#endif // CONFIG_SCHED_BORE -diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index 3e5a6bf587..fb4bb3fa5a 100644 ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -97,6 +97,8 @@ - #include "../../io_uring/io-wq.h" - #include "../smpboot.h" - -+#include -+ - EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpu); - EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpumask); - -@@ -8481,6 +8483,10 @@ void __init sched_init(void) - BUG_ON(!sched_class_above(&ext_sched_class, &idle_sched_class)); - #endif - -+#ifdef CONFIG_SCHED_BORE -+ sched_bore_init(); -+#endif // CONFIG_SCHED_BORE -+ - wait_bit_init(); - - #ifdef CONFIG_FAIR_GROUP_SCHED -diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c -index a1be00a988..66fcb22900 100644 ---- a/kernel/sched/debug.c -+++ b/kernel/sched/debug.c -@@ -167,7 +167,53 @@ static const struct file_operations sched_feat_fops = { - }; - - #ifdef CONFIG_SMP -+#ifdef CONFIG_SCHED_BORE -+#define DEFINE_SYSCTL_SCHED_FUNC(name, update_func) \ -+static ssize_t sched_##name##_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) \ -+{ \ -+ char buf[16]; \ -+ unsigned int value; \ -+\ -+ if (cnt > 15) \ -+ cnt = 15; \ -+\ -+ if (copy_from_user(&buf, ubuf, cnt)) \ -+ return -EFAULT; \ -+ buf[cnt] = '\0'; \ -+\ -+ if (kstrtouint(buf, 10, &value)) \ -+ return -EINVAL; \ -+\ -+ sysctl_sched_##name = value; \ -+ sched_update_##update_func(); \ -+\ -+ *ppos += cnt; \ -+ return cnt; \ -+} \ -+\ -+static int sched_##name##_show(struct seq_file *m, void *v) \ -+{ \ -+ seq_printf(m, "%d\n", sysctl_sched_##name); \ -+ return 0; \ -+} \ -+\ -+static int sched_##name##_open(struct inode *inode, struct file *filp) \ -+{ \ -+ return single_open(filp, sched_##name##_show, NULL); \ -+} \ -+\ -+static const struct file_operations sched_##name##_fops = { \ -+ .open = sched_##name##_open, \ -+ .write = sched_##name##_write, \ -+ .read = seq_read, \ -+ .llseek = seq_lseek, \ -+ .release = single_release, \ -+}; -+ -+DEFINE_SYSCTL_SCHED_FUNC(min_base_slice, min_base_slice) - -+#undef DEFINE_SYSCTL_SCHED_FUNC -+#else // !CONFIG_SCHED_BORE - static ssize_t sched_scaling_write(struct file *filp, const char __user *ubuf, - size_t cnt, loff_t *ppos) - { -@@ -213,7 +259,7 @@ static const struct file_operations sched_scaling_fops = { - .llseek = seq_lseek, - .release = single_release, - }; -- -+#endif // CONFIG_SCHED_BORE - #endif /* SMP */ - - #ifdef CONFIG_PREEMPT_DYNAMIC -@@ -505,13 +551,20 @@ static __init int sched_init_debug(void) - debugfs_create_file("preempt", 0644, debugfs_sched, NULL, &sched_dynamic_fops); - #endif - -+#ifdef CONFIG_SCHED_BORE -+ debugfs_create_file("min_base_slice_ns", 0644, debugfs_sched, NULL, &sched_min_base_slice_fops); -+ debugfs_create_u32("base_slice_ns", 0444, debugfs_sched, &sysctl_sched_base_slice); -+#else // !CONFIG_SCHED_BORE - debugfs_create_u32("base_slice_ns", 0644, debugfs_sched, &sysctl_sched_base_slice); -+#endif // CONFIG_SCHED_BORE - - debugfs_create_u32("latency_warn_ms", 0644, debugfs_sched, &sysctl_resched_latency_warn_ms); - debugfs_create_u32("latency_warn_once", 0644, debugfs_sched, &sysctl_resched_latency_warn_once); - - #ifdef CONFIG_SMP -+#if !defined(CONFIG_SCHED_BORE) - debugfs_create_file("tunable_scaling", 0644, debugfs_sched, NULL, &sched_scaling_fops); -+#endif // CONFIG_SCHED_BORE - debugfs_create_u32("migration_cost_ns", 0644, 
debugfs_sched, &sysctl_sched_migration_cost); - debugfs_create_u32("nr_migrate", 0644, debugfs_sched, &sysctl_sched_nr_migrate); - -@@ -756,6 +809,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) - SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)), - SPLIT_NS(schedstat_val_or_zero(p->stats.sum_block_runtime))); - -+#ifdef CONFIG_SCHED_BORE -+ SEQ_printf(m, " %2d", p->se.burst_score); -+#endif // CONFIG_SCHED_BORE - #ifdef CONFIG_NUMA_BALANCING - SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p)); - #endif -@@ -1245,6 +1301,9 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, - - P(se.load.weight); - #ifdef CONFIG_SMP -+#ifdef CONFIG_SCHED_BORE -+ P(se.burst_score); -+#endif // CONFIG_SCHED_BORE - P(se.avg.load_sum); - P(se.avg.runnable_sum); - P(se.avg.util_sum); -diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index 26958431de..9331896e5d 100644 ---- a/kernel/sched/fair.c -+++ b/kernel/sched/fair.c -@@ -55,6 +55,8 @@ - #include "stats.h" - #include "autogroup.h" - -+#include -+ - /* - * The initial- and re-scaling of tunables is configurable - * -@@ -64,17 +66,30 @@ - * SCHED_TUNABLESCALING_LOG - scaled logarithmically, *1+ilog(ncpus) - * SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus - * -- * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus)) -+ * BORE : default SCHED_TUNABLESCALING_NONE = *1 constant -+ * EEVDF: default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus)) - */ -+#ifdef CONFIG_SCHED_BORE -+unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE; -+#else // !CONFIG_SCHED_BORE - unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG; -+#endif // CONFIG_SCHED_BORE - - /* - * Minimal preemption granularity for CPU-bound tasks: - * -- * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds) -+ * BORE : base_slice = minimum multiple of nsecs_per_tick >= min_base_slice -+ * (default min_base_slice = 2000000 constant, units: nanoseconds) -+ * EEVDF: default 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds - */ -+#ifdef CONFIG_SCHED_BORE -+static const unsigned int nsecs_per_tick = 1000000000ULL / HZ; -+unsigned int sysctl_sched_min_base_slice = CONFIG_MIN_BASE_SLICE_NS; -+__read_mostly uint sysctl_sched_base_slice = nsecs_per_tick; -+#else // !CONFIG_SCHED_BORE - unsigned int sysctl_sched_base_slice = 750000ULL; - static unsigned int normalized_sysctl_sched_base_slice = 750000ULL; -+#endif // CONFIG_SCHED_BORE - - const_debug unsigned int sysctl_sched_migration_cost = 500000UL; - -@@ -188,6 +203,13 @@ static inline void update_load_set(struct load_weight *lw, unsigned long w) - * - * This idea comes from the SD scheduler of Con Kolivas: - */ -+#ifdef CONFIG_SCHED_BORE -+static void update_sysctl(void) { -+ sysctl_sched_base_slice = nsecs_per_tick * -+ max(1UL, DIV_ROUND_UP(sysctl_sched_min_base_slice, nsecs_per_tick)); -+} -+void sched_update_min_base_slice(void) { update_sysctl(); } -+#else // !CONFIG_SCHED_BORE - static unsigned int get_update_sysctl_factor(void) - { - unsigned int cpus = min_t(unsigned int, num_online_cpus(), 8); -@@ -218,6 +240,7 @@ static void update_sysctl(void) - SET_SYSCTL(sched_base_slice); - #undef SET_SYSCTL - } -+#endif // CONFIG_SCHED_BORE - - void __init sched_init_granularity(void) - { -@@ -697,6 +720,9 @@ static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se) - - vlag = avg_vruntime(cfs_rq) - se->vruntime; - limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se); -+#ifdef CONFIG_SCHED_BORE -+ limit >>= 
!!sched_bore; -+#endif // CONFIG_SCHED_BORE - - se->vlag = clamp(vlag, -limit, limit); - } -@@ -921,6 +947,10 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq) - * until it gets a new slice. See the HACK in set_next_entity(). - */ - if (sched_feat(RUN_TO_PARITY) && curr && curr->vlag == curr->deadline) -+#ifdef CONFIG_SCHED_BORE -+ if (!(likely(sched_bore) && likely(sched_burst_parity_threshold) && -+ sched_burst_parity_threshold < cfs_rq->nr_running)) -+#endif // CONFIG_SCHED_BORE - return curr; - - /* Pick the leftmost entity if it's eligible */ -@@ -979,6 +1009,7 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) - * Scheduling class statistics methods: - */ - #ifdef CONFIG_SMP -+#if !defined(CONFIG_SCHED_BORE) - int sched_update_scaling(void) - { - unsigned int factor = get_update_sysctl_factor(); -@@ -990,6 +1021,7 @@ int sched_update_scaling(void) - - return 0; - } -+#endif // CONFIG_SCHED_BORE - #endif - #endif - -@@ -1220,6 +1252,10 @@ static void update_curr(struct cfs_rq *cfs_rq) - if (unlikely(delta_exec <= 0)) - return; - -+#ifdef CONFIG_SCHED_BORE -+ curr->burst_time += delta_exec; -+ update_burst_penalty(curr); -+#endif // CONFIG_SCHED_BORE - curr->vruntime += calc_delta_fair(delta_exec, curr); - resched = update_deadline(cfs_rq, curr); - update_min_vruntime(cfs_rq); -@@ -3771,7 +3807,7 @@ dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { } - - static void place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags); - --static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, -+void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, - unsigned long weight) - { - bool curr = cfs_rq->curr == se; -@@ -5259,7 +5295,11 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) - se->rel_deadline = 0; - return; - } -- -+#ifdef CONFIG_SCHED_BORE -+ else if (likely(sched_bore)) -+ vslice >>= !!(flags & sched_deadline_boost_mask); -+ else -+#endif // CONFIG_SCHED_BORE - /* - * When joining the competition; the existing tasks will be, - * on average, halfway through their slice, as such start tasks -@@ -7135,6 +7175,15 @@ static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) - util_est_dequeue(&rq->cfs, p); - - util_est_update(&rq->cfs, p, flags & DEQUEUE_SLEEP); -+#ifdef CONFIG_SCHED_BORE -+ struct cfs_rq *cfs_rq = &rq->cfs; -+ struct sched_entity *se = &p->se; -+ if (flags & DEQUEUE_SLEEP && entity_is_task(se)) { -+ if (cfs_rq->curr == se) -+ update_curr(cfs_rq); -+ restart_burst(se); -+ } -+#endif // CONFIG_SCHED_BORE - if (dequeue_entities(rq, &p->se, flags) < 0) - return false; - -@@ -8948,16 +8997,25 @@ static void yield_task_fair(struct rq *rq) - /* - * Are we the only task in the tree? - */ -+#if !defined(CONFIG_SCHED_BORE) - if (unlikely(rq->nr_running == 1)) - return; - - clear_buddies(cfs_rq, se); -+#endif // CONFIG_SCHED_BORE - - update_rq_clock(rq); - /* - * Update run-time statistics of the 'current'. 
- */ - update_curr(cfs_rq); -+#ifdef CONFIG_SCHED_BORE -+ restart_burst_rescale_deadline(se); -+ if (unlikely(rq->nr_running == 1)) -+ return; -+ -+ clear_buddies(cfs_rq, se); -+#endif // CONFIG_SCHED_BORE - /* - * Tell update_rq_clock() that we've just updated, - * so we don't do microscopic update in schedule() -@@ -13009,6 +13067,9 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) - static void task_fork_fair(struct task_struct *p) - { - set_task_max_allowed_capacity(p); -+#ifdef CONFIG_SCHED_BORE -+ update_burst_score(&p->se); -+#endif // CONFIG_SCHED_BORE - } - - /* -@@ -13119,6 +13180,10 @@ static void attach_task_cfs_rq(struct task_struct *p) - - static void switched_from_fair(struct rq *rq, struct task_struct *p) - { -+ p->se.rel_deadline = 0; -+#ifdef CONFIG_SCHED_BORE -+ reset_task_bore(p); -+#endif // CONFIG_SCHED_BORE - detach_task_cfs_rq(p); - } - -diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h -index c5d67a43fe..e14855d24a 100644 ---- a/kernel/sched/sched.h -+++ b/kernel/sched/sched.h -@@ -2092,7 +2092,11 @@ static inline void update_sched_domain_debugfs(void) { } - static inline void dirty_sched_domain_sysctl(int cpu) { } - #endif - -+#ifdef CONFIG_SCHED_BORE -+extern void sched_update_min_base_slice(void); -+#else // !CONFIG_SCHED_BORE - extern int sched_update_scaling(void); -+#endif // CONFIG_SCHED_BORE - - static inline const struct cpumask *task_user_cpus(struct task_struct *p) - { -@@ -2829,7 +2833,12 @@ extern void wakeup_preempt(struct rq *rq, struct task_struct *p, int flags); - extern const_debug unsigned int sysctl_sched_nr_migrate; - extern const_debug unsigned int sysctl_sched_migration_cost; - -+#ifdef CONFIG_SCHED_BORE -+extern unsigned int sysctl_sched_min_base_slice; -+extern __read_mostly uint sysctl_sched_base_slice; -+#else // !CONFIG_SCHED_BORE - extern unsigned int sysctl_sched_base_slice; -+#endif // CONFIG_SCHED_BORE - - #ifdef CONFIG_SCHED_DEBUG - extern int sysctl_resched_latency_warn_ms; --- -2.34.1 - diff --git a/6.13/sched/0001-bore-cachy.patch b/6.13/sched/0001-bore-cachy.patch index ab8bf50c..1eb64cb1 100644 --- a/6.13/sched/0001-bore-cachy.patch +++ b/6.13/sched/0001-bore-cachy.patch @@ -1,6 +1,6 @@ -From 80f8bf0adb51a725636db3fdabab2c6209f5348a Mon Sep 17 00:00:00 2001 +From 2aaaad0215c8d15c5133eb2bc1c77c021edff609 Mon Sep 17 00:00:00 2001 From: Eric Naim -Date: Tue, 31 Dec 2024 20:17:12 +0700 +Date: Mon, 20 Jan 2025 09:19:36 +0700 Subject: [PATCH] bore-cachy Signed-off-by: Eric Naim @@ -11,12 +11,12 @@ Signed-off-by: Eric Naim kernel/Kconfig.hz | 17 ++ kernel/fork.c | 6 + kernel/sched/Makefile | 1 + - kernel/sched/bore.c | 446 +++++++++++++++++++++++++++++++++++++ + kernel/sched/bore.c | 443 +++++++++++++++++++++++++++++++++++++ kernel/sched/core.c | 6 + kernel/sched/debug.c | 61 ++++- kernel/sched/fair.c | 86 +++++-- kernel/sched/sched.h | 9 + - 11 files changed, 689 insertions(+), 18 deletions(-) + 11 files changed, 686 insertions(+), 18 deletions(-) create mode 100644 include/linux/sched/bore.h create mode 100644 kernel/sched/bore.c @@ -58,7 +58,7 @@ index 64934e0830af..7ec02a323014 100644 diff --git a/include/linux/sched/bore.h b/include/linux/sched/bore.h new file mode 100644 -index 000000000000..a36947e12c2f +index 000000000000..a8faabc2885e --- /dev/null +++ b/include/linux/sched/bore.h @@ -0,0 +1,40 @@ @@ -68,7 +68,7 @@ index 000000000000..a36947e12c2f + +#ifndef _LINUX_SCHED_BORE_H +#define _LINUX_SCHED_BORE_H -+#define SCHED_BORE_VERSION "5.9.5" ++#define SCHED_BORE_VERSION "5.9.6" + 
+#ifdef CONFIG_SCHED_BORE +extern u8 __read_mostly sched_bore; @@ -192,10 +192,10 @@ index 976092b7bd45..293aad675444 100644 +obj-y += bore.o diff --git a/kernel/sched/bore.c b/kernel/sched/bore.c new file mode 100644 -index 000000000000..d55cd32b34ea +index 000000000000..23aeb5649479 --- /dev/null +++ b/kernel/sched/bore.c -@@ -0,0 +1,446 @@ +@@ -0,0 +1,443 @@ +/* + * Burst-Oriented Response Enhancer (BORE) CPU Scheduler + * Copyright (C) 2021-2024 Masahito Suzuki @@ -395,10 +395,9 @@ index 000000000000..d55cd32b34ea + parent = parent->real_parent; + + bc = &parent->se.child_burst; -+ spin_lock(&bc->lock); ++ guard(spinlock)(&bc->lock); + if (burst_cache_expired(bc, now)) + update_child_burst_direct(parent, now); -+ spin_unlock(&bc->lock); + + return bc->score; +} @@ -461,11 +460,10 @@ index 000000000000..d55cd32b34ea + } + + bc = &anc->se.child_burst; -+ spin_lock(&bc->lock); ++ guard(spinlock)(&bc->lock); + if (burst_cache_expired(bc, now)) + update_child_burst_topological( + anc, now, sched_burst_fork_atavistic - 1, &cnt, &sum); -+ spin_unlock(&bc->lock); + + return bc->score; +} @@ -486,10 +484,9 @@ index 000000000000..d55cd32b34ea +static inline u8 inherit_burst_tg(struct task_struct *p, u64 now) { + struct task_struct *parent = rcu_dereference(p->group_leader); + struct sched_burst_cache *bc = &parent->se.group_burst; -+ spin_lock(&bc->lock); ++ guard(spinlock)(&bc->lock); + if (burst_cache_expired(bc, now)) + update_tg_burst(parent, now); -+ spin_unlock(&bc->lock); + + return bc->score; +} @@ -775,7 +772,7 @@ index a1be00a988bf..66fcb229007d 100644 P(se.avg.runnable_sum); P(se.avg.util_sum); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index d38a4feac0c9..c455ba008d8b 100644 +index c532ffb153b4..c55d61977364 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -55,6 +55,8 @@ @@ -866,17 +863,17 @@ index d38a4feac0c9..c455ba008d8b 100644 void __init sched_init_granularity(void) { -@@ -708,6 +718,9 @@ static s64 entity_lag(u64 avruntime, struct sched_entity *se) +@@ -710,6 +720,9 @@ static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se) - vlag = avruntime - se->vruntime; + vlag = avg_vruntime(cfs_rq) - se->vruntime; limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se); +#ifdef CONFIG_SCHED_BORE + limit >>= !!sched_bore; +#endif // CONFIG_SCHED_BORE - return clamp(vlag, -limit, limit); + se->vlag = clamp(vlag, -limit, limit); } -@@ -939,6 +952,10 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq) +@@ -934,6 +947,10 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq) * until it gets a new slice. See the HACK in set_next_entity(). 
*/ if (sched_feat(RUN_TO_PARITY) && curr && curr->vlag == curr->deadline) @@ -887,7 +884,7 @@ index d38a4feac0c9..c455ba008d8b 100644 return curr; /* Pick the leftmost entity if it's eligible */ -@@ -997,6 +1014,7 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) +@@ -992,6 +1009,7 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) * Scheduling class statistics methods: */ #ifdef CONFIG_SMP @@ -895,7 +892,7 @@ index d38a4feac0c9..c455ba008d8b 100644 int sched_update_scaling(void) { unsigned int factor = get_update_sysctl_factor(); -@@ -1008,6 +1026,7 @@ int sched_update_scaling(void) +@@ -1003,6 +1021,7 @@ int sched_update_scaling(void) return 0; } @@ -903,7 +900,7 @@ index d38a4feac0c9..c455ba008d8b 100644 #endif #endif -@@ -1238,6 +1257,10 @@ static void update_curr(struct cfs_rq *cfs_rq) +@@ -1233,6 +1252,10 @@ static void update_curr(struct cfs_rq *cfs_rq) if (unlikely(delta_exec <= 0)) return; @@ -914,16 +911,16 @@ index d38a4feac0c9..c455ba008d8b 100644 curr->vruntime += calc_delta_fair(delta_exec, curr); resched = update_deadline(cfs_rq, curr); update_min_vruntime(cfs_rq); -@@ -3893,7 +3916,7 @@ static void reweight_eevdf(struct sched_entity *se, u64 avruntime, - se->deadline = avruntime + vslice; - } +@@ -3784,7 +3807,7 @@ dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { } + + static void place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags); -static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, +void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, unsigned long weight) { bool curr = cfs_rq->curr == se; -@@ -5377,7 +5400,11 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) +@@ -5272,7 +5295,11 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) se->rel_deadline = 0; return; } @@ -936,7 +933,7 @@ index d38a4feac0c9..c455ba008d8b 100644 /* * When joining the competition; the existing tasks will be, * on average, halfway through their slice, as such start tasks -@@ -7253,6 +7280,15 @@ static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) +@@ -7148,6 +7175,15 @@ static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) util_est_dequeue(&rq->cfs, p); util_est_update(&rq->cfs, p, flags & DEQUEUE_SLEEP); @@ -952,7 +949,7 @@ index d38a4feac0c9..c455ba008d8b 100644 if (dequeue_entities(rq, &p->se, flags) < 0) return false; -@@ -9066,16 +9102,25 @@ static void yield_task_fair(struct rq *rq) +@@ -8961,16 +8997,25 @@ static void yield_task_fair(struct rq *rq) /* * Are we the only task in the tree? 
*/ @@ -978,7 +975,7 @@ index d38a4feac0c9..c455ba008d8b 100644 /* * Tell update_rq_clock() that we've just updated, * so we don't do microscopic update in schedule() -@@ -13148,6 +13193,9 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) +@@ -13044,6 +13089,9 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) static void task_fork_fair(struct task_struct *p) { set_task_max_allowed_capacity(p); @@ -988,7 +985,7 @@ index d38a4feac0c9..c455ba008d8b 100644 } /* -@@ -13258,6 +13306,10 @@ static void attach_task_cfs_rq(struct task_struct *p) +@@ -13154,6 +13202,10 @@ static void attach_task_cfs_rq(struct task_struct *p) static void switched_from_fair(struct rq *rq, struct task_struct *p) { @@ -1029,5 +1026,5 @@ index dee2797009e3..bdc0b9c037d4 100644 #ifdef CONFIG_SCHED_DEBUG extern int sysctl_resched_latency_warn_ms; -- -2.47.1 +2.48.1 diff --git a/6.13/sched/0001-bore.patch b/6.13/sched/0001-bore.patch index 496fa884..e000df8e 100644 --- a/6.13/sched/0001-bore.patch +++ b/6.13/sched/0001-bore.patch @@ -1,7 +1,7 @@ -From 327b1f8f9cf94ef8561a9c6624b0a54342a4a8d3 Mon Sep 17 00:00:00 2001 +From 9e3f11411e7128d3ebbbe546df56fb110f0d9370 Mon Sep 17 00:00:00 2001 From: Masahito S -Date: Tue, 31 Dec 2024 21:49:13 +0900 -Subject: [PATCH] linux6.13.y-bore5.9.5 +Date: Mon, 20 Jan 2025 07:24:54 +0900 +Subject: [PATCH] linux6.13.y-bore5.9.6 --- include/linux/sched.h | 18 ++ @@ -10,12 +10,12 @@ Subject: [PATCH] linux6.13.y-bore5.9.5 kernel/Kconfig.hz | 17 ++ kernel/fork.c | 6 + kernel/sched/Makefile | 1 + - kernel/sched/bore.c | 446 +++++++++++++++++++++++++++++++++++++ + kernel/sched/bore.c | 443 +++++++++++++++++++++++++++++++++++++ kernel/sched/core.c | 6 + kernel/sched/debug.c | 61 ++++- kernel/sched/fair.c | 73 +++++- kernel/sched/sched.h | 9 + - 11 files changed, 689 insertions(+), 5 deletions(-) + 11 files changed, 686 insertions(+), 5 deletions(-) create mode 100644 include/linux/sched/bore.h create mode 100644 kernel/sched/bore.c @@ -57,7 +57,7 @@ index 66b311fbd5..43a00a7308 100644 diff --git a/include/linux/sched/bore.h b/include/linux/sched/bore.h new file mode 100644 -index 0000000000..a36947e12c +index 0000000000..a8faabc288 --- /dev/null +++ b/include/linux/sched/bore.h @@ -0,0 +1,40 @@ @@ -67,7 +67,7 @@ index 0000000000..a36947e12c + +#ifndef _LINUX_SCHED_BORE_H +#define _LINUX_SCHED_BORE_H -+#define SCHED_BORE_VERSION "5.9.5" ++#define SCHED_BORE_VERSION "5.9.6" + +#ifdef CONFIG_SCHED_BORE +extern u8 __read_mostly sched_bore; @@ -191,10 +191,10 @@ index 976092b7bd..293aad6754 100644 +obj-y += bore.o diff --git a/kernel/sched/bore.c b/kernel/sched/bore.c new file mode 100644 -index 0000000000..d55cd32b34 +index 0000000000..23aeb56494 --- /dev/null +++ b/kernel/sched/bore.c -@@ -0,0 +1,446 @@ +@@ -0,0 +1,443 @@ +/* + * Burst-Oriented Response Enhancer (BORE) CPU Scheduler + * Copyright (C) 2021-2024 Masahito Suzuki @@ -394,10 +394,9 @@ index 0000000000..d55cd32b34 + parent = parent->real_parent; + + bc = &parent->se.child_burst; -+ spin_lock(&bc->lock); ++ guard(spinlock)(&bc->lock); + if (burst_cache_expired(bc, now)) + update_child_burst_direct(parent, now); -+ spin_unlock(&bc->lock); + + return bc->score; +} @@ -460,11 +459,10 @@ index 0000000000..d55cd32b34 + } + + bc = &anc->se.child_burst; -+ spin_lock(&bc->lock); ++ guard(spinlock)(&bc->lock); + if (burst_cache_expired(bc, now)) + update_child_burst_topological( + anc, now, sched_burst_fork_atavistic - 1, &cnt, &sum); -+ spin_unlock(&bc->lock); + + return bc->score; +} @@ 
-485,10 +483,9 @@ index 0000000000..d55cd32b34 +static inline u8 inherit_burst_tg(struct task_struct *p, u64 now) { + struct task_struct *parent = rcu_dereference(p->group_leader); + struct sched_burst_cache *bc = &parent->se.group_burst; -+ spin_lock(&bc->lock); ++ guard(spinlock)(&bc->lock); + if (burst_cache_expired(bc, now)) + update_tg_burst(parent, now); -+ spin_unlock(&bc->lock); + + return bc->score; +} @@ -774,7 +771,7 @@ index a1be00a988..66fcb22900 100644 P(se.avg.runnable_sum); P(se.avg.util_sum); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index 3e9ca38512..647b25840d 100644 +index 26958431de..9331896e5d 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -55,6 +55,8 @@ @@ -841,17 +838,17 @@ index 3e9ca38512..647b25840d 100644 void __init sched_init_granularity(void) { -@@ -695,6 +718,9 @@ static s64 entity_lag(u64 avruntime, struct sched_entity *se) +@@ -697,6 +720,9 @@ static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se) - vlag = avruntime - se->vruntime; + vlag = avg_vruntime(cfs_rq) - se->vruntime; limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se); +#ifdef CONFIG_SCHED_BORE + limit >>= !!sched_bore; +#endif // CONFIG_SCHED_BORE - return clamp(vlag, -limit, limit); + se->vlag = clamp(vlag, -limit, limit); } -@@ -926,6 +952,10 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq) +@@ -921,6 +947,10 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq) * until it gets a new slice. See the HACK in set_next_entity(). */ if (sched_feat(RUN_TO_PARITY) && curr && curr->vlag == curr->deadline) @@ -862,7 +859,7 @@ index 3e9ca38512..647b25840d 100644 return curr; /* Pick the leftmost entity if it's eligible */ -@@ -984,6 +1014,7 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) +@@ -979,6 +1009,7 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) * Scheduling class statistics methods: */ #ifdef CONFIG_SMP @@ -870,7 +867,7 @@ index 3e9ca38512..647b25840d 100644 int sched_update_scaling(void) { unsigned int factor = get_update_sysctl_factor(); -@@ -995,6 +1026,7 @@ int sched_update_scaling(void) +@@ -990,6 +1021,7 @@ int sched_update_scaling(void) return 0; } @@ -878,7 +875,7 @@ index 3e9ca38512..647b25840d 100644 #endif #endif -@@ -1225,6 +1257,10 @@ static void update_curr(struct cfs_rq *cfs_rq) +@@ -1220,6 +1252,10 @@ static void update_curr(struct cfs_rq *cfs_rq) if (unlikely(delta_exec <= 0)) return; @@ -889,16 +886,16 @@ index 3e9ca38512..647b25840d 100644 curr->vruntime += calc_delta_fair(delta_exec, curr); resched = update_deadline(cfs_rq, curr); update_min_vruntime(cfs_rq); -@@ -3880,7 +3916,7 @@ static void reweight_eevdf(struct sched_entity *se, u64 avruntime, - se->deadline = avruntime + vslice; - } +@@ -3771,7 +3807,7 @@ dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { } + + static void place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags); -static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, +void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, unsigned long weight) { bool curr = cfs_rq->curr == se; -@@ -5364,7 +5400,11 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) +@@ -5259,7 +5295,11 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) se->rel_deadline = 0; return; } @@ -911,7 +908,7 @@ index 3e9ca38512..647b25840d 100644 /* * When joining the competition; the existing tasks will be, * on average, halfway through their slice, as such 
start tasks -@@ -7240,6 +7280,15 @@ static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) +@@ -7135,6 +7175,15 @@ static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) util_est_dequeue(&rq->cfs, p); util_est_update(&rq->cfs, p, flags & DEQUEUE_SLEEP); @@ -927,7 +924,7 @@ index 3e9ca38512..647b25840d 100644 if (dequeue_entities(rq, &p->se, flags) < 0) return false; -@@ -9053,16 +9102,25 @@ static void yield_task_fair(struct rq *rq) +@@ -8948,16 +8997,25 @@ static void yield_task_fair(struct rq *rq) /* * Are we the only task in the tree? */ @@ -953,7 +950,7 @@ index 3e9ca38512..647b25840d 100644 /* * Tell update_rq_clock() that we've just updated, * so we don't do microscopic update in schedule() -@@ -13114,6 +13172,9 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) +@@ -13009,6 +13067,9 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) static void task_fork_fair(struct task_struct *p) { set_task_max_allowed_capacity(p); @@ -963,7 +960,7 @@ index 3e9ca38512..647b25840d 100644 } /* -@@ -13224,6 +13285,10 @@ static void attach_task_cfs_rq(struct task_struct *p) +@@ -13119,6 +13180,10 @@ static void attach_task_cfs_rq(struct task_struct *p) static void switched_from_fair(struct rq *rq, struct task_struct *p) {