diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-12-02 17:03:55 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-12-02 17:03:55 -0800 |
| commit | 44fc84337b6eae580a51cf6f7ca6a22ef1349556 (patch) | |
| tree | 7a6f802b2d35ddbb30c8591159877a3bffede6c0 /mm | |
| parent | 2547f79b0b0cd969ae6f736890af4ebd9368cda5 (diff) | |
| parent | edde060637b92607f3522252c03d64ad06369933 (diff) | |
Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm64 updates from Catalin Marinas:
"These are the arm64 updates for 6.19.
The biggest part is the Arm MPAM driver under drivers/resctrl/.
There's a patch touching mm/ to handle spurious faults for huge pmd
(similar to the pte version). The corresponding arm64 part allows us
to avoid the TLB maintenance if a (huge) page is reused after a write
fault. There's EFI refactoring to allow runtime services with
preemption enabled and the rest is the usual perf/PMU updates and
several cleanups/typos.
Summary:
Core features:
- Basic Arm MPAM (Memory system resource Partitioning And Monitoring)
driver under drivers/resctrl/ which makes use of the fs/rectrl/ API
Perf and PMU:
- Avoid cycle counter on multi-threaded CPUs
- Extend CSPMU device probing and add additional filtering support
for NVIDIA implementations
- Add support for the PMUs on the NoC S3 interconnect
- Add additional compatible strings for new Cortex and C1 CPUs
- Add support for data source filtering to the SPE driver
- Add support for i.MX8QM and "DB" PMU in the imx PMU driver
Memory managemennt:
- Avoid broadcast TLBI if page reused in write fault
- Elide TLB invalidation if the old PTE was not valid
- Drop redundant cpu_set_*_tcr_t0sz() macros
- Propagate pgtable_alloc() errors outside of __create_pgd_mapping()
- Propagate return value from __change_memory_common()
ACPI and EFI:
- Call EFI runtime services without disabling preemption
- Remove unused ACPI function
Miscellaneous:
- ptrace support to disable streaming on SME-only systems
- Improve sysreg generation to include a 'Prefix' descriptor
- Replace __ASSEMBLY__ with __ASSEMBLER__
- Align register dumps in the kselftest zt-test
- Remove some no longer used macros/functions
- Various spelling corrections"
* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (94 commits)
arm64/mm: Document why linear map split failure upon vm_reset_perms is not problematic
arm64/pageattr: Propagate return value from __change_memory_common
arm64/sysreg: Remove unused define ARM64_FEATURE_FIELD_BITS
KVM: arm64: selftests: Consider all 7 possible levels of cache
KVM: arm64: selftests: Remove ARM64_FEATURE_FIELD_BITS and its last user
arm64: atomics: lse: Remove unused parameters from ATOMIC_FETCH_OP_AND macros
Documentation/arm64: Fix the typo of register names
ACPI: GTDT: Get rid of acpi_arch_timer_mem_init()
perf: arm_spe: Add support for filtering on data source
perf: Add perf_event_attr::config4
perf/imx_ddr: Add support for PMU in DB (system interconnects)
perf/imx_ddr: Get and enable optional clks
perf/imx_ddr: Move ida_alloc() from ddr_perf_init() to ddr_perf_probe()
dt-bindings: perf: fsl-imx-ddr: Add compatible string for i.MX8QM, i.MX8QXP and i.MX8DXL
arm64: remove duplicate ARCH_HAS_MEM_ENCRYPT
arm64: mm: use untagged address to calculate page index
MAINTAINERS: new entry for MPAM Driver
arm_mpam: Add kunit tests for props_mismatch()
arm_mpam: Add kunit test for bitmap reset
arm_mpam: Add helper to reset saved mbwu state
...
Diffstat (limited to 'mm')
| -rw-r--r-- | mm/huge_memory.c | 33 | ||||
| -rw-r--r-- | mm/internal.h | 2 | ||||
| -rw-r--r-- | mm/memory.c | 62 |
3 files changed, 68 insertions, 29 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 6cba1cb14b23..1192e62531cd 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1642,17 +1642,30 @@ vm_fault_t vmf_insert_folio_pud(struct vm_fault *vmf, struct folio *folio, EXPORT_SYMBOL_GPL(vmf_insert_folio_pud); #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ -void touch_pmd(struct vm_area_struct *vma, unsigned long addr, +/** + * touch_pmd - Mark page table pmd entry as accessed and dirty (for write) + * @vma: The VMA covering @addr + * @addr: The virtual address + * @pmd: pmd pointer into the page table mapping @addr + * @write: Whether it's a write access + * + * Return: whether the pmd entry is changed + */ +bool touch_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, bool write) { - pmd_t _pmd; + pmd_t entry; - _pmd = pmd_mkyoung(*pmd); + entry = pmd_mkyoung(*pmd); if (write) - _pmd = pmd_mkdirty(_pmd); + entry = pmd_mkdirty(entry); if (pmdp_set_access_flags(vma, addr & HPAGE_PMD_MASK, - pmd, _pmd, write)) + pmd, entry, write)) { update_mmu_cache_pmd(vma, addr, pmd); + return true; + } + + return false; } int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, @@ -1842,18 +1855,14 @@ unlock: } #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ -void huge_pmd_set_accessed(struct vm_fault *vmf) +bool huge_pmd_set_accessed(struct vm_fault *vmf) { bool write = vmf->flags & FAULT_FLAG_WRITE; - vmf->ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd); if (unlikely(!pmd_same(*vmf->pmd, vmf->orig_pmd))) - goto unlock; - - touch_pmd(vmf->vma, vmf->address, vmf->pmd, write); + return false; -unlock: - spin_unlock(vmf->ptl); + return touch_pmd(vmf->vma, vmf->address, vmf->pmd, write); } static vm_fault_t do_huge_zero_wp_pmd(struct vm_fault *vmf) diff --git a/mm/internal.h b/mm/internal.h index 1561fc2ff5b8..27ad37a41868 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1402,7 +1402,7 @@ int __must_check try_grab_folio(struct folio *folio, int refs, */ void touch_pud(struct vm_area_struct *vma, unsigned long addr, pud_t *pud, bool write); -void touch_pmd(struct vm_area_struct *vma, unsigned long addr, +bool touch_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, bool write); /* diff --git a/mm/memory.c b/mm/memory.c index b59ae7ce42eb..aad432e71251 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -6134,6 +6134,45 @@ split: } /* + * The page faults may be spurious because of the racy access to the + * page table. For example, a non-populated virtual page is accessed + * on 2 CPUs simultaneously, thus the page faults are triggered on + * both CPUs. However, it's possible that one CPU (say CPU A) cannot + * find the reason for the page fault if the other CPU (say CPU B) has + * changed the page table before the PTE is checked on CPU A. Most of + * the time, the spurious page faults can be ignored safely. However, + * if the page fault is for the write access, it's possible that a + * stale read-only TLB entry exists in the local CPU and needs to be + * flushed on some architectures. This is called the spurious page + * fault fixing. + * + * Note: flush_tlb_fix_spurious_fault() is defined as flush_tlb_page() + * by default and used as such on most architectures, while + * flush_tlb_fix_spurious_fault_pmd() is defined as NOP by default and + * used as such on most architectures. + */ +static void fix_spurious_fault(struct vm_fault *vmf, + enum pgtable_level ptlevel) +{ + /* Skip spurious TLB flush for retried page fault */ + if (vmf->flags & FAULT_FLAG_TRIED) + return; + /* + * This is needed only for protection faults but the arch code + * is not yet telling us if this is a protection fault or not. + * This still avoids useless tlb flushes for .text page faults + * with threads. + */ + if (vmf->flags & FAULT_FLAG_WRITE) { + if (ptlevel == PGTABLE_LEVEL_PTE) + flush_tlb_fix_spurious_fault(vmf->vma, vmf->address, + vmf->pte); + else + flush_tlb_fix_spurious_fault_pmd(vmf->vma, vmf->address, + vmf->pmd); + } +} +/* * These routines also need to handle stuff like marking pages dirty * and/or accessed for architectures that don't do it in hardware (most * RISC architectures). The early dirtying is also good on the i386. @@ -6214,23 +6253,11 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf) } entry = pte_mkyoung(entry); if (ptep_set_access_flags(vmf->vma, vmf->address, vmf->pte, entry, - vmf->flags & FAULT_FLAG_WRITE)) { + vmf->flags & FAULT_FLAG_WRITE)) update_mmu_cache_range(vmf, vmf->vma, vmf->address, vmf->pte, 1); - } else { - /* Skip spurious TLB flush for retried page fault */ - if (vmf->flags & FAULT_FLAG_TRIED) - goto unlock; - /* - * This is needed only for protection faults but the arch code - * is not yet telling us if this is a protection fault or not. - * This still avoids useless tlb flushes for .text page faults - * with threads. - */ - if (vmf->flags & FAULT_FLAG_WRITE) - flush_tlb_fix_spurious_fault(vmf->vma, vmf->address, - vmf->pte); - } + else + fix_spurious_fault(vmf, PGTABLE_LEVEL_PTE); unlock: pte_unmap_unlock(vmf->pte, vmf->ptl); return 0; @@ -6327,7 +6354,10 @@ retry_pud: if (!(ret & VM_FAULT_FALLBACK)) return ret; } else { - huge_pmd_set_accessed(&vmf); + vmf.ptl = pmd_lock(mm, vmf.pmd); + if (!huge_pmd_set_accessed(&vmf)) + fix_spurious_fault(&vmf, PGTABLE_LEVEL_PMD); + spin_unlock(vmf.ptl); return 0; } } |