From 95507570fb2f75544af69760cd5d8f48fc5c7f20 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 9 Apr 2025 17:39:58 +0100 Subject: arm64/fpsimd: Avoid RES0 bits in the SME trap handler The SME trap handler consumes RES0 bits from the ESR when determining the reason for the trap, and depends upon those bits reading as zero. This may break in future when those RES0 bits are allocated a meaning and stop reading as zero. For SME traps taken with ESR_ELx.EC == 0b011101, the specific reason for the trap is indicated by ESR_ELx.ISS.SMTC ("SME Trap Code"). This field occupies bits [2:0] of ESR_ELx.ISS, and as of ARM DDI 0487 L.a, bits [24:3] of ESR_ELx.ISS are RES0. ESR_ELx.ISS itself occupies bits [24:0] of ESR_ELx. Extract the SMTC field specifically, matching the way we handle ESR_ELx fields elsewhere, and ensuring that the handler is future-proof. Fixes: 8bd7f91c03d8 ("arm64/sme: Implement traps and syscall handling for SME") Signed-off-by: Mark Rutland Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20250409164010.3480271-2-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/esr.h | 14 ++++++++------ arch/arm64/kernel/fpsimd.c | 2 +- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index d1b1a33f9a8b..63aed18a933f 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -371,12 +371,14 @@ /* * ISS values for SME traps */ - -#define ESR_ELx_SME_ISS_SME_DISABLED 0 -#define ESR_ELx_SME_ISS_ILL 1 -#define ESR_ELx_SME_ISS_SM_DISABLED 2 -#define ESR_ELx_SME_ISS_ZA_DISABLED 3 -#define ESR_ELx_SME_ISS_ZT_DISABLED 4 +#define ESR_ELx_SME_ISS_SMTC_MASK GENMASK(2, 0) +#define ESR_ELx_SME_ISS_SMTC(esr) ((esr) & ESR_ELx_SME_ISS_SMTC_MASK) + +#define ESR_ELx_SME_ISS_SMTC_SME_DISABLED 0 +#define ESR_ELx_SME_ISS_SMTC_ILL 1 +#define ESR_ELx_SME_ISS_SMTC_SM_DISABLED 2 +#define ESR_ELx_SME_ISS_SMTC_ZA_DISABLED 3 +#define ESR_ELx_SME_ISS_SMTC_ZT_DISABLED 4 /* ISS field definitions for MOPS exceptions */ #define ESR_ELx_MOPS_ISS_MEM_INST (UL(1) << 24) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 8370d55f0353..1ee5f330b8ed 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1436,7 +1436,7 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs) * If this not a trap due to SME being disabled then something * is being used in the wrong mode, report as SIGILL. */ - if (ESR_ELx_ISS(esr) != ESR_ELx_SME_ISS_SME_DISABLED) { + if (ESR_ELx_SME_ISS_SMTC(esr) != ESR_ELx_SME_ISS_SMTC_SME_DISABLED) { force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0); return; } -- cgit v1.2.3 From 61db0e0ba398a3726ca0e6a6399898d3d4c7b0f9 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 9 Apr 2025 17:39:59 +0100 Subject: arm64/fpsimd: Remove unused fpsimd_force_sync_to_sve() There have been no users of fpsimd_force_sync_to_sve() since commit: bbc6172eefdb276b ("arm64/fpsimd: SME no longer requires SVE register state") Remove fpsimd_force_sync_to_sve(). Signed-off-by: Mark Rutland Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Link: https://lore.kernel.org/r/20250409164010.3480271-3-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/fpsimd.h | 1 - arch/arm64/kernel/fpsimd.c | 14 -------------- 2 files changed, 15 deletions(-) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 564bc09b3e06..9c3e88ec873a 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -196,7 +196,6 @@ struct vl_info { extern void sve_alloc(struct task_struct *task, bool flush); extern void fpsimd_release_task(struct task_struct *task); extern void fpsimd_sync_to_sve(struct task_struct *task); -extern void fpsimd_force_sync_to_sve(struct task_struct *task); extern void sve_sync_to_fpsimd(struct task_struct *task); extern void sve_sync_from_fpsimd_zeropad(struct task_struct *task); diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 1ee5f330b8ed..1391a491f222 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -758,20 +758,6 @@ void sve_alloc(struct task_struct *task, bool flush) kzalloc(sve_state_size(task), GFP_KERNEL); } - -/* - * Force the FPSIMD state shared with SVE to be updated in the SVE state - * even if the SVE state is the current active state. - * - * This should only be called by ptrace. task must be non-runnable. - * task->thread.sve_state must point to at least sve_state_size(task) - * bytes of allocated kernel memory. - */ -void fpsimd_force_sync_to_sve(struct task_struct *task) -{ - fpsimd_to_sve(task); -} - /* * Ensure that task->thread.sve_state is up to date with respect to * the user task, irrespective of when SVE is in use or not. -- cgit v1.2.3 From 45fd86986b79c3ada56d3a14e712447e992433aa Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 9 Apr 2025 17:40:00 +0100 Subject: arm64/fpsimd: Remove redundant SVE trap manipulation When task_fpsimd_load() loads the saved FPSIMD/SVE/SME state, it configures EL0 SVE traps by calling sve_user_{enable,disable}(). This is unnecessary, and this is suspicious/confusing as task_fpsimd_load() does not configure EL0 SME traps. All calls to task_fpsimd_load() are followed by a call to fpsimd_bind_task_to_cpu(), where the latter configures traps for SVE and SME dependent upon the current values of TIF_SVE and TIF_SME, overriding any trap configuration performed by task_fpsimd_load(). The calls to sve_user_{enable,disable}() calls in task_fpsimd_load() have been redundant (though benign) since they were introduced in commit: a0136be443d51803 ("arm64/fpsimd: Load FP state based on recorded data type") Remove the unnecessary and confusing SVE trap manipulation. Signed-off-by: Mark Rutland Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20250409164010.3480271-4-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/fpsimd.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 1391a491f222..757445d72e5b 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -366,13 +366,11 @@ static void task_fpsimd_load(void) switch (current->thread.fp_type) { case FP_STATE_FPSIMD: /* Stop tracking SVE for this task until next use. */ - if (test_and_clear_thread_flag(TIF_SVE)) - sve_user_disable(); + clear_thread_flag(TIF_SVE); break; case FP_STATE_SVE: - if (!thread_sm_enabled(¤t->thread) && - !WARN_ON_ONCE(!test_and_set_thread_flag(TIF_SVE))) - sve_user_enable(); + if (!thread_sm_enabled(¤t->thread)) + WARN_ON_ONCE(!test_and_set_thread_flag(TIF_SVE)); if (test_thread_flag(TIF_SVE)) sve_set_vq(sve_vq_from_vl(task_get_sve_vl(current)) - 1); -- cgit v1.2.3 From d7649a4a601ebf4603f0a673f554ab350b6cfede Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 9 Apr 2025 17:40:01 +0100 Subject: arm64/fpsimd: Remove opportunistic freeing of SME state When a task's SVE vector length (NSVL) is changed, and the task happens to have SVCR.{SM,ZA}=={0,0}, vec_set_vector_length() opportunistically frees the task's sme_state and clears TIF_SME. The opportunistic freeing was added with no rationale in commit: d4d5be94a8787242 ("arm64/fpsimd: Ensure SME storage is allocated after SVE VL changes") That commit fixed an unrelated problem where the task's sve_state was freed while it could be used to store streaming mode register state, where the fix was to re-allocate the task's sve_state. There is no need to free and/or reallocate the task's sme_state when the SVE vector length changes, and there is no need to clear TIF_SME. Given the SME vector length (SVL) doesn't change, the task's sme_state remains correctly sized. Remove the unnecessary opportunistic freeing of the task's sme_state when the task's SVE vector length is changed. The task's sme_state is still freed when the SME vector length is changed. Signed-off-by: Mark Rutland Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20250409164010.3480271-5-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/fpsimd.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 757445d72e5b..128774015772 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -868,19 +868,10 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type, task->thread.fp_type = FP_STATE_FPSIMD; } - if (system_supports_sme()) { - if (type == ARM64_VEC_SME || - !(task->thread.svcr & (SVCR_SM_MASK | SVCR_ZA_MASK))) { - /* - * We are changing the SME VL or weren't using - * SME anyway, discard the state and force a - * reallocation. - */ - task->thread.svcr &= ~(SVCR_SM_MASK | - SVCR_ZA_MASK); - clear_tsk_thread_flag(task, TIF_SME); - free_sme = true; - } + if (system_supports_sme() && type == ARM64_VEC_SME) { + task->thread.svcr &= ~(SVCR_SM_MASK | SVCR_ZA_MASK); + clear_tsk_thread_flag(task, TIF_SME); + free_sme = true; } if (task == current) -- cgit v1.2.3 From d3eaab3c70905c5467e5c4ea403053d67505adeb Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 9 Apr 2025 17:40:02 +0100 Subject: arm64/fpsimd: Discard stale CPU state when handling SME traps The logic for handling SME traps manipulates saved FPSIMD/SVE/SME state incorrectly, and a race with preemption can result in a task having TIF_SME set and TIF_FOREIGN_FPSTATE clear even though the live CPU state is stale (e.g. with SME traps enabled). This can result in warnings from do_sme_acc() where SME traps are not expected while TIF_SME is set: | /* With TIF_SME userspace shouldn't generate any traps */ | if (test_and_set_thread_flag(TIF_SME)) | WARN_ON(1); This is very similar to the SVE issue we fixed in commit: 751ecf6afd6568ad ("arm64/sve: Discard stale CPU state when handling SVE traps") The race can occur when the SME trap handler is preempted before and after manipulating the saved FPSIMD/SVE/SME state, starting and ending on the same CPU, e.g. | void do_sme_acc(unsigned long esr, struct pt_regs *regs) | { | // Trap on CPU 0 with TIF_SME clear, SME traps enabled | // task->fpsimd_cpu is 0. | // per_cpu_ptr(&fpsimd_last_state, 0) is task. | | ... | | // Preempted; migrated from CPU 0 to CPU 1. | // TIF_FOREIGN_FPSTATE is set. | | get_cpu_fpsimd_context(); | | /* With TIF_SME userspace shouldn't generate any traps */ | if (test_and_set_thread_flag(TIF_SME)) | WARN_ON(1); | | if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { | unsigned long vq_minus_one = | sve_vq_from_vl(task_get_sme_vl(current)) - 1; | sme_set_vq(vq_minus_one); | | fpsimd_bind_task_to_cpu(); | } | | put_cpu_fpsimd_context(); | | // Preempted; migrated from CPU 1 to CPU 0. | // task->fpsimd_cpu is still 0 | // If per_cpu_ptr(&fpsimd_last_state, 0) is still task then: | // - Stale HW state is reused (with SME traps enabled) | // - TIF_FOREIGN_FPSTATE is cleared | // - A return to userspace skips HW state restore | } Fix the case where the state is not live and TIF_FOREIGN_FPSTATE is set by calling fpsimd_flush_task_state() to detach from the saved CPU state. This ensures that a subsequent context switch will not reuse the stale CPU state, and will instead set TIF_FOREIGN_FPSTATE, forcing the new state to be reloaded from memory prior to a return to userspace. Note: this was originallly posted as [1]. Fixes: 8bd7f91c03d8 ("arm64/sme: Implement traps and syscall handling for SME") Reported-by: Mark Rutland Signed-off-by: Mark Brown Link: https://lore.kernel.org/linux-arm-kernel/20241204-arm64-sme-reenable-v2-1-bae87728251d@kernel.org/ [ Rutland: rewrite commit message ] Signed-off-by: Mark Rutland Cc: Marc Zyngier Cc: Will Deacon Link: https://lore.kernel.org/r/20250409164010.3480271-6-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/fpsimd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 128774015772..b4858d85292b 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1435,6 +1435,8 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs) sme_set_vq(vq_minus_one); fpsimd_bind_task_to_cpu(); + } else { + fpsimd_flush_task_state(current); } put_cpu_fpsimd_context(); -- cgit v1.2.3 From e5fa85fce08b21ed41643cb7968bf66bbd0532e3 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 9 Apr 2025 17:40:03 +0100 Subject: arm64/fpsimd: Don't corrupt FPMR when streaming mode changes When the effective value of PSTATE.SM is changed from 0 to 1 or from 1 to 0 by any method, an entry or exit to/from streaming SVE mode is performed, and hardware automatically resets a number of registers. As of ARM DDI 0487 L.a, this means: * All implemented bits of the SVE vector registers are set to zero. * All implemented bits of the SVE predicate registers are set to zero. * All implemented bits of FFR are set to zero, if FFR is implemented in the new mode. * FPSR is set to 0x0000_0000_0800_009f. * FPMR is set to 0, if FPMR is implemented. Currently task_fpsimd_load() restores FPMR before restoring SVCR (which is an accessor for PSTATE.{SM,ZA}), and so the restored value of FPMR may be clobbered if the restored value of PSTATE.SM happens to differ from the initial value of PSTATE.SM. Fix this by moving the restore of FPMR later. Note: this was originally posted as [1]. Fixes: 203f2b95a882 ("arm64/fpsimd: Support FEAT_FPMR") Signed-off-by: Mark Brown Link: https://lore.kernel.org/linux-arm-kernel/20241204-arm64-sme-reenable-v2-2-bae87728251d@kernel.org/ [ Rutland: rewrite commit message ] Signed-off-by: Mark Rutland Link: https://lore.kernel.org/r/20250409164010.3480271-7-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/fpsimd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index b4858d85292b..b19736f354a7 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -359,9 +359,6 @@ static void task_fpsimd_load(void) WARN_ON(preemptible()); WARN_ON(test_thread_flag(TIF_KERNEL_FPSTATE)); - if (system_supports_fpmr()) - write_sysreg_s(current->thread.uw.fpmr, SYS_FPMR); - if (system_supports_sve() || system_supports_sme()) { switch (current->thread.fp_type) { case FP_STATE_FPSIMD: @@ -411,6 +408,9 @@ static void task_fpsimd_load(void) restore_ffr = system_supports_fa64(); } + if (system_supports_fpmr()) + write_sysreg_s(current->thread.uw.fpmr, SYS_FPMR); + if (restore_sve_regs) { WARN_ON_ONCE(current->thread.fp_type != FP_STATE_SVE); sve_load_state(sve_pffr(¤t->thread), -- cgit v1.2.3 From 01098d893fa8a6edb2b56e178b798e3e6b674f02 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 9 Apr 2025 17:40:04 +0100 Subject: arm64/fpsimd: Avoid clobbering kernel FPSIMD state with SMSTOP On system with SME, a thread's kernel FPSIMD state may be erroneously clobbered during a context switch immediately after that state is restored. Systems without SME are unaffected. If the CPU happens to be in streaming SVE mode before a context switch to a thread with kernel FPSIMD state, fpsimd_thread_switch() will restore the kernel FPSIMD state using fpsimd_load_kernel_state() while the CPU is still in streaming SVE mode. When fpsimd_thread_switch() subsequently calls fpsimd_flush_cpu_state(), this will execute an SMSTOP, causing an exit from streaming SVE mode. The exit from streaming SVE mode will cause the hardware to reset a number of FPSIMD/SVE/SME registers, clobbering the FPSIMD state. Fix this by calling fpsimd_flush_cpu_state() before restoring the kernel FPSIMD state. Fixes: e92bee9f861b ("arm64/fpsimd: Avoid erroneous elide of user state reload") Signed-off-by: Mark Rutland Cc: Ard Biesheuvel Cc: Mark Brown Cc: Will Deacon Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20250409164010.3480271-8-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/fpsimd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index b19736f354a7..4a0b0bb3a3fa 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1550,8 +1550,8 @@ void fpsimd_thread_switch(struct task_struct *next) fpsimd_save_user_state(); if (test_tsk_thread_flag(next, TIF_KERNEL_FPSTATE)) { - fpsimd_load_kernel_state(next); fpsimd_flush_cpu_state(); + fpsimd_load_kernel_state(next); } else { /* * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's -- cgit v1.2.3 From a90878f297d3dba906a6261deccb1bd4a791ba52 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 9 Apr 2025 17:40:05 +0100 Subject: arm64/fpsimd: Reset FPMR upon exec() An exec() is expected to reset all FPSIMD/SVE/SME state, and barring special handling of the vector lengths, the state is expected to reset to zero. This reset is handled in fpsimd_flush_thread(), which the core exec() code calls via flush_thread(). When support was added for FPMR, no logic was added to fpsimd_flush_thread() to reset the FPMR value, and thus it is erroneously inherited across an exec(). Add the missing reset of FPMR. Fixes: 203f2b95a882 ("arm64/fpsimd: Support FEAT_FPMR") Signed-off-by: Mark Rutland Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20250409164010.3480271-9-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/fpsimd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 4a0b0bb3a3fa..0b6fda5b7bad 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1638,6 +1638,9 @@ void fpsimd_flush_thread(void) current->thread.svcr = 0; } + if (system_supports_fpmr()) + current->thread.uw.fpmr = 0; + current->thread.fp_type = FP_STATE_FPSIMD; put_cpu_fpsimd_context(); -- cgit v1.2.3 From c94f2f326146a34066a0070ed90b8bc656b1842f Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 9 Apr 2025 17:40:06 +0100 Subject: arm64/fpsimd: Fix merging of FPSIMD state during signal return For backwards compatibility reasons, when a signal return occurs which restores SVE state, the effective lower 128 bits of each of the SVE vector registers are restored from the corresponding FPSIMD vector register in the FPSIMD signal frame, overriding the values in the SVE signal frame. This is intended to be the case regardless of streaming mode. To make this happen, restore_sve_fpsimd_context() uses fpsimd_update_current_state() to merge the lower 128 bits from the FPSIMD signal frame into the SVE register state. Unfortunately, fpsimd_update_current_state() performs this merging dependent upon TIF_SVE, which is not always correct for streaming SVE register state: * When restoring non-streaming SVE register state there is no observable problem, as the signal return code configures TIF_SVE and the saved fp_type to match before calling fpsimd_update_current_state(), which observes either: - TIF_SVE set AND fp_type == FP_STATE_SVE - TIF_SVE clear AND fp_type == FP_STATE_FPSIMD * On systems which have SME but not SVE, TIF_SVE cannot be set. Thus the merging will never happen for the streaming SVE register state. * On systems which have SVE and SME, TIF_SVE can be set and cleared independently of PSTATE.SM. Thus the merging may or may not happen for streaming SVE register state. As TIF_SVE can be cleared non-deterministically during syscalls (including at the start of sigreturn()), the merging may occur non-deterministically from the perspective of userspace. This logic has been broken since its introduction in commit: 85ed24dad2904f7c ("arm64/sme: Implement streaming SVE signal handling") ... at which point both fpsimd_signal_preserve_current_state() and fpsimd_update_current_state() only checked TIF SVE. When PSTATE.SM==1 and TIF_SVE was clear, signal delivery would place stale FPSIMD state into the FPSIMD signal frame, and signal return would not merge this into the restored register state. Subsequently, signal delivery was fixed as part of commit: 61da7c8e2a602f66 ("arm64/signal: Don't assume that TIF_SVE means we saved SVE state") ... but signal restore was not given a corresponding fix, and when TIF_SVE was clear, signal restore would still fail to merge the FPSIMD state into the restored SVE register state. The 'Fixes' tag did not indicate that this had been broken since its introduction. Fix this by merging the FPSIMD state dependent upon the saved fp_type, matching what we (currently) do during signal delivery. As described above, when backporting this commit, it will also be necessary to backport commit: 61da7c8e2a602f66 ("arm64/signal: Don't assume that TIF_SVE means we saved SVE state") ... and prior to commit: baa8515281b30861 ("arm64/fpsimd: Track the saved FPSIMD state type separately to TIF_SVE") ... it will be necessary for fpsimd_signal_preserve_current_state() and fpsimd_update_current_state() to consider both TIF_SVE and thread_sm_enabled(¤t->thread), in place of the saved fp_type. Fixes: 85ed24dad290 ("arm64/sme: Implement streaming SVE signal handling") Signed-off-by: Mark Rutland Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20250409164010.3480271-10-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/fpsimd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 0b6fda5b7bad..11f21809d3b7 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1781,7 +1781,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state) get_cpu_fpsimd_context(); current->thread.uw.fpsimd_state = *state; - if (test_thread_flag(TIF_SVE)) + if (current->thread.fp_type == FP_STATE_SVE) fpsimd_to_sve(current); task_fpsimd_load(); -- cgit v1.2.3 From d3a181588df9401d319fe2dc24bf1a364a687cc2 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 9 Apr 2025 17:40:07 +0100 Subject: arm64/fpsimd: Add fpsimd_save_and_flush_current_state() When the current task's FPSIMD/SVE/SME state may be live on *any* CPU in the system, special care must be taken when manipulating that state, as this manipulation can race with preemption and/or asynchronous usage of FPSIMD/SVE/SME (e.g. kernel-mode NEON in softirq handlers). Even when manipulation is is protected with get_cpu_fpsimd_context() and get_cpu_fpsimd_context(), the logic necessary when the state is live on the current CPU can be wildly different from the logic necessary when the state is not live on the current CPU. A number of historical and extant issues result from failing to handle these cases consistetntly and/or correctly. To make it easier to get such manipulation correct, add a new fpsimd_save_and_flush_current_state() helper function, which ensures that the current task's state has been saved to memory and any stale state on any CPU has been "flushed" such that is not live on any CPU in the system. This will allow code to safely manipulate the saved state without risk of races. Subsequent patches will use the new function. Signed-off-by: Mark Rutland Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20250409164010.3480271-11-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/fpsimd.h | 1 + arch/arm64/kernel/fpsimd.c | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 9c3e88ec873a..1a18f851b614 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -96,6 +96,7 @@ struct cpu_fp_state { extern void fpsimd_bind_state_to_cpu(struct cpu_fp_state *fp_state); extern void fpsimd_flush_task_state(struct task_struct *target); +extern void fpsimd_save_and_flush_current_state(void); extern void fpsimd_save_and_flush_cpu_state(void); static inline bool thread_sm_enabled(struct thread_struct *thread) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 11f21809d3b7..ea07c4577f17 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1819,6 +1819,17 @@ void fpsimd_flush_task_state(struct task_struct *t) barrier(); } +void fpsimd_save_and_flush_current_state(void) +{ + if (!system_supports_fpsimd()) + return; + + get_cpu_fpsimd_context(); + fpsimd_save_user_state(); + fpsimd_flush_task_state(current); + put_cpu_fpsimd_context(); +} + /* * Save the FPSIMD state to memory and invalidate cpu view. * This function must be called with preemption disabled. -- cgit v1.2.3 From 3aa4d74438afa1324513a7a6bc30f57e8727ad86 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 9 Apr 2025 17:40:08 +0100 Subject: arm64/fpsimd: signal32: Always save+flush state early There are several issues with the way the native signal handling code manipulates FPSIMD/SVE/SME state. To fix those issues, subsequent patches will rework the native signal handling code to always save+flush the current task's FPSIMD/SVE/SME state before manipulating that state. In preparation for those changes, rework the compat signal handling code to save+flush the current task's FPSIMD state before manipulating it. Subsequent patches will remove fpsimd_signal_preserve_current_state() and fpsimd_update_current_state(). Compat tasks can only have FPSIMD state, and cannot have any SVE or SME state. Thus, the SVE state manipulation present in fpsimd_signal_preserve_current_state() and fpsimd_update_current_state() is not necessary, and it is safe to directly manipulate current->thread.uw.fpsimd_state once it has been saved+flushed. Use fpsimd_save_and_flush_current_state() to save+flush the state for both signal delivery and signal return, before the state is manipulated in any way. While it would be safe for compat_restore_vfp_context() to use fpsimd_flush_task_state(current), there are several extant issues in the native signal code resulting from incorrect use of fpsimd_flush_task_state(), and for consistency it is preferable to use fpsimd_save_and_flush_current_state(). Signed-off-by: Mark Rutland Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20250409164010.3480271-12-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/signal32.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 81e798b6dada..bb3b526ff43f 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -103,7 +103,7 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame) * Note that this also saves V16-31, which aren't visible * in AArch32. */ - fpsimd_signal_preserve_current_state(); + fpsimd_save_and_flush_current_state(); /* Place structure header on the stack */ __put_user_error(magic, &frame->magic, err); @@ -169,14 +169,17 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame) fpsimd.fpsr = fpscr & VFP_FPSCR_STAT_MASK; fpsimd.fpcr = fpscr & VFP_FPSCR_CTRL_MASK; + if (err) + return -EFAULT; + /* * We don't need to touch the exception register, so * reload the hardware state. */ - if (!err) - fpsimd_update_current_state(&fpsimd); + fpsimd_save_and_flush_current_state(); + current->thread.uw.fpsimd_state = fpsimd; - return err ? -EFAULT : 0; + return 0; } static int compat_restore_sigframe(struct pt_regs *regs, -- cgit v1.2.3 From 929fa99b1215966fc8a6ccc2e14b92e36c3c42f3 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 9 Apr 2025 17:40:09 +0100 Subject: arm64/fpsimd: signal: Always save+flush state early There are several issues with the way the native signal handling code manipulates FPSIMD/SVE/SME state, described in detail below. These issues largely result from races with preemption and inconsistent handling of live state vs saved state. Known issues with native FPSIMD/SVE/SME state management include: * On systems with FPMR, the code to save/restore the FPMR accesses the register while it is not owned by the current task. Consequently, this may corrupt the FPMR of the current task and/or may corrupt the FPMR of an unrelated task. The FPMR save/restore has been broken since it was introduced in commit: 8c46def44409fc91 ("arm64/signal: Add FPMR signal handling") * On systems with SME, setup_return() modifies both the live register state and the saved state register state regardless of whether the task's state is live, and without holding the cpu fpsimd context. Consequently: - This may corrupt the state an unrelated task which has PSTATE.SM set and/or PSTATE.ZA set. - The task may enter the signal handler in streaming mode, and or with ZA storage enabled unexpectedly. - The task may enter the signal handler in non-streaming SVE mode with stale SVE register state, which may have been inherited from streaming SVE mode unexpectedly. Where the streaming and non-streaming vector lengths differ, this may be packed into registers arbitrarily. This logic has been broken since it was introduced in commit: 40a8e87bb32855b3 ("arm64/sme: Disable ZA and streaming mode when handling signals") Further incorrect manipulation of state was added in commits: ea64baacbc36a0d5 ("arm64/signal: Flush FPSIMD register state when disabling streaming mode") baa8515281b30861 ("arm64/fpsimd: Track the saved FPSIMD state type separately to TIF_SVE") * Several restoration functions use fpsimd_flush_task_state() to discard the live FPSIMD/SVE/SME while the in-memory copy is stale. When a subset of the FPSIMD/SVE/SME state is restored, the remainder may be non-deterministically reset to a stale snapshot from some arbitrary point in the past. This non-deterministic discarding was introduced in commit: 8cd969d28fd2848d ("arm64/sve: Signal handling support") As of that commit, when TIF_SVE was initially clear, failure to restore the SVE signal frame could reset the FPSIMD registers to a stale snapshot. The pattern of discarding unsaved state was subsequently copied into restoration functions for some new state in commits: 39782210eb7e8763 ("arm64/sme: Implement ZA signal handling") ee072cf708048c0d ("arm64/sme: Implement signal handling for ZT") * On systems with SME/SME2, the entire FPSIMD/SVE/SME state may be loaded onto the CPU redundantly. Either restore_fpsimd_context() or restore_sve_fpsimd_context() will load the entire FPSIMD/SVE/SME state via fpsimd_update_current_state() before restore_za_context() and restore_zt_context() each discard the state via fpsimd_flush_task_state(). This is purely redundant work, and not a functional bug. To fix these issues, rework the native signal handling code to always save+flush the current task's FPSIMD/SVE/SME state before manipulating that state. This avoids races with preemption and ensures that state is manipulated consistently regardless of whether it happened to be live prior to manipulation. This largely involes: * Using fpsimd_save_and_flush_current_state() to save+flush the state for both signal delivery and signal return, before the state is manipulated in any way. * Removing fpsimd_signal_preserve_current_state() and updating preserve_fpsimd_context() to explicitly ensure that the FPSIMD state is up-to-date, as preserve_fpsimd_context() is the only consumer of the FPSIMD state during signal delivery. * Modifying fpsimd_update_current_state() to not reload the FPSIMD state onto the CPU. Ideally we'd remove fpsimd_update_current_state() entirely, but I've left that for subsequent patches as there are a number of of other problems with the FPSIMD<->SVE conversion helpers that should be addressed at the same time. For now I've removed the misleading comment. For setup_return(), we need to decide (for ABI reasons) whether signal delivery should have all the side-effects of an SMSTOP. For now I've left a TODO comment, as there are other questions in this area that I'll address with subsequent patches. Fixes: 8c46def44409 ("arm64/signal: Add FPMR signal handling") Fixes: 40a8e87bb328 ("arm64/sme: Disable ZA and streaming mode when handling signals") Fixes: ea64baacbc36 ("arm64/signal: Flush FPSIMD register state when disabling streaming mode") Fixes: baa8515281b3 ("arm64/fpsimd: Track the saved FPSIMD state type separately to TIF_SVE") Fixes: 8cd969d28fd2 ("arm64/sve: Signal handling support") Fixes: 39782210eb7e ("arm64/sme: Implement ZA signal handling") Fixes: ee072cf70804 ("arm64/sme: Implement signal handling for ZT") Signed-off-by: Mark Rutland Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20250409164010.3480271-13-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/fpsimd.h | 1 - arch/arm64/kernel/fpsimd.c | 28 ----------------- arch/arm64/kernel/signal.c | 66 ++++++++--------------------------------- 3 files changed, 12 insertions(+), 83 deletions(-) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 1a18f851b614..b7adb2c72204 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -76,7 +76,6 @@ extern void fpsimd_load_state(struct user_fpsimd_state *state); extern void fpsimd_thread_switch(struct task_struct *next); extern void fpsimd_flush_thread(void); -extern void fpsimd_signal_preserve_current_state(void); extern void fpsimd_preserve_current_state(void); extern void fpsimd_restore_current_state(void); extern void fpsimd_update_current_state(struct user_fpsimd_state const *state); diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index ea07c4577f17..b0874402f7ec 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1662,18 +1662,6 @@ void fpsimd_preserve_current_state(void) put_cpu_fpsimd_context(); } -/* - * Like fpsimd_preserve_current_state(), but ensure that - * current->thread.uw.fpsimd_state is updated so that it can be copied to - * the signal frame. - */ -void fpsimd_signal_preserve_current_state(void) -{ - fpsimd_preserve_current_state(); - if (current->thread.fp_type == FP_STATE_SVE) - sve_to_fpsimd(current); -} - /* * Associate current's FPSIMD context with this cpu * The caller must have ownership of the cpu FPSIMD context before calling @@ -1766,30 +1754,14 @@ void fpsimd_restore_current_state(void) put_cpu_fpsimd_context(); } -/* - * Load an updated userland FPSIMD state for 'current' from memory and set the - * flag that indicates that the FPSIMD register contents are the most recent - * FPSIMD state of 'current'. This is used by the signal code to restore the - * register state when returning from a signal handler in FPSIMD only cases, - * any SVE context will be discarded. - */ void fpsimd_update_current_state(struct user_fpsimd_state const *state) { if (WARN_ON(!system_supports_fpsimd())) return; - get_cpu_fpsimd_context(); - current->thread.uw.fpsimd_state = *state; if (current->thread.fp_type == FP_STATE_SVE) fpsimd_to_sve(current); - - task_fpsimd_load(); - fpsimd_bind_task_to_cpu(); - - clear_thread_flag(TIF_FOREIGN_FPSTATE); - - put_cpu_fpsimd_context(); } /* diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index a7c37afb4ebe..0de9c452c6c0 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -250,6 +250,8 @@ static int preserve_fpsimd_context(struct fpsimd_context __user *ctx) ¤t->thread.uw.fpsimd_state; int err; + sve_sync_to_fpsimd(current); + /* copy the FP and status/control registers */ err = __copy_to_user(ctx->vregs, fpsimd->vregs, sizeof(fpsimd->vregs)); __put_user_error(fpsimd->fpsr, &ctx->fpsr, err); @@ -291,8 +293,6 @@ static int preserve_fpmr_context(struct fpmr_context __user *ctx) { int err = 0; - current->thread.uw.fpmr = read_sysreg_s(SYS_FPMR); - __put_user_error(FPMR_MAGIC, &ctx->head.magic, err); __put_user_error(sizeof(*ctx), &ctx->head.size, err); __put_user_error(current->thread.uw.fpmr, &ctx->fpmr, err); @@ -310,7 +310,7 @@ static int restore_fpmr_context(struct user_ctxs *user) __get_user_error(fpmr, &user->fpmr->fpmr, err); if (!err) - write_sysreg_s(fpmr, SYS_FPMR); + current->thread.uw.fpmr = fpmr; return err; } @@ -372,11 +372,6 @@ static int preserve_sve_context(struct sve_context __user *ctx) err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved)); if (vq) { - /* - * This assumes that the SVE state has already been saved to - * the task struct by calling the function - * fpsimd_signal_preserve_current_state(). - */ err |= __copy_to_user((char __user *)ctx + SVE_SIG_REGS_OFFSET, current->thread.sve_state, SVE_SIG_REGS_SIZE(vq)); @@ -432,16 +427,6 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user) if (user->sve_size < SVE_SIG_CONTEXT_SIZE(vq)) return -EINVAL; - /* - * Careful: we are about __copy_from_user() directly into - * thread.sve_state with preemption enabled, so protection is - * needed to prevent a racing context switch from writing stale - * registers back over the new data. - */ - - fpsimd_flush_task_state(current); - /* From now, fpsimd_thread_switch() won't touch thread.sve_state */ - sve_alloc(current, true); if (!current->thread.sve_state) { clear_thread_flag(TIF_SVE); @@ -541,11 +526,6 @@ static int preserve_za_context(struct za_context __user *ctx) err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved)); if (vq) { - /* - * This assumes that the ZA state has already been saved to - * the task struct by calling the function - * fpsimd_signal_preserve_current_state(). - */ err |= __copy_to_user((char __user *)ctx + ZA_SIG_REGS_OFFSET, current->thread.sme_state, ZA_SIG_REGS_SIZE(vq)); @@ -580,16 +560,6 @@ static int restore_za_context(struct user_ctxs *user) if (user->za_size < ZA_SIG_CONTEXT_SIZE(vq)) return -EINVAL; - /* - * Careful: we are about __copy_from_user() directly into - * thread.sme_state with preemption enabled, so protection is - * needed to prevent a racing context switch from writing stale - * registers back over the new data. - */ - - fpsimd_flush_task_state(current); - /* From now, fpsimd_thread_switch() won't touch thread.sve_state */ - sme_alloc(current, true); if (!current->thread.sme_state) { current->thread.svcr &= ~SVCR_ZA_MASK; @@ -627,11 +597,6 @@ static int preserve_zt_context(struct zt_context __user *ctx) BUILD_BUG_ON(sizeof(ctx->__reserved) != sizeof(reserved)); err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved)); - /* - * This assumes that the ZT state has already been saved to - * the task struct by calling the function - * fpsimd_signal_preserve_current_state(). - */ err |= __copy_to_user((char __user *)ctx + ZT_SIG_REGS_OFFSET, thread_zt_state(¤t->thread), ZT_SIG_REGS_SIZE(1)); @@ -657,16 +622,6 @@ static int restore_zt_context(struct user_ctxs *user) if (nregs != 1) return -EINVAL; - /* - * Careful: we are about __copy_from_user() directly into - * thread.zt_state with preemption enabled, so protection is - * needed to prevent a racing context switch from writing stale - * registers back over the new data. - */ - - fpsimd_flush_task_state(current); - /* From now, fpsimd_thread_switch() won't touch ZT in thread state */ - err = __copy_from_user(thread_zt_state(¤t->thread), (char __user const *)user->zt + ZT_SIG_REGS_OFFSET, @@ -1017,6 +972,8 @@ static int restore_sigframe(struct pt_regs *regs, */ forget_syscall(regs); + fpsimd_save_and_flush_current_state(); + err |= !valid_user_regs(®s->user_regs, current); if (err == 0) err = parse_user_sigframe(&user, sf); @@ -1510,8 +1467,11 @@ static int setup_return(struct pt_regs *regs, struct ksignal *ksig, /* * If we were in streaming mode the saved register * state was SVE but we will exit SM and use the - * FPSIMD register state - flush the saved FPSIMD - * register state in case it gets loaded. + * FPSIMD register state. + * + * TODO: decide if this should behave as SMSTOP (e.g. reset + * FPSR + FPMR), or whether this should only clear the scalable + * registers + ZA state. */ if (current->thread.svcr & SVCR_SM_MASK) { memset(¤t->thread.uw.fpsimd_state, 0, @@ -1519,9 +1479,7 @@ static int setup_return(struct pt_regs *regs, struct ksignal *ksig, current->thread.fp_type = FP_STATE_FPSIMD; } - current->thread.svcr &= ~(SVCR_ZA_MASK | - SVCR_SM_MASK); - sme_smstop(); + current->thread.svcr &= ~(SVCR_ZA_MASK | SVCR_SM_MASK); } return 0; @@ -1535,7 +1493,7 @@ static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set, struct user_access_state ua_state; int err = 0; - fpsimd_signal_preserve_current_state(); + fpsimd_save_and_flush_current_state(); if (get_sigframe(&user, ksig, regs)) return 1; -- cgit v1.2.3 From 2fe2b96c3818a043eb013a9db1885de75987715d Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 9 Apr 2025 17:40:10 +0100 Subject: arm64/fpsimd: signal: Simplify preserve_tpidr2_context() During a context-switch, tls_thread_switch() reads and writes a task's thread_struct::tpidr2_el0 field. Other code shouldn't access this field for an active task, as such accesses would form a data-race with a concurrent context-switch. The usage in preserve_tpidr2_context() is suspicious, but benign as any race with a context switch will write the same value back to current->thread.tpidr2_el0. Make this clearer and match restore_tpidr2_context() by using a temporary variable instead, avoiding the (benign) data-race. Signed-off-by: Mark Rutland Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20250409164010.3480271-14-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/signal.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 0de9c452c6c0..73f1ab56d81b 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -478,13 +478,12 @@ extern int preserve_sve_context(void __user *ctx); static int preserve_tpidr2_context(struct tpidr2_context __user *ctx) { + u64 tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0); int err = 0; - current->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0); - __put_user_error(TPIDR2_MAGIC, &ctx->head.magic, err); __put_user_error(sizeof(*ctx), &ctx->head.size, err); - __put_user_error(current->thread.tpidr2_el0, &ctx->tpidr2, err); + __put_user_error(tpidr2_el0, &ctx->tpidr2, err); return err; } -- cgit v1.2.3 From b376108e1f88df9fbbb3867634150a3dd71e3acb Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 17 Apr 2025 20:01:13 +0100 Subject: arm64/fpsimd: signal: Clear TPIDR2 when delivering signals Linux is intended to be compatible with userspace written to Arm's AAPCS64 procedure call standard [1,2]. For the Scalable Matrix Extension (SME), AAPCS64 was extended with a "ZA lazy saving scheme", where SME's ZA tile is lazily callee-saved and caller-restored. In this scheme, TPIDR2_EL0 indicates whether the ZA tile is live or has been saved by pointing to a "TPIDR2 block" in memory, which has a "za_save_buffer" pointer. This scheme has been implemented in GCC and LLVM, with necessary runtime support implemented in glibc. AAPCS64 does not specify how the ZA lazy saving scheme is expected to interact with signal handling, and the behaviour that AAPCS64 currently recommends for (sig)setjmp() and (sig)longjmp() does not always compose safely with signal handling, as explained below. When Linux delivers a signal, it creates signal frames which contain the original values of PSTATE.ZA, the ZA tile, and TPIDR_EL2. Between saving the original state and entering the signal handler, Linux clears PSTATE.ZA, but leaves TPIDR2_EL0 unchanged. Consequently a signal handler can be entered with PSTATE.ZA=0 (meaning accesses to ZA will trap), while TPIDR_EL0 is non-null (which may indicate that ZA needs to be lazily saved, depending on the contents of the TPIDR2 block). While in this state, libc and/or compiler runtime code, such as longjmp(), may attempt to save ZA. As PSTATE.ZA=0, these accesses will trap, causing the kernel to inject a SIGILL. Note that by virtue of lazy saving occurring in libc and/or C runtime code, this can be triggered by application/library code which is unaware of SME. To avoid the problem above, the kernel must ensure that signal handlers are entered with PSTATE.ZA and TPIDR2_EL0 configured in a manner which complies with the ZA lazy saving scheme. Practically speaking, the only choice is to enter signal handlers with PSTATE.ZA=0 and TPIDR2_EL0=NULL. This change should not impact SME code which does not follow the ZA lazy saving scheme (and hence does not use TPIDR2_EL0). An alternative approach that was considered is to have the signal handler inherit the original values of both PSTATE.ZA and TPIDR2_EL0, relying on lazy save/restore sequences being idempotent and capable of racing safely. This is not safe as signal handlers must be assumed to have a "private ZA" interface, and therefore cannot be entered with PSTATE.ZA=1 and TPIDR2_EL0=NULL, but it is legitimate for signals to be taken from this state. With the kernel fixed to clear TPIDR2_EL0, there are a couple of remaining issues (largely masked by the first issue) that must be fixed in userspace: (1) When a (sig)setjmp() + (sig)longjmp() pair cross a signal boundary, ZA state may be discarded when it needs to be preserved. Currently, the ZA lazy saving scheme recommends that setjmp() does not save ZA, and recommends that longjmp() is responsible for saving ZA. A call to longjmp() in a signal handler will not have visibility of ZA state that existed prior to entry to the signal, and when a longjmp() is used to bypass a usual signal return, unsaved ZA state will be discarded erroneously. To fix this, it is necessary for setjmp() to eagerly save ZA state, and for longjmp() to configure PSTATE.ZA=0 and TPIDR2_EL0=NULL. This works regardless of whether a signal boundary is crossed. (2) When a C++ exception is thrown and crosses a signal boundary before it is caught, ZA state may be discarded when it needs to be preserved. AAPCS64 requires that exception handlers are entered with PSTATE.{SM,ZA}={0,0} and TPIDR2_EL0=NULL, with exception unwind code expected to perform any necessary save of ZA state. Where it is necessary to perform an exception unwind across an exception boundary, the unwind code must recover any necessary ZA state (along with TPIDR2) from signal frames. Fix the kernel as described above, with setup_return() clearing TPIDR2_EL0 when delivering a signal. Folk on CC are working on fixes for the remaining userspace issues, including updates/fixes to the AAPCS64 specification and glibc. [1] https://github.com/ARM-software/abi-aa/releases/download/2025Q1/aapcs64.pdf [2] https://github.com/ARM-software/abi-aa/blob/c51addc3dc03e73a016a1e4edf25440bcac76431/aapcs64/aapcs64.rst Fixes: 39782210eb7e ("arm64/sme: Implement ZA signal handling") Fixes: 39e54499280f ("arm64/signal: Include TPIDR2 in the signal context") Signed-off-by: Mark Rutland Cc: Daniel Kiss Cc: Marc Zyngier Cc: Mark Brown Cc: Richard Sandiford Cc: Sander De Smalen Cc: Tamas Petz Cc: Will Deacon Cc: Yury Khrustalev Link: https://lore.kernel.org/r/20250417190113.3778111-1-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- Documentation/arch/arm64/sme.rst | 2 +- arch/arm64/kernel/signal.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/arch/arm64/sme.rst b/Documentation/arch/arm64/sme.rst index b2fa01f85cb5..3a98aed92732 100644 --- a/Documentation/arch/arm64/sme.rst +++ b/Documentation/arch/arm64/sme.rst @@ -115,7 +115,7 @@ be zeroed. 5. Signal handling ------------------- -* Signal handlers are invoked with streaming mode and ZA disabled. +* Signal handlers are invoked with PSTATE.SM=0, PSTATE.ZA=0, and TPIDR2_EL0=0. * A new signal frame record TPIDR2_MAGIC is added formatted as a struct tpidr2_context to allow access to TPIDR2_EL0 from signal handlers. diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 73f1ab56d81b..48d3c0129dad 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -1479,6 +1479,7 @@ static int setup_return(struct pt_regs *regs, struct ksignal *ksig, } current->thread.svcr &= ~(SVCR_ZA_MASK | SVCR_SM_MASK); + write_sysreg_s(0, SYS_TPIDR2_EL0); } return 0; -- cgit v1.2.3 From f699c66691fb7e08a5a631c5baf5f2a19b7a6468 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 30 Apr 2025 18:32:40 +0100 Subject: arm64/fpsimd: Avoid warning when sve_to_fpsimd() is unused Historically fpsimd_to_sve() and sve_to_fpsimd() were (conditionally) called by functions which were defined regardless of CONFIG_ARM64_SVE. Hence it was necessary that both fpsimd_to_sve() and sve_to_fpsimd() were always defined and not guarded by ifdeffery. As a result of the removal of fpsimd_signal_preserve_current_state() in commit: 929fa99b1215966f ("arm64/fpsimd: signal: Always save+flush state early") ... sve_to_fpsimd() has no callers when CONFIG_ARM64_SVE=n, resulting in a build-time warnign that it is unused: | arch/arm64/kernel/fpsimd.c:676:13: warning: unused function 'sve_to_fpsimd' [-Wunused-function] | 676 | static void sve_to_fpsimd(struct task_struct *task) | | ^~~~~~~~~~~~~ | 1 warning generated. In contrast, fpsimd_to_sve() still has callers which are defined when CONFIG_ARM64_SVE=n, and it would be awkward to hide this behind ifdeffery and/or to use stub functions. For now, suppress the warning by marking both fpsimd_to_sve() and sve_to_fpsimd() as 'static inline', as we usually do for stub functions. The compiler will no longer warn if either function is unused. Aside from suppressing the warning, there should be no functional change as a result of this patch. Link: https://lore.kernel.org/linux-arm-kernel/20250429194600.GA26883@willie-the-truck/ Reported-by: Will Deacon Fixes: 929fa99b1215 ("arm64/fpsimd: signal: Always save+flush state early") Signed-off-by: Mark Rutland Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Link: https://lore.kernel.org/r/20250430173240.4023627-1-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/fpsimd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index b0874402f7ec..422b9d43b1e6 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -649,7 +649,7 @@ static void __fpsimd_to_sve(void *sst, struct user_fpsimd_state const *fst, * task->thread.uw.fpsimd_state must be up to date before calling this * function. */ -static void fpsimd_to_sve(struct task_struct *task) +static inline void fpsimd_to_sve(struct task_struct *task) { unsigned int vq; void *sst = task->thread.sve_state; @@ -673,7 +673,7 @@ static void fpsimd_to_sve(struct task_struct *task) * bytes of allocated kernel memory. * task->thread.sve_state must be up to date before calling this function. */ -static void sve_to_fpsimd(struct task_struct *task) +static inline void sve_to_fpsimd(struct task_struct *task) { unsigned int vq, vl; void const *sst = task->thread.sve_state; -- cgit v1.2.3 From 398edaa12f9cf2be7902f306fc023c20e3ebd3e4 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 8 May 2025 14:26:21 +0100 Subject: arm64/fpsimd: Do not discard modified SVE state Historically SVE state was discarded deterministically early in the syscall entry path, before ptrace is notified of syscall entry. This permitted ptrace to modify SVE state before and after the "real" syscall logic was executed, with the modified state being retained. This behaviour was changed by commit: 8c845e2731041f0f ("arm64/sve: Leave SVE enabled on syscall if we don't context switch") That commit was intended to speed up workloads that used SVE by opportunistically leaving SVE enabled when returning from a syscall. The syscall entry logic was modified to truncate the SVE state without disabling userspace access to SVE, and fpsimd_save_user_state() was modified to discard userspace SVE state whenever in_syscall(current_pt_regs()) is true, i.e. when current_pt_regs()->syscallno != NO_SYSCALL. Leaving SVE enabled opportunistically resulted in a couple of changes to userspace visible behaviour which weren't described at the time, but are logical consequences of opportunistically leaving SVE enabled: * Signal handlers can observe the type of saved state in the signal's sve_context record. When the kernel only tracks FPSIMD state, the 'vq' field is 0 and there is no space allocated for register contents. When the kernel tracks SVE state, the 'vq' field is non-zero and the register contents are saved into the record. As a result of the above commit, 'vq' (and the presence of SVE register state) is non-deterministically zero or non-zero for a period of time after a syscall. The effective register state is still deterministic. Hopefully no-one relies on this being deterministic. In general, handlers for asynchronous events cannot expect a deterministic state. * Similarly to signal handlers, ptrace requests can observe the type of saved state in the NT_ARM_SVE and NT_ARM_SSVE regsets, as this is exposed in the header flags. As a result of the above commit, this is now in a non-deterministic state after a syscall. The effective register state is still deterministic. Hopefully no-one relies on this being deterministic. In general, debuggers would have to handle this changing at arbitrary points during program flow. Discarding the SVE state within fpsimd_save_user_state() resulted in other changes to userspace visible behaviour which are not desirable: * A ptrace tracer can modify (or create) a tracee's SVE state at syscall entry or syscall exit. As a result of the above commit, the tracee's SVE state can be discarded non-deterministically after modification, rather than being retained as it previously was. Note that for co-operative tracer/tracee pairs, the tracer may (re)initialise the tracee's state arbitrarily after the tracee sends itself an initial SIGSTOP via a syscall, so this affects realistic design patterns. * The current_pt_regs()->syscallno field can be modified via ptrace, and can be altered even when the tracee is not really in a syscall, causing non-deterministic discarding to occur in situations where this was not previously possible. Further, using current_pt_regs()->syscallno in this way is unsound: * There are data races between readers and writers of the current_pt_regs()->syscallno field. The current_pt_regs()->syscallno field is written in interruptible task context using plain C accesses, and is read in irq/softirq context using plain C accesses. These accesses are subject to data races, with the usual concerns with tearing, etc. * Writes to current_pt_regs()->syscallno are subject to compiler reordering. As current_pt_regs()->syscallno is written with plain C accesses, the compiler is free to move those writes arbitrarily relative to anything which doesn't access the same memory location. In theory this could break signal return, where prior to restoring the SVE state, restore_sigframe() calls forget_syscall(). If the write were hoisted after restore of some SVE state, that state could be discarded unexpectedly. In practice that reordering cannot happen in the absence of LTO (as cross compilation-unit function calls happen prevent this reordering), and that reordering appears to be unlikely in the presence of LTO. Additionally, since commit: f130ac0ae4412dbe ("arm64: syscall: unmask DAIF earlier for SVCs") ... DAIF is unmasked before el0_svc_common() sets regs->syscallno to the real syscall number. Consequently state may be saved in SVE format prior to this point. Considering all of the above, current_pt_regs()->syscallno should not be used to infer whether the SVE state can be discarded. Luckily we can instead use cpu_fp_state::to_save to track when it is safe to discard the SVE state: * At syscall entry, after the live SVE register state is truncated, set cpu_fp_state::to_save to FP_STATE_FPSIMD to indicate that only the FPSIMD portion is live and needs to be saved. * At syscall exit, once the task's state is guaranteed to be live, set cpu_fp_state::to_save to FP_STATE_CURRENT to indicate that TIF_SVE must be considered to determine which state needs to be saved. * Whenever state is modified, it must be saved+flushed prior to manipulation. The state will be truncated if necessary when it is saved, and reloading the state will set fp_state::to_save to FP_STATE_CURRENT, preventing subsequent discarding. This permits SVE state to be discarded *only* when it is known to have been truncated (and the non-FPSIMD portions must be zero), and ensures that SVE state is retained after it is explicitly modified. For backporting, note that this fix depends on the following commits: * b2482807fbd4 ("arm64/sme: Optimise SME exit on syscall entry") * f130ac0ae441 ("arm64: syscall: unmask DAIF earlier for SVCs") * 929fa99b1215 ("arm64/fpsimd: signal: Always save+flush state early") Fixes: 8c845e273104 ("arm64/sve: Leave SVE enabled on syscall if we don't context switch") Fixes: f130ac0ae441 ("arm64: syscall: unmask DAIF earlier for SVCs") Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Link: https://lore.kernel.org/r/20250508132644.1395904-2-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/fpsimd.h | 3 +++ arch/arm64/kernel/entry-common.c | 46 ++++++++++++++++++++++++++++++---------- arch/arm64/kernel/fpsimd.c | 15 +++++++------ 3 files changed, 47 insertions(+), 17 deletions(-) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index b7adb2c72204..11b9c6a5e037 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -6,6 +6,7 @@ #define __ASM_FP_H #include +#include #include #include #include @@ -92,6 +93,8 @@ struct cpu_fp_state { enum fp_type to_save; }; +DECLARE_PER_CPU(struct cpu_fp_state, fpsimd_last_state); + extern void fpsimd_bind_state_to_cpu(struct cpu_fp_state *fp_state); extern void fpsimd_flush_task_state(struct task_struct *target); diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index b260ddc4d3e9..fa8a090339e2 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -393,20 +393,16 @@ static bool cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) * As per the ABI exit SME streaming mode and clear the SVE state not * shared with FPSIMD on syscall entry. */ -static inline void fp_user_discard(void) +static inline void fpsimd_syscall_enter(void) { - /* - * If SME is active then exit streaming mode. If ZA is active - * then flush the SVE registers but leave userspace access to - * both SVE and SME enabled, otherwise disable SME for the - * task and fall through to disabling SVE too. This means - * that after a syscall we never have any streaming mode - * register state to track, if this changes the KVM code will - * need updating. - */ + /* Ensure PSTATE.SM is clear, but leave PSTATE.ZA as-is. */ if (system_supports_sme()) sme_smstop_sm(); + /* + * The CPU is not in streaming mode. If non-streaming SVE is not + * supported, there is no SVE state that needs to be discarded. + */ if (!system_supports_sve()) return; @@ -416,6 +412,33 @@ static inline void fp_user_discard(void) sve_vq_minus_one = sve_vq_from_vl(task_get_sve_vl(current)) - 1; sve_flush_live(true, sve_vq_minus_one); } + + /* + * Any live non-FPSIMD SVE state has been zeroed. Allow + * fpsimd_save_user_state() to lazily discard SVE state until either + * the live state is unbound or fpsimd_syscall_exit() is called. + */ + __this_cpu_write(fpsimd_last_state.to_save, FP_STATE_FPSIMD); +} + +static __always_inline void fpsimd_syscall_exit(void) +{ + if (!system_supports_sve()) + return; + + /* + * The current task's user FPSIMD/SVE/SME state is now bound to this + * CPU. The fpsimd_last_state.to_save value is either: + * + * - FP_STATE_FPSIMD, if the state has not been reloaded on this CPU + * since fpsimd_syscall_enter(). + * + * - FP_STATE_CURRENT, if the state has been reloaded on this CPU at + * any point. + * + * Reset this to FP_STATE_CURRENT to stop lazy discarding. + */ + __this_cpu_write(fpsimd_last_state.to_save, FP_STATE_CURRENT); } UNHANDLED(el1t, 64, sync) @@ -739,10 +762,11 @@ static void noinstr el0_svc(struct pt_regs *regs) { enter_from_user_mode(regs); cortex_a76_erratum_1463225_svc_handler(); - fp_user_discard(); + fpsimd_syscall_enter(); local_daif_restore(DAIF_PROCCTX); do_el0_svc(regs); exit_to_user_mode(regs); + fpsimd_syscall_exit(); } static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 422b9d43b1e6..9f1890d692df 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -119,7 +119,7 @@ * whatever is in the FPSIMD registers is not saved to memory, but discarded. */ -static DEFINE_PER_CPU(struct cpu_fp_state, fpsimd_last_state); +DEFINE_PER_CPU(struct cpu_fp_state, fpsimd_last_state); __ro_after_init struct vl_info vl_info[ARM64_VEC_MAX] = { #ifdef CONFIG_ARM64_SVE @@ -451,12 +451,15 @@ static void fpsimd_save_user_state(void) *(last->fpmr) = read_sysreg_s(SYS_FPMR); /* - * If a task is in a syscall the ABI allows us to only - * preserve the state shared with FPSIMD so don't bother - * saving the full SVE state in that case. + * Save SVE state if it is live. + * + * The syscall ABI discards live SVE state at syscall entry. When + * entering a syscall, fpsimd_syscall_enter() sets to_save to + * FP_STATE_FPSIMD to allow the SVE state to be lazily discarded until + * either new SVE state is loaded+bound or fpsimd_syscall_exit() is + * called prior to a return to userspace. */ - if ((last->to_save == FP_STATE_CURRENT && test_thread_flag(TIF_SVE) && - !in_syscall(current_pt_regs())) || + if ((last->to_save == FP_STATE_CURRENT && test_thread_flag(TIF_SVE)) || last->to_save == FP_STATE_SVE) { save_sve_regs = true; save_ffr = true; -- cgit v1.2.3 From 1bf663a86a4505a97f08d06c373704de9441c891 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 8 May 2025 14:26:22 +0100 Subject: arm64/fpsimd: signal: Clear PSTATE.SM when restoring FPSIMD frame only On systems with SVE and/or SME, the kernel will always create SVE and FPSIMD signal frames when delivering a signal, but a user can manipulate signal frames such that a signal return only observes an FPSIMD signal frame. When this happens, restore_fpsimd_context() will restore state such that fp_type==FP_STATE_FPSIMD, but will leave PSTATE.SM as-is. It is possible for a user to set PSTATE.SM between syscall entry and execution of the sigreturn logic (e.g. via ptrace), and consequently the sigreturn may result in the task having PSTATE.SM==1 and fp_type==FP_STATE_FPSIMD. For various reasons it is not legitimate for a task to be in a state where PSTATE.SM==1 and fp_type==FP_STATE_FPSIMD. Portions of the user ABI are written with the requirement that streaming SVE state is always presented in SVE format rather than FPSIMD format, and as there is no mechanism to permit access to only the FPSIMD subset of streaming SVE state, streaming SVE state must always be saved and restored in SVE format. Fix restore_fpsimd_context() to clear PSTATE.SM when restoring an FPSIMD signal frame without an SVE signal frame. This matches the current behaviour when an SVE signal frame is present, but the SVE signal frame has no register payload (e.g. as is the case on SME-only systems which lack SVE). This change should have no effect for applications which do not alter signal frames (i.e. almost all applications). I do not expect non-{malicious,buggy} applications to hide the SVE signal frame, but I've chosen to clear PSTATE.SM rather than mandating the presence of an SVE signal frame in case there is some legacy (non-SME) usage that I am not currently aware of. For context, the SME handling was originally introduced in commit: 85ed24dad290 ("arm64/sme: Implement streaming SVE signal handling") ... and subsequently updated/fixed to handle SME-only systems in commits: 7dde62f0687c ("arm64/signal: Always accept SVE signal frames on SME only systems") f26cd7372160 ("arm64/signal: Always allocate SVE signal frames on SME only systems") Fixes: 85ed24dad290 ("arm64/sme: Implement streaming SVE signal handling") Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Link: https://lore.kernel.org/r/20250508132644.1395904-3-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/signal.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 48d3c0129dad..fdce1b856f49 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -280,6 +280,7 @@ static int restore_fpsimd_context(struct user_ctxs *user) __get_user_error(fpsimd.fpcr, &(user->fpsimd->fpcr), err); clear_thread_flag(TIF_SVE); + current->thread.svcr &= ~SVCR_SM_MASK; current->thread.fp_type = FP_STATE_FPSIMD; /* load the hardware registers from the fpsimd_state structure */ -- cgit v1.2.3 From b465ace42620970e840c7aeb2c44a6e3b1002fec Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 8 May 2025 14:26:23 +0100 Subject: arm64/fpsimd: signal: Mandate SVE payload for streaming-mode state Non-streaming SVE state may be preserved without an SVE payload, in which case the SVE context only has a header with VL==0, and all state can be restored from the FPSIMD context. Streaming SVE state is always preserved with an SVE payload, where the SVE context header has VL!=0, and the SVE_SIG_FLAG_SM flag is set. The kernel never preserves an SVE context where SVE_SIG_FLAG_SM is set without an SVE payload. However, restore_sve_fpsimd_context() doesn't forbid restoring such a context, and will handle this case by clearing PSTATE.SM and restoring the FPSIMD context into non-streaming mode, which isn't consistent with the SVE_SIG_FLAG_SM flag. Forbid this case, and mandate an SVE payload when the SVE_SIG_FLAG_SM flag is set. This avoids an awkward ABI quirk and reduces the risk that later rework to this code permits configuring a task with PSTATE.SM==1 and fp_type==FP_STATE_FPSIMD. I've marked this as a fix given that we never intended to support this case, and we don't want anyone to start relying upon the old behaviour once we re-enable SME. Fixes: 85ed24dad290 ("arm64/sme: Implement streaming SVE signal handling") Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20250508132644.1395904-4-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/signal.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index fdce1b856f49..e898948a31b6 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -387,6 +387,7 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user) unsigned int vl, vq; struct user_fpsimd_state fpsimd; u16 user_vl, flags; + bool sm; if (user->sve_size < sizeof(*user->sve)) return -EINVAL; @@ -396,7 +397,8 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user) if (err) return err; - if (flags & SVE_SIG_FLAG_SM) { + sm = flags & SVE_SIG_FLAG_SM; + if (sm) { if (!system_supports_sme()) return -EINVAL; @@ -416,7 +418,16 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user) if (user_vl != vl) return -EINVAL; - if (user->sve_size == sizeof(*user->sve)) { + /* + * Non-streaming SVE state may be preserved without an SVE payload, in + * which case the SVE context only has a header with VL==0, and all + * state can be restored from the FPSIMD context. + * + * Streaming SVE state is always preserved with an SVE payload. For + * consistency and robustness, reject restoring streaming SVE state + * without an SVE payload. + */ + if (!sm && user->sve_size == sizeof(*user->sve)) { clear_thread_flag(TIF_SVE); current->thread.svcr &= ~SVCR_SM_MASK; current->thread.fp_type = FP_STATE_FPSIMD; -- cgit v1.2.3 From be625d803c3bbfa9652697eb57589fe6f2f24b89 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 8 May 2025 14:26:24 +0100 Subject: arm64/fpsimd: signal: Consistently read FPSIMD context For historical reasons, restore_sve_fpsimd_context() has an open-coded copy of the logic from read_fpsimd_context(), which is used to either restore an FPSIMD-only context, or to merge FPSIMD state into an SVE state when restoring an SVE+FPSIMD context. The logic is *almost* identical. Refactor the logic to avoid duplication and make this clearer. This comes with two functional changes that I do not believe will be problematic in practice: * The user_fpsimd_state::size field will be checked in all restore paths that consume it user_fpsimd_state. The kernel always populates this field when delivering a signal, and so this should contain the expected value unless it has been corrupted. * If a read of user_fpsimd_state fails, we will return early without modifying TIF_SVE, the saved SVCR, or the save fp_type. This will leave the task in a consistent state, without potentially resurrecting stale FPSIMD state. A read of user_fpsimd_state should never fail unless the structure has been corrupted or the stack has been unmapped. Suggested-by: Will Deacon Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Link: https://lore.kernel.org/r/20250508132644.1395904-5-mark.rutland@arm.com [will: Ensure read_fpsimd_context() returns negative error code or zero] Signed-off-by: Will Deacon --- arch/arm64/kernel/signal.c | 57 +++++++++++++++++++++++----------------------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index e898948a31b6..d9c9bad5f0a8 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -264,30 +264,40 @@ static int preserve_fpsimd_context(struct fpsimd_context __user *ctx) return err ? -EFAULT : 0; } -static int restore_fpsimd_context(struct user_ctxs *user) +static int read_fpsimd_context(struct user_fpsimd_state *fpsimd, + struct user_ctxs *user) { - struct user_fpsimd_state fpsimd; - int err = 0; + int err; /* check the size information */ if (user->fpsimd_size != sizeof(struct fpsimd_context)) return -EINVAL; /* copy the FP and status/control registers */ - err = __copy_from_user(fpsimd.vregs, &(user->fpsimd->vregs), - sizeof(fpsimd.vregs)); - __get_user_error(fpsimd.fpsr, &(user->fpsimd->fpsr), err); - __get_user_error(fpsimd.fpcr, &(user->fpsimd->fpcr), err); + err = __copy_from_user(fpsimd->vregs, &(user->fpsimd->vregs), + sizeof(fpsimd->vregs)); + __get_user_error(fpsimd->fpsr, &(user->fpsimd->fpsr), err); + __get_user_error(fpsimd->fpcr, &(user->fpsimd->fpcr), err); + + return err ? -EFAULT : 0; +} + +static int restore_fpsimd_context(struct user_ctxs *user) +{ + struct user_fpsimd_state fpsimd; + int err; + + err = read_fpsimd_context(&fpsimd, user); + if (err) + return err; clear_thread_flag(TIF_SVE); current->thread.svcr &= ~SVCR_SM_MASK; current->thread.fp_type = FP_STATE_FPSIMD; /* load the hardware registers from the fpsimd_state structure */ - if (!err) - fpsimd_update_current_state(&fpsimd); - - return err ? -EFAULT : 0; + fpsimd_update_current_state(&fpsimd); + return 0; } static int preserve_fpmr_context(struct fpmr_context __user *ctx) @@ -427,12 +437,8 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user) * consistency and robustness, reject restoring streaming SVE state * without an SVE payload. */ - if (!sm && user->sve_size == sizeof(*user->sve)) { - clear_thread_flag(TIF_SVE); - current->thread.svcr &= ~SVCR_SM_MASK; - current->thread.fp_type = FP_STATE_FPSIMD; - goto fpsimd_only; - } + if (!sm && user->sve_size == sizeof(*user->sve)) + return restore_fpsimd_context(user); vq = sve_vq_from_vl(vl); @@ -458,19 +464,14 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user) set_thread_flag(TIF_SVE); current->thread.fp_type = FP_STATE_SVE; -fpsimd_only: - /* copy the FP and status/control registers */ - /* restore_sigframe() already checked that user->fpsimd != NULL. */ - err = __copy_from_user(fpsimd.vregs, user->fpsimd->vregs, - sizeof(fpsimd.vregs)); - __get_user_error(fpsimd.fpsr, &user->fpsimd->fpsr, err); - __get_user_error(fpsimd.fpcr, &user->fpsimd->fpcr, err); + err = read_fpsimd_context(&fpsimd, user); + if (err) + return err; - /* load the hardware registers from the fpsimd_state structure */ - if (!err) - fpsimd_update_current_state(&fpsimd); + /* Merge the FPSIMD registers into the SVE state */ + fpsimd_update_current_state(&fpsimd); - return err ? -EFAULT : 0; + return 0; } #else /* ! CONFIG_ARM64_SVE */ -- cgit v1.2.3 From 316283f276ebd5596d2015e4653198bdc2e138d4 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 8 May 2025 14:26:25 +0100 Subject: arm64/fpsimd: ptrace: Consistently handle partial writes to NT_ARM_(S)SVE Partial writes to the NT_ARM_SVE and NT_ARM_SSVE regsets using an payload are handled inconsistently and non-deterministically. A comment within sve_set_common() indicates that we intended that a partial write would preserve any effective FPSIMD/SVE state which was not overwritten, but this has never worked consistently, and during syscalls the FPSIMD vector state may be non-deterministically preserved and may be erroneously migrated between streaming and non-streaming SVE modes. The simplest fix is to handle a partial write by consistently zeroing the remaining state. As detailed below I do not believe this will adversely affect any real usage. Neither GDB nor LLDB attempt partial writes to these regsets, and the documentation (in Documentation/arch/arm64/sve.rst) has always indicated that state preservation was not guaranteed, as is says: | The effect of writing a partial, incomplete payload is unspecified. When the logic was originally introduced in commit: 43d4da2c45b2 ("arm64/sve: ptrace and ELF coredump support") ... there were two potential behaviours, depending on TIF_SVE: * When TIF_SVE was clear, all SVE state would be zeroed, excluding the low 128 bits of vectors shared with FPSIMD, FPSR, and FPCR. * When TIF_SVE was set, all SVE state would be zeroed, including the low 128 bits of vectors shared with FPSIMD, but excluding FPSR and FPCR. Note that as writing to NT_ARM_SVE would set TIF_SVE, partial writes to NT_ARM_SVE would not be idempotent, and if a first write preserved the low 128 bits, a subsequent (potentially identical) partial write would discard the low 128 bits. When support for the NT_ARM_SSVE regset was added in commit: e12310a0d30f ("arm64/sme: Implement ptrace support for streaming mode SVE registers") ... the above behaviour was retained for writes to the NT_ARM_SVE regset, though writes to the NT_ARM_SSVE would always zero the SVE registers and would not inherit FPSIMD register state. This happened as fpsimd_sync_to_sve() only copied the FPSIMD regs when TIF_SVE was clear and PSTATE.SM==0. Subsequently, when FPSIMD/SVE state tracking was changed across commits: baa8515281b3 ("arm64/fpsimd: Track the saved FPSIMD state type separately to TIF_SVE") a0136be443d5 (arm64/fpsimd: Load FP state based on recorded data type") bbc6172eefdb ("arm64/fpsimd: SME no longer requires SVE register state") 8c845e273104 ("arm64/sve: Leave SVE enabled on syscall if we don't context switch") ... there was no corresponding update to the ptrace code, nor to fpsimd_sync_to_sve(), which stil considers TIF_SVE and PSTATE.SM rather than the saved fp_type. The saved state can be in the FPSIMD format regardless of whether TIF_SVE is set or clear, and the saved type can change non-deterministically during syscalls. Consequently a subsequent partial write to the NT_ARM_SVE or NT_ARM_SSVE regsets may non-deterministically preserve the FPSIMD state, and may migrate this state between streaming and non-streaming modes. Clean this up by never attempting to preserve ANY state when writing an SVE payload to the NT_ARM_SVE/NT_ARM_SSVE regsets, zeroing all relevant state including FPSR and FPCR. This simplifies the code, makes the behaviour deterministic, and avoids migrating state between streaming and non-streaming modes. As above, I do not believe this should adversely affect existing userspace applications. At the same time, remove fpsimd_sync_to_sve(). It is no longer used, doesn't do what its documentation implies, and gets in the way of other cleanups and fixes. Fixes: 43d4da2c45b2 ("arm64/sve: ptrace and ELF coredump support") Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: David Spickett Cc: Luis Machado Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Link: https://lore.kernel.org/r/20250508132644.1395904-6-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/fpsimd.h | 1 - arch/arm64/kernel/fpsimd.c | 15 --------------- arch/arm64/kernel/ptrace.c | 26 +++++++++----------------- 3 files changed, 9 insertions(+), 33 deletions(-) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 11b9c6a5e037..da1bd5b9a7e7 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -198,7 +198,6 @@ struct vl_info { extern void sve_alloc(struct task_struct *task, bool flush); extern void fpsimd_release_task(struct task_struct *task); -extern void fpsimd_sync_to_sve(struct task_struct *task); extern void sve_sync_to_fpsimd(struct task_struct *task); extern void sve_sync_from_fpsimd_zeropad(struct task_struct *task); diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 9f1890d692df..e06e4d8eda49 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -759,21 +759,6 @@ void sve_alloc(struct task_struct *task, bool flush) kzalloc(sve_state_size(task), GFP_KERNEL); } -/* - * Ensure that task->thread.sve_state is up to date with respect to - * the user task, irrespective of when SVE is in use or not. - * - * This should only be called by ptrace. task must be non-runnable. - * task->thread.sve_state must point to at least sve_state_size(task) - * bytes of allocated kernel memory. - */ -void fpsimd_sync_to_sve(struct task_struct *task) -{ - if (!test_tsk_thread_flag(task, TIF_SVE) && - !thread_sm_enabled(&task->thread)) - fpsimd_to_sve(task); -} - /* * Ensure that task->thread.uw.fpsimd_state is up to date with respect to * the user task, irrespective of whether SVE is in use or not. diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index f79b0d5f71ac..259d90f70e02 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -910,8 +910,6 @@ static int sve_set_common(struct task_struct *target, /* Enter/exit streaming mode */ if (system_supports_sme()) { - u64 old_svcr = target->thread.svcr; - switch (type) { case ARM64_VEC_SVE: target->thread.svcr &= ~SVCR_SM_MASK; @@ -931,23 +929,20 @@ static int sve_set_common(struct task_struct *target, ret = -EINVAL; goto out; } - - /* - * If we switched then invalidate any existing SVE - * state and ensure there's storage. - */ - if (target->thread.svcr != old_svcr) - sve_alloc(target, true); } + /* Always zero V regs, FPSR, and FPCR */ + memset(¤t->thread.uw.fpsimd_state, 0, + sizeof(current->thread.uw.fpsimd_state)); + /* Registers: FPSIMD-only case */ BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header)); if ((header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD) { - ret = __fpr_set(target, regset, pos, count, kbuf, ubuf, - SVE_PT_FPSIMD_OFFSET); clear_tsk_thread_flag(target, TIF_SVE); target->thread.fp_type = FP_STATE_FPSIMD; + ret = __fpr_set(target, regset, pos, count, kbuf, ubuf, + SVE_PT_FPSIMD_OFFSET); goto out; } @@ -966,6 +961,7 @@ static int sve_set_common(struct task_struct *target, goto out; } + /* Always zero SVE state */ sve_alloc(target, true); if (!target->thread.sve_state) { ret = -ENOMEM; @@ -975,13 +971,9 @@ static int sve_set_common(struct task_struct *target, } /* - * Ensure target->thread.sve_state is up to date with target's - * FPSIMD regs, so that a short copyin leaves trailing - * registers unmodified. Only enable SVE if we are - * configuring normal SVE, a system with streaming SVE may not - * have normal SVE. + * Only enable SVE if we are configuring normal SVE, a system with + * streaming SVE may not have normal SVE. */ - fpsimd_sync_to_sve(target); if (type == ARM64_VEC_SVE) set_tsk_thread_flag(target, TIF_SVE); target->thread.fp_type = FP_STATE_SVE; -- cgit v1.2.3 From b255be426913e83dd79b920b77e94d3c47886c50 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 8 May 2025 14:26:26 +0100 Subject: arm64/fpsimd: Clarify sve_sync_*() functions The sve_sync_{to,from}_fpsimd*() functions are intended to extract/insert the currently effective FPSIMD state of a task regardless of whether the task's state is saved in FPSIMD format or SVE format. Historically they were only used by ptrace, but sve_sync_to_fpsimd() is now used more widely, and sve_sync_from_fpsimd_zeropad() may be used more widely in future. When FPSIMD/SVE state tracking was changed across commits: baa8515281b3 ("arm64/fpsimd: Track the saved FPSIMD state type separately to TIF_SVE") a0136be443d5 (arm64/fpsimd: Load FP state based on recorded data type") bbc6172eefdb ("arm64/fpsimd: SME no longer requires SVE register state") 8c845e273104 ("arm64/sve: Leave SVE enabled on syscall if we don't context switch") ... sve_sync_to_fpsimd() was updated to consider task->thread.fp_type rather than the task's TIF_SVE and PSTATE.SM, but (apparently due to an oversight) sve_sync_from_fpsimd_zeropad() was left as-is, leaving the two inconsistent. Due to this, sve_sync_from_fpsimd_zeropad() may copy state from task->thread.uw.fpsimd_state into task->thread.sve_state when task->thread.fp_type == FP_STATE_FPSIMD. This is redundant (but benign) as task->thread.uw.fpsimd_state is the effective state that will be restored, and task->thread.sve_state will not be consumed. For consistency, and to avoid the redundant work, it better for sve_sync_from_fpsimd_zeropad() to consider task->thread.fp_type alone, matching sve_sync_to_fpsimd(). The naming of both functions is somehat unfortunate, as it is unclear when and why they copy state. It would be better to describe them in terms of the effective state. Considering all of the above, clean this up: * Adjust sve_sync_from_fpsimd_zeropad() to consider task->thread.fp_type. * Update comments to clarify the intended semantics/usage. I've removed the description that task->thread.sve_state must have been allocated, as this is only necessary when task->thread.fp_type == FP_STATE_SVE, which itself implies that task->thread.sve_state must have been allocated. * Rename the functions to more clearly indicate when/why they copy state: - sve_sync_to_fpsimd() => fpsimd_sync_from_effective_state() - sve_sync_from_fpsimd_zeropad => fpsimd_sync_to_effective_state_zeropad() Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Link: https://lore.kernel.org/r/20250508132644.1395904-7-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/fpsimd.h | 8 ++++---- arch/arm64/kernel/fpsimd.c | 30 ++++++++++++------------------ arch/arm64/kernel/ptrace.c | 6 +++--- arch/arm64/kernel/signal.c | 2 +- 4 files changed, 20 insertions(+), 26 deletions(-) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index da1bd5b9a7e7..c9e17d093fe2 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -198,8 +198,8 @@ struct vl_info { extern void sve_alloc(struct task_struct *task, bool flush); extern void fpsimd_release_task(struct task_struct *task); -extern void sve_sync_to_fpsimd(struct task_struct *task); -extern void sve_sync_from_fpsimd_zeropad(struct task_struct *task); +extern void fpsimd_sync_from_effective_state(struct task_struct *task); +extern void fpsimd_sync_to_effective_state_zeropad(struct task_struct *task); extern int vec_set_vector_length(struct task_struct *task, enum vec_type type, unsigned long vl, unsigned long flags); @@ -299,8 +299,8 @@ size_t sve_state_size(struct task_struct const *task); static inline void sve_alloc(struct task_struct *task, bool flush) { } static inline void fpsimd_release_task(struct task_struct *task) { } -static inline void sve_sync_to_fpsimd(struct task_struct *task) { } -static inline void sve_sync_from_fpsimd_zeropad(struct task_struct *task) { } +static inline void fpsimd_sync_from_effective_state(struct task_struct *task) { } +static inline void fpsimd_sync_to_effective_state_zeropad(struct task_struct *task) { } static inline int sve_max_virtualisable_vl(void) { diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index e06e4d8eda49..dd6480087683 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -760,39 +760,33 @@ void sve_alloc(struct task_struct *task, bool flush) } /* - * Ensure that task->thread.uw.fpsimd_state is up to date with respect to - * the user task, irrespective of whether SVE is in use or not. + * Ensure that task->thread.uw.fpsimd_state is up to date with respect to the + * task's currently effective FPSIMD/SVE state. * - * This should only be called by ptrace. task must be non-runnable. - * task->thread.sve_state must point to at least sve_state_size(task) - * bytes of allocated kernel memory. + * The task's FPSIMD/SVE/SME state must not be subject to concurrent + * manipulation. */ -void sve_sync_to_fpsimd(struct task_struct *task) +void fpsimd_sync_from_effective_state(struct task_struct *task) { if (task->thread.fp_type == FP_STATE_SVE) sve_to_fpsimd(task); } /* - * Ensure that task->thread.sve_state is up to date with respect to - * the task->thread.uw.fpsimd_state. + * Ensure that the task's currently effective FPSIMD/SVE state is up to date + * with respect to task->thread.uw.fpsimd_state, zeroing any effective + * non-FPSIMD (S)SVE state. * - * This should only be called by ptrace to merge new FPSIMD register - * values into a task for which SVE is currently active. - * task must be non-runnable. - * task->thread.sve_state must point to at least sve_state_size(task) - * bytes of allocated kernel memory. - * task->thread.uw.fpsimd_state must already have been initialised with - * the new FPSIMD register values to be merged in. + * The task's FPSIMD/SVE/SME state must not be subject to concurrent + * manipulation. */ -void sve_sync_from_fpsimd_zeropad(struct task_struct *task) +void fpsimd_sync_to_effective_state_zeropad(struct task_struct *task) { unsigned int vq; void *sst = task->thread.sve_state; struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state; - if (!test_tsk_thread_flag(task, TIF_SVE) && - !thread_sm_enabled(&task->thread)) + if (task->thread.fp_type != FP_STATE_SVE) return; vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread)); diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 259d90f70e02..bdba106a4cf2 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -594,7 +594,7 @@ static int __fpr_get(struct task_struct *target, { struct user_fpsimd_state *uregs; - sve_sync_to_fpsimd(target); + fpsimd_sync_from_effective_state(target); uregs = &target->thread.uw.fpsimd_state; @@ -626,7 +626,7 @@ static int __fpr_set(struct task_struct *target, * Ensure target->thread.uw.fpsimd_state is up to date, so that a * short copyin can't resurrect stale data. */ - sve_sync_to_fpsimd(target); + fpsimd_sync_from_effective_state(target); newstate = target->thread.uw.fpsimd_state; @@ -653,7 +653,7 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset, if (ret) return ret; - sve_sync_from_fpsimd_zeropad(target); + fpsimd_sync_to_effective_state_zeropad(target); fpsimd_flush_task_state(target); return ret; diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index d9c9bad5f0a8..d42d83b45249 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -250,7 +250,7 @@ static int preserve_fpsimd_context(struct fpsimd_context __user *ctx) ¤t->thread.uw.fpsimd_state; int err; - sve_sync_to_fpsimd(current); + fpsimd_sync_from_effective_state(current); /* copy the FP and status/control registers */ err = __copy_to_user(ctx->vregs, fpsimd->vregs, sizeof(fpsimd->vregs)); -- cgit v1.2.3 From 8738288a08b8367cd2a9f656498d7490e4473036 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 8 May 2025 14:26:27 +0100 Subject: arm64/fpsimd: Factor out {sve,sme}_state_size() helpers In subsequent patches we'll need to determine the SVE/SME state size for a given SVE VL and SME VL regardless of whether a task is currently configured with those VLs. Split the sizing logic out of sve_state_size() and sme_state_size() so that we don't need to open-code this logic elsewhere. At the same time, apply minor cleanups: * Move sve_state_size() into fpsimd.h, matching the placement of sme_state_size(). * Remove the feature checks from sve_state_size(). We only call sve_state_size() when at least one of SVE and SME are supported, and when either of the two is not supported, the task's corresponding SVE/SME vector length will be zero. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Link: https://lore.kernel.org/r/20250508132644.1395904-8-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/fpsimd.h | 47 ++++++++++++++++++++++++++++++++--------- arch/arm64/kernel/fpsimd.c | 16 -------------- 2 files changed, 37 insertions(+), 26 deletions(-) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index c9e17d093fe2..b751064bbaaa 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -293,7 +293,22 @@ static inline bool sve_vq_available(unsigned int vq) return vq_available(ARM64_VEC_SVE, vq); } -size_t sve_state_size(struct task_struct const *task); +static inline size_t __sve_state_size(unsigned int sve_vl, unsigned int sme_vl) +{ + unsigned int vl = max(sve_vl, sme_vl); + return SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl)); +} + +/* + * Return how many bytes of memory are required to store the full SVE + * state for task, given task's currently configured vector length. + */ +static inline size_t sve_state_size(struct task_struct const *task) +{ + unsigned int sve_vl = task_get_sve_vl(task); + unsigned int sme_vl = task_get_sme_vl(task); + return __sve_state_size(sve_vl, sme_vl); +} #else /* ! CONFIG_ARM64_SVE */ @@ -334,6 +349,11 @@ static inline void vec_update_vq_map(enum vec_type t) { } static inline int vec_verify_vq_map(enum vec_type t) { return 0; } static inline void sve_setup(void) { } +static inline size_t __sve_state_size(unsigned int sve_vl, unsigned int sme_vl) +{ + return 0; +} + static inline size_t sve_state_size(struct task_struct const *task) { return 0; @@ -386,6 +406,16 @@ extern int sme_set_current_vl(unsigned long arg); extern int sme_get_current_vl(void); extern void sme_suspend_exit(void); +static inline size_t __sme_state_size(unsigned int sme_vl) +{ + size_t size = ZA_SIG_REGS_SIZE(sve_vq_from_vl(sme_vl)); + + if (system_supports_sme2()) + size += ZT_SIG_REG_SIZE; + + return size; +} + /* * Return how many bytes of memory are required to store the full SME * specific state for task, given task's currently configured vector @@ -393,15 +423,7 @@ extern void sme_suspend_exit(void); */ static inline size_t sme_state_size(struct task_struct const *task) { - unsigned int vl = task_get_sme_vl(task); - size_t size; - - size = ZA_SIG_REGS_SIZE(sve_vq_from_vl(vl)); - - if (system_supports_sme2()) - size += ZT_SIG_REG_SIZE; - - return size; + return __sme_state_size(task_get_sme_vl(task)); } #else @@ -422,6 +444,11 @@ static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; } static inline int sme_get_current_vl(void) { return -EINVAL; } static inline void sme_suspend_exit(void) { } +static inline size_t __sme_state_size(unsigned int sme_vl) +{ + return 0; +} + static inline size_t sme_state_size(struct task_struct const *task) { return 0; diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index dd6480087683..c2603fe8dd24 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -719,22 +719,6 @@ static void sve_free(struct task_struct *task) __sve_free(task); } -/* - * Return how many bytes of memory are required to store the full SVE - * state for task, given task's currently configured vector length. - */ -size_t sve_state_size(struct task_struct const *task) -{ - unsigned int vl = 0; - - if (system_supports_sve()) - vl = task_get_sve_vl(task); - if (system_supports_sme()) - vl = max(vl, task_get_sme_vl(task)); - - return SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl)); -} - /* * Ensure that task->thread.sve_state is allocated and sufficiently large. * -- cgit v1.2.3 From 6ef1d778ce56c5bde1ac0a1f85fd9710efde6724 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 8 May 2025 14:26:28 +0100 Subject: arm64/fpsimd: Add task_smstop_sm() In a few places we want to transition a task from streaming mode to non-streaming mode, e.g. signal delivery where we historically tried to use an SMSTOP SM instruction. Add a new helper to manipulate a task's state in the same way as an SMSTOP SM instruction. I have not added a corresponding helper to simulate the effects of SMSTART SM. Only ptrace transitions a task into streaming mode, and ptrace has distinct semantics for such transitions. Per ARM DDI 0487 L.a, section B1.4.6: | RRSWFQ | When the Effective value of PSTATE.SM is changed by any method from 0 | to 1, an entry to Streaming SVE mode is performed, and all implemented | bits of Streaming SVE register state are set to zero. | RKFRQZ | When the Effective value of PSTATE.SM is changed by any method from 1 | to 0, an exit from Streaming SVE mode is performed, and in the | newly-entered mode, all implemented bits of the SVE scalable vector | registers, SVE predicate registers, and FFR, are set to zero. Per ARM DDI 0487 L.a, section C5.2.9: | On entry to or exit from Streaming SVE mode, FPMR is set to 0 Per ARM DDI 0487 L.a, section C5.2.10: | On entry to or exit from Streaming SVE mode, FPSR.{IOC, DZC, OFC, UFC, | IXC, IDC, QC} are set to 1 and the remaining bits are set to 0. This means bits 0, 1, 2, 3, 4, 7, and 27 respectively, i.e. 0x0800009f Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Link: https://lore.kernel.org/r/20250508132644.1395904-9-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/fpsimd.h | 2 ++ arch/arm64/kernel/fpsimd.c | 22 ++++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index b751064bbaaa..b8cf0ea43cc0 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -111,6 +111,8 @@ static inline bool thread_za_enabled(struct thread_struct *thread) return system_supports_sme() && (thread->svcr & SVCR_ZA_MASK); } +extern void task_smstop_sm(struct task_struct *task); + /* Maximum VL that SVE/SME VL-agnostic software can transparently support */ #define VL_ARCH_MAX 0x100 diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index c2603fe8dd24..fe96e018e18c 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -695,6 +695,28 @@ static inline void sve_to_fpsimd(struct task_struct *task) } } +static inline void __fpsimd_zero_vregs(struct user_fpsimd_state *fpsimd) +{ + memset(&fpsimd->vregs, 0, sizeof(fpsimd->vregs)); +} + +/* + * Simulate the effects of an SMSTOP SM instruction. + */ +void task_smstop_sm(struct task_struct *task) +{ + if (!thread_sm_enabled(&task->thread)) + return; + + __fpsimd_zero_vregs(&task->thread.uw.fpsimd_state); + task->thread.uw.fpsimd_state.fpsr = 0x0800009f; + if (system_supports_fpmr()) + task->thread.uw.fpmr = 0; + + task->thread.svcr &= ~SVCR_SM_MASK; + task->thread.fp_type = FP_STATE_FPSIMD; +} + void cpu_enable_fpmr(const struct arm64_cpu_capabilities *__always_unused p) { write_sysreg_s(read_sysreg_s(SYS_SCTLR_EL1) | SCTLR_EL1_EnFPM_MASK, -- cgit v1.2.3 From 99560c9452bb9819334bdb9c6e9e27c0f45f8829 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 8 May 2025 14:26:29 +0100 Subject: arm64/fpsimd: signal: Use SMSTOP behaviour in setup_return() Historically the behaviour of setup_return() was nondeterministic, depending on whether the task's FSIMD/SVE/SME state happened to be live. We fixed most of that in commit: 929fa99b1215 ("arm64/fpsimd: signal: Always save+flush state early") ... but we didn't decide on how clearing PSTATE.SM should behave, and left a TODO comment to that effect. Use the new task_smstop_sm() helper to make this behave as if an SMSTOP instruction was used to exit streaming mode. This would have been the most common behaviour prior to the commit above. Fixes: 40a8e87bb328 ("arm64/sme: Disable ZA and streaming mode when handling signals") Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Link: https://lore.kernel.org/r/20250508132644.1395904-10-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/signal.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index d42d83b45249..417140cd399b 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -1476,22 +1476,8 @@ static int setup_return(struct pt_regs *regs, struct ksignal *ksig, /* Signal handlers are invoked with ZA and streaming mode disabled */ if (system_supports_sme()) { - /* - * If we were in streaming mode the saved register - * state was SVE but we will exit SM and use the - * FPSIMD register state. - * - * TODO: decide if this should behave as SMSTOP (e.g. reset - * FPSR + FPMR), or whether this should only clear the scalable - * registers + ZA state. - */ - if (current->thread.svcr & SVCR_SM_MASK) { - memset(¤t->thread.uw.fpsimd_state, 0, - sizeof(current->thread.uw.fpsimd_state)); - current->thread.fp_type = FP_STATE_FPSIMD; - } - - current->thread.svcr &= ~(SVCR_ZA_MASK | SVCR_SM_MASK); + task_smstop_sm(current); + current->thread.svcr &= ~SVCR_ZA_MASK; write_sysreg_s(0, SYS_TPIDR2_EL0); } -- cgit v1.2.3 From 8d61eef756798cd33721e4c89bc72ce81792a3e8 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 8 May 2025 14:26:30 +0100 Subject: arm64/fpsimd: Remove redundant task->mm check For historical reasons, arch_dup_task_struct() only calls fpsimd_preserve_current_state() when current->mm is non-NULL, but this is no longer necessary. Historically TIF_FOREIGN_FPSTATE was only managed for user threads, and was never set for kernel threads. At the time, various functions attempted to avoid saving/restoring state for kernel threads by checking task_struct::mm to try to distinguish user threads from kernel threads. We added the current->mm check to arch_dup_task_struct() in commit: 6eb6c80187c5 ("arm64: kernel thread don't need to save fpsimd context.") ... where the intent was to avoid pointlessly saving state for kernel threads, which never had live state (and the saved state would never be consumed). Subsequently we began setting TIF_FOREIGN_FPSTATE for kernel threads, and removed most of the task_struct::mm checks in commit: df3fb9682045 ("arm64: fpsimd: Eliminate task->mm checks") ... but we missed the check in arch_dup_task_struct(), which is similarly redundant. Remove the redundant check from arch_dup_task_struct(). Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Link: https://lore.kernel.org/r/20250508132644.1395904-11-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/process.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 42faebb7b712..885c1adcf54c 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -344,8 +344,7 @@ void arch_release_task_struct(struct task_struct *tsk) int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { - if (current->mm) - fpsimd_preserve_current_state(); + fpsimd_preserve_current_state(); *dst = *src; /* -- cgit v1.2.3 From e0cb0f26594c644c71ee7f48ebaae6b26bf56a12 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 8 May 2025 14:26:31 +0100 Subject: arm64/fpsimd: Consistently preserve FPSIMD state during clone() In arch_dup_task_struct() we try to ensure that the child task inherits the FPSIMD state of its parent, but this depends on the parent task's saved state being in FPSIMD format, which is not always the case. Consequently the child task may inherit stale FPSIMD state in some cases. This can happen when the parent's state has been modified by ptrace since syscall entry, as writes to the NT_ARM_SVE regset may save state in SVE format. This has been possible since commit: bc0ee4760364 ("arm64/sve: Core task context handling") More recently it has been possible for a task's FPSIMD/SVE state to be saved before lazy discarding was guaranteed to occur, in which case preemption could cause the effective FPSIMD state to be saved in SVE format non-deterministically. This has been possible since commit: f130ac0ae441 ("arm64: syscall: unmask DAIF earlier for SVCs") Fix this by saving the parent task's effective FPSIMD state into FPSIMD format before copying the task_struct. As this requires modifying the parent's fpsimd_state, we must save+flush the state to avoid racing with concurrent manipulation. Similar issues exist when the parent has streaming mode state, and will be addressed by subsequent patches. Fixes: bc0ee4760364 ("arm64/sve: Core task context handling") Fixes: f130ac0ae441 ("arm64: syscall: unmask DAIF earlier for SVCs") Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Link: https://lore.kernel.org/r/20250508132644.1395904-12-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/process.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 885c1adcf54c..3bb7f65bf7b7 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -344,7 +344,14 @@ void arch_release_task_struct(struct task_struct *tsk) int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { - fpsimd_preserve_current_state(); + /* + * The current/src task's FPSIMD state may or may not be live, and may + * have been altered by ptrace after entry to the kernel. Save the + * effective FPSIMD state so that this will be copied into dst. + */ + fpsimd_save_and_flush_current_state(); + fpsimd_sync_from_effective_state(src); + *dst = *src; /* -- cgit v1.2.3 From a6d066f705747124fb2d662df0acbb45ffe6c406 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 8 May 2025 14:26:32 +0100 Subject: arm64/fpsimd: Clear PSTATE.SM during clone() Currently arch_dup_task_struct() doesn't handle cases where the parent task has PSTATE.SM==1. Since syscall entry exits streaming mode, the parent will usually have PSTATE.SM==0, but this can be change by ptrace after syscall entry. When this happens, arch_dup_task_struct() will initialise the new task into an invalid state. The new task inherits the parent's configuration of PSTATE.SM, but fp_type is set to FP_STATE_FPSIMD, TIF_SVE and SME may be cleared, and both sve_state and sme_state may be set to NULL. This can result in a variety of problems whenever the new task's state is manipulated, including kernel NULL pointer dereferences and leaking of streaming mode state between tasks. When ptrace is not involved, the parent will have PSTATE.SM==0 as a result of syscall entry, and the documentation in Documentation/arch/arm64/sme.rst says: | On process creation (eg, clone()) the newly created process will have | PSTATE.SM cleared. ... so make this true by using task_smstop_sm() to exit streaming mode in the child task, avoiding the problems above. Fixes: 8bd7f91c03d8 ("arm64/sme: Implement traps and syscall handling for SME") Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Link: https://lore.kernel.org/r/20250508132644.1395904-13-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/process.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 3bb7f65bf7b7..27a5b0c7ec60 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -355,16 +355,13 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) *dst = *src; /* - * Detach src's sve_state (if any) from dst so that it does not - * get erroneously used or freed prematurely. dst's copies - * will be allocated on demand later on if dst uses SVE. - * For consistency, also clear TIF_SVE here: this could be done - * later in copy_process(), but to avoid tripping up future - * maintainers it is best not to leave TIF flags and buffers in - * an inconsistent state, even temporarily. + * Drop stale reference to src's sve_state and convert dst to + * non-streaming FPSIMD mode. */ + dst->thread.fp_type = FP_STATE_FPSIMD; dst->thread.sve_state = NULL; clear_tsk_thread_flag(dst, TIF_SVE); + task_smstop_sm(dst); /* * In the unlikely event that we create a new thread with ZA @@ -393,8 +390,6 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) clear_tsk_thread_flag(dst, TIF_SME); } - dst->thread.fp_type = FP_STATE_FPSIMD; - /* clear any pending asynchronous tag fault raised by the parent */ clear_tsk_thread_flag(dst, TIF_MTE_ASYNC_FAULT); -- cgit v1.2.3 From cde5c32db55740659fca6d56c09b88800d88fd29 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 8 May 2025 14:26:33 +0100 Subject: arm64/fpsimd: Make clone() compatible with ZA lazy saving Linux is intended to be compatible with userspace written to Arm's AAPCS64 procedure call standard [1,2]. For the Scalable Matrix Extension (SME), AAPCS64 was extended with a "ZA lazy saving scheme", where SME's ZA tile is lazily callee-saved and caller-restored. In this scheme, TPIDR2_EL0 indicates whether the ZA tile is live or has been saved by pointing to a "TPIDR2 block" in memory, which has a "za_save_buffer" pointer. This scheme has been implemented in GCC and LLVM, with necessary runtime support implemented in glibc and bionic. AAPCS64 does not specify how the ZA lazy saving scheme is expected to interact with thread creation mechanisms such as fork() and pthread_create(), which would be implemented in terms of the Linux clone syscall. The behaviour implemented by Linux and glibc/bionic doesn't always compose safely, as explained below. Currently the clone syscall is implemented such that PSTATE.ZA and the ZA tile are always inherited by the new task, and TPIDR2_EL0 is inherited unless the 'flags' argument includes CLONE_SETTLS, in which case TPIDR2_EL0 is set to 0/NULL. This doesn't make much sense: (a) TPIDR2_EL0 is part of the calling convention, and changes as control is passed between functions. It is *NOT* used for thread local storage, despite superficial similarity to TPIDR_EL0, which is is used as the TLS register. (b) TPIDR2_EL0 and PSTATE.ZA are tightly coupled in the procedure call standard, and some combinations of states are illegal. In general, manipulating the two independently is not guaranteed to be safe. In practice, code which is compliant with the procedure call standard may issue a clone syscall while in the "ZA dormant" state, where PSTATE.ZA==1 and TPIDR2_EL0 is non-null and indicates that ZA needs to be saved. This can cause a variety of problems, including: * If the implementation of pthread_create() passes CLONE_SETTLS, the new thread will start with PSTATE.ZA==1 and TPIDR2==NULL. Per the procedure call standard this is not a legitimate state for most functions. This can cause data corruption (e.g. as code may rely on PSTATE.ZA being 0 to guarantee that an SMSTART ZA instruction will zero the ZA tile contents), and may result in other undefined behaviour. * If the implementation of pthread_create() does not pass CLONE_SETTLS, the new thread will start with PSTATE.ZA==1 and TPIDR2 pointing to a TPIDR2 block on the parent thread's stack. This can result in a variety of problems, e.g. - The child may write back to the parent's za_save_buffer, corrupting its contents. - The child may read from the TPIDR2 block after the parent has reused this memory for something else, and consequently the child may abort or clobber arbitrary memory. Ideally we'd require that userspace ensures that a task is in the "ZA off" state (with PSTATE.ZA==0 and TPIDR2_EL0==NULL) prior to issuing a clone syscall, and have the kernel force this state for new threads. Unfortunately, contemporary C libraries do not do this, and simply forcing this state within the implementation of clone would break fork(). Instead, we can bodge around this by considering the CLONE_VM flag, and manipulate PSTATE.ZA and TPIDR2_EL0 as a pair. CLONE_VM indicates that the new task will run in the same address space as its parent, and in that case it doesn't make sense to inherit a stale pointer to the parent's TPIDR2 block: * For fork(), CLONE_VM will not be set, and it is safe to inherit both PSTATE.ZA and TPIDR2_EL0 as the new task will have its own copy of the address space, and cannot clobber its parent's stack. * For pthread_create() and vfork(), CLONE_VM will be set, and discarding PSTATE.ZA and TPIDR2_EL0 for the new task doesn't break any existing assumptions in userspace. Implement this behaviour for clone(). We currently inherit PSTATE.ZA in arch_dup_task_struct(), but this does not have access to the clone flags, so move this logic under copy_thread(). Documentation is updated to describe the new behaviour. [1] https://github.com/ARM-software/abi-aa/releases/download/2025Q1/aapcs64.pdf [2] https://github.com/ARM-software/abi-aa/blob/c51addc3dc03e73a016a1e4edf25440bcac76431/aapcs64/aapcs64.rst Suggested-by: Catalin Marinas Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Daniel Kiss Cc: Marc Zyngier Cc: Mark Brown Cc: Richard Sandiford Cc: Sander De Smalen Cc: Tamas Petz Cc: Will Deacon Cc: Yury Khrustalev Acked-by: Yury Khrustalev Link: https://lore.kernel.org/r/20250508132644.1395904-14-mark.rutland@arm.com Signed-off-by: Will Deacon --- Documentation/arch/arm64/sme.rst | 4 +- arch/arm64/kernel/process.c | 92 +++++++++++++++++++++++++++------------- 2 files changed, 64 insertions(+), 32 deletions(-) diff --git a/Documentation/arch/arm64/sme.rst b/Documentation/arch/arm64/sme.rst index 3a98aed92732..1c1e48d8bd1a 100644 --- a/Documentation/arch/arm64/sme.rst +++ b/Documentation/arch/arm64/sme.rst @@ -69,8 +69,8 @@ model features for SME is included in Appendix A. vectors from 0 to VL/8-1 stored in the same endianness invariant format as is used for SVE vectors. -* On thread creation TPIDR2_EL0 is preserved unless CLONE_SETTLS is specified, - in which case it is set to 0. +* On thread creation PSTATE.ZA and TPIDR2_EL0 are preserved unless CLONE_VM + is specified, in which case PSTATE.ZA is set to 0 and TPIDR2_EL0 is set to 0. 2. Vector lengths ------------------ diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 27a5b0c7ec60..74bee78cc3fa 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -364,31 +364,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) task_smstop_sm(dst); /* - * In the unlikely event that we create a new thread with ZA - * enabled we should retain the ZA and ZT state so duplicate - * it here. This may be shortly freed if we exec() or if - * CLONE_SETTLS but it's simpler to do it here. To avoid - * confusing the rest of the code ensure that we have a - * sve_state allocated whenever sme_state is allocated. + * Drop stale reference to src's sme_state and ensure dst has ZA + * disabled. + * + * When necessary, ZA will be inherited later in copy_thread_za(). */ - if (thread_za_enabled(&src->thread)) { - dst->thread.sve_state = kzalloc(sve_state_size(src), - GFP_KERNEL); - if (!dst->thread.sve_state) - return -ENOMEM; - - dst->thread.sme_state = kmemdup(src->thread.sme_state, - sme_state_size(src), - GFP_KERNEL); - if (!dst->thread.sme_state) { - kfree(dst->thread.sve_state); - dst->thread.sve_state = NULL; - return -ENOMEM; - } - } else { - dst->thread.sme_state = NULL; - clear_tsk_thread_flag(dst, TIF_SME); - } + dst->thread.sme_state = NULL; + clear_tsk_thread_flag(dst, TIF_SME); + dst->thread.svcr &= ~SVCR_ZA_MASK; /* clear any pending asynchronous tag fault raised by the parent */ clear_tsk_thread_flag(dst, TIF_MTE_ASYNC_FAULT); @@ -396,6 +379,31 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) return 0; } +static int copy_thread_za(struct task_struct *dst, struct task_struct *src) +{ + if (!thread_za_enabled(&src->thread)) + return 0; + + dst->thread.sve_state = kzalloc(sve_state_size(src), + GFP_KERNEL); + if (!dst->thread.sve_state) + return -ENOMEM; + + dst->thread.sme_state = kmemdup(src->thread.sme_state, + sme_state_size(src), + GFP_KERNEL); + if (!dst->thread.sme_state) { + kfree(dst->thread.sve_state); + dst->thread.sve_state = NULL; + return -ENOMEM; + } + + set_tsk_thread_flag(dst, TIF_SME); + dst->thread.svcr |= SVCR_ZA_MASK; + + return 0; +} + asmlinkage void ret_from_fork(void) asm("ret_from_fork"); int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) @@ -428,8 +436,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) * out-of-sync with the saved value. */ *task_user_tls(p) = read_sysreg(tpidr_el0); - if (system_supports_tpidr2()) - p->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0); if (system_supports_poe()) p->thread.por_el0 = read_sysreg_s(SYS_POR_EL0); @@ -441,14 +447,40 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) childregs->sp = stack_start; } + /* + * Due to the AAPCS64 "ZA lazy saving scheme", PSTATE.ZA and + * TPIDR2 need to be manipulated as a pair, and either both + * need to be inherited or both need to be reset. + * + * Within a process, child threads must not inherit their + * parent's TPIDR2 value or they may clobber their parent's + * stack at some later point. + * + * When a process is fork()'d, the child must inherit ZA and + * TPIDR2 from its parent in case there was dormant ZA state. + * + * Use CLONE_VM to determine when the child will share the + * address space with the parent, and cannot safely inherit the + * state. + */ + if (system_supports_sme()) { + if (!(clone_flags & CLONE_VM)) { + p->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0); + ret = copy_thread_za(p, current); + if (ret) + return ret; + } else { + p->thread.tpidr2_el0 = 0; + WARN_ON_ONCE(p->thread.svcr & SVCR_ZA_MASK); + } + } + /* * If a TLS pointer was passed to clone, use it for the new - * thread. We also reset TPIDR2 if it's in use. + * thread. */ - if (clone_flags & CLONE_SETTLS) { + if (clone_flags & CLONE_SETTLS) p->thread.uw.tp_value = tls; - p->thread.tpidr2_el0 = 0; - } ret = copy_thread_gcs(p, args); if (ret != 0) -- cgit v1.2.3 From 49ce484187f72a94c202348179a9a4e63a0f864b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 8 May 2025 14:26:34 +0100 Subject: arm64/fpsimd: ptrace/prctl: Ensure VL changes do not resurrect stale data The SVE/SME vector lengths can be changed via prctl/ptrace syscalls. Changes to the SVE/SME vector lengths are documented as preserving the lower 128 bits of the Z registers (i.e. the bits shared with the FPSIMD V registers). To ensure this, vec_set_vector_length() explicitly copies register values from a task's saved SVE state to its saved FPSIMD state when dropping the task to FPSIMD-only. The logic for this was not updated when when FPSIMD/SVE state tracking was changed across commits: baa8515281b3 ("arm64/fpsimd: Track the saved FPSIMD state type separately to TIF_SVE") a0136be443d5 (arm64/fpsimd: Load FP state based on recorded data type") bbc6172eefdb ("arm64/fpsimd: SME no longer requires SVE register state") 8c845e273104 ("arm64/sve: Leave SVE enabled on syscall if we don't context switch") Since the last commit above, a task's FPSIMD/SVE state may be stored in FPSIMD format while TIF_SVE is set, and the stored SVE state is stale. When vec_set_vector_length() encounters this case, it will erroneously clobber the live FPSIMD state with stale SVE state by using sve_to_fpsimd(). Fix this by using fpsimd_sync_from_effective_state() instead. Related issues with streaming mode state will be addressed in subsequent patches. Fixes: 8c845e273104 ("arm64/sve: Leave SVE enabled on syscall if we don't context switch") Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: David Spickett Cc: Luis Machado Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Link: https://lore.kernel.org/r/20250508132644.1395904-15-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/fpsimd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index fe96e018e18c..faeedaab0558 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -852,7 +852,7 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type, fpsimd_flush_task_state(task); if (test_and_clear_tsk_thread_flag(task, TIF_SVE) || thread_sm_enabled(&task->thread)) { - sve_to_fpsimd(task); + fpsimd_sync_from_effective_state(task); task->thread.fp_type = FP_STATE_FPSIMD; } -- cgit v1.2.3 From b87c8c4aca1163ae4791507089007863e9c3ca1b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 8 May 2025 14:26:35 +0100 Subject: arm64/fpsimd: ptrace/prctl: Ensure VL changes leave task in a valid state Currently, vec_set_vector_length() can manipulate a task into an invalid state as a result of a prctl/ptrace syscall which changes the SVE/SME vector length, resulting in several problems: (1) When changing the SVE vector length, if the task initially has PSTATE.ZA==1, and sve_alloc() fails to allocate memory, the task will be left with PSTATE.ZA==1 and sve_state==NULL. This is not a legitimate state, and could result in a subsequent null pointer dereference. (2) When changing the SVE vector length, if the task initially has PSTATE.SM==1, the task will be left with PSTATE.SM==1 and fp_type==FP_STATE_FPSIMD. Streaming mode state always needs to be saved in SVE format, so this is not a legitimate state. Attempting to restore this state may cause a task to erroneously inherit stale streaming mode predicate registers and FFR contents, behaving non-deterministically and potentially leaving information from another task. While in this state, reads of the NT_ARM_SSVE regset will indicate that the registers are not stored in SVE format. For the NT_ARM_SSVE regset specifically, debuggers interpret this as meaning that PSTATE.SM==0. (3) When changing the SME vector length, if the task initially has PSTATE.SM==1, the lower 128 bits of task's streaming mode vector state will be migrated to non-streaming mode, rather than these bits being zeroed as is usually the case for changes to PSTATE.SM. To fix the first issue, we can eagerly allocate the new sve_state and sme_state before modifying the task. This makes it possible to handle memory allocation failure without modifying the task state at all, and removes the need to clear TIF_SVE and TIF_SME. To fix the second issue, we either need to clear PSTATE.SM or not change the saved fp_type. Given we're going to eagerly allocate sve_state and sme_state, the simplest option is to preserve PSTATE.SM and the saves fp_type, and consistently truncate the SVE state. This ensures that the task always stays in a valid state, and by virtue of not exiting streaming mode, this also sidesteps the third issue. I believe these changes should not be problematic for realistic usage: * When the SVE/SME vector length is changed via prctl(), syscall entry will have cleared PSTATE.SM. Unless the task's state has been manipulated via ptrace after entry, the task will have PSTATE.SM==0. * When the SVE/SME vector length is changed via a write to the NT_ARM_SVE or NT_ARM_SSVE regsets, PSTATE.SM will be forced immediately after the length change, and new vector state will be copied from userspace. * When the SME vector length is changed via a write to the NT_ARM_ZA regset, the (S)SVE state is clobbered today, so anyone who cares about the specific state would need to install this after writing to the NT_ARM_ZA regset. As we need to free the old SVE state while TIF_SVE may still be set, we cannot use sve_free(), and using kfree() directly makes it clear that the free pairs with the subsequent assignment. As this leaves sve_free() unused, I've removed the existing sve_free() and renamed __sve_free() to mirror sme_free(). Fixes: 8bd7f91c03d8 ("arm64/sme: Implement traps and syscall handling for SME") Fixes: baa8515281b3 ("arm64/fpsimd: Track the saved FPSIMD state type separately to TIF_SVE") Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: David Spickett Cc: Luis Machado Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Link: https://lore.kernel.org/r/20250508132644.1395904-16-mark.rutland@arm.com Signed-off-by: Will Deacon --- Documentation/arch/arm64/sme.rst | 2 +- arch/arm64/kernel/fpsimd.c | 137 ++++++++++++++++++++------------------- 2 files changed, 73 insertions(+), 66 deletions(-) diff --git a/Documentation/arch/arm64/sme.rst b/Documentation/arch/arm64/sme.rst index 1c1e48d8bd1a..4cb38330e704 100644 --- a/Documentation/arch/arm64/sme.rst +++ b/Documentation/arch/arm64/sme.rst @@ -241,7 +241,7 @@ prctl(PR_SME_SET_VL, unsigned long arg) length, or calling PR_SME_SET_VL with the PR_SME_SET_VL_ONEXEC flag, does not constitute a change to the vector length for this purpose. - * Changing the vector length causes PSTATE.ZA and PSTATE.SM to be cleared. + * Changing the vector length causes PSTATE.ZA to be cleared. Calling PR_SME_SET_VL with vl equal to the thread's current vector length, or calling PR_SME_SET_VL with the PR_SME_SET_VL_ONEXEC flag, does not constitute a change to the vector length for this purpose. diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index faeedaab0558..9c0d1068e7f2 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -724,23 +724,12 @@ void cpu_enable_fpmr(const struct arm64_cpu_capabilities *__always_unused p) } #ifdef CONFIG_ARM64_SVE -/* - * Call __sve_free() directly only if you know task can't be scheduled - * or preempted. - */ -static void __sve_free(struct task_struct *task) +static void sve_free(struct task_struct *task) { kfree(task->thread.sve_state); task->thread.sve_state = NULL; } -static void sve_free(struct task_struct *task) -{ - WARN_ON(test_tsk_thread_flag(task, TIF_SVE)); - - __sve_free(task); -} - /* * Ensure that task->thread.sve_state is allocated and sufficiently large. * @@ -801,10 +790,73 @@ void fpsimd_sync_to_effective_state_zeropad(struct task_struct *task) __fpsimd_to_sve(sst, fst, vq); } +static int change_live_vector_length(struct task_struct *task, + enum vec_type type, + unsigned long vl) +{ + unsigned int sve_vl = task_get_sve_vl(task); + unsigned int sme_vl = task_get_sme_vl(task); + void *sve_state = NULL, *sme_state = NULL; + + if (type == ARM64_VEC_SME) + sme_vl = vl; + else + sve_vl = vl; + + /* + * Allocate the new sve_state and sme_state before freeing the old + * copies so that allocation failure can be handled without needing to + * mutate the task's state in any way. + * + * Changes to the SVE vector length must not discard live ZA state or + * clear PSTATE.ZA, as userspace code which is unaware of the AAPCS64 + * ZA lazy saving scheme may attempt to change the SVE vector length + * while unsaved/dormant ZA state exists. + */ + sve_state = kzalloc(__sve_state_size(sve_vl, sme_vl), GFP_KERNEL); + if (!sve_state) + goto out_mem; + + if (type == ARM64_VEC_SME) { + sme_state = kzalloc(__sme_state_size(sme_vl), GFP_KERNEL); + if (!sme_state) + goto out_mem; + } + + if (task == current) + fpsimd_save_and_flush_current_state(); + else + fpsimd_flush_task_state(task); + + /* + * Always preserve PSTATE.SM and the effective FPSIMD state, zeroing + * other SVE state. + */ + fpsimd_sync_from_effective_state(task); + task_set_vl(task, type, vl); + kfree(task->thread.sve_state); + task->thread.sve_state = sve_state; + fpsimd_sync_to_effective_state_zeropad(task); + + if (type == ARM64_VEC_SME) { + task->thread.svcr &= ~SVCR_ZA_MASK; + kfree(task->thread.sme_state); + task->thread.sme_state = sme_state; + } + + return 0; + +out_mem: + kfree(sve_state); + kfree(sme_state); + return -ENOMEM; +} + int vec_set_vector_length(struct task_struct *task, enum vec_type type, unsigned long vl, unsigned long flags) { - bool free_sme = false; + bool onexec = flags & PR_SVE_SET_VL_ONEXEC; + bool inherit = flags & PR_SVE_VL_INHERIT; if (flags & ~(unsigned long)(PR_SVE_VL_INHERIT | PR_SVE_SET_VL_ONEXEC)) @@ -824,62 +876,17 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type, vl = find_supported_vector_length(type, vl); - if (flags & (PR_SVE_VL_INHERIT | - PR_SVE_SET_VL_ONEXEC)) + if (!onexec && vl != task_get_vl(task, type)) { + if (change_live_vector_length(task, type, vl)) + return -ENOMEM; + } + + if (onexec || inherit) task_set_vl_onexec(task, type, vl); else /* Reset VL to system default on next exec: */ task_set_vl_onexec(task, type, 0); - /* Only actually set the VL if not deferred: */ - if (flags & PR_SVE_SET_VL_ONEXEC) - goto out; - - if (vl == task_get_vl(task, type)) - goto out; - - /* - * To ensure the FPSIMD bits of the SVE vector registers are preserved, - * write any live register state back to task_struct, and convert to a - * regular FPSIMD thread. - */ - if (task == current) { - get_cpu_fpsimd_context(); - - fpsimd_save_user_state(); - } - - fpsimd_flush_task_state(task); - if (test_and_clear_tsk_thread_flag(task, TIF_SVE) || - thread_sm_enabled(&task->thread)) { - fpsimd_sync_from_effective_state(task); - task->thread.fp_type = FP_STATE_FPSIMD; - } - - if (system_supports_sme() && type == ARM64_VEC_SME) { - task->thread.svcr &= ~(SVCR_SM_MASK | SVCR_ZA_MASK); - clear_tsk_thread_flag(task, TIF_SME); - free_sme = true; - } - - if (task == current) - put_cpu_fpsimd_context(); - - task_set_vl(task, type, vl); - - /* - * Free the changed states if they are not in use, SME will be - * reallocated to the correct size on next use and we just - * allocate SVE now in case it is needed for use in streaming - * mode. - */ - sve_free(task); - sve_alloc(task, true); - - if (free_sme) - sme_free(task); - -out: update_tsk_thread_flag(task, vec_vl_inherit_flag(type), flags & PR_SVE_VL_INHERIT); @@ -1175,7 +1182,7 @@ void __init sve_setup(void) */ void fpsimd_release_task(struct task_struct *dead_task) { - __sve_free(dead_task); + sve_free(dead_task); sme_free(dead_task); } -- cgit v1.2.3 From 054d627c5554bdd38228174b275d62113124e3ad Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 8 May 2025 14:26:36 +0100 Subject: arm64/fpsimd: ptrace: Save task state before generating SVE header As sve_init_header_from_task() consumes the saved value of PSTATE.SM and the saved fp_type, both must be saved before the header is generated. When generating a coredump for the current task, sve_get_common() calls sve_init_header_from_task() before saving the task's state. Consequently the header may be bogus, and the contents of the regset may be misleading. Fix this by saving the task's state before generting the header. Fixes: e12310a0d30f ("arm64/sme: Implement ptrace support for streaming mode SVE registers") Fixes: b017a0cea627 ("arm64/ptrace: Use saved floating point state type to determine SVE layout") Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: David Spickett Cc: Luis Machado Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Link: https://lore.kernel.org/r/20250508132644.1395904-17-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/ptrace.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index bdba106a4cf2..67f3843de51f 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -820,15 +820,15 @@ static int sve_get_common(struct task_struct *target, unsigned int vq; unsigned long start, end; + if (target == current) + fpsimd_preserve_current_state(); + /* Header */ sve_init_header_from_task(&header, target, type); vq = sve_vq_from_vl(header.vl); membuf_write(&to, &header, sizeof(header)); - if (target == current) - fpsimd_preserve_current_state(); - BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header)); BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header)); -- cgit v1.2.3 From b93e685ecff77e0b231c12802fb632ef36a62140 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 8 May 2025 14:26:37 +0100 Subject: arm64/fpsimd: ptrace: Do not present register data for inactive mode The SME ptrace ABI is written around the incorrect assumption that SVE_PT_REGS_FPSIMD and SVE_PT_REGS_SVE are independent bit flags, where it is possible for both to be clear. In reality they are different values for bit 0 of the header flags, where SVE_PT_REGS_FPSIMD is 0 and SVE_PT_REGS_SVE is 1. In cases where code was written expecting that neither bit flag would be set, the value is equivalent to SVE_PT_REGS_FPSIMD. One consequence of this is that reads of the NT_ARM_SVE or NT_ARM_SSVE will erroneously present data from the other mode: * When PSTATE.SM==1, reads of NT_ARM_SVE will present a header with SVE_PT_REGS_FPSIMD, and FPSIMD-formatted data from streaming mode. * When PSTATE.SM==0, reads of NT_ARM_SSVE will present a header with SVE_PT_REGS_FPSIMD, and FPSIMD-formatted data from non-streaming mode. The original intent was that no register data would be provided in these cases, as described in commit: e12310a0d30f ("arm64/sme: Implement ptrace support for streaming mode SVE registers") Luckily, debuggers do not consume the bogus register data. Both GDB and LLDB read the NT_ARM_SSVE regset before the NT_ARM_SVE regset, and assume that when the NT_ARM_SSVE header presents SVE_PT_REGS_FPSIMD, it is necessary to read register contents from the NT_ARM_SVE regset, regardless of whether the NT_ARM_SSVE regset provided bogus register data. Fix the code to stop presenting register data from the inactive mode. At the same time, make the manipulation of the flag clearer, and remove the bogus comment from sve_set_common(). I've given this a quick spin with GDB and LLDB, and both seem happy. Fixes: e12310a0d30f ("arm64/sme: Implement ptrace support for streaming mode SVE registers") Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: David Spickett Cc: Luis Machado Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Link: https://lore.kernel.org/r/20250508132644.1395904-18-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/ptrace.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 67f3843de51f..a2075e1df27c 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -775,6 +775,11 @@ static void sve_init_header_from_task(struct user_sve_header *header, task_type = ARM64_VEC_SVE; active = (task_type == type); + if (active && target->thread.fp_type == FP_STATE_SVE) + header->flags = SVE_PT_REGS_SVE; + else + header->flags = SVE_PT_REGS_FPSIMD; + switch (type) { case ARM64_VEC_SVE: if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT)) @@ -789,19 +794,14 @@ static void sve_init_header_from_task(struct user_sve_header *header, return; } - if (active) { - if (target->thread.fp_type == FP_STATE_FPSIMD) { - header->flags |= SVE_PT_REGS_FPSIMD; - } else { - header->flags |= SVE_PT_REGS_SVE; - } - } - header->vl = task_get_vl(target, type); vq = sve_vq_from_vl(header->vl); header->max_vl = vec_max_vl(type); - header->size = SVE_PT_SIZE(vq, header->flags); + if (active) + header->size = SVE_PT_SIZE(vq, header->flags); + else + header->size = sizeof(header); header->max_size = SVE_PT_SIZE(sve_vq_from_vl(header->max_vl), SVE_PT_REGS_SVE); } @@ -832,6 +832,13 @@ static int sve_get_common(struct task_struct *target, BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header)); BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header)); + /* + * When the requested vector type is not active, do not present data + * from the other mode to userspace. + */ + if (header.size == sizeof(header)) + return 0; + switch ((header.flags & SVE_PT_REGS_MASK)) { case SVE_PT_REGS_FPSIMD: return __fpr_get(target, regset, to); @@ -859,7 +866,7 @@ static int sve_get_common(struct task_struct *target, return membuf_zero(&to, end - start); default: - return 0; + BUILD_BUG(); } } @@ -946,10 +953,7 @@ static int sve_set_common(struct task_struct *target, goto out; } - /* - * Otherwise: no registers or full SVE case. For backwards - * compatibility reasons we treat empty flags as SVE registers. - */ + /* Otherwise: no registers or full SVE case. */ /* * If setting a different VL from the requested VL and there is -- cgit v1.2.3 From f916dd32a943a7ab40497718aa7bcf3648d2bb39 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 8 May 2025 14:26:38 +0100 Subject: arm64/fpsimd: ptrace: Mandate SVE payload for streaming-mode state When a task has PSTATE.SM==1, reads of NT_ARM_SSVE are required to always present a header with SVE_PT_REGS_SVE, and register data in SVE format. Reads of NT_ARM_SSVE must never present register data in FPSIMD format. Within the kernel, we always expect streaming SVE data to be stored in SVE format. Currently a user can write to NT_ARM_SSVE with a header presenting SVE_PT_REGS_FPSIMD rather than SVE_PT_REGS_SVE, placing the task's FPSIMD/SVE data into an invalid state. To fix this we can either: (a) Forbid such writes. (b) Accept such writes, and immediately convert data into SVE format. Take the simple option and forbid such writes. Fixes: e12310a0d30f ("arm64/sme: Implement ptrace support for streaming mode SVE registers") Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: David Spickett Cc: Luis Machado Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20250508132644.1395904-19-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/ptrace.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index a2075e1df27c..cff72b420eab 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -890,6 +890,7 @@ static int sve_set_common(struct task_struct *target, struct user_sve_header header; unsigned int vq; unsigned long start, end; + bool fpsimd; /* Header */ if (count < sizeof(header)) @@ -899,6 +900,15 @@ static int sve_set_common(struct task_struct *target, if (ret) goto out; + /* + * Streaming SVE data is always stored and presented in SVE format. + * Require the user to provide SVE formatted data for consistency, and + * to avoid the risk that we configure the task into an invalid state. + */ + fpsimd = (header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD; + if (fpsimd && type == ARM64_VEC_SME) + return -EINVAL; + /* * Apart from SVE_PT_REGS_MASK, all SVE_PT_* flags are consumed by * vec_set_vector_length(), which will also validate them for us: @@ -945,7 +955,7 @@ static int sve_set_common(struct task_struct *target, /* Registers: FPSIMD-only case */ BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header)); - if ((header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD) { + if (fpsimd) { clear_tsk_thread_flag(target, TIF_SVE); target->thread.fp_type = FP_STATE_FPSIMD; ret = __fpr_set(target, regset, pos, count, kbuf, ubuf, -- cgit v1.2.3 From 9f8bf718f29230e38a048d08fc3063e316cd60c1 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 8 May 2025 14:26:39 +0100 Subject: arm64/fpsimd: ptrace: Gracefully handle errors Within sve_set_common() we do not handle error conditions correctly: * When writing to NT_ARM_SSVE, if sme_alloc() fails, the task will be left with task->thread.sme_state==NULL, but TIF_SME will be set and task->thread.fp_type==FP_STATE_SVE. This will result in a subsequent null pointer dereference when the task's state is loaded or otherwise manipulated. * When writing to NT_ARM_SSVE, if sve_alloc() fails, the task will be left with task->thread.sve_state==NULL, but TIF_SME will be set, PSTATE.SM will be set, and task->thread.fp_type==FP_STATE_FPSIMD. This is not a legitimate state, and can result in various problems, including a subsequent null pointer dereference and/or the task inheriting stale streaming mode register state the next time its state is loaded into hardware. * When writing to NT_ARM_SSVE, if the VL is changed but the resulting VL differs from that in the header, the task will be left with TIF_SME set, PSTATE.SM set, but task->thread.fp_type==FP_STATE_FPSIMD. This is not a legitimate state, and can result in various problems as described above. Avoid these problems by allocating memory earlier, and by changing the task's saved fp_type to FP_STATE_SVE before skipping register writes due to a change of VL. To make early returns simpler, I've moved the call to fpsimd_flush_task_state() earlier. As the tracee's state has already been saved, and the tracee is known to be blocked for the duration of sve_set_common(), it doesn't matter whether this is called at the start or the end. For consistency I've moved the setting of TIF_SVE earlier. This will be cleared when loading FPSIMD-only state, and so moving this has no resulting functional change. Note that we only allocate the memory for SVE state when SVE register contents are provided, avoiding unnecessary memory allocations for tasks which only use FPSIMD. Fixes: e12310a0d30f ("arm64/sme: Implement ptrace support for streaming mode SVE registers") Fixes: baa8515281b3 ("arm64/fpsimd: Track the saved FPSIMD state type separately to TIF_SVE") Fixes: 5d0a8d2fba50 ("arm64/ptrace: Ensure that SME is set up for target when writing SSVE state") Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: David Spickett Cc: Luis Machado Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Link: https://lore.kernel.org/r/20250508132644.1395904-20-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/ptrace.c | 61 ++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 35 deletions(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index cff72b420eab..a360e52db02f 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -892,13 +892,15 @@ static int sve_set_common(struct task_struct *target, unsigned long start, end; bool fpsimd; + fpsimd_flush_task_state(target); + /* Header */ if (count < sizeof(header)) return -EINVAL; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &header, 0, sizeof(header)); if (ret) - goto out; + return ret; /* * Streaming SVE data is always stored and presented in SVE format. @@ -916,7 +918,21 @@ static int sve_set_common(struct task_struct *target, ret = vec_set_vector_length(target, type, header.vl, ((unsigned long)header.flags & ~SVE_PT_REGS_MASK) << 16); if (ret) - goto out; + return ret; + + /* Allocate SME storage if necessary, preserving any existing ZA/ZT state */ + if (type == ARM64_VEC_SME) { + sme_alloc(target, false); + if (!target->thread.sme_state) + return -ENOMEM; + } + + /* Allocate SVE storage if necessary, zeroing any existing SVE state */ + if (!fpsimd) { + sve_alloc(target, true); + if (!target->thread.sve_state) + return -ENOMEM; + } /* * Actual VL set may be different from what the user asked @@ -930,21 +946,15 @@ static int sve_set_common(struct task_struct *target, switch (type) { case ARM64_VEC_SVE: target->thread.svcr &= ~SVCR_SM_MASK; + set_tsk_thread_flag(target, TIF_SVE); break; case ARM64_VEC_SME: target->thread.svcr |= SVCR_SM_MASK; - - /* - * Disable traps and ensure there is SME storage but - * preserve any currently set values in ZA/ZT. - */ - sme_alloc(target, false); set_tsk_thread_flag(target, TIF_SME); break; default: WARN_ON_ONCE(1); - ret = -EINVAL; - goto out; + return -EINVAL; } } @@ -960,37 +970,20 @@ static int sve_set_common(struct task_struct *target, target->thread.fp_type = FP_STATE_FPSIMD; ret = __fpr_set(target, regset, pos, count, kbuf, ubuf, SVE_PT_FPSIMD_OFFSET); - goto out; + return ret; } /* Otherwise: no registers or full SVE case. */ + target->thread.fp_type = FP_STATE_SVE; + /* * If setting a different VL from the requested VL and there is * register data, the data layout will be wrong: don't even * try to set the registers in this case. */ - if (count && vq != sve_vq_from_vl(header.vl)) { - ret = -EIO; - goto out; - } - - /* Always zero SVE state */ - sve_alloc(target, true); - if (!target->thread.sve_state) { - ret = -ENOMEM; - clear_tsk_thread_flag(target, TIF_SVE); - target->thread.fp_type = FP_STATE_FPSIMD; - goto out; - } - - /* - * Only enable SVE if we are configuring normal SVE, a system with - * streaming SVE may not have normal SVE. - */ - if (type == ARM64_VEC_SVE) - set_tsk_thread_flag(target, TIF_SVE); - target->thread.fp_type = FP_STATE_SVE; + if (count && vq != sve_vq_from_vl(header.vl)) + return -EIO; BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header)); start = SVE_PT_SVE_OFFSET; @@ -999,7 +992,7 @@ static int sve_set_common(struct task_struct *target, target->thread.sve_state, start, end); if (ret) - goto out; + return ret; start = end; end = SVE_PT_SVE_FPSR_OFFSET(vq); @@ -1015,8 +1008,6 @@ static int sve_set_common(struct task_struct *target, &target->thread.uw.fpsimd_state.fpsr, start, end); -out: - fpsimd_flush_task_state(target); return ret; } -- cgit v1.2.3 From 33c4618d0ac04139b737dcc0870b9dc3ed4dd170 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 8 May 2025 14:26:40 +0100 Subject: arm64/fpsimd: Allow CONFIG_ARM64_SME to be selected Now that the known issues with SME have been addressed, allow SME to be selected. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Daniel Kiss Cc: David Spickett Cc: Fuad Tabba Cc: Luis Machado Cc: Marc Zyngier Cc: Mark Brown Cc: Richard Sandiford Cc: Sander De Smalen Cc: Tamas Petz Cc: Todd Kjos Cc: Will Deacon Cc: Yury Khrustalev Tested-By: Luis Machado Link: https://lore.kernel.org/r/20250508132644.1395904-21-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a182295e6f08..27437f13154e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -2285,7 +2285,6 @@ config ARM64_SME bool "ARM Scalable Matrix Extension support" default y depends on ARM64_SVE - depends on BROKEN help The Scalable Matrix Extension (SME) is an extension to the AArch64 execution state which utilises a substantial subset of the SVE -- cgit v1.2.3