diff options
-rw-r--r--  meta-arm-gcs/.gitignore                                                  |    3
-rw-r--r--  meta-arm-gcs/COPYING.MIT                                                 |   17
-rw-r--r--  meta-arm-gcs/README                                                      |   33
-rw-r--r--  meta-arm-gcs/conf/layer.conf                                             |   13
-rw-r--r--  meta-arm-gcs/gcs.yml                                                     |   52
-rw-r--r--  meta-arm-gcs/recipes-bsp/trusted-firmware-a/trusted-firmware-a_%.bbappend |    1
-rw-r--r--  meta-arm-gcs/recipes-core/glibc/files/gcs.patch                          | 1668
-rw-r--r--  meta-arm-gcs/recipes-core/glibc/glibc_2.39.bbappend                      |    3
-rw-r--r--  meta-arm-gcs/recipes-devtools/binutils/binutils_2.42.bbappend            |    3
-rw-r--r--  meta-arm-gcs/recipes-devtools/binutils/files/gcs.patch                   |  973
-rw-r--r--  meta-arm-gcs/recipes-devtools/gcc/files/gcs.patch                        | 1872
-rw-r--r--  meta-arm-gcs/recipes-devtools/gcc/gcc-source_14.0.bbappend               |    3
-rw-r--r--  meta-arm-gcs/recipes-kernel/linux/files/gcs.patch                        | 7723
-rw-r--r--  meta-arm-gcs/recipes-kernel/linux/linux-yocto-dev.bbappend               |   13
14 files changed, 12377 insertions(+), 0 deletions(-)
diff --git a/meta-arm-gcs/.gitignore b/meta-arm-gcs/.gitignore new file mode 100644 index 00000000..340e1745 --- /dev/null +++ b/meta-arm-gcs/.gitignore @@ -0,0 +1,3 @@ +build +meta-arm +poky diff --git a/meta-arm-gcs/COPYING.MIT b/meta-arm-gcs/COPYING.MIT new file mode 100644 index 00000000..fb950dc6 --- /dev/null +++ b/meta-arm-gcs/COPYING.MIT @@ -0,0 +1,17 @@ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/meta-arm-gcs/README b/meta-arm-gcs/README new file mode 100644 index 00000000..74286a86 --- /dev/null +++ b/meta-arm-gcs/README @@ -0,0 +1,33 @@ +Guarded Control Stack prototype +=============================== + +To build: + +First, install Kas if you don't already have it: + +$ pip3 install --user kas + +Then use Kas to build an image: + +$ kas build --update gcs.yml + +To run the image inside an FVP: + +$ kas shell gcs.yml -c "../../scripts/runfvp --console" + + +Verification: + +On boot, the kernel should detect that the cores have GCS: + + CPU features: detected: Guarded Control Stack (GCS) + +Binaries should be tagged with PAC/BTI/GCS: + + $ readelf -n /bin/bash | grep AArch + Properties: AArch64 feature: BTI, PAC, GCS + +Enabling GCS tunables shouldn't cause crashes: + + $ GLIBC_TUNABLES=glibc.cpu.aarch64_gcs=1:glibc.cpu.aarch64_gcs_policy=2 ls / + bin boot dev etc home lib lost+found media mnt proc run sbin sys tmp usr var diff --git a/meta-arm-gcs/conf/layer.conf b/meta-arm-gcs/conf/layer.conf new file mode 100644 index 00000000..c0649cc5 --- /dev/null +++ b/meta-arm-gcs/conf/layer.conf @@ -0,0 +1,13 @@ +# We have a conf and classes directory, add to BBPATH +BBPATH .= ":${LAYERDIR}" + +# We have recipes-* directories, add to BBFILES +BBFILES += "${LAYERDIR}/recipes-*/*/*.bb \ + ${LAYERDIR}/recipes-*/*/*.bbappend" + +BBFILE_COLLECTIONS += "meta-arm-gcs" +BBFILE_PATTERN_meta-arm-gcs = "^${LAYERDIR}/" +BBFILE_PRIORITY_meta-arm-gcs = "5" + +LAYERDEPENDS_meta-arm-gcs = "core meta-arm-bsp" +LAYERSERIES_COMPAT_meta-arm-gcs = "scarthgap" diff --git a/meta-arm-gcs/gcs.yml b/meta-arm-gcs/gcs.yml new file mode 100644 index 00000000..e3d3d1c8 --- /dev/null +++ b/meta-arm-gcs/gcs.yml @@ -0,0 +1,52 @@ +header: + version: 14 + +distro: poky + +machine: fvp-base + +repos: + meta-arm: + layers: + meta-arm: + meta-arm-bsp: + meta-arm-toolchain: + meta-arm-gcs: + + poky: + url: https://git.yoctoproject.org/git/poky-contrib + branch: ross/gcc14 + layers: + meta: 
+ meta-poky: + +local_conf_header: + setup: | + # We accept the FVP EULA + LICENSE_FLAGS_ACCEPTED += "Arm-FVP-EULA" + # Enable running an image inside a FVP + IMAGE_CLASSES += "fvpboot" + # Use the 6.8 kernel + PREFERRED_PROVIDER_virtual/kernel = "linux-yocto-dev" + # No root password for ease + EXTRA_IMAGE_FEATURES += "empty-root-password" + # Install a toolchain + EXTRA_IMAGE_FEATURES += "tools-sdk" + # Install a pre-generated SSH key because key generation in a FVP is tiresome + CORE_IMAGE_EXTRA_INSTALL += "ssh-pregen-hostkeys" + + fvp_options: | + # Set the cores to v9.4 and turn on more features + FVP_CONFIG[cluster0.has_arm_v9-4] = "1" + FVP_CONFIG[cluster1.has_arm_v9-4] = "1" + FVP_CONFIG[cluster0.has_branch_target_exception] = "1" + FVP_CONFIG[cluster1.has_branch_target_exception] = "1" + FVP_CONFIG[cluster0.has_gcs] = "1" + FVP_CONFIG[cluster1.has_gcs] = "1" + FVP_CONFIG[cluster0.has_chkfeat] = "1" + FVP_CONFIG[cluster1.has_chkfeat] = "1" + FVP_CONFIG[cluster0.has_permission_indirection_s1] = "1" + FVP_CONFIG[cluster1.has_permission_indirection_s1] = "1" + +target: + - core-image-full-cmdline diff --git a/meta-arm-gcs/recipes-bsp/trusted-firmware-a/trusted-firmware-a_%.bbappend b/meta-arm-gcs/recipes-bsp/trusted-firmware-a/trusted-firmware-a_%.bbappend new file mode 100644 index 00000000..e8304667 --- /dev/null +++ b/meta-arm-gcs/recipes-bsp/trusted-firmware-a/trusted-firmware-a_%.bbappend @@ -0,0 +1 @@ +EXTRA_OEMAKE:append:fvp-base = " CTX_INCLUDE_AARCH32_REGS=0" diff --git a/meta-arm-gcs/recipes-core/glibc/files/gcs.patch b/meta-arm-gcs/recipes-core/glibc/files/gcs.patch new file mode 100644 index 00000000..289246d2 --- /dev/null +++ b/meta-arm-gcs/recipes-core/glibc/files/gcs.patch @@ -0,0 +1,1668 @@ +Upstream-Status: Pending [https://sourceware.org/git/?p=glibc.git;a=shortlog;h=refs/heads/arm/gcs] +Signed-off-by: Ross Burton <ross.burton@arm.com> + +From ccb4e98a6cbdf5b8939a6ae416248bb1436a1338 Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy 
<szabolcs.nagy@arm.com> +Date: Thu, 13 Jul 2023 06:43:33 +0100 +Subject: [PATCH 01/21] aarch64: Add HWCAP2_GCS + +--- + sysdeps/unix/sysv/linux/aarch64/bits/hwcap.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/sysdeps/unix/sysv/linux/aarch64/bits/hwcap.h b/sysdeps/unix/sysv/linux/aarch64/bits/hwcap.h +index 91f0e312b9..476cf5a299 100644 +--- a/sysdeps/unix/sysv/linux/aarch64/bits/hwcap.h ++++ b/sysdeps/unix/sysv/linux/aarch64/bits/hwcap.h +@@ -100,3 +100,4 @@ + #define HWCAP2_SME_F16F16 (1UL << 42) + #define HWCAP2_MOPS (1UL << 43) + #define HWCAP2_HBC (1UL << 44) ++#define HWCAP2_GCS (1UL << 48) +-- +2.34.1 + + +From 15d1674cae644619d521af249b3a4f1c17cdcee9 Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Wed, 22 Feb 2023 14:35:00 +0000 +Subject: [PATCH 02/21] aarch64: Add asm helpers for GCS + +The Guarded Control Stack instructions can be present even if the +hardware does not support the extension (runtime checked feature), +so the asm code should be backward compatible with old assemblers. +--- + sysdeps/aarch64/sysdep.h | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/sysdeps/aarch64/sysdep.h b/sysdeps/aarch64/sysdep.h +index 464674e89c..661d9fe8c1 100644 +--- a/sysdeps/aarch64/sysdep.h ++++ b/sysdeps/aarch64/sysdep.h +@@ -74,6 +74,13 @@ strip_pac (void *p) + #define PACIASP hint 25 + #define AUTIASP hint 29 + ++/* Guarded Control Stack support. */ ++#define CHKFEAT_X16 hint 40 ++#define MRS_GCSPR(x) mrs x, s3_3_c2_c5_1 ++#define GCSPOPM(x) sysl x, #3, c7, c7, #1 ++#define GCSSS1(x) sys #3, c7, c7, #2, x ++#define GCSSS2(x) sysl x, #3, c7, c7, #3 ++ + /* GNU_PROPERTY_AARCH64_* macros from elf.h for use in asm code. 
*/ + #define FEATURE_1_AND 0xc0000000 + #define FEATURE_1_BTI 1 +-- +2.34.1 + + +From 0f6417aafcb5332cea53f81daa2dca9588c8b733 Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Fri, 22 Dec 2023 09:47:33 +0000 +Subject: [PATCH 03/21] elf.h: define GNU_PROPERTY_AARCH64_FEATURE_1_GCS + +--- + elf/elf.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/elf/elf.h b/elf/elf.h +index 455731663c..f927dad3c3 100644 +--- a/elf/elf.h ++++ b/elf/elf.h +@@ -1377,6 +1377,7 @@ typedef struct + + #define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0) + #define GNU_PROPERTY_AARCH64_FEATURE_1_PAC (1U << 1) ++#define GNU_PROPERTY_AARCH64_FEATURE_1_GCS (1U << 2) + + /* The x86 instruction sets indicated by the corresponding bits are + used in program. Their support in the hardware is optional. */ +-- +2.34.1 + + +From 6c7378b54f6f8c3db222894ed27342782bf526b7 Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Fri, 22 Dec 2023 12:29:48 +0000 +Subject: [PATCH 04/21] aarch64: mark objects with GCS property note + +TODO: binutils config check +TODO: build attributes instead of gnu property +--- + sysdeps/aarch64/sysdep.h | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/sysdeps/aarch64/sysdep.h b/sysdeps/aarch64/sysdep.h +index 661d9fe8c1..55e0230b5e 100644 +--- a/sysdeps/aarch64/sysdep.h ++++ b/sysdeps/aarch64/sysdep.h +@@ -85,6 +85,7 @@ strip_pac (void *p) + #define FEATURE_1_AND 0xc0000000 + #define FEATURE_1_BTI 1 + #define FEATURE_1_PAC 2 ++#define FEATURE_1_GCS 4 + + /* Add a NT_GNU_PROPERTY_TYPE_0 note. */ + #define GNU_PROPERTY(type, value) \ +@@ -103,9 +104,9 @@ strip_pac (void *p) + /* Add GNU property note with the supported features to all asm code + where sysdep.h is included. 
*/ + #if HAVE_AARCH64_BTI && HAVE_AARCH64_PAC_RET +-GNU_PROPERTY (FEATURE_1_AND, FEATURE_1_BTI|FEATURE_1_PAC) ++GNU_PROPERTY (FEATURE_1_AND, FEATURE_1_BTI|FEATURE_1_PAC|FEATURE_1_GCS) + #elif HAVE_AARCH64_BTI +-GNU_PROPERTY (FEATURE_1_AND, FEATURE_1_BTI) ++GNU_PROPERTY (FEATURE_1_AND, FEATURE_1_BTI|FEATURE_1_GCS) + #endif + + /* Define an entry point visible from C. */ +-- +2.34.1 + + +From e6bc31c117194bfadcf10a6c90b6586800a33a11 Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Fri, 25 Nov 2022 18:16:07 +0000 +Subject: [PATCH 05/21] aarch64: Define jmp_buf offset for GCS + +The target specific internal __longjmp is called with a __jmp_buf +argument which has its size exposed in the ABI. On aarch64 this has +no space left, so GCSPR cannot be restored in longjmp in the usual +way, which is needed for the Guarded Control Stack (GCS) extension. + +setjmp is implemented via __sigsetjmp which has a jmp_buf argument +however it is also called with __pthread_unwind_buf_t argument cast +to jmp_buf (in cancellation cleanup code built with -fno-exception). +The two types, jmp_buf and __pthread_unwind_buf_t, have common bits +beyond the __jmp_buf field and there is unused space there which we +can use for saving GCSPR. + +For this to work some bits of those two generic types have to be +reserved for target specific use and the generic code in glibc has +to ensure that __longjmp is always called with a __jmp_buf that is +embedded into one of those two types. Morally __longjmp should be +changed to take jmp_buf as argument, but that is an intrusive change +across targets. + +Note: longjmp is never called with __pthread_unwind_buf_t from user +code, only the internal __libc_longjmp is called with that type and +thus the two types could have separate longjmp implementations on a +target. We don't rely on this now (but migh in the future given that +cancellation unwind does not need to restore GCSPR). 
+ +Given the above this patch finds an unused slot for GCSPR. This +placement is not exposed in the ABI so it may change in the future. +This is also very target ABI specific so the generic types cannot +be easily changed to clearly mark the reserved fields. +--- + sysdeps/aarch64/jmpbuf-offsets.h | 63 ++++++++++++++++++++++++++++++++ + 1 file changed, 63 insertions(+) + +diff --git a/sysdeps/aarch64/jmpbuf-offsets.h b/sysdeps/aarch64/jmpbuf-offsets.h +index 632328c7e2..ec047cf6b1 100644 +--- a/sysdeps/aarch64/jmpbuf-offsets.h ++++ b/sysdeps/aarch64/jmpbuf-offsets.h +@@ -39,6 +39,69 @@ + #define JB_D14 20 + #define JB_D15 21 + ++/* The target specific part of jmp_buf has no space for expansion but ++ the public jmp_buf ABI type has. Unfortunately there is another type ++ that is used with setjmp APIs and exposed by thread cancellation (in ++ binaries built with -fno-exceptions) which complicates the situation. ++ ++ // Internal layout of the public jmp_buf type on AArch64. ++ // This is passed to setjmp, longjmp, sigsetjmp, siglongjmp. ++ struct ++ { ++ uint64_t jmpbuf[22]; // Target specific part. ++ uint32_t mask_was_saved; // savemask bool used by sigsetjmp/siglongjmp. ++ uint32_t pad; ++ uint64_t saved_mask; // sigset_t bits used on linux. ++ uint64_t unused[15]; // sigset_t bits not used on linux. ++ }; ++ ++ // Internal layout of the public __pthread_unwind_buf_t type. ++ // This is passed to sigsetjmp with !savemask and to the internal ++ // __libc_longjmp (currently alias of longjmp on AArch64). ++ struct ++ { ++ uint64_t jmpbuf[22]; // Must match jmp_buf. ++ uint32_t mask_was_saved; // Must match jmp_buf, always 0. ++ uint32_t pad; ++ void *prev; // List for unwinding. ++ void *cleanup; // Cleanup handlers. ++ uint32_t canceltype; // 1 bit cancellation type. ++ uint32_t pad2; ++ void *pad3; ++ }; ++ ++ Ideally only the target specific part of jmp_buf (A) is accessed by ++ __setjmp and __longjmp. 
But that is always embedded into one of the ++ two types above so the bits that are unused in those types (B) may be ++ reused for target specific purposes. Setjmp can't distinguish between ++ jmp_buf and __pthread_unwind_buf_t, but longjmp can: only an internal ++ longjmp call uses the latter, so state that is not needed for cancel ++ cleanups can go to fields (C). If generic code is refactored then the ++ usage of additional fields can be optimized (D). And some fields are ++ only accessible in the savedmask case (E). Reusability of jmp_buf ++ fields on AArch64 for target purposes: ++ ++ struct ++ { ++ uint64_t A[22]; // 0 .. 176 ++ uint32_t D; // 176 .. 180 ++ uint32_t B; // 180 .. 184 ++ uint64_t D; // 184 .. 192 ++ uint64_t C; // 192 .. 200 ++ uint32_t C; // 200 .. 204 ++ uint32_t B; // 204 .. 208 ++ uint64_t B; // 208 .. 216 ++ uint64_t E[12]; // 216 .. 312 ++ } ++ ++ The B fields can be used with minimal glibc code changes. We need a ++ 64 bit field for the Guarded Control Stack pointer (GCSPR_EL0) which ++ can use a C field too as cancellation cleanup does not execute RET ++ for a previous BL of the cancelled thread, but that would require a ++ custom __libc_longjmp. This layout can change in the future. ++*/ ++#define JB_GCSPR 208 ++ + #ifndef __ASSEMBLER__ + #include <setjmp.h> + #include <stdint.h> +-- +2.34.1 + + +From c3274a8582b4915efea5a16558e730d362bea177 Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Thu, 23 Feb 2023 08:54:04 +0000 +Subject: [PATCH 06/21] aarch64: Add GCS support to longjmp + +This implementations ensures that longjmp across different stacks +works: it scans for GCS cap token and switches GCS if necessary +then the target GCSPR is restored with a GCSPOPM loop once the +current GCSPR is on the same GCS. + +This makes longjmp linear time in the number of jumped over stack +frames when GCS is enabled. 
+--- + sysdeps/aarch64/__longjmp.S | 31 +++++++++++++++++++++++++++++++ + sysdeps/aarch64/setjmp.S | 10 ++++++++++ + 2 files changed, 41 insertions(+) + +diff --git a/sysdeps/aarch64/__longjmp.S b/sysdeps/aarch64/__longjmp.S +index 7b6add751e..ecd272262d 100644 +--- a/sysdeps/aarch64/__longjmp.S ++++ b/sysdeps/aarch64/__longjmp.S +@@ -91,6 +91,37 @@ ENTRY (__longjmp) + ldp d12, d13, [x0, #JB_D12<<3] + ldp d14, d15, [x0, #JB_D14<<3] + ++ /* GCS support. */ ++ mov x16, 1 ++ CHKFEAT_X16 ++ tbnz x16, 0, L(gcs_done) ++ MRS_GCSPR (x2) ++ ldr x3, [x0, #JB_GCSPR] ++ mov x4, x3 ++ /* x2: GCSPR now. x3, x4: target GCSPR. x5, x6: tmp regs. */ ++L(gcs_scan): ++ cmp x2, x4 ++ b.eq L(gcs_pop) ++ sub x4, x4, 8 ++ /* Check for a cap token. */ ++ ldr x5, [x4] ++ and x6, x4, 0xfffffffffffff000 ++ orr x6, x6, 1 ++ cmp x5, x6 ++ b.eq L(gcs_switch) ++ b L(gcs_scan) ++L(gcs_switch): ++ add x2, x4, 8 ++ GCSSS1 (x4) ++ GCSSS2 (xzr) ++L(gcs_pop): ++ cmp x2, x3 ++ b.eq L(gcs_done) ++ GCSPOPM (xzr) ++ add x2, x2, 8 ++ b L(gcs_pop) ++L(gcs_done): ++ + /* Originally this was implemented with a series of + .cfi_restore() directives. + +diff --git a/sysdeps/aarch64/setjmp.S b/sysdeps/aarch64/setjmp.S +index 43fdb1b2fb..f7ffccfaba 100644 +--- a/sysdeps/aarch64/setjmp.S ++++ b/sysdeps/aarch64/setjmp.S +@@ -57,6 +57,16 @@ ENTRY (__sigsetjmp) + stp d10, d11, [x0, #JB_D10<<3] + stp d12, d13, [x0, #JB_D12<<3] + stp d14, d15, [x0, #JB_D14<<3] ++ ++ /* GCS support. */ ++ mov x16, 1 ++ CHKFEAT_X16 ++ tbnz x16, 0, L(gcs_done) ++ MRS_GCSPR (x2) ++ add x2, x2, 8 /* GCS state right after setjmp returns. 
*/ ++ str x2, [x0, #JB_GCSPR] ++L(gcs_done): ++ + #ifdef PTR_MANGLE + mov x4, sp + PTR_MANGLE (5, 4, 3, 2) +-- +2.34.1 + + +From 868c129b90a52f7c30b8a560dc580f851db4b6fc Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Wed, 27 Dec 2023 18:36:10 +0000 +Subject: [PATCH 07/21] aarch64: Add GCS support to vfork + +--- + sysdeps/unix/sysv/linux/aarch64/vfork.S | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/sysdeps/unix/sysv/linux/aarch64/vfork.S b/sysdeps/unix/sysv/linux/aarch64/vfork.S +index e71e492da3..cfaf4a1ffb 100644 +--- a/sysdeps/unix/sysv/linux/aarch64/vfork.S ++++ b/sysdeps/unix/sysv/linux/aarch64/vfork.S +@@ -33,8 +33,14 @@ ENTRY (__vfork) + + cmn x0, #4095 + b.cs .Lsyscall_error ++ cbz x0, L(child) + RET +- ++L(child): ++ /* Return with indirect branch in the child to support GCS. ++ Clear x30 to crash early if the child tries to ret. */ ++ mov x1, x30 ++ mov x30, 0 ++ br x1 + PSEUDO_END (__vfork) + libc_hidden_def (__vfork) + +-- +2.34.1 + + +From 996a31b2a3313039b5c66d7de15577e5bc145278 Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Fri, 24 Feb 2023 16:29:32 +0000 +Subject: [PATCH 08/21] aarch64: Add GCS support for setcontext + +Userspace ucontext needs to store GCSPR, it does not have to be +compatible with the kernel ucontext. For now we use the linux +struct gcs_context layout but only use the gcspr field from it. + +Similar implementation to the longjmp code, supports switching GCS +if the target GCS is capped, and unwinding a continous GCS to a +previous state. 
+--- + sysdeps/unix/sysv/linux/aarch64/getcontext.S | 17 +++++++- + sysdeps/unix/sysv/linux/aarch64/setcontext.S | 39 +++++++++++++++++++ + sysdeps/unix/sysv/linux/aarch64/swapcontext.S | 32 +++++++++++---- + .../sysv/linux/aarch64/ucontext-internal.h | 5 +++ + 4 files changed, 84 insertions(+), 9 deletions(-) + +diff --git a/sysdeps/unix/sysv/linux/aarch64/getcontext.S b/sysdeps/unix/sysv/linux/aarch64/getcontext.S +index e5b69c9a82..30e2b39399 100644 +--- a/sysdeps/unix/sysv/linux/aarch64/getcontext.S ++++ b/sysdeps/unix/sysv/linux/aarch64/getcontext.S +@@ -83,9 +83,24 @@ ENTRY(__getcontext) + mrs x4, fpcr + str w4, [x3, oFPCR - oFPSR] + +- /* Write the termination context extension header. */ + add x2, x2, #FPSIMD_CONTEXT_SIZE + ++ /* Save the GCSPR. */ ++ mov x16, 1 ++ CHKFEAT_X16 ++ tbnz x16, 0, L(gcs_done) ++ mov w3, #(GCS_MAGIC & 0xffff) ++ movk w3, #(GCS_MAGIC >> 16), lsl #16 ++ str w3, [x2, #oHEAD + oMAGIC] ++ mov w3, #GCS_CONTEXT_SIZE ++ str w3, [x2, #oHEAD + oSIZE] ++ MRS_GCSPR (x4) ++ add x4, x4, 8 /* GCS state right after getcontext returns. */ ++ str x4, [x2, #oGCSPR] ++ add x2, x2, #GCS_CONTEXT_SIZE ++L(gcs_done): ++ ++ /* Write the termination context extension header. */ + str wzr, [x2, #oHEAD + oMAGIC] + str wzr, [x2, #oHEAD + oSIZE] + +diff --git a/sysdeps/unix/sysv/linux/aarch64/setcontext.S b/sysdeps/unix/sysv/linux/aarch64/setcontext.S +index ba659438c5..c08e83ee60 100644 +--- a/sysdeps/unix/sysv/linux/aarch64/setcontext.S ++++ b/sysdeps/unix/sysv/linux/aarch64/setcontext.S +@@ -130,6 +130,45 @@ ENTRY (__setcontext) + ldr w4, [x3, oFPCR - oFPSR] + msr fpcr, x4 + ++ /* Restore the GCS. */ ++ mov x16, 1 ++ CHKFEAT_X16 ++ tbnz x16, 0, L(gcs_done) ++ /* Get target GCS from GCS context. 
*/ ++ ldr w1, [x2, #oHEAD + oSIZE] ++ add x2, x2, x1 ++ mov w3, #(GCS_MAGIC & 0xffff) ++ movk w3, #(GCS_MAGIC >> 16), lsl #16 ++ ldr w1, [x2, #oHEAD + oMAGIC] ++ cmp w1, w3 ++ b.ne L(gcs_done) ++ ldr x3, [x2, #oGCSPR] ++ MRS_GCSPR (x2) ++ mov x4, x3 ++ /* x2: GCSPR now. x3, x4: target GCSPR. x5, x6: tmp regs. */ ++L(gcs_scan): ++ cmp x2, x4 ++ b.eq L(gcs_pop) ++ sub x4, x4, 8 ++ /* Check for a cap token. */ ++ ldr x5, [x4] ++ and x6, x4, 0xfffffffffffff000 ++ orr x6, x6, 1 ++ cmp x5, x6 ++ b.eq L(gcs_switch) ++ b L(gcs_scan) ++L(gcs_switch): ++ add x2, x4, 8 ++ GCSSS1 (x4) ++ GCSSS2 (xzr) ++L(gcs_pop): ++ cmp x2, x3 ++ b.eq L(gcs_done) ++ GCSPOPM (xzr) ++ add x2, x2, 8 ++ b L(gcs_pop) ++L(gcs_done): ++ + 2: + ldr x16, [x0, oPC] + /* Restore arg registers. */ +diff --git a/sysdeps/unix/sysv/linux/aarch64/swapcontext.S b/sysdeps/unix/sysv/linux/aarch64/swapcontext.S +index f049140d35..45b1277c74 100644 +--- a/sysdeps/unix/sysv/linux/aarch64/swapcontext.S ++++ b/sysdeps/unix/sysv/linux/aarch64/swapcontext.S +@@ -32,8 +32,15 @@ ENTRY(__swapcontext) + And set up x1 to become the return address of the caller, so we + can return there with a normal RET instead of an indirect jump. */ + stp xzr, x30, [x0, oX0 + 0 * SZREG] ++ ++ /* With GCS, swapcontext calls are followed by BTI J, otherwise ++ we have to be compatible with old BTI enabled binaries. */ ++ mov x16, 1 ++ CHKFEAT_X16 ++ tbz x16, 0, L(skip_x30_redirect) + /* Arrange the oucp context to return to 2f. */ + adr x30, 2f ++L(skip_x30_redirect): + + stp x18, x19, [x0, oX0 + 18 * SZREG] + stp x20, x21, [x0, oX0 + 20 * SZREG] +@@ -72,14 +79,27 @@ ENTRY(__swapcontext) + mrs x4, fpcr + str w4, [x3, #oFPCR - oFPSR] + +- /* Write the termination context extension header. */ + add x2, x2, #FPSIMD_CONTEXT_SIZE + ++ /* Save the GCSPR. 
*/ ++ tbnz x16, 0, L(gcs_done) ++ mov w3, #(GCS_MAGIC & 0xffff) ++ movk w3, #(GCS_MAGIC >> 16), lsl #16 ++ str w3, [x2, #oHEAD + oMAGIC] ++ mov w3, #GCS_CONTEXT_SIZE ++ str w3, [x2, #oHEAD + oSIZE] ++ MRS_GCSPR (x4) ++ add x4, x4, 8 /* GCSPR of the caller. */ ++ str x4, [x2, #oGCSPR] ++ add x2, x2, #GCS_CONTEXT_SIZE ++L(gcs_done): ++ ++ /* Write the termination context extension header. */ + str wzr, [x2, #oHEAD + oMAGIC] + str wzr, [x2, #oHEAD + oSIZE] + + /* Preserve ucp. */ +- mov x21, x1 ++ mov x9, x1 + + /* rt_sigprocmask (SIG_SETMASK, &ucp->uc_sigmask, &oucp->uc_sigmask, + _NSIG8) */ +@@ -93,12 +113,8 @@ ENTRY(__swapcontext) + svc 0 + cbnz x0, 1f + +- mov x22, x30 +- mov x0, x21 +- bl JUMPTARGET (__setcontext) +- mov x30, x22 +- RET +- ++ mov x0, x9 ++ b JUMPTARGET (__setcontext) + 1: + b C_SYMBOL_NAME(__syscall_error) + 2: +diff --git a/sysdeps/unix/sysv/linux/aarch64/ucontext-internal.h b/sysdeps/unix/sysv/linux/aarch64/ucontext-internal.h +index 096d5fb7c7..84f5365c0e 100644 +--- a/sysdeps/unix/sysv/linux/aarch64/ucontext-internal.h ++++ b/sysdeps/unix/sysv/linux/aarch64/ucontext-internal.h +@@ -43,3 +43,8 @@ + #define oX21 (oX0 + 21*8) + #define oFP (oX0 + 29*8) + #define oLR (oX0 + 30*8) ++ ++/* Use kernel layout for saving GCSPR in ucontext. 
*/ ++#define GCS_MAGIC 0x47435300 ++#define GCS_CONTEXT_SIZE 32 ++#define oGCSPR 8 +-- +2.34.1 + + +From 99f884d57f17afea877006284f469747711e1d51 Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Thu, 28 Dec 2023 15:53:38 +0000 +Subject: [PATCH 09/21] aarch64: mark swapcontext with indirect_return + +--- + sysdeps/aarch64/bits/indirect-return.h | 36 ++++++++++++++++++++++++++ + 1 file changed, 36 insertions(+) + create mode 100644 sysdeps/aarch64/bits/indirect-return.h + +diff --git a/sysdeps/aarch64/bits/indirect-return.h b/sysdeps/aarch64/bits/indirect-return.h +new file mode 100644 +index 0000000000..99905f21fa +--- /dev/null ++++ b/sysdeps/aarch64/bits/indirect-return.h +@@ -0,0 +1,36 @@ ++/* Definition of __INDIRECT_RETURN. AArch64 version. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#ifndef _UCONTEXT_H ++# error "Never include <bits/indirect-return.h> directly; use <ucontext.h> instead." ++#endif ++ ++/* __INDIRECT_RETURN indicates that swapcontext may return via ++ an indirect branch. 
This happens when GCS is enabled, so ++ add the attribute if available, otherwise returns_twice has ++ a similar effect, but it prevents some code transformations ++ that can cause build failures in some rare cases so it is ++ only used when GCS is enabled. */ ++#if __glibc_has_attribute (__indirect_return__) ++# define __INDIRECT_RETURN __attribute__ ((__indirect_return__)) ++#elif __glibc_has_attribute (__returns_twice__) \ ++ && defined __ARM_FEATURE_GCS_DEFAULT ++# define __INDIRECT_RETURN __attribute__ ((__returns_twice__)) ++#else ++# define __INDIRECT_RETURN ++#endif +-- +2.34.1 + + +From 54e90582aabdb44f010ad5dfd64ce9c3e6d33914 Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Tue, 4 Apr 2023 10:42:21 +0100 +Subject: [PATCH 10/21] aarch64: Add GCS support for makecontext + +Changed the makecontext logic: previously the first setcontext jumped +straight to the user callback function and the return address is set +to __startcontext. This does not work when GCS is enabled as the +integrity of the return address is protected, so instead the context +is setup such that setcontext jumps to __startcontext which calls the +user callback (passed in x20). + +The map_shadow_stack syscall is used to allocate a suitably sized GCS +(which includes some reserved area to account for altstack signal +handlers and otherwise supports maximum number of 16 byte aligned +stack frames on the given stack) however the GCS is never freed as +the lifetime of ucontext and related stack is user managed. 
+--- + sysdeps/unix/sysv/linux/aarch64/makecontext.c | 61 ++++++++++++++++++- + sysdeps/unix/sysv/linux/aarch64/setcontext.S | 4 ++ + 2 files changed, 63 insertions(+), 2 deletions(-) + +diff --git a/sysdeps/unix/sysv/linux/aarch64/makecontext.c b/sysdeps/unix/sysv/linux/aarch64/makecontext.c +index 5aec182013..9e66b6761c 100644 +--- a/sysdeps/unix/sysv/linux/aarch64/makecontext.c ++++ b/sysdeps/unix/sysv/linux/aarch64/makecontext.c +@@ -22,6 +22,52 @@ + #include <stdint.h> + #include <ucontext.h> + ++#define GCS_MAGIC 0x47435300 ++ ++static struct _aarch64_ctx *extension (void *p) ++{ ++ return p; ++} ++ ++#ifndef __NR_map_shadow_stack ++# define __NR_map_shadow_stack 453 ++#endif ++#ifndef SHADOW_STACK_SET_TOKEN ++# define SHADOW_STACK_SET_TOKEN (1UL << 0) ++# define SHADOW_STACK_SET_MARKER (1UL << 1) ++#endif ++ ++static void * ++map_shadow_stack (void *addr, size_t size, unsigned long flags) ++{ ++ return (void *) INLINE_SYSCALL_CALL (map_shadow_stack, addr, size, flags); ++} ++ ++#define GCS_MAX_SIZE (1UL << 31) ++#define GCS_ALTSTACK_RESERVE 160 ++ ++static void * ++alloc_makecontext_gcs (size_t stack_size) ++{ ++ size_t size = (stack_size / 2 + GCS_ALTSTACK_RESERVE) & -8UL; ++ if (size > GCS_MAX_SIZE) ++ size = GCS_MAX_SIZE; ++ ++ unsigned long flags = SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN; ++ void *base = map_shadow_stack (NULL, size, flags); ++ if (base == (void *) -1) ++ /* ENOSYS, bad size or OOM. */ ++ abort (); ++ uint64_t *gcsp = (uint64_t *) ((char *) base + size); ++ /* Skip end of GCS token. */ ++ gcsp--; ++ /* Verify GCS cap token. */ ++ gcsp--; ++ if (((uint64_t)gcsp & 0xfffffffffffff000) + 1 != *gcsp) ++ abort (); ++ /* Return the target GCS pointer for context switch. */ ++ return gcsp + 1; ++} + + /* makecontext sets up a stack and the registers for the + user context. The stack looks like this: +@@ -56,10 +102,21 @@ __makecontext (ucontext_t *ucp, void (*func) (void), int argc, ...) 
+ sp = (uint64_t *) (((uintptr_t) sp) & -16L); + + ucp->uc_mcontext.regs[19] = (uintptr_t) ucp->uc_link; ++ ucp->uc_mcontext.regs[20] = (uintptr_t) func; + ucp->uc_mcontext.sp = (uintptr_t) sp; +- ucp->uc_mcontext.pc = (uintptr_t) func; ++ ucp->uc_mcontext.pc = (uintptr_t) __startcontext; + ucp->uc_mcontext.regs[29] = (uintptr_t) 0; +- ucp->uc_mcontext.regs[30] = (uintptr_t) &__startcontext; ++ ucp->uc_mcontext.regs[30] = (uintptr_t) 0; ++ ++ void *p = ucp->uc_mcontext.__reserved; ++ if (extension (p)->magic == FPSIMD_MAGIC) ++ p = (char *)p + extension (p)->size; ++ if (extension (p)->magic == GCS_MAGIC) ++ { ++ /* Using the kernel struct gcs_context layout. */ ++ struct { uint64_t x, gcspr, y, z; } *q = p; ++ q->gcspr = (uint64_t) alloc_makecontext_gcs (ucp->uc_stack.ss_size); ++ } + + va_start (ap, argc); + for (i = 0; i < argc; ++i) +diff --git a/sysdeps/unix/sysv/linux/aarch64/setcontext.S b/sysdeps/unix/sysv/linux/aarch64/setcontext.S +index c08e83ee60..6aa7236693 100644 +--- a/sysdeps/unix/sysv/linux/aarch64/setcontext.S ++++ b/sysdeps/unix/sysv/linux/aarch64/setcontext.S +@@ -181,7 +181,11 @@ L(gcs_done): + PSEUDO_END (__setcontext) + weak_alias (__setcontext, setcontext) + ++/* makecontext start function: receives uc_link in x19 and func in x20. ++ Arguments of func, x29, x30 and sp are set up by the caller. */ + ENTRY (__startcontext) ++ cfi_undefined (x30) ++ blr x20 + mov x0, x19 + cbnz x0, __setcontext + 1: b HIDDEN_JUMPTARGET (exit) +-- +2.34.1 + + +From 56253d5f47330f502dd6bc8f3e12eeabf6c20a8b Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Mon, 17 Jul 2023 16:54:15 +0100 +Subject: [PATCH 11/21] aarch64: Try to free the GCS of makecontext + +Free GCS after a makecontext start func returns and at thread exit, so +assume makecontext cannot outlive the thread where it was created. 
+ +This is an attempt to bound the lifetime of the GCS allocated for +makecontext, but it is still possible to have significant GCS leaks, +new GCS aware APIs could solve that, but that would not allow using +GCS with existing code transparently. +--- + include/set-freeres.h | 4 ++ + malloc/thread-freeres.c | 3 + + sysdeps/unix/sysv/linux/aarch64/makecontext.c | 65 +++++++++++++++++++ + sysdeps/unix/sysv/linux/aarch64/setcontext.S | 19 +++++- + sysdeps/unix/sysv/linux/aarch64/sysdep.h | 6 +- + 5 files changed, 93 insertions(+), 4 deletions(-) + +diff --git a/include/set-freeres.h b/include/set-freeres.h +index 4177b453fa..c3d64b4f41 100644 +--- a/include/set-freeres.h ++++ b/include/set-freeres.h +@@ -78,6 +78,10 @@ extern void __nss_database_freeres (void) attribute_hidden; + extern int _IO_cleanup (void) attribute_hidden;; + /* From dlfcn/dlerror.c */ + extern void __libc_dlerror_result_free (void) attribute_hidden; ++/* From libc.so, arch specific. */ ++#ifdef ARCH_THREAD_FREERES ++extern void ARCH_THREAD_FREERES (void) attribute_hidden; ++#endif + + /* From either libc.so or libpthread.so */ + extern void __libpthread_freeres (void) attribute_hidden; +diff --git a/malloc/thread-freeres.c b/malloc/thread-freeres.c +index 55ba4e7b83..69867f3a3b 100644 +--- a/malloc/thread-freeres.c ++++ b/malloc/thread-freeres.c +@@ -29,6 +29,9 @@ + void + __libc_thread_freeres (void) + { ++#ifdef ARCH_THREAD_FREERES ++ call_function_static_weak (ARCH_THREAD_FREERES); ++#endif + #if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_32) + __rpc_thread_destroy (); + #endif +diff --git a/sysdeps/unix/sysv/linux/aarch64/makecontext.c b/sysdeps/unix/sysv/linux/aarch64/makecontext.c +index 9e66b6761c..779f7e55aa 100644 +--- a/sysdeps/unix/sysv/linux/aarch64/makecontext.c ++++ b/sysdeps/unix/sysv/linux/aarch64/makecontext.c +@@ -20,7 +20,9 @@ + #include <sysdep.h> + #include <stdarg.h> + #include <stdint.h> ++#include <stdlib.h> + #include <ucontext.h> ++#include <sys/mman.h> + + #define GCS_MAGIC 
0x47435300 + +@@ -29,6 +31,47 @@ static struct _aarch64_ctx *extension (void *p) + return p; + } + ++struct gcs_list { ++ struct gcs_list *next; ++ void *base; ++ size_t size; ++}; ++ ++static __thread struct gcs_list *gcs_list_head = NULL; ++ ++static void ++record_gcs (void *base, size_t size) ++{ ++ struct gcs_list *p = malloc (sizeof *p); ++ if (p == NULL) ++ abort (); ++ p->base = base; ++ p->size = size; ++ p->next = gcs_list_head; ++ gcs_list_head = p; ++} ++ ++static void ++free_gcs_list (void) ++{ ++ for (;;) ++ { ++ struct gcs_list *p = gcs_list_head; ++ if (p == NULL) ++ break; ++ gcs_list_head = p->next; ++ __munmap (p->base, p->size); ++ free (p); ++ } ++} ++ ++/* Called during thread shutdown to free resources. */ ++void ++__libc_aarch64_thread_freeres (void) ++{ ++ free_gcs_list (); ++} ++ + #ifndef __NR_map_shadow_stack + # define __NR_map_shadow_stack 453 + #endif +@@ -58,6 +101,9 @@ alloc_makecontext_gcs (size_t stack_size) + if (base == (void *) -1) + /* ENOSYS, bad size or OOM. */ + abort (); ++ ++ record_gcs (base, size); ++ + uint64_t *gcsp = (uint64_t *) ((char *) base + size); + /* Skip end of GCS token. */ + gcsp--; +@@ -69,6 +115,25 @@ alloc_makecontext_gcs (size_t stack_size) + return gcsp + 1; + } + ++void ++__free_makecontext_gcs (void *gcs) ++{ ++ struct gcs_list *p = gcs_list_head; ++ struct gcs_list **q = &gcs_list_head; ++ for (;;) ++ { ++ if (p == NULL) ++ abort (); ++ if (gcs == p->base + p->size - 8) ++ break; ++ q = &p->next; ++ p = p->next; ++ } ++ *q = p->next; ++ __munmap (p->base, p->size); ++ free (p); ++} ++ + /* makecontext sets up a stack and the registers for the + user context. 
The stack looks like this: + +diff --git a/sysdeps/unix/sysv/linux/aarch64/setcontext.S b/sysdeps/unix/sysv/linux/aarch64/setcontext.S +index 6aa7236693..723be73213 100644 +--- a/sysdeps/unix/sysv/linux/aarch64/setcontext.S ++++ b/sysdeps/unix/sysv/linux/aarch64/setcontext.S +@@ -34,6 +34,9 @@ + .text + + ENTRY (__setcontext) ++ /* If x10 is set then old GCS is freed. */ ++ mov x10, 0 ++__setcontext_internal: + PTR_ARG (0) + /* Save a copy of UCP. */ + mov x9, x0 +@@ -145,7 +148,8 @@ ENTRY (__setcontext) + ldr x3, [x2, #oGCSPR] + MRS_GCSPR (x2) + mov x4, x3 +- /* x2: GCSPR now. x3, x4: target GCSPR. x5, x6: tmp regs. */ ++ mov x1, x2 ++ /* x1, x2: GCSPR now. x3, x4: target GCSPR. x5, x6: tmp regs. */ + L(gcs_scan): + cmp x2, x4 + b.eq L(gcs_pop) +@@ -163,10 +167,18 @@ L(gcs_switch): + GCSSS2 (xzr) + L(gcs_pop): + cmp x2, x3 +- b.eq L(gcs_done) ++ b.eq L(gcs_free_old) + GCSPOPM (xzr) + add x2, x2, 8 + b L(gcs_pop) ++L(gcs_free_old): ++ cbz x10, L(gcs_done) ++ mov x28, x0 ++ mov x0, x1 ++ bl __free_makecontext_gcs ++ mov x0, x28 ++ ldp x28, x29, [x0, oX0 + 28 * SZREG] ++ ldr x30, [x0, oX0 + 30 * SZREG] + L(gcs_done): + + 2: +@@ -187,6 +199,7 @@ ENTRY (__startcontext) + cfi_undefined (x30) + blr x20 + mov x0, x19 +- cbnz x0, __setcontext ++ mov x10, 1 ++ cbnz x0, __setcontext_internal + 1: b HIDDEN_JUMPTARGET (exit) + END (__startcontext) +diff --git a/sysdeps/unix/sysv/linux/aarch64/sysdep.h b/sysdeps/unix/sysv/linux/aarch64/sysdep.h +index bbbe35723c..590318dee8 100644 +--- a/sysdeps/unix/sysv/linux/aarch64/sysdep.h ++++ b/sysdeps/unix/sysv/linux/aarch64/sysdep.h +@@ -29,8 +29,12 @@ + + #include <tls.h> + +-/* In order to get __set_errno() definition in INLINE_SYSCALL. */ + #ifndef __ASSEMBLER__ ++/* Thread cleanup function. */ ++#define ARCH_THREAD_FREERES __libc_aarch64_thread_freeres ++void __libc_aarch64_thread_freeres (void) attribute_hidden; ++ ++/* In order to get __set_errno() definition in INLINE_SYSCALL. 
*/ + #include <errno.h> + #endif + +-- +2.34.1 + + +From 200010339f4fa0449a7bd76555931881eaea916c Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Mon, 17 Jul 2023 08:31:05 +0100 +Subject: [PATCH 12/21] aarch64: Add glibc.cpu.aarch64_gcs tunable + +This tunable is for controlling the GCS status. It is the argument to +the PR_SET_SHADOW_STACK_STATUS prctl, by default 0, so GCS is disabled. + +The status is stored into GL(dl_aarch64_gcs) early and only applied +later, since enabling GCS is tricky: it must happen on a top level +stack frame. (Using GL instead of GLRO because it may need updates +depending on loaded libraries that happen after readonly protection +is applied, however library marking based GCS setting is not yet +implemented.) +--- + sysdeps/aarch64/dl-tunables.list | 5 +++ + .../unix/sysv/linux/aarch64/cpu-features.c | 4 ++ + .../unix/sysv/linux/aarch64/dl-procruntime.c | 37 +++++++++++++++++++ + 3 files changed, 46 insertions(+) + create mode 100644 sysdeps/unix/sysv/linux/aarch64/dl-procruntime.c + +diff --git a/sysdeps/aarch64/dl-tunables.list b/sysdeps/aarch64/dl-tunables.list +index 92c6cbfa92..c08be86932 100644 +--- a/sysdeps/aarch64/dl-tunables.list ++++ b/sysdeps/aarch64/dl-tunables.list +@@ -21,5 +21,10 @@ glibc { + name { + type: STRING + } ++ aarch64_gcs { ++ type: UINT_64 ++ minval: 0 ++ default: 0 ++ } + } + } +diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c +index b1a3f673f0..a8b6a4654a 100644 +--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c ++++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c +@@ -128,4 +128,8 @@ init_cpu_features (struct cpu_features *cpu_features) + + /* Check if MOPS is supported. */ + cpu_features->mops = GLRO (dl_hwcap2) & HWCAP2_MOPS; ++ ++ if (GLRO (dl_hwcap2) & HWCAP2_GCS) ++ /* GCS status may be updated later by binary compatibility checks. 
*/ ++ GL (dl_aarch64_gcs) = TUNABLE_GET (glibc, cpu, aarch64_gcs, uint64_t, 0); + } +diff --git a/sysdeps/unix/sysv/linux/aarch64/dl-procruntime.c b/sysdeps/unix/sysv/linux/aarch64/dl-procruntime.c +new file mode 100644 +index 0000000000..23c61da6c4 +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/aarch64/dl-procruntime.c +@@ -0,0 +1,37 @@ ++/* Data for processor runtime information. AArch64 version. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#ifndef PROCINFO_CLASS ++# define PROCINFO_CLASS ++#endif ++ ++#if !IS_IN (ldconfig) ++# if !defined PROCINFO_DECL && defined SHARED ++ ._dl_aarch64_gcs ++# else ++PROCINFO_CLASS unsigned long _dl_aarch64_gcs ++# endif ++# ifndef PROCINFO_DECL ++= 0 ++# endif ++# if !defined SHARED || defined PROCINFO_DECL ++; ++# else ++, ++# endif ++#endif +-- +2.34.1 + + +From cf7e262541c78c65a33ed78d39c6dc0a50b36d96 Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Thu, 13 Jul 2023 07:22:44 +0100 +Subject: [PATCH 13/21] aarch64: Enable GCS in static linked exe + +Use the ARCH_SETUP_TLS hook to enable GCS in the static linked case. +The system call must be inlined and then GCS is enabled on a top +level stack frame that does not return and has no exception handlers +above it. 
+--- + sysdeps/unix/sysv/linux/aarch64/libc-start.h | 49 ++++++++++++++++++++ + 1 file changed, 49 insertions(+) + create mode 100644 sysdeps/unix/sysv/linux/aarch64/libc-start.h + +diff --git a/sysdeps/unix/sysv/linux/aarch64/libc-start.h b/sysdeps/unix/sysv/linux/aarch64/libc-start.h +new file mode 100644 +index 0000000000..ccf0f8af5c +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/aarch64/libc-start.h +@@ -0,0 +1,49 @@ ++/* AArch64 definitions for libc main startup. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#ifndef _LIBC_START_H ++#define _LIBC_START_H ++ ++#ifndef SHARED ++ ++# ifndef PR_SET_SHADOW_STACK_STATUS ++# define PR_GET_SHADOW_STACK_STATUS 71 ++# define PR_SET_SHADOW_STACK_STATUS 72 ++# define PR_LOCK_SHADOW_STACK_STATUS 73 ++# define PR_SHADOW_STACK_ENABLE (1UL << 0) ++# define PR_SHADOW_STACK_WRITE (1UL << 1) ++# define PR_SHADOW_STACK_PUSH (1UL << 2) ++# endif ++ ++/* Must be on a top-level stack frame that does not return. 
*/ ++static inline void __attribute__((always_inline)) ++aarch64_libc_setup_tls (void) ++{ ++ __libc_setup_tls (); ++ ++ uint64_t s = GL(dl_aarch64_gcs); ++ if (s != 0) ++ INLINE_SYSCALL_CALL (prctl, PR_SET_SHADOW_STACK_STATUS, s, 0, 0, 0); ++} ++ ++# define ARCH_SETUP_IREL() apply_irel () ++# define ARCH_SETUP_TLS() aarch64_libc_setup_tls () ++# define ARCH_APPLY_IREL() ++#endif /* ! SHARED */ ++ ++#endif /* _LIBC_START_H */ +-- +2.34.1 + + +From 7ea8526a50e6867b154d2bb4fbe9de4ff2fc9468 Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Fri, 14 Jul 2023 15:49:11 +0100 +Subject: [PATCH 14/21] aarch64: Enable GCS in dynamic linked exe + +Use the dynamic linker start code to enable GCS in the dynamic linked +case after _dl_start returns and before _dl_start_user which marks +the point after which user code may run. + +Like in the static linked case this ensures that GCS is enabled on a +top level stack frame. +--- + sysdeps/aarch64/Makefile | 4 +++- + sysdeps/aarch64/dl-start.S | 23 +++++++++++++++++++++-- + sysdeps/aarch64/rtld-global-offsets.sym | 5 +++++ + 3 files changed, 29 insertions(+), 3 deletions(-) + +diff --git a/sysdeps/aarch64/Makefile b/sysdeps/aarch64/Makefile +index 141d7d9cc2..ca8b96f550 100644 +--- a/sysdeps/aarch64/Makefile ++++ b/sysdeps/aarch64/Makefile +@@ -35,7 +35,9 @@ endif + ifeq ($(subdir),elf) + sysdep-rtld-routines += dl-start + sysdep-dl-routines += tlsdesc dl-tlsdesc +-gen-as-const-headers += dl-link.sym ++gen-as-const-headers += \ ++ dl-link.sym \ ++ rtld-global-offsets.sym + + tests-internal += tst-ifunc-arg-1 tst-ifunc-arg-2 + +diff --git a/sysdeps/aarch64/dl-start.S b/sysdeps/aarch64/dl-start.S +index d645484e79..271bd5bf00 100644 +--- a/sysdeps/aarch64/dl-start.S ++++ b/sysdeps/aarch64/dl-start.S +@@ -18,6 +18,7 @@ + <https://www.gnu.org/licenses/>. 
*/ + + #include <sysdep.h> ++#include <rtld-global-offsets.h> + + ENTRY (_start) + /* Create an initial frame with 0 LR and FP */ +@@ -25,11 +26,30 @@ ENTRY (_start) + mov x29, #0 + mov x30, #0 + ++ /* Load and relocate all library dependencies. */ + mov x0, sp + PTR_ARG (0) + bl _dl_start + /* Returns user entry point in x0. */ + mov PTR_REG (21), PTR_REG (0) ++ ++ /* Use GL(dl_aarch64_gcs) to set the shadow stack status. */ ++ adrp x16, _rtld_local ++ add PTR_REG (16), PTR_REG (16), :lo12:_rtld_local ++ ldr x1, [x16, GL_DL_AARCH64_GCS_OFFSET] ++ cbz x1, L(skip_gcs_enable) ++ ++ /* Enable GCS before user code runs. Note that IFUNC resolvers and ++ LD_AUDIT hooks may run before, but should not create threads. */ ++#define PR_SET_SHADOW_STACK_STATUS 72 ++ mov x0, PR_SET_SHADOW_STACK_STATUS ++ mov x2, 0 ++ mov x3, 0 ++ mov x4, 0 ++ mov x8, #SYS_ify(prctl) ++ svc 0x0 ++L(skip_gcs_enable): ++ + .globl _dl_start_user + .type _dl_start_user, %function + _dl_start_user: +@@ -40,8 +60,7 @@ _dl_start_user: + /* Compute envp. */ + add PTR_REG (3), PTR_REG (2), PTR_REG (1), lsl PTR_LOG_SIZE + add PTR_REG (3), PTR_REG (3), PTR_SIZE +- adrp x16, _rtld_local +- add PTR_REG (16), PTR_REG (16), :lo12:_rtld_local ++ /* Run the init functions of the loaded modules. */ + ldr PTR_REG (0), [x16] + bl _dl_init + /* Load the finalizer function. 
*/ +diff --git a/sysdeps/aarch64/rtld-global-offsets.sym b/sysdeps/aarch64/rtld-global-offsets.sym +index 23cdaf7d9e..6c0690bb95 100644 +--- a/sysdeps/aarch64/rtld-global-offsets.sym ++++ b/sysdeps/aarch64/rtld-global-offsets.sym +@@ -3,8 +3,13 @@ + #include <ldsodefs.h> + + #define GLRO_offsetof(name) offsetof (struct rtld_global_ro, _##name) ++#define GL_offsetof(name) offsetof (struct rtld_global, _##name) + + -- Offsets of _rtld_global_ro in libc.so + + GLRO_DL_HWCAP_OFFSET GLRO_offsetof (dl_hwcap) + GLRO_DL_HWCAP2_OFFSET GLRO_offsetof (dl_hwcap2) ++ ++-- Offsets of _rtld_global in libc.so ++ ++GL_DL_AARCH64_GCS_OFFSET GL_offsetof (dl_aarch64_gcs) +-- +2.34.1 + + +From 1e348038b0f013ef9c30e3c4072b9555344391cb Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Thu, 21 Dec 2023 15:14:08 +0000 +Subject: [PATCH 15/21] aarch64: add glibc.cpu.aarch64_gcs_policy + +policy sets how gcs tunable and gcs marking turns into gcs state: + +0: state = tunable +1: state = marking ? tunable : (tunable && dlopen ? err : 0) +2: state = marking ? tunable : (tunable ? 
err : 0) + +TODO: state lock +--- + sysdeps/aarch64/dl-tunables.list | 5 +++++ + sysdeps/unix/sysv/linux/aarch64/cpu-features.c | 9 +++++++-- + sysdeps/unix/sysv/linux/aarch64/dl-procinfo.c | 13 +++++++++++++ + 3 files changed, 25 insertions(+), 2 deletions(-) + +diff --git a/sysdeps/aarch64/dl-tunables.list b/sysdeps/aarch64/dl-tunables.list +index c08be86932..2a07a6216b 100644 +--- a/sysdeps/aarch64/dl-tunables.list ++++ b/sysdeps/aarch64/dl-tunables.list +@@ -26,5 +26,10 @@ glibc { + minval: 0 + default: 0 + } ++ aarch64_gcs_policy { ++ type: UINT_64 ++ minval: 0 ++ default: 0 ++ } + } + } +diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c +index a8b6a4654a..bab5c32892 100644 +--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c ++++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c +@@ -130,6 +130,11 @@ init_cpu_features (struct cpu_features *cpu_features) + cpu_features->mops = GLRO (dl_hwcap2) & HWCAP2_MOPS; + + if (GLRO (dl_hwcap2) & HWCAP2_GCS) +- /* GCS status may be updated later by binary compatibility checks. */ +- GL (dl_aarch64_gcs) = TUNABLE_GET (glibc, cpu, aarch64_gcs, uint64_t, 0); ++ { ++ /* GCS status may be updated later by binary compatibility checks. */ ++ GL (dl_aarch64_gcs) = TUNABLE_GET (glibc, cpu, aarch64_gcs, uint64_t, 0); ++ /* Fixed GCS policy. 
*/ ++ GLRO (dl_aarch64_gcs_policy) = ++ TUNABLE_GET (glibc, cpu, aarch64_gcs_policy, uint64_t, 0); ++ } + } +diff --git a/sysdeps/unix/sysv/linux/aarch64/dl-procinfo.c b/sysdeps/unix/sysv/linux/aarch64/dl-procinfo.c +index 7af232de52..a9d5ee9df5 100644 +--- a/sysdeps/unix/sysv/linux/aarch64/dl-procinfo.c ++++ b/sysdeps/unix/sysv/linux/aarch64/dl-procinfo.c +@@ -54,6 +54,19 @@ PROCINFO_CLASS struct cpu_features _dl_aarch64_cpu_features + # else + , + # endif ++# if !defined PROCINFO_DECL && defined SHARED ++ ._dl_aarch64_gcs_policy ++# else ++PROCINFO_CLASS uint64_t _dl_aarch64_gcs_policy ++# endif ++# ifndef PROCINFO_DECL ++= 0 ++# endif ++# if !defined SHARED || defined PROCINFO_DECL ++; ++# else ++, ++# endif + #endif + + /* Number of HWCAP bits set. */ +-- +2.34.1 + + +From 92cd77cd36137d81ac9500f595f0c4d0c3c6539d Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Thu, 28 Dec 2023 18:31:32 +0000 +Subject: [PATCH 16/21] aarch64: use l_searchlist.r_list for bti + +Allows using the same function for static exe. 
+--- + sysdeps/aarch64/dl-bti.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/sysdeps/aarch64/dl-bti.c b/sysdeps/aarch64/dl-bti.c +index fd0d308a39..e03bfc2acb 100644 +--- a/sysdeps/aarch64/dl-bti.c ++++ b/sysdeps/aarch64/dl-bti.c +@@ -84,10 +84,9 @@ _dl_bti_check (struct link_map *l, const char *program) + if (l->l_mach.bti_fail) + bti_failed (l, program); + +- unsigned int i = l->l_searchlist.r_nlist; +- while (i-- > 0) ++ for (unsigned int i = 0; i < l->l_searchlist.r_nlist; i++) + { +- struct link_map *dep = l->l_initfini[i]; ++ struct link_map *dep = l->l_searchlist.r_list[i]; + if (dep->l_mach.bti_fail) + bti_failed (dep, program); + } +-- +2.34.1 + + +From deded666b363b18c93ee6baed7dcf32551158eca Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Thu, 21 Dec 2023 23:05:10 +0000 +Subject: [PATCH 17/21] aarch64: handling gcs marking + +--- + sysdeps/aarch64/Makefile | 4 ++- + sysdeps/aarch64/dl-gcs.c | 59 +++++++++++++++++++++++++++++++++++++++ + sysdeps/aarch64/dl-prop.h | 15 ++++++---- + sysdeps/aarch64/linkmap.h | 1 + + 4 files changed, 73 insertions(+), 6 deletions(-) + create mode 100644 sysdeps/aarch64/dl-gcs.c + +diff --git a/sysdeps/aarch64/Makefile b/sysdeps/aarch64/Makefile +index ca8b96f550..74479604f2 100644 +--- a/sysdeps/aarch64/Makefile ++++ b/sysdeps/aarch64/Makefile +@@ -9,7 +9,9 @@ LDFLAGS-rtld += -Wl,-z,force-bti,--fatal-warnings + endif + + ifeq ($(subdir),elf) +-sysdep-dl-routines += dl-bti ++sysdep-dl-routines += \ ++ dl-bti \ ++ dl-gcs + + tests += tst-audit26 \ + tst-audit27 +diff --git a/sysdeps/aarch64/dl-gcs.c b/sysdeps/aarch64/dl-gcs.c +new file mode 100644 +index 0000000000..a92deb54b5 +--- /dev/null ++++ b/sysdeps/aarch64/dl-gcs.c +@@ -0,0 +1,59 @@ ++/* AArch64 GCS functions. ++ Copyright (C) 2024 Free Software Foundation, Inc. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <unistd.h> ++#include <errno.h> ++#include <libintl.h> ++#include <ldsodefs.h> ++ ++static void ++fail (struct link_map *l, const char *program) ++{ ++ if (program) ++ _dl_fatal_printf ("%s: %s: not GCS compatible\n", program, l->l_name); ++ else ++ _dl_signal_error (0, l->l_name, "dlopen", N_("not GCS compatible")); ++} ++ ++static void ++check_gcs (struct link_map *l, const char *program) ++{ ++ if (!l->l_mach.gcs) ++ { ++ if (GLRO(dl_aarch64_gcs_policy) == 2 || !program) ++ fail (l, program); ++ if (GLRO(dl_aarch64_gcs_policy) == 1 && program) ++ GL(dl_aarch64_gcs) = 0; ++ } ++} ++ ++/* Apply GCS policy for L and its dependencies. */ ++ ++void ++_dl_gcs_check (struct link_map *l, const char *program) ++{ ++ /* GCS is disabled. */ ++ if (GL(dl_aarch64_gcs) == 0) ++ return; ++ /* GCS marking is ignored. 
*/ ++ if (GLRO(dl_aarch64_gcs_policy) == 0) ++ return; ++ ++ check_gcs (l, program); ++ for (unsigned int i = 0; i < l->l_searchlist.r_nlist; i++) ++ check_gcs (l->l_initfini[i], program); ++} +diff --git a/sysdeps/aarch64/dl-prop.h b/sysdeps/aarch64/dl-prop.h +index df05c0211d..72ac11aec0 100644 +--- a/sysdeps/aarch64/dl-prop.h ++++ b/sysdeps/aarch64/dl-prop.h +@@ -24,16 +24,21 @@ extern void _dl_bti_protect (struct link_map *, int) attribute_hidden; + extern void _dl_bti_check (struct link_map *, const char *) + attribute_hidden; + ++extern void _dl_gcs_check (struct link_map *, const char *) ++ attribute_hidden; ++ + static inline void __attribute__ ((always_inline)) + _rtld_main_check (struct link_map *m, const char *program) + { + _dl_bti_check (m, program); ++ _dl_gcs_check (m, program); + } + + static inline void __attribute__ ((always_inline)) + _dl_open_check (struct link_map *m) + { + _dl_bti_check (m, NULL); ++ _dl_gcs_check (m, NULL); + } + + static inline void __attribute__ ((always_inline)) +@@ -45,10 +50,6 @@ static inline int + _dl_process_gnu_property (struct link_map *l, int fd, uint32_t type, + uint32_t datasz, void *data) + { +- if (!GLRO(dl_aarch64_cpu_features).bti) +- /* Skip note processing. */ +- return 0; +- + if (type == GNU_PROPERTY_AARCH64_FEATURE_1_AND) + { + /* Stop if the property note is ill-formed. */ +@@ -57,7 +58,11 @@ _dl_process_gnu_property (struct link_map *l, int fd, uint32_t type, + + unsigned int feature_1 = *(unsigned int *) data; + if (feature_1 & GNU_PROPERTY_AARCH64_FEATURE_1_BTI) +- _dl_bti_protect (l, fd); ++ if (GLRO(dl_aarch64_cpu_features).bti) ++ _dl_bti_protect (l, fd); ++ ++ if (feature_1 & GNU_PROPERTY_AARCH64_FEATURE_1_GCS) ++ l->l_mach.gcs = 1; + + /* Stop if we processed the property note. 
*/ + return 0; +diff --git a/sysdeps/aarch64/linkmap.h b/sysdeps/aarch64/linkmap.h +index 56a63fc3dd..423fc0bd8e 100644 +--- a/sysdeps/aarch64/linkmap.h ++++ b/sysdeps/aarch64/linkmap.h +@@ -23,4 +23,5 @@ struct link_map_machine + ElfW(Addr) plt; /* Address of .plt */ + void *tlsdesc_table; /* Address of TLS descriptor hash table. */ + bool bti_fail; /* Failed to enable Branch Target Identification. */ ++ bool gcs; /* Guarded Control Stack marking. */ + }; +-- +2.34.1 + + +From 5017a71252bd923b764b58cd61021b028c84d361 Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Thu, 28 Dec 2023 18:32:02 +0000 +Subject: [PATCH 18/21] aarch64: use l_searchlist.r_list for gcs + +Allows using the same function for static exe. +--- + sysdeps/aarch64/dl-gcs.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/sysdeps/aarch64/dl-gcs.c b/sysdeps/aarch64/dl-gcs.c +index a92deb54b5..764b8a56e9 100644 +--- a/sysdeps/aarch64/dl-gcs.c ++++ b/sysdeps/aarch64/dl-gcs.c +@@ -55,5 +55,5 @@ _dl_gcs_check (struct link_map *l, const char *program) + + check_gcs (l, program); + for (unsigned int i = 0; i < l->l_searchlist.r_nlist; i++) +- check_gcs (l->l_initfini[i], program); ++ check_gcs (l->l_searchlist.r_list[i], program); + } +-- +2.34.1 + + +From 84bfdb91bded2358b2642eed3bdae3c049576eb4 Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Wed, 10 Jan 2024 16:20:24 +0000 +Subject: [PATCH 19/21] aarch64: ignore GCS property of ld.so + +ldso->l_mach.gcs may not be set up, just assume ldso is GCS compatible. 
+--- + sysdeps/aarch64/dl-gcs.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/sysdeps/aarch64/dl-gcs.c b/sysdeps/aarch64/dl-gcs.c +index 764b8a56e9..b81aa30787 100644 +--- a/sysdeps/aarch64/dl-gcs.c ++++ b/sysdeps/aarch64/dl-gcs.c +@@ -32,6 +32,11 @@ fail (struct link_map *l, const char *program) + static void + check_gcs (struct link_map *l, const char *program) + { ++#ifdef SHARED ++ /* Ignore GCS marking on ld.so: its properties are not processed. */ ++ if (l->l_real == &GL(dl_rtld_map)) ++ return; ++#endif + if (!l->l_mach.gcs) + { + if (GLRO(dl_aarch64_gcs_policy) == 2 || !program) +-- +2.34.1 + + +From fcdce58087260a68d1a74b28e5b0146e69511f16 Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Wed, 27 Dec 2023 17:17:11 +0000 +Subject: [PATCH 20/21] aarch64: process gnu properties in static exe + +--- + sysdeps/unix/sysv/linux/aarch64/libc-start.h | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/sysdeps/unix/sysv/linux/aarch64/libc-start.h b/sysdeps/unix/sysv/linux/aarch64/libc-start.h +index ccf0f8af5c..6e2e9762ee 100644 +--- a/sysdeps/unix/sysv/linux/aarch64/libc-start.h ++++ b/sysdeps/unix/sysv/linux/aarch64/libc-start.h +@@ -21,6 +21,8 @@ + + #ifndef SHARED + ++# include <dl-prop.h> ++ + # ifndef PR_SET_SHADOW_STACK_STATUS + # define PR_GET_SHADOW_STACK_STATUS 71 + # define PR_SET_SHADOW_STACK_STATUS 72 +@@ -36,6 +38,16 @@ aarch64_libc_setup_tls (void) + { + __libc_setup_tls (); + ++ struct link_map *main_map = _dl_get_dl_main_map (); ++ const ElfW(Phdr) *phdr = GL(dl_phdr); ++ const ElfW(Phdr) *ph; ++ for (ph = phdr; ph < phdr + GL(dl_phnum); ph++) ++ if (ph->p_type == PT_GNU_PROPERTY) ++ { ++ _dl_process_pt_gnu_property (main_map, -1, ph); ++ _rtld_main_check (main_map, _dl_argv[0]); ++ break; ++ } + uint64_t s = GL(dl_aarch64_gcs); + if (s != 0) + INLINE_SYSCALL_CALL (prctl, PR_SET_SHADOW_STACK_STATUS, s, 0, 0, 0); +-- +2.34.1 + + +From bea263f87c18cc7949b556db73883a209edd27dc Mon Sep 17 
00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Wed, 14 Feb 2024 15:06:40 +0000 +Subject: [PATCH 21/21] doc: add plain text readme for using GCS + +--- + README | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 69 insertions(+) + +diff --git a/README b/README +index 2e360eb70a..061818d51b 100644 +--- a/README ++++ b/README +@@ -1,3 +1,72 @@ ++this branch contains experimental GCS support (not ABI stable) ++ ++source and branches ++------------------- ++ ++binutils-gdb: upstream-git users/ARM/gcs-binutils-gdb-master ++gcc (trunk): upstream-git vendors/ARM/gcs ++gcc (gcc-13): upstream-git vendors/ARM/gcs-13 ++ note: gcc vendor branches need setup https://gcc.gnu.org/gitwrite.html#vendor ++glibc: upstream-git arm/gcs ++linux: https://git.kernel.org/pub/scm/linux/kernel/git/broonie/misc.git arm64-gcs ++fvp fast model can be used for testing. ++ ++toolchain build ++--------------- ++ ++two options: ++ ++(1) branch-protect by default ++ configure gcc with --enable-standard-branch-protection ++ and build glibc normally ++ ++(2) do not branch-protect by default, require explicit cflags ++ configure gcc with ++ CFLAGS_FOR_TARGET='-O2 -mbranch-protection=standard' ++ CXXFLAGS_FOR_TARGET='-O2 -mbranch-protection=standard' ++ and configure glibc with ++ CFLAGS='-g -O2 -mbranch-protection=standard' ++ build user code with ++ CFLAGS+=-mbranch-protection=standard ++ (equivalent to -mbranch-protection=bti+pac+gcs) ++ ++linking ++------- ++ ++use ldflags: ++ ++-z experimental-gcs={always,never,implicit} ++ always: force GCS marking on ++ never: force GCS marking off ++ implicit: mark output if all inputs are marked (default) ++ ++-z experimental-gcs-report={none,warning,error} ++ none: silent (default) ++ warning: when output is marked, unmarked input is a warning ++ error: when output is marked, unmarked input is an error ++ ++runtime ++------- ++ ++run with environment var ++ ++ 
GLIBC_TUNABLES=glibc.cpu.aarch64_gcs=1:glibc.cpu.aarch64_gcs_policy=2 ++ ++by default both tunables are 0, the meaning is ++ ++glibc.cpu.aarch64_gcs_policy=0: ++ GCS is enabled if glibc.cpu.aarch64_gcs is set ++glibc.cpu.aarch64_gcs_policy=1: ++ GCS is enabled if glibc.cpu.aarch64_gcs is set and binary is marked ++ if GCS is enabled an incompatible dlopen is an error ++glibc.cpu.aarch64_gcs_policy=2: ++ GCS is enabled if glibc.cpu.aarch64_gcs is set ++ if GCS is enabled any incompatible binary is an error ++ ++ ++original readme ++--------------- ++ + This directory contains the sources of the GNU C Library. + See the file "version.h" for what release version you have. + +-- +2.34.1 + diff --git a/meta-arm-gcs/recipes-core/glibc/glibc_2.39.bbappend b/meta-arm-gcs/recipes-core/glibc/glibc_2.39.bbappend new file mode 100644 index 00000000..b3aba4f9 --- /dev/null +++ b/meta-arm-gcs/recipes-core/glibc/glibc_2.39.bbappend @@ -0,0 +1,3 @@ +FILESEXTRAPATHS:prepend := "${THISDIR}/files:" + +SRC_URI += "file://gcs.patch" diff --git a/meta-arm-gcs/recipes-devtools/binutils/binutils_2.42.bbappend b/meta-arm-gcs/recipes-devtools/binutils/binutils_2.42.bbappend new file mode 100644 index 00000000..b3aba4f9 --- /dev/null +++ b/meta-arm-gcs/recipes-devtools/binutils/binutils_2.42.bbappend @@ -0,0 +1,3 @@ +FILESEXTRAPATHS:prepend := "${THISDIR}/files:" + +SRC_URI += "file://gcs.patch" diff --git a/meta-arm-gcs/recipes-devtools/binutils/files/gcs.patch b/meta-arm-gcs/recipes-devtools/binutils/files/gcs.patch new file mode 100644 index 00000000..9bbfaf4e --- /dev/null +++ b/meta-arm-gcs/recipes-devtools/binutils/files/gcs.patch @@ -0,0 +1,973 @@ +From afe69c2e274db719e1835ee112150012271b62b7 Mon Sep 17 00:00:00 2001 +From: Srinath Parvathaneni <srinath.parvathaneni@arm.com> +Date: Tue, 30 Jan 2024 08:59:53 +0000 +Subject: [PATCH] aarch64: Add support for GCS in AArch64 linker. + +This patch adds support for GCS in AArch64 linker. 
+ +This patch implements the following: +1) Defines GNU_PROPERTY_AARCH64_FEATURE_1_GCS bit for GCS in +GNU_PROPERTY_AARCH64_FEATURE_1_AND macro. + +2) Adds readelf support to read and print the GNU properties +in AArch64. + +Displaying notes found in: .note.gnu.property +[ ]+Owner[ ]+Data size[ ]+Description + GNU 0x00000010 NT_GNU_PROPERTY_TYPE_0 + Properties: AArch64 feature: GCS + +3) Adds support for -z experimental-gcs linker option and document +all the values allowed with option (-z experimental-gcs[=always|never|implicit]). +-z experimental-gcs is equivalent to -z experimental-gcs=always and +when option is not passed in the command line, it defaults to implicit. + +4) Adds support for -z experimental-gcs-report linker option and document +all the values allowed with this option (-z experimental-gcs-report[=none|warning|error]). +-z experimental-gcs-report is equivalent to -z experimental-gcs-report=none +and when option is not passed in the command line, it defaults to none. + +The ABI changes adding GNU_PROPERTY_AARCH64_FEATURE_1_GCS to the +GNU property GNU_PROPERTY_AARCH64_FEATURE_1_AND is merged into main and +can be found below. 
+https://github.com/ARM-software/abi-aa/blob/main/sysvabi64/sysvabi64.rst + +Upstream-Status: Pending [https://sourceware.org/git/?p=binutils-gdb.git;a=shortlog;h=refs/heads/users/ARM/gcs-binutils-gdb-master] +Signed-off-by: Ross Burton <ross.burton@arm.com> +--- + bfd/elfnn-aarch64.c | 87 +++++++++++++++++---- + bfd/elfxx-aarch64.c | 37 ++++++++- + bfd/elfxx-aarch64.h | 36 +++++++-- + binutils/readelf.c | 4 + + include/elf/common.h | 1 + + ld/emultempl/aarch64elf.em | 45 ++++++++++- + ld/testsuite/ld-aarch64/aarch64-elf.exp | 23 ++++++ + ld/testsuite/ld-aarch64/property-bti-pac1.d | 2 +- + ld/testsuite/ld-aarch64/property-bti-pac1.s | 14 ++++ + ld/testsuite/ld-aarch64/property-gcs.s | 25 ++++++ + ld/testsuite/ld-aarch64/property-gcs1.d | 6 ++ + ld/testsuite/ld-aarch64/property-gcs10.d | 6 ++ + ld/testsuite/ld-aarch64/property-gcs11.d | 11 +++ + ld/testsuite/ld-aarch64/property-gcs12.d | 11 +++ + ld/testsuite/ld-aarch64/property-gcs13.d | 11 +++ + ld/testsuite/ld-aarch64/property-gcs14.d | 11 +++ + ld/testsuite/ld-aarch64/property-gcs15.d | 11 +++ + ld/testsuite/ld-aarch64/property-gcs16.d | 11 +++ + ld/testsuite/ld-aarch64/property-gcs17.d | 11 +++ + ld/testsuite/ld-aarch64/property-gcs18.d | 11 +++ + ld/testsuite/ld-aarch64/property-gcs19.d | 6 ++ + ld/testsuite/ld-aarch64/property-gcs2.d | 11 +++ + ld/testsuite/ld-aarch64/property-gcs2.s | 33 ++++++++ + ld/testsuite/ld-aarch64/property-gcs20.d | 6 ++ + ld/testsuite/ld-aarch64/property-gcs21.d | 6 ++ + ld/testsuite/ld-aarch64/property-gcs22.d | 11 +++ + ld/testsuite/ld-aarch64/property-gcs3.d | 11 +++ + ld/testsuite/ld-aarch64/property-gcs4.d | 11 +++ + ld/testsuite/ld-aarch64/property-gcs5.d | 11 +++ + ld/testsuite/ld-aarch64/property-gcs6.d | 12 +++ + ld/testsuite/ld-aarch64/property-gcs7.d | 6 ++ + ld/testsuite/ld-aarch64/property-gcs8.d | 11 +++ + ld/testsuite/ld-aarch64/property-gcs9.d | 12 +++ + 33 files changed, 495 insertions(+), 26 deletions(-) + create mode 100644 ld/testsuite/ld-aarch64/property-gcs.s 
+ create mode 100644 ld/testsuite/ld-aarch64/property-gcs1.d + create mode 100644 ld/testsuite/ld-aarch64/property-gcs10.d + create mode 100644 ld/testsuite/ld-aarch64/property-gcs11.d + create mode 100644 ld/testsuite/ld-aarch64/property-gcs12.d + create mode 100644 ld/testsuite/ld-aarch64/property-gcs13.d + create mode 100644 ld/testsuite/ld-aarch64/property-gcs14.d + create mode 100644 ld/testsuite/ld-aarch64/property-gcs15.d + create mode 100644 ld/testsuite/ld-aarch64/property-gcs16.d + create mode 100644 ld/testsuite/ld-aarch64/property-gcs17.d + create mode 100644 ld/testsuite/ld-aarch64/property-gcs18.d + create mode 100644 ld/testsuite/ld-aarch64/property-gcs19.d + create mode 100644 ld/testsuite/ld-aarch64/property-gcs2.d + create mode 100644 ld/testsuite/ld-aarch64/property-gcs2.s + create mode 100644 ld/testsuite/ld-aarch64/property-gcs20.d + create mode 100644 ld/testsuite/ld-aarch64/property-gcs21.d + create mode 100644 ld/testsuite/ld-aarch64/property-gcs22.d + create mode 100644 ld/testsuite/ld-aarch64/property-gcs3.d + create mode 100644 ld/testsuite/ld-aarch64/property-gcs4.d + create mode 100644 ld/testsuite/ld-aarch64/property-gcs5.d + create mode 100644 ld/testsuite/ld-aarch64/property-gcs6.d + create mode 100644 ld/testsuite/ld-aarch64/property-gcs7.d + create mode 100644 ld/testsuite/ld-aarch64/property-gcs8.d + create mode 100644 ld/testsuite/ld-aarch64/property-gcs9.d + +diff --git a/bfd/elfnn-aarch64.c b/bfd/elfnn-aarch64.c +index 109517db4aa..428f2c3507d 100644 +--- a/bfd/elfnn-aarch64.c ++++ b/bfd/elfnn-aarch64.c +@@ -2546,6 +2546,12 @@ struct elf_aarch64_obj_tdata + GNU_PROPERTY_AARCH64_FEATURE_1_BTI. */ + int no_bti_warn; + ++ /* Mark ouput with GCS based on -z experimental-gcs. */ ++ aarch64_gcs_type gcs_type; ++ /* Report linker warning/error for -z experimental-gcs-report based on ++ -z experimental-gcs. */ ++ aarch64_gcs_report gcs_report; ++ + /* PLT type based on security. 
*/ + aarch64_plt_type plt_type; + }; +@@ -5011,7 +5017,7 @@ bfd_elfNN_aarch64_set_options (struct bfd *output_bfd, + int fix_erratum_835769, + erratum_84319_opts fix_erratum_843419, + int no_apply_dynamic_relocs, +- aarch64_bti_pac_info bp_info) ++ aarch64_gnu_prop_info bp_info) + { + struct elf_aarch64_link_hash_table *globals; + +@@ -5039,6 +5045,24 @@ bfd_elfNN_aarch64_set_options (struct bfd *output_bfd, + default: + break; + } ++ ++ switch (bp_info.gcs_type) ++ { ++ case GCS_ALWAYS: ++ elf_aarch64_tdata (output_bfd)->gnu_and_prop ++ |= GNU_PROPERTY_AARCH64_FEATURE_1_GCS; ++ break; ++ case GCS_NEVER: ++ elf_aarch64_tdata (output_bfd)->gnu_and_prop ++ &= ~GNU_PROPERTY_AARCH64_FEATURE_1_GCS; ++ break; ++ ++ default: ++ break; ++ } ++ ++ elf_aarch64_tdata (output_bfd)->gcs_type = bp_info.gcs_type; ++ elf_aarch64_tdata (output_bfd)->gcs_report = bp_info.gcs_report; + elf_aarch64_tdata (output_bfd)->plt_type = bp_info.plt_type; + setup_plt_values (link_info, bp_info.plt_type); + } +@@ -10196,7 +10220,12 @@ static bfd * + elfNN_aarch64_link_setup_gnu_properties (struct bfd_link_info *info) + { + uint32_t prop = elf_aarch64_tdata (info->output_bfd)->gnu_and_prop; +- bfd *pbfd = _bfd_aarch64_elf_link_setup_gnu_properties (info, &prop); ++ aarch64_gcs_report gcs_report ++ = elf_aarch64_tdata (info->output_bfd)->gcs_report; ++ aarch64_gcs_report gcs_type ++ = elf_aarch64_tdata (info->output_bfd)->gcs_type; ++ bfd *pbfd = _bfd_aarch64_elf_link_setup_gnu_properties (info, &prop, ++ gcs_report, gcs_type); + elf_aarch64_tdata (info->output_bfd)->gnu_and_prop = prop; + elf_aarch64_tdata (info->output_bfd)->plt_type + |= (prop & GNU_PROPERTY_AARCH64_FEATURE_1_BTI) ? 
PLT_BTI : 0; +@@ -10215,30 +10244,54 @@ elfNN_aarch64_merge_gnu_properties (struct bfd_link_info *info, + { + uint32_t prop + = elf_aarch64_tdata (info->output_bfd)->gnu_and_prop; ++ aarch64_gcs_report gcs_report ++ = elf_aarch64_tdata (info->output_bfd)->gcs_report; ++ aarch64_gcs_type gcs_type ++ = elf_aarch64_tdata (info->output_bfd)->gcs_type; + +- /* If output has been marked with BTI using command line argument, give out +- warning if necessary. */ + /* Properties are merged per type, hence only check for warnings when merging + GNU_PROPERTY_AARCH64_FEATURE_1_AND. */ +- if (((aprop && aprop->pr_type == GNU_PROPERTY_AARCH64_FEATURE_1_AND) ++ if ((aprop && aprop->pr_type == GNU_PROPERTY_AARCH64_FEATURE_1_AND) + || (bprop && bprop->pr_type == GNU_PROPERTY_AARCH64_FEATURE_1_AND)) +- && (prop & GNU_PROPERTY_AARCH64_FEATURE_1_BTI) +- && (!elf_aarch64_tdata (info->output_bfd)->no_bti_warn)) + { +- if ((aprop && !(aprop->u.number & GNU_PROPERTY_AARCH64_FEATURE_1_BTI)) +- || !aprop) ++ /* If output has been marked with BTI using command line argument, give ++ out warning if necessary. 
*/ ++ if ((prop & GNU_PROPERTY_AARCH64_FEATURE_1_BTI) ++ && (!elf_aarch64_tdata (info->output_bfd)->no_bti_warn)) + { +- _bfd_error_handler (_("%pB: warning: BTI turned on by -z force-bti when " +- "all inputs do not have BTI in NOTE section."), +- abfd); ++ if ((aprop && !(aprop->u.number & GNU_PROPERTY_AARCH64_FEATURE_1_BTI)) ++ || !aprop) ++ { ++ _bfd_error_handler (_("%pB: warning: BTI turned on by -z " ++ "force-bti when all inputs do not have BTI " ++ "in NOTE section."), abfd); ++ } ++ if ((bprop && !(bprop->u.number & GNU_PROPERTY_AARCH64_FEATURE_1_BTI)) ++ || !bprop) ++ { ++ _bfd_error_handler (_("%pB: warning: BTI turned on by -z " ++ "force-bti when all inputs do not have BTI " ++ "in NOTE section."), bbfd); ++ } + } +- if ((bprop && !(bprop->u.number & GNU_PROPERTY_AARCH64_FEATURE_1_BTI)) +- || !bprop) ++ ++ /* If output has been marked with GCS using -z experimental-gcs and input ++ is missing GCS marking throw warning/error on ++ -z experimental-gcs-report=warning/error. */ ++ if ((prop & GNU_PROPERTY_AARCH64_FEATURE_1_GCS) && gcs_report != GCS_NONE) + { +- _bfd_error_handler (_("%pB: warning: BTI turned on by -z force-bti when " +- "all inputs do not have BTI in NOTE section."), +- bbfd); ++ if ((aprop && !(aprop->u.number & GNU_PROPERTY_AARCH64_FEATURE_1_GCS)) ++ || !aprop) ++ _bfd_aarch64_elf_check_gcs_report (gcs_report, abfd); ++ if ((bprop && !(bprop->u.number & GNU_PROPERTY_AARCH64_FEATURE_1_GCS)) ++ || !bprop) ++ _bfd_aarch64_elf_check_gcs_report (gcs_report, bbfd); + } ++ ++ if (gcs_type == GCS_NEVER && aprop != NULL) ++ aprop->u.number &= ~GNU_PROPERTY_AARCH64_FEATURE_1_GCS; ++ if (gcs_type == GCS_NEVER && bprop != NULL) ++ bprop->u.number &= ~GNU_PROPERTY_AARCH64_FEATURE_1_GCS; + } + + return _bfd_aarch64_elf_merge_gnu_properties (info, abfd, aprop, +diff --git a/bfd/elfxx-aarch64.c b/bfd/elfxx-aarch64.c +index d1279adc2e4..dd64f2067ac 100644 +--- a/bfd/elfxx-aarch64.c ++++ b/bfd/elfxx-aarch64.c +@@ -702,7 +702,9 @@ 
_bfd_aarch64_elf_write_core_note (bfd *abfd, char *buf, int *bufsiz, int note_ty + GPROP accordingly. */ + bfd * + _bfd_aarch64_elf_link_setup_gnu_properties (struct bfd_link_info *info, +- uint32_t *gprop) ++ uint32_t *gprop, ++ aarch64_gcs_report gcs_report, ++ aarch64_gcs_type gcs_type) + { + asection *sec; + bfd *pbfd; +@@ -738,6 +740,11 @@ _bfd_aarch64_elf_link_setup_gnu_properties (struct bfd_link_info *info, + _bfd_error_handler (_("%pB: warning: BTI turned on by -z force-bti " + "when all inputs do not have BTI in NOTE " + "section."), ebfd); ++ ++ if ((gnu_prop & GNU_PROPERTY_AARCH64_FEATURE_1_GCS) ++ && !(prop->u.number & GNU_PROPERTY_AARCH64_FEATURE_1_GCS)) ++ _bfd_aarch64_elf_check_gcs_report (gcs_report, ebfd); ++ + prop->u.number |= gnu_prop; + prop->pr_kind = property_number; + +@@ -765,6 +772,14 @@ _bfd_aarch64_elf_link_setup_gnu_properties (struct bfd_link_info *info, + elf_section_type (sec) = SHT_NOTE; + } + } ++ else if (ebfd != NULL && gcs_type == GCS_NEVER) ++ { ++ prop = _bfd_elf_get_property (ebfd, GNU_PROPERTY_AARCH64_FEATURE_1_AND, ++ 4); ++ prop->u.number &= ~GNU_PROPERTY_AARCH64_FEATURE_1_GCS; ++ if (prop->u.number == 0) ++ prop->pr_kind = property_remove; ++ } + + pbfd = _bfd_elf_link_setup_gnu_properties (info); + +@@ -785,7 +800,8 @@ _bfd_aarch64_elf_link_setup_gnu_properties (struct bfd_link_info *info, + { + gnu_prop = (p->property.u.number + & (GNU_PROPERTY_AARCH64_FEATURE_1_PAC +- | GNU_PROPERTY_AARCH64_FEATURE_1_BTI)); ++ | GNU_PROPERTY_AARCH64_FEATURE_1_BTI ++ | GNU_PROPERTY_AARCH64_FEATURE_1_GCS)); + break; + } + else if (GNU_PROPERTY_AARCH64_FEATURE_1_AND < p->property.pr_type) +@@ -922,3 +938,20 @@ _bfd_aarch64_elf_link_fixup_gnu_properties + } + } + } ++ ++/* Check AArch64 GCS report. 
*/ ++void ++_bfd_aarch64_elf_check_gcs_report (aarch64_gcs_report gcs_report, bfd *ebfd) ++{ ++ if (gcs_report == GCS_WARN) ++ _bfd_error_handler (_("%pB: warning: GCS turned on by -z experimental-gcs " ++ "on the output when all inputs do not have GCS in NOTE " ++ "section."), ebfd); ++ else if (gcs_report == GCS_ERROR) ++ { ++ _bfd_error_handler (_("%pB: error: GCS turned on by -z experimental-gcs " ++ "on the output when all inputs do not have GCS in " ++ "NOTE section."), ebfd); ++ _exit (EXIT_FAILURE); ++ } ++} +diff --git a/bfd/elfxx-aarch64.h b/bfd/elfxx-aarch64.h +index 6c084f75796..ca523d81df1 100644 +--- a/bfd/elfxx-aarch64.h ++++ b/bfd/elfxx-aarch64.h +@@ -46,6 +46,27 @@ typedef enum + BTI_WARN = 1, /* BTI is enabled with -z force-bti. */ + } aarch64_enable_bti_type; + ++/* To indicate whether GNU_PROPERTY_AARCH64_FEATURE_1_GCS bit is ++ enabled/disabled on the output when -z experimental-gcs linker ++ command line option is passed. */ ++typedef enum ++{ ++ GCS_NEVER = 0, /* gcs is disabled on output. */ ++ GCS_IMPLICIT = 1, /* gcs is deduced from input object. */ ++ GCS_ALWAYS = 2, /* gsc is enabled on output. */ ++} aarch64_gcs_type; ++ ++/* To indicate whether to generate linker warning/errors for ++ -z experimental-gcs-report when -z experimental-gcs=always is passed. */ ++typedef enum ++{ ++ GCS_NONE = 0, /* Does not emit any warning/error messages. */ ++ GCS_WARN = 1, /* Emit warning when the input objects are missing gcs ++ markings and output have gcs marking. */ ++ GCS_ERROR = 2, /* Emit error when the input objects are missing gcs ++ markings and output have gcs marking. */ ++} aarch64_gcs_report; ++ + /* A structure to encompass all information coming from BTI or PAC + related command line options. 
This involves the "PLT_TYPE" to determine + which version of PLTs to pick and "BTI_TYPE" to determine if +@@ -54,7 +75,9 @@ typedef struct + { + aarch64_plt_type plt_type; + aarch64_enable_bti_type bti_type; +-} aarch64_bti_pac_info; ++ aarch64_gcs_type gcs_type; ++ aarch64_gcs_report gcs_report; ++} aarch64_gnu_prop_info; + + /* An enum to define what kind of erratum fixes we should apply. This gives the + user a bit more control over the sequences we generate. */ +@@ -67,11 +90,11 @@ typedef enum + + extern void bfd_elf64_aarch64_set_options + (bfd *, struct bfd_link_info *, int, int, int, int, erratum_84319_opts, int, +- aarch64_bti_pac_info); ++ aarch64_gnu_prop_info); + + extern void bfd_elf32_aarch64_set_options + (bfd *, struct bfd_link_info *, int, int, int, int, erratum_84319_opts, int, +- aarch64_bti_pac_info); ++ aarch64_gnu_prop_info); + + /* AArch64 stub generation support for ELF64. Called from the linker. */ + extern int elf64_aarch64_setup_section_lists +@@ -135,8 +158,9 @@ _bfd_aarch64_elf_write_core_note (bfd *, char *, int *, int, ...); + #define elf_backend_write_core_note _bfd_aarch64_elf_write_core_note + + extern bfd * +-_bfd_aarch64_elf_link_setup_gnu_properties (struct bfd_link_info *, +- uint32_t *); ++_bfd_aarch64_elf_link_setup_gnu_properties (struct bfd_link_info *, uint32_t *, ++ aarch64_gcs_report, ++ aarch64_gcs_type); + + extern enum elf_property_kind + _bfd_aarch64_elf_parse_gnu_properties (bfd *, unsigned int, +@@ -146,6 +170,8 @@ extern bool + _bfd_aarch64_elf_merge_gnu_properties (struct bfd_link_info *, bfd *, + elf_property *, elf_property *, + uint32_t); ++extern void ++_bfd_aarch64_elf_check_gcs_report (aarch64_gcs_report, bfd *); + + extern void + _bfd_aarch64_elf_link_fixup_gnu_properties (struct bfd_link_info *, +diff --git a/binutils/readelf.c b/binutils/readelf.c +index 5e4ad6ea6ad..794cbb77a9c 100644 +--- a/binutils/readelf.c ++++ b/binutils/readelf.c +@@ -20636,6 +20636,10 @@ decode_aarch64_feature_1_and (unsigned int 
bitmask) + printf ("PAC"); + break; + ++ case GNU_PROPERTY_AARCH64_FEATURE_1_GCS: ++ printf ("GCS"); ++ break; ++ + default: + printf (_("<unknown: %x>"), bit); + break; +diff --git a/include/elf/common.h b/include/elf/common.h +index 6a66456cd22..289b8821b7d 100644 +--- a/include/elf/common.h ++++ b/include/elf/common.h +@@ -1001,6 +1001,7 @@ + + #define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0) + #define GNU_PROPERTY_AARCH64_FEATURE_1_PAC (1U << 1) ++#define GNU_PROPERTY_AARCH64_FEATURE_1_GCS (1U << 2) + + /* Values used in GNU .note.ABI-tag notes (NT_GNU_ABI_TAG). */ + #define GNU_ABI_TAG_LINUX 0 +diff --git a/ld/emultempl/aarch64elf.em b/ld/emultempl/aarch64elf.em +index b647909ae63..fb331e06553 100644 +--- a/ld/emultempl/aarch64elf.em ++++ b/ld/emultempl/aarch64elf.em +@@ -36,6 +36,12 @@ static erratum_84319_opts fix_erratum_843419 = ERRAT_NONE; + static int no_apply_dynamic_relocs = 0; + static aarch64_plt_type plt_type = PLT_NORMAL; + static aarch64_enable_bti_type bti_type = BTI_NONE; ++static aarch64_gcs_type gcs_type = GCS_IMPLICIT; ++static aarch64_gcs_report gcs_report = GCS_NONE; ++static const char * egr = "experimental-gcs-report"; ++static const char * eg = "experimental-gcs"; ++#define EGR_LEN strlen (egr) ++#define EG_LEN strlen (eg) + + static void + gld${EMULATION_NAME}_before_parse (void) +@@ -321,9 +327,11 @@ aarch64_elf_create_output_section_statements (void) + return; + } + +- aarch64_bti_pac_info bp_info; ++ aarch64_gnu_prop_info bp_info; + bp_info.plt_type = plt_type; + bp_info.bti_type = bti_type; ++ bp_info.gcs_type = gcs_type; ++ bp_info.gcs_report = gcs_report; + + bfd_elf${ELFSIZE}_aarch64_set_options (link_info.output_bfd, &link_info, + no_enum_size_warning, +@@ -408,6 +416,19 @@ PARSE_AND_LIST_OPTIONS=' + fprintf (file, _(" --no-apply-dynamic-relocs Do not apply link-time values for dynamic relocations\n")); + fprintf (file, _(" -z force-bti Turn on Branch Target Identification mechanism and generate PLTs with BTI. 
Generate warnings for missing BTI on inputs\n")); + fprintf (file, _(" -z pac-plt Protect PLTs with Pointer Authentication.\n")); ++ fprintf (file, _("\ ++ -z experimental-gcs[=always|never|implicit] Turn on Guarded Control Stack(gcs) mechanism on the output.\n\ ++ implicit(default): deduce gcs from input objects.\n\ ++ always: always marks the output with gcs.\n\ ++ never: never marks the output with gcs.\n")); ++ fprintf (file, _("\ ++ -z experimental-gcs-report[=none|warning|error] Emit warning/error on mismatch of gcs marking between input objects and ouput.\n\ ++ none (default): Does not emit any warning/error messages.\n\ ++ warning: Emit warning when the input objects are missing gcs markings\n\ ++ and output have gcs marking.\n\ ++ error: Emit error when the input objects are missing gcs markings\n\ ++ and output have gcs marking.\n")); ++ + ' + + PARSE_AND_LIST_ARGS_CASE_Z_AARCH64=' +@@ -418,6 +439,28 @@ PARSE_AND_LIST_ARGS_CASE_Z_AARCH64=' + } + else if (strcmp (optarg, "pac-plt") == 0) + plt_type |= PLT_PAC; ++ else if (strncmp (optarg, egr, EGR_LEN) == 0) ++ { ++ if (strlen (optarg) == EGR_LEN || strcmp (optarg + EGR_LEN, "=none") == 0) ++ gcs_report = GCS_NONE; ++ else if (strcmp (optarg + EGR_LEN, "=warning") == 0) ++ gcs_report = GCS_WARN; ++ else if (strcmp (optarg + EGR_LEN, "=error") == 0) ++ gcs_report = GCS_ERROR; ++ else ++ einfo (_("%P: error: unrecognized: `%s'\''\n"), optarg); ++ } ++ else if (strncmp (optarg, eg, EG_LEN) == 0) ++ { ++ if (strlen (optarg) == EG_LEN || strcmp (optarg + EG_LEN, "=always") == 0) ++ gcs_type = GCS_ALWAYS; ++ else if (strcmp (optarg + EG_LEN, "=never") == 0) ++ gcs_type = GCS_NEVER; ++ else if (strcmp (optarg + EG_LEN, "=implicit") == 0) ++ gcs_type = GCS_IMPLICIT; ++ else ++ einfo (_("%P: error: unrecognized: `%s'\''\n"), optarg); ++ } + ' + PARSE_AND_LIST_ARGS_CASE_Z="$PARSE_AND_LIST_ARGS_CASE_Z $PARSE_AND_LIST_ARGS_CASE_Z_AARCH64" + +diff --git a/ld/testsuite/ld-aarch64/aarch64-elf.exp 
b/ld/testsuite/ld-aarch64/aarch64-elf.exp +index 9ce61579e6c..31abc5a07d8 100644 +--- a/ld/testsuite/ld-aarch64/aarch64-elf.exp ++++ b/ld/testsuite/ld-aarch64/aarch64-elf.exp +@@ -471,3 +471,26 @@ run_dump_test_lp64 "bti-far-3" + if { ![skip_sframe_tests] } { + run_dump_test "sframe-simple-1" + } ++ ++run_dump_test "property-gcs1" ++run_dump_test "property-gcs2" ++run_dump_test "property-gcs3" ++run_dump_test "property-gcs4" ++run_dump_test "property-gcs5" ++run_dump_test "property-gcs6" ++run_dump_test "property-gcs7" ++run_dump_test "property-gcs8" ++run_dump_test "property-gcs9" ++run_dump_test "property-gcs10" ++run_dump_test "property-gcs11" ++run_dump_test "property-gcs12" ++run_dump_test "property-gcs13" ++run_dump_test "property-gcs14" ++run_dump_test "property-gcs15" ++run_dump_test "property-gcs16" ++run_dump_test "property-gcs17" ++run_dump_test "property-gcs18" ++run_dump_test "property-gcs19" ++run_dump_test "property-gcs20" ++run_dump_test "property-gcs21" ++run_dump_test "property-gcs22" +diff --git a/ld/testsuite/ld-aarch64/property-bti-pac1.d b/ld/testsuite/ld-aarch64/property-bti-pac1.d +index 59fa695165a..c28a0cbf850 100644 +--- a/ld/testsuite/ld-aarch64/property-bti-pac1.d ++++ b/ld/testsuite/ld-aarch64/property-bti-pac1.d +@@ -8,4 +8,4 @@ + Displaying notes found in: .note.gnu.property + [ ]+Owner[ ]+Data size[ ]+Description + GNU 0x00000010 NT_GNU_PROPERTY_TYPE_0 +- Properties: AArch64 feature: BTI, PAC ++ Properties: AArch64 feature: BTI, PAC, GCS +diff --git a/ld/testsuite/ld-aarch64/property-bti-pac1.s b/ld/testsuite/ld-aarch64/property-bti-pac1.s +index 414c9277f1d..42156917d58 100644 +--- a/ld/testsuite/ld-aarch64/property-bti-pac1.s ++++ b/ld/testsuite/ld-aarch64/property-bti-pac1.s +@@ -12,6 +12,20 @@ _start: + .long 5f - 2f /* data length */ + .long 5 /* note type */ + 0: .asciz "GNU" /* vendor name */ ++1: ++ .p2align 3 ++2: .long 0xc0000000 /* pr_type. */ ++ .long 4f - 3f /* pr_datasz. */ ++3: ++ .long 0x4 /* GCS. 
*/ ++4: ++ .p2align 3 ++5: ++ .p2align 3 ++ .long 1f - 0f /* name length */ ++ .long 5f - 2f /* data length */ ++ .long 5 /* note type */ ++0: .asciz "GNU" /* vendor name */ + 1: + .p2align 3 + 2: .long 0xc0000000 /* pr_type. */ +diff --git a/ld/testsuite/ld-aarch64/property-gcs.s b/ld/testsuite/ld-aarch64/property-gcs.s +new file mode 100644 +index 00000000000..bc7e66e8933 +--- /dev/null ++++ b/ld/testsuite/ld-aarch64/property-gcs.s +@@ -0,0 +1,25 @@ ++ .text ++ .globl _start ++ .type _start,@function ++_start: ++ mov x1, #2 ++.ifndef __mult__ ++ bl foo ++.endif ++.ifdef __property_gcs__ ++ .section ".note.gnu.property", "a" ++ .p2align 3 ++ .long 1f - 0f /* name length */ ++ .long 5f - 2f /* data length */ ++ .long 5 /* note type */ ++0: .asciz "GNU" /* vendor name */ ++1: ++ .p2align 3 ++2: .long 0xc0000000 /* pr_type. */ ++ .long 4f - 3f /* pr_datasz. */ ++3: ++ .long 0x4 /* GCS. */ ++4: ++ .p2align 3 ++5: ++.endif +diff --git a/ld/testsuite/ld-aarch64/property-gcs1.d b/ld/testsuite/ld-aarch64/property-gcs1.d +new file mode 100644 +index 00000000000..c724ac56ca3 +--- /dev/null ++++ b/ld/testsuite/ld-aarch64/property-gcs1.d +@@ -0,0 +1,6 @@ ++#name: GNU Property (input without gcs) ++#source: property-gcs.s ++#alltargets: [check_shared_lib_support] *linux* ++#as: -march=armv9.4-a+gcs -defsym __mult__=0 ++#ld: -shared ++#readelf: -n +diff --git a/ld/testsuite/ld-aarch64/property-gcs10.d b/ld/testsuite/ld-aarch64/property-gcs10.d +new file mode 100644 +index 00000000000..4b6deedc0c2 +--- /dev/null ++++ b/ld/testsuite/ld-aarch64/property-gcs10.d +@@ -0,0 +1,6 @@ ++#name: GNU Property (input without gcs ouput forced with experimental-gcs=always experimental-gcs-report=error) ++#source: property-gcs.s ++#alltargets: [check_shared_lib_support] *linux* ++#as: -march=armv9.4-a+gcs -defsym __mult__=0 ++#ld: -z experimental-gcs=always -z experimental-gcs-report=error ++#error: .*property-gcs.*: error: GCS turned on by -z experimental-gcs on the output when all inputs do 
not have GCS in NOTE section. +diff --git a/ld/testsuite/ld-aarch64/property-gcs11.d b/ld/testsuite/ld-aarch64/property-gcs11.d +new file mode 100644 +index 00000000000..8abacf28eb1 +--- /dev/null ++++ b/ld/testsuite/ld-aarch64/property-gcs11.d +@@ -0,0 +1,11 @@ ++#name: GNU Property (input with gcs output forced with experimental-gcs) ++#source: property-gcs.s ++#alltargets: [check_shared_lib_support] *linux* ++#as: -march=armv9.4-a+gcs -defsym __mult__=0 -defsym __property_gcs__=1 ++#ld: -z experimental-gcs ++#readelf: -n ++ ++Displaying notes found in: .note.gnu.property ++[ ]+Owner[ ]+Data size[ ]+Description ++ GNU 0x00000010 NT_GNU_PROPERTY_TYPE_0 ++ Properties: AArch64 feature: GCS +diff --git a/ld/testsuite/ld-aarch64/property-gcs12.d b/ld/testsuite/ld-aarch64/property-gcs12.d +new file mode 100644 +index 00000000000..0fe246dfa3a +--- /dev/null ++++ b/ld/testsuite/ld-aarch64/property-gcs12.d +@@ -0,0 +1,11 @@ ++#name: GNU Property (input with gcs ouput forced with experimental-gcs=always) ++#source: property-gcs.s ++#alltargets: [check_shared_lib_support] *linux* ++#as: -march=armv9.4-a+gcs -defsym __mult__=0 -defsym __property_gcs__=1 ++#ld: -z experimental-gcs=always ++#readelf: -n ++ ++Displaying notes found in: .note.gnu.property ++[ ]+Owner[ ]+Data size[ ]+Description ++ GNU 0x00000010 NT_GNU_PROPERTY_TYPE_0 ++ Properties: AArch64 feature: GCS +diff --git a/ld/testsuite/ld-aarch64/property-gcs13.d b/ld/testsuite/ld-aarch64/property-gcs13.d +new file mode 100644 +index 00000000000..c6077aeaa5a +--- /dev/null ++++ b/ld/testsuite/ld-aarch64/property-gcs13.d +@@ -0,0 +1,11 @@ ++#name: GNU Property (input with gcs ouput forced with experimental-gcs experimental-gcs-report=none) ++#source: property-gcs.s ++#alltargets: [check_shared_lib_support] *linux* ++#as: -march=armv9.4-a+gcs -defsym __mult__=0 -defsym __property_gcs__=1 ++#ld: -z experimental-gcs -z experimental-gcs-report=none ++#readelf: -n ++ ++Displaying notes found in: .note.gnu.property ++[ 
]+Owner[ ]+Data size[ ]+Description ++ GNU 0x00000010 NT_GNU_PROPERTY_TYPE_0 ++ Properties: AArch64 feature: GCS +diff --git a/ld/testsuite/ld-aarch64/property-gcs14.d b/ld/testsuite/ld-aarch64/property-gcs14.d +new file mode 100644 +index 00000000000..0f7490ef4a5 +--- /dev/null ++++ b/ld/testsuite/ld-aarch64/property-gcs14.d +@@ -0,0 +1,11 @@ ++#name: GNU Property (input with gcs ouput forced with experimental-gcs experimental-gcs-report=warning) ++#source: property-gcs.s ++#alltargets: [check_shared_lib_support] *linux* ++#as: -march=armv9.4-a+gcs -defsym __mult__=0 -defsym __property_gcs__=1 ++#ld: -z experimental-gcs -z experimental-gcs-report=warning ++#readelf: -n ++ ++Displaying notes found in: .note.gnu.property ++[ ]+Owner[ ]+Data size[ ]+Description ++ GNU 0x00000010 NT_GNU_PROPERTY_TYPE_0 ++ Properties: AArch64 feature: GCS +diff --git a/ld/testsuite/ld-aarch64/property-gcs15.d b/ld/testsuite/ld-aarch64/property-gcs15.d +new file mode 100644 +index 00000000000..d1e723e0ea6 +--- /dev/null ++++ b/ld/testsuite/ld-aarch64/property-gcs15.d +@@ -0,0 +1,11 @@ ++#name: GNU Property (input with gcs ouput forced with experimental-gcs experimental-gcs-report=error) ++#source: property-gcs.s ++#alltargets: [check_shared_lib_support] *linux* ++#as: -march=armv9.4-a+gcs -defsym __mult__=0 -defsym __property_gcs__=1 ++#ld: -z experimental-gcs -z experimental-gcs-report=error ++#readelf: -n ++ ++Displaying notes found in: .note.gnu.property ++[ ]+Owner[ ]+Data size[ ]+Description ++ GNU 0x00000010 NT_GNU_PROPERTY_TYPE_0 ++ Properties: AArch64 feature: GCS +diff --git a/ld/testsuite/ld-aarch64/property-gcs16.d b/ld/testsuite/ld-aarch64/property-gcs16.d +new file mode 100644 +index 00000000000..340577f1758 +--- /dev/null ++++ b/ld/testsuite/ld-aarch64/property-gcs16.d +@@ -0,0 +1,11 @@ ++#name: GNU Property (input with gcs ouput forced with experimental-gcs=always experimental-gcs-report=none) ++#source: property-gcs.s ++#alltargets: [check_shared_lib_support] *linux* 
++#as: -march=armv9.4-a+gcs -defsym __mult__=0 -defsym __property_gcs__=1 ++#ld: -z experimental-gcs=always -z experimental-gcs-report=none ++#readelf: -n ++ ++Displaying notes found in: .note.gnu.property ++[ ]+Owner[ ]+Data size[ ]+Description ++ GNU 0x00000010 NT_GNU_PROPERTY_TYPE_0 ++ Properties: AArch64 feature: GCS +diff --git a/ld/testsuite/ld-aarch64/property-gcs17.d b/ld/testsuite/ld-aarch64/property-gcs17.d +new file mode 100644 +index 00000000000..4ba9583ee92 +--- /dev/null ++++ b/ld/testsuite/ld-aarch64/property-gcs17.d +@@ -0,0 +1,11 @@ ++#name: GNU Property (input with gcs ouput forced with experimental-gcs=always experimental-gcs-report=warning) ++#source: property-gcs.s ++#alltargets: [check_shared_lib_support] *linux* ++#as: -march=armv9.4-a+gcs -defsym __mult__=0 -defsym __property_gcs__=1 ++#ld: -z experimental-gcs=always -z experimental-gcs-report=warning ++#readelf: -n ++ ++Displaying notes found in: .note.gnu.property ++[ ]+Owner[ ]+Data size[ ]+Description ++ GNU 0x00000010 NT_GNU_PROPERTY_TYPE_0 ++ Properties: AArch64 feature: GCS +diff --git a/ld/testsuite/ld-aarch64/property-gcs18.d b/ld/testsuite/ld-aarch64/property-gcs18.d +new file mode 100644 +index 00000000000..f71c10e2523 +--- /dev/null ++++ b/ld/testsuite/ld-aarch64/property-gcs18.d +@@ -0,0 +1,11 @@ ++#name: GNU Property (input with gcs ouput forced with experimental-gcs=always experimental-gcs-report=error) ++#source: property-gcs.s ++#alltargets: [check_shared_lib_support] *linux* ++#as: -march=armv9.4-a+gcs -defsym __mult__=0 -defsym __property_gcs__=1 ++#ld: -z experimental-gcs=always -z experimental-gcs-report=error ++#readelf: -n ++ ++Displaying notes found in: .note.gnu.property ++[ ]+Owner[ ]+Data size[ ]+Description ++ GNU 0x00000010 NT_GNU_PROPERTY_TYPE_0 ++ Properties: AArch64 feature: GCS +diff --git a/ld/testsuite/ld-aarch64/property-gcs19.d b/ld/testsuite/ld-aarch64/property-gcs19.d +new file mode 100644 +index 00000000000..468f96edcf1 +--- /dev/null ++++ 
b/ld/testsuite/ld-aarch64/property-gcs19.d +@@ -0,0 +1,6 @@ ++#name: GNU Property (input without gcs output forced with experimental-gcs=never) ++#source: property-gcs.s ++#alltargets: [check_shared_lib_support] *linux* ++#as: -march=armv9.4-a+gcs -defsym __mult__=0 ++#ld: -z experimental-gcs=never ++#readelf: -n +diff --git a/ld/testsuite/ld-aarch64/property-gcs2.d b/ld/testsuite/ld-aarch64/property-gcs2.d +new file mode 100644 +index 00000000000..ed545a180b3 +--- /dev/null ++++ b/ld/testsuite/ld-aarch64/property-gcs2.d +@@ -0,0 +1,11 @@ ++#name: GNU Property (input with gcs) ++#source: property-gcs.s ++#alltargets: [check_shared_lib_support] *linux* ++#as: -march=armv9.4-a+gcs -defsym __mult__=0 -defsym __property_gcs__=1 ++#ld: -shared ++#readelf: -n ++ ++Displaying notes found in: .note.gnu.property ++[ ]+Owner[ ]+Data size[ ]+Description ++ GNU 0x00000010 NT_GNU_PROPERTY_TYPE_0 ++ Properties: AArch64 feature: GCS +diff --git a/ld/testsuite/ld-aarch64/property-gcs2.s b/ld/testsuite/ld-aarch64/property-gcs2.s +new file mode 100644 +index 00000000000..6db7d8396c8 +--- /dev/null ++++ b/ld/testsuite/ld-aarch64/property-gcs2.s +@@ -0,0 +1,33 @@ ++ .text ++ .global foo ++ .type foo, %function ++foo: ++ sub sp, sp, #16 ++ mov w0, 9 ++ str w0, [sp, 12] ++ ldr w0, [sp, 12] ++ add w0, w0, 4 ++ str w0, [sp, 12] ++ nop ++ add sp, sp, 16 ++ ret ++ .size foo, .-foo ++ .global bar ++ .type bar, %function ++.ifdef __property_gcs__ ++ .section ".note.gnu.property", "a" ++ .p2align 3 ++ .long 1f - 0f /* name length */ ++ .long 5f - 2f /* data length */ ++ .long 5 /* note type */ ++0: .asciz "GNU" /* vendor name */ ++1: ++ .p2align 3 ++2: .long 0xc0000000 /* pr_type. */ ++ .long 4f - 3f /* pr_datasz. */ ++3: ++ .long 0x4 /* GCS. 
*/ ++4: ++ .p2align 3 ++5: ++.endif +diff --git a/ld/testsuite/ld-aarch64/property-gcs20.d b/ld/testsuite/ld-aarch64/property-gcs20.d +new file mode 100644 +index 00000000000..2bdff88a27a +--- /dev/null ++++ b/ld/testsuite/ld-aarch64/property-gcs20.d +@@ -0,0 +1,6 @@ ++#name: GNU Property (input without gcs output forced with experimental-gcs=implicit) ++#source: property-gcs.s ++#alltargets: [check_shared_lib_support] *linux* ++#as: -march=armv9.4-a+gcs -defsym __mult__=0 ++#ld: -z experimental-gcs=implicit ++#readelf: -n +diff --git a/ld/testsuite/ld-aarch64/property-gcs21.d b/ld/testsuite/ld-aarch64/property-gcs21.d +new file mode 100644 +index 00000000000..b42b11d14ea +--- /dev/null ++++ b/ld/testsuite/ld-aarch64/property-gcs21.d +@@ -0,0 +1,6 @@ ++#name: GNU Property (input with gcs output forced with experimental-gcs=never) ++#source: property-gcs.s ++#alltargets: [check_shared_lib_support] *linux* ++#as: -march=armv9.4-a+gcs -defsym __mult__=0 -defsym __property_gcs__=1 ++#ld: -z experimental-gcs=never ++#readelf: -n +diff --git a/ld/testsuite/ld-aarch64/property-gcs22.d b/ld/testsuite/ld-aarch64/property-gcs22.d +new file mode 100644 +index 00000000000..431fc1ed35b +--- /dev/null ++++ b/ld/testsuite/ld-aarch64/property-gcs22.d +@@ -0,0 +1,11 @@ ++#name: GNU Property (input with gcs output forced with experimental-gcs=implicit) ++#source: property-gcs.s ++#alltargets: [check_shared_lib_support] *linux* ++#as: -march=armv9.4-a+gcs -defsym __mult__=0 -defsym __property_gcs__=1 ++#ld: -z experimental-gcs=implicit ++#readelf: -n ++ ++Displaying notes found in: .note.gnu.property ++[ ]+Owner[ ]+Data size[ ]+Description ++ GNU 0x00000010 NT_GNU_PROPERTY_TYPE_0 ++ Properties: AArch64 feature: GCS +diff --git a/ld/testsuite/ld-aarch64/property-gcs3.d b/ld/testsuite/ld-aarch64/property-gcs3.d +new file mode 100644 +index 00000000000..68d50be0823 +--- /dev/null ++++ b/ld/testsuite/ld-aarch64/property-gcs3.d +@@ -0,0 +1,11 @@ ++#name: GNU Property (input without gcs 
output forced with experimental-gcs) ++#source: property-gcs.s ++#alltargets: [check_shared_lib_support] *linux* ++#as: -march=armv9.4-a+gcs -defsym __mult__=0 ++#ld: -z experimental-gcs ++#readelf: -n ++ ++Displaying notes found in: .note.gnu.property ++[ ]+Owner[ ]+Data size[ ]+Description ++ GNU 0x00000010 NT_GNU_PROPERTY_TYPE_0 ++ Properties: AArch64 feature: GCS +diff --git a/ld/testsuite/ld-aarch64/property-gcs4.d b/ld/testsuite/ld-aarch64/property-gcs4.d +new file mode 100644 +index 00000000000..cd5711e3da3 +--- /dev/null ++++ b/ld/testsuite/ld-aarch64/property-gcs4.d +@@ -0,0 +1,11 @@ ++#name: GNU Property (input without gcs ouput forced with experimental-gcs=always) ++#source: property-gcs.s ++#alltargets: [check_shared_lib_support] *linux* ++#as: -march=armv9.4-a+gcs -defsym __mult__=0 ++#ld: -z experimental-gcs=always ++#readelf: -n ++ ++Displaying notes found in: .note.gnu.property ++[ ]+Owner[ ]+Data size[ ]+Description ++ GNU 0x00000010 NT_GNU_PROPERTY_TYPE_0 ++ Properties: AArch64 feature: GCS +diff --git a/ld/testsuite/ld-aarch64/property-gcs5.d b/ld/testsuite/ld-aarch64/property-gcs5.d +new file mode 100644 +index 00000000000..b7a751c0276 +--- /dev/null ++++ b/ld/testsuite/ld-aarch64/property-gcs5.d +@@ -0,0 +1,11 @@ ++#name: GNU Property (input without gcs ouput forced with experimental-gcs experimental-gcs-report=none) ++#source: property-gcs.s ++#alltargets: [check_shared_lib_support] *linux* ++#as: -march=armv9.4-a+gcs -defsym __mult__=0 ++#ld: -z experimental-gcs -z experimental-gcs-report=none ++#readelf: -n ++ ++Displaying notes found in: .note.gnu.property ++[ ]+Owner[ ]+Data size[ ]+Description ++ GNU 0x00000010 NT_GNU_PROPERTY_TYPE_0 ++ Properties: AArch64 feature: GCS +diff --git a/ld/testsuite/ld-aarch64/property-gcs6.d b/ld/testsuite/ld-aarch64/property-gcs6.d +new file mode 100644 +index 00000000000..5abf8126d89 +--- /dev/null ++++ b/ld/testsuite/ld-aarch64/property-gcs6.d +@@ -0,0 +1,12 @@ ++#name: GNU Property (input without gcs 
ouput forced with experimental-gcs experimental-gcs-report=warning) ++#source: property-gcs.s ++#alltargets: [check_shared_lib_support] *linux* ++#as: -march=armv9.4-a+gcs -defsym __mult__=0 ++#ld: -z experimental-gcs -z experimental-gcs-report=warning ++#readelf: -n ++#warning: .*property-gcs.*: warning: GCS turned on by -z experimental-gcs on the output when all inputs do not have GCS in NOTE section. ++ ++Displaying notes found in: .note.gnu.property ++[ ]+Owner[ ]+Data size[ ]+Description ++ GNU 0x00000010 NT_GNU_PROPERTY_TYPE_0 ++ Properties: AArch64 feature: GCS +diff --git a/ld/testsuite/ld-aarch64/property-gcs7.d b/ld/testsuite/ld-aarch64/property-gcs7.d +new file mode 100644 +index 00000000000..4df5693a27b +--- /dev/null ++++ b/ld/testsuite/ld-aarch64/property-gcs7.d +@@ -0,0 +1,6 @@ ++#name: GNU Property (input without gcs ouput forced with experimental-gcs experimental-gcs-report=error) ++#source: property-gcs.s ++#alltargets: [check_shared_lib_support] *linux* ++#as: -march=armv9.4-a+gcs -defsym __mult__=0 ++#ld: -z experimental-gcs -z experimental-gcs-report=error ++#error: .*property-gcs.*: error: GCS turned on by -z experimental-gcs on the output when all inputs do not have GCS in NOTE section. 
+diff --git a/ld/testsuite/ld-aarch64/property-gcs8.d b/ld/testsuite/ld-aarch64/property-gcs8.d +new file mode 100644 +index 00000000000..463c3ad4197 +--- /dev/null ++++ b/ld/testsuite/ld-aarch64/property-gcs8.d +@@ -0,0 +1,11 @@ ++#name: GNU Property (input without gcs ouput forced with experimental-gcs=always experimental-gcs-report=none) ++#source: property-gcs.s ++#alltargets: [check_shared_lib_support] *linux* ++#as: -march=armv9.4-a+gcs -defsym __mult__=0 ++#ld: -z experimental-gcs=always -z experimental-gcs-report=none ++#readelf: -n ++ ++Displaying notes found in: .note.gnu.property ++[ ]+Owner[ ]+Data size[ ]+Description ++ GNU 0x00000010 NT_GNU_PROPERTY_TYPE_0 ++ Properties: AArch64 feature: GCS +diff --git a/ld/testsuite/ld-aarch64/property-gcs9.d b/ld/testsuite/ld-aarch64/property-gcs9.d +new file mode 100644 +index 00000000000..c3083675c8f +--- /dev/null ++++ b/ld/testsuite/ld-aarch64/property-gcs9.d +@@ -0,0 +1,12 @@ ++#name: GNU Property (input without gcs ouput forced with experimental-gcs=always experimental-gcs-report=warning) ++#source: property-gcs.s ++#alltargets: [check_shared_lib_support] *linux* ++#as: -march=armv9.4-a+gcs -defsym __mult__=0 ++#ld: -z experimental-gcs=always -z experimental-gcs-report=warning ++#readelf: -n ++#warning: .*property-gcs.*: warning: GCS turned on by -z experimental-gcs on the output when all inputs do not have GCS in NOTE section. 
++ ++Displaying notes found in: .note.gnu.property ++[ ]+Owner[ ]+Data size[ ]+Description ++ GNU 0x00000010 NT_GNU_PROPERTY_TYPE_0 ++ Properties: AArch64 feature: GCS +-- +2.34.1 + diff --git a/meta-arm-gcs/recipes-devtools/gcc/files/gcs.patch b/meta-arm-gcs/recipes-devtools/gcc/files/gcs.patch new file mode 100644 index 00000000..b3f012d0 --- /dev/null +++ b/meta-arm-gcs/recipes-devtools/gcc/files/gcs.patch @@ -0,0 +1,1872 @@ +Upstream-Status: Pending [vendors/ARM/gcs in gcc git] +Signed-off-by: Ross Burton <ross.burton@arm.com> + +From e66be9a852ed255469f34dcd5ecf4c30e8721b66 Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Mon, 19 Jun 2023 12:57:56 +0100 +Subject: [PATCH 01/19] aarch64: Add -mbranch-protection=gcs option + +This enables Guarded Control Stack (GCS) compatible code generation. + +The "standard" branch-protection type enables it, and the default +depends on the compiler default. + +TODO: gcs compatibility marking is missing. + +gcc/ChangeLog: + + * config/aarch64/aarch64-protos.h (aarch_gcs_enabled): Declare. + * config/aarch64/aarch64.cc (aarch_gcs_enabled): Define. + (aarch_handle_no_branch_protection): Handle gcs. + (aarch_handle_standard_branch_protection): Handle gcs. + (aarch_handle_gcs_protection): New. + * config/aarch64/aarch64.opt: Add aarch_enable_gcs. + * configure: Regenerate. + * configure.ac: Handle gcs in --enable-standard-branch-protection. + * doc/invoke.texi: Document -mbranch-protection=gcs. 
+--- + gcc/config/aarch64/aarch64-protos.h | 2 ++ + gcc/config/aarch64/aarch64.cc | 24 ++++++++++++++++++++++++ + gcc/config/aarch64/aarch64.opt | 3 +++ + gcc/configure | 2 +- + gcc/configure.ac | 2 +- + gcc/doc/invoke.texi | 5 +++-- + 6 files changed, 34 insertions(+), 4 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h +index bd719b992a5..2802bc935c9 100644 +--- a/gcc/config/aarch64/aarch64-protos.h ++++ b/gcc/config/aarch64/aarch64-protos.h +@@ -1113,4 +1113,6 @@ extern void aarch64_adjust_reg_alloc_order (); + bool aarch64_optimize_mode_switching (aarch64_mode_entity); + void aarch64_restore_za (rtx); + ++extern bool aarch64_gcs_enabled (); ++ + #endif /* GCC_AARCH64_PROTOS_H */ +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 1ea84c8bd73..73969721906 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8375,6 +8375,13 @@ aarch_bti_j_insn_p (rtx_insn *insn) + return GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_BTI_J; + } + ++/* Return TRUE if Guarded Control Stack is enabled. */ ++bool ++aarch64_gcs_enabled (void) ++{ ++ return (aarch64_enable_gcs == 1); ++} ++ + /* Check if X (or any sub-rtx of X) is a PACIASP/PACIBSP instruction. 
*/ + bool + aarch_pac_insn_p (rtx x) +@@ -18694,6 +18701,7 @@ aarch64_handle_no_branch_protection (void) + { + aarch_ra_sign_scope = AARCH_FUNCTION_NONE; + aarch_enable_bti = 0; ++ aarch64_enable_gcs = 0; + } + + static void +@@ -18702,6 +18710,7 @@ aarch64_handle_standard_branch_protection (void) + aarch_ra_sign_scope = AARCH_FUNCTION_NON_LEAF; + aarch64_ra_sign_key = AARCH64_KEY_A; + aarch_enable_bti = 1; ++ aarch64_enable_gcs = 1; + } + + static void +@@ -18728,6 +18737,11 @@ aarch64_handle_bti_protection (void) + { + aarch_enable_bti = 1; + } ++static void ++aarch64_handle_gcs_protection (void) ++{ ++ aarch64_enable_gcs = 1; ++} + + static const struct aarch_branch_protect_type aarch64_pac_ret_subtypes[] = { + { "leaf", false, aarch64_handle_pac_ret_leaf, NULL, 0 }, +@@ -18742,6 +18756,7 @@ static const struct aarch_branch_protect_type aarch64_branch_protect_types[] = + { "pac-ret", false, aarch64_handle_pac_ret_protection, + aarch64_pac_ret_subtypes, ARRAY_SIZE (aarch64_pac_ret_subtypes) }, + { "bti", false, aarch64_handle_bti_protection, NULL, 0 }, ++ { "gcs", false, aarch64_handle_gcs_protection, NULL, 0 }, + { NULL, false, NULL, NULL, 0 } + }; + +@@ -18842,6 +18857,15 @@ aarch64_override_options (void) + #endif + } + ++ if (aarch64_enable_gcs == 2) ++ { ++#ifdef TARGET_ENABLE_GCS ++ aarch64_enable_gcs = 1; ++#else ++ aarch64_enable_gcs = 0; ++#endif ++ } ++ + /* Return address signing is currently not supported for ILP32 targets. For + LP64 targets use the configured option in the absence of a command-line + option for -mbranch-protection. 
*/ +diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt +index 6356c419399..aeb710449fb 100644 +--- a/gcc/config/aarch64/aarch64.opt ++++ b/gcc/config/aarch64/aarch64.opt +@@ -39,6 +39,9 @@ aarch64_feature_flags aarch64_isa_flags = 0 + TargetVariable + unsigned aarch_enable_bti = 2 + ++TargetVariable ++unsigned aarch64_enable_gcs = 2 ++ + TargetVariable + enum aarch64_key_type aarch64_ra_sign_key = AARCH64_KEY_A + +diff --git a/gcc/configure b/gcc/configure +index 266ab8f84b2..45725639fd2 100755 +--- a/gcc/configure ++++ b/gcc/configure +@@ -28221,7 +28221,7 @@ if test "${enable_standard_branch_protection+set}" = set; then : + enableval=$enable_standard_branch_protection; + case $enableval in + yes) +- tm_defines="${tm_defines} TARGET_ENABLE_BTI=1 TARGET_ENABLE_PAC_RET=1" ++ tm_defines="${tm_defines} TARGET_ENABLE_BTI=1 TARGET_ENABLE_PAC_RET=1 TARGET_ENABLE_GCS=1" + ;; + no) + ;; +diff --git a/gcc/configure.ac b/gcc/configure.ac +index a5aec1bc967..30d59ce7949 100644 +--- a/gcc/configure.ac ++++ b/gcc/configure.ac +@@ -4440,7 +4440,7 @@ AS_HELP_STRING([--disable-standard-branch-protection], + [ + case $enableval in + yes) +- tm_defines="${tm_defines} TARGET_ENABLE_BTI=1 TARGET_ENABLE_PAC_RET=1" ++ tm_defines="${tm_defines} TARGET_ENABLE_BTI=1 TARGET_ENABLE_PAC_RET=1 TARGET_ENABLE_GCS=1" + ;; + no) + ;; +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 1006510fc6a..b5e2697193f 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -801,7 +801,7 @@ Objective-C and Objective-C++ Dialects}. + -mpc-relative-literal-loads + -msign-return-address=@var{scope} + -mbranch-protection=@var{none}|@var{standard}|@var{pac-ret}[+@var{leaf} +-+@var{b-key}]|@var{bti} +++@var{b-key}]|@var{bti}|@var{gcs} + -mharden-sls=@var{opts} + -march=@var{name} -mcpu=@var{name} -mtune=@var{name} + -moverride=@var{string} -mverbose-cost-dump +@@ -21408,7 +21408,7 @@ default value is @samp{none}. 
This option has been deprecated by + -mbranch-protection. + + @opindex mbranch-protection +-@item -mbranch-protection=@var{none}|@var{standard}|@var{pac-ret}[+@var{leaf}+@var{b-key}]|@var{bti} ++@item -mbranch-protection=@var{none}|@var{standard}|@var{pac-ret}[+@var{leaf}+@var{b-key}]|@var{bti}|@var{gcs} + Select the branch protection features to use. + @samp{none} is the default and turns off all types of branch protection. + @samp{standard} turns on all types of branch protection features. If a feature +@@ -21421,6 +21421,7 @@ argument @samp{leaf} can be used to extend the signing to include leaf + functions. The optional argument @samp{b-key} can be used to sign the functions + with the B-key instead of the A-key. + @samp{bti} turns on branch target identification mechanism. ++@samp{gcs} turns on guarded control stack compatible code generation. + + @opindex mharden-sls + @item -mharden-sls=@var{opts} +-- +2.34.1 + + +From c947c0551c793aeff90139718eb7d731d543ee08 Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Fri, 30 Jun 2023 16:31:23 +0100 +Subject: [PATCH 02/19] aarch64: Add branch-protection target pragma tests + +gcc/testsuite/ChangeLog: + + * gcc.target/aarch64/pragma_cpp_predefs_4.c: Add branch-protection + tests. 
+--- + .../gcc.target/aarch64/pragma_cpp_predefs_4.c | 50 +++++++++++++++++++ + 1 file changed, 50 insertions(+) + +diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c +index 23ebe5e4f50..8e707630774 100644 +--- a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c ++++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c +@@ -83,3 +83,53 @@ + #ifndef __ARM_FEATURE_SME_F64F64 + #error Foo + #endif ++ ++#pragma GCC target "branch-protection=standard" ++#ifndef __ARM_FEATURE_BTI_DEFAULT ++#error Foo ++#endif ++#if __ARM_FEATURE_PAC_DEFAULT != 1 ++#error Foo ++#endif ++ ++#pragma GCC target ("branch-protection=none") ++#ifdef __ARM_FEATURE_BTI_DEFAULT ++#error Foo ++#endif ++#ifdef __ARM_FEATURE_PAC_DEFAULT ++#error Foo ++#endif ++ ++#pragma GCC push_options ++#pragma GCC target "branch-protection=bti+pac-ret" ++#ifndef __ARM_FEATURE_BTI_DEFAULT ++#error Foo ++#endif ++#pragma GCC pop_options ++#ifdef __ARM_FEATURE_BTI_DEFAULT ++#error Foo ++#endif ++ ++#pragma GCC target "branch-protection=bti" ++#ifndef __ARM_FEATURE_BTI_DEFAULT ++#error Foo ++#endif ++#ifdef __ARM_FEATURE_PAC_DEFAULT ++#error Foo ++#endif ++ ++#pragma GCC target "branch-protection=pac-ret" ++#ifdef __ARM_FEATURE_BTI_DEFAULT ++#error Foo ++#endif ++#if __ARM_FEATURE_PAC_DEFAULT != 1 ++#error Foo ++#endif ++ ++#pragma GCC target "branch-protection=pac-ret+leaf+b-key" ++#ifdef __ARM_FEATURE_BTI_DEFAULT ++#error Foo ++#endif ++#if __ARM_FEATURE_PAC_DEFAULT != 6 ++#error Foo ++#endif +-- +2.34.1 + + +From 99367f7410b3c328d67051734145f3970b84ee6f Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Fri, 30 Jun 2023 16:50:23 +0100 +Subject: [PATCH 03/19] aarch64: Add target pragma tests for gcs + +gcc/testsuite/ChangeLog: + + * gcc.target/aarch64/pragma_cpp_predefs_4.c: Add gcs specific + tests. 
+--- + .../gcc.target/aarch64/pragma_cpp_predefs_4.c | 35 +++++++++++++++++++ + 1 file changed, 35 insertions(+) + +diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c +index 8e707630774..417293d4d5a 100644 +--- a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c ++++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c +@@ -91,6 +91,9 @@ + #if __ARM_FEATURE_PAC_DEFAULT != 1 + #error Foo + #endif ++#ifndef __ARM_FEATURE_GCS_DEFAULT ++#error Foo ++#endif + + #pragma GCC target ("branch-protection=none") + #ifdef __ARM_FEATURE_BTI_DEFAULT +@@ -99,6 +102,9 @@ + #ifdef __ARM_FEATURE_PAC_DEFAULT + #error Foo + #endif ++#ifdef __ARM_FEATURE_GCS_DEFAULT ++#error Foo ++#endif + + #pragma GCC push_options + #pragma GCC target "branch-protection=bti+pac-ret" +@@ -117,6 +123,9 @@ + #ifdef __ARM_FEATURE_PAC_DEFAULT + #error Foo + #endif ++#ifdef __ARM_FEATURE_GCS_DEFAULT ++#error Foo ++#endif + + #pragma GCC target "branch-protection=pac-ret" + #ifdef __ARM_FEATURE_BTI_DEFAULT +@@ -133,3 +142,29 @@ + #if __ARM_FEATURE_PAC_DEFAULT != 6 + #error Foo + #endif ++ ++#pragma GCC target "branch-protection=gcs" ++#ifdef __ARM_FEATURE_BTI_DEFAULT ++#error Foo ++#endif ++#ifdef __ARM_FEATURE_PAC_DEFAULT ++#error Foo ++#endif ++#ifndef __ARM_FEATURE_GCS_DEFAULT ++#error Foo ++#endif ++ ++#pragma GCC target "arch=armv8.8-a+gcs" ++#ifndef __ARM_FEATURE_GCS ++#error Foo ++#endif ++ ++#pragma GCC target "arch=armv8.8-a+nogcs" ++#ifdef __ARM_FEATURE_GCS ++#error Foo ++#endif ++ ++#pragma GCC target "arch=armv8.8-a" ++#ifdef __ARM_FEATURE_GCS ++#error Foo ++#endif +-- +2.34.1 + + +From f2a000e3b40953985a778875d1a908822ca9ffbd Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Tue, 9 May 2023 15:37:49 +0100 +Subject: [PATCH 04/19] aarch64: Add support for chkfeat insn + +This is a hint space instruction to check for enabled HW features and +update the x16 register accordingly. 
+ +Use unspec_volatile to prevent reordering it around calls since calls +can enable or disable HW features. + +gcc/ChangeLog: + + * config/aarch64/aarch64.md (aarch64_chkfeat): New. +--- + gcc/config/aarch64/aarch64.md | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index 385a669b9b3..a20462303b5 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md +@@ -378,6 +378,7 @@ + UNSPECV_BTI_C ; Represent BTI c. + UNSPECV_BTI_J ; Represent BTI j. + UNSPECV_BTI_JC ; Represent BTI jc. ++ UNSPECV_CHKFEAT ; Represent CHKFEAT X16. + UNSPECV_TSTART ; Represent transaction start. + UNSPECV_TCOMMIT ; Represent transaction commit. + UNSPECV_TCANCEL ; Represent transaction cancel. +@@ -8258,6 +8259,14 @@ + "msr\tnzcv, %0" + ) + ++;; CHKFEAT instruction ++(define_insn "aarch64_chkfeat" ++ [(set (reg:DI R16_REGNUM) ++ (unspec_volatile:DI [(reg:DI R16_REGNUM)] UNSPECV_CHKFEAT))] ++ "" ++ "hint\\t40 // chkfeat x16" ++) ++ + ;; AdvSIMD Stuff + (include "aarch64-simd.md") + +-- +2.34.1 + + +From 6a5358558591549fb2fb5b210c9681b0d694d1af Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Tue, 9 May 2023 15:24:18 +0100 +Subject: [PATCH 05/19] aarch64: Add __builtin_aarch64_chkfeat + +Builtin for chkfeat: the input argument is used to initialize x16 then +execute chkfeat and return the updated x16. + +Note: ACLE __chkfeat(x) plans to flip the bits to be more intuitive +(xor the input to output), but for the builtin that seems unnecessary +complication. + +gcc/ChangeLog: + + * config/aarch64/aarch64-builtins.cc (enum aarch64_builtins): + Define AARCH64_BUILTIN_CHKFEAT. + (aarch64_general_init_builtins): Handle chkfeat. + (aarch64_general_expand_builtin): Handle chkfeat. 
+--- + gcc/config/aarch64/aarch64-builtins.cc | 18 ++++++++++++++++++ + 1 file changed, 18 insertions(+) + +diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc +index 75d21de1401..1c08f56ab6b 100644 +--- a/gcc/config/aarch64/aarch64-builtins.cc ++++ b/gcc/config/aarch64/aarch64-builtins.cc +@@ -788,6 +788,8 @@ enum aarch64_builtins + AARCH64_PLDX, + AARCH64_PLI, + AARCH64_PLIX, ++ /* Armv8.9-A / Armv9.4-A builtins. */ ++ AARCH64_BUILTIN_CHKFEAT, + AARCH64_BUILTIN_MAX + }; + +@@ -2084,6 +2086,12 @@ aarch64_general_init_builtins (void) + if (TARGET_MEMTAG) + aarch64_init_memtag_builtins (); + ++ tree ftype_chkfeat ++ = build_function_type_list (uint64_type_node, uint64_type_node, NULL); ++ aarch64_builtin_decls[AARCH64_BUILTIN_CHKFEAT] ++ = aarch64_general_add_builtin ("__builtin_aarch64_chkfeat", ftype_chkfeat, ++ AARCH64_BUILTIN_CHKFEAT); ++ + if (in_lto_p) + handle_arm_acle_h (); + } +@@ -3137,6 +3145,16 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target, + case AARCH64_PLIX: + aarch64_expand_prefetch_builtin (exp, fcode); + return target; ++ ++ case AARCH64_BUILTIN_CHKFEAT: ++ { ++ rtx x16_reg = gen_rtx_REG (DImode, R16_REGNUM); ++ op0 = expand_normal (CALL_EXPR_ARG (exp, 0)); ++ emit_move_insn (x16_reg, op0); ++ expand_insn (CODE_FOR_aarch64_chkfeat, 0, 0); ++ emit_move_insn (target, x16_reg); ++ return target; ++ } + } + + if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX) +-- +2.34.1 + + +From fd2868c17ad14374147adc83d62ef24b60718509 Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Fri, 2 Jun 2023 16:15:25 +0100 +Subject: [PATCH 06/19] aarch64: Add __builtin_aarch64_chkfeat tests + +gcc/testsuite/ChangeLog: + + * gcc.target/aarch64/chkfeat-1.c: New test. + * gcc.target/aarch64/chkfeat-2.c: New test. 
+--- + gcc/testsuite/gcc.target/aarch64/chkfeat-1.c | 75 ++++++++++++++++++++ + gcc/testsuite/gcc.target/aarch64/chkfeat-2.c | 15 ++++ + 2 files changed, 90 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/aarch64/chkfeat-1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/chkfeat-2.c + +diff --git a/gcc/testsuite/gcc.target/aarch64/chkfeat-1.c b/gcc/testsuite/gcc.target/aarch64/chkfeat-1.c +new file mode 100644 +index 00000000000..2fae81e740f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/chkfeat-1.c +@@ -0,0 +1,75 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mbranch-protection=none" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++/* ++**foo1: ++** mov x16, 1 ++** hint 40 // chkfeat x16 ++** mov x0, x16 ++** ret ++*/ ++unsigned long long ++foo1 (void) ++{ ++ return __builtin_aarch64_chkfeat (1); ++} ++ ++/* ++**foo2: ++** mov x16, 1 ++** movk x16, 0x5678, lsl 32 ++** movk x16, 0x1234, lsl 48 ++** hint 40 // chkfeat x16 ++** mov x0, x16 ++** ret ++*/ ++unsigned long long ++foo2 (void) ++{ ++ return __builtin_aarch64_chkfeat (0x1234567800000001); ++} ++ ++/* ++**foo3: ++** mov x16, x0 ++** hint 40 // chkfeat x16 ++** mov x0, x16 ++** ret ++*/ ++unsigned long long ++foo3 (unsigned long long x) ++{ ++ return __builtin_aarch64_chkfeat (x); ++} ++ ++/* ++**foo4: ++** ldr x16, \[x0\] ++** hint 40 // chkfeat x16 ++** str x16, \[x0\] ++** ret ++*/ ++void ++foo4 (unsigned long long *p) ++{ ++ *p = __builtin_aarch64_chkfeat (*p); ++} ++ ++/* ++**foo5: ++** mov x16, 1 ++** hint 40 // chkfeat x16 ++** cmp x16, 0 ++**( ++** csel w0, w1, w0, eq ++**| ++** csel w0, w0, w1, ne ++**) ++** ret ++*/ ++int ++foo5 (int x, int y) ++{ ++ return __builtin_aarch64_chkfeat (1) ? 
x : y; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/chkfeat-2.c b/gcc/testsuite/gcc.target/aarch64/chkfeat-2.c +new file mode 100644 +index 00000000000..682524e244f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/chkfeat-2.c +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++/* { dg-final { scan-assembler-times {hint\t40 // chkfeat x16} 2 } } */ ++ ++void bar (void); ++ ++/* Extern call may change enabled HW features. */ ++unsigned long long ++foo (void) ++{ ++ unsigned long long a = __builtin_aarch64_chkfeat (1); ++ bar (); ++ unsigned long long b = __builtin_aarch64_chkfeat (1); ++ return a + b; ++} +-- +2.34.1 + + +From 409aac824e3a69848a60daafcaeedc5f18c357dd Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Tue, 9 May 2023 16:00:01 +0100 +Subject: [PATCH 07/19] aarch64: Add GCS instructions + +Add instructions for the Guarded Control Stack extension. + +GCSSS1 and GCSSS2 are modelled as a single GCSSS unspec, because they +are always used together in the compiler. + +Before GCSPOPM and GCSSS2 an extra "mov xn, 0" is added to clear the +output register, this is needed to get reasonable result when GCS is +disabled, when the instructions are NOPs. Since the instructions are +expecetd to be used behind runtime feature checks, this is mainly +relevant if GCS can be disabled asynchronously. + +The output of GCSPOPM is usually not needed, so a separate gcspopm_xzr +was added to model that. Did not do the same for GCSSS as it is a less +common operation. + +The used mnemonics do not depend on updated assembler since these +instructions can be used without new -march setting behind a runtime +check. + +Reading the GCSPR is modelled as unspec_volatile so it does not get +reordered wrt the other instructions changing the GCSPR. + +TODO: +- Do we care about async disable? +- Do we need GCSSS_xzr? (to avoid the mov x,0) + +gcc/ChangeLog: + + * config/aarch64/aarch64.md (aarch64_load_gcspr): New. 
+ (aarch64_gcspopm): New. + (aarch64_gcspopm_xzr): New. + (aarch64_gcsss): New. +--- + gcc/config/aarch64/aarch64.md | 35 +++++++++++++++++++++++++++++++++++ + 1 file changed, 35 insertions(+) + +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index a20462303b5..8defd6e0582 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md +@@ -379,6 +379,9 @@ + UNSPECV_BTI_J ; Represent BTI j. + UNSPECV_BTI_JC ; Represent BTI jc. + UNSPECV_CHKFEAT ; Represent CHKFEAT X16. ++ UNSPECV_GCSPR ; Represent MRS Xn, GCSPR_EL0 ++ UNSPECV_GCSPOPM ; Represent GCSPOPM. ++ UNSPECV_GCSSS ; Represent GCSSS1 and GCSSS2. + UNSPECV_TSTART ; Represent transaction start. + UNSPECV_TCOMMIT ; Represent transaction commit. + UNSPECV_TCANCEL ; Represent transaction cancel. +@@ -8267,6 +8270,38 @@ + "hint\\t40 // chkfeat x16" + ) + ++;; Guarded Control Stack (GCS) instructions ++(define_insn "aarch64_load_gcspr" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec_volatile:DI [(const_int 0)] UNSPECV_GCSPR))] ++ "" ++ "mrs\\t%0, s3_3_c2_c5_1 // gcspr_el0" ++ [(set_attr "type" "mrs")] ++) ++ ++(define_insn "aarch64_gcspopm" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec_volatile:DI [(const_int 0)] UNSPECV_GCSPOPM))] ++ "" ++ "mov\\t%0, 0\;sysl\\t%0, #3, c7, c7, #1 // gcspopm" ++ [(set_attr "length" "8")] ++) ++ ++(define_insn "aarch64_gcspopm_xzr" ++ [(unspec_volatile [(const_int 0)] UNSPECV_GCSPOPM)] ++ "" ++ "sysl\\txzr, #3, c7, c7, #1 // gcspopm" ++) ++ ++(define_insn "aarch64_gcsss" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r")] ++ UNSPECV_GCSSS))] ++ "" ++ "sys\\t#3, c7, c7, #2, %1 // gcsss1\;mov\\t%0, 0\;sysl\\t%0, #3, c7, c7, #3 // gcsss2" ++ [(set_attr "length" "12")] ++) ++ + ;; AdvSIMD Stuff + (include "aarch64-simd.md") + +-- +2.34.1 + + +From 9e6a37fd99e22e3cd3d685100763c9ed201019ee Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy 
<szabolcs.nagy@arm.com> +Date: Tue, 9 May 2023 16:21:28 +0100 +Subject: [PATCH 08/19] aarch64: Add GCS builtins + +Add new builtins for GCS: + + void *__builtin_aarch64_gcspr (void) + uint64_t __builtin_aarch64_gcspopm (void) + void *__builtin_aarch64_gcsss (void *) + +The builtins are always enabled, but should be used behind runtime +checks in case the target does not support GCS. They are thin +wrappers around the corresponding instructions. + +The GCS pointer is modelled with void * type (normal stores do not +work on GCS memory, but it is writable via the gcsss operation or +via GCSSTR if enabled so not const) and an entry on the GCS is +modelled with uint64_t (since it has fixed size and can be a token +that's not a pointer). + +gcc/ChangeLog: + + * config/aarch64/aarch64-builtins.cc (enum aarch64_builtins): Add + AARCH64_BUILTIN_GCSPR, AARCH64_BUILTIN_GCSPOPM, AARCH64_BUILTIN_GCSSS. + (aarch64_init_gcs_builtins): New. + (aarch64_general_init_builtins): Call aarch64_init_gcs_builtins. + (aarch64_expand_gcs_builtin): New. + (aarch64_general_expand_builtin): Call aarch64_expand_gcs_builtin. +--- + gcc/config/aarch64/aarch64-builtins.cc | 70 ++++++++++++++++++++++++++ + 1 file changed, 70 insertions(+) + +diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc +index 1c08f56ab6b..30c977586f9 100644 +--- a/gcc/config/aarch64/aarch64-builtins.cc ++++ b/gcc/config/aarch64/aarch64-builtins.cc +@@ -790,6 +790,9 @@ enum aarch64_builtins + AARCH64_PLIX, + /* Armv8.9-A / Armv9.4-A builtins. */ + AARCH64_BUILTIN_CHKFEAT, ++ AARCH64_BUILTIN_GCSPR, ++ AARCH64_BUILTIN_GCSPOPM, ++ AARCH64_BUILTIN_GCSSS, + AARCH64_BUILTIN_MAX + }; + +@@ -2041,6 +2044,29 @@ aarch64_init_fpsr_fpcr_builtins (void) + AARCH64_BUILTIN_SET_FPSR64); + } + ++/* Add builtins for Guarded Control Stack instructions. 
*/ ++ ++static void ++aarch64_init_gcs_builtins (void) ++{ ++ tree ftype; ++ ++ ftype = build_function_type_list (ptr_type_node, NULL); ++ aarch64_builtin_decls[AARCH64_BUILTIN_GCSPR] ++ = aarch64_general_add_builtin ("__builtin_aarch64_gcspr", ftype, ++ AARCH64_BUILTIN_GCSPR); ++ ++ ftype = build_function_type_list (uint64_type_node, NULL); ++ aarch64_builtin_decls[AARCH64_BUILTIN_GCSPOPM] ++ = aarch64_general_add_builtin ("__builtin_aarch64_gcspopm", ftype, ++ AARCH64_BUILTIN_GCSPOPM); ++ ++ ftype = build_function_type_list (ptr_type_node, ptr_type_node, NULL); ++ aarch64_builtin_decls[AARCH64_BUILTIN_GCSSS] ++ = aarch64_general_add_builtin ("__builtin_aarch64_gcsss", ftype, ++ AARCH64_BUILTIN_GCSSS); ++} ++ + /* Initialize all builtins in the AARCH64_BUILTIN_GENERAL group. */ + + void +@@ -2092,6 +2118,8 @@ aarch64_general_init_builtins (void) + = aarch64_general_add_builtin ("__builtin_aarch64_chkfeat", ftype_chkfeat, + AARCH64_BUILTIN_CHKFEAT); + ++ aarch64_init_gcs_builtins (); ++ + if (in_lto_p) + handle_arm_acle_h (); + } +@@ -3020,6 +3048,43 @@ aarch64_expand_fpsr_fpcr_getter (enum insn_code icode, machine_mode mode, + return op.value; + } + ++/* Expand GCS builtin EXP with code FCODE, putting the result ++ int TARGET. If IGNORE is true the return value is ignored. 
*/ ++ ++rtx ++aarch64_expand_gcs_builtin (tree exp, rtx target, int fcode, int ignore) ++{ ++ if (fcode == AARCH64_BUILTIN_GCSPR) ++ { ++ expand_operand op; ++ create_output_operand (&op, target, DImode); ++ expand_insn (CODE_FOR_aarch64_load_gcspr, 1, &op); ++ return op.value; ++ } ++ if (fcode == AARCH64_BUILTIN_GCSPOPM && ignore) ++ { ++ expand_insn (CODE_FOR_aarch64_gcspopm_xzr, 0, 0); ++ return target; ++ } ++ if (fcode == AARCH64_BUILTIN_GCSPOPM) ++ { ++ expand_operand op; ++ create_output_operand (&op, target, Pmode); ++ expand_insn (CODE_FOR_aarch64_gcspopm, 1, &op); ++ return op.value; ++ } ++ if (fcode == AARCH64_BUILTIN_GCSSS) ++ { ++ expand_operand ops[2]; ++ rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 0)); ++ create_output_operand (&ops[0], target, Pmode); ++ create_input_operand (&ops[1], op1, Pmode); ++ expand_insn (CODE_FOR_aarch64_gcsss, 2, ops); ++ return ops[0].value; ++ } ++ gcc_unreachable (); ++} ++ + /* Expand an expression EXP that calls built-in function FCODE, + with result going to TARGET if that's convenient. IGNORE is true + if the result of the builtin is ignored. */ +@@ -3155,6 +3220,11 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target, + emit_move_insn (target, x16_reg); + return target; + } ++ ++ case AARCH64_BUILTIN_GCSPR: ++ case AARCH64_BUILTIN_GCSPOPM: ++ case AARCH64_BUILTIN_GCSSS: ++ return aarch64_expand_gcs_builtin (exp, target, fcode, ignore); + } + + if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX) +-- +2.34.1 + + +From d4d950feefc4f55da32be812eb882a2f66aadcaf Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Tue, 6 Jun 2023 17:35:51 +0100 +Subject: [PATCH 09/19] aarch64: Add __builtin_aarch64_gcs* tests + +gcc/testsuite/ChangeLog: + + * gcc.target/aarch64/gcspopm-1.c: New test. + * gcc.target/aarch64/gcspr-1.c: New test. + * gcc.target/aarch64/gcsss-1.c: New test. 
+--- + gcc/testsuite/gcc.target/aarch64/gcspopm-1.c | 69 ++++++++++++++++++++ + gcc/testsuite/gcc.target/aarch64/gcspr-1.c | 31 +++++++++ + gcc/testsuite/gcc.target/aarch64/gcsss-1.c | 49 ++++++++++++++ + 3 files changed, 149 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/aarch64/gcspopm-1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/gcspr-1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/gcsss-1.c + +diff --git a/gcc/testsuite/gcc.target/aarch64/gcspopm-1.c b/gcc/testsuite/gcc.target/aarch64/gcspopm-1.c +new file mode 100644 +index 00000000000..6e6add39cf7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/gcspopm-1.c +@@ -0,0 +1,69 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mbranch-protection=none" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++/* ++**foo1: ++** sysl xzr, #3, c7, c7, #1 // gcspopm ++** ret ++*/ ++void ++foo1 (void) ++{ ++ __builtin_aarch64_gcspopm (); ++} ++ ++/* ++**foo2: ++** mov x0, 0 ++** sysl x0, #3, c7, c7, #1 // gcspopm ++** ret ++*/ ++unsigned long long ++foo2 (void) ++{ ++ return __builtin_aarch64_gcspopm (); ++} ++ ++/* ++**foo3: ++** mov x16, 1 ++** ( ++** mov x0, 0 ++** hint 40 // chkfeat x16 ++** | ++** hint 40 // chkfeat x16 ++** mov x0, 0 ++** ) ++** cbz x16, .* ++** ret ++** mov x0, 0 ++** sysl x0, #3, c7, c7, #1 // gcspopm ++** ret ++*/ ++unsigned long long ++foo3 (void) ++{ ++ if (__builtin_aarch64_chkfeat (1) == 0) ++ return __builtin_aarch64_gcspopm (); ++ return 0; ++} ++ ++/* ++**foo4: ++** sysl xzr, #3, c7, c7, #1 // gcspopm ++** mov x0, 0 ++** sysl x0, #3, c7, c7, #1 // gcspopm ++** sysl xzr, #3, c7, c7, #1 // gcspopm ++** ret ++*/ ++unsigned long long ++foo4 (void) ++{ ++ unsigned long long a = __builtin_aarch64_gcspopm (); ++ unsigned long long b = __builtin_aarch64_gcspopm (); ++ unsigned long long c = __builtin_aarch64_gcspopm (); ++ (void) a; ++ (void) c; ++ return b; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/gcspr-1.c 
b/gcc/testsuite/gcc.target/aarch64/gcspr-1.c +new file mode 100644 +index 00000000000..0e651979551 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/gcspr-1.c +@@ -0,0 +1,31 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mbranch-protection=none" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++/* ++**foo1: ++** mrs x0, s3_3_c2_c5_1 // gcspr_el0 ++** ret ++*/ ++void * ++foo1 (void) ++{ ++ return __builtin_aarch64_gcspr (); ++} ++ ++/* ++**foo2: ++** mrs x[0-9]*, s3_3_c2_c5_1 // gcspr_el0 ++** sysl xzr, #3, c7, c7, #1 // gcspopm ++** mrs x[0-9]*, s3_3_c2_c5_1 // gcspr_el0 ++** sub x0, x[0-9]*, x[0-9]* ++** ret ++*/ ++long ++foo2 (void) ++{ ++ const char *p = __builtin_aarch64_gcspr (); ++ __builtin_aarch64_gcspopm (); ++ const char *q = __builtin_aarch64_gcspr (); ++ return p - q; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/gcsss-1.c b/gcc/testsuite/gcc.target/aarch64/gcsss-1.c +new file mode 100644 +index 00000000000..025c7fee647 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/gcsss-1.c +@@ -0,0 +1,49 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mbranch-protection=none" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++/* ++**foo1: ++** sys #3, c7, c7, #2, x0 // gcsss1 ++** mov x[0-9]*, 0 ++** sysl x[0-9]*, #3, c7, c7, #3 // gcsss2 ++** ret ++*/ ++void ++foo1 (void *p) ++{ ++ __builtin_aarch64_gcsss (p); ++} ++ ++/* ++**foo2: ++** sys #3, c7, c7, #2, x0 // gcsss1 ++** mov x0, 0 ++** sysl x0, #3, c7, c7, #3 // gcsss2 ++** ret ++*/ ++void * ++foo2 (void *p) ++{ ++ return __builtin_aarch64_gcsss (p); ++} ++ ++/* ++**foo3: ++** mov x16, 1 ++** hint 40 // chkfeat x16 ++** cbnz x16, .* ++** sys #3, c7, c7, #2, x0 // gcsss1 ++** mov x0, 0 ++** sysl x0, #3, c7, c7, #3 // gcsss2 ++** ret ++** mov x0, 0 ++** ret ++*/ ++void * ++foo3 (void *p) ++{ ++ if (__builtin_aarch64_chkfeat (1) == 0) ++ return __builtin_aarch64_gcsss (p); ++ return 0; ++} +-- +2.34.1 + + +From 57b7b8934997cbd3eceb84b8de30b83f05760ecc 
Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Fri, 14 Apr 2023 18:23:52 +0100 +Subject: [PATCH 10/19] aarch64: Add GCS support for nonlocal stack save + +Nonlocal stack save and restore has to also save and restore the GCS +pointer. This is used in __builtin_setjmp/longjmp and nonlocal goto. + +The GCS specific code is only emitted if GCS branch-protection is +enabled and the code always checks at runtime if GCS is enabled. + +The new -mbranch-protection=gcs and old -mbranch-protection=none code +are ABI compatible: jmpbuf for __builtin_setjmp has space for 5 +pointers, the layout is + + old layout: fp, pc, sp, unused, unused + new layout: fp, pc, sp, gcsp, unused + +Note: the ILP32 code generation is wrong as it saves the pointers with +Pmode (i.e. 8 bytes per pointer), but the user supplied buffer size is +for 5 pointers (4 bytes per pointer), this is not fixed. + +The nonlocal goto has no ABI compatibility issues as the goto and its +destination are in the same translation unit. + +TODO: +- can we simplify the define_expand rtls? + +gcc/ChangeLog: + + * config/aarch64/aarch64.h (STACK_SAVEAREA_MODE): Make space for gcs. + * config/aarch64/aarch64.md (save_stack_nonlocal): New. + (restore_stack_nonlocal): New. +--- + gcc/config/aarch64/aarch64.h | 7 +++ + gcc/config/aarch64/aarch64.md | 82 +++++++++++++++++++++++++++++++++++ + 2 files changed, 89 insertions(+) + +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 45e901cda64..3238452f53f 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -1294,6 +1294,13 @@ typedef struct + #define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ + ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2) + ++/* Have space for both SP and GCSPR in the NONLOCAL case in ++ emit_stack_save as well as in __builtin_setjmp, __builtin_longjmp ++ and __builtin_nonlocal_goto. ++ Note: On ILP32 the documented buf size is not enough PR84150. 
*/ ++#define STACK_SAVEAREA_MODE(LEVEL) \ ++ ((LEVEL) == SAVE_NONLOCAL ? TImode : Pmode) ++ + #define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, LR_REGNUM) + + #define RETURN_ADDR_RTX aarch64_return_addr +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index 8defd6e0582..2d36af12cfb 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md +@@ -1183,6 +1183,88 @@ + (const_int 1)))] + ) + ++(define_expand "save_stack_nonlocal" ++ [(set (match_operand 0 "memory_operand") ++ (match_operand 1 "register_operand"))] ++ "" ++{ ++ rtx stack_slot = adjust_address (operands[0], Pmode, 0); ++ emit_move_insn (stack_slot, operands[1]); ++ ++ if (aarch64_gcs_enabled ()) ++ { ++ /* Save GCS with code like ++ mov x16, 1 ++ chkfeat x16 ++ tbnz x16, 0, .L_done ++ mrs tmp, gcspr_el0 ++ str tmp, [%0, 8] ++ .L_done: */ ++ ++ rtx done_label = gen_label_rtx (); ++ rtx r16 = gen_rtx_REG (DImode, R16_REGNUM); ++ emit_move_insn (r16, const1_rtx); ++ emit_insn (gen_aarch64_chkfeat ()); ++ emit_insn (gen_tbranch_neqi3 (r16, const0_rtx, done_label)); ++ rtx gcs_slot = adjust_address (operands[0], Pmode, GET_MODE_SIZE (Pmode)); ++ rtx gcs = force_reg (Pmode, const0_rtx); ++ emit_insn (gen_aarch64_load_gcspr (gcs)); ++ emit_move_insn (gcs_slot, gcs); ++ emit_label (done_label); ++ } ++ DONE; ++}) ++ ++(define_expand "restore_stack_nonlocal" ++ [(set (match_operand 0 "register_operand" "") ++ (match_operand 1 "memory_operand" ""))] ++ "" ++{ ++ rtx stack_slot = adjust_address (operands[1], Pmode, 0); ++ emit_move_insn (operands[0], stack_slot); ++ ++ if (aarch64_gcs_enabled ()) ++ { ++ /* Restore GCS with code like ++ mov x16, 1 ++ chkfeat x16 ++ tbnz x16, 0, .L_done ++ ldr tmp1, [%1, 8] ++ mrs tmp2, gcspr_el0 ++ subs tmp2, tmp1, tmp2 ++ b.eq .L_done ++ .L_loop: ++ gcspopm ++ subs tmp2, tmp2, 8 ++ b.ne .L_loop ++ .L_done: */ ++ ++ rtx loop_label = gen_label_rtx (); ++ rtx done_label = gen_label_rtx (); ++ rtx r16 = gen_rtx_REG (DImode, 
R16_REGNUM); ++ emit_move_insn (r16, const1_rtx); ++ emit_insn (gen_aarch64_chkfeat ()); ++ emit_insn (gen_tbranch_neqi3 (r16, const0_rtx, done_label)); ++ rtx gcs_slot = adjust_address (operands[1], Pmode, GET_MODE_SIZE (Pmode)); ++ rtx gcs_old = force_reg (Pmode, const0_rtx); ++ emit_move_insn (gcs_old, gcs_slot); ++ rtx gcs_now = force_reg (Pmode, const0_rtx); ++ emit_insn (gen_aarch64_load_gcspr (gcs_now)); ++ emit_insn (gen_subdi3_compare1 (gcs_now, gcs_old, gcs_now)); ++ rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM); ++ rtx cmp_rtx = gen_rtx_fmt_ee (EQ, DImode, cc_reg, const0_rtx); ++ emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, done_label)); ++ emit_label (loop_label); ++ emit_insn (gen_aarch64_gcspopm_xzr ()); ++ emit_insn (gen_adddi3_compare0 (gcs_now, gcs_now, GEN_INT (-8))); ++ cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM); ++ cmp_rtx = gen_rtx_fmt_ee (NE, DImode, cc_reg, const0_rtx); ++ emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, loop_label)); ++ emit_label (done_label); ++ } ++ DONE; ++}) ++ + ;; ------------------------------------------------------------------- + ;; Subroutine calls and sibcalls + ;; ------------------------------------------------------------------- +-- +2.34.1 + + +From 44b1847be5f2a53727a4a64f2d895b1c1d65e460 Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Wed, 7 Jun 2023 10:58:06 +0100 +Subject: [PATCH 11/19] aarch64: Add non-local goto and jump tests for GCS + +These are scan asm tests only, relying on existing execution tests +for runtime coverage. + +gcc/testsuite/ChangeLog: + + * gcc.target/aarch64/gcs-nonlocal-1.c: New test. + * gcc.target/aarch64/gcs-nonlocal-2.c: New test. 
+--- + .../gcc.target/aarch64/gcs-nonlocal-1.c | 25 +++++++++++++++++++ + .../gcc.target/aarch64/gcs-nonlocal-2.c | 21 ++++++++++++++++ + 2 files changed, 46 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-2.c + +diff --git a/gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-1.c b/gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-1.c +new file mode 100644 +index 00000000000..821fab816f9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-1.c +@@ -0,0 +1,25 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mbranch-protection=gcs" } */ ++/* { dg-final { scan-assembler-times "hint\\t40 // chkfeat x16" 2 } } */ ++/* { dg-final { scan-assembler-times "mrs\\tx\[0-9\]+, s3_3_c2_c5_1 // gcspr_el0" 2 } } */ ++/* { dg-final { scan-assembler-times "sysl\\txzr, #3, c7, c7, #1 // gcspopm" 1 } } */ ++ ++int bar1 (int); ++int bar2 (int); ++ ++void foo (int cmd) ++{ ++ __label__ start; ++ int x = 0; ++ ++ void nonlocal_goto (void) ++ { ++ x++; ++ goto start; ++ } ++ ++start: ++ while (bar1 (x)) ++ if (bar2 (x)) ++ nonlocal_goto (); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-2.c b/gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-2.c +new file mode 100644 +index 00000000000..63dbce36e1e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-2.c +@@ -0,0 +1,21 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mbranch-protection=gcs" } */ ++/* { dg-final { scan-assembler-times "hint\\t40 // chkfeat x16" 2 } } */ ++/* { dg-final { scan-assembler-times "mrs\\tx\[0-9\]+, s3_3_c2_c5_1 // gcspr_el0" 2 } } */ ++/* { dg-final { scan-assembler-times "sysl\\txzr, #3, c7, c7, #1 // gcspopm" 1 } } */ ++ ++void longj (void *buf) ++{ ++ __builtin_longjmp (buf, 1); ++} ++ ++void foo (void); ++void bar (void); ++ ++void setj (void *buf) ++{ ++ if (__builtin_setjmp (buf)) ++ foo (); ++ else ++ bar (); ++} +-- +2.34.1 + + +From 
a9ec10c065ac9d932e30de54d68363f30ed864fe Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Tue, 9 May 2023 17:04:34 +0100 +Subject: [PATCH 12/19] aarch64: Add ACLE feature macros for GCS + +gcc/ChangeLog: + + * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Define + macros for GCS. +--- + gcc/config/aarch64/aarch64-c.cc | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc +index b5a6917d06d..132d5c86413 100644 +--- a/gcc/config/aarch64/aarch64-c.cc ++++ b/gcc/config/aarch64/aarch64-c.cc +@@ -246,6 +246,9 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) + + aarch64_def_or_undef (TARGET_PAUTH, "__ARM_FEATURE_PAUTH", pfile); + aarch64_def_or_undef (TARGET_BTI, "__ARM_FEATURE_BTI", pfile); ++ aarch64_def_or_undef (aarch64_gcs_enabled (), ++ "__ARM_FEATURE_GCS_DEFAULT", pfile); ++ aarch64_def_or_undef (TARGET_GCS, "__ARM_FEATURE_GCS", pfile); + aarch64_def_or_undef (TARGET_I8MM, "__ARM_FEATURE_MATMUL_INT8", pfile); + aarch64_def_or_undef (TARGET_BF16_SIMD, + "__ARM_FEATURE_BF16_VECTOR_ARITHMETIC", pfile); +-- +2.34.1 + + +From ff3d447772dcf9b2c7293a6b1ac458898a493cdf Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Wed, 7 Jun 2023 16:17:53 +0100 +Subject: [PATCH 13/19] aarch64: Add test for GCS ACLE defs + +gcc/testsuite/ChangeLog: + + * gcc.target/aarch64/pragma_cpp_predefs_1.c: GCS test. +--- + .../gcc.target/aarch64/pragma_cpp_predefs_1.c | 30 +++++++++++++++++++ + 1 file changed, 30 insertions(+) + +diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c +index 307fa3d67da..6122cd55d66 100644 +--- a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c +@@ -268,6 +268,36 @@ + #error "__ARM_FEATURE_RCPC is not defined but should be!" 
+ #endif + ++#pragma GCC target ("arch=armv8.8-a+gcs") ++#ifndef __ARM_FEATURE_GCS ++#error "__ARM_FEATURE_GCS is not defined but should be!" ++#endif ++ ++#pragma GCC target ("arch=armv8.8-a+nogcs") ++#ifdef __ARM_FEATURE_GCS ++#error "__ARM_FEATURE_GCS is defined but should not be!" ++#endif ++ ++#pragma GCC target ("arch=armv8.8-a") ++#ifdef __ARM_FEATURE_GCS ++#error "__ARM_FEATURE_GCS is defined but should not be!" ++#endif ++ ++#pragma GCC target ("branch-protection=gcs") ++#ifndef __ARM_FEATURE_GCS_DEFAULT ++#error "__ARM_FEATURE_GCS_DEFAULT is not defined but should be!" ++#endif ++ ++#pragma GCC target ("branch-protection=none") ++#ifdef __ARM_FEATURE_GCS_DEFAULT ++#error "__ARM_FEATURE_GCS_DEFAULT is defined but should not be!" ++#endif ++ ++#pragma GCC target ("branch-protection=standard") ++#ifndef __ARM_FEATURE_GCS_DEFAULT ++#error "__ARM_FEATURE_GCS_DEFAULT is not defined but should be!" ++#endif ++ + int + foo (int a) + { +-- +2.34.1 + + +From 8187d08bc815c5d45fa5b7cc6a970811ca4c6efe Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Wed, 19 Apr 2023 14:01:36 +0100 +Subject: [PATCH 14/19] aarch64: Add GCS support to the unwinder + +TODO: +- Follows the current linux ABI that uses single signal entry token + and shared shadow stack between thread and alt stack. +- Could be behind __ARM_FEATURE_GCS_DEFAULT ifdef (only do anything + special with gcs compat codegen) but there is a runtime check anyway. + +libgcc/ChangeLog: + + * config/aarch64/aarch64-unwind.h (_Unwind_Frames_Extra): Update. + (_Unwind_Frames_Increment): Define. 
+--- + libgcc/config/aarch64/aarch64-unwind.h | 59 +++++++++++++++++++++++++- + 1 file changed, 58 insertions(+), 1 deletion(-) + +diff --git a/libgcc/config/aarch64/aarch64-unwind.h b/libgcc/config/aarch64/aarch64-unwind.h +index daf96624b5e..c22a3fc20d2 100644 +--- a/libgcc/config/aarch64/aarch64-unwind.h ++++ b/libgcc/config/aarch64/aarch64-unwind.h +@@ -78,6 +78,9 @@ aarch64_demangle_return_addr (struct _Unwind_Context *context, + return addr; + } + ++/* GCS enable flag for chkfeat instruction. */ ++#define CHKFEAT_GCS 1 ++ + /* SME runtime function local to libgcc, streaming compatible + and preserves more registers than the base PCS requires, but + we don't rely on that here. */ +@@ -85,12 +88,66 @@ __attribute__ ((visibility ("hidden"))) + void __libgcc_arm_za_disable (void); + + /* Disable the SME ZA state in case an unwound frame used the ZA +- lazy saving scheme. */ ++ lazy saving scheme. And unwind the GCS for EH. */ + #undef _Unwind_Frames_Extra + #define _Unwind_Frames_Extra(x) \ + do \ + { \ + __libgcc_arm_za_disable (); \ ++ if (__builtin_aarch64_chkfeat (CHKFEAT_GCS) == 0) \ ++ { \ ++ for (_Unwind_Word n = (x); n != 0; n--) \ ++ __builtin_aarch64_gcspopm (); \ ++ } \ ++ } \ ++ while (0) ++ ++/* On signal entry the OS places a token on the GCS that can be used to ++ verify the integrity of the GCS pointer on signal return. It also ++ places the signal handler return address (the restorer that calls the ++ signal return syscall) on the GCS so the handler can return. ++ Because of this token, each stack frame visited during unwinding has ++ exactly one corresponding entry on the GCS, so the frame count is ++ the number of entries that will have to be popped at EH return time. ++ ++ Note: This depends on the GCS signal ABI of the OS. ++ ++ When unwinding across a stack frame for each frame the corresponding ++ entry is checked on the GCS against the computed return address from ++ the normal stack. 
If they don't match then _URC_FATAL_PHASE2_ERROR ++ is returned. This check is omitted if ++ ++ 1. GCS is disabled. Note: asynchronous GCS disable is supported here ++ if GCSPR and the GCS remains readable. ++ 2. Non-catchable exception where exception_class == 0. Note: the ++ pthread cancellation implementation in glibc sets exception_class ++ to 0 when the unwinder is used for cancellation cleanup handling, ++ so this allows the GCS to get out of sync during cancellation. ++ This weakens security but avoids an ABI break in glibc. ++ 3. Zero return address which marks the outermost stack frame. ++ 4. Signal stack frame, the GCS entry is an OS specific token then ++ with the top bit set. ++ */ ++#undef _Unwind_Frames_Increment ++#define _Unwind_Frames_Increment(exc, context, frames) \ ++ do \ ++ { \ ++ frames++; \ ++ if (__builtin_aarch64_chkfeat (CHKFEAT_GCS) != 0 \ ++ || exc->exception_class == 0 \ ++ || _Unwind_GetIP (context) == 0) \ ++ break; \ ++ const _Unwind_Word *gcs = __builtin_aarch64_gcspr (); \ ++ if (_Unwind_IsSignalFrame (context)) \ ++ { \ ++ if (gcs[frames] >> 63 == 0) \ ++ return _URC_FATAL_PHASE2_ERROR; \ ++ } \ ++ else \ ++ { \ ++ if (gcs[frames] != _Unwind_GetIP (context)) \ ++ return _URC_FATAL_PHASE2_ERROR; \ ++ } \ + } \ + while (0) + +-- +2.34.1 + + +From f183fe2dd597f300b24151034d68d168401ab83e Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Tue, 9 May 2023 14:32:46 +0100 +Subject: [PATCH 15/19] aarch64: Emit GNU property NOTE for GCS + +TODO: relies on experimental binutils ABI, should use build attributes. + +gcc/ChangeLog: + + * config/aarch64/aarch64.cc (GNU_PROPERTY_AARCH64_FEATURE_1_GCS): + Define. + (aarch64_file_end_indicate_exec_stack): Set GCS property bit. 
+--- + gcc/config/aarch64/aarch64.cc | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 73969721906..0119cfdd67b 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -28962,6 +28962,7 @@ aarch64_can_tag_addresses () + #define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000 + #define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0) + #define GNU_PROPERTY_AARCH64_FEATURE_1_PAC (1U << 1) ++#define GNU_PROPERTY_AARCH64_FEATURE_1_GCS (1U << 2) + void + aarch64_file_end_indicate_exec_stack () + { +@@ -28974,6 +28975,9 @@ aarch64_file_end_indicate_exec_stack () + if (aarch_ra_sign_scope != AARCH_FUNCTION_NONE) + feature_1_and |= GNU_PROPERTY_AARCH64_FEATURE_1_PAC; + ++ if (aarch64_gcs_enabled ()) ++ feature_1_and |= GNU_PROPERTY_AARCH64_FEATURE_1_GCS; ++ + if (feature_1_and) + { + /* Generate .note.gnu.property section. */ +@@ -29005,6 +29009,7 @@ aarch64_file_end_indicate_exec_stack () + assemble_align (POINTER_SIZE); + } + } ++#undef GNU_PROPERTY_AARCH64_FEATURE_1_GCS + #undef GNU_PROPERTY_AARCH64_FEATURE_1_PAC + #undef GNU_PROPERTY_AARCH64_FEATURE_1_BTI + #undef GNU_PROPERTY_AARCH64_FEATURE_1_AND +-- +2.34.1 + + +From 966dbf5b1816a49eebc7b9a52abe706e34ee67d3 Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Fri, 22 Dec 2023 13:44:19 +0000 +Subject: [PATCH 16/19] aarch64: libgcc: add GCS marking to asm + +libgcc/ChangeLog: + + * config/aarch64/aarch64-asm.h (FEATURE_1_GCS): Define. + (GCS_FLAG): Define if GCS is enabled. + (GNU_PROPERTY): Add GCS_FLAG. 
+--- + libgcc/config/aarch64/aarch64-asm.h | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/libgcc/config/aarch64/aarch64-asm.h b/libgcc/config/aarch64/aarch64-asm.h +index 83c2e5944b3..86a9a0e662e 100644 +--- a/libgcc/config/aarch64/aarch64-asm.h ++++ b/libgcc/config/aarch64/aarch64-asm.h +@@ -38,6 +38,7 @@ + #define FEATURE_1_AND 0xc0000000 + #define FEATURE_1_BTI 1 + #define FEATURE_1_PAC 2 ++#define FEATURE_1_GCS 4 + + /* Supported features based on the code generation options. */ + #if defined(__ARM_FEATURE_BTI_DEFAULT) +@@ -58,6 +59,12 @@ + # define AUTIASP + #endif + ++#if __ARM_FEATURE_GCS_DEFAULT ++# define GCS_FLAG FEATURE_1_GCS ++#else ++# define GCS_FLAG 0 ++#endif ++ + #ifdef __ELF__ + #define HIDDEN(name) .hidden name + #define SYMBOL_SIZE(name) .size name, .-name +@@ -88,8 +95,8 @@ + .previous + + /* Add GNU property note if built with branch protection. */ +-# if (BTI_FLAG|PAC_FLAG) != 0 +-GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG) ++# if (BTI_FLAG|PAC_FLAG|GCS_FLAG) != 0 ++GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG|GCS_FLAG) + # endif + #endif + +-- +2.34.1 + + +From bb609f49cbd69259513d9c39a74ad61730e3c87a Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Fri, 22 Dec 2023 15:11:25 +0000 +Subject: [PATCH 17/19] aarch64: libatomic: add GCS marking to asm + +libatomic/ChangeLog: + + * config/linux/aarch64/atomic_16.S (FEATURE_1_GCS): Define. + (GCS_FLAG): Define if GCS is enabled. + (GNU_PROPERTY): Add GCS_FLAG. 
+--- + libatomic/config/linux/aarch64/atomic_16.S | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/libatomic/config/linux/aarch64/atomic_16.S b/libatomic/config/linux/aarch64/atomic_16.S +index 4e3fa870b03..d6f34eee146 100644 +--- a/libatomic/config/linux/aarch64/atomic_16.S ++++ b/libatomic/config/linux/aarch64/atomic_16.S +@@ -790,6 +790,7 @@ ALIAS2 (test_and_set_16) + #define FEATURE_1_AND 0xc0000000 + #define FEATURE_1_BTI 1 + #define FEATURE_1_PAC 2 ++#define FEATURE_1_GCS 4 + + /* Supported features based on the code generation options. */ + #if defined(__ARM_FEATURE_BTI_DEFAULT) +@@ -804,6 +805,12 @@ ALIAS2 (test_and_set_16) + # define PAC_FLAG 0 + #endif + ++#if __ARM_FEATURE_GCS_DEFAULT ++# define GCS_FLAG FEATURE_1_GCS ++#else ++# define GCS_FLAG 0 ++#endif ++ + /* Add a NT_GNU_PROPERTY_TYPE_0 note. */ + #define GNU_PROPERTY(type, value) \ + .section .note.gnu.property, "a"; \ +@@ -821,7 +828,7 @@ ALIAS2 (test_and_set_16) + .section .note.GNU-stack, "", %progbits + + /* Add GNU property note if built with branch protection. */ +-# if (BTI_FLAG|PAC_FLAG) != 0 +-GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG) ++# if (BTI_FLAG|PAC_FLAG|GCS_FLAG) != 0 ++GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG|GCS_FLAG) + # endif + #endif +-- +2.34.1 + + +From 1d54b68e7c410b83fec17708ee84e7c64a461d3d Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Tue, 2 Apr 2024 15:43:23 +0100 +Subject: [PATCH 18/19] aarch64: libitm: Add GCS support + +Transaction begin and abort use setjmp/longjmp like operations that +need to be updated for GCS compatibility. We use similar logic to +libc setjmp/longjmp that support switching stack and thus switching +GCS (e.g. due to longjmp out of a makecontext stack), this is kept +even though it is likely not required for transaction aborts. + +The gtm_jmpbuf is internal to libitm so we can change its layout +without breaking ABI. 
+ +libitm/ChangeLog: + + * config/aarch64/sjlj.S: Add GCS support and mark GCS compatible. + * config/aarch64/target.h: Add gcs field to gtm_jmpbuf. +--- + libitm/config/aarch64/sjlj.S | 60 ++++++++++++++++++++++++++++++++-- + libitm/config/aarch64/target.h | 1 + + 2 files changed, 58 insertions(+), 3 deletions(-) + +diff --git a/libitm/config/aarch64/sjlj.S b/libitm/config/aarch64/sjlj.S +index 6b248f7c040..e21d751ef21 100644 +--- a/libitm/config/aarch64/sjlj.S ++++ b/libitm/config/aarch64/sjlj.S +@@ -29,6 +29,13 @@ + #define AUTIASP hint 29 + #define PACIBSP hint 27 + #define AUTIBSP hint 31 ++#define CHKFEAT_X16 hint 40 ++#define MRS_GCSPR(x) mrs x, s3_3_c2_c5_1 ++#define GCSPOPM(x) sysl x, #3, c7, c7, #1 ++#define GCSSS1(x) sys #3, c7, c7, #2, x ++#define GCSSS2(x) sysl x, #3, c7, c7, #3 ++ ++#define L(name) .L##name + + #if defined(HAVE_AS_CFI_PSEUDO_OP) && defined(__GCC_HAVE_DWARF2_CFI_ASM) + # define cfi_window_save .cfi_window_save +@@ -80,7 +87,16 @@ _ITM_beginTransaction: + stp d10, d11, [sp, 7*16] + stp d12, d13, [sp, 8*16] + stp d14, d15, [sp, 9*16] +- str x1, [sp, 10*16] ++ ++ /* GCS support. */ ++ mov x2, 0 ++ mov x16, 1 ++ CHKFEAT_X16 ++ tbnz x16, 0, L(gcs_done_sj) ++ MRS_GCSPR (x2) ++ add x2, x2, 8 /* GCS after _ITM_beginTransaction returns. */ ++L(gcs_done_sj): ++ stp x2, x1, [sp, 10*16] + + /* Invoke GTM_begin_transaction with the struct we just built. */ + mov x1, sp +@@ -117,7 +133,38 @@ GTM_longjmp: + ldp d10, d11, [x1, 7*16] + ldp d12, d13, [x1, 8*16] + ldp d14, d15, [x1, 9*16] ++ ++ /* GCS support. */ ++ mov x16, 1 ++ CHKFEAT_X16 ++ tbnz x16, 0, L(gcs_done_lj) ++ MRS_GCSPR (x7) + ldr x3, [x1, 10*16] ++ mov x4, x3 ++ /* x7: GCSPR now. x3, x4: target GCSPR. x5, x6: tmp regs. */ ++L(gcs_scan): ++ cmp x7, x4 ++ b.eq L(gcs_pop) ++ sub x4, x4, 8 ++ /* Check for a cap token. 
*/ ++ ldr x5, [x4] ++ and x6, x4, 0xfffffffffffff000 ++ orr x6, x6, 1 ++ cmp x5, x6 ++ b.ne L(gcs_scan) ++L(gcs_switch): ++ add x7, x4, 8 ++ GCSSS1 (x4) ++ GCSSS2 (xzr) ++L(gcs_pop): ++ cmp x7, x3 ++ b.eq L(gcs_done_lj) ++ GCSPOPM (xzr) ++ add x7, x7, 8 ++ b L(gcs_pop) ++L(gcs_done_lj): ++ ++ ldr x3, [x1, 10*16 + 8] + ldp x29, x30, [x1] + cfi_def_cfa(x1, 0) + CFI_PAC_TOGGLE +@@ -132,6 +179,7 @@ GTM_longjmp: + #define FEATURE_1_AND 0xc0000000 + #define FEATURE_1_BTI 1 + #define FEATURE_1_PAC 2 ++#define FEATURE_1_GCS 4 + + /* Supported features based on the code generation options. */ + #if defined(__ARM_FEATURE_BTI_DEFAULT) +@@ -146,6 +194,12 @@ GTM_longjmp: + # define PAC_FLAG 0 + #endif + ++#if __ARM_FEATURE_GCS_DEFAULT ++# define GCS_FLAG FEATURE_1_GCS ++#else ++# define GCS_FLAG 0 ++#endif ++ + /* Add a NT_GNU_PROPERTY_TYPE_0 note. */ + #define GNU_PROPERTY(type, value) \ + .section .note.gnu.property, "a"; \ +@@ -163,7 +217,7 @@ GTM_longjmp: + .section .note.GNU-stack, "", %progbits + + /* Add GNU property note if built with branch protection. 
*/ +-# if (BTI_FLAG|PAC_FLAG) != 0 +-GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG) ++# if (BTI_FLAG|PAC_FLAG|GCS_FLAG) != 0 ++GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG|GCS_FLAG) + # endif + #endif +diff --git a/libitm/config/aarch64/target.h b/libitm/config/aarch64/target.h +index 3d99197bfab..a1f39b4bf7a 100644 +--- a/libitm/config/aarch64/target.h ++++ b/libitm/config/aarch64/target.h +@@ -30,6 +30,7 @@ typedef struct gtm_jmpbuf + unsigned long long pc; /* x30 */ + unsigned long long gr[10]; /* x19-x28 */ + unsigned long long vr[8]; /* d8-d15 */ ++ void *gcs; /* GCSPR_EL0 */ + void *cfa; + } gtm_jmpbuf; + +-- +2.34.1 + + +From c3a32ae0e30d2f4c51659751bc784b1c84ca1351 Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Thu, 28 Dec 2023 13:37:38 +0000 +Subject: [PATCH 19/19] aarch64: Introduce indirect_return attribute + +Tail calls of indirect_return functions from non-indirect_return +functions are disallowed even if BTI is disabled, since the call +site may have BTI enabled. + +Following x86, mismatching attribute on function pointers is not +a type error even though this can lead to bugs. + +Needed for swapcontext within the same function when GCS is enabled. + +TODO: arm? docs, tests. feature detection? + +gcc/ChangeLog: + + * config/aarch64/aarch64.cc (aarch64_gnu_attributes): Add + indirect_return. + (aarch64_function_ok_for_sibcall): Disallow tail calls if caller + is non-indirect_return but callee is indirect_return. + (aarch64_comp_type_attributes): Check indirect_return attribute. + * config/arm/aarch-bti-insert.cc (call_needs_bti_j): New. + (rest_of_insert_bti): Use call_needs_bti_j. 
+--- + gcc/config/aarch64/aarch64.cc | 11 +++++++++ + gcc/config/arm/aarch-bti-insert.cc | 36 ++++++++++++++++++++++++++---- + 2 files changed, 43 insertions(+), 4 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 0119cfdd67b..593b107c8a5 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -850,6 +850,7 @@ static const attribute_spec aarch64_gnu_attributes[] = + affects_type_identity, handler, exclude } */ + { "aarch64_vector_pcs", 0, 0, false, true, true, true, + handle_aarch64_vector_pcs_attribute, NULL }, ++ { "indirect_return", 0, 0, false, true, true, false, NULL, NULL }, + { "arm_sve_vector_bits", 1, 1, false, true, false, true, + aarch64_sve::handle_arm_sve_vector_bits_attribute, + NULL }, +@@ -6340,6 +6341,14 @@ aarch64_function_ok_for_sibcall (tree, tree exp) + if (bool (aarch64_cfun_shared_flags (state)) + != bool (aarch64_fntype_shared_flags (fntype, state))) + return false; ++ ++ /* BTI J is needed where indirect_return functions may return ++ if bti is enabled there. */ ++ if (lookup_attribute ("indirect_return", TYPE_ATTRIBUTES (fntype)) ++ && !lookup_attribute ("indirect_return", ++ TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))) ++ return false; ++ + return true; + } + +@@ -28855,6 +28864,8 @@ aarch64_comp_type_attributes (const_tree type1, const_tree type2) + + if (!check_attr ("gnu", "aarch64_vector_pcs")) + return 0; ++ if (!check_attr ("gnu", "indirect_return")) ++ return 0; + if (!check_attr ("gnu", "Advanced SIMD type")) + return 0; + if (!check_attr ("gnu", "SVE type")) +diff --git a/gcc/config/arm/aarch-bti-insert.cc b/gcc/config/arm/aarch-bti-insert.cc +index 14d36971cd4..403afff9120 100644 +--- a/gcc/config/arm/aarch-bti-insert.cc ++++ b/gcc/config/arm/aarch-bti-insert.cc +@@ -92,6 +92,35 @@ const pass_data pass_data_insert_bti = + 0, /* todo_flags_finish. */ + }; + ++/* Decide if BTI J is needed after a call instruction. 
*/ ++static bool ++call_needs_bti_j (rtx_insn *insn) ++{ ++ /* Call returns twice, one of which may be indirect. */ ++ if (find_reg_note (insn, REG_SETJMP, NULL)) ++ return true; ++ ++ /* Tail call does not return. */ ++ if (SIBLING_CALL_P (insn)) ++ return false; ++ ++ /* Check if the function is marked to return indirectly. */ ++ rtx call = get_call_rtx_from (insn); ++ rtx fnaddr = XEXP (call, 0); ++ tree fndecl = NULL_TREE; ++ if (GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF) ++ fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0)); ++ if (fndecl == NULL_TREE) ++ fndecl = MEM_EXPR (fnaddr); ++ if (!fndecl) ++ return false; ++ if (TREE_CODE (TREE_TYPE (fndecl)) != FUNCTION_TYPE ++ && TREE_CODE (TREE_TYPE (fndecl)) != METHOD_TYPE) ++ return false; ++ tree fntype = TREE_TYPE (fndecl); ++ return lookup_attribute ("indirect_return", TYPE_ATTRIBUTES (fntype)); ++} ++ + /* Insert the BTI instruction. */ + /* This is implemented as a late RTL pass that runs before branch + shortening and does the following. */ +@@ -147,10 +176,9 @@ rest_of_insert_bti (void) + } + } + +- /* Also look for calls to setjmp () which would be marked with +- REG_SETJMP note and put a BTI J after. This is where longjump () +- will return. */ +- if (CALL_P (insn) && (find_reg_note (insn, REG_SETJMP, NULL))) ++ /* Also look for calls that may return indirectly, such as setjmp, ++ and put a BTI J after them. 
*/ ++ if (CALL_P (insn) && call_needs_bti_j (insn)) + { + bti_insn = aarch_gen_bti_j (); + emit_insn_after (bti_insn, insn); +-- +2.34.1 + diff --git a/meta-arm-gcs/recipes-devtools/gcc/gcc-source_14.0.bbappend b/meta-arm-gcs/recipes-devtools/gcc/gcc-source_14.0.bbappend new file mode 100644 index 00000000..b3aba4f9 --- /dev/null +++ b/meta-arm-gcs/recipes-devtools/gcc/gcc-source_14.0.bbappend @@ -0,0 +1,3 @@ +FILESEXTRAPATHS:prepend := "${THISDIR}/files:" + +SRC_URI += "file://gcs.patch" diff --git a/meta-arm-gcs/recipes-kernel/linux/files/gcs.patch b/meta-arm-gcs/recipes-kernel/linux/files/gcs.patch new file mode 100644 index 00000000..9000123a --- /dev/null +++ b/meta-arm-gcs/recipes-kernel/linux/files/gcs.patch @@ -0,0 +1,7723 @@ +Upstream-Status: Submitted [https://git.kernel.org/pub/scm/linux/kernel/git/broonie/misc.git/log/?h=arm64-gcs] +Signed-off-by: Ross Burton <ross.burton@arm.com> + +From acd6dd784ab9ef8a30a45d6145b5bc17c4373d65 Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Thu, 18 Jan 2024 21:30:07 +0000 +Subject: [PATCH 02/47] Documentation: userspace-api: Add shadow stack API + documentation + +There are a number of architectures with shadow stack features which we are +presenting to userspace with as consistent an API as we can (though there +are some architecture specifics). Especially given that there are some +important considerations for userspace code interacting directly with the +feature let's provide some documentation covering the common aspects. 
+ +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + Documentation/userspace-api/index.rst | 1 + + Documentation/userspace-api/shadow_stack.rst | 41 ++++++++++++++++++++ + 2 files changed, 42 insertions(+) + create mode 100644 Documentation/userspace-api/shadow_stack.rst + +diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst +index 09f61bd2ac2e..c142183d9c98 100644 +--- a/Documentation/userspace-api/index.rst ++++ b/Documentation/userspace-api/index.rst +@@ -27,6 +27,7 @@ place where this information is gathered. + iommufd + media/index + netlink/index ++ shadow_stack + sysfs-platform_profile + vduse + futex2 +diff --git a/Documentation/userspace-api/shadow_stack.rst b/Documentation/userspace-api/shadow_stack.rst +new file mode 100644 +index 000000000000..c576ad3d7ec1 +--- /dev/null ++++ b/Documentation/userspace-api/shadow_stack.rst +@@ -0,0 +1,41 @@ ++============= ++Shadow Stacks ++============= ++ ++Introduction ++============ ++ ++Several architectures have features which provide backward edge ++control flow protection through a hardware maintained stack, only ++writeable by userspace through very limited operations. This feature ++is referred to as shadow stacks on Linux, on x86 it is part of Intel ++Control Enforcement Technology (CET), on arm64 it is Guarded Control ++Stacks feature (FEAT_GCS) and for RISC-V it is the Zicfiss extension. ++It is expected that this feature will normally be managed by the ++system dynamic linker and libc in ways broadly transparent to ++application code, this document covers interfaces and considerations. ++ ++ ++Enabling ++======== ++ ++Shadow stacks default to disabled when a userspace process is ++executed, they can be enabled for the current thread with a syscall: ++ ++ - For x86 the ARCH_SHSTK_ENABLE arch_prctl() ++ ++It is expected that this will normally be done by the dynamic linker. 
++Any new threads created by a thread with shadow stacks enabled will ++themselves have shadow stacks enabled. ++ ++ ++Enablement considerations ++========================= ++ ++- Returning from the function that enables shadow stacks without first ++ disabling them will cause a shadow stack exception. This includes ++ any syscall wrapper or other library functions, the syscall will need ++ to be inlined. ++- A lock feature allows userspace to prevent disabling of shadow stacks. ++- Those that change the stack context like longjmp() or use of ucontext ++ changes on signal return will need support from libc. +-- +2.34.1 + + +From 4963da85eea04bd35672dfe2b43306b451c32bcd Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Mon, 29 Jan 2024 22:29:38 +0000 +Subject: [PATCH 03/47] selftests: Provide helper header for shadow stack + testing + +While almost all users of shadow stacks should be relying on the dynamic +linker and libc to enable the feature there are several low level test +programs where it is useful to enable without any libc support, allowing +testing without full system enablement. This low level testing is helpful +during bringup of the support itself, and also in enabling coverage by +automated testing without needing all system components in the target root +filesystems to have enablement. + +Provide a header with helpers for this purpose, intended for use only by +test programs directly exercising shadow stack interfaces. 
+ +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + tools/testing/selftests/ksft_shstk.h | 63 ++++++++++++++++++++++++++++ + 1 file changed, 63 insertions(+) + create mode 100644 tools/testing/selftests/ksft_shstk.h + +diff --git a/tools/testing/selftests/ksft_shstk.h b/tools/testing/selftests/ksft_shstk.h +new file mode 100644 +index 000000000000..85d0747c1802 +--- /dev/null ++++ b/tools/testing/selftests/ksft_shstk.h +@@ -0,0 +1,63 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++/* ++ * Helpers for shadow stack enablement, this is intended to only be ++ * used by low level test programs directly exercising interfaces for ++ * working with shadow stacks. ++ * ++ * Copyright (C) 2024 ARM Ltd. ++ */ ++ ++#ifndef __KSFT_SHSTK_H ++#define __KSFT_SHSTK_H ++ ++#include <asm/mman.h> ++ ++/* This is currently only defined for x86 */ ++#ifndef SHADOW_STACK_SET_TOKEN ++#define SHADOW_STACK_SET_TOKEN (1ULL << 0) ++#endif ++ ++static bool shadow_stack_enabled; ++ ++#ifdef __x86_64__ ++#define ARCH_SHSTK_ENABLE 0x5001 ++#define ARCH_SHSTK_SHSTK (1ULL << 0) ++ ++#define ARCH_PRCTL(arg1, arg2) \ ++({ \ ++ long _ret; \ ++ register long _num asm("eax") = __NR_arch_prctl; \ ++ register long _arg1 asm("rdi") = (long)(arg1); \ ++ register long _arg2 asm("rsi") = (long)(arg2); \ ++ \ ++ asm volatile ( \ ++ "syscall\n" \ ++ : "=a"(_ret) \ ++ : "r"(_arg1), "r"(_arg2), \ ++ "0"(_num) \ ++ : "rcx", "r11", "memory", "cc" \ ++ ); \ ++ _ret; \ ++}) ++ ++#define ENABLE_SHADOW_STACK ++static inline __attribute__((always_inline)) void enable_shadow_stack(void) ++{ ++ int ret = ARCH_PRCTL(ARCH_SHSTK_ENABLE, ARCH_SHSTK_SHSTK); ++ if (ret == 0) ++ shadow_stack_enabled = true; ++} ++ ++#endif ++ ++#ifndef __NR_map_shadow_stack ++#define __NR_map_shadow_stack 453 ++#endif ++ ++#ifndef ENABLE_SHADOW_STACK ++static inline void enable_shadow_stack(void) { } ++#endif ++ ++#endif ++ ++ +-- +2.34.1 + + +From dd5a2bea25b99868e19cf250f87fcefff2851857 Mon Sep 17 00:00:00 2001 +From: Mark Brown 
<broonie@kernel.org> +Date: Thu, 19 Oct 2023 17:43:34 +0100 +Subject: [PATCH 04/47] mm: Introduce ARCH_HAS_USER_SHADOW_STACK + +Since multiple architectures have support for shadow stacks and we need to +select support for this feature in several places in the generic code +provide a generic config option that the architectures can select. + +Suggested-by: David Hildenbrand <david@redhat.com> +Acked-by: David Hildenbrand <david@redhat.com> +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + arch/x86/Kconfig | 1 + + fs/proc/task_mmu.c | 2 +- + include/linux/mm.h | 2 +- + mm/Kconfig | 6 ++++++ + 4 files changed, 9 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig +index 5edec175b9bf..34553911d07d 100644 +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -1952,6 +1952,7 @@ config X86_USER_SHADOW_STACK + depends on AS_WRUSS + depends on X86_64 + select ARCH_USES_HIGH_VMA_FLAGS ++ select ARCH_HAS_USER_SHADOW_STACK + select X86_CET + help + Shadow stack protection is a hardware feature that detects function +diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c +index 3f78ebbb795f..ff2c601f7d1c 100644 +--- a/fs/proc/task_mmu.c ++++ b/fs/proc/task_mmu.c +@@ -700,7 +700,7 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) + #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR + [ilog2(VM_UFFD_MINOR)] = "ui", + #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */ +-#ifdef CONFIG_X86_USER_SHADOW_STACK ++#ifdef CONFIG_ARCH_HAS_USER_SHADOW_STACK + [ilog2(VM_SHADOW_STACK)] = "ss", + #endif + }; +diff --git a/include/linux/mm.h b/include/linux/mm.h +index f5a97dec5169..c0a782eda803 100644 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -341,7 +341,7 @@ extern unsigned int kobjsize(const void *objp); + #endif + #endif /* CONFIG_ARCH_HAS_PKEYS */ + +-#ifdef CONFIG_X86_USER_SHADOW_STACK ++#ifdef CONFIG_ARCH_HAS_USER_SHADOW_STACK + /* + * VM_SHADOW_STACK should not be set with VM_SHARED because of lack of + * support core 
mm. +diff --git a/mm/Kconfig b/mm/Kconfig +index ffc3a2ba3a8c..9119e016777a 100644 +--- a/mm/Kconfig ++++ b/mm/Kconfig +@@ -1261,6 +1261,12 @@ config LOCK_MM_AND_FIND_VMA + config IOMMU_MM_DATA + bool + ++config ARCH_HAS_USER_SHADOW_STACK ++ bool ++ help ++ The architecture has hardware support for userspace shadow call ++ stacks (eg, x86 CET, arm64 GCS or RISC-V Zicfiss). ++ + source "mm/damon/Kconfig" + + endmenu +-- +2.34.1 + + +From 8433014f7ee3beaba9ff2e37d1c517f4625db150 Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Mon, 16 Oct 2023 19:40:40 +0100 +Subject: [PATCH 05/47] fork: Add shadow stack support to clone3() + +Unlike with the normal stack there is no API for configuring the the shadow +stack for a new thread, instead the kernel will dynamically allocate a new +shadow stack with the same size as the normal stack. This appears to be due +to the shadow stack series having been in development since before the more +extensible clone3() was added rather than anything more deliberate. + +Add a parameter to clone3() specifying the size of a shadow stack for +the newly created process. If no shadow stack is specified then the +existing implicit allocation behaviour is maintained. + +If the architecture does not support shadow stacks the shadow stack size +parameter must be zero, architectures that do support the feature are +expected to enforce the same requirement on individual systems that lack +shadow stack support. + +Update the existing x86 implementation to pay attention to the newly added +arguments, in order to maintain compatibility we use the existing behaviour +if no shadow stack is specified. Minimal validation is done of the supplied +parameters, detailed enforcement is left to when the thread is executed. +Since we are now using more fields from the kernel_clone_args we pass that +into the shadow stack code rather than individual fields. 
+ +At present this implemntation does not consume the shadow stack token +atomically as would be desirable. + +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + arch/x86/include/asm/shstk.h | 11 +++-- + arch/x86/kernel/process.c | 2 +- + arch/x86/kernel/shstk.c | 94 +++++++++++++++++++++++++++--------- + include/linux/sched/task.h | 2 + + include/uapi/linux/sched.h | 13 +++-- + kernel/fork.c | 61 ++++++++++++++++++----- + 6 files changed, 140 insertions(+), 43 deletions(-) + +diff --git a/arch/x86/include/asm/shstk.h b/arch/x86/include/asm/shstk.h +index 42fee8959df7..8be7b0a909c3 100644 +--- a/arch/x86/include/asm/shstk.h ++++ b/arch/x86/include/asm/shstk.h +@@ -6,6 +6,7 @@ + #include <linux/types.h> + + struct task_struct; ++struct kernel_clone_args; + struct ksignal; + + #ifdef CONFIG_X86_USER_SHADOW_STACK +@@ -16,8 +17,8 @@ struct thread_shstk { + + long shstk_prctl(struct task_struct *task, int option, unsigned long arg2); + void reset_thread_features(void); +-unsigned long shstk_alloc_thread_stack(struct task_struct *p, unsigned long clone_flags, +- unsigned long stack_size); ++unsigned long shstk_alloc_thread_stack(struct task_struct *p, ++ const struct kernel_clone_args *args); + void shstk_free(struct task_struct *p); + int setup_signal_shadow_stack(struct ksignal *ksig); + int restore_signal_shadow_stack(void); +@@ -26,8 +27,10 @@ static inline long shstk_prctl(struct task_struct *task, int option, + unsigned long arg2) { return -EINVAL; } + static inline void reset_thread_features(void) {} + static inline unsigned long shstk_alloc_thread_stack(struct task_struct *p, +- unsigned long clone_flags, +- unsigned long stack_size) { return 0; } ++ const struct kernel_clone_args *args) ++{ ++ return 0; ++} + static inline void shstk_free(struct task_struct *p) {} + static inline int setup_signal_shadow_stack(struct ksignal *ksig) { return 0; } + static inline int restore_signal_shadow_stack(void) { return 0; } +diff --git a/arch/x86/kernel/process.c 
b/arch/x86/kernel/process.c +index ab49ade31b0d..d2bfcd44de05 100644 +--- a/arch/x86/kernel/process.c ++++ b/arch/x86/kernel/process.c +@@ -207,7 +207,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) + * is disabled, new_ssp will remain 0, and fpu_clone() will know not to + * update it. + */ +- new_ssp = shstk_alloc_thread_stack(p, clone_flags, args->stack_size); ++ new_ssp = shstk_alloc_thread_stack(p, args); + if (IS_ERR_VALUE(new_ssp)) + return PTR_ERR((void *)new_ssp); + +diff --git a/arch/x86/kernel/shstk.c b/arch/x86/kernel/shstk.c +index 59e15dd8d0f8..935ced6cf4f5 100644 +--- a/arch/x86/kernel/shstk.c ++++ b/arch/x86/kernel/shstk.c +@@ -191,44 +191,92 @@ void reset_thread_features(void) + current->thread.features_locked = 0; + } + +-unsigned long shstk_alloc_thread_stack(struct task_struct *tsk, unsigned long clone_flags, +- unsigned long stack_size) ++static bool shstk_consume_token(struct task_struct *tsk, ++ unsigned long addr) ++{ ++ /* ++ * SSP is aligned, so reserved bits and mode bit are a zero, just mark ++ * the token 64-bit. ++ */ ++ u64 expected = (addr - SS_FRAME_SIZE) | BIT(0); ++ u64 val; ++ ++ /* This should really be an atomic cpmxchg. It is not. */ ++ __get_user(val, (__user u64 *)addr); ++ if (val != expected) ++ return false; ++ ++ if (write_user_shstk_64((u64 __user *)addr, 0)) ++ return false; ++ ++ return true; ++} ++ ++unsigned long shstk_alloc_thread_stack(struct task_struct *tsk, ++ const struct kernel_clone_args *args) + { + struct thread_shstk *shstk = &tsk->thread.shstk; ++ unsigned long clone_flags = args->flags; + unsigned long addr, size; + + /* + * If shadow stack is not enabled on the new thread, skip any +- * switch to a new shadow stack. ++ * implicit switch to a new shadow stack and reject attempts to ++ * explciitly specify one. 
+ */ +- if (!features_enabled(ARCH_SHSTK_SHSTK)) +- return 0; ++ if (!features_enabled(ARCH_SHSTK_SHSTK)) { ++ if (args->shadow_stack || args->shadow_stack_size) ++ return (unsigned long)ERR_PTR(-EINVAL); + +- /* +- * For CLONE_VFORK the child will share the parents shadow stack. +- * Make sure to clear the internal tracking of the thread shadow +- * stack so the freeing logic run for child knows to leave it alone. +- */ +- if (clone_flags & CLONE_VFORK) { +- shstk->base = 0; +- shstk->size = 0; + return 0; + } + + /* +- * For !CLONE_VM the child will use a copy of the parents shadow +- * stack. ++ * If the user specified a shadow stack then do some basic ++ * validation and use it, otherwise fall back to a default ++ * shadow stack size if the clone_flags don't indicate an ++ * allocation is unneeded. + */ +- if (!(clone_flags & CLONE_VM)) +- return 0; ++ if (args->shadow_stack) { ++ addr = args->shadow_stack; ++ size = args->shadow_stack_size; ++ ++ /* There should be a valid token at the top of the stack. */ ++ if (!shstk_consume_token(tsk, addr + size - sizeof(u64))) { ++ shstk->base = 0; ++ shstk->size = 0; ++ return (unsigned long)ERR_PTR(-EINVAL); ++ } ++ } else { ++ /* ++ * For CLONE_VFORK the child will share the parents ++ * shadow stack. Make sure to clear the internal ++ * tracking of the thread shadow stack so the freeing ++ * logic run for child knows to leave it alone. ++ */ ++ if (clone_flags & CLONE_VFORK) { ++ shstk->base = 0; ++ shstk->size = 0; ++ return 0; ++ } + +- size = adjust_shstk_size(stack_size); +- addr = alloc_shstk(0, size, 0, false); +- if (IS_ERR_VALUE(addr)) +- return addr; ++ /* ++ * For !CLONE_VM the child will use a copy of the ++ * parents shadow stack. 
++ */ ++ if (!(clone_flags & CLONE_VM)) ++ return 0; + +- shstk->base = addr; +- shstk->size = size; ++ size = args->stack_size; ++ size = adjust_shstk_size(size); ++ addr = alloc_shstk(0, size, 0, false); ++ if (IS_ERR_VALUE(addr)) ++ return addr; ++ ++ /* We allocated the shadow stack, we should deallocate it. */ ++ shstk->base = addr; ++ shstk->size = size; ++ } + + return addr + size; + } +diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h +index d362aacf9f89..dd577e8dc881 100644 +--- a/include/linux/sched/task.h ++++ b/include/linux/sched/task.h +@@ -43,6 +43,8 @@ struct kernel_clone_args { + void *fn_arg; + struct cgroup *cgrp; + struct css_set *cset; ++ unsigned long shadow_stack; ++ unsigned long shadow_stack_size; + }; + + /* +diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h +index 3bac0a8ceab2..8b7af52548fd 100644 +--- a/include/uapi/linux/sched.h ++++ b/include/uapi/linux/sched.h +@@ -84,6 +84,10 @@ + * kernel's limit of nested PID namespaces. + * @cgroup: If CLONE_INTO_CGROUP is specified set this to + * a file descriptor for the cgroup. ++ * @shadow_stack: Pointer to the memory allocated for the child ++ * shadow stack. ++ * @shadow_stack_size: Specify the size of the shadow stack for ++ * the child process. + * + * The structure is versioned by size and thus extensible. 
+ * New struct members must go at the end of the struct and +@@ -101,12 +105,15 @@ struct clone_args { + __aligned_u64 set_tid; + __aligned_u64 set_tid_size; + __aligned_u64 cgroup; ++ __aligned_u64 shadow_stack; ++ __aligned_u64 shadow_stack_size; + }; + #endif + +-#define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */ +-#define CLONE_ARGS_SIZE_VER1 80 /* sizeof second published struct */ +-#define CLONE_ARGS_SIZE_VER2 88 /* sizeof third published struct */ ++#define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */ ++#define CLONE_ARGS_SIZE_VER1 80 /* sizeof second published struct */ ++#define CLONE_ARGS_SIZE_VER2 88 /* sizeof third published struct */ ++#define CLONE_ARGS_SIZE_VER3 104 /* sizeof fourth published struct */ + + /* + * Scheduling policies +diff --git a/kernel/fork.c b/kernel/fork.c +index 0d944e92a43f..fca041cc2b8a 100644 +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -123,6 +123,11 @@ + */ + #define MAX_THREADS FUTEX_TID_MASK + ++/* ++ * Require that shadow stacks can store at least one element ++ */ ++#define SHADOW_STACK_SIZE_MIN sizeof(void *) ++ + /* + * Protected counters by write_lock_irq(&tasklist_lock) + */ +@@ -3062,7 +3067,9 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs, + CLONE_ARGS_SIZE_VER1); + BUILD_BUG_ON(offsetofend(struct clone_args, cgroup) != + CLONE_ARGS_SIZE_VER2); +- BUILD_BUG_ON(sizeof(struct clone_args) != CLONE_ARGS_SIZE_VER2); ++ BUILD_BUG_ON(offsetofend(struct clone_args, shadow_stack_size) != ++ CLONE_ARGS_SIZE_VER3); ++ BUILD_BUG_ON(sizeof(struct clone_args) != CLONE_ARGS_SIZE_VER3); + + if (unlikely(usize > PAGE_SIZE)) + return -E2BIG; +@@ -3095,16 +3102,18 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs, + return -EINVAL; + + *kargs = (struct kernel_clone_args){ +- .flags = args.flags, +- .pidfd = u64_to_user_ptr(args.pidfd), +- .child_tid = u64_to_user_ptr(args.child_tid), +- .parent_tid = u64_to_user_ptr(args.parent_tid), +- 
.exit_signal = args.exit_signal, +- .stack = args.stack, +- .stack_size = args.stack_size, +- .tls = args.tls, +- .set_tid_size = args.set_tid_size, +- .cgroup = args.cgroup, ++ .flags = args.flags, ++ .pidfd = u64_to_user_ptr(args.pidfd), ++ .child_tid = u64_to_user_ptr(args.child_tid), ++ .parent_tid = u64_to_user_ptr(args.parent_tid), ++ .exit_signal = args.exit_signal, ++ .stack = args.stack, ++ .stack_size = args.stack_size, ++ .tls = args.tls, ++ .set_tid_size = args.set_tid_size, ++ .cgroup = args.cgroup, ++ .shadow_stack = args.shadow_stack, ++ .shadow_stack_size = args.shadow_stack_size, + }; + + if (args.set_tid && +@@ -3145,6 +3154,34 @@ static inline bool clone3_stack_valid(struct kernel_clone_args *kargs) + return true; + } + ++/** ++ * clone3_shadow_stack_valid - check and prepare shadow stack ++ * @kargs: kernel clone args ++ * ++ * Verify that shadow stacks are only enabled if supported. ++ */ ++static inline bool clone3_shadow_stack_valid(struct kernel_clone_args *kargs) ++{ ++ if (kargs->shadow_stack) { ++ if (!kargs->shadow_stack_size) ++ return false; ++ ++ if (kargs->shadow_stack_size < SHADOW_STACK_SIZE_MIN) ++ return false; ++ ++ if (kargs->shadow_stack_size > rlimit(RLIMIT_STACK)) ++ return false; ++ ++ /* ++ * The architecture must check support on the specific ++ * machine. ++ */ ++ return IS_ENABLED(CONFIG_ARCH_HAS_USER_SHADOW_STACK); ++ } else { ++ return !kargs->shadow_stack_size; ++ } ++} ++ + static bool clone3_args_valid(struct kernel_clone_args *kargs) + { + /* Verify that no unknown flags are passed along. 
*/ +@@ -3167,7 +3204,7 @@ static bool clone3_args_valid(struct kernel_clone_args *kargs) + kargs->exit_signal) + return false; + +- if (!clone3_stack_valid(kargs)) ++ if (!clone3_stack_valid(kargs) || !clone3_shadow_stack_valid(kargs)) + return false; + + return true; +-- +2.34.1 + + +From 3f6f2af71e1803c3e2d48f08c3f364efdaec5fcd Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Thu, 19 Oct 2023 15:43:49 +0100 +Subject: [PATCH 06/47] selftests/clone3: Factor more of main loop into + test_clone3() + +In order to make it easier to add more configuration for the tests and +more support for runtime detection of when tests can be run pass the +structure describing the tests into test_clone3() rather than picking +the arguments out of it and have that function do all the per-test work. + +No functional change. + +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + tools/testing/selftests/clone3/clone3.c | 77 ++++++++++++------------- + 1 file changed, 37 insertions(+), 40 deletions(-) + +diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c +index 3c9bf0cd82a8..1108bd8e36d6 100644 +--- a/tools/testing/selftests/clone3/clone3.c ++++ b/tools/testing/selftests/clone3/clone3.c +@@ -30,6 +30,19 @@ enum test_mode { + CLONE3_ARGS_INVAL_EXIT_SIGNAL_NSIG, + }; + ++typedef bool (*filter_function)(void); ++typedef size_t (*size_function)(void); ++ ++struct test { ++ const char *name; ++ uint64_t flags; ++ size_t size; ++ size_function size_function; ++ int expected; ++ enum test_mode test_mode; ++ filter_function filter; ++}; ++ + static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode) + { + struct __clone_args args = { +@@ -104,30 +117,40 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode) + return 0; + } + +-static bool test_clone3(uint64_t flags, size_t size, int expected, +- enum test_mode test_mode) ++static void test_clone3(const struct test *test) + { ++ size_t 
size; + int ret; + ++ if (test->filter && test->filter()) { ++ ksft_test_result_skip("%s\n", test->name); ++ return; ++ } ++ ++ if (test->size_function) ++ size = test->size_function(); ++ else ++ size = test->size; ++ ++ ksft_print_msg("Running test '%s'\n", test->name); ++ + ksft_print_msg( + "[%d] Trying clone3() with flags %#" PRIx64 " (size %zu)\n", +- getpid(), flags, size); +- ret = call_clone3(flags, size, test_mode); ++ getpid(), test->flags, size); ++ ret = call_clone3(test->flags, size, test->test_mode); + ksft_print_msg("[%d] clone3() with flags says: %d expected %d\n", +- getpid(), ret, expected); +- if (ret != expected) { ++ getpid(), ret, test->expected); ++ if (ret != test->expected) { + ksft_print_msg( + "[%d] Result (%d) is different than expected (%d)\n", +- getpid(), ret, expected); +- return false; ++ getpid(), ret, test->expected); ++ ksft_test_result_fail("%s\n", test->name); ++ return; + } + +- return true; ++ ksft_test_result_pass("%s\n", test->name); + } + +-typedef bool (*filter_function)(void); +-typedef size_t (*size_function)(void); +- + static bool not_root(void) + { + if (getuid() != 0) { +@@ -155,16 +178,6 @@ static size_t page_size_plus_8(void) + return getpagesize() + 8; + } + +-struct test { +- const char *name; +- uint64_t flags; +- size_t size; +- size_function size_function; +- int expected; +- enum test_mode test_mode; +- filter_function filter; +-}; +- + static const struct test tests[] = { + { + .name = "simple clone3()", +@@ -314,24 +327,8 @@ int main(int argc, char *argv[]) + ksft_set_plan(ARRAY_SIZE(tests)); + test_clone3_supported(); + +- for (i = 0; i < ARRAY_SIZE(tests); i++) { +- if (tests[i].filter && tests[i].filter()) { +- ksft_test_result_skip("%s\n", tests[i].name); +- continue; +- } +- +- if (tests[i].size_function) +- size = tests[i].size_function(); +- else +- size = tests[i].size; +- +- ksft_print_msg("Running test '%s'\n", tests[i].name); +- +- ksft_test_result(test_clone3(tests[i].flags, size, +- 
tests[i].expected, +- tests[i].test_mode), +- "%s\n", tests[i].name); +- } ++ for (i = 0; i < ARRAY_SIZE(tests); i++) ++ test_clone3(&tests[i]); + + ksft_finished(); + } +-- +2.34.1 + + +From 19b4898b0f2850497f787d1e5a3d7a6910d3ca57 Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Thu, 19 Oct 2023 16:15:08 +0100 +Subject: [PATCH 07/47] selftests/clone3: Allow tests to flag if -E2BIG is a + valid error code + +The clone_args structure is extensible, with the syscall passing in the +length of the structure. Inside the kernel we use copy_struct_from_user() +to read the struct but this has the unfortunate side effect of silently +accepting some overrun in the structure size providing the extra data is +all zeros. This means that we can't discover the clone3() features that +the running kernel supports by simply probing with various struct sizes. +We need to check this for the benefit of test systems which run newer +kselftests on old kernels. + +Add a flag which can be set on a test to indicate that clone3() may return +-E2BIG due to the use of newer struct versions. Currently no tests need +this but it will become an issue for testing clone3() support for shadow +stacks, the support for shadow stacks is already present on x86. 
+ +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + tools/testing/selftests/clone3/clone3.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c +index 1108bd8e36d6..6adbfd14c841 100644 +--- a/tools/testing/selftests/clone3/clone3.c ++++ b/tools/testing/selftests/clone3/clone3.c +@@ -39,6 +39,7 @@ struct test { + size_t size; + size_function size_function; + int expected; ++ bool e2big_valid; + enum test_mode test_mode; + filter_function filter; + }; +@@ -141,6 +142,11 @@ static void test_clone3(const struct test *test) + ksft_print_msg("[%d] clone3() with flags says: %d expected %d\n", + getpid(), ret, test->expected); + if (ret != test->expected) { ++ if (test->e2big_valid && ret == -E2BIG) { ++ ksft_print_msg("Test reported -E2BIG\n"); ++ ksft_test_result_skip("%s\n", test->name); ++ return; ++ } + ksft_print_msg( + "[%d] Result (%d) is different than expected (%d)\n", + getpid(), ret, test->expected); +-- +2.34.1 + + +From 295f1b6a27b9621402b0d3abeb15a3d2c39a7ddb Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Wed, 18 Oct 2023 23:09:49 +0100 +Subject: [PATCH 08/47] selftests/clone3: Test shadow stack support + +Add basic test coverage for specifying the shadow stack for a newly +created thread via clone3(), including coverage of the newly extended +argument structure. + +In order to facilitate testing on systems without userspace shadow stack +support we manually enable shadow stacks on startup, this is architecture +specific due to the use of an arch_prctl() on x86. Due to interactions with +potential userspace locking of features we actually detect support for +shadow stacks on the running system by attempting to allocate a shadow +stack page during initialisation using map_shadow_stack(), warning if this +succeeds when the enable failed. 
+ +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + tools/testing/selftests/clone3/clone3.c | 128 ++++++++++++++++++ + .../selftests/clone3/clone3_selftests.h | 8 ++ + 2 files changed, 136 insertions(+) + +diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c +index 6adbfd14c841..c468d9b87bd5 100644 +--- a/tools/testing/selftests/clone3/clone3.c ++++ b/tools/testing/selftests/clone3/clone3.c +@@ -3,6 +3,7 @@ + /* Based on Christian Brauner's clone3() example */ + + #define _GNU_SOURCE ++#include <asm/mman.h> + #include <errno.h> + #include <inttypes.h> + #include <linux/types.h> +@@ -11,6 +12,7 @@ + #include <stdint.h> + #include <stdio.h> + #include <stdlib.h> ++#include <sys/mman.h> + #include <sys/syscall.h> + #include <sys/types.h> + #include <sys/un.h> +@@ -19,8 +21,12 @@ + #include <sched.h> + + #include "../kselftest.h" ++#include "../ksft_shstk.h" + #include "clone3_selftests.h" + ++static bool shadow_stack_supported; ++static size_t max_supported_args_size; ++ + enum test_mode { + CLONE3_ARGS_NO_TEST, + CLONE3_ARGS_ALL_0, +@@ -28,6 +34,10 @@ enum test_mode { + CLONE3_ARGS_INVAL_EXIT_SIGNAL_NEG, + CLONE3_ARGS_INVAL_EXIT_SIGNAL_CSIG, + CLONE3_ARGS_INVAL_EXIT_SIGNAL_NSIG, ++ CLONE3_ARGS_SHADOW_STACK, ++ CLONE3_ARGS_SHADOW_STACK_NO_SIZE, ++ CLONE3_ARGS_SHADOW_STACK_NO_POINTER, ++ CLONE3_ARGS_SHADOW_STACK_NO_TOKEN, + }; + + typedef bool (*filter_function)(void); +@@ -44,6 +54,43 @@ struct test { + filter_function filter; + }; + ++/* ++ * We check for shadow stack support by attempting to use ++ * map_shadow_stack() since features may have been locked by the ++ * dynamic linker resulting in spurious errors when we attempt to ++ * enable on startup. We warn if the enable failed. 
++ */ ++static void test_shadow_stack_supported(void) ++{ ++ long ret; ++ ++ ret = syscall(__NR_map_shadow_stack, 0, getpagesize(), 0); ++ if (ret == -1) { ++ ksft_print_msg("map_shadow_stack() not supported\n"); ++ } else if ((void *)ret == MAP_FAILED) { ++ ksft_print_msg("Failed to map shadow stack\n"); ++ } else { ++ ksft_print_msg("Shadow stack supportd\n"); ++ shadow_stack_supported = true; ++ ++ if (!shadow_stack_enabled) ++ ksft_print_msg("Mapped but did not enable shadow stack\n"); ++ } ++} ++ ++static unsigned long long get_shadow_stack_page(unsigned long flags) ++{ ++ unsigned long long page; ++ ++ page = syscall(__NR_map_shadow_stack, 0, getpagesize(), flags); ++ if ((void *)page == MAP_FAILED) { ++ ksft_print_msg("map_shadow_stack() failed: %d\n", errno); ++ return 0; ++ } ++ ++ return page; ++} ++ + static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode) + { + struct __clone_args args = { +@@ -89,6 +136,20 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode) + case CLONE3_ARGS_INVAL_EXIT_SIGNAL_NSIG: + args.exit_signal = 0x00000000000000f0ULL; + break; ++ case CLONE3_ARGS_SHADOW_STACK: ++ args.shadow_stack = get_shadow_stack_page(SHADOW_STACK_SET_TOKEN); ++ args.shadow_stack_size = getpagesize(); ++ break; ++ case CLONE3_ARGS_SHADOW_STACK_NO_POINTER: ++ args.shadow_stack_size = getpagesize(); ++ break; ++ case CLONE3_ARGS_SHADOW_STACK_NO_SIZE: ++ args.shadow_stack = get_shadow_stack_page(SHADOW_STACK_SET_TOKEN); ++ break; ++ case CLONE3_ARGS_SHADOW_STACK_NO_TOKEN: ++ args.shadow_stack = get_shadow_stack_page(0); ++ args.shadow_stack_size = getpagesize(); ++ break; + } + + memcpy(&args_ext.args, &args, sizeof(struct __clone_args)); +@@ -179,6 +240,26 @@ static bool no_timenamespace(void) + return true; + } + ++static bool have_shadow_stack(void) ++{ ++ if (shadow_stack_supported) { ++ ksft_print_msg("Shadow stack supported\n"); ++ return true; ++ } ++ ++ return false; ++} ++ ++static bool 
no_shadow_stack(void) ++{ ++ if (!shadow_stack_supported) { ++ ksft_print_msg("Shadow stack not supported\n"); ++ return true; ++ } ++ ++ return false; ++} ++ + static size_t page_size_plus_8(void) + { + return getpagesize() + 8; +@@ -322,6 +403,50 @@ static const struct test tests[] = { + .expected = -EINVAL, + .test_mode = CLONE3_ARGS_NO_TEST, + }, ++ { ++ .name = "Shadow stack on system with shadow stack", ++ .flags = CLONE_VM, ++ .size = 0, ++ .expected = 0, ++ .e2big_valid = true, ++ .test_mode = CLONE3_ARGS_SHADOW_STACK, ++ .filter = no_shadow_stack, ++ }, ++ { ++ .name = "Shadow stack with no pointer", ++ .flags = CLONE_VM, ++ .size = 0, ++ .expected = -EINVAL, ++ .e2big_valid = true, ++ .test_mode = CLONE3_ARGS_SHADOW_STACK_NO_POINTER, ++ }, ++ { ++ .name = "Shadow stack with no size", ++ .flags = CLONE_VM, ++ .size = 0, ++ .expected = -EINVAL, ++ .e2big_valid = true, ++ .test_mode = CLONE3_ARGS_SHADOW_STACK_NO_SIZE, ++ .filter = no_shadow_stack, ++ }, ++ { ++ .name = "Shadow stack with no token", ++ .flags = CLONE_VM, ++ .size = 0, ++ .expected = -EINVAL, ++ .e2big_valid = true, ++ .test_mode = CLONE3_ARGS_SHADOW_STACK_NO_TOKEN, ++ .filter = no_shadow_stack, ++ }, ++ { ++ .name = "Shadow stack on system without shadow stack", ++ .flags = CLONE_VM, ++ .size = 0, ++ .expected = -EINVAL, ++ .e2big_valid = true, ++ .test_mode = CLONE3_ARGS_SHADOW_STACK, ++ .filter = have_shadow_stack, ++ }, + }; + + int main(int argc, char *argv[]) +@@ -329,9 +454,12 @@ int main(int argc, char *argv[]) + size_t size; + int i; + ++ enable_shadow_stack(); ++ + ksft_print_header(); + ksft_set_plan(ARRAY_SIZE(tests)); + test_clone3_supported(); ++ test_shadow_stack_supported(); + + for (i = 0; i < ARRAY_SIZE(tests); i++) + test_clone3(&tests[i]); +diff --git a/tools/testing/selftests/clone3/clone3_selftests.h b/tools/testing/selftests/clone3/clone3_selftests.h +index 3d2663fe50ba..1011dae85098 100644 +--- a/tools/testing/selftests/clone3/clone3_selftests.h ++++ 
b/tools/testing/selftests/clone3/clone3_selftests.h +@@ -31,6 +31,14 @@ struct __clone_args { + __aligned_u64 set_tid; + __aligned_u64 set_tid_size; + __aligned_u64 cgroup; ++#ifndef CLONE_ARGS_SIZE_VER2 ++#define CLONE_ARGS_SIZE_VER2 88 /* sizeof third published struct */ ++#endif ++ __aligned_u64 shadow_stack; ++ __aligned_u64 shadow_stack_size; ++#ifndef CLONE_ARGS_SIZE_VER3 ++#define CLONE_ARGS_SIZE_VER3 104 /* sizeof fourth published struct */ ++#endif + }; + + static pid_t sys_clone3(struct __clone_args *args, size_t size) +-- +2.34.1 + + +From e6c930b757134d3ad80f5b1a04ddba670b212abb Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Wed, 16 Aug 2023 17:33:47 +0100 +Subject: [PATCH 10/47] arm64/mm: Restructure arch_validate_flags() for + extensibility + +Currently arch_validate_flags() is written in a very non-extensible +fashion, returning immediately if MTE is not supported and writing the MTE +check as a direct return. Since we will want to add more checks for GCS +refactor the existing code to be more extensible, no functional change +intended. 
+ +Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org> +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + arch/arm64/include/asm/mman.h | 14 ++++++++++---- + 1 file changed, 10 insertions(+), 4 deletions(-) + +diff --git a/arch/arm64/include/asm/mman.h b/arch/arm64/include/asm/mman.h +index 5966ee4a6154..c21849ffdd88 100644 +--- a/arch/arm64/include/asm/mman.h ++++ b/arch/arm64/include/asm/mman.h +@@ -52,11 +52,17 @@ static inline bool arch_validate_prot(unsigned long prot, + + static inline bool arch_validate_flags(unsigned long vm_flags) + { +- if (!system_supports_mte()) +- return true; ++ if (system_supports_mte()) { ++ /* ++ * only allow VM_MTE if VM_MTE_ALLOWED has been set ++ * previously ++ */ ++ if ((vm_flags & VM_MTE) && !(vm_flags & VM_MTE_ALLOWED)) ++ return false; ++ } ++ ++ return true; + +- /* only allow VM_MTE if VM_MTE_ALLOWED has been set previously */ +- return !(vm_flags & VM_MTE) || (vm_flags & VM_MTE_ALLOWED); + } + #define arch_validate_flags(vm_flags) arch_validate_flags(vm_flags) + +-- +2.34.1 + + +From 33a83dfb0883de5bb5e1577423a213193aff4677 Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Sun, 12 Feb 2023 20:53:44 -0800 +Subject: [PATCH 11/47] prctl: arch-agnostic prctl for shadow stack + +Three architectures (x86, aarch64, riscv) have announced support for +shadow stacks with fairly similar functionality. While x86 is using +arch_prctl() to control the functionality neither arm64 nor riscv uses +that interface so this patch adds arch-agnostic prctl() support to +get and set status of shadow stacks and lock the current configuation to +prevent further changes, with support for turning on and off individual +subfeatures so applications can limit their exposure to features that +they do not need. The features are: + + - PR_SHADOW_STACK_ENABLE: Tracking and enforcement of shadow stacks, + including allocation of a shadow stack if one is not already + allocated. 
+ - PR_SHADOW_STACK_WRITE: Writes to specific addresses in the shadow + stack. + - PR_SHADOW_STACK_PUSH: Push additional values onto the shadow stack. + +These features are expected to be inherited by new threads and cleared +on exec(), unknown features should be rejected for enable but accepted +for locking (in order to allow for future proofing). + +This is based on a patch originally written by Deepak Gupta but modified +fairly heavily, support for indirect landing pads is removed, additional +modes added and the locking interface reworked. The set status prctl() +is also reworked to just set flags, if setting/reading the shadow stack +pointer is required this could be a separate prctl. + +Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org> +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + include/linux/mm.h | 4 ++++ + include/uapi/linux/prctl.h | 22 ++++++++++++++++++++++ + kernel/sys.c | 30 ++++++++++++++++++++++++++++++ + 3 files changed, 56 insertions(+) + +diff --git a/include/linux/mm.h b/include/linux/mm.h +index c0a782eda803..0b1139c5df60 100644 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -4182,4 +4182,8 @@ static inline bool pfn_is_unaccepted_memory(unsigned long pfn) + return range_contains_unaccepted_memory(paddr, paddr + PAGE_SIZE); + } + ++int arch_get_shadow_stack_status(struct task_struct *t, unsigned long __user *status); ++int arch_set_shadow_stack_status(struct task_struct *t, unsigned long status); ++int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status); ++ + #endif /* _LINUX_MM_H */ +diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h +index 370ed14b1ae0..3c66ed8f46d8 100644 +--- a/include/uapi/linux/prctl.h ++++ b/include/uapi/linux/prctl.h +@@ -306,4 +306,26 @@ struct prctl_mm_map { + # define PR_RISCV_V_VSTATE_CTRL_NEXT_MASK 0xc + # define PR_RISCV_V_VSTATE_CTRL_MASK 0x1f + ++/* ++ * Get the current shadow stack configuration for the current thread, ++ * this will be 
the value configured via PR_SET_SHADOW_STACK_STATUS. ++ */ ++#define PR_GET_SHADOW_STACK_STATUS 71 ++ ++/* ++ * Set the current shadow stack configuration. Enabling the shadow ++ * stack will cause a shadow stack to be allocated for the thread. ++ */ ++#define PR_SET_SHADOW_STACK_STATUS 72 ++# define PR_SHADOW_STACK_ENABLE (1UL << 0) ++# define PR_SHADOW_STACK_WRITE (1UL << 1) ++# define PR_SHADOW_STACK_PUSH (1UL << 2) ++ ++/* ++ * Prevent further changes to the specified shadow stack ++ * configuration. All bits may be locked via this call, including ++ * undefined bits. ++ */ ++#define PR_LOCK_SHADOW_STACK_STATUS 73 ++ + #endif /* _LINUX_PRCTL_H */ +diff --git a/kernel/sys.c b/kernel/sys.c +index f8e543f1e38a..242e9f147791 100644 +--- a/kernel/sys.c ++++ b/kernel/sys.c +@@ -2315,6 +2315,21 @@ int __weak arch_prctl_spec_ctrl_set(struct task_struct *t, unsigned long which, + return -EINVAL; + } + ++int __weak arch_get_shadow_stack_status(struct task_struct *t, unsigned long __user *status) ++{ ++ return -EINVAL; ++} ++ ++int __weak arch_set_shadow_stack_status(struct task_struct *t, unsigned long status) ++{ ++ return -EINVAL; ++} ++ ++int __weak arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status) ++{ ++ return -EINVAL; ++} ++ + #define PR_IO_FLUSHER (PF_MEMALLOC_NOIO | PF_LOCAL_THROTTLE) + + #ifdef CONFIG_ANON_VMA_NAME +@@ -2757,6 +2772,21 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, + case PR_RISCV_V_GET_CONTROL: + error = RISCV_V_GET_CONTROL(); + break; ++ case PR_GET_SHADOW_STACK_STATUS: ++ if (arg3 || arg4 || arg5) ++ return -EINVAL; ++ error = arch_get_shadow_stack_status(me, (unsigned long __user *) arg2); ++ break; ++ case PR_SET_SHADOW_STACK_STATUS: ++ if (arg3 || arg4 || arg5) ++ return -EINVAL; ++ error = arch_set_shadow_stack_status(me, arg2); ++ break; ++ case PR_LOCK_SHADOW_STACK_STATUS: ++ if (arg3 || arg4 || arg5) ++ return -EINVAL; ++ error = arch_lock_shadow_stack_status(me, arg2); ++ 
break; + default: + error = -EINVAL; + break; +-- +2.34.1 + + +From d16e43b333735c0ce01575c280197da1989e9739 Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Fri, 4 Aug 2023 14:50:18 +0100 +Subject: [PATCH 12/47] mman: Add map_shadow_stack() flags + +In preparation for adding arm64 GCS support make the map_shadow_stack() +SHADOW_STACK_SET_TOKEN flag generic and add _SET_MARKER. The existing +flag indicats that a token usable for stack switch should be added to +the top of the newly mapped GCS region while the new flag indicates that +a top of stack marker suitable for use by unwinders should be added +above that. + +For arm64 the top of stack marker is all bits 0. + +Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org> +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + arch/x86/include/uapi/asm/mman.h | 3 --- + include/uapi/asm-generic/mman.h | 4 ++++ + 2 files changed, 4 insertions(+), 3 deletions(-) + +diff --git a/arch/x86/include/uapi/asm/mman.h b/arch/x86/include/uapi/asm/mman.h +index 46cdc941f958..ac1e6277212b 100644 +--- a/arch/x86/include/uapi/asm/mman.h ++++ b/arch/x86/include/uapi/asm/mman.h +@@ -5,9 +5,6 @@ + #define MAP_32BIT 0x40 /* only give out 32bit addresses */ + #define MAP_ABOVE4G 0x80 /* only map above 4GB */ + +-/* Flags for map_shadow_stack(2) */ +-#define SHADOW_STACK_SET_TOKEN (1ULL << 0) /* Set up a restore token in the shadow stack */ +- + #include <asm-generic/mman.h> + + #endif /* _ASM_X86_MMAN_H */ +diff --git a/include/uapi/asm-generic/mman.h b/include/uapi/asm-generic/mman.h +index 57e8195d0b53..d6a282687af5 100644 +--- a/include/uapi/asm-generic/mman.h ++++ b/include/uapi/asm-generic/mman.h +@@ -19,4 +19,8 @@ + #define MCL_FUTURE 2 /* lock all future mappings */ + #define MCL_ONFAULT 4 /* lock all pages that are faulted in */ + ++#define SHADOW_STACK_SET_TOKEN (1ULL << 0) /* Set up a restore token in the shadow stack */ ++#define SHADOW_STACK_SET_MARKER (1ULL << 1) /* Set up a top of stack 
merker in the shadow stack */ ++ ++ + #endif /* __ASM_GENERIC_MMAN_H */ +-- +2.34.1 + + +From ff25ae9e38129288ebbeabf5a53360a074157b0a Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Fri, 3 Mar 2023 17:16:43 +0000 +Subject: [PATCH 13/47] arm64: Document boot requirements for Guarded Control + Stacks + +FEAT_GCS introduces a number of new system registers, we require that +access to these registers is not trapped when we identify that the feature +is detected. + +Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org> +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + Documentation/arch/arm64/booting.rst | 22 ++++++++++++++++++++++ + 1 file changed, 22 insertions(+) + +diff --git a/Documentation/arch/arm64/booting.rst b/Documentation/arch/arm64/booting.rst +index b57776a68f15..de3679770c64 100644 +--- a/Documentation/arch/arm64/booting.rst ++++ b/Documentation/arch/arm64/booting.rst +@@ -411,6 +411,28 @@ Before jumping into the kernel, the following conditions must be met: + + - HFGRWR_EL2.nPIRE0_EL1 (bit 57) must be initialised to 0b1. + ++ - For features with Guarded Control Stacks (FEAT_GCS): ++ ++ - If EL3 is present: ++ ++ - SCR_EL3.GCSEn (bit 39) must be initialised to 0b1. ++ ++ - If the kernel is entered at EL1 and EL2 is present: ++ ++ - HFGITR_EL2.nGCSEPP (bit 59) must be initialised to 0b1. ++ ++ - HFGITR_EL2.nGCSSTR_EL1 (bit 58) must be initialised to 0b1. ++ ++ - HFGITR_EL2.nGCSPUSHM_EL1 (bit 57) must be initialised to 0b1. ++ ++ - HFGRTR_EL2.nGCS_EL1 (bit 53) must be initialised to 0b1. ++ ++ - HFGRTR_EL2.nGCS_EL0 (bit 52) must be initialised to 0b1. ++ ++ - HFGWTR_EL2.nGCS_EL1 (bit 53) must be initialised to 0b1. ++ ++ - HFGWTR_EL2.nGCS_EL0 (bit 52) must be initialised to 0b1. ++ + The requirements described above for CPU mode, caches, MMUs, architected + timers, coherency and system registers apply to all CPUs. All CPUs must + enter the kernel in the same exception level. 
Where the values documented +-- +2.34.1 + + +From 5867bb3606500d88935829c424bcbd0c1afe0277 Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Tue, 4 Jul 2023 00:17:55 +0100 +Subject: [PATCH 14/47] arm64/gcs: Document the ABI for Guarded Control Stacks + +Add some documentation of the userspace ABI for Guarded Control Stacks. + +Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org> +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + Documentation/arch/arm64/gcs.rst | 233 +++++++++++++++++++++++++++++ + Documentation/arch/arm64/index.rst | 1 + + 2 files changed, 234 insertions(+) + create mode 100644 Documentation/arch/arm64/gcs.rst + +diff --git a/Documentation/arch/arm64/gcs.rst b/Documentation/arch/arm64/gcs.rst +new file mode 100644 +index 000000000000..c45c0326836a +--- /dev/null ++++ b/Documentation/arch/arm64/gcs.rst +@@ -0,0 +1,233 @@ ++=============================================== ++Guarded Control Stack support for AArch64 Linux ++=============================================== ++ ++This document outlines briefly the interface provided to userspace by Linux in ++order to support use of the ARM Guarded Control Stack (GCS) feature. ++ ++This is an outline of the most important features and issues only and not ++intended to be exhaustive. ++ ++ ++ ++1. General ++----------- ++ ++* GCS is an architecture feature intended to provide greater protection ++ against return oriented programming (ROP) attacks and to simplify the ++ implementation of features that need to collect stack traces such as ++ profiling. ++ ++* When GCS is enabled a separate guarded control stack is maintained by the ++ PE which is writeable only through specific GCS operations. This ++ stores the call stack only, when a procedure call instruction is ++ performed the current PC is pushed onto the GCS and on RET the ++ address in the LR is verified against that on the top of the GCS. 
++ ++* When active current GCS pointer is stored in the system register ++ GCSPR_EL0. This is readable by userspace but can only be updated ++ via specific GCS instructions. ++ ++* The architecture provides instructions for switching between guarded ++ control stacks with checks to ensure that the new stack is a valid ++ target for switching. ++ ++* The functionality of GCS is similar to that provided by the x86 Shadow ++ Stack feature, due to sharing of userspace interfaces the ABI refers to ++ shadow stacks rather than GCS. ++ ++* Support for GCS is reported to userspace via HWCAP2_GCS in the aux vector ++ AT_HWCAP2 entry. ++ ++* GCS is enabled per thread. While there is support for disabling GCS ++ at runtime this should be done with great care. ++ ++* GCS memory access faults are reported as normal memory access faults. ++ ++* GCS specific errors (those reported with EC 0x2d) will be reported as ++ SIGSEGV with a si_code of SEGV_CPERR (control protection error). ++ ++* GCS is supported only for AArch64. ++ ++* On systems where GCS is supported GCSPR_EL0 is always readable by EL0 ++ regardless of the GCS configuration for the thread. ++ ++* The architecture supports enabling GCS without verifying that return values ++ in LR match those in the GCS, the LR will be ignored. This is not supported ++ by Linux. ++ ++* EL0 GCS entries with bit 63 set are reserved for use, one such use is defined ++ below for signals and should be ignored when parsing the stack if not ++ understood. ++ ++ ++2. Enabling and disabling Guarded Control Stacks ++------------------------------------------------- ++ ++* GCS is enabled and disabled for a thread via the PR_SET_SHADOW_STACK_STATUS ++ prctl(), this takes a single flags argument specifying which GCS features ++ should be used. ++ ++* When set PR_SHADOW_STACK_ENABLE flag allocates a Guarded Control Stack ++ and enables GCS for the thread, enabling the functionality controlled by ++ GCSCRE0_EL1.{nTR, RVCHKEN, PCRSEL}. 
++ ++* When set the PR_SHADOW_STACK_PUSH flag enables the functionality controlled ++ by GCSCRE0_EL1.PUSHMEn, allowing explicit GCS pushes. ++ ++* When set the PR_SHADOW_STACK_WRITE flag enables the functionality controlled ++ by GCSCRE0_EL1.STREn, allowing explicit stores to the Guarded Control Stack. ++ ++* Any unknown flags will cause PR_SET_SHADOW_STACK_STATUS to return -EINVAL. ++ ++* PR_LOCK_SHADOW_STACK_STATUS is passed a bitmask of features with the same ++ values as used for PR_SET_SHADOW_STACK_STATUS. Any future changes to the ++ status of the specified GCS mode bits will be rejected. ++ ++* PR_LOCK_SHADOW_STACK_STATUS allows any bit to be locked, this allows ++ userspace to prevent changes to any future features. ++ ++* There is no support for a process to remove a lock that has been set for ++ it. ++ ++* PR_SET_SHADOW_STACK_STATUS and PR_LOCK_SHADOW_STACK_STATUS affect only the ++ thread that called them, any other running threads will be unaffected. ++ ++* New threads inherit the GCS configuration of the thread that created them. ++ ++* GCS is disabled on exec(). ++ ++* The current GCS configuration for a thread may be read with the ++ PR_GET_SHADOW_STACK_STATUS prctl(), this returns the same flags that ++ are passed to PR_SET_SHADOW_STACK_STATUS. ++ ++* If GCS is disabled for a thread after having previously been enabled then ++ the stack will remain allocated for the lifetime of the thread. At present ++ any attempt to reenable GCS for the thread will be rejected, this may be ++ revisited in future. ++ ++* It should be noted that since enabling GCS will result in GCS becoming ++ active immediately it is not normally possible to return from the function ++ that invoked the prctl() that enabled GCS. It is expected that the normal ++ usage will be that GCS is enabled very early in execution of a program. ++ ++ ++ ++3. 
Allocation of Guarded Control Stacks ++---------------------------------------- ++ ++* When GCS is enabled for a thread a new Guarded Control Stack will be ++ allocated for it of size RLIMIT_STACK or 4 gigabytes, whichever is ++ smaller. ++ ++* When a new thread is created by a thread which has GCS enabled then a ++ new Guarded Control Stack will be allocated for the new thread with ++ half the size of the standard stack. ++ ++* When a stack is allocated by enabling GCS or during thread creation then ++ the top 8 bytes of the stack will be initialised to 0 and GCSPR_EL0 will ++ be set to point to the address of this 0 value, this can be used to ++ detect the top of the stack. ++ ++* Additional Guarded Control Stacks can be allocated using the ++ map_shadow_stack() system call. ++ ++* Stacks allocated using map_shadow_stack() can optionally have an end of ++ stack marker and cap placed at the top of the stack. If the flag ++ SHADOW_STACK_SET_TOKEN is specified a cap will be placed on the stack, ++ if SHADOW_STACK_SET_MARKER is not specified the cap will be the top 8 ++ bytes of the stack and if it is specified then the cap will be the next ++ 8 bytes. While specifying just SHADOW_STACK_SET_MARKER by itself is ++ valid since the marker is all bits 0 it has no observable effect. ++ ++* Stacks allocated using map_shadow_stack() must have a size which is a ++ multiple of 8 bytes larger than 8 bytes and must be 8 bytes aligned. ++ ++* An address can be specified to map_shadow_stack(), if one is provided then ++ it must be aligned to a page boundary. ++ ++* When a thread is freed the Guarded Control Stack initially allocated for ++ that thread will be freed. Note carefully that if the stack has been ++ switched this may not be the stack currently in use by the thread. ++ ++ ++4. Signal handling ++-------------------- ++ ++* A new signal frame record gcs_context encodes the current GCS mode and ++ pointer for the interrupted context on signal delivery. 
This will always ++ be present on systems that support GCS. ++ ++* The record contains a flag field which reports the current GCS configuration ++ for the interrupted context as PR_GET_SHADOW_STACK_STATUS would. ++ ++* The signal handler is run with the same GCS configuration as the interrupted ++ context. ++ ++* When GCS is enabled for the interrupted thread a signal handling specific ++ GCS cap token will be written to the GCS, this is an architectural GCS cap ++ token with bit 63 set and the token type (bits 0..11) all clear. The ++ GCSPR_EL0 reported in the signal frame will point to this cap token. ++ ++* The signal handler will use the same GCS as the interrupted context. ++ ++* When GCS is enabled on signal entry a frame with the address of the signal ++ return handler will be pushed onto the GCS, allowing return from the signal ++ handler via RET as normal. This will not be reported in the gcs_context in ++ the signal frame. ++ ++ ++5. Signal return ++----------------- ++ ++When returning from a signal handler: ++ ++* If there is a gcs_context record in the signal frame then the GCS flags ++ and GCSPR_EL0 will be restored from that context prior to further ++ validation. ++ ++* If there is no gcs_context record in the signal frame then the GCS ++ configuration will be unchanged. ++ ++* If GCS is enabled on return from a signal handler then GCSPR_EL0 must ++ point to a valid GCS signal cap record, this will be popped from the ++ GCS prior to signal return. ++ ++* If the GCS configuration is locked when returning from a signal then any ++ attempt to change the GCS configuration will be treated as an error. This ++ is true even if GCS was not enabled prior to signal entry. ++ ++* GCS may be disabled via signal return but any attempt to enable GCS via ++ signal return will be rejected. ++ ++ ++6. ptrace extensions ++--------------------- ++ ++* A new regset NT_ARM_GCS is defined for use with PTRACE_GETREGSET and ++ PTRACE_SETREGSET. 
++ ++* Due to the complexity surrounding allocation and deallocation of stacks and ++ lack of practical application it is not possible to enable GCS via ptrace. ++ GCS may be disabled via the ptrace interface. ++ ++* Other GCS modes may be configured via ptrace. ++ ++* Configuration via ptrace ignores locking of GCS mode bits. ++ ++ ++7. ELF coredump extensions ++--------------------------- ++ ++* NT_ARM_GCS notes will be added to each coredump for each thread of the ++ dumped process. The contents will be equivalent to the data that would ++ have been read if a PTRACE_GETREGSET of the corresponding type were ++ executed for each thread when the coredump was generated. ++ ++ ++ ++8. /proc extensions ++-------------------- ++ ++* Guarded Control Stack pages will include "ss" in their VmFlags in ++ /proc/<pid>/smaps. +diff --git a/Documentation/arch/arm64/index.rst b/Documentation/arch/arm64/index.rst +index d08e924204bf..dcf3ee3eb8c0 100644 +--- a/Documentation/arch/arm64/index.rst ++++ b/Documentation/arch/arm64/index.rst +@@ -14,6 +14,7 @@ ARM64 Architecture + booting + cpu-feature-registers + elf_hwcaps ++ gcs + hugetlbpage + kdump + legacy_instructions +-- +2.34.1 + + +From 41a7e3b42b2776185f78b4a23ac7a5d3019eb203 Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Tue, 20 Jun 2023 19:28:37 +0100 +Subject: [PATCH 15/47] arm64/sysreg: Add definitions for architected GCS caps + +The architecture defines a format for guarded control stack caps, used +to mark the top of an unused GCS in order to limit the potential for +exploitation via stack switching. Add definitions associated with these. 
+ +Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org> +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + arch/arm64/include/asm/sysreg.h | 20 ++++++++++++++++++++ + 1 file changed, 20 insertions(+) + +diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h +index c3b19b376c86..6ed813e856c1 100644 +--- a/arch/arm64/include/asm/sysreg.h ++++ b/arch/arm64/include/asm/sysreg.h +@@ -1064,6 +1064,26 @@ + #define POE_RXW UL(0x7) + #define POE_MASK UL(0xf) + ++/* ++ * Definitions for Guarded Control Stack ++ */ ++ ++#define GCS_CAP_ADDR_MASK GENMASK(63, 12) ++#define GCS_CAP_ADDR_SHIFT 12 ++#define GCS_CAP_ADDR_WIDTH 52 ++#define GCS_CAP_ADDR(x) FIELD_GET(GCS_CAP_ADDR_MASK, x) ++ ++#define GCS_CAP_TOKEN_MASK GENMASK(11, 0) ++#define GCS_CAP_TOKEN_SHIFT 0 ++#define GCS_CAP_TOKEN_WIDTH 12 ++#define GCS_CAP_TOKEN(x) FIELD_GET(GCS_CAP_TOKEN_MASK, x) ++ ++#define GCS_CAP_VALID_TOKEN 0x1 ++#define GCS_CAP_IN_PROGRESS_TOKEN 0x5 ++ ++#define GCS_CAP(x) ((((unsigned long)x) & GCS_CAP_ADDR_MASK) | \ ++ GCS_CAP_VALID_TOKEN) ++ + #define ARM64_FEATURE_FIELD_BITS 4 + + /* Defined for compatibility only, do not add new users. */ +-- +2.34.1 + + +From 0c5c6e7f9c231a904a1d04ea1d1a9b1729544fe3 Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Tue, 20 Jun 2023 19:31:24 +0100 +Subject: [PATCH 16/47] arm64/gcs: Add manual encodings of GCS instructions + +Define C callable functions for GCS instructions used by the kernel. In +order to avoid ambitious toolchain requirements for GCS support these are +manually encoded, this means we have fixed register numbers which will be +a bit limiting for the compiler but none of these should be used in +sufficiently fast paths for this to be a problem. + +Note that GCSSTTR is used to store to EL0. 
+ +Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org> +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + arch/arm64/include/asm/gcs.h | 51 ++++++++++++++++++++++++++++++++ + arch/arm64/include/asm/uaccess.h | 22 ++++++++++++++ + 2 files changed, 73 insertions(+) + create mode 100644 arch/arm64/include/asm/gcs.h + +diff --git a/arch/arm64/include/asm/gcs.h b/arch/arm64/include/asm/gcs.h +new file mode 100644 +index 000000000000..7c5e95218db6 +--- /dev/null ++++ b/arch/arm64/include/asm/gcs.h +@@ -0,0 +1,51 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++/* ++ * Copyright (C) 2023 ARM Ltd. ++ */ ++#ifndef __ASM_GCS_H ++#define __ASM_GCS_H ++ ++#include <asm/types.h> ++#include <asm/uaccess.h> ++ ++static inline void gcsb_dsync(void) ++{ ++ asm volatile(".inst 0xd503227f" : : : "memory"); ++} ++ ++static inline void gcsstr(u64 *addr, u64 val) ++{ ++ register u64 *_addr __asm__ ("x0") = addr; ++ register long _val __asm__ ("x1") = val; ++ ++ /* GCSSTTR x1, x0 */ ++ asm volatile( ++ ".inst 0xd91f1c01\n" ++ : ++ : "rZ" (_val), "r" (_addr) ++ : "memory"); ++} ++ ++static inline void gcsss1(u64 Xt) ++{ ++ asm volatile ( ++ "sys #3, C7, C7, #2, %0\n" ++ : ++ : "rZ" (Xt) ++ : "memory"); ++} ++ ++static inline u64 gcsss2(void) ++{ ++ u64 Xt; ++ ++ asm volatile( ++ "SYSL %0, #3, C7, C7, #3\n" ++ : "=r" (Xt) ++ : ++ : "memory"); ++ ++ return Xt; ++} ++ ++#endif +diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h +index 14be5000c5a0..22e10e79f56a 100644 +--- a/arch/arm64/include/asm/uaccess.h ++++ b/arch/arm64/include/asm/uaccess.h +@@ -425,4 +425,26 @@ static inline size_t probe_subpage_writeable(const char __user *uaddr, + + #endif /* CONFIG_ARCH_HAS_SUBPAGE_FAULTS */ + ++#ifdef CONFIG_ARM64_GCS ++ ++static inline int gcssttr(unsigned long __user *addr, unsigned long val) ++{ ++ register unsigned long __user *_addr __asm__ ("x0") = addr; ++ register unsigned long _val __asm__ ("x1") = val; ++ int err = 0; ++ ++ /* GCSSTTR x1, 
x0 */ ++ asm volatile( ++ "1: .inst 0xd91f1c01\n" ++ "2: \n" ++ _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w0) ++ : "+r" (err) ++ : "rZ" (_val), "r" (_addr) ++ : "memory"); ++ ++ return err; ++} ++ ++#endif /* CONFIG_ARM64_GCS */ ++ + #endif /* __ASM_UACCESS_H */ +-- +2.34.1 + + +From d182ff2531f97a9b48dd0a35f8c36a5b2d541d52 Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Sun, 16 Jul 2023 14:43:47 +0100 +Subject: [PATCH 17/47] arm64/gcs: Provide put_user_gcs() + +In order for EL1 to write to an EL0 GCS it must use the GCSSTTR instruction +rather than a normal STTR. Provide a put_user_gcs() which does this. + +Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org> +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + arch/arm64/include/asm/uaccess.h | 18 ++++++++++++++++++ + 1 file changed, 18 insertions(+) + +diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h +index 22e10e79f56a..e118c3d772c8 100644 +--- a/arch/arm64/include/asm/uaccess.h ++++ b/arch/arm64/include/asm/uaccess.h +@@ -445,6 +445,24 @@ static inline int gcssttr(unsigned long __user *addr, unsigned long val) + return err; + } + ++static inline void put_user_gcs(unsigned long val, unsigned long __user *addr, ++ int *err) ++{ ++ int ret; ++ ++ if (!access_ok((char __user *)addr, sizeof(u64))) { ++ *err = -EFAULT; ++ return; ++ } ++ ++ uaccess_ttbr0_enable(); ++ ret = gcssttr(addr, val); ++ if (ret != 0) ++ *err = ret; ++ uaccess_ttbr0_disable(); ++} ++ ++ + #endif /* CONFIG_ARM64_GCS */ + + #endif /* __ASM_UACCESS_H */ +-- +2.34.1 + + +From 98f4b4d4c95150730f81cff8a1a56cec4d3bd9af Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Tue, 7 Mar 2023 22:35:56 +0000 +Subject: [PATCH 18/47] arm64/cpufeature: Runtime detection of Guarded Control + Stack (GCS) + +Add a cpufeature for GCS, allowing other code to conditionally support it +at runtime. 
+ +Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org> +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + arch/arm64/include/asm/cpufeature.h | 6 ++++++ + arch/arm64/kernel/cpufeature.c | 16 ++++++++++++++++ + arch/arm64/tools/cpucaps | 1 + + 3 files changed, 23 insertions(+) + +diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h +index bd8d4ca81a48..f81a69991394 100644 +--- a/arch/arm64/include/asm/cpufeature.h ++++ b/arch/arm64/include/asm/cpufeature.h +@@ -825,6 +825,12 @@ static inline bool system_supports_lpa2(void) + return cpus_have_final_cap(ARM64_HAS_LPA2); + } + ++static inline bool system_supports_gcs(void) ++{ ++ return IS_ENABLED(CONFIG_ARM64_GCS) && ++ alternative_has_cap_unlikely(ARM64_HAS_GCS); ++} ++ + int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt); + bool try_emulate_mrs(struct pt_regs *regs, u32 isn); + +diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c +index 8d1a634a403e..b606842ab8c1 100644 +--- a/arch/arm64/kernel/cpufeature.c ++++ b/arch/arm64/kernel/cpufeature.c +@@ -255,6 +255,8 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { + }; + + static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { ++ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_GCS), ++ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_GCS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_SME_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_MPAM_frac_SHIFT, 4, 0), +@@ -2250,6 +2252,12 @@ static void cpu_enable_mops(const struct arm64_cpu_capabilities *__unused) + sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_MSCEn); + } + ++static void cpu_enable_gcs(const struct arm64_cpu_capabilities *__unused) ++{ ++ /* GCS is not currently used at EL1 */ ++ write_sysreg_s(0, SYS_GCSCR_EL1); ++} ++ + /* Internal helper functions to match cpu capability type */ + static bool + 
cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap) +@@ -2739,6 +2747,14 @@ static const struct arm64_cpu_capabilities arm64_features[] = { + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_lpa2, + }, ++ { ++ .desc = "Guarded Control Stack (GCS)", ++ .capability = ARM64_HAS_GCS, ++ .type = ARM64_CPUCAP_SYSTEM_FEATURE, ++ .cpu_enable = cpu_enable_gcs, ++ .matches = has_cpuid_feature, ++ ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, GCS, IMP) ++ }, + {}, + }; + +diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps +index b912b1409fc0..148734504295 100644 +--- a/arch/arm64/tools/cpucaps ++++ b/arch/arm64/tools/cpucaps +@@ -28,6 +28,7 @@ HAS_EPAN + HAS_EVT + HAS_FGT + HAS_FPSIMD ++HAS_GCS + HAS_GENERIC_AUTH + HAS_GENERIC_AUTH_ARCH_QARMA3 + HAS_GENERIC_AUTH_ARCH_QARMA5 +-- +2.34.1 + + +From cfa2c80233b74b7e487afbd8fe4e22cdd7c6bb93 Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Fri, 21 Apr 2023 19:37:37 +0100 +Subject: [PATCH 19/47] arm64/mm: Allocate PIE slots for EL0 guarded control + stack + +Pages used for guarded control stacks need to be described to the hardware +using the Permission Indirection Extension, GCS is not supported without +PIE. In order to support copy on write for guarded stacks we allocate two +values, one for active GCSs and one for GCS pages marked as read only prior +to copy. + +Since the actual effect is defined using PIE the specific bit pattern used +does not matter to the hardware but we choose two values which differ only +in PTE_WRITE in order to help share code with non-PIE cases. 
+ +Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org> +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + arch/arm64/include/asm/pgtable-prot.h | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h +index 483dbfa39c4c..14a33e0bece3 100644 +--- a/arch/arm64/include/asm/pgtable-prot.h ++++ b/arch/arm64/include/asm/pgtable-prot.h +@@ -129,15 +129,23 @@ extern bool arm64_use_ng_mappings; + /* 6: PTE_PXN | PTE_WRITE */ + /* 7: PAGE_SHARED_EXEC PTE_PXN | PTE_WRITE | PTE_USER */ + /* 8: PAGE_KERNEL_ROX PTE_UXN */ +-/* 9: PTE_UXN | PTE_USER */ ++/* 9: PAGE_GCS_RO PTE_UXN | PTE_USER */ + /* a: PAGE_KERNEL_EXEC PTE_UXN | PTE_WRITE */ +-/* b: PTE_UXN | PTE_WRITE | PTE_USER */ ++/* b: PAGE_GCS PTE_UXN | PTE_WRITE | PTE_USER */ + /* c: PAGE_KERNEL_RO PTE_UXN | PTE_PXN */ + /* d: PAGE_READONLY PTE_UXN | PTE_PXN | PTE_USER */ + /* e: PAGE_KERNEL PTE_UXN | PTE_PXN | PTE_WRITE */ + /* f: PAGE_SHARED PTE_UXN | PTE_PXN | PTE_WRITE | PTE_USER */ + ++#define _PAGE_GCS (_PAGE_DEFAULT | PTE_NG | PTE_UXN | PTE_WRITE | PTE_USER) ++#define _PAGE_GCS_RO (_PAGE_DEFAULT | PTE_NG | PTE_UXN | PTE_USER) ++ ++#define PAGE_GCS __pgprot(_PAGE_GCS) ++#define PAGE_GCS_RO __pgprot(_PAGE_GCS_RO) ++ + #define PIE_E0 ( \ ++ PIRx_ELx_PERM(pte_pi_index(_PAGE_GCS), PIE_GCS) | \ ++ PIRx_ELx_PERM(pte_pi_index(_PAGE_GCS_RO), PIE_R) | \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_EXECONLY), PIE_X_O) | \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY_EXEC), PIE_RX) | \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RWX) | \ +@@ -145,6 +153,8 @@ extern bool arm64_use_ng_mappings; + PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED), PIE_RW)) + + #define PIE_E1 ( \ ++ PIRx_ELx_PERM(pte_pi_index(_PAGE_GCS), PIE_NONE_O) | \ ++ PIRx_ELx_PERM(pte_pi_index(_PAGE_GCS_RO), PIE_NONE_O) | \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_EXECONLY), PIE_NONE_O) | \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY_EXEC), PIE_R) | \ + 
PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RW) | \ +-- +2.34.1 + + +From d3fb78871759fd9e703384609bf1ccce903bdca2 Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Fri, 14 Apr 2023 20:29:18 +0100 +Subject: [PATCH 20/47] mm: Define VM_SHADOW_STACK for arm64 when we support + GCS + +Use VM_HIGH_ARCH_5 for guarded control stack pages. + +Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org> +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + Documentation/filesystems/proc.rst | 2 +- + fs/proc/task_mmu.c | 3 +++ + include/linux/mm.h | 12 +++++++++++- + 3 files changed, 15 insertions(+), 2 deletions(-) + +diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst +index 104c6d047d9b..0392c3b74650 100644 +--- a/Documentation/filesystems/proc.rst ++++ b/Documentation/filesystems/proc.rst +@@ -570,7 +570,7 @@ encoded manner. The codes are the following: + mt arm64 MTE allocation tags are enabled + um userfaultfd missing tracking + uw userfaultfd wr-protect tracking +- ss shadow stack page ++ ss shadow/guarded control stack page + == ======================================= + + Note that there is no guarantee that every flag and associated mnemonic will +diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c +index ff2c601f7d1c..fb0633d8e309 100644 +--- a/fs/proc/task_mmu.c ++++ b/fs/proc/task_mmu.c +@@ -702,6 +702,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) + #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */ + #ifdef CONFIG_ARCH_HAS_USER_SHADOW_STACK + [ilog2(VM_SHADOW_STACK)] = "ss", ++#endif ++#ifdef CONFIG_ARM64_GCS ++ [ilog2(VM_SHADOW_STACK)] = "ss", + #endif + }; + size_t i; +diff --git a/include/linux/mm.h b/include/linux/mm.h +index 0b1139c5df60..6cc304c90c63 100644 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -352,7 +352,17 @@ extern unsigned int kobjsize(const void *objp); + * for more details on the guard size. 
+ */ + # define VM_SHADOW_STACK VM_HIGH_ARCH_5 +-#else ++#endif ++ ++#if defined(CONFIG_ARM64_GCS) ++/* ++ * arm64's Guarded Control Stack implements similar functionality and ++ * has similar constraints to shadow stacks. ++ */ ++# define VM_SHADOW_STACK VM_HIGH_ARCH_5 ++#endif ++ ++#ifndef VM_SHADOW_STACK + # define VM_SHADOW_STACK VM_NONE + #endif + +-- +2.34.1 + + +From 4eb47474ec4e4776a45110f9e9e853f69492ed3f Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Fri, 21 Apr 2023 20:53:01 +0100 +Subject: [PATCH 21/47] arm64/mm: Map pages for guarded control stack + +Map pages flagged as being part of a GCS as such rather than using the +full set of generic VM flags. + +This is done using a conditional rather than extending the size of +protection_map since that would make for a very sparse array. + +Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org> +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + arch/arm64/include/asm/mman.h | 9 +++++++++ + arch/arm64/mm/mmap.c | 13 ++++++++++++- + 2 files changed, 21 insertions(+), 1 deletion(-) + +diff --git a/arch/arm64/include/asm/mman.h b/arch/arm64/include/asm/mman.h +index c21849ffdd88..6d3fe6433a62 100644 +--- a/arch/arm64/include/asm/mman.h ++++ b/arch/arm64/include/asm/mman.h +@@ -61,6 +61,15 @@ static inline bool arch_validate_flags(unsigned long vm_flags) + return false; + } + ++ if (system_supports_gcs() && (vm_flags & VM_SHADOW_STACK)) { ++ /* ++ * An executable GCS isn't a good idea, and the mm ++ * core can't cope with a shared GCS. 
++ */ ++ if (vm_flags & (VM_EXEC | VM_ARM64_BTI | VM_SHARED)) ++ return false; ++ } ++ + return true; + + } +diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c +index 645fe60d000f..e44ce6fcfad9 100644 +--- a/arch/arm64/mm/mmap.c ++++ b/arch/arm64/mm/mmap.c +@@ -79,9 +79,20 @@ arch_initcall(adjust_protection_map); + + pgprot_t vm_get_page_prot(unsigned long vm_flags) + { +- pteval_t prot = pgprot_val(protection_map[vm_flags & ++ pteval_t prot; ++ ++ /* If this is a GCS then only interpret VM_WRITE. */ ++ if (system_supports_gcs() && (vm_flags & VM_SHADOW_STACK)) { ++ if (vm_flags & VM_WRITE) ++ prot = _PAGE_GCS; ++ else ++ prot = _PAGE_GCS_RO; ++ } else { ++ prot = pgprot_val(protection_map[vm_flags & + (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]); ++ } + ++ /* VM_ARM64_BTI on a GCS is rejected in arch_validate_flags() */ + if (vm_flags & VM_ARM64_BTI) + prot |= PTE_GP; + +-- +2.34.1 + + +From e505761a54185aa1c4de33454fca255918036af0 Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Wed, 8 Mar 2023 00:40:28 +0000 +Subject: [PATCH 22/47] KVM: arm64: Manage GCS registers for guests + +GCS introduces a number of system registers for EL1 and EL0, on systems +with GCS we need to context switch them and expose them to VMMs to allow +guests to use GCS, as well as describe their fine grained traps to +nested virtualisation. Traps are already disabled. 
+ +Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org> +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + arch/arm64/include/asm/kvm_host.h | 12 ++++++++++++ + arch/arm64/kvm/emulate-nested.c | 4 ++++ + arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 17 +++++++++++++++++ + arch/arm64/kvm/sys_regs.c | 22 ++++++++++++++++++++++ + 4 files changed, 55 insertions(+) + +diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h +index 21c57b812569..6c7ea7f9cd92 100644 +--- a/arch/arm64/include/asm/kvm_host.h ++++ b/arch/arm64/include/asm/kvm_host.h +@@ -388,6 +388,12 @@ enum vcpu_sysreg { + GCR_EL1, /* Tag Control Register */ + TFSRE0_EL1, /* Tag Fault Status Register (EL0) */ + ++ /* Guarded Control Stack registers */ ++ GCSCRE0_EL1, /* Guarded Control Stack Control (EL0) */ ++ GCSCR_EL1, /* Guarded Control Stack Control (EL1) */ ++ GCSPR_EL0, /* Guarded Control Stack Pointer (EL0) */ ++ GCSPR_EL1, /* Guarded Control Stack Pointer (EL1) */ ++ + /* 32bit specific registers. 
*/ + DACR32_EL2, /* Domain Access Control Register */ + IFSR32_EL2, /* Instruction Fault Status Register */ +@@ -1221,6 +1227,12 @@ static inline bool __vcpu_has_feature(const struct kvm_arch *ka, int feature) + + #define vcpu_has_feature(v, f) __vcpu_has_feature(&(v)->kvm->arch, (f)) + ++static inline bool has_gcs(void) ++{ ++ return IS_ENABLED(CONFIG_ARM64_GCS) && ++ cpus_have_final_cap(ARM64_HAS_GCS); ++} ++ + int kvm_trng_call(struct kvm_vcpu *vcpu); + #ifdef CONFIG_KVM + extern phys_addr_t hyp_mem_base; +diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c +index 431fd429932d..24eb7eccbae4 100644 +--- a/arch/arm64/kvm/emulate-nested.c ++++ b/arch/arm64/kvm/emulate-nested.c +@@ -1098,8 +1098,12 @@ static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = { + SR_FGT(SYS_ESR_EL1, HFGxTR, ESR_EL1, 1), + SR_FGT(SYS_DCZID_EL0, HFGxTR, DCZID_EL0, 1), + SR_FGT(SYS_CTR_EL0, HFGxTR, CTR_EL0, 1), ++ SR_FGT(SYS_GCSPR_EL0, HFGxTR, nGCS_EL0, 1), + SR_FGT(SYS_CSSELR_EL1, HFGxTR, CSSELR_EL1, 1), + SR_FGT(SYS_CPACR_EL1, HFGxTR, CPACR_EL1, 1), ++ SR_FGT(SYS_GCSCR_EL1, HFGxTR, nGCS_EL1, 1), ++ SR_FGT(SYS_GCSPR_EL1, HFGxTR, nGCS_EL1, 1), ++ SR_FGT(SYS_GCSCRE0_EL1, HFGxTR, nGCS_EL0, 1), + SR_FGT(SYS_CONTEXTIDR_EL1, HFGxTR, CONTEXTIDR_EL1, 1), + SR_FGT(SYS_CLIDR_EL1, HFGxTR, CLIDR_EL1, 1), + SR_FGT(SYS_CCSIDR_EL1, HFGxTR, CCSIDR_EL1, 1), +diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +index bb6b571ec627..ec34d4a90717 100644 +--- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h ++++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +@@ -25,6 +25,8 @@ static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt) + { + ctxt_sys_reg(ctxt, TPIDR_EL0) = read_sysreg(tpidr_el0); + ctxt_sys_reg(ctxt, TPIDRRO_EL0) = read_sysreg(tpidrro_el0); ++ if (has_gcs()) ++ ctxt_sys_reg(ctxt, GCSPR_EL0) = read_sysreg_s(SYS_GCSPR_EL0); + } + + static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt) 
+@@ -62,6 +64,12 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) + ctxt_sys_reg(ctxt, PAR_EL1) = read_sysreg_par(); + ctxt_sys_reg(ctxt, TPIDR_EL1) = read_sysreg(tpidr_el1); + ++ if (has_gcs()) { ++ ctxt_sys_reg(ctxt, GCSPR_EL1) = read_sysreg_el1(SYS_GCSPR); ++ ctxt_sys_reg(ctxt, GCSCR_EL1) = read_sysreg_el1(SYS_GCSCR); ++ ctxt_sys_reg(ctxt, GCSCRE0_EL1) = read_sysreg_s(SYS_GCSCRE0_EL1); ++ } ++ + if (ctxt_has_mte(ctxt)) { + ctxt_sys_reg(ctxt, TFSR_EL1) = read_sysreg_el1(SYS_TFSR); + ctxt_sys_reg(ctxt, TFSRE0_EL1) = read_sysreg_s(SYS_TFSRE0_EL1); +@@ -95,6 +103,8 @@ static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt) + { + write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL0), tpidr_el0); + write_sysreg(ctxt_sys_reg(ctxt, TPIDRRO_EL0), tpidrro_el0); ++ if (has_gcs()) ++ write_sysreg_s(ctxt_sys_reg(ctxt, GCSPR_EL0), SYS_GCSPR_EL0); + } + + static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) +@@ -138,6 +148,13 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) + write_sysreg(ctxt_sys_reg(ctxt, PAR_EL1), par_el1); + write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL1), tpidr_el1); + ++ if (has_gcs()) { ++ write_sysreg_el1(ctxt_sys_reg(ctxt, GCSPR_EL1), SYS_GCSPR); ++ write_sysreg_el1(ctxt_sys_reg(ctxt, GCSCR_EL1), SYS_GCSCR); ++ write_sysreg_s(ctxt_sys_reg(ctxt, GCSCRE0_EL1), ++ SYS_GCSCRE0_EL1); ++ } ++ + if (ctxt_has_mte(ctxt)) { + write_sysreg_el1(ctxt_sys_reg(ctxt, TFSR_EL1), SYS_TFSR); + write_sysreg_s(ctxt_sys_reg(ctxt, TFSRE0_EL1), SYS_TFSRE0_EL1); +diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c +index 30253bd19917..83ba767e75d2 100644 +--- a/arch/arm64/kvm/sys_regs.c ++++ b/arch/arm64/kvm/sys_regs.c +@@ -2000,6 +2000,23 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu, + .visibility = mte_visibility, \ + } + ++static unsigned int gcs_visibility(const struct kvm_vcpu *vcpu, ++ const struct sys_reg_desc *rd) ++{ ++ if (has_gcs()) ++ return 0; 
++ ++ return REG_HIDDEN; ++} ++ ++#define GCS_REG(name) { \ ++ SYS_DESC(SYS_##name), \ ++ .access = undef_access, \ ++ .reset = reset_unknown, \ ++ .reg = name, \ ++ .visibility = gcs_visibility, \ ++} ++ + static unsigned int el2_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) + { +@@ -2376,6 +2393,10 @@ static const struct sys_reg_desc sys_reg_descs[] = { + PTRAUTH_KEY(APDB), + PTRAUTH_KEY(APGA), + ++ GCS_REG(GCSCR_EL1), ++ GCS_REG(GCSPR_EL1), ++ GCS_REG(GCSCRE0_EL1), ++ + { SYS_DESC(SYS_SPSR_EL1), access_spsr}, + { SYS_DESC(SYS_ELR_EL1), access_elr}, + +@@ -2462,6 +2483,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { + { SYS_DESC(SYS_SMIDR_EL1), undef_access }, + { SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 }, + { SYS_DESC(SYS_CTR_EL0), access_ctr }, ++ GCS_REG(GCSPR_EL0), + { SYS_DESC(SYS_SVCR), undef_access }, + + { PMU_SYS_REG(PMCR_EL0), .access = access_pmcr, .reset = reset_pmcr, +-- +2.34.1 + + +From 371653e2b075a2b4c3f2549d02366d4c168c29c6 Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Wed, 15 Mar 2023 18:48:06 +0000 +Subject: [PATCH 23/47] arm64/gcs: Allow GCS usage at EL0 and EL1 + +There is a control HCRX_EL2.GCSEn which must be set to allow GCS +features to take effect at lower ELs and also fine grained traps for GCS +usage at EL0 and EL1. Configure all these to allow GCS usage by EL0 and +EL1. 
+ +Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org> +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + arch/arm64/include/asm/el2_setup.h | 17 +++++++++++++++++ + arch/arm64/include/asm/kvm_arm.h | 4 ++-- + 2 files changed, 19 insertions(+), 2 deletions(-) + +diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h +index b7afaa026842..17672563e333 100644 +--- a/arch/arm64/include/asm/el2_setup.h ++++ b/arch/arm64/include/asm/el2_setup.h +@@ -27,6 +27,14 @@ + ubfx x0, x0, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4 + cbz x0, .Lskip_hcrx_\@ + mov_q x0, HCRX_HOST_FLAGS ++ ++ /* Enable GCS if supported */ ++ mrs_s x1, SYS_ID_AA64PFR1_EL1 ++ ubfx x1, x1, #ID_AA64PFR1_EL1_GCS_SHIFT, #4 ++ cbz x1, .Lset_hcrx_\@ ++ orr x0, x0, #HCRX_EL2_GCSEn ++ ++.Lset_hcrx_\@: + msr_s SYS_HCRX_EL2, x0 + .Lskip_hcrx_\@: + .endm +@@ -190,6 +198,15 @@ + orr x0, x0, #HFGxTR_EL2_nPIR_EL1 + orr x0, x0, #HFGxTR_EL2_nPIRE0_EL1 + ++ /* GCS depends on PIE so we don't check it if PIE is absent */ ++ mrs_s x1, SYS_ID_AA64PFR1_EL1 ++ ubfx x1, x1, #ID_AA64PFR1_EL1_GCS_SHIFT, #4 ++ cbz x1, .Lset_fgt_\@ ++ ++ /* Disable traps of access to GCS registers at EL0 and EL1 */ ++ orr x0, x0, #HFGxTR_EL2_nGCS_EL1_MASK ++ orr x0, x0, #HFGxTR_EL2_nGCS_EL0_MASK ++ + .Lset_fgt_\@: + msr_s SYS_HFGRTR_EL2, x0 + msr_s SYS_HFGWTR_EL2, x0 +diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h +index 3c6f8ba1e479..a9354c237a97 100644 +--- a/arch/arm64/include/asm/kvm_arm.h ++++ b/arch/arm64/include/asm/kvm_arm.h +@@ -103,9 +103,9 @@ + #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) + + #define HCRX_GUEST_FLAGS \ +- (HCRX_EL2_SMPME | HCRX_EL2_TCR2En | \ ++ (HCRX_EL2_SMPME | HCRX_EL2_TCR2En | HCRX_EL2_GCSEn |\ + (cpus_have_final_cap(ARM64_HAS_MOPS) ? 
(HCRX_EL2_MSCEn | HCRX_EL2_MCE2) : 0))
+-#define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En)
++#define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_GCSEn)
+
+ /* TCR_EL2 Registers bits */
+ #define TCR_EL2_DS (1UL << 32)
+-- 
+2.34.1
+
+
+From d50f122180261521787ac2a91c705554eea2e77a Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Wed, 15 Mar 2023 18:52:09 +0000
+Subject: [PATCH 24/47] arm64/idreg: Add override for GCS
+
+Hook up an override for GCS, allowing it to be disabled from the command
+line by specifying arm64.nogcs in case there are problems.
+
+Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ Documentation/admin-guide/kernel-parameters.txt | 6 ++++++
+ arch/arm64/kernel/idreg-override.c | 2 ++
+ 2 files changed, 8 insertions(+)
+
+diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
+index 31b3a25680d0..e86160251d23 100644
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -429,9 +429,15 @@
+ arm64.nobti [ARM64] Unconditionally disable Branch Target
+ Identification support
+
++ arm64.nogcs [ARM64] Unconditionally disable Guarded Control Stack
++ support
++
+ arm64.nomops [ARM64] Unconditionally disable Memory Copy and Memory
+ Set instructions support
+
++ arm64.nopauth [ARM64] Unconditionally disable Pointer Authentication
++ support
++
+ arm64.nomte [ARM64] Unconditionally disable Memory Tagging Extension
+ support
+
+diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c
+index e30fd9e32ef3..00bcdad53ba9 100644
+--- a/arch/arm64/kernel/idreg-override.c
++++ b/arch/arm64/kernel/idreg-override.c
+@@ -110,6 +110,7 @@ static const struct ftr_set_desc pfr1 __prel64_initconst = {
+ .override = &id_aa64pfr1_override,
+ .fields = {
+ FIELD("bt", ID_AA64PFR1_EL1_BT_SHIFT, NULL ),
++ FIELD("gcs", 
ID_AA64PFR1_EL1_GCS_SHIFT, NULL), + FIELD("mte", ID_AA64PFR1_EL1_MTE_SHIFT, NULL), + FIELD("sme", ID_AA64PFR1_EL1_SME_SHIFT, pfr1_sme_filter), + {} +@@ -190,6 +191,7 @@ static const struct { + { "arm64.nosve", "id_aa64pfr0.sve=0" }, + { "arm64.nosme", "id_aa64pfr1.sme=0" }, + { "arm64.nobti", "id_aa64pfr1.bt=0" }, ++ { "arm64.nogcs", "id_aa64pfr1.gcs=0" }, + { "arm64.nopauth", + "id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 " + "id_aa64isar1.api=0 id_aa64isar1.apa=0 " +-- +2.34.1 + + +From cf891db5ab3aad787c1deff23058d51e24b19ce1 Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Mon, 20 Mar 2023 18:21:38 +0000 +Subject: [PATCH 25/47] arm64/hwcap: Add hwcap for GCS + +Provide a hwcap to enable userspace to detect support for GCS. + +Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org> +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + Documentation/arch/arm64/elf_hwcaps.rst | 3 +++ + arch/arm64/include/asm/hwcap.h | 1 + + arch/arm64/include/uapi/asm/hwcap.h | 1 + + arch/arm64/kernel/cpufeature.c | 3 +++ + arch/arm64/kernel/cpuinfo.c | 1 + + 5 files changed, 9 insertions(+) + +diff --git a/Documentation/arch/arm64/elf_hwcaps.rst b/Documentation/arch/arm64/elf_hwcaps.rst +index ced7b335e2e0..86d4ace9c75c 100644 +--- a/Documentation/arch/arm64/elf_hwcaps.rst ++++ b/Documentation/arch/arm64/elf_hwcaps.rst +@@ -317,6 +317,9 @@ HWCAP2_LRCPC3 + HWCAP2_LSE128 + Functionality implied by ID_AA64ISAR0_EL1.Atomic == 0b0011. + ++HWCAP2_GCS ++ Functionality implied by ID_AA64PFR1_EL1.GCS == 0b1 ++ + 4. 
Unused AT_HWCAP bits + ----------------------- + +diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h +index cd71e09ea14d..e01e6b72a839 100644 +--- a/arch/arm64/include/asm/hwcap.h ++++ b/arch/arm64/include/asm/hwcap.h +@@ -142,6 +142,7 @@ + #define KERNEL_HWCAP_SVE_B16B16 __khwcap2_feature(SVE_B16B16) + #define KERNEL_HWCAP_LRCPC3 __khwcap2_feature(LRCPC3) + #define KERNEL_HWCAP_LSE128 __khwcap2_feature(LSE128) ++#define KERNEL_HWCAP_GCS __khwcap2_feature(GCS) + + /* + * This yields a mask that user programs can use to figure out what +diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h +index 5023599fa278..996b5b5d4c4e 100644 +--- a/arch/arm64/include/uapi/asm/hwcap.h ++++ b/arch/arm64/include/uapi/asm/hwcap.h +@@ -107,5 +107,6 @@ + #define HWCAP2_SVE_B16B16 (1UL << 45) + #define HWCAP2_LRCPC3 (1UL << 46) + #define HWCAP2_LSE128 (1UL << 47) ++#define HWCAP2_GCS (1UL << 48) + + #endif /* _UAPI__ASM_HWCAP_H */ +diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c +index b606842ab8c1..1a92c4502a0b 100644 +--- a/arch/arm64/kernel/cpufeature.c ++++ b/arch/arm64/kernel/cpufeature.c +@@ -2867,6 +2867,9 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { + HWCAP_CAP(ID_AA64ZFR0_EL1, I8MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM), + HWCAP_CAP(ID_AA64ZFR0_EL1, F32MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM), + HWCAP_CAP(ID_AA64ZFR0_EL1, F64MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM), ++#endif ++#ifdef CONFIG_ARM64_GCS ++ HWCAP_CAP(ID_AA64PFR1_EL1, GCS, IMP, CAP_HWCAP, KERNEL_HWCAP_GCS), + #endif + HWCAP_CAP(ID_AA64PFR1_EL1, SSBS, SSBS2, CAP_HWCAP, KERNEL_HWCAP_SSBS), + #ifdef CONFIG_ARM64_BTI +diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c +index 47043c0d95ec..b3ec0b89c9e0 100644 +--- a/arch/arm64/kernel/cpuinfo.c ++++ b/arch/arm64/kernel/cpuinfo.c +@@ -128,6 +128,7 @@ static const char *const hwcap_str[] = { + [KERNEL_HWCAP_SVE_B16B16] = "sveb16b16", + 
[KERNEL_HWCAP_LRCPC3] = "lrcpc3", + [KERNEL_HWCAP_LSE128] = "lse128", ++ [KERNEL_HWCAP_GCS] = "gcs", + }; + + #ifdef CONFIG_COMPAT +-- +2.34.1 + + +From bccc68b34269e6ccc69fdbbca6d17131093170f7 Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Fri, 14 Apr 2023 20:57:45 +0100 +Subject: [PATCH 26/47] arm64/traps: Handle GCS exceptions + +A new exception code is defined for GCS specific faults other than +standard load/store faults, for example GCS token validation failures, +add handling for this. These faults are reported to userspace as +segfaults with code SEGV_CPERR (protection error), mirroring the +reporting for x86 shadow stack errors. + +GCS faults due to memory load/store operations generate data aborts with +a flag set, these will be handled separately as part of the data abort +handling. + +Since we do not currently enable GCS for EL1 we should not get any faults +there but while we're at it we wire things up there, treating any GCS +fault as fatal. + +Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org> +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + arch/arm64/include/asm/esr.h | 28 +++++++++++++++++++++++++++- + arch/arm64/include/asm/exception.h | 2 ++ + arch/arm64/kernel/entry-common.c | 23 +++++++++++++++++++++++ + arch/arm64/kernel/traps.c | 11 +++++++++++ + 4 files changed, 63 insertions(+), 1 deletion(-) + +diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h +index 353fe08546cf..20ee9f531864 100644 +--- a/arch/arm64/include/asm/esr.h ++++ b/arch/arm64/include/asm/esr.h +@@ -51,7 +51,8 @@ + #define ESR_ELx_EC_FP_EXC32 (0x28) + /* Unallocated EC: 0x29 - 0x2B */ + #define ESR_ELx_EC_FP_EXC64 (0x2C) +-/* Unallocated EC: 0x2D - 0x2E */ ++#define ESR_ELx_EC_GCS (0x2D) ++/* Unallocated EC: 0x2E */ + #define ESR_ELx_EC_SERROR (0x2F) + #define ESR_ELx_EC_BREAKPT_LOW (0x30) + #define ESR_ELx_EC_BREAKPT_CUR (0x31) +@@ -382,6 +383,31 @@ + #define ESR_ELx_MOPS_ISS_SRCREG(esr) (((esr) & (UL(0x1f) << 
5)) >> 5) + #define ESR_ELx_MOPS_ISS_SIZEREG(esr) (((esr) & (UL(0x1f) << 0)) >> 0) + ++/* ISS field definitions for GCS */ ++#define ESR_ELx_ExType_SHIFT (20) ++#define ESR_ELx_ExType_MASK GENMASK(23, 20) ++#define ESR_ELx_Raddr_SHIFT (10) ++#define ESR_ELx_Raddr_MASK GENMASK(14, 10) ++#define ESR_ELx_Rn_SHIFT (5) ++#define ESR_ELx_Rn_MASK GENMASK(9, 5) ++#define ESR_ELx_Rvalue_SHIFT 5 ++#define ESR_ELx_Rvalue_MASK GENMASK(9, 5) ++#define ESR_ELx_IT_SHIFT (0) ++#define ESR_ELx_IT_MASK GENMASK(4, 0) ++ ++#define ESR_ELx_ExType_DATA_CHECK 0 ++#define ESR_ELx_ExType_EXLOCK 1 ++#define ESR_ELx_ExType_STR 2 ++ ++#define ESR_ELx_IT_RET 0 ++#define ESR_ELx_IT_GCSPOPM 1 ++#define ESR_ELx_IT_RET_KEYA 2 ++#define ESR_ELx_IT_RET_KEYB 3 ++#define ESR_ELx_IT_GCSSS1 4 ++#define ESR_ELx_IT_GCSSS2 5 ++#define ESR_ELx_IT_GCSPOPCX 6 ++#define ESR_ELx_IT_GCSPOPX 7 ++ + #ifndef __ASSEMBLY__ + #include <asm/types.h> + +diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h +index ad688e157c9b..99caff458e20 100644 +--- a/arch/arm64/include/asm/exception.h ++++ b/arch/arm64/include/asm/exception.h +@@ -57,6 +57,8 @@ void do_el0_undef(struct pt_regs *regs, unsigned long esr); + void do_el1_undef(struct pt_regs *regs, unsigned long esr); + void do_el0_bti(struct pt_regs *regs); + void do_el1_bti(struct pt_regs *regs, unsigned long esr); ++void do_el0_gcs(struct pt_regs *regs, unsigned long esr); ++void do_el1_gcs(struct pt_regs *regs, unsigned long esr); + void do_debug_exception(unsigned long addr_if_watchpoint, unsigned long esr, + struct pt_regs *regs); + void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs); +diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c +index 0fc94207e69a..52d78ce63a4e 100644 +--- a/arch/arm64/kernel/entry-common.c ++++ b/arch/arm64/kernel/entry-common.c +@@ -429,6 +429,15 @@ static void noinstr el1_bti(struct pt_regs *regs, unsigned long esr) + exit_to_kernel_mode(regs); + } + ++static void 
noinstr el1_gcs(struct pt_regs *regs, unsigned long esr) ++{ ++ enter_from_kernel_mode(regs); ++ local_daif_inherit(regs); ++ do_el1_gcs(regs, esr); ++ local_daif_mask(); ++ exit_to_kernel_mode(regs); ++} ++ + static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr) + { + unsigned long far = read_sysreg(far_el1); +@@ -471,6 +480,9 @@ asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs) + case ESR_ELx_EC_BTI: + el1_bti(regs, esr); + break; ++ case ESR_ELx_EC_GCS: ++ el1_gcs(regs, esr); ++ break; + case ESR_ELx_EC_BREAKPT_CUR: + case ESR_ELx_EC_SOFTSTP_CUR: + case ESR_ELx_EC_WATCHPT_CUR: +@@ -650,6 +662,14 @@ static void noinstr el0_mops(struct pt_regs *regs, unsigned long esr) + exit_to_user_mode(regs); + } + ++static void noinstr el0_gcs(struct pt_regs *regs, unsigned long esr) ++{ ++ enter_from_user_mode(regs); ++ local_daif_restore(DAIF_PROCCTX); ++ do_el0_gcs(regs, esr); ++ exit_to_user_mode(regs); ++} ++ + static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr) + { + enter_from_user_mode(regs); +@@ -732,6 +752,9 @@ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs) + case ESR_ELx_EC_MOPS: + el0_mops(regs, esr); + break; ++ case ESR_ELx_EC_GCS: ++ el0_gcs(regs, esr); ++ break; + case ESR_ELx_EC_BREAKPT_LOW: + case ESR_ELx_EC_SOFTSTP_LOW: + case ESR_ELx_EC_WATCHPT_LOW: +diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c +index 215e6d7f2df8..fb867c6526a6 100644 +--- a/arch/arm64/kernel/traps.c ++++ b/arch/arm64/kernel/traps.c +@@ -500,6 +500,16 @@ void do_el1_bti(struct pt_regs *regs, unsigned long esr) + die("Oops - BTI", regs, esr); + } + ++void do_el0_gcs(struct pt_regs *regs, unsigned long esr) ++{ ++ force_signal_inject(SIGSEGV, SEGV_CPERR, regs->pc, 0); ++} ++ ++void do_el1_gcs(struct pt_regs *regs, unsigned long esr) ++{ ++ die("Oops - GCS", regs, esr); ++} ++ + void do_el0_fpac(struct pt_regs *regs, unsigned long esr) + { + force_signal_inject(SIGILL, ILL_ILLOPN, regs->pc, esr); +@@ 
-838,6 +848,7 @@ static const char *esr_class_str[] = {
+ [ESR_ELx_EC_MOPS] = "MOPS",
+ [ESR_ELx_EC_FP_EXC32] = "FP (AArch32)",
+ [ESR_ELx_EC_FP_EXC64] = "FP (AArch64)",
++ [ESR_ELx_EC_GCS] = "Guarded Control Stack",
+ [ESR_ELx_EC_SERROR] = "SError",
+ [ESR_ELx_EC_BREAKPT_LOW] = "Breakpoint (lower EL)",
+ [ESR_ELx_EC_BREAKPT_CUR] = "Breakpoint (current EL)",
+-- 
+2.34.1
+
+
+From a44e4f0ea5726b528c7247c2331301e95de6acea Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Fri, 28 Apr 2023 13:59:24 +0100
+Subject: [PATCH 27/47] arm64/mm: Handle GCS data aborts
+
+All GCS operations at EL0 must happen on a page which is marked as
+having UnprivGCS access, including read operations. If a GCS operation
+attempts to access a page without this then it will generate a data
+abort with the GCS bit set in ESR_EL1.ISS2.
+
+EL0 may validly generate such faults, for example due to copy on write
+which will cause the GCS data to be stored in a read only page with no
+GCS permissions until the actual copy happens. Since UnprivGCS allows
+both reads and writes to the GCS (though only through GCS operations) we
+need to ensure that the memory management subsystem handles GCS accesses
+as writes at all times. Do this by adding FAULT_FLAG_WRITE to any GCS
+page faults, adding handling to ensure that invalid cases are identified
+as such early so the memory management core does not think they will
+succeed. The core cannot distinguish between VMAs which are generally
+writeable and VMAs which are only writeable through GCS operations.
+
+EL1 may validly write to EL0 GCS for management purposes (eg, while
+initialising with cap tokens).
+
+We also report any GCS faults in VMAs not marked as part of a GCS as
+access violations, causing a fault to be delivered to userspace if it
+attempts to do GCS operations outside a GCS. 
+ +Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org> +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + arch/arm64/mm/fault.c | 76 ++++++++++++++++++++++++++++++++++++++----- + 1 file changed, 68 insertions(+), 8 deletions(-) + +diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c +index 55f6455a8284..c2a36102e143 100644 +--- a/arch/arm64/mm/fault.c ++++ b/arch/arm64/mm/fault.c +@@ -494,13 +494,30 @@ static void do_bad_area(unsigned long far, unsigned long esr, + } + } + ++/* ++ * Note: not valid for EL1 DC IVAC, but we never use that such that it ++ * should fault. EL0 cannot issue DC IVAC (undef). ++ */ ++static bool is_write_abort(unsigned long esr) ++{ ++ return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM); ++} ++ ++static bool is_gcs_fault(unsigned long esr) ++{ ++ if (!esr_is_data_abort(esr)) ++ return false; ++ ++ return ESR_ELx_ISS2(esr) & ESR_ELx_GCS; ++} ++ + #define VM_FAULT_BADMAP ((__force vm_fault_t)0x010000) + #define VM_FAULT_BADACCESS ((__force vm_fault_t)0x020000) + + static vm_fault_t __do_page_fault(struct mm_struct *mm, + struct vm_area_struct *vma, unsigned long addr, + unsigned int mm_flags, unsigned long vm_flags, +- struct pt_regs *regs) ++ unsigned long esr, struct pt_regs *regs) + { + /* + * Ok, we have a good vm_area for this memory access, so we can handle +@@ -510,6 +527,26 @@ static vm_fault_t __do_page_fault(struct mm_struct *mm, + */ + if (!(vma->vm_flags & vm_flags)) + return VM_FAULT_BADACCESS; ++ ++ if (vma->vm_flags & VM_SHADOW_STACK) { ++ /* ++ * Writes to a GCS must either be generated by a GCS ++ * operation or be from EL1. ++ */ ++ if (is_write_abort(esr) && ++ !(is_gcs_fault(esr) || is_el1_data_abort(esr))) ++ return VM_FAULT_BADACCESS; ++ } else { ++ /* ++ * GCS faults should never happen for pages that are ++ * not part of a GCS and the operation being attempted ++ * can never succeed. 
++ */ ++ if (is_gcs_fault(esr)) ++ return VM_FAULT_BADACCESS; ++ } ++ ++ + return handle_mm_fault(vma, addr, mm_flags, regs); + } + +@@ -518,13 +555,23 @@ static bool is_el0_instruction_abort(unsigned long esr) + return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW; + } + +-/* +- * Note: not valid for EL1 DC IVAC, but we never use that such that it +- * should fault. EL0 cannot issue DC IVAC (undef). +- */ +-static bool is_write_abort(unsigned long esr) ++static bool is_invalid_gcs_access(struct vm_area_struct *vma, u64 esr) + { +- return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM); ++ if (!system_supports_gcs()) ++ return false; ++ ++ if (unlikely(is_gcs_fault(esr))) { ++ /* GCS accesses must be performed on a GCS page */ ++ if (!(vma->vm_flags & VM_SHADOW_STACK)) ++ return true; ++ if (!(vma->vm_flags & VM_WRITE)) ++ return true; ++ } else if (unlikely(vma->vm_flags & VM_SHADOW_STACK)) { ++ /* Only GCS operations can write to a GCS page */ ++ return is_write_abort(esr); ++ } ++ ++ return false; + } + + static int __kprobes do_page_fault(unsigned long far, unsigned long esr, +@@ -561,6 +608,14 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, + /* It was exec fault */ + vm_flags = VM_EXEC; + mm_flags |= FAULT_FLAG_INSTRUCTION; ++ } else if (is_gcs_fault(esr)) { ++ /* ++ * The GCS permission on a page implies both read and ++ * write so always handle any GCS fault as a write fault, ++ * we need to trigger CoW even for GCS reads. 
++ */ ++ vm_flags = VM_WRITE; ++ mm_flags |= FAULT_FLAG_WRITE; + } else if (is_write_abort(esr)) { + /* It was write fault */ + vm_flags = VM_WRITE; +@@ -594,6 +649,11 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, + if (!vma) + goto lock_mmap; + ++ if (is_invalid_gcs_access(vma, esr)) { ++ vma_end_read(vma); ++ goto lock_mmap; ++ } ++ + if (!(vma->vm_flags & vm_flags)) { + vma_end_read(vma); + goto lock_mmap; +@@ -625,7 +685,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, + goto done; + } + +- fault = __do_page_fault(mm, vma, addr, mm_flags, vm_flags, regs); ++ fault = __do_page_fault(mm, vma, addr, mm_flags, vm_flags, esr, regs); + + /* Quick path to respond to signals */ + if (fault_signal_pending(fault, regs)) { +-- +2.34.1 + + +From 5fe1e5ee0c88af97929f0a604ac3079542f0aadb Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Wed, 12 Apr 2023 20:31:01 +0100 +Subject: [PATCH 28/47] arm64/gcs: Context switch GCS state for EL0 + +There are two registers controlling the GCS state of EL0, GCSPR_EL0 which +is the current GCS pointer and GCSCRE0_EL1 which has enable bits for the +specific GCS functionality enabled for EL0. Manage these on context switch +and process lifetime events, GCS is reset on exec(). Also ensure that +any changes to the GCS memory are visible to other PEs and that changes +from other PEs are visible on this one by issuing a GCSB DSYNC when +moving to or from a thread with GCS. + +Since the current GCS configuration of a thread will be visible to +userspace we store the configuration in the format used with userspace +and provide a helper which configures the system register as needed. + +On systems that support GCS we always allow access to GCSPR_EL0, this +facilitates reporting of GCS faults if userspace implements disabling of +GCS on error - the GCS can still be discovered and examined even if GCS +has been disabled. 
+ +Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org> +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + arch/arm64/include/asm/gcs.h | 24 +++++++++++++ + arch/arm64/include/asm/processor.h | 6 ++++ + arch/arm64/kernel/process.c | 56 ++++++++++++++++++++++++++++++ + arch/arm64/mm/Makefile | 1 + + arch/arm64/mm/gcs.c | 39 +++++++++++++++++++++ + 5 files changed, 126 insertions(+) + create mode 100644 arch/arm64/mm/gcs.c + +diff --git a/arch/arm64/include/asm/gcs.h b/arch/arm64/include/asm/gcs.h +index 7c5e95218db6..04594ef59dad 100644 +--- a/arch/arm64/include/asm/gcs.h ++++ b/arch/arm64/include/asm/gcs.h +@@ -48,4 +48,28 @@ static inline u64 gcsss2(void) + return Xt; + } + ++#ifdef CONFIG_ARM64_GCS ++ ++static inline bool task_gcs_el0_enabled(struct task_struct *task) ++{ ++ return current->thread.gcs_el0_mode & PR_SHADOW_STACK_ENABLE; ++} ++ ++void gcs_set_el0_mode(struct task_struct *task); ++void gcs_free(struct task_struct *task); ++void gcs_preserve_current_state(void); ++ ++#else ++ ++static inline bool task_gcs_el0_enabled(struct task_struct *task) ++{ ++ return false; ++} ++ ++static inline void gcs_set_el0_mode(struct task_struct *task) { } ++static inline void gcs_free(struct task_struct *task) { } ++static inline void gcs_preserve_current_state(void) { } ++ ++#endif ++ + #endif +diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h +index 5b0a04810b23..6fc6dcbd494c 100644 +--- a/arch/arm64/include/asm/processor.h ++++ b/arch/arm64/include/asm/processor.h +@@ -182,6 +182,12 @@ struct thread_struct { + u64 sctlr_user; + u64 svcr; + u64 tpidr2_el0; ++#ifdef CONFIG_ARM64_GCS ++ unsigned int gcs_el0_mode; ++ u64 gcspr_el0; ++ u64 gcs_base; ++ u64 gcs_size; ++#endif + }; + + static inline unsigned int thread_get_vl(struct thread_struct *thread, +diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c +index 7387b68c745b..fd80b43c2969 100644 +--- a/arch/arm64/kernel/process.c ++++ 
b/arch/arm64/kernel/process.c +@@ -48,6 +48,7 @@ + #include <asm/cacheflush.h> + #include <asm/exec.h> + #include <asm/fpsimd.h> ++#include <asm/gcs.h> + #include <asm/mmu_context.h> + #include <asm/mte.h> + #include <asm/processor.h> +@@ -271,12 +272,32 @@ static void flush_tagged_addr_state(void) + clear_thread_flag(TIF_TAGGED_ADDR); + } + ++#ifdef CONFIG_ARM64_GCS ++ ++static void flush_gcs(void) ++{ ++ if (!system_supports_gcs()) ++ return; ++ ++ gcs_free(current); ++ current->thread.gcs_el0_mode = 0; ++ write_sysreg_s(0, SYS_GCSCRE0_EL1); ++ write_sysreg_s(0, SYS_GCSPR_EL0); ++} ++ ++#else ++ ++static void flush_gcs(void) { } ++ ++#endif ++ + void flush_thread(void) + { + fpsimd_flush_thread(); + tls_thread_flush(); + flush_ptrace_hw_breakpoint(current); + flush_tagged_addr_state(); ++ flush_gcs(); + } + + void arch_release_task_struct(struct task_struct *tsk) +@@ -474,6 +495,40 @@ static void entry_task_switch(struct task_struct *next) + __this_cpu_write(__entry_task, next); + } + ++#ifdef CONFIG_ARM64_GCS ++ ++void gcs_preserve_current_state(void) ++{ ++ if (task_gcs_el0_enabled(current)) ++ current->thread.gcspr_el0 = read_sysreg_s(SYS_GCSPR_EL0); ++} ++ ++static void gcs_thread_switch(struct task_struct *next) ++{ ++ if (!system_supports_gcs()) ++ return; ++ ++ gcs_preserve_current_state(); ++ ++ gcs_set_el0_mode(next); ++ write_sysreg_s(next->thread.gcspr_el0, SYS_GCSPR_EL0); ++ ++ /* ++ * Ensure that GCS changes are observable by/from other PEs in ++ * case of migration. ++ */ ++ if (task_gcs_el0_enabled(current) || task_gcs_el0_enabled(next)) ++ gcsb_dsync(); ++} ++ ++#else ++ ++static void gcs_thread_switch(struct task_struct *next) ++{ ++} ++ ++#endif ++ + /* + * ARM erratum 1418040 handling, affecting the 32bit view of CNTVCT. 
+ * Ensure access is disabled when switching to a 32bit task, ensure +@@ -533,6 +588,7 @@ struct task_struct *__switch_to(struct task_struct *prev, + ssbs_thread_switch(next); + erratum_1418040_thread_switch(next); + ptrauth_thread_switch_user(next); ++ gcs_thread_switch(next); + + /* + * Complete any pending TLB or cache maintenance on this CPU in case +diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile +index dbd1bc95967d..4e7cb2f02999 100644 +--- a/arch/arm64/mm/Makefile ++++ b/arch/arm64/mm/Makefile +@@ -10,6 +10,7 @@ obj-$(CONFIG_TRANS_TABLE) += trans_pgd.o + obj-$(CONFIG_TRANS_TABLE) += trans_pgd-asm.o + obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o + obj-$(CONFIG_ARM64_MTE) += mteswap.o ++obj-$(CONFIG_ARM64_GCS) += gcs.o + KASAN_SANITIZE_physaddr.o += n + + obj-$(CONFIG_KASAN) += kasan_init.o +diff --git a/arch/arm64/mm/gcs.c b/arch/arm64/mm/gcs.c +new file mode 100644 +index 000000000000..b0a67efc522b +--- /dev/null ++++ b/arch/arm64/mm/gcs.c +@@ -0,0 +1,39 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++ ++#include <linux/mm.h> ++#include <linux/mman.h> ++#include <linux/syscalls.h> ++#include <linux/types.h> ++ ++#include <asm/cpufeature.h> ++#include <asm/page.h> ++ ++/* ++ * Apply the GCS mode configured for the specified task to the ++ * hardware. 
++ */ ++void gcs_set_el0_mode(struct task_struct *task) ++{ ++ u64 gcscre0_el1 = GCSCRE0_EL1_nTR; ++ ++ if (task->thread.gcs_el0_mode & PR_SHADOW_STACK_ENABLE) ++ gcscre0_el1 |= GCSCRE0_EL1_RVCHKEN | GCSCRE0_EL1_PCRSEL; ++ ++ if (task->thread.gcs_el0_mode & PR_SHADOW_STACK_WRITE) ++ gcscre0_el1 |= GCSCRE0_EL1_STREn; ++ ++ if (task->thread.gcs_el0_mode & PR_SHADOW_STACK_PUSH) ++ gcscre0_el1 |= GCSCRE0_EL1_PUSHMEn; ++ ++ write_sysreg_s(gcscre0_el1, SYS_GCSCRE0_EL1); ++} ++ ++void gcs_free(struct task_struct *task) ++{ ++ if (task->thread.gcs_base) ++ vm_munmap(task->thread.gcs_base, task->thread.gcs_size); ++ ++ task->thread.gcspr_el0 = 0; ++ task->thread.gcs_base = 0; ++ task->thread.gcs_size = 0; ++} +-- +2.34.1 + + +From ae084320fd60ebe9212701d1da31a466e3aecb61 Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Wed, 31 May 2023 16:39:35 +0100 +Subject: [PATCH 29/47] arm64/gcs: Ensure that new threads have a GCS + +When a new thread is created by a thread with GCS enabled the GCS needs +to be specified along with the regular stack. clone3() has been +extended to support this case, allowing userspace to explicitly specify +the size and location of the GCS. The specified GCS must have a valid +GCS token at the top of the stack, as though userspace were pivoting to +the new GCS. This will be consumed on use. At present we do not +atomically consume the token, this will be addressed in a future +revision. + +Unfortunately plain clone() is not extensible and existing clone3() +users will not specify a stack so all existing code would be broken if +we mandated specifying the stack explicitly. For compatibility with +these cases and also x86 (which did not initially implement clone3() +support for shadow stacks) if no GCS is specified we will allocate one +so when a thread is created which has GCS enabled allocate one for it. +We follow the extensively discussed x86 implementation and allocate +min(RLIMIT_STACK, 2G). 
Since the GCS only stores the call stack and not +any variables this should be more than sufficient for most applications. + +GCSs allocated via this mechanism will be freed when the thread exits, +those explicitly configured by the user will not. + +Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org> +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + arch/arm64/include/asm/gcs.h | 9 +++ + arch/arm64/kernel/process.c | 29 +++++++++ + arch/arm64/mm/gcs.c | 117 +++++++++++++++++++++++++++++++++++ + 3 files changed, 155 insertions(+) + +diff --git a/arch/arm64/include/asm/gcs.h b/arch/arm64/include/asm/gcs.h +index 04594ef59dad..c1f274fdb9c0 100644 +--- a/arch/arm64/include/asm/gcs.h ++++ b/arch/arm64/include/asm/gcs.h +@@ -8,6 +8,8 @@ + #include <asm/types.h> + #include <asm/uaccess.h> + ++struct kernel_clone_args; ++ + static inline void gcsb_dsync(void) + { + asm volatile(".inst 0xd503227f" : : : "memory"); +@@ -58,6 +60,8 @@ static inline bool task_gcs_el0_enabled(struct task_struct *task) + void gcs_set_el0_mode(struct task_struct *task); + void gcs_free(struct task_struct *task); + void gcs_preserve_current_state(void); ++unsigned long gcs_alloc_thread_stack(struct task_struct *tsk, ++ const struct kernel_clone_args *args); + + #else + +@@ -69,6 +73,11 @@ static inline bool task_gcs_el0_enabled(struct task_struct *task) + static inline void gcs_set_el0_mode(struct task_struct *task) { } + static inline void gcs_free(struct task_struct *task) { } + static inline void gcs_preserve_current_state(void) { } ++static inline unsigned long gcs_alloc_thread_stack(struct task_struct *tsk, ++ const struct kernel_clone_args *args) ++{ ++ return -ENOTSUPP; ++} + + #endif + +diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c +index fd80b43c2969..8bd66cde0a86 100644 +--- a/arch/arm64/kernel/process.c ++++ b/arch/arm64/kernel/process.c +@@ -285,9 +285,32 @@ static void flush_gcs(void) + write_sysreg_s(0, SYS_GCSPR_EL0); + } + ++static int 
copy_thread_gcs(struct task_struct *p, ++ const struct kernel_clone_args *args) ++{ ++ unsigned long gcs; ++ ++ gcs = gcs_alloc_thread_stack(p, args); ++ if (IS_ERR_VALUE(gcs)) ++ return PTR_ERR((void *)gcs); ++ ++ p->thread.gcs_el0_mode = current->thread.gcs_el0_mode; ++ p->thread.gcs_el0_locked = current->thread.gcs_el0_locked; ++ ++ /* Ensure the current state of the GCS is seen by CoW */ ++ gcsb_dsync(); ++ ++ return 0; ++} ++ + #else + + static void flush_gcs(void) { } ++static int copy_thread_gcs(struct task_struct *p, ++ const struct kernel_clone_args *args) ++{ ++ return 0; ++} + + #endif + +@@ -303,6 +326,7 @@ void flush_thread(void) + void arch_release_task_struct(struct task_struct *tsk) + { + fpsimd_release_task(tsk); ++ gcs_free(tsk); + } + + int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) +@@ -369,6 +393,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) + unsigned long stack_start = args->stack; + unsigned long tls = args->tls; + struct pt_regs *childregs = task_pt_regs(p); ++ int ret; + + memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context)); + +@@ -410,6 +435,10 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) + p->thread.uw.tp_value = tls; + p->thread.tpidr2_el0 = 0; + } ++ ++ ret = copy_thread_gcs(p, args); ++ if (ret != 0) ++ return ret; + } else { + /* + * A kthread has no context to ERET to, so ensure any buggy +diff --git a/arch/arm64/mm/gcs.c b/arch/arm64/mm/gcs.c +index b0a67efc522b..3cbc3a3d4bc7 100644 +--- a/arch/arm64/mm/gcs.c ++++ b/arch/arm64/mm/gcs.c +@@ -8,6 +8,113 @@ + #include <asm/cpufeature.h> + #include <asm/page.h> + ++static unsigned long alloc_gcs(unsigned long addr, unsigned long size, ++ unsigned long token_offset, bool set_res_tok) ++{ ++ int flags = MAP_ANONYMOUS | MAP_PRIVATE; ++ struct mm_struct *mm = current->mm; ++ unsigned long mapped_addr, unused; ++ ++ if (addr) ++ flags |= MAP_FIXED_NOREPLACE; ++ ++ 
mmap_write_lock(mm); ++ mapped_addr = do_mmap(NULL, addr, size, PROT_READ | PROT_WRITE, flags, ++ VM_SHADOW_STACK, 0, &unused, NULL); ++ mmap_write_unlock(mm); ++ ++ return mapped_addr; ++} ++ ++static unsigned long gcs_size(unsigned long size) ++{ ++ if (size) ++ return PAGE_ALIGN(size); ++ ++ /* Allocate RLIMIT_STACK/2 with limits of PAGE_SIZE..2G */ ++ size = PAGE_ALIGN(min_t(unsigned long long, ++ rlimit(RLIMIT_STACK) / 2, SZ_2G)); ++ return max(PAGE_SIZE, size); ++} ++ ++static bool gcs_consume_token(struct task_struct *tsk, unsigned long user_addr) ++{ ++ unsigned long expected = GCS_CAP(user_addr); ++ unsigned long val; ++ int ret = 0; ++ ++ /* This should really be an atomic cpmxchg. It is not. */ ++ __get_user_error(val, (__user unsigned long *)user_addr, ret); ++ if (ret != 0) ++ return false; ++ ++ if (val != expected) ++ return false; ++ ++ put_user_gcs(0, (__user unsigned long*)user_addr, &ret); ++ ++ return ret == 0; ++} ++ ++unsigned long gcs_alloc_thread_stack(struct task_struct *tsk, ++ const struct kernel_clone_args *args) ++{ ++ unsigned long addr, size, gcspr_el0; ++ ++ /* If the user specified a GCS use it. */ ++ if (args->shadow_stack_size) { ++ if (!system_supports_gcs()) ++ return (unsigned long)ERR_PTR(-EINVAL); ++ ++ addr = args->shadow_stack; ++ size = args->shadow_stack_size; ++ ++ /* ++ * There should be a token, there might be an end of ++ * stack marker. ++ */ ++ gcspr_el0 = addr + size - (2 * sizeof(u64)); ++ if (!gcs_consume_token(tsk, gcspr_el0)) { ++ gcspr_el0 += sizeof(u64); ++ if (!gcs_consume_token(tsk, gcspr_el0)) ++ return (unsigned long)ERR_PTR(-EINVAL); ++ } ++ ++ /* Userspace is responsible for unmapping */ ++ tsk->thread.gcspr_el0 = gcspr_el0 + sizeof(u64); ++ } else { ++ ++ /* ++ * Otherwise fall back to legacy clone() support and ++ * implicitly allocate a GCS if we need a new one. 
++ */ ++ ++ if (!system_supports_gcs()) ++ return 0; ++ ++ if (!task_gcs_el0_enabled(tsk)) ++ return 0; ++ ++ if ((args->flags & (CLONE_VFORK | CLONE_VM)) != CLONE_VM) { ++ tsk->thread.gcspr_el0 = read_sysreg_s(SYS_GCSPR_EL0); ++ return 0; ++ } ++ ++ size = args->stack_size; ++ ++ size = gcs_size(size); ++ addr = alloc_gcs(0, size, 0, 0); ++ if (IS_ERR_VALUE(addr)) ++ return addr; ++ ++ tsk->thread.gcs_base = addr; ++ tsk->thread.gcs_size = size; ++ tsk->thread.gcspr_el0 = addr + size - sizeof(u64); ++ } ++ ++ return addr; ++} ++ + /* + * Apply the GCS mode configured for the specified task to the + * hardware. +@@ -30,6 +137,16 @@ void gcs_set_el0_mode(struct task_struct *task) + + void gcs_free(struct task_struct *task) + { ++ ++ /* ++ * When fork() with CLONE_VM fails, the child (tsk) already ++ * has a GCS allocated, and exit_thread() calls this function ++ * to free it. In this case the parent (current) and the ++ * child share the same mm struct. ++ */ ++ if (!task->mm || task->mm != current->mm) ++ return; ++ + if (task->thread.gcs_base) + vm_munmap(task->thread.gcs_base, task->thread.gcs_size); + +-- +2.34.1 + + +From 59d903881df39c2f5ec10d818d303b7fd96a90aa Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Wed, 5 Apr 2023 20:14:17 +0100 +Subject: [PATCH 30/47] arm64/gcs: Implement shadow stack prctl() interface + +Implement the architecture neutral prtctl() interface for setting the +shadow stack status, this supports setting and reading the current GCS +configuration for the current thread. + +Userspace can enable basic GCS functionality and additionally also +support for GCS pushes and arbitrary GCS stores. It is expected that +this prctl() will be called very early in application startup, for +example by the dynamic linker, and not subsequently adjusted during +normal operation. 
Users should carefully note that after enabling GCS +for a thread GCS will become active with no call stack so it is not +normally possible to return from the function that invoked the prctl(). + +State is stored per thread, enabling GCS for a thread causes a GCS to be +allocated for that thread. + +Userspace may lock the current GCS configuration by specifying +PR_SHADOW_STACK_ENABLE_LOCK, this prevents any further changes to the +GCS configuration via any means. + +If GCS is not being enabled then all flags other than _LOCK are ignored, +it is not possible to enable stores or pops without enabling GCS. + +When disabling the GCS we do not free the allocated stack, this allows +for inspection of the GCS after disabling as part of fault reporting. +Since it is not an expected use case and since it presents some +complications in determining what to do with previously initialsed data +on the GCS attempts to reenable GCS after this are rejected. This can +be revisted if a use case arises. + +Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org> +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + arch/arm64/include/asm/gcs.h | 22 ++++++++ + arch/arm64/include/asm/processor.h | 1 + + arch/arm64/mm/gcs.c | 81 ++++++++++++++++++++++++++++++ + 3 files changed, 104 insertions(+) + +diff --git a/arch/arm64/include/asm/gcs.h b/arch/arm64/include/asm/gcs.h +index c1f274fdb9c0..48c97e63e56a 100644 +--- a/arch/arm64/include/asm/gcs.h ++++ b/arch/arm64/include/asm/gcs.h +@@ -50,6 +50,9 @@ static inline u64 gcsss2(void) + return Xt; + } + ++#define PR_SHADOW_STACK_SUPPORTED_STATUS_MASK \ ++ (PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE | PR_SHADOW_STACK_PUSH) ++ + #ifdef CONFIG_ARM64_GCS + + static inline bool task_gcs_el0_enabled(struct task_struct *task) +@@ -63,6 +66,20 @@ void gcs_preserve_current_state(void); + unsigned long gcs_alloc_thread_stack(struct task_struct *tsk, + const struct kernel_clone_args *args); + ++static inline int gcs_check_locked(struct 
task_struct *task, ++ unsigned long new_val) ++{ ++ unsigned long cur_val = task->thread.gcs_el0_mode; ++ ++ cur_val &= task->thread.gcs_el0_locked; ++ new_val &= task->thread.gcs_el0_locked; ++ ++ if (cur_val != new_val) ++ return -EBUSY; ++ ++ return 0; ++} ++ + #else + + static inline bool task_gcs_el0_enabled(struct task_struct *task) +@@ -78,6 +95,11 @@ static inline unsigned long gcs_alloc_thread_stack(struct task_struct *tsk, + { + return -ENOTSUPP; + } ++static inline int gcs_check_locked(struct task_struct *task, ++ unsigned long new_val) ++{ ++ return 0; ++} + + #endif + +diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h +index 6fc6dcbd494c..6a3091ec0f03 100644 +--- a/arch/arm64/include/asm/processor.h ++++ b/arch/arm64/include/asm/processor.h +@@ -184,6 +184,7 @@ struct thread_struct { + u64 tpidr2_el0; + #ifdef CONFIG_ARM64_GCS + unsigned int gcs_el0_mode; ++ unsigned int gcs_el0_locked; + u64 gcspr_el0; + u64 gcs_base; + u64 gcs_size; +diff --git a/arch/arm64/mm/gcs.c b/arch/arm64/mm/gcs.c +index 3cbc3a3d4bc7..d5b593d9d9bd 100644 +--- a/arch/arm64/mm/gcs.c ++++ b/arch/arm64/mm/gcs.c +@@ -154,3 +154,84 @@ void gcs_free(struct task_struct *task) + task->thread.gcs_base = 0; + task->thread.gcs_size = 0; + } ++ ++int arch_set_shadow_stack_status(struct task_struct *task, unsigned long arg) ++{ ++ unsigned long gcs, size; ++ int ret; ++ ++ if (!system_supports_gcs()) ++ return -EINVAL; ++ ++ if (is_compat_thread(task_thread_info(task))) ++ return -EINVAL; ++ ++ /* Reject unknown flags */ ++ if (arg & ~PR_SHADOW_STACK_SUPPORTED_STATUS_MASK) ++ return -EINVAL; ++ ++ ret = gcs_check_locked(task, arg); ++ if (ret != 0) ++ return ret; ++ ++ /* If we are enabling GCS then make sure we have a stack */ ++ if (arg & PR_SHADOW_STACK_ENABLE) { ++ if (!task_gcs_el0_enabled(task)) { ++ /* Do not allow GCS to be reenabled */ ++ if (task->thread.gcs_base) ++ return -EINVAL; ++ ++ if (task != current) ++ return -EBUSY; ++ ++ size = 
gcs_size(0); ++ gcs = alloc_gcs(0, size, 0, 0); ++ if (!gcs) ++ return -ENOMEM; ++ ++ task->thread.gcspr_el0 = gcs + size - sizeof(u64); ++ task->thread.gcs_base = gcs; ++ task->thread.gcs_size = size; ++ if (task == current) ++ write_sysreg_s(task->thread.gcspr_el0, ++ SYS_GCSPR_EL0); ++ ++ } ++ } ++ ++ task->thread.gcs_el0_mode = arg; ++ if (task == current) ++ gcs_set_el0_mode(task); ++ ++ return 0; ++} ++ ++int arch_get_shadow_stack_status(struct task_struct *task, ++ unsigned long __user *arg) ++{ ++ if (!system_supports_gcs()) ++ return -EINVAL; ++ ++ if (is_compat_thread(task_thread_info(task))) ++ return -EINVAL; ++ ++ return put_user(task->thread.gcs_el0_mode, arg); ++} ++ ++int arch_lock_shadow_stack_status(struct task_struct *task, ++ unsigned long arg) ++{ ++ if (!system_supports_gcs()) ++ return -EINVAL; ++ ++ if (is_compat_thread(task_thread_info(task))) ++ return -EINVAL; ++ ++ /* ++ * We support locking unknown bits so applications can prevent ++ * any changes in a future proof manner. ++ */ ++ task->thread.gcs_el0_locked |= arg; ++ ++ return 0; ++} +-- +2.34.1 + + +From 2f3d32fbac2eec5c04aca5e181a52f8d2bca9383 Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Wed, 12 Apr 2023 22:29:17 +0100 +Subject: [PATCH 31/47] arm64/mm: Implement map_shadow_stack() + +As discussed extensively in the changelog for the addition of this +syscall on x86 ("x86/shstk: Introduce map_shadow_stack syscall") the +existing mmap() and madvise() syscalls do not map entirely well onto the +security requirements for guarded control stacks since they lead to +windows where memory is allocated but not yet protected or stacks which +are not properly and safely initialised. Instead a new syscall +map_shadow_stack() has been defined which allocates and initialises a +shadow stack page. + +Implement this for arm64. 
Two flags are provided, allowing applications +to request that the stack be initialised with a valid cap token at the +top of the stack and optionally also an end of stack marker above that. +We support requesting an end of stack marker alone but since this is a +NULL pointer it is indistinguishable from not initialising anything by +itself. + +Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org> +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + arch/arm64/mm/gcs.c | 61 +++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 61 insertions(+) + +diff --git a/arch/arm64/mm/gcs.c b/arch/arm64/mm/gcs.c +index d5b593d9d9bd..e238bc9c057d 100644 +--- a/arch/arm64/mm/gcs.c ++++ b/arch/arm64/mm/gcs.c +@@ -115,6 +115,67 @@ unsigned long gcs_alloc_thread_stack(struct task_struct *tsk, + return addr; + } + ++SYSCALL_DEFINE3(map_shadow_stack, unsigned long, addr, unsigned long, size, unsigned int, flags) ++{ ++ unsigned long alloc_size; ++ unsigned long __user *cap_ptr; ++ unsigned long cap_val; ++ int ret = 0; ++ int cap_offset; ++ ++ if (!system_supports_gcs()) ++ return -EOPNOTSUPP; ++ ++ if (flags & ~(SHADOW_STACK_SET_TOKEN | SHADOW_STACK_SET_MARKER)) ++ return -EINVAL; ++ ++ if (addr && (addr % PAGE_SIZE)) ++ return -EINVAL; ++ ++ if (size == 8 || size % 8) ++ return -EINVAL; ++ ++ /* ++ * An overflow would result in attempting to write the restore token ++ * to the wrong location. Not catastrophic, but just return the right ++ * error code and block it. ++ */ ++ alloc_size = PAGE_ALIGN(size); ++ if (alloc_size < size) ++ return -EOVERFLOW; ++ ++ addr = alloc_gcs(addr, alloc_size, 0, false); ++ if (IS_ERR_VALUE(addr)) ++ return addr; ++ ++ /* ++ * Put a cap token at the end of the allocated region so it ++ * can be switched to. ++ */ ++ if (flags & SHADOW_STACK_SET_TOKEN) { ++ /* Leave an extra empty frame as a top of stack marker? 
*/ ++ if (flags & SHADOW_STACK_SET_MARKER) ++ cap_offset = 2; ++ else ++ cap_offset = 1; ++ ++ cap_ptr = (unsigned long __user *)(addr + size - ++ (cap_offset * sizeof(unsigned long))); ++ cap_val = GCS_CAP(cap_ptr); ++ ++ put_user_gcs(cap_val, cap_ptr, &ret); ++ if (ret != 0) { ++ vm_munmap(addr, size); ++ return -EFAULT; ++ } ++ ++ /* Ensure the new cap is viaible for GCS */ ++ gcsb_dsync(); ++ } ++ ++ return addr; ++} ++ + /* + * Apply the GCS mode configured for the specified task to the + * hardware. +-- +2.34.1 + + +From 3251d196916b7a331713cafaaa0265b7205c93a0 Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Wed, 21 Jun 2023 01:28:09 +0100 +Subject: [PATCH 32/47] arm64/signal: Set up and restore the GCS context for + signal handlers + +When invoking a signal handler we use the GCS configuration and stack +for the current thread. + +Since we implement signal return by calling the signal handler with a +return address set up pointing to a trampoline in the vDSO we need to +also configure any active GCS for this by pushing a frame for the +trampoline onto the GCS. If we do not do this then signal return will +generate a GCS protection fault. + +In order to guard against attempts to bypass GCS protections via signal +return we only allow returning with GCSPR_EL0 pointing to an address +where it was previously preempted by a signal. We do this by pushing a +cap onto the GCS, this takes the form of an architectural GCS cap token +with the top bit set and token type of 0 which we add on signal entry +and validate and pop off on signal return. The combination of the top +bit being set and the token type mean that this can't be interpreted as +a valid token or address. 
+ +Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org> +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + arch/arm64/include/asm/gcs.h | 1 + + arch/arm64/kernel/signal.c | 134 +++++++++++++++++++++++++++++++++-- + arch/arm64/mm/gcs.c | 1 + + 3 files changed, 131 insertions(+), 5 deletions(-) + +diff --git a/arch/arm64/include/asm/gcs.h b/arch/arm64/include/asm/gcs.h +index 48c97e63e56a..f50660603ecf 100644 +--- a/arch/arm64/include/asm/gcs.h ++++ b/arch/arm64/include/asm/gcs.h +@@ -9,6 +9,7 @@ + #include <asm/uaccess.h> + + struct kernel_clone_args; ++struct ksignal; + + static inline void gcsb_dsync(void) + { +diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c +index 425b1bc17a3f..7a063d3e2a8d 100644 +--- a/arch/arm64/kernel/signal.c ++++ b/arch/arm64/kernel/signal.c +@@ -25,6 +25,7 @@ + #include <asm/elf.h> + #include <asm/exception.h> + #include <asm/cacheflush.h> ++#include <asm/gcs.h> + #include <asm/ucontext.h> + #include <asm/unistd.h> + #include <asm/fpsimd.h> +@@ -34,6 +35,37 @@ + #include <asm/traps.h> + #include <asm/vdso.h> + ++#ifdef CONFIG_ARM64_GCS ++/* Extra bit set in the address distinguishing a signal cap token. */ ++#define GCS_SIGNAL_CAP_FLAG BIT(63) ++ ++#define GCS_SIGNAL_CAP(addr) ((((unsigned long)addr) & GCS_CAP_ADDR_MASK) | \ ++ GCS_SIGNAL_CAP_FLAG) ++ ++static bool gcs_signal_cap_valid(u64 addr, u64 val) ++{ ++ /* ++ * The top bit should be set, this is an invalid address for ++ * EL0 and will only be set for caps created by signals. ++ */ ++ if (!(val & GCS_SIGNAL_CAP_FLAG)) ++ return false; ++ ++ /* The rest should be a standard architectural cap token. */ ++ val &= ~GCS_SIGNAL_CAP_FLAG; ++ ++ /* The cap must not have a token set */ ++ if (GCS_CAP_TOKEN(val) != 0) ++ return false; ++ ++ /* The cap must store the VA the cap was stored at */ ++ if (GCS_CAP_ADDR(addr) != GCS_CAP_ADDR(val)) ++ return false; ++ ++ return true; ++} ++#endif ++ + /* + * Do a signal return; undo the signal stack. 
These are aligned to 128-bit. + */ +@@ -815,6 +847,50 @@ static int restore_sigframe(struct pt_regs *regs, + return err; + } + ++#ifdef CONFIG_ARM64_GCS ++static int gcs_restore_signal(void) ++{ ++ u64 gcspr_el0, cap; ++ int ret; ++ ++ if (!system_supports_gcs()) ++ return 0; ++ ++ if (!(current->thread.gcs_el0_mode & PR_SHADOW_STACK_ENABLE)) ++ return 0; ++ ++ gcspr_el0 = read_sysreg_s(SYS_GCSPR_EL0); ++ ++ /* ++ * GCSPR_EL0 should be pointing at a capped GCS, read the cap... ++ */ ++ gcsb_dsync(); ++ ret = copy_from_user(&cap, (__user void*)gcspr_el0, sizeof(cap)); ++ if (ret) ++ return -EFAULT; ++ ++ /* ++ * ...then check that the cap is the actual GCS before ++ * restoring it. ++ */ ++ if (!gcs_signal_cap_valid(gcspr_el0, cap)) ++ return -EINVAL; ++ ++ /* Invalidate the token to prevent reuse */ ++ put_user_gcs(0, (__user void*)gcspr_el0, &ret); ++ if (ret != 0) ++ return -EFAULT; ++ ++ current->thread.gcspr_el0 = gcspr_el0 + sizeof(cap); ++ write_sysreg_s(current->thread.gcspr_el0, SYS_GCSPR_EL0); ++ ++ return 0; ++} ++ ++#else ++static int gcs_restore_signal(void) { return 0; } ++#endif ++ + SYSCALL_DEFINE0(rt_sigreturn) + { + struct pt_regs *regs = current_pt_regs(); +@@ -841,6 +917,9 @@ SYSCALL_DEFINE0(rt_sigreturn) + if (restore_altstack(&frame->uc.uc_stack)) + goto badframe; + ++ if (gcs_restore_signal()) ++ goto badframe; ++ + return regs->regs[0]; + + badframe: +@@ -1071,7 +1150,50 @@ static int get_sigframe(struct rt_sigframe_user_layout *user, + return 0; + } + +-static void setup_return(struct pt_regs *regs, struct k_sigaction *ka, ++#ifdef CONFIG_ARM64_GCS ++ ++static int gcs_signal_entry(__sigrestore_t sigtramp, struct ksignal *ksig) ++{ ++ unsigned long __user *gcspr_el0; ++ int ret = 0; ++ ++ if (!system_supports_gcs()) ++ return 0; ++ ++ if (!task_gcs_el0_enabled(current)) ++ return 0; ++ ++ /* ++ * We are entering a signal handler, current register state is ++ * active. 
++ */ ++ gcspr_el0 = (unsigned long __user *)read_sysreg_s(SYS_GCSPR_EL0); ++ ++ /* ++ * Push a cap and the GCS entry for the trampoline onto the GCS. ++ */ ++ put_user_gcs((unsigned long)sigtramp, gcspr_el0 - 2, &ret); ++ put_user_gcs(GCS_SIGNAL_CAP(gcspr_el0 - 1), gcspr_el0 - 1, &ret); ++ if (ret != 0) ++ return ret; ++ ++ gcsb_dsync(); ++ ++ gcspr_el0 -= 2; ++ write_sysreg_s((unsigned long)gcspr_el0, SYS_GCSPR_EL0); ++ ++ return 0; ++} ++#else ++ ++static int gcs_signal_entry(__sigrestore_t sigtramp, struct ksignal *ksig) ++{ ++ return 0; ++} ++ ++#endif ++ ++static int setup_return(struct pt_regs *regs, struct ksignal *ksig, + struct rt_sigframe_user_layout *user, int usig) + { + __sigrestore_t sigtramp; +@@ -1079,7 +1201,7 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka, + regs->regs[0] = usig; + regs->sp = (unsigned long)user->sigframe; + regs->regs[29] = (unsigned long)&user->next_frame->fp; +- regs->pc = (unsigned long)ka->sa.sa_handler; ++ regs->pc = (unsigned long)ksig->ka.sa.sa_handler; + + /* + * Signal delivery is a (wacky) indirect function call in +@@ -1119,12 +1241,14 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka, + sme_smstop(); + } + +- if (ka->sa.sa_flags & SA_RESTORER) +- sigtramp = ka->sa.sa_restorer; ++ if (ksig->ka.sa.sa_flags & SA_RESTORER) ++ sigtramp = ksig->ka.sa.sa_restorer; + else + sigtramp = VDSO_SYMBOL(current->mm->context.vdso, sigtramp); + + regs->regs[30] = (unsigned long)sigtramp; ++ ++ return gcs_signal_entry(sigtramp, ksig); + } + + static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set, +@@ -1147,7 +1271,7 @@ static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set, + err |= __save_altstack(&frame->uc.uc_stack, regs->sp); + err |= setup_sigframe(&user, regs, set); + if (err == 0) { +- setup_return(regs, &ksig->ka, &user, usig); ++ err = setup_return(regs, ksig, &user, usig); + if (ksig->ka.sa.sa_flags & SA_SIGINFO) { + err |= 
copy_siginfo_to_user(&frame->info, &ksig->info); + regs->regs[1] = (unsigned long)&frame->info; +diff --git a/arch/arm64/mm/gcs.c b/arch/arm64/mm/gcs.c +index e238bc9c057d..e6f505c9bf4a 100644 +--- a/arch/arm64/mm/gcs.c ++++ b/arch/arm64/mm/gcs.c +@@ -6,6 +6,7 @@ + #include <linux/types.h> + + #include <asm/cpufeature.h> ++#include <asm/gcs.h> + #include <asm/page.h> + + static unsigned long alloc_gcs(unsigned long addr, unsigned long size, +-- +2.34.1 + + +From 4f8eb2f612f1df2ca8033dda6114e0e228befdaa Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Thu, 1 Jun 2023 16:35:46 +0100 +Subject: [PATCH 33/47] arm64/signal: Expose GCS state in signal frames + +Add a context for the GCS state and include it in the signal context when +running on a system that supports GCS. We reuse the same flags that the +prctl() uses to specify which GCS features are enabled and also provide the +current GCS pointer. + +We do not support enabling GCS via signal return, there is a conflict +between specifying GCSPR_EL0 and allocation of a new GCS and this is not +an ancticipated use case. We also enforce GCS configuration locking on +signal return. 
+ +Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org> +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + arch/arm64/include/uapi/asm/sigcontext.h | 9 ++ + arch/arm64/kernel/signal.c | 108 +++++++++++++++++++++++ + 2 files changed, 117 insertions(+) + +diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h +index f23c1dc3f002..7b66d245f2d2 100644 +--- a/arch/arm64/include/uapi/asm/sigcontext.h ++++ b/arch/arm64/include/uapi/asm/sigcontext.h +@@ -168,6 +168,15 @@ struct zt_context { + __u16 __reserved[3]; + }; + ++#define GCS_MAGIC 0x47435300 ++ ++struct gcs_context { ++ struct _aarch64_ctx head; ++ __u64 gcspr; ++ __u64 features_enabled; ++ __u64 reserved; ++}; ++ + #endif /* !__ASSEMBLY__ */ + + #include <asm/sve_context.h> +diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c +index 7a063d3e2a8d..5b9a45a45f4b 100644 +--- a/arch/arm64/kernel/signal.c ++++ b/arch/arm64/kernel/signal.c +@@ -88,6 +88,7 @@ struct rt_sigframe_user_layout { + + unsigned long fpsimd_offset; + unsigned long esr_offset; ++ unsigned long gcs_offset; + unsigned long sve_offset; + unsigned long tpidr2_offset; + unsigned long za_offset; +@@ -214,6 +215,8 @@ struct user_ctxs { + u32 za_size; + struct zt_context __user *zt; + u32 zt_size; ++ struct gcs_context __user *gcs; ++ u32 gcs_size; + }; + + static int preserve_fpsimd_context(struct fpsimd_context __user *ctx) +@@ -606,6 +609,83 @@ extern int restore_zt_context(struct user_ctxs *user); + + #endif /* ! CONFIG_ARM64_SME */ + ++#ifdef CONFIG_ARM64_GCS ++ ++static int preserve_gcs_context(struct gcs_context __user *ctx) ++{ ++ int err = 0; ++ u64 gcspr; ++ ++ /* ++ * We will add a cap token to the frame, include it in the ++ * GCSPR_EL0 we report to support stack switching via ++ * sigreturn. 
++ */ ++ gcs_preserve_current_state(); ++ gcspr = current->thread.gcspr_el0; ++ if (task_gcs_el0_enabled(current)) ++ gcspr -= 8; ++ ++ __put_user_error(GCS_MAGIC, &ctx->head.magic, err); ++ __put_user_error(sizeof(*ctx), &ctx->head.size, err); ++ __put_user_error(gcspr, &ctx->gcspr, err); ++ __put_user_error(0, &ctx->reserved, err); ++ __put_user_error(current->thread.gcs_el0_mode, ++ &ctx->features_enabled, err); ++ ++ return err; ++} ++ ++static int restore_gcs_context(struct user_ctxs *user) ++{ ++ u64 gcspr, enabled; ++ int err = 0; ++ ++ if (user->gcs_size != sizeof(*user->gcs)) ++ return -EINVAL; ++ ++ __get_user_error(gcspr, &user->gcs->gcspr, err); ++ __get_user_error(enabled, &user->gcs->features_enabled, err); ++ if (err) ++ return err; ++ ++ /* Don't allow unknown modes */ ++ if (enabled & ~PR_SHADOW_STACK_SUPPORTED_STATUS_MASK) ++ return -EINVAL; ++ ++ err = gcs_check_locked(current, enabled); ++ if (err != 0) ++ return err; ++ ++ /* Don't allow enabling */ ++ if (!task_gcs_el0_enabled(current) && ++ (enabled & PR_SHADOW_STACK_ENABLE)) ++ return -EINVAL; ++ ++ /* If we are disabling disable everything */ ++ if (!(enabled & PR_SHADOW_STACK_ENABLE)) ++ enabled = 0; ++ ++ current->thread.gcs_el0_mode = enabled; ++ ++ /* ++ * We let userspace set GCSPR_EL0 to anything here, we will ++ * validate later in gcs_restore_signal(). ++ */ ++ current->thread.gcspr_el0 = gcspr; ++ write_sysreg_s(current->thread.gcspr_el0, SYS_GCSPR_EL0); ++ ++ return 0; ++} ++ ++#else /* ! CONFIG_ARM64_GCS */ ++ ++/* Turn any non-optimised out attempts to use these into a link error: */ ++extern int preserve_gcs_context(void __user *ctx); ++extern int restore_gcs_context(struct user_ctxs *user); ++ ++#endif /* ! 
CONFIG_ARM64_GCS */ ++ + static int parse_user_sigframe(struct user_ctxs *user, + struct rt_sigframe __user *sf) + { +@@ -622,6 +702,7 @@ static int parse_user_sigframe(struct user_ctxs *user, + user->tpidr2 = NULL; + user->za = NULL; + user->zt = NULL; ++ user->gcs = NULL; + + if (!IS_ALIGNED((unsigned long)base, 16)) + goto invalid; +@@ -716,6 +797,17 @@ static int parse_user_sigframe(struct user_ctxs *user, + user->zt_size = size; + break; + ++ case GCS_MAGIC: ++ if (!system_supports_gcs()) ++ goto invalid; ++ ++ if (user->gcs) ++ goto invalid; ++ ++ user->gcs = (struct gcs_context __user *)head; ++ user->gcs_size = size; ++ break; ++ + case EXTRA_MAGIC: + if (have_extra_context) + goto invalid; +@@ -835,6 +927,9 @@ static int restore_sigframe(struct pt_regs *regs, + err = restore_fpsimd_context(&user); + } + ++ if (err == 0 && system_supports_gcs() && user.gcs) ++ err = restore_gcs_context(&user); ++ + if (err == 0 && system_supports_tpidr2() && user.tpidr2) + err = restore_tpidr2_context(&user); + +@@ -954,6 +1049,13 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user, + return err; + } + ++ if (system_supports_gcs()) { ++ err = sigframe_alloc(user, &user->gcs_offset, ++ sizeof(struct gcs_context)); ++ if (err) ++ return err; ++ } ++ + if (system_supports_sve() || system_supports_sme()) { + unsigned int vq = 0; + +@@ -1047,6 +1149,12 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user, + __put_user_error(current->thread.fault_code, &esr_ctx->esr, err); + } + ++ if (system_supports_gcs() && err == 0 && user->gcs_offset) { ++ struct gcs_context __user *gcs_ctx = ++ apply_user_offset(user, user->gcs_offset); ++ err |= preserve_gcs_context(gcs_ctx); ++ } ++ + /* Scalable Vector Extension state (including streaming), if present */ + if ((system_supports_sve() || system_supports_sme()) && + err == 0 && user->sve_offset) { +-- +2.34.1 + + +From 26cb1cbfb9f942ed9d4194d3cc8917e0c636811b Mon Sep 17 00:00:00 2001 +From: Mark Brown 
<broonie@kernel.org> +Date: Fri, 30 Jun 2023 17:32:38 +0100 +Subject: [PATCH 34/47] arm64/ptrace: Expose GCS via ptrace and core files + +Provide a new register type NT_ARM_GCS reporting the current GCS mode +and pointer for EL0. Due to the interactions with allocation and +deallocation of Guarded Control Stacks we do not permit any changes to +the GCS mode via ptrace, only GCSPR_EL0 may be changed. + +Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org> +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + arch/arm64/include/uapi/asm/ptrace.h | 8 ++++ + arch/arm64/kernel/ptrace.c | 59 ++++++++++++++++++++++++++++ + include/uapi/linux/elf.h | 1 + + 3 files changed, 68 insertions(+) + +diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h +index 7fa2f7036aa7..0f39ba4f3efd 100644 +--- a/arch/arm64/include/uapi/asm/ptrace.h ++++ b/arch/arm64/include/uapi/asm/ptrace.h +@@ -324,6 +324,14 @@ struct user_za_header { + #define ZA_PT_SIZE(vq) \ + (ZA_PT_ZA_OFFSET + ZA_PT_ZA_SIZE(vq)) + ++/* GCS state (NT_ARM_GCS) */ ++ ++struct user_gcs { ++ __u64 features_enabled; ++ __u64 features_locked; ++ __u64 gcspr_el0; ++}; ++ + #endif /* __ASSEMBLY__ */ + + #endif /* _UAPI__ASM_PTRACE_H */ +diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c +index e3bef38fc2e2..e291c0145e94 100644 +--- a/arch/arm64/kernel/ptrace.c ++++ b/arch/arm64/kernel/ptrace.c +@@ -34,6 +34,7 @@ + #include <asm/cpufeature.h> + #include <asm/debug-monitors.h> + #include <asm/fpsimd.h> ++#include <asm/gcs.h> + #include <asm/mte.h> + #include <asm/pointer_auth.h> + #include <asm/stacktrace.h> +@@ -1411,6 +1412,51 @@ static int tagged_addr_ctrl_set(struct task_struct *target, const struct + } + #endif + ++#ifdef CONFIG_ARM64_GCS ++static int gcs_get(struct task_struct *target, ++ const struct user_regset *regset, ++ struct membuf to) ++{ ++ struct user_gcs user_gcs; ++ ++ if (target == current) ++ gcs_preserve_current_state(); ++ ++ 
user_gcs.features_enabled = target->thread.gcs_el0_mode; ++ user_gcs.features_locked = target->thread.gcs_el0_locked; ++ user_gcs.gcspr_el0 = target->thread.gcspr_el0; ++ ++ return membuf_write(&to, &user_gcs, sizeof(user_gcs)); ++} ++ ++static int gcs_set(struct task_struct *target, const struct ++ user_regset *regset, unsigned int pos, ++ unsigned int count, const void *kbuf, const ++ void __user *ubuf) ++{ ++ int ret; ++ struct user_gcs user_gcs; ++ ++ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &user_gcs, 0, -1); ++ if (ret) ++ return ret; ++ ++ if (user_gcs.features_enabled & ~PR_SHADOW_STACK_SUPPORTED_STATUS_MASK) ++ return -EINVAL; ++ ++ /* Do not allow enable via ptrace */ ++ if ((user_gcs.features_enabled & PR_SHADOW_STACK_ENABLE) && ++ !(target->thread.gcs_el0_mode & PR_SHADOW_STACK_ENABLE)) ++ return -EBUSY; ++ ++ target->thread.gcs_el0_mode = user_gcs.features_enabled; ++ target->thread.gcs_el0_locked = user_gcs.features_locked; ++ target->thread.gcspr_el0 = user_gcs.gcspr_el0; ++ ++ return 0; ++} ++#endif ++ + enum aarch64_regset { + REGSET_GPR, + REGSET_FPR, +@@ -1439,6 +1485,9 @@ enum aarch64_regset { + #ifdef CONFIG_ARM64_TAGGED_ADDR_ABI + REGSET_TAGGED_ADDR_CTRL, + #endif ++#ifdef CONFIG_ARM64_GCS ++ REGSET_GCS, ++#endif + }; + + static const struct user_regset aarch64_regsets[] = { +@@ -1590,6 +1639,16 @@ static const struct user_regset aarch64_regsets[] = { + .set = tagged_addr_ctrl_set, + }, + #endif ++#ifdef CONFIG_ARM64_GCS ++ [REGSET_GCS] = { ++ .core_note_type = NT_ARM_GCS, ++ .n = sizeof(struct user_gcs) / sizeof(u64), ++ .size = sizeof(u64), ++ .align = sizeof(u64), ++ .regset_get = gcs_get, ++ .set = gcs_set, ++ }, ++#endif + }; + + static const struct user_regset_view user_aarch64_view = { +diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h +index 9417309b7230..436dfc359f61 100644 +--- a/include/uapi/linux/elf.h ++++ b/include/uapi/linux/elf.h +@@ -440,6 +440,7 @@ typedef struct elf64_shdr { + #define NT_ARM_SSVE 
0x40b /* ARM Streaming SVE registers */ + #define NT_ARM_ZA 0x40c /* ARM SME ZA registers */ + #define NT_ARM_ZT 0x40d /* ARM SME ZT registers */ ++#define NT_ARM_GCS 0x40e /* ARM GCS state */ + #define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */ + #define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */ + #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */ +-- +2.34.1 + + +From 2f6d799ed18f86452b2c1a07ec1a65a2843cab7e Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Tue, 7 Mar 2023 22:34:05 +0000 +Subject: [PATCH 35/47] arm64: Add Kconfig for Guarded Control Stack (GCS) + +Provide a Kconfig option allowing the user to select if GCS support is +built into the kernel. + +Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org> +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + arch/arm64/Kconfig | 20 ++++++++++++++++++++ + 1 file changed, 20 insertions(+) + +diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig +index aa7c1d435139..e0048e4660cf 100644 +--- a/arch/arm64/Kconfig ++++ b/arch/arm64/Kconfig +@@ -2098,6 +2098,26 @@ config ARM64_EPAN + if the cpu does not implement the feature. + endmenu # "ARMv8.7 architectural features" + ++menu "v9.4 architectural features" ++ ++config ARM64_GCS ++ bool "Enable support for Guarded Control Stack (GCS)" ++ default y ++ select ARCH_HAS_USER_SHADOW_STACK ++ select ARCH_USES_HIGH_VMA_FLAGS ++ help ++ Guarded Control Stack (GCS) provides support for a separate ++ stack with restricted access which contains only return ++ addresses. This can be used to harden against some attacks ++ by comparing return address used by the program with what is ++ stored in the GCS, and may also be used to efficiently obtain ++ the call stack for applications such as profiling. ++ ++ The feature is detected at runtime, and will remain disabled ++ if the system does not implement the feature. 
++ ++endmenu # "v9.4 architectural features" ++ + config ARM64_SVE + bool "ARM Scalable Vector Extension support" + default y +-- +2.34.1 + + +From 56346b042191155ca2397d01de908dc8954592e3 Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Mon, 20 Mar 2023 18:24:51 +0000 +Subject: [PATCH 36/47] kselftest/arm64: Verify the GCS hwcap + +Add coverage of the GCS hwcap to the hwcap selftest, using a read of +GCSPR_EL0 to generate SIGILL without having to worry about enabling GCS. + +Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org> +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + tools/testing/selftests/arm64/abi/hwcap.c | 19 +++++++++++++++++++ + 1 file changed, 19 insertions(+) + +diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c +index 1189e77c8152..bc9e3250a9df 100644 +--- a/tools/testing/selftests/arm64/abi/hwcap.c ++++ b/tools/testing/selftests/arm64/abi/hwcap.c +@@ -63,6 +63,17 @@ static void fp_sigill(void) + asm volatile("fmov s0, #1"); + } + ++static void gcs_sigill(void) ++{ ++ unsigned long *gcspr; ++ ++ asm volatile( ++ "mrs %0, S3_3_C2_C5_1" ++ : "=r" (gcspr) ++ : ++ : "cc"); ++} ++ + static void ilrcpc_sigill(void) + { + /* LDAPUR W0, [SP, #8] */ +@@ -360,6 +371,14 @@ static const struct hwcap_data { + .cpuinfo = "fp", + .sigill_fn = fp_sigill, + }, ++ { ++ .name = "GCS", ++ .at_hwcap = AT_HWCAP2, ++ .hwcap_bit = HWCAP2_GCS, ++ .cpuinfo = "gcs", ++ .sigill_fn = gcs_sigill, ++ .sigill_reliable = true, ++ }, + { + .name = "JSCVT", + .at_hwcap = AT_HWCAP, +-- +2.34.1 + + +From 38f47a23529268f26d7378710065e378e4022a95 Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Mon, 29 Jan 2024 22:45:01 +0000 +Subject: [PATCH 37/47] kselftest: Provide shadow stack enable helpers for + arm64 + +Allow test programs to use the shadow stack helpers on arm64. 
+ +Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org> +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + tools/testing/selftests/ksft_shstk.h | 37 ++++++++++++++++++++++++++++ + 1 file changed, 37 insertions(+) + +diff --git a/tools/testing/selftests/ksft_shstk.h b/tools/testing/selftests/ksft_shstk.h +index 85d0747c1802..223e24b4eb80 100644 +--- a/tools/testing/selftests/ksft_shstk.h ++++ b/tools/testing/selftests/ksft_shstk.h +@@ -50,6 +50,43 @@ static inline __attribute__((always_inline)) void enable_shadow_stack(void) + + #endif + ++#ifdef __aarch64__ ++#define PR_SET_SHADOW_STACK_STATUS 72 ++# define PR_SHADOW_STACK_ENABLE (1UL << 0) ++ ++#define my_syscall2(num, arg1, arg2) \ ++({ \ ++ register long _num __asm__ ("x8") = (num); \ ++ register long _arg1 __asm__ ("x0") = (long)(arg1); \ ++ register long _arg2 __asm__ ("x1") = (long)(arg2); \ ++ register long _arg3 __asm__ ("x2") = 0; \ ++ register long _arg4 __asm__ ("x3") = 0; \ ++ register long _arg5 __asm__ ("x4") = 0; \ ++ \ ++ __asm__ volatile ( \ ++ "svc #0\n" \ ++ : "=r"(_arg1) \ ++ : "r"(_arg1), "r"(_arg2), \ ++ "r"(_arg3), "r"(_arg4), \ ++ "r"(_arg5), "r"(_num) \ ++ : "memory", "cc" \ ++ ); \ ++ _arg1; \ ++}) ++ ++#define ENABLE_SHADOW_STACK ++static inline __attribute__((always_inline)) void enable_shadow_stack(void) ++{ ++ int ret; ++ ++ ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, ++ PR_SHADOW_STACK_ENABLE); ++ if (ret == 0) ++ shadow_stack_enabled = true; ++} ++ ++#endif ++ + #ifndef __NR_map_shadow_stack + #define __NR_map_shadow_stack 453 + #endif +-- +2.34.1 + + +From 99d2e9efa5194a3fa58d64e8c89657952607ca49 Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Wed, 26 Apr 2023 19:05:43 +0100 +Subject: [PATCH 38/47] kselftest/arm64: Add GCS as a detected feature in the + signal tests + +In preparation for testing GCS related signal handling add it as a feature +we check for in the signal handling support code. 
+ +Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org> +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + tools/testing/selftests/arm64/signal/test_signals.h | 2 ++ + tools/testing/selftests/arm64/signal/test_signals_utils.c | 3 +++ + 2 files changed, 5 insertions(+) + +diff --git a/tools/testing/selftests/arm64/signal/test_signals.h b/tools/testing/selftests/arm64/signal/test_signals.h +index 1e6273d81575..7ada43688c02 100644 +--- a/tools/testing/selftests/arm64/signal/test_signals.h ++++ b/tools/testing/selftests/arm64/signal/test_signals.h +@@ -35,6 +35,7 @@ enum { + FSME_BIT, + FSME_FA64_BIT, + FSME2_BIT, ++ FGCS_BIT, + FMAX_END + }; + +@@ -43,6 +44,7 @@ enum { + #define FEAT_SME (1UL << FSME_BIT) + #define FEAT_SME_FA64 (1UL << FSME_FA64_BIT) + #define FEAT_SME2 (1UL << FSME2_BIT) ++#define FEAT_GCS (1UL << FGCS_BIT) + + /* + * A descriptor used to describe and configure a test case. +diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c +index 0dc948db3a4a..89ef95c1af0e 100644 +--- a/tools/testing/selftests/arm64/signal/test_signals_utils.c ++++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c +@@ -30,6 +30,7 @@ static char const *const feats_names[FMAX_END] = { + " SME ", + " FA64 ", + " SME2 ", ++ " GCS ", + }; + + #define MAX_FEATS_SZ 128 +@@ -329,6 +330,8 @@ int test_init(struct tdescr *td) + td->feats_supported |= FEAT_SME_FA64; + if (getauxval(AT_HWCAP2) & HWCAP2_SME2) + td->feats_supported |= FEAT_SME2; ++ if (getauxval(AT_HWCAP2) & HWCAP2_GCS) ++ td->feats_supported |= FEAT_GCS; + if (feats_ok(td)) { + if (td->feats_required & td->feats_supported) + fprintf(stderr, +-- +2.34.1 + + +From 4d1754e3575bc4546e633d957977edb42718fd79 Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Thu, 1 Jun 2023 17:52:08 +0100 +Subject: [PATCH 39/47] kselftest/arm64: Add framework support for GCS to + signal handling tests + +Teach the framework 
about the GCS signal context, avoiding warnings on +the unknown context. + +Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org> +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + tools/testing/selftests/arm64/signal/testcases/testcases.c | 7 +++++++ + tools/testing/selftests/arm64/signal/testcases/testcases.h | 1 + + 2 files changed, 8 insertions(+) + +diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.c b/tools/testing/selftests/arm64/signal/testcases/testcases.c +index 9f580b55b388..1cd124732be4 100644 +--- a/tools/testing/selftests/arm64/signal/testcases/testcases.c ++++ b/tools/testing/selftests/arm64/signal/testcases/testcases.c +@@ -209,6 +209,13 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err) + zt = (struct zt_context *)head; + new_flags |= ZT_CTX; + break; ++ case GCS_MAGIC: ++ if (flags & GCS_CTX) ++ *err = "Multiple GCS_MAGIC"; ++ if (head->size != sizeof(struct gcs_context)) ++ *err = "Bad size for gcs_context"; ++ new_flags |= GCS_CTX; ++ break; + case EXTRA_MAGIC: + if (flags & EXTRA_CTX) + *err = "Multiple EXTRA_MAGIC"; +diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.h b/tools/testing/selftests/arm64/signal/testcases/testcases.h +index a08ab0d6207a..9b2599745c29 100644 +--- a/tools/testing/selftests/arm64/signal/testcases/testcases.h ++++ b/tools/testing/selftests/arm64/signal/testcases/testcases.h +@@ -19,6 +19,7 @@ + #define ZA_CTX (1 << 2) + #define EXTRA_CTX (1 << 3) + #define ZT_CTX (1 << 4) ++#define GCS_CTX (1 << 5) + + #define KSFT_BAD_MAGIC 0xdeadbeef + +-- +2.34.1 + + +From 6cfe93b36e2ac080db62d44ed0bd8a6f972ceffa Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Wed, 5 Jul 2023 17:40:22 +0100 +Subject: [PATCH 40/47] kselftest/arm64: Allow signals tests to specify an + expected si_code + +Currently we ignore si_code unless the expected signal is a SIGSEGV, in +which case we enforce it being SEGV_ACCERR. 
Allow test cases to specify +exactly which si_code should be generated so we can validate this, and +test for other segfault codes. + +Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org> +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + .../selftests/arm64/signal/test_signals.h | 4 +++ + .../arm64/signal/test_signals_utils.c | 29 ++++++++++++------- + 2 files changed, 23 insertions(+), 10 deletions(-) + +diff --git a/tools/testing/selftests/arm64/signal/test_signals.h b/tools/testing/selftests/arm64/signal/test_signals.h +index 7ada43688c02..ee75a2c25ce7 100644 +--- a/tools/testing/selftests/arm64/signal/test_signals.h ++++ b/tools/testing/selftests/arm64/signal/test_signals.h +@@ -71,6 +71,10 @@ struct tdescr { + * Zero when no signal is expected on success + */ + int sig_ok; ++ /* ++ * expected si_code for sig_ok, or 0 to not check ++ */ ++ int sig_ok_code; + /* signum expected on unsupported CPU features. */ + int sig_unsupp; + /* a timeout in second for test completion */ +diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c +index 89ef95c1af0e..63deca32b0df 100644 +--- a/tools/testing/selftests/arm64/signal/test_signals_utils.c ++++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c +@@ -143,16 +143,25 @@ static bool handle_signal_ok(struct tdescr *td, + "current->token ZEROED...test is probably broken!\n"); + abort(); + } +- /* +- * Trying to narrow down the SEGV to the ones generated by Kernel itself +- * via arm64_notify_segfault(). This is a best-effort check anyway, and +- * the si_code check may need to change if this aspect of the kernel +- * ABI changes. 
+- */ +- if (td->sig_ok == SIGSEGV && si->si_code != SEGV_ACCERR) { +- fprintf(stdout, +- "si_code != SEGV_ACCERR...test is probably broken!\n"); +- abort(); ++ if (td->sig_ok_code) { ++ if (si->si_code != td->sig_ok_code) { ++ fprintf(stdout, "si_code is %d not %d\n", ++ si->si_code, td->sig_ok_code); ++ abort(); ++ } ++ } else { ++ /* ++ * Trying to narrow down the SEGV to the ones ++ * generated by Kernel itself via ++ * arm64_notify_segfault(). This is a best-effort ++ * check anyway, and the si_code check may need to ++ * change if this aspect of the kernel ABI changes. ++ */ ++ if (td->sig_ok == SIGSEGV && si->si_code != SEGV_ACCERR) { ++ fprintf(stdout, ++ "si_code != SEGV_ACCERR...test is probably broken!\n"); ++ abort(); ++ } + } + td->pass = 1; + /* +-- +2.34.1 + + +From c48cd5f2f586f4bfc9b17e6101727933d428511e Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Wed, 5 Jul 2023 14:43:32 +0100 +Subject: [PATCH 41/47] kselftest/arm64: Always run signals tests with GCS + enabled + +Since it is not possible to return from the function that enabled GCS +without disabling GCS it is very inconvenient to use the signal handling +tests to cover GCS when GCS is not enabled by the toolchain and runtime, +something that no current distribution does. Since none of the testcases +do anything with stacks that would cause problems with GCS we can sidestep +this issue by unconditionally enabling GCS on startup and exiting with a +call to exit() rather than a return from main(). 
+ +Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org> +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + .../selftests/arm64/signal/test_signals.c | 17 ++++++++++- + .../arm64/signal/test_signals_utils.h | 29 +++++++++++++++++++ + 2 files changed, 45 insertions(+), 1 deletion(-) + +diff --git a/tools/testing/selftests/arm64/signal/test_signals.c b/tools/testing/selftests/arm64/signal/test_signals.c +index 00051b40d71e..30e95f50db19 100644 +--- a/tools/testing/selftests/arm64/signal/test_signals.c ++++ b/tools/testing/selftests/arm64/signal/test_signals.c +@@ -7,6 +7,10 @@ + * Each test provides its own tde struct tdescr descriptor to link with + * this wrapper. Framework provides common helpers. + */ ++ ++#include <sys/auxv.h> ++#include <sys/prctl.h> ++ + #include <kselftest.h> + + #include "test_signals.h" +@@ -16,6 +20,16 @@ struct tdescr *current = &tde; + + int main(int argc, char *argv[]) + { ++ /* ++ * Ensure GCS is at least enabled throughout the tests if ++ * supported, otherwise the inability to return from the ++ * function that enabled GCS makes it very inconvenient to set ++ * up test cases. The prctl() may fail if GCS was locked by ++ * libc setup code. 
++ */ ++ if (getauxval(AT_HWCAP2) & HWCAP2_GCS) ++ gcs_set_state(PR_SHADOW_STACK_ENABLE); ++ + ksft_print_msg("%s :: %s\n", current->name, current->descr); + if (test_setup(current) && test_init(current)) { + test_run(current); +@@ -23,5 +37,6 @@ int main(int argc, char *argv[]) + } + test_result(current); + +- return current->result; ++ /* Do not return in case GCS was enabled */ ++ exit(current->result); + } +diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.h b/tools/testing/selftests/arm64/signal/test_signals_utils.h +index 762c8fe9c54a..1e80808ee105 100644 +--- a/tools/testing/selftests/arm64/signal/test_signals_utils.h ++++ b/tools/testing/selftests/arm64/signal/test_signals_utils.h +@@ -18,6 +18,35 @@ void test_cleanup(struct tdescr *td); + int test_run(struct tdescr *td); + void test_result(struct tdescr *td); + ++#ifndef __NR_prctl ++#define __NR_prctl 167 ++#endif ++ ++/* ++ * The prctl takes 1 argument but we need to ensure that the other ++ * values passed in registers to the syscall are zero since the kernel ++ * validates them. 
++ */ ++#define gcs_set_state(state) \ ++ ({ \ ++ register long _num __asm__ ("x8") = __NR_prctl; \ ++ register long _arg1 __asm__ ("x0") = PR_SET_SHADOW_STACK_STATUS; \ ++ register long _arg2 __asm__ ("x1") = (long)(state); \ ++ register long _arg3 __asm__ ("x2") = 0; \ ++ register long _arg4 __asm__ ("x3") = 0; \ ++ register long _arg5 __asm__ ("x4") = 0; \ ++ \ ++ __asm__ volatile ( \ ++ "svc #0\n" \ ++ : "=r"(_arg1) \ ++ : "r"(_arg1), "r"(_arg2), \ ++ "r"(_arg3), "r"(_arg4), \ ++ "r"(_arg5), "r"(_num) \ ++ : "memory", "cc" \ ++ ); \ ++ _arg1; \ ++ }) ++ + static inline bool feats_ok(struct tdescr *td) + { + if (td->feats_incompatible & td->feats_supported) +-- +2.34.1 + + +From dff5594c2072aa86c028171e06c5c706dc632a0c Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Thu, 6 Apr 2023 00:35:19 +0100 +Subject: [PATCH 42/47] kselftest/arm64: Add very basic GCS test program + +This test program just covers the basic GCS ABI, covering aspects of the +ABI as standalone features without attempting to integrate things. 
+ +Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org> +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + tools/testing/selftests/arm64/Makefile | 2 +- + tools/testing/selftests/arm64/gcs/.gitignore | 1 + + tools/testing/selftests/arm64/gcs/Makefile | 18 + + tools/testing/selftests/arm64/gcs/basic-gcs.c | 431 ++++++++++++++++++ + tools/testing/selftests/arm64/gcs/gcs-util.h | 90 ++++ + 5 files changed, 541 insertions(+), 1 deletion(-) + create mode 100644 tools/testing/selftests/arm64/gcs/.gitignore + create mode 100644 tools/testing/selftests/arm64/gcs/Makefile + create mode 100644 tools/testing/selftests/arm64/gcs/basic-gcs.c + create mode 100644 tools/testing/selftests/arm64/gcs/gcs-util.h + +diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile +index 28b93cab8c0d..22029e60eff3 100644 +--- a/tools/testing/selftests/arm64/Makefile ++++ b/tools/testing/selftests/arm64/Makefile +@@ -4,7 +4,7 @@ + ARCH ?= $(shell uname -m 2>/dev/null || echo not) + + ifneq (,$(filter $(ARCH),aarch64 arm64)) +-ARM64_SUBTARGETS ?= tags signal pauth fp mte bti abi ++ARM64_SUBTARGETS ?= tags signal pauth fp mte bti abi gcs + else + ARM64_SUBTARGETS := + endif +diff --git a/tools/testing/selftests/arm64/gcs/.gitignore b/tools/testing/selftests/arm64/gcs/.gitignore +new file mode 100644 +index 000000000000..0e5e695ecba5 +--- /dev/null ++++ b/tools/testing/selftests/arm64/gcs/.gitignore +@@ -0,0 +1 @@ ++basic-gcs +diff --git a/tools/testing/selftests/arm64/gcs/Makefile b/tools/testing/selftests/arm64/gcs/Makefile +new file mode 100644 +index 000000000000..61a30f483429 +--- /dev/null ++++ b/tools/testing/selftests/arm64/gcs/Makefile +@@ -0,0 +1,18 @@ ++# SPDX-License-Identifier: GPL-2.0 ++# Copyright (C) 2023 ARM Limited ++# ++# In order to avoid interaction with the toolchain and dynamic linker the ++# portions of these tests that interact with the GCS are implemented using ++# nolibc. 
++# ++ ++TEST_GEN_PROGS := basic-gcs ++ ++include ../../lib.mk ++ ++$(OUTPUT)/basic-gcs: basic-gcs.c ++ $(CC) -g -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \ ++ -static -include ../../../../include/nolibc/nolibc.h \ ++ -I../../../../../usr/include \ ++ -std=gnu99 -I../.. -g \ ++ -ffreestanding -Wall $^ -o $@ -lgcc +diff --git a/tools/testing/selftests/arm64/gcs/basic-gcs.c b/tools/testing/selftests/arm64/gcs/basic-gcs.c +new file mode 100644 +index 000000000000..b3522d606a58 +--- /dev/null ++++ b/tools/testing/selftests/arm64/gcs/basic-gcs.c +@@ -0,0 +1,431 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * Copyright (C) 2023 ARM Limited. ++ */ ++ ++#include <limits.h> ++#include <stdbool.h> ++ ++#include <linux/prctl.h> ++ ++#include <sys/mman.h> ++#include <asm/mman.h> ++#include <linux/sched.h> ++ ++#include "kselftest.h" ++#include "gcs-util.h" ++ ++/* nolibc doesn't have sysconf(), just hard code the maximum */ ++static size_t page_size = 65536; ++ ++static __attribute__((noinline)) void valid_gcs_function(void) ++{ ++ /* Do something the compiler can't optimise out */ ++ my_syscall1(__NR_prctl, PR_SVE_GET_VL); ++} ++ ++static inline int gcs_set_status(unsigned long mode) ++{ ++ bool enabling = mode & PR_SHADOW_STACK_ENABLE; ++ int ret; ++ unsigned long new_mode; ++ ++ /* ++ * The prctl takes 1 argument but we need to ensure that the ++ * other 3 values passed in registers to the syscall are zero ++ * since the kernel validates them. 
++ */ ++ ret = my_syscall5(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, mode, ++ 0, 0, 0); ++ ++ if (ret == 0) { ++ ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, ++ &new_mode, 0, 0, 0); ++ if (ret == 0) { ++ if (new_mode != mode) { ++ ksft_print_msg("Mode set to %x not %x\n", ++ new_mode, mode); ++ ret = -EINVAL; ++ } ++ } else { ++ ksft_print_msg("Failed to validate mode: %d\n", ret); ++ } ++ ++ if (enabling != chkfeat_gcs()) { ++ ksft_print_msg("%senabled by prctl but %senabled in CHKFEAT\n", ++ enabling ? "" : "not ", ++ chkfeat_gcs() ? "" : "not "); ++ ret = -EINVAL; ++ } ++ } ++ ++ return ret; ++} ++ ++/* Try to read the status */ ++static bool read_status(void) ++{ ++ unsigned long state; ++ int ret; ++ ++ ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, ++ &state, 0, 0, 0); ++ if (ret != 0) { ++ ksft_print_msg("Failed to read state: %d\n", ret); ++ return false; ++ } ++ ++ return state & PR_SHADOW_STACK_ENABLE; ++} ++ ++/* Just a straight enable */ ++static bool base_enable(void) ++{ ++ int ret; ++ ++ ret = gcs_set_status(PR_SHADOW_STACK_ENABLE); ++ if (ret) { ++ ksft_print_msg("PR_SHADOW_STACK_ENABLE failed %d\n", ret); ++ return false; ++ } ++ ++ return true; ++} ++ ++/* Check we can read GCSPR_EL0 when GCS is enabled */ ++static bool read_gcspr_el0(void) ++{ ++ unsigned long *gcspr_el0; ++ ++ ksft_print_msg("GET GCSPR\n"); ++ gcspr_el0 = get_gcspr(); ++ ksft_print_msg("GCSPR_EL0 is %p\n", gcspr_el0); ++ ++ return true; ++} ++ ++/* Also allow writes to stack */ ++static bool enable_writeable(void) ++{ ++ int ret; ++ ++ ret = gcs_set_status(PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE); ++ if (ret) { ++ ksft_print_msg("PR_SHADOW_STACK_ENABLE writeable failed: %d\n", ret); ++ return false; ++ } ++ ++ ret = gcs_set_status(PR_SHADOW_STACK_ENABLE); ++ if (ret) { ++ ksft_print_msg("failed to restore plain enable %d\n", ret); ++ return false; ++ } ++ ++ return true; ++} ++ ++/* Also allow writes to stack */ ++static bool enable_push_pop(void) 
++{ ++ int ret; ++ ++ ret = gcs_set_status(PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_PUSH); ++ if (ret) { ++ ksft_print_msg("PR_SHADOW_STACK_ENABLE with push failed: %d\n", ++ ret); ++ return false; ++ } ++ ++ ret = gcs_set_status(PR_SHADOW_STACK_ENABLE); ++ if (ret) { ++ ksft_print_msg("failed to restore plain enable %d\n", ret); ++ return false; ++ } ++ ++ return true; ++} ++ ++/* Enable GCS and allow everything */ ++static bool enable_all(void) ++{ ++ int ret; ++ ++ ret = gcs_set_status(PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_PUSH | ++ PR_SHADOW_STACK_WRITE); ++ if (ret) { ++ ksft_print_msg("PR_SHADOW_STACK_ENABLE with everything failed: %d\n", ++ ret); ++ return false; ++ } ++ ++ ret = gcs_set_status(PR_SHADOW_STACK_ENABLE); ++ if (ret) { ++ ksft_print_msg("failed to restore plain enable %d\n", ret); ++ return false; ++ } ++ ++ return true; ++} ++ ++static bool enable_invalid(void) ++{ ++ int ret = gcs_set_status(ULONG_MAX); ++ if (ret == 0) { ++ ksft_print_msg("GCS_SET_STATUS %lx succeeded\n", ULONG_MAX); ++ return false; ++ } ++ ++ return true; ++} ++ ++/* Map a GCS */ ++static bool map_guarded_stack(void) ++{ ++ int ret; ++ uint64_t *buf; ++ uint64_t expected_cap; ++ int elem; ++ bool pass = true; ++ ++ buf = (void *)my_syscall3(__NR_map_shadow_stack, 0, page_size, ++ SHADOW_STACK_SET_MARKER | ++ SHADOW_STACK_SET_TOKEN); ++ if (buf == MAP_FAILED) { ++ ksft_print_msg("Failed to map %d byte GCS: %d\n", ++ page_size, errno); ++ return false; ++ } ++ ksft_print_msg("Mapped GCS at %p-%p\n", buf, ++ (uint64_t)buf + page_size); ++ ++ /* The top of the newly allocated region should be 0 */ ++ elem = (page_size / sizeof(uint64_t)) - 1; ++ if (buf[elem]) { ++ ksft_print_msg("Last entry is 0x%lx not 0x0\n", buf[elem]); ++ pass = false; ++ } ++ ++ /* Then a valid cap token */ ++ elem--; ++ expected_cap = ((uint64_t)buf + page_size - 16); ++ expected_cap &= GCS_CAP_ADDR_MASK; ++ expected_cap |= GCS_CAP_VALID_TOKEN; ++ if (buf[elem] != expected_cap) { ++ 
ksft_print_msg("Cap entry is 0x%lx not 0x%lx\n", ++ buf[elem], expected_cap); ++ pass = false; ++ } ++ ksft_print_msg("cap token is 0x%lx\n", buf[elem]); ++ ++ /* The rest should be zeros */ ++ for (elem = 0; elem < page_size / sizeof(uint64_t) - 2; elem++) { ++ if (!buf[elem]) ++ continue; ++ ksft_print_msg("GCS slot %d is 0x%lx not 0x0\n", ++ elem, buf[elem]); ++ pass = false; ++ } ++ ++ ret = munmap(buf, page_size); ++ if (ret != 0) { ++ ksft_print_msg("Failed to unmap %d byte GCS: %d\n", ++ page_size, errno); ++ pass = false; ++ } ++ ++ return pass; ++} ++ ++/* A fork()ed process can run */ ++static bool test_fork(void) ++{ ++ unsigned long child_mode; ++ int ret, status; ++ pid_t pid; ++ bool pass = true; ++ ++ pid = fork(); ++ if (pid == -1) { ++ ksft_print_msg("fork() failed: %d\n", errno); ++ pass = false; ++ goto out; ++ } ++ if (pid == 0) { ++ /* In child, make sure we can call a function, read ++ * the GCS pointer and status and then exit */ ++ valid_gcs_function(); ++ get_gcspr(); ++ ++ ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, ++ &child_mode, 0, 0, 0); ++ if (ret == 0 && !(child_mode & PR_SHADOW_STACK_ENABLE)) { ++ ksft_print_msg("GCS not enabled in child\n"); ++ ret = -EINVAL; ++ } ++ ++ exit(ret); ++ } ++ ++ /* ++ * In parent, check we can still do function calls then block ++ * for the child. 
++ */ ++ valid_gcs_function(); ++ ++ ksft_print_msg("Waiting for child %d\n", pid); ++ ++ ret = waitpid(pid, &status, 0); ++ if (ret == -1) { ++ ksft_print_msg("Failed to wait for child: %d\n", ++ errno); ++ return false; ++ } ++ ++ if (!WIFEXITED(status)) { ++ ksft_print_msg("Child exited due to signal %d\n", ++ WTERMSIG(status)); ++ pass = false; ++ } else { ++ if (WEXITSTATUS(status)) { ++ ksft_print_msg("Child exited with status %d\n", ++ WEXITSTATUS(status)); ++ pass = false; ++ } ++ } ++ ++out: ++ ++ return pass; ++} ++ ++/* Check that we can explicitly specify a GCS via clone3() */ ++static bool test_clone3(void) ++{ ++ struct clone_args args; ++ unsigned long child_mode; ++ pid_t pid = -1; ++ int status, ret; ++ bool pass; ++ ++ memset(&args, 0, sizeof(args)); ++ args.flags = CLONE_VM; ++ args.shadow_stack = my_syscall3(__NR_map_shadow_stack, 0, page_size, ++ SHADOW_STACK_SET_MARKER | ++ SHADOW_STACK_SET_TOKEN); ++ args.shadow_stack_size = page_size; ++ ++ pid = my_syscall2(__NR_clone3, &args, sizeof(args)); ++ if (pid < 0) { ++ ksft_print_msg("clone3() failed: %d\n", errno); ++ pass = false; ++ goto out; ++ } ++ ++ /* In child? */ ++ if (pid == 0) { ++ /* Do we have GCS enabled? 
*/ ++ ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, ++ &child_mode, 0, 0, 0); ++ if (ret != 0) { ++ ksft_print_msg("PR_GET_SHADOW_STACK_STATUS failed: %d\n", ++ ret); ++ exit(EXIT_FAILURE); ++ } ++ ++ if (!(child_mode & PR_SHADOW_STACK_ENABLE)) { ++ ksft_print_msg("GCS not enabled in child\n"); ++ exit(EXIT_FAILURE); ++ } ++ ++ ksft_print_msg("GCS enabled in child\n"); ++ ++ /* We've probably already called a function but make sure */ ++ valid_gcs_function(); ++ ++ exit(EXIT_SUCCESS); ++ } ++ ++ if (waitpid(-1, &status, __WALL) < 0) { ++ ksft_print_msg("waitpid() failed %d\n", errno); ++ pass = false; ++ goto out; ++ } ++ if (WIFEXITED(status)) { ++ if (WEXITSTATUS(status) == EXIT_SUCCESS) { ++ pass = true; ++ } else { ++ ksft_print_msg("Child returned status %d\n", ++ WEXITSTATUS(status)); ++ pass = false; ++ } ++ } else if (WIFSIGNALED(status)) { ++ ksft_print_msg("Child exited due to signal %d\n", ++ WTERMSIG(status)); ++ pass = false; ++ } else { ++ ksft_print_msg("Child exited uncleanly\n"); ++ pass = false; ++ } ++ ++out: ++ return pass; ++} ++ ++typedef bool (*gcs_test)(void); ++ ++static struct { ++ char *name; ++ gcs_test test; ++ bool needs_enable; ++} tests[] = { ++ { "read_status", read_status }, ++ { "base_enable", base_enable, true }, ++ { "read_gcspr_el0", read_gcspr_el0 }, ++ { "enable_writeable", enable_writeable, true }, ++ { "enable_push_pop", enable_push_pop, true }, ++ { "enable_all", enable_all, true }, ++ { "enable_invalid", enable_invalid, true }, ++ { "map_guarded_stack", map_guarded_stack }, ++ { "fork", test_fork }, ++ { "clone3", test_clone3 }, ++}; ++ ++int main(void) ++{ ++ int i, ret; ++ unsigned long gcs_mode; ++ ++ ksft_print_header(); ++ ++ /* ++ * We don't have getauxval() with nolibc so treat a failure to ++ * read GCS state as a lack of support and skip. 
++ */ ++ ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, ++ &gcs_mode, 0, 0, 0); ++ if (ret != 0) ++ ksft_exit_skip("Failed to read GCS state: %d\n", ret); ++ ++ if (!(gcs_mode & PR_SHADOW_STACK_ENABLE)) { ++ gcs_mode = PR_SHADOW_STACK_ENABLE; ++ ret = my_syscall5(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, ++ gcs_mode, 0, 0, 0); ++ if (ret != 0) ++ ksft_exit_fail_msg("Failed to enable GCS: %d\n", ret); ++ } ++ ++ ksft_set_plan(ARRAY_SIZE(tests)); ++ ++ for (i = 0; i < ARRAY_SIZE(tests); i++) { ++ ksft_test_result((*tests[i].test)(), "%s\n", tests[i].name); ++ } ++ ++ /* One last test: disable GCS, we can do this one time */ ++ my_syscall5(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, 0, 0, 0, 0); ++ if (ret != 0) ++ ksft_print_msg("Failed to disable GCS: %d\n", ret); ++ ++ ksft_finished(); ++ ++ return 0; ++} +diff --git a/tools/testing/selftests/arm64/gcs/gcs-util.h b/tools/testing/selftests/arm64/gcs/gcs-util.h +new file mode 100644 +index 000000000000..b37801c95604 +--- /dev/null ++++ b/tools/testing/selftests/arm64/gcs/gcs-util.h +@@ -0,0 +1,90 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++/* ++ * Copyright (C) 2023 ARM Limited. 
++ */ ++ ++#ifndef GCS_UTIL_H ++#define GCS_UTIL_H ++ ++#include <stdbool.h> ++ ++#ifndef __NR_map_shadow_stack ++#define __NR_map_shadow_stack 453 ++#endif ++ ++#ifndef __NR_prctl ++#define __NR_prctl 167 ++#endif ++ ++/* Shadow Stack/Guarded Control Stack interface */ ++#define PR_GET_SHADOW_STACK_STATUS 71 ++#define PR_SET_SHADOW_STACK_STATUS 72 ++#define PR_LOCK_SHADOW_STACK_STATUS 73 ++ ++# define PR_SHADOW_STACK_ENABLE (1UL << 0) ++# define PR_SHADOW_STACK_WRITE (1UL << 1) ++# define PR_SHADOW_STACK_PUSH (1UL << 2) ++ ++#define PR_SHADOW_STACK_ALL_MODES \ ++ PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE | PR_SHADOW_STACK_PUSH ++ ++#define SHADOW_STACK_SET_TOKEN (1ULL << 0) /* Set up a restore token in the shadow stack */ ++#define SHADOW_STACK_SET_MARKER (1ULL << 1) /* Set up a top of stack merker in the shadow stack */ ++ ++#define GCS_CAP_ADDR_MASK (0xfffffffffffff000UL) ++#define GCS_CAP_TOKEN_MASK (0x0000000000000fffUL) ++#define GCS_CAP_VALID_TOKEN 1 ++#define GCS_CAP_IN_PROGRESS_TOKEN 5 ++ ++#define GCS_CAP(x) (((unsigned long)(x) & GCS_CAP_ADDR_MASK) | \ ++ GCS_CAP_VALID_TOKEN) ++ ++static inline unsigned long *get_gcspr(void) ++{ ++ unsigned long *gcspr; ++ ++ asm volatile( ++ "mrs %0, S3_3_C2_C5_1" ++ : "=r" (gcspr) ++ : ++ : "cc"); ++ ++ return gcspr; ++} ++ ++static inline void __attribute__((always_inline)) gcsss1(unsigned long *Xt) ++{ ++ asm volatile ( ++ "sys #3, C7, C7, #2, %0\n" ++ : ++ : "rZ" (Xt) ++ : "memory"); ++} ++ ++static inline unsigned long __attribute__((always_inline)) *gcsss2(void) ++{ ++ unsigned long *Xt; ++ ++ asm volatile( ++ "SYSL %0, #3, C7, C7, #3\n" ++ : "=r" (Xt) ++ : ++ : "memory"); ++ ++ return Xt; ++} ++ ++static inline bool chkfeat_gcs(void) ++{ ++ register long val __asm__ ("x16") = 1; ++ ++ /* CHKFEAT x16 */ ++ asm volatile( ++ "hint #0x28\n" ++ : "=r" (val) ++ : "r" (val)); ++ ++ return val != 1; ++} ++ ++#endif +-- +2.34.1 + + +From 21b5f923dd2284877481d62e62994edfae826f71 Mon Sep 17 00:00:00 2001 +From: Mark 
Brown <broonie@kernel.org> +Date: Fri, 28 Apr 2023 18:06:06 +0100 +Subject: [PATCH 43/47] kselftest/arm64: Add a GCS test program built with the + system libc + +There are things like threads which nolibc struggles with which we want +to add coverage for, and the ABI allows us to test most of these even if +libc itself does not understand GCS so add a test application built +using the system libc. + +Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org> +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + tools/testing/selftests/arm64/gcs/.gitignore | 1 + + tools/testing/selftests/arm64/gcs/Makefile | 4 +- + tools/testing/selftests/arm64/gcs/gcs-util.h | 10 + + tools/testing/selftests/arm64/gcs/libc-gcs.c | 736 +++++++++++++++++++ + 4 files changed, 750 insertions(+), 1 deletion(-) + create mode 100644 tools/testing/selftests/arm64/gcs/libc-gcs.c + +diff --git a/tools/testing/selftests/arm64/gcs/.gitignore b/tools/testing/selftests/arm64/gcs/.gitignore +index 0e5e695ecba5..5810c4a163d4 100644 +--- a/tools/testing/selftests/arm64/gcs/.gitignore ++++ b/tools/testing/selftests/arm64/gcs/.gitignore +@@ -1 +1,2 @@ + basic-gcs ++libc-gcs +diff --git a/tools/testing/selftests/arm64/gcs/Makefile b/tools/testing/selftests/arm64/gcs/Makefile +index 61a30f483429..a8fdf21e9a47 100644 +--- a/tools/testing/selftests/arm64/gcs/Makefile ++++ b/tools/testing/selftests/arm64/gcs/Makefile +@@ -6,7 +6,9 @@ + # nolibc. 
+ # + +-TEST_GEN_PROGS := basic-gcs ++TEST_GEN_PROGS := basic-gcs libc-gcs ++ ++LDLIBS+=-lpthread + + include ../../lib.mk + +diff --git a/tools/testing/selftests/arm64/gcs/gcs-util.h b/tools/testing/selftests/arm64/gcs/gcs-util.h +index b37801c95604..4bafd1d7feb5 100644 +--- a/tools/testing/selftests/arm64/gcs/gcs-util.h ++++ b/tools/testing/selftests/arm64/gcs/gcs-util.h +@@ -16,6 +16,16 @@ + #define __NR_prctl 167 + #endif + ++#ifndef NT_ARM_GCS ++#define NT_ARM_GCS 0x40e ++ ++struct user_gcs { ++ __u64 features_enabled; ++ __u64 features_locked; ++ __u64 gcspr_el0; ++}; ++#endif ++ + /* Shadow Stack/Guarded Control Stack interface */ + #define PR_GET_SHADOW_STACK_STATUS 71 + #define PR_SET_SHADOW_STACK_STATUS 72 +diff --git a/tools/testing/selftests/arm64/gcs/libc-gcs.c b/tools/testing/selftests/arm64/gcs/libc-gcs.c +new file mode 100644 +index 000000000000..937f8bee7bdd +--- /dev/null ++++ b/tools/testing/selftests/arm64/gcs/libc-gcs.c +@@ -0,0 +1,736 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * Copyright (C) 2023 ARM Limited. 
++ */ ++ ++#define _GNU_SOURCE ++ ++#include <pthread.h> ++#include <stdbool.h> ++ ++#include <sys/auxv.h> ++#include <sys/mman.h> ++#include <sys/prctl.h> ++#include <sys/ptrace.h> ++#include <sys/uio.h> ++ ++#include <asm/hwcap.h> ++#include <asm/mman.h> ++ ++#include <linux/compiler.h> ++ ++#include "kselftest_harness.h" ++ ++#include "gcs-util.h" ++ ++#define my_syscall2(num, arg1, arg2) \ ++({ \ ++ register long _num __asm__ ("x8") = (num); \ ++ register long _arg1 __asm__ ("x0") = (long)(arg1); \ ++ register long _arg2 __asm__ ("x1") = (long)(arg2); \ ++ register long _arg3 __asm__ ("x2") = 0; \ ++ register long _arg4 __asm__ ("x3") = 0; \ ++ register long _arg5 __asm__ ("x4") = 0; \ ++ \ ++ __asm__ volatile ( \ ++ "svc #0\n" \ ++ : "=r"(_arg1) \ ++ : "r"(_arg1), "r"(_arg2), \ ++ "r"(_arg3), "r"(_arg4), \ ++ "r"(_arg5), "r"(_num) \ ++ : "memory", "cc" \ ++ ); \ ++ _arg1; \ ++}) ++ ++static noinline void gcs_recurse(int depth) ++{ ++ if (depth) ++ gcs_recurse(depth - 1); ++ ++ /* Prevent tail call optimization so we actually recurse */ ++ asm volatile("dsb sy" : : : "memory"); ++} ++ ++/* Smoke test that a function call and return works*/ ++TEST(can_call_function) ++{ ++ gcs_recurse(0); ++} ++ ++static void *gcs_test_thread(void *arg) ++{ ++ int ret; ++ unsigned long mode; ++ ++ /* ++ * Some libcs don't seem to fill unused arguments with 0 but ++ * the kernel validates this so we supply all 5 arguments. ++ */ ++ ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0); ++ if (ret != 0) { ++ ksft_print_msg("PR_GET_SHADOW_STACK_STATUS failed: %d\n", ret); ++ return NULL; ++ } ++ ++ if (!(mode & PR_SHADOW_STACK_ENABLE)) { ++ ksft_print_msg("GCS not enabled in thread, mode is %u\n", ++ mode); ++ return NULL; ++ } ++ ++ /* Just in case... 
*/ ++ gcs_recurse(0); ++ ++ /* Use a non-NULL value to indicate a pass */ ++ return &gcs_test_thread; ++} ++ ++/* Verify that if we start a new thread it has GCS enabled */ ++TEST(gcs_enabled_thread) ++{ ++ pthread_t thread; ++ void *thread_ret; ++ int ret; ++ ++ ret = pthread_create(&thread, NULL, gcs_test_thread, NULL); ++ ASSERT_TRUE(ret == 0); ++ if (ret != 0) ++ return; ++ ++ ret = pthread_join(thread, &thread_ret); ++ ASSERT_TRUE(ret == 0); ++ if (ret != 0) ++ return; ++ ++ ASSERT_TRUE(thread_ret != NULL); ++} ++ ++/* Read the GCS until we find the terminator */ ++TEST(gcs_find_terminator) ++{ ++ unsigned long *gcs, *cur; ++ ++ gcs = get_gcspr(); ++ cur = gcs; ++ while (*cur) ++ cur++; ++ ++ ksft_print_msg("GCS in use from %p-%p\n", gcs, cur); ++ ++ /* ++ * We should have at least whatever called into this test so ++ * the two pointer should differ. ++ */ ++ ASSERT_TRUE(gcs != cur); ++} ++ ++/* ++ * We can access a GCS via ptrace ++ * ++ * This could usefully have a fixture but note that each test is ++ * fork()ed into a new child whcih causes issues. Might be better to ++ * lift at least some of this out into a separate, non-harness, test ++ * program. ++ */ ++TEST(ptrace_read_write) ++{ ++ pid_t child, pid; ++ int ret, status; ++ siginfo_t si; ++ uint64_t val, rval, gcspr; ++ struct user_gcs child_gcs; ++ struct iovec iov, local_iov, remote_iov; ++ ++ child = fork(); ++ if (child == -1) { ++ ksft_print_msg("fork() failed: %d (%s)\n", ++ errno, strerror(errno)); ++ ASSERT_NE(child, -1); ++ } ++ ++ if (child == 0) { ++ /* ++ * In child, make sure there's something on the stack and ++ * ask to be traced. 
++ */ ++ gcs_recurse(0); ++ if (ptrace(PTRACE_TRACEME, -1, NULL, NULL)) ++ ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno)); ++ ++ if (raise(SIGSTOP)) ++ ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno)); ++ ++ return; ++ } ++ ++ ksft_print_msg("Child: %d\n", child); ++ ++ /* Attach to the child */ ++ while (1) { ++ int sig; ++ ++ pid = wait(&status); ++ if (pid == -1) { ++ ksft_print_msg("wait() failed: %s", ++ strerror(errno)); ++ goto error; ++ } ++ ++ /* ++ * This should never happen but it's hard to flag in ++ * the framework. ++ */ ++ if (pid != child) ++ continue; ++ ++ if (WIFEXITED(status) || WIFSIGNALED(status)) ++ ksft_exit_fail_msg("Child died unexpectedly\n"); ++ ++ if (!WIFSTOPPED(status)) ++ goto error; ++ ++ sig = WSTOPSIG(status); ++ ++ if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &si)) { ++ if (errno == ESRCH) { ++ ASSERT_NE(errno, ESRCH); ++ return; ++ } ++ ++ if (errno == EINVAL) { ++ sig = 0; /* bust group-stop */ ++ goto cont; ++ } ++ ++ ksft_print_msg("PTRACE_GETSIGINFO: %s\n", ++ strerror(errno)); ++ goto error; ++ } ++ ++ if (sig == SIGSTOP && si.si_code == SI_TKILL && ++ si.si_pid == pid) ++ break; ++ ++ cont: ++ if (ptrace(PTRACE_CONT, pid, NULL, sig)) { ++ if (errno == ESRCH) { ++ ASSERT_NE(errno, ESRCH); ++ return; ++ } ++ ++ ksft_print_msg("PTRACE_CONT: %s\n", strerror(errno)); ++ goto error; ++ } ++ } ++ ++ /* Where is the child GCS? 
*/ ++ iov.iov_base = &child_gcs; ++ iov.iov_len = sizeof(child_gcs); ++ ret = ptrace(PTRACE_GETREGSET, child, NT_ARM_GCS, &iov); ++ if (ret != 0) { ++ ksft_print_msg("Failed to read child GCS state: %s (%d)\n", ++ strerror(errno), errno); ++ goto error; ++ } ++ ++ /* We should have inherited GCS over fork(), confirm */ ++ if (!(child_gcs.features_enabled & PR_SHADOW_STACK_ENABLE)) { ++ ASSERT_TRUE(child_gcs.features_enabled & ++ PR_SHADOW_STACK_ENABLE); ++ goto error; ++ } ++ ++ gcspr = child_gcs.gcspr_el0; ++ ksft_print_msg("Child GCSPR 0x%lx, flags %x, locked %x\n", ++ gcspr, child_gcs.features_enabled, ++ child_gcs.features_locked); ++ ++ /* Ideally we'd cross check with the child memory map */ ++ ++ errno = 0; ++ val = ptrace(PTRACE_PEEKDATA, child, (void *)gcspr, NULL); ++ ret = errno; ++ if (ret != 0) ++ ksft_print_msg("PTRACE_PEEKDATA failed: %s (%d)\n", ++ strerror(ret), ret); ++ EXPECT_EQ(ret, 0); ++ ++ /* The child should be in a function, the GCSPR shouldn't be 0 */ ++ EXPECT_NE(val, 0); ++ ++ /* Same thing via process_vm_readv() */ ++ local_iov.iov_base = &rval; ++ local_iov.iov_len = sizeof(rval); ++ remote_iov.iov_base = (void *)gcspr; ++ remote_iov.iov_len = sizeof(rval); ++ ret = process_vm_readv(child, &local_iov, 1, &remote_iov, 1, 0); ++ if (ret == -1) ++ ksft_print_msg("process_vm_readv() failed: %s (%d)\n", ++ strerror(errno), errno); ++ EXPECT_EQ(ret, sizeof(rval)); ++ EXPECT_EQ(val, rval); ++ ++ /* Write data via a peek */ ++ ret = ptrace(PTRACE_POKEDATA, child, (void *)gcspr, NULL); ++ if (ret == -1) ++ ksft_print_msg("PTRACE_POKEDATA failed: %s (%d)\n", ++ strerror(errno), errno); ++ EXPECT_EQ(ret, 0); ++ EXPECT_EQ(0, ptrace(PTRACE_PEEKDATA, child, (void *)gcspr, NULL)); ++ ++ /* Restore what we had before */ ++ ret = ptrace(PTRACE_POKEDATA, child, (void *)gcspr, val); ++ if (ret == -1) ++ ksft_print_msg("PTRACE_POKEDATA failed: %s (%d)\n", ++ strerror(errno), errno); ++ EXPECT_EQ(ret, 0); ++ EXPECT_EQ(val, ptrace(PTRACE_PEEKDATA, child, 
(void *)gcspr, NULL)); ++ ++ /* That's all, folks */ ++ kill(child, SIGKILL); ++ return; ++ ++error: ++ kill(child, SIGKILL); ++ ASSERT_FALSE(true); ++} ++ ++FIXTURE(map_gcs) ++{ ++ unsigned long *stack; ++}; ++ ++FIXTURE_VARIANT(map_gcs) ++{ ++ size_t stack_size; ++ unsigned long flags; ++}; ++ ++FIXTURE_VARIANT_ADD(map_gcs, s2k_cap_marker) ++{ ++ .stack_size = 2 * 1024, ++ .flags = SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN, ++}; ++ ++FIXTURE_VARIANT_ADD(map_gcs, s2k_cap) ++{ ++ .stack_size = 2 * 1024, ++ .flags = SHADOW_STACK_SET_TOKEN, ++}; ++ ++FIXTURE_VARIANT_ADD(map_gcs, s2k_marker) ++{ ++ .stack_size = 2 * 1024, ++ .flags = SHADOW_STACK_SET_MARKER, ++}; ++ ++FIXTURE_VARIANT_ADD(map_gcs, s2k) ++{ ++ .stack_size = 2 * 1024, ++ .flags = 0, ++}; ++ ++FIXTURE_VARIANT_ADD(map_gcs, s4k_cap_marker) ++{ ++ .stack_size = 4 * 1024, ++ .flags = SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN, ++}; ++ ++FIXTURE_VARIANT_ADD(map_gcs, s4k_cap) ++{ ++ .stack_size = 4 * 1024, ++ .flags = SHADOW_STACK_SET_TOKEN, ++}; ++ ++FIXTURE_VARIANT_ADD(map_gcs, s3k_marker) ++{ ++ .stack_size = 4 * 1024, ++ .flags = SHADOW_STACK_SET_MARKER, ++}; ++ ++FIXTURE_VARIANT_ADD(map_gcs, s4k) ++{ ++ .stack_size = 4 * 1024, ++ .flags = 0, ++}; ++ ++FIXTURE_VARIANT_ADD(map_gcs, s16k_cap_marker) ++{ ++ .stack_size = 16 * 1024, ++ .flags = SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN, ++}; ++ ++FIXTURE_VARIANT_ADD(map_gcs, s16k_cap) ++{ ++ .stack_size = 16 * 1024, ++ .flags = SHADOW_STACK_SET_TOKEN, ++}; ++ ++FIXTURE_VARIANT_ADD(map_gcs, s16k_marker) ++{ ++ .stack_size = 16 * 1024, ++ .flags = SHADOW_STACK_SET_MARKER, ++}; ++ ++FIXTURE_VARIANT_ADD(map_gcs, s16k) ++{ ++ .stack_size = 16 * 1024, ++ .flags = 0, ++}; ++ ++FIXTURE_VARIANT_ADD(map_gcs, s64k_cap_marker) ++{ ++ .stack_size = 64 * 1024, ++ .flags = SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN, ++}; ++ ++FIXTURE_VARIANT_ADD(map_gcs, s64k_cap) ++{ ++ .stack_size = 64 * 1024, ++ .flags = SHADOW_STACK_SET_TOKEN, ++}; ++ 
++FIXTURE_VARIANT_ADD(map_gcs, s64k_marker) ++{ ++ .stack_size = 64 * 1024, ++ .flags = SHADOW_STACK_SET_MARKER, ++}; ++ ++FIXTURE_VARIANT_ADD(map_gcs, s64k) ++{ ++ .stack_size = 64 * 1024, ++ .flags = 0, ++}; ++ ++FIXTURE_VARIANT_ADD(map_gcs, s128k_cap_marker) ++{ ++ .stack_size = 128 * 1024, ++ .flags = SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN, ++}; ++ ++FIXTURE_VARIANT_ADD(map_gcs, s128k_cap) ++{ ++ .stack_size = 128 * 1024, ++ .flags = SHADOW_STACK_SET_TOKEN, ++}; ++ ++FIXTURE_VARIANT_ADD(map_gcs, s128k_marker) ++{ ++ .stack_size = 128 * 1024, ++ .flags = SHADOW_STACK_SET_MARKER, ++}; ++ ++FIXTURE_VARIANT_ADD(map_gcs, s128k) ++{ ++ .stack_size = 128 * 1024, ++ .flags = 0, ++}; ++ ++FIXTURE_SETUP(map_gcs) ++{ ++ self->stack = (void *)syscall(__NR_map_shadow_stack, 0, ++ variant->stack_size, ++ variant->flags); ++ ASSERT_FALSE(self->stack == MAP_FAILED); ++ ksft_print_msg("Allocated stack from %p-%p\n", self->stack, ++ (unsigned long)self->stack + variant->stack_size); ++} ++ ++FIXTURE_TEARDOWN(map_gcs) ++{ ++ int ret; ++ ++ if (self->stack != MAP_FAILED) { ++ ret = munmap(self->stack, variant->stack_size); ++ ASSERT_EQ(ret, 0); ++ } ++} ++ ++/* The stack has a cap token */ ++TEST_F(map_gcs, stack_capped) ++{ ++ unsigned long *stack = self->stack; ++ size_t cap_index; ++ ++ cap_index = (variant->stack_size / sizeof(unsigned long)); ++ ++ switch (variant->flags & (SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN)) { ++ case SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN: ++ cap_index -= 2; ++ break; ++ case SHADOW_STACK_SET_TOKEN: ++ cap_index -= 1; ++ break; ++ case SHADOW_STACK_SET_MARKER: ++ case 0: ++ /* No cap, no test */ ++ return; ++ } ++ ++ ASSERT_EQ(stack[cap_index], GCS_CAP(&stack[cap_index])); ++} ++ ++/* The top of the stack is 0 */ ++TEST_F(map_gcs, stack_terminated) ++{ ++ unsigned long *stack = self->stack; ++ size_t term_index; ++ ++ if (!(variant->flags & SHADOW_STACK_SET_MARKER)) ++ return; ++ ++ term_index = (variant->stack_size / 
sizeof(unsigned long)) - 1; ++ ++ ASSERT_EQ(stack[term_index], 0); ++} ++ ++/* Writes should fault */ ++TEST_F_SIGNAL(map_gcs, not_writeable, SIGSEGV) ++{ ++ self->stack[0] = 0; ++} ++ ++/* Put it all together, we can safely switch to and from the stack */ ++TEST_F(map_gcs, stack_switch) ++{ ++ size_t cap_index; ++ cap_index = (variant->stack_size / sizeof(unsigned long)); ++ unsigned long *orig_gcspr_el0, *pivot_gcspr_el0; ++ ++ /* Skip over the stack terminator and point at the cap */ ++ switch (variant->flags & (SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN)) { ++ case SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN: ++ cap_index -= 2; ++ break; ++ case SHADOW_STACK_SET_TOKEN: ++ cap_index -= 1; ++ break; ++ case SHADOW_STACK_SET_MARKER: ++ case 0: ++ /* No cap, no test */ ++ return; ++ } ++ pivot_gcspr_el0 = &self->stack[cap_index]; ++ ++ /* Pivot to the new GCS */ ++ ksft_print_msg("Pivoting to %p from %p, target has value 0x%lx\n", ++ pivot_gcspr_el0, get_gcspr(), ++ *pivot_gcspr_el0); ++ gcsss1(pivot_gcspr_el0); ++ orig_gcspr_el0 = gcsss2(); ++ ksft_print_msg("Pivoted to %p from %p, target has value 0x%lx\n", ++ get_gcspr(), orig_gcspr_el0, ++ *pivot_gcspr_el0); ++ ++ ksft_print_msg("Pivoted, GCSPR_EL0 now %p\n", get_gcspr()); ++ ++ /* New GCS must be in the new buffer */ ++ ASSERT_TRUE((unsigned long)get_gcspr() > (unsigned long)self->stack); ++ ASSERT_TRUE((unsigned long)get_gcspr() <= ++ (unsigned long)self->stack + variant->stack_size); ++ ++ /* We should be able to use all but 2 slots of the new stack */ ++ ksft_print_msg("Recursing %d levels\n", cap_index - 1); ++ gcs_recurse(cap_index - 1); ++ ++ /* Pivot back to the original GCS */ ++ gcsss1(orig_gcspr_el0); ++ pivot_gcspr_el0 = gcsss2(); ++ ++ gcs_recurse(0); ++ ksft_print_msg("Pivoted back to GCSPR_EL0 0x%lx\n", get_gcspr()); ++} ++ ++/* We fault if we try to go beyond the end of the stack */ ++TEST_F_SIGNAL(map_gcs, stack_overflow, SIGSEGV) ++{ ++ size_t cap_index; ++ cap_index = 
(variant->stack_size / sizeof(unsigned long)); ++ unsigned long *orig_gcspr_el0, *pivot_gcspr_el0; ++ ++ /* Skip over the stack terminator and point at the cap */ ++ switch (variant->flags & (SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN)) { ++ case SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN: ++ cap_index -= 2; ++ break; ++ case SHADOW_STACK_SET_TOKEN: ++ cap_index -= 1; ++ break; ++ case SHADOW_STACK_SET_MARKER: ++ case 0: ++ /* No cap, no test but we need to SEGV to avoid a false fail */ ++ orig_gcspr_el0 = get_gcspr(); ++ *orig_gcspr_el0 = 0; ++ return; ++ } ++ pivot_gcspr_el0 = &self->stack[cap_index]; ++ ++ /* Pivot to the new GCS */ ++ ksft_print_msg("Pivoting to %p from %p, target has value 0x%lx\n", ++ pivot_gcspr_el0, get_gcspr(), ++ *pivot_gcspr_el0); ++ gcsss1(pivot_gcspr_el0); ++ orig_gcspr_el0 = gcsss2(); ++ ksft_print_msg("Pivoted to %p from %p, target has value 0x%lx\n", ++ pivot_gcspr_el0, orig_gcspr_el0, ++ *pivot_gcspr_el0); ++ ++ ksft_print_msg("Pivoted, GCSPR_EL0 now %p\n", get_gcspr()); ++ ++ /* New GCS must be in the new buffer */ ++ ASSERT_TRUE((unsigned long)get_gcspr() > (unsigned long)self->stack); ++ ASSERT_TRUE((unsigned long)get_gcspr() <= ++ (unsigned long)self->stack + variant->stack_size); ++ ++ /* Now try to recurse, we should fault doing this. */ ++ ksft_print_msg("Recursing %d levels...\n", cap_index + 1); ++ gcs_recurse(cap_index + 1); ++ ksft_print_msg("...done\n"); ++ ++ /* Clean up properly to try to guard against spurious passes. 
*/ ++ gcsss1(orig_gcspr_el0); ++ pivot_gcspr_el0 = gcsss2(); ++ ksft_print_msg("Pivoted back to GCSPR_EL0 0x%lx\n", get_gcspr()); ++} ++ ++FIXTURE(map_invalid_gcs) ++{ ++}; ++ ++FIXTURE_VARIANT(map_invalid_gcs) ++{ ++ size_t stack_size; ++}; ++ ++FIXTURE_SETUP(map_invalid_gcs) ++{ ++} ++ ++FIXTURE_TEARDOWN(map_invalid_gcs) ++{ ++} ++ ++/* GCS must be larger than 16 bytes */ ++FIXTURE_VARIANT_ADD(map_invalid_gcs, too_small) ++{ ++ .stack_size = 8, ++}; ++ ++/* GCS size must be 16 byte aligned */ ++FIXTURE_VARIANT_ADD(map_invalid_gcs, unligned_1) { .stack_size = 1024 + 1 }; ++FIXTURE_VARIANT_ADD(map_invalid_gcs, unligned_2) { .stack_size = 1024 + 2 }; ++FIXTURE_VARIANT_ADD(map_invalid_gcs, unligned_3) { .stack_size = 1024 + 3 }; ++FIXTURE_VARIANT_ADD(map_invalid_gcs, unligned_4) { .stack_size = 1024 + 4 }; ++FIXTURE_VARIANT_ADD(map_invalid_gcs, unligned_5) { .stack_size = 1024 + 5 }; ++FIXTURE_VARIANT_ADD(map_invalid_gcs, unligned_6) { .stack_size = 1024 + 6 }; ++FIXTURE_VARIANT_ADD(map_invalid_gcs, unligned_7) { .stack_size = 1024 + 7 }; ++ ++TEST_F(map_invalid_gcs, do_map) ++{ ++ void *stack; ++ ++ stack = (void *)syscall(__NR_map_shadow_stack, 0, ++ variant->stack_size, 0); ++ ASSERT_TRUE(stack == MAP_FAILED); ++ if (stack != MAP_FAILED) ++ munmap(stack, variant->stack_size); ++} ++ ++FIXTURE(invalid_mprotect) ++{ ++ unsigned long *stack; ++ size_t stack_size; ++}; ++ ++FIXTURE_VARIANT(invalid_mprotect) ++{ ++ unsigned long flags; ++}; ++ ++FIXTURE_SETUP(invalid_mprotect) ++{ ++ self->stack_size = sysconf(_SC_PAGE_SIZE); ++ self->stack = (void *)syscall(__NR_map_shadow_stack, 0, ++ self->stack_size, 0); ++ ASSERT_FALSE(self->stack == MAP_FAILED); ++ ksft_print_msg("Allocated stack from %p-%p\n", self->stack, ++ (unsigned long)self->stack + self->stack_size); ++} ++ ++FIXTURE_TEARDOWN(invalid_mprotect) ++{ ++ int ret; ++ ++ if (self->stack != MAP_FAILED) { ++ ret = munmap(self->stack, self->stack_size); ++ ASSERT_EQ(ret, 0); ++ } ++} ++ 
++FIXTURE_VARIANT_ADD(invalid_mprotect, exec) ++{ ++ .flags = PROT_EXEC, ++}; ++ ++FIXTURE_VARIANT_ADD(invalid_mprotect, bti) ++{ ++ .flags = PROT_BTI, ++}; ++ ++FIXTURE_VARIANT_ADD(invalid_mprotect, exec_bti) ++{ ++ .flags = PROT_EXEC | PROT_BTI, ++}; ++ ++TEST_F(invalid_mprotect, do_map) ++{ ++ int ret; ++ ++ ret = mprotect(self->stack, self->stack_size, variant->flags); ++ ASSERT_EQ(ret, -1); ++} ++ ++TEST_F(invalid_mprotect, do_map_read) ++{ ++ int ret; ++ ++ ret = mprotect(self->stack, self->stack_size, ++ variant->flags | PROT_READ); ++ ASSERT_EQ(ret, -1); ++} ++ ++int main(int argc, char **argv) ++{ ++ unsigned long gcs_mode; ++ int ret; ++ ++ if (!(getauxval(AT_HWCAP2) & HWCAP2_GCS)) ++ ksft_exit_skip("SKIP GCS not supported\n"); ++ ++ /* ++ * Force shadow stacks on, our tests *should* be fine with or ++ * without libc support and with or without this having ended ++ * up tagged for GCS and enabled by the dynamic linker. We ++ * can't use the libc prctl() function since we can't return ++ * from enabling the stack. 
++ */ ++ ret = my_syscall2(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, &gcs_mode); ++ if (ret) { ++ ksft_print_msg("Failed to read GCS state: %d\n", ret); ++ return EXIT_FAILURE; ++ } ++ ++ if (!(gcs_mode & PR_SHADOW_STACK_ENABLE)) { ++ gcs_mode = PR_SHADOW_STACK_ENABLE; ++ ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, ++ gcs_mode); ++ if (ret) { ++ ksft_print_msg("Failed to configure GCS: %d\n", ret); ++ return EXIT_FAILURE; ++ } ++ } ++ ++ /* Avoid returning in case libc doesn't understand GCS */ ++ exit(test_harness_run(argc, argv)); ++} +-- +2.34.1 + + +From 93014b383e621ede703124d9f26b8d0d4f5a010a Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Fri, 21 Jul 2023 14:21:32 +0100 +Subject: [PATCH 44/47] kselftest/arm64: Add test coverage for GCS mode locking + +Verify that we can lock individual GCS mode bits, that other modes +aren't affected and as a side effect also that every combination of +modes can be enabled. + +Normally the inability to reenable GCS after disabling it would be an +issue with testing but fortunately the kselftest_harness runs each test +within a fork()ed child. This can be inconvenient for some kinds of +testing but here it means that each test is in a separate thread and +therefore won't be affected by other tests in the suite. + +Once we get toolchains with support for enabling GCS by default we will +need to take care to not do that in the build system but there are no +such toolchains yet so it is not yet an issue. 
+ +Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org> +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + tools/testing/selftests/arm64/gcs/.gitignore | 1 + + tools/testing/selftests/arm64/gcs/Makefile | 2 +- + .../testing/selftests/arm64/gcs/gcs-locking.c | 200 ++++++++++++++++++ + 3 files changed, 202 insertions(+), 1 deletion(-) + create mode 100644 tools/testing/selftests/arm64/gcs/gcs-locking.c + +diff --git a/tools/testing/selftests/arm64/gcs/.gitignore b/tools/testing/selftests/arm64/gcs/.gitignore +index 5810c4a163d4..0c86f53f68ad 100644 +--- a/tools/testing/selftests/arm64/gcs/.gitignore ++++ b/tools/testing/selftests/arm64/gcs/.gitignore +@@ -1,2 +1,3 @@ + basic-gcs + libc-gcs ++gcs-locking +diff --git a/tools/testing/selftests/arm64/gcs/Makefile b/tools/testing/selftests/arm64/gcs/Makefile +index a8fdf21e9a47..2173d6275956 100644 +--- a/tools/testing/selftests/arm64/gcs/Makefile ++++ b/tools/testing/selftests/arm64/gcs/Makefile +@@ -6,7 +6,7 @@ + # nolibc. + # + +-TEST_GEN_PROGS := basic-gcs libc-gcs ++TEST_GEN_PROGS := basic-gcs libc-gcs gcs-locking + + LDLIBS+=-lpthread + +diff --git a/tools/testing/selftests/arm64/gcs/gcs-locking.c b/tools/testing/selftests/arm64/gcs/gcs-locking.c +new file mode 100644 +index 000000000000..f6a73254317e +--- /dev/null ++++ b/tools/testing/selftests/arm64/gcs/gcs-locking.c +@@ -0,0 +1,200 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * Copyright (C) 2023 ARM Limited. ++ * ++ * Tests for GCS mode locking. These tests rely on both having GCS ++ * unconfigured on entry and on the kselftest harness running each ++ * test in a fork()ed process which will have it's own mode. 
++ */ ++ ++#include <limits.h> ++ ++#include <sys/auxv.h> ++#include <sys/prctl.h> ++ ++#include <asm/hwcap.h> ++ ++#include "kselftest_harness.h" ++ ++#include "gcs-util.h" ++ ++#define my_syscall2(num, arg1, arg2) \ ++({ \ ++ register long _num __asm__ ("x8") = (num); \ ++ register long _arg1 __asm__ ("x0") = (long)(arg1); \ ++ register long _arg2 __asm__ ("x1") = (long)(arg2); \ ++ register long _arg3 __asm__ ("x2") = 0; \ ++ register long _arg4 __asm__ ("x3") = 0; \ ++ register long _arg5 __asm__ ("x4") = 0; \ ++ \ ++ __asm__ volatile ( \ ++ "svc #0\n" \ ++ : "=r"(_arg1) \ ++ : "r"(_arg1), "r"(_arg2), \ ++ "r"(_arg3), "r"(_arg4), \ ++ "r"(_arg5), "r"(_num) \ ++ : "memory", "cc" \ ++ ); \ ++ _arg1; \ ++}) ++ ++/* No mode bits are rejected for locking */ ++TEST(lock_all_modes) ++{ ++ int ret; ++ ++ ret = prctl(PR_LOCK_SHADOW_STACK_STATUS, ULONG_MAX, 0, 0, 0); ++ ASSERT_EQ(ret, 0); ++} ++ ++FIXTURE(valid_modes) ++{ ++}; ++ ++FIXTURE_VARIANT(valid_modes) ++{ ++ unsigned long mode; ++}; ++ ++FIXTURE_VARIANT_ADD(valid_modes, enable) ++{ ++ .mode = PR_SHADOW_STACK_ENABLE, ++}; ++ ++FIXTURE_VARIANT_ADD(valid_modes, enable_write) ++{ ++ .mode = PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE, ++}; ++ ++FIXTURE_VARIANT_ADD(valid_modes, enable_push) ++{ ++ .mode = PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_PUSH, ++}; ++ ++FIXTURE_VARIANT_ADD(valid_modes, enable_write_push) ++{ ++ .mode = PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE | ++ PR_SHADOW_STACK_PUSH, ++}; ++ ++FIXTURE_SETUP(valid_modes) ++{ ++} ++ ++FIXTURE_TEARDOWN(valid_modes) ++{ ++} ++ ++/* We can set the mode at all */ ++TEST_F(valid_modes, set) ++{ ++ int ret; ++ ++ ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, ++ variant->mode); ++ ASSERT_EQ(ret, 0); ++ ++ _exit(0); ++} ++ ++/* Enabling, locking then disabling is rejected */ ++TEST_F(valid_modes, enable_lock_disable) ++{ ++ unsigned long mode; ++ int ret; ++ ++ ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, ++ variant->mode); ++ 
ASSERT_EQ(ret, 0); ++ ++ ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0); ++ ASSERT_EQ(ret, 0); ++ ASSERT_EQ(mode, variant->mode); ++ ++ ret = prctl(PR_LOCK_SHADOW_STACK_STATUS, variant->mode, 0, 0, 0); ++ ASSERT_EQ(ret, 0); ++ ++ ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, 0); ++ ASSERT_EQ(ret, -EBUSY); ++ ++ _exit(0); ++} ++ ++/* Locking then enabling is rejected */ ++TEST_F(valid_modes, lock_enable) ++{ ++ unsigned long mode; ++ int ret; ++ ++ ret = prctl(PR_LOCK_SHADOW_STACK_STATUS, variant->mode, 0, 0, 0); ++ ASSERT_EQ(ret, 0); ++ ++ ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, ++ variant->mode); ++ ASSERT_EQ(ret, -EBUSY); ++ ++ ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0); ++ ASSERT_EQ(ret, 0); ++ ASSERT_EQ(mode, 0); ++ ++ _exit(0); ++} ++ ++/* Locking then changing other modes is fine */ ++TEST_F(valid_modes, lock_enable_disable_others) ++{ ++ unsigned long mode; ++ int ret; ++ ++ ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, ++ variant->mode); ++ ASSERT_EQ(ret, 0); ++ ++ ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0); ++ ASSERT_EQ(ret, 0); ++ ASSERT_EQ(mode, variant->mode); ++ ++ ret = prctl(PR_LOCK_SHADOW_STACK_STATUS, variant->mode, 0, 0, 0); ++ ASSERT_EQ(ret, 0); ++ ++ ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, ++ PR_SHADOW_STACK_ALL_MODES); ++ ASSERT_EQ(ret, 0); ++ ++ ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0); ++ ASSERT_EQ(ret, 0); ++ ASSERT_EQ(mode, PR_SHADOW_STACK_ALL_MODES); ++ ++ ++ ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, ++ variant->mode); ++ ASSERT_EQ(ret, 0); ++ ++ ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0); ++ ASSERT_EQ(ret, 0); ++ ASSERT_EQ(mode, variant->mode); ++ ++ _exit(0); ++} ++ ++int main(int argc, char **argv) ++{ ++ unsigned long mode; ++ int ret; ++ ++ if (!(getauxval(AT_HWCAP2) & HWCAP2_GCS)) ++ ksft_exit_skip("SKIP GCS not supported\n"); ++ ++ ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0); ++ if 
(ret) { ++ ksft_print_msg("Failed to read GCS state: %d\n", ret); ++ return EXIT_FAILURE; ++ } ++ ++ if (mode & PR_SHADOW_STACK_ENABLE) { ++ ksft_print_msg("GCS was enabled, test unsupported\n"); ++ return KSFT_SKIP; ++ } ++ ++ return test_harness_run(argc, argv); ++} +-- +2.34.1 + + +From 8bb3f253e14703f8c4213fd45ff120d07847cfb9 Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Fri, 16 Jun 2023 22:13:44 +0100 +Subject: [PATCH 45/47] kselftest/arm64: Add GCS signal tests + +Do some testing of the signal handling for GCS, checking that a GCS +frame has the expected information in it and that the expected signals +are delivered with invalid operations. + +Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org> +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + .../testing/selftests/arm64/signal/.gitignore | 1 + + .../arm64/signal/test_signals_utils.h | 10 +++ + .../signal/testcases/gcs_exception_fault.c | 62 +++++++++++++ + .../arm64/signal/testcases/gcs_frame.c | 88 +++++++++++++++++++ + .../arm64/signal/testcases/gcs_write_fault.c | 67 ++++++++++++++ + 5 files changed, 228 insertions(+) + create mode 100644 tools/testing/selftests/arm64/signal/testcases/gcs_exception_fault.c + create mode 100644 tools/testing/selftests/arm64/signal/testcases/gcs_frame.c + create mode 100644 tools/testing/selftests/arm64/signal/testcases/gcs_write_fault.c + +diff --git a/tools/testing/selftests/arm64/signal/.gitignore b/tools/testing/selftests/arm64/signal/.gitignore +index 839e3a252629..26de12918890 100644 +--- a/tools/testing/selftests/arm64/signal/.gitignore ++++ b/tools/testing/selftests/arm64/signal/.gitignore +@@ -1,6 +1,7 @@ + # SPDX-License-Identifier: GPL-2.0-only + mangle_* + fake_sigreturn_* ++gcs_* + sme_* + ssve_* + sve_* +diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.h b/tools/testing/selftests/arm64/signal/test_signals_utils.h +index 1e80808ee105..36fc12b3cd60 100644 +--- 
a/tools/testing/selftests/arm64/signal/test_signals_utils.h ++++ b/tools/testing/selftests/arm64/signal/test_signals_utils.h +@@ -6,6 +6,7 @@ + + #include <assert.h> + #include <stdio.h> ++#include <stdint.h> + #include <string.h> + + #include <linux/compiler.h> +@@ -47,6 +48,15 @@ void test_result(struct tdescr *td); + _arg1; \ + }) + ++static inline __attribute__((always_inline)) uint64_t get_gcspr_el0(void) ++{ ++ uint64_t val; ++ ++ asm volatile("mrs %0, S3_3_C2_C5_1" : "=r" (val)); ++ ++ return val; ++} ++ + static inline bool feats_ok(struct tdescr *td) + { + if (td->feats_incompatible & td->feats_supported) +diff --git a/tools/testing/selftests/arm64/signal/testcases/gcs_exception_fault.c b/tools/testing/selftests/arm64/signal/testcases/gcs_exception_fault.c +new file mode 100644 +index 000000000000..6228448b2ae7 +--- /dev/null ++++ b/tools/testing/selftests/arm64/signal/testcases/gcs_exception_fault.c +@@ -0,0 +1,62 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Copyright (C) 2023 ARM Limited ++ */ ++ ++#include <errno.h> ++#include <signal.h> ++#include <unistd.h> ++ ++#include <sys/mman.h> ++#include <sys/prctl.h> ++ ++#include "test_signals_utils.h" ++#include "testcases.h" ++ ++/* ++ * We should get this from asm/siginfo.h but the testsuite is being ++ * clever with redefining siginfo_t. ++ */ ++#ifndef SEGV_CPERR ++#define SEGV_CPERR 10 ++#endif ++ ++static inline void gcsss1(uint64_t Xt) ++{ ++ asm volatile ( ++ "sys #3, C7, C7, #2, %0\n" ++ : ++ : "rZ" (Xt) ++ : "memory"); ++} ++ ++static int gcs_op_fault_trigger(struct tdescr *td) ++{ ++ /* ++ * The slot below our current GCS should be in a valid GCS but ++ * must not have a valid cap in it. 
++ */ ++ gcsss1(get_gcspr_el0() - 8); ++ ++ return 0; ++} ++ ++static int gcs_op_fault_signal(struct tdescr *td, siginfo_t *si, ++ ucontext_t *uc) ++{ ++ ASSERT_GOOD_CONTEXT(uc); ++ ++ return 1; ++} ++ ++struct tdescr tde = { ++ .name = "Invalid GCS operation", ++ .descr = "An invalid GCS operation generates the expected signal", ++ .feats_required = FEAT_GCS, ++ .timeout = 3, ++ .sig_ok = SIGSEGV, ++ .sig_ok_code = SEGV_CPERR, ++ .sanity_disabled = true, ++ .trigger = gcs_op_fault_trigger, ++ .run = gcs_op_fault_signal, ++}; +diff --git a/tools/testing/selftests/arm64/signal/testcases/gcs_frame.c b/tools/testing/selftests/arm64/signal/testcases/gcs_frame.c +new file mode 100644 +index 000000000000..b405d82321da +--- /dev/null ++++ b/tools/testing/selftests/arm64/signal/testcases/gcs_frame.c +@@ -0,0 +1,88 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Copyright (C) 2023 ARM Limited ++ */ ++ ++#include <signal.h> ++#include <ucontext.h> ++#include <sys/prctl.h> ++ ++#include "test_signals_utils.h" ++#include "testcases.h" ++ ++static union { ++ ucontext_t uc; ++ char buf[1024 * 64]; ++} context; ++ ++static int gcs_regs(struct tdescr *td, siginfo_t *si, ucontext_t *uc) ++{ ++ size_t offset; ++ struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context); ++ struct gcs_context *gcs; ++ unsigned long expected, gcspr; ++ uint64_t *u64_val; ++ int ret; ++ ++ ret = prctl(PR_GET_SHADOW_STACK_STATUS, &expected, 0, 0, 0); ++ if (ret != 0) { ++ fprintf(stderr, "Unable to query GCS status\n"); ++ return 1; ++ } ++ ++ /* We expect a cap to be added to the GCS in the signal frame */ ++ gcspr = get_gcspr_el0(); ++ gcspr -= 8; ++ fprintf(stderr, "Expecting GCSPR_EL0 %lx\n", gcspr); ++ ++ if (!get_current_context(td, &context.uc, sizeof(context))) { ++ fprintf(stderr, "Failed getting context\n"); ++ return 1; ++ } ++ ++ /* Ensure that the signal restore token was consumed */ ++ u64_val = (uint64_t *)get_gcspr_el0() + 1; ++ if (*u64_val) { ++ fprintf(stderr, "GCS value at %p is %lx 
not 0\n", ++ u64_val, *u64_val); ++ return 1; ++ } ++ ++ fprintf(stderr, "Got context\n"); ++ ++ head = get_header(head, GCS_MAGIC, GET_BUF_RESV_SIZE(context), ++ &offset); ++ if (!head) { ++ fprintf(stderr, "No GCS context\n"); ++ return 1; ++ } ++ ++ gcs = (struct gcs_context *)head; ++ ++ /* Basic size validation is done in get_current_context() */ ++ ++ if (gcs->features_enabled != expected) { ++ fprintf(stderr, "Features enabled %llx but expected %lx\n", ++ gcs->features_enabled, expected); ++ return 1; ++ } ++ ++ if (gcs->gcspr != gcspr) { ++ fprintf(stderr, "Got GCSPR %llx but expected %lx\n", ++ gcs->gcspr, gcspr); ++ return 1; ++ } ++ ++ fprintf(stderr, "GCS context validated\n"); ++ td->pass = 1; ++ ++ return 0; ++} ++ ++struct tdescr tde = { ++ .name = "GCS basics", ++ .descr = "Validate a GCS signal context", ++ .feats_required = FEAT_GCS, ++ .timeout = 3, ++ .run = gcs_regs, ++}; +diff --git a/tools/testing/selftests/arm64/signal/testcases/gcs_write_fault.c b/tools/testing/selftests/arm64/signal/testcases/gcs_write_fault.c +new file mode 100644 +index 000000000000..faeabb18c4b2 +--- /dev/null ++++ b/tools/testing/selftests/arm64/signal/testcases/gcs_write_fault.c +@@ -0,0 +1,67 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Copyright (C) 2023 ARM Limited ++ */ ++ ++#include <errno.h> ++#include <signal.h> ++#include <unistd.h> ++ ++#include <sys/mman.h> ++#include <sys/prctl.h> ++ ++#include "test_signals_utils.h" ++#include "testcases.h" ++ ++static uint64_t *gcs_page; ++ ++#ifndef __NR_map_shadow_stack ++#define __NR_map_shadow_stack 453 ++#endif ++ ++static bool alloc_gcs(struct tdescr *td) ++{ ++ long page_size = sysconf(_SC_PAGE_SIZE); ++ ++ gcs_page = (void *)syscall(__NR_map_shadow_stack, 0, ++ page_size, 0); ++ if (gcs_page == MAP_FAILED) { ++ fprintf(stderr, "Failed to map %ld byte GCS: %d\n", ++ page_size, errno); ++ return false; ++ } ++ ++ return true; ++} ++ ++static int gcs_write_fault_trigger(struct tdescr *td) ++{ ++ /* Verify that 
the page is readable (ie, not completely unmapped) */ ++ fprintf(stderr, "Read value 0x%lx\n", gcs_page[0]); ++ ++ /* A regular write should trigger a fault */ ++ gcs_page[0] = EINVAL; ++ ++ return 0; ++} ++ ++static int gcs_write_fault_signal(struct tdescr *td, siginfo_t *si, ++ ucontext_t *uc) ++{ ++ ASSERT_GOOD_CONTEXT(uc); ++ ++ return 1; ++} ++ ++ ++struct tdescr tde = { ++ .name = "GCS write fault", ++ .descr = "Normal writes to a GCS segfault", ++ .feats_required = FEAT_GCS, ++ .timeout = 3, ++ .sig_ok = SIGSEGV, ++ .sanity_disabled = true, ++ .init = alloc_gcs, ++ .trigger = gcs_write_fault_trigger, ++ .run = gcs_write_fault_signal, ++}; +-- +2.34.1 + + +From f378d27d073d96254a972ad48b14f12fa684e9ac Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Wed, 26 Jul 2023 22:27:08 +0100 +Subject: [PATCH 46/47] kselftest/arm64: Add a GCS stress test + +Add a stress test which runs one more process than we have CPUs spinning +through a very recursive function with frequent syscalls immediately prior +to return and signals being injected every 100ms. The goal is to flag up +any scheduling related issues, for example failure to ensure that barriers +are inserted when moving a GCS using task to another CPU. The test runs for +a configurable amount of time, defaulting to 10 seconds. 
+ +Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org> +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + tools/testing/selftests/arm64/gcs/.gitignore | 2 + + tools/testing/selftests/arm64/gcs/Makefile | 6 +- + .../testing/selftests/arm64/gcs/asm-offsets.h | 0 + .../selftests/arm64/gcs/gcs-stress-thread.S | 311 ++++++++++ + .../testing/selftests/arm64/gcs/gcs-stress.c | 532 ++++++++++++++++++ + 5 files changed, 850 insertions(+), 1 deletion(-) + create mode 100644 tools/testing/selftests/arm64/gcs/asm-offsets.h + create mode 100644 tools/testing/selftests/arm64/gcs/gcs-stress-thread.S + create mode 100644 tools/testing/selftests/arm64/gcs/gcs-stress.c + +diff --git a/tools/testing/selftests/arm64/gcs/.gitignore b/tools/testing/selftests/arm64/gcs/.gitignore +index 0c86f53f68ad..1e8d1f6b27f2 100644 +--- a/tools/testing/selftests/arm64/gcs/.gitignore ++++ b/tools/testing/selftests/arm64/gcs/.gitignore +@@ -1,3 +1,5 @@ + basic-gcs + libc-gcs + gcs-locking ++gcs-stress ++gcs-stress-thread +diff --git a/tools/testing/selftests/arm64/gcs/Makefile b/tools/testing/selftests/arm64/gcs/Makefile +index 2173d6275956..d8b06ca51e22 100644 +--- a/tools/testing/selftests/arm64/gcs/Makefile ++++ b/tools/testing/selftests/arm64/gcs/Makefile +@@ -6,7 +6,8 @@ + # nolibc. + # + +-TEST_GEN_PROGS := basic-gcs libc-gcs gcs-locking ++TEST_GEN_PROGS := basic-gcs libc-gcs gcs-locking gcs-stress ++TEST_GEN_PROGS_EXTENDED := gcs-stress-thread + + LDLIBS+=-lpthread + +@@ -18,3 +19,6 @@ $(OUTPUT)/basic-gcs: basic-gcs.c + -I../../../../../usr/include \ + -std=gnu99 -I../.. 
-g \ + -ffreestanding -Wall $^ -o $@ -lgcc ++ ++$(OUTPUT)/gcs-stress-thread: gcs-stress-thread.S ++ $(CC) -nostdlib $^ -o $@ +diff --git a/tools/testing/selftests/arm64/gcs/asm-offsets.h b/tools/testing/selftests/arm64/gcs/asm-offsets.h +new file mode 100644 +index 000000000000..e69de29bb2d1 +diff --git a/tools/testing/selftests/arm64/gcs/gcs-stress-thread.S b/tools/testing/selftests/arm64/gcs/gcs-stress-thread.S +new file mode 100644 +index 000000000000..2a08d6bf1ced +--- /dev/null ++++ b/tools/testing/selftests/arm64/gcs/gcs-stress-thread.S +@@ -0,0 +1,311 @@ ++// Program that loops for ever doing lots of recursions and system calls, ++// intended to be used as part of a stress test for GCS context switching. ++// ++// Copyright 2015-2023 Arm Ltd ++ ++#include <asm/unistd.h> ++ ++#define sa_sz 32 ++#define sa_flags 8 ++#define sa_handler 0 ++#define sa_mask_sz 8 ++ ++#define si_code 8 ++ ++#define SIGINT 2 ++#define SIGABRT 6 ++#define SIGUSR1 10 ++#define SIGSEGV 11 ++#define SIGUSR2 12 ++#define SIGTERM 15 ++#define SEGV_CPERR 10 ++ ++#define SA_NODEFER 1073741824 ++#define SA_SIGINFO 4 ++#define ucontext_regs 184 ++ ++#define PR_SET_SHADOW_STACK_STATUS 72 ++# define PR_SHADOW_STACK_ENABLE (1UL << 0) ++ ++#define GCSPR_EL0 S3_3_C2_C5_1 ++ ++.macro function name ++ .macro endfunction ++ .type \name, @function ++ .purgem endfunction ++ .endm ++\name: ++.endm ++ ++// Print a single character x0 to stdout ++// Clobbers x0-x2,x8 ++function putc ++ str x0, [sp, #-16]! 
++ ++ mov x0, #1 // STDOUT_FILENO ++ mov x1, sp ++ mov x2, #1 ++ mov x8, #__NR_write ++ svc #0 ++ ++ add sp, sp, #16 ++ ret ++endfunction ++.globl putc ++ ++// Print a NUL-terminated string starting at address x0 to stdout ++// Clobbers x0-x3,x8 ++function puts ++ mov x1, x0 ++ ++ mov x2, #0 ++0: ldrb w3, [x0], #1 ++ cbz w3, 1f ++ add x2, x2, #1 ++ b 0b ++ ++1: mov w0, #1 // STDOUT_FILENO ++ mov x8, #__NR_write ++ svc #0 ++ ++ ret ++endfunction ++.globl puts ++ ++// Utility macro to print a literal string ++// Clobbers x0-x4,x8 ++.macro puts string ++ .pushsection .rodata.str1.1, "aMS", @progbits, 1 ++.L__puts_literal\@: .string "\string" ++ .popsection ++ ++ ldr x0, =.L__puts_literal\@ ++ bl puts ++.endm ++ ++// Print an unsigned decimal number x0 to stdout ++// Clobbers x0-x4,x8 ++function putdec ++ mov x1, sp ++ str x30, [sp, #-32]! // Result can't be > 20 digits ++ ++ mov x2, #0 ++ strb w2, [x1, #-1]! // Write the NUL terminator ++ ++ mov x2, #10 ++0: udiv x3, x0, x2 // div-mod loop to generate the digits ++ msub x0, x3, x2, x0 ++ add w0, w0, #'0' ++ strb w0, [x1, #-1]! ++ mov x0, x3 ++ cbnz x3, 0b ++ ++ ldrb w0, [x1] ++ cbnz w0, 1f ++ mov w0, #'0' // Print "0" for 0, not "" ++ strb w0, [x1, #-1]! ++ ++1: mov x0, x1 ++ bl puts ++ ++ ldr x30, [sp], #32 ++ ret ++endfunction ++.globl putdec ++ ++// Print an unsigned decimal number x0 to stdout, followed by a newline ++// Clobbers x0-x5,x8 ++function putdecn ++ mov x5, x30 ++ ++ bl putdec ++ mov x0, #'\n' ++ bl putc ++ ++ ret x5 ++endfunction ++.globl putdecn ++ ++// Fill x1 bytes starting at x0 with 0. ++// Clobbers x1, x2. 
++function memclr ++ mov w2, #0 ++endfunction ++.globl memclr ++ // fall through to memfill ++ ++// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2 ++// Clobbers x1 ++function memfill ++ cmp x1, #0 ++ b.eq 1f ++ ++0: strb w2, [x0], #1 ++ subs x1, x1, #1 ++ b.ne 0b ++ ++1: ret ++endfunction ++.globl memfill ++ ++// w0: signal number ++// x1: sa_action ++// w2: sa_flags ++// Clobbers x0-x6,x8 ++function setsignal ++ str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]! ++ ++ mov w4, w0 ++ mov x5, x1 ++ mov w6, w2 ++ ++ add x0, sp, #16 ++ mov x1, #sa_sz ++ bl memclr ++ ++ mov w0, w4 ++ add x1, sp, #16 ++ str w6, [x1, #sa_flags] ++ str x5, [x1, #sa_handler] ++ mov x2, #0 ++ mov x3, #sa_mask_sz ++ mov x8, #__NR_rt_sigaction ++ svc #0 ++ ++ cbz w0, 1f ++ ++ puts "sigaction failure\n" ++ b abort ++ ++1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16) ++ ret ++endfunction ++ ++ ++function tickle_handler ++ // Perhaps collect GCSPR_EL0 here in future? ++ ret ++endfunction ++ ++function terminate_handler ++ mov w21, w0 ++ mov x20, x2 ++ ++ puts "Terminated by signal " ++ mov w0, w21 ++ bl putdec ++ puts ", no error\n" ++ ++ mov x0, #0 ++ mov x8, #__NR_exit ++ svc #0 ++endfunction ++ ++function segv_handler ++ // stash the siginfo_t * ++ mov x20, x1 ++ ++ // Disable GCS, we don't want additional faults logging things ++ mov x0, PR_SET_SHADOW_STACK_STATUS ++ mov x1, xzr ++ mov x2, xzr ++ mov x3, xzr ++ mov x4, xzr ++ mov x5, xzr ++ mov x8, #__NR_prctl ++ svc #0 ++ ++ puts "Got SIGSEGV code " ++ ++ ldr x21, [x20, #si_code] ++ mov x0, x21 ++ bl putdec ++ ++ // GCS faults should have si_code SEGV_CPERR ++ cmp x21, #SEGV_CPERR ++ bne 1f ++ ++ puts " (GCS violation)" ++1: ++ mov x0, '\n' ++ bl putc ++ b abort ++endfunction ++ ++// Recurse x20 times ++.macro recurse id ++function recurse\id ++ stp x29, x30, [sp, #-16]! 
++ mov x29, sp ++ ++ cmp x20, 0 ++ beq 1f ++ sub x20, x20, 1 ++ bl recurse\id ++ ++1: ++ ldp x29, x30, [sp], #16 ++ ++ // Do a syscall immediately prior to returning to try to provoke ++ // scheduling and migration at a point where coherency issues ++ // might trigger. ++ mov x8, #__NR_getpid ++ svc #0 ++ ++ ret ++endfunction ++.endm ++ ++// Generate and use two copies so we're changing the GCS contents ++recurse 1 ++recurse 2 ++ ++.globl _start ++function _start ++ // Run with GCS ++ mov x0, PR_SET_SHADOW_STACK_STATUS ++ mov x1, PR_SHADOW_STACK_ENABLE ++ mov x2, xzr ++ mov x3, xzr ++ mov x4, xzr ++ mov x5, xzr ++ mov x8, #__NR_prctl ++ svc #0 ++ cbz x0, 1f ++ puts "Failed to enable GCS\n" ++ b abort ++1: ++ ++ mov w0, #SIGTERM ++ adr x1, terminate_handler ++ mov w2, #SA_SIGINFO ++ bl setsignal ++ ++ mov w0, #SIGUSR1 ++ adr x1, tickle_handler ++ mov w2, #SA_SIGINFO ++ orr w2, w2, #SA_NODEFER ++ bl setsignal ++ ++ mov w0, #SIGSEGV ++ adr x1, segv_handler ++ mov w2, #SA_SIGINFO ++ orr w2, w2, #SA_NODEFER ++ bl setsignal ++ ++ puts "Running\n" ++ ++loop: ++ // Small recursion depth so we're frequently flipping between ++ // the two recursors and changing what's on the stack ++ mov x20, #5 ++ bl recurse1 ++ mov x20, #5 ++ bl recurse2 ++ b loop ++endfunction ++ ++abort: ++ mov x0, #255 ++ mov x8, #__NR_exit ++ svc #0 +diff --git a/tools/testing/selftests/arm64/gcs/gcs-stress.c b/tools/testing/selftests/arm64/gcs/gcs-stress.c +new file mode 100644 +index 000000000000..23fd8ec37bdc +--- /dev/null ++++ b/tools/testing/selftests/arm64/gcs/gcs-stress.c +@@ -0,0 +1,532 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * Copyright (C) 2022-3 ARM Limited. 
++ */ ++ ++#define _GNU_SOURCE ++#define _POSIX_C_SOURCE 199309L ++ ++#include <errno.h> ++#include <getopt.h> ++#include <poll.h> ++#include <signal.h> ++#include <stdbool.h> ++#include <stddef.h> ++#include <stdio.h> ++#include <stdlib.h> ++#include <string.h> ++#include <unistd.h> ++#include <sys/auxv.h> ++#include <sys/epoll.h> ++#include <sys/prctl.h> ++#include <sys/types.h> ++#include <sys/uio.h> ++#include <sys/wait.h> ++#include <asm/hwcap.h> ++ ++#include "../../kselftest.h" ++ ++struct child_data { ++ char *name, *output; ++ pid_t pid; ++ int stdout; ++ bool output_seen; ++ bool exited; ++ int exit_status; ++ int exit_signal; ++}; ++ ++static int epoll_fd; ++static struct child_data *children; ++static struct epoll_event *evs; ++static int tests; ++static int num_children; ++static bool terminate; ++ ++static int startup_pipe[2]; ++ ++static int num_processors(void) ++{ ++ long nproc = sysconf(_SC_NPROCESSORS_CONF); ++ if (nproc < 0) { ++ perror("Unable to read number of processors\n"); ++ exit(EXIT_FAILURE); ++ } ++ ++ return nproc; ++} ++ ++static void start_thread(struct child_data *child) ++{ ++ int ret, pipefd[2], i; ++ struct epoll_event ev; ++ ++ ret = pipe(pipefd); ++ if (ret != 0) ++ ksft_exit_fail_msg("Failed to create stdout pipe: %s (%d)\n", ++ strerror(errno), errno); ++ ++ child->pid = fork(); ++ if (child->pid == -1) ++ ksft_exit_fail_msg("fork() failed: %s (%d)\n", ++ strerror(errno), errno); ++ ++ if (!child->pid) { ++ /* ++ * In child, replace stdout with the pipe, errors to ++ * stderr from here as kselftest prints to stdout. ++ */ ++ ret = dup2(pipefd[1], 1); ++ if (ret == -1) { ++ fprintf(stderr, "dup2() %d\n", errno); ++ exit(EXIT_FAILURE); ++ } ++ ++ /* ++ * Duplicate the read side of the startup pipe to ++ * FD 3 so we can close everything else. 
++ */ ++ ret = dup2(startup_pipe[0], 3); ++ if (ret == -1) { ++ fprintf(stderr, "dup2() %d\n", errno); ++ exit(EXIT_FAILURE); ++ } ++ ++ /* ++ * Very dumb mechanism to clean open FDs other than ++ * stdio. We don't want O_CLOEXEC for the pipes... ++ */ ++ for (i = 4; i < 8192; i++) ++ close(i); ++ ++ /* ++ * Read from the startup pipe, there should be no data ++ * and we should block until it is closed. We just ++ * carry on on error since this isn't super critical. ++ */ ++ ret = read(3, &i, sizeof(i)); ++ if (ret < 0) ++ fprintf(stderr, "read(startp pipe) failed: %s (%d)\n", ++ strerror(errno), errno); ++ if (ret > 0) ++ fprintf(stderr, "%d bytes of data on startup pipe\n", ++ ret); ++ close(3); ++ ++ ret = execl("gcs-stress-thread", "gcs-stress-thread", NULL); ++ fprintf(stderr, "execl(gcs-stress-thread) failed: %d (%s)\n", ++ errno, strerror(errno)); ++ ++ exit(EXIT_FAILURE); ++ } else { ++ /* ++ * In parent, remember the child and close our copy of the ++ * write side of stdout. ++ */ ++ close(pipefd[1]); ++ child->stdout = pipefd[0]; ++ child->output = NULL; ++ child->exited = false; ++ child->output_seen = false; ++ ++ ev.events = EPOLLIN | EPOLLHUP; ++ ev.data.ptr = child; ++ ++ ret = asprintf(&child->name, "Thread-%d", child->pid); ++ if (ret == -1) ++ ksft_exit_fail_msg("asprintf() failed\n"); ++ ++ ret = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, child->stdout, &ev); ++ if (ret < 0) { ++ ksft_exit_fail_msg("%s EPOLL_CTL_ADD failed: %s (%d)\n", ++ child->name, strerror(errno), errno); ++ } ++ } ++ ++ ksft_print_msg("Started %s\n", child->name); ++ num_children++; ++} ++ ++static bool child_output_read(struct child_data *child) ++{ ++ char read_data[1024]; ++ char work[1024]; ++ int ret, len, cur_work, cur_read; ++ ++ ret = read(child->stdout, read_data, sizeof(read_data)); ++ if (ret < 0) { ++ if (errno == EINTR) ++ return true; ++ ++ ksft_print_msg("%s: read() failed: %s (%d)\n", ++ child->name, strerror(errno), ++ errno); ++ return false; ++ } ++ len = ret; ++ 
++ child->output_seen = true; ++ ++ /* Pick up any partial read */ ++ if (child->output) { ++ strncpy(work, child->output, sizeof(work) - 1); ++ cur_work = strnlen(work, sizeof(work)); ++ free(child->output); ++ child->output = NULL; ++ } else { ++ cur_work = 0; ++ } ++ ++ cur_read = 0; ++ while (cur_read < len) { ++ work[cur_work] = read_data[cur_read++]; ++ ++ if (work[cur_work] == '\n') { ++ work[cur_work] = '\0'; ++ ksft_print_msg("%s: %s\n", child->name, work); ++ cur_work = 0; ++ } else { ++ cur_work++; ++ } ++ } ++ ++ if (cur_work) { ++ work[cur_work] = '\0'; ++ ret = asprintf(&child->output, "%s", work); ++ if (ret == -1) ++ ksft_exit_fail_msg("Out of memory\n"); ++ } ++ ++ return false; ++} ++ ++static void child_output(struct child_data *child, uint32_t events, ++ bool flush) ++{ ++ bool read_more; ++ ++ if (events & EPOLLIN) { ++ do { ++ read_more = child_output_read(child); ++ } while (read_more); ++ } ++ ++ if (events & EPOLLHUP) { ++ close(child->stdout); ++ child->stdout = -1; ++ flush = true; ++ } ++ ++ if (flush && child->output) { ++ ksft_print_msg("%s: %s<EOF>\n", child->name, child->output); ++ free(child->output); ++ child->output = NULL; ++ } ++} ++ ++static void child_tickle(struct child_data *child) ++{ ++ if (child->output_seen && !child->exited) ++ kill(child->pid, SIGUSR1); ++} ++ ++static void child_stop(struct child_data *child) ++{ ++ if (!child->exited) ++ kill(child->pid, SIGTERM); ++} ++ ++static void child_cleanup(struct child_data *child) ++{ ++ pid_t ret; ++ int status; ++ bool fail = false; ++ ++ if (!child->exited) { ++ do { ++ ret = waitpid(child->pid, &status, 0); ++ if (ret == -1 && errno == EINTR) ++ continue; ++ ++ if (ret == -1) { ++ ksft_print_msg("waitpid(%d) failed: %s (%d)\n", ++ child->pid, strerror(errno), ++ errno); ++ fail = true; ++ break; ++ } ++ ++ if (WIFEXITED(status)) { ++ child->exit_status = WEXITSTATUS(status); ++ child->exited = true; ++ } ++ ++ if (WIFSIGNALED(status)) { ++ child->exit_signal = 
WTERMSIG(status);
++			ksft_print_msg("%s: Exited due to signal %d\n",
++				       child->name, child->exit_signal);
++			fail = true;
++			child->exited = true;
++		}
++	} while (!child->exited);
++	}
++
++	if (!child->output_seen) {
++		ksft_print_msg("%s no output seen\n", child->name);
++		fail = true;
++	}
++
++	if (child->exit_status != 0) {
++		ksft_print_msg("%s exited with error code %d\n",
++			       child->name, child->exit_status);
++		fail = true;
++	}
++
++	ksft_test_result(!fail, "%s\n", child->name);
++}
++
++static void handle_child_signal(int sig, siginfo_t *info, void *context)
++{
++	int i;
++	bool found = false;
++
++	for (i = 0; i < num_children; i++) {
++		if (children[i].pid == info->si_pid) {
++			children[i].exited = true;
++			children[i].exit_status = info->si_status;
++			found = true;
++			break;
++		}
++	}
++
++	if (!found)
++		ksft_print_msg("SIGCHLD for unknown PID %d with status %d\n",
++			       info->si_pid, info->si_status);
++}
++
++static void handle_exit_signal(int sig, siginfo_t *info, void *context)
++{
++	int i;
++
++	/* If we're already exiting then don't signal again */
++	if (terminate)
++		return;
++
++	ksft_print_msg("Got signal, exiting...\n");
++
++	terminate = true;
++
++	/*
++	 * This should be redundant, the main loop should clean up
++	 * after us, but for safety stop everything we can here.
++ */ ++ for (i = 0; i < num_children; i++) ++ child_stop(&children[i]); ++} ++ ++/* Handle any pending output without blocking */ ++static void drain_output(bool flush) ++{ ++ int ret = 1; ++ int i; ++ ++ while (ret > 0) { ++ ret = epoll_wait(epoll_fd, evs, tests, 0); ++ if (ret < 0) { ++ if (errno == EINTR) ++ continue; ++ ksft_print_msg("epoll_wait() failed: %s (%d)\n", ++ strerror(errno), errno); ++ } ++ ++ for (i = 0; i < ret; i++) ++ child_output(evs[i].data.ptr, evs[i].events, flush); ++ } ++} ++ ++static const struct option options[] = { ++ { "timeout", required_argument, NULL, 't' }, ++ { } ++}; ++ ++int main(int argc, char **argv) ++{ ++ int seen_children; ++ bool all_children_started = false; ++ int gcs_threads; ++ int timeout = 10; ++ int ret, cpus, i, c; ++ struct sigaction sa; ++ ++ while ((c = getopt_long(argc, argv, "t:", options, NULL)) != -1) { ++ switch (c) { ++ case 't': ++ ret = sscanf(optarg, "%d", &timeout); ++ if (ret != 1) ++ ksft_exit_fail_msg("Failed to parse timeout %s\n", ++ optarg); ++ break; ++ default: ++ ksft_exit_fail_msg("Unknown argument\n"); ++ } ++ } ++ ++ cpus = num_processors(); ++ tests = 0; ++ ++ if (getauxval(AT_HWCAP2) & HWCAP2_GCS) { ++ /* One extra thread, trying to trigger migrations */ ++ gcs_threads = cpus + 1; ++ tests += gcs_threads; ++ } else { ++ gcs_threads = 0; ++ } ++ ++ ksft_print_header(); ++ ksft_set_plan(tests); ++ ++ ksft_print_msg("%d CPUs, %d GCS threads\n", ++ cpus, gcs_threads); ++ ++ if (!tests) ++ ksft_exit_skip("No tests scheduled\n"); ++ ++ if (timeout > 0) ++ ksft_print_msg("Will run for %ds\n", timeout); ++ else ++ ksft_print_msg("Will run until terminated\n"); ++ ++ children = calloc(sizeof(*children), tests); ++ if (!children) ++ ksft_exit_fail_msg("Unable to allocate child data\n"); ++ ++ ret = epoll_create1(EPOLL_CLOEXEC); ++ if (ret < 0) ++ ksft_exit_fail_msg("epoll_create1() failed: %s (%d)\n", ++ strerror(errno), ret); ++ epoll_fd = ret; ++ ++ /* Create a pipe which children will block on 
before execing */
++	ret = pipe(startup_pipe);
++	if (ret != 0)
++		ksft_exit_fail_msg("Failed to create startup pipe: %s (%d)\n",
++				   strerror(errno), errno);
++
++	/* Get signal handlers ready before we start any children */
++	memset(&sa, 0, sizeof(sa));
++	sa.sa_sigaction = handle_exit_signal;
++	sa.sa_flags = SA_RESTART | SA_SIGINFO;
++	sigemptyset(&sa.sa_mask);
++	ret = sigaction(SIGINT, &sa, NULL);
++	if (ret < 0)
++		ksft_print_msg("Failed to install SIGINT handler: %s (%d)\n",
++			       strerror(errno), errno);
++	ret = sigaction(SIGTERM, &sa, NULL);
++	if (ret < 0)
++		ksft_print_msg("Failed to install SIGTERM handler: %s (%d)\n",
++			       strerror(errno), errno);
++	sa.sa_sigaction = handle_child_signal;
++	ret = sigaction(SIGCHLD, &sa, NULL);
++	if (ret < 0)
++		ksft_print_msg("Failed to install SIGCHLD handler: %s (%d)\n",
++			       strerror(errno), errno);
++
++	evs = calloc(tests, sizeof(*evs));
++	if (!evs)
++		ksft_exit_fail_msg("Failed to allocate %d epoll events\n",
++			    tests);
++
++	for (i = 0; i < gcs_threads; i++)
++		start_thread(&children[i]);
++
++	/*
++	 * All children started, close the startup pipe and let them
++	 * run.
++	 */
++	close(startup_pipe[0]);
++	close(startup_pipe[1]);
++
++	timeout *= 10;
++	for (;;) {
++		/* Did we get a signal asking us to exit? */
++		if (terminate)
++			break;
++
++		/*
++		 * Timeout is counted in 100ms with no output, the
++		 * tests print during startup then are silent when
++		 * running so this should ensure they all ran enough
++		 * to install the signal handler, this is especially
++		 * useful in emulation where we will both be slow and
++		 * likely to have a large set of VLs.
++		 */
++		ret = epoll_wait(epoll_fd, evs, tests, 100);
++		if (ret < 0) {
++			if (errno == EINTR)
++				continue;
++			ksft_exit_fail_msg("epoll_wait() failed: %s (%d)\n",
++					   strerror(errno), errno);
++		}
++
++		/* Output? 
*/ ++ if (ret > 0) { ++ for (i = 0; i < ret; i++) { ++ child_output(evs[i].data.ptr, evs[i].events, ++ false); ++ } ++ continue; ++ } ++ ++ /* Otherwise epoll_wait() timed out */ ++ ++ /* ++ * If the child processes have not produced output they ++ * aren't actually running the tests yet. ++ */ ++ if (!all_children_started) { ++ seen_children = 0; ++ ++ for (i = 0; i < num_children; i++) ++ if (children[i].output_seen || ++ children[i].exited) ++ seen_children++; ++ ++ if (seen_children != num_children) { ++ ksft_print_msg("Waiting for %d children\n", ++ num_children - seen_children); ++ continue; ++ } ++ ++ all_children_started = true; ++ } ++ ++ ksft_print_msg("Sending signals, timeout remaining: %d00ms\n", ++ timeout); ++ ++ for (i = 0; i < num_children; i++) ++ child_tickle(&children[i]); ++ ++ /* Negative timeout means run indefinitely */ ++ if (timeout < 0) ++ continue; ++ if (--timeout == 0) ++ break; ++ } ++ ++ ksft_print_msg("Finishing up...\n"); ++ terminate = true; ++ ++ for (i = 0; i < tests; i++) ++ child_stop(&children[i]); ++ ++ drain_output(false); ++ ++ for (i = 0; i < tests; i++) ++ child_cleanup(&children[i]); ++ ++ drain_output(true); ++ ++ ksft_print_cnts(); ++ ++ return 0; ++} +-- +2.34.1 + + +From dd225d77971ba0aec130153db16df53014141756 Mon Sep 17 00:00:00 2001 +From: Mark Brown <broonie@kernel.org> +Date: Wed, 21 Jun 2023 17:53:57 +0100 +Subject: [PATCH 47/47] kselftest/arm64: Enable GCS for the FP stress tests + +While it's a bit off topic for them the floating point stress tests do give +us some coverage of context thrashing cases, and also of active signal +delivery separate to the relatively complicated framework in the actual +signals tests. Have the tests enable GCS on startup, ignoring failures so +they continue to work as before on systems without GCS. 
+ +Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org> +Signed-off-by: Mark Brown <broonie@kernel.org> +--- + tools/testing/selftests/arm64/fp/assembler.h | 15 +++++++++++++++ + tools/testing/selftests/arm64/fp/fpsimd-test.S | 2 ++ + tools/testing/selftests/arm64/fp/sve-test.S | 2 ++ + tools/testing/selftests/arm64/fp/za-test.S | 2 ++ + tools/testing/selftests/arm64/fp/zt-test.S | 2 ++ + 5 files changed, 23 insertions(+) + +diff --git a/tools/testing/selftests/arm64/fp/assembler.h b/tools/testing/selftests/arm64/fp/assembler.h +index 9b38a0da407d..7012f9f796de 100644 +--- a/tools/testing/selftests/arm64/fp/assembler.h ++++ b/tools/testing/selftests/arm64/fp/assembler.h +@@ -65,4 +65,19 @@ endfunction + bl puts + .endm + ++#define PR_SET_SHADOW_STACK_STATUS 72 ++# define PR_SHADOW_STACK_ENABLE (1UL << 0) ++ ++.macro enable_gcs ++ // Run with GCS ++ mov x0, PR_SET_SHADOW_STACK_STATUS ++ mov x1, PR_SHADOW_STACK_ENABLE ++ mov x2, xzr ++ mov x3, xzr ++ mov x4, xzr ++ mov x5, xzr ++ mov x8, #__NR_prctl ++ svc #0 ++.endm ++ + #endif /* ! 
ASSEMBLER_H */ +diff --git a/tools/testing/selftests/arm64/fp/fpsimd-test.S b/tools/testing/selftests/arm64/fp/fpsimd-test.S +index 8b960d01ed2e..b16fb7f42e3e 100644 +--- a/tools/testing/selftests/arm64/fp/fpsimd-test.S ++++ b/tools/testing/selftests/arm64/fp/fpsimd-test.S +@@ -215,6 +215,8 @@ endfunction + // Main program entry point + .globl _start + function _start ++ enable_gcs ++ + mov x23, #0 // signal count + + mov w0, #SIGINT +diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S +index fff60e2a25ad..2fb4f0b84476 100644 +--- a/tools/testing/selftests/arm64/fp/sve-test.S ++++ b/tools/testing/selftests/arm64/fp/sve-test.S +@@ -378,6 +378,8 @@ endfunction + // Main program entry point + .globl _start + function _start ++ enable_gcs ++ + mov x23, #0 // Irritation signal count + + mov w0, #SIGINT +diff --git a/tools/testing/selftests/arm64/fp/za-test.S b/tools/testing/selftests/arm64/fp/za-test.S +index 095b45531640..b2603aba99de 100644 +--- a/tools/testing/selftests/arm64/fp/za-test.S ++++ b/tools/testing/selftests/arm64/fp/za-test.S +@@ -231,6 +231,8 @@ endfunction + // Main program entry point + .globl _start + function _start ++ enable_gcs ++ + mov x23, #0 // signal count + + mov w0, #SIGINT +diff --git a/tools/testing/selftests/arm64/fp/zt-test.S b/tools/testing/selftests/arm64/fp/zt-test.S +index b5c81e81a379..8d9609a49008 100644 +--- a/tools/testing/selftests/arm64/fp/zt-test.S ++++ b/tools/testing/selftests/arm64/fp/zt-test.S +@@ -200,6 +200,8 @@ endfunction + // Main program entry point + .globl _start + function _start ++ enable_gcs ++ + mov x23, #0 // signal count + + mov w0, #SIGINT +-- +2.34.1 + diff --git a/meta-arm-gcs/recipes-kernel/linux/linux-yocto-dev.bbappend b/meta-arm-gcs/recipes-kernel/linux/linux-yocto-dev.bbappend new file mode 100644 index 00000000..0d6d78b1 --- /dev/null +++ b/meta-arm-gcs/recipes-kernel/linux/linux-yocto-dev.bbappend @@ -0,0 +1,13 @@ +FILESEXTRAPATHS:prepend := 
"${THISDIR}/files:" + +KBRANCH = "v6.8/base" +SRCREV_machine = "e8f897f4afef0031fe618a8e94127a0934896aba" +SRCREV_meta = "69506f439abc9bde9dae104e53c597ed472b5940" +LINUX_VERSION = "6.8.0" + +SRC_URI += "file://gcs.patch" + +# TMPDIR references in: +# /usr/src/debug/linux-yocto-dev/6.8.0+git/drivers/tty/vt/consolemap_deftbl.c +# /usr/src/debug/linux-yocto-dev/6.8.0+git/lib/oid_registry_data.c +INSANE_SKIP:${PN}-src += "buildpaths" |